Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

import os, cPickle as pickle 

 

from scrapy import signals 

from scrapy.xlib.pydispatch import dispatcher 

 

class SpiderState(object):
    """Store and load spider state during a scraping job.

    When a job directory is configured, ``spider.state`` is pickled to a
    ``spider.state`` file inside that directory when the spider closes, and
    restored from the same file when the spider opens. Without a job
    directory nothing is written and every spider starts with an empty dict.
    """

    def __init__(self, jobdir=None):
        """Remember the job directory.

        :param jobdir: directory that holds the state file, or ``None``
            (or any falsy value) to disable persistence.
        """
        self.jobdir = jobdir

    @classmethod
    def from_crawler(cls, crawler):
        """Build the extension from *crawler* and hook it to spider signals.

        The job directory is read from the ``JOBDIR`` setting, and the
        ``spider_closed`` / ``spider_opened`` handlers are connected to the
        signals of the same name.

        :param crawler: object exposing ``settings.get``.
        :returns: the newly created instance.
        """
        obj = cls(crawler.settings.get('JOBDIR'))
        dispatcher.connect(obj.spider_closed, signal=signals.spider_closed)
        dispatcher.connect(obj.spider_opened, signal=signals.spider_opened)
        return obj

    def spider_closed(self, spider):
        """Pickle ``spider.state`` into the state file, if a jobdir is set.

        :param spider: object whose ``state`` attribute is saved.
        """
        if self.jobdir:
            with open(self.statefn, 'wb') as f:
                pickle.dump(spider.state, f, protocol=2)

    def spider_opened(self, spider):
        """Set ``spider.state`` from the state file, or to an empty dict.

        The saved state is loaded only when a jobdir is set and the state
        file already exists; in every other case ``spider.state`` becomes
        ``{}``.

        :param spider: object that receives the ``state`` attribute.
        """
        if self.jobdir and os.path.exists(self.statefn):
            # The file is written in binary mode with pickle protocol 2, so
            # it must be read in binary mode as well: text mode applies
            # newline translation on Windows and breaks unpickling.
            # NOTE: unpickling can execute arbitrary code, so the job
            # directory has to be a location only trusted users can write to.
            with open(self.statefn, 'rb') as f:
                spider.state = pickle.load(f)
        else:
            spider.state = {}

    @property
    def statefn(self):
        """Path of the ``spider.state`` file inside the job directory."""
        return os.path.join(self.jobdir, 'spider.state')