Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

""" 

SpiderManager is the class which locates and manages all website-specific 

spiders 

""" 

 

from zope.interface import implements 

 

from scrapy import signals 

from scrapy.interfaces import ISpiderManager 

from scrapy.utils.misc import walk_modules 

from scrapy.utils.spider import iter_spider_classes 

from scrapy.xlib.pydispatch import dispatcher 

 

 

class SpiderManager(object):
    """Registry of the spider classes found in a list of modules.

    On construction every module name in ``spider_modules`` is walked with
    ``walk_modules`` and each spider class yielded by ``iter_spider_classes``
    is stored under its ``name`` attribute. The instance also connects
    ``close_spider`` to the ``spider_closed`` signal.
    """

    implements(ISpiderManager)

    def __init__(self, spider_modules):
        """Load all spider classes from ``spider_modules``.

        :param spider_modules: iterable of module names passed one by one
            to ``walk_modules``.
        """
        self.spider_modules = spider_modules
        self._spiders = {}
        for name in self.spider_modules:
            for module in walk_modules(name):
                self._load_spiders(module)
        # Lets a spider's optional ``closed`` hook run when it is closed.
        dispatcher.connect(self.close_spider, signals.spider_closed)

    def _load_spiders(self, module):
        """Register every spider class found in ``module`` by its name.

        A class registered later under an already-used name replaces the
        earlier entry.
        """
        for spcls in iter_spider_classes(module):
            self._spiders[spcls.name] = spcls

    @classmethod
    def from_settings(cls, settings):
        """Build a manager from the ``SPIDER_MODULES`` setting."""
        return cls(settings.getlist('SPIDER_MODULES'))

    @classmethod
    def from_crawler(cls, crawler):
        """Build a manager from ``crawler.settings``."""
        return cls.from_settings(crawler.settings)

    def create(self, spider_name, **spider_kwargs):
        """Instantiate the spider registered as ``spider_name``.

        :param spider_name: name the spider class was registered under.
        :param spider_kwargs: keyword arguments forwarded to the spider
            class constructor.
        :raises KeyError: if no spider is registered under that name.
        """
        try:
            spcls = self._spiders[spider_name]
        except KeyError:
            raise KeyError("Spider not found: %s" % spider_name)
        return spcls(**spider_kwargs)

    def find_by_request(self, request):
        """Return the names of all spiders whose class handles ``request``."""
        # ``items()`` behaves the same as ``iteritems()`` here and is also
        # available on Python 3 dictionaries.
        return [name for name, cls in self._spiders.items()
                if cls.handles_request(request)]

    def list(self):
        """Return a list with the names of all registered spiders."""
        # ``list`` below is the builtin: method bodies do not see class-level
        # names, so this method does not shadow it here.
        return list(self._spiders.keys())

    def close_spider(self, spider, reason):
        """Call ``spider.closed(reason)`` if the spider defines it.

        Connected to the ``spider_closed`` signal in ``__init__``. Returns
        whatever the ``closed`` callable returns, or ``None`` when the
        spider has no callable ``closed`` attribute.
        """
        closed = getattr(spider, 'closed', None)
        if callable(closed):
            return closed(reason)