Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

""" 

Base class for Scrapy spiders 

 

See documentation in docs/topics/spiders.rst 

""" 

 

from scrapy import log 

from scrapy.settings import SpiderSettings 

from scrapy.http import Request 

from scrapy.utils.misc import arg_to_iter 

from scrapy.utils.trackref import object_ref 

from scrapy.utils.url import url_is_from_spider 

 

 

class BaseSpider(object_ref):
    """Base class for scrapy spiders. All spiders must inherit from this
    class.

    A spider needs a ``name``, supplied either as a class attribute or through
    the constructor. Extra constructor keyword arguments are stored as
    instance attributes, and ``start_urls`` is set to an empty list when
    neither the class nor the keyword arguments provide it.
    """

    name = None

    def __init__(self, name=None, **kwargs):
        """Set the spider name and copy ``kwargs`` onto the instance.

        Raises ValueError when ``name`` is None and the class does not
        define a non-empty ``name`` of its own.
        """
        if name is not None:
            self.name = name
        elif not getattr(self, 'name', None):
            raise ValueError("%s must have a name" % type(self).__name__)
        # Arbitrary keyword arguments become plain instance attributes
        self.__dict__.update(kwargs)
        if not hasattr(self, 'start_urls'):
            self.start_urls = []

    def log(self, message, level=log.DEBUG):
        """Log the given messages at the given log level. Always use this
        method to send log messages from your spider
        """
        log.msg(message, spider=self, level=level)

    def set_crawler(self, crawler):
        """Attach ``crawler`` to this spider; this can be done only once.

        Raises AssertionError when a crawler has already been set.
        """
        # Explicit raise instead of ``assert`` so the check also runs under
        # ``python -O``; the exception type is unchanged.
        if hasattr(self, '_crawler'):
            # Name the crawler that is already attached, not the rejected one
            raise AssertionError(
                "Spider already bounded to %s" % self._crawler)
        self._crawler = crawler

    @property
    def crawler(self):
        """The crawler set through ``set_crawler``.

        Raises AssertionError when no crawler has been set yet.
        """
        if not hasattr(self, '_crawler'):
            raise AssertionError("Spider not bounded to any crawler")
        return self._crawler

    @property
    def settings(self):
        """``SpiderSettings`` built from this spider and its crawler settings.

        Created on first access and cached in ``_settings``; the first access
        goes through ``self.crawler``, so it fails if no crawler is set.
        """
        if not hasattr(self, '_settings'):
            self._settings = SpiderSettings(self, self.crawler.settings)
        return self._settings

    def start_requests(self):
        """Yield one request per entry of ``start_urls``, each built by
        ``make_requests_from_url``.
        """
        for url in self.start_urls:
            yield self.make_requests_from_url(url)

    def make_requests_from_url(self, url):
        """Return a ``Request`` for ``url`` created with ``dont_filter=True``."""
        return Request(url, dont_filter=True)

    def parse(self, response):
        """Handle ``response``. Not implemented here: always raises
        NotImplementedError.
        """
        raise NotImplementedError

    @classmethod
    def handles_request(cls, request):
        """Return the result of ``url_is_from_spider`` for the request URL and
        this spider class.
        """
        return url_is_from_spider(request.url, cls)

    def __str__(self):
        """Return ``<ClassName 'name' at 0xID>`` for this spider."""
        return "<%s %r at 0x%0x>" % (type(self).__name__, self.name, id(self))

    __repr__ = __str__

 

 

class ObsoleteClass(object):
    """Stand-in for a removed object.

    Looking up any attribute that normal lookup does not find raises
    AttributeError carrying the message given at construction time.
    """

    def __init__(self, message):
        # Stored as a regular instance attribute, so reading ``message``
        # itself is resolved normally and never reaches __getattr__.
        self.message = message

    def __getattr__(self, name):
        # Python calls this hook only after regular attribute lookup failed.
        error = AttributeError(self.message)
        raise error

 

# Module-level placeholder kept under the old ``spiders`` name: it is an
# ObsoleteClass instance constructed with the migration message below.
spiders = ObsoleteClass(""" 

"from scrapy.spider import spiders" no longer works - use "from scrapy.project import crawler" and then access crawler.spiders attribute" 

""")