Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

""" 

Scrapy extension for collecting scraping stats 

""" 

import pprint 

 

from scrapy.xlib.pydispatch import dispatcher 

 

from scrapy.signals import stats_spider_opened, stats_spider_closing, \ 

    stats_spider_closed 

from scrapy.utils.signal import send_catch_log 

from scrapy import signals 

from scrapy import log 

from scrapy.conf import settings 

 

class StatsCollector(object):
    """Base collector that keeps scraping stats in memory, one dict per spider.

    Stats live in ``self._stats``, a dict keyed by spider object; the ``None``
    key holds the global (not spider-specific) stats. Every accessor takes an
    optional ``spider`` argument and raises ``KeyError`` when no stats dict
    exists for that spider (``open_spider`` creates it, ``close_spider``
    removes it).

    Subclasses can override ``_persist_stats`` to store the stats somewhere
    when a spider is closed or the engine is stopped.
    """

    def __init__(self):
        # When true, stats are written to the log in close_spider() and
        # engine_stopped().
        self._dump = settings.getbool('STATS_DUMP')
        self._stats = {None: {}}  # None is for global stats

    def get_value(self, key, default=None, spider=None):
        """Return the stat stored under ``key``, or ``default`` if unset."""
        return self._stats[spider].get(key, default)

    def get_stats(self, spider=None):
        """Return the stats dict itself (not a copy) for ``spider``."""
        return self._stats[spider]

    def set_value(self, key, value, spider=None):
        """Store ``value`` under ``key``, overwriting any previous value."""
        self._stats[spider][key] = value

    def set_stats(self, stats, spider=None):
        """Replace the whole stats dict of ``spider`` with ``stats``.

        The given object is stored as-is, without copying.
        """
        self._stats[spider] = stats

    def inc_value(self, key, count=1, start=0, spider=None):
        """Add ``count`` to the stat ``key``, starting from ``start`` if unset."""
        d = self._stats[spider]
        d[key] = d.setdefault(key, start) + count

    def max_value(self, key, value, spider=None):
        """Set ``key`` to ``value`` if unset or if ``value`` is greater."""
        d = self._stats[spider]
        d[key] = max(d.setdefault(key, value), value)

    def min_value(self, key, value, spider=None):
        """Set ``key`` to ``value`` if unset or if ``value`` is smaller."""
        d = self._stats[spider]
        d[key] = min(d.setdefault(key, value), value)

    def clear_stats(self, spider=None):
        """Remove every stat of ``spider``, keeping its (now empty) dict."""
        self._stats[spider].clear()

    def iter_spider_stats(self):
        """Return a list of ``(spider, stats)`` pairs, one per tracked spider.

        The global stats entry (the ``None`` key) is left out.
        """
        # items() rather than the Python 2-only iteritems(); the result was
        # already materialised into a list, so nothing is lost.
        return [(spider, stats) for spider, stats in self._stats.items()
                if spider is not None]

    def open_spider(self, spider):
        """Create an empty stats dict for ``spider`` and signal it was opened."""
        self._stats[spider] = {}
        send_catch_log(stats_spider_opened, spider=spider)

    def close_spider(self, spider, reason):
        """Drop the stats of ``spider``, signalling before and after.

        ``stats_spider_closing`` is sent while the stats are still reachable
        through this collector; ``stats_spider_closed`` is sent after they
        have been removed and carries them as ``spider_stats``. The stats are
        then logged (if dumping is enabled) and passed to ``_persist_stats``.

        Raises ``KeyError`` if there is no stats dict for ``spider``.
        """
        send_catch_log(stats_spider_closing, spider=spider, reason=reason)
        stats = self._stats.pop(spider)
        send_catch_log(stats_spider_closed, spider=spider, reason=reason,
                       spider_stats=stats)
        if self._dump:
            log.msg("Dumping spider stats:\n" + pprint.pformat(stats),
                    spider=spider)
        self._persist_stats(stats, spider)

    def engine_stopped(self):
        """Log (if dumping is enabled) and persist the global stats."""
        stats = self.get_stats()
        if self._dump:
            log.msg("Dumping global stats:\n" + pprint.pformat(stats))
        self._persist_stats(stats, spider=None)

    def _persist_stats(self, stats, spider=None):
        """Hook for subclasses; the base implementation does nothing.

        ``spider`` is ``None`` when ``stats`` are the global stats.
        """
        pass

 

class MemoryStatsCollector(StatsCollector):
    """Stats collector that remembers the stats handed to ``_persist_stats``.

    Stats persisted for a spider are kept in ``self.spider_stats`` under the
    spider's ``name``; stats persisted without a spider are not kept.
    """

    def __init__(self):
        super(MemoryStatsCollector, self).__init__()
        # Maps spider.name -> stats dict received by _persist_stats().
        self.spider_stats = {}

    def _persist_stats(self, stats, spider=None):
        """Record ``stats`` under ``spider.name``; ignore calls without a spider."""
        if spider is None:
            return
        self.spider_stats[spider.name] = stats

 

 

class DummyStatsCollector(StatsCollector):
    """Stats collector whose value accessors store and report nothing.

    Only the methods below are overridden; everything else is inherited
    from ``StatsCollector``.
    """

    def get_value(self, key, default=None, spider=None):
        """Always return ``default``; no stat is ever looked up."""
        return default

    def set_value(self, key, value, spider=None):
        """Do nothing."""

    def set_stats(self, stats, spider=None):
        """Do nothing."""

    def inc_value(self, key, count=1, start=0, spider=None):
        """Do nothing."""

    def max_value(self, key, value, spider=None):
        """Do nothing."""

    def min_value(self, key, value, spider=None):
        """Do nothing."""