Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

""" 

Spider Middleware manager 

 

See documentation in docs/topics/spider-middleware.rst 

""" 

 

from twisted.python.failure import Failure 

from scrapy.middleware import MiddlewareManager 

from scrapy.utils.defer import mustbe_deferred 

from scrapy.utils.conf import build_component_list 

 

def _isiterable(possible_iterator): 

    return hasattr(possible_iterator, '__iter__') 

 

class SpiderMiddlewareManager(MiddlewareManager):
    """Middleware manager that runs spider middleware hooks around a scrape.

    For every registered middleware it collects the optional
    ``process_spider_input``, ``process_spider_output`` and
    ``process_spider_exception`` methods, and ``scrape_response`` chains
    them around the scrape function it is given.
    """

    component_name = 'spider middleware'

    @classmethod
    def _get_mwlist_from_settings(cls, settings):
        """Return the middleware list built from the spider-middleware settings.

        ``settings['SPIDER_MIDDLEWARES_BASE']`` and
        ``settings['SPIDER_MIDDLEWARES']`` are handed to
        ``build_component_list`` and its result is returned unchanged.
        """
        return build_component_list(settings['SPIDER_MIDDLEWARES_BASE'],
                                    settings['SPIDER_MIDDLEWARES'])

    def _add_middleware(self, mw):
        """Register the spider hooks that ``mw`` defines.

        Input hooks are appended, so ``scrape_response`` calls them in
        registration order. Output and exception hooks are inserted at the
        front, so they are called in the reverse of registration order.
        A middleware may define any subset of the three hooks.
        """
        super(SpiderMiddlewareManager, self)._add_middleware(mw)
        if hasattr(mw, 'process_spider_input'):
            self.methods['process_spider_input'].append(mw.process_spider_input)
        if hasattr(mw, 'process_spider_output'):
            self.methods['process_spider_output'].insert(0, mw.process_spider_output)
        if hasattr(mw, 'process_spider_exception'):
            self.methods['process_spider_exception'].insert(0, mw.process_spider_exception)

    def scrape_response(self, scrape_func, response, request, spider):
        """Run ``scrape_func`` on ``response`` wrapped by the middleware hooks.

        The input hooks run first; then ``scrape_func`` is called with
        ``(response, request, spider)``, or with a ``Failure`` in place of
        the response if an input hook raised or returned a non-None value.
        The exception hooks are attached as an errback and the output hooks
        as a callback on the resulting deferred.

        Returns the deferred created by ``mustbe_deferred`` with both
        handlers attached.
        """

        def fname(f):
            # "Class.method" label of a bound hook for assertion messages.
            # ``__self__``/``__func__`` exist on Python 2.6+ and Python 3,
            # whereas ``im_self``/``im_func`` are Python 2 only.
            return '%s.%s' % (f.__self__.__class__.__name__, f.__func__.__name__)

        def process_spider_input(response):
            for method in self.methods['process_spider_input']:
                try:
                    result = method(response=response, spider=spider)
                    assert result is None, \
                            'Middleware %s must returns None or ' \
                            'raise an exception, got %s ' \
                            % (fname(method), type(result))
                except Exception:
                    # Failure() captures the exception being handled (this
                    # includes the AssertionError above). The remaining
                    # input hooks are skipped and the failure is passed to
                    # scrape_func in place of the response. Only Exception
                    # is caught so SystemExit/KeyboardInterrupt are not
                    # converted into a Failure here.
                    return scrape_func(Failure(), request, spider)
            return scrape_func(response, request, spider)

        def process_spider_exception(_failure):
            exception = _failure.value
            for method in self.methods['process_spider_exception']:
                result = method(response=response, exception=exception, spider=spider)
                assert result is None or _isiterable(result), \
                    'Middleware %s must returns None, or an iterable object, got %s ' % \
                    (fname(method), type(result))
                # The first hook that returns an iterable handles the error;
                # later exception hooks are not consulted.
                if result is not None:
                    return result
            # No hook handled the exception: hand the failure back so the
            # deferred stays in its error state.
            return _failure

        def process_spider_output(result):
            # Each hook receives the previous hook's return value.
            for method in self.methods['process_spider_output']:
                result = method(response=response, result=result, spider=spider)
                assert _isiterable(result), \
                    'Middleware %s must returns an iterable object, got %s ' % \
                    (fname(method), type(result))
            return result

        dfd = mustbe_deferred(process_spider_input, response)
        dfd.addErrback(process_spider_exception)
        dfd.addCallback(process_spider_output)
        return dfd