Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

""" 

HttpError Spider Middleware 

 

See documentation in docs/topics/spider-middleware.rst 

""" 

from scrapy.exceptions import IgnoreRequest 

 

 

class HttpError(IgnoreRequest):
    """Signals that a response with a non-200 status was filtered out.

    The rejected response is kept on the instance as ``response`` so that
    whoever handles the exception can inspect it.
    """

    def __init__(self, response, *args, **kwargs):
        """Remember *response*, then initialise the base exception.

        Any additional positional and keyword arguments are passed through
        unchanged to the parent class initialiser.
        """
        # Attach the offending response before delegating to the parent.
        self.response = response
        super(HttpError, self).__init__(*args, **kwargs)

 

 

class HttpErrorMiddleware(object):
    """Spider middleware that rejects responses with a non-2xx status.

    A response is let through when its status is in the 200-299 range, or
    when the request meta / the spider explicitly allows the status.  Any
    other response causes ``HttpError`` to be raised.
    """

    def process_spider_input(self, response, spider):
        """Decide whether *response* may continue on to the spider.

        Returns ``None`` (response accepted) when any of these holds:

        * ``200 <= response.status < 300``;
        * the key ``'handle_httpstatus_all'`` is present in
          ``response.request.meta`` (only the key's presence is checked,
          not its value);
        * ``response.status`` is contained in the allowed statuses, taken
          from ``meta['handle_httpstatus_list']`` if that key exists,
          otherwise from the spider's ``handle_httpstatus_list`` attribute
          (defaulting to an empty tuple).

        Raises:
            HttpError: carrying *response*, when none of the above applies.
        """
        if 200 <= response.status < 300:  # common case
            return
        meta = response.request.meta
        if 'handle_httpstatus_all' in meta:
            return
        # A per-request list takes precedence over the spider-wide attribute.
        if 'handle_httpstatus_list' in meta:
            allowed_statuses = meta['handle_httpstatus_list']
        else:
            allowed_statuses = getattr(spider, 'handle_httpstatus_list', ())
        if response.status in allowed_statuses:
            return
        raise HttpError(response, 'Ignoring non-200 response')

    def process_spider_exception(self, response, exception, spider):
        """Handle an ``HttpError`` by producing an empty result list.

        Returns ``[]`` when *exception* is an ``HttpError`` instance and
        ``None`` for any other exception.
        NOTE(review): how the framework treats a ``None`` return is defined
        by Scrapy's spider middleware contract, not by this file - confirm
        against the Scrapy documentation.
        """
        if isinstance(exception, HttpError):
            return []
        return None