Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

""" 

This module implements a class which returns the appropriate Response class

based on different criteria.

 

""" 

 

from mimetypes import MimeTypes 

from pkgutil import get_data 

from cStringIO import StringIO 

 

from scrapy.http import Response 

from scrapy.utils.misc import load_object 

from scrapy.utils.python import isbinarytext 

from scrapy.conf import settings 

 

class ResponseTypes(object):
    """Select the Response class that best matches a response.

    The class can be chosen from a mimetype, HTTP headers, a file name or
    the body itself; ``from_args`` combines these sources.
    """

    # Default mapping from a mimetype (or a "<type>/*" wildcard) to the dotted
    # path of the Response class used for it.
    CLASSES = {
        'text/html': 'scrapy.http.HtmlResponse',
        'application/atom+xml': 'scrapy.http.XmlResponse',
        'application/rdf+xml': 'scrapy.http.XmlResponse',
        'application/rss+xml': 'scrapy.http.XmlResponse',
        'application/xhtml+xml': 'scrapy.http.HtmlResponse',
        'application/vnd.wap.xhtml+xml': 'scrapy.http.HtmlResponse',
        'application/xml': 'scrapy.http.XmlResponse',
        'application/json': 'scrapy.http.TextResponse',
        'application/javascript': 'scrapy.http.TextResponse',
        'application/x-javascript': 'scrapy.http.TextResponse',
        'text/xml': 'scrapy.http.XmlResponse',
        'text/*': 'scrapy.http.TextResponse',
    }

    def __init__(self):
        """Build the mimetype -> Response class table and the mimetypes db.

        The class-level defaults are merged with the ``RESPONSE_CLASSES``
        setting, every dotted path is resolved with ``load_object``, and the
        extension database is read from the ``mime.types`` data file of the
        ``scrapy`` package.
        """
        # Merge into a per-instance copy: updating the class attribute in
        # place would leak settings into the class and into later instances.
        self.CLASSES = dict(self.CLASSES)
        self.CLASSES.update(settings.get('RESPONSE_CLASSES', {}))
        self.classes = {}
        self.mimetypes = MimeTypes()
        mimedata = get_data('scrapy', 'mime.types')
        self.mimetypes.readfp(StringIO(mimedata))
        for mimetype, cls in self.CLASSES.items():
            self.classes[mimetype] = load_object(cls)

    def from_mimetype(self, mimetype):
        """Return the most appropriate Response class for the given mimetype.

        An exact match is preferred, then the "<type>/*" wildcard entry, and
        finally the generic ``Response``. ``None`` yields ``Response``.
        """
        if mimetype is None:
            return Response
        if mimetype in self.classes:
            return self.classes[mimetype]
        # No exact match: try the wildcard entry for the main type.
        basetype = "%s/*" % mimetype.split('/')[0]
        if basetype in self.classes:
            return self.classes[basetype]
        return Response

    def from_content_type(self, content_type, content_encoding=None):
        """Return the most appropriate Response class from an HTTP
        Content-Type header.

        When ``content_encoding`` is set, the generic ``Response`` is
        returned regardless of the content type.
        """
        if content_encoding:
            return Response
        # Drop parameters such as "; charset=..." and normalise the case.
        mimetype = content_type.split(';')[0].strip().lower()
        return self.from_mimetype(mimetype)

    def from_content_disposition(self, content_disposition):
        """Return the most appropriate Response class from an HTTP
        Content-Disposition header, using the file name it carries.

        The generic ``Response`` is returned when the header has no second
        ``;``-separated parameter or when that parameter contains no ``=``.
        """
        try:
            param = content_disposition.split(';')[1]
            # Split on the first '=' only, so that file names which contain
            # '=' themselves are kept whole.
            filename = param.split('=', 1)[1]
        except IndexError:
            return Response
        # Remove surrounding whitespace first, otherwise the optional quotes
        # would not be stripped.
        filename = filename.strip().strip('"\'')
        return self.from_filename(filename)

    def from_headers(self, headers):
        """Return the most appropriate Response class by looking at the HTTP
        headers.

        Content-Type (together with Content-Encoding) is tried first;
        Content-Disposition is only consulted when that still yields the
        generic ``Response``.
        """
        cls = Response
        if 'Content-Type' in headers:
            # Use the same key spelling for the membership test and the
            # lookup so this also works with case-sensitive mappings.
            cls = self.from_content_type(headers['Content-Type'],
                                         headers.get('Content-Encoding'))
        if cls is Response and 'Content-Disposition' in headers:
            cls = self.from_content_disposition(headers['Content-Disposition'])
        return cls

    def from_filename(self, filename):
        """Return the most appropriate Response class from a file name.

        The generic ``Response`` is returned when no mimetype can be guessed
        or when an encoding is guessed for the file.
        """
        mimetype, encoding = self.mimetypes.guess_type(filename)
        if mimetype and not encoding:
            return self.from_mimetype(mimetype)
        return Response

    def from_body(self, body):
        """Try to guess the appropriate response based on the body content.

        This method is a bit magic and could be improved in the future, but
        it's not meant to be used except for special cases where response
        types cannot be guessed using more straightforward methods.

        Only the first 5000 characters of the body are inspected.
        """
        chunk = body[:5000]
        if isbinarytext(chunk):
            return self.from_mimetype('application/octet-stream')
        lowered = chunk.lower()
        if "<html>" in lowered:
            return self.from_mimetype('text/html')
        if "<?xml" in lowered:
            return self.from_mimetype('text/xml')
        # 'text' has no subtype, so this resolves through the 'text/*' entry.
        return self.from_mimetype('text')

    def from_args(self, headers=None, url=None, filename=None, body=None):
        """Guess the most appropriate Response class based on the given
        arguments.

        Sources are tried in order (headers, url, filename, body); each one
        is only used while the result is still the generic ``Response``.
        """
        cls = Response
        if headers is not None:
            cls = self.from_headers(headers)
        if cls is Response and url is not None:
            cls = self.from_filename(url)
        if cls is Response and filename is not None:
            cls = self.from_filename(filename)
        if cls is Response and body is not None:
            cls = self.from_body(body)
        return cls

 

# Module-level ResponseTypes instance; creating it runs
# ResponseTypes.__init__ when this module is imported.
responsetypes = ResponseTypes()