Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

""" 

Common code and definitions used by Link extractors (located in 

scrapy.contrib.linkextractor). 

""" 

 

# File extensions (lowercase, without the leading dot) that are not followed
# when they occur in links. The list is assembled from one literal per
# category; concatenation order fixes the order of the final list.
IGNORED_EXTENSIONS = (
    # images
    ['mng', 'pct', 'bmp', 'gif', 'jpg', 'jpeg', 'png', 'pst', 'psp']
    + ['tif', 'tiff', 'ai', 'drw', 'dxf', 'eps', 'ps', 'svg']
    # audio
    + ['mp3', 'wma', 'ogg', 'wav', 'ra', 'aac', 'mid', 'au', 'aiff']
    # video
    # NOTE(review): 'm4a' is usually an audio format; it stays in this group
    # so the resulting list order is unchanged.
    + ['3gp', 'asf', 'asx', 'avi', 'mov', 'mp4', 'mpg', 'qt', 'rm', 'swf']
    + ['wmv', 'm4a']
    # other
    + ['css', 'pdf', 'doc', 'exe', 'bin', 'rss', 'zip', 'rar']
)