Coverage for scrapy/contrib/spiders/sitemap : 76%
Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
|
if response.url.endswith('/robots.txt'): for url in sitemap_urls_from_robots(response.body): yield Request(url, callback=self._parse_sitemap) else: body = self._get_sitemap_body(response) if body is None: log.msg("Ignoring invalid sitemap: %s" % response, log.WARNING) return
s = Sitemap(body) if s.type == 'sitemapindex': for loc in iterloc(s): if any(x.search(loc) for x in self._follow): yield Request(loc, callback=self._parse_sitemap) elif s.type == 'urlset': for loc in iterloc(s): for r, c in self._cbs: if r.search(loc): yield Request(loc, callback=c) break
"""Return the sitemap body contained in the given response, or None if the response is not a sitemap. """
return x
for d in it: yield d['loc'] |