Here are examples of the Python API scrapy.utils.response.get_meta_refresh, taken from open source projects. By voting up, you can indicate which examples are most useful and appropriate.
3 Examples
3
Source : redirect.py
with MIT License
from autofelix
with MIT License
from autofelix
def process_response(self, request, response, spider):
    """Follow an HTML meta-refresh redirect when its delay is short enough.

    The response is returned untouched when the request opted out via
    ``meta['dont_redirect']``, when the request method is ``HEAD``, or when
    the response is not an ``HtmlResponse``.

    Otherwise ``get_meta_refresh`` is asked for the ``(interval, url)`` pair
    (skipping the tags listed in ``self._ignore_tags``). If a URL was found
    and the interval is below ``self._maxdelay``, the result of
    ``self._redirect`` for a request built by
    ``self._redirect_request_using_get`` is returned; in every other case
    the original response is returned.
    """
    # Checks are evaluated in this order so that the isinstance test is only
    # reached when the two cheaper request checks did not already match.
    skip_redirect = (
        request.meta.get('dont_redirect', False)
        or request.method == 'HEAD'
        or not isinstance(response, HtmlResponse)
    )
    if skip_redirect:
        return response

    delay, target = get_meta_refresh(
        response, ignore_tags=self._ignore_tags)

    # No refresh URL, or the refresh delay is not below the allowed maximum.
    if not target or not (delay < self._maxdelay):
        return response

    follow_up = self._redirect_request_using_get(request, target)
    return self._redirect(follow_up, request, spider, 'meta refresh')
3
Source : redirect.py
with The Unlicense
from dspray95
with The Unlicense
from dspray95
def process_response(self, request, response, spider):
    """Follow an HTML meta-refresh redirect when its delay is short enough.

    Returns the response unchanged when ``meta['dont_redirect']`` is set on
    the request, when the request method is ``HEAD``, or when the response
    is not an ``HtmlResponse``.

    Otherwise the ``(interval, url)`` pair from ``get_meta_refresh`` is
    inspected: when a URL is present and the interval is below
    ``self._maxdelay``, the value of ``self._redirect`` (called with a
    request produced by ``self._redirect_request_using_get`` and the reason
    ``'meta refresh'``) is returned. In all remaining cases the original
    response is returned.
    """
    # Same short-circuit order as the checks have always had: request opt-out,
    # then HEAD, then the response type.
    if request.meta.get('dont_redirect', False):
        return response
    if request.method == 'HEAD':
        return response
    if not isinstance(response, HtmlResponse):
        return response

    refresh = get_meta_refresh(response)
    wait, destination = refresh

    should_follow = bool(destination) and wait < self._maxdelay
    if should_follow:
        new_request = self._redirect_request_using_get(request, destination)
        return self._redirect(new_request, request, spider, 'meta refresh')

    return response
0
Source : middlewares.py
with MIT License
from Karmenzind
with MIT License
from Karmenzind
def process_response(self, request, response, spider):
    """Retry the request when the response looks like a redirect or captcha.

    Three conditions are checked in order, and the first one that matches
    logs a message at INFO level and hands the request to ``self._retry``
    with a short reason string:

    1. HTTP status 301 or 307            -> reason ``'redirect <status>'``
    2. a meta-refresh URL in the page    -> reason ``'meta'``
    3. an ``<input>`` whose ``id`` contains ``'captchacharacters'``
                                         -> reason ``'capcha'``

    Whatever ``self._retry`` returns is passed on if it is truthy; otherwise
    the original response is returned. When none of the conditions match,
    the original response is returned as-is.
    """
    page_url = response.url

    # 1. Plain HTTP redirect statuses.
    if response.status in (301, 307):
        log.msg("trying to redirect us: %s" % page_url, level=log.INFO)
        why = 'redirect %d' % response.status
        retried = self._retry(request, why, spider)
        return retried or response

    # 2. Meta-refresh redirect embedded in the document. Only the URL part of
    #    the (interval, url) pair matters here. The log line reports the
    #    page's own URL, not the refresh target.
    _interval, refresh_target = get_meta_refresh(response)
    if refresh_target:
        log.msg("trying to redirect us: %s" % page_url, level=log.INFO)
        why = 'meta'
        retried = self._retry(request, why, spider)
        return retried or response

    # 3. Captcha form detection via XPath.
    selector = HtmlXPathSelector(response)
    captcha_inputs = selector.select(
        ".//input[contains(@id, 'captchacharacters')]").extract()
    if captcha_inputs:
        log.msg("captcha page %s" % page_url, level=log.INFO)
        why = 'capcha'
        retried = self._retry(request, why, spider)
        return retried or response

    return response
class PureRedisMiddleware(HttpProxyMiddleware):