Here are examples of the Python API scrapy.utils.request.referer_str taken from open source projects. By voting up, you can indicate which examples are most useful and appropriate.
5 Examples
3
Example 1
Project: scrapy Source File: logformatter.py
def crawled(self, request, response, spider):
    """Build the log entry for a crawled response.

    Returns a dict with three keys: ``level`` (``logging.DEBUG``),
    ``msg`` (``CRAWLEDMSG``) and ``args`` (the response status, the
    request, its referer string and the rendered response flags).
    """
    # Flags are rendered with a leading space, or omitted entirely when
    # the response carries none.
    if response.flags:
        flags = ' %s' % str(response.flags)
    else:
        flags = ''

    log_args = {
        'status': response.status,
        'request': request,
        'referer': referer_str(request),
        'flags': flags,
    }
    return {'level': logging.DEBUG, 'msg': CRAWLEDMSG, 'args': log_args}
3
Example 2
def media_failed(self, failure, request, info):
    """Report a failed media download, then raise ``FileException``.

    A warning is logged unless ``failure.value`` is an ``IgnoreRequest``.
    """
    error = failure.value
    if not isinstance(error, IgnoreRequest):
        referrer = referer_str(request)
        message = (
            'File (unknown-error): Error downloading %(medianame)s from '
            '%(request)s referred in <%(referer)s>: %(exception)s'
        )
        log_args = {
            'medianame': self.MEDIA_NAME,
            'request': request,
            'referer': referrer,
            'exception': error,
        }
        logger.warning(message, log_args, extra={'spider': info.spider})

    # NOTE(review): the raise is taken to be unconditional (outside the
    # ``if``), so ignored requests raise too without logging - confirm.
    raise FileException
0
Example 3
def handle_spider_error(self, _failure, request, response, spider):
    """Handle a failure raised while a spider processed a response.

    If the wrapped exception is ``CloseSpider`` the engine is asked to
    close the spider (reason defaults to ``'cancelled'``) and nothing
    else happens. Otherwise the error is logged, the ``spider_error``
    signal is sent and a per-exception-class stats counter is incremented.
    """
    exc = _failure.value

    if isinstance(exc, CloseSpider):
        # Fall back to 'cancelled' when the exception carries no reason.
        close_reason = exc.reason or 'cancelled'
        self.crawler.engine.close_spider(spider, close_reason)
        return

    log_args = {'request': request, 'referer': referer_str(request)}
    logger.error(
        "Spider error processing %(request)s (referer: %(referer)s)",
        log_args,
        exc_info=failure_to_exc_info(_failure),
        extra={'spider': spider}
    )

    self.signals.send_catch_log(
        signal=signals.spider_error,
        failure=_failure,
        response=response,
        spider=spider
    )

    # One counter per exception class name.
    stat_key = "spider_exceptions/%s" % _failure.value.__class__.__name__
    self.crawler.stats.inc_value(stat_key, spider=spider)
0
Example 4
Project: scrapy Source File: files.py
def media_to_download(self, request, info):
    """Check the store for an existing, non-expired copy of the file.

    Returns the deferred produced by calling ``self.store.stat_file``.
    Its success callback yields a dict with ``url``, ``path`` and
    ``checksum`` when the stored file is still fresh, and ``None``
    otherwise. A failure of the stat call is also turned into ``None``.
    """
    path = self.file_path(request, info=info)

    def _onsuccess(result):
        # Every bare ``return`` below yields None, which forces a download.
        if not result:
            return

        last_modified = result.get('last_modified', None)
        if not last_modified:
            return

        # Age of the stored file in days, compared with self.expires.
        age_days = (time.time() - last_modified) / 60 / 60 / 24
        if age_days > self.expires:
            return

        referrer = referer_str(request)
        log_args = {
            'medianame': self.MEDIA_NAME,
            'request': request,
            'referer': referrer,
        }
        logger.debug(
            'File (uptodate): Downloaded %(medianame)s from %(request)s '
            'referred in <%(referer)s>',
            log_args,
            extra={'spider': info.spider}
        )
        self.inc_stats(info.spider, 'uptodate')

        return {
            'url': request.url,
            'path': path,
            'checksum': result.get('checksum', None),
        }

    def _log_failure(f):
        # Log any failure that reaches this point in the callback chain.
        return logger.error(
            self.__class__.__name__ + '.store.stat_file',
            exc_info=failure_to_exc_info(f),
            extra={'spider': info.spider}
        )

    dfd = defer.maybeDeferred(self.store.stat_file, path, info)
    # A failed stat is swallowed and becomes a None result.
    dfd.addCallbacks(_onsuccess, lambda _: None)
    dfd.addErrback(_log_failure)
    return dfd
0
Example 5
def media_downloaded(self, response, request, info):
    """Validate a downloaded response and hand it to ``file_downloaded``.

    Returns a dict with ``url``, ``path`` and ``checksum``.

    Raises ``FileException`` when the status is not 200, when the body is
    empty, or when computing the path / processing the file fails (a
    ``FileException`` from that step is re-raised as is; any other
    exception is wrapped in a new ``FileException``).
    """
    referrer = referer_str(request)
    spider_extra = {'spider': info.spider}

    if response.status != 200:
        log_args = {
            'status': response.status,
            'request': request,
            'referer': referrer,
        }
        logger.warning(
            'File (code: %(status)s): Error downloading file from '
            '%(request)s referred in <%(referer)s>',
            log_args,
            extra=spider_extra
        )
        raise FileException('download-error')

    if not response.body:
        log_args = {'request': request, 'referer': referrer}
        logger.warning(
            'File (empty-content): Empty file from %(request)s referred '
            'in <%(referer)s>: no-content',
            log_args,
            extra=spider_extra
        )
        raise FileException('empty-content')

    # The status label doubles as the stats key passed to inc_stats.
    if 'cached' in response.flags:
        status = 'cached'
    else:
        status = 'downloaded'
    logger.debug(
        'File (%(status)s): Downloaded file from %(request)s referred in '
        '<%(referer)s>',
        {'status': status, 'request': request, 'referer': referrer},
        extra=spider_extra
    )
    self.inc_stats(info.spider, status)

    try:
        path = self.file_path(request, response=response, info=info)
        checksum = self.file_downloaded(response, request, info)
    except FileException as exc:
        log_args = {
            'request': request,
            'referer': referrer,
            'errormsg': str(exc),
        }
        logger.warning(
            'File (error): Error processing file from %(request)s '
            'referred in <%(referer)s>: %(errormsg)s',
            log_args,
            extra=spider_extra, exc_info=True
        )
        raise
    except Exception as exc:
        # Any other error is logged and re-raised as a FileException.
        logger.error(
            'File (unknown-error): Error processing file from %(request)s '
            'referred in <%(referer)s>',
            {'request': request, 'referer': referrer},
            exc_info=True, extra=spider_extra
        )
        raise FileException(str(exc))

    return {'url': request.url, 'path': path, 'checksum': checksum}