Here are examples of the Python API scrapy.exceptions._InvalidOutput taken from open-source projects. By voting up, you can indicate which examples are most useful and appropriate.
2 Examples
0
Source : middleware.py
with MIT License
from autofelix
with MIT License
from autofelix
def download(self, download_func, request, spider):
    """Run ``request`` through the registered downloader middleware hooks.

    Three nested stages are chained on a single deferred:

    * ``process_request`` calls every hook in ``self.methods['process_request']``
      and falls back to ``download_func`` when none of them returns a value.
    * ``process_exception`` gives every hook in
      ``self.methods['process_exception']`` a chance to turn a failure into a
      ``Response`` or ``Request``.
    * ``process_response`` passes the result through every hook in
      ``self.methods['process_response']``.

    :param download_func: callable invoked as
        ``download_func(request=request, spider=spider)`` when no
        ``process_request`` hook short-circuits the download.
    :param request: the request being processed; forwarded to every hook.
    :param spider: forwarded to every hook and to ``download_func``.
    :returns: the deferred created by ``mustbe_deferred`` with the exception
        and response stages attached.
    :raises _InvalidOutput: (through the deferred) when a hook returns a value
        of a type other than the ones allowed for its stage.
    :raises AssertionError: (through the deferred) when ``process_response``
        receives ``None``.
    """

    @defer.inlineCallbacks
    def process_request(request):
        for method in self.methods['process_request']:
            response = yield method(request=request, spider=spider)
            if response is not None and not isinstance(response, (Response, Request)):
                raise _InvalidOutput(
                    'Middleware %s.process_request must return None, Response or Request, got %s'
                    % (six.get_method_self(method).__class__.__name__, response.__class__.__name__))
            if response:
                # A hook produced a result: skip the remaining hooks and the download.
                defer.returnValue(response)
        defer.returnValue((yield download_func(request=request, spider=spider)))

    @defer.inlineCallbacks
    def process_response(response):
        # Explicit raise instead of ``assert`` so the guard is not stripped
        # when Python runs with -O; exception type and message are unchanged.
        if response is None:
            raise AssertionError('Received None in process_response')
        if isinstance(response, Request):
            # A Request is handed back as-is; response hooks are not run on it.
            defer.returnValue(response)
        for method in self.methods['process_response']:
            response = yield method(request=request, response=response, spider=spider)
            if not isinstance(response, (Response, Request)):
                raise _InvalidOutput(
                    'Middleware %s.process_response must return Response or Request, got %s'
                    % (six.get_method_self(method).__class__.__name__, type(response)))
            if isinstance(response, Request):
                # A hook replaced the response with a new Request: stop the chain.
                defer.returnValue(response)
        defer.returnValue(response)

    @defer.inlineCallbacks
    def process_exception(_failure):
        exception = _failure.value
        for method in self.methods['process_exception']:
            response = yield method(request=request, exception=exception, spider=spider)
            if response is not None and not isinstance(response, (Response, Request)):
                raise _InvalidOutput(
                    'Middleware %s.process_exception must return None, Response or Request, got %s'
                    % (six.get_method_self(method).__class__.__name__, type(response)))
            if response:
                # The first hook that returns a value handles the exception.
                defer.returnValue(response)
        # No hook handled it: hand the original failure back unchanged.
        defer.returnValue(_failure)

    deferred = mustbe_deferred(process_request, request)
    # The errback is attached before the callback, so whatever
    # process_exception returns is also passed on to process_response.
    deferred.addErrback(process_exception)
    deferred.addCallback(process_response)
    return deferred
0
Source : spidermw.py
with MIT License
from autofelix
with MIT License
from autofelix
def scrape_response(self, scrape_func, response, request, spider):
    """Wrap ``scrape_func`` with the registered spider middleware hooks.

    ``process_spider_input`` hooks run first; the outcome is then routed to
    the ``process_spider_output`` chain on success or to the
    ``process_spider_exception`` chain on failure.

    :param scrape_func: callable invoked as
        ``scrape_func(response_or_failure, request, spider)``.
    :param response: forwarded to every hook.
    :param request: forwarded to ``scrape_func``.
    :param spider: forwarded to every hook and to ``scrape_func``.
    :returns: the deferred created by ``mustbe_deferred`` with the output and
        exception chains attached.
    :raises _InvalidOutput: (through the deferred) when a hook returns a value
        its stage does not allow.
    """

    def fname(f):
        # "<OwnerClass>.<function>" label used in the error messages below.
        owner = six.get_method_self(f).__class__.__name__
        func_name = six.get_method_function(f).__name__
        return '%s.%s' % (owner, func_name)

    def process_spider_input(response):
        for hook in self.methods['process_spider_input']:
            try:
                outcome = hook(response=response, spider=spider)
                if outcome is not None:
                    message = 'Middleware {} must return None or raise an exception, got {}'.format(
                        fname(hook), type(outcome))
                    raise _InvalidOutput(message)
            except _InvalidOutput:
                # Invalid hook output is never converted into a Failure.
                raise
            except Exception:
                # Failure() with no arguments captures the exception being handled.
                return scrape_func(Failure(), request, spider)
        return scrape_func(response, request, spider)

    def process_spider_exception(_failure, start_index=0):
        exception = _failure.value
        # _InvalidOutput is passed through untouched instead of being handled.
        if isinstance(exception, _InvalidOutput):
            return _failure
        hooks = islice(self.methods['process_spider_exception'], start_index, None)
        for position, hook in enumerate(hooks, start=start_index):
            if hook is None:
                continue
            outcome = hook(response=response, exception=exception, spider=spider)
            if _isiterable(outcome):
                # An iterable ends exception handling; the remaining
                # process_spider_output hooks take over from the next position.
                return process_spider_output(outcome, position + 1)
            if outcome is not None:
                message = 'Middleware {} must return None or an iterable, got {}'.format(
                    fname(hook), type(outcome))
                raise _InvalidOutput(message)
        return _failure

    def process_spider_output(result, start_index=0):
        # Collects iterables obtained from process_spider_exception while the
        # output is being consumed; they are appended after the main result
        # rather than being fed through the output hooks again here.
        recovered = MutableChain()

        def evaluate_iterable(iterable, index):
            try:
                for item in iterable:
                    yield item
            except Exception as ex:
                handled = process_spider_exception(Failure(ex), index + 1)
                if isinstance(handled, Failure):
                    raise
                recovered.extend(handled)

        hooks = islice(self.methods['process_spider_output'], start_index, None)
        for position, hook in enumerate(hooks, start=start_index):
            if hook is None:
                continue
            # The call itself may raise when the hook is not a generator.
            try:
                result = hook(response=response, result=result, spider=spider)
            except Exception as ex:
                handled = process_spider_exception(Failure(ex), position + 1)
                if isinstance(handled, Failure):
                    raise
                return handled
            if not _isiterable(result):
                message = 'Middleware {} must return an iterable, got {}'.format(
                    fname(hook), type(result))
                raise _InvalidOutput(message)
            result = evaluate_iterable(result, position)

        return chain(result, recovered)

    dfd = mustbe_deferred(process_spider_input, response)
    dfd.addCallbacks(callback=process_spider_output, errback=process_spider_exception)
    return dfd
def process_start_requests(self, start_requests, spider):