Here are examples of the Python API scrapy.utils.spider.iterate_spider_output, taken from open source projects. By voting up, you can indicate which examples are most useful and appropriate.
10 Examples
3
Example 1
def __call__(self, response):
    """Handle a response by running the stored callback on it.

    The callback is invoked with ``response`` as a keyword argument plus
    the stored ``self.kwargs``; its output is passed through
    ``iterate_spider_output`` and must come back as a generator, which is
    then handed to ``self._unwindGenerator``.

    Raises:
        ValueError: if the normalized callback output is not a generator.
    """
    callback_result = self.callback(response=response, **self.kwargs)
    generator = iterate_spider_output(callback_result)
    if isinstance(generator, GeneratorType):
        return self._unwindGenerator(generator)
    raise ValueError("Callback must return a generator type")
3
Example 2
def run_callback(self, response, cb):
    """Run ``cb`` on ``response`` and sort its output into two lists.

    Returns:
        A ``(items, requests)`` tuple. ``BaseItem`` instances and dicts go
        into ``items``; ``Request`` instances go into ``requests``. Any
        other kind of output is silently dropped.
    """
    items = []
    requests = []
    for produced in iterate_spider_output(cb(response)):
        # Items take precedence: an object is only considered a request
        # when it is not already classified as an item.
        if isinstance(produced, (BaseItem, dict)):
            items.append(produced)
            continue
        if isinstance(produced, Request):
            requests.append(produced)
    return items, requests
3
Example 3
def _parse_response(self, response, callback, cb_kwargs, follow=True):
    """Yield callback results for ``response``, then any follow-up requests.

    When ``callback`` is truthy it is called with the response and
    ``cb_kwargs``; a falsy return value is replaced by an empty tuple
    before being passed to ``self.process_results``. When both ``follow``
    and ``self._follow_links`` are truthy, everything produced by
    ``self._requests_to_follow(response)`` is yielded afterwards.
    """
    if callback:
        raw_output = callback(response, **cb_kwargs)
        if not raw_output:
            raw_output = ()
        processed = self.process_results(response, raw_output)
        for result in iterate_spider_output(processed):
            yield result

    should_follow = follow and self._follow_links
    if should_follow:
        for link_result in self._requests_to_follow(response):
            yield link_result
3
Example 4
def parse_nodes(self, response, nodes):
    """Yield the processed results for every node in ``nodes``.

    Each node is passed, together with the response, to
    ``self.parse_node``. Whatever that returns is normalized with
    ``iterate_spider_output`` and then given to ``self.process_results``,
    whose items are yielded one by one.

    The original documentation describes ``nodes`` as the selectors
    matching the provided tag name (itertag) and states that the per-node
    result must be a BaseItem, a Request, or a list containing any of
    them.
    """
    for node in nodes:
        parsed = self.parse_node(response, node)
        normalized = iterate_spider_output(parsed)
        processed = self.process_results(response, normalized)
        for entry in processed:
            yield entry
3
Example 5
Project: scrapy Source File: feed.py
def parse_rows(self, response):
    """Yield the processed results for every CSV row in ``response``.

    Rows come from ``csviter`` using this spider's ``delimiter``,
    ``headers`` and ``quotechar``. Each row is passed with the response to
    ``self.parse_row``; the return value is normalized with
    ``iterate_spider_output`` and handed to ``self.process_results``,
    whose items are yielded one by one.

    The original documentation describes each row as a dict with a key per
    provided (or detected) CSV header, and mentions ``adapt_response`` and
    ``process_results`` as override points for pre- and post-processing;
    only ``process_results`` is invoked in this method.
    """
    rows = csviter(response, self.delimiter, self.headers, self.quotechar)
    for record in rows:
        parsed = self.parse_row(response, record)
        normalized = iterate_spider_output(parsed)
        processed = self.process_results(response, normalized)
        for entry in processed:
            yield entry
3
Example 6
Project: scrapy Source File: test_utils_spider.py
def test_iterate_spider_output(self):
    """Check ``iterate_spider_output`` on single objects and on a list.

    A lone item, request, or arbitrary object must come back wrapped as a
    one-element iterable; a list must come back with the same elements in
    the same order.
    """
    item = BaseItem()
    request = Request('http://scrapytest.org')
    plain = object()

    # Single, non-iterable outputs are each yielded exactly once.
    for single in (item, request, plain):
        self.assertEqual(list(iterate_spider_output(single)), [single])

    # A list of outputs is iterated as-is.
    mixed = [request, item, plain]
    self.assertEqual(list(iterate_spider_output(mixed)), [request, item, plain])
0
Example 7
Project: scrapy Source File: __init__.py
def _clean_req(self, request, method, results):
    """Stop the request from returning objects and record any errors.

    Replaces ``request.callback`` and ``request.errback`` in place:

    * the new callback runs the original one, fully consumes its output
      and returns nothing; an exception raised along the way is reported
      to ``results`` as an error on a 'callback' test case;
    * the new errback reports the received failure to ``results`` as an
      error on an 'errback' test case.

    Args:
        request: object whose ``callback``/``errback`` attributes are
            replaced.
        method: passed to ``_create_testcase`` to build the test case.
        results: object exposing ``addError(case, exc_info)``.
    """
    cb = request.callback

    @wraps(cb)
    def cb_wrapper(response):
        try:
            output = cb(response)
            # Exhaust the output so that errors raised lazily (e.g. inside
            # a generator callback) surface here; the values are discarded.
            list(iterate_spider_output(output))
        except Exception:
            # Only ordinary exceptions are recorded; KeyboardInterrupt and
            # SystemExit are allowed to propagate instead of being
            # swallowed by a bare ``except``.
            case = _create_testcase(method, 'callback')
            results.addError(case, sys.exc_info())

    def eb_wrapper(failure):
        case = _create_testcase(method, 'errback')
        # Same layout as sys.exc_info(): (type, value, traceback).
        exc_info = failure.type, failure.value, failure.getTracebackObject()
        results.addError(case, exc_info)

    request.callback = cb_wrapper
    request.errback = eb_wrapper
0
Example 8
Project: scrapy Source File: __init__.py
def add_pre_hook(self, request, results):
    """Wrap ``request.callback`` so ``self.pre_process`` runs before it.

    Nothing is changed when this object has no ``pre_process`` attribute.
    Otherwise the callback is replaced by a wrapper that:

    1. reports the outcome of ``self.pre_process(response)`` to
       ``results`` (failure for ``AssertionError``, error for any other
       ``Exception``, success otherwise) under ``self.testcase_pre``;
    2. calls the original callback and returns its output as a list.

    Args:
        request: object whose ``callback`` attribute may be replaced.
        results: object exposing ``startTest``, ``stopTest``,
            ``addFailure``, ``addError`` and ``addSuccess``.

    Returns:
        The same ``request`` object.
    """
    if hasattr(self, 'pre_process'):
        cb = request.callback

        @wraps(cb)
        def wrapper(response):
            try:
                results.startTest(self.testcase_pre)
                self.pre_process(response)
                results.stopTest(self.testcase_pre)
            except AssertionError:
                results.addFailure(self.testcase_pre, sys.exc_info())
            except Exception:
                results.addError(self.testcase_pre, sys.exc_info())
            else:
                results.addSuccess(self.testcase_pre)
            # Deliberately not inside ``finally``: returning from a
            # ``finally`` clause would silently discard KeyboardInterrupt /
            # SystemExit and any error raised by the handlers above.
            return list(iterate_spider_output(cb(response)))

        request.callback = wrapper

    return request
0
Example 9
Project: scrapy Source File: __init__.py
def add_post_hook(self, request, results):
    """Wrap ``request.callback`` so ``self.post_process`` runs after it.

    Nothing is changed when this object has no ``post_process`` attribute.
    Otherwise the callback is replaced by a wrapper that:

    1. calls the original callback and collects its output into a list;
    2. reports the outcome of ``self.post_process(output)`` to ``results``
       (failure for ``AssertionError``, error for any other ``Exception``,
       success otherwise) under ``self.testcase_post``;
    3. returns the collected output.

    Args:
        request: object whose ``callback`` attribute may be replaced.
        results: object exposing ``startTest``, ``stopTest``,
            ``addFailure``, ``addError`` and ``addSuccess``.

    Returns:
        The same ``request`` object.
    """
    if hasattr(self, 'post_process'):
        cb = request.callback

        @wraps(cb)
        def wrapper(response):
            output = list(iterate_spider_output(cb(response)))
            try:
                results.startTest(self.testcase_post)
                self.post_process(output)
                results.stopTest(self.testcase_post)
            except AssertionError:
                results.addFailure(self.testcase_post, sys.exc_info())
            except Exception:
                results.addError(self.testcase_post, sys.exc_info())
            else:
                results.addSuccess(self.testcase_post)
            # Deliberately not inside ``finally``: returning from a
            # ``finally`` clause would silently discard KeyboardInterrupt /
            # SystemExit and any error raised by the handlers above.
            return output

        request.callback = wrapper

    return request
0
Example 10
def start_requests(self):
    """Return the output of ``self.init_request()`` as the start requests.

    The parent class's start requests are created first and stored on
    ``self._postinit_reqs``; they are not returned from this method.
    """
    parent = super(InitSpider, self)
    self._postinit_reqs = parent.start_requests()
    init_output = self.init_request()
    return iterate_spider_output(init_output)