Here are examples of the Python API scrapy.downloadermiddlewares.httpcache.HttpCacheMiddleware.from_crawler, taken from open source projects. By voting up, you can indicate which examples are most useful and appropriate.
1 Examples
0
Example 1
Project: scrapy-splash Source File: test_middleware.py
def test_magic_response_caching(tmpdir):
    """Run a magic-response SplashRequest through the middleware chain twice.

    On the first pass the cache middleware is expected to let the request
    through, and a hand-built JSON response is fed back up the chain.  On
    the second pass the cache middleware is expected to answer with a
    ``TextResponse`` carrying the same body, and the fully processed result
    must expose the same HTML, selector output and ``render_time`` as the
    first pass did.
    """
    # prepare middlewares
    spider = scrapy.Spider(name='foo')
    settings = {
        'HTTPCACHE_DIR': str(tmpdir.join('cache')),
        'HTTPCACHE_STORAGE': 'scrapy_splash.SplashAwareFSCacheStorage',
        'HTTPCACHE_ENABLED': True,
    }
    cache_mw = HttpCacheMiddleware.from_crawler(_get_crawler(settings))
    mw = _get_mw()
    cookie_mw = _get_cookie_mw()

    def _send():
        """Build a fresh request and push it down to the cache middleware.

        Returns the request as it reached the cache middleware, together
        with whatever the cache middleware returned for it.
        """
        request = SplashRequest(
            url="http://example.com",
            endpoint='execute',
            magic_response=True,
            args={'lua_source': 'function main(splash) end'},
        )
        request = cookie_mw.process_request(request, spider) or request
        request = mw.process_request(request, spider)
        return request, cache_mw.process_request(request, spider)

    def _receive(request, response):
        """Pass *response* back up: cache first, then ``mw``, then cookies."""
        for middleware in (cache_mw, mw, cookie_mw):
            response = middleware.process_response(request, response, spider)
        return response

    # Emulate Scrapy middleware chain.

    # first call
    req, cache_result = _send()
    req = cache_result or req
    assert isinstance(req, scrapy.Request)  # first call; the cache is empty

    resp_data = {
        'html': "<html><body>Hello</body></html>",
        'render_time': 0.5,
    }
    resp_body = json.dumps(resp_data).encode('utf8')
    resp = TextResponse(
        "http://example.com",
        headers={b'Content-Type': b'application/json'},
        body=resp_body,
    )

    first_result = _receive(req, resp)
    assert first_result.text == "<html><body>Hello</body></html>"
    assert first_result.css("body").extract_first() == "<body>Hello</body>"
    assert first_result.data['render_time'] == 0.5

    # second call
    req, cache_result = _send()
    cached_resp = cache_result or req

    # response should be from cache:
    assert cached_resp.__class__ is TextResponse
    assert cached_resp.body == resp_body

    second_result = _receive(req, cached_resp)
    assert isinstance(second_result, scrapy_splash.SplashJsonResponse)
    assert second_result.body == b"<html><body>Hello</body></html>"
    assert second_result.text == "<html><body>Hello</body></html>"
    assert second_result.css("body").extract_first() == "<body>Hello</body>"
    assert second_result.data['render_time'] == 0.5
    assert second_result.headers[b'Content-Type'] == b'text/html; charset=utf-8'