scrapy.downloadermiddlewares.httpcache.HttpCacheMiddleware.from_crawler

Here are examples of the Python API scrapy.downloadermiddlewares.httpcache.HttpCacheMiddleware.from_crawler, taken from open source projects. By voting up, you can indicate which examples are most useful and appropriate.

1 Examples 7

Example 1

Project: scrapy-splash Source File: test_middleware.py
def test_magic_response_caching(tmpdir):
    """Run a magic-response SplashRequest through the middleware chain twice.

    The first pass starts with an empty HTTP cache and is fed a hand-built
    JSON response. The second pass must receive that same raw response from
    the cache middleware and end up with the same processed result.
    """
    # Set up the spider and the three middlewares that take part in the chain.
    spider = scrapy.Spider(name='foo')
    settings = {
        'HTTPCACHE_DIR': str(tmpdir.join('cache')),
        'HTTPCACHE_STORAGE': 'scrapy_splash.SplashAwareFSCacheStorage',
        'HTTPCACHE_ENABLED': True
    }
    cache_mw = HttpCacheMiddleware.from_crawler(_get_crawler(settings))
    mw = _get_mw()  # presumably the Splash middleware -- confirm in _get_mw
    cookie_mw = _get_cookie_mw()

    def prepare_request():
        # Build a fresh request and pass it through everything that runs
        # before the cache middleware: the cookie middleware, then `mw`.
        request = SplashRequest(
            url="http://example.com",
            endpoint='execute',
            magic_response=True,
            args={'lua_source': 'function main(splash) end'},
        )
        request = cookie_mw.process_request(request, spider) or request
        return mw.process_request(request, spider)

    def run_response_chain(request, response):
        # Responses travel back in the opposite order: cache, `mw`, cookies.
        after_cache = cache_mw.process_response(request, response, spider)
        after_mw = mw.process_response(request, after_cache, spider)
        return cookie_mw.process_response(request, after_mw, spider)

    # --- first pass: nothing is cached yet ---
    first_req = prepare_request()
    first_req = cache_mw.process_request(first_req, spider) or first_req
    # first call; the cache is empty, so we still hold a request
    assert isinstance(first_req, scrapy.Request)

    payload = {
        'html': "<html><body>Hello</body></html>",
        'render_time': 0.5,
    }
    resp_body = json.dumps(payload).encode('utf8')
    raw_resp = TextResponse("http://example.com",
                            headers={b'Content-Type': b'application/json'},
                            body=resp_body)

    first_result = run_response_chain(first_req, raw_resp)

    assert first_result.text == "<html><body>Hello</body></html>"
    assert first_result.css("body").extract_first() == "<body>Hello</body>"
    assert first_result.data['render_time'] == 0.5

    # --- second pass: identical request, now answered by the cache ---
    second_req = prepare_request()
    cached_resp = cache_mw.process_request(second_req, spider) or second_req

    # response should be from cache: the plain stored response, byte for byte
    assert cached_resp.__class__ is TextResponse
    assert cached_resp.body == resp_body

    second_result = run_response_chain(second_req, cached_resp)

    assert isinstance(second_result, scrapy_splash.SplashJsonResponse)
    assert second_result.body == b"<html><body>Hello</body></html>"
    assert second_result.text == "<html><body>Hello</body></html>"
    assert second_result.css("body").extract_first() == "<body>Hello</body>"
    assert second_result.data['render_time'] == 0.5
    assert (second_result.headers[b'Content-Type']
            == b'text/html; charset=utf-8')