scrapy.http.headers.Headers

Here are examples of the Python API scrapy.http.headers.Headers taken from open-source projects. By voting up you can indicate which examples are most useful and appropriate.

7 Examples

3 Source : __init__.py
with MIT License
from autofelix

    def __init__(self, url, status=200, headers=None, body=b'', flags=None, request=None):
        """Initialize a response.

        Stores the HTTP headers, status code, body and URL, plus the
        originating request and any flags attached to this response.
        """
        self.headers = Headers(headers if headers else {})
        self.status = int(status)
        # Body must be set before the URL, matching the original call order
        # in case either setter depends on state established by the other.
        self._set_body(body)
        self._set_url(url)
        self.request = request
        self.flags = list(flags) if flags is not None else []

    @property

3 Source : elasticsearch.py
with MIT License
from invanalabs

    def retrieve_response(self, spider, request):
        """Rebuild a cached ``Response`` for *request*, or return ``None``.

        Returns nothing when the request is not cached at all, and ``None``
        when a 200 entry was stored without an HTML body.
        """
        data = self._read_data(spider, request)
        if data is None:
            return  # not cached
        if data['status'] == 200 and data['html'] is None:
            return None

        # get_headers may normalize the stored record, so call it first.
        data = self.get_headers(data)
        headers = Headers(data['headers'])
        body = data['html'].encode('utf-8')
        response_class = responsetypes.from_args(headers=headers, url=data['url'])
        return response_class(
            url=data['url'],
            headers=headers,
            status=data['status'],
            body=body,
        )

    def _clean_headers(self, obj):

3 Source : mongodb.py
with MIT License
from invanalabs

    def retrieve_response(self, spider, request):
        """Rebuild a cached ``Response`` for *request*, or return ``None``
        when the request has no cache entry.
        """
        data = self._read_data(spider, request)
        if data is None:
            return  # not cached
        headers = Headers(data['headers'])
        # Pick the response class from the headers/URL so e.g. HtmlResponse
        # is used for HTML content.
        response_class = responsetypes.from_args(headers=headers, url=data['url'])
        return response_class(
            url=data['url'],
            headers=headers,
            status=data['status'],
            body=data['html'],
        )

    def _clean_headers(self, obj):

0 Source : __init__.py
with MIT License
from autofelix

    def __init__(self, url, callback=None, method='GET', headers=None, body=None,
                 cookies=None, meta=None, encoding='utf-8', priority=0,
                 dont_filter=False, errback=None, flags=None, cb_kwargs=None):
        """Initialize a request.

        Validates the callback/errback pair, normalizes the HTTP method,
        and stores headers, cookies, meta and scheduling attributes.
        """
        # Encoding must be assigned before URL/body, since their setters
        # may rely on it.
        self._encoding = encoding
        self.method = str(method).upper()
        self._set_url(url)
        self._set_body(body)
        assert isinstance(priority, int), "Request priority not an integer: %r" % priority
        self.priority = priority

        # Both hooks must be callable when supplied; errback alone is invalid.
        for name, func in (('callback', callback), ('errback', errback)):
            if func is not None and not callable(func):
                raise TypeError('%s must be a callable, got %s' % (name, type(func).__name__))
        assert callback or not errback, "Cannot use errback without a callback"
        self.callback = callback
        self.errback = errback

        self.cookies = cookies if cookies else {}
        self.headers = Headers(headers if headers else {}, encoding=encoding)
        self.dont_filter = dont_filter

        self._meta = None if not meta else dict(meta)
        self._cb_kwargs = None if not cb_kwargs else dict(cb_kwargs)
        self.flags = list(flags) if flags is not None else []

    @property

0 Source : __init__.py
with The Unlicense
from dspray95

    def __init__(self, url, callback=None, method='GET', headers=None, body=None,
                 cookies=None, meta=None, encoding='utf-8', priority=0,
                 dont_filter=False, errback=None, flags=None):
        """Initialize a request.

        Validates callback/errback, normalizes the method name, and stores
        headers, cookies, meta and scheduling attributes.
        """
        self._encoding = encoding  # must be set before URL/body below
        self.method = str(method).upper()
        self._set_url(url)
        self._set_body(body)
        assert isinstance(priority, int), "Request priority not an integer: %r" % priority
        self.priority = priority

        if callback is not None and not callable(callback):
            raise TypeError('callback must be a callable, got %s' % type(callback).__name__)
        if errback is not None and not callable(errback):
            raise TypeError('errback must be a callable, got %s' % type(errback).__name__)
        # An errback without a callback to pair it with is a usage error.
        assert callback or not errback, "Cannot use errback without a callback"
        self.callback = callback
        self.errback = errback

        self.cookies = cookies if cookies else {}
        self.headers = Headers(headers if headers else {}, encoding=encoding)
        self.dont_filter = dont_filter

        self._meta = dict(meta) if meta else None
        self.flags = list(flags) if flags is not None else []

    @property

0 Source : handler.py
with BSD 3-Clause "New" or "Revised" License
from scrapy-plugins

    async def _download_request_with_page(self, request: Request, page: Page) -> Response:
        """Navigate ``page`` to the request URL, run any user-supplied page
        coroutines, and build a Scrapy response from the rendered content.

        The returned response carries the ``"playwright"`` flag and, when
        available, the remote server's IP address and TLS security details.
        """
        start_time = time()
        response = await page.goto(request.url)

        # Coroutines may arrive as a dict (values are the coroutines) or any
        # iterable; normalize to an iterable of PageCoroutine-likes.
        page_coroutines = request.meta.get("playwright_page_coroutines") or ()
        if isinstance(page_coroutines, dict):
            page_coroutines = page_coroutines.values()
        for pc in page_coroutines:
            if isinstance(pc, PageCoroutine):
                try:
                    # Resolve the page method named by the coroutine spec.
                    method = getattr(page, pc.method)
                except AttributeError:
                    logger.warning(f"Ignoring {repr(pc)}: could not find coroutine")
                else:
                    # The method may be sync or async; await only awaitables
                    # and stash the result on the coroutine object.
                    result = method(*pc.args, **pc.kwargs)
                    pc.result = await result if isawaitable(result) else result
                    # Let any navigation triggered by the coroutine settle
                    # before running the next one.
                    await page.wait_for_load_state(timeout=self.default_navigation_timeout)
            else:
                logger.warning(
                    f"Ignoring {repr(pc)}: expected PageCoroutine, got {repr(type(pc))}"
                )

        # Capture the fully-rendered DOM, after all coroutines have run.
        body_str = await page.content()
        request.meta["download_latency"] = time() - start_time

        if request.meta.get("playwright_include_page"):
            # Caller asked to keep the page alive; hand it over via meta.
            request.meta["playwright_page"] = page
        else:
            await page.close()
            self.stats.inc_value("playwright/page_count/closed")

        # Best effort: these playwright response accessors may be missing or
        # return unusable data, in which case the fields are simply omitted.
        server_ip_address = None
        with suppress(AttributeError, KeyError, ValueError):
            server_addr = await response.server_addr()
            server_ip_address = ip_address(server_addr["ipAddress"])

        with suppress(AttributeError):
            request.meta["playwright_security_details"] = await response.security_details()

        headers = Headers(response.headers)
        # The body is the decoded DOM, not the raw network payload, so any
        # Content-Encoding header no longer applies.
        headers.pop("Content-Encoding", None)
        encoding = _get_response_encoding(headers, body_str) or "utf-8"
        body = body_str.encode(encoding)
        respcls = responsetypes.from_args(headers=headers, url=page.url, body=body)
        return respcls(
            url=page.url,
            status=response.status,
            headers=headers,
            body=body,
            request=request,
            flags=["playwright"],
            encoding=encoding,
            ip_address=server_ip_address,
        )

    def _increment_request_stats(self, request: PlaywrightRequest) -> None:

0 Source : test_misc.py
with BSD 3-Clause "New" or "Revised" License
from scrapy-plugins

async def test_get_response_encoding():
    """Check encoding resolution from headers, from the body's <meta>
    charset declaration, and the None fallback when neither is present.

    Note: the HTML literal below was garbled in the scraped copy (spaces
    inserted before every ``<``), which would defeat the meta-charset
    sniffing this case exercises; it is restored to well-formed HTML here.
    """
    # Charset taken from the Content-Type header.
    assert (
        _get_response_encoding(
            headers=Headers({"content-type": "text/html; charset=UTF-8"}),
            body="",
        )
        == "utf-8"
    )
    # No header: charset sniffed from the <meta charset> declaration
    # (gb2312 resolves to the gb18030 superset codec).
    assert (
        _get_response_encoding(
            headers=Headers(),
            body="""<!doctype html>
<html lang="cn">
<head>
<meta charset="gb2312">
</head>
</html>
""",
        )
        == "gb18030"
    )
    # Neither header nor body declares an encoding.
    assert _get_response_encoding(headers=Headers(), body="") is None