Here are examples of the Python API scrapy.http.headers.Headers, taken from open-source projects. By voting up, you can indicate which examples are most useful and appropriate.
7 Examples
3
Source : __init__.py
with MIT License
from autofelix
with MIT License
from autofelix
def __init__(self, url, status=200, headers=None, body=b'', flags=None, request=None):
    """Build a response from its raw components.

    Wraps *headers* in a ``Headers`` mapping, normalizes *status* to an
    int, and delegates body/url handling to the private setter helpers.
    """
    self.headers = Headers(headers if headers else {})
    self.status = int(status)
    self._set_body(body)
    self._set_url(url)
    self.request = request
    if flags is None:
        self.flags = []
    else:
        # copy so the caller's list cannot be mutated through us
        self.flags = list(flags)
@property
3
Source : elasticsearch.py
with MIT License
from invanalabs
with MIT License
from invanalabs
def retrieve_response(self, spider, request):
    """Rebuild a cached ``Response`` for *request*, or ``None`` when uncached."""
    data = self._read_data(spider, request)
    if data is None:
        return  # not cached
    # A 200 entry with no stored html counts as a cache miss.
    if data['status'] == 200 and data['html'] is None:
        return None
    data = self.get_headers(data)
    headers = Headers(data['headers'])
    body = bytes(data['html'], encoding="utf-8")
    response_cls = responsetypes.from_args(headers=headers, url=data['url'])
    return response_cls(
        url=data['url'],
        headers=headers,
        status=data['status'],
        body=body,
    )
def _clean_headers(self, obj):
3
Source : mongodb.py
with MIT License
from invanalabs
with MIT License
from invanalabs
def retrieve_response(self, spider, request):
    """Return the cached ``Response`` for *request*, or ``None`` if absent."""
    record = self._read_data(spider, request)
    if record is None:
        return  # not cached
    hdrs = Headers(record['headers'])
    # Let responsetypes pick the most specific Response subclass.
    response_cls = responsetypes.from_args(headers=hdrs, url=record['url'])
    return response_cls(
        url=record['url'],
        headers=hdrs,
        status=record['status'],
        body=record['html'],
    )
def _clean_headers(self, obj):
0
Source : __init__.py
with MIT License
from autofelix
with MIT License
from autofelix
def __init__(self, url, callback=None, method='GET', headers=None, body=None,
             cookies=None, meta=None, encoding='utf-8', priority=0,
             dont_filter=False, errback=None, flags=None, cb_kwargs=None):
    """Initialize a Request.

    *encoding* is stored first because the url/body setters depend on it.
    Raises TypeError for a non-int *priority* or non-callable
    *callback*/*errback*, and ValueError when *errback* is given
    without a *callback*.
    """
    self._encoding = encoding  # this one has to be set first
    self.method = str(method).upper()
    self._set_url(url)
    self._set_body(body)
    # Validate with real exceptions: `assert` is stripped under `python -O`,
    # which would silently disable these checks.
    if not isinstance(priority, int):
        raise TypeError("Request priority not an integer: %r" % priority)
    self.priority = priority
    if callback is not None and not callable(callback):
        raise TypeError('callback must be a callable, got %s' % type(callback).__name__)
    if errback is not None and not callable(errback):
        raise TypeError('errback must be a callable, got %s' % type(errback).__name__)
    if errback and not callback:
        raise ValueError("Cannot use errback without a callback")
    self.callback = callback
    self.errback = errback
    self.cookies = cookies or {}
    self.headers = Headers(headers or {}, encoding=encoding)
    self.dont_filter = dont_filter
    # meta/cb_kwargs are copied lazily: None until a truthy mapping is given
    self._meta = dict(meta) if meta else None
    self._cb_kwargs = dict(cb_kwargs) if cb_kwargs else None
    self.flags = [] if flags is None else list(flags)
@property
0
Source : __init__.py
with The Unlicense
from dspray95
with The Unlicense
from dspray95
def __init__(self, url, callback=None, method='GET', headers=None, body=None,
             cookies=None, meta=None, encoding='utf-8', priority=0,
             dont_filter=False, errback=None, flags=None):
    """Initialize a Request.

    *encoding* must be stored before the url/body setters run, since
    they rely on it for text handling.
    """
    self._encoding = encoding  # required by _set_url/_set_body below
    self.method = str(method).upper()
    self._set_url(url)
    self._set_body(body)
    assert isinstance(priority, int), "Request priority not an integer: %r" % priority
    self.priority = priority
    if callback is not None and not callable(callback):
        raise TypeError('callback must be a callable, got %s' % type(callback).__name__)
    if errback is not None and not callable(errback):
        raise TypeError('errback must be a callable, got %s' % type(errback).__name__)
    assert callback or not errback, "Cannot use errback without a callback"
    self.callback = callback
    self.errback = errback
    self.cookies = {} if not cookies else cookies
    self.headers = Headers(headers or {}, encoding=encoding)
    self.dont_filter = dont_filter
    self._meta = dict(meta) if meta else None
    if flags is None:
        self.flags = []
    else:
        self.flags = list(flags)
@property
0
Source : handler.py
with BSD 3-Clause "New" or "Revised" License
from scrapy-plugins
with BSD 3-Clause "New" or "Revised" License
from scrapy-plugins
async def _download_request_with_page(self, request: Request, page: Page) -> Response:
    """Navigate *page* to the request URL, run any configured page
    coroutines, and build a Scrapy Response from the rendered content.
    """
    start_time = time()
    response = await page.goto(request.url)
    # Optional per-request page actions; accept either a sequence or a
    # dict (in which case only the values are used).
    page_coroutines = request.meta.get("playwright_page_coroutines") or ()
    if isinstance(page_coroutines, dict):
        page_coroutines = page_coroutines.values()
    for pc in page_coroutines:
        if isinstance(pc, PageCoroutine):
            try:
                method = getattr(page, pc.method)
            except AttributeError:
                logger.warning(f"Ignoring {repr(pc)}: could not find coroutine")
            else:
                result = method(*pc.args, **pc.kwargs)
                # Method may be sync or async; await only when needed.
                pc.result = await result if isawaitable(result) else result
                await page.wait_for_load_state(timeout=self.default_navigation_timeout)
        else:
            logger.warning(
                f"Ignoring {repr(pc)}: expected PageCoroutine, got {repr(type(pc))}"
            )
    body_str = await page.content()
    request.meta["download_latency"] = time() - start_time
    # Either hand the live page back to the caller, or close it and
    # record the closure in the stats.
    if request.meta.get("playwright_include_page"):
        request.meta["playwright_page"] = page
    else:
        await page.close()
        self.stats.inc_value("playwright/page_count/closed")
    # Best-effort extraction of server address / TLS details; older
    # Playwright responses may lack these attributes, hence suppress().
    server_ip_address = None
    with suppress(AttributeError, KeyError, ValueError):
        server_addr = await response.server_addr()
        server_ip_address = ip_address(server_addr["ipAddress"])
    with suppress(AttributeError):
        request.meta["playwright_security_details"] = await response.security_details()
    headers = Headers(response.headers)
    # Body is already decoded HTML; the original encoding header no
    # longer applies, so drop it before re-encoding.
    headers.pop("Content-Encoding", None)
    encoding = _get_response_encoding(headers, body_str) or "utf-8"
    body = body_str.encode(encoding)
    respcls = responsetypes.from_args(headers=headers, url=page.url, body=body)
    return respcls(
        url=page.url,
        status=response.status,
        headers=headers,
        body=body,
        request=request,
        flags=["playwright"],
        encoding=encoding,
        ip_address=server_ip_address,
    )
def _increment_request_stats(self, request: PlaywrightRequest) -> None:
0
Source : test_misc.py
with BSD 3-Clause "New" or "Revised" License
from scrapy-plugins
with BSD 3-Clause "New" or "Revised" License
from scrapy-plugins
async def test_get_response_encoding():
    """Check encoding resolution: header charset, then body <meta>, else None."""
    # A charset declared in the Content-Type header wins.
    assert (
        _get_response_encoding(
            headers=Headers({"content-type": "text/html; charset=UTF-8"}),
            body="",
        )
        == "utf-8"
    )
    # With no header, fall back to the <meta charset> declaration in the
    # body; gb2312 is normalized to its superset gb18030.  (The markup
    # must be valid HTML — e.g. "<meta", not "< meta" — or the charset
    # sniffing cannot find the declaration.)
    assert (
        _get_response_encoding(
            headers=Headers(),
            body="""<!doctype html>
            <html lang="cn">
            <head>
            <meta charset="gb2312">
            </head>
            </html>
            """,
        )
        == "gb18030"
    )
    # Nothing declared anywhere: no encoding can be inferred.
    assert _get_response_encoding(headers=Headers(), body="") is None