Here are examples of the Python API scrapy.http.Headers, taken from open-source projects. By voting up, you can indicate which examples are most useful and appropriate.
38 Examples
3
Example 1
Project: scrapy Source File: http11.py
def _cb_bodydone(self, result, request, url):
    """Turn a finished download result into a response object.

    ``result`` is unpacked as a ``(txresponse, body, flags)`` triple. The
    response class is selected by ``responsetypes.from_args`` from the
    headers, URL and body. ``request`` is accepted but not used here.
    """
    tx_response, payload, resp_flags = result
    status_code = int(tx_response.code)
    resp_headers = Headers(tx_response.headers.getAllRawHeaders())
    response_class = responsetypes.from_args(
        headers=resp_headers, url=url, body=payload)
    return response_class(
        url=url,
        status=status_code,
        headers=resp_headers,
        body=payload,
        flags=resp_flags,
    )
3
Example 2
def connectionMade(self):
    """Write the request to the transport.

    Resets ``self.headers`` (the container for response headers), then
    sends the command line, every request header value taken from the
    factory, and finally the request body when the factory has one.
    """
    # Fresh container that will collect the response headers.
    self.headers = Headers()

    # Command line: method and path.
    self.sendCommand(self.factory.method, self.factory.path)

    # Request headers: one header line per value.
    for name, header_values in self.factory.headers.items():
        for header_value in header_values:
            self.sendHeader(name, header_value)
    self.endHeaders()

    # Request body, only when one is set.
    if self.factory.body is not None:
        self.transport.write(self.factory.body)
3
Example 3
Project: scrapy Source File: webclient.py
def _build_response(self, body, request):
    """Create the response object for ``body`` and record the latency.

    Stores ``headers_time - start_time`` in
    ``request.meta['download_latency']`` and returns an instance of the
    response class chosen by ``responsetypes.from_args`` from the response
    headers and URL.
    """
    request.meta['download_latency'] = self.headers_time - self.start_time
    status_code = int(self.status)
    resp_headers = Headers(self.response_headers)
    response_class = responsetypes.from_args(
        headers=resp_headers, url=self._url)
    return response_class(
        url=self._url,
        status=status_code,
        headers=resp_headers,
        body=body,
    )
3
Example 4
Project: scrapy Source File: httpcache.py
def retrieve_response(self, spider, request):
    """Rebuild a response from the data returned by ``self._read_data``.

    Returns ``None`` when ``_read_data`` yields ``None`` (nothing cached);
    otherwise returns a response built from the stored ``url``,
    ``status``, ``headers`` and ``body`` entries.
    """
    cached = self._read_data(spider, request)
    if cached is None:
        return  # not cached

    url = cached['url']
    status = cached['status']
    headers = Headers(cached['headers'])
    body = cached['body']
    response_class = responsetypes.from_args(headers=headers, url=url)
    return response_class(url=url, headers=headers, status=status, body=body)
3
Example 5
Project: scrapy Source File: httpcache.py
def retrieve_response(self, spider, request):
    """Return the cached response for ``request``, or None if absent.

    The body and the raw headers are read from the ``response_body`` and
    ``response_headers`` files under the request's path; the URL and the
    status come from the metadata returned by ``self._read_meta``.
    """
    meta = self._read_meta(spider, request)
    if meta is None:
        return  # not cached

    request_dir = self._get_request_path(spider, request)
    with self._open(os.path.join(request_dir, 'response_body'), 'rb') as body_file:
        body = body_file.read()
    with self._open(os.path.join(request_dir, 'response_headers'), 'rb') as headers_file:
        raw_headers = headers_file.read()

    url = meta.get('response_url')
    status = meta['status']
    headers = Headers(headers_raw_to_dict(raw_headers))
    response_class = responsetypes.from_args(headers=headers, url=url)
    return response_class(url=url, headers=headers, status=status, body=body)
3
Example 6
def test_basics(self):
    """Check lookups of present and missing keys, with and without defaults."""
    headers = Headers({'Content-Type': 'text/html', 'Content-Length': 1234})

    # Keys given at construction are present and truthy.
    assert headers['Content-Type']
    assert headers['Content-Length']

    # A missing key raises on item access but not on get()/getlist().
    self.assertRaises(KeyError, headers.__getitem__, 'Accept')
    self.assertEqual(headers.get('Accept'), None)
    self.assertEqual(headers.getlist('Accept'), [])

    # Defaults come back as bytes.
    self.assertEqual(headers.get('Accept', '*/*'), b'*/*')
    self.assertEqual(headers.getlist('Accept', '*/*'), [b'*/*'])
    self.assertEqual(
        headers.getlist('Accept', ['text/html', 'images/jpeg']),
        [b'text/html', b'images/jpeg'],
    )
3
Example 7
def test_single_value(self):
    """A single assigned value is readable via [], get() and getlist()."""
    headers = Headers()
    headers['Content-Type'] = 'text/html'

    self.assertEqual(headers['Content-Type'], b'text/html')
    self.assertEqual(headers.get('Content-Type'), b'text/html')
    self.assertEqual(headers.getlist('Content-Type'), [b'text/html'])
3
Example 8
def test_multivalue(self):
    """Assigning a list keeps all values; scalar access yields the last one."""
    headers = Headers()
    hlist = ['ip1', 'ip2']
    headers['X-Forwarded-For'] = hlist

    self.assertEqual(headers['X-Forwarded-For'], b'ip2')
    self.assertEqual(headers.get('X-Forwarded-For'), b'ip2')
    self.assertEqual(headers.getlist('X-Forwarded-For'), [b'ip1', b'ip2'])
    # The stored list must not be the very object that was assigned.
    assert headers.getlist('X-Forwarded-For') is not hlist
3
Example 9
def test_encode_utf8(self):
    """Unicode keys and values are stored as UTF-8 encoded bytes."""
    headers = Headers({u'key': u'\xa3'}, encoding='utf-8')
    name, values = dict(headers).popitem()

    assert isinstance(name, bytes), name
    assert isinstance(values[0], bytes), values[0]
    self.assertEqual(values[0], b'\xc2\xa3')
3
Example 10
Project: scrapy Source File: test_http_headers.py
def test_delete_and_contains(self):
    """Membership reflects assignment and deletion of a header."""
    headers = Headers()

    headers['Content-Type'] = 'text/html'
    assert 'Content-Type' in headers

    del headers['Content-Type']
    assert 'Content-Type' not in headers
3
Example 11
def test_setdefault(self):
    """setdefault() returns the stored list, for list and scalar defaults."""
    # List default: the stored list is a different object from the argument.
    headers = Headers()
    hlist = ['ip1', 'ip2']
    olist = headers.setdefault('X-Forwarded-For', hlist)
    assert headers.getlist('X-Forwarded-For') is not hlist
    assert headers.getlist('X-Forwarded-For') is olist

    # Scalar default: stored as a one-element list of bytes.
    headers = Headers()
    olist = headers.setdefault('X-Forwarded-For', 'ip1')
    self.assertEqual(headers.getlist('X-Forwarded-For'), [b'ip1'])
    assert headers.getlist('X-Forwarded-For') is olist
3
Example 12
def test_iterables(self):
    """Check dict(), keys(), items(), iteritems() and values() views."""
    source = {'Content-Type': 'text/html', 'X-Forwarded-For': ['ip1', 'ip2']}
    headers = Headers(source)

    expected_pairs = [
        (b'X-Forwarded-For', [b'ip1', b'ip2']),
        (b'Content-Type', [b'text/html']),
    ]

    self.assertDictEqual(
        dict(headers),
        {b'Content-Type': [b'text/html'],
         b'X-Forwarded-For': [b'ip1', b'ip2']},
    )
    self.assertSortedEqual(
        headers.keys(), [b'X-Forwarded-For', b'Content-Type'])
    self.assertSortedEqual(headers.items(), expected_pairs)
    self.assertSortedEqual(headers.iteritems(), expected_pairs)
    # values() yields a single value per header.
    self.assertSortedEqual(headers.values(), [b'ip2', b'text/html'])
3
Example 13
def test_update(self):
    """update() accepts a dict mixing scalar and list values."""
    headers = Headers()
    new_values = {
        'Content-Type': 'text/html',
        'X-Forwarded-For': ['ip1', 'ip2'],
    }
    headers.update(new_values)

    self.assertEqual(headers.getlist('Content-Type'), [b'text/html'])
    self.assertEqual(headers.getlist('X-Forwarded-For'), [b'ip1', b'ip2'])
3
Example 14
def test_copy(self):
    """copy.copy() yields an equal Headers whose value lists are distinct."""
    original = Headers({'header1': ['value1', 'value2']})
    duplicate = copy.copy(original)

    self.assertEqual(original, duplicate)
    self.assertEqual(original.getlist('header1'), duplicate.getlist('header1'))
    # Equal contents, but not the same list object.
    assert original.getlist('header1') is not duplicate.getlist('header1')
    assert isinstance(duplicate, Headers)
3
Example 15
def test_appendlist(self):
    """appendlist() adds to an existing header and creates a missing one."""
    # Header already present from the constructor.
    headers = Headers({'header1': 'value1'})
    headers.appendlist('header1', 'value3')
    self.assertEqual(headers.getlist('header1'), [b'value1', b'value3'])

    # Header absent at first.
    headers = Headers()
    headers.appendlist('header1', 'value1')
    headers.appendlist('header1', 'value3')
    self.assertEqual(headers.getlist('header1'), [b'value1', b'value3'])
3
Example 16
def test_setlistdefault(self):
    """setlistdefault() only sets headers that are not already present."""
    headers = Headers({'header1': 'value1'})
    fallback = ['value2', 'value3']

    headers.setlistdefault('header1', ['value2', 'value3'])
    headers.setlistdefault('header2', fallback)

    self.assertEqual(headers.getlist('header1'), [b'value1'])
    self.assertEqual(headers.getlist('header2'), [b'value2', b'value3'])
3
Example 17
def test_none_value(self):
    """A header set to None reads back as None / an empty list."""
    headers = Headers()
    headers['foo'] = 'bar'
    headers['foo'] = None
    # The key already exists, so this must not replace the None value.
    headers.setdefault('foo', 'bar')

    self.assertEqual(headers.get('foo'), None)
    self.assertEqual(headers.getlist('foo'), [])
3
Example 18
def test_int_value(self):
    """Integer values are stored as their decimal text in bytes."""
    headers = Headers({'hey': 5})
    headers['foo'] = 1
    headers.setdefault('bar', 2)
    headers.setlist('buz', [1, 'dos', 3])

    self.assertEqual(headers.getlist('foo'), [b'1'])
    self.assertEqual(headers.getlist('bar'), [b'2'])
    self.assertEqual(headers.getlist('buz'), [b'1', b'dos', b'3'])
    self.assertEqual(headers.getlist('hey'), [b'5'])
3
Example 19
def test_headers(self):
    """Check how requests take headers and that Headers stores only bytes.

    The first part builds one request from a plain dict and another from
    the first request's headers, then checks that the headers compare
    equal while being distinct objects. The second part checks that every
    key and value held by a ``Headers`` instance is ``bytes``.
    """
    # Different ways of setting headers attribute
    url = 'http://www.scrapy.org'
    headers = {b'Accept': 'gzip', b'Custom-Header': 'nothing to tell you'}
    r = self.request_class(url=url, headers=headers)
    p = self.request_class(url=url, headers=r.headers)

    self.assertEqual(r.headers, p.headers)
    # The request must hold its own headers object, not the one passed in.
    self.assertIsNot(r.headers, headers)
    self.assertIsNot(p.headers, r.headers)

    # headers must not be unicode
    h = Headers({'key1': u'val1', u'key2': 'val2'})
    h[u'newkey'] = u'newval'
    for k, v in h.iteritems():
        # assertIsInstance replaces the deprecated ``assert_`` alias, which
        # was removed from unittest in Python 3.12.
        self.assertIsInstance(k, bytes)
        for s in v:
            self.assertIsInstance(s, bytes)
3
Example 20
def test_replace(self):
    """Exercise Request.replace() with changed and with emptied attributes."""
    original = self.request_class("http://www.example.com", method='GET')
    hdrs = Headers(original.headers)
    hdrs[b'key'] = b'value'
    replaced = original.replace(method="POST", body="New body", headers=hdrs)

    self.assertEqual(original.url, replaced.url)
    self.assertEqual((original.method, replaced.method), ("GET", "POST"))
    self.assertEqual((original.body, replaced.body), (b'', b"New body"))
    self.assertEqual(
        (original.headers, replaced.headers), (self.default_headers, hdrs))

    # Empty attributes (which may fail if not compared properly)
    with_meta = self.request_class(
        "http://www.example.com", meta={'a': 1}, dont_filter=True)
    emptied = with_meta.replace(
        url="http://www.example.com/2", body=b'', meta={}, dont_filter=False)
    self.assertEqual(emptied.url, "http://www.example.com/2")
    self.assertEqual(emptied.body, b'')
    self.assertEqual(emptied.meta, {})
    assert emptied.dont_filter is False
3
Example 21
def test_replace(self):
    """Exercise Response.replace() with changed and with emptied attributes."""
    hdrs = Headers({"key": "value"})
    original = self.response_class("http://www.example.com")
    replaced = original.replace(status=301, body=b"New body", headers=hdrs)

    assert original.body == b''
    self.assertEqual(original.url, replaced.url)
    self.assertEqual((original.status, replaced.status), (200, 301))
    self.assertEqual((original.body, replaced.body), (b'', b"New body"))
    self.assertEqual((original.headers, replaced.headers), ({}, hdrs))

    # Empty attributes (which may fail if not compared properly)
    flagged = self.response_class("http://www.example.com", flags=['cached'])
    emptied = flagged.replace(body=b'', flags=[])
    self.assertEqual(emptied.body, b'')
    self.assertEqual(emptied.flags, [])
3
Example 22
Project: scrapy Source File: test_responsetypes.py
def test_from_headers(self):
    """responsetypes.from_headers() maps header sets to response classes."""
    mappings = [
        ({'Content-Type': ['text/html; charset=utf-8']}, HtmlResponse),
        ({'Content-Type': ['application/octet-stream'],
          'Content-Disposition': ['attachment; filename=data.txt']},
         TextResponse),
        ({'Content-Type': ['text/html; charset=utf-8'],
          'Content-Encoding': ['gzip']},
         Response),
    ]
    for raw_headers, cls in mappings:
        source = Headers(raw_headers)
        retcls = responsetypes.from_headers(source)
        assert retcls is cls, "%s ==> %s != %s" % (source, retcls, cls)
3
Example 23
Project: scrapy Source File: test_responsetypes.py
def test_from_args(self):
    """responsetypes.from_args() picks a class from keyword arguments."""
    # TODO: add more tests that check precedence between the different arguments
    html_headers = Headers({'Content-Type': ['text/html; charset=utf-8']})
    attachment_headers = Headers(
        {'Content-Disposition': ['attachment; filename="data.xml.gz"']})
    mappings = [
        ({'url': 'http://www.example.com/data.csv'}, TextResponse),
        # headers takes precedence over url
        ({'headers': html_headers, 'url': 'http://www.example.com/item/'},
         HtmlResponse),
        ({'headers': attachment_headers, 'url': 'http://www.example.com/page/'},
         Response),
    ]
    for source, cls in mappings:
        retcls = responsetypes.from_args(**source)
        assert retcls is cls, "%s ==> %s != %s" % (source, retcls, cls)
3
Example 24
Project: scrapy Source File: test_utils_gz.py
def test_is_gzipped_case_insensitive(self):
    """is_gzipped() matches the gzip content type regardless of letter case."""
    content_types = (
        "Application/X-Gzip",
        "application/X-GZIP ; charset=utf-8",
    )
    for content_type in content_types:
        hdrs = Headers({"Content-Type": content_type})
        response = Response("http://www.example.com", headers=hdrs)
        self.assertTrue(is_gzipped(response))
3
Example 25
def test_non_standard_line_endings(self):
    """Headers are parsed when lines end with a bare newline."""
    # regression test for: http://dev.scrapy.org/ticket/258
    request = Request(url='http://foo/bar')
    factory = client.ScrapyHTTPClientFactory(request)
    protocol = client.ScrapyHTTPPageGetter()
    protocol.factory = factory
    protocol.headers = Headers()

    # Feed the response one line at a time, each terminated by "\n" only.
    received_lines = (
        b"HTTP/1.0 200 OK\n",
        b"Hello: World\n",
        b"Foo: Bar\n",
        b"\n",
    )
    for line in received_lines:
        protocol.dataReceived(line)

    expected = Headers({'Hello': ['World'], 'Foo': ['Bar']})
    self.assertEqual(protocol.headers, expected)
3
Example 26
Project: scrapy-splash Source File: utils.py
def headers_to_scrapy(headers):
    """
    Build a scrapy.http.Headers instance from headers data.

    Three input formats are supported:

    * {name: value, ...} dict;
    * [(name, value), ...] list;
    * [{'name': name, 'value': value}, ...] list (HAR headers format).

    Any falsy input (None, empty dict, empty list) yields empty Headers.
    """
    if not headers:
        return Headers({})
    if isinstance(headers, dict):
        return Headers(headers)

    # The first element tells the HAR format apart from (name, value) pairs.
    if isinstance(headers[0], dict):
        pairs = [(entry['name'], entry.get('value', '')) for entry in headers]
        return Headers(pairs)
    return Headers(headers)
3
Example 27
Project: scrapy-splash Source File: test_utils.py
def test_headers_to_scrapy():
    """headers_to_scrapy() handles empty input and all three data formats."""
    # Empty inputs of each kind produce empty Headers.
    assert headers_to_scrapy(None) == Headers()
    assert headers_to_scrapy({}) == Headers()
    assert headers_to_scrapy([]) == Headers()

    # Dict, pair-list and HAR-list inputs produce the same result.
    html_headers = Headers({'Content-Type': 'text/html'})
    assert headers_to_scrapy({'Content-Type': 'text/html'}) == html_headers
    assert headers_to_scrapy([('Content-Type', 'text/html')]) == html_headers
    assert headers_to_scrapy(
        [{'name': 'Content-Type', 'value': 'text/html'}]) == html_headers
3
Example 28
Project: scrapy-sentry Source File: utils.py
def response_from_dict(response, spider=None, **kwargs):
    """Build a response object from its dict representation.

    ``response`` is a mapping read through the keys ``"url"``,
    ``"status"``, ``"headers"`` and ``"body"``. The ``"headers"`` entry is
    expected to map each header name to an iterable of values; a missing
    or ``None`` entry is treated as "no headers".

    ``spider`` and ``**kwargs`` are accepted but not used.

    Returns an instance of the response class chosen by
    ``responsetypes.from_args`` from the headers and URL.
    """
    def _header_value(value):
        # On Python 3, str() applied to bytes yields its repr ("b'...'"),
        # which would corrupt the header, so bytes are passed through
        # unchanged; every other value is converted to text.
        return value if isinstance(value, bytes) else str(value)

    url = response.get("url")
    status = response.get("status")
    # Tolerate a missing/None "headers" entry instead of raising
    # AttributeError on ``None.items()``.
    raw_headers = response.get("headers") or {}
    headers = Headers([
        (name, [_header_value(value) for value in values])
        for name, values in raw_headers.items()
    ])
    body = response.get("body")
    respcls = responsetypes.from_args(headers=headers, url=url)
    return respcls(url=url, headers=headers, status=status, body=body)
0
Example 29
Project: scrapy Source File: webclient.py
def __init__(self, request, timeout=180):
    """Set up the factory state for ``request``.

    ``timeout`` is only used when ``request.meta`` carries no truthy
    ``download_timeout``. The request headers are copied and then
    completed with ``Host``, ``Content-Length`` and ``Connection`` as
    described by the comments below.
    """
    self._url = urldefrag(request.url)[0]
    # Twisted's interface wants bytes for the URL and the method.
    self.url = to_bytes(self._url, encoding='ascii')
    self.method = to_bytes(request.method, encoding='ascii')
    self.body = request.body or None
    self.headers = Headers(request.headers)
    self.response_headers = None
    self.timeout = request.meta.get('download_timeout') or timeout
    self.start_time = time()
    self.deferred = defer.Deferred().addCallback(
        self._build_response, request)

    # Twisted 11.1.0+ expects an HTTPClientFactory to carry
    # _disconnectedDeferred (see Twisted r32329). Scrapy implements its
    # own redirect handling, so the _waitForDisconnect callback is not
    # added. Defining the attribute avoids an AttributeError when the
    # clientConnectionFailed method is called.
    self._disconnectedDeferred = defer.Deferred()

    self._set_connection_attributes(request)

    # Host header derived from the URL, unless the request already set one.
    self.headers.setdefault('Host', self.netloc)

    has_body = self.body is not None
    if has_body:
        # Content-Length always follows the actual body length.
        self.headers['Content-Length'] = len(self.body)
        # just in case a broken http/1.1 decides to keep connection alive
        self.headers.setdefault("Connection", "close")
    elif self.method == b'POST':
        # Content-Length must be specified in POST method even with no body
        self.headers['Content-Length'] = 0
0
Example 30
Project: scrapy Source File: test_http_headers.py
def test_encode_latin1(self):
    """A unicode value is encoded with the requested latin1 encoding."""
    headers = Headers({u'key': u'\xa3'}, encoding='latin1')
    name, values = dict(headers).popitem()
    self.assertEqual(values[0], b'\xa3')
0
Example 31
Project: scrapy Source File: test_http_headers.py
def test_encode_multiple(self):
    """Values supplied inside a list are encoded as well."""
    headers = Headers({u'key': [u'\xa3']}, encoding='utf-8')
    name, values = dict(headers).popitem()
    self.assertEqual(values[0], b'\xc2\xa3')
0
Example 32
def test_setlist(self):
    """setlist() replaces the values already stored for a header."""
    headers = Headers({'header1': 'value1'})
    self.assertEqual(headers.getlist('header1'), [b'value1'])

    replacement = [b'value2', b'value3']
    headers.setlist('header1', replacement)
    self.assertEqual(headers.getlist('header1'), [b'value2', b'value3'])
0
Example 33
Project: scrapy Source File: test_utils_gz.py
def test_is_x_gzipped_right(self):
    """is_gzipped() accepts the application/x-gzip content type."""
    response = Response(
        "http://www.example.com",
        headers=Headers({"Content-Type": "application/x-gzip"}),
    )
    self.assertTrue(is_gzipped(response))
0
Example 34
Project: scrapy Source File: test_utils_gz.py
def test_is_gzipped_right(self):
    """is_gzipped() accepts the application/gzip content type."""
    response = Response(
        "http://www.example.com",
        headers=Headers({"Content-Type": "application/gzip"}),
    )
    self.assertTrue(is_gzipped(response))
0
Example 35
Project: scrapy Source File: test_utils_gz.py
def test_is_gzipped_not_quite(self):
    """is_gzipped() rejects a content type that merely starts like gzip."""
    response = Response(
        "http://www.example.com",
        headers=Headers({"Content-Type": "application/gzippppp"}),
    )
    self.assertFalse(is_gzipped(response))
0
Example 36
Project: scrapy Source File: test_utils_gz.py
def test_is_gzipped_wrong(self):
    """is_gzipped() rejects an unrelated content type."""
    response = Response(
        "http://www.example.com",
        headers=Headers({"Content-Type": "application/javascript"}),
    )
    self.assertFalse(is_gzipped(response))
0
Example 37
Project: scrapy Source File: test_utils_gz.py
def test_is_gzipped_with_charset(self):
    """is_gzipped() accepts a gzip content type followed by a charset."""
    response = Response(
        "http://www.example.com",
        headers=Headers({"Content-Type": "application/x-gzip;charset=utf-8"}),
    )
    self.assertTrue(is_gzipped(response))
0
Example 38
Project: scrapy Source File: test_webclient.py
def test_earlyHeaders(self):
    """Compare the bytes sent for several requests with the expected wire data.

    Each scenario builds a ``ScrapyHTTPClientFactory`` from a ``Request``
    and hands it to ``self._test`` together with the expected output.
    """
    # basic test stolen from twisted HTTPageGetter
    request = Request(
        url='http://foo/bar',
        body="some data",
        headers={
            'Host': 'example.net',
            'User-Agent': 'fooble',
            'Cookie': 'blah blah',
            'Content-Length': '12981',
            'Useful': 'value'})
    factory = client.ScrapyHTTPClientFactory(request)
    self._test(
        factory,
        b"GET /bar HTTP/1.0\r\n"
        b"Content-Length: 9\r\n"
        b"Useful: value\r\n"
        b"Connection: close\r\n"
        b"User-Agent: fooble\r\n"
        b"Host: example.net\r\n"
        b"Cookie: blah blah\r\n"
        b"\r\n"
        b"some data")

    # test minimal sent headers
    request = Request('http://foo/bar')
    factory = client.ScrapyHTTPClientFactory(request)
    self._test(
        factory,
        b"GET /bar HTTP/1.0\r\n"
        b"Host: foo\r\n"
        b"\r\n")

    # test a simple POST with body and content-type
    request = Request(
        method='POST',
        url='http://foo/bar',
        body='name=value',
        headers={'Content-Type': 'application/x-www-form-urlencoded'})
    factory = client.ScrapyHTTPClientFactory(request)
    self._test(
        factory,
        b"POST /bar HTTP/1.0\r\n"
        b"Host: foo\r\n"
        b"Connection: close\r\n"
        b"Content-Type: application/x-www-form-urlencoded\r\n"
        b"Content-Length: 10\r\n"
        b"\r\n"
        b"name=value")

    # test a POST method with no body provided
    request = Request(
        method='POST',
        url='http://foo/bar'
    )
    factory = client.ScrapyHTTPClientFactory(request)
    self._test(
        factory,
        b"POST /bar HTTP/1.0\r\n"
        b"Host: foo\r\n"
        b"Content-Length: 0\r\n"
        b"\r\n")

    # test with single and multivalued headers
    request = Request(
        url='http://foo/bar',
        headers={
            'X-Meta-Single': 'single',
            'X-Meta-Multivalued': ['value1', 'value2'],
        })
    factory = client.ScrapyHTTPClientFactory(request)
    self._test(
        factory,
        b"GET /bar HTTP/1.0\r\n"
        b"Host: foo\r\n"
        b"X-Meta-Multivalued: value1\r\n"
        b"X-Meta-Multivalued: value2\r\n"
        b"X-Meta-Single: single\r\n"
        b"\r\n")

    # same test with single and multivalued headers but using Headers class
    request = Request(
        url='http://foo/bar',
        headers=Headers({
            'X-Meta-Single': 'single',
            'X-Meta-Multivalued': ['value1', 'value2'],
        }))
    factory = client.ScrapyHTTPClientFactory(request)
    self._test(
        factory,
        b"GET /bar HTTP/1.0\r\n"
        b"Host: foo\r\n"
        b"X-Meta-Multivalued: value1\r\n"
        b"X-Meta-Multivalued: value2\r\n"
        b"X-Meta-Single: single\r\n"
        b"\r\n")