scrapy.http.Headers

Here are the examples of the python api scrapy.http.Headers taken from open source projects. By voting up you can indicate which examples are most useful and appropriate.

38 Examples 7

Example 1

Project: scrapy Source File: http11.py
    def _cb_bodydone(self, result, request, url):
        txresponse, body, flags = result
        status = int(txresponse.code)
        headers = Headers(txresponse.headers.getAllRawHeaders())
        respcls = responsetypes.from_args(headers=headers, url=url, body=body)
        return respcls(url=url, status=status, headers=headers, body=body, flags=flags)

Example 2

Project: scrapy Source File: webclient.py
Function: connection_made
    def connectionMade(self):
        self.headers = Headers() # bucket for response headers

        # Method command
        self.sendCommand(self.factory.method, self.factory.path)
        # Headers
        for key, values in self.factory.headers.items():
            for value in values:
                self.sendHeader(key, value)
        self.endHeaders()
        # Body
        if self.factory.body is not None:
            self.transport.write(self.factory.body)

Example 3

Project: scrapy Source File: webclient.py
    def _build_response(self, body, request):
        request.meta['download_latency'] = self.headers_time-self.start_time
        status = int(self.status)
        headers = Headers(self.response_headers)
        respcls = responsetypes.from_args(headers=headers, url=self._url)
        return respcls(url=self._url, status=status, headers=headers, body=body)

Example 4

Project: scrapy Source File: httpcache.py
    def retrieve_response(self, spider, request):
        data = self._read_data(spider, request)
        if data is None:
            return  # not cached
        url = data['url']
        status = data['status']
        headers = Headers(data['headers'])
        body = data['body']
        respcls = responsetypes.from_args(headers=headers, url=url)
        response = respcls(url=url, headers=headers, status=status, body=body)
        return response

Example 5

Project: scrapy Source File: httpcache.py
    def retrieve_response(self, spider, request):
        """Return response if present in cache, or None otherwise."""
        metadata = self._read_meta(spider, request)
        if metadata is None:
            return  # not cached
        rpath = self._get_request_path(spider, request)
        with self._open(os.path.join(rpath, 'response_body'), 'rb') as f:
            body = f.read()
        with self._open(os.path.join(rpath, 'response_headers'), 'rb') as f:
            rawheaders = f.read()
        url = metadata.get('response_url')
        status = metadata['status']
        headers = Headers(headers_raw_to_dict(rawheaders))
        respcls = responsetypes.from_args(headers=headers, url=url)
        response = respcls(url=url, headers=headers, status=status, body=body)
        return response

Example 6

Project: scrapy Source File: test_http_headers.py
Function: test_basics
    def test_basics(self):
        h = Headers({'Content-Type': 'text/html', 'Content-Length': 1234})
        assert h['Content-Type']
        assert h['Content-Length']

        self.assertRaises(KeyError, h.__getitem__, 'Accept')
        self.assertEqual(h.get('Accept'), None)
        self.assertEqual(h.getlist('Accept'), [])

        self.assertEqual(h.get('Accept', '*/*'), b'*/*')
        self.assertEqual(h.getlist('Accept', '*/*'), [b'*/*'])
        self.assertEqual(h.getlist('Accept', ['text/html', 'images/jpeg']),
                         [b'text/html', b'images/jpeg'])

Example 7

Project: scrapy Source File: test_http_headers.py
Function: test_single_value
    def test_single_value(self):
        h = Headers()
        h['Content-Type'] = 'text/html'
        self.assertEqual(h['Content-Type'], b'text/html')
        self.assertEqual(h.get('Content-Type'), b'text/html')
        self.assertEqual(h.getlist('Content-Type'), [b'text/html'])

Example 8

Project: scrapy Source File: test_http_headers.py
Function: test_multi_value
    def test_multivalue(self):
        h = Headers()
        h['X-Forwarded-For'] = hlist = ['ip1', 'ip2']
        self.assertEqual(h['X-Forwarded-For'], b'ip2')
        self.assertEqual(h.get('X-Forwarded-For'), b'ip2')
        self.assertEqual(h.getlist('X-Forwarded-For'), [b'ip1', b'ip2'])
        assert h.getlist('X-Forwarded-For') is not hlist

Example 9

Project: scrapy Source File: test_http_headers.py
Function: test_encode_utf8
    def test_encode_utf8(self):
        h = Headers({u'key': u'\xa3'}, encoding='utf-8')
        key, val = dict(h).popitem()
        assert isinstance(key, bytes), key
        assert isinstance(val[0], bytes), val[0]
        self.assertEqual(val[0], b'\xc2\xa3')

Example 10

Project: scrapy Source File: test_http_headers.py
    def test_delete_and_contains(self):
        h = Headers()
        h['Content-Type'] = 'text/html'
        assert 'Content-Type' in h
        del h['Content-Type']
        assert 'Content-Type' not in h

Example 11

Project: scrapy Source File: test_http_headers.py
Function: test_set_default
    def test_setdefault(self):
        h = Headers()
        hlist = ['ip1', 'ip2']
        olist = h.setdefault('X-Forwarded-For', hlist)
        assert h.getlist('X-Forwarded-For') is not hlist
        assert h.getlist('X-Forwarded-For') is olist

        h = Headers()
        olist = h.setdefault('X-Forwarded-For', 'ip1')
        self.assertEqual(h.getlist('X-Forwarded-For'), [b'ip1'])
        assert h.getlist('X-Forwarded-For') is olist

Example 12

Project: scrapy Source File: test_http_headers.py
Function: test_iterables
    def test_iterables(self):
        idict = {'Content-Type': 'text/html', 'X-Forwarded-For': ['ip1', 'ip2']}

        h = Headers(idict)
        self.assertDictEqual(dict(h),
                             {b'Content-Type': [b'text/html'],
                              b'X-Forwarded-For': [b'ip1', b'ip2']})
        self.assertSortedEqual(h.keys(),
                               [b'X-Forwarded-For', b'Content-Type'])
        self.assertSortedEqual(h.items(),
                               [(b'X-Forwarded-For', [b'ip1', b'ip2']),
                                (b'Content-Type', [b'text/html'])])
        self.assertSortedEqual(h.iteritems(),
                               [(b'X-Forwarded-For', [b'ip1', b'ip2']),
                                (b'Content-Type', [b'text/html'])])
        self.assertSortedEqual(h.values(), [b'ip2', b'text/html'])

Example 13

Project: scrapy Source File: test_http_headers.py
Function: test_update
    def test_update(self):
        h = Headers()
        h.update({'Content-Type': 'text/html',
                  'X-Forwarded-For': ['ip1', 'ip2']})
        self.assertEqual(h.getlist('Content-Type'), [b'text/html'])
        self.assertEqual(h.getlist('X-Forwarded-For'), [b'ip1', b'ip2'])

Example 14

Project: scrapy Source File: test_http_headers.py
Function: test_copy
    def test_copy(self):
        h1 = Headers({'header1': ['value1', 'value2']})
        h2 = copy.copy(h1)
        self.assertEqual(h1, h2)
        self.assertEqual(h1.getlist('header1'), h2.getlist('header1'))
        assert h1.getlist('header1') is not h2.getlist('header1')
        assert isinstance(h2, Headers)

Example 15

Project: scrapy Source File: test_http_headers.py
Function: test_append_list
    def test_appendlist(self):
        h1 = Headers({'header1': 'value1'})
        h1.appendlist('header1', 'value3')
        self.assertEqual(h1.getlist('header1'), [b'value1', b'value3'])

        h1 = Headers()
        h1.appendlist('header1', 'value1')
        h1.appendlist('header1', 'value3')
        self.assertEqual(h1.getlist('header1'), [b'value1', b'value3'])

Example 16

Project: scrapy Source File: test_http_headers.py
Function: test_setlistdefault
    def test_setlistdefault(self):
        h1 = Headers({'header1': 'value1'})
        h1.setlistdefault('header1', ['value2', 'value3'])
        h1.setlistdefault('header2', ['value2', 'value3'])
        self.assertEqual(h1.getlist('header1'), [b'value1'])
        self.assertEqual(h1.getlist('header2'), [b'value2', b'value3'])

Example 17

Project: scrapy Source File: test_http_headers.py
Function: test_none_value
    def test_none_value(self):
        h1 = Headers()
        h1['foo'] = 'bar'
        h1['foo'] = None
        h1.setdefault('foo', 'bar')
        self.assertEqual(h1.get('foo'), None)
        self.assertEqual(h1.getlist('foo'), [])

Example 18

Project: scrapy Source File: test_http_headers.py
Function: test_int_value
    def test_int_value(self):
        h1 = Headers({'hey': 5})
        h1['foo'] = 1
        h1.setdefault('bar', 2)
        h1.setlist('buz', [1, 'dos', 3])
        self.assertEqual(h1.getlist('foo'), [b'1'])
        self.assertEqual(h1.getlist('bar'), [b'2'])
        self.assertEqual(h1.getlist('buz'), [b'1', b'dos', b'3'])
        self.assertEqual(h1.getlist('hey'), [b'5'])

Example 19

Project: scrapy Source File: test_http_request.py
Function: test_headers
    def test_headers(self):
        # Different ways of setting headers attribute
        url = 'http://www.scrapy.org'
        headers = {b'Accept':'gzip', b'Custom-Header':'nothing to tell you'}
        r = self.request_class(url=url, headers=headers)
        p = self.request_class(url=url, headers=r.headers)

        self.assertEqual(r.headers, p.headers)
        self.assertFalse(r.headers is headers)
        self.assertFalse(p.headers is r.headers)

        # headers must not be unicode
        h = Headers({'key1': u'val1', u'key2': 'val2'})
        h[u'newkey'] = u'newval'
        for k, v in h.iteritems():
            self.assert_(isinstance(k, bytes))
            for s in v:
                self.assert_(isinstance(s, bytes))

Example 20

Project: scrapy Source File: test_http_request.py
Function: test_replace
    def test_replace(self):
        """Test Request.replace() method"""
        r1 = self.request_class("http://www.example.com", method='GET')
        hdrs = Headers(r1.headers)
        hdrs[b'key'] = b'value'
        r2 = r1.replace(method="POST", body="New body", headers=hdrs)
        self.assertEqual(r1.url, r2.url)
        self.assertEqual((r1.method, r2.method), ("GET", "POST"))
        self.assertEqual((r1.body, r2.body), (b'', b"New body"))
        self.assertEqual((r1.headers, r2.headers), (self.default_headers, hdrs))

        # Empty attributes (which may fail if not compared properly)
        r3 = self.request_class("http://www.example.com", meta={'a': 1}, dont_filter=True)
        r4 = r3.replace(url="http://www.example.com/2", body=b'', meta={}, dont_filter=False)
        self.assertEqual(r4.url, "http://www.example.com/2")
        self.assertEqual(r4.body, b'')
        self.assertEqual(r4.meta, {})
        assert r4.dont_filter is False

Example 21

Project: scrapy Source File: test_http_response.py
Function: test_replace
    def test_replace(self):
        """Test Response.replace() method"""
        hdrs = Headers({"key": "value"})
        r1 = self.response_class("http://www.example.com")
        r2 = r1.replace(status=301, body=b"New body", headers=hdrs)
        assert r1.body == b''
        self.assertEqual(r1.url, r2.url)
        self.assertEqual((r1.status, r2.status), (200, 301))
        self.assertEqual((r1.body, r2.body), (b'', b"New body"))
        self.assertEqual((r1.headers, r2.headers), ({}, hdrs))

        # Empty attributes (which may fail if not compared properly)
        r3 = self.response_class("http://www.example.com", flags=['cached'])
        r4 = r3.replace(body=b'', flags=[])
        self.assertEqual(r4.body, b'')
        self.assertEqual(r4.flags, [])

Example 22

Project: scrapy Source File: test_responsetypes.py
    def test_from_headers(self):
        mappings = [
            ({'Content-Type': ['text/html; charset=utf-8']}, HtmlResponse),
            ({'Content-Type': ['application/octet-stream'], 'Content-Disposition': ['attachment; filename=data.txt']}, TextResponse),
            ({'Content-Type': ['text/html; charset=utf-8'], 'Content-Encoding': ['gzip']}, Response),
        ]
        for source, cls in mappings:
            source = Headers(source)
            retcls = responsetypes.from_headers(source)
            assert retcls is cls, "%s ==> %s != %s" % (source, retcls, cls)

Example 23

Project: scrapy Source File: test_responsetypes.py
    def test_from_args(self):
        # TODO: add more tests that check precedence between the different arguments
        mappings = [
            ({'url': 'http://www.example.com/data.csv'}, TextResponse),
            # headers takes precedence over url
            ({'headers': Headers({'Content-Type': ['text/html; charset=utf-8']}), 'url': 'http://www.example.com/item/'}, HtmlResponse),
            ({'headers': Headers({'Content-Disposition': ['attachment; filename="data.xml.gz"']}), 'url': 'http://www.example.com/page/'}, Response),


        ]
        for source, cls in mappings:
            retcls = responsetypes.from_args(**source)
            assert retcls is cls, "%s ==> %s != %s" % (source, retcls, cls)

Example 24

Project: scrapy Source File: test_utils_gz.py
    def test_is_gzipped_case_insensitive(self):
        hdrs = Headers({"Content-Type": "Application/X-Gzip"})
        r1 = Response("http://www.example.com", headers=hdrs)
        self.assertTrue(is_gzipped(r1))

        hdrs = Headers({"Content-Type": "application/X-GZIP ; charset=utf-8"})
        r1 = Response("http://www.example.com", headers=hdrs)
        self.assertTrue(is_gzipped(r1))

Example 25

Project: scrapy Source File: test_webclient.py
Function: test_non_standard_line_endings
    def test_non_standard_line_endings(self):
        # regression test for: http://dev.scrapy.org/ticket/258
        factory = client.ScrapyHTTPClientFactory(Request(
            url='http://foo/bar'))
        protocol = client.ScrapyHTTPPageGetter()
        protocol.factory = factory
        protocol.headers = Headers()
        protocol.dataReceived(b"HTTP/1.0 200 OK\n")
        protocol.dataReceived(b"Hello: World\n")
        protocol.dataReceived(b"Foo: Bar\n")
        protocol.dataReceived(b"\n")
        self.assertEqual(protocol.headers,
            Headers({'Hello': ['World'], 'Foo': ['Bar']}))

Example 26

Project: scrapy-splash Source File: utils.py
def headers_to_scrapy(headers):
    """
    Return scrapy.http.Headers instance from headers data.
    3 data formats are supported:

    * {name: value, ...} dict;
    * [(name, value), ...] list;
    * [{'name': name, 'value': value'}, ...] list (HAR headers format).
    """
    if isinstance(headers or {}, dict):
        return Headers(headers or {})

    if isinstance(headers[0], dict):
        return Headers([
            (d['name'], d.get('value', ''))
            for d in headers
        ])

    return Headers(headers)

Example 27

Project: scrapy-splash Source File: test_utils.py
def test_headers_to_scrapy():
    assert headers_to_scrapy(None) == Headers()
    assert headers_to_scrapy({}) == Headers()
    assert headers_to_scrapy([]) == Headers()

    html_headers = Headers({'Content-Type': 'text/html'})

    assert headers_to_scrapy({'Content-Type': 'text/html'}) == html_headers
    assert headers_to_scrapy([('Content-Type', 'text/html')]) == html_headers
    assert headers_to_scrapy([{'name': 'Content-Type', 'value': 'text/html'}]) == html_headers

Example 28

Project: scrapy-sentry Source File: utils.py
def response_from_dict(response, spider=None, **kwargs):
    """Returns a dict based on a response from a spider"""
    url = response.get("url")
    status = response.get("status")
    headers = Headers([(x, list(map(str, y))) for x, y in
                       response.get("headers").items()])
    body = response.get("body")

    respcls = responsetypes.from_args(headers=headers, url=url)
    response = respcls(url=url, headers=headers, status=status, body=body)
    return response

Example 29

Project: scrapy Source File: webclient.py
    def __init__(self, request, timeout=180):
        self._url = urldefrag(request.url)[0]
        # converting to bytes to comply to Twisted interface
        self.url = to_bytes(self._url, encoding='ascii')
        self.method = to_bytes(request.method, encoding='ascii')
        self.body = request.body or None
        self.headers = Headers(request.headers)
        self.response_headers = None
        self.timeout = request.meta.get('download_timeout') or timeout
        self.start_time = time()
        self.deferred = defer.Deferred().addCallback(self._build_response, request)

        # Fixes Twisted 11.1.0+ support as HTTPClientFactory is expected
        # to have _disconnectedDeferred. See Twisted r32329.
        # As Scrapy implements it's own logic to handle redirects is not
        # needed to add the callback _waitForDisconnect.
        # Specifically this avoids the AttributeError exception when
        # clientConnectionFailed method is called.
        self._disconnectedDeferred = defer.Deferred()

        self._set_connection_attributes(request)

        # set Host header based on url
        self.headers.setdefault('Host', self.netloc)

        # set Content-Length based len of body
        if self.body is not None:
            self.headers['Content-Length'] = len(self.body)
            # just in case a broken http/1.1 decides to keep connection alive
            self.headers.setdefault("Connection", "close")
        # Content-Length must be specified in POST method even with no body
        elif self.method == b'POST':
            self.headers['Content-Length'] = 0

Example 30

Project: scrapy Source File: test_http_headers.py
    def test_encode_latin1(self):
        h = Headers({u'key': u'\xa3'}, encoding='latin1')
        key, val = dict(h).popitem()
        self.assertEqual(val[0], b'\xa3')

Example 31

Project: scrapy Source File: test_http_headers.py
    def test_encode_multiple(self):
        h = Headers({u'key': [u'\xa3']}, encoding='utf-8')
        key, val = dict(h).popitem()
        self.assertEqual(val[0], b'\xc2\xa3')

Example 32

Project: scrapy Source File: test_http_headers.py
Function: test_set_list
    def test_setlist(self):
        h1 = Headers({'header1': 'value1'})
        self.assertEqual(h1.getlist('header1'), [b'value1'])
        h1.setlist('header1', [b'value2', b'value3'])
        self.assertEqual(h1.getlist('header1'), [b'value2', b'value3'])

Example 33

Project: scrapy Source File: test_utils_gz.py
    def test_is_x_gzipped_right(self):
        hdrs = Headers({"Content-Type": "application/x-gzip"})
        r1 = Response("http://www.example.com", headers=hdrs)
        self.assertTrue(is_gzipped(r1))

Example 34

Project: scrapy Source File: test_utils_gz.py
    def test_is_gzipped_right(self):
        hdrs = Headers({"Content-Type": "application/gzip"})
        r1 = Response("http://www.example.com", headers=hdrs)
        self.assertTrue(is_gzipped(r1))

Example 35

Project: scrapy Source File: test_utils_gz.py
    def test_is_gzipped_not_quite(self):
        hdrs = Headers({"Content-Type": "application/gzippppp"})
        r1 = Response("http://www.example.com", headers=hdrs)
        self.assertFalse(is_gzipped(r1))

Example 36

Project: scrapy Source File: test_utils_gz.py
    def test_is_gzipped_wrong(self):
        hdrs = Headers({"Content-Type": "application/javascript"})
        r1 = Response("http://www.example.com", headers=hdrs)
        self.assertFalse(is_gzipped(r1))

Example 37

Project: scrapy Source File: test_utils_gz.py
    def test_is_gzipped_with_charset(self):
        hdrs = Headers({"Content-Type": "application/x-gzip;charset=utf-8"})
        r1 = Response("http://www.example.com", headers=hdrs)
        self.assertTrue(is_gzipped(r1))

Example 38

Project: scrapy Source File: test_webclient.py
    def test_earlyHeaders(self):
        # basic test stolen from twisted HTTPageGetter
        factory = client.ScrapyHTTPClientFactory(Request(
            url='http://foo/bar',
            body="some data",
            headers={
                'Host': 'example.net',
                'User-Agent': 'fooble',
                'Cookie': 'blah blah',
                'Content-Length': '12981',
                'Useful': 'value'}))

        self._test(factory,
            b"GET /bar HTTP/1.0\r\n"
            b"Content-Length: 9\r\n"
            b"Useful: value\r\n"
            b"Connection: close\r\n"
            b"User-Agent: fooble\r\n"
            b"Host: example.net\r\n"
            b"Cookie: blah blah\r\n"
            b"\r\n"
            b"some data")

        # test minimal sent headers
        factory = client.ScrapyHTTPClientFactory(Request('http://foo/bar'))
        self._test(factory,
            b"GET /bar HTTP/1.0\r\n"
            b"Host: foo\r\n"
            b"\r\n")

        # test a simple POST with body and content-type
        factory = client.ScrapyHTTPClientFactory(Request(
            method='POST',
            url='http://foo/bar',
            body='name=value',
            headers={'Content-Type': 'application/x-www-form-urlencoded'}))

        self._test(factory,
            b"POST /bar HTTP/1.0\r\n"
            b"Host: foo\r\n"
            b"Connection: close\r\n"
            b"Content-Type: application/x-www-form-urlencoded\r\n"
            b"Content-Length: 10\r\n"
            b"\r\n"
            b"name=value")

        # test a POST method with no body provided
        factory = client.ScrapyHTTPClientFactory(Request(
            method='POST',
            url='http://foo/bar'
        ))

        self._test(factory,
            b"POST /bar HTTP/1.0\r\n"
            b"Host: foo\r\n"
            b"Content-Length: 0\r\n"
            b"\r\n")

        # test with single and multivalued headers
        factory = client.ScrapyHTTPClientFactory(Request(
            url='http://foo/bar',
            headers={
                'X-Meta-Single': 'single',
                'X-Meta-Multivalued': ['value1', 'value2'],
                }))

        self._test(factory,
            b"GET /bar HTTP/1.0\r\n"
            b"Host: foo\r\n"
            b"X-Meta-Multivalued: value1\r\n"
            b"X-Meta-Multivalued: value2\r\n"
            b"X-Meta-Single: single\r\n"
            b"\r\n")

        # same test with single and multivalued headers but using Headers class
        factory = client.ScrapyHTTPClientFactory(Request(
            url='http://foo/bar',
            headers=Headers({
                'X-Meta-Single': 'single',
                'X-Meta-Multivalued': ['value1', 'value2'],
                })))

        self._test(factory,
            b"GET /bar HTTP/1.0\r\n"
            b"Host: foo\r\n"
            b"X-Meta-Multivalued: value1\r\n"
            b"X-Meta-Multivalued: value2\r\n"
            b"X-Meta-Single: single\r\n"
            b"\r\n")