Here are examples of the Python API scrapy.utils.gz.gunzip taken from open-source projects. By voting up, you can indicate which examples are most useful and appropriate.
6 Examples
3
Example 1
def _decode(self, body, encoding):
    """Decompress ``body`` according to the given content encoding.

    ``encoding`` is compared against the byte strings ``b'gzip'``,
    ``b'x-gzip'`` and ``b'deflate'``.  Gzip variants are handed to
    ``gunzip``; deflate is decompressed with ``zlib``.  Any other value
    leaves ``body`` untouched.

    Returns the decompressed body, or ``body`` itself for an encoding
    that is not handled here.  A ``zlib.error`` propagates only if the
    raw-deflate fallback fails too.
    """
    if encoding in (b'gzip', b'x-gzip'):
        return gunzip(body)

    if encoding == b'deflate':
        try:
            return zlib.decompress(body)
        except zlib.error:
            # ugly hack to work with raw deflate content that may
            # be sent by microsoft servers. For more information, see:
            # http://carsten.codimi.de/gzip.yaws/
            # http://www.port80software.com/200ok/archive/2005/10/31/868.aspx
            # http://www.gzip.org/zlib/zlib_faq.html#faq38
            #
            # A negative wbits value makes zlib read a raw deflate stream
            # without the zlib header and trailer.
            return zlib.decompress(body, -15)

    return body
3
Example 2
def _get_sitemap_body(self, response):
    """Return the sitemap body contained in the given response,
    or None if the response is not a sitemap.

    The checks run in this order and the first match wins:

    1. ``response`` is an ``XmlResponse``: its body is returned as is.
    2. ``is_gzipped(response)`` is true: the body is gunzipped.
    3. The URL ends with ``.xml``: the body is returned as is.
    4. The URL ends with ``.xml.gz``: the body is gunzipped.
    """
    if isinstance(response, XmlResponse):
        return response.body
    if is_gzipped(response):
        return gunzip(response.body)

    # Fall back to the URL suffix when the response itself gave no hint.
    url = response.url
    if url.endswith('.xml'):
        return response.body
    if url.endswith('.xml.gz'):
        return gunzip(response.body)

    # Not recognised as a sitemap.
    return None
3
Example 3
Project: scrapy Source File: test_utils_gz.py
def test_gunzip_illegal_eof(self):
    """gunzip output for 'unexpected-eof.gz' matches the stored sample.

    The gunzipped bytes go through ``html_to_unicode`` with a cp1252
    charset hint, and element ``[1]`` of its result is compared with
    the UTF-8 decoded contents of 'unexpected-eof-output.txt'.
    """
    with open(join(SAMPLEDIR, 'unexpected-eof.gz'), 'rb') as f:
        text = html_to_unicode('charset=cp1252', gunzip(f.read()))[1]
    with open(join(SAMPLEDIR, 'unexpected-eof-output.txt'), 'rb') as o:
        expected_text = o.read().decode("utf-8")

    # Compare lengths first so a size mismatch is reported on its own
    # before the full-text comparison.
    self.assertEqual(len(text), len(expected_text))
    self.assertEqual(text, expected_text)
0
Example 4
Project: scrapy Source File: test_utils_gz.py
def test_gunzip_basic(self):
    """gunzip of 'feed-sample1.xml.gz' yields 9950 items (``len``)."""
    with open(join(SAMPLEDIR, 'feed-sample1.xml.gz'), 'rb') as f:
        compressed = f.read()

    text = gunzip(compressed)
    self.assertEqual(len(text), 9950)
0
Example 5
Project: scrapy Source File: test_utils_gz.py
def test_gunzip_truncated(self):
    """gunzip of 'truncated-crc-error.gz' returns data ending in ``</html``.

    The expected suffix has no closing ``>``; the sample name suggests
    the archive is truncated, so presumably the output is cut short.
    """
    with open(join(SAMPLEDIR, 'truncated-crc-error.gz'), 'rb') as f:
        compressed = f.read()

    text = gunzip(compressed)
    # assertTrue instead of a bare assert: it is not stripped under
    # ``python -O`` and matches the other tests' self.assert* style.
    self.assertTrue(text.endswith(b'</html'))
0
Example 6
Project: scrapy Source File: test_utils_gz.py
def test_gunzip_truncated_short(self):
    """gunzip of 'truncated-crc-error-short.gz' returns data ending in ``</html>``."""
    with open(join(SAMPLEDIR, 'truncated-crc-error-short.gz'), 'rb') as f:
        compressed = f.read()

    text = gunzip(compressed)
    # assertTrue instead of a bare assert: it is not stripped under
    # ``python -O`` and matches the other tests' self.assert* style.
    self.assertTrue(text.endswith(b'</html>'))