scrapy.utils.python.to_native_str

Below are examples of the Python API scrapy.utils.python.to_native_str, taken from open-source projects. By voting up, you can indicate which examples are most useful and appropriate.

34 Examples

Example 1

Project: scrapy
Source File: cookies.py
View license
    def _debug_cookie(self, request, spider):
        """Log every Cookie header being sent, when cookie debugging is on."""
        if not self.debug:
            return
        values = [to_native_str(header, errors='replace')
                  for header in request.headers.getlist('Cookie')]
        if values:
            formatted = "\n".join("Cookie: {}\n".format(v) for v in values)
            logger.debug("Sending cookies to: {}\n{}".format(request, formatted),
                         extra={'spider': spider})

Example 2

Project: scrapy
Source File: cookies.py
View license
    def _debug_set_cookie(self, response, spider):
        """Log every Set-Cookie header received, when cookie debugging is on."""
        if not self.debug:
            return
        values = [to_native_str(header, errors='replace')
                  for header in response.headers.getlist('Set-Cookie')]
        if values:
            formatted = "\n".join("Set-Cookie: {}\n".format(v) for v in values)
            logger.debug("Received cookies from: {}\n{}".format(response, formatted),
                         extra={'spider': spider})

Example 3

Project: scrapy
Source File: exporters.py
View license
    def _build_row(self, values):
        """Yield each value coerced to a native str via the exporter encoding.

        Values that cannot be coerced (to_native_str raises TypeError, e.g.
        for numbers) are yielded unchanged.
        """
        for value in values:
            try:
                coerced = to_native_str(value, self.encoding)
            except TypeError:
                coerced = value
            yield coerced

Example 4

Project: scrapy
Source File: cookies.py
View license
    def header_items(self):
        """Return request headers as a list of (name, [values]) pairs,
        with names and values decoded to native strings."""
        items = []
        for name, values in self.request.headers.items():
            decoded_name = to_native_str(name, errors='replace')
            decoded_values = [to_native_str(value, errors='replace')
                              for value in values]
            items.append((decoded_name, decoded_values))
        return items

Example 5

Project: scrapy
Source File: text.py
View license
    def _set_url(self, url):
        """Store the response URL, converting unicode to native str first.

        On Python 2 a unicode URL cannot be converted without a known
        encoding, so that case raises TypeError.
        """
        if not isinstance(url, six.text_type):
            super(TextResponse, self)._set_url(url)
            return
        if six.PY2 and self.encoding is None:
            raise TypeError("Cannot convert unicode url - %s "
                            "has no encoding" % type(self).__name__)
        self._url = to_native_str(url, self.encoding)

Example 6

Project: scrapy
Source File: text.py
View license
    def _body_inferred_encoding(self):
        """Return the body encoding detected by html_to_unicode, caching both
        the encoding and the decoded body on first use."""
        if self._cached_benc is None:
            ctype = to_native_str(self.headers.get(b'Content-Type', b''))
            encoding, unicode_body = html_to_unicode(
                ctype, self.body,
                auto_detect_fun=self._auto_detect_fun,
                default_encoding=self._DEFAULT_ENCODING)
            self._cached_benc = encoding
            self._cached_ubody = unicode_body
        return self._cached_benc

Example 7

Project: scrapy
Source File: request.py
View license
def referer_str(request):
    """ Return Referer HTTP header suitable for logging. """
    referrer = request.headers.get('Referer')
    if referrer is not None:
        referrer = to_native_str(referrer, errors='replace')
    return referrer

Example 8

Project: scrapy
Source File: test_commands.py
View license
    def test_startproject_template_override(self):
        """startproject must honour a custom TEMPLATES_DIR setting."""
        copytree(join(scrapy.__path__[0], 'templates'), self.tmpl)
        # Drop a marker file into the overridden template directory.
        with open(join(self.tmpl_proj, 'root_template'), 'w'):
            pass
        assert exists(join(self.tmpl_proj, 'root_template'))

        extra_args = ['--set', 'TEMPLATES_DIR=%s' % self.tmpl]
        proc = self.proc('startproject', self.project_name, *extra_args)
        output = to_native_str(retry_on_eintr(proc.stdout.read))
        self.assertIn("New Scrapy project %r, using template directory" % self.project_name, output)
        self.assertIn(self.tmpl_proj, output)
        # The marker file must have been copied into the new project.
        assert exists(join(self.proj_path, 'root_template'))

Example 9

Project: scrapy
Source File: test_commands.py
View license
    def test_template(self, tplname='crawl'):
        """genspider creates a spider from the template; running it again
        reports that the spider already exists."""
        extra_args = ['--template=%s' % tplname] if tplname else []
        spider_name = 'test_spider'

        proc = self.proc('genspider', spider_name, 'test.com', *extra_args)
        output = to_native_str(retry_on_eintr(proc.stdout.read))
        self.assertIn("Created spider %r using template %r in module" % (spider_name, tplname), output)
        self.assertTrue(exists(join(self.proj_mod_path, 'spiders', 'test_spider.py')))

        proc = self.proc('genspider', spider_name, 'test.com', *extra_args)
        output = to_native_str(retry_on_eintr(proc.stdout.read))
        self.assertIn("Spider %r already exists in module" % spider_name, output)

Example 10

Project: scrapy
Source File: test_commands.py
View license
    def test_runspider(self):
        spider = """
import scrapy

class MySpider(scrapy.Spider):
    name = 'myspider'

    def start_requests(self):
        self.logger.debug("It Works!")
        return []
"""
        p = self.runspider(spider)
        log = to_native_str(p.stderr.read())

        self.assertIn("DEBUG: It Works!", log)
        self.assertIn("INFO: Spider opened", log)
        self.assertIn("INFO: Closing spider (finished)", log)
        self.assertIn("INFO: Spider closed (finished)", log)

Example 11

Project: scrapy
Source File: test_commands.py
View license
    def test_start_requests_errors(self):
        p = self.runspider("""
import scrapy

class BadSpider(scrapy.Spider):
    name = "bad"
    def start_requests(self):
        raise Exception("oops!")
        """, name="badspider.py")
        log = to_native_str(p.stderr.read())
        print(log)
        self.assertIn("start_requests", log)
        self.assertIn("badspider.py", log)

Example 12

Project: scrapy
Source File: test_commands.py
View license
    def test_run(self):
        """bench should crawl briefly and finish without unhandled errors."""
        proc = self.proc(
            'bench',
            '-s', 'LOGSTATS_INTERVAL=0.001',
            '-s', 'CLOSESPIDER_TIMEOUT=0.01',
        )
        log = to_native_str(proc.stderr.read())
        self.assertIn('INFO: Crawled', log)
        self.assertNotIn('Unhandled Error', log)

Example 13

Project: scrapy
Source File: test_command_parse.py
View license
    @defer.inlineCallbacks
    def test_spider_arguments(self):
        """Arguments passed with -a must reach the spider."""
        cmd = ['--spider', self.spider_name,
               '-a', 'test_arg=1',
               '-c', 'parse',
               self.url('/html')]
        _, _, stderr = yield self.execute(cmd)
        self.assertIn("DEBUG: It Works!", to_native_str(stderr))

Example 14

Project: scrapy
Source File: test_command_parse.py
View license
    @defer.inlineCallbacks
    def test_pipelines(self):
        """--pipelines must enable the item pipelines."""
        cmd = ['--spider', self.spider_name,
               '--pipelines',
               '-c', 'parse',
               self.url('/html')]
        _, _, stderr = yield self.execute(cmd)
        self.assertIn("INFO: It Works!", to_native_str(stderr))

Example 15

Project: scrapy
Source File: test_command_parse.py
View license
    @defer.inlineCallbacks
    def test_parse_items(self):
        """The parse command should print the scraped items."""
        cmd = ['--spider', self.spider_name, '-c', 'parse', self.url('/html')]
        status, out, stderr = yield self.execute(cmd)
        self.assertIn("""[{}, {'foo': 'bar'}]""", to_native_str(out))

Example 16

Project: scrapy
Source File: test_command_parse.py
View license
    @defer.inlineCallbacks
    def test_parse_items_no_callback_passed(self):
        """Without -c, the default callback should still scrape items."""
        cmd = ['--spider', self.spider_name, self.url('/html')]
        status, out, stderr = yield self.execute(cmd)
        self.assertIn("""[{}, {'foo': 'bar'}]""", to_native_str(out))

Example 17

Project: scrapy
Source File: test_command_parse.py
View license
    @defer.inlineCallbacks
    def test_wrong_callback_passed(self):
        """An unknown -c callback should scrape nothing and report an error.

        Fix: the regex is now a raw string. The original plain string relied
        on the invalid escape sequences (backslash before the brackets)
        surviving unchanged, which emits a SyntaxWarning on modern Python
        and is slated to become a SyntaxError.
        """
        status, out, stderr = yield self.execute(
            ['--spider', self.spider_name, '-c', 'dummy', self.url('/html')]
        )
        # The "Scraped Items" section must contain an empty list.
        self.assertRegexpMatches(to_native_str(out), r"""# Scraped Items  -+\n\[\]""")
        self.assertIn("""Cannot find callback""", to_native_str(stderr))

Example 18

Project: scrapy
Source File: test_command_parse.py
View license
    @defer.inlineCallbacks
    def test_crawlspider_matching_rule_callback_set(self):
        """If a rule matches the URL, use its defined callback."""
        cmd = ['--spider', 'goodcrawl' + self.spider_name, '-r', self.url('/html')]
        status, out, stderr = yield self.execute(cmd)
        self.assertIn("""[{}, {'foo': 'bar'}]""", to_native_str(out))

Example 19

Project: scrapy
Source File: test_command_parse.py
View license
    @defer.inlineCallbacks
    def test_crawlspider_matching_rule_default_callback(self):
        """If a rule matches but defines no callback, fall back to 'parse'."""
        cmd = ['--spider', 'goodcrawl' + self.spider_name, '-r', self.url('/text')]
        status, out, stderr = yield self.execute(cmd)
        self.assertIn("""[{}, {'nomatch': 'default'}]""", to_native_str(out))

Example 20

Project: scrapy
Source File: test_command_parse.py
View license
    @defer.inlineCallbacks
    def test_spider_with_no_rules_attribute(self):
        """Using -r with a spider with no rule should not produce items.

        Fix: the regex is now a raw string. The original plain string relied
        on invalid escape sequences (backslash before the brackets), which
        emit a SyntaxWarning on modern Python and will become a SyntaxError.
        """
        status, out, stderr = yield self.execute(
            ['--spider', self.spider_name, '-r', self.url('/html')]
        )
        # The "Scraped Items" section must contain an empty list.
        self.assertRegexpMatches(to_native_str(out), r"""# Scraped Items  -+\n\[\]""")
        self.assertIn("""No CrawlSpider rules found""", to_native_str(stderr))

Example 21

Project: scrapy
Source File: test_command_parse.py
View license
    @defer.inlineCallbacks
    def test_crawlspider_missing_callback(self):
        """A rule whose callback is missing should scrape no items.

        Fix: the regex is now a raw string; the original plain string used
        invalid escape sequences (backslash before the brackets), which emit
        a SyntaxWarning on modern Python and will become a SyntaxError.
        """
        status, out, stderr = yield self.execute(
            ['--spider', 'badcrawl' + self.spider_name, '-r', self.url('/html')]
        )
        # The "Scraped Items" section must contain an empty list.
        self.assertRegexpMatches(to_native_str(out), r"""# Scraped Items  -+\n\[\]""")

Example 22

Project: scrapy
Source File: test_command_parse.py
View license
    @defer.inlineCallbacks
    def test_crawlspider_no_matching_rule(self):
        """The requested URL has no matching rule, so no items should be scraped.

        Fix: the regex is now a raw string; the original plain string used
        invalid escape sequences (backslash before the brackets), which emit
        a SyntaxWarning on modern Python and will become a SyntaxError.
        """
        status, out, stderr = yield self.execute(
            ['--spider', 'badcrawl' + self.spider_name, '-r', self.url('/enc-gb18030')]
        )
        # The "Scraped Items" section must contain an empty list.
        self.assertRegexpMatches(to_native_str(out), r"""# Scraped Items  -+\n\[\]""")
        self.assertIn("""Cannot find a rule that matches""", to_native_str(stderr))

Example 23

Project: scrapy
Source File: test_feedexport.py
View license
    @defer.inlineCallbacks
    def assertExportedJsonLines(self, items, rows, settings=None):
        """Export *items* as JSON lines and assert they match *rows*
        (falsy values are dropped from the expected rows)."""
        settings = settings or {}
        settings['FEED_FORMAT'] = 'jl'
        data = yield self.exported_data(items, settings)
        parsed = [json.loads(to_native_str(line)) for line in data.splitlines()]
        expected = [{k: v for k, v in row.items() if v} for row in rows]
        self.assertEqual(expected, parsed)

Example 24

Project: scrapy
Source File: test_http_request.py
View license
def _qs(req, encoding='utf-8', to_unicode=False):
    """Parse the query string (POST body or URL query) of *req* into a dict."""
    raw = req.body if req.method == 'POST' else req.url.partition('?')[2]
    if six.PY2:
        unquoted = unquote(to_native_str(raw, encoding))
    elif six.PY3:
        unquoted = unquote_to_bytes(raw)
    if to_unicode:
        unquoted = unquoted.decode(encoding)
    return parse_qs(unquoted, True)

Example 25

Project: scrapy
Source File: test_http_response.py
View license
    def test_unicode_url(self):
        """Unicode URLs must be converted to native str using the encoding
        given explicitly, derived from headers, or the class default."""
        cls = self.response_class

        # No encoding given: the class default encoding is assumed.
        resp = cls(u"http://www.example.com/")
        self._assert_response_encoding(resp, cls._DEFAULT_ENCODING)

        # URLs always end up as native str.
        resp = cls(url=u"http://www.example.com/", encoding='utf-8')
        assert isinstance(resp.url, str)

        # Explicit encoding argument drives the conversion.
        resp = cls(url=u"http://www.example.com/price/\xa3", encoding='utf-8')
        self.assertEqual(resp.url, to_native_str(b'http://www.example.com/price/\xc2\xa3'))
        resp = cls(url=u"http://www.example.com/price/\xa3", encoding='latin-1')
        self.assertEqual(resp.url, 'http://www.example.com/price/\xa3')

        # Encoding taken from the Content-Type header charset.
        resp = cls(u"http://www.example.com/price/\xa3",
                   headers={"Content-type": ["text/html; charset=utf-8"]})
        self.assertEqual(resp.url, to_native_str(b'http://www.example.com/price/\xc2\xa3'))
        resp = cls(u"http://www.example.com/price/\xa3",
                   headers={"Content-type": ["text/html; charset=iso-8859-1"]})
        self.assertEqual(resp.url, 'http://www.example.com/price/\xa3')

Example 26

Project: scrapy
Source File: cookies.py
View license
    def get_header(self, name, default=None):
        """Return the named request header decoded to a native str,
        falling back to *default* when the header is absent."""
        value = self.request.headers.get(name, default)
        return to_native_str(value, errors='replace')

Example 27

Project: scrapy
Source File: cookies.py
View license
    def get_all(self, name, default=None):
        """Return every value of a response header as a native str.

        NOTE(review): *default* is accepted for API compatibility but is
        never used here — an empty list results when the header is absent;
        confirm callers do not rely on it.
        """
        raw_values = self.response.headers.getlist(name)
        return [to_native_str(value, errors='replace') for value in raw_values]

Example 28

Project: scrapy
Source File: text.py
View license
    @memoizemethod_noargs
    def _headers_encoding(self):
        """Return the encoding declared in the Content-Type header, if any
        (memoized; None when the header declares no charset)."""
        raw = self.headers.get(b'Content-Type', b'')
        return http_content_type_encoding(to_native_str(raw))

Example 29

Project: scrapy
Source File: lxmlhtml.py
View license
    def _extract_links(self, selector, response_url, response_encoding, base_url):
        """Collect Link objects for every link-like attribute in *selector*."""
        links = []
        # hacky way to get the underlying lxml parsed document
        for element, attr, raw_value in self._iter_links(selector.root):
            # pseudo lxml.html.HtmlElement.make_links_absolute(base_url)
            try:
                absolute = urljoin(base_url, raw_value)
            except ValueError:
                continue  # skipping bogus links
            url = self.process_attr(absolute)
            if url is None:
                continue
            url = to_native_str(url, encoding=response_encoding)
            # to fix relative links after process_value
            url = urljoin(response_url, url)
            links.append(Link(url, _collect_string_content(element) or u'',
                              nofollow=rel_has_nofollow(element.get('rel'))))
        return self._deduplicate_if_needed(links)

Example 30

Project: scrapy
Source File: reqser.py
View license
def request_from_dict(d, spider=None):
    """Create Request object from a dict.

    If a spider is given, it will try to resolve the callbacks looking at the
    spider for methods with the same name.
    """
    callback = d['callback']
    errback = d['errback']
    if spider:
        # Stored callbacks are method names; resolve them on the spider.
        if callback:
            callback = _get_method(spider, callback)
        if errback:
            errback = _get_method(spider, errback)
    return Request(
        url=to_native_str(d['url']),
        callback=callback,
        errback=errback,
        method=d['method'],
        headers=d['headers'],
        body=d['body'],
        cookies=d['cookies'],
        meta=d['meta'],
        encoding=d['_encoding'],
        priority=d['priority'],
        dont_filter=d['dont_filter'])

Example 31

Project: scrapy
Source File: response.py
View license
def response_status_message(status):
    """Return status code plus status text descriptive message
    """
    reason = http.RESPONSES.get(int(status), "Unknown Status")
    return '%s %s' % (status, to_native_str(reason))

Example 32

Project: scrapy
Source File: test_commands.py
View license
    def test_runspider_no_spider_found(self):
        """A file with no spider class should produce a clear error."""
        proc = self.runspider("from scrapy.spiders import Spider\n")
        log = to_native_str(proc.stderr.read())
        self.assertIn("No spider found in file", log)

Example 33

Project: scrapy
Source File: test_commands.py
View license
    def test_runspider_file_not_found(self):
        """A missing spider file should produce a file-not-found error."""
        proc = self.proc('runspider', 'some_non_existent_file')
        log = to_native_str(proc.stderr.read())
        self.assertIn("File not found: some_non_existent_file", log)

Example 34

Project: scrapy
Source File: test_commands.py
View license
    def test_runspider_unable_to_load(self):
        """A non-.py spider file should produce an unable-to-load error."""
        proc = self.runspider('', 'myspider.txt')
        log = to_native_str(proc.stderr.read())
        self.assertIn('Unable to load', log)