scrapy.crawler.CrawlerRunner

Here are examples of the Python API scrapy.crawler.CrawlerRunner, taken from open source projects. By voting up, you can indicate which examples are most useful and appropriate.

11 Examples 7

Example 1

Project: scrapy
Source File: test.py
View license
def get_crawler(spidercls=None, settings_dict=None):
    """Build and return an unconfigured Crawler object.

    ``settings_dict``, when given, is used to populate the crawler settings
    with a project level priority. ``spidercls`` falls back to the base
    ``Spider`` class when it is omitted (or otherwise falsy).
    """
    from scrapy.crawler import CrawlerRunner
    from scrapy.spiders import Spider

    if not spidercls:
        spidercls = Spider
    return CrawlerRunner(settings_dict).create_crawler(spidercls)

Example 2

Project: scrapy
Source File: test_crawler.py
View license
    def test_spider_manager_verify_interface(self):
        """A spider loader with the wrong interface must warn, then fail.

        Building a CrawlerRunner whose SPIDER_LOADER_CLASS points at
        ``SpiderLoaderWithWrongInterface`` is expected to raise
        AttributeError and to record exactly one warning that names both the
        setting and the expected interface.
        """
        settings = Settings({
            'SPIDER_LOADER_CLASS': 'tests.test_crawler.SpiderLoaderWithWrongInterface'
        })
        with warnings.catch_warnings(record=True) as w:
            # Only the constructor call lives inside assertRaises: anything
            # placed after the raising statement in that context would be
            # skipped, so the warning checks below would never execute.
            with self.assertRaises(AttributeError):
                CrawlerRunner(settings)
            # Still inside catch_warnings, so ``w`` holds the recorded warnings.
            self.assertEqual(len(w), 1)
            self.assertIn("SPIDER_LOADER_CLASS", str(w[0].message))
            self.assertIn("scrapy.interfaces.ISpiderLoader", str(w[0].message))

Example 3

Project: scrapy
Source File: test_crawler.py
View license
    def test_deprecated_attribute_spiders(self):
        """Reading ``runner.spiders`` records one warning and returns the loader.

        The warning text must mention both the old attribute name and its
        replacement, and the returned object must be an instance of the
        class named by the SPIDER_LOADER_CLASS setting.
        """
        with warnings.catch_warnings(record=True) as caught:
            crawler_runner = CrawlerRunner(Settings())
            legacy_loader = crawler_runner.spiders
            self.assertEqual(len(caught), 1)
            warning_text = str(caught[0].message)
            self.assertIn("CrawlerRunner.spiders", warning_text)
            self.assertIn("CrawlerRunner.spider_loader", warning_text)
            loader_path = crawler_runner.settings['SPIDER_LOADER_CLASS']
            self.assertIsInstance(legacy_loader, load_object(loader_path))

Example 4

Project: scrapy
Source File: __init__.py
View license
    def test_crawler_runner_loading(self):
        """Crawlers are created by spider name from the configured module."""
        spider_module = 'tests.test_spiderloader.test_spiders.spider1'
        runner = CrawlerRunner({'SPIDER_MODULES': [spider_module]})

        # A name that is not in the configured module must raise KeyError.
        with self.assertRaisesRegexp(KeyError, 'Spider not found'):
            runner.create_crawler('spider2')

        # A known name yields a crawler bound to the matching spider class.
        loaded = runner.create_crawler('spider1')
        self.assertTrue(issubclass(loaded.spidercls, scrapy.Spider))
        self.assertEqual(loaded.spidercls.name, 'spider1')

Example 5

Project: autologin
Source File: spiders.py
View license
def crawl_runner(extra_settings=None):
    """Return a CrawlerRunner built from a copy of ``base_settings``.

    ``extra_settings``, when not None, is merged in with ``cmdline``
    priority. The downloader middleware mapping is then extended: with a
    truthy ``SPLASH_URL`` the Splash dupefilter and middlewares are
    configured, otherwise the autologin cookie-exposing middleware is added.
    """
    settings = base_settings.copy()
    if extra_settings is not None:
        settings.update(extra_settings, priority='cmdline')

    # Both variants map the stock cookies middleware to None.
    middlewares = {
        'scrapy.downloadermiddlewares.cookies.CookiesMiddleware': None,
    }
    if settings.get('SPLASH_URL'):
        settings['DUPEFILTER_CLASS'] = 'scrapy_splash.SplashAwareDupeFilter'
        middlewares['scrapy_splash.SplashCookiesMiddleware'] = 723
        middlewares['scrapy_splash.SplashMiddleware'] = 725
        middlewares['scrapy.downloadermiddlewares.httpcompression'
                    '.HttpCompressionMiddleware'] = 810
    else:
        middlewares['autologin.middleware.ExposeCookiesMiddleware'] = 700

    settings.setdefault('DOWNLOADER_MIDDLEWARES', {}).update(middlewares)
    return CrawlerRunner(settings)

Example 6

Project: scrapy
Source File: test_crawler.py
View license
    def test_spidermanager_deprecation(self):
        """The old SPIDER_MANAGER_CLASS setting still works but warns once.

        The runner must build its spider loader from the class named by the
        legacy setting, and the single recorded warning must point users to
        SPIDER_LOADER_CLASS.
        """
        legacy_settings = {
            'SPIDER_MANAGER_CLASS': 'tests.test_crawler.CustomSpiderLoader'
        }
        with warnings.catch_warnings(record=True) as caught:
            runner = CrawlerRunner(legacy_settings)
            self.assertIsInstance(runner.spider_loader, CustomSpiderLoader)
            self.assertEqual(len(caught), 1)
            warning_text = str(caught[0].message)
            self.assertIn('Please use SPIDER_LOADER_CLASS', warning_text)

Example 7

Project: scrapy
Source File: test_crawl.py
View license
    def setUp(self):
        """Start a mock server and create a CrawlerRunner for each test."""
        self.mockserver = MockServer()
        # Enter the context manager by hand so it stays active after setUp
        # returns. NOTE(review): the matching __exit__ is presumably called
        # from tearDown, which is not visible here -- confirm.
        self.mockserver.__enter__()
        # The runner is created without any explicit settings.
        self.runner = CrawlerRunner()

Example 8

Project: scrapy
Source File: test_crawler.py
View license
    def test_crawler_runner_accepts_dict(self):
        """A plain dict passed as settings is exposed through ``runner.settings``.

        The custom key must be readable, and RETRY_ENABLED must pass the
        ``assertOptionIsDefault`` check.
        """
        custom_settings = {'foo': 'bar'}
        runner = CrawlerRunner(custom_settings)
        runner_settings = runner.settings
        self.assertEqual(runner_settings['foo'], 'bar')
        self.assertOptionIsDefault(runner.settings, 'RETRY_ENABLED')

Example 9

Project: scrapy
Source File: test_crawler.py
View license
    def test_crawler_runner_accepts_None(self):
        """A runner built with no settings passes the RETRY_ENABLED default check."""
        default_settings = CrawlerRunner().settings
        self.assertOptionIsDefault(default_settings, 'RETRY_ENABLED')

Example 10

Project: scrapy
Source File: test_feedexport.py
View license
    @defer.inlineCallbacks
    def run_and_export(self, spider_cls, settings=None):
        """Crawl with ``spider_cls`` and return the exported feed as bytes.

        The feed is written as CSV to a file inside a fresh temporary
        directory unless ``settings`` overrides FEED_URI / FEED_FORMAT; any
        other keys in ``settings`` are passed through to the crawl. The
        temporary directory is removed before returning, even on failure.
        """
        workdir = tempfile.mkdtemp()
        export_path = workdir + '/res'
        feed_settings = {
            'FEED_URI': 'file://' + export_path,
            'FEED_FORMAT': 'csv',
        }
        if settings:
            # Caller-supplied values take precedence over the feed defaults.
            feed_settings.update(settings)
        try:
            # The mock server only needs to be up while the crawl runs.
            with MockServer():
                crawl_runner = CrawlerRunner(Settings(feed_settings))
                yield crawl_runner.crawl(spider_cls)

            with open(export_path, 'rb') as exported:
                defer.returnValue(exported.read())

        finally:
            shutil.rmtree(workdir)

Example 11

Project: scrapy-cluster
Source File: tests_online.py
View license
    def test_crawler_process(self):
        """Run the spider end to end and expect exactly one matching Kafka message.

        A crawl request is pushed onto the spider's Redis queue, the reactor
        is run for up to 20 seconds, and the consumer is then scanned for
        messages whose ``appid`` and ``crawlid`` match the test request.
        """
        runner = CrawlerRunner(self.settings)
        d = runner.crawl(CustomSpider)
        d.addBoth(lambda _: reactor.stop())

        # add crawl to redis
        key = "test-spider:istresearch.com:queue"
        self.redis_conn.zadd(key, self.example_feed, -99)

        # run the spider, give 20 seconds to see the url, crawl it,
        # and send to kafka. Then we kill the reactor
        def thread_func():
            time.sleep(20)
            # Reactor methods are not thread-safe; from a worker thread the
            # stop request has to be handed over via callFromThread so it
            # executes inside the reactor thread.
            reactor.callFromThread(reactor.stop)

        thread = threading.Thread(target=thread_func)
        thread.start()

        # Blocks until one of the stop requests above is processed.
        reactor.run()

        # ensure it was sent out to kafka
        message_count = 0
        for message in self.consumer.get_messages():
            if message is None:
                break
            the_dict = json.loads(message.message.value)
            if the_dict is not None and the_dict['appid'] == 'testapp' \
                    and the_dict['crawlid'] == '01234567890abcdefghijklmn':
                message_count += 1

        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(message_count, 1)