Here are examples of the Python API scrapy.utils.misc.load_object, taken from open source projects. By voting up you can indicate which examples are most useful and appropriate.
29 Examples
3
Example 1
@classmethod
def from_settings(cls, settings):
    """Build the item pipeline from the project settings.

    Always wires in a redis server connection; the items key and the
    serializer function are only passed when configured.
    """
    kwargs = {'server': connection.from_settings(settings)}
    items_key = settings.get('REDIS_ITEMS_KEY')
    if items_key:
        kwargs['key'] = items_key
    serializer_path = settings.get('REDIS_ITEMS_SERIALIZER')
    if serializer_path:
        # The serializer is configured as a dotted path; resolve it here.
        kwargs['serialize_func'] = load_object(serializer_path)
    return cls(**kwargs)
3
Example 2
Project: scrapy-rabbitmq Source File: scheduler.py
@classmethod
def from_settings(cls, settings):
    """Instantiate the scheduler from ``settings``.

    Module-level constants serve as defaults for every missing setting.
    """
    server = connection.from_settings(settings)
    persist = settings.get('SCHEDULER_PERSIST', SCHEDULER_PERSIST)
    queue_key = settings.get('SCHEDULER_QUEUE_KEY', QUEUE_KEY)
    # The queue class is a dotted path; resolve it to the actual class.
    queue_cls = load_object(settings.get('SCHEDULER_QUEUE_CLASS', QUEUE_CLASS))
    dupefilter_key = settings.get('DUPEFILTER_KEY', DUPEFILTER_KEY)
    idle_before_close = settings.get('SCHEDULER_IDLE_BEFORE_CLOSE',
                                     IDLE_BEFORE_CLOSE)
    return cls(server, persist, queue_key, queue_cls, dupefilter_key,
               idle_before_close)
3
Example 3
def get_application(arguments):
    """Create the twisted Application serving the scrapyrt web root.

    :param arguments: parsed CLI arguments providing ``port`` and ``ip``.
    """
    root_cls = load_object(settings.SERVICE_ROOT)
    web_site = Site(root_cls())
    application = Application('scrapyrt')
    tcp_service = TCPServer(arguments.port, web_site, interface=arguments.ip)
    tcp_service.setServiceParent(application)
    return application
3
Example 4
def run_crawl(self, spider_name, spider_data,
              max_requests=None, *args, **kwargs):
    """Start a crawl via the configured crawl manager.

    Returns the Deferred fired when the crawl finishes.
    """
    # CRAWL_MANAGER is a dotted path to the manager class.
    manager_cls = load_object(settings.CRAWL_MANAGER)
    manager = manager_cls(spider_name, spider_data, max_requests)
    return manager.crawl(*args, **kwargs)
3
Example 5
def __init__(self, crawler, spider_closed_callback):
    """Bind the execution engine to *crawler* and resolve its components.

    :param crawler: the owning ``Crawler`` instance.
    :param spider_closed_callback: callable invoked when the spider closes.
    """
    self.crawler = crawler
    self.settings = crawler.settings
    self.signals = crawler.signals
    self.logformatter = crawler.logformatter
    self.slot = None
    self.spider = None
    self.running = False
    self.paused = False
    # Both classes come from dotted paths in settings; the scheduler class
    # is kept for later instantiation, the downloader is built immediately.
    self.scheduler_cls = load_object(self.settings['SCHEDULER'])
    downloader_cls = load_object(self.settings['DOWNLOADER'])
    self.downloader = downloader_cls(crawler)
    self.scraper = Scraper(crawler)
    self._spider_closed_callback = spider_closed_callback
3
Example 6
Project: scrapy Source File: scheduler.py
@classmethod
def from_crawler(cls, crawler):
    """Build the scheduler from the crawler's settings.

    Resolves the dupefilter and the priority/disk/memory queue classes
    from their dotted-path settings.
    """
    settings = crawler.settings
    dupefilter_cls = load_object(settings['DUPEFILTER_CLASS'])
    dupefilter = dupefilter_cls.from_settings(settings)
    pqclass = load_object(settings['SCHEDULER_PRIORITY_QUEUE'])
    dqclass = load_object(settings['SCHEDULER_DISK_QUEUE'])
    mqclass = load_object(settings['SCHEDULER_MEMORY_QUEUE'])
    # SCHEDULER_DEBUG is the fallback when LOG_UNSERIALIZABLE_REQUESTS
    # is unset.
    logunser = settings.getbool('LOG_UNSERIALIZABLE_REQUESTS', settings.getbool('SCHEDULER_DEBUG'))
    return cls(dupefilter, jobdir=job_dir(settings), logunser=logunser,
               stats=crawler.stats, pqclass=pqclass, dqclass=dqclass, mqclass=mqclass)
3
Example 7
def __init__(self, crawler):
    """Create the scraper, resolving the item processor from settings."""
    self.slot = None
    self.spidermw = SpiderMiddlewareManager.from_crawler(crawler)
    # ITEM_PROCESSOR is a dotted path resolved and built via its
    # from_crawler hook.
    itemproc_cls = load_object(crawler.settings['ITEM_PROCESSOR'])
    self.itemproc = itemproc_cls.from_crawler(crawler)
    self.concurrent_items = crawler.settings.getint('CONCURRENT_ITEMS')
    self.crawler = crawler
    self.signals = crawler.signals
    self.logformatter = crawler.logformatter
3
Example 8
def __init__(self, settings, stats):
    """Set up HTTP-cache policy and storage from settings.

    Raises ``NotConfigured`` when HTTPCACHE_ENABLED is off.
    """
    if not settings.getbool('HTTPCACHE_ENABLED'):
        raise NotConfigured
    # Both components are dotted paths instantiated with the settings.
    policy_cls = load_object(settings['HTTPCACHE_POLICY'])
    storage_cls = load_object(settings['HTTPCACHE_STORAGE'])
    self.policy = policy_cls(settings)
    self.storage = storage_cls(settings)
    self.ignore_missing = settings.getbool('HTTPCACHE_IGNORE_MISSING')
    self.stats = stats
3
Example 9
def __init__(self, settings):
    """Configure the feed exporter; raise when it cannot operate.

    Raises ``NotConfigured`` when no feed URI is set, or the configured
    storage scheme / export format has no matching component.
    """
    self.settings = settings
    self.urifmt = settings['FEED_URI']
    if not self.urifmt:
        raise NotConfigured
    self.format = settings['FEED_FORMAT'].lower()
    self.export_encoding = settings['FEED_EXPORT_ENCODING']
    # Load storages/exporters first: the support checks below need them.
    self.storages = self._load_components('FEED_STORAGES')
    self.exporters = self._load_components('FEED_EXPORTERS')
    if not self._storage_supported(self.urifmt):
        raise NotConfigured
    if not self._exporter_supported(self.format):
        raise NotConfigured
    self.store_empty = settings.getbool('FEED_STORE_EMPTY')
    self._exporting = False
    self.export_fields = settings.getlist('FEED_EXPORT_FIELDS') or None
    # FEED_URI_PARAMS may name a callable for URI parameters; default to
    # a no-op.
    uripar = settings['FEED_URI_PARAMS']
    self._uripar = load_object(uripar) if uripar else lambda x, y: None
3
Example 10
Project: scrapy Source File: feedexport.py
def _load_components(self, setting_prefix):
    """Resolve a settings mapping of dotted paths to objects.

    Entries whose import raises ``NotConfigured`` are silently skipped.
    """
    conf = without_none_values(self.settings.getwithbase(setting_prefix))
    components = {}
    for name, path in conf.items():
        try:
            components[name] = load_object(path)
        except NotConfigured:
            # Component declared but unusable in this environment; skip.
            pass
    return components
3
Example 11
def __init__(self):
    """Load the bundled mime.types data and resolve response classes."""
    self.classes = {}
    self.mimetypes = MimeTypes()
    # Feed Scrapy's packaged mime.types file into the MimeTypes registry.
    mimedata = get_data('scrapy', 'mime.types').decode('utf8')
    self.mimetypes.readfp(StringIO(mimedata))
    # CLASSES maps mimetype -> dotted path; resolve each path eagerly.
    for mimetype, path in six.iteritems(self.CLASSES):
        self.classes[mimetype] = load_object(path)
3
Example 12
def __init__(self, crawler, update_vars=None, code=None):
    """Set up the interactive shell around *crawler*.

    :param update_vars: optional callable to mutate the shell namespace;
        defaults to a no-op.
    :param code: optional code string to run -- presumably instead of an
        interactive session; confirm against the caller.
    """
    self.crawler = crawler
    self.update_vars = update_vars or (lambda x: None)
    # New items created in the shell use the project's configured class.
    self.item_class = load_object(crawler.settings['DEFAULT_ITEM_CLASS'])
    self.spider = None
    # Run in a thread unless already inside the Twisted IO thread.
    self.inthread = not threadable.isInIOThread()
    self.code = code
    self.vars = {}
3
Example 13
def test_deprecated_attribute_spiders(self):
    """Crawler.spiders warns once, then returns the spider loader."""
    with warnings.catch_warnings(record=True) as w:
        spiders = self.crawler.spiders
        self.assertEqual(len(w), 1)
        self.assertIn("Crawler.spiders", str(w[0].message))

        sl_cls = load_object(self.crawler.settings['SPIDER_LOADER_CLASS'])
        self.assertIsInstance(spiders, sl_cls)

        # A second access must not emit another deprecation warning.
        self.crawler.spiders
        self.assertEqual(len(w), 1, "Warn deprecated access only once")
3
Example 14
Project: scrapy Source File: test_crawler.py
def test_deprecated_attribute_spiders(self):
    """CrawlerRunner.spiders warns and aliases spider_loader."""
    with warnings.catch_warnings(record=True) as w:
        runner = CrawlerRunner(Settings())
        spiders = runner.spiders
        self.assertEqual(len(w), 1)
        # The warning must mention both the old and the new attribute name.
        self.assertIn("CrawlerRunner.spiders", str(w[0].message))
        self.assertIn("CrawlerRunner.spider_loader", str(w[0].message))
        sl_cls = load_object(runner.settings['SPIDER_LOADER_CLASS'])
        self.assertIsInstance(spiders, sl_cls)
3
Example 15
Project: scrapyd Source File: __init__.py
def get_application(config=None):
    """Build the scrapyd application object.

    :param config: optional ``Config``; a default one is created if None.
    """
    cfg = Config() if config is None else config
    # The application factory itself is configurable via a dotted path.
    app_path = cfg.get('application', 'scrapyd.app.application')
    app_factory = load_object(app_path)
    return app_factory(cfg)
0
Example 16
Project: scrapy-redis Source File: connection.py
def get_redis_from_settings(settings):
    """Returns a redis client instance from given Scrapy settings object.
    This function uses ``get_client`` to instantiate the client and uses
    ``DEFAULT_PARAMS`` global as defaults values for the parameters. You can
    override them using the ``REDIS_PARAMS`` setting.
    Parameters
    ----------
    settings : Settings
        A scrapy settings object. See the supported settings below.
    Returns
    -------
    server
        Redis client instance.
    Other Parameters
    ----------------
    REDIS_URL : str, optional
        Server connection URL.
    REDIS_HOST : str, optional
        Server host.
    REDIS_PORT : str, optional
        Server port.
    REDIS_PARAMS : dict, optional
        Additional client parameters.
    """
    params = DEFAULT_PARAMS.copy()
    params.update(settings.getdict('REDIS_PARAMS'))
    # XXX: Deprecate REDIS_* settings.
    # Individual REDIS_* settings override the merged params above.
    for source, dest in SETTINGS_PARAMS_MAP.items():
        val = settings.get(source)
        if val:
            params[dest] = val

    # Allow ``redis_cls`` to be a path to a class.
    if isinstance(params.get('redis_cls'), six.string_types):
        params['redis_cls'] = load_object(params['redis_cls'])

    return get_redis(**params)
0
Example 17
def open(self, spider):
    """Open the scheduler for *spider*: build its queue and dupefilter.

    Flushes pending state when ``flush_on_start`` is set, and logs when
    requests are already queued (i.e. the crawl is being resumed).

    :raises ValueError: when the configured queue or dupefilter class
        cannot be instantiated with the expected keyword arguments.
    """
    self.spider = spider

    try:
        self.queue = load_object(self.queue_cls)(
            server=self.server,
            spider=spider,
            key=self.queue_key % {'spider': spider.name},
            serializer=self.serializer,
        )
    except TypeError as e:
        # BUG FIX: ValueError does not interpolate printf-style arguments,
        # so the original `ValueError(fmt, a, b)` call produced an
        # unformatted message; format it explicitly instead.
        raise ValueError("Failed to instantiate queue class '%s': %s"
                         % (self.queue_cls, e))

    try:
        self.df = load_object(self.dupefilter_cls)(
            server=self.server,
            key=self.dupefilter_key % {'spider': spider.name},
            debug=spider.settings.getbool('DUPEFILTER_DEBUG'),
        )
    except TypeError as e:
        raise ValueError("Failed to instantiate dupefilter class '%s': %s"
                         % (self.dupefilter_cls, e))

    if self.flush_on_start:
        self.flush()
    # notice if there are requests already in the queue to resume the crawl
    if len(self.queue):
        spider.log("Resuming crawl (%d requests scheduled)" % len(self.queue))
0
Example 18
def __init__(self, **kwargs):
    """Mount every configured resource under its route.

    NOTE(review): ``iteritems`` is Python 2 only -- confirm this project
    does not target Python 3.
    """
    super(RealtimeApi, self).__init__(self)
    # RESOURCES maps route -> dotted path of the resource class.
    for route, resource_path in settings.RESOURCES.iteritems():
        resource_cls = load_object(resource_path)
        self.putChild(route, resource_cls(self, **kwargs))
0
Example 19
Project: scrapy Source File: check.py
def run(self, args, opts):
    """Run contract checks for the given spiders (all when none given).

    With ``--list`` only print the tested methods per spider; otherwise
    schedule the crawls, print the test summary and set ``self.exitcode``
    to 0 on success, 1 on failure.
    """
    # load contracts
    contracts = build_component_list(self.settings.getwithbase('SPIDER_CONTRACTS'))
    conman = ContractsManager(load_object(c) for c in contracts)
    runner = TextTestRunner(verbosity=2 if opts.verbose else 1)
    result = TextTestResult(runner.stream, runner.descriptions, runner.verbosity)

    # contract requests
    contract_reqs = defaultdict(list)

    spider_loader = self.crawler_process.spider_loader

    for spidername in args or spider_loader.list():
        spidercls = spider_loader.load(spidername)
        # Replace start_requests so the crawl issues the contract requests
        # instead of the spider's own start URLs.
        spidercls.start_requests = lambda s: conman.from_spider(s, result)

        tested_methods = conman.tested_methods_from_spidercls(spidercls)
        if opts.list:
            for method in tested_methods:
                contract_reqs[spidercls.name].append(method)
        elif tested_methods:
            self.crawler_process.crawl(spidercls)

    # start checks
    if opts.list:
        for spider, methods in sorted(contract_reqs.items()):
            if not methods and not opts.verbose:
                continue
            print(spider)
            for method in sorted(methods):
                print(' * %s' % method)
    else:
        start = time.time()
        self.crawler_process.start()
        stop = time.time()

        result.printErrors()
        result.printSummary(start, stop)
        self.exitcode = int(not result.wasSuccessful())
0
Example 20
def __init__(self, settings):
    """Resolve the HTTP client factory and TLS context factory classes."""
    self.HTTPClientFactory = load_object(
        settings['DOWNLOADER_HTTPCLIENTFACTORY'])
    self.ClientContextFactory = load_object(
        settings['DOWNLOADER_CLIENTCONTEXTFACTORY'])
0
Example 21
def __init__(self, settings):
    """Configure the HTTP/1.1 download handler's pool and TLS context."""
    self._pool = HTTPConnectionPool(reactor, persistent=True)
    self._pool.maxPersistentPerHost = settings.getint('CONCURRENT_REQUESTS_PER_DOMAIN')
    self._pool._factory.noisy = False

    self._sslMethod = openssl_methods[settings.get('DOWNLOADER_CLIENT_TLS_METHOD')]
    self._contextFactoryClass = load_object(settings['DOWNLOADER_CLIENTCONTEXTFACTORY'])
    # try method-aware context factory
    try:
        self._contextFactory = self._contextFactoryClass(method=self._sslMethod)
    except TypeError:
        # use context factory defaults
        # NOTE(review): indentation was lost in this copy; the warning is
        # placed inside the except branch, the only spot where it makes sense.
        self._contextFactory = self._contextFactoryClass()
        msg = """
 '%s' does not accept `method` argument (type OpenSSL.SSL method,\
 e.g. OpenSSL.SSL.SSLv23_METHOD).\
 Please upgrade your context factory class to handle it or ignore it.""" % (
            settings['DOWNLOADER_CLIENTCONTEXTFACTORY'],)
        warnings.warn(msg)

    self._default_maxsize = settings.getint('DOWNLOAD_MAXSIZE')
    self._default_warnsize = settings.getint('DOWNLOAD_WARNSIZE')
    self._disconnect_timeout = 1
0
Example 22
def _get_handler(self, scheme):
    """Lazy-load the downloadhandler for a scheme
    only on the first request for that scheme.
    """
    # Cached outcomes first: an instantiated handler or a known failure.
    if scheme in self._handlers:
        return self._handlers[scheme]
    if scheme in self._notconfigured:
        return None
    if scheme not in self._schemes:
        self._notconfigured[scheme] = 'no handler available for that scheme'
        return None

    path = self._schemes[scheme]
    try:
        dhcls = load_object(path)
        dh = dhcls(self._crawler.settings)
    except NotConfigured as ex:
        # Handler opted out; remember why so we don't retry every request.
        self._notconfigured[scheme] = str(ex)
        return None
    except Exception as ex:
        # Unexpected failure: log with traceback and also cache the failure.
        logger.error('Loading "%(clspath)s" for scheme "%(scheme)s"',
                     {"clspath": path, "scheme": scheme},
                     exc_info=True, extra={'crawler': self._crawler})
        self._notconfigured[scheme] = str(ex)
        return None
    else:
        self._handlers[scheme] = dh
        return self._handlers[scheme]
0
Example 23
Project: scrapy Source File: crawler.py
def __init__(self, spidercls, settings=None):
    """Create a crawler for *spidercls* with a private, frozen settings copy.

    :param spidercls: the spider class (not an instance) to crawl.
    :param settings: ``Settings``, plain dict, or None for defaults.
    """
    if isinstance(settings, dict) or settings is None:
        settings = Settings(settings)
    self.spidercls = spidercls
    self.settings = settings.copy()
    # Give the spider class a chance to contribute its custom settings.
    self.spidercls.update_settings(self.settings)

    self.signals = SignalManager(self)
    self.stats = load_object(self.settings['STATS_CLASS'])(self)

    handler = LogCounterHandler(self, level=settings.get('LOG_LEVEL'))
    logging.root.addHandler(handler)
    # lambda is assigned to Crawler attribute because this way it is not
    # garbage collected after leaving __init__ scope
    self.__remove_handler = lambda: logging.root.removeHandler(handler)
    self.signals.connect(self.__remove_handler, signals.engine_stopped)

    lf_cls = load_object(self.settings['LOG_FORMATTER'])
    self.logformatter = lf_cls.from_crawler(self)
    self.extensions = ExtensionManager.from_crawler(self)

    # No further settings changes are allowed once components are built.
    self.settings.freeze()
    self.crawling = False
    self.spider = None
    self.engine = None
0
Example 24
Project: scrapy Source File: crawler.py
def _get_spider_loader(settings):
    """ Get SpiderLoader instance from settings """
    # SPIDER_MANAGER_CLASS is the deprecated spelling; warn but honour it
    # (it takes precedence over SPIDER_LOADER_CLASS below).
    if settings.get('SPIDER_MANAGER_CLASS'):
        warnings.warn(
            'SPIDER_MANAGER_CLASS option is deprecated. '
            'Please use SPIDER_LOADER_CLASS.',
            category=ScrapyDeprecationWarning, stacklevel=2
        )
    cls_path = settings.get('SPIDER_MANAGER_CLASS',
                            settings.get('SPIDER_LOADER_CLASS'))
    loader_cls = load_object(cls_path)
    # A loader missing parts of ISpiderLoader only triggers a warning,
    # not a failure.
    try:
        verifyClass(ISpiderLoader, loader_cls)
    except DoesNotImplement:
        warnings.warn(
            'SPIDER_LOADER_CLASS (previously named SPIDER_MANAGER_CLASS) does '
            'not fully implement scrapy.interfaces.ISpiderLoader interface. '
            'Please add all missing methods to avoid unexpected runtime errors.',
            category=ScrapyDeprecationWarning, stacklevel=2
        )
    return loader_cls.from_settings(settings.frozencopy())
0
Example 25
@classmethod
def from_settings(cls, settings, crawler=None):
    """Instantiate the manager with every enabled middleware.

    Middlewares raising ``NotConfigured`` are skipped; a warning is
    logged when the exception carries a reason.
    """
    mwlist = cls._get_mwlist_from_settings(settings)
    middlewares = []
    enabled = []
    for clspath in mwlist:
        try:
            mwcls = load_object(clspath)
            # Construction preference: from_crawler (when a crawler is
            # available), then from_settings, then a no-arg constructor.
            if crawler and hasattr(mwcls, 'from_crawler'):
                mw = mwcls.from_crawler(crawler)
            elif hasattr(mwcls, 'from_settings'):
                mw = mwcls.from_settings(settings)
            else:
                mw = mwcls()
            middlewares.append(mw)
            enabled.append(clspath)
        except NotConfigured as e:
            if e.args:
                clsname = clspath.split('.')[-1]
                logger.warning("Disabled %(clsname)s: %(eargs)s",
                               {'clsname': clsname, 'eargs': e.args[0]},
                               extra={'crawler': crawler})

    logger.info("Enabled %(componentname)ss:\n%(enabledlist)s",
                {'componentname': cls.component_name,
                 'enabledlist': pprint.pformat(enabled)},
                extra={'crawler': crawler})
    return cls(*middlewares)
0
Example 26
def test_load_object(self):
    """load_object resolves a dotted path; bad paths raise specific errors."""
    obj = load_object('scrapy.utils.misc.load_object')
    assert obj is load_object
    # Missing module -> ImportError; missing attribute -> NameError.
    self.assertRaises(ImportError, load_object, 'nomodule999.mod.function')
    self.assertRaises(NameError, load_object, 'scrapy.utils.misc.load_object999')
0
Example 27
Project: scrapyd Source File: app.py
def application(config):
    """Assemble and return the Scrapyd twisted Application from *config*."""
    app = Application("Scrapyd")
    http_port = config.getint('http_port', 6800)
    bind_address = config.get('bind_address', '0.0.0.0')
    poll_interval = config.getfloat('poll_interval', 5)

    poller = QueuePoller(config)
    eggstorage = FilesystemEggStorage(config)
    scheduler = SpiderScheduler(config)
    environment = Environment(config)

    # Register components so other services can look them up by interface.
    app.setComponent(IPoller, poller)
    app.setComponent(IEggStorage, eggstorage)
    app.setComponent(ISpiderScheduler, scheduler)
    app.setComponent(IEnvironment, environment)

    # Launcher and web root are pluggable via dotted paths in the config.
    laupath = config.get('launcher', 'scrapyd.launcher.Launcher')
    laucls = load_object(laupath)
    launcher = laucls(config, app)

    webpath = config.get('webroot', 'scrapyd.website.Root')
    webcls = load_object(webpath)

    timer = TimerService(poll_interval, poller.poll)
    webservice = TCPServer(http_port, server.Site(webcls(config, app)), interface=bind_address)
    log.msg(format="Scrapyd web console available at http://%(bind_address)s:%(http_port)s/",
            bind_address=bind_address, http_port=http_port)

    launcher.setServiceParent(app)
    timer.setServiceParent(app)
    webservice.setServiceParent(app)

    return app
0
Example 28
def __init__(self, config, app):
    """Build the scrapyd web root and mount its child resources."""
    resource.Resource.__init__(self)
    self.debug = config.getboolean('debug', False)
    self.runner = config.get('runner')
    logsdir = config.get('logs_dir')
    itemsdir = config.get('items_dir')
    # Items can only be served directly when stored locally
    # (empty or 'file' URI scheme).
    local_items = itemsdir and (urlparse(itemsdir).scheme.lower() in ['', 'file'])
    self.app = app
    self.nodename = config.get('node_name', socket.gethostname())
    self.putChild('', Home(self, local_items))
    if logsdir:
        self.putChild('logs', static.File(logsdir, 'text/plain'))
    if local_items:
        self.putChild('items', static.File(itemsdir, 'text/plain'))
    self.putChild('jobs', Jobs(self, local_items))
    # Extra services come from the [services] config section as
    # name -> dotted class path pairs.
    services = config.items('services', ())
    for servName, servClsName in services:
        servCls = load_object(servClsName)
        self.putChild(servName, servCls(self))
    self.update_projects()
0
Example 29
def __init__(self, settings):
    """Enable webdriver handling only when a browser is configured."""
    self._enabled = settings.get('WEBDRIVER_BROWSER') is not None
    # Requests fall through to the stock handler when webdriver is off.
    fallback_cls = load_object(FALLBACK_HANDLER)
    self._fallback_handler = fallback_cls(settings)