Suggested APIs for "Scrapy" API (Occurrences) scrapy.version_info(1) scrapy.spiders(1) scrapy.selector(1) scrapy.item(1) scrapy.http(1) scrapy.exceptions(1) scrapy.__version__(5) scrapy.__path__(3) scrapy.Spider(13) scrapy.Selector(7) scrapy.Scraper(1) scrapy.Request(103) scrapy.Item(3) scrapy.FormRequest(2) scrapy.utils.test(1) scrapy.statscollectors.StatsCollector(2) scrapy.statscollectors.DummyStatsCollector(1) scrapy.squeues.PickleLifoDiskQueue(1) scrapy.squeues.PickleFifoDiskQueue(1) scrapy.squeues.MarshalLifoDiskQueue(1) scrapy.squeues.MarshalFifoDiskQueue(1) scrapy.spiders.Spider(42) scrapy.spiders.Rule(9) scrapy.spiders.CrawlSpider(1) scrapy.spider.Request(2) scrapy.signals.spider_opened(22) scrapy.signals.spider_idle(8) scrapy.signals.spider_error(3) scrapy.signals.spider_closed(10) scrapy.signals.response_received(3) scrapy.signals.response_downloaded(1) scrapy.signals.request_scheduled(1) scrapy.signals.item_scraped(4) scrapy.signals.item_dropped(1) scrapy.signals.engine_stopped(4) scrapy.signals.engine_started(5) scrapy.signalmanager.SignalManager(1) scrapy.shell.inspect_response(1) scrapy.shell.Shell(1) scrapy.settings.overridden_settings(1) scrapy.settings.get_settings_priority(1) scrapy.settings.SettingsAttribute(6) scrapy.settings.Settings(77) scrapy.settings.CrawlerSettings(1) scrapy.settings.BaseSettings(11) scrapy.selector.XPathSelectorList(2) scrapy.selector.Selector(93) scrapy.selector.HtmlXPathSelector(35) scrapy.scrapy_log.start_from_crawler(1) scrapy.resolver.CachingThreadedResolver(1) scrapy.middleware.MiddlewareManager(1) scrapy.mail.MailSender(5) scrapy.logformatter.LogFormatter(1) scrapy.log.start(1) scrapy.log.msg(11) scrapy.log.err(1) scrapy.log.WARNING(9) scrapy.log.INFO(22) scrapy.log.ERROR(8) scrapy.log.DEBUG(13) scrapy.log.CRITICAL(1) scrapy.loader.ItemLoader(20) scrapy.linkextractors.LinkExtractor(9) scrapy.link.Link(45) scrapy.item.ScrapedItem(2) scrapy.item.Item(1) scrapy.item.Field(26) scrapy.item.BaseItem(1) 
scrapy.http.XmlResponse(12) scrapy.http.TextResponse(31) scrapy.http.Response(92) scrapy.http.Request(184) scrapy.http.HtmlResponse(57) scrapy.http.Headers(38) scrapy.http.FormRequest(13) scrapy.exporters.XmlItemExporter(2) scrapy.exporters.PythonItemExporter(3) scrapy.exporters.PprintItemExporter(1) scrapy.exporters.PickleItemExporter(3) scrapy.exporters.MarshalItemExporter(2) scrapy.exporters.JsonLinesItemExporter(1) scrapy.exporters.JsonItemExporter(1) scrapy.exporters.CsvItemExporter(3) scrapy.exporters.BaseItemExporter(1) scrapy.exceptions.UsageError(11) scrapy.exceptions.NotSupported(3) scrapy.exceptions.NotConfigured(11) scrapy.exceptions.IgnoreRequest(8) scrapy.exceptions.DropItem(14) scrapy.exceptions.ContractFail(2) scrapy.exceptions.CloseSpider(33) scrapy.dupefilters.request_fingerprint(2) scrapy.dupefilters.RFPDupeFilter(3) scrapy.crawler.CrawlerRunner(11) scrapy.crawler.CrawlerProcess(6) scrapy.crawler.Crawler(12) scrapy.contracts.ContractsManager(2) scrapy.conf.settings(1) scrapy.cmdline.execute(1) scrapy.FormRequest.from_response(2) scrapi.util.try_n_times(1) scrapi.util.timestamp(5) scrapi.util.stamp_from_raw(2) scrapi.util.json_without_bytes(3) scrapi.util.format_date_with_slashes(2) scrapi.util.copy_to_unicode(10) scrapi.tasks.task_autoretry(4) scrapi.tasks.run_harvester(2) scrapi.tasks.process_raw(1) scrapi.tasks.process_normalized(1) scrapi.tasks.migrate(7) scrapi.tasks.harvest(3) scrapi.tasks.begin_normalization(1) scrapi.settings.disabled(1) scrapi.settings.USE_FLUENTD(1) scrapi.settings.SHARE_REG_URL(2) scrapi.settings.SENTRY_DSN(2) scrapi.settings.RECORD_HTTP_TRANSACTIONS(1) scrapi.settings.RAW_PROCESSING(2) scrapi.settings.RAISE_IN_TRANSFORMER(2) scrapi.settings.NORMALIZED_PROCESSING(2) scrapi.settings.ELASTIC_URI(2) scrapi.settings.ELASTIC_INST_INDEX(1) scrapi.settings.ELASTIC_INDEX(5) scrapi.settings.DEBUG(3) scrapi.settings.DAYS_BACK(22) scrapi.settings.CELERY_RETRY_DELAY(3) scrapi.settings.CELERY_ALWAYS_EAGER(2) 
scrapi.settings.CASSANDRA_URI(2) scrapi.settings.CASSANDRA_KEYSPACE(1) scrapi.settings.CANONICAL_PROCESSOR(4) scrapi.requests.time(1) scrapi.requests.settings(2) scrapi.requests.put(1) scrapi.requests.post(1) scrapi.requests.get(27) scrapi.requests.events(2) scrapi.requests.delete(1) scrapi.requests.HarvesterResponse(8) scrapi.registry.values(1) scrapi.registry.keys(3) scrapi.registry.items(1) scrapi.registry.get(1) scrapi.registry.beat_schedule(1) scrapi.processing.process_raw(3) scrapi.processing.process_normalized(3) scrapi.processing.get_processor(11) scrapi.processing.cassandra(1) scrapi.processing.DocumentTuple(7) scrapi.migrations.__dict__(1) scrapi.linter.RawDocument(14) scrapi.linter.NormalizedDocument(8) scrapi.events.settings(1) scrapi.events.logged(10) scrapi.events.log_to_sentry(2) scrapi.events.event(1) scrapi.events.dispatch(4) scrapi.events.creates_task(2) scrapi.events.Skip(4) scrapy.xpath.selector.HtmlXPathSelector(2) scrapy.xlib.tx.TCP4ClientEndpoint(1) scrapy.xlib.tx.HTTPConnectionPool(1) scrapy.utils.url.urljoin_rfc(2) scrapy.utils.url.url_query_parameter(1) scrapy.utils.url.url_is_from_spider(5) scrapy.utils.url.url_is_from_any_domain(3) scrapy.utils.url.url_has_any_extension(1) scrapy.utils.url.guess_scheme(3) scrapy.utils.url.escape_ajax(1) scrapy.utils.url.canonicalize_url(1) scrapy.utils.url.add_http_if_no_scheme(26) scrapy.utils.trackref.print_live_refs(2) scrapy.utils.trackref.iter_all(1) scrapy.utils.trackref.get_oldest(1) scrapy.utils.trackref.format_live_refs(2) scrapy.utils.test.skip_if_no_boto(2) scrapy.utils.test.get_testenv(4) scrapy.utils.test.get_s3_content_and_delete(2) scrapy.utils.test.get_pythonpath(1) scrapy.utils.test.get_crawler(52) scrapy.utils.test.assert_samelines(2) scrapy.utils.test.assert_aws_environ(2) scrapy.utils.template.string_camelcase(3) scrapy.utils.template.render_templatefile(3) scrapy.utils.spider.spidercls_for_request(3) scrapy.utils.spider.iterate_spider_output(10) 
scrapy.utils.spider.iter_spider_classes(3) scrapy.utils.sitemap.sitemap_urls_from_robots(2) scrapy.utils.sitemap.Sitemap(10) scrapy.utils.signal.send_catch_log_deferred(1) scrapy.utils.signal.send_catch_log(2) scrapy.utils.signal.disconnect_all(2) scrapy.utils.serialize.ScrapyJSONEncoder(5) scrapy.utils.response.response_status_message(2) scrapy.utils.response.response_httprepr(2) scrapy.utils.response.open_in_browser(2) scrapy.utils.response.get_meta_refresh(2) scrapy.utils.response.get_base_url(10) scrapy.utils.request.request_httprepr(3) scrapy.utils.request.request_fingerprint(15) scrapy.utils.request.request_authenticate(1) scrapy.utils.request.referer_str(5) scrapy.utils.reqser.request_to_dict(5) scrapy.utils.reqser.request_from_dict(4) scrapy.utils.reactor.listen_tcp(1) scrapy.utils.reactor.CallLaterOnce(1) scrapy.utils.python.without_none_values(7) scrapy.utils.python.unique_list(5) scrapy.utils.python.unicode_to_str(2) scrapy.utils.python.to_unicode(32) scrapy.utils.python.to_native_str(34) scrapy.utils.python.to_bytes(51) scrapy.utils.python.stringify_dict(4) scrapy.utils.python.retry_on_eintr(2) scrapy.utils.python.re_rsearch(1) scrapy.utils.python.is_listlike(3) scrapy.utils.python.get_spec(1) scrapy.utils.python.get_func_args(2) scrapy.utils.python.flatten(6) scrapy.utils.python.equal_attributes(1) scrapy.utils.python.binary_is_text(5) scrapy.utils.python.WeakKeyCache(1) scrapy.utils.project.project_data_dir(1) scrapy.utils.project.inside_project(1) scrapy.utils.project.get_project_settings(9) scrapy.utils.project.data_path(5) scrapy.utils.ossignal.install_shutdown_handlers(3) scrapy.utils.misc.walk_modules(4) scrapy.utils.misc.rel_has_nofollow(2) scrapy.utils.misc.md5sum(3) scrapy.utils.misc.load_object(29) scrapy.utils.misc.extract_regex(1) scrapy.utils.misc.arg_to_iter(26) scrapy.utils.markup.unquote_markup(1) scrapy.utils.markup.remove_entities(1) scrapy.utils.log.logformatter_adapter(2) scrapy.utils.log.log_scrapy_info(2) 
scrapy.utils.log.failure_to_exc_info(17) scrapy.utils.log.configure_logging(4) scrapy.utils.log.TopLevelFormatter(2) scrapy.utils.log.StreamLogger(1) scrapy.utils.log.LogCounterHandler(2) scrapy.utils.job.job_dir(3) scrapy.utils.iterators.xmliter(1) scrapy.utils.iterators.csviter(10) scrapy.utils.iterators._body_or_str(2) scrapy.utils.httpobj.urlparse_cached(12) scrapy.utils.http.decode_chunked_transfer(2) scrapy.utils.gz.is_gzipped(9) scrapy.utils.gz.gunzip(6) scrapy.utils.ftp.ftp_makedirs_cwd(1) scrapy.utils.engine.print_engine_status(1) scrapy.utils.engine.get_engine_status(3) scrapy.utils.engine.format_engine_status(1) scrapy.utils.display.pprint(2) scrapy.utils.deprecate.update_classpath(3) scrapy.utils.deprecate.create_deprecated_class(13) scrapy.utils.defer.process_parallel(3) scrapy.utils.defer.process_chain_both(2) scrapy.utils.defer.process_chain(2) scrapy.utils.defer.parallel(1) scrapy.utils.defer.mustbe_deferred(7) scrapy.utils.defer.iter_errback(3) scrapy.utils.defer.defer_succeed(1) scrapy.utils.defer.defer_result(2) scrapy.utils.datatypes.MergeDict(2) scrapy.utils.datatypes.CaselessDict(10) scrapy.utils.console.start_python_console(1) scrapy.utils.console.get_shell_embed_func(3) scrapy.utils.conf.init_env(1) scrapy.utils.conf.get_config(2) scrapy.utils.conf.closest_scrapy_cfg(4) scrapy.utils.conf.build_component_list(11) scrapy.utils.conf.arglist_to_dict(5) scrapy.utils.boto.is_botocore(6) scrapy.utils._signal.send_catch_log_deferred(1) scrapy.utils._signal.send_catch_log(1) scrapy.utils._signal.disconnect_all(1) scrapy.spidermiddlewares.urllength.UrlLengthMiddleware(1) scrapy.spidermiddlewares.referer.RefererMiddleware(1) scrapy.spidermiddlewares.httperror.HttpErrorMiddleware(3) scrapy.spidermiddlewares.httperror.HttpError(1) scrapy.spidermiddlewares.depth.DepthMiddleware(1) scrapy.spiderloader.SpiderLoader.from_settings(4) scrapy.settings.deprecated.check_deprecated_settings(1) scrapy.selector.Selector.xpath(1) 
scrapy.responsetypes.responsetypes.from_headers(1) scrapy.responsetypes.responsetypes.from_filename(1) scrapy.responsetypes.responsetypes.from_content_type(1) scrapy.responsetypes.responsetypes.from_content_disposition(1) scrapy.responsetypes.responsetypes.from_body(1) scrapy.responsetypes.responsetypes.from_args(13) scrapy.resolver.dnscache.get(2) scrapy.pipelines.images.ImagesPipeline(2) scrapy.pipelines.files.S3FilesStore(1) scrapy.mail.MailSender.from_settings(2) scrapy.log.log.defaultObserver(1) scrapy.loader.processors.TakeFirst(12) scrapy.loader.processors.SelectJmes(1) scrapy.loader.processors.MapCompose(12) scrapy.loader.processors.Join(2) scrapy.loader.processors.Identity(3) scrapy.loader.processors.Compose(4) scrapy.linkextractors.sgml.SgmlLinkExtractor(3) scrapy.linkextractors.sgml.BaseSgmlLinkExtractor(5) scrapy.linkextractors.regex.RegexLinkExtractor(3) scrapy.linkextractors.lxmlhtml.LxmlLinkExtractor(2) scrapy.linkextractors.htmlparser.HtmlParserLinkExtractor(2) scrapy.http.response.ScrapyResponse(2) scrapy.http.request.ScrapyRequest(2) scrapy.http.headers.Headers(3) scrapy.http.cookies.WrappedResponse(1) scrapy.http.cookies.WrappedRequest(1) scrapy.http.Request.__init__(1) scrapy.http.Headers.__setitem__(1) scrapy.http.FormRequest.from_response(4) scrapy.http.FormRequest.__init__(1) scrapy.extensions.spiderstate.SpiderState(2) scrapy.extensions.feedexport.StdoutFeedStorage(1) scrapy.extensions.feedexport.S3FeedStorage(1) scrapy.extensions.feedexport.FileFeedStorage(5) scrapy.extensions.feedexport.FTPFeedStorage(1) scrapy.extensions.feedexport.BlockingFeedStorage(3) scrapy.extension.ExtensionManager.from_crawler(1) scrapy.downloadermiddlewares.stats.DownloaderStats(1) scrapy.downloadermiddlewares.robotstxt.RobotsTxtMiddleware(8) scrapy.downloadermiddlewares.httpproxy.HttpProxyMiddleware(7) scrapy.downloadermiddlewares.httpcompression.HttpCompressionMiddleware(1) scrapy.downloadermiddlewares.httpcache.HttpCacheMiddleware(1) 
scrapy.downloadermiddlewares.httpauth.HttpAuthMiddleware(1) scrapy.downloadermiddlewares.decompression.DecompressionMiddleware(1) scrapy.downloadermiddlewares.cookies.CookiesMiddleware(1) scrapy.crawler.CrawlerRunner.create_crawler(5) scrapy.core.scraper.Scraper(1) scrapy.core.engine.ExecutionEngine(5) scrapy.contrib.loader.XPathItemLoader(2) scrapy.contrib.exporter.JsonLinesItemExporter(1) scrapy.contrib.exporter.JsonItemExporter(1) scrapy.contrib.exporter.CsvItemExporter(1) scrapy.conf.settings.getlist(1) scrapy.conf.settings.getbool(1) scrapy.conf.settings.get(5) scrapy.commands.ScrapyCommand.process_options(3) scrapy.commands.ScrapyCommand.add_options(9) scrapi.tasks.run_harvester.delay(2) scrapi.tasks.app.worker_main(1) scrapi.tasks.app.conf(1) scrapi.settings.mapping.DOCUMENT_MAPPING(1) scrapi.settings.mapping.AUTHOR_MAPPING(1) scrapi.requests.get.json(7) scrapi.requests.get.content(2) scrapi.requests.HarvesterResponse.save(8) scrapi.processing.postgres.Document(2) scrapi.processing.get_processor.documents(1) scrapi.processing.elasticsearch.DatabaseManager(4) scrapi.processing.cassandra.management(1) scrapi.processing.cassandra.connection(2) scrapi.processing.cassandra.DatabaseManager(5) scrapi.processing.HarvesterResponse.save(1) scrapi.processing.HarvesterResponse.get(1) scrapi.linter.document.RawDocument(21) scrapi.linter.document.NormalizedDocument(4) scrapi.harvesters.push_api.gen_harvesters(1) scrapi.base.settings.RAISE_IN_TRANSFORMER(2) scrapi.base.helpers.xml_text_only(1) scrapi.base.helpers.updated_schema(12) scrapi.base.helpers.single_result(1) scrapi.base.helpers.seperate_provider_object_uris(1) scrapi.base.helpers.parse_name(2) scrapi.base.helpers.pack(1) scrapi.base.helpers.oai_process_uris(5) scrapi.base.helpers.oai_process_contributors(1) scrapi.base.helpers.oai_get_records_and_token(2) scrapi.base.helpers.oai_extract_url(1) scrapi.base.helpers.oai_extract_dois(1) scrapi.base.helpers.null_on_error(3) scrapi.base.helpers.gather_object_uris(1) 
scrapi.base.helpers.gather_identifiers(4) scrapi.base.helpers.format_tags(3) scrapi.base.helpers.format_doi_as_url(1) scrapi.base.helpers.extract_doi_from_text(1) scrapi.base.helpers.default_name_parser(3) scrapi.base.helpers.datetime_formatter(2) scrapi.base.helpers.compose(8) scrapi.base.helpers.build_properties(10) scrapi.base.helpers.CONSTANT(1) sitemap.generator.items.scrapy.item(1) scrapy.xlib.pydispatch.dispatcher.connect(6) scrapy.utils.trackref.live_refs.clear(1) scrapy.utils.request._fingerprint_cache.setdefault(1) scrapy.utils.python.to_unicode.strip(1) scrapy.utils.python.to_native_str.splitlines(1) scrapy.utils.python.to_native_str.split(3) scrapy.utils.python.stringify_dict.items(1) scrapy.utils.project.get_project_settings.copy(1) scrapy.utils.markup.remove_entities.strip(3) scrapy.utils.httpobj.urlparse_cached.scheme(2) scrapy.utils.httpobj.urlparse_cached.netloc(1) scrapy.utils.httpobj.urlparse_cached.hostname(3) scrapy.utils.defer.defer_result.addCallbacks(1) scrapy.utils.datatypes.CaselessDict.fromkeys(1) scrapy.utils.console.DEFAULT_PYTHON_SHELLS.keys(1) scrapy.spidermiddlewares.offsite.OffsiteMiddleware.from_crawler(1) scrapy.selector.Selector.xpath.extract(1) scrapy.responsetypes.responsetypes.mimetypes.guess_type(1) scrapy.pipelines.images.ImagesPipeline.from_settings(2) scrapy.pipelines.files.FilesPipeline.from_settings(4) scrapy.http.response.Response.replace(1) scrapy.extensions.spiderstate.SpiderState.from_crawler(1) scrapy.downloadermiddlewares.useragent.UserAgentMiddleware.from_crawler(1) scrapy.downloadermiddlewares.robotstxt.mw_module_logger.error(1) scrapy.downloadermiddlewares.retry.RetryMiddleware.from_crawler(1) scrapy.downloadermiddlewares.retry.RetryMiddleware.__init__(1) scrapy.downloadermiddlewares.redirect.RedirectMiddleware.from_crawler(1) scrapy.downloadermiddlewares.redirect.MetaRefreshMiddleware.from_crawler(1) scrapy.downloadermiddlewares.httpcache.HttpCacheMiddleware.from_crawler(1) 
scrapy.downloadermiddlewares.downloadtimeout.DownloadTimeoutMiddleware.from_crawler(1) scrapy.downloadermiddlewares.defaultheaders.DefaultHeadersMiddleware.from_crawler(1) scrapy.downloadermiddlewares.cookies.CookiesMiddleware.from_crawler(5) scrapy.downloadermiddlewares.ajaxcrawl.AjaxCrawlMiddleware.from_crawler(1) scrapy.core.spidermw.SpiderMiddlewareManager.from_crawler(1) scrapy.core.downloader.webclient._parse(1) scrapy.core.downloader.tls.ScrapyClientTLSOptions(1) scrapy.core.downloader.handlers.DownloadHandlers(3) scrapy.core.downloader.client._parse(2) scrapy.core.downloader.client.ScrapyHTTPPageGetter(1) scrapy.core.downloader.client.ScrapyHTTPClientFactory(4) scrapy.contrib.loader.XPathItemLoader.__init__(1) scrapy.contrib.loader.ItemLoader.__init__(1) scrapy.contrib.linkextractors.sgml.SgmlLinkExtractor(4) scrapy.contrib.linkextractors.LinkExtractor.extract_links(4) scrapi.processing.cassandra.VersionModel.objects(1) scrapi.processing.cassandra.DocumentModel.objects(3) scrapi.processing.base.BaseProcessor.__subclasses__(1) scrapi.base.transformer.logger.setLevel(1) frontera.contrib.scrapy.settings_adapter.ScrapySettingsAdapter(3) frontera.contrib.scrapy.manager.ScrapyFrontierManager(1) frontera.contrib.scrapy.converters.ResponseConverter(1) frontera.contrib.scrapy.converters.RequestConverter(1) sitemap.generator.pipelines.scrapy.exceptions.DropItem(1) scrapy.core.downloader.middleware.DownloaderMiddlewareManager.from_crawler(1) scrapy.core.downloader.handlers.s3.S3DownloadHandler(3) scrapy.core.downloader.handlers.ftp.FTPDownloadHandler(1) scrapy.contrib.linkextractors.lxmlhtml.LxmlLinkExtractor.extract_links(1) scrapi.processing.elasticsearch.ElasticsearchProcessor.manager.es(4) scrapi.processing.cassandra.DocumentModel.all.limit.get(1) frontera.contrib.scrapy.schedulers.frontier.FronteraScheduler(9) scrapy.core.downloader.handlers.file.FileDownloadHandler.download_request(1) frontera.contrib.scrapy.middlewares.seeds.s3.S3SeedLoader(1) 
frontera.contrib.scrapy.middlewares.seeds.file.FileSeedLoader(1)