scraper.models.EventWebsite

Here are examples of the Python API scraper.models.EventWebsite taken from open source projects.

1 Example

Example 1

    def setUp(self):
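        # Wipe and recreate the image directory so each run starts from a clean slate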
        if os.path.exists(self.IMG_DIR):
            shutil.rmtree(self.IMG_DIR)
        os.mkdir(self.IMG_DIR)

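        # Apply the test-specific values from self.dds_settings to the Scrapy settings
        # ('cmdline' priority so they override the project defaults)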
        settings.set('ITEM_PIPELINES', self.dds_settings['ITEM_PIPELINES'], priority='cmdline')
        settings.set('SPLASH_URL', self.dds_settings['SPLASH_URL'], priority='cmdline')
        settings.set('DUPEFILTER_CLASS', self.dds_settings['DUPEFILTER_CLASS'], priority='cmdline')
        settings.set('DOWNLOADER_MIDDLEWARES', self.dds_settings['DOWNLOADER_MIDDLEWARES'], priority='cmdline')
        settings.set('IMAGES_STORE', self.dds_settings['IMAGES_STORE'], priority='cmdline')
        if 'IMAGES_THUMBS' in self.dds_settings:
            settings.set('IMAGES_THUMBS', self.dds_settings['IMAGES_THUMBS'], priority='cmdline')
        if 'DSCRAPER_IMAGES_STORE_FORMAT' in self.dds_settings:
            settings.set('DSCRAPER_IMAGES_STORE_FORMAT', self.dds_settings['DSCRAPER_IMAGES_STORE_FORMAT'], priority='cmdline')

        settings.set('COOKIES_DEBUG', True)
        settings.set('LOG_LEVEL', 'DEBUG')
        settings.set('LOG_ENABLED', False)
        
        #self.crawler = Crawler(settings)
        #self.crawler.signals.connect(reactor.stop, signal=signals.spider_closed)
        #self.crawler.configure()
        
        self.process = CrawlerProcess(settings)
        
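        # Scraped object class 'Event' and its attributes; 'url' is marked as the ID field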
        self.sc = ScrapedObjClass(name='Event')
        self.sc.save()
        self.soa_base = ScrapedObjAttr(name='base', attr_type='B', obj_class=self.sc)
        self.soa_base.save()
        self.soa_title = ScrapedObjAttr(name='title', attr_type='S', obj_class=self.sc)
        self.soa_title.save()
        self.soa_url = ScrapedObjAttr(name='url', attr_type='U', obj_class=self.sc, id_field=True)
        self.soa_url.save()
        self.soa_url2 = ScrapedObjAttr(name='url2', attr_type='U', obj_class=self.sc)
        self.soa_url2.save()
        self.soa_desc = ScrapedObjAttr(name='description', attr_type='S', obj_class=self.sc)
        self.soa_desc.save()
        self.soa_desc2 = ScrapedObjAttr(name='description2', attr_type='S', obj_class=self.sc)
        self.soa_desc2.save()
        self.soa_es_1 = ScrapedObjAttr(name='extra_standard_1', attr_type='S', obj_class=self.sc, save_to_db=False)
        self.soa_es_1.save()

        self.scraper = Scraper(name='Event Scraper', scraped_obj_class=self.sc, status='A',)
        self.scraper.save()
        
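        # XPath scraper elements: base, title, url and the extra standard attribute are
        # read from the main page (MP), the description from detail page 1 (DP1)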
        self.se_base = ScraperElem(scraped_obj_attr=self.soa_base, scraper=self.scraper, 
            x_path='//ul/li', request_page_type='MP')
        self.se_base.save()
        self.se_title = ScraperElem(scraped_obj_attr=self.soa_title, scraper=self.scraper, 
            x_path='a/text()', request_page_type='MP')
        self.se_title.save()
        self.se_url = ScraperElem(scraped_obj_attr=self.soa_url, scraper=self.scraper, 
            x_path='a/@href', request_page_type='MP')
        self.se_url.save()
        self.se_desc = ScraperElem(scraped_obj_attr=self.soa_desc, scraper=self.scraper, 
            x_path='//div/div[@class="description"]/text()', request_page_type='DP1', mandatory=False)
        self.se_desc.save()
        self.se_es_1 = ScraperElem(scraped_obj_attr=self.soa_es_1, scraper=self.scraper, 
            x_path='a/text()', request_page_type='MP')
        self.se_es_1.save()

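        # Request page types: the main page (MP) plus one detail page (DP1) whose
        # requests are built from the 'url' attribute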
        self.rpt_mp  = RequestPageType(page_type='MP', scraper=self.scraper)
        self.rpt_mp.save()
        self.rpt_dp1 = RequestPageType(page_type='DP1', scraper=self.scraper, scraped_obj_attr=self.soa_url)
        self.rpt_dp1.save()
        
        self.sched_rt = SchedulerRuntime()
        self.sched_rt.save()
        
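        # The EventWebsite object under test: it ties the start URL to the scraper
        # and its scheduler runtime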
        self.event_website = EventWebsite(pk=1, name='Event Website', scraper=self.scraper,
            url=os.path.join(self.SERVER_URL, 'site_generic/event_main.html'), scraper_runtime=self.sched_rt,)
        self.event_website.save()
        
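        # Connect self.record_signal to every public Scrapy signal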
        for name, signal in list(vars(signals).items()):
            if not name.startswith('_'):
                dispatcher.connect(self.record_signal, signal)
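
The EventWebsite model itself is not shown in the snippet; it lives in scraper.models of the project this test belongs to. Judging from the fields used in setUp (name, url, scraper, scraper_runtime), a minimal Django model along the following lines would be enough to run the example; the field options and the dynamic_scraper import path are assumptions, not the project's actual definition.

# Hypothetical sketch of scraper.models.EventWebsite, inferred from the fields
# used in the setUp above; types, options and imports are assumptions.
from django.db import models
from dynamic_scraper.models import Scraper, SchedulerRuntime


class EventWebsite(models.Model):
    name = models.CharField(max_length=200)
    url = models.URLField()
    scraper = models.ForeignKey(
        Scraper, blank=True, null=True, on_delete=models.SET_NULL)
    scraper_runtime = models.ForeignKey(
        SchedulerRuntime, blank=True, null=True, on_delete=models.SET_NULL)

    def __str__(self):
        return self.name

With such a model in place, the object saved above can be fetched again in a test body with a plain ORM call, e.g. EventWebsite.objects.get(pk=1).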