scrapy.log.DEBUG

Here are examples of the Python API scrapy.log.DEBUG, taken from open source projects. By voting up, you can indicate which examples are most useful and appropriate.

14 Examples 7

Example 1

Project: daywatch
Source File: middleware.py
View license
    def process_spider_output(self, response, result, spider):
        """Yield the entries of ``result``, dropping rejected requests.

        An entry is only subject to filtering when it is a ``Request`` and the
        spider has a ``disallow_urls`` attribute; such an entry is yielded only
        if ``self.should_follow`` accepts it, otherwise it is logged at DEBUG
        level and skipped. Every other entry is yielded unchanged.
        """
        for entry in result:
            filterable = (isinstance(entry, Request)
                          and hasattr(spider, 'disallow_urls'))
            if not filterable or self.should_follow(entry, spider):
                yield entry
                continue
            log.msg("Filtered URL %s: " % (entry.url),
                    level=log.DEBUG, spider=spider)

Example 2

Project: scrapy-webdriver
Source File: download.py
View license
    @inthread
    def _download_request(self, request, spider):
        """Load the request's URL in its webdriver and return the response.

        The result is a ``WebdriverResponse`` built from the request URL and
        the webdriver held by ``request.manager``.
        """
        target_url = request.url
        driver = request.manager.webdriver
        log.msg('Downloading %s with webdriver' % target_url, level=log.DEBUG)
        driver.get(target_url)
        return WebdriverResponse(target_url, driver)

Example 3

Project: scrapy-webdriver
Source File: download.py
View license
    @inthread
    def _do_action_request(self, request, spider):
        """Call ``perform()`` on the request's actions and return the response.

        The result is a ``WebdriverResponse`` built from the request URL and
        the webdriver held by ``request.manager``.
        """
        target_url = request.url
        log.msg('Running webdriver actions %s' % target_url, level=log.DEBUG)
        request.actions.perform()
        driver = request.manager.webdriver
        return WebdriverResponse(target_url, driver)

Example 4

Project: malspider
Source File: download.py
View license
    @inthread
    def _download_request(self, request, spider):
        """Fetch the request URL through webdriver, optionally taking a screenshot.

        When both the ``TAKE_SCREENSHOT`` and ``SCREENSHOT_LOCATION`` settings
        are truthy, a screenshot is saved under a randomly numbered ``.png``
        name and its path is stored in ``request.meta['screenshot']``. The
        ``User-Agent`` and ``Referer`` request headers are always copied into
        ``request.meta`` before the ``WebdriverResponse`` is returned.
        """
        log.msg('Downloading %s with webdriver' % request.url, level=log.DEBUG)
        driver = request.manager.webdriver
        driver.get(request.url)

        wants_shot = getattr(settings, 'TAKE_SCREENSHOT', None)
        shot_prefix = getattr(settings, 'SCREENSHOT_LOCATION', None)
        if wants_shot and shot_prefix:
            # The location setting is used as a plain string prefix.
            shot_path = shot_prefix + str(randint(10000, 10000000)) + '.png'
            driver.save_screenshot(shot_path)
            request.meta['screenshot'] = shot_path

        for header in ('User-Agent', 'Referer'):
            request.meta[header] = request.headers.get(header)
        return WebdriverResponse(request.url, driver)

Example 5

Project: malspider
Source File: download.py
View license
    @inthread
    def _do_action_request(self, request, spider):
        """Call ``perform()`` on the request's actions and return the response.

        The result is a ``WebdriverResponse`` built from the request URL and
        the webdriver held by ``request.manager``.
        """
        target_url = request.url
        log.msg('Running webdriver actions %s' % target_url, level=log.DEBUG)
        request.actions.perform()
        driver = request.manager.webdriver
        return WebdriverResponse(target_url, driver)

Example 6

Project: sogouWeixin
Source File: weixinHistoryArticle.py
View license
    @staticmethod
    def _re_extract(node, pattern):
        """Return group 1 of the first match of ``pattern`` in ``node``.

        Returns ``None`` when ``node`` is falsy or when the pattern does not
        match. Both arguments are logged at DEBUG level first.
        """
        log.msg("Content is : %s" % node, level=log.DEBUG)
        log.msg("Pattern is : %s" % pattern, level=log.DEBUG)
        # Compiled before the emptiness check, so an invalid pattern raises
        # even when there is nothing to search.
        compiled = re.compile(pattern)
        if node:
            found = compiled.search(node)
            if found:
                return found.group(1)
        return None

Example 7

Project: sogouWeixin
Source File: weixinIDCrawler.py
View license
    def get_renzhenginfo(self, node):
        """Extract ``node`` and return it after applying ``self.subn``.

        Returns ``None`` when ``node`` is falsy or its extracted content is
        falsy; otherwise returns the first element of
        ``self.subn.subn("", ...)`` applied to the extracted content.
        """
        log.msg("inside get_renzhenginfo.", level = log.DEBUG)
        if not node:
            return None

        extracted = node.extract()
        if not extracted:
            return None

        # The second element (the substitution count) is not needed.
        cleaned, _ = self.subn.subn("", extracted)
        return cleaned

Example 8

Project: sogouWeixin
Source File: weixinIDCrawler.py
View license
    def get_latestarticle(self, node):
        """Extract ``node`` and return it after applying ``self.subn``.

        Returns ``None`` when ``node`` is falsy or its extracted content is
        falsy; otherwise returns the first element of
        ``self.subn.subn("", ...)`` applied to the extracted content.
        """
        log.msg("inside get_latestarticle.", level = log.DEBUG)
        if not node:
            return None

        extracted = node.extract()
        if not extracted:
            return None

        # The second element (the substitution count) is not needed.
        cleaned, _ = self.subn.subn("", extracted)
        return cleaned

Example 9

Project: scrapy-mongodb
Source File: scrapy_mongodb.py
View license
    def insert_item(self, item, spider):
        """Store the item (or list of items) in the MongoDB collection.

        A non-list item is converted to a ``dict`` and, when the
        ``append_timestamp`` config entry is truthy, gets a ``scrapy-mongodb``
        entry holding the current UTC time. With no ``unique_key`` configured
        the item is inserted; otherwise it is upserted using the unique key
        field(s) as the selector.

        :type item: (Item object) or [(Item object)]
        :param item: The item(s) to put into MongoDB
        :type spider: BaseSpider object
        :param spider: The spider running the queries
        :returns: The stored item (a ``dict`` unless a list was passed in)
        """
        if not isinstance(item, list):
            item = dict(item)
            if self.config['append_timestamp']:
                item['scrapy-mongodb'] = {'ts': datetime.datetime.utcnow()}

        unique_key = self.config['unique_key']

        if unique_key is not None:
            # Upsert path: build the selector from the unique key field(s).
            if isinstance(unique_key, list):
                selector = {}
                for field in dict(unique_key):
                    selector[field] = item[field]
            else:
                selector = {unique_key: item[unique_key]}

            self.collection.update(selector, item, upsert=True)
            log.msg(
                u'Stored item(s) in MongoDB {0}/{1}'.format(
                    self.config['database'], self.config['collection']),
                level=log.DEBUG,
                spider=spider)
            return item

        # Plain insert path.
        try:
            self.collection.insert(item, continue_on_error=True)
            log.msg(
                u'Stored item(s) in MongoDB {0}/{1}'.format(
                    self.config['database'], self.config['collection']),
                level=log.DEBUG,
                spider=spider)
        except errors.DuplicateKeyError:
            log.msg(u'Duplicate key found', level=log.DEBUG)
            # Duplicates are only counted when a positive limit is set; the
            # spider is closed once the count reaches that limit.
            if self.stop_on_duplicate > 0:
                self.duplicate_key_count += 1
                if self.duplicate_key_count >= self.stop_on_duplicate:
                    self.crawler.engine.close_spider(
                        spider,
                        'Number of duplicate key insertion exceeded'
                    )

        return item

Example 10

Project: sozlukcrawler
Source File: dupefilter.py
View license
    def request_seen(self, request):
        """Report whether ``request`` was seen before, recording it if not.

        The result of ``is_request_seen(request)`` is returned as-is. When it
        is falsy, a ``Seen`` row with the request fingerprint, URL and current
        time is added to the session and committed; on any failure the session
        is rolled back and the error re-raised. The session is closed either
        way.
        """
        is_seen = is_request_seen(request)

        if is_seen:
            log.msg('[seen] "%s" is seen. Skipping.' % request.url, log.INFO)
            return is_seen

        log.msg('New URL: %s. Adding it to seen database' % request.url, log.DEBUG)
        record = Seen(fingerprint=request_fingerprint(request),
                      url=request.url,
                      last_crawl_time=datetime.now())
        try:
            session.add(record)
            session.commit()
        except:
            # Deliberately catches everything: roll back, then re-raise.
            session.rollback()
            raise
        finally:
            session.close()

        return is_seen

Example 11

Project: sozlukcrawler
Source File: pipelines.py
View license
    def process_item(self, item, spider):
        """
        Add every scraped entry to the database. This method is called each time
        after a page has been processed, its information extracted and the Item
        object created.

        :param item: The parsed object
        :type item: Scrapy item
        :param spider: The currently running spider, any one of those defined under the spiders/ directory
        :type spider: Scrapy spider
        :return: The Item that was passed in
        :rtype: Scrapy item
        """
        details = (spider.name, item['girdi_id'], item['baslik'])
        log.msg('[%s] PROCESSING ITEM [item no: %s, baslik: %s]' % details,
                level=log.DEBUG)

        entry = Girdi(**item)
        try:
            session.add(entry)
            session.commit()
        except:
            # Deliberately catches everything: roll back, then re-raise.
            session.rollback()
            raise
        finally:
            session.close()

        return item

Example 12

Project: sogouWeixin
Source File: middleware.py
View license
    def process_request(self, request, spider):
        """Set the request's User-Agent header to a random entry of ``AGENTS``.

        The chosen agent is logged at DEBUG level. Nothing is returned.
        """
        chosen_agent = random.choice(AGENTS)
        request.headers['User-Agent'] = chosen_agent
        log.msg("Add agent %s" % chosen_agent, level=log.DEBUG)

Example 13

Project: sogouWeixin
Source File: chuansongme.py
View license
    def parse_item(self, response):
        """Build a ``ChuansongmeItem`` from three XPath text lookups.

        Returns ``None`` unless the user id lookup yields exactly one value.
        The ``nickname`` and ``gongneng`` fields are set to the single
        extracted value, or to ``None`` when the lookup does not yield exactly
        one value.
        """
        item = ChuansongmeItem()

        userid_texts = response.xpath('//*[@id="ld_E6G600_305"]/text()').extract()
        log.msg("userid" + repr(userid_texts), level = log.DEBUG)

        # Without exactly one user id there is nothing worth returning.
        if len(userid_texts) != 1:
            return None
        item['userid'] = self.__post_process_userid(userid_texts[0])

        nickname_texts = response.xpath(r'//*[@id="ld_XT398O_291"]/text()').extract()
        log.msg("nickname" + repr(nickname_texts), level = log.DEBUG)
        item['nickname'] = nickname_texts[0] if len(nickname_texts) == 1 else None

        gongneng_texts = response.xpath('//*[@id="__w2_qcGcPvc_text_snip_content"]/text()').extract()
        log.msg("gongneng" + repr(gongneng_texts), level = log.DEBUG)
        item['gongneng'] = gongneng_texts[0] if len(gongneng_texts) == 1 else None

        return item

Example 14

Project: sogouWeixin
Source File: weixinIDCrawler.py
View license
    def parse_items(self, response):
        """Build one ``SogouweixinItem`` per entry returned by ``self.get_all``.

        For each key of ``field_xpath``:

        * a non-tuple key maps to a single XPath; the first matching node is
          passed to the handler registered under that key in
          ``self.field_action`` and the result stored under the key;
        * a tuple key maps to a ``(rootpath, childpath)`` pair; for every node
          matching ``rootpath``, each key produced by ``self.parse_type(node)``
          is looked up in ``childpath``, and the first node matching that XPath
          is passed to the handler for that key.

        Lookups that match nothing leave the field unset. Returns the list of
        populated items.
        """
        collected = []
        for entry in self.get_all(response):
            info = SogouweixinItem()
            for field in field_xpath.keys():
                if not isinstance(field, tuple):
                    # Simple field: one XPath, one handler.
                    matches = entry.xpath(field_xpath[field])
                    if matches:
                        first = matches[0]
                        value = self.field_action.get(field)(self, first)
                        log.msg(field + ":" + repr(value), level = log.DEBUG)
                        info[field] = value
                    continue

                # Composite field: a root XPath plus per-key child XPaths.
                rootpath, childpath = field_xpath[field]
                for node in entry.xpath(rootpath):
                    nodetypes = self.parse_type(node)
                    log.msg("processing " + repr(nodetypes), level = log.DEBUG)
                    for key in nodetypes:
                        log.msg("processing " + key, level = log.DEBUG)
                        log.msg("child xpath " + repr(childpath), level = log.DEBUG)
                        key_xpath = childpath[key]
                        log.msg( key + " xpath " + repr(key_xpath), level = log.DEBUG)
                        found = node.xpath(key_xpath)
                        log.msg(key + " node " + repr(found), level = log.DEBUG)
                        if not found:
                            continue
                        first = found[0]
                        value = self.field_action.get(key)(self, first)
                        log.msg(key + ":" + repr(value), level = log.DEBUG)
                        info[key] = value
            collected.append(info)

        return collected