Here are examples of the Python API scrapy.log.DEBUG, taken from open-source projects. By voting up you can indicate which examples are most useful and appropriate.
13 Examples
3
Example 1
def process_spider_output(self, response, result, spider):
    """Spider middleware hook that filters outgoing Requests.

    When the spider defines ``disallow_urls``, each Request in *result*
    is passed to ``self.should_follow``; approved Requests are yielded
    and rejected ones are logged at DEBUG level. Anything that is not a
    Request (e.g. scraped items) passes through untouched.

    :param response: the response being processed (unused here)
    :param result: iterable of Requests and/or items from the spider
    :param spider: the spider that produced *result*
    """
    for x in result:
        if isinstance(x, Request) and hasattr(spider, 'disallow_urls'):
            if self.should_follow(x, spider):
                yield x
            else:
                log.msg("Filtered URL %s: " % (x.url),
                        level=log.DEBUG, spider=spider)
        else:
            # Non-Request results (items) are never filtered.
            yield x
3
Example 2
@inthread
def _download_request(self, request, spider):
    """Download a request URL using webdriver.

    Runs in a worker thread (via ``@inthread``) because webdriver's
    ``get`` blocks until the page has loaded.

    :returns: a WebdriverResponse wrapping the live webdriver instance
    """
    log.msg('Downloading %s with webdriver' % request.url, level=log.DEBUG)
    request.manager.webdriver.get(request.url)
    return WebdriverResponse(request.url, request.manager.webdriver)
3
Example 3
Project: scrapy-webdriver Source File: download.py
@inthread
def _do_action_request(self, request, spider):
    """Perform an action on a previously webdriver-loaded page.

    Runs in a worker thread (via ``@inthread``); executes the queued
    webdriver action chain, then returns a response wrapping the same
    webdriver instance so the spider can parse the resulting DOM.
    """
    log.msg('Running webdriver actions %s' % request.url, level=log.DEBUG)
    request.actions.perform()
    return WebdriverResponse(request.url, request.manager.webdriver)
3
Example 4
@inthread
def _download_request(self, request, spider):
    """Download a request URL using webdriver, optionally saving a
    screenshot of the rendered page.

    When both ``TAKE_SCREENSHOT`` and ``SCREENSHOT_LOCATION`` are set
    in settings, a PNG screenshot is written under a random file name
    and its path is recorded in ``request.meta['screenshot']``.
    The request's User-Agent and Referer headers are also copied into
    ``request.meta`` for downstream consumers.
    """
    log.msg('Downloading %s with webdriver' % request.url, level=log.DEBUG)
    request.manager.webdriver.get(request.url)
    take_screenshot = getattr(settings, 'TAKE_SCREENSHOT', None)
    screenshot_loc = getattr(settings, 'SCREENSHOT_LOCATION', None)
    if take_screenshot and screenshot_loc:
        # Random suffix reduces collisions between concurrent downloads.
        # NOTE(review): randint can still collide — uuid4 would be safer.
        screenshot_location = screenshot_loc + str(randint(10000, 10000000)) + '.png'
        request.manager.webdriver.save_screenshot(screenshot_location)
        request.meta['screenshot'] = screenshot_location
    # NOTE(review): header copies assumed unconditional (original
    # indentation was lost) — confirm they should run without a screenshot.
    request.meta['User-Agent'] = request.headers.get('User-Agent')
    request.meta['Referer'] = request.headers.get('Referer')
    return WebdriverResponse(request.url, request.manager.webdriver)
3
Example 5
Project: sogouWeixin Source File: weixinHistoryArticle.py
@staticmethod
def _re_extract(node, pattern):
    """Return the first capture group of *pattern* found in *node*.

    :param node: text to search; falsy values yield None
    :param pattern: regular expression with at least one capture group
    :returns: the group-1 match, or None when *node* is empty or the
        pattern does not match
    """
    log.msg("Content is : %s" % node, level=log.DEBUG)
    log.msg("Pattern is : %s" % pattern, level=log.DEBUG)
    rep = re.compile(pattern)
    if not node:
        return None
    m = rep.search(node)
    return m.group(1) if m else None
3
Example 6
Project: sogouWeixin Source File: weixinIDCrawler.py
def get_renzhenginfo(self, node):
    """Extract verification ('renzheng') info text from *node*.

    Extracts the node's HTML and strips markup by substituting
    ``self.subn`` (presumably a compiled regex — verify) with the
    empty string. Returns None when *node* or its extracted HTML
    is empty.
    """
    log.msg("inside get_renzhenginfo.", level=log.DEBUG)
    if node:
        html_part = node.extract()
        if not html_part:
            return None
        renzhenginfo, count = self.subn.subn("", html_part)
        return renzhenginfo
    return None
3
Example 7
Project: sogouWeixin Source File: weixinIDCrawler.py
def get_latestarticle(self, node):
    """Extract the latest-article text from *node*.

    Extracts the node's HTML and strips markup by substituting
    ``self.subn`` (presumably a compiled regex — verify) with the
    empty string. Returns None when *node* or its extracted HTML
    is empty.
    """
    log.msg("inside get_latestarticle.", level=log.DEBUG)
    if node:
        html_part = node.extract()
        if not html_part:
            return None
        article, count = self.subn.subn("", html_part)
        return article
    return None
0
Example 8
def insert_item(self, item, spider):
    """ Process the item and add it to MongoDB

    :type item: (Item object) or [(Item object)]
    :param item: The item(s) to put into MongoDB
    :type spider: BaseSpider object
    :param spider: The spider running the queries
    :returns: Item object
    """
    if not isinstance(item, list):
        item = dict(item)
        if self.config['append_timestamp']:
            item['scrapy-mongodb'] = {'ts': datetime.datetime.utcnow()}
    if self.config['unique_key'] is None:
        try:
            self.collection.insert(item, continue_on_error=True)
            log.msg(
                u'Stored item(s) in MongoDB {0}/{1}'.format(
                    self.config['database'], self.config['collection']),
                level=log.DEBUG,
                spider=spider)
        except errors.DuplicateKeyError:
            log.msg(u'Duplicate key found', level=log.DEBUG)
            # Optionally abort the crawl after too many duplicates.
            if self.stop_on_duplicate > 0:
                self.duplicate_key_count += 1
                if self.duplicate_key_count >= self.stop_on_duplicate:
                    self.crawler.engine.close_spider(
                        spider,
                        'Number of duplicate key insertion exceeded'
                    )
    else:
        # Upsert keyed on the configured unique key(s).
        key = {}
        if isinstance(self.config['unique_key'], list):
            for k in dict(self.config['unique_key']).keys():
                key[k] = item[k]
        else:
            key[self.config['unique_key']] = item[self.config['unique_key']]
        self.collection.update(key, item, upsert=True)
        log.msg(
            u'Stored item(s) in MongoDB {0}/{1}'.format(
                self.config['database'], self.config['collection']),
            level=log.DEBUG,
            spider=spider)
    return item
0
Example 9
def request_seen(self, request):
    """Duplicate filter: return whether *request* was seen before.

    Unseen requests are persisted to the ``Seen`` table with their
    fingerprint, URL and crawl time so future crawls can skip them.

    :returns: True when the request was already seen, else False
    """
    is_seen = is_request_seen(request)
    if not is_seen:
        log.msg('New URL: %s. Adding it to seen database' % request.url, log.DEBUG)
        seen = Seen(fingerprint=request_fingerprint(request),
                    url=request.url,
                    last_crawl_time=datetime.now())
        try:
            session.add(seen)
            session.commit()
        except:
            # Roll back the failed transaction, then re-raise — the
            # bare except is deliberate so cleanup runs for ANY error.
            session.rollback()
            raise
        finally:
            session.close()
    else:
        log.msg('[seen] "%s" is seen. Skipping.' % request.url, log.INFO)
    return is_seen
0
Example 10
def process_item(self, item, spider):
    """
    Add every scraped entry to the database. This method is called
    once per item, after the page has been processed, its data
    extracted and the Item object built.

    :param item: the parsed object
    :type item: Scrapy item
    :param spider: whichever of the spiders under spiders/ is running
    :type spider: Scrapy spider
    :return: the item that was passed in
    :rtype: Scrapy item
    """
    log.msg('[%s] PROCESSING ITEM [item no: %s, baslik: %s]' %
            (spider.name, item['girdi_id'], item['baslik']),
            level=log.DEBUG)
    girdi = Girdi(**item)
    try:
        session.add(girdi)
        session.commit()
    except:
        # Roll back, then re-raise — bare except is deliberate so
        # cleanup runs for ANY error.
        session.rollback()
        raise
    finally:
        session.close()
    return item
0
Example 11
def process_request(self, request, spider):
    """Downloader middleware hook: set a random User-Agent header.

    Picks one agent string from the module-level AGENTS pool per
    request, to reduce the chance of being blocked for a fixed UA.
    """
    agent = random.choice(AGENTS)
    request.headers['User-Agent'] = agent
    log.msg("Add agent %s" % agent, level=log.DEBUG)
0
Example 12
Project: sogouWeixin Source File: chuansongme.py
def parse_item(self, response):
    """Parse a chuansongme account page into a ChuansongmeItem.

    Requires exactly one userid node; when it is missing or ambiguous
    the page is skipped (returns None). nickname and gongneng fall
    back to None when their nodes are absent.
    """
    i = ChuansongmeItem()
    childs = response.xpath('//*[@id="ld_E6G600_305"]/text()').extract()
    log.msg("userid" + repr(childs), level=log.DEBUG)
    if len(childs) == 1:
        i['userid'] = self.__post_process_userid(childs[0])
    else:
        # No unambiguous userid — drop the page.
        return
    nodes = response.xpath(r'//*[@id="ld_XT398O_291"]/text()').extract()
    log.msg("nickname" + repr(nodes), level=log.DEBUG)
    if len(nodes) == 1:
        i['nickname'] = nodes[0]
    else:
        i['nickname'] = None
    nodes = response.xpath('//*[@id="__w2_qcGcPvc_text_snip_content"]/text()').extract()
    log.msg("gongneng" + repr(nodes), level=log.DEBUG)
    if len(nodes) == 1:
        i['gongneng'] = nodes[0]
    else:
        i['gongneng'] = None
    return i
0
Example 13
def parse_items(self, response):
    """Build a list of SogouweixinItem objects from the response.

    For each result node, every field in the module-level
    ``field_xpath`` map is extracted. A tuple key means a nested
    extraction: the root xpath yields sub-nodes whose type (via
    ``self.parse_type``) selects the child xpath. A plain key is a
    direct xpath on the item node. Extracted values are post-processed
    by the matching callable in ``self.field_action``.

    NOTE(review): reconstructed from indentation-stripped source —
    the nesting of the tuple branch should be confirmed upstream.
    """
    items = self.get_all(response)
    res = []
    for item in items:
        info = SogouweixinItem()
        for field in field_xpath.keys():
            if isinstance(field, tuple):
                rootpath, childpath = field_xpath[field]
                nodes = item.xpath(rootpath)
                for node in nodes:
                    nodetypes = self.parse_type(node)
                    log.msg("processing " + repr(nodetypes), level=log.DEBUG)
                    for key in nodetypes:
                        log.msg("processing " + key, level=log.DEBUG)
                        log.msg("child xpath " + repr(childpath), level=log.DEBUG)
                        log.msg(key + " xpath " + repr(childpath[key]), level=log.DEBUG)
                        vnode = node.xpath(childpath[key])
                        log.msg(key + " node " + repr(vnode), level=log.DEBUG)
                        if not vnode:
                            continue
                        vnode = vnode[0]
                        value = self.field_action.get(key)(self, vnode)
                        log.msg(key + ":" + repr(value), level=log.DEBUG)
                        info[key] = value
            else:
                node = item.xpath(field_xpath[field])
                if node:
                    node = node[0]
                    value = self.field_action.get(field)(self, node)
                    log.msg(field + ":" + repr(value), level=log.DEBUG)
                    info[field] = value
        res.append(info)
    return res