Here are examples of the Python API `scrapy.loader.processors.TakeFirst`, taken from open-source projects. By voting up, you can indicate which examples are most useful and appropriate.
12 Examples
3
Example 1
Project: scrapy Source File: test_loader.py
def test_get_value(self):
    # Exercises get_value(), add_value() and replace_value() on a
    # NameItemLoader, combining extra processors with the ``re`` keyword.
    loader = NameItemLoader()
    name_re = u'name:(.*)$'

    # Extra processors only: expect the first value, upper-cased.
    upper_first = loader.get_value([u'foo', u'bar'], TakeFirst(), six.text_type.upper)
    self.assertEqual(u'FOO', upper_first)

    # ``re`` only: expect the captured group of every value.
    all_matches = loader.get_value([u'name:foo', u'name:bar'], re=name_re)
    self.assertEqual([u'foo', u'bar'], all_matches)

    # TakeFirst together with ``re``: expect a single captured value.
    first_match = loader.get_value([u'name:foo', u'name:bar'], TakeFirst(), re=name_re)
    self.assertEqual(u'foo', first_match)

    # The same combination through add_value() collects one value ...
    loader.add_value('name', [u'name:foo', u'name:bar'], TakeFirst(), re=name_re)
    collected = loader.get_collected_values('name')
    self.assertEqual([u'foo'], collected)

    # ... and replace_value() swaps it for the new match.
    loader.replace_value('name', u'name:bar', re=name_re)
    collected = loader.get_collected_values('name')
    self.assertEqual([u'bar'], collected)
3
Example 2
Project: scrapy Source File: test_loader.py
def test_get_xpath(self):
    # get_xpath() on a loader built from self.response: plain query, query
    # with TakeFirst, query with TakeFirst plus ``re``, and a list of queries.
    loader = TestItemLoader(response=self.response)
    paragraph_xpath = '//p/text()'

    plain = loader.get_xpath(paragraph_xpath)
    self.assertEqual(plain, [u'paragraph'])

    first = loader.get_xpath(paragraph_xpath, TakeFirst())
    self.assertEqual(first, u'paragraph')

    first_matching = loader.get_xpath(paragraph_xpath, TakeFirst(), re='pa')
    self.assertEqual(first_matching, u'pa')

    # Several expressions may be passed as a list.
    combined = loader.get_xpath([paragraph_xpath, '//div/text()'])
    self.assertEqual(combined, [u'paragraph', 'marta'])
3
Example 3
Project: scrapy Source File: test_loader.py
def test_replace_xpath_multi_fields(self):
    # With a field name of None, the processed value is a dict whose key
    # names the field; add_xpath() and replace_xpath() must both honour it.
    def as_name(value):
        return {'name': value}

    loader = TestItemLoader(response=self.response)

    loader.add_xpath(None, '//div/text()', TakeFirst(), as_name)
    name_after_add = loader.get_output_value('name')
    self.assertEqual(name_after_add, [u'Marta'])

    loader.replace_xpath(None, '//p/text()', TakeFirst(), as_name)
    name_after_replace = loader.get_output_value('name')
    self.assertEqual(name_after_replace, [u'Paragraph'])
3
Example 4
def test_get_css(self):
    # get_css() on a loader built from self.response: plain selector,
    # selector with TakeFirst, with TakeFirst plus ``re``, and selector lists.
    loader = TestItemLoader(response=self.response)
    paragraph_css = 'p::text'

    plain = loader.get_css(paragraph_css)
    self.assertEqual(plain, [u'paragraph'])

    first = loader.get_css(paragraph_css, TakeFirst())
    self.assertEqual(first, u'paragraph')

    first_matching = loader.get_css(paragraph_css, TakeFirst(), re='pa')
    self.assertEqual(first_matching, u'pa')

    # Several selectors may be passed as a list.
    texts = loader.get_css([paragraph_css, 'div::text'])
    self.assertEqual(texts, [u'paragraph', 'marta'])

    attributes = loader.get_css(['a::attr(href)', 'img::attr(src)'])
    self.assertEqual(attributes, [u'http://www.scrapy.org', u'/images/logo.png'])
3
Example 5
Project: scrapy Source File: test_loader.py
def test_replace_css_multi_fields(self):
    # With a field name of None, the processed value is a dict whose key
    # names the field; add_css() and replace_css() must both honour it.
    def wrap_as(field):
        # Build a processor that wraps a value as ``{field: value}``.
        return lambda x: {field: x}

    loader = TestItemLoader(response=self.response)

    # (loader method, CSS selector, target field, expected output value)
    steps = (
        (loader.add_css, 'div::text', 'name', [u'Marta']),
        (loader.replace_css, 'p::text', 'name', [u'Paragraph']),
        (loader.add_css, 'a::attr(href)', 'url', [u'http://www.scrapy.org']),
        (loader.replace_css, 'img::attr(src)', 'url', [u'/images/logo.png']),
    )
    for apply_css, css, field, expected in steps:
        apply_css(None, css, TakeFirst(), wrap_as(field))
        self.assertEqual(loader.get_output_value(field), expected)
3
Example 6
Project: django-dynamic-scraper Source File: django_spider.py
def _set_loader(self, response, from_page, xs, item):
    """Create ``self.loader`` for the given page type and content type.

    Records ``from_page`` on the spider, then builds either a
    ``JsonItemLoader`` (request page type content type ``'J'``) or a plain
    ``ItemLoader``:

    * for ``'MP'`` pages the loader wraps the ``item`` argument and selects
      from ``xs``;
    * for every other page type the item is taken from
      ``response.request.meta['item']`` (the ``item`` argument is ignored)
      and the loader works on the response itself, parsed with
      ``json.loads`` in the JSON case.

    The loader's default output processor is set to ``TakeFirst()`` and its
    ``log`` attribute to the spider's ``log``.
    """
    self.from_page = from_page
    rpt = self.scraper.get_rpt(from_page)

    if self.from_page == 'MP':
        if rpt.content_type == 'J':
            loader = JsonItemLoader(item=item, selector=xs)
        else:
            loader = ItemLoader(item=item, selector=xs)
    else:
        # Non-'MP' pages carry their item in the request meta.
        item = response.request.meta['item']
        if rpt.content_type == 'J':
            parsed_body = json.loads(response.body_as_unicode())
            loader = JsonItemLoader(item=item, selector=parsed_body)
        else:
            loader = ItemLoader(item=item, response=response)

    self.loader = loader
    self.loader.default_output_processor = TakeFirst()
    self.loader.log = self.log
3
Example 7
Project: django-dynamic-scraper Source File: django_spider.py
def _set_dummy_loader(self, response, from_page, xs, item):
    """Create ``self.dummy_loader``, a loader that always wraps a ``DummyItem``.

    Records ``from_page`` on the spider, then builds either a
    ``JsonItemLoader`` (request page type content type ``'J'``) or a plain
    ``ItemLoader`` around a fresh ``DummyItem()``. For ``'MP'`` pages the
    loader selects from ``xs``; for every other page type it works on the
    response itself, parsed with ``json.loads`` in the JSON case.

    The loader's default output processor is set to ``TakeFirst()`` and its
    ``log`` attribute to the spider's ``log``.
    """
    self.from_page = from_page
    rpt = self.scraper.get_rpt(from_page)

    if self.from_page == 'MP':
        if rpt.content_type == 'J':
            dummy_loader = JsonItemLoader(item=DummyItem(), selector=xs)
        else:
            dummy_loader = ItemLoader(item=DummyItem(), selector=xs)
    else:
        # The looked-up item is not used by the dummy loader; the lookup is
        # kept because it still raises if the request meta has no 'item'.
        item = response.request.meta['item']
        if rpt.content_type == 'J':
            parsed_body = json.loads(response.body_as_unicode())
            dummy_loader = JsonItemLoader(item=DummyItem(), selector=parsed_body)
        else:
            dummy_loader = ItemLoader(item=DummyItem(), response=response)

    self.dummy_loader = dummy_loader
    self.dummy_loader.default_output_processor = TakeFirst()
    self.dummy_loader.log = self.log
0
Example 8
Project: scrapy Source File: test_loader.py
def test_load_item_ignore_none_field_values(self):
    # A field whose output processor yields None must be left out of the
    # loaded item, while other falsy outputs (u'' and 0.0) must be kept.
    def validate_sku(value):
        # Let's assume a SKU is only digits; anything else maps to None.
        return value if value.isdigit() else None

    class MyLoader(ItemLoader):
        # take first which allows empty values
        name_out = Compose(lambda vs: vs[0])
        price_out = Compose(TakeFirst(), float)
        sku_out = Compose(TakeFirst(), validate_sku)

    sku_re = 'SKU: (.+)'
    invalid_fragment = u'SKU: not available'
    valid_fragment = u'SKU: 1234'

    loader = MyLoader(item={})
    # Should not return "sku: None".
    loader.add_value('sku', [invalid_fragment], re=sku_re)
    # Should not ignore empty values.
    loader.add_value('name', u'')
    loader.add_value('price', [u'0'])

    expected_without_sku = {'name': u'', 'price': 0.0}
    self.assertEqual(loader.load_item(), expected_without_sku)

    # Once the SKU validates, the field shows up in the item.
    loader.replace_value('sku', [valid_fragment], re=sku_re)
    loaded = loader.load_item()
    self.assertEqual(loaded['sku'], u'1234')
0
Example 9
Project: scrapy Source File: test_loader.py
def test_self_referencing_loader(self):
    # An output processor written as a loader method may read another
    # field's output value through ``self``.
    class MyLoader(ItemLoader):
        url_out = TakeFirst()

        def img_url_out(self, values):
            # Prefix the image name with the 'url' output, if there is one.
            base_url = self.get_output_value('url') or ''
            return base_url + values[0]

    # With a 'url' value present, the image URL is built on top of it.
    with_url = MyLoader(item={})
    with_url.add_value('url', 'http://example.com/')
    with_url.add_value('img_url', '1234.png')
    expected_with_url = {
        'url': 'http://example.com/',
        'img_url': 'http://example.com/1234.png',
    }
    self.assertEqual(with_url.load_item(), expected_with_url)

    # Without a 'url' value, only the bare image name is produced.
    without_url = MyLoader(item={})
    without_url.add_value('img_url', '1234.png')
    expected_without_url = {'img_url': '1234.png'}
    self.assertEqual(without_url.load_item(), expected_without_url)
0
Example 10
Project: scrapy Source File: test_loader.py
def test_iter_on_input_processor_input(self):
    # With TakeFirst as the input processor, each add_value() or
    # replace_value() call contributes a single value, whether it is given
    # a scalar or a list.
    class NameFirstItemLoader(NameItemLoader):
        name_in = TakeFirst()

    # (loader method name, value passed in); every case collects [u'marta'].
    single_call_cases = (
        ('add_value', u'marta'),
        ('add_value', [u'marta', u'jose']),
        ('replace_value', u'marta'),
        ('replace_value', [u'marta', u'jose']),
    )
    for method_name, value in single_call_cases:
        loader = NameFirstItemLoader()
        getattr(loader, method_name)('name', value)
        self.assertEqual(loader.get_collected_values('name'), [u'marta'])

    # Two add_value() calls accumulate the first value of each call.
    loader = NameFirstItemLoader()
    loader.add_value('name', u'marta')
    loader.add_value('name', [u'jose', u'pedro'])
    collected = loader.get_collected_values('name')
    self.assertEqual(collected, [u'marta', u'jose'])
0
Example 11
def test_take_first(self):
    # TakeFirst skips None and the empty string, but 0 counts as a value.
    take_first = TakeFirst()
    cases = (
        ([None, '', 'hello', 'world'], 'hello'),
        ([None, '', 0, 'hello', 'world'], 0),
    )
    for values, expected in cases:
        self.assertEqual(take_first(values), expected)
0
Example 12
Project: django-dynamic-scraper Source File: django_spider.py
def _get_processors(self, procs_str):
    """Build the processor chain for a comma-separated list of processor names.

    The chain always starts with ``TakeFirst()`` and
    ``processors.string_strip``. Each name in ``procs_str`` is then looked
    up on the ``processors`` module and on every module listed in
    ``self.conf['CUSTOM_PROCESSORS']``, in that order. Every match is
    appended, so a name defined in several modules is added once per module.
    Names found nowhere are reported through ``self.log`` at ERROR level.

    Args:
        procs_str: Comma-separated processor names; whitespace around each
            name is ignored. May be empty or ``None``.

    Returns:
        The default chain as a ``list`` when ``procs_str`` is empty,
        otherwise the complete chain as a ``tuple``. The differing
        container types are kept for backward compatibility.
    """
    procs = [TakeFirst(), processors.string_strip]
    if not procs_str:
        return procs

    # Import each custom module once per call, not once per processor name.
    # A module that cannot be imported is still skipped, but it is now
    # reported: otherwise a bad CUSTOM_PROCESSORS path only shows up as a
    # misleading "Processor ... is not defined!" error.
    lookup_modules = [processors]
    for cp_path in self.conf['CUSTOM_PROCESSORS']:
        try:
            custom_module = importlib.import_module(cp_path)
        except ImportError as exc:
            self.log(
                "Custom processor module '{path}' could not be imported: {err}".format(
                    path=cp_path, err=exc),
                logging.WARNING)
        else:
            lookup_modules.append(custom_module)

    for raw_name in procs_str.split(','):
        name = raw_name.strip()
        matches = [getattr(module, name)
                   for module in lookup_modules
                   if hasattr(module, name)]
        if matches:
            procs.extend(matches)
        else:
            self.log("Processor '{p}' is not defined!".format(p=name), logging.ERROR)

    return tuple(procs)