Here are examples of the Python API scrapy.utils.url.url_is_from_spider taken from open-source projects. By voting up, you can indicate which examples are most useful and appropriate.
5 Examples
3
Example 1
Project: scrapy Source File: test_utils_url.py
def test_url_is_from_spider(self):
    """Check URL matching for a spider instance that only defines a name.

    URLs on ``www.``/``sub.`` hosts of the spider's name must be accepted;
    URLs on other top-level domains must be rejected.
    """
    spider = Spider(name='example.com')

    matching_urls = (
        'http://www.example.com/some/page.html',
        'http://sub.example.com/some/page.html',
    )
    foreign_urls = (
        'http://www.example.org/some/page.html',
        'http://www.example.net/some/page.html',
    )

    for url in matching_urls:
        self.assertTrue(url_is_from_spider(url, spider))
    for url in foreign_urls:
        self.assertFalse(url_is_from_spider(url, spider))
3
Example 2
Project: scrapy Source File: test_utils_url.py
def test_url_is_from_spider_class_attributes(self):
    """Check URL matching when the spider *class* (not an instance) is passed.

    The name is declared as a class attribute, and the class object itself
    is handed to ``url_is_from_spider``.
    """
    class MySpider(Spider):
        name = 'example.com'

    matching_urls = (
        'http://www.example.com/some/page.html',
        'http://sub.example.com/some/page.html',
    )
    foreign_urls = (
        'http://www.example.org/some/page.html',
        'http://www.example.net/some/page.html',
    )

    for url in matching_urls:
        self.assertTrue(url_is_from_spider(url, MySpider))
    for url in foreign_urls:
        self.assertFalse(url_is_from_spider(url, MySpider))
3
Example 3
Project: scrapy Source File: test_utils_url.py
def test_url_is_from_spider_with_allowed_domains(self):
    """Check URL matching for spider instances that set ``allowed_domains``.

    With a list of extra domains, URLs on the spider's own name and on
    every listed domain must be accepted, while an unlisted domain must be
    rejected. The same lookup must also work when ``allowed_domains`` is
    given as a set or as a tuple.
    """
    spider = Spider(name='example.com', allowed_domains=['example.org', 'example.net'])

    accepted_urls = (
        'http://www.example.com/some/page.html',
        'http://sub.example.com/some/page.html',
        'http://example.com/some/page.html',
        'http://www.example.org/some/page.html',
        'http://www.example.net/some/page.html',
    )
    for url in accepted_urls:
        self.assertTrue(url_is_from_spider(url, spider))
    self.assertFalse(url_is_from_spider('http://www.example.us/some/page.html', spider))

    # Repeat a positive lookup with non-list containers: first a set, then a tuple.
    container_variants = (
        {'example.com', 'example.net'},
        ('example.com', 'example.net'),
    )
    for domains in container_variants:
        spider = Spider(name='example.com', allowed_domains=domains)
        self.assertTrue(url_is_from_spider('http://www.example.com/some/page.html', spider))
3
Example 4
Project: scrapy Source File: test_utils_url.py
def test_url_is_from_spider_with_allowed_domains_class_attributes(self):
    """Check URL matching for a spider class declaring ``allowed_domains``.

    Both ``name`` and ``allowed_domains`` are class attributes, and the
    class object itself is passed to ``url_is_from_spider``.
    """
    class MySpider(Spider):
        name = 'example.com'
        allowed_domains = ('example.org', 'example.net')

    accepted_urls = (
        'http://www.example.com/some/page.html',
        'http://sub.example.com/some/page.html',
        'http://example.com/some/page.html',
        'http://www.example.org/some/page.html',
        'http://www.example.net/some/page.html',
    )
    for url in accepted_urls:
        self.assertTrue(url_is_from_spider(url, MySpider))

    # A domain that is neither the name nor in allowed_domains is rejected.
    self.assertFalse(url_is_from_spider('http://www.example.us/some/page.html', MySpider))
0
Example 5
Project: scrapy Source File: __init__.py
@classmethod
def handles_request(cls, request):
    """Return what ``url_is_from_spider`` reports for *request* and this class.

    The check is delegated entirely to ``url_is_from_spider``, which
    receives the request's ``url`` attribute and the class itself (not an
    instance).
    """
    request_url = request.url
    return url_is_from_spider(request_url, cls)