Here are examples of the Python API `acrawler.http.Request`, taken from open-source projects. By voting up, you can indicate which examples are most useful and appropriate.
15 Examples
3
Source : chain.py
with GNU General Public License v3.0
from wind2sing
with GNU General Public License v3.0
from wind2sing
def to_vanilla(self, **kwargs):
    """Drain ``self._urls`` and yield one ``Request`` per drained URL.

    All entries are popped off ``self._urls`` (leaving it empty) the first
    time the generator is advanced. The drained entries are then yielded
    in the reverse of the order in which they were popped.

    Args:
        **kwargs: Extra keyword arguments forwarded to every ``Request``
            together with ``self.kws``.

    Yields:
        A ``Request`` built from each URL.
    """
    drained = []
    while self._urls:
        drained.append(self._urls.pop())
    # Undo the reversal introduced by popping one entry at a time.
    drained.reverse()
    for target in drained:
        yield Request(target, **self.kws, **kwargs)
def callback(self):
3
Source : crawler.py
with GNU General Public License v3.0
from wind2sing
with GNU General Public License v3.0
from wind2sing
async def start_requests(self):
    """Yield the initial requests; override this for a custom spider.

    The default implementation yields one ``Request`` for every url in
    :attr:`start_urls`. Any Request yielded from :meth:`start_requests`
    gets :meth:`parse` combined into its callbacks, and all callbacks are
    passed on to the Response.
    """
    for start_url in self.start_urls:
        request = Request(start_url)
        yield request
async def parse(self, response: _Response):
3
Source : crawler.py
with GNU General Public License v3.0
from wind2sing
with GNU General Public License v3.0
from wind2sing
def web_add_task_query(self, query: dict):
    """Turn a web-service query into new tasks. Should be overwritten.

    This method handles web requests when the web service is enabled.
    New tasks are yielded here and the Crawler finishes them in order to
    send the response.

    Args:
        query: a multidict. Its ``"url"`` entry is removed; the remaining
            entries are passed to ``Request`` as keyword arguments.

    Yields:
        The ``Request`` built from the query, followed by ``None``.

    Raises:
        Exception: if the query has no non-empty ``"url"`` entry.
    """
    target = query.pop("url", "")
    if not target:
        raise Exception("Not valid url from web request!")
    yield Request(url=target, **query)
    yield None
def web_action_after_query(self, items):
3
Source : handlers.py
with GNU General Public License v3.0
from wind2sing
with GNU General Public License v3.0
from wind2sing
async def _next_requests_from_redis_start(self):
    """Keep turning URLs popped from the Redis start key into crawler tasks.

    Does nothing when the crawler config has no ``REDIS_START_KEY``.
    Otherwise loops forever: each value returned by ``spop`` is decoded
    and added to the crawler as a ``Request`` whose callback is
    ``self.crawler.parse``.
    """
    redis_key = self.crawler.config.get("REDIS_START_KEY")
    if not redis_key:
        return
    while True:
        raw_url = await self.redis.spop(redis_key)
        if not raw_url:
            # Nothing was returned; wait before asking again.
            await asyncio.sleep(0.5)
            continue
        request = Request(raw_url.decode(), callback=self.crawler.parse)
        await self.crawler.add_task(request)
        # Zero-length sleep hands control back to the event loop.
        await asyncio.sleep(0)
async def on_close(self):
3
Source : test_request.py
with GNU General Public License v3.0
from wind2sing
with GNU General Public License v3.0
from wind2sing
def test_fp():
    """The fingerprint of a fixed URL equals its known digest."""
    expected = "7c6accfd1f05cb417373b9f00f3d9b1bd90bbb78"
    request = Request("https://httpbin.org/cookies/set?name=crawler&age=18")
    assert request.fingerprint == expected
def test_same_fp():
3
Source : test_request.py
with GNU General Public License v3.0
from wind2sing
with GNU General Public License v3.0
from wind2sing
def test_same_fp():
    """Equivalent URLs share a fingerprint.

    Covers identical URLs, reordered query parameters and a trailing
    fragment.
    """
    first = Request("https://www.google.com")
    second = Request("https://www.google.com")
    assert first.fingerprint == second.fingerprint

    base = Request("https://httpbin.org/cookies/set?name=crawler&age=18")
    reordered = Request("https://httpbin.org/cookies/set?age=18&name=crawler")
    with_fragment = Request(
        "https://httpbin.org/cookies/set?age=18&name=crawler#fragment"
    )
    for variant in (reordered, with_fragment):
        assert base.fingerprint == variant.fingerprint
def test_diff_fp():
3
Source : test_request.py
with GNU General Public License v3.0
from wind2sing
with GNU General Public License v3.0
from wind2sing
def test_diff_fp():
    """Different URLs, or the same path with and without a query, differ in fingerprint."""
    google = Request("https://www.google.com")
    with_query = Request("https://httpbin.org/cookies/set?name=crawler&age=18")
    without_query = Request("https://httpbin.org/cookies/set")
    assert google.fingerprint != with_query.fingerprint
    assert with_query.fingerprint != without_query.fingerprint
@pytest.mark.asyncio
3
Source : test_request.py
with GNU General Public License v3.0
from wind2sing
with GNU General Public License v3.0
from wind2sing
async def test_parse():
    """Send a request with a callback and check that parsing gives a truthy result."""

    def cb(resp):
        # The callback itself asserts on the response it receives.
        assert resp.status == 200
        return resp.status

    request = Request("https://httpbin.org/json", callback=cb)
    response = await request.send()
    parsed = await response.parse()
    assert parsed
@pytest.mark.asyncio
3
Source : test_scheduler.py
with GNU General Public License v3.0
from wind2sing
with GNU General Public License v3.0
from wind2sing
async def test_queue():
    """``produce`` is truthy for first-seen requests and falsy for a repeat."""
    scheduler = Scheduler()
    baidu = Request("https://www.baidu.com")
    bing = Request("https://www.bing.com")
    for fresh in (baidu, bing):
        assert await scheduler.produce(fresh)
    # The same request offered a second time.
    assert not await scheduler.produce(baidu)
@pytest.mark.asyncio
3
Source : test_scheduler.py
with GNU General Public License v3.0
from wind2sing
with GNU General Public License v3.0
from wind2sing
async def test_dont_filter():
    """A request built with ``dont_filter=True`` can be produced repeatedly."""
    scheduler = Scheduler()
    filtered = Request("https://www.baidu.com")
    unfiltered = Request("https://www.bing.com", dont_filter=True)

    # Default request: only the first produce() is truthy.
    assert await scheduler.produce(filtered)
    assert not await scheduler.produce(filtered)

    # dont_filter request: both produce() calls are truthy.
    for _ in range(2):
        assert await scheduler.produce(unfiltered)
@pytest.mark.asyncio
3
Source : test_scheduler.py
with GNU General Public License v3.0
from wind2sing
with GNU General Public License v3.0
from wind2sing
async def test_consume():
    """After producing two requests, ``consume`` returns the first one (same object)."""
    scheduler = Scheduler()
    first = Request("https://www.baidu.com")
    second = Request("https://www.bing.com")
    for request in (first, second):
        assert await scheduler.produce(request)
    consumed = await scheduler.consume()
    assert consumed is first
0
Source : chain.py
with GNU General Public License v3.0
from wind2sing
with GNU General Public License v3.0
from wind2sing
def follow(self, css: str, limit: int = 0, pass_meta=False, **kwargs):
    """Register a link-following callback and return the chained request.

    A new ``ChainRequest`` is created to act as the template for the
    followed requests. A callback is appended to this request's
    ``kws["callback"]`` list; for every value returned by
    ``resp.sel.css(css).getall()`` it yields one ``Request``.

    Args:
        css: Selector handed to ``resp.sel.css``.
        limit: Maximum number of requests yielded per response; ``0``
            means no limit.
        pass_meta: When true, this request's ``meta`` entry is popped from
            ``self.kws`` and applied to the chained request.
        **kwargs: A ``meta`` entry is taken out and applied to the chained
            request; everything else is forwarded to every ``Request``.

    Returns:
        The new ``ChainRequest``.
    """
    req = ChainRequest()
    req.meta(kwargs.pop("meta", {}))
    if pass_meta:
        req.meta(self.kws.pop("meta", {}))

    def fn(resp: Response):
        # Snapshot the template for this response instead of popping "meta"
        # out of req.kws: popping inside the loop stripped the chained meta
        # from every request after the first one.
        template = dict(req.kws)
        base_meta = template.pop("meta", {})
        count = 0
        for url in resp.sel.css(css).getall():
            # Each request gets its own meta dict so they never share state.
            m = dict(base_meta)
            if resp.meta:
                m.update(resp.meta)
            yield Request(url, meta=m, **template, **kwargs)
            count += 1
            if limit and count >= limit:
                break

    self.kws["callback"].append(fn)
    return req
def paginate(self, css: str, limit: int = 0, pass_meta=False, **kwargs):
0
Source : test_queue.py
with GNU General Public License v3.0
from wind2sing
with GNU General Public License v3.0
from wind2sing
async def pq_push_pop(q):
    """Exercise push/pop ordering on queue ``q``, then round-trip a real request.

    Four dummy tasks with priorities 3, 1, 3 and 2 are pushed. The first
    pop must match the first priority-3 task, the fourth pop must match
    the priority-1 task, and the queue must then be empty. Finally a
    ``Request`` is pushed, popped and fetched.

    Args:
        q: The queue under test.
    """
    await q.start()
    await q.clear()

    # Each task is created immediately before it is pushed.
    specs = (
        ("I am task 3.", 3),
        ("I am task 1.", 1),
        ("I am task 4.", 3),
        ("I am task 2.", 2),
    )
    pushed = []
    for text, priority in specs:
        dummy = DummyTask(text, priority=priority)
        await q.push(dummy)
        pushed.append(dummy)
    task3, task1 = pushed[0], pushed[1]

    popped = await q.pop()
    assert popped.val == task3.val
    # Discard the two middle entries.
    for _ in range(2):
        await q.pop()
    popped = await q.pop()
    assert popped.val == task1.val
    assert await q.get_length() == 0

    req = Request(
        "http://httpbin.org/user-agent",
        request_config={"headers": {"User-Agent": "TestClient"}},
        priority=1,
    )
    await q.push(req)
    restored = await q.pop()
    resp = await restored.fetch()
    assert "TestClient" in resp.text
    await q.close()
@pytest.mark.asyncio
0
Source : test_request.py
with GNU General Public License v3.0
from wind2sing
with GNU General Public License v3.0
from wind2sing
async def test_send():
    """Send a request to httpbin's ``/json`` endpoint and compare the decoded body.

    The expected ``items`` strings use well-formed ``<em>…</em>`` tags. The
    previous literals contained stray spaces inside the tags and could not
    equal the payload served by the endpoint.
    """
    expected = {
        "slideshow": {
            "author": "Yours Truly",
            "date": "date of publication",
            "slides": [
                {"title": "Wake up to WonderWidgets!", "type": "all"},
                {
                    "items": [
                        "Why <em>WonderWidgets</em> are great",
                        "Who <em>buys</em> WonderWidgets",
                    ],
                    "title": "Overview",
                    "type": "all",
                },
            ],
            "title": "Sample Slide Show",
        }
    }
    rq1 = Request("https://httpbin.org/json")
    resp1 = await rq1.send()
    assert resp1.json == expected
@pytest.mark.asyncio
0
Source : test_request.py
with GNU General Public License v3.0
from wind2sing
with GNU General Public License v3.0
from wind2sing
async def test_dumps():
    """A request that has already been sent can still be pickled."""
    request = Request("https://httpbin.org/json")
    await request.send()
    serialized = pickle.dumps(request)
    assert serialized