Here are examples of the Python API tornado.httpclient.AsyncHTTPClient.fetch taken from open source projects. By voting up you can indicate which examples are most useful and appropriate.
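All of the snippets below revolve around the same call: hand AsyncHTTPClient.fetch a URL string or an HTTPRequest and get back an HTTPResponse, either through a callback or by yielding the returned Future inside a coroutine. As a baseline, here is a minimal self-contained sketch of the coroutine style most of the examples use (the URL is just a placeholder):

from tornado import gen, httpclient, ioloop

@gen.coroutine
def main():
    client = httpclient.AsyncHTTPClient()
    # fetch() returns a Future; yielding it suspends the coroutine
    # until the response arrives (or an HTTPError is raised)
    response = yield client.fetch('http://example.com/')
    print(response.code, len(response.body))

ioloop.IOLoop.current().run_sync(main)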
9 Examples
Example 1
def _request(self, request, _callback, use_async=False):
    # 'async' is a reserved word in Python 3.7+; the flag is named use_async here
    if use_async:
        return httpclient.AsyncHTTPClient().fetch(request, _callback)
    else:
        try:
            response = httpclient.HTTPClient().fetch(request)
        except httpclient.HTTPError as e:
            # deliver the error response through the same callback interface
            response = e.response
        return _callback(response)
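One caveat on the async branch: the callback argument to fetch was deprecated in Tornado 5 and removed in Tornado 6. On a current Tornado, a sketch of the same dispatch would attach the callback to the returned Future instead (raise_error=False keeps HTTP error responses flowing into the callback, roughly matching the synchronous branch):

def _request(self, request, _callback, use_async=False):
    if use_async:
        future = httpclient.AsyncHTTPClient().fetch(request, raise_error=False)
        # network-level failures will still raise inside f.result()
        future.add_done_callback(lambda f: _callback(f.result()))
        return future
    # ... synchronous branch unchanged ...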
Example 2
def _send_remote(self, url, data, headers=None, callback=None):
"""
Initialise a Tornado AsyncClient and send the reuqest to the sentry
server. If the callback is a callable, it will be called with the
response.
"""
if headers is None:
headers = {}
return AsyncHTTPClient().fetch(
url, callback, method="POST", body=data, headers=headers,
validate_cert=self.validate_cert
)
Example 3
@staticmethod
@tornado.gen.coroutine
def _http(*args, **kwargs):
headers = {
'Accept': 'application/json',
'User-Agent': 'raylu', # http://developer.github.com/v3/#user-agent-required
}
headers.update(kwargs.get('headers', {}))
kwargs['headers'] = headers
response = yield tornado.httpclient.AsyncHTTPClient().fetch(*args, **kwargs)
if response.error:
raise Exception('%s\n%s' % (response.error, response.body))
return response
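Because the wrapper forwards *args and **kwargs straight to fetch, callers can extend or override the default headers per request. A hypothetical invocation from another coroutine (SomeApiClient is an illustrative name for the enclosing class):

response = yield SomeApiClient._http('https://api.github.com/zen',
                                     headers={'Accept': 'text/plain'})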
Example 4
Project: pixelated-dispatcher Source File: __init__.py
def forward(self, port=None, host=None):
url = "%s://%s:%s%s" % (
'http', host or "127.0.0.1", port or 80, self.request.uri)
try:
response = AsyncHTTPClient().fetch(
tornado.httpclient.HTTPRequest(
url=url,
method=self.request.method,
body=None if not self.request.body else self.request.body,
headers=self.request.headers,
follow_redirects=False,
request_timeout=REQUEST_TIMEOUT),
self.handle_response)
return response
    except tornado.httpclient.HTTPError as x:
        if hasattr(x, 'response') and x.response:
            self.handle_response(x.response)
    except Exception as e:
        logger.error('Error forwarding request %s: %s' % (url, e))
self.set_status(500)
self.write("Internal server error:\n" + ''.join(traceback.format_exception(*sys.exc_info())))
self.finish()
Example 5
Project: pixelated-dispatcher Source File: __init__.py
@gen.coroutine
def _wait_til_agent_is_up(self, agent_runtime):
max_wait = TIMEOUT_WAIT_FOR_AGENT_TO_BE_UP
waited = 0
agent_up = False
port = agent_runtime['port']
url = 'http://127.0.0.1:%d/' % port
logger.error('Checking for user agent on url %s' % url)
start = time.time()
while waited < max_wait:
try:
# define a callback for older tornado versions
def _some_callback(response):
pass
response = yield AsyncHTTPClient(force_instance=True).fetch(
tornado.httpclient.HTTPRequest(
connect_timeout=REQUEST_TIMEOUT, request_timeout=REQUEST_TIMEOUT,
url=url, allow_ipv6=False), _some_callback)
if response.code == 200:
logger.info('Got 200, agent seems to be up')
waited = max_wait
agent_up = True
else:
logger.error('Got response with status code %d' % response.code)
            except tornado.httpclient.HTTPError as e:
                logger.info('Got exception while checking for agent to be up: %s' % e)
            except Exception as e:
                logger.info('Got exception while checking for agent to be up: %s' % e)
if waited < max_wait:
yield gen.Task(tornado.ioloop.IOLoop.current().add_timeout, time.time() + TIMEOUT_WAIT_STEP)
waited += TIMEOUT_WAIT_STEP
if not agent_up:
raise PixelatedNotAvailableHTTPError('Failed to start agent')
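The gen.Task/add_timeout pairing at the bottom of the loop is the pre-4.1 way of sleeping inside a coroutine; on Tornado 4.1 or later the same wait-and-retry loop condenses with gen.sleep. A sketch under that assumption, reusing the constants and logger from the example above:

@gen.coroutine
def _wait_til_agent_is_up(self, agent_runtime):
    url = 'http://127.0.0.1:%d/' % agent_runtime['port']
    for _ in range(int(TIMEOUT_WAIT_FOR_AGENT_TO_BE_UP / TIMEOUT_WAIT_STEP)):
        try:
            response = yield AsyncHTTPClient(force_instance=True).fetch(
                url, connect_timeout=REQUEST_TIMEOUT,
                request_timeout=REQUEST_TIMEOUT, allow_ipv6=False)
            if response.code == 200:
                return  # agent is up
        except Exception as e:
            logger.info('Agent not up yet: %s' % e)
        yield gen.sleep(TIMEOUT_WAIT_STEP)  # non-blocking pause on the IOLoop
    raise PixelatedNotAvailableHTTPError('Failed to start agent')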
Example 6
@gen.coroutine
def get_links_from_url(url):
"""Download the page at `url` and parse it for links.
Returned links have had the fragment after `#` removed, and have been made
absolute so, e.g. the URL 'gen.html#tornado.gen.coroutine' becomes
'http://www.tornadoweb.org/en/stable/gen.html'.
"""
try:
response = yield httpclient.AsyncHTTPClient().fetch(url)
print('fetched %s' % url)
html = response.body if isinstance(response.body, str) \
else response.body.decode()
urls = [urljoin(url, remove_fragment(new_url))
for new_url in get_links(html)]
except Exception as e:
print('Exception: %s %s' % (e, url))
raise gen.Return([])
raise gen.Return(urls)
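To try the coroutine above outside a larger application, it can be driven with run_sync (remove_fragment and get_links are helpers defined elsewhere in the original project):

from tornado import ioloop

links = ioloop.IOLoop.current().run_sync(
    lambda: get_links_from_url('http://www.tornadoweb.org/en/stable/'))
print('\n'.join(links))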
Example 7
Project: ImportNewAPI Source File: core.py
@gen.coroutine
def crawl_base_info(url):
"""
从文章列表页面爬基本信息:title, description,cover, create_at
:param url :http://www.importnew.com/all-posts
:return: 返回文章列表集合
"""
response = yield httpclient.AsyncHTTPClient().fetch(url)
soup = BeautifulSoup(response.body)
archives = soup.find_all('div', class_="post floated-thumb")
results = list()
for index, archive in enumerate(archives):
try:
post_thumb = archive.find('div', class_="post-thumb")
cover = post_thumb.a.img['src'] if post_thumb else ""
meta = archive.find("div", class_="post-meta")
url = meta.p.a['href']
try:
title = meta.find(class_="archive-title").string
except AttributeError:
title = meta.find(class_="meta-title").string
description = meta.find('span', class_="excerpt").p.string or ""
            try:
                create_at = meta.p.contents[2].replace("|", '').strip()
            except Exception:
                create_at = meta.p.contents[3].replace(u'·', '').strip()
            # The list page only exposes year/month/day, so same-day articles would
            # tie; subtracting the list index as minutes preserves page order when
            # sorting by create_at
            create_at = datetime.strptime(create_at, "%Y/%m/%d") - timedelta(minutes=index)
post = {"title": title, 'url': url, 'description': description, 'cover': cover,
'create_at': create_at, }
results.append(post)
        except Exception:
            print(traceback.format_exc())
raise gen.Return(results)
Example 8
Project: ImportNewAPI Source File: core.py
@gen.coroutine
def crawl_detail_info(post):
"""
爬文章详情:author, content
:param post: 字典类型,crawl_base_info返回列表中的元素
:return:
"""
try:
response = yield httpclient.AsyncHTTPClient().fetch(post.get('url'), request_timeout=5)
detail_soup = BeautifulSoup(response.body)
    except HTTPError:
        print(traceback.format_exc())
raise gen.Return()
def get_author(soup):
"""
获取作者信息
"""
copyright_area = soup.find(class_="copyright-area")
a_tags = copyright_area.find_all("a")
if a_tags and len(a_tags) == 1:
author_name = a_tags[0].text
elif a_tags and len(a_tags) > 1:
author_name = a_tags[1].text
else:
author_name = ""
return author_name
def get_body(soup):
"""
获取文章正文
"""
body = soup.find(class_="entry")
        # Strip the copyright header, the trailing author bio, and HTML comments
        body.find(class_="copyright-area").extract()
        author_bio = body.find(id="author-bio")
        if author_bio:
            author_bio.extract()
        comments = body.findAll(text=lambda text: isinstance(text, Comment))
        for comment in comments:
            # map() is lazy in Python 3, so extract each node explicitly
            comment.extract()
html = BeautifulSoup(u"""
<body class="single single-post single-format-standard chrome">
<div class="container" id="wrapper">
<div class="grid-8">
<div class="post type-post status-publish format-standard hentry category-basic tag-47 odd">
""")
title = BeautifulSoup(u"""
<div class="entry-header">
<h1 style="margin-top:10px">{title}</h1>
</div>""".format(title=post.get("title")))
html.find(class_='post').append(title)
html.find(class_="post").append(body)
return str(html)
author = get_author(detail_soup)
content = get_body(detail_soup)
post.update({
"author": author,
"content": content,
})
raise gen.Return(post)
Example 9
@classmethod
@coroutine
def fetch_remote_file(cls, pkg_file):
response = yield AsyncHTTPClient().fetch(pkg_file.url)
yield write_file(pkg_file, response.body)
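write_file here is a helper from the surrounding project; the snippet only requires that it return something yieldable. A hypothetical stand-in that would satisfy the call, assuming pkg_file exposes a local path attribute (neither is shown in the original):

from tornado import gen

@gen.coroutine
def write_file(pkg_file, body):
    # hypothetical: pkg_file.path is an assumption, not part of the snippet
    with open(pkg_file.path, 'wb') as f:
        f.write(body)

Note the open/write here is blocking; for large packages a thread-pool executor would keep the IOLoop responsive.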