Here are examples of the Python API requests.exceptions.ChunkedEncodingError, taken from open source projects. By voting up you can indicate which examples are most useful and appropriate.
3 Examples
3
Example 1
def __next__(self):
    """Return the next chunk of ``self.contents`` encoded as UTF-8 bytes.

    Each chunk holds at most ``self.buffer_size`` characters. When
    ``self.partial_end`` is truthy the stream is cut off at that offset:
    the chunk that would cross it is truncated, and the call after the
    offset has been reached raises ``ChunkedEncodingError`` to simulate a
    connection that was reset in the middle of a download.

    Raises:
        StopIteration: when ``self.i`` has reached the end of
            ``self.contents`` (checked before the partial-download cut-off).
        requests.exceptions.ChunkedEncodingError: when ``self.partial_end``
            is set and ``self.i`` has reached it.
    """
    if self.i >= len(self.contents):
        raise StopIteration()
    if self.partial_end and self.i >= self.partial_end:
        raise requests.exceptions.ChunkedEncodingError("Simulated partial download! Connection reset by peer!")

    start = self.i
    end = min(self.i + self.buffer_size, len(self.contents))
    if self.partial_end:
        # Never hand out data past the simulated cut-off point.
        end = min(end, self.partial_end)
    self.i = end
    return self.contents[start:end].encode('utf-8')
0
Example 2
Project: exchangelib Source File: util.py
def post_ratelimited(protocol, session, url, headers, data, timeout=None, verify=True, allow_redirects=False):
    """
    There are two error-handling policies implemented here: a fail-fast policy intended for stand-alone scripts which
    fails on all responses except HTTP 200. The other policy is intended for long-running tasks that need to respect
    rate-limiting errors from the server and paper over outages of up to 1 hour.

    Wrap POST requests in a try-catch loop with a lot of error handling logic and some basic rate-limiting. If a request
    fails, and some conditions are met, the loop waits in increasing intervals, up to 1 hour, before trying again. The
    reason for this is that servers often malfunction for short periods of time, either because of ongoing data
    migrations or other maintenance tasks, misconfigurations or heavy load, or because the connecting user has hit a
    throttling policy limit.

    If the loop exited early, consumers of exchangelib that don't implement their own rate-limiting code could quickly
    swamp such a server with new requests. That would only make things worse. Instead, it's better if the request loop
    waits patiently until the server is functioning again.

    If the connecting user has hit a throttling policy, then the server will start to malfunction in many interesting
    ways, but never actually tell the user what is happening. There is no way to distinguish this situation from other
    malfunctions. The only cure is to stop making requests.

    Args:
        protocol: Object used to renew/retire sessions (``renew_session``, ``retire_session``) and to decide whether
            retrying is allowed (``credentials.is_service_account``).
        session: Session whose ``post()`` is called. It may be replaced by a renewed session during retries.
        url: URL to POST to. Replaced by the redirect target when a redirect is followed.
        headers: Request headers passed to ``session.post()``.
        data: Request body passed to ``session.post()``.
        timeout: Timeout passed to ``session.post()``.
        verify: Passed to ``session.post()`` as ``verify``.
        allow_redirects: When false, a 302 response raises ``TransportError``. When true, the POST is re-issued
            against the redirect target, for at most 5 redirects.

    Returns:
        A ``(response, session)`` tuple, where ``session`` is the session that ended up being used.

    Raises:
        RateLimitError: When the retry delay has grown beyond ``max_wait``.
        RedirectError: When ``get_redirect_url()`` raises ``RelativeRedirect``.
        TransportError: On a disallowed redirect, too many redirects, or a non-200 response whose body is not XML.
        The session is retired via ``protocol.retire_session()`` before any of these propagate.
    """
    from socket import timeout as SocketTimeout
    import requests.exceptions
    # The contract on sessions here is to return the session that ends up being used, or retiring the session if we
    # intend to raise an exception. We give up on max_wait timeout, not number of retries
    r = None
    wait = 10  # seconds
    max_wait = 3600  # seconds
    redirects = 0
    max_redirects = 5  # We don't want to be sent into an endless redirect loop
    log_msg = '''\
Retry: %(i)s
Waited: %(wait)s
Timeout: %(timeout)s
Session: %(session_id)s
Thread: %(thread_id)s
Auth type: %(auth)s
URL: %(url)s
Verify: %(verify)s
Allow redirects: %(allow_redirects)s
Response time: %(response_time)s
Status code: %(status_code)s
Request headers: %(request_headers)s
Response headers: %(response_headers)s'''
    log_vals = dict(i=0, wait=0, timeout=timeout, session_id=session.session_id, thread_id=get_ident(),
                    auth=session.auth, url=url, response_time=None, status_code=None, request_headers=headers,
                    response_headers=None, verify=verify, allow_redirects=allow_redirects)
    try:
        while True:
            log.debug('Session %(session_id)s thread %(thread_id)s: retry %(i)s timeout %(timeout)s POST\'ing to '
                      '%(url)s after %(wait)s s wait', log_vals)
            d1 = datetime.now()
            try:
                # Redirects are always handled manually below, so that a redirected request is still a POST.
                r = session.post(url=url, headers=headers, data=data, allow_redirects=False, timeout=timeout,
                                 verify=verify)
            except (requests.exceptions.ChunkedEncodingError, requests.exceptions.ConnectionError,
                    ConnectionResetError, requests.exceptions.ReadTimeout, SocketTimeout):
                log.debug(
                    'Session %(session_id)s thread %(thread_id)s: timeout or connection error POST\'ing to %(url)s',
                    log_vals)
                # Substitute a placeholder response so the status/header checks below can run unchanged.
                r = DummyResponse()
                r.request.headers = headers
                r.headers = {'DummyResponseHeader': None}
            d2 = datetime.now()
            log_vals['response_time'] = str(d2 - d1)
            log_vals['status_code'] = r.status_code
            log_vals['request_headers'] = r.request.headers
            log_vals['response_headers'] = r.headers
            log.debug(log_msg, log_vals)
            log.debug('Request data: %s', data)
            log.debug('Response data: %s', r.text)
            # The genericerrorpage.htm/internalerror.asp is ridiculous behaviour for random outages. Redirect to
            # '/internalsite/internalerror.asp' or '/internalsite/initparams.aspx' is caused by e.g. SSL certificate
            # f*ckups on the Exchange server.
            # A 302 may arrive without a Location header; treat that as "not the error page" instead of crashing
            # on None.lower().
            is_error_page_redirect = (
                r.status_code == 302
                and (r.headers.get('location') or '').lower()
                == '/ews/genericerrorpage.htm?aspxerrorpath=/ews/exchange.asmx'
            )
            if r.status_code == 401 \
                    or r.headers.get('connection') == 'close' \
                    or is_error_page_redirect \
                    or r.status_code == 503:
                # Maybe stale session. Get brand new one. But wait a bit, since the server may be rate-limiting us.
                # This can be 302 redirect to error page, 401 authentication error or 503 service unavailable
                if r.status_code not in (302, 401, 503):
                    # Only retry if we didn't get a useful response
                    break
                if not protocol.credentials.is_service_account:
                    break
                log_vals['i'] += 1
                log_vals['wait'] = wait  # We set it to 0 initially
                if wait > max_wait:
                    # We lost patience. Session is cleaned up in outer loop
                    raise RateLimitError(
                        'Session %(session_id)s URL %(url)s: Max timeout reached' % log_vals)
                log.info("Session %(session_id)s thread %(thread_id)s: Connection error on URL %(url)s "
                         "(code %(status_code)s). Cool down %(wait)s secs", log_vals)
                time.sleep(wait)  # Increase delay for every retry
                wait *= 2
                session = protocol.renew_session(session)
                log_vals['wait'] = wait
                log_vals['session_id'] = session.session_id
                continue
            if r.status_code == 302:
                # If we get a normal 302 redirect, requests will issue a GET to that URL. We still want to POST
                try:
                    redirect_url, server, has_ssl = get_redirect_url(response=r, allow_relative=False)
                except RelativeRedirect as e:
                    # NOTE(review): this message reads inverted - RelativeRedirect is raised here with
                    # allow_relative=False, which suggests relative redirects are the unsupported case. Confirm
                    # against get_redirect_url() before rewording.
                    log.debug("'allow_redirects' only supports relative redirects (%s -> %s)", url, e.value)
                    raise RedirectError(url=e.value)
                if not allow_redirects:
                    raise TransportError('Redirect not allowed but we were redirected (%s -> %s)' % (url, redirect_url))
                url = redirect_url
                log_vals['url'] = url
                log.debug('302 Redirected to %s', url)
                redirects += 1
                if redirects > max_redirects:
                    raise TransportError('Max redirect count exceeded')
                continue
            break
    except (RateLimitError, RedirectError) as e:
        log.warning(e.value)
        protocol.retire_session(session)
        raise
    except Exception as e:
        # Let higher layers handle this. Add data for better debugging.
        log_msg = '%(exc_cls)s: %(exc_msg)s\n' + log_msg
        log_vals['exc_cls'] = e.__class__.__name__
        log_vals['exc_msg'] = str(e)
        log_msg += '\nRequest data: %(data)s'
        log_vals['data'] = data
        log_msg += '\nResponse data: %(text)s'
        try:
            # r is still None if the very first request raised something unexpected.
            log_vals['text'] = r.text
        except (NameError, AttributeError):
            log_vals['text'] = ''
        log.error(log_msg, log_vals)
        protocol.retire_session(session)
        raise
    if r.status_code != 200:
        if r.text and is_xml(r.text):
            # Some genius at Microsoft thinks it's OK to send 500 error messages with valid SOAP response
            log.debug('Got status code %s but trying to parse content anyway', r.status_code)
        else:
            # This could be anything. Let higher layers handle this
            protocol.retire_session(session)
            log_msg += '\nRequest data: %(data)s'
            log_vals['data'] = data
            try:
                log_vals['text'] = r.text
            except (NameError, AttributeError):
                pass
            else:
                # Only add the placeholder once its value exists, so the formatting below cannot hit a KeyError.
                log_msg += '\nResponse data: %(text)s'
            raise TransportError('Unknown failure\n' + log_msg % log_vals)
    log.debug('Session %(session_id)s thread %(thread_id)s: Useful response from %(url)s', log_vals)
    return r, session
0
Example 3
Project: twitter-tap Source File: tap.py
def main():
    """Command-line entry point: acquire tweets and store them in MongoDB.

    Two subcommands are supported:

    * ``search`` polls the Twitter search API in an endless loop, upserts
      every returned status into the tweets collection and remembers the
      highest seen id per (query, geocode, lang) in the queries collection.
    * ``stream`` connects to the streaming API (filter, firehose or sample)
      and inserts every received tweet into the tweets collection.

    The process exits with status 1 on missing dependencies, missing or
    conflicting arguments, or an unreadable CSV file, and with status 0 when
    SIGINT/SIGTERM is received.
    """
    csv_path = "data/"
    FORMAT = '[%(asctime)-15s] %(levelname)s: %(message)s'
    try:
        import pymongo
        from twython.exceptions import TwythonRateLimitError, TwythonError
        from twython import Twython
    except ImportError:
        logging.basicConfig(format=FORMAT)
        logger = logging.getLogger('twitter')
        logger.fatal("Could not import, try running pip install -r requirements.txt")
        sys.exit(1)

    def parse_datetime(string):
        """Convert a date string understood by ``parsedate`` into a ``datetime``."""
        return datetime(*(parsedate(string)[:6]))

    logging_dict = {
        "DEBUG":logging.DEBUG,
        "INFO":logging.INFO,
        "WARN":logging.WARN,
        "ERROR":logging.ERROR,
        "CRITICAL":logging.CRITICAL,
        "FATAL":logging.FATAL,
    }

    def exit_gracefully(signum, frame):
        """Signal handler: log the shutdown and exit with status 0."""
        # Look the logger up by name: the local `logger` is only bound once a
        # subcommand branch has configured logging, and a signal may arrive earlier.
        logging.getLogger('twitter').warning("Shutdown signal received! Shutting down.")
        sys.exit(0)

    signal.signal(signal.SIGINT, exit_gracefully)
    signal.signal(signal.SIGTERM, exit_gracefully)

    parser = argparse.ArgumentParser(description='Twitter acquisition pipeline using the search API: Query the Twitter API and store Tweets in MongoDB. The tweets can be obtained either by the search API or the streaming API. The arguments and options are different based on the type of acquisition.')
    subparsers = parser.add_subparsers(dest='subcommand',help='Use either search or stream for acquiring tweets. For help with these commands please enter "tap stream help" or "tap search help".')
    parser_search = subparsers.add_parser('search', help='In order to run this you must provide a query or a geocode, the consumer secret and either the consumer key or the access token. Consumer key and secret can be obtained at the http://apps.twitter.com/ website, while the access token will be obtained when first connecting with the key and secret.')
    #search specific arguments
    parser_search.add_argument('-q', '--query', type=six.text_type, dest='query', default="", help='A UTF-8 search query of 1,000 characters maximum, including operators. Queries may additionally be limited by complexity. Information on how to construct a query is available at https://dev.twitter.com/docs/using-search')
    parser_search.add_argument('-g', '--geocode', type=six.text_type, dest='geocode', help='Returns tweets by users located within a given radius of the given latitude/longitude. The location is preferentially taking from the Geotagging API, but will fall back to their Twitter profile. The parameter value is specified by "latitude,longitude,radius", where radius units must be specified as either "mi" (miles) or "km" (kilometers). Note that you cannot use the near operator via the API to geocode arbitrary locations; however you can use this geocode parameter to search near geocodes directly. A maximum of 1,000 distinct "sub-regions" will be considered when using the radius modifier. Example value: 37.781157,-122.398720,1mi')
    parser_search.add_argument('-l', '--lang', type=six.text_type, dest='lang', help='Restricts tweets to the given language, given by an ISO 639-1 code. Language detection is best-effort.\nExample value: eu')
    parser_search.add_argument('-r', '--result-type', '--result_type', type=six.text_type, default='mixed', dest='result_type', choices=["mixed","recent","popular"],help='Specifies what type of search results you would prefer to receive. The current default is "mixed". Valid values include: "mixed" - Include both popular and real time results in the response. "recent" - return only the most recent results in the response. "popular" - return only the most popular results in the response.')
    parser_search.add_argument('-w', '--wait', type=float, dest='waittime', default=2.0, help='Mandatory sleep time before executing a query. The default value is 2, which should ensure that the rate limit of 450 per 15 minutes is never reached.')
    parser_search.add_argument('-c', '--clean', dest='clean', action='store_true', default=False, help="Set this switch to use a clean since_id.")
    parser_search.add_argument('-ql', '--query-load', type=six.text_type, dest='query_load', help="Load query terms from filename. Loads csv files, just pass in a filename without the extension")
    #search api auth specific
    parser_search.add_argument('-ck', '--consumer-key', '--consumer_key', type=six.text_type, dest='consumer_key', help="The consumer key that you obtain when you create an app at https://apps.twitter.com/")
    parser_search.add_argument('-cs', '--consumer-secret', '--consumer_secret', type=six.text_type, dest='consumer_secret', help="The consumer secret that you obtain when you create an app at https://apps.twitter.com/")
    parser_search.add_argument('-at', '--access-token', '--access_token', type=six.text_type, dest='access_token', help="You can use consumer_key and access_token instead of consumer_key and consumer_secret. This will make authentication faster, as the token will not be fetched. The access token will be printed to the standard output when connecting with the consumer_key and consumer_secret.")
    #mongoDB specific arguments
    parser_search.add_argument('-d', '--db', type=six.text_type, dest='dburi', default='mongodb://localhost:27017/twitter', help='MongoDB URI, example: mongodb://dbuser:dbpassword@localhost:27017/dbname Defaults to mongodb://localhost:27017/twitter')
    parser_search.add_argument('-qc', '--queries-collection','--queries_collection', dest='queries_collection', type=six.text_type, default='queries', help='The name of the collection for storing the highest since_id for each query. Default is queries.')
    parser_search.add_argument('-tc', '--tweets-collection','--tweets_collection', dest='tweets_collection', type=six.text_type, default='tweets', help='The name of the collection for storing tweets. Default is tweets.')
    parser_search.add_argument('-v', '--verbosity', type=six.text_type, dest='loglevel', default='WARN', choices=["DEBUG","INFO","WARN","ERROR","CRITICAL","FATAL"], help='The level of verbosity.')

    parser_stream = subparsers.add_parser('stream', help='Obtain tweets using the streaming API. If you do not provide any arguments, the sample stream will be tracked. For a personalized stream at least one of the following must be entered: follow, track, or locations. The default access level allows up to 400 track keywords, 5,000 follow userids and 25 0.1-360 degree location boxes.')
    #mongoDB specific arguments
    parser_stream.add_argument('-d', '--db', type=six.text_type, dest='dburi', default='mongodb://localhost:27017/twitter', help='MongoDB URI, example: mongodb://dbuser:dbpassword@localhost:27017/dbname Defaults to mongodb://localhost:27017/twitter')
    parser_stream.add_argument('-tc', '--tweets-collection','--tweets_collection', dest='tweets_collection', type=six.text_type, default='tweets', help='The name of the collection for storing tweets. Default is tweets.')
    #stream api specific
    parser_stream.add_argument('-f', '--follow', type=six.text_type, dest='follow', help='A comma separated list of user IDs, indicating the users to return statuses for in the stream. More information at https://dev.twitter.com/docs/streaming-apis/parameters#follow')
    parser_stream.add_argument('-t', '--track', type=six.text_type, dest='track', help='Keywords to track. Phrases of keywords are specified by a comma-separated list. More information at https://dev.twitter.com/docs/streaming-apis/parameters#track')
    parser_stream.add_argument('-l', '--locations', type=six.text_type, dest='locations', help='A comma-separated list of longitude,latitude pairs specifying a set of bounding boxes to filter Tweets by. On geolocated Tweets falling within the requested bounding boxes will be included—unlike the Search API, the user\'s location field is not used to filter tweets. Each bounding box should be specified as a pair of longitude and latitude pairs, with the southwest corner of the bounding box coming first. For example: "-122.75,36.8,-121.75,37.8" will track all tweets from San Francisco. NOTE: Bounding boxes do not act as filters for other filter parameters. More information at https://dev.twitter.com/docs/streaming-apis/parameters#locations')
    parser_stream.add_argument('-fl', '--follow-load', type=six.text_type, dest='follow_load', help="Specify a filename to load and append account IDs from. Loads csv files, just pass in a filename without the extension")
    parser_stream.add_argument('-tl', '--track-load', type=six.text_type, dest='track_load', help="Specify a filename to load append terms from. Loads csv files, just pass in a filename without the extension")
    # argparse %-formats help strings, so a literal percent sign must be written as "%%".
    parser_stream.add_argument('-fh', '--firehose', action='store_true', default=False, dest='firehose', help="Use this option to receive all public tweets if there are no keywords, users or locations to track. This requires special permission from Twitter. Otherwise a sample of 1%% of tweets will be returned.")
    #stream api auth specific
    parser_stream.add_argument('-ck', '--consumer-key', '--consumer_key', type=six.text_type, dest='consumer_key', help="The consumer key that you obtain when you create an app at https://apps.twitter.com/")
    parser_stream.add_argument('-cs', '--consumer-secret', '--consumer_secret', type=six.text_type, dest='consumer_secret', help="The consumer secret that you obtain when you create an app at https://apps.twitter.com/")
    parser_stream.add_argument('-at', '--access-token', '--access_token', type=six.text_type, dest='access_token', help="You can generate your user access token at http://apps.twitter.com by clicking 'Create my access token'.")
    parser_stream.add_argument('-ats', '--access-token-secret', '--access_token_secret', type=six.text_type, dest='access_token_secret', help="You can generate your user access token secret at http://apps.twitter.com by clicking 'Create my access token'.")
    parser_stream.add_argument('-v', '--verbosity', type=six.text_type, dest='loglevel', default='WARN', choices=["DEBUG","INFO","WARN","ERROR","CRITICAL","FATAL"], help='The level of verbosity.')

    if len(sys.argv)<2:
        parser.print_help()
        sys.exit(1)
    args = parser.parse_args()
    if len(sys.argv)<3:
        # A bare subcommand without any option: show that subcommand's help.
        if args.subcommand=='search':
            parser_search.print_help()
        if args.subcommand=='stream':
            parser_stream.print_help()
        sys.exit(1)

    def load_csv_file(filename):
        """
        Accepts a file name and loads it as a list

        ``filename`` is given without extension and is looked up as
        ``csv_path + filename + '.csv'``. All cells of all rows are returned
        as one flat list. If the file cannot be read, the I/O error is
        printed and the process exits with status 1.
        """
        try:
            with open(csv_path + filename + '.csv', 'r') as f:
                rows = list(csv.reader(f))
        except IOError as e:
            print("I/O error({0}): {1}".format(e.errno, e.strerror))
            # There is nothing sensible to query without the terms; stop here
            # instead of failing later on a missing result.
            sys.exit(1)
        # flatten to 1D, it gets loaded as 2D array
        return [x for sublist in rows for x in sublist]

    def build_query_string(query_words, api_type):
        """ 0 for search api, 1 for stream api

        Joins the terms with ' OR ' for the search API and with ',' for the
        stream API. Returns None for any other ``api_type``.
        """
        separators = {0: ' OR ', 1: ','}
        if api_type not in separators:
            return None
        return separators[api_type].join(str(q) for q in query_words)

    def load_query(filename, api_type):
        """ 0 for search api, 1 for stream api

        Loads the terms from the CSV file and joins them for the given API.
        """
        keywords = load_csv_file(filename)
        return build_query_string(keywords, 0 if api_type == 0 else 1)

    if args.subcommand=='search':
        loglevel = args.loglevel
        logging.basicConfig(format=FORMAT,level=logging_dict[loglevel],stream=sys.stdout)
        logger = logging.getLogger('twitter')

        CONSUMER_KEY = args.consumer_key
        CONSUMER_SECRET = args.consumer_secret
        ACCESS_TOKEN = args.access_token
        MONGODB_URI = args.dburi

        if CONSUMER_SECRET is None and ACCESS_TOKEN is None:
            logger.fatal("Consumer secret or access token is required.")
            sys.exit(1)
        if args.query_load and args.query:
            logger.fatal("Please use -ql or -q, not both.")
            sys.exit(1)

        # Arguments are validated before the query file is read.
        if args.query_load:
            query = load_query(args.query_load, 0)
        else:
            query = args.query
        geocode = args.geocode
        lang = args.lang
        waittime = args.waittime
        clean_since_id = args.clean
        result_type = args.result_type

        # here we get the access token if it is not provided with the options
        if not ACCESS_TOKEN:
            logger.warning("No access token provided in options. Obtaining one now...")
            token_getter = Twython(CONSUMER_KEY,CONSUMER_SECRET,oauth_version=2)
            ACCESS_TOKEN = token_getter.obtain_access_token()
            logger.warning("Access token: "+ACCESS_TOKEN)
        twitter = Twython(CONSUMER_KEY, access_token=ACCESS_TOKEN)

        try:
            client = pymongo.MongoClient(MONGODB_URI)
        except Exception:
            logger.fatal("Couldn't connect to MongoDB. Please check your --db argument settings.")
            sys.exit(1)
        parsed_dburi = pymongo.uri_parser.parse_uri(MONGODB_URI)
        db = client[parsed_dburi['database']]
        queries = db[args.queries_collection]
        tweets = db[args.tweets_collection]
        # NOTE(review): ensure_index/update/insert are legacy pymongo collection
        # methods - confirm the pinned pymongo version still provides them.
        queries.ensure_index([("query",pymongo.ASCENDING),("geocode",pymongo.ASCENDING),("lang",pymongo.ASCENDING)],unique=True)
        tweets.ensure_index("id",direction=pymongo.DESCENDING,unique=True)
        tweets.ensure_index([("coordinates.coordinates",pymongo.GEO2D),])

        if not clean_since_id:
            current_query = queries.find_one({'query':query,'geocode':geocode,'lang':lang})
        else:
            current_query = None
        if current_query:
            since_id = current_query['since_id']
        else:
            since_id = None

        def perform_query(**kwargs):
            """Run one search, sleeping and retrying until it succeeds."""
            while True:
                sleep(waittime)
                try:
                    results = twitter.search(**kwargs)
                except TwythonRateLimitError:
                    logger.warning("Rate limit reached, taking a break for a minute...\n")
                    sleep(60)
                    continue
                except TwythonError as err:
                    logger.error("Some other error occured, taking a break for half a minute: "+str(err))
                    sleep(30)
                    continue
                return results

        def save_tweets(statuses,current_since_id):
            """Upsert the statuses and return the highest status id seen so far."""
            for status in statuses:
                status['created_at']=parse_datetime(status['created_at'])
                try:
                    status['user']['created_at']=parse_datetime(status['user']['created_at'])
                except Exception:
                    # Best effort only. Not a bare except: that would also
                    # swallow the SystemExit raised by the signal handler.
                    pass
                tweets.update({'id':status['id']},status,upsert=True)
                current_id = longtype(status['id'])
                if current_id>current_since_id:
                    current_since_id = current_id
            if not statuses:
                logger.debug("No new tweets. Taking a break for 10 seconds...")
                sleep(10)
            else:
                logger.debug("Received "+str(len(statuses))+" tweets.")
            return current_since_id

        logger.info("Collecting tweets from the search API...")
        while True:
            results = perform_query(q=query,geocode=geocode,lang=lang,count=100,since_id=since_id,result_type=result_type)
            # we will now compute the new since_id as the maximum of all returned ids
            logger.debug("Rate limit for current window: "+str(twitter.get_lastfunction_header(header="x-rate-limit-remaining")))
            current_since_id = longtype(since_id) if since_id else 0
            new_since_id = save_tweets(results['statuses'],current_since_id)
            next_results = results['search_metadata'].get('next_results')
            # Page through older results of the same window via the max_id in next_results.
            while next_results:
                p = urlparse.urlparse(next_results)
                next_results_max_id = dict(urlparse.parse_qsl(p.query))['max_id']
                results = perform_query(q=query,geocode=geocode,lang=lang,count=100,since_id=since_id,max_id=next_results_max_id,result_type=result_type)
                next_results = results['search_metadata'].get('next_results')
                logger.debug("Rate limit for current window: "+str(twitter.get_lastfunction_header(header="x-rate-limit-remaining")))
                new_since_id = save_tweets(results['statuses'],new_since_id)
            new_since_id = str(new_since_id)
            queries.update({'query':query,'geocode':geocode,'lang':lang},{"$set":{'since_id':new_since_id}},upsert=True)
            since_id = new_since_id

    if args.subcommand=='stream':
        from twython import TwythonStreamer
        loglevel = args.loglevel
        logging.basicConfig(format=FORMAT,level=logging_dict[loglevel],stream=sys.stdout)
        logger = logging.getLogger('twitter')

        if args.consumer_key is None or args.consumer_secret is None or args.access_token is None or args.access_token_secret is None:
            logger.fatal("Consumer key, consumer secret, access token and access token secret are all required when using the streaming API.")
            sys.exit(1)

        try:
            client = pymongo.MongoClient(args.dburi)
        except Exception:
            logger.fatal("Couldn't connect to MongoDB. Please check your --db argument settings.")
            sys.exit(1)
        parsed_dburi = pymongo.uri_parser.parse_uri(args.dburi)
        db = client[parsed_dburi['database']]
        tweets = db[args.tweets_collection]
        # NOTE(review): ensure_index/insert are legacy pymongo collection
        # methods - confirm the pinned pymongo version still provides them.
        tweets.ensure_index("id",direction=pymongo.DESCENDING,unique=True)
        tweets.ensure_index([("coordinates.coordinates",pymongo.GEO2D),])

        class TapStreamer(TwythonStreamer):
            """Streamer that stores every received tweet in the tweets collection."""

            def on_success(self, data):
                if 'text' in data:
                    data['created_at']=parse_datetime(data['created_at'])
                    try:
                        data['user']['created_at']=parse_datetime(data['user']['created_at'])
                    except Exception:
                        # Best effort only; see save_tweets-style handling: a bare
                        # except would also swallow the shutdown SystemExit.
                        pass
                    try:
                        tweets.insert(data)
                    except Exception as e:
                        logger.error("Couldn't save a tweet: "+str(e))
                if 'limit' in data:
                    logger.warning("The filtered stream has matched more Tweets than its current rate limit allows it to be delivered.")

            def on_error(self, status_code, data):
                logger.error("Received error code "+str(status_code)+".")

        stream = TapStreamer(args.consumer_key, args.consumer_secret, args.access_token, args.access_token_secret)
        logger.info("Collecting tweets from the streaming API...")
        if args.follow or args.track or args.locations or args.follow_load or args.track_load:
            # Terms loaded from a file are appended to the inline value when
            # one was given, and used on their own otherwise (added once only).
            if args.track_load:
                loaded_track = load_query(args.track_load,1)
                args.track = args.track + ',' + loaded_track if args.track else loaded_track
            if args.follow_load:
                loaded_follow = load_query(args.follow_load,1)
                args.follow = args.follow + ',' + loaded_follow if args.follow else loaded_follow
            # https://github.com/ryanmcgrath/twython/issues/288#issuecomment-66360160
            while True:
                try:
                    stream.statuses.filter(follow=args.follow,track=args.track,locations=args.locations)
                except requests.exceptions.ChunkedEncodingError as e:
                    # The connection was cut mid-stream; report it and reconnect.
                    logger.error("Stream interrupted (%s), reconnecting...", e)
                    continue
        elif args.firehose:
            stream.statuses.firehose()
        else:
            stream.statuses.sample()