Here are examples of the Python API `logging.info`, taken from open-source projects. By voting up you can indicate which examples are most useful and appropriate.
172 Examples
0
Example 151
Project: jottalib Source File: cli.py
def download(argv=None):
    """Download a file or folder from Jottacloud to the local filesystem.

    Parses command-line arguments (``argv`` defaults to ``sys.argv[1:]``),
    resolves the remote path and downloads either a single file or,
    recursively, an entire folder tree.  Writes summary text files
    (incomplete_files.txt etc.) for anything that was skipped.

    Returns True on success, False if a single-file download failed.
    """
    def download_jfsfile(remote_object, tofolder=None, checksum=False):
        'Helper function to get a jfsfile and store it in a local folder, optionally checksumming it. Returns boolean'
        if tofolder is None:
            tofolder = '.'  # with no arguments, store in current dir
        total_size = remote_object.size
        if remote_object.state in (JFS.ProtoFile.STATE_CORRUPT, JFS.ProtoFile.STATE_INCOMPLETE):
            # BUGFIX: was `remote_file.name` -- a NameError, since no
            # `remote_file` exists in this helper; use the parameter.
            puts(colored.red('%s was NOT downloaded successfully - Incomplete file' % remote_object.name))
            return False
        topath = os.path.join(tofolder, remote_object.name)
        with open(topath, 'wb') as fh:
            bytes_read = 0
            puts(colored.white('Downloading: %s, size: %s \t' % (remote_object.name,
                                                                 print_size(total_size, humanize=True))))
            with ProgressBar(expected_size=total_size) as bar:
                for chunk_num, chunk in enumerate(remote_object.stream()):
                    fh.write(chunk)
                    bytes_read += len(chunk)
                    bar.show(bytes_read)
        if checksum:
            # Re-read the finished local file and compare with the server-side md5.
            md5_lf = JFS.calculate_md5(open(topath, 'rb'))
            md5_jf = remote_object.md5
            logging.info('%s - Checksum for downloaded file' % md5_lf)
            logging.info('%s - Checksum for server file' % md5_jf)
            if md5_lf != md5_jf:
                puts(colored.blue('%s - Checksum for downloaded file' % md5_lf))
                puts(colored.blue('%s - Checksum for server file' % md5_jf))
                # BUGFIX: user-facing message misspelled 'cheksum'.
                puts(colored.red('%s was NOT downloaded successfully - checksum mismatch' % remote_object.name))
                return False
            puts(colored.green('%s was downloaded successfully - checksum matched' % remote_object.name))
        return True

    if argv is None:
        argv = sys.argv[1:]
    parser = argparse.ArgumentParser(description='Download a file or folder from Jottacloud.')
    parser.add_argument('remoteobject',
                        help='The path to the file or folder that you want to download',
                        type=commandline_text)
    parser.add_argument('-l', '--loglevel',
                        help='Logging level. Default: %(default)s.',
                        choices=('debug', 'info', 'warning', 'error'),
                        default='warning')
    parser.add_argument('-c', '--checksum',
                        help='Verify checksum of file after download',
                        action='store_true')
    # parser.add_argument('-r', '--resume',
    #                     help='Will not download the files again if it exist in path',
    #                     action='store_true' )
    args = parse_args_and_apply_logging_level(parser, argv)
    jfs = JFS.JFS()
    if args.remoteobject.startswith('//'):
        # break out of root_folder
        root_folder = jfs.rootpath
        item_path = posixpath.join(root_folder, args.remoteobject[2:])
    else:
        root_folder = get_root_dir(jfs).path
        item_path = posixpath.join(root_folder, args.remoteobject)
    logging.info('Root folder path: %s' % root_folder)
    logging.info('Command line path to object: %s' % args.remoteobject)
    logging.info('Jotta path to object: %s' % item_path)
    remote_object = jfs.getObject(item_path)
    if isinstance(remote_object, JFS.JFSFile):
        # Single file: download it directly.
        if download_jfsfile(remote_object, checksum=args.checksum):
            logging.info('%r downloaded successfully', remote_object.path)
            return True
        else:
            puts(colored.red('%r download failed' % remote_object.path))
            return False
    else:  # if it's not a file it has to be a folder
        incomplete_files = []        # incomplete/corrupt files that were skipped
        # NOTE(review): nothing in this function ever appends to
        # checksum_error_files -- checksum failures inside download_jfsfile
        # are not recorded here. Confirm whether that is intended.
        checksum_error_files = []    # files skipped because of checksum errors
        zero_files = []              # zero-length files that were skipped
        long_path = []               # entries skipped because the path is too long
        puts(colored.blue("Getting index for folder: %s" % remote_object.name))
        fileTree = remote_object.filedirlist().tree  # Download the folder tree
        puts(colored.blue('Total number of folders to download: %d' % len(fileTree)))
        topdir = os.path.dirname(item_path)
        logging.info("topdir: %r", topdir)
        # Iterate through each folder
        for folder in fileTree:
            # Strip the account/device/mountpoint prefix from the folder path.
            logging.debug("folder: %r", folder)
            _abs_folder_path = posixpath.join(JFS.JFS_ROOT, folder[1:])
            logging.debug("absolute folder path : %r", _abs_folder_path)
            _rel_folder_path = _abs_folder_path[len(topdir)+1:]
            logging.info('relative folder path: %r', _rel_folder_path)
            if len(_rel_folder_path) > 250:  # Windows has a limit of 250 characters in path
                puts(colored.red('%s was NOT downloaded successfully - path too long' % _rel_folder_path))
                long_path.append(_rel_folder_path)
            else:
                logging.info('Entering a new folder: %s' % _rel_folder_path)
                if not os.path.exists(_rel_folder_path):  # Create the folder locally if it doesn't exist
                    os.makedirs(_rel_folder_path)
                for _file in fileTree[folder]:  # Enter the folder and download the files within
                    logging.info("file: %r", _file)
                    # This is the absolute path to the file that is going to be downloaded
                    abs_path_to_object = posixpath.join(topdir, _rel_folder_path, _file.name)
                    logging.info('Downloading the file from: %s' % abs_path_to_object)
                    if _file.state in (JFS.ProtoFile.STATE_CORRUPT, JFS.ProtoFile.STATE_INCOMPLETE):
                        # Corrupt and incomplete files will be skipped
                        puts(colored.red('%s was NOT downloaded successfully - Incomplete or corrupt file' % _file.name))
                        incomplete_files.append(posixpath.join(_rel_folder_path, _file.name))
                        continue
                    remote_object = jfs.getObject(abs_path_to_object)
                    remote_file = remote_object
                    total_size = remote_file.size
                    if total_size == 0:  # Indicates a zero-length file
                        puts(colored.red('%s was NOT downloaded successfully - zero file' % remote_file.name))
                        zero_files.append(posixpath.join(_rel_folder_path, remote_file.name))
                        continue
                    if len(posixpath.join(_rel_folder_path, remote_file.name)) > 250:  # Windows path-length limit
                        puts(colored.red('%s was NOT downloaded successfully - path too long' % remote_file.name))
                        long_path.append(posixpath.join(_rel_folder_path, remote_file.name))
                        continue
                    # TODO: implement args.resume:
                    if not download_jfsfile(remote_file, tofolder=_rel_folder_path, checksum=args.checksum):
                        # download failed
                        puts(colored.red("Download failed: %r" % remote_file.path))
        # Incomplete files
        if len(incomplete_files) > 0:
            with codecs.open("incomplete_files.txt", "w", "utf-8") as text_file:
                for item in incomplete_files:
                    text_file.write("%s\n" % item)
            print('Incomplete files (not downloaded): %d' % len(incomplete_files))
            for _files in incomplete_files:
                logging.info("Incomplete: %r", _files)
        # Checksum error files
        if len(checksum_error_files) > 0:
            with codecs.open("checksum_error_files.txt", "w", "utf-8") as text_file:
                for item in checksum_error_files:
                    text_file.write("%s\n" % item)
            print('Files with checksum error (not downloaded): %d' % len(checksum_error_files))
            for _files in checksum_error_files:
                logging.info("Checksum error: %r", _files)
        # Zero-length files
        if len(zero_files) > 0:
            with codecs.open("zero_files.txt", "w", "utf-8") as text_file:
                for item in zero_files:
                    text_file.write("%s\n" % item)
            print('Files with zero size (not downloaded): %d' % len(zero_files))
            for _files in zero_files:
                logging.info("Zero sized files: %r", _files)
        # Paths that were too long
        if len(long_path) > 0:
            with codecs.open("long_path.txt", "w", "utf-8") as text_file:
                for item in long_path:
                    text_file.write("%s\n" % item)
            print('Folder and files not downloaded because of path too long: %d' % len(long_path))
            for _files in long_path:
                logging.info("Path too long: %r", _files)
        return True
0
Example 152
def returner(ret):
    """Ship pulsar / win_pulsar change events to Splunk.

    Customized to split up the change events and send each one to the
    Splunk HTTP event collector, enriched with minion/host metadata.
    """
    opts = _get_options()
    logging.info('Options: %s' % json.dumps(opts))
    token = opts['token']
    indexer = opts['indexer']
    use_ssl = opts['http_event_server_ssl']
    proxy = opts['proxy']
    timeout = opts['timeout']
    extras = opts['extras']
    # Set up the collector.
    hec = http_event_collector(token, indexer, http_event_server_ssl=use_ssl,
                               proxy=proxy, timeout=timeout)
    # A bare dict means batching is disabled; normalise to a list either way.
    data = [ret] if isinstance(ret, dict) else ret
    # Sometimes there are duplicate events in the list -- drop them.
    data = _dedupList(data)
    minion_id = __opts__['id']
    fqdn = __grains__['fqdn']
    master = __grains__['master']
    try:
        fqdn_ip4 = __grains__['fqdn_ip4'][0]
    except IndexError:
        fqdn_ip4 = __grains__['ipv4'][0]

    # inotify access -> normalised action; unknown accesses map to 'unknown'.
    linux_actions = defaultdict(lambda: 'unknown', {
        'IN_ACCESS': 'read',
        'IN_ATTRIB': 'acl_modified',
        'IN_CLOSE_NOWRITE': 'read',
        'IN_CLOSE_WRITE': 'read',
        'IN_CREATE': 'created',
        'IN_DELETE': 'deleted',
        'IN_DELETE_SELF': 'deleted',
        'IN_MODIFY': 'modified',
        'IN_MOVE_SELF': 'modified',
        'IN_MOVED_FROM': 'modified',
        'IN_MOVED_TO': 'modified',
        'IN_OPEN': 'read',
        'IN_MOVE': 'modified',
        'IN_CLOSE': 'read',
    })
    # Windows access mask -> normalised action.
    windows_actions = defaultdict(lambda: 'unknown', {
        'Delete': 'deleted',
        'Read Control': 'read',
        'Write DAC': 'acl_modified',
        'Write Owner': 'modified',
        'Synchronize': 'modified',
        'Access Sys Sec': 'read',
        'Read Data': 'read',
        'Write Data': 'modified',
        'Append Data': 'modified',
        'Read EA': 'read',
        'Write EA': 'modified',
        'Execute/Traverse': 'read',
        'Read Attributes': 'read',
        'Write Attributes': 'acl_modified',
        'Query Key Value': 'read',
        'Set Key Value': 'modified',
        'Create Sub Key': 'created',
        'Enumerate Sub-Keys': 'read',
        'Notify About Changes to Keys': 'read',
        'Create Link': 'created',
        'Print': 'read',
    })

    for item in data:
        alert = item['return']
        event = {}
        payload = {}
        if 'change' in alert:  # Linux, normal pulsar
            # The second half of the change will be '|IN_ISDIR' for directories.
            change = alert['change'].split('|')[0]
            # Skip the IN_IGNORED events.
            if change == 'IN_IGNORED':
                continue
            object_type = ('directory' if len(alert['change'].split('|')) == 2
                           else 'file')
            event['action'] = linux_actions[change]
            event['change_type'] = 'filesystem'
            event['object_category'] = object_type
            event['object_path'] = alert['path']
            event['file_name'] = alert['name']
            event['file_path'] = alert['tag']
            if alert['stats']:  # Gather more data if the change wasn't a delete
                stats = alert['stats']
                event['object_id'] = stats['inode']
                event['file_acl'] = stats['mode']
                event['file_create_time'] = stats['ctime']
                event['file_modify_time'] = stats['mtime']
                event['file_size'] = stats['size'] / 1024.0  # bytes -> kilobytes
                event['user'] = stats['user']
                event['group'] = stats['group']
                if object_type == 'file':
                    event['file_hash'] = alert['checksum']
                    event['file_hash_type'] = alert['checksum_type']
        else:  # Windows, win_pulsar
            change = alert['Accesses']
            object_type = ('directory' if alert['Hash'] == 'Item is a directory'
                           else 'file')
            event['action'] = windows_actions[change]
            event['change_type'] = 'filesystem'
            event['object_category'] = object_type
            event['object_path'] = alert['Object Name']
            event['file_name'] = os.path.basename(alert['Object Name'])
            event['file_path'] = os.path.dirname(alert['Object Name'])
            # TODO: Should we be reporting 'EntryType' or 'TimeGenerated?
            # EntryType reports whether attempt to change was successful.
        event['master'] = master
        event['minion_id'] = minion_id
        event['dest_host'] = fqdn
        event['dest_ip'] = fqdn_ip4
        for extra in extras:
            extra_value = __salt__['config.get'](extra, '')
            if isinstance(extra_value, str):
                event['custom_' + extra] = extra_value
        payload['host'] = fqdn
        payload['index'] = opts['index']
        payload['sourcetype'] = opts['sourcetype']
        payload['event'] = event
        hec.batchEvent(payload)
    hec.flushBatch()
    return
0
Example 153
Project: dockit Source File: install.py
def talktoDocker(
        pulloption, baseimage, imagetag, numcontainers, dockerfile,
        dockerrepo, buildoption, startoption, gluster_mode,
        gluster_install, gluster_volume):
    """Connect to the docker daemon and pull/build/start containers as requested.

    Optionally sets up GlusterFS (install + volume creation) on the spawned
    containers.  Exits the process on fatal errors; returns True on success.
    """
    new_image_tag = ''
    flag = flag1 = gluster_flag = 0   # flag/flag1 record that a tag was derived
    cons_ids = []                     # collected container ids of started containers
    logger.debug(
        "Docker image name :%s \t Image Tag:%s \t number of Containers:%s",
        baseimage, imagetag, numcontainers)
    try:
        connret = dockit.DockerCli(
            "connect", pulloption, baseimage, imagetag, numcontainers,
            dockerfile, dockit_log_file, dockerrepo, buildoption)
        if connret:
            logger.info("Successfully connected to docker deamon: \n"
                        "\t \t \t pull/build/start containers accordingly.")
        else:
            logger.error("Connection return failed..exiting.")
            sys.exit(1)
        if pulloption:
            logger.debug("Proceeding with actions on Image:%s", baseimage)
            # if dockerrepo == None:
            #     logger.debug(
            #         "Base image pulling is not supported with "
            #         "this version of dockit \n"
            #         " please provide dockerrepo")
            #     sys.exit(1)
            pullret = connret.pullC()
            if pullret:
                logger.info("Done with pulling.. continuing")
                if dockerrepo and baseimage:
                    new_image_tag = dockerrepo+'/'+baseimage+':'+'latest'
                    flag1 = 1
                    logger.debug("new_image_tag:%s", new_image_tag)
            else:
                logger.error("Error when pulling ")
        else:
            logger.info("Not trying to pull image:%s.. continuing", baseimage)
        if buildoption:
            logger.debug("Continuing build process with %s", dockerfile)
            built_image = connret.buildC()
            if built_image:
                logger.info(
                    " Image built from docker file :%s with id:%s and tag:%s",
                    built_image, built_image['Id'], built_image['RepoTags'])
                if imagetag:
                    logger.debug("Image tag:%s", imagetag)
                    new_image_tag = imagetag+':latest'
                    flag = 1
                    logger.debug("new_image_tag:%s", new_image_tag)
            else:
                logger.error(
                    "Failed when building from docker file:\n"
                    "Check docker file path and options ")
        else:
            logger.debug("Not trying to build the image from docker file")
        if startoption:
            if flag or flag1:
                logger.debug("Flag:%s \t Flag1:%s image tag:\t %s",
                             flag, flag1, new_image_tag)
            else:
                if baseimage and imagetag:
                    new_image_tag = baseimage+':'+imagetag
                    logger.debug("Using image tag :%s", new_image_tag)
            ret_exist = connret.image_by_tag(new_image_tag)
            if ret_exist:
                logger.debug("Image exists :%s with ID:%s ",
                             ret_exist, ret_exist['Id'])
                logger.info("Going to run the containers")
                if gluster_mode:
                    if gluster_volume:
                        gluster_flag = 1
                    else:
                        gluster_flag = 0
                runret = connret.runC(
                    ret_exist['RepoTags'][0], gluster_flag, gluster_config, )
                if runret:
                    if not connret.container_ips:
                        logger.critical(
                            "Something went wrong when spawning "
                            "containers:exiting")
                        sys.exit(1)
                    logger.info(
                        "Containers are running successfully.."
                        "please login and work!!!!")
                    print(60 * '-')
                    logger.info("Details about running containers..\n")
                    logger.info(
                        "Container IPs \t : %s\n ", connret.container_ips)
                    for c in connret.cons_ids:
                        # BUGFIX: previously read cons_ids[0] on every
                        # iteration, so the same (first) id was appended
                        # len(cons_ids) times; use the loop variable.
                        c_id = dict(c)['Id']
                        cons_ids.append(c_id)
                    logger.info("Container Ids \t : %s \n ", cons_ids)
                    print(60 * '-')
                    # todo : Its possible to auto login to these containers
                    # via below , commenting it out for now
                    # loginC(connret.container_ips, connret.cons_ids)
                    if gluster_mode:
                        gluster_cli = create_vol.glusteractions()
                        if gluster_cli:
                            logger.debug("Successfully created gluster client")
                            run_helper.rh_config_dict[
                                'SERVER_IP_ADDRS'] = connret.container_ips
                        else:
                            logger.error("Failed to create gluster client")
                        run_helper.con_pass = getpass.getpass()
                        if gluster_install:
                            ginst = gluster_config.get(
                                'GLUSTER_VERSION', '3.5')
                            if ginst:
                                gluster_cli.gluster_install(ginst)
                            else:
                                logger.debug(
                                    "Failed to get Gluster Version from dict.")
                        else:
                            logger.info("Gluster installation not required")
                        if gluster_volume:
                            run_helper.rh_config_dict[
                                'VOL_TYPE'] = gluster_config['VOL_TYPE']
                            run_helper.rh_config_dict['SERVER_EXPORT_DIR'] = \
                                gluster_config['SERVER_EXPORT_DIR']
                            run_helper.rh_config_dict['TRANS_TYPE'] = 'tcp'
                            run_helper.rh_config_dict[
                                'VOLNAME'] = gluster_config['VOLNAME']
                            logger.debug(
                                "Successfully filled configuration details:%s",
                                run_helper.rh_config_dict)
                            gluster_cli.create_gluster_volume(start=True)
                            # CONSISTENCY FIX: was logging.info -- every other
                            # message in this function goes through `logger`.
                            logger.info(
                                'Gluster Volume operations done! '
                                'Please mount volume :%s in your client',
                                gluster_config['VOLNAME'])
                        else:
                            logger.debug(
                                "Gluster Volume creation not required")
                    else:
                        logger.info("Done!")
                else:
                    logger.error("Failed when starting/inspecting containers")
            else:
                logger.error(
                    "Image + tag does not exist.. "
                    "I cant start container from this..exiting")
                sys.exit(1)
        else:
            logger.debug("Not trying to start containers..")
        logger.info("Dockit finished...")
        return True
    except Exception as e:
        logger.critical("Failed on :%s", e)
        sys.exit(1)
0
Example 154
Project: centinel Source File: cli.py
def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction,
              exclude_list, shuffle_lists, vm_num, vm_index, reduce_vp):
    """
    For each VPN, check if there are experiments and scan with it if
    necessary

    Note: the expected directory structure is
    args.directory
    -----vpns (contains the OpenVPN config files
    -----configs (contains the Centinel config files)
    -----exps (contains the experiments directories)

    :param directory: root directory that contains vpn configs and
                      centinel client configs
    :param auth_file: a text file with username at first line and
                      password at second line
    :param crt_file: optional root certificate file
    :param tls_auth: additional key
    :param key_direction: must specify if tls_auth is used
    :param exclude_list: optional list of exluded countries
    :param shuffle_lists: shuffle vpn list if set true
    :param vm_num: number of VMs that are running currently
    :param vm_index: index of current VM
    :param reduce_vp: reduce number of vantage points
    :return:
    """
    logging.info("Starting to run the experiments for each VPN")
    # FIX: logging.warn is a deprecated alias for logging.warning.
    logging.warning("Excluding vantage points from: %s" % exclude_list)
    # iterate over each VPN
    vpn_dir = return_abs_path(directory, "vpns")
    conf_dir = return_abs_path(directory, "configs")
    home_dir = return_abs_path(directory, "home")
    if auth_file is not None:
        auth_file = return_abs_path(directory, auth_file)
    if crt_file is not None:
        crt_file = return_abs_path(directory, crt_file)
    if tls_auth is not None:
        tls_auth = return_abs_path(directory, tls_auth)
    conf_list = sorted(os.listdir(conf_dir))
    # determine VPN provider from the directory name
    vpn_provider = None
    if "hma" in directory:
        vpn_provider = "hma"
    elif "ipvanish" in directory:
        vpn_provider = "ipvanish"
    elif "purevpn" in directory:
        vpn_provider = "purevpn"
    elif "vpngate" in directory:
        vpn_provider = "vpngate"
    if vpn_provider:
        logging.info("Detected VPN provider is %s" % vpn_provider)
    else:
        logging.warning("Cannot determine VPN provider!")
    # reduce size of list if reduce_vp is true: keep one endpoint per
    # (country, ASN) pair, plus any endpoint whose geodata is missing.
    if reduce_vp:
        logging.info("Reducing list size. Original size: %d" % len(conf_list))
        country_asn_set = set()
        reduced_conf_set = set()
        for filename in conf_list:
            centinel_config = os.path.join(conf_dir, filename)
            config = centinel.config.Configuration()
            config.parse_config(centinel_config)
            vp_ip = os.path.splitext(filename)[0]
            try:
                meta = centinel.backend.get_meta(config.params, vp_ip)
                if 'country' in meta and 'as_number' in meta \
                        and meta['country'] and meta['as_number']:
                    country_asn = '_'.join([meta['country'], meta['as_number']])
                    if country_asn not in country_asn_set:
                        country_asn_set.add(country_asn)
                        reduced_conf_set.add(filename)
                else:
                    # run this endpoint if missing info
                    reduced_conf_set.add(filename)
            # FIX: bare `except:` also swallowed SystemExit/KeyboardInterrupt.
            except Exception:
                logging.warning("Failed to geolocate %s" % vp_ip)
                reduced_conf_set.add(filename)
        conf_list = list(reduced_conf_set)
        logging.info("List size reduced. New size: %d" % len(conf_list))
    # sort file list to ensure the same filename sequence in each VM
    conf_list = sorted(conf_list)
    # only select its own portion according to vm_num and vm_index
    # BUGFIX: `/` is float division on Python 3 and breaks the slice below;
    # use integer floor division.
    chunk_size = len(conf_list) // vm_num
    last_chunk_additional = len(conf_list) % vm_num
    start_pointer = 0 + (vm_index - 1) * chunk_size
    end_pointer = start_pointer + chunk_size
    if vm_index == vm_num:
        end_pointer += last_chunk_additional
    conf_list = conf_list[start_pointer:end_pointer]
    if shuffle_lists:
        shuffle(conf_list)
    number = 1
    total = len(conf_list)
    external_ip = get_external_ip()
    if external_ip is None:
        logging.error("No network connection, exiting...")
        return
    # getting namesevers that should be excluded
    local_nameservers = dns.resolver.Resolver().nameservers
    for filename in conf_list:
        # Check network connection first
        time.sleep(5)
        logging.info("Checking network connectivity...")
        current_ip = get_external_ip()
        if current_ip is None:
            logging.error("Network connection lost!")
            break
        elif current_ip != external_ip:
            logging.error("VPN still connected! IP: %s" % current_ip)
            if len(openvpn.OpenVPN.connected_instances) == 0:
                logging.error("No active OpenVPN instance found! Exiting...")
                break
            else:
                logging.warning("Trying to disconnect VPN")
                for instance in openvpn.OpenVPN.connected_instances:
                    instance.stop()
                time.sleep(5)
                current_ip = get_external_ip()
                if current_ip is None or current_ip != external_ip:
                    logging.error("Stopping VPN failed! Exiting...")
                    break
                logging.info("Disconnecting VPN successfully")
        # start centinel for this endpoint
        logging.info("Moving onto (%d/%d) %s" % (number, total, filename))
        number += 1
        vpn_config = os.path.join(vpn_dir, filename)
        centinel_config = os.path.join(conf_dir, filename)
        # before starting the VPN, check if there are any experiments
        # to run
        config = centinel.config.Configuration()
        config.parse_config(centinel_config)
        # assuming that each VPN config file has a name like:
        # [ip-address].ovpn, we can extract IP address from filename
        # and use it to geolocate and fetch experiments before connecting
        # to VPN.
        vpn_address, extension = os.path.splitext(filename)
        country = None
        try:
            meta = centinel.backend.get_meta(config.params,
                                             vpn_address)
            if 'country' in meta:
                country = meta['country']
        # FIX: was a bare `except:`.
        except Exception:
            logging.exception("%s: Failed to geolocate %s" % (filename, vpn_address))
        if country and exclude_list and country in exclude_list:
            logging.info("%s: Skipping this server (%s)" % (filename, country))
            continue
        # try setting the VPN info (IP and country) to get appropriate
        # experiemnts and input data.
        try:
            centinel.backend.set_vpn_info(config.params, vpn_address, country)
        except Exception as exp:
            logging.exception("%s: Failed to set VPN info: %s" % (filename, exp))
        logging.info("%s: Synchronizing." % filename)
        try:
            centinel.backend.sync(config.params)
        except Exception as exp:
            logging.exception("%s: Failed to sync: %s" % (filename, exp))
        if not experiments_available(config.params):
            logging.info("%s: No experiments available." % filename)
            try:
                centinel.backend.set_vpn_info(config.params, vpn_address, country)
            except Exception as exp:
                logging.exception("Failed to set VPN info: %s" % exp)
            continue
        # add exclude_nameservers to scheduler
        sched_path = os.path.join(home_dir, filename, "experiments", "scheduler.info")
        if os.path.exists(sched_path):
            with open(sched_path, 'r+') as f:
                sched_info = json.load(f)
                for task in sched_info:
                    if "python_exps" in sched_info[task] and "baseline" in sched_info[task]["python_exps"]:
                        if "params" in sched_info[task]["python_exps"]["baseline"]:
                            sched_info[task]["python_exps"]["baseline"]["params"]["exclude_nameservers"] = \
                                local_nameservers
                        else:
                            sched_info[task]["python_exps"]["baseline"]["params"] = \
                                {"exclude_nameservers": local_nameservers}
                # write back to same file
                f.seek(0)
                json.dump(sched_info, f, indent=2)
                f.truncate()
        logging.info("%s: Starting VPN." % filename)
        vpn = openvpn.OpenVPN(timeout=60, auth_file=auth_file, config_file=vpn_config,
                              crt_file=crt_file, tls_auth=tls_auth, key_direction=key_direction)
        vpn.start()
        if not vpn.started:
            logging.error("%s: Failed to start VPN!" % filename)
            vpn.stop()
            time.sleep(5)
            continue
        logging.info("%s: Running Centinel." % filename)
        try:
            client = centinel.client.Client(config.params, vpn_provider)
            centinel.conf = config.params
            # do not use client logging config
            # client.setup_logging()
            client.run()
        except Exception as exp:
            logging.exception("%s: Error running Centinel: %s" % (filename, exp))
        logging.info("%s: Stopping VPN." % filename)
        vpn.stop()
        time.sleep(5)
        logging.info("%s: Synchronizing." % filename)
        try:
            centinel.backend.sync(config.params)
        except Exception as exp:
            logging.exception("%s: Failed to sync: %s" % (filename, exp))
        # try setting the VPN info (IP and country) to the correct address
        # after sync is over.
        try:
            centinel.backend.set_vpn_info(config.params, vpn_address, country)
        except Exception as exp:
            logging.exception("Failed to set VPN info: %s" % exp)
0
Example 155
Project: pychemqt Source File: qtelemental.py
def __init__(self, parent=None):
    """Build the periodic-table dialog.

    Lays out one button per element (1..118) on the classic periodic
    grid, then builds two framed info panes whose labels are filled in
    by ``self.actualizar`` when an element is selected.
    """
    super(qtelemental, self).__init__(parent)
    self.setWindowIcon(QtGui.QIcon(QtGui.QPixmap(
        os.environ["pychemqt"]+"/images/button/PeriodicTableIcon.png")))
    self.setWindowTitle(
        QtWidgets.QApplication.translate("pychemqt", "Periodic Table"))
    layout = QtWidgets.QGridLayout(self)
    layout.setSpacing(2)
    # One button per element, placed on the periodic grid.
    for i in range(1, 119):
        element = Elemental(i)
        b = boton(element, self)
        if element.group == 0:
            # group == 0: elements shown in the two detached rows below
            # the main table (presumably lanthanides/actinides); the
            # offsets map their atomic number to a column.
            if i < 80:
                j = i-58
            else:
                j = i-90
            layout.addWidget(b, element.period+4, j+4, 1, 1)
        elif i == 57 or i == 89:
            # Elements 57 and 89 are also placed in the lower rows.
            layout.addWidget(b, element.period+4, element.group, 1, 1)
        else:
            layout.addWidget(b, element.period, element.group, 1, 1)
    # Spacers separating the main table from the detached rows.
    layout.addItem(QtWidgets.QSpacerItem(
        10, 10, QtWidgets.QSizePolicy.Fixed, QtWidgets.QSizePolicy.Fixed),
        8, 0, 1, 20)
    layout.addItem(QtWidgets.QSpacerItem(
        10, 10, QtWidgets.QSizePolicy.Expanding,
        QtWidgets.QSizePolicy.Expanding), 12, 0, 1, 20)
    # Asterisk markers linking the main table to the detached rows.
    asterisco = QtWidgets.QLabel("*")
    asterisco.setFont(font20)
    asterisco.setAlignment(alignment)
    layout.addWidget(asterisco, 6, 3)
    asterisco2 = QtWidgets.QLabel("**")
    asterisco2.setFont(font20)
    asterisco2.setAlignment(alignment)
    layout.addWidget(asterisco2, 7, 3)
    asterisco_ = QtWidgets.QLabel("*")
    asterisco_.setFont(font20)
    asterisco_.setAlignment(alignment)
    layout.addWidget(asterisco_, 10, 2)
    asterisco2_ = QtWidgets.QLabel("**")
    asterisco2_.setFont(font20)
    asterisco2_.setAlignment(alignment)
    layout.addWidget(asterisco2_, 11, 2)
    # First info pane: basic element data (number, symbol, name, mass,
    # density, melting/boiling points, electronic configuration).
    self.Info = QtWidgets.QFrame()
    layout.addWidget(self.Info, 0, 5, 3, 3)
    layoutInfo = QtWidgets.QGridLayout(self.Info)
    layoutInfo.setSpacing(1)
    layoutInfo.setContentsMargins(2, 0, 2, 0)
    self.Info.setFrameShape(QtWidgets.QFrame.StyledPanel)
    self.Info.setFrameShadow(QtWidgets.QFrame.Raised)
    self.Info.setAutoFillBackground(True)
    self.Info.setPalette(palette)
    # Atomic number label
    self.numero_atomico = QtWidgets.QLabel()
    self.numero_atomico.setToolTip(
        QtWidgets.QApplication.translate("pychemqt", "Atomic number"))
    layoutInfo.addWidget(self.numero_atomico, 1, 1)
    # Element symbol label
    self.simbolo = QtWidgets.QLabel()
    self.simbolo.setAlignment(alignment)
    self.simbolo.setToolTip(QtWidgets.QApplication.translate(
        "pychemqt", "Symbol"))
    self.simbolo.setFont(font11)
    layoutInfo.addWidget(self.simbolo, 1, 3)
    # Element name label
    self.nombre = QtWidgets.QLabel()
    self.nombre.setAlignment(QtCore.Qt.AlignCenter)
    self.nombre.setFont(font_title)
    layoutInfo.addWidget(self.nombre, 2, 1, 1, 3)
    font8 = QtGui.QFont()
    font8.setPointSize(8)
    # Atomic mass label
    self.peso_atomico = QtWidgets.QLabel()
    self.peso_atomico.setFont(font8)
    self.peso_atomico.setToolTip(QtWidgets.QApplication.translate(
        "pychemqt", "Atomic mass, g/mol"))
    layoutInfo.addWidget(self.peso_atomico, 3, 1)
    # Density label (colour-coded per phase, see tooltip)
    self.densidad = QtWidgets.QLabel()
    self.densidad.setFont(font8)
    self.densidad.setAlignment(alignment)
    self.densidad.setToolTip(QtWidgets.QApplication.translate(
        "pychemqt",
        "Density:\nBrown: Solid, kg/l\nBlue: Liquid, kg/l\n"
        "Green: Gas, g/l"))
    layoutInfo.addWidget(self.densidad, 3, 3)
    # Melting point label
    self.Tf = QtWidgets.QLabel()
    self.Tf.setFont(font8)
    self.Tf.setToolTip(QtWidgets.QApplication.translate(
        "pychemqt", "Melting Point, K"))
    layoutInfo.addWidget(self.Tf, 4, 1)
    # Heat of fusion label
    self.Heat_f = QtWidgets.QLabel()
    self.Heat_f.setFont(font8)
    self.Heat_f.setToolTip(QtWidgets.QApplication.translate(
        "pychemqt", "Heat of fusion, kJmol"))
    self.Heat_f.setAlignment(alignment)
    layoutInfo.addWidget(self.Heat_f, 4, 3)
    # Boiling point label
    self.Tb = QtWidgets.QLabel()
    self.Tb.setFont(font8)
    self.Tb.setToolTip(QtWidgets.QApplication.translate(
        "pychemqt", "Boiling Point, K"))
    layoutInfo.addWidget(self.Tb, 5, 1)
    # Heat of vaporization label
    self.Heat_b = QtWidgets.QLabel()
    self.Heat_b.setFont(font8)
    self.Heat_b.setToolTip(QtWidgets.QApplication.translate(
        "pychemqt", "Heat of vaporization, kJmol"))
    self.Heat_b.setAlignment(alignment)
    layoutInfo.addWidget(self.Heat_b, 5, 3)
    # Electronic configuration label
    self.configuracion = QtWidgets.QLabel()
    self.configuracion.setFont(font7)
    self.configuracion.setAlignment(QtCore.Qt.AlignCenter)
    self.configuracion.setToolTip(QtWidgets.QApplication.translate(
        "pychemqt", "Electronic configuration"))
    layoutInfo.addWidget(self.configuracion, 6, 1, 1, 3)
    # Second info pane: physical properties (radii, heat capacity,
    # conductivity, ionization energy, oxidation states, ...).
    self.Info2 = QtWidgets.QFrame()
    layout.addWidget(self.Info2, 0, 8, 3, 3)
    layoutInfo2 = QtWidgets.QGridLayout(self.Info2)
    layoutInfo2.setSpacing(1)
    layoutInfo2.setContentsMargins(2, 0, 2, 0)
    self.Info2.setFrameShape(QtWidgets.QFrame.StyledPanel)
    self.Info2.setFrameShadow(QtWidgets.QFrame.Raised)
    self.Info2.setAutoFillBackground(True)
    self.Info2.setPalette(palette)
    # Atomic volume label
    self.atomic_volume = QtWidgets.QLabel()
    self.atomic_volume.setFont(font8)
    self.atomic_volume.setToolTip(QtWidgets.QApplication.translate(
        "pychemqt", "Atomic volume")+", cm³/mol")
    layoutInfo2.addWidget(self.atomic_volume, 1, 1)
    # Atomic radius label
    self.atomic_radius = QtWidgets.QLabel()
    self.atomic_radius.setFont(font8)
    self.atomic_radius.setToolTip(QtWidgets.QApplication.translate(
        "pychemqt", "Atomic radius") + ", pm")
    layoutInfo2.addWidget(self.atomic_radius, 2, 1)
    # Covalent radius label
    self.covalent_radius = QtWidgets.QLabel()
    self.covalent_radius.setFont(font8)
    self.covalent_radius.setToolTip(QtWidgets.QApplication.translate(
        "pychemqt", "Covalent radius") + ", pm")
    layoutInfo2.addWidget(self.covalent_radius, 3, 1)
    # Van der Waals radius label
    self.vanderWaals_radius = QtWidgets.QLabel()
    self.vanderWaals_radius.setFont(font8)
    self.vanderWaals_radius.setToolTip(QtWidgets.QApplication.translate(
        "pychemqt", "Van der Waals radius")+", pm")
    layoutInfo2.addWidget(self.vanderWaals_radius, 4, 1)
    # Ionic radii label
    self.ionic_radii = QtWidgets.QLabel()
    self.ionic_radii.setFont(font7)
    self.ionic_radii.setToolTip(QtWidgets.QApplication.translate(
        "pychemqt", "Ionic radii")+", pm")
    layoutInfo2.addWidget(self.ionic_radii, 5, 1, 1, 3)
    # Electronegativity label
    self.electronegativity = QtWidgets.QLabel()
    self.electronegativity.setFont(font8)
    self.electronegativity.setToolTip(QtWidgets.QApplication.translate(
        "pychemqt", "Electronegativity, Pauling scale"))
    self.electronegativity.setAlignment(
        QtCore.Qt.AlignRight | QtCore.Qt.AlignVCenter)
    layoutInfo2.addWidget(self.electronegativity, 1, 3)
    # Specific heat capacity label
    self.Cp = QtWidgets.QLabel()
    self.Cp.setFont(font8)
    self.Cp.setToolTip(QtWidgets.QApplication.translate(
        "pychemqt", "Specific heat capacitiy") + ", kJ/kgK")
    self.Cp.setAlignment(QtCore.Qt.AlignRight | QtCore.Qt.AlignVCenter)
    layoutInfo2.addWidget(self.Cp, 2, 3)
    # Thermal conductivity label
    self.k = QtWidgets.QLabel()
    self.k.setFont(font8)
    self.k.setToolTip(QtWidgets.QApplication.translate(
        "pychemqt", "Thermal conductivity") + ", W/mK")
    self.k.setAlignment(QtCore.Qt.AlignRight | QtCore.Qt.AlignVCenter)
    layoutInfo2.addWidget(self.k, 3, 3)
    # First ionization energy label
    self.first_ionization = QtWidgets.QLabel()
    self.first_ionization.setFont(font8)
    self.first_ionization.setToolTip(QtWidgets.QApplication.translate(
        "pychemqt", "First ionization energy") + ", kJ/mol")
    self.first_ionization.setAlignment(
        QtCore.Qt.AlignRight | QtCore.Qt.AlignVCenter)
    layoutInfo2.addWidget(self.first_ionization, 4, 3)
    # Oxidation states label
    self.oxidation = QtWidgets.QLabel()
    self.oxidation.setFont(font8)
    self.oxidation.setToolTip(QtWidgets.QApplication.translate(
        "pychemqt", "Oxidation states"))
    self.oxidation.setAlignment(
        QtCore.Qt.AlignCenter | QtCore.Qt.AlignVCenter)
    layoutInfo2.addWidget(self.oxidation, 6, 1, 1, 3)
    # Show hydrogen initially so the panes are never empty.
    elemento = Elemental(1)
    self.actualizar(elemento)
    logging.info(QtWidgets.QApplication.translate(
        "pychemqt", "Starting periodic table tool"))
0
Example 156
Project: disco-dop Source File: runexp.py
def getgrammars(trees, sents, stages, testmaxwords, resultdir,
        numproc, lexmodel, simplelexsmooth, top):
    """Read off the requested grammars.

    For each entry in ``stages`` a grammar is extracted from the training
    ``trees`` / ``sents`` (a plain treebank grammar, a DOP variant, or an
    'mc-rerank' fragment model), written as gzipped files under
    ``resultdir``, and stored back on the stage object via ``stage.update``.

    NOTE(review): indentation of this block was reconstructed from the
    statement order of a whitespace-mangled source; verify against upstream.

    :param trees: training trees; copies are transformed where needed.
    :param sents: the corresponding sentences.
    :param stages: sequence of stage objects describing the grammars to
        extract; each is updated in place with ``grammar``,
        ``backtransform`` and ``outside`` attributes.
    :param testmaxwords: sentence length bound used for outside estimates.
    :param resultdir: directory in which grammar/estimate files are written.
    :param numproc: number of processes for doubledop fragment extraction.
    :param lexmodel: optional lexicon smoothing model (may be falsy).
    :param simplelexsmooth: if true, smooth by adding extra rules before
        extraction; otherwise smooth the extracted grammar afterwards.
    :param top: start symbol for the extracted grammars.
    """
    tbfanout, n = treetransforms.treebankfanout(trees)
    logging.info('binarized treebank fan-out: %d #%d', tbfanout, n)
    # one (possibly None) coarse-to-fine label mapping per stage
    mappings = [None for _ in stages]
    for n, stage in enumerate(stages):
        traintrees = trees
        stage.mapping = None
        prevn = 0
        if n and stage.prune:
            # index of the earlier stage whose results this stage prunes with
            prevn = [a.name for a in stages].index(stage.prune)
        if stage.split:
            # split discontinuous nodes into context-free ones, then binarize
            traintrees = [treetransforms.binarize(
                    treetransforms.splitdiscnodes(
                        tree.copy(True),
                        stage.markorigin),
                    childchar=':', dot=True, ids=grammar.UniqueIDs())
                    for tree in traintrees]
            logging.info('splitted discontinuous nodes')
        if stage.collapse:
            traintrees, mappings[n] = treebanktransforms.collapselabels(
                    [tree.copy(True) for tree in traintrees],
                    tbmapping=treebanktransforms.MAPPINGS[
                        stage.collapse[0]][stage.collapse[1]])
            logging.info('collapsed phrase labels for multilevel '
                    'coarse-to-fine parsing to %s level %d',
                    *stage.collapse)
        if n and mappings[prevn] is not None:
            # Given original labels A, convert CTF mapping1 A => C,
            # and mapping2 A => B to a mapping B => C.
            mapping1, mapping2 = mappings[prevn], mappings[n]
            if mappings[n] is None:
                stage.mapping = {a: mapping1[a] for a in mapping1}
            else:
                stage.mapping = {mapping2[a]: mapping1[a] for a in mapping2}
        if stage.mode.startswith('pcfg'):
            if tbfanout != 1 and not stage.split:
                raise ValueError('Cannot extract PCFG from treebank '
                        'with discontinuities.')
        backtransform = extrarules = None
        if lexmodel and simplelexsmooth:
            # smoothing realized as extra grammar rules added at extraction
            extrarules = lexicon.simplesmoothlexicon(lexmodel)
        if stage.mode == 'mc-rerank':
            # fragment-based reranking model; pickled as a whole, no rule files
            from . import _fragments
            gram = parser.DictObj(_fragments.getctrees(zip(trees, sents)))
            tree = gram.trees1.extract(0, gram.vocab)
            # start symbol = root label of the first tree
            gram.start = tree[:tree.index(' ')].lstrip('(')
            with gzip.open('%s/%s.train.pickle.gz' % (resultdir, stage.name),
                    'wb') as out:
                out.write(pickle.dumps(gram, protocol=-1))
        elif stage.dop:
            if stage.dop in ('doubledop', 'dop1'):
                if stage.dop == 'doubledop':
                    (xgrammar, backtransform,
                            altweights, fragments) = grammar.doubledop(
                            traintrees, sents, binarized=stage.binarized,
                            iterate=stage.iterate, complement=stage.complement,
                            numproc=numproc, maxdepth=stage.maxdepth,
                            maxfrontier=stage.maxfrontier,
                            extrarules=extrarules)
                elif stage.dop == 'dop1':
                    (xgrammar, backtransform,
                            altweights, fragments) = grammar.dop1(
                            traintrees, sents, binarized=stage.binarized,
                            maxdepth=stage.maxdepth,
                            maxfrontier=stage.maxfrontier,
                            extrarules=extrarules)
                # dump fragments
                with codecs.getwriter('utf8')(gzip.open('%s/%s.fragments.gz' %
                        (resultdir, stage.name), 'w')) as out:
                    out.writelines('%s\t%d\n' % (a, len(b))
                            for a, b in fragments)
            elif stage.dop == 'reduction':
                xgrammar, altweights = grammar.dopreduction(
                        traintrees, sents, packedgraph=stage.packedgraph,
                        extrarules=extrarules)
            else:
                raise ValueError('unrecognized DOP model: %r' % stage.dop)
            nodes = sum(len(list(a.subtrees())) for a in traintrees)
            if lexmodel and not simplelexsmooth:  # FIXME: altweights?
                xgrammar = lexicon.smoothlexicon(xgrammar, lexmodel)
            msg = grammar.grammarinfo(xgrammar)
            rules, lex = grammar.writegrammar(
                    xgrammar, bitpar=stage.mode.startswith('pcfg-bitpar'))
            with codecs.getwriter('utf8')(gzip.open('%s/%s.rules.gz' % (
                    resultdir, stage.name), 'wb')) as rulesfile:
                rulesfile.write(rules)
            with codecs.getwriter('utf8')(gzip.open('%s/%s.lex.gz' % (
                    resultdir, stage.name), 'wb')) as lexiconfile:
                lexiconfile.write(lex)
            gram = Grammar(rules, lex, start=top,
                    binarized=stage.binarized)
            # register alternative probability models under their names
            for name in altweights:
                gram.register('%s' % name, altweights[name])
            logging.info('DOP model based on %d sentences, %d nodes, '
                    '%d nonterminals', len(traintrees), nodes, len(gram.toid))
            logging.info(msg)
            if stage.estimator != 'rfe':
                gram.switch('%s' % stage.estimator)
            logging.info(gram.testgrammar()[1])
            if stage.dop in ('doubledop', 'dop1'):
                # backtransform keys are line numbers to rules file;
                # to see them together do:
                # $ paste <(zcat dop.rules.gz) <(zcat dop.backtransform.gz)
                with codecs.getwriter('utf8')(gzip.open(
                        '%s/%s.backtransform.gz' % (resultdir, stage.name),
                        'wb')) as out:
                    out.writelines('%s\n' % a for a in backtransform)
                if n and stage.prune:
                    msg = gram.getmapping(stages[prevn].grammar,
                            striplabelre=None if stages[prevn].dop
                                else re.compile('@.+$'),
                            neverblockre=re.compile('.+}<'),
                            splitprune=stage.splitprune and stages[prevn].split,
                            markorigin=stages[prevn].markorigin,
                            mapping=stage.mapping)
                else:
                    # recoverfragments() relies on this mapping to identify
                    # binarization nodes
                    msg = gram.getmapping(None,
                            striplabelre=None,
                            neverblockre=re.compile('.+}<'),
                            splitprune=False, markorigin=False,
                            mapping=stage.mapping)
                logging.info(msg)
            elif n and stage.prune:  # dop reduction
                msg = gram.getmapping(stages[prevn].grammar,
                        striplabelre=None if stages[prevn].dop
                            and stages[prevn].dop not in ('doubledop', 'dop1')
                            else re.compile('@[-0-9]+$'),
                        neverblockre=re.compile(stage.neverblockre)
                            if stage.neverblockre else None,
                        splitprune=stage.splitprune and stages[prevn].split,
                        markorigin=stages[prevn].markorigin,
                        mapping=stage.mapping)
                if stage.mode == 'dop-rerank':
                    gram.getrulemapping(
                            stages[prevn].grammar, re.compile(r'@[-0-9]+\b'))
                logging.info(msg)
            # write prob models
            np.savez_compressed('%s/%s.probs.npz' % (resultdir, stage.name),
                    **{name: mod for name, mod
                        in zip(gram.modelnames, gram.models)})
        else:  # not stage.dop
            xgrammar = grammar.treebankgrammar(traintrees, sents,
                    extrarules=extrarules)
            logging.info('induced %s based on %d sentences',
                    ('PCFG' if tbfanout == 1 or stage.split else 'PLCFRS'),
                    len(traintrees))
            # only dump the probability distribution once per result dir
            if stage.split or os.path.exists('%s/pcdist.txt' % resultdir):
                logging.info(grammar.grammarinfo(xgrammar))
            else:
                logging.info(grammar.grammarinfo(xgrammar,
                        dump='%s/pcdist.txt' % resultdir))
            if lexmodel and not simplelexsmooth:
                xgrammar = lexicon.smoothlexicon(xgrammar, lexmodel)
            rules, lex = grammar.writegrammar(
                    xgrammar, bitpar=stage.mode.startswith('pcfg-bitpar'))
            with codecs.getwriter('utf8')(gzip.open('%s/%s.rules.gz' % (
                    resultdir, stage.name), 'wb')) as rulesfile:
                rulesfile.write(rules)
            with codecs.getwriter('utf8')(gzip.open('%s/%s.lex.gz' % (
                    resultdir, stage.name), 'wb')) as lexiconfile:
                lexiconfile.write(lex)
            gram = Grammar(rules, lex, start=top)
            logging.info(gram.testgrammar()[1])
            if n and stage.prune:
                msg = gram.getmapping(stages[prevn].grammar,
                        striplabelre=None,
                        neverblockre=re.compile(stage.neverblockre)
                            if stage.neverblockre else None,
                        splitprune=stage.splitprune and stages[prevn].split,
                        markorigin=stages[prevn].markorigin,
                        mapping=stage.mapping)
                logging.info(msg)
        logging.info('wrote grammar to %s/%s.{rules,lex%s}.gz',
                resultdir, stage.name,
                ',backtransform' if stage.dop in ('doubledop', 'dop1') else '')
        outside = None
        if stage.estimates in ('SX', 'SXlrgaps'):
            if stage.estimates == 'SX' and tbfanout != 1 and not stage.split:
                raise ValueError('SX estimate requires PCFG.')
            elif stage.mode != 'plcfrs':
                raise ValueError('estimates require parser w/agenda.')
            begin = time.clock()
            logging.info('computing %s estimates', stage.estimates)
            if stage.estimates == 'SX':
                outside = estimates.getpcfgestimates(gram, testmaxwords,
                        gram.toid[trees[0].label])
            elif stage.estimates == 'SXlrgaps':
                outside = estimates.getestimates(gram, testmaxwords,
                        gram.toid[trees[0].label])
            logging.info('estimates done. cpu time elapsed: %gs',
                    time.clock() - begin)
            np.savez_compressed('%s/%s.outside.npz' % (
                    resultdir, stage.name), outside=outside)
            logging.info('saved %s estimates', stage.estimates)
        elif stage.estimates:
            raise ValueError('unrecognized value; specify SX or SXlrgaps.')
        # hand the finished grammar (and optional extras) back to the stage
        stage.update(grammar=gram, backtransform=backtransform,
                outside=outside)
    if any(stage.mapping is not None for stage in stages):
        with codecs.getwriter('utf8')(gzip.open('%s/mapping.json.gz' % (
                resultdir), 'wb')) as mappingfile:
            mappingfile.write(json.dumps([stage.mapping for stage in stages]))
Example 157 (0 votes)
Project: Nuitka — Source File: MainControl.py
def makeSourceDirectory(main_module):
    """ Get the full list of modules imported, create code for all of them.

    Walks every module registered as "done", generates C-ish source for the
    compiled ones, copies shared-library modules into the standalone
    directory, and finally writes the constants and helper source files.

    NOTE(review): indentation reconstructed from a whitespace-mangled
    source; verify against upstream Nuitka before relying on structure.

    :param main_module: the compiled main module driving the build; also
        used to derive the source/standalone directory paths.
    """
    # We deal with a lot of details here, but rather one by one, and split makes
    # no sense, pylint: disable=R0912,R0914
    assert main_module.isCompiledPythonModule()
    # The global context used to generate code.
    global_context = CodeGeneration.makeGlobalContext()
    assert main_module in ModuleRegistry.getDoneModules()
    # We might have chosen to include it as bytecode, and only compiled it for
    # fun, and to find its imports. In this case, now we just can drop it. Or
    # a module may shadow a frozen module, but be a different one, then we can
    # drop the frozen one.
    # TODO: This really should be done when the compiled module comes into
    # existence.
    for module in ModuleRegistry.getDoneUserModules():
        if module.isCompiledPythonModule():
            uncompiled_module = ModuleRegistry.getUncompiledModule(
                module_name     = module.getFullName(),
                module_filename = module.getCompileTimeFilename()
            )
            if uncompiled_module is not None:
                # We now need to decide which one to keep, compiled or uncompiled
                # module. Some uncompiled modules may have been asked by the user
                # or technically required. By default, frozen code if it exists
                # is preferred, as it will be from standalone mode adding it.
                if uncompiled_module.isUserProvided():
                    ModuleRegistry.removeDoneModule(module)
                else:
                    ModuleRegistry.removeUncompiledModule(uncompiled_module)
    # Lets check if the recurse-to modules are actually present, and warn the
    # user if one of those was not found.
    for any_case_module in Options.getShallFollowModules():
        for module in ModuleRegistry.getDoneUserModules():
            if module.getFullName() == any_case_module:
                break
        else:
            # for/else: no module matched the requested name
            warning(
                "Didn't recurse to '%s', apparently not used." % \
                any_case_module
            )
    # Prepare code generation, i.e. execute finalization for it.
    for module in ModuleRegistry.getDoneModules():
        if module.isCompiledPythonModule():
            Finalization.prepareCodeGeneration(module)
    # Pick filenames.
    source_dir = getSourceDirectoryPath(main_module)
    module_filenames = pickSourceFilenames(
        source_dir = source_dir,
        modules    = ModuleRegistry.getDoneModules()
    )
    # First pass, generate code and use constants doing so, but prepare the
    # final code generation only, because constants code will be added at the
    # end only.
    prepared_modules = {}
    for module in ModuleRegistry.getDoneModules():
        if module.isCompiledPythonModule():
            cpp_filename = module_filenames[module]
            prepared_modules[cpp_filename] = CodeGeneration.prepareModuleCode(
                global_context = global_context,
                module         = module,
                module_name    = module.getFullName(),
            )
            # Main code constants need to be allocated already too.
            if module is main_module and not Options.shallMakeModule():
                prepared_modules[cpp_filename][1].getConstantCode(0)
    # Second pass, generate the actual module code into the files.
    for module in ModuleRegistry.getDoneModules():
        if module.isCompiledPythonModule():
            cpp_filename = module_filenames[module]
            template_values, module_context = prepared_modules[cpp_filename]
            source_code = CodeGeneration.generateModuleCode(
                module_context  = module_context,
                template_values = template_values
            )
            writeSourceCode(
                filename    = cpp_filename,
                source_code = source_code
            )
            if Options.isShowInclusion():
                info("Included compiled module '%s'." % module.getFullName())
        elif module.isPythonShlibModule():
            # shared-library module: copy it into the standalone directory
            target_filename = Utils.joinpath(
                getStandaloneDirectoryPath(main_module),
                *module.getFullName().split('.')
            )
            if Utils.getOS() == "Windows":
                target_filename += ".pyd"
            else:
                target_filename += ".so"
            target_dir = Utils.dirname(target_filename)
            if not Utils.isDir(target_dir):
                Utils.makePath(target_dir)
            shutil.copy(
                module.getFilename(),
                target_filename
            )
            standalone_entry_points.append(
                (target_filename, module.getPackage())
            )
        elif module.isUncompiledPythonModule():
            # nothing to generate for bytecode-included modules
            pass
        else:
            assert False, module
    # Constants must be written after all modules used them above.
    writeSourceCode(
        filename = Utils.joinpath(
            source_dir,
            "__constants.c"
        ),
        source_code = ConstantCodes.getConstantsDefinitionCode(
            context = global_context
        )
    )
    helper_decl_code, helper_impl_code = CodeGeneration.generateHelpersCode(
        ModuleRegistry.getDoneUserModules()
    )
    writeSourceCode(
        filename    = Utils.joinpath(source_dir, "__helpers.h"),
        source_code = helper_decl_code
    )
    writeSourceCode(
        filename    = Utils.joinpath(source_dir, "__helpers.c"),
        source_code = helper_impl_code
    )
Example 158 (0 votes)
Project: where-do-you-go — Source File: geomodel.py
@staticmethod
def proximity_fetch(query, center, max_results=10, max_distance=0):
    """Performs a proximity/radius fetch on the given query.
    Fetches at most <max_results> entities matching the given query,
    ordered by ascending distance from the given center point, and optionally
    limited by the given maximum distance.
    This method uses a greedy algorithm that starts by searching high-resolution
    geocells near the center point and gradually looking in lower and lower
    resolution cells until max_results entities have been found matching the
    given query and no closer possible entities can be found.
    Args:
      query: A db.Query on entities of this kind.
      center: A geotypes.Point or db.GeoPt indicating the center point around
          which to search for matching entities.
      max_results: An int indicating the maximum number of desired results.
          The default is 10, and the larger this number, the longer the fetch
          will take.
      max_distance: An optional number indicating the maximum distance to
          search, in meters.
    Returns:
      The fetched entities, sorted in ascending order by distance to the search
      center.
    Raises:
      Any exceptions that google.appengine.ext.db.Query.fetch() can raise.

    NOTE(review): Python 2 code (uses cmp and cmp-style sorted); indentation
    reconstructed from a whitespace-mangled source — verify against upstream.
    """
    # TODO(romannurik): check for GqlQuery
    results = []
    searched_cells = set()
    # The current search geocell containing the lat,lon.
    cur_containing_geocell = geocell.compute(center)
    # The currently-being-searched geocells.
    # NOTES:
    # * Start with max possible.
    # * Must always be of the same resolution.
    # * Must always form a rectangular region.
    # * One of these must be equal to the cur_containing_geocell.
    cur_geocells = [cur_containing_geocell]
    closest_possible_next_result_dist = 0
    # Assumes both a and b are lists of (entity, dist) tuples, *sorted by dist*.
    # NOTE: This is an in-place merge, and there are guaranteed
    # no duplicates in the resulting list.
    def _merge_results_in_place(a, b):
        util.merge_in_place(a, b,
            cmp_fn=lambda x, y: cmp(x[1], y[1]),
            dup_fn=lambda x, y: x[0].key() == y[0].key())
    sorted_edges = [(0,0)]
    sorted_edge_distances = [0]
    while cur_geocells:
        closest_possible_next_result_dist = sorted_edge_distances[0]
        if max_distance and closest_possible_next_result_dist > max_distance:
            # everything left to search is farther than the allowed radius
            break
        cur_geocells_unique = list(set(cur_geocells).difference(searched_cells))
        # Run query on the next set of geocells.
        cur_resolution = len(cur_geocells[0])
        temp_query = copy.deepcopy(query)  # TODO(romannurik): is this safe?
        temp_query.filter('location_geocells IN', cur_geocells_unique)
        # Update results and sort.
        new_results = temp_query.fetch(1000)
        if DEBUG:
            logging.info('fetch complete for %s' % (','.join(cur_geocells_unique),))
        searched_cells.update(cur_geocells)
        # Begin storing distance from the search result entity to the
        # search center along with the search result itself, in a tuple.
        new_results = [(entity, geomath.distance(center, entity.location))
                       for entity in new_results]
        new_results = sorted(new_results, lambda dr1, dr2: cmp(dr1[1], dr2[1]))
        new_results = new_results[:max_results]
        # Merge new_results into results or the other way around, depending on
        # which is larger.
        if len(results) > len(new_results):
            _merge_results_in_place(results, new_results)
        else:
            _merge_results_in_place(new_results, results)
            results = new_results
        results = results[:max_results]
        sorted_edges, sorted_edge_distances = \
            util.distance_sorted_edges(cur_geocells, center)
        if len(results) == 0 or len(cur_geocells) == 4:
            # Either no results (in which case we optimize by not looking at
            # adjacents, go straight to the parent) or we've searched 4 adjacent
            # geocells, in which case we should now search the parents of those
            # geocells.
            cur_containing_geocell = cur_containing_geocell[:-1]
            cur_geocells = list(set([cell[:-1] for cell in cur_geocells]))
            if not cur_geocells or not cur_geocells[0]:
                break  # Done with search, we've searched everywhere.
        elif len(cur_geocells) == 1:
            # Get adjacent in one direction.
            # TODO(romannurik): Watch for +/- 90 degree latitude edge case geocells.
            nearest_edge = sorted_edges[0]
            cur_geocells.append(geocell.adjacent(cur_geocells[0], nearest_edge))
        elif len(cur_geocells) == 2:
            # Get adjacents in perpendicular direction.
            nearest_edge = util.distance_sorted_edges([cur_containing_geocell],
                                                      center)[0][0]
            if nearest_edge[0] == 0:
                # Was vertical, perpendicular is horizontal.
                perpendicular_nearest_edge = [x for x in sorted_edges if x[0] != 0][0]
            else:
                # Was horizontal, perpendicular is vertical.
                perpendicular_nearest_edge = [x for x in sorted_edges if x[0] == 0][0]
            cur_geocells.extend(
                [geocell.adjacent(cell, perpendicular_nearest_edge)
                 for cell in cur_geocells])
        # We don't have enough items yet, keep searching.
        if len(results) < max_results:
            if DEBUG:
                logging.debug('have %d results but want %d results, '
                              'continuing search' % (len(results), max_results))
            continue
        if DEBUG:
            logging.debug('have %d results' % (len(results),))
        # If the currently max_results'th closest item is closer than any
        # of the next test geocells, we're done searching.
        current_farthest_returnable_result_dist = \
            geomath.distance(center, results[max_results - 1][0].location)
        if (closest_possible_next_result_dist >=
            current_farthest_returnable_result_dist):
            if DEBUG:
                logging.debug('DONE next result at least %f away, '
                              'current farthest is %f dist' %
                              (closest_possible_next_result_dist,
                               current_farthest_returnable_result_dist))
            break
        if DEBUG:
            logging.debug('next result at least %f away, '
                          'current farthest is %f dist' %
                          (closest_possible_next_result_dist,
                           current_farthest_returnable_result_dist))
    if DEBUG:
        logging.info('proximity query looked '
                     'in %d geocells' % len(searched_cells))
    # final filter: enforce max_distance on the merged, sorted results
    return [entity for (entity, dist) in results[:max_results]
            if not max_distance or dist < max_distance]
Example 159 (0 votes)
def run(self):
    """Run one proxy tunnel between ``self.client`` and a parent connection.

    Performs the configured handshake (direct SOCKS5, SOCKS5-over-parent, or
    HTTP CONNECT), then relays traffic. If the client's first payload looks
    like a TLS handshake (record type 0x16), the server-to-client TLS records
    are parsed so the certificate chain can be checked via ``self.certStore``
    before switching to plain piping.

    NOTE(review): presumably the ``run`` method of a thread-like Tunnel
    class; ``self.client``, ``self.config``, ``self.certStore``, ``Pipe``
    and ``ProxyType`` are defined elsewhere — confirm against the full file.
    Indentation reconstructed from a whitespace-mangled source.
    """
    def recvFully(sock, byteslen):
        # Read exactly byteslen bytes or raise on premature close.
        buf = b''
        while byteslen != 0:
            t = sock.recv(byteslen)
            if t == b'':
                raise Exception('End connection in socksHandshake')
            buf += t
            byteslen -= len(t)
        return buf
    def tryOrd(b):
        # ord() on py2 bytes elements; py3 indexing already yields int.
        try:
            return ord(b)
        except TypeError:
            return b
    def recvSocksAddr(sock):
        # Parse a SOCKS5 address (ATYP + addr + port); returns the raw
        # bytes consumed and the (hostname, port) pair.
        buf = sock.recv(1)  # atyp
        if buf == b'\x01':
            # IPv4: 4 raw bytes, rendered as dotted decimal
            hostname = recvFully(sock, 4)
            buf += hostname
            hostname = b'.'.join([str(tryOrd(b)).encode('iso-8859-1') for b in hostname])
        elif buf == b'\x03':
            # domain name: 1-byte length prefix
            hostnameLen = sock.recv(1)
            buf += hostnameLen
            hostname = recvFully(sock, ord(hostnameLen))
            buf += hostname
        elif buf == b'\x04':
            # IPv6: 16 raw bytes, rendered byte-wise with ':' separators
            hostname = recvFully(sock, 16)
            buf += hostname
            hostname = b':'.join([str(tryOrd(b)).encode('iso-8859-1') for b in hostname])
        else:
            raise Exception('Unknown atyp')
        port = recvFully(sock, 2)
        buf += port
        port = tryOrd(port[0]) << 8 | tryOrd(port[1])
        return buf, (hostname, port)
    def localHandshake():
        # Act as the SOCKS5 server ourselves and connect directly upstream.
        self.client.recv(1)  # ver
        nmethods = self.client.recv(1)
        recvFully(self.client, ord(nmethods))
        self.client.send(b'\x05\x00')
        self.client.recv(1)  # ver
        cmd = self.client.recv(1)
        if cmd != b'\x01':
            raise Exception('Non connect cmd not implemented yet')
        self.client.recv(1)  # rsv
        buf, addr = recvSocksAddr(self.client)
        self.parent.connect(addr)
        self.hostname = addr[0]
        self.client.send(b'\x05\x00\x00\x01\x00\x00\x00\x00\x00\x00')
    def socksHandshake():
        # Relay the client's SOCKS5 handshake through the parent proxy.
        clientBuf = b''
        clientBuf += self.client.recv(1)  # ver
        nmethods = self.client.recv(1)
        clientBuf += nmethods
        clientBuf += recvFully(self.client, ord(nmethods))
        self.parent.send(clientBuf)
        parentBuf = b''
        parentBuf += self.parent.recv(1)  # ver
        method = self.parent.recv(1)
        if method != b'\x00':
            raise Exception('Non no-authentication socks protocol not implemented yet')
        parentBuf += method
        self.client.send(parentBuf)
        clientBuf = b''
        clientBuf += self.client.recv(1)  # ver
        cmd = self.client.recv(1)
        if cmd != b'\x01':
            raise Exception('Non connect cmd not implemented yet')
        clientBuf += cmd
        clientBuf += self.client.recv(1)  # rsv
        buf, addr = recvSocksAddr(self.client)
        self.hostname = addr[0]
        clientBuf += buf
        self.parent.send(clientBuf)
        parentBuf = b''
        parentBuf += self.parent.recv(1)  # ver
        rep = self.parent.recv(1)
        if rep != b'\x00':
            logging.info('socksHandshake connect failed')
        parentBuf += self.parent.recv(1)  # rsv
        buf, addr = recvSocksAddr(self.parent)
        parentBuf += buf
        self.client.send(parentBuf)
    def connectHandshake():
        # Relay an HTTP CONNECT request through the parent proxy; returns
        # any bytes already read past each header block.
        def recvHeaderFully(sock):
            # Read until the end of the HTTP header; return (header, rest).
            endBytes = b'\r\n\r\n'
            idx = 0
            buf = b''
            while True:
                t = sock.recv(65536)
                if t == b'':
                    raise Exception('End connection in connectHandshake')
                buf += t
                try:
                    idx = buf.index(endBytes, idx)
                    idx += len(endBytes)
                    return buf[ : idx], buf[idx : ]
                except ValueError:
                    # re-scan from near the end so a split marker is found
                    idx = len(buf) - len(endBytes) + 1
        request, clientData = recvHeaderFully(self.client)
        method, host, protocol = re.split(b'\\s+', request[ : request.index(b'\r\n')]) # Although not meets RFC, it's no matter. Because it's a local proxy.
        if method.upper() != b'CONNECT':
            raise Exception('Not a CONNECT(HTTPS) proxy.')
        self.hostname, port = host.split(b':')
        self.parent.send(request)
        response, parentData = recvHeaderFully(self.parent)
        self.client.send(response)
        return clientData, parentData
    def startClientParentPipe():
        # Forward client -> parent in a separate Pipe thread.
        if clientData != b'':
            self.parent.send(clientData)
        pipe = Pipe()
        pipe.setSockPair(self.client, self.parent)
        pipe.start()
    def startParentClientPipe():
        # Forward parent -> client on this thread.
        if parentData != b'':
            self.client.send(parentData)
        self.setSockPair(self.parent, self.client)
        self.pipeData()
    def startPipe():
        startClientParentPipe()
        startParentClientPipe()
    def sslCheckCertification(packet):
        # Inspect a TLS handshake record; type 0x0b is Certificate.
        if tryOrd(packet[5]) != 0x0b:
            return
        certChainLen = (tryOrd(packet[9]) << 16) | (tryOrd(packet[10]) << 8) | tryOrd(packet[11])
        certChain = packet[12 : 12 + certChainLen]
        self.certStore.checkCert(self.hostname, certChain)
    def sslGetPacket(sock, data):
        # Assemble one complete TLS record (5-byte header + body length
        # from bytes 3-4); returns (record, leftover).
        missDataLen = 5 - len(data)
        if missDataLen > 0:
            data += recvFully(sock, missDataLen)
        missDataLen = 5 + ((tryOrd(data[3]) << 8) | tryOrd(data[4])) - len(data)
        if missDataLen > 0:
            data += recvFully(sock, missDataLen)
        packetLen = 5 + ((tryOrd(data[3]) << 8) | tryOrd(data[4]))
        return data[ : packetLen], data[packetLen : ]
    self.parent = socket.socket()
    try:
        if self.config['parentProxyType'] != ProxyType.NONE:
            self.parent.connect((self.config['parentProxyHost'], self.config['parentProxyPort']))
        if self.config['parentProxyType'] == ProxyType.NONE:
            localHandshake()
            clientData = b''
            parentData = b''
        elif self.config['parentProxyType'] == ProxyType.SOCKS:
            socksHandshake()
            clientData = b''
            parentData = b''
        elif self.config['parentProxyType'] == ProxyType.CONNECT:
            clientData, parentData = connectHandshake()
        else:
            assert(False)
        if clientData == b'':
            clientData = self.client.recv(65536)
        if tryOrd(clientData[0]) != 0x16:  # Not SSL Handshake
            startPipe()
            return
        startClientParentPipe()
        while True:
            # inspect server records until application data begins
            packet, parentData = sslGetPacket(self.parent, parentData)
            if tryOrd(packet[0]) == 0x17:  # Start SSL Application Data
                self.client.send(packet)
                break
            sslCheckCertification(packet)
            self.client.send(packet)
        startParentClientPipe()
    except Exception:
        logging.exception('Exception in Tunnel.run:')
    finally:
        self.client.close()
        self.parent.close()
Example 160 (0 votes)
Project: FaST-LMM — Source File: heritability_spatial_correction.py
def heritability_spatial_correction(G_kernel, spatial_coor, spatial_iid, alpha_list, alpha_power, pheno,
map_function = map, cache_folder=None,
jackknife_count=500, permute_plus_count=10000, permute_times_count=10000, seed=0,
just_testing=False, always_remote=False, allow_gxe2 = True
):
"""
Function measuring heritability with correction for spatial location.
:param G_kernel: A kernel that tells the genetic similarity between all pairs of individuals. The kernel can be given
explicitly, for example with a :class:`.KernelData`. The kernel can also be given implicitly by providing a set of
SNPs or the name of a BED file.
:type G_kernel: a :class:`.KernelReader`, :class:`.SnpReader` or a string
:param spatial_coor: The position of each individual given by two coordinates. Any units are allowed, but the two values
must be compatible so that distance can be determined via Pythagoras' theorem. (So, longitude and latitude should
not be used unless the locations are near the Equator.)
:type spatial_coor: a iid_count x 2 array
:param spatial_iid: A ndarray of the iids. Each iid is a ndarray of two strings (a family ID and a case ID) that identifies an individual.
:type spatial_iid: array of strings with shape [iid_count,2]
:param alpha_list: a list of numbers to search to find the best alpha, which is the similarity scale. The similarity of two individuals
is here defined as exp(-(distance_between/alpha)**alpha_power). If the closest individuals are 100 units apart and the farthest
individuals are 4e6 units apart, a reasonable alpha_list might be: [int(v) for v in np.logspace(np.log10(100),np.log10(1e10), 100)]
The function's reports on the alphas chosen. If an extreme alpha is picked, change alpha_list to cover more range.
:type alpha_list: list of numbers
:param alpha_power: 2 (a good choice) means that similarity goes with area. 1 means with distance.
:type alpha_list: number
:param pheno: The target values(s) to predict. It can be a file name readable via :class:`SnpReader.Pheno` or any :class:`.SnpReader`.
:type pheno: a :class:`.SnpReader` or string
:param cache_folder: (default 'None') The name of a directory in which to save intermediate results. If 'None', then no intermediate results are saved.
:type cache_folder: a string
:param map_function: (default 'map') A function with the same inputs and functionality as Python's 'map' function.
Can be used to run 'heritability_spatial_correction' on a cluster.
:type map_function: a function
:param jackknife_count: (default 500) The number of jackknife groups to use when calculating standard errors (SE). Changing to a small number, 2,
speeds up calculation at the cost of unusable SEs.
:type jackknife_count: number
:param permute_plus_count: (default 10000) The number of permutations used when calculating P values. Changing to a small number, 1,
speeds up calculation at the cost of unusable P values.
:type permute_plus_count: number
:param permute_times_count: (default 10000) The number of permutations used when calculating P values. Changing to a small number, 1,
speeds up calculation at the cost of unusable P values.
:type permute_times_count: number
:param seed: (default 0) The random seed used by jackknifing and permutation.
:type seed: number
:param just_testing: (default False) If true, skips actual LMM-related search and calculation.
:type just_testing: bool
:rtype: Pandas dataframe with one row per phenotyper. Columns include "h2uncorr", "h2corr", etc.
"""
######################
# Prepare the inputs
######################
from fastlmm.inference.fastlmm_predictor import _kernel_fixup, _pheno_fixup
G_kernel = _kernel_fixup(G_kernel, iid_if_none=None, standardizer=Unit()) # Create a kernel from an in-memory kernel, some snps, or a text file.
pheno = _pheno_fixup(pheno,iid_if_none=G_kernel.iid, missing='NA') # Create phenotype data from in-memory data or a text file.
if cache_folder is not None:
pstutil.create_directory_if_necessary(cache_folder,isfile=False)
jackknife_seed = seed or 1954692566L
permute_plus_seed = seed or 2372373100L
permute_times_seed = seed or 2574440128L
######################
# Find 'alpha', the scale for distance
######################
# create the alpha table (unless it is already there)
alpha_table_fn = "{0}/alpha_table.{1}.txt".format(cache_folder,pheno.sid_count) # create a name for the alpha_table cache file
if cache_folder is not None and os.path.exists(alpha_table_fn):
alpha_table = pd.read_csv(alpha_table_fn, delimiter = '\t',index_col=False, comment=None)
else:
# create the list of arguments to run
arg_list = []
for phen_target in pheno.sid:
pheno_one = pheno[:,pheno.col_to_index([phen_target])] # Look at only this pheno_target
for alpha in alpha_list:
#pheno, G_kernel, spatial_coor, spatial_iid, alpha, alpha_power, (jackknife_index, jackknife_count, jackknife_seed),
arg_tuple = (pheno_one, G_kernel, spatial_coor, spatial_iid, alpha, alpha_power, (-1, 0, None),
# (permute_plus_index, permute_plus_count, permute_plus_seed), (permute_times_index, permute_times_count, permute_times_seed) ,just_testing, do_uncorr, do_gxe2, a2
(-1, 0, None), (-1, 0, None), just_testing, False, True and allow_gxe2, None)
arg_list.append(arg_tuple)
# Run "run_line" on each set of arguments and save to file
return_list = map_function(work_item, arg_list) if len(arg_list)>1 or always_remote else map(work_item, arg_list)
return_list = [line for line in return_list if line is not None] #Remove 'None' results
alpha_table = pd.DataFrame(return_list)
if cache_folder is not None:
_write_csv(alpha_table,False,alpha_table_fn)
# read the alpha table and find the best values
grouped = alpha_table.groupby("phen")
alpha_dict = {}
for phen, phen_table in grouped:
best_index_corr = phen_table['nLLcorr'].idxmin() # with Pandas, this returns the index in the parent table, not the group table
best_index_gxe2 = phen_table['nLL_gxe2'].idxmin() if allow_gxe2 else 0
alpha_corr = alpha_table.iloc[best_index_corr]['alpha']
alpha_gxe2 = alpha_table.iloc[best_index_gxe2]['alpha']
alpha_dict[phen] = alpha_corr, alpha_gxe2
logging.info(alpha_dict)
######################
# Use jackknifing to compute h2uncorr, SE, h2corr, SE, e2, SE, gxe2, SE
######################
jackknife_count_actual = min(jackknife_count,G_kernel.iid_count)
# Set up the run and do it (unless it has already been run)
jackknife_table_fn = "{0}/jackknife.{1}.count{2}.txt".format(cache_folder, pheno.sid_count, jackknife_count_actual)
if cache_folder is not None and os.path.exists(jackknife_table_fn):
jackknife_table = pd.read_csv(jackknife_table_fn, delimiter = '\t',index_col=False, comment=None)
else:
arg_list = []
for phen_target in pheno.sid:
pheno_one = pheno[:,pheno.col_to_index([phen_target])] # Look at only this pheno_target
alpha_corr, alpha_gxe2 = alpha_dict[phen_target]
alpha_set = set([alpha_corr, alpha_gxe2]) #If these are the same, then only need to do half the work
for alpha in alpha_set:
logging.debug(alpha)
do_uncorr = (alpha == alpha_corr)
do_gxe2 = (alpha == alpha_gxe2) and allow_gxe2
for jackknife in range(-1, jackknife_count_actual):
# pheno, G_kernel, spatial_coor, spatial_iid, alpha, alpha_power, (jackknife_index, jackknife_count, jackknife_seed),
arg_tuple = (pheno_one, G_kernel, spatial_coor, spatial_iid, alpha, alpha_power, (jackknife, jackknife_count_actual, jackknife_seed),
# (permute_plus_index, permute_plus_count, permute_plus_seed), (permute_times_index, permute_times_count, permute_times_seed) ,just_testing, do_uncorr, do_gxe2, a2
(-1,0,None), (-1,0,None), just_testing, do_uncorr, do_gxe2, None)
arg_list.append(arg_tuple)
# Run "run_line" on each set of arguments and save to file
return_list = map_function(work_item, arg_list) if len(arg_list)>1 or always_remote else map(work_item, arg_list)
return_list = [line for line in return_list if line is not None] #Remove 'None' results
jackknife_table = pd.DataFrame(return_list)
if cache_folder is not None:
_write_csv(jackknife_table, False, jackknife_table_fn)
# get the real (that is, unjackknifed) values
jackknife_table["diff"] = jackknife_table.h2uncorr-jackknife_table.h2corr # Compute the diff = h2uncorr-h2corr column
results_both = jackknife_table[jackknife_table.jackknife_index==-1] # Create a table of the real (non-jackknifed) results for both alphas (which may be the same)
del results_both["jackknife_index"]
results_corr = results_both[results_both.alpha == [alpha_dict[phen][0] for phen in results_both.phen]] #Create version for g+e's alpha
results_gxe2 = results_both[results_both.alpha == [alpha_dict[phen][1] for phen in results_both.phen]] #Create version for gxe's alpha
#remove unwanted columns
for delcol in ["a2_gxe2","gxe2","nLL_gxe2","permute_plus_count","permute_plus_index","permute_plus_seed","permute_times_count","permute_times_index","permute_times_seed","jackknife_count","jackknife_seed"]:
del results_corr[delcol]
for delcol in ["a2","e2","h2corr","h2uncorr","nLLcorr","nLLuncorr","diff","permute_plus_count","permute_plus_index","permute_plus_seed","permute_times_count","permute_times_index","permute_times_seed","jackknife_count","jackknife_seed"]:
del results_gxe2[delcol]
#Use a pivottable to compute the jackknifed SE's
corr_rows = np.logical_and(jackknife_table.jackknife_index!=-1,jackknife_table.alpha==[alpha_dict[phen][0] for phen in jackknife_table.phen])
jk_table_corr = pd.pivot_table(jackknife_table[corr_rows], values=['h2uncorr','h2corr','diff','e2'], index=['phen'], columns=[], aggfunc=np.std)
jk_table_corr["h2uncorr SE"] = jk_table_corr["h2uncorr"] * np.sqrt(jackknife_count_actual-1)
jk_table_corr["h2corr SE"] = jk_table_corr["h2corr"] * np.sqrt(jackknife_count_actual-1)
jk_table_corr["diff SE"] = jk_table_corr["diff"] * np.sqrt(jackknife_count_actual-1)
jk_table_corr["e2 SE"] = jk_table_corr["e2"] * np.sqrt(jackknife_count_actual-1)
del jk_table_corr["h2uncorr"]
del jk_table_corr["h2corr"]
del jk_table_corr["diff"]
del jk_table_corr["e2"]
gxe2_rows = np.logical_and(jackknife_table.jackknife_index!=-1,jackknife_table.alpha==[alpha_dict[phen][1] for phen in jackknife_table.phen])
jk_table_gxe2 = pd.pivot_table(jackknife_table[gxe2_rows], values=['gxe2'], index=['phen'], columns=[], aggfunc=np.std)
jk_table_gxe2["gxe2 SE"] = jk_table_gxe2["gxe2"] * np.sqrt(jackknife_count_actual-1)
del jk_table_gxe2["gxe2"]
#Join the SE's to the main results table
results_corr = results_corr.join(jk_table_corr, on='phen')
results_gxe2 = results_gxe2.join(jk_table_gxe2, on='phen')
#compute pValue columns
results_corr["P (diff=0)"] = stats.t.sf(results_corr["diff"]/results_corr["diff SE"],df=jackknife_count_actual-1)*2 #two sided
results_corr["from SE, one-sided, P (e2=0)"] = stats.t.sf(results_corr["e2"]/results_corr["e2 SE"],df=jackknife_count_actual-1)
results_gxe2["from SE, one-sided, P (gxe2=0)"] = stats.t.sf(results_gxe2["gxe2"]/results_gxe2["gxe2 SE"],df=jackknife_count_actual-1) #one sided
if cache_folder is not None:
_write_csv(results_corr, False, "{0}/jackknife_corr_summary.{1}.jackknife{2}.txt".format(cache_folder, pheno.sid_count, jackknife_count_actual))
_write_csv(results_gxe2, False, "{0}/jackknife_gxe2_summary.{1}.jackknife{2}.txt".format(cache_folder, pheno.sid_count, jackknife_count_actual))
######################
# compute p(e2=0) via permutation
######################
permplus_table_fn = "{0}/permutation.GPlusE.{1}.count{2}.txt".format(cache_folder, pheno.sid_count, permute_plus_count)
if cache_folder is not None and os.path.exists(permplus_table_fn):
permplus_table = pd.read_csv(permplus_table_fn, delimiter = '\t',index_col=False, comment=None)
else:
arg_list = []
for phen_target in pheno.sid:
pheno_one = pheno[:,pheno.col_to_index([phen_target])] # Look at only this pheno_target
alpha_corr, alpha_gxe2 = alpha_dict[phen_target]
for jackknife_index in range(-1,permute_plus_count):
# pheno, G_kernel, spatial_coor, spatial_iid, alpha, alpha_power, (jackknife_index, jackknife_count, jackknife_seed),
arg_tuple = (pheno_one, G_kernel, spatial_coor, spatial_iid, alpha_corr, alpha_power, (-1,0,None),
# (permute_plus_index, permute_plus_count, permute_plus_seed), (permute_times_index, permute_times_count, permute_times_seed) ,just_testing, do_uncorr, do_gxe2, a2
(jackknife_index, permute_plus_count,permute_plus_seed), (-1,0,None), just_testing, False, False, None)
arg_list.append(arg_tuple)
# Run "run_line" on each set of arguments and save to file
return_list = map_function(work_item, arg_list) if len(arg_list)>1 or always_remote else map(work_item, arg_list)
return_list = [line for line in return_list if line is not None] #Remove 'None' results
permplus_table = pd.DataFrame(return_list)
if cache_folder is not None:
_write_csv(permplus_table, False, permplus_table_fn)
#Create a table of the real nLL for each pheno
real_result_permplus = permplus_table[permplus_table.permute_plus_index==-1][['phen','nLLcorr']]
real_result_permplus.rename(columns={'nLLcorr':'nLLcorr_real'},inplace=True)
real_result_permplus.set_index(['phen'],inplace=True)
# Create a table of the permutation runs and add the real nLL to each row
perm_table = permplus_table[permplus_table.permute_plus_index!=-1]
result = perm_table.join(real_result_permplus, on='phen')
result['P(e2)'] = [1.0 if b else 0.0 for b in result.nLLcorr <= result.nLLcorr_real] # create a column showing where the perm is better (or as good) as the real
# Use pivottable to find the fraction of of times when permutation is better
pivot_table_plus = pd.pivot_table(result, values=['P(e2)'], index=['phen'], columns=[], aggfunc=np.mean)
if cache_folder is not None:
summary_permplus_table_fn = "{0}/summary.permutation.GPlusE.{1}.count{2}.txt".format(cache_folder, pheno.sid_count, permute_plus_count)
_write_csv(pivot_table_plus, True, summary_permplus_table_fn)
################################################
# compute p(gxe2=0) via permutation
################################################
#Only process phenos for which gxe2 is not 0
nonzero = set(results_gxe2[results_gxe2.gxe2 !=0].phen)
permtimes_phenotypes = set(pheno.sid) & nonzero #intersection
permtimes_table_list = []
for phen_target in permtimes_phenotypes:
permtimes_table_fn = "{0}/permutation.GxE/{1}.count{2}.txt".format(cache_folder, phen_target, permute_times_count)
if cache_folder is not None and os.path.exists(permtimes_table_fn):
permtime_results = pd.read_csv(permtimes_table_fn, delimiter = '\t',index_col=False, comment=None)
else:
arg_list = []
pheno_one = pheno[:,pheno.col_to_index([phen_target])] # Look at only this pheno_target
alpha_corr, alpha_gxe2 = alpha_dict[phen_target]
a2 = float(permplus_table[permplus_table.phen==phen_target][permplus_table.permute_plus_index == -1]['a2'])
for permute_index in range(-1,permute_times_count):
# pheno, G_kernel, spatial_coor, spatial_iid, alpha, alpha_powerm (permute_index, permute_count, permute_seed),
arg_tuple = (pheno_one, G_kernel, spatial_coor, spatial_iid, alpha_gxe2, alpha_power, (-1,0,None),
# (permute_plus_index, permute_plus_count, permute_plus_seed), (permute_times_index, permute_times_count, permute_times_seed) ,just_testing, do_uncorr, do_gxe2, a2
(-1,0,None), (permute_index, permute_times_count,permute_times_seed), just_testing, False, allow_gxe2, a2)
arg_list.append(arg_tuple)
# Run "run_line" on each set of arguments and save to file
return_list = map_function(work_item, arg_list) if len(arg_list)>1 or always_remote else map(work_item, arg_list)
return_list = [line for line in return_list if line is not None] #Remove 'None' results
permtime_results = pd.DataFrame(return_list)
if cache_folder is not None:
pstutil.create_directory_if_necessary(permtimes_table_fn)
_write_csv(permtime_results,False,permtimes_table_fn)
permtimes_table_list.append(permtime_results)
if permtimes_table_list: #not empty
permtimes_table = pd.concat(permtimes_table_list)
logging.info(permtimes_table.head())
#Create a table of the real nLL for each pheno
real_result_permtimes = permtimes_table[permtimes_table.permute_times_index==-1][['phen','nLL_gxe2']]
real_result_permtimes.rename(columns={'nLL_gxe2':'nLL_gxe2_real'},inplace=True)
real_result_permtimes.set_index(['phen'],inplace=True)
# Create a table of the permutation runs and add the real nLL to reach row
summary_permtimes_table_fn = "{0}/summary.permutation.GxE.{1}.count{2}.txt".format(cache_folder,len(permtimes_phenotypes), permute_times_count)
perm_table = permtimes_table[permtimes_table.permute_times_index!=-1]
resultx = perm_table.join(real_result_permtimes, on='phen')
resultx['P(gxe2)'] = [1.0 if b else 0.0 for b in resultx.nLL_gxe2 <= resultx.nLL_gxe2_real] # create a column showing where the perm is better (or as good) as the real
# Use pivottable to find the fraction of of times when permutation is better
pivot_table_times = pd.pivot_table(resultx, values=['P(gxe2)'], index=['phen'], columns=[], aggfunc=np.mean)
if cache_folder is not None:
_write_csv(pivot_table_times,True,summary_permtimes_table_fn)
#######################
# Create final table of results by combining the summary tables
#######################
#Rename some columns
results_corr.rename(columns={"h2uncorr SE":"SE (h2uncorr)","h2corr SE":"SE (h2corr)","e2 SE":"SE (e2)"}, inplace=True)
#Rename some columns and join results
results_gxe2.rename(columns={"alpha":"alpha_gxe2","gxe2 SE":"SE (gxe2)","h2corr_raw":"h2corr_raw_gxe2"}, inplace=True)
del results_gxe2['alpha_power']
results_gxe2.set_index(["phen"],inplace=True)
final0 = results_corr.join(results_gxe2, on='phen')
#Rename some columns and join results
pivot_table_plus.rename(columns={"P(e2)":"P(e2=0)"}, inplace=True)
final1 = final0.join(pivot_table_plus, on='phen')
#Rename some columns and join results
if permtimes_table_list: #not empty
pivot_table_times.rename(columns={"P(gxe2)":"P(gxe2=0)"}, inplace=True)
final2 = final1.join(pivot_table_times, on='phen')
else:
final2 = final1.copy()
final2["P(gxe2=0)"] = np.nan
#Rename 'phen' and select final columns
final2.rename(columns={"phen":"phenotype"}, inplace=True)
final3 = final2[["phenotype","h2uncorr","SE (h2uncorr)","h2corr","SE (h2corr)","P (diff=0)","e2","SE (e2)","P(e2=0)","alpha","alpha_gxe2","gxe2","SE (gxe2)","P(gxe2=0)"]].copy()
#Rename sort the phenotypes
final3['lower'] = [pheno_one.lower() for pheno_one in final3.phenotype]
final3.sort(['lower'],inplace=True)
del final3['lower']
if cache_folder is not None:
summary_final_table_fn = "{0}/summary.final.{1}.{2}.{3}.{4}.txt".format(cache_folder, pheno.sid_count, jackknife_count_actual,permute_plus_count,permute_times_count)
_write_csv(final3,False,summary_final_table_fn)
return final3
Votes: 0
Example 161
Project: FaST-LMM — Source File: feature_selection_two_kernel.py
def run_select(self, G0, G_bg, y, cov=None):
    """Set up and run two-kernel feature selection.

    For each cross-validation fold, ranks the candidate SNPs in ``G0``
    (by LMM or linear-regression p-values depending on
    ``self.order_by_lmm``), then for each candidate-set size in
    ``self.grid_k`` fits a two-kernel model (selected foreground SNPs plus
    background SNPs) and records out-of-sample performance in
    ``self.mse`` / ``self.ll``.  Finally re-ranks on the full data using
    the best ``k`` chosen by ``self.select_best_k()``.

    Parameters
    ----------
    G0 : numpy array of shape (num_ind, num_snps)
        Data matrix from which foreground snps will be selected
    G_bg : numpy array of shape (num_ind, num_snps)
        Data matrix containing background snps on which will be conditioned
    y : numpy vector of shape (num_ind, )
        Vector of phenotypes
    cov : numpy array of shape (num_ind, num_covariates) or None
        Covariates to be used as fixed effects

    Returns
    -------
    best_k, feat_idx, best_mix, best_delta : tuple(int, np.array(int), float, float)
        best_k is the best number of SNPs selected,
        feat_idx is a np.array of integers denoting the indices of these snps,
        best_mix is the best mixing coefficient between foreground and background kernel,
        best_delta is the best regularization coefficient
    """
    num_ind = len(y)
    # Default covariate is an intercept column; otherwise copy and scale the
    # user-supplied covariates so their mean squared value is 1.
    if cov is None:
        cov = np.ones((num_ind,1))
    else:
        logging.info("normalizing covariates")
        cov = cov.copy()
        cov = 1./np.sqrt((cov**2).sum() / float(cov.shape[0])) * cov
    cov.flags.writeable = False
    # normalize background data so the implied kernel has diag(K) = N
    norm_factor = 1./np.sqrt((G_bg**2).sum() / float(G_bg.shape[0]))
    # we copy in case G and G_bg are pointing to the same object
    G_bg = norm_factor * G_bg
    K_bg_full = G_bg.dot(G_bg.T)
    K_bg_full.flags.writeable = False
    # sanity check: trace of the normalized background kernel equals num_ind
    np.testing.assert_almost_equal(sum(np.diag(K_bg_full)), G_bg.shape[0])
    if self.debug:
        # for unit-variance SNPs the norm factor is approximately 1/sqrt(num_snps)
        norm_factor_check = 1./np.sqrt(G_bg.shape[1])
        np.testing.assert_array_almost_equal(norm_factor, norm_factor_check, decimal=1)
    for kfold_idx, (train_idx, test_idx) in enumerate(KFold(num_ind, n_folds=self.n_folds, random_state=self.random_state, shuffle=True)):
        t0 = time.time()
        logging.info("running fold: %i" % kfold_idx)
        y_train = y.take(train_idx, axis=0)
        y_test = y.take(test_idx, axis=0)
        G0_train = G0.take(train_idx, axis=0)
        G0_test = G0.take(test_idx, axis=0)
        G_bg_train = G_bg.take(train_idx, axis=0)
        G_bg_test = G_bg.take(test_idx, axis=0)
        cov_train = cov.take(train_idx, axis=0)
        cov_test = cov.take(test_idx, axis=0)
        # write protect data to catch accidental mutation downstream
        y_train.flags.writeable = False
        y_test.flags.writeable = False
        G0_train.flags.writeable = False
        G0_test.flags.writeable = False
        G_bg_train.flags.writeable = False
        G_bg_test.flags.writeable = False
        cov_train.flags.writeable = False
        cov_test.flags.writeable = False
        # precompute background kernel restricted to the training rows/cols
        K_bg_train = K_bg_full.take(train_idx, axis=0).take(train_idx, axis=1)
        K_bg_train.flags.writeable = False
        if self.measure != "mse":
            # test-test block is only needed for the log-likelihood measure
            K_bg_test = K_bg_full.take(test_idx, axis=0).take(test_idx, axis=1)
            K_bg_test.flags.writeable = False
        # rank features on the training fold
        if self.order_by_lmm:
            logging.info("using linear mixed model to rank features")
            t0 = time.time()
            gwas = FastGwas(G_bg_train, G0_train, y_train, delta=None, train_pcs=None, mixing=0.0, cov=cov_train)
            gwas.run_gwas()
            _pval = gwas.p_values
            logging.info("time taken: %s" % (str(time.time()-t0)))
        else:
            logging.info("using linear regression to rank features")
            _F,_pval = lin_reg.f_regression_block(lin_reg.f_regression_cov_alt, G0_train, y_train, blocksize=10000, C=cov_train)
        feat_idx = np.argsort(_pval)
        for k_idx, max_k in enumerate(self.grid_k):
            # take the top-max_k ranked SNPs as the foreground set
            feat_idx_subset = feat_idx[0:max_k]
            G_fs_train = G0_train.take(feat_idx_subset, axis=1)
            G_fs_test = G0_test.take(feat_idx_subset, axis=1)
            # normalize foreground data so sum(diag(K)) = N; the same factor
            # (computed from train) is applied to test for consistency
            norm_factor = 1./np.sqrt((G_fs_train**2).sum() / float(G_fs_train.shape[0]))
            G_fs_train *= norm_factor
            G_fs_test *= norm_factor
            G_fs_train.flags.writeable = False
            G_fs_test.flags.writeable = False
            # debug-only sanity checks on the normalization
            if self.debug:
                norm_factor_check = 1.0 / np.sqrt(max_k)
                np.testing.assert_array_almost_equal(norm_factor, norm_factor_check, decimal=1)
                np.testing.assert_almost_equal(sum(np.diag(G_fs_train.dot(G_fs_train.T))), G_fs_train.shape[0])
            logging.info("k: %i" % (max_k))
            # use LMM; pass G when low-rank (fewer snps than individuals),
            # otherwise pass the precomputed kernel K
            from fastlmm.inference.lmm_cov import LMM as fastLMM
            if G_bg_train.shape[1] <= G_bg_train.shape[0]:
                lmm = fastLMM(X=cov_train, Y=y_train[:,np.newaxis], G=G_bg_train)
            else:
                lmm = fastLMM(X=cov_train, Y=y_train[:,np.newaxis], K=K_bg_train)
            W = G_fs_train.copy()
            UGup,UUGup = lmm.rotate(W)
            i_up = np.zeros((G_fs_train.shape[1]), dtype=np.bool)
            i_G1 = np.ones((G_fs_train.shape[1]), dtype=np.bool)
            t0 = time.time()
            res = lmm.findH2_2K(nGridH2=10, minH2=0.0, maxH2=0.99999, i_up=i_up, i_G1=i_G1, UW=UGup, UUW=UUGup)
            logging.info("time taken for k=%i: %s" % (max_k, str(time.time()-t0)))
            # recover a2 (mixing weight) and delta from the alternate
            # (h2, h2_1) parameterization returned by findH2_2K
            a2 = res["h2_1"] / float(res["h2"] + res["h2_1"])
            h2 = res["h2"] + res["h2_1"]
            delta = (1-h2) / h2
            #res_cov = res
            # do final prediction using lmm.py
            from fastlmm.inference import LMM
            lmm = LMM(forcefullrank=False)
            lmm.setG(G0=G_bg_train, G1=G_fs_train, a2=a2)
            lmm.setX(cov_train)
            lmm.sety(y_train)
            # we take an additional step to estimate betas on covariates (not given from new model)
            res = lmm.nLLeval(delta=delta, REML=True)
            # predict on test set and record the fold/k performance
            lmm.setTestData(Xstar=cov_test, G0star=G_bg_test, G1star=G_fs_test)
            out = lmm.predictMean(beta=res["beta"], delta=delta)
            mse = mean_squared_error(y_test, out)
            logging.info("mse: %f" % (mse))
            self.mse[kfold_idx, k_idx] = mse
            self.mixes[kfold_idx, k_idx] = a2
            self.deltas[kfold_idx, k_idx] = delta
            if self.measure != "mse":
                # evaluate out-of-sample negative log likelihood on the
                # mixed test-test kernel
                K_test_test = a2 * G_fs_test.dot(G_fs_test.T) + (1.0-a2) * K_bg_test
                ll = lmm.nLLeval_test(y_test, res["beta"], sigma2=res["sigma2"], delta=delta, Kstar_star=K_test_test, robust=True)
                if self.debug:
                    # cross-check against the Kstar_star=None code path
                    ll2 = lmm.nLLeval_test(y_test, res["beta"], sigma2=res["sigma2"], delta=delta, Kstar_star=None, robust=True)
                    np.testing.assert_almost_equal(ll, ll2, decimal=4)
                logging.info("ll: %f" % (ll))
                self.ll[kfold_idx, k_idx] = ll
        logging.info("time taken for fold: %s" % str(time.time()-t0))
    best_k, best_mix, best_delta = self.select_best_k()
    logging.info("best_k: %i, best_mix: %f, best_delta: %f" % (best_k, best_mix, best_delta))
    # final scan on the full data with the selected k
    if self.order_by_lmm:
        logging.info("final scan using LMM")
        gwas = FastGwas(G_bg, G0, y, delta=None, train_pcs=None, mixing=0.0, cov=cov)
        gwas.run_gwas()
        _pval = gwas.p_values
        feat_idx = np.argsort(_pval)[0:best_k]
    else:
        logging.info("final scan using LR")
        # NOTE(review): unlike the LMM branch, feat_idx is not re-derived
        # from this full-data _pval here, so the last fold's ranking is
        # what gets returned — confirm this is intended.
        _F,_pval = lin_reg.f_regression_block(lin_reg.f_regression_cov_alt, G0, y, C=cov, blocksize=10000)
    logging.info("number of snps selected: %i" % (best_k))
    return best_k, feat_idx, best_mix, best_delta
Votes: 0
Example 162
Project: FaST-LMM — Source File: test_fastlmm_predictor.py
def test_lmm(self):
    """End-to-end test of FastLMM prediction with precomputed kernels.

    Builds two synthetic 500-individual kernels (K0a: 5 "clone" families
    with 0/1 relatedness; K0b: relatedness decaying with distance on a
    line), simulates phenotypes as 2*covar + 100 + noise + kernel effect,
    fits FastLMM, round-trips the model through joblib, and checks
    predictions (test-on-train, test-on-test, single cases, and a mixed
    train/test subset) against each other and saved reference files.

    Python 2 code: uses ``xrange`` and ``list``-returning ``range``.
    """
    do_plot = False
    iid_count = 500
    seed = 0
    import pylab
    logging.info("TestLmmTrain test_lmm")
    # Two-part iids; the second part splits individuals into two "P" groups of 250.
    iid = [["cid{0}P{1}".format(iid_index,iid_index//250)]*2 for iid_index in xrange(iid_count)]
    train_idx = np.r_[10:iid_count] # iids 10 and on
    test_idx = np.r_[0:10] # the first 10 iids
    #Every person is 100% related to everyone in one of 5 families
    K0a = KernelData(iid=iid,val=np.empty([iid_count,iid_count]),name="related by distance")
    for iid_index0 in xrange(iid_count):
        for iid_index1 in xrange(iid_count):
            K0a.val[iid_index0,iid_index1] = 1 if iid_index0 % 5 == iid_index1 % 5 else 0
            if iid_index1 < iid_index0:
                # kernel must be symmetric
                assert K0a.val[iid_index0,iid_index1] == K0a.val[iid_index1,iid_index0]
    #every person lives on a line from 0 to 1
    # They are related to every other person as a function of distance on the line
    np.random.seed(seed)
    home = np.random.random([iid_count])
    K0b = KernelData(iid=iid,val=np.empty([iid_count,iid_count]),name="related by distance")
    for iid_index in xrange(iid_count):
        K0b.val[iid_index,:] = 1 - np.abs(home-home[iid_index])**.1
    #make covar just numbers 0,1,...
    covar = SnpData(iid=iid,sid=["x"],val=np.array([[float(num)] for num in xrange(iid_count)]))
    covariate_train = covar[train_idx,:].read()
    covariate_test = covar[test_idx,:].read()
    # Run the whole scenario for both kernels / heritabilities.
    for name, h2, K0 in [("clones", 1, K0a),("line_world",.75,K0b)]:
        sigma2x = 100
        varg = sigma2x * h2      # genetic (kernel) variance component
        vare = sigma2x * (1-h2)  # environmental noise variance component
        #######################################################################
        #make pheno # pheno = 2*covar+100+normal(0,1)*2.5+normal(0,K)*7.5
        #######################################################################
        #random.multivariate_normal is sensitive to mkl_num_thread, so we control it.
        if 'MKL_NUM_THREADS' in os.environ:
            mkl_num_thread = os.environ['MKL_NUM_THREADS']
        else:
            mkl_num_thread = None
        os.environ['MKL_NUM_THREADS'] = '1'
        np.random.seed(seed)
        p1 = covar.val * 2.0 + 100
        p2 = np.random.normal(size=covar.val.shape)*np.sqrt(vare)
        p3 = (np.random.multivariate_normal(np.zeros(iid_count),K0.val)*np.sqrt(varg)).reshape(-1,1)
        # restore the caller's MKL_NUM_THREADS setting
        if mkl_num_thread is not None:
            os.environ['MKL_NUM_THREADS'] = mkl_num_thread
        else:
            del os.environ['MKL_NUM_THREADS']
        pheno = SnpData(iid=iid,sid=["pheno0"],val= p1 + p2 + p3)
        pheno_train = pheno[train_idx,:].read()
        pheno_test = pheno[test_idx,:].read()
        if do_plot:
            #Plot training x and y, testing x and y
            pylab.plot(covariate_train.val, pheno_train.val,".",covariate_test.val, pheno_test.val,".")
            pylab.suptitle(name + ": Plot training x and y, testing x and y")
            pylab.show()
        # Baseline: plain linear regression (least squares with intercept).
        Xtrain = np.c_[covariate_train.val,np.ones((covariate_train.iid_count,1))]
        Xtest = np.c_[covariate_test.val,np.ones((covariate_test.iid_count,1))]
        lsqSol = np.linalg.lstsq(Xtrain, pheno_train.val[:,0])
        bs=lsqSol[0] #weights
        r2=lsqSol[1] #squared residuals
        D=lsqSol[2] #rank of design matrix
        N=pheno_train.iid_count
        REML = False
        if not REML:
            sigma2 = float(r2/N)
            nLL = N*0.5*np.log(2*np.pi*sigma2) + N*0.5
        else:
            sigma2 = float(r2 / (N-D))
            nLL = N*0.5*np.log(2*np.pi*sigma2) + 0.5/sigma2*r2;
            nLL -= 0.5*D*np.log(2*np.pi*sigma2);#REML term
        predicted = Xtest.dot(bs)
        yerr = [np.sqrt(sigma2)] * len(predicted)
        if do_plot:
            pylab.plot(covariate_test.val, pheno_test.val,"g.",covariate_test.val, predicted,"r.")
            pylab.xlim([-1, 10])
            pylab.errorbar(covariate_test.val, predicted,yerr,linestyle='None')
            pylab.suptitle(name + ": real linear regression: actual to prediction")
            pylab.show()
        # Predictions should be invariant to rescaling the kernel.
        for factor in [1,100,.02]:
            K0 = K0.read()
            K0.val *= factor
            K0_train = K0[train_idx]
            K0_whole_test = K0[:,test_idx]
            #Learn model, save, load
            fastlmmx = FastLMM(GB_goal=2).fit(K0_train=K0_train, X=covariate_train, y=pheno_train)
            v2 = np.var(p2)
            v3 = np.var(p3)
            logging.debug("Original h2 of {0}. Generated h2 of {1}. Learned h2 of {2}".format(h2, v3/(v2+v3), fastlmmx.h2raw))
            filename = self.tempout_dir + "/model_lmm.flm.p"
            pstutil.create_directory_if_necessary(filename)
            # round-trip the fitted model through disk
            joblib.dump(fastlmmx, filename)
            fastlmm = joblib.load(filename)
            do_test_on_train = True
            if do_test_on_train:
                #Predict with model (test on train)
                predicted_pheno, covar_pheno = fastlmm.predict(K0_whole_test=K0_train, X=covariate_train) #test on train
                output_file = self.file_name("lmma_"+name)
                Dat.write(output_file,predicted_pheno)
                covar2 = SnpData(iid=covar_pheno.row,sid=covar_pheno.col[:,1],val=covar_pheno.val) #kludge to write kernel to text format
                output_file = self.file_name("lmma.cov_"+name)
                Dat.write(output_file,covar2)
                yerr = np.sqrt(np.diag(covar_pheno.val))
                predicted = predicted_pheno.val
                if do_plot:
                    pylab.plot(covariate_train.val, pheno_train.val,"g.",covariate_train.val, predicted,"r.")
                    pylab.xlim([0, 50])
                    pylab.ylim([100, 200])
                    pylab.errorbar(covariate_train.val, predicted,yerr,linestyle='None')
                    pylab.suptitle(name+": test on train: train X to true target (green) and prediction (red)")
                    pylab.show()
                # compare against saved reference outputs
                self.compare_files(predicted_pheno,"lmma_"+name)
                self.compare_files(covar2,"lmma.cov_"+name)
                predicted_pheno0, covar_pheno0 = fastlmm.predict(K0_whole_test=K0_train[:,0], X=covariate_train[0,:]) #test on train #0
                assert np.abs(predicted_pheno0.val[0,0] - predicted_pheno.val[0,0]) < 1e-6, "Expect a single case to get the same prediction as a set of cases"
                assert np.abs(covar_pheno0.val[0,0] - covar_pheno.val[0,0]) < 1e-6, "Expect a single case to get the same prediction as a set of cases"
            #Predict with model (test on test)
            predicted_phenoB, covar_phenoB = fastlmm.predict(K0_whole_test=K0_whole_test, X=covariate_test) #test on test
            output_file = self.file_name("lmmb_"+name)
            Dat.write(output_file,predicted_phenoB)
            covar2 = SnpData(iid=covar_phenoB.row,sid=covar_phenoB.col[:,1],val=covar_phenoB.val) #kludge to write kernel to text format
            output_file = self.file_name("lmmb.cov_"+name)
            Dat.write(output_file,covar2)
            yerr = np.sqrt(np.diag(covar_phenoB.val))
            predicted = predicted_phenoB.val
            if do_plot:
                pylab.plot(covariate_test.val, pheno_test.val,"g.",covariate_test.val, predicted,"r.")
                pylab.xlim([-1, 10])
                pylab.errorbar(covariate_test.val, predicted,yerr,linestyle='None')
                pylab.suptitle(name+": test on test: test X to true target (green) and prediction (red)")
                pylab.show()
            self.compare_files(predicted_phenoB,"lmmb_"+name)
            self.compare_files(covar2,"lmmb.cov_"+name)
            predicted_phenoB0, covar_phenoB0 = fastlmm.predict(K0_whole_test=K0_whole_test[:,0], X=covariate_test[0,:]) #test on a single test case
            assert np.abs(predicted_phenoB0.val[0,0] - predicted_phenoB.val[0,0]) < 1e-6, "Expect a single case to get the same prediction as a set of cases"
            assert np.abs(covar_phenoB0.val[0,0] - covar_phenoB.val[0,0]) < 1e-6, "Expect a single case to get the same prediction as a set of cases"
            #Predict with model test on some train and some test
            some_idx = range(covar.iid_count)  # Python 2: range returns a list, so .remove works
            some_idx.remove(train_idx[0])
            some_idx.remove(test_idx[0])
            covariate_some = covar[some_idx,:]
            K0_whole_some = K0[:,some_idx]
            predicted_phenoC, covar_phenoC = fastlmm.predict(K0_whole_test=K0_whole_some, X=covariate_some)
            # each mixed-subset prediction must match its train- or test-set counterpart
            for idxC, iidC in enumerate(predicted_phenoC.iid):
                meanC = predicted_phenoC.val[idxC]
                varC = covar_phenoC.val[idxC,idxC]
                if iidC in predicted_pheno.iid:
                    predicted_pheno_ref = predicted_pheno
                    covar_pheno_ref = covar_pheno
                else:
                    assert iidC in predicted_phenoB.iid
                    predicted_pheno_ref = predicted_phenoB
                    covar_pheno_ref = covar_phenoB
                idx_ref = predicted_pheno_ref.iid_to_index([iidC])[0]
                mean_ref = predicted_pheno_ref.val[idx_ref]
                var_ref = covar_pheno_ref.val[idx_ref,idx_ref]
                assert np.abs(meanC - mean_ref) < 1e-6
                assert np.abs(varC - var_ref) < 1e-6
Votes: 0
Example 163
Project: readerisdead — Source File: reader_archive.py
def main():
    """Archive a Google Reader account to a local directory tree.

    Flow: parse command-line args -> create output sub-directories ->
    authenticate (ClientLogin or OAuth) -> save preferences -> gather
    stream ids -> fetch item refs -> fetch item bodies in parallel ->
    fetch comments for shared-item ("broadcast") streams -> write comments
    and a README pointing at the archive-format documentation.

    Python 2 code: uses ``dict.iteritems`` and ``logging.warn``.
    """
    base.log.init()
    base.atom.init()
    parser = argparse.ArgumentParser(
        description='Comprehensive archive of a Google Reader account')
    # Credentials
    parser.add_argument('--use_client_login' ,action='store_true',
                        help='Instead of OAuth, use ClientLogin for '
                             'authentication. You will be prompted for a '
                             'username and password')
    parser.add_argument('--oauth_refresh_token', default='',
                        help='A previously obtained refresh token (used to bypass '
                             'OAuth setup')
    parser.add_argument('--account', default='',
                        help='Google Account to save the archive for. Omit to '
                             'specify via standard input')
    parser.add_argument('--password', default='',
                        help='Password for the account. Omit to specify via '
                             'standard input')
    # Output options
    parser.add_argument('--output_directory', default='./',
                        help='Directory where to place archive data.')
    # Fetching options
    parser.add_argument('--stream_items_chunk_size', type=int, default=10000,
                        help='Number of items refs to request per stream items '
                             'API call (higher is more efficient)')
    parser.add_argument('--max_items_per_stream', type=int, default=0,
                        help='If non-zero, will cap the number of items that are '
                             'fetched per feed or tag')
    parser.add_argument('--item_bodies_chunk_size', type=int, default=250,
                        help='Number of items refs per request for fetching their '
                             'bodies (higher is more efficient)')
    parser.add_argument('--comments_chunk_size', type=int, default=250,
                        help='Number of items per request for fetching comments '
                             'on shared items (higher is more efficient)')
    parser.add_argument('--max_streams', type=int, default=0,
                        help='Maxmium number of streams to archive (0 for no'
                             'limit, only mean to be used for development)')
    parser.add_argument('--parallelism', type=int, default=10,
                        help='Number of requests to make in parallel.')
    parser.add_argument('--http_retry_count', type=int, default=1,
                        help='Number of retries to make in the case of HTTP '
                             'request errors.')
    # Miscellaneous.
    parser.add_argument('--additional_item_refs_file_path', default='',
                        help='Path to JSON file listing additional tag item refs '
                             'to fetch')
    args = parser.parse_args()
    output_directory = base.paths.normalize(args.output_directory)
    base.paths.ensure_exists(output_directory)
    def output_sub_directory(name):
        # Create (if needed) and return a sub-directory of the output directory.
        directory_path = os.path.join(output_directory, name)
        base.paths.ensure_exists(directory_path)
        return directory_path
    api_responses_directory = output_sub_directory('_raw_data')
    streams_directory = output_sub_directory('streams')
    data_directory = output_sub_directory('data')
    items_directory = output_sub_directory('items')
    comments_directory = output_sub_directory('comments')
    # Choose the authentication mechanism based on the flags.
    if args.use_client_login:
        authenticated_url_fetcher = base.url_fetcher.ClientLoginUrlFetcher(
            args.account, args.password)
    else:
        authenticated_url_fetcher = base.url_fetcher.OAuthUrlFetcher(
            args.oauth_refresh_token)
    api = base.api.Api(
        authenticated_url_fetcher=authenticated_url_fetcher,
        http_retry_count=args.http_retry_count,
        cache_directory=api_responses_directory)
    user_info = api.fetch_user_info()
    logging.info(
        'Created API instance for %s (%s)', user_info.user_id, user_info.email)
    logging.info('Saving preferences')
    _save_preferences(api, data_directory)
    logging.info('Gathering streams to fetch')
    stream_ids = _get_stream_ids(api, user_info.user_id, data_directory)
    if args.max_streams and len(stream_ids) > args.max_streams:
        stream_ids = stream_ids[:args.max_streams]
    logging.info('%d streams to fetch, gathering item refs:', len(stream_ids))
    item_ids, item_refs_total = _fetch_and_save_item_refs(
        stream_ids, api, args, streams_directory, user_info.user_id)
    logging.info('%s unique items refs (%s total), grouping by chunk.',
                 '{:,}'.format(len(item_ids)),
                 '{:,}'.format(item_refs_total))
    logging.info('Grouped item refs, getting item bodies:')
    item_ids_chunks = _chunk_item_ids(item_ids, args.item_bodies_chunk_size)
    item_bodies_to_fetch = len(item_ids)
    # Single-element list so the nested progress callback can mutate the count
    # (Python 2 has no `nonlocal`).
    fetched_item_bodies = [0]
    missing_item_bodies = set()
    def report_item_bodies_progress(requested_item_ids, found_item_ids):
        # Progress callback for the item-body workers; tracks fetched and
        # missing item ids and logs a running total.
        if found_item_ids is None:
            missing_item_bodies.update(set(requested_item_ids).difference(
                base.api.not_found_items_ids_to_ignore))
            return
        fetched_item_bodies[0] += len(found_item_ids)
        missing_item_bodies.update(
            set(requested_item_ids).difference(set(found_item_ids)).difference(
                base.api.not_found_items_ids_to_ignore))
        logging.info(' Fetched %s/%s item bodies (%s could not be loaded)',
                     '{:,}'.format(fetched_item_bodies[0]),
                     '{:,}'.format(item_bodies_to_fetch),
                     '{:,}'.format(len(missing_item_bodies)))
    base.worker.do_work(
        lambda: FetchWriteItemBodiesWorker(api, items_directory),
        item_ids_chunks,
        args.parallelism,
        report_progress=report_item_bodies_progress)
    if missing_item_bodies:
        logging.warn('Item bodies could not be loaded for: %s',
                     ', '.join([i.compact_form() for i in missing_item_bodies]))
    # "Broadcast" streams hold the user's shared items; only those have comments.
    broadcast_stream_ids = [
        stream_id for stream_id in stream_ids
        if stream_id.startswith('user/') and
        stream_id.endswith('/state/com.google/broadcast')
    ]
    logging.info(
        'Fetching comments from %d shared item streams.',
        len(broadcast_stream_ids))
    encoded_sharers = api.fetch_encoded_sharers()
    # Same mutable-list trick as above for the comments progress callback.
    remaining_broadcast_stream_ids = [len(broadcast_stream_ids)]
    def report_comments_progress(_, comments_by_item_id):
        # Progress callback for the comment workers.
        if comments_by_item_id is None:
            return
        remaining_broadcast_stream_ids[0] -= 1
        comment_count = sum((len(c) for c in comments_by_item_id.values()), 0)
        logging.info(' Fetched %s comments, %s shared items streams left.',
                     '{:,}'.format(comment_count),
                     '{:,}'.format(remaining_broadcast_stream_ids[0]))
    all_comments = {}
    comments_for_broadcast_streams = base.worker.do_work(
        lambda: FetchCommentsWorker(
            api, encoded_sharers, args.comments_chunk_size),
        broadcast_stream_ids,
        args.parallelism,
        report_progress=report_comments_progress)
    total_comment_count = 0
    # Merge per-stream comment results into one item_id -> comments mapping.
    for comments_for_broadcast_stream in comments_for_broadcast_streams:
        if not comments_for_broadcast_stream:
            continue
        for item_id, comments in comments_for_broadcast_stream.iteritems():
            total_comment_count += len(comments)
            all_comments.setdefault(item_id, []).extend(comments)
    logging.info('Writing %s comments from %s items.',
                 '{:,}'.format(total_comment_count),
                 '{:,}'.format(len(all_comments)))
    # Write each item's comments as a JSON file in a sharded directory layout.
    for item_id, comments in all_comments.items():
        item_comments_file_path = os.path.join(base.paths.item_id_to_file_path(
            comments_directory, item_id), item_id.compact_form())
        base.paths.ensure_exists(os.path.dirname(item_comments_file_path))
        with open(item_comments_file_path, 'w') as item_comments_file:
            item_comments_file.write(json.dumps([c.to_json() for c in comments]))
    with open(os.path.join(output_directory, 'README'), 'w') as readme_file:
        readme_file.write('See https://github.com/mihaip/readerisdead/'
                          'wiki/reader_archive-Format.\n')
Votes: 0
Example 164
Project: streamspigot — Source File: twitterdisplay.py
def body_as_html(self):
    """Render this tweet's text and media as an HTML fragment.

    Walks the status' entities (hashtags, URLs, user mentions, media) in
    text order, replacing each text span with an anchor and queueing
    thumbnails, iframes or <video> tags into a footer paragraph that is
    flushed after the tweet text. Returns the assembled HTML string.
    """
    status = self._status
    # Ordered chunks of the final HTML; the closures below append to these.
    text_as_html = []
    footer_as_html = []
    def add_status_chunks(status, skip_entity_urls=[]):
        # NOTE(review): mutable default argument; harmless here only
        # because skip_entity_urls is never mutated — confirm if edited.
        # Gather every entity type into one list, keeping only entities
        # with a valid [start, end) span in the tweet text.
        entities = list(
            (status.hashtags or []) +
            (status.urls or []) +
            (status.user_mentions or []) +
            (status.medias or []))
        entities = [e for e in entities
                    if e.start_index != -1 and e.end_index != -1]
        # Python 2 cmp-style sort by position in the tweet text.
        entities.sort(cmp=lambda e1,e2: e1.start_index - e2.start_index)
        last_entity_start = 0
        last_entity_end = 0
        for e in entities:
            # Emit the plain text between the previous entity and this one.
            add_tweet_chunk(status.text[last_entity_end:e.start_index])
            entity_anchor_text = status.text[e.start_index:e.end_index]
            entity_url = None
            if isinstance(e, twitter.Hashtag):
                entity_url = 'search?q=%23' + e.text
            elif isinstance(e, twitter.Url):
                entity_url = e.expanded_url or e.url
                entity_url_anchor_text = \
                    e.display_url or e.expanded_url or e.url
                if entity_url_anchor_text:
                    entity_anchor_text = escape(entity_url_anchor_text)
                maybe_add_thumbnail_chunk(e.expanded_url or e.url)
            elif isinstance(e, twitter.User):
                entity_url = e.screen_name
            elif isinstance(e, twitter.Media):
                def add_media_thumbnail():
                    # Closes over the loop variable e; only invoked within
                    # this same iteration, so the binding is safe.
                    link_url, _, _ = e.GetUrlForSize(
                        twitter.Media.LARGE_SIZE)
                    thumb_url, thumb_width, thumb_height = e.GetUrlForSize(
                        twitter.Media.THUMB_SIZE
                        if self._thumbnail_size ==
                            thumbnails.SMALL_THUMBNAIL
                        else twitter.Media.MEDIUM_SIZE)
                    add_footer_thumbnail_chunk(
                        link_url , thumb_url, thumb_width, thumb_height)
                entity_url = e.url
                entity_url_anchor_text = \
                    e.display_url or e.expanded_url or e.url
                if entity_url_anchor_text:
                    entity_anchor_text = escape(entity_url_anchor_text)
                if e.type == 'photo':
                    add_media_thumbnail()
                elif e.type == 'animated_gif' or e.type == 'video':
                    if e.video_variants:
                        video_attributes = [
                            'loop="loop"',
                            'muted="muted"',
                            'autoplay="autoplay"',
                            # Even though we don't normally want controls,
                            # NewsBlur strips out the autoplay attribute,
                            # so they're needed to initiate playback on the
                            # desktop.
                            'controls="controls"',
                            'poster="%s"' % e.media_url,
                        ]
                        width = None
                        height = None
                        size = e.sizes.get(twitter.Media.MEDIUM_SIZE)
                        if size:
                            width = size[0]
                            height = size[1]
                        add_footer_video_chunk(
                            e.video_variants,
                            " ".join(video_attributes),
                            width,
                            height)
                    else:
                        # No playable variants; fall back to a still image.
                        add_media_thumbnail()
                else:
                    logging.info("Unknown media type: %s", e.type)
            # Don't display the entity if it's outside the display range.
            # We only hide entities after the end of the display text
            # range, we still want to display usernames at the start of
            # the text since it's easier to scan.
            if status.display_text_range:
                if e.start_index >= status.display_text_range[1]:
                    last_entity_start = e.start_index
                    last_entity_end = e.end_index
                    continue
            if e.start_index == last_entity_start and \
                    e.end_index == last_entity_end:
                # For tweets with multiple pictures we will get multiple
                # entities that point to the same span of text in the
                # tweet. We want to insert thumbnails for each one, but only
                # add one anchor.
                continue
            if entity_url:
                if entity_url not in skip_entity_urls:
                    add_raw_chunk('<a href="')
                    add_escaped_chunk(entity_url)
                    add_raw_chunk('" %s>' % _LINK_ATTRIBUTES)
                    add_tweet_chunk(entity_anchor_text)
                    add_raw_chunk('</a>')
                else:
                    # Suppressed link (e.g. the self-link of a quoted
                    # tweet); keep only the anchor text.
                    add_tweet_chunk(entity_anchor_text)
            last_entity_start = e.start_index
            last_entity_end = e.end_index
        # Emit the remaining text after the last entity.
        if status.display_text_range:
            add_tweet_chunk(
                status.text[last_entity_end:status.display_text_range[1]])
        else:
            add_tweet_chunk(status.text[last_entity_end:])
        # Flush any queued thumbnails/videos as a trailing paragraph.
        if footer_as_html:
            add_raw_chunk('<p>')
            text_as_html.extend(footer_as_html)
            add_raw_chunk('</p>')
            del footer_as_html[:]
    escape = xml.sax.saxutils.escape
    def add_raw_chunk(chunk):
        # Append pre-escaped/trusted HTML verbatim.
        text_as_html.append(chunk)
    def add_tweet_chunk(chunk):
        # Unescape then and re-escape everything so that we can have a
        # consistent level of escaping.
        chunk = _unescape_tweet_chunk(chunk)
        # We also remove control characters (which are not allowed in XML)
        # now, instead of earlier, since otherwise all of the entity offsets
        # would be wrong.
        chunk = base.util.strip_control_characters(chunk)
        # Insert zero-width spaces after punctuation and every so often in
        # longer tokens to make sure that the display wraps. Has to be done
        # this way since NewsBlur's CSS whitelist does not allow
        # "word-break: break-word" and its HTML whitelist does not allow
        # <wbr> tags.
        run_length = 0
        chunk_with_breaks = u""
        for c in chunk:
            chunk_with_breaks += c
            run_length += 1
            if c in string.whitespace:
                run_length = 0
            elif c in string.punctuation or run_length > 24:
                chunk_with_breaks += u"\u200B"
                run_length = 0
        chunk = chunk_with_breaks
        # HTML-escape
        chunk = escape(chunk)
        # Convert newlines to HTML (Twitter seems to normalize all line
        # endings to \n).
        chunk = chunk.replace('\n', '<br/>')
        add_raw_chunk(chunk)
    def add_escaped_chunk(chunk):
        add_raw_chunk(escape(chunk))
    def add_footer_raw_chunk(chunk):
        footer_as_html.append(chunk)
    def add_footer_thumbnail_chunk(
            link_url, thumb_url, thumb_width, thumb_height):
        img_styles = ['padding:2px']
        img_attributes = ''
        # Force the width to be "100%" and reset the margins to override the
        # "full bleed" style set by NewsBlur (see https://github.com/
        # samuelclay/NewsBlur/commit/93c4ddfc30e6b126118e07e76bdf367ff84b).
        # There needs to be a space between the value and !important since
        # its CSS sanitizer breaks up tokens via whitespace only (
        # https://github.com/samuelclay/NewsBlur/blob/
        # 4aead01e3442eadfcbb7e5cf451e55184386a/utils/feedparser.py#L2539)
        # The triggering conditions match the NB-large-image class being
        # added in https://github.com/samuelclay/NewsBlur/blob/
        # fb3b37a46028a1222be2f1f5f6f0cea63e895666/clients/ios/static/
        # storyDetailView.js#L63
        if thumb_width >= 320-24 and thumb_height >= 50 or \
                (not thumb_width and not thumb_height and
                    self._thumbnail_size == thumbnails.LARGE_THUMBNAIL):
            img_styles.append('width:100% !important')
            img_styles.append('margin: 0 !important')
        if thumb_width and thumb_height:
            img_attributes = ' width="%d" height="%d"' % (
                thumb_width, thumb_height)
        add_footer_raw_chunk(
            '<a href="%s" border="0">'
            '<img src="%s" alt="" style="%s"%s/>'
            '</a>' % (
                escape(link_url),
                escape(thumb_url),
                ";".join(img_styles),
                img_attributes
            ))
    def add_footer_iframe_chunk(iframe_url, iframe_width, iframe_height):
        # "frameborder" is not a whitelisted HTML attribute in NewsBlur.
        # "border" is not on its CSS whitelist either, but "border-color"
        # is.
        iframe_attributes = ' style="border-color: transparent"'
        if iframe_width and iframe_height:
            iframe_attributes += ' width="%d" height="%d"' % (
                iframe_width, iframe_height)
        add_footer_raw_chunk(
            '<iframe src="%s" frameborder="0"%s allowfullscreen="true"></iframe>'
            % (escape(iframe_url), iframe_attributes))
    def add_footer_video_chunk(
            video_variants, video_attributes, width=None, height=None):
        if width:
            video_attributes += (' width="%d" '
                'style="width:100%%;max-width:%dpx"') % (width, width)
        add_footer_raw_chunk('<video %s>' % video_attributes)
        for variant in video_variants:
            if variant.url:
                add_footer_raw_chunk('<source src="%s" type="%s"/>' % (
                    variant.url, variant.content_type or ''))
        add_footer_raw_chunk('</video>')
    def maybe_add_thumbnail_chunk(url):
        # Prefer an embeddable iframe; otherwise fall back to a thumbnail
        # image, if the thumbnail service recognizes the URL.
        iframe_url, iframe_width, iframe_height = \
            thumbnails.get_iframe_info(url)
        if iframe_url:
            add_footer_iframe_chunk(
                iframe_url, iframe_width, iframe_height)
            return
        thumb_url, thumb_width, thumb_height = \
            thumbnails.get_thumbnail_info(url, self._thumbnail_size)
        if thumb_url:
            add_footer_thumbnail_chunk(
                url, thumb_url, thumb_width, thumb_height)
    def add_status(status):
        # Recursively renders retweets and quoted tweets with an
        # attribution prefix/bubble around the inner status.
        if status.retweeted_status:
            add_raw_chunk('RT: <a href="')
            add_escaped_chunk(status.retweeted_status.user.screen_name)
            add_raw_chunk('" %s>@' % _LINK_ATTRIBUTES)
            add_escaped_chunk(status.retweeted_status.user.screen_name)
            add_raw_chunk('</a>: ')
            add_status(status.retweeted_status)
        elif status.quoted_status:
            quoted_screen_name = status.quoted_status.user.screen_name
            # Skip the quoting tweet's own link to the quoted tweet, since
            # the quoted tweet is rendered inline below.
            add_status_chunks(status, skip_entity_urls=[
                "https://twitter.com/%s/status/%s" %
                (quoted_screen_name, status.quoted_status.id)
            ])
            add_raw_chunk('<div style="padding:10px;margin:5px 0;background:%s">' %
                CONSTANTS.BUBBLE_QUOTED_COLOR)
            add_raw_chunk('<a href="')
            add_escaped_chunk(quoted_screen_name)
            add_raw_chunk('" %s>@' % _LINK_ATTRIBUTES)
            add_escaped_chunk(quoted_screen_name)
            add_raw_chunk('</a>: ')
            add_status(status.quoted_status)
            add_raw_chunk('</div>')
        else:
            add_status_chunks(status)
    add_status(status)
    result = ''.join(text_as_html)
    return result
0
Example 165
Project: spym Source File: cpu.py
def execute_single(self, instr):
if instr.name == 'add':
# add rd, rs, rt
rd = instr.ops[0]
rs = self.r.read(instr.ops[1])
rt = self.r.read(instr.ops[2])
self.r.write(rd, rs + rt)
elif instr.name == 'addu':
# TODO: make this actually work, because right now it's identical
# to addu
# addu rd, rs, rt
rd = instr.ops[0]
rs = self.r.read(instr.ops[1])
rt = self.r.read(instr.ops[2])
self.r.write(rd, rs + rt)
elif instr.name == 'addi':
# addi rt, rs, imm
rd = instr.ops[0]
rs = self.r.read(instr.ops[1])
imm = get_imm(instr.ops[2])
self.r.write(rd, rs + imm)
elif instr.name == 'addiu':
# TODO: make this actually work, because right now it's identical
# to addi
# addiu rt, rs, imm
rd = instr.ops[0]
rs = self.r.read(instr.ops[1])
imm = get_imm(instr.ops[2])
self.r.write(rd, rs + imm)
elif instr.name == 'sub':
# sub rd, rs, rt
rd = instr.ops[0]
rs = self.r.read(instr.ops[1])
rt = self.r.read(instr.ops[2])
self.r.write(rd, rs - rt)
elif instr.name == 'and':
# and rd, rs, rt
rd = instr.ops[0]
rs = self.r.read(instr.ops[1])
rt = self.r.read(instr.ops[2])
self.r.write(rd, rs & rt)
elif instr.name == 'andi':
# andi rt, rs, imm
rd = instr.ops[0]
rs = self.r.read(instr.ops[1])
imm = get_imm(instr.ops[2])
self.r.write(rd, rs & imm)
elif instr.name == 'or':
# or rd, rs, rt
rd = instr.ops[0]
rs = self.r.read(instr.ops[1])
rt = self.r.read(instr.ops[2])
self.r.write(rd, rs | rt)
elif instr.name == 'ori':
# ori rt, rs, imm
rd = instr.ops[0]
rs = self.r.read(instr.ops[1])
imm = get_imm(instr.ops[2])
self.r.write(rd, rs | imm)
elif instr.name == 'xor':
# xor rd, rs, rt
rd = instr.ops[0]
rs = self.r.read(instr.ops[1])
rt = self.r.read(instr.ops[2])
self.r.write(rd, rs ^ rt)
elif instr.name == 'xori':
# xori rt, rs, imm
rd = instr.ops[0]
rs = self.r.read(instr.ops[1])
imm = get_imm(instr.ops[2])
self.r.write(rd, rs ^ imm)
elif instr.name == 'sll':
# sll rd, rt, shamt
rd = instr.ops[0]
rs = self.r.read(instr.ops[1])
shamt = get_imm(instr.ops[2])
self.r.write(rd, rt << shamt)
elif instr.name == 'srl':
# srl rd, rt, shamt
rd = instr.ops[0]
rs = self.r.read(instr.ops[1])
shamt = get_imm(instr.ops[2])
self.r.write(rd, rt >> shamt)
elif instr.name == 'sllv':
# sllv rd, rt, rs
rd = instr.ops[0]
rs = self.r.read(instr.ops[1])
rt = self.r.read(instr.ops[2])
self.r.write(rd, rt << rs)
elif instr.name == 'srlv':
# srlv rd, rs, rt
rd = instr.ops[0]
rs = self.r.read(instr.ops[1])
rt = self.r.read(instr.ops[2])
self.r.write(rd, rs >> rt)
elif instr.name == 'slt':
# slt rd, rs, rt
tmp = 1 if self.r.read(instr.ops[1]) < self.r.read(instr.ops[2]) else 0
self.r.write(instr.ops[0], tmp)
elif instr.name == 'slti':
# slti rd, rs, imm
rs = instr.ops[1]
imm = get_imm(instr.ops[2])
tmp = 1 if self.r.read(rs) < imm else 0
self.r.write(instr.ops[0], tmp)
elif instr.name == 'beq':
# beq rs, rt, label
# TODO: the semantics aren't quite right here. branch instructions'
# imm field contains the offset to the branch destination expressed
# as the /number of words/
if (self.r.read(instr.ops[0]) == self.r.read(instr.ops[1])):
self._set_pc_label(instr.ops[2])
elif instr.name == 'bne':
# bne rs, rt, label
if (self.r.read(instr.ops[0]) != self.r.read(instr.ops[1])):
self._set_pc_label(instr.ops[2])
elif instr.name == 'blt':
# blt rs, rt, label
if (self.r.read(instr.ops[0]) < self.r.read(instr.ops[1])):
self._set_pc_label(instr.ops[2])
elif instr.name == 'bgt':
# bgt rs, rt, label
if (self.r.read(instr.ops[0]) > self.r.read(instr.ops[1])):
self._set_pc_label(instr.ops[2])
elif instr.name == 'ble':
# ble rs, rt, label
if (self.r.read(instr.ops[0]) <= self.r.read(instr.ops[1])):
self._set_pc_label(instr.ops[2])
elif instr.name == 'bge':
# bge rs, rt, label
if (self.r.read(instr.ops[0]) >= self.r.read(instr.ops[1])):
self._set_pc_label(instr.ops[2])
elif instr.name == 'j':
# j label
self._set_pc_label(instr.ops[0])
elif instr.name == 'jal':
# jal label
self.r.write('ra', self.r.pc)
self._set_pc_label(instr.ops[0])
elif instr.name == 'jr':
# jr rs
self._set_pc(self.r.read(instr.ops[0]))
elif instr.name == 'jalr':
# jalr rs
self.r.write('ra', self.r.pc)
self._set_pc(self.r.read(instr.ops[0]))
elif instr.name == 'lb':
# lb rt, offs(rs)
rt = instr.ops[0]
offs = get_imm(instr.ops[1])
addr = self.r.read(instr.ops[2]) + offs
read = struct.unpack('<b', self.dmem.read(addr, 1))[0]
self.r.write(rt, read)
elif instr.name == 'lbu':
# lbu rt, offs(rs)
rt = instr.ops[0]
offs = get_imm(instr.ops[1])
addr = self.r.read(instr.ops[2]) + offs
read = struct.unpack('<B', self.dmem.read(addr, 1))[0]
self.r.write(rt, read)
elif instr.name == 'lh':
# lh rt, offs(rs)
rt = instr.ops[0]
offs = get_imm(instr.ops[1])
addr = self.r.read(instr.ops[2]) + offs
read = struct.unpack('<h', self.dmem.read(addr, 2))[0]
self.r.write(rt, read)
elif instr.name == 'lhu':
# lhu rt, offs(rs)
rt = instr.ops[0]
offs = get_imm(instr.ops[1])
addr = self.r.read(instr.ops[2]) + offs
read = struct.unpack('<H', self.dmem.read(addr, 2))[0]
self.r.write(rt, read)
elif instr.name == 'lw':
# TODO: lw will always treat that memory as signed, even when it
# potentially should be unsigned? With sw we can detect if they're
# trying to write a negative number and change the struct.pack
# arg accordingly, but with lw, we have no indication
# lw rt, offs(rs)
rd = instr.ops[0]
offs = get_imm(instr.ops[1])
addr = self.r.read(instr.ops[2]) + offs
read = struct.unpack('<I', self.dmem.read(addr, 4))[0]
self.r.write(rd, read)
elif instr.name == 'lui':
# lui rt, imm
rt = instr.ops[0]
imm = get_imm(instr.ops[1])
self.r.write(rt, (imm << 16) & 0xffffffff)
elif instr.name == 'li':
# li rd, imm
rd = instr.ops[0]
imm = get_imm(instr.ops[1])
self.r.write(rd, imm)
elif instr.name == 'la':
# la rd, label
rd = instr.ops[0]
label = instr.ops[1]
self.r.write(rd, datatab[label])
elif instr.name == 'sb':
# sb rt, offs(rs)
rt = self.r.read(instr.ops[0])
offs = get_imm(instr.ops[1])
addr = self.r.read(instr.ops[2]) + offs
self.dmem.write(addr, struct.pack('<b' if rt < 0 else '<B', rt))
elif instr.name == 'sh':
# sb rt, offs(rs)
rt = self.r.read(instr.ops[0])
offs = get_imm(instr.ops[1])
addr = self.r.read(instr.ops[2]) + offs
self.dmem.write(addr, struct.pack('<h' if rt < 0 else '<H', rt))
elif instr.name == 'sw':
# sw rt, offs(rs)
rt = self.r.read(instr.ops[0])
offs = get_imm(instr.ops[1])
addr = self.r.read(instr.ops[2]) + offs
self.dmem.write(addr, struct.pack('<i' if rt < 0 else '<I', rt))
elif instr.name == 'move':
# move rd, rs
self.r.write(instr.ops[0], self.r.read(instr.ops[1]))
elif instr.name == 'div':
# div rs, rt
rs = self.r.read(instr.ops[0])
rt = self.r.read(instr.ops[1])
self.r.lo = rs / rt
self.r.hi = rs % rt
elif instr.name == 'mul':
# mul rd, rs, rt
rs = self.r.read(instr.ops[1])
rt = self.r.read(instr.ops[2])
mult = (rs * rt) & 0xffffffff
self.r.write(instr.ops[0], mult)
elif instr.name == 'mult':
# mult rs, rt
rs = self.r.read(instr.ops[0])
rt = self.r.read(instr.ops[1])
mult = (rs * rt) & 0xffffffffffffffff
self.r.hi = mult >> 32
self.r.lo = mult & 0xffffffff
elif instr.name == 'mfhi':
# mfhi rd
self.r.write(instr.ops[0], self.r.hi)
elif instr.name == 'mflo':
# mfhi rd
self.r.write(instr.ops[0], self.r.lo)
elif instr.name == 'syscall':
# syscall
id = self.r.read('v0')
if id == 10:
# exit
log.info('\n*** exiting ***')
raise Exception('exit syscall')
elif id == 1:
# print_int
# not using log here because this will always show up and we
# want to suppress newline
print self.r.read('a0'),
sys.stdout.flush()
elif id == 4:
# print_string
ptr = self.r.read('a0')
null = self.dmem.memory.find('\x00', ptr)
# not using log here, see above
print self.dmem.memory[ptr:None if null == -1 else null],
sys.stdout.flush()
elif id == 5:
# read_int
try:
inp = int(raw_input())
self.r.write('v0', inp)
except Exception:
raise Exception('input not integer')
else:
raise Exception('bad syscall id')
else:
raise Exception('bad instruction: {}'.format(instr.name))
0
Example 166
Project: conpot Source File: guardian_ast_server.py
def handle(self, sock, addr):
session = conpot_core.get_session('guardian_ast', addr[0], addr[1])
logger.info('New GuardianAST connection from %s:%d. (%s)', addr[0], addr[1], session.id)
session.add_event({'type': 'NEW_CONNECTION'})
current_time = datetime.datetime.utcnow()
fill_start = self.fill_offset_time - datetime.timedelta(minutes=313)
fill_stop = self.fill_offset_time - datetime.timedelta(minutes=303)
# Default Product names, change based off country needs
product1 = self.databus.get_value('product1').ljust(22)
product2 = self.databus.get_value('product2').ljust(22)
product3 = self.databus.get_value('product3').ljust(22)
product4 = self.databus.get_value('product4').ljust(22)
# Create random Numbers for the volumes
#
# this will crate an initial Volume and then the second value based
# off the orig value.
vol1 = self.databus.get_value('vol1')
vol1tc = random.randint(vol1, vol1+200)
vol2 = self.databus.get_value('vol2')
vol2tc = random.randint(vol2, vol2+200)
vol3 = self.databus.get_value('vol3')
vol3tc = random.randint(vol3, vol3+200)
vol4 = self.databus.get_value('vol4')
vol4tc = random.randint(vol4, vol4+200)
# unfilled space ULLAGE
ullage1 = str(self.databus.get_value('ullage1'))
ullage2 = str(self.databus.get_value('ullage2'))
ullage3 = str(self.databus.get_value('ullage3'))
ullage4 = str(self.databus.get_value('ullage3'))
# Height of tank
height1 = str(self.databus.get_value('height1')).ljust(5, '0')
height2 = str(self.databus.get_value('height2')).ljust(5, '0')
height3 = str(self.databus.get_value('height3')).ljust(5, '0')
height4 = str(self.databus.get_value('height4')).ljust(5, '0')
# Water in tank, this is a variable that needs to be low
h2o1 = str(self.databus.get_value('h2o1')).ljust(4, '0')
h2o2 = str(self.databus.get_value('h2o2')).ljust(4, '0')
h2o3 = str(self.databus.get_value('h2o3')).ljust(4, '0')
h2o4 = str(self.databus.get_value('h2o4')).ljust(4, '0')
# Temperature of the tank, this will need to be between 50 - 60
temp1 = str(self.databus.get_value('temp1')).ljust(5, '0')
temp2 = str(self.databus.get_value('temp2')).ljust(5, '0')
temp3 = str(self.databus.get_value('temp3')).ljust(5, '0')
temp4 = str(self.databus.get_value('temp4')).ljust(5, '0')
station = self.databus.get_value('station_name')
# This function is to set-up up the message to be sent upon a successful I20100 command being sent
# The final message is sent with a current date/time stamp inside of the main loop.
def I20100():
ret = '\nI20100\n' + str(current_time.strftime('%m/%d/%Y %H:%M'))
ret += '\n\n' + station + '\n\n\n\nIN-TANK INVENTORY\n\n'
ret += 'TANK PRODUCT VOLUME TC VOLUME ULLAGE HEIGHT WATER TEMP'
ret += '\n 1 ' + product1 + str(vol1) + ' ' + str(vol1tc) + ' ' + ullage1 + ' ' + height1 + ' ' + h2o1 + ' ' + temp1
ret += '\n 2 ' + product2 + str(vol2) + ' ' + str(vol2tc) + ' ' + ullage2 + ' ' + height2 + ' ' + h2o2 + ' ' + temp2
ret += '\n 3 ' + product3 + str(vol3) + ' ' + str(vol3tc) + ' ' + ullage3 + ' ' + height3 + ' ' + h2o3 + ' ' + temp3
ret += '\n 4 ' + product4 + str(vol4) + ' ' + str(vol4tc) + ' ' + ullage4 + ' ' + height4 + ' ' + h2o4 + ' ' + temp4
ret += '\n'
return ret
###########################################################################
#
# Only one Tank is listed currently in the I20200 command
#
###########################################################################
def I20200():
ret = '\nI20200\n' + str(current_time.strftime('%m/%d/%Y %H:%M'))
ret += '\n\n' + station + '\n\n\n\nDELIVERY REPORT\n\n'
ret += 'T 1:' + product1 + '\nINCREASE DATE / TIME GALLONS TC GALLONS WATER TEMP DEG F HEIGHT\n\n'
ret += ' END: ' + str(fill_stop.strftime('%m/%d/%Y %H:%M')) + ' ' + str(vol1 + 300) + ' ' + str(vol1tc + 300) + ' ' + h2o1 + ' ' + temp1 + ' ' + height1 + '\n'
ret += ' START: ' + str(fill_start.strftime('%m/%d/%Y %H:%M')) + ' ' + str(vol1 - 300) + ' ' + str(vol1tc - 300) + ' ' + h2o1 + ' ' + temp1 + ' ' + str(float(height1) - 23) + '\n'
ret += ' AMOUNT: ' + str(vol1) + ' ' + str(vol1tc) + '\n\n'
return ret
###########################################################################
#
# I20300 In-Tank Leak Detect Report
#
###########################################################################
def I20300():
ret = '\nI20300\n' + str(current_time.strftime('%m/%d/%Y %H:%M'))
ret += '\n\n' + station + '\n\n\n'
ret += 'TANK 1 ' + product1 + '\n TEST STATUS: OFF\nLEAK DATA NOT AVAILABLE ON THIS TANK\n\n'
ret += 'TANK 2 ' + product2 + '\n TEST STATUS: OFF\nLEAK DATA NOT AVAILABLE ON THIS TANK\n\n'
ret += 'TANK 3 ' + product3 + '\n TEST STATUS: OFF\nLEAK DATA NOT AVAILABLE ON THIS TANK\n\n'
ret += 'TANK 4 ' + product4 + '\n TEST STATUS: OFF\nLEAK DATA NOT AVAILABLE ON THIS TANK\n\n'
return ret
###########################################################################
# Shift report command I20400 only one item in report at this time,
# but can always add more if needed
###########################################################################
def I20400():
ret = '\nI20400\n' + str(current_time.strftime('%m/%d/%Y %H:%M'))
ret += '\n\n' + station + '\n\n\n\nSHIFT REPORT\n\n'
ret += 'SHIFT 1 TIME: 12:00 AM\n\nTANK PRODUCT\n\n'
ret += ' 1 ' + product1 + ' VOLUME TC VOLUME ULLAGE HEIGHT WATER TEMP\n'
ret += 'SHIFT 1 STARTING VALUES ' + str(vol1) + ' ' + str(vol1tc) + ' ' + ullage1 + ' ' + height1 + ' ' + h2o1 + ' ' + temp1 + '\n'
ret += ' ENDING VALUES ' + str(vol1 + 940) + ' ' + str(vol1tc + 886) + ' ' + str(int(ullage1) + 345) + ' ' + str(float(height1) + 53) + ' ' + h2o1 + ' ' + temp1 + '\n'
ret += ' DELIVERY VALUE 0\n'
ret += ' TOTALS 940\n\n'
return ret
###########################################################################
# I20500 In-Tank Status Report
###########################################################################
def I20500():
ret = '\nI20500\n' + str(current_time.strftime('%m/%d/%Y %H:%M'))
ret += '\n\n\n' + station + '\n\n\n'
ret += 'TANK PRODUCT STATUS\n\n'
ret += ' 1 ' + product1 + ' NORMAL\n\n'
ret += ' 2 ' + product2 + ' HIGH WATER ALARM\n'
ret += ' HIGH WATER WARNING\n\n'
ret += ' 3 ' + product3 + ' NORMAL\n\n'
ret += ' 4 ' + product4 + ' NORMAL\n\n'
return ret
while True:
try:
# Get the initial data
response = sock.recv(4096)
# The connection has been closed
if not response:
break
while not ('\n' in response or '00' in response):
response += sock.recv(4096)
# if first value is not ^A then do nothing
# thanks John(achillean) for the help
if response[0] != '\x01':
logger.info('Non ^A command attempt %s:%d. (%s)', addr[0], addr[1], session.id)
break
# if response is less than 6, than do nothing
if len(response) < 6:
logger.info('Invalid command attempt %s:%d. (%s)', addr[0], addr[1], session.id)
break
cmds = {"I20100": I20100, "I20200": I20200, "I20300": I20300, "I20400": I20400, "I20500": I20500}
cmd = response[1:7] # strip ^A and \n out
session.add_event({'command': cmd})
if cmd in cmds:
logger.info('%s command attempt %s:%d. (%s)', cmd, addr[0], addr[1], session.id)
sock.send(cmds[cmd]())
elif cmd.startswith("S6020"):
# change the tank name
if cmd.startswith("S60201"):
# split string into two, the command, and the data
TEMP = response.split('S60201')
# if length is less than two, print error
if len(TEMP) < 2:
sock.send("9999FF1B\n")
# Else the command was entered correctly and continue
else:
# Strip off the carrage returns and new lines
TEMP1 = TEMP[1].rstrip("\r\n")
# if Length is less than 22
if len(TEMP1) < 22:
# pad the result to have 22 chars
product1 = TEMP1.ljust(22)
elif len(TEMP1) > 22:
# else only print 22 chars if the result was longer
product1 = TEMP1[:20] + " "
else:
# else it fits fine (22 chars)
product1 = TEMP1
logger.info('S60201: %s command attempt %s:%d. (%s)', TEMP1, addr[0], addr[1], session.id)
# Follows format for S60201 for comments
elif cmd.startswith("S60202"):
TEMP = response.split('S60202')
if len(TEMP) < 2:
sock.send("9999FF1B\n")
else:
TEMP1 = TEMP[1].rstrip("\r\n")
if len(TEMP1) < 22:
product2 = TEMP1.ljust(22)
elif len(TEMP1) > 22:
product2 = TEMP1[:20] + " "
else:
product2 = TEMP1
logger.info('S60202: %s command attempt %s:%d. (%s)', TEMP1, addr[0], addr[1], session.id)
# Follows format for S60201 for comments
elif cmd.startswith("S60203"):
TEMP = response.split('S60203')
if len(TEMP) < 2:
sock.send("9999FF1B\n")
else:
TEMP1 = TEMP[1].rstrip("\r\n")
if len(TEMP1) < 22:
product3 = TEMP1.ljust(22)
elif len(TEMP1) > 22:
product3 = TEMP1[:20] + " "
else:
product3 = TEMP1
logger.info('S60203: %s command attempt %s:%d. (%s)', TEMP1, addr[0], addr[1], session.id)
# Follows format for S60201 for comments
elif cmd.startswith("S60204"):
TEMP = response.split('S60204')
if len(TEMP) < 2:
sock.send("9999FF1B\n")
else:
TEMP1 = TEMP[1].rstrip("\r\n")
if len(TEMP1) < 22:
product4 = TEMP1.ljust(22)
elif len(TEMP1) > 22:
product4 = TEMP1[:20] + " "
else:
product4 = TEMP1
logger.info('S60204: %s command attempt %s:%d. (%s)', TEMP1, addr[0], addr[1], session.id)
# Follows format for S60201 for comments
elif cmd.startswith("S60200"):
TEMP = response.split('S60200')
if len(TEMP) < 2:
# 9999 indicates that the command was not understood and
# FF1B is the checksum for the 9999
sock.send("9999FF1B\n")
else:
TEMP1 = TEMP[1].rstrip("\r\n")
if len(TEMP1) < 22:
product1 = TEMP1.ljust(22)
product2 = TEMP1.ljust(22)
product3 = TEMP1.ljust(22)
product4 = TEMP1.ljust(22)
elif len(TEMP1) > 22:
product1 = TEMP1[:20] + " "
product2 = TEMP1[:20] + " "
product3 = TEMP1[:20] + " "
product4 = TEMP1[:20] + " "
else:
product1 = TEMP1
product2 = TEMP1
product3 = TEMP1
product4 = TEMP1
logger.info('S60200: %s command attempt %s:%d. (%s)', TEMP1, addr[0], addr[1], session.id)
else:
sock.send("9999FF1B\n")
# Else it is a currently unsupported command so print the error message found in the manual
# 9999 indicates that the command was not understood and FF1B is the checksum for the 9999
else:
sock.send("9999FF1B\n")
# log what was entered
logger.info('%s command attempt %s:%d. (%s)', response, addr[0], addr[1], session.id)
except Exception, e:
print 'Unknown Error: {}'.format(str(e))
raise
except KeyboardInterrupt:
break
logger.info('GuardianAST client disconnected %s:%d. (%s)', addr[0], addr[1], session.id)
session.add_event({'type': 'CONNECTION_LOST'})
0
Example 167
Project: dipy Source File: reconst.py
def run(self, input_files, bvalues, bvectors, mask_files, b0_threshold=0.0,
        save_metrics=None,
        out_dir='', out_tensor='tensors.nii.gz', out_fa='fa.nii.gz',
        out_ga='ga.nii.gz', out_rgb='rgb.nii.gz', out_md='md.nii.gz',
        out_ad='ad.nii.gz', out_rd='rd.nii.gz', out_mode='mode.nii.gz',
        out_evec='evecs.nii.gz', out_eval='evals.nii.gz'):
    """ Workflow for tensor reconstruction and for computing DTI metrics.
    Performs a tensor reconstruction on the files by 'globing'
    ``input_files`` and saves the DTI metrics in a directory specified by
    ``out_dir``.
    Parameters
    ----------
    input_files : string
        Path to the input volumes. This path may contain wildcards to
        process multiple inputs at once.
    bvalues : string
        Path to the bvalues files. This path may contain wildcards to use
        multiple bvalues files at once.
    bvectors : string
        Path to the bvectors files. This path may contain wildcards to use
        multiple bvectors files at once.
    mask_files : string
        Path to the input masks. This path may contain wildcards to use
        multiple masks at once. (default: No mask used)
    b0_threshold : float, optional
        Threshold used to find b=0 directions (default 0.0)
    save_metrics : variable string, optional
        List of metrics to save.
        Possible values: fa, ga, rgb, md, ad, rd, mode, tensor, evec, eval
        (default None, which saves all metrics)
    out_dir : string, optional
        Output directory (default input file directory)
    out_tensor : string, optional
        Name of the tensors volume to be saved (default 'tensors.nii.gz')
    out_fa : string, optional
        Name of the fractional anisotropy volume to be saved
        (default 'fa.nii.gz')
    out_ga : string, optional
        Name of the geodesic anisotropy volume to be saved
        (default 'ga.nii.gz')
    out_rgb : string, optional
        Name of the color fa volume to be saved (default 'rgb.nii.gz')
    out_md : string, optional
        Name of the mean diffusivity volume to be saved
        (default 'md.nii.gz')
    out_ad : string, optional
        Name of the axial diffusivity volume to be saved
        (default 'ad.nii.gz')
    out_rd : string, optional
        Name of the radial diffusivity volume to be saved
        (default 'rd.nii.gz')
    out_mode : string, optional
        Name of the mode volume to be saved (default 'mode.nii.gz')
    out_evec : string, optional
        Name of the eigenvectors volume to be saved
        (default 'evecs.nii.gz')
    out_eval : string, optional
        Name of the eigenvalues to be saved (default 'evals.nii.gz')
    """
    io_it = self.get_io_iterator()
    # Fixed a mutable default argument (was save_metrics=[]); a falsy
    # value still means "save everything", exactly as before. Hoisted the
    # normalization out of the loop since it is loop-invariant.
    if not save_metrics:
        save_metrics = ['fa', 'md', 'rd', 'ad', 'ga', 'rgb', 'mode',
                        'evec', 'eval', 'tensor']
    for dwi, bval, bvec, mask, otensor, ofa, oga, orgb, omd, oad, orad, \
            omode, oevecs, oevals in io_it:
        logging.info('Computing DTI metrics for {0}'.format(dwi))
        img = nib.load(dwi)
        data = img.get_data()
        affine = img.get_affine()
        # Load the mask volume only when one was provided (the original
        # had a no-op `mask = None` branch here).
        if mask is not None:
            mask = nib.load(mask).get_data().astype(bool)
        tenfit, _ = self.get_fitted_tensor(data, mask, bval, bvec,
                                           b0_threshold)
        FA = fractional_anisotropy(tenfit.evals)
        FA[np.isnan(FA)] = 0
        FA = np.clip(FA, 0, 1)
        if 'tensor' in save_metrics:
            tensor_vals = lower_triangular(tenfit.quadratic_form)
            # Reorder the lower-triangular values to the on-disk layout.
            correct_order = [0, 1, 3, 2, 4, 5]
            tensor_vals_reordered = tensor_vals[..., correct_order]
            fiber_tensors = nib.Nifti1Image(tensor_vals_reordered.astype(
                np.float32), affine)
            nib.save(fiber_tensors, otensor)
        if 'fa' in save_metrics:
            fa_img = nib.Nifti1Image(FA.astype(np.float32), affine)
            nib.save(fa_img, ofa)
        if 'ga' in save_metrics:
            GA = geodesic_anisotropy(tenfit.evals)
            ga_img = nib.Nifti1Image(GA.astype(np.float32), affine)
            nib.save(ga_img, oga)
        if 'rgb' in save_metrics:
            RGB = color_fa(FA, tenfit.evecs)
            rgb_img = nib.Nifti1Image(np.array(255 * RGB, 'uint8'), affine)
            nib.save(rgb_img, orgb)
        if 'md' in save_metrics:
            MD = mean_diffusivity(tenfit.evals)
            md_img = nib.Nifti1Image(MD.astype(np.float32), affine)
            nib.save(md_img, omd)
        if 'ad' in save_metrics:
            AD = axial_diffusivity(tenfit.evals)
            ad_img = nib.Nifti1Image(AD.astype(np.float32), affine)
            nib.save(ad_img, oad)
        if 'rd' in save_metrics:
            RD = radial_diffusivity(tenfit.evals)
            rd_img = nib.Nifti1Image(RD.astype(np.float32), affine)
            nib.save(rd_img, orad)
        if 'mode' in save_metrics:
            MODE = get_mode(tenfit.quadratic_form)
            mode_img = nib.Nifti1Image(MODE.astype(np.float32), affine)
            nib.save(mode_img, omode)
        if 'evec' in save_metrics:
            evecs_img = nib.Nifti1Image(tenfit.evecs.astype(np.float32), affine)
            nib.save(evecs_img, oevecs)
        if 'eval' in save_metrics:
            evals_img = nib.Nifti1Image(tenfit.evals.astype(np.float32), affine)
            nib.save(evals_img, oevals)
        logging.info('DTI metrics saved in {0}'.
                     format(os.path.dirname(oevals)))
0
Example 168
Project: openode Source File: notify_users.py
def immediately_notify_users(post):
    """Send an immediate email notification about *post* to subscribed users.

    Recipients are the followers of the post's thread and of its node who
    have an 'instant' ('i') frequency subscription for feed type 'q_sel'.
    The author of the change (editor for updates, author for new posts) is
    excluded.

    Returns True when notifications were sent, False when the post type is
    not notifiable or when any error occurred.  All exceptions are swallowed
    and logged so the caller's routine is never disturbed.
    """
    # we don't want to disturb original routine
    try:
        # set default language TODO - language per user - add user atribute
        old_lang = get_language()
        activate(django_settings.LANGUAGE_CODE)
        # When this settings flag is on (default True), mail is redirected to
        # the first ADMINS address instead of the real recipient (see below).
        DEBUG_THIS_COMMAND = getattr(django_settings, 'DEBUG_SEND_EMAIL_NOTIFICATIONS', True)

        # compose subject according to the post type
        subject_line = _('Notification')
        if post.post_type == const.POST_TYPE_QUESTION:
            subject_line += ': ' + _('Question')
        elif post.post_type == const.POST_TYPE_DOCUMENT:
            # NOTE(review): 'Docuement' is misspelled, but it is a translation
            # msgid — fixing it requires a matching .po catalog update.
            subject_line += ': ' + _('Docuement')
        elif post.post_type == const.POST_TYPE_COMMENT:
            subject_line += ': ' + _('Comment')
        elif post.post_type == const.POST_TYPE_THREAD_POST:
            if post.thread.thread_type == const.THREAD_TYPE_QUESTION:
                subject_line += ': ' + _('Answer')
            elif post.thread.thread_type == const.THREAD_TYPE_DISCUSSION:
                subject_line += ': ' + _('Discussion post')
        else:
            # post type is probably only a description, do nothing
            activate(old_lang)
            return False
        subject_line += ' - ' + post.thread.title

        # compose message according to post type
        url_prefix = openode_settings.APP_URL
        # link to node
        # text = u'<p>%s: <a href="%s">%s</a></p>' % (_('Node'), url_prefix + post.thread.node.get_absolute_url(), post.thread.node.full_title())
        text = u'<p>%s: %s</p>' % (_('Node'), post.thread.node.full_title())

        # title according to the post type
        text += '<h2>'
        if post.last_edited_by:
            # post was updated
            if post.post_type == const.POST_TYPE_QUESTION:
                text += _('Updated question')
            elif post.post_type == const.POST_TYPE_DOCUMENT:
                text += _('Updated docuement')
            elif post.post_type == const.POST_TYPE_COMMENT:
                text += _('Updated comment')
            elif post.post_type == const.POST_TYPE_THREAD_POST:
                if post.thread.thread_type == const.THREAD_TYPE_QUESTION:
                    text += _('Updated answer')
                elif post.thread.thread_type == const.THREAD_TYPE_DISCUSSION:
                    text += _('Updated discussion post')
        else:
            # post is new
            if post.post_type == const.POST_TYPE_QUESTION:
                text += _('New question')
            elif post.post_type == const.POST_TYPE_DOCUMENT:
                text += _('New docuement')
            elif post.post_type == const.POST_TYPE_COMMENT:
                text += _('New comment')
            elif post.post_type == const.POST_TYPE_THREAD_POST:
                if post.thread.thread_type == const.THREAD_TYPE_QUESTION:
                    text += _('New answer')
                elif post.thread.thread_type == const.THREAD_TYPE_DISCUSSION:
                    text += _('New discussion post')
        text += '</h2>'

        # link to post
        if post.post_type == const.POST_TYPE_DOCUMENT:
            # documents are linked via their thread, not directly
            url = url_prefix + post.thread.get_absolute_url()
        else:
            url = url_prefix + post.get_absolute_url()
        text += '<p><a href="%(url)s">%(url)s</a></p>' % {"url": url}

        # author
        text += '<p>'
        if post.last_edited_by:
            # post was updated
            text += _(u'%(datetime)s changed by <strong>%(user)s</strong>') % {'datetime': humanize_datetime(post.last_edited_at, 0), 'user': post.last_edited_by.screen_name}
        else:
            # post is new
            text += _(u'%(datetime)s created by <strong>%(user)s</strong>') % {'datetime': humanize_datetime(post.added_at, 0), 'user': post.author.screen_name}
        text += '</p>'

        # show post text
        text += post.html

        # show related post if convenient
        if post.post_type == const.POST_TYPE_THREAD_POST and post.thread.thread_type == const.THREAD_TYPE_QUESTION:
            text += '<h3>'
            text += _('Question')
            text += '</h3>'
            # text += '<p><a href="%s">%s</a></p>' % (url_prefix + post.thread._main_post().get_absolute_url(), url_prefix + post.thread._main_post().get_absolute_url())
            text += post.thread._main_post().html
        elif post.post_type == const.POST_TYPE_COMMENT:
            text += '<h3>'
            text += _('Commented post')
            text += '</h3>'
            # text += '<p><a href="%s">%s</a></p>' % (url_prefix + post.parent.get_absolute_url(), url_prefix + post.parent.get_absolute_url())
            text += post.parent.html

        # message bottom
        text += '<hr />'
        text += '<p>'
        text += _('Please remember that you can always adjust frequency of the email updates or turn them off entirely in your profile.')
        text += '</p>'
        text += '<p>'
        text += _('If you believe that this message was sent in an error, please contact us.')
        text += '</p>'

        # render email
        data = {
            'text': text,
            'site_name': openode_settings.APP_SHORT_NAME,
            'site_url': openode_settings.APP_URL
        }
        template = get_template('email/instant_notification.html')
        message = template.render(data)

        # collect recipients keyed by pk so thread/node followers de-duplicate
        recipients = {}
        # get all thread followers
        for user in post.thread.followed_by.filter(notification_subscriptions__frequency='i', notification_subscriptions__feed_type='q_sel'):
            recipients[user.pk] = user
        # get all node followers
        for user in post.thread.node.followed_by.filter(notification_subscriptions__frequency='i', notification_subscriptions__feed_type='q_sel'):
            recipients[user.pk] = user

        # remove author of this editation from recipients
        if post.last_edited_by:
            # post was updated
            recipients.pop(post.last_edited_by.pk, None)
        else:
            # post is new
            recipients.pop(post.author.pk, None)

        # send all emails
        for user in recipients.values():
            if DEBUG_THIS_COMMAND:
                recipient_email = django_settings.ADMINS[0][1]
            else:
                recipient_email = user.email
            mail.send_mail(subject_line, message, django_settings.DEFAULT_FROM_EMAIL, [recipient_email], raise_on_failure=True)
            logging.info('Email notification sent: %s' % repr({
                "user": user.screen_name,
                "user_email": recipient_email,
                "user_pk": user.pk,
                "post_pk": post.pk
            }))
        activate(old_lang)
        return True
    except Exception, e:
        # NOTE(review): the previously active language is NOT restored on this
        # error path; consider moving activate(old_lang) into a finally block.
        logging.error('Email notification - failed to send immediate notification for post: %s' % repr({
            "post_pk": post.pk,
            "error": e
        }))
        return False
Example 169 (score: 0 votes)
Project: monasca-agent — Source File: daemon.py
def main():
    """Entry point for the monasca-agent collector daemon CLI.

    Parses command-line options, loads the Main/Api/Logging configuration,
    and dispatches one of the supported daemon commands.

    Returns a process exit code: 0 on success, 1 on configcheck failure,
    2 on missing command, 3 on unknown command, or whatever
    ``agent.info`` returns for the 'info' command.
    """
    options, args = util.get_parsed_args()
    config = cfg.Config()
    collector_config = config.get_config(['Main', 'Api', 'Logging'])
    autorestart = collector_config.get('autorestart', False)
    collector_restart_interval = collector_config.get(
        'collector_restart_interval', 24)
    # Reset the restart interval to the default when it is outside [1, 48].
    if collector_restart_interval in range(1, 49):
        pass
    else:
        log.error("Collector_restart_interval = {0} is out of legal range"
                  " [1, 48]. Reset collector_restart_interval to 24".format(collector_restart_interval))
        collector_restart_interval = 24

    COMMANDS = [
        'start',
        'stop',
        'restart',
        'foreground',
        'status',
        'info',
        'check',
        'check_all',
        'configcheck',
        'jmx',
    ]

    if len(args) < 1:
        sys.stderr.write("Usage: %s %s\n" % (sys.argv[0], "|".join(COMMANDS)))
        return 2

    command = args[0]
    if command not in COMMANDS:
        sys.stderr.write("Unknown command: %s\n" % command)
        return 3

    pid_file = util.PidFile('monasca-agent')
    if options.clean:
        pid_file.clean()

    agent = CollectorDaemon(pid_file.get_path(), autorestart)

    if command in START_COMMANDS:
        log.info('Agent version %s' % config.get_version())

    if 'start' == command:
        log.info('Start daemon')
        agent.start()
    elif 'stop' == command:
        log.info('Stop daemon')
        agent.stop()
    elif 'restart' == command:
        log.info('Restart daemon')
        agent.restart()
    elif 'status' == command:
        agent.status()
    elif 'info' == command:
        return agent.info(verbose=options.verbose)
    elif 'foreground' == command:
        logging.info('Running in foreground')
        if autorestart:
            # Set-up the supervisor callbacks and fork it.
            logging.info('Running Agent with auto-restart ON')
        # Run in the standard foreground.
        agent.run(collector_config)
    elif 'check' == command:
        # run a single named check once
        check_name = args[1]
        checks = util.load_check_directory()
        for check in checks['initialized_checks']:
            if check.name == check_name:
                run_check(check)
    elif 'check_all' == command:
        print("Loading check directory...")
        checks = util.load_check_directory()
        print("...directory loaded.\n")
        for check in checks['initialized_checks']:
            run_check(check)
    elif 'configcheck' == command or 'configtest' == command:
        # NOTE(review): 'configtest' is accepted here but is not in COMMANDS,
        # so it is rejected by the validation above — effectively unreachable.
        all_valid = True
        paths = util.Paths()
        for conf_path in glob.glob(os.path.join(paths.get_confd_path(), "*.yaml")):
            basename = os.path.basename(conf_path)
            try:
                config.check_yaml(conf_path)
            except Exception as e:
                all_valid = False
                print("%s contains errors:\n %s" % (basename, e))
            else:
                print("%s is valid" % basename)
        if all_valid:
            print("All yaml files passed. You can now run the Monitoring agent.")
            return 0
        else:
            print("Fix the invalid yaml files above in order to start the Monitoring agent. "
                  "A useful external tool for yaml parsing can be found at "
                  "http://yaml-online-parser.appspot.com/")
            return 1
    elif 'jmx' == command:
        if len(args) < 2 or args[1] not in jmxfetch.JMX_LIST_COMMANDS.keys():
            # no (valid) sub-command: print the JMX help banner
            print("#" * 80)
            print("JMX tool to be used to help configure your JMX checks.")
            print("See http://docs.datadoghq.com/integrations/java/ for more information")
            print("#" * 80)
            print("\n")
            print("You have to specify one of the following commands:")
            for command, desc in jmxfetch.JMX_LIST_COMMANDS.iteritems():
                print(" - %s [OPTIONAL: LIST OF CHECKS]: %s" % (command, desc))
            print("Example: sudo /etc/init.d/monasca-agent jmx list_matching_attributes tomcat jmx solr")
            print("\n")
        else:
            jmx_command = args[1]
            checks_list = args[2:]
            paths = util.Paths()
            confd_path = paths.get_confd_path()
            # Start JMXFetch if needed
            should_run = jmxfetch.JMXFetch.init(confd_path,
                                                config,
                                                15,
                                                jmx_command,
                                                checks_list,
                                                reporter="console")
            if not should_run:
                print("Couldn't find any valid JMX configuration in your conf.d directory: %s" % confd_path)
                print("Have you enabled any JMX checks ?")
    return 0
Example 170 (score: 0 votes)
Project: GoAgent-Always-Available — Source File: gae.py
def application(environ, start_response):
    """WSGI entry point for the GoAgent GAE fetch server (Python 2 generator).

    Decodes a proxied HTTP request from the client (zlib-inflated, optionally
    RC4-encrypted), performs it through the App Engine urlfetch service with
    retry/backoff, and yields the response back — optionally re-compressed
    and/or RC4-encrypted.  ``raise StopIteration`` is the Python 2 idiom used
    throughout to end the generator response early.
    """
    # Plain GET with no urlfetch header: serve a human-readable status page.
    if environ['REQUEST_METHOD'] == 'GET' and 'HTTP_X_URLFETCH_PS1' not in environ:
        # deployment time is encoded in CURRENT_VERSION_ID; +8h shifts to UTC+8
        timestamp = long(os.environ['CURRENT_VERSION_ID'].split('.')[1])/2**28
        ctime = time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime(timestamp+8*3600))
        start_response('200 OK', [('Content-Type', 'text/plain')])
        yield 'GoAgent Python Server %s works, deployed at %s\n' % (__version__, ctime)
        if len(__password__) > 2:
            # show only first/last character of the password
            yield 'Password: %s%s%s' % (__password__[0], '*'*(len(__password__)-2), __password__[-1])
        raise StopIteration
    start_response('200 OK', [('Content-Type', 'image/gif')])
    if environ['REQUEST_METHOD'] == 'HEAD':
        raise StopIteration
    options = environ.get('HTTP_X_URLFETCH_OPTIONS', '')
    if 'rc4' in options and not __password__:
        yield format_response(400, {'Content-Type': 'text/html; charset=utf-8'}, message_html('400 Bad Request', 'Bad Request (options) - please set __password__ in gae.py', 'please set __password__ and upload gae.py again'))
        raise StopIteration
    try:
        # Two transports: request packed into headers (PS1/PS2), or in the body.
        if 'HTTP_X_URLFETCH_PS1' in environ:
            payload = inflate(base64.b64decode(environ['HTTP_X_URLFETCH_PS1']))
            body = inflate(base64.b64decode(environ['HTTP_X_URLFETCH_PS2'])) if 'HTTP_X_URLFETCH_PS2' in environ else ''
        else:
            wsgi_input = environ['wsgi.input']
            input_data = wsgi_input.read(int(environ.get('CONTENT_LENGTH', '0')))
            if 'rc4' in options:
                # RC4 is symmetric: encrypt() here decrypts the client data
                input_data = RC4Cipher(__password__).encrypt(input_data)
            payload_length, = struct.unpack('!h', input_data[:2])
            payload = inflate(input_data[2:2+payload_length])
            body = input_data[2+payload_length:]
        # first line is "<METHOD> <URL> ..."; remaining lines are headers
        raw_response_line, payload = payload.split('\r\n', 1)
        method, url = raw_response_line.split()[:2]
        headers = {}
        for line in payload.splitlines():
            key, value = line.split(':', 1)
            headers[key.title()] = value.strip()
    except (zlib.error, KeyError, ValueError):
        import traceback
        yield format_response(500, {'Content-Type': 'text/html; charset=utf-8'}, message_html('500 Internal Server Error', 'Bad Request (payload) - Possible Wrong Password', '<pre>%s</pre>' % traceback.format_exc()))
        raise StopIteration
    # pull control parameters out of x-urlfetch-* headers into kwargs
    kwargs = {}
    any(kwargs.__setitem__(x[len('x-urlfetch-'):].lower(), headers.pop(x)) for x in headers.keys() if x.lower().startswith('x-urlfetch-'))
    if 'Content-Encoding' in headers and body:
        if headers['Content-Encoding'] == 'deflate':
            body = inflate(body)
            headers['Content-Length'] = str(len(body))
            del headers['Content-Encoding']
    logging.info('%s "%s %s %s" - -', environ['REMOTE_ADDR'], method, url, 'HTTP/1.1')
    if __password__ and __password__ != kwargs.get('password', ''):
        yield format_response(403, {'Content-Type': 'text/html; charset=utf-8'}, message_html('403 Wrong password', 'Wrong password(%r)' % kwargs.get('password', ''), 'GoAgent proxy.ini password is wrong!'))
        raise StopIteration
    netloc = urlparse.urlparse(url).netloc
    if __hostsdeny__ and netloc.endswith(__hostsdeny__):
        yield format_response(403, {'Content-Type': 'text/html; charset=utf-8'}, message_html('403 Hosts Deny', 'Hosts Deny(%r)' % netloc, detail='url=%r' % url))
        raise StopIteration
    if len(url) > MAX_URL_LENGTH:
        yield format_response(400, {'Content-Type': 'text/html; charset=utf-8'}, message_html('400 Bad Request', 'length of URL too long(greater than %r)' % MAX_URL_LENGTH, detail='url=%r' % url))
        raise StopIteration
    if netloc.startswith(('127.0.0.', '::1', 'localhost')):
        yield format_response(400, {'Content-Type': 'text/html; charset=utf-8'}, message_html('GoAgent %s is Running' % __version__, 'Now you can visit some websites', ''.join('<a href="https://%s/">%s</a><br/>' % (x, x) for x in ('google.com', 'mail.google.com'))))
        raise StopIteration
    fetchmethod = getattr(urlfetch, method, None)
    if not fetchmethod:
        yield format_response(405, {'Content-Type': 'text/html; charset=utf-8'}, message_html('405 Method Not Allowed', 'Method Not Allowed: %r' % method, detail='Method Not Allowed URL=%r' % url))
        raise StopIteration
    timeout = int(kwargs.get('timeout', URLFETCH_TIMEOUT))
    validate_certificate = bool(int(kwargs.get('validate', 0)))
    maxsize = int(kwargs.get('maxsize', 0))
    # https://www.freebsdchina.org/forum/viewtopic.php?t=54269
    accept_encoding = headers.get('Accept-Encoding', '') or headers.get('Bccept-Encoding', '')
    errors = []
    # Retry loop with per-error backoff; the for-else below fires only when
    # every attempt failed without a break.
    for i in xrange(int(kwargs.get('fetchmax', URLFETCH_MAX))):
        try:
            response = urlfetch.fetch(url, body, fetchmethod, headers, allow_truncated=False, follow_redirects=False, deadline=timeout, validate_certificate=validate_certificate)
            break
        except apiproxy_errors.OverQuotaError as e:
            time.sleep(5)
        except urlfetch.DeadlineExceededError as e:
            errors.append('%r, timeout=%s' % (e, timeout))
            logging.error('DeadlineExceededError(timeout=%s, url=%r)', timeout, url)
            time.sleep(1)
            timeout *= 2
        except urlfetch.DownloadError as e:
            errors.append('%r, timeout=%s' % (e, timeout))
            logging.error('DownloadError(timeout=%s, url=%r)', timeout, url)
            time.sleep(1)
            timeout *= 2
        except urlfetch.ResponseTooLargeError as e:
            errors.append('%r, timeout=%s' % (e, timeout))
            response = e.response
            logging.error('ResponseTooLargeError(timeout=%s, url=%r) response(%r)', timeout, url, response)
            # fall back to ranged fetching so an oversized body can be split
            m = re.search(r'=\s*(\d+)-', headers.get('Range') or headers.get('range') or '')
            if m is None:
                headers['Range'] = 'bytes=0-%d' % (maxsize or URLFETCH_MAXSIZE)
            else:
                headers.pop('Range', '')
                headers.pop('range', '')
                start = int(m.group(1))
                headers['Range'] = 'bytes=%s-%d' % (start, start+(maxsize or URLFETCH_MAXSIZE))
            timeout *= 2
        except urlfetch.SSLCertificateError as e:
            errors.append('%r, should validate=0 ?' % e)
            logging.error('%r, timeout=%s', e, timeout)
        except Exception as e:
            errors.append(str(e))
            if i == 0 and method == 'GET':
                timeout *= 2
    else:
        # all retries exhausted — report the accumulated errors as 502
        error_string = '<br />\n'.join(errors)
        if not error_string:
            logurl = 'https://appengine.google.com/logs?&app_id=%s' % os.environ['APPLICATION_ID']
            error_string = 'Internal Server Error. <p/>try <a href="javascript:window.location.reload(true);">refresh</a> or goto <a href="%s" target="_blank">appengine.google.com</a> for details' % logurl
        yield format_response(502, {'Content-Type': 'text/html; charset=utf-8'}, message_html('502 Urlfetch Error', 'Python Urlfetch Error: %r' % method, error_string))
        raise StopIteration
    #logging.debug('url=%r response.status_code=%r response.headers=%r response.content[:1024]=%r', url, response.status_code, dict(response.headers), response.content[:1024])
    status_code = int(response.status_code)
    data = response.content
    response_headers = response.headers
    content_type = response_headers.get('content-type', '')
    # Truncate oversized range-capable responses into a 206 partial response.
    if status_code == 200 and maxsize and len(data) > maxsize and response_headers.get('accept-ranges', '').lower() == 'bytes' and int(response_headers.get('content-length', 0)):
        status_code = 206
        response_headers['Content-Range'] = 'bytes 0-%d/%d' % (maxsize-1, len(data))
        data = data[:maxsize]
    # Compress text-like payloads that the upstream left uncompressed.
    if status_code == 200 and 'content-encoding' not in response_headers and 512 < len(data) < URLFETCH_DEFLATE_MAXSIZE and content_type.startswith(('text/', 'application/json', 'application/javascript')):
        if 'gzip' in accept_encoding:
            response_headers['Content-Encoding'] = 'gzip'
            # build a gzip stream by hand: magic header + raw deflate + crc/size
            compressobj = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION, zlib.DEFLATED, -zlib.MAX_WBITS, zlib.DEF_MEM_LEVEL, 0)
            dataio = io.BytesIO()
            dataio.write('\x1f\x8b\x08\x00\x00\x00\x00\x00\x02\xff')
            dataio.write(compressobj.compress(data))
            dataio.write(compressobj.flush())
            dataio.write(struct.pack('<LL', zlib.crc32(data) & 0xFFFFFFFFL, len(data) & 0xFFFFFFFFL))
            data = dataio.getvalue()
        elif 'deflate' in accept_encoding:
            response_headers['Content-Encoding'] = 'deflate'
            data = deflate(data)
    response_headers['Content-Length'] = str(len(data))
    if 'rc4' not in options:
        yield format_response(status_code, response_headers, '')
        yield data
    else:
        cipher = RC4Cipher(__password__)
        yield cipher.encrypt(format_response(status_code, response_headers, ''))
        yield cipher.encrypt(data)
Example 171 (score: 0 votes)
Project: owtf — Source File: owtf.py
def process_options(user_args):
    """Parse and validate the OWTF command line.

    Validates plugin groups/types against the plugin DB, expands custom
    profiles, TOR/Botnet modes, and proxy specifications (mutating ``arg``
    attributes in place), loads targets from a scope file when a single
    file path is given, and returns a flat dict of processed settings.

    Invalid input is reported via ``usage(...)``; ``--TOR-mode help``
    prints TOR configuration help and exits the process.
    """
    try:
        db_plugin = ServiceLocator.get_component("db_plugin")
        valid_groups = db_plugin.GetAllGroups()
        valid_types = db_plugin.GetAllTypes() + ['all', 'quiet']
        arg = parse_options(user_args, valid_groups, valid_types)
    except KeyboardInterrupt as e:
        usage("Invalid OWTF option(s) %s" % e)
    # Default settings:
    profiles = {}
    plugin_group = arg.PluginGroup
    if arg.CustomProfile:  # Custom profiles specified
        # Quick pseudo-validation check: each entry must be "name:path"
        for profile in arg.CustomProfile.split(','):
            chunks = profile.split(':')
            if len(chunks) != 2 or not os.path.exists(chunks[1]):
                usage("Invalid Profile")
            else:  # profile "ok" :)
                profiles[chunks[0]] = chunks[1]
    if arg.OnlyPlugins:
        arg.OnlyPlugins, plugin_groups = get_plugins_from_arg(arg.OnlyPlugins)
        try:
            # Set Plugin Group according to plugin list specified
            plugin_group = plugin_groups[0]
        except IndexError:
            usage("Please use either OWASP/OWTF codes or Plugin names")
        logging.info("Defaulting Plugin Group to '%s' based on list of plugins supplied" % plugin_group)
    if arg.ExceptPlugins:
        arg.ExceptPlugins, plugin_groups = get_plugins_from_arg(arg.ExceptPlugins)
    if arg.TOR_mode:
        # TOR mode format: ip:port:tor_control_port:tor_control_password:ip_renew_time
        arg.TOR_mode = arg.TOR_mode.split(":")
        if(arg.TOR_mode[0] == "help"):
            from framework.http.proxy.tor_manager import TOR_manager
            TOR_manager.msg_configure_tor()
            exit(0)
        if len(arg.TOR_mode) == 1:
            if arg.TOR_mode[0] != "help":
                usage("Invalid argument for TOR-mode")
        elif len(arg.TOR_mode) != 5:
            usage("Invalid argument for TOR-mode")
        else:
            # Enables OutboundProxy.
            if arg.TOR_mode[0] == '':
                outbound_proxy_ip = "127.0.0.1"
            else:
                outbound_proxy_ip = arg.TOR_mode[0]
            if arg.TOR_mode[1] == '':
                outbound_proxy_port = "9050"  # default TOR port
            else:
                outbound_proxy_port = arg.TOR_mode[1]
            arg.OutboundProxy = "socks://%s:%s" % (outbound_proxy_ip, outbound_proxy_port)
    if arg.Botnet_mode:  # Checking arguments
        arg.Botnet_mode = arg.Botnet_mode.split(":")
        if arg.Botnet_mode[0] == "miner" and len(arg.Botnet_mode) != 1:
            usage("Invalid argument for Botnet mode\n Mode must be miner or list")
        if arg.Botnet_mode[0] == "list":
            if len(arg.Botnet_mode) != 2:
                usage("Invalid argument for Botnet mode\n Mode must be miner or list")
            if not os.path.isfile(os.path.expanduser(arg.Botnet_mode[1])):
                usage("Error Proxy List not found! Please check the path.")
    if arg.OutboundProxy:
        arg.OutboundProxy = arg.OutboundProxy.split('://')
        if len(arg.OutboundProxy) == 2:
            # "type://host:port" -> ['type', 'host', 'port']
            arg.OutboundProxy = arg.OutboundProxy + arg.OutboundProxy.pop().split(':')
            if arg.OutboundProxy[0] not in ["socks", "http"]:
                usage("Invalid argument for Outbound Proxy")
        else:
            arg.OutboundProxy = arg.OutboundProxy.pop().split(':')
        # OutboundProxy should be type://ip:port
        if (len(arg.OutboundProxy) not in [2, 3]):
            usage("Invalid argument for Outbound Proxy")
        else:  # Check if the port is an int.
            try:
                int(arg.OutboundProxy[-1])
            except ValueError:
                usage("Invalid port provided for Outbound Proxy")
    if arg.InboundProxy:
        arg.InboundProxy = arg.InboundProxy.split(':')
        # InboundProxy should be (ip:)port:
        if len(arg.InboundProxy) not in [1, 2]:
            usage("Invalid argument for Inbound Proxy")
        else:
            try:
                int(arg.InboundProxy[-1])
            except ValueError:
                usage("Invalid port for Inbound Proxy")
    plugin_types_for_group = db_plugin.GetTypesForGroup(plugin_group)
    if arg.PluginType == 'all':
        arg.PluginType = plugin_types_for_group
    elif arg.PluginType == 'quiet':
        arg.PluginType = ['passive', 'semi_passive']
    scope = arg.Targets or []  # Arguments at the end are the URL target(s)
    num_targets = len(scope)
    if plugin_group != 'auxiliary' and num_targets == 0 and not arg.list_plugins:
        # TODO: Fix this
        pass
    elif num_targets == 1:  # Check if this is a file
        if os.path.isfile(scope[0]):
            logging.info("Scope file: trying to load targets from it ..")
            new_scope = []
            for target in open(scope[0]).read().split("\n"):
                CleanTarget = target.strip()
                if not CleanTarget:
                    continue  # Skip blank lines
                new_scope.append(CleanTarget)
            if len(new_scope) == 0:  # Bad file
                usage("Please provide a scope file (1 target x line)")
            scope = new_scope
    # targets must not look like option flags
    for target in scope:
        if target[0] == "-":
            usage("Invalid Target: " + target)
    args = ''
    if plugin_group == 'auxiliary':
        # For auxiliary plugins, the scope are the parameters.
        args = scope
        # auxiliary plugins do not have targets, they have metasploit-like parameters.
        scope = ['auxiliary']
    return {
        'list_plugins': arg.list_plugins,
        'Force_Overwrite': arg.ForceOverwrite,
        'Interactive': arg.Interactive == 'yes',
        'Simulation': arg.Simulation,
        'Scope': scope,
        'argv': sys.argv,
        'PluginType': arg.PluginType,
        'OnlyPlugins': arg.OnlyPlugins,
        'ExceptPlugins': arg.ExceptPlugins,
        'InboundProxy': arg.InboundProxy,
        'OutboundProxy': arg.OutboundProxy,
        'OutboundProxyAuth': arg.OutboundProxyAuth,
        'Profiles': profiles,
        'PluginGroup': plugin_group,
        'RPort': arg.RPort,
        'PortWaves': arg.PortWaves,
        'ProxyMode': arg.ProxyMode,
        'TOR_mode': arg.TOR_mode,
        'Botnet_mode': arg.Botnet_mode,
        'nowebui': arg.nowebui,
        'Args': args
    }
Example 172 (score: 0 votes)
Project: GenomicConsensus — Source File: arrow.py
def consensusAndVariantsForWindow(alnFile, refWindow, referenceContig,
                                  depthLimit, arrowConfig):
    """
    High-level routine for calling the consensus for a
    window of the genome given a cmp.h5.

    Identifies the coverage contours of the window in order to
    identify subintervals where a good consensus can be called.
    Creates the desired "no evidence consensus" where there is
    inadequate coverage.

    Returns a tuple ``(css, variants)``: the joined window consensus
    and the list of filtered variants found in the window.
    """
    winId, winStart, winEnd = refWindow
    logging.info("Arrow operating on %s" %
                 reference.windowToString(refWindow))
    if options.fancyChunking:
        # 1) identify the intervals with adequate coverage for arrow
        #    consensus; restrict to intervals of length > 10
        alnHits = U.readsInWindow(alnFile, refWindow,
                                  depthLimit=20000,
                                  minMapQV=arrowConfig.minMapQV,
                                  strategy="long-and-strand-balanced",
                                  stratum=options.readStratum,
                                  barcode=options.barcode)
        # NOTE(review): np.int is removed in NumPy >= 1.24 — verify the
        # pinned NumPy version, or use np.int_ / int.
        starts = np.fromiter((hit.tStart for hit in alnHits), np.int)
        ends = np.fromiter((hit.tEnd for hit in alnHits), np.int)
        intervals = kSpannedIntervals(refWindow, arrowConfig.minPoaCoverage,
                                      starts, ends, minLength=10)
        # fill the gaps between covered intervals so the window is tiled
        coverageGaps = holes(refWindow, intervals)
        allIntervals = sorted(intervals + coverageGaps)
        if len(allIntervals) > 1:
            logging.info("Usable coverage in %s: %r" %
                         (reference.windowToString(refWindow), intervals))
    else:
        allIntervals = [ (winStart, winEnd) ]

    # 2) pull out the reads we will use for each interval
    # 3) call consensusForAlignments on the interval
    subConsensi = []
    variants = []
    for interval in allIntervals:
        intStart, intEnd = interval
        intRefSeq = referenceContig[intStart:intEnd]
        subWin = subWindow(refWindow, interval)
        windowRefSeq = referenceContig[intStart:intEnd]
        alns = U.readsInWindow(alnFile, subWin,
                               depthLimit=depthLimit,
                               minMapQV=arrowConfig.minMapQV,
                               strategy="long-and-strand-balanced",
                               stratum=options.readStratum,
                               barcode=options.barcode)
        clippedAlns_ = [ aln.clippedTo(*interval) for aln in alns ]
        clippedAlns = U.filterAlns(subWin, clippedAlns_, arrowConfig)
        # only call consensus when enough reads span the whole interval
        if len([ a for a in clippedAlns
                 if a.spansReferenceRange(*interval) ]) >= arrowConfig.minPoaCoverage:
            logging.debug("%s: Reads being used: %s" %
                          (reference.windowToString(subWin),
                           " ".join([str(hit.readName) for hit in alns])))
            alnsUsed = [] if options.reportEffectiveCoverage else None
            css = U.consensusForAlignments(subWin,
                                           intRefSeq,
                                           clippedAlns,
                                           arrowConfig,
                                           alnsUsed=alnsUsed)
            # Tabulate the coverage implied by these alignments, as
            # well as the post-filtering ("effective") coverage
            siteCoverage = U.coverageInWindow(subWin, alns)
            effectiveSiteCoverage = U.coverageInWindow(subWin, alnsUsed) if options.reportEffectiveCoverage else None
            variants_ = U.variantsFromConsensus(subWin, windowRefSeq,
                                                css.sequence, css.confidence, siteCoverage, effectiveSiteCoverage,
                                                options.aligner,
                                                ai=None)
            filteredVars = filterVariants(options.minCoverage,
                                          options.minConfidence,
                                          variants_)
            # Annotate?
            if options.annotateGFF:
                annotateVariants(filteredVars, clippedAlns)
            variants += filteredVars
            # Dump?
            maybeDumpEvidence = \
                ((options.dumpEvidence == "all") or
                 (options.dumpEvidence == "outliers") or
                 (options.dumpEvidence == "variants") and (len(variants) > 0))
            if maybeDumpEvidence:
                refId, refStart, refEnd = subWin
                refName = reference.idToName(refId)
                windowDirectory = os.path.join(
                    options.evidenceDirectory,
                    refName,
                    "%d-%d" % (refStart, refEnd))
                ev = ArrowEvidence.fromConsensus(css)
                if options.dumpEvidence != "outliers":
                    ev.save(windowDirectory)
                elif (np.max(ev.delta) > 20):
                    # Mathematically I don't think we should be seeing
                    # deltas > 6 in magnitude, but let's just restrict
                    # attention to truly bonkers outliers.
                    ev.save(windowDirectory)
        else:
            # insufficient coverage: emit the configured "no evidence" call
            css = ArrowConsensus.noCallConsensus(arrowConfig.noEvidenceConsensus,
                                                 subWin, intRefSeq)
        subConsensi.append(css)

    # 4) glue the subwindow consensus objects together to form the
    #    full window consensus
    css = join(subConsensi)

    # 5) Return
    return css, variants