logging.info

Here are examples of the Python API logging.info, taken from open source projects.
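
For reference, logging.info() writes a message at INFO level to the root logger. A minimal sketch, with illustrative messages that are not taken from any of the projects below:

import logging

# The root logger defaults to WARNING, so INFO messages are dropped
# unless the level is lowered first.
logging.basicConfig(level=logging.INFO)

logging.info('download finished')                          # plain message
logging.info('downloaded %s (%d bytes)', 'file.txt', 42)   # lazy %-style formatting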

172 Examples

Example 151

Project: jottalib Source File: cli.py
def download(argv=None):

    def download_jfsfile(remote_object, tofolder=None, checksum=False):
        'Helper function to get a jfsfile and store it in a local folder, optionally checksumming it. Returns boolean'
        if tofolder is None:
            tofolder = '.' # with no arguments, store in current dir
        total_size = remote_object.size
        if remote_object.state in (JFS.ProtoFile.STATE_CORRUPT, JFS.ProtoFile.STATE_INCOMPLETE):
            puts(colored.red('%s was NOT downloaded successfully - Incomplete file' % remote_object.name))
            return False
        topath = os.path.join(tofolder, remote_object.name)
        with open(topath, 'wb') as fh:
            bytes_read = 0
            puts(colored.white('Downloading: %s, size: %s \t' % (remote_object.name, 
                                                                 print_size(total_size, humanize=True))))   
            with ProgressBar(expected_size=total_size) as bar:
                for chunk_num, chunk in enumerate(remote_object.stream()):
                    fh.write(chunk)
                    bytes_read += len(chunk)
                    bar.show(bytes_read)
        if checksum:
            md5_lf = JFS.calculate_md5(open(topath, 'rb'))
            md5_jf = remote_object.md5
            logging.info('%s - Checksum for downloaded file' % md5_lf)
            logging.info('%s - Checksum for server file' % md5_jf)
            if md5_lf != md5_jf:
                puts(colored.blue('%s - Checksum for downloaded file' % md5_lf))
                puts(colored.blue('%s - Checksum for server file' % md5_jf))
                puts(colored.red('%s was NOT downloaded successfully - checksum mismatch' % remote_object.name))
                return False
            puts(colored.green('%s was downloaded successfully - checksum matched' % remote_object.name))
        return True

    if argv is None:
        argv = sys.argv[1:]
    parser = argparse.ArgumentParser(description='Download a file or folder from Jottacloud.')
    parser.add_argument('remoteobject',
                        help='The path to the file or folder that you want to download',
                       type=commandline_text)
    parser.add_argument('-l', '--loglevel',
                        help='Logging level. Default: %(default)s.',
                        choices=('debug', 'info', 'warning', 'error'),
                        default='warning')
    parser.add_argument('-c', '--checksum',
                        help='Verify checksum of file after download',
                        action='store_true' )
    #parser.add_argument('-r', '--resume',
    #                    help='Will not download files again if they already exist in the path',
    #                    action='store_true' )
    args = parse_args_and_apply_logging_level(parser, argv)
    jfs = JFS.JFS()

    if args.remoteobject.startswith('//'):
        # break out of root_folder
        root_folder = jfs.rootpath
        item_path = posixpath.join(root_folder, args.remoteobject[2:])
    else:
        root_folder = get_root_dir(jfs).path
        item_path = posixpath.join(root_folder, args.remoteobject)

    logging.info('Root folder path: %s' % root_folder)
    logging.info('Command line path to object: %s' % args.remoteobject)
    logging.info('Jotta path to object: %s' % item_path)
    remote_object = jfs.getObject(item_path)
    if isinstance(remote_object, JFS.JFSFile):
        if download_jfsfile(remote_object, checksum=args.checksum):
            logging.info('%r downloaded successfully', remote_object.path)
            return True
        else:
            puts(colored.red('%r download failed' % remote_object.path))
            return False

    else: #if it's not a file it has to be a folder
        incomplete_files = [] #Create a list where we can store incomplete files
        checksum_error_files = [] #Create a list where we can store checksum error files
        zero_files = [] #Create a list where we can store zero-byte files
        long_path = [] #Create a list where we can store files and folders skipped because the path is too long
        puts(colored.blue("Getting index for folder: %s" % remote_object.name))
        fileTree = remote_object.filedirlist().tree #Download the folder tree
        puts(colored.blue('Total number of folders to download: %d' % len(fileTree)))
        topdir = os.path.dirname(item_path)
        logging.info("topdir: %r", topdir)

        #Iterate through each folder
        for folder in fileTree:
            #We need to strip the account, device and mountpoint details from the folder path
            logging.debug("folder: %r", folder)

            _abs_folder_path = posixpath.join(JFS.JFS_ROOT, folder[1:])
            logging.debug("absolute folder path  : %r", _abs_folder_path)
            _rel_folder_path = _abs_folder_path[len(topdir)+1:]
            logging.info('relative folder path: %r', _rel_folder_path)

            if len(_rel_folder_path) > 250: #Windows limits paths to about 260 characters; 250 leaves a margin
                puts(colored.red('%s was NOT downloaded successfully - path too long' % _rel_folder_path))
                long_path.append(_rel_folder_path)
            else:
                logging.info('Entering a new folder: %s' % _rel_folder_path)
                if not os.path.exists(_rel_folder_path): #Create the folder locally if it doesn't exist
                    os.makedirs(_rel_folder_path)
                for _file in fileTree[folder]: #Enter the folder and download the files within
                    logging.info("file: %r", _file)
                    #This is the absolute path to the file that is going to be downloaded
                    abs_path_to_object = posixpath.join(topdir, _rel_folder_path, _file.name)
                    logging.info('Downloading the file from: %s' % abs_path_to_object)
                    if _file.state in (JFS.ProtoFile.STATE_CORRUPT, JFS.ProtoFile.STATE_INCOMPLETE):
                        #Corrupt and incomplete files will be skipped
                        puts(colored.red('%s was NOT downloaded successfully - Incomplete or corrupt file' % _file.name))
                        incomplete_files.append(posixpath.join(_rel_folder_path,_file.name))
                        continue
                    remote_object = jfs.getObject(abs_path_to_object)
                    remote_file = remote_object
                    total_size = remote_file.size
                    if total_size == 0: # Indicates a zero-byte file
                        puts(colored.red('%s was NOT downloaded successfully - zero file' % remote_file.name))
                        zero_files.append(posixpath.join(_rel_folder_path,remote_file.name))
                        continue
                    if len(posixpath.join(_rel_folder_path,remote_file.name)) > 250: #Windows limits paths to about 260 characters; 250 leaves a margin
                        puts(colored.red('%s was NOT downloaded successfully - path too long' % remote_file.name))
                        long_path.append(posixpath.join(_rel_folder_path,remote_file.name))
                        continue
                    #TODO: implement args.resume:
                    if not download_jfsfile(remote_file, tofolder=_rel_folder_path, checksum=args.checksum):
                        # download failed
                        puts(colored.red("Download failed: %r" % remote_file.path))
        #Incomplete files
        if len(incomplete_files)> 0:
            with codecs.open("incomplete_files.txt", "w", "utf-8") as text_file:
                for item in incomplete_files:
                    text_file.write("%s\n" % item)
        print('Incomplete files (not downloaded): %d' % len(incomplete_files))
        for _files in incomplete_files:
            logging.info("Incomplete: %r", _files)

        #Checksum error files
        if len(checksum_error_files)> 0:
            with codecs.open("checksum_error_files.txt", "w", "utf-8") as text_file:
                for item in checksum_error_files:
                    text_file.write("%s\n" % item)
        print('Files with checksum error (not downloaded): %d' % len(checksum_error_files))
        for _files in checksum_error_files:
            logging.info("Checksum error: %r", _files)

        #zero files
        if len(zero_files)> 0:
            with codecs.open("zero_files.txt", "w", "utf-8") as text_file:
                for item in zero_files:
                    text_file.write("%s\n" % item)
        print('Files with zero size (not downloaded): %d' % len(zero_files))
        for _files in zero_files:
            logging.info("Zero sized files: %r", _files)

        #long path
        if len(long_path)> 0:
            with codecs.open("long_path.txt", "w", "utf-8") as text_file:
                for item in long_path:
                    text_file.write("%s\n" % item)
        print('Folders and files not downloaded because the path is too long: %d' % len(long_path))
        for _files in long_path:
            logging.info("Path too long: %r", _files)
        return True
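
This example mixes eager formatting (logging.info('%s - Checksum for downloaded file' % md5_lf)) with lazy formatting (logging.info('%r downloaded successfully', remote_object.path)). A small sketch of the difference, using a made-up path:

import logging

logging.basicConfig(level=logging.INFO)
path = '/tmp/example.txt'  # illustrative value

# Eager: the message string is built before the call, even when INFO is disabled.
logging.info('downloaded %r' % path)

# Lazy: arguments are passed separately and only interpolated
# if the record is actually emitted.
logging.info('downloaded %r', path)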

Example 152

Project: Quasar Source File: splunk_pulsar_return.py
Function: returner
def returner(ret):
    # Customized to split up the change events and send to Splunk.
    opts = _get_options()
    logging.info('Options: %s' % json.dumps(opts))
    http_event_collector_key = opts['token']
    http_event_collector_host = opts['indexer']
    hec_ssl = opts['http_event_server_ssl']
    proxy = opts['proxy']
    timeout = opts['timeout']
    extras = opts['extras']
    # Set up the collector
    hec = http_event_collector(http_event_collector_key, http_event_collector_host, http_event_server_ssl=hec_ssl, proxy=proxy, timeout=timeout)
    # Check whether or not data is batched:
    if isinstance(ret, dict):  # Batching is disabled
        data = [ret]
    else:
        data = ret
    # Sometimes there are duplicate events in the list. Dedup them:
    data = _dedupList(data)
    minion_id = __opts__['id']
    fqdn = __grains__['fqdn']
    master = __grains__['master']
    try:
        fqdn_ip4 = __grains__['fqdn_ip4'][0]
    except IndexError:
        fqdn_ip4 = __grains__['ipv4'][0]

    for item in data:
        alert = item['return']
        event = {}
        payload = {}
        if 'change' in alert:  # Linux, normal pulsar
            # The second half of the change will be '|IN_ISDIR' for directories
            change = alert['change'].split('|')[0]
            # Skip the IN_IGNORED events
            if change == 'IN_IGNORED':
                continue
            if len(alert['change'].split('|')) == 2:
                object_type = 'directory'
            else:
                object_type = 'file'

            actions = defaultdict(lambda: 'unknown')
            actions['IN_ACCESS'] = 'read'
            actions['IN_ATTRIB'] = 'acl_modified'
            actions['IN_CLOSE_NOWRITE'] = 'read'
            actions['IN_CLOSE_WRITE'] = 'read'
            actions['IN_CREATE'] = 'created'
            actions['IN_DELETE'] = 'deleted'
            actions['IN_DELETE_SELF'] = 'deleted'
            actions['IN_MODIFY'] = 'modified'
            actions['IN_MOVE_SELF'] = 'modified'
            actions['IN_MOVED_FROM'] = 'modified'
            actions['IN_MOVED_TO'] = 'modified'
            actions['IN_OPEN'] = 'read'
            actions['IN_MOVE'] = 'modified'
            actions['IN_CLOSE'] = 'read'

            event['action'] = actions[change]
            event['change_type'] = 'filesystem'
            event['object_category'] = object_type
            event['object_path'] = alert['path']
            event['file_name'] = alert['name']
            event['file_path'] = alert['tag']

            if alert['stats']:  # Gather more data if the change wasn't a delete
                stats = alert['stats']
                event['object_id'] = stats['inode']
                event['file_acl'] = stats['mode']
                event['file_create_time'] = stats['ctime']
                event['file_modify_time'] = stats['mtime']
                event['file_size'] = stats['size'] / 1024.0  # Convert bytes to kilobytes
                event['user'] = stats['user']
                event['group'] = stats['group']
                if object_type == 'file':
                    event['file_hash'] = alert['checksum']
                    event['file_hash_type'] = alert['checksum_type']

        else: # Windows, win_pulsar
            change = alert['Accesses']
            if alert['Hash'] == 'Item is a directory':
                object_type = 'directory'
            else:
                object_type = 'file'

            actions = defaultdict(lambda: 'unknown')
            actions['Delete'] = 'deleted'
            actions['Read Control'] = 'read'
            actions['Write DAC'] = 'acl_modified'
            actions['Write Owner'] = 'modified'
            actions['Synchronize'] = 'modified'
            actions['Access Sys Sec'] = 'read'
            actions['Read Data'] = 'read'
            actions['Write Data'] = 'modified'
            actions['Append Data'] = 'modified'
            actions['Read EA'] = 'read'
            actions['Write EA'] = 'modified'
            actions['Execute/Traverse'] = 'read'
            actions['Read Attributes'] = 'read'
            actions['Write Attributes'] = 'acl_modified'
            actions['Query Key Value'] = 'read'
            actions['Set Key Value'] = 'modified'
            actions['Create Sub Key'] = 'created'
            actions['Enumerate Sub-Keys'] = 'read'
            actions['Notify About Changes to Keys'] = 'read'
            actions['Create Link'] = 'created'
            actions['Print'] = 'read'

            event['action'] = actions[change]
            event['change_type'] = 'filesystem'
            event['object_category'] = object_type
            event['object_path'] = alert['Object Name']
            event['file_name'] = os.path.basename(alert['Object Name'])
            event['file_path'] = os.path.dirname(alert['Object Name'])
            # TODO: Should we be reporting 'EntryType' or 'TimeGenerated'?
            #   EntryType reports whether the attempt to change was successful.

        event.update({'master': master})
        event.update({'minion_id': minion_id})
        event.update({'dest_host': fqdn})
        event.update({'dest_ip': fqdn_ip4})

        for extra in extras:
            extra_name = 'custom_' + extra
            extra_value = __salt__['config.get'](extra, '')
            if isinstance(extra_value, str):
                event.update({extra_name: extra_value})

        payload.update({'host': fqdn})
        payload.update({'index': opts['index']})
        payload.update({'sourcetype': opts['sourcetype']})
        payload.update({'event': event})
        hec.batchEvent(payload)

    hec.flushBatch()
    return

Example 153

Project: dockit Source File: install.py
def talktoDocker(
        pulloption, baseimage, imagetag, numcontainers, dockerfile,
        dockerrepo, buildoption, startoption, gluster_mode,
        gluster_install, gluster_volume):

    new_image_tag = ''
    flag = flag1 = gluster_flag = 0
    cons_ids = []
    logger.debug(
        "Docker image name :%s \t Image Tag:%s \t number of Containers:%s",
        baseimage, imagetag, numcontainers)

    try:
        connret = dockit.DockerCli(
            "connect", pulloption, baseimage, imagetag, numcontainers,
            dockerfile, dockit_log_file, dockerrepo, buildoption)
        if connret:
            logger.info("Successfully connected to docker daemon: \n"
                        "\t \t \t pull/build/start containers accordingly.")

        else:
            logger.error("Connection return failed..exiting.")

            sys.exit(1)

        if pulloption:
            logger.debug("Proceeding with actions on Image:%s", baseimage)
            # if dockerrepo == None:
            #    logger.debug(
            #       "Base image pulling is not supported with "
            #       "this version of dockit \n"
            #       " please provide dockerrepo")
            #    sys.exit(1)
            pullret = connret.pullC()
            if pullret:
                logger.info("Done with pulling.. continuing")
                if dockerrepo and baseimage:
                    new_image_tag = dockerrepo+'/'+baseimage+':'+'latest'
                    flag1 = 1
                logger.debug("new_image_tag:%s", new_image_tag)
            else:
                logger.error("Error when pulling ")
        else:
            logger.info("Not trying to pull image:%s.. continuing", baseimage)
        if buildoption:
            logger.debug("Continuing build process with %s", dockerfile)

            built_image = connret.buildC()
            if built_image:
                logger.info(
                    " Image built from docker file :%s with id:%s and tag:%s",
                    built_image, built_image['Id'], built_image['RepoTags'])
                if imagetag:
                    logger.debug("Image tag:%s", imagetag)
                    new_image_tag = imagetag+':latest'
                    flag = 1
                logger.debug("new_image_tag:%s", new_image_tag)

            else:
                logger.error(
                    "Failed when building from docker file:\n"
                    "Check docker file path and options ")

        else:
            logger.debug("Not trying to build the image from docker file")

        if startoption:

            if flag or flag1:
                logger.debug("Flag:%s \t Flag1:%s image tag:\t %s",
                             flag, flag1, new_image_tag)

            else:
                if baseimage and imagetag:
                    new_image_tag = baseimage+':'+imagetag
                logger.debug("Using image tag :%s", new_image_tag)

            ret_exist = connret.image_by_tag(new_image_tag)

            if ret_exist:
                logger.debug("Image exists :%s with ID:%s  ",
                             ret_exist, ret_exist['Id'])
                logger.info("Going to run the containers")

                if gluster_mode:
                    if gluster_volume:
                        gluster_flag = 1
                    else:
                        gluster_flag = 0
                runret = connret.runC(
                    ret_exist['RepoTags'][0], gluster_flag, gluster_config, )
                if runret:
                    if not connret.container_ips:
                        logger.critical(
                            "Something went wrong when spawning "
                            "containers: exiting")
                        sys.exit(1)

                    logger.info(
                        "Containers are running successfully.."
                        "please login and work!!!!")
                    print (60 * '-')
                    logger.info("Details about running containers..\n")
                    logger.info(
                        "Container IPs \t : %s\n ", connret.container_ips)

                    for c in connret.cons_ids:
                        c_id = dict(c)['Id']
                        cons_ids.append(c_id)
                    logger.info("Container Ids \t : %s \n ", cons_ids)
                    print (60 * '-')
                    # todo : Its possible to auto login to these containers
                    # via below , commenting it out for now
                    # loginC(connret.container_ips, connret.cons_ids)
                    if gluster_mode:
                        gluster_cli = create_vol.glusteractions()

                        if gluster_cli:
                            logger.debug("Successfully created gluster client")
                            run_helper.rh_config_dict[
                                'SERVER_IP_ADDRS'] = connret.container_ips
                        else:
                            logger.error("Failed to create gluster client")
                        run_helper.con_pass = getpass.getpass()
                        if gluster_install:
                            ginst = gluster_config.get(
                                'GLUSTER_VERSION', '3.5')
                            if ginst:
                                gluster_cli.gluster_install(ginst)
                            else:
                                logger.debug(
                                    "Failed to get Gluster Version from dict.")
                        else:
                            logger.info("Gluster installation not required")
                        if gluster_volume:

                            run_helper.rh_config_dict[
                                'VOL_TYPE'] = gluster_config['VOL_TYPE']
                            run_helper.rh_config_dict['SERVER_EXPORT_DIR'] = \
                                gluster_config['SERVER_EXPORT_DIR']
                            run_helper.rh_config_dict['TRANS_TYPE'] = 'tcp'
                            run_helper.rh_config_dict[
                                'VOLNAME'] = gluster_config['VOLNAME']
                            logger.debug(
                                "Successfully filled configuration details:%s",
                                run_helper.rh_config_dict)
                            gluster_cli.create_gluster_volume(start=True)
                            logging.info(
                                'Gluster Volume operations done! '
                                'Please mount volume :%s in your client',
                                gluster_config['VOLNAME'])
                        else:
                            logger.debug(
                                "Gluster Volume creation not required")
                    else:
                        logger.info("Done!")
                else:
                    logger.error("Failed when starting/inspecting containers")
            else:
                logger.error(
                    "Image + tag does not exist.. "
                    "I can't start a container from this..exiting")

                sys.exit(1)
        else:
            logger.debug("Not trying to start containers..")
            logger.info("Dockit finished...")
            return True

    except Exception as e:
        logger.critical("Failed on :%s", e)
        sys.exit(1)
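
Most of the calls in this example go through a named logger object (logger.info, logger.debug), with a single module-level logging.info near the end. A minimal sketch of that distinction, assuming nothing about how dockit actually configures its logger:

import logging

logging.basicConfig(level=logging.INFO)

# Named logger, the pattern behind the logger.* calls above.
logger = logging.getLogger(__name__)
logger.info('sent via the %r logger', __name__)

# Module-level functions such as logging.info() always use the root logger,
# so handlers or levels set only on `logger` do not apply to this call.
logging.info('sent via the root logger')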

Example 154

Project: centinel Source File: cli.py
def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction,
              exclude_list, shuffle_lists, vm_num, vm_index, reduce_vp):
    """
    For each VPN, check if there are experiments and scan with it if
    necessary

    Note: the expected directory structure is
    args.directory
    -----vpns (contains the OpenVPN config files)
    -----configs (contains the Centinel config files)
    -----exps (contains the experiments directories)

    :param directory: root directory that contains vpn configs and
                      centinel client configs
    :param auth_file: a text file with username at first line and
                      password at second line
    :param crt_file: optional root certificate file
    :param tls_auth: additional key
    :param key_direction: must specify if tls_auth is used
    :param exclude_list: optional list of excluded countries
    :param shuffle_lists: shuffle vpn list if set true
    :param vm_num: number of VMs that are running currently
    :param vm_index: index of current VM
    :param reduce_vp: reduce number of vantage points
    :return:
    """

    logging.info("Starting to run the experiments for each VPN")
    logging.warn("Excluding vantage points from: %s" % exclude_list)

    # iterate over each VPN
    vpn_dir = return_abs_path(directory, "vpns")
    conf_dir = return_abs_path(directory, "configs")
    home_dir = return_abs_path(directory, "home")
    if auth_file is not None:
        auth_file = return_abs_path(directory, auth_file)
    if crt_file is not None:
        crt_file = return_abs_path(directory, crt_file)
    if tls_auth is not None:
        tls_auth = return_abs_path(directory, tls_auth)
    conf_list = sorted(os.listdir(conf_dir))

    # determine VPN provider
    vpn_provider = None
    if "hma" in directory:
        vpn_provider = "hma"
    elif "ipvanish" in directory:
        vpn_provider = "ipvanish"
    elif "purevpn" in directory:
        vpn_provider = "purevpn"
    elif "vpngate" in directory:
        vpn_provider = "vpngate"
    if vpn_provider:
        logging.info("Detected VPN provider is %s" % vpn_provider)
    else:
        logging.warning("Cannot determine VPN provider!")

    # reduce size of list if reduce_vp is true
    if reduce_vp:
        logging.info("Reducing list size. Original size: %d" % len(conf_list))
        country_asn_set = set()
        reduced_conf_set = set()
        for filename in conf_list:
            centinel_config = os.path.join(conf_dir, filename)
            config = centinel.config.Configuration()
            config.parse_config(centinel_config)
            vp_ip = os.path.splitext(filename)[0]

            try:
                meta = centinel.backend.get_meta(config.params, vp_ip)
                if 'country' in meta and 'as_number' in meta \
                        and meta['country'] and meta['as_number']:
                    country_asn = '_'.join([meta['country'], meta['as_number']])
                    if country_asn not in country_asn_set:
                        country_asn_set.add(country_asn)
                        reduced_conf_set.add(filename)
                else:
                    # run this endpoint if missing info
                    reduced_conf_set.add(filename)
            except:
                logging.warning("Failed to geolocate %s" % vp_ip)
                reduced_conf_set.add(filename)

        conf_list = list(reduced_conf_set)
        logging.info("List size reduced. New size: %d" % len(conf_list))

    # sort file list to ensure the same filename sequence in each VM
    conf_list = sorted(conf_list)

    # only select its own portion according to vm_num and vm_index
    chunk_size = len(conf_list) // vm_num
    last_chunk_additional = len(conf_list) % vm_num
    start_pointer = 0 + (vm_index - 1) * chunk_size
    end_pointer = start_pointer + chunk_size
    if vm_index == vm_num:
        end_pointer += last_chunk_additional
    conf_list = conf_list[start_pointer:end_pointer]

    if shuffle_lists:
        shuffle(conf_list)

    number = 1
    total = len(conf_list)

    external_ip = get_external_ip()
    if external_ip is None:
        logging.error("No network connection, exiting...")
        return

    # get nameservers that should be excluded
    local_nameservers = dns.resolver.Resolver().nameservers

    for filename in conf_list:
        # Check network connection first
        time.sleep(5)
        logging.info("Checking network connectivity...")
        current_ip = get_external_ip()
        if current_ip is None:
            logging.error("Network connection lost!")
            break
        elif current_ip != external_ip:
            logging.error("VPN still connected! IP: %s" % current_ip)
            if len(openvpn.OpenVPN.connected_instances) == 0:
                logging.error("No active OpenVPN instance found! Exiting...")
                break
            else:
                logging.warn("Trying to disconnect VPN")
                for instance in openvpn.OpenVPN.connected_instances:
                    instance.stop()
                    time.sleep(5)

                current_ip = get_external_ip()
                if current_ip is None or current_ip != external_ip:
                    logging.error("Stopping VPN failed! Exiting...")
                    break

            logging.info("VPN disconnected successfully")

        # start centinel for this endpoint
        logging.info("Moving onto (%d/%d) %s" % (number, total, filename))

        number += 1
        vpn_config = os.path.join(vpn_dir, filename)
        centinel_config = os.path.join(conf_dir, filename)

        # before starting the VPN, check if there are any experiments
        # to run
        config = centinel.config.Configuration()
        config.parse_config(centinel_config)

        # assuming that each VPN config file has a name like:
        # [ip-address].ovpn, we can extract IP address from filename
        # and use it to geolocate and fetch experiments before connecting
        # to VPN.
        vpn_address, extension = os.path.splitext(filename)
        country = None
        try:
            meta = centinel.backend.get_meta(config.params,
                                             vpn_address)
            if 'country' in meta:
                country = meta['country']
        except:
            logging.exception("%s: Failed to geolocate %s" % (filename, vpn_address))

        if country and exclude_list and country in exclude_list:
            logging.info("%s: Skipping this server (%s)" % (filename, country))
            continue

        # try setting the VPN info (IP and country) to get appropriate
        # experiments and input data.
        try:
            centinel.backend.set_vpn_info(config.params, vpn_address, country)
        except Exception as exp:
            logging.exception("%s: Failed to set VPN info: %s" % (filename, exp))

        logging.info("%s: Synchronizing." % filename)
        try:
            centinel.backend.sync(config.params)
        except Exception as exp:
            logging.exception("%s: Failed to sync: %s" % (filename, exp))

        if not experiments_available(config.params):
            logging.info("%s: No experiments available." % filename)
            try:
                centinel.backend.set_vpn_info(config.params, vpn_address, country)
            except Exception as exp:
                logging.exception("Failed to set VPN info: %s" % exp)
            continue

        # add exclude_nameservers to scheduler
        sched_path = os.path.join(home_dir, filename, "experiments", "scheduler.info")
        if os.path.exists(sched_path):
            with open(sched_path, 'r+') as f:
                sched_info = json.load(f)
                for task in sched_info:
                    if "python_exps" in sched_info[task] and "baseline" in sched_info[task]["python_exps"]:
                        if "params" in sched_info[task]["python_exps"]["baseline"]:
                            sched_info[task]["python_exps"]["baseline"]["params"]["exclude_nameservers"] = \
                                local_nameservers
                        else:
                            sched_info[task]["python_exps"]["baseline"]["params"] = \
                                {"exclude_nameservers": local_nameservers}

                # write back to same file
                f.seek(0)
                json.dump(sched_info, f, indent=2)
                f.truncate()

        logging.info("%s: Starting VPN." % filename)

        vpn = openvpn.OpenVPN(timeout=60, auth_file=auth_file, config_file=vpn_config,
                              crt_file=crt_file, tls_auth=tls_auth, key_direction=key_direction)

        vpn.start()
        if not vpn.started:
            logging.error("%s: Failed to start VPN!" % filename)
            vpn.stop()
            time.sleep(5)
            continue

        logging.info("%s: Running Centinel." % filename)
        try:
            client = centinel.client.Client(config.params, vpn_provider)
            centinel.conf = config.params
            # do not use client logging config
            # client.setup_logging()
            client.run()
        except Exception as exp:
            logging.exception("%s: Error running Centinel: %s" % (filename, exp))

        logging.info("%s: Stopping VPN." % filename)
        vpn.stop()
        time.sleep(5)

        logging.info("%s: Synchronizing." % filename)
        try:
            centinel.backend.sync(config.params)
        except Exception as exp:
            logging.exception("%s: Failed to sync: %s" % (filename, exp))

        # try setting the VPN info (IP and country) to the correct address
        # after sync is over.
        try:
            centinel.backend.set_vpn_info(config.params, vpn_address, country)
        except Exception as exp:
            logging.exception("Failed to set VPN info: %s" % exp)
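
Alongside logging.info, this example calls logging.exception inside its except blocks; that helper logs at ERROR level and appends the current traceback to the record. A short illustrative sketch:

import logging

logging.basicConfig(level=logging.INFO)

try:
    1 / 0
except ZeroDivisionError:
    # ERROR-level record with the traceback attached.
    logging.exception('Failed to divide')
    # Plain INFO-level note without a traceback, for comparison.
    logging.info('continuing after the error')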

Example 155

Project: pychemqt Source File: qtelemental.py
    def __init__(self, parent=None):
        super(qtelemental, self).__init__(parent)
        self.setWindowIcon(QtGui.QIcon(QtGui.QPixmap(
            os.environ["pychemqt"]+"/images/button/PeriodicTableIcon.png")))
        self.setWindowTitle(
            QtWidgets.QApplication.translate("pychemqt", "Periodic Table"))
        layout = QtWidgets.QGridLayout(self)
        layout.setSpacing(2)
        for i in range(1, 119):
            element = Elemental(i)
            b = boton(element, self)
            if element.group == 0:
                if i < 80:
                    j = i-58
                else:
                    j = i-90
                layout.addWidget(b, element.period+4, j+4, 1, 1)
            elif i == 57 or i == 89:
                layout.addWidget(b, element.period+4, element.group, 1, 1)
            else:
                layout.addWidget(b, element.period, element.group, 1, 1)
        layout.addItem(QtWidgets.QSpacerItem(
            10, 10, QtWidgets.QSizePolicy.Fixed, QtWidgets.QSizePolicy.Fixed),
            8, 0, 1, 20)
        layout.addItem(QtWidgets.QSpacerItem(
            10, 10, QtWidgets.QSizePolicy.Expanding,
            QtWidgets.QSizePolicy.Expanding), 12, 0, 1, 20)
        asterisco = QtWidgets.QLabel("*")
        asterisco.setFont(font20)
        asterisco.setAlignment(alignment)
        layout.addWidget(asterisco, 6, 3)
        asterisco2 = QtWidgets.QLabel("**")
        asterisco2.setFont(font20)
        asterisco2.setAlignment(alignment)
        layout.addWidget(asterisco2, 7, 3)
        asterisco_ = QtWidgets.QLabel("*")
        asterisco_.setFont(font20)
        asterisco_.setAlignment(alignment)
        layout.addWidget(asterisco_, 10, 2)
        asterisco2_ = QtWidgets.QLabel("**")
        asterisco2_.setFont(font20)
        asterisco2_.setAlignment(alignment)
        layout.addWidget(asterisco2_, 11, 2)

        self.Info = QtWidgets.QFrame()
        layout.addWidget(self.Info, 0, 5, 3, 3)
        layoutInfo = QtWidgets.QGridLayout(self.Info)
        layoutInfo.setSpacing(1)
        layoutInfo.setContentsMargins(2, 0, 2, 0)
        self.Info.setFrameShape(QtWidgets.QFrame.StyledPanel)
        self.Info.setFrameShadow(QtWidgets.QFrame.Raised)
        self.Info.setAutoFillBackground(True)
        self.Info.setPalette(palette)
        self.numero_atomico = QtWidgets.QLabel()
        self.numero_atomico.setToolTip(
            QtWidgets.QApplication.translate("pychemqt", "Atomic number"))
        layoutInfo.addWidget(self.numero_atomico, 1, 1)
        self.simbolo = QtWidgets.QLabel()
        self.simbolo.setAlignment(alignment)
        self.simbolo.setToolTip(QtWidgets.QApplication.translate(
            "pychemqt", "Symbol"))
        self.simbolo.setFont(font11)
        layoutInfo.addWidget(self.simbolo, 1, 3)
        self.nombre = QtWidgets.QLabel()
        self.nombre.setAlignment(QtCore.Qt.AlignCenter)
        self.nombre.setFont(font_title)
        layoutInfo.addWidget(self.nombre, 2, 1, 1, 3)
        font8 = QtGui.QFont()
        font8.setPointSize(8)
        self.peso_atomico = QtWidgets.QLabel()
        self.peso_atomico.setFont(font8)
        self.peso_atomico.setToolTip(QtWidgets.QApplication.translate(
            "pychemqt", "Atomic mass, g/mol"))
        layoutInfo.addWidget(self.peso_atomico, 3, 1)
        self.densidad = QtWidgets.QLabel()
        self.densidad.setFont(font8)
        self.densidad.setAlignment(alignment)
        self.densidad.setToolTip(QtWidgets.QApplication.translate(
            "pychemqt",
            "Density:\nBrown: Solid, kg/l\nBlue: Liquid, kg/l\n"
            "Green: Gas, g/l"))
        layoutInfo.addWidget(self.densidad, 3, 3)
        self.Tf = QtWidgets.QLabel()
        self.Tf.setFont(font8)
        self.Tf.setToolTip(QtWidgets.QApplication.translate(
            "pychemqt", "Melting Point, K"))
        layoutInfo.addWidget(self.Tf, 4, 1)
        self.Heat_f = QtWidgets.QLabel()
        self.Heat_f.setFont(font8)
        self.Heat_f.setToolTip(QtWidgets.QApplication.translate(
            "pychemqt", "Heat of fusion, kJ/mol"))
        self.Heat_f.setAlignment(alignment)
        layoutInfo.addWidget(self.Heat_f, 4, 3)
        self.Tb = QtWidgets.QLabel()
        self.Tb.setFont(font8)
        self.Tb.setToolTip(QtWidgets.QApplication.translate(
            "pychemqt", "Boiling Point, K"))
        layoutInfo.addWidget(self.Tb, 5, 1)
        self.Heat_b = QtWidgets.QLabel()
        self.Heat_b.setFont(font8)
        self.Heat_b.setToolTip(QtWidgets.QApplication.translate(
            "pychemqt", "Heat of vaporization, kJ/mol"))
        self.Heat_b.setAlignment(alignment)
        layoutInfo.addWidget(self.Heat_b, 5, 3)

        self.configuracion = QtWidgets.QLabel()
        self.configuracion.setFont(font7)
        self.configuracion.setAlignment(QtCore.Qt.AlignCenter)
        self.configuracion.setToolTip(QtWidgets.QApplication.translate(
            "pychemqt", "Electronic configuration"))
        layoutInfo.addWidget(self.configuracion, 6, 1, 1, 3)

        self.Info2 = QtWidgets.QFrame()
        layout.addWidget(self.Info2, 0, 8, 3, 3)
        layoutInfo2 = QtWidgets.QGridLayout(self.Info2)
        layoutInfo2.setSpacing(1)
        layoutInfo2.setContentsMargins(2, 0, 2, 0)
        self.Info2.setFrameShape(QtWidgets.QFrame.StyledPanel)
        self.Info2.setFrameShadow(QtWidgets.QFrame.Raised)
        self.Info2.setAutoFillBackground(True)
        self.Info2.setPalette(palette)
        self.atomic_volume = QtWidgets.QLabel()
        self.atomic_volume.setFont(font8)
        self.atomic_volume.setToolTip(QtWidgets.QApplication.translate(
            "pychemqt", "Atomic volume")+", cm³/mol")
        layoutInfo2.addWidget(self.atomic_volume, 1, 1)
        self.atomic_radius = QtWidgets.QLabel()
        self.atomic_radius.setFont(font8)
        self.atomic_radius.setToolTip(QtWidgets.QApplication.translate(
            "pychemqt", "Atomic radius") + ", pm")
        layoutInfo2.addWidget(self.atomic_radius, 2, 1)
        self.covalent_radius = QtWidgets.QLabel()
        self.covalent_radius.setFont(font8)
        self.covalent_radius.setToolTip(QtWidgets.QApplication.translate(
            "pychemqt", "Covalent radius") + ", pm")
        layoutInfo2.addWidget(self.covalent_radius, 3, 1)
        self.vanderWaals_radius = QtWidgets.QLabel()
        self.vanderWaals_radius.setFont(font8)
        self.vanderWaals_radius.setToolTip(QtWidgets.QApplication.translate(
            "pychemqt", "Van der Waals radius")+", pm")
        layoutInfo2.addWidget(self.vanderWaals_radius, 4, 1)
        self.ionic_radii = QtWidgets.QLabel()
        self.ionic_radii.setFont(font7)
        self.ionic_radii.setToolTip(QtWidgets.QApplication.translate(
            "pychemqt", "Ionic radii")+", pm")
        layoutInfo2.addWidget(self.ionic_radii, 5, 1, 1, 3)
        self.electronegativity = QtWidgets.QLabel()
        self.electronegativity.setFont(font8)
        self.electronegativity.setToolTip(QtWidgets.QApplication.translate(
            "pychemqt", "Electronegativity, Pauling scale"))
        self.electronegativity.setAlignment(
            QtCore.Qt.AlignRight | QtCore.Qt.AlignVCenter)
        layoutInfo2.addWidget(self.electronegativity, 1, 3)
        self.Cp = QtWidgets.QLabel()
        self.Cp.setFont(font8)
        self.Cp.setToolTip(QtWidgets.QApplication.translate(
            "pychemqt", "Specific heat capacity") + ", kJ/kgK")
        self.Cp.setAlignment(QtCore.Qt.AlignRight | QtCore.Qt.AlignVCenter)
        layoutInfo2.addWidget(self.Cp, 2, 3)
        self.k = QtWidgets.QLabel()
        self.k.setFont(font8)
        self.k.setToolTip(QtWidgets.QApplication.translate(
            "pychemqt", "Thermal conductivity") + ", W/mK")
        self.k.setAlignment(QtCore.Qt.AlignRight | QtCore.Qt.AlignVCenter)
        layoutInfo2.addWidget(self.k, 3, 3)
        self.first_ionization = QtWidgets.QLabel()
        self.first_ionization.setFont(font8)
        self.first_ionization.setToolTip(QtWidgets.QApplication.translate(
            "pychemqt", "First ionization energy") + ", kJ/mol")
        self.first_ionization.setAlignment(
            QtCore.Qt.AlignRight | QtCore.Qt.AlignVCenter)
        layoutInfo2.addWidget(self.first_ionization, 4, 3)

        self.oxidation = QtWidgets.QLabel()
        self.oxidation.setFont(font8)
        self.oxidation.setToolTip(QtWidgets.QApplication.translate(
            "pychemqt", "Oxidation states"))
        self.oxidation.setAlignment(
            QtCore.Qt.AlignCenter | QtCore.Qt.AlignVCenter)
        layoutInfo2.addWidget(self.oxidation, 6, 1, 1, 3)

        elemento = Elemental(1)
        self.actualizar(elemento)
        logging.info(QtWidgets.QApplication.translate(
            "pychemqt", "Starting periodic table tool"))

Example 156

Project: disco-dop Source File: runexp.py
def getgrammars(trees, sents, stages, testmaxwords, resultdir,
		numproc, lexmodel, simplelexsmooth, top):
	"""Read off the requested grammars."""
	tbfanout, n = treetransforms.treebankfanout(trees)
	logging.info('binarized treebank fan-out: %d #%d', tbfanout, n)
	mappings = [None for _ in stages]
	for n, stage in enumerate(stages):
		traintrees = trees
		stage.mapping = None
		prevn = 0
		if n and stage.prune:
			prevn = [a.name for a in stages].index(stage.prune)
		if stage.split:
			traintrees = [treetransforms.binarize(
					treetransforms.splitdiscnodes(
						tree.copy(True),
						stage.markorigin),
					childchar=':', dot=True, ids=grammar.UniqueIDs())
					for tree in traintrees]
			logging.info('split discontinuous nodes')
		if stage.collapse:
			traintrees, mappings[n] = treebanktransforms.collapselabels(
					[tree.copy(True) for tree in traintrees],
					tbmapping=treebanktransforms.MAPPINGS[
						stage.collapse[0]][stage.collapse[1]])
			logging.info('collapsed phrase labels for multilevel '
					'coarse-to-fine parsing to %s level %d',
					*stage.collapse)
		if n and mappings[prevn] is not None:
			# Given original labels A, convert CTF mapping1 A => C,
			# and mapping2 A => B to a mapping B => C.
			mapping1, mapping2 = mappings[prevn], mappings[n]
			if mappings[n] is None:
				stage.mapping = {a: mapping1[a] for a in mapping1}
			else:
				stage.mapping = {mapping2[a]: mapping1[a] for a in mapping2}
		if stage.mode.startswith('pcfg'):
			if tbfanout != 1 and not stage.split:
				raise ValueError('Cannot extract PCFG from treebank '
						'with discontinuities.')
		backtransform = extrarules = None
		if lexmodel and simplelexsmooth:
			extrarules = lexicon.simplesmoothlexicon(lexmodel)
		if stage.mode == 'mc-rerank':
			from . import _fragments
			gram = parser.DictObj(_fragments.getctrees(zip(trees, sents)))
			tree = gram.trees1.extract(0, gram.vocab)
			gram.start = tree[:tree.index(' ')].lstrip('(')
			with gzip.open('%s/%s.train.pickle.gz' % (resultdir, stage.name),
					'wb') as out:
				out.write(pickle.dumps(gram, protocol=-1))
		elif stage.dop:
			if stage.dop in ('doubledop', 'dop1'):
				if stage.dop == 'doubledop':
					(xgrammar, backtransform,
							altweights, fragments) = grammar.doubledop(
							traintrees, sents, binarized=stage.binarized,
							iterate=stage.iterate, complement=stage.complement,
							numproc=numproc, maxdepth=stage.maxdepth,
							maxfrontier=stage.maxfrontier,
							extrarules=extrarules)
				elif stage.dop == 'dop1':
					(xgrammar, backtransform,
							altweights, fragments) = grammar.dop1(
							traintrees, sents, binarized=stage.binarized,
							maxdepth=stage.maxdepth,
							maxfrontier=stage.maxfrontier,
							extrarules=extrarules)
				# dump fragments
				with codecs.getwriter('utf8')(gzip.open('%s/%s.fragments.gz' %
						(resultdir, stage.name), 'w')) as out:
					out.writelines('%s\t%d\n' % (a, len(b))
							for a, b in fragments)
			elif stage.dop == 'reduction':
				xgrammar, altweights = grammar.dopreduction(
						traintrees, sents, packedgraph=stage.packedgraph,
						extrarules=extrarules)
			else:
				raise ValueError('unrecognized DOP model: %r' % stage.dop)
			nodes = sum(len(list(a.subtrees())) for a in traintrees)
			if lexmodel and not simplelexsmooth:  # FIXME: altweights?
				xgrammar = lexicon.smoothlexicon(xgrammar, lexmodel)
			msg = grammar.grammarinfo(xgrammar)
			rules, lex = grammar.writegrammar(
					xgrammar, bitpar=stage.mode.startswith('pcfg-bitpar'))
			with codecs.getwriter('utf8')(gzip.open('%s/%s.rules.gz' % (
					resultdir, stage.name), 'wb')) as rulesfile:
				rulesfile.write(rules)
			with codecs.getwriter('utf8')(gzip.open('%s/%s.lex.gz' % (
					resultdir, stage.name), 'wb')) as lexiconfile:
				lexiconfile.write(lex)
			gram = Grammar(rules, lex, start=top,
					binarized=stage.binarized)
			for name in altweights:
				gram.register('%s' % name, altweights[name])
			logging.info('DOP model based on %d sentences, %d nodes, '
				'%d nonterminals', len(traintrees), nodes, len(gram.toid))
			logging.info(msg)
			if stage.estimator != 'rfe':
				gram.switch('%s' % stage.estimator)
			logging.info(gram.testgrammar()[1])
			if stage.dop in ('doubledop', 'dop1'):
				# backtransform keys are line numbers to rules file;
				# to see them together do:
				# $ paste <(zcat dop.rules.gz) <(zcat dop.backtransform.gz)
				with codecs.getwriter('utf8')(gzip.open(
						'%s/%s.backtransform.gz' % (resultdir, stage.name),
						'wb')) as out:
					out.writelines('%s\n' % a for a in backtransform)
				if n and stage.prune:
					msg = gram.getmapping(stages[prevn].grammar,
							striplabelre=None if stages[prevn].dop
								else re.compile('@.+$'),
							neverblockre=re.compile('.+}<'),
							splitprune=stage.splitprune and stages[prevn].split,
							markorigin=stages[prevn].markorigin,
							mapping=stage.mapping)
				else:
					# recoverfragments() relies on this mapping to identify
					# binarization nodes
					msg = gram.getmapping(None,
							striplabelre=None,
							neverblockre=re.compile('.+}<'),
							splitprune=False, markorigin=False,
							mapping=stage.mapping)
				logging.info(msg)
			elif n and stage.prune:  # dop reduction
				msg = gram.getmapping(stages[prevn].grammar,
						striplabelre=None if stages[prevn].dop
							and stages[prevn].dop not in ('doubledop', 'dop1')
							else re.compile('@[-0-9]+$'),
						neverblockre=re.compile(stage.neverblockre)
							if stage.neverblockre else None,
						splitprune=stage.splitprune and stages[prevn].split,
						markorigin=stages[prevn].markorigin,
						mapping=stage.mapping)
				if stage.mode == 'dop-rerank':
					gram.getrulemapping(
							stages[prevn].grammar, re.compile(r'@[-0-9]+\b'))
				logging.info(msg)
			# write prob models
			np.savez_compressed('%s/%s.probs.npz' % (resultdir, stage.name),
					**{name: mod for name, mod
						in zip(gram.modelnames, gram.models)})
		else:  # not stage.dop
			xgrammar = grammar.treebankgrammar(traintrees, sents,
					extrarules=extrarules)
			logging.info('induced %s based on %d sentences',
				('PCFG' if tbfanout == 1 or stage.split else 'PLCFRS'),
				len(traintrees))
			if stage.split or os.path.exists('%s/pcdist.txt' % resultdir):
				logging.info(grammar.grammarinfo(xgrammar))
			else:
				logging.info(grammar.grammarinfo(xgrammar,
						dump='%s/pcdist.txt' % resultdir))
			if lexmodel and not simplelexsmooth:
				xgrammar = lexicon.smoothlexicon(xgrammar, lexmodel)
			rules, lex = grammar.writegrammar(
					xgrammar, bitpar=stage.mode.startswith('pcfg-bitpar'))
			with codecs.getwriter('utf8')(gzip.open('%s/%s.rules.gz' % (
					resultdir, stage.name), 'wb')) as rulesfile:
				rulesfile.write(rules)
			with codecs.getwriter('utf8')(gzip.open('%s/%s.lex.gz' % (
					resultdir, stage.name), 'wb')) as lexiconfile:
				lexiconfile.write(lex)
			gram = Grammar(rules, lex, start=top)
			logging.info(gram.testgrammar()[1])
			if n and stage.prune:
				msg = gram.getmapping(stages[prevn].grammar,
					striplabelre=None,
					neverblockre=re.compile(stage.neverblockre)
						if stage.neverblockre else None,
					splitprune=stage.splitprune and stages[prevn].split,
					markorigin=stages[prevn].markorigin,
					mapping=stage.mapping)
				logging.info(msg)
		logging.info('wrote grammar to %s/%s.{rules,lex%s}.gz',
				resultdir, stage.name,
				',backtransform' if stage.dop in ('doubledop', 'dop1') else '')

		outside = None
		if stage.estimates in ('SX', 'SXlrgaps'):
			if stage.estimates == 'SX' and tbfanout != 1 and not stage.split:
				raise ValueError('SX estimate requires PCFG.')
			elif stage.mode != 'plcfrs':
				raise ValueError('estimates require parser w/agenda.')
			begin = time.clock()
			logging.info('computing %s estimates', stage.estimates)
			if stage.estimates == 'SX':
				outside = estimates.getpcfgestimates(gram, testmaxwords,
						gram.toid[trees[0].label])
			elif stage.estimates == 'SXlrgaps':
				outside = estimates.getestimates(gram, testmaxwords,
						gram.toid[trees[0].label])
			logging.info('estimates done. cpu time elapsed: %gs',
					time.clock() - begin)
			np.savez_compressed('%s/%s.outside.npz' % (
					resultdir, stage.name), outside=outside)
			logging.info('saved %s estimates', stage.estimates)
		elif stage.estimates:
			raise ValueError('unrecognized value; specify SX or SXlrgaps.')

		stage.update(grammar=gram, backtransform=backtransform,
				outside=outside)

	if any(stage.mapping is not None for stage in stages):
		with codecs.getwriter('utf8')(gzip.open('%s/mapping.json.gz' % (
				resultdir), 'wb')) as mappingfile:
			mappingfile.write(json.dumps([stage.mapping for stage in stages]))

Example 157

Project: Nuitka Source File: MainControl.py
def makeSourceDirectory(main_module):
    """ Get the full list of modules imported, create code for all of them.

    """
    # We deal with a lot of details here, but rather one by one, and split makes
    # no sense, pylint: disable=R0912,R0914

    assert main_module.isCompiledPythonModule()

    # The global context used to generate code.
    global_context = CodeGeneration.makeGlobalContext()

    assert main_module in ModuleRegistry.getDoneModules()

    # We might have chosen to include it as bytecode, and only compiled it for
    # fun, and to find its imports. In this case, now we just can drop it. Or
    # a module may shadow a frozen module, but be a different one, then we can
    # drop the frozen one.
    # TODO: This really should be done when the compiled module comes into
    # existence.
    for module in ModuleRegistry.getDoneUserModules():
        if module.isCompiledPythonModule():
            uncompiled_module = ModuleRegistry.getUncompiledModule(
                module_name     = module.getFullName(),
                module_filename = module.getCompileTimeFilename()
            )

            if uncompiled_module is not None:
                # We now need to decide which one to keep, compiled or uncompiled
                # module. Some uncompiled modules may have been asked by the user
                # or technically required. By default, frozen code if it exists
                # is preferred, as it will be from standalone mode adding it.
                if uncompiled_module.isUserProvided():
                    ModuleRegistry.removeDoneModule(module)
                else:
                    ModuleRegistry.removeUncompiledModule(uncompiled_module)

    # Let's check if the recurse-to modules are actually present, and warn the
    # user if one of those was not found.
    for any_case_module in Options.getShallFollowModules():
        for module in ModuleRegistry.getDoneUserModules():
            if module.getFullName() == any_case_module:
                break
        else:
            warning(
                "Didn't recurse to '%s', apparently not used." % \
                any_case_module
            )

    # Prepare code generation, i.e. execute finalization for it.
    for module in ModuleRegistry.getDoneModules():
        if module.isCompiledPythonModule():
            Finalization.prepareCodeGeneration(module)

    # Pick filenames.
    source_dir = getSourceDirectoryPath(main_module)

    module_filenames = pickSourceFilenames(
        source_dir = source_dir,
        modules    = ModuleRegistry.getDoneModules()
    )

    # First pass, generate code and use constants doing so, but prepare the
    # final code generation only, because constants code will be added at the
    # end only.
    prepared_modules = {}

    for module in ModuleRegistry.getDoneModules():
        if module.isCompiledPythonModule():
            cpp_filename = module_filenames[module]

            prepared_modules[cpp_filename] = CodeGeneration.prepareModuleCode(
                global_context = global_context,
                module         = module,
                module_name    = module.getFullName(),
            )

            # Main code constants need to be allocated already too.
            if module is main_module and not Options.shallMakeModule():
                prepared_modules[cpp_filename][1].getConstantCode(0)

    # Second pass, generate the actual module code into the files.
    for module in ModuleRegistry.getDoneModules():
        if module.isCompiledPythonModule():
            cpp_filename = module_filenames[module]

            template_values, module_context = prepared_modules[cpp_filename]

            source_code = CodeGeneration.generateModuleCode(
                module_context  = module_context,
                template_values = template_values
            )

            writeSourceCode(
                filename    = cpp_filename,
                source_code = source_code
            )

            if Options.isShowInclusion():
                info("Included compiled module '%s'." % module.getFullName())
        elif module.isPythonShlibModule():
            target_filename = Utils.joinpath(
                getStandaloneDirectoryPath(main_module),
                *module.getFullName().split('.')
            )

            if Utils.getOS() == "Windows":
                target_filename += ".pyd"
            else:
                target_filename += ".so"

            target_dir = Utils.dirname(target_filename)

            if not Utils.isDir(target_dir):
                Utils.makePath(target_dir)

            shutil.copy(
                module.getFilename(),
                target_filename
            )

            standalone_entry_points.append(
                (target_filename, module.getPackage())
            )
        elif module.isUncompiledPythonModule():
            pass
        else:
            assert False, module

    writeSourceCode(
        filename    = Utils.joinpath(
            source_dir,
            "__constants.c"
        ),
        source_code = ConstantCodes.getConstantsDefinitionCode(
            context = global_context
        )
    )

    helper_decl_code, helper_impl_code = CodeGeneration.generateHelpersCode(
        ModuleRegistry.getDoneUserModules()
    )

    writeSourceCode(
        filename    = Utils.joinpath(source_dir, "__helpers.h"),
        source_code = helper_decl_code
    )

    writeSourceCode(
        filename    = Utils.joinpath(source_dir, "__helpers.c"),
        source_code = helper_impl_code
    )

Example 158

Project: where-do-you-go Source File: geomodel.py
  @staticmethod
  def proximity_fetch(query, center, max_results=10, max_distance=0):
    """Performs a proximity/radius fetch on the given query.

    Fetches at most <max_results> entities matching the given query,
    ordered by ascending distance from the given center point, and optionally
    limited by the given maximum distance.

    This method uses a greedy algorithm that starts by searching high-resolution
    geocells near the center point and gradually looking in lower and lower
    resolution cells until max_results entities have been found matching the
    given query and no closer possible entities can be found.

    Args:
      query: A db.Query on entities of this kind.
      center: A geotypes.Point or db.GeoPt indicating the center point around
          which to search for matching entities.
      max_results: An int indicating the maximum number of desired results.
          The default is 10, and the larger this number, the longer the fetch
          will take.
      max_distance: An optional number indicating the maximum distance to
          search, in meters.

    Returns:
      The fetched entities, sorted in ascending order by distance to the search
      center.

    Raises:
      Any exceptions that google.appengine.ext.db.Query.fetch() can raise.
    """
    # TODO(romannurik): check for GqlQuery
    results = []

    searched_cells = set()

    # The current search geocell containing the lat,lon.
    cur_containing_geocell = geocell.compute(center)

    # The currently-being-searched geocells.
    # NOTES:
    #     * Start with max possible.
    #     * Must always be of the same resolution.
    #     * Must always form a rectangular region.
    #     * One of these must be equal to the cur_containing_geocell.
    cur_geocells = [cur_containing_geocell]

    closest_possible_next_result_dist = 0

    # Assumes both a and b are lists of (entity, dist) tuples, *sorted by dist*.
    # NOTE: This is an in-place merge, and there are guaranteed
    # no duplicates in the resulting list.
    def _merge_results_in_place(a, b):
      util.merge_in_place(a, b,
                        cmp_fn=lambda x, y: cmp(x[1], y[1]),
                        dup_fn=lambda x, y: x[0].key() == y[0].key())

    sorted_edges = [(0,0)]
    sorted_edge_distances = [0]

    while cur_geocells:
      closest_possible_next_result_dist = sorted_edge_distances[0]
      if max_distance and closest_possible_next_result_dist > max_distance:
        break

      cur_geocells_unique = list(set(cur_geocells).difference(searched_cells))

      # Run query on the next set of geocells.
      cur_resolution = len(cur_geocells[0])
      temp_query = copy.deepcopy(query)  # TODO(romannurik): is this safe?
      temp_query.filter('location_geocells IN', cur_geocells_unique)

      # Update results and sort.
      new_results = temp_query.fetch(1000)
      if DEBUG:
        logging.info('fetch complete for %s' % (','.join(cur_geocells_unique),))

      searched_cells.update(cur_geocells)

      # Begin storing distance from the search result entity to the
      # search center along with the search result itself, in a tuple.
      new_results = [(entity, geomath.distance(center, entity.location))
                     for entity in new_results]
      new_results = sorted(new_results, lambda dr1, dr2: cmp(dr1[1], dr2[1]))
      new_results = new_results[:max_results]

      # Merge new_results into results or the other way around, depending on
      # which is larger.
      if len(results) > len(new_results):
        _merge_results_in_place(results, new_results)
      else:
        _merge_results_in_place(new_results, results)
        results = new_results

      results = results[:max_results]

      sorted_edges, sorted_edge_distances = \
          util.distance_sorted_edges(cur_geocells, center)

      if len(results) == 0 or len(cur_geocells) == 4:
        # Either no results (in which case we optimize by not looking at
        # adjacents, go straight to the parent) or we've searched 4 adjacent
        # geocells, in which case we should now search the parents of those
        # geocells.
        cur_containing_geocell = cur_containing_geocell[:-1]
        cur_geocells = list(set([cell[:-1] for cell in cur_geocells]))
        if not cur_geocells or not cur_geocells[0]:
          break  # Done with search, we've searched everywhere.

      elif len(cur_geocells) == 1:
        # Get adjacent in one direction.
        # TODO(romannurik): Watch for +/- 90 degree latitude edge case geocells.
        nearest_edge = sorted_edges[0]
        cur_geocells.append(geocell.adjacent(cur_geocells[0], nearest_edge))

      elif len(cur_geocells) == 2:
        # Get adjacents in perpendicular direction.
        nearest_edge = util.distance_sorted_edges([cur_containing_geocell],
                                                   center)[0][0]
        if nearest_edge[0] == 0:
          # Was vertical, perpendicular is horizontal.
          perpendicular_nearest_edge = [x for x in sorted_edges if x[0] != 0][0]
        else:
          # Was horizontal, perpendicular is vertical.
          perpendicular_nearest_edge = [x for x in sorted_edges if x[0] == 0][0]

        cur_geocells.extend(
            [geocell.adjacent(cell, perpendicular_nearest_edge)
             for cell in cur_geocells])

      # We don't have enough items yet, keep searching.
      if len(results) < max_results:
        if DEBUG:
          logging.debug('have %d results but want %d results, '
                        'continuing search' % (len(results), max_results))
        continue

      if DEBUG:
        logging.debug('have %d results' % (len(results),))

      # If the currently max_results'th closest item is closer than any
      # of the next test geocells, we're done searching.
      current_farthest_returnable_result_dist = \
          geomath.distance(center, results[max_results - 1][0].location)
      if (closest_possible_next_result_dist >=
          current_farthest_returnable_result_dist):
        if DEBUG:
          logging.debug('DONE next result at least %f away, '
                        'current farthest is %f dist' %
                        (closest_possible_next_result_dist,
                         current_farthest_returnable_result_dist))
        break

      if DEBUG:
        logging.debug('next result at least %f away, '
                      'current farthest is %f dist' %
                      (closest_possible_next_result_dist,
                       current_farthest_returnable_result_dist))

    if DEBUG:
      logging.info('proximity query looked '
                   'in %d geocells' % len(searched_cells))

    return [entity for (entity, dist) in results[:max_results]
            if not max_distance or dist < max_distance]
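
The docstring above is effectively a how-to for the greedy geocell search, so here is a minimal usage sketch. It assumes the usual geomodel setup: a hypothetical Store model derived from GeoModel whose entities had their location set (and location_geocells populated, e.g. via update_location()) before being saved. Store, find_nearby_stores, and the import paths are illustrative, not part of where-do-you-go.

import logging

from google.appengine.ext import db

import geotypes                  # companion module assumed to provide geotypes.Point
from geomodel import GeoModel    # assuming geomodel.py (above) is importable like this

class Store(GeoModel):           # hypothetical example model
    name = db.StringProperty()

def find_nearby_stores(lat, lon):
    center = geotypes.Point(lat, lon)
    query = Store.all()          # any db.Query over Store entities will do
    results = Store.proximity_fetch(query, center,
                                    max_results=5,
                                    max_distance=2000)   # meters
    logging.info('found %d stores within 2km of (%f, %f)', len(results), lat, lon)
    return results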

Example 159

Project: schcp Source File: scphcp.py
Function: run
    def run(self):

        def recvFully(sock, byteslen):
            buf = b''
            while byteslen != 0:
                t = sock.recv(byteslen)
                if t == b'':
                    raise Exception('End connection in socksHandshake')
                buf += t
                byteslen -= len(t)
            return buf

        def tryOrd(b):
            try:
                return ord(b)
            except TypeError:
                return b

        def recvSocksAddr(sock):
            buf = sock.recv(1)  #   atyp
            if buf == b'\x01':
                hostname = recvFully(sock, 4)
                buf += hostname
                hostname = b'.'.join([str(tryOrd(b)).encode('iso-8859-1') for b in hostname])
            elif buf == b'\x03':
                hostnameLen = sock.recv(1)
                buf += hostnameLen
                hostname = recvFully(sock, ord(hostnameLen))
                buf += hostname
            elif buf == b'\x04':
                hostname = recvFully(sock, 16)
                buf += hostname
                hostname = b':'.join([str(tryOrd(b)).encode('iso-8859-1') for b in hostname])
            else:
                raise Exception('Unknown atyp')
            port = recvFully(sock, 2)
            buf += port
            port = tryOrd(port[0]) << 8 | tryOrd(port[1])
            return buf, (hostname, port)

        def localHandshake():
            self.client.recv(1) #   ver
            nmethods = self.client.recv(1)
            recvFully(self.client, ord(nmethods))
            self.client.send(b'\x05\x00')
            self.client.recv(1) #   ver
            cmd = self.client.recv(1)
            if cmd != b'\x01':
                raise Exception('Non connect cmd not implemented yet')
            self.client.recv(1) #   rsv
            buf, addr = recvSocksAddr(self.client)
            self.parent.connect(addr)
            self.hostname = addr[0]
            self.client.send(b'\x05\x00\x00\x01\x00\x00\x00\x00\x00\x00')

        def socksHandshake():
            clientBuf = b''
            clientBuf += self.client.recv(1) #   ver
            nmethods = self.client.recv(1)
            clientBuf += nmethods
            clientBuf += recvFully(self.client, ord(nmethods))
            self.parent.send(clientBuf)
            parentBuf = b''
            parentBuf += self.parent.recv(1) #   ver
            method = self.parent.recv(1)
            if method != b'\x00':
                raise Exception('Non no-authentication socks protocol not implemented yet')
            parentBuf += method
            self.client.send(parentBuf)
            clientBuf = b''
            clientBuf += self.client.recv(1) #   ver
            cmd = self.client.recv(1)
            if cmd != b'\x01':
                raise Exception('Non connect cmd not implemented yet')
            clientBuf += cmd
            clientBuf += self.client.recv(1) #   rsv
            buf, addr = recvSocksAddr(self.client)
            self.hostname = addr[0]
            clientBuf += buf
            self.parent.send(clientBuf)
            parentBuf = b''
            parentBuf += self.parent.recv(1) #   ver
            rep = self.parent.recv(1)
            if rep != b'\x00':
                logging.info('socksHandshake connect failed')
            parentBuf += self.parent.recv(1) #   rsv
            buf, addr = recvSocksAddr(self.parent)
            parentBuf += buf
            self.client.send(parentBuf)

        def connectHandshake():

            def recvHeaderFully(sock):
                endBytes = b'\r\n\r\n'
                idx = 0
                buf = b''
                while True:
                    t = sock.recv(65536)
                    if t == b'':
                        raise Exception('End connection in connectHandshake')
                    buf += t
                    try:
                        idx = buf.index(endBytes, idx)
                        idx += len(endBytes)
                        return buf[ : idx], buf[idx : ]
                    except ValueError:
                        idx = len(buf) - len(endBytes) + 1

            request, clientData = recvHeaderFully(self.client)
            method, host, protocol = re.split(b'\\s+', request[ : request.index(b'\r\n')])    #   Not strictly RFC-compliant parsing, but that's fine for a local proxy.
            if method.upper() != b'CONNECT':
                raise Exception('Not a CONNECT(HTTPS) proxy.')
            self.hostname, port = host.split(b':')
            self.parent.send(request)
            response, parentData = recvHeaderFully(self.parent)
            self.client.send(response)
            return clientData, parentData

        def startClientParentPipe():
            if clientData != b'':
                self.parent.send(clientData)
            pipe = Pipe()
            pipe.setSockPair(self.client, self.parent)
            pipe.start()

        def startParentClientPipe():
            if parentData != b'':
                self.client.send(parentData)
            self.setSockPair(self.parent, self.client)
            self.pipeData()

        def startPipe():
            startClientParentPipe()
            startParentClientPipe()

        def sslCheckCertification(packet):
            if tryOrd(packet[5]) != 0x0b:
                return
            certChainLen = (tryOrd(packet[9]) << 16) | (tryOrd(packet[10]) << 8) | tryOrd(packet[11])
            certChain = packet[12 : 12 + certChainLen]
            self.certStore.checkCert(self.hostname, certChain)

        def sslGetPacket(sock, data):
            missDataLen = 5 - len(data)
            if missDataLen > 0:
                data += recvFully(sock, missDataLen)
            missDataLen = 5 + ((tryOrd(data[3]) << 8) | tryOrd(data[4])) - len(data)
            if missDataLen > 0:
                data += recvFully(sock, missDataLen)
            packetLen = 5 + ((tryOrd(data[3]) << 8) | tryOrd(data[4]))
            return data[ : packetLen], data[packetLen : ]

        self.parent = socket.socket()
        try:
            if self.config['parentProxyType'] != ProxyType.NONE:
                self.parent.connect((self.config['parentProxyHost'], self.config['parentProxyPort']))
            if self.config['parentProxyType'] == ProxyType.NONE:
                localHandshake()
                clientData = b''
                parentData = b''
            elif self.config['parentProxyType'] == ProxyType.SOCKS:
                socksHandshake()
                clientData = b''
                parentData = b''
            elif self.config['parentProxyType'] == ProxyType.CONNECT:
                clientData, parentData = connectHandshake()
            else:
                assert(False)
            if clientData == b'':
                clientData = self.client.recv(65536)
            if tryOrd(clientData[0]) != 0x16:    #   Not SSL Handshake
                startPipe()
                return
            startClientParentPipe()
            while True:
                packet, parentData = sslGetPacket(self.parent, parentData)
                if tryOrd(packet[0]) == 0x17:    #   Start SSL Application Data
                    self.client.send(packet)
                    break
                sslCheckCertification(packet)
                self.client.send(packet)
            startParentClientPipe()
        except Exception:
            logging.exception('Exception in Tunnel.run:')
        finally:
            self.client.close()
            self.parent.close()
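
localHandshake() and socksHandshake() above walk through the SOCKS5 greeting and CONNECT exchange byte by byte. For orientation, here is a rough client-side sketch of the same exchange, i.e. the bytes those handlers expect to see on the wire; socks5_connect is a hypothetical helper and is not part of scphcp.py.

import logging
import socket
import struct

def socks5_connect(proxy_host, proxy_port, dest_host, dest_port):
    s = socket.socket()
    s.connect((proxy_host, proxy_port))
    s.sendall(b'\x05\x01\x00')            # greeting: ver=5, nmethods=1, no-auth
    if s.recv(2) != b'\x05\x00':          # server must accept the no-auth method
        raise Exception('proxy refused no-auth method')
    request = (b'\x05\x01\x00\x03' +      # ver=5, cmd=CONNECT, rsv, atyp=domain
               struct.pack('B', len(dest_host)) + dest_host.encode('ascii') +
               struct.pack('>H', dest_port))
    s.sendall(request)
    reply = s.recv(4 + 4 + 2)             # ver, rep, rsv, atyp + IPv4 addr + port (one recv is enough for a sketch)
    if reply[1:2] != b'\x00':
        logging.info('socks connect failed, rep=%r', reply[1:2])
    return s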

Example 160

Project: FaST-LMM Source File: heritability_spatial_correction.py
def heritability_spatial_correction(G_kernel, spatial_coor, spatial_iid, alpha_list, alpha_power, pheno, 
                     map_function = map, cache_folder=None, 
                     jackknife_count=500, permute_plus_count=10000, permute_times_count=10000, seed=0,
                     just_testing=False,  always_remote=False, allow_gxe2 = True
                     ):
    """
    Function measuring heritability with correction for spatial location.

    :param G_kernel: A kernel that tells the genetic similarity between all pairs of individuals. The kernel can be given 
      explicitly, for example with a :class:`.KernelData`. The kernel can also be given implicitly by providing a set of
      SNPs or the name of a BED file.
    :type G_kernel: a :class:`.KernelReader`, :class:`.SnpReader` or a string

    :param spatial_coor: The position of each individual given by two coordinates. Any units are allowed, but the two values
       must be compatible so that distance can be determined via Pythagoras' theorem. (So, longitude and latitude should
       not be used unless the locations are near the Equator.) 
    :type spatial_coor: an iid_count x 2 array

    :param spatial_iid: A ndarray of the iids. Each iid is a ndarray of two strings (a family ID and a case ID) that identifies an individual.
    :type spatial_iid: array of strings with shape [iid_count,2]

    :param alpha_list: a list of numbers to search to find the best alpha, which is the similarity scale. The similarity of two individuals
      is here defined as exp(-(distance_between/alpha)**alpha_power). If the closest individuals are 100 units apart and the farthest
      individuals are 4e6 units apart, a reasonable alpha_list might be: [int(v) for v in np.logspace(np.log10(100),np.log10(1e10), 100)]
      The function reports on the alphas chosen. If an extreme alpha is picked, change alpha_list to cover a wider range.
    :type alpha_list: list of numbers

    :param alpha_power: 2 (a good choice) means that similarity goes with area. 1 means with distance.
    :type alpha_power: number

    :param pheno: The target value(s) to predict. It can be a file name readable via :class:`SnpReader.Pheno` or any :class:`.SnpReader`.
    :type pheno: a :class:`.SnpReader` or string

    :param cache_folder: (default 'None') The name of a directory in which to save intermediate results. If 'None', then no intermediate results are saved.
    :type cache_folder: a string

    :param map_function: (default 'map') A function with the same inputs and functionality as Python's 'map' function.
       Can be used to run 'heritability_spatial_correction' on a cluster.
    :type map_function: a function

    :param jackknife_count: (default 500) The number of jackknife groups to use when calculating standard errors (SE). Changing to a small number, 2, 
       speeds up calculation at the cost of unusable SEs.
    :type jackknife_count: number

    :param permute_plus_count: (default 10000) The number of permutations used when calculating P values. Changing to a small number, 1, 
       speeds up calculation at the cost of unusable P values.
    :type permute_plus_count: number

    :param permute_times_count: (default 10000) The number of permutations used when calculating P values. Changing to a small number, 1, 
       speeds up calculation at the cost of unusable P values.
    :type permute_times_count: number

    :param seed: (default 0) The random seed used by jackknifing and permutation.
    :type seed: number

    :param just_testing: (default False) If true, skips actual LMM-related search and calculation.
    :type just_testing: bool

    :rtype: Pandas dataframe with one row per phenotype. Columns include "h2uncorr", "h2corr", etc.

    """

    ######################
    # Prepare the inputs
    ######################

    from fastlmm.inference.fastlmm_predictor import _kernel_fixup, _pheno_fixup
    G_kernel = _kernel_fixup(G_kernel, iid_if_none=None, standardizer=Unit())  # Create a kernel from an in-memory kernel, some snps, or a text file.
    pheno = _pheno_fixup(pheno,iid_if_none=G_kernel.iid, missing='NA') # Create phenotype data from in-memory data or a text file.

    if cache_folder is not None:
        pstutil.create_directory_if_necessary(cache_folder,isfile=False)

    
    jackknife_seed = seed or 1954692566L
    permute_plus_seed = seed or 2372373100L
    permute_times_seed = seed or 2574440128L

    ######################
    # Find 'alpha', the scale for distance
    ######################

    # create the alpha table (unless it is already there)
    alpha_table_fn = "{0}/alpha_table.{1}.txt".format(cache_folder,pheno.sid_count) # create a name for the alpha_table cache file
    if cache_folder is not None and os.path.exists(alpha_table_fn):
        alpha_table = pd.read_csv(alpha_table_fn, delimiter = '\t',index_col=False, comment=None)
    else:
        # create the list of arguments to run    
        arg_list = []   
        for phen_target in pheno.sid:
            pheno_one = pheno[:,pheno.col_to_index([phen_target])] # Look at only this pheno_target
            for alpha in alpha_list:
                            #pheno, G_kernel, spatial_coor, spatial_iid, alpha,     alpha_power,  (jackknife_index, jackknife_count, jackknife_seed),
                arg_tuple = (pheno_one, G_kernel, spatial_coor, spatial_iid, alpha, alpha_power, (-1,     0,     None),  
                             # (permute_plus_index, permute_plus_count, permute_plus_seed), (permute_times_index, permute_times_count, permute_times_seed) ,just_testing, do_uncorr, do_gxe2,               a2
                               (-1,     0,     None),                                       (-1,     0,     None),                                          just_testing, False,     True and allow_gxe2,   None)
                arg_list.append(arg_tuple)

        # Run "run_line" on each set of arguments and save to file
        return_list = map_function(work_item, arg_list) if len(arg_list)>1 or always_remote else map(work_item, arg_list)
        return_list = [line for line in return_list if line is not None] #Remove 'None' results
        alpha_table = pd.DataFrame(return_list)
        if cache_folder is not None:
            _write_csv(alpha_table,False,alpha_table_fn)

    # read the alpha table and find the best values
    grouped = alpha_table.groupby("phen")
    alpha_dict = {}
    for phen, phen_table in grouped:
        best_index_corr = phen_table['nLLcorr'].idxmin() # with Pandas, this returns the index in the parent table, not the group table
        best_index_gxe2 = phen_table['nLL_gxe2'].idxmin() if allow_gxe2 else 0
        alpha_corr = alpha_table.iloc[best_index_corr]['alpha']
        alpha_gxe2 = alpha_table.iloc[best_index_gxe2]['alpha']
        alpha_dict[phen] = alpha_corr, alpha_gxe2
    logging.info(alpha_dict)


    ######################
    # Use jackknifing to compute h2uncorr, SE, h2corr, SE, e2, SE, gxe2, SE
    ######################

    jackknife_count_actual = min(jackknife_count,G_kernel.iid_count)

    # Set up the run and do it (unless it has already been run)
    jackknife_table_fn = "{0}/jackknife.{1}.count{2}.txt".format(cache_folder, pheno.sid_count, jackknife_count_actual)
    if cache_folder is not None and os.path.exists(jackknife_table_fn):
        jackknife_table = pd.read_csv(jackknife_table_fn, delimiter = '\t',index_col=False, comment=None)
    else:
        arg_list = []
        for phen_target in pheno.sid:
            pheno_one = pheno[:,pheno.col_to_index([phen_target])] # Look at only this pheno_target
            alpha_corr, alpha_gxe2 = alpha_dict[phen_target]
            alpha_set = set([alpha_corr, alpha_gxe2]) #If these are the same, then only need to do half the work
            for alpha in alpha_set:
                logging.debug(alpha)
                do_uncorr = (alpha == alpha_corr)
                do_gxe2   = (alpha == alpha_gxe2) and allow_gxe2
                for jackknife in range(-1, jackknife_count_actual):
                               # pheno, G_kernel, spatial_coor, spatial_iid, alpha,     alpha_power, (jackknife_index, jackknife_count,         jackknife_seed),
                    arg_tuple = (pheno_one, G_kernel, spatial_coor, spatial_iid, alpha, alpha_power, (jackknife,       jackknife_count_actual,  jackknife_seed),
                                    # (permute_plus_index, permute_plus_count, permute_plus_seed), (permute_times_index, permute_times_count, permute_times_seed) ,just_testing, do_uncorr, do_gxe2, a2
                                    (-1,0,None),                                                 (-1,0,None),                                                    just_testing, do_uncorr, do_gxe2, None)
                    arg_list.append(arg_tuple)    

        # Run "run_line" on each set of arguments and save to file
        return_list = map_function(work_item, arg_list) if len(arg_list)>1 or always_remote else map(work_item, arg_list)
        return_list = [line for line in return_list if line is not None] #Remove 'None' results
        jackknife_table = pd.DataFrame(return_list)
        if cache_folder is not None:
            _write_csv(jackknife_table, False, jackknife_table_fn)


    # get the real (that is, unjackknifed) values    
    jackknife_table["diff"] = jackknife_table.h2uncorr-jackknife_table.h2corr # Compute the diff = h2uncorr-h2corr column
    results_both = jackknife_table[jackknife_table.jackknife_index==-1]  # Create a table of the real (non-jackknifed) results for both alphas (which may be the same)
    del results_both["jackknife_index"]
    results_corr = results_both[results_both.alpha == [alpha_dict[phen][0] for phen in results_both.phen]] #Create version for g+e's alpha
    results_gxe2 = results_both[results_both.alpha == [alpha_dict[phen][1] for phen in results_both.phen]] #Create version for gxe's alpha
    #remove unwanted columns
    for delcol in ["a2_gxe2","gxe2","nLL_gxe2","permute_plus_count","permute_plus_index","permute_plus_seed","permute_times_count","permute_times_index","permute_times_seed","jackknife_count","jackknife_seed"]:
        del results_corr[delcol]
    for delcol in ["a2","e2","h2corr","h2uncorr","nLLcorr","nLLuncorr","diff","permute_plus_count","permute_plus_index","permute_plus_seed","permute_times_count","permute_times_index","permute_times_seed","jackknife_count","jackknife_seed"]:
        del results_gxe2[delcol]

    #Use a pivottable to compute the jackknifed SE's
    corr_rows = np.logical_and(jackknife_table.jackknife_index!=-1,jackknife_table.alpha==[alpha_dict[phen][0] for phen in jackknife_table.phen])
    jk_table_corr = pd.pivot_table(jackknife_table[corr_rows], values=['h2uncorr','h2corr','diff','e2'], index=['phen'], columns=[], aggfunc=np.std)
    jk_table_corr["h2uncorr SE"] = jk_table_corr["h2uncorr"] * np.sqrt(jackknife_count_actual-1)
    jk_table_corr["h2corr SE"] = jk_table_corr["h2corr"] * np.sqrt(jackknife_count_actual-1)
    jk_table_corr["diff SE"] = jk_table_corr["diff"] * np.sqrt(jackknife_count_actual-1)
    jk_table_corr["e2 SE"] = jk_table_corr["e2"] * np.sqrt(jackknife_count_actual-1)
    del jk_table_corr["h2uncorr"]
    del jk_table_corr["h2corr"]
    del jk_table_corr["diff"]
    del jk_table_corr["e2"]
    gxe2_rows = np.logical_and(jackknife_table.jackknife_index!=-1,jackknife_table.alpha==[alpha_dict[phen][1] for phen in jackknife_table.phen])
    jk_table_gxe2 = pd.pivot_table(jackknife_table[gxe2_rows], values=['gxe2'], index=['phen'], columns=[], aggfunc=np.std)
    jk_table_gxe2["gxe2 SE"] = jk_table_gxe2["gxe2"] * np.sqrt(jackknife_count_actual-1)
    del jk_table_gxe2["gxe2"]

    #Join the SE's to the main results table
    results_corr = results_corr.join(jk_table_corr, on='phen')
    results_gxe2 = results_gxe2.join(jk_table_gxe2, on='phen')

    #compute pValue columns
    results_corr["P (diff=0)"] = stats.t.sf(results_corr["diff"]/results_corr["diff SE"],df=jackknife_count_actual-1)*2 #two sided
    results_corr["from SE, one-sided, P (e2=0)"] = stats.t.sf(results_corr["e2"]/results_corr["e2 SE"],df=jackknife_count_actual-1)
    results_gxe2["from SE, one-sided, P (gxe2=0)"] = stats.t.sf(results_gxe2["gxe2"]/results_gxe2["gxe2 SE"],df=jackknife_count_actual-1)   #one sided

    if cache_folder is not None:
        _write_csv(results_corr, False, "{0}/jackknife_corr_summary.{1}.jackknife{2}.txt".format(cache_folder, pheno.sid_count, jackknife_count_actual))
        _write_csv(results_gxe2, False, "{0}/jackknife_gxe2_summary.{1}.jackknife{2}.txt".format(cache_folder, pheno.sid_count, jackknife_count_actual))


    ######################
    # compute p(e2=0) via permutation
    ######################

    permplus_table_fn = "{0}/permutation.GPlusE.{1}.count{2}.txt".format(cache_folder, pheno.sid_count, permute_plus_count)
    if cache_folder is not None and os.path.exists(permplus_table_fn):
        permplus_table = pd.read_csv(permplus_table_fn, delimiter = '\t',index_col=False, comment=None)
    else:
        arg_list = []
        for phen_target in pheno.sid:
            pheno_one = pheno[:,pheno.col_to_index([phen_target])] # Look at only this pheno_target
            alpha_corr, alpha_gxe2 = alpha_dict[phen_target]
            for jackknife_index in range(-1,permute_plus_count):
                           # pheno, G_kernel, spatial_coor, spatial_iid, alpha,          alpha_power,    (jackknife_index, jackknife_count, jackknife_seed),
                arg_tuple = (pheno_one, G_kernel, spatial_coor, spatial_iid, alpha_corr, alpha_power, (-1,0,None),
                             # (permute_plus_index, permute_plus_count, permute_plus_seed), (permute_times_index, permute_times_count, permute_times_seed) ,just_testing, do_uncorr, do_gxe2, a2
                             (jackknife_index, permute_plus_count,permute_plus_seed),       (-1,0,None),                                                    just_testing, False,    False,    None)
                arg_list.append(arg_tuple)

        # Run "run_line" on each set of arguments and save to file
        return_list = map_function(work_item, arg_list) if len(arg_list)>1 or always_remote else map(work_item, arg_list)
        return_list = [line for line in return_list if line is not None] #Remove 'None' results
        permplus_table = pd.DataFrame(return_list)
        if cache_folder is not None:
            _write_csv(permplus_table, False, permplus_table_fn)


    #Create a table of the real nLL for each pheno
    real_result_permplus = permplus_table[permplus_table.permute_plus_index==-1][['phen','nLLcorr']]
    real_result_permplus.rename(columns={'nLLcorr':'nLLcorr_real'},inplace=True)
    real_result_permplus.set_index(['phen'],inplace=True)

    # Create a table of the permutation runs and add the real nLL to each row
    perm_table = permplus_table[permplus_table.permute_plus_index!=-1]
    result = perm_table.join(real_result_permplus, on='phen')
    result['P(e2)'] = [1.0 if b else 0.0 for b in result.nLLcorr <= result.nLLcorr_real] # create a column showing where the perm is better (or as good) as the real
    # Use pivottable to find the fraction of times when permutation is better
    pivot_table_plus = pd.pivot_table(result, values=['P(e2)'], index=['phen'], columns=[], aggfunc=np.mean)
    if cache_folder is not None:
        summary_permplus_table_fn = "{0}/summary.permutation.GPlusE.{1}.count{2}.txt".format(cache_folder, pheno.sid_count, permute_plus_count)
        _write_csv(pivot_table_plus, True, summary_permplus_table_fn)

    ################################################
    # compute p(gxe2=0) via permutation
    ################################################

    #Only process phenos for which gxe2 is not 0
    nonzero = set(results_gxe2[results_gxe2.gxe2 !=0].phen)
    permtimes_phenotypes = set(pheno.sid) & nonzero #intersection
    permtimes_table_list = []
    for phen_target in permtimes_phenotypes:
        permtimes_table_fn = "{0}/permutation.GxE/{1}.count{2}.txt".format(cache_folder, phen_target, permute_times_count)

        if cache_folder is not None and os.path.exists(permtimes_table_fn):
            permtime_results = pd.read_csv(permtimes_table_fn, delimiter = '\t',index_col=False, comment=None)
        else:
            arg_list = []
            pheno_one = pheno[:,pheno.col_to_index([phen_target])] # Look at only this pheno_target
            alpha_corr, alpha_gxe2 = alpha_dict[phen_target]
            a2 = float(permplus_table[permplus_table.phen==phen_target][permplus_table.permute_plus_index == -1]['a2'])
            for permute_index in range(-1,permute_times_count):
                           # pheno, G_kernel, spatial_coor, spatial_iid, alpha,          alpha_power, (jackknife_index, jackknife_count, jackknife_seed),
                arg_tuple = (pheno_one, G_kernel, spatial_coor, spatial_iid, alpha_gxe2, alpha_power, (-1,0,None),
                             # (permute_plus_index, permute_plus_count, permute_plus_seed), (permute_times_index, permute_times_count, permute_times_seed) ,just_testing, do_uncorr, do_gxe2, a2
                            (-1,0,None),                                                    (permute_index, permute_times_count,permute_times_seed),        just_testing, False,     allow_gxe2,    a2)
                arg_list.append(arg_tuple)    

            # Run "run_line" on each set of arguments and save to file
            return_list = map_function(work_item, arg_list) if len(arg_list)>1 or always_remote else map(work_item, arg_list)
            return_list = [line for line in return_list if line is not None] #Remove 'None' results
            permtime_results = pd.DataFrame(return_list)
            if cache_folder is not None:
                pstutil.create_directory_if_necessary(permtimes_table_fn)
                _write_csv(permtime_results,False,permtimes_table_fn)
        permtimes_table_list.append(permtime_results)

    if permtimes_table_list: #not empty
        permtimes_table = pd.concat(permtimes_table_list)
        logging.info(permtimes_table.head())

        #Create a table of the real nLL for each pheno
        real_result_permtimes = permtimes_table[permtimes_table.permute_times_index==-1][['phen','nLL_gxe2']]
        real_result_permtimes.rename(columns={'nLL_gxe2':'nLL_gxe2_real'},inplace=True)
        real_result_permtimes.set_index(['phen'],inplace=True)

        # Create a table of the permutation runs and add the real nLL to each row
        summary_permtimes_table_fn = "{0}/summary.permutation.GxE.{1}.count{2}.txt".format(cache_folder,len(permtimes_phenotypes), permute_times_count)

        perm_table = permtimes_table[permtimes_table.permute_times_index!=-1]
        resultx = perm_table.join(real_result_permtimes, on='phen')
        resultx['P(gxe2)'] = [1.0 if b else 0.0 for b in resultx.nLL_gxe2 <= resultx.nLL_gxe2_real] # create a column showing where the perm is better (or as good) as the real
        # Use pivottable to find the fraction of times when permutation is better
        pivot_table_times = pd.pivot_table(resultx, values=['P(gxe2)'], index=['phen'], columns=[], aggfunc=np.mean)
        if cache_folder is not None:
            _write_csv(pivot_table_times,True,summary_permtimes_table_fn)


    #######################
    # Create final table of results by combining the summary tables
    #######################

    #Rename some columns
    results_corr.rename(columns={"h2uncorr SE":"SE (h2uncorr)","h2corr SE":"SE (h2corr)","e2 SE":"SE (e2)"}, inplace=True)

    #Rename some columns and join results
    results_gxe2.rename(columns={"alpha":"alpha_gxe2","gxe2 SE":"SE (gxe2)","h2corr_raw":"h2corr_raw_gxe2"}, inplace=True)
    del results_gxe2['alpha_power']
    results_gxe2.set_index(["phen"],inplace=True)
    final0 = results_corr.join(results_gxe2, on='phen')

    #Rename some columns and join results
    pivot_table_plus.rename(columns={"P(e2)":"P(e2=0)"}, inplace=True)
    final1 = final0.join(pivot_table_plus, on='phen')

    #Rename some columns and join results
    if permtimes_table_list: #not empty
        pivot_table_times.rename(columns={"P(gxe2)":"P(gxe2=0)"}, inplace=True)
        final2 = final1.join(pivot_table_times, on='phen')
    else:
        final2 = final1.copy()
        final2["P(gxe2=0)"] = np.nan

    #Rename 'phen' and select final columns
    final2.rename(columns={"phen":"phenotype"}, inplace=True)
    final3 = final2[["phenotype","h2uncorr","SE (h2uncorr)","h2corr","SE (h2corr)","P (diff=0)","e2","SE (e2)","P(e2=0)","alpha","alpha_gxe2","gxe2","SE (gxe2)","P(gxe2=0)"]].copy()

    #Sort the phenotypes by name (case-insensitive)
    final3['lower'] = [pheno_one.lower() for pheno_one in final3.phenotype]
    final3.sort(['lower'],inplace=True)
    del final3['lower']

    if cache_folder is not None:
        summary_final_table_fn = "{0}/summary.final.{1}.{2}.{3}.{4}.txt".format(cache_folder, pheno.sid_count, jackknife_count_actual,permute_plus_count,permute_times_count)
        _write_csv(final3,False,summary_final_table_fn)
    
    return final3
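
The docstring above already documents every input, so the sketch below just shows one plausible call. The file names (all_chr.bed, pheno.txt, coords.txt) and the import location of heritability_spatial_correction are assumptions for illustration, and the tiny jackknife/permutation counts are only there to keep the sketch fast, which (as the docstring warns) makes the SEs and P values unusable.

import logging
import numpy as np
from pysnptools.snpreader import Bed, Pheno
# assumed import location for the function defined above
from fastlmm.association import heritability_spatial_correction

logging.basicConfig(level=logging.INFO)

def spatial_similarity(distance, alpha, alpha_power=2):
    # the similarity definition quoted in the alpha_list docstring
    return np.exp(-(distance / float(alpha)) ** alpha_power)

snps = Bed('all_chr.bed')                 # hypothetical BED file -> implicit G kernel
pheno = Pheno('pheno.txt')                # hypothetical phenotype file
spatial_coor = np.loadtxt('coords.txt')   # hypothetical iid_count x 2 coordinate file
spatial_iid = pheno.iid                   # reuse the phenotype iids

# similarity-scale candidates, in the spirit of the docstring's suggestion
alpha_list = [int(v) for v in np.logspace(np.log10(100), np.log10(1e10), 25)]

results = heritability_spatial_correction(
    snps, spatial_coor, spatial_iid,
    alpha_list, alpha_power=2, pheno=pheno,
    cache_folder='spatial_cache',
    jackknife_count=2, permute_plus_count=1, permute_times_count=1)
logging.info('result columns: %s', list(results.columns))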

Example 161

Project: FaST-LMM Source File: feature_selection_two_kernel.py
    def run_select(self, G0, G_bg, y, cov=None):
        """set up two kernel feature selection
    
        Parameters
        ----------
        G0 : numpy array of shape (num_ind, num_snps)
            Data matrix from which foreground snps will be selected

        G_bg : numpy array of shape (num_ind, num_snps)
            Data matrix containing background snps to be conditioned on

        y : numpy vector of shape (num_ind, )
            Vector of phenotypes

        cov : numpy array of shape (num_ind, num_covariates) or None
            Covariates to be used as fixed effects

        Returns
        -------
        best_k, feat_idx, best_mix, best_delta: tuple(int, np.array(int), float, float)
            best_k is the best number of SNPs selected,
            feat_idx is a np.array of integers denoting the indices of these snps,
            best_mix is the best mixing coefficient between foreground and background kernel,
            best_delta is the best regularization coefficient
        """

        num_ind = len(y)

        if cov is None:
            cov = np.ones((num_ind,1))
        else:
            logging.info("normalizing covariates")
            cov = cov.copy()
            cov = 1./np.sqrt((cov**2).sum() / float(cov.shape[0])) * cov
        cov.flags.writeable = False
        
        # normalize so that trace(K) = sum(diag(K)) = N
        norm_factor = 1./np.sqrt((G_bg**2).sum() / float(G_bg.shape[0]))

        # we copy in case G and G_bg are pointing to the same object
        G_bg = norm_factor * G_bg
       
        K_bg_full = G_bg.dot(G_bg.T)
        K_bg_full.flags.writeable = False
        
        # some asserts
        np.testing.assert_almost_equal(sum(np.diag(K_bg_full)), G_bg.shape[0])
        if self.debug:
            norm_factor_check = 1./np.sqrt(G_bg.shape[1])
            np.testing.assert_array_almost_equal(norm_factor, norm_factor_check, decimal=1)
            

        for kfold_idx, (train_idx, test_idx) in enumerate(KFold(num_ind, n_folds=self.n_folds, random_state=self.random_state, shuffle=True)):

            t0 = time.time()
            logging.info("running fold: %i" % kfold_idx)

            y_train = y.take(train_idx, axis=0)
            y_test = y.take(test_idx, axis=0)
            G0_train = G0.take(train_idx, axis=0)
            G0_test = G0.take(test_idx, axis=0)

            G_bg_train = G_bg.take(train_idx, axis=0)
            G_bg_test = G_bg.take(test_idx, axis=0)

            cov_train = cov.take(train_idx, axis=0)
            cov_test = cov.take(test_idx, axis=0)

            # write protect data
            y_train.flags.writeable = False
            y_test.flags.writeable = False
            G0_train.flags.writeable = False
            G0_test.flags.writeable = False
            G_bg_train.flags.writeable = False
            G_bg_test.flags.writeable = False
            cov_train.flags.writeable = False
            cov_test.flags.writeable = False

            # precompute background kernel
            K_bg_train = K_bg_full.take(train_idx, axis=0).take(train_idx, axis=1) 
            K_bg_train.flags.writeable = False

            if self.measure != "mse":
                K_bg_test = K_bg_full.take(test_idx, axis=0).take(test_idx, axis=1)
                K_bg_test.flags.writeable = False

            # rank features
            if self.order_by_lmm:
                logging.info("using linear mixed model to rank features")
                t0 = time.time()
                gwas = FastGwas(G_bg_train, G0_train, y_train, delta=None, train_pcs=None, mixing=0.0, cov=cov_train)
                gwas.run_gwas()
                _pval = gwas.p_values
                logging.info("time taken: %s" % (str(time.time()-t0)))
            else:
                logging.info("using linear regression to rank features")
                _F,_pval = lin_reg.f_regression_block(lin_reg.f_regression_cov_alt, G0_train, y_train, blocksize=10000, C=cov_train)

            feat_idx = np.argsort(_pval)
            
            for k_idx, max_k in enumerate(self.grid_k):

                feat_idx_subset = feat_idx[0:max_k]
                G_fs_train = G0_train.take(feat_idx_subset, axis=1)
                G_fs_test = G0_test.take(feat_idx_subset, axis=1)

                # normalize to sum(diag)=N
                norm_factor = 1./np.sqrt((G_fs_train**2).sum() / float(G_fs_train.shape[0]))

                G_fs_train *= norm_factor
                G_fs_test *= norm_factor
                                
                G_fs_train.flags.writeable = False
                G_fs_test.flags.writeable = False

                # asserts
                if self.debug:
                    norm_factor_check = 1.0 / np.sqrt(max_k)
                    np.testing.assert_array_almost_equal(norm_factor, norm_factor_check, decimal=1)
                    np.testing.assert_almost_equal(sum(np.diag(G_fs_train.dot(G_fs_train.T))), G_fs_train.shape[0])

                logging.info("k: %i" % (max_k))

                # use LMM
                from fastlmm.inference.lmm_cov import LMM as fastLMM

                if G_bg_train.shape[1] <= G_bg_train.shape[0]:
                    lmm = fastLMM(X=cov_train, Y=y_train[:,np.newaxis], G=G_bg_train)
                else:
                    lmm = fastLMM(X=cov_train, Y=y_train[:,np.newaxis], K=K_bg_train)

                W = G_fs_train.copy()
                UGup,UUGup = lmm.rotate(W)
                
                i_up = np.zeros((G_fs_train.shape[1]), dtype=np.bool)
                i_G1 = np.ones((G_fs_train.shape[1]), dtype=np.bool)
                t0 = time.time()
                res = lmm.findH2_2K(nGridH2=10, minH2=0.0, maxH2=0.99999, i_up=i_up, i_G1=i_G1, UW=UGup, UUW=UUGup)
                logging.info("time taken for k=%i: %s" % (max_k, str(time.time()-t0)))
                
                # recover a2 from alternate parameterization
                a2 = res["h2_1"] / float(res["h2"] + res["h2_1"])
                h2 = res["h2"] + res["h2_1"]
                delta = (1-h2) / h2
                #res_cov = res


                # do final prediction using lmm.py
                from fastlmm.inference import LMM
                lmm = LMM(forcefullrank=False)
                lmm.setG(G0=G_bg_train, G1=G_fs_train, a2=a2)
                lmm.setX(cov_train)
                lmm.sety(y_train)

                # we take an additional step to estimate betas on covariates (not given from new model)
                res = lmm.nLLeval(delta=delta, REML=True)
                
                # predict on test set
                lmm.setTestData(Xstar=cov_test, G0star=G_bg_test, G1star=G_fs_test)
                out = lmm.predictMean(beta=res["beta"], delta=delta)

                mse = mean_squared_error(y_test, out)
                logging.info("mse: %f" % (mse))

                self.mse[kfold_idx, k_idx] = mse

                self.mixes[kfold_idx, k_idx] = a2
                self.deltas[kfold_idx, k_idx] = delta

                if self.measure != "mse":
                    K_test_test = a2 * G_fs_test.dot(G_fs_test.T) + (1.0-a2) * K_bg_test 
                    ll = lmm.nLLeval_test(y_test, res["beta"], sigma2=res["sigma2"], delta=delta, Kstar_star=K_test_test, robust=True)

                    if self.debug:
                        ll2 = lmm.nLLeval_test(y_test, res["beta"], sigma2=res["sigma2"], delta=delta, Kstar_star=None, robust=True)
                        np.testing.assert_almost_equal(ll, ll2, decimal=4)

                    logging.info("ll: %f" % (ll))
                    self.ll[kfold_idx, k_idx]  = ll
                    

            logging.info("time taken for fold: %s" % str(time.time()-t0))
        

        best_k, best_mix, best_delta = self.select_best_k()

        logging.info("best_k: %i, best_mix: %f, best_delta: %f" % (best_k, best_mix, best_delta))

        # final scan 
        if self.order_by_lmm:
            logging.info("final scan using LMM")
            gwas = FastGwas(G_bg, G0, y, delta=None, train_pcs=None, mixing=0.0, cov=cov)
            gwas.run_gwas()
            _pval = gwas.p_values
            feat_idx = np.argsort(_pval)[0:best_k]
        else:
            logging.info("final scan using LR")
            _F,_pval = lin_reg.f_regression_block(lin_reg.f_regression_cov_alt, G0, y, C=cov, blocksize=10000)
        
        logging.info("number of snps selected: %i" % (best_k))

        return best_k, feat_idx, best_mix, best_delta
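
The normalization above scales G_bg by 1/sqrt(sum(G_bg**2)/N) so that the trace of the resulting kernel equals the number of individuals, which is exactly what the assert on sum(np.diag(K_bg_full)) checks. A small standalone sanity check of that identity (not taken from FaST-LMM):

import numpy as np

N, num_snps = 50, 200
G = np.random.RandomState(0).randn(N, num_snps)

norm_factor = 1.0 / np.sqrt((G ** 2).sum() / float(N))
G_scaled = norm_factor * G
K = G_scaled.dot(G_scaled.T)

# trace(K) == sum(G_scaled**2) == N by construction
np.testing.assert_almost_equal(np.trace(K), N)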

Example 162

Project: FaST-LMM Source File: test_fastlmm_predictor.py
    def test_lmm(self):
        do_plot = False
        iid_count = 500
        seed = 0


        import pylab
        logging.info("TestLmmTrain test_lmm")

        iid = [["cid{0}P{1}".format(iid_index,iid_index//250)]*2 for iid_index in xrange(iid_count)]
        train_idx = np.r_[10:iid_count] # iids 10 and on
        test_idx  = np.r_[0:10] # the first 10 iids


        #Every person is 100% related to everyone in one of 5 families
        K0a = KernelData(iid=iid,val=np.empty([iid_count,iid_count]),name="related by distance")
        for iid_index0 in xrange(iid_count):
            for iid_index1 in xrange(iid_count):
                K0a.val[iid_index0,iid_index1] = 1 if iid_index0 % 5 == iid_index1 % 5 else 0
                if iid_index1 < iid_index0:
                    assert K0a.val[iid_index0,iid_index1] == K0a.val[iid_index1,iid_index0]

        #every person lives on a line from 0 to 1
        # They are related to every other person as a function of distance on the line
        np.random.seed(seed)
        home = np.random.random([iid_count])
        K0b = KernelData(iid=iid,val=np.empty([iid_count,iid_count]),name="related by distance")
        for iid_index in xrange(iid_count):
            K0b.val[iid_index,:] = 1 - np.abs(home-home[iid_index])**.1

        #make covar just numbers 0,1,...
        covar = SnpData(iid=iid,sid=["x"],val=np.array([[float(num)] for num in xrange(iid_count)]))
        covariate_train = covar[train_idx,:].read()
        covariate_test = covar[test_idx,:].read()

        for name, h2, K0 in [("clones", 1, K0a),("line_world",.75,K0b)]:

            sigma2x = 100
            varg = sigma2x * h2
            vare = sigma2x * (1-h2)

            #######################################################################
            #make pheno  # pheno = 2*covar+100+normal(0,1)*2.5+normal(0,K)*7.5
            #######################################################################
            #random.multivariate_normal is sensitive to mkl_num_thread, so we control it.
            if 'MKL_NUM_THREADS' in os.environ:
                mkl_num_thread = os.environ['MKL_NUM_THREADS']
            else:
                mkl_num_thread = None
            os.environ['MKL_NUM_THREADS'] = '1'
            np.random.seed(seed)
            p1 = covar.val * 2.0 + 100
            p2 = np.random.normal(size=covar.val.shape)*np.sqrt(vare)
            p3 = (np.random.multivariate_normal(np.zeros(iid_count),K0.val)*np.sqrt(varg)).reshape(-1,1)
            if mkl_num_thread is not None:
                os.environ['MKL_NUM_THREADS'] = mkl_num_thread
            else:
                del os.environ['MKL_NUM_THREADS']
            pheno = SnpData(iid=iid,sid=["pheno0"],val= p1 + p2 + p3)

            pheno_train = pheno[train_idx,:].read()
            pheno_test = pheno[test_idx,:].read()

            if do_plot:
                #Plot training x and y, testing x and y
                pylab.plot(covariate_train.val, pheno_train.val,".",covariate_test.val, pheno_test.val,".")
                pylab.suptitle(name + ": Plot training x and y, testing x and y")
                pylab.show()

            Xtrain = np.c_[covariate_train.val,np.ones((covariate_train.iid_count,1))]
            Xtest = np.c_[covariate_test.val,np.ones((covariate_test.iid_count,1))]
            lsqSol = np.linalg.lstsq(Xtrain, pheno_train.val[:,0])
            bs=lsqSol[0] #weights
            r2=lsqSol[1] #squared residuals
            D=lsqSol[2]  #rank of design matrix
            N=pheno_train.iid_count
            REML = False
            if not REML:
                sigma2 = float(r2/N)
                nLL =  N*0.5*np.log(2*np.pi*sigma2) + N*0.5
            else:
                sigma2 = float(r2 / (N-D))
                nLL = N*0.5*np.log(2*np.pi*sigma2) + 0.5/sigma2*r2;
                nLL -= 0.5*D*np.log(2*np.pi*sigma2);#REML term

            predicted = Xtest.dot(bs)
            yerr = [np.sqrt(sigma2)] * len(predicted)
            if do_plot:
                pylab.plot(covariate_test.val, pheno_test.val,"g.",covariate_test.val, predicted,"r.")
                pylab.xlim([-1, 10])
                pylab.errorbar(covariate_test.val, predicted,yerr,linestyle='None')
                pylab.suptitle(name + ": real linear regression: actual to prediction")
                pylab.show()

            for factor in [1,100,.02]:
                K0 = K0.read()
                K0.val *= factor

                K0_train = K0[train_idx]
                K0_whole_test = K0[:,test_idx]

                #Learn model, save, load
                fastlmmx = FastLMM(GB_goal=2).fit(K0_train=K0_train, X=covariate_train, y=pheno_train)
                v2 = np.var(p2)
                v3 = np.var(p3)
                logging.debug("Original h2 of {0}. Generated h2 of {1}. Learned h2 of {2}".format(h2, v3/(v2+v3), fastlmmx.h2raw))
                
                
                filename = self.tempout_dir + "/model_lmm.flm.p"
                pstutil.create_directory_if_necessary(filename)
                joblib.dump(fastlmmx, filename) 
                fastlmm = joblib.load(filename)


                do_test_on_train = True
                if do_test_on_train:
                    #Predict with model (test on train)
                    predicted_pheno, covar_pheno = fastlmm.predict(K0_whole_test=K0_train, X=covariate_train) #test on train
                    output_file = self.file_name("lmma_"+name)
                    Dat.write(output_file,predicted_pheno)
                    covar2 = SnpData(iid=covar_pheno.row,sid=covar_pheno.col[:,1],val=covar_pheno.val) #kludge to write kernel to text format
                    output_file = self.file_name("lmma.cov_"+name)
                    Dat.write(output_file,covar2)

                    yerr = np.sqrt(np.diag(covar_pheno.val))
                    predicted = predicted_pheno.val
                    if do_plot:
                        pylab.plot(covariate_train.val, pheno_train.val,"g.",covariate_train.val, predicted,"r.")
                        pylab.xlim([0, 50])
                        pylab.ylim([100, 200])
                        pylab.errorbar(covariate_train.val, predicted,yerr,linestyle='None')
                        pylab.suptitle(name+": test on train: train X to true target (green) and prediction (red)")
                        pylab.show()

                    self.compare_files(predicted_pheno,"lmma_"+name)
                    self.compare_files(covar2,"lmma.cov_"+name)

                    predicted_pheno0, covar_pheno0 = fastlmm.predict(K0_whole_test=K0_train[:,0], X=covariate_train[0,:]) #test on train #0
                    assert np.abs(predicted_pheno0.val[0,0] - predicted_pheno.val[0,0]) < 1e-6, "Expect a single case to get the same prediction as a set of cases"
                    assert np.abs(covar_pheno0.val[0,0] - covar_pheno.val[0,0]) < 1e-6, "Expect a single case to get the same prediction as a set of cases"


                #Predict with model (test on test)
                predicted_phenoB, covar_phenoB  = fastlmm.predict(K0_whole_test=K0_whole_test, X=covariate_test) #test on test
                output_file = self.file_name("lmmb_"+name)
                Dat.write(output_file,predicted_phenoB)
                covar2 = SnpData(iid=covar_phenoB.row,sid=covar_phenoB.col[:,1],val=covar_phenoB.val) #kludge to write kernel to text format
                output_file = self.file_name("lmmb.cov_"+name)
                Dat.write(output_file,covar2)

                yerr = np.sqrt(np.diag(covar_phenoB.val))
                predicted = predicted_phenoB.val
                if do_plot:
                    pylab.plot(covariate_test.val, pheno_test.val,"g.",covariate_test.val, predicted,"r.")
                    pylab.xlim([-1, 10])
                    pylab.errorbar(covariate_test.val, predicted,yerr,linestyle='None')
                    pylab.suptitle(name+": test on test: test X to true target (green) and prediction (red)")
                    pylab.show()

                self.compare_files(predicted_phenoB,"lmmb_"+name)
                self.compare_files(covar2,"lmmb.cov_"+name)

                predicted_phenoB0, covar_phenoB0  = fastlmm.predict(K0_whole_test=K0_whole_test[:,0], X=covariate_test[0,:]) #test on a single test case
                assert np.abs(predicted_phenoB0.val[0,0] - predicted_phenoB.val[0,0]) < 1e-6, "Expect a single case to get the same prediction as a set of cases"
                assert np.abs(covar_phenoB0.val[0,0] - covar_phenoB.val[0,0]) < 1e-6, "Expect a single case to get the same prediction as a set of cases"

                #Predict with model test on some train and some test
                some_idx = range(covar.iid_count)
                some_idx.remove(train_idx[0])
                some_idx.remove(test_idx[0])
                covariate_some = covar[some_idx,:]
                K0_whole_some = K0[:,some_idx]
                predicted_phenoC, covar_phenoC  = fastlmm.predict(K0_whole_test=K0_whole_some, X=covariate_some)
                for idxC, iidC in enumerate(predicted_phenoC.iid):
                    meanC = predicted_phenoC.val[idxC]
                    varC = covar_phenoC.val[idxC,idxC]
                    if iidC in predicted_pheno.iid:
                        predicted_pheno_ref = predicted_pheno
                        covar_pheno_ref = covar_pheno
                    else:
                        assert iidC in predicted_phenoB.iid
                        predicted_pheno_ref = predicted_phenoB
                        covar_pheno_ref = covar_phenoB
                    idx_ref = predicted_pheno_ref.iid_to_index([iidC])[0]
                    mean_ref = predicted_pheno_ref.val[idx_ref]
                    var_ref = covar_pheno_ref.val[idx_ref,idx_ref]
                    assert np.abs(meanC - mean_ref) < 1e-6
                    assert np.abs(varC - var_ref) < 1e-6
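
The plain linear-regression baseline above uses the closed form nLL = N/2*log(2*pi*sigma2) + N/2 with the ML variance sigma2 = RSS/N (the REML branch adds the extra terms shown). A quick standalone check, not taken from the test file, that this closed form agrees with summing Gaussian log-densities of the OLS residuals:

import numpy as np
from scipy import stats

rng = np.random.RandomState(0)
N = 200
X = np.c_[rng.randn(N), np.ones(N)]               # covariate plus intercept, as in Xtrain
y = X.dot(np.array([2.0, 100.0])) + rng.randn(N)  # roughly pheno = 2*covar + 100 + noise

beta, rss, rank, sv = np.linalg.lstsq(X, y, rcond=None)
sigma2 = float(rss) / N
nll_closed = N * 0.5 * np.log(2 * np.pi * sigma2) + N * 0.5
nll_direct = -stats.norm.logpdf(y - X.dot(beta), scale=np.sqrt(sigma2)).sum()
np.testing.assert_almost_equal(nll_closed, nll_direct)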

Example 163

Project: readerisdead Source File: reader_archive.py
def main():
  base.log.init()
  base.atom.init()

  parser = argparse.ArgumentParser(
      description='Comprehensive archive of a Google Reader account')

  # Credentials
  parser.add_argument('--use_client_login', action='store_true',
                      help='Instead of OAuth, use ClientLogin for '
                            'authentication. You will be prompted for a '
                            'username and password')
  parser.add_argument('--oauth_refresh_token', default='',
                      help='A previously obtained refresh token (used to bypass '
                            'OAuth setup)')
  parser.add_argument('--account', default='',
                      help='Google Account to save the archive for. Omit to '
                          'specify via standard input')
  parser.add_argument('--password', default='',
                      help='Password for the account. Omit to specify via '
                          'standard input')

  # Output options
  parser.add_argument('--output_directory', default='./',
                      help='Directory where to place archive data.')

  # Fetching options
  parser.add_argument('--stream_items_chunk_size', type=int, default=10000,
                      help='Number of item refs to request per stream items '
                           'API call (higher is more efficient)')
  parser.add_argument('--max_items_per_stream', type=int, default=0,
                      help='If non-zero, will cap the number of items that are '
                            'fetched per feed or tag')
  parser.add_argument('--item_bodies_chunk_size', type=int, default=250,
                      help='Number of items refs per request for fetching their '
                           'bodies (higher is more efficient)')
  parser.add_argument('--comments_chunk_size', type=int, default=250,
                      help='Number of items per request for fetching comments '
                           'on shared items (higher is more efficient)')
  parser.add_argument('--max_streams', type=int, default=0,
                      help='Maximum number of streams to archive (0 for no '
                           'limit, only meant to be used for development)')
  parser.add_argument('--parallelism', type=int, default=10,
                      help='Number of requests to make in parallel.')
  parser.add_argument('--http_retry_count', type=int, default=1,
                      help='Number of retries to make in the case of HTTP '
                           'request errors.')

  # Miscellaneous.
  parser.add_argument('--additional_item_refs_file_path', default='',
                      help='Path to JSON file listing additional tag item refs '
                           'to fetch')

  args = parser.parse_args()

  output_directory = base.paths.normalize(args.output_directory)
  base.paths.ensure_exists(output_directory)
  def output_sub_directory(name):
    directory_path = os.path.join(output_directory, name)
    base.paths.ensure_exists(directory_path)
    return directory_path
  api_responses_directory = output_sub_directory('_raw_data')
  streams_directory = output_sub_directory('streams')
  data_directory = output_sub_directory('data')
  items_directory = output_sub_directory('items')
  comments_directory = output_sub_directory('comments')

  if args.use_client_login:
    authenticated_url_fetcher = base.url_fetcher.ClientLoginUrlFetcher(
        args.account, args.password)
  else:
    authenticated_url_fetcher = base.url_fetcher.OAuthUrlFetcher(
        args.oauth_refresh_token)
  api = base.api.Api(
      authenticated_url_fetcher=authenticated_url_fetcher,
      http_retry_count=args.http_retry_count,
      cache_directory=api_responses_directory)

  user_info = api.fetch_user_info()
  logging.info(
    'Created API instance for %s (%s)', user_info.user_id, user_info.email)

  logging.info('Saving preferences')
  _save_preferences(api, data_directory)

  logging.info('Gathering streams to fetch')
  stream_ids = _get_stream_ids(api, user_info.user_id, data_directory)
  if args.max_streams and len(stream_ids) > args.max_streams:
    stream_ids = stream_ids[:args.max_streams]
  logging.info('%d streams to fetch, gathering item refs:', len(stream_ids))

  item_ids, item_refs_total = _fetch_and_save_item_refs(
      stream_ids, api, args, streams_directory, user_info.user_id)
  logging.info('%s unique item refs (%s total), grouping by chunk.',
      '{:,}'.format(len(item_ids)),
      '{:,}'.format(item_refs_total))

  logging.info('Grouped item refs, getting item bodies:')

  item_ids_chunks = _chunk_item_ids(item_ids, args.item_bodies_chunk_size)

  item_bodies_to_fetch = len(item_ids)
  fetched_item_bodies = [0]
  missing_item_bodies = set()
  def report_item_bodies_progress(requested_item_ids, found_item_ids):
    if found_item_ids is None:
      missing_item_bodies.update(set(requested_item_ids).difference(
          base.api.not_found_items_ids_to_ignore))
      return
    fetched_item_bodies[0] += len(found_item_ids)
    missing_item_bodies.update(
        set(requested_item_ids).difference(set(found_item_ids)).difference(
            base.api.not_found_items_ids_to_ignore))
    logging.info('  Fetched %s/%s item bodies (%s could not be loaded)',
        '{:,}'.format(fetched_item_bodies[0]),
        '{:,}'.format(item_bodies_to_fetch),
        '{:,}'.format(len(missing_item_bodies)))
  base.worker.do_work(
      lambda: FetchWriteItemBodiesWorker(api, items_directory),
      item_ids_chunks,
      args.parallelism,
      report_progress=report_item_bodies_progress)

  if missing_item_bodies:
    logging.warn('Item bodies could not be loaded for: %s',
        ', '.join([i.compact_form() for i in missing_item_bodies]))

  broadcast_stream_ids = [
      stream_id for stream_id in stream_ids
      if stream_id.startswith('user/') and
          stream_id.endswith('/state/com.google/broadcast')
  ]
  logging.info(
      'Fetching comments from %d shared item streams.',
      len(broadcast_stream_ids))
  encoded_sharers = api.fetch_encoded_sharers()
  remaining_broadcast_stream_ids = [len(broadcast_stream_ids)]
  def report_comments_progress(_, comments_by_item_id):
    if comments_by_item_id is None:
      return
    remaining_broadcast_stream_ids[0] -= 1
    comment_count = sum((len(c) for c in comments_by_item_id.values()), 0)
    logging.info('  Fetched %s comments, %s shared item streams left.',
        '{:,}'.format(comment_count),
        '{:,}'.format(remaining_broadcast_stream_ids[0]))
  all_comments = {}
  comments_for_broadcast_streams = base.worker.do_work(
      lambda: FetchCommentsWorker(
          api, encoded_sharers, args.comments_chunk_size),
      broadcast_stream_ids,
      args.parallelism,
      report_progress=report_comments_progress)
  total_comment_count = 0
  for comments_for_broadcast_stream in comments_for_broadcast_streams:
    if not comments_for_broadcast_stream:
      continue
    for item_id, comments in comments_for_broadcast_stream.iteritems():
      total_comment_count += len(comments)
      all_comments.setdefault(item_id, []).extend(comments)

  logging.info('Writing %s comments from %s items.',
      '{:,}'.format(total_comment_count),
      '{:,}'.format(len(all_comments)))
  for item_id, comments in all_comments.items():
    item_comments_file_path = os.path.join(base.paths.item_id_to_file_path(
        comments_directory, item_id), item_id.compact_form())
    base.paths.ensure_exists(os.path.dirname(item_comments_file_path))
    with open(item_comments_file_path, 'w') as item_comments_file:
      item_comments_file.write(json.dumps([c.to_json() for c in comments]))

  with open(os.path.join(output_directory, 'README'), 'w') as readme_file:
    readme_file.write('See https://github.com/mihaip/readerisdead/'
        'wiki/reader_archive-Format.\n')
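
The progress reporting above leans on two small idioms worth calling out: the nested callbacks keep their running totals in one-element lists because Python 2 closures cannot rebind an enclosing local (there is no nonlocal), and the logging.info calls format counts with '{:,}' for thousands separators. A minimal, self-contained sketch of that pattern follows; fetch_all and fetch_chunk are hypothetical names, not part of the archive tool.

import logging

def fetch_all(chunks, fetch_chunk):
    total = sum(len(chunk) for chunk in chunks)
    fetched = [0]  # one-element list: a mutable cell the nested callback can update

    def report(found_ids):
        fetched[0] += len(found_ids)
        logging.info('  Fetched %s/%s items',
                     '{:,}'.format(fetched[0]),
                     '{:,}'.format(total))

    for chunk in chunks:
        report(fetch_chunk(chunk))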

Example 164

Project: streamspigot Source File: twitterdisplay.py
    def body_as_html(self):
        status = self._status
        text_as_html = []
        footer_as_html = []

        def add_status_chunks(status, skip_entity_urls=[]):
            entities = list(
                (status.hashtags or []) +
                (status.urls or []) +
                (status.user_mentions or []) +
                (status.medias or []))
            entities = [e for e in entities
                if e.start_index != -1 and e.end_index != -1]
            entities.sort(cmp=lambda e1,e2: e1.start_index - e2.start_index)
            last_entity_start = 0
            last_entity_end = 0

            for e in entities:
                add_tweet_chunk(status.text[last_entity_end:e.start_index])

                entity_anchor_text = status.text[e.start_index:e.end_index]
                entity_url = None

                if isinstance(e, twitter.Hashtag):
                    entity_url = 'search?q=%23' + e.text
                elif isinstance(e, twitter.Url):
                    entity_url = e.expanded_url or e.url
                    entity_url_anchor_text = \
                        e.display_url or e.expanded_url or e.url
                    if entity_url_anchor_text:
                        entity_anchor_text = escape(entity_url_anchor_text)
                    maybe_add_thumbnail_chunk(e.expanded_url or e.url)
                elif isinstance(e, twitter.User):
                    entity_url = e.screen_name
                elif isinstance(e, twitter.Media):
                    def add_media_thumbnail():
                        link_url, _, _ = e.GetUrlForSize(
                            twitter.Media.LARGE_SIZE)
                        thumb_url, thumb_width, thumb_height = e.GetUrlForSize(
                            twitter.Media.THUMB_SIZE
                                if self._thumbnail_size ==
                                    thumbnails.SMALL_THUMBNAIL
                                else twitter.Media.MEDIUM_SIZE)
                        add_footer_thumbnail_chunk(
                            link_url, thumb_url, thumb_width, thumb_height)

                    entity_url = e.url
                    entity_url_anchor_text = \
                        e.display_url or e.expanded_url or e.url
                    if entity_url_anchor_text:
                        entity_anchor_text = escape(entity_url_anchor_text)
                    if e.type == 'photo':
                        add_media_thumbnail()
                    elif e.type == 'animated_gif' or e.type == 'video':
                        if e.video_variants:
                            video_attributes = [
                                'loop="loop"',
                                'muted="muted"',
                                'autoplay="autoplay"',
                                # Even though we don't normally want controls,
                                # NewsBlur strips out the autoplay attribute,
                                # so they're needed to initiate playback on the
                                # desktop.
                                'controls="controls"',
                                'poster="%s"' % e.media_url,
                            ]
                            width = None
                            height = None
                            size = e.sizes.get(twitter.Media.MEDIUM_SIZE)
                            if size:
                                width = size[0]
                                height = size[1]
                            add_footer_video_chunk(
                                e.video_variants,
                                " ".join(video_attributes),
                                width,
                                height)
                        else:
                            add_media_thumbnail()
                    else:
                      logging.info("Unknown media type: %s", e.type)

                # Don't display the entity if it's outside the display range.
                # We only hide entities after the end of the display text
                # range; we still want to display usernames at the start of
                # the text, since that makes it easier to scan.
                if status.display_text_range:
                  if e.start_index >= status.display_text_range[1]:
                    last_entity_start = e.start_index
                    last_entity_end = e.end_index
                    continue

                if e.start_index == last_entity_start and \
                      e.end_index == last_entity_end:
                    # For tweets with multiple pictures we will get multiple
                    # entities that point to the same span of text in the
                    # tweet. We want to insert thumbnails for each one, but only
                    # add one anchor.
                    continue

                if entity_url:
                    if entity_url not in skip_entity_urls:
                        add_raw_chunk('<a href="')
                        add_escaped_chunk(entity_url)
                        add_raw_chunk('" %s>' % _LINK_ATTRIBUTES)
                        add_tweet_chunk(entity_anchor_text)
                        add_raw_chunk('</a>')
                else:
                    add_tweet_chunk(entity_anchor_text)

                last_entity_start = e.start_index
                last_entity_end = e.end_index

            if status.display_text_range:
              add_tweet_chunk(
                  status.text[last_entity_end:status.display_text_range[1]])
            else:
              add_tweet_chunk(status.text[last_entity_end:])

            if footer_as_html:
                add_raw_chunk('<p>')
                text_as_html.extend(footer_as_html)
                add_raw_chunk('</p>')
                del footer_as_html[:]

        escape = xml.sax.saxutils.escape

        def add_raw_chunk(chunk):
            text_as_html.append(chunk)

        def add_tweet_chunk(chunk):
            # Unescape and then re-escape everything so that we have a
            # consistent level of escaping.
            chunk = _unescape_tweet_chunk(chunk)

            # We also remove control characters (which are not allowed in XML)
            # now, instead of earlier, since otherwise all of the entity offsets
            # would be wrong.
            chunk = base.util.strip_control_characters(chunk)

            # Insert zero-width spaces after punctuation and every so often in
            # longer tokens to make sure that the display wraps. Has to be done
            # this way since NewsBlur's CSS whitelist does not allow
            # "word-break: break-word" and its HTML whitelist does not allow
            # <wbr> tags.
            run_length = 0
            chunk_with_breaks = u""
            for c in chunk:
              chunk_with_breaks += c
              run_length += 1
              if c in string.whitespace:
                run_length = 0
              elif c in string.punctuation or run_length > 24:
                chunk_with_breaks += u"\u200B"
                run_length = 0
            chunk = chunk_with_breaks

            # HTML-escape
            chunk = escape(chunk)

            # Convert newlines to HTML (Twitter seems to normalize all line
            # endings to \n).
            chunk = chunk.replace('\n', '<br/>')

            add_raw_chunk(chunk)

        def add_escaped_chunk(chunk):
            add_raw_chunk(escape(chunk))

        def add_footer_raw_chunk(chunk):
            footer_as_html.append(chunk)

        def add_footer_thumbnail_chunk(
                link_url, thumb_url, thumb_width, thumb_height):
            img_styles = ['padding:2px']
            img_attributes = ''
            # Force the width to be "100%" and reset the margins to override the
            # "full bleed" style set by NewsBlur (see https://github.com/
            # samuelclay/NewsBlur/commit/93c4ddfc30e6b126118e07e76bdf367ff84b).
            # There needs to be a space between the value and !important since
            # its CSS sanitizer breaks up tokens via whitespace only (
            # https://github.com/samuelclay/NewsBlur/blob/
            # 4aead01e3442eadfcbb7e5cf451e55184386a/utils/feedparser.py#L2539)
            # The triggering conditions match the NB-large-image class being
            # added in https://github.com/samuelclay/NewsBlur/blob/
            # fb3b37a46028a1222be2f1f5f6f0cea63e895666/clients/ios/static/
            # storyDetailView.js#L63
            if thumb_width >= 320-24 and thumb_height >= 50 or \
                (not thumb_width and not thumb_height and
                    self._thumbnail_size == thumbnails.LARGE_THUMBNAIL):
                img_styles.append('width:100% !important')
                img_styles.append('margin: 0 !important')
            if thumb_width and thumb_height:
                img_attributes = ' width="%d" height="%d"' % (
                    thumb_width, thumb_height)

            add_footer_raw_chunk(
                '<a href="%s" border="0">'
                  '<img src="%s" alt="" style="%s"%s/>'
                '</a>' % (
                    escape(link_url),
                    escape(thumb_url),
                    ";".join(img_styles),
                    img_attributes
                ))

        def add_footer_iframe_chunk(iframe_url, iframe_width, iframe_height):
            # "frameborder" is not a whitelisted HTML attribute in NewsBlur.
            # "border" is not on its CSS whitelist either, but "border-color"
            # is.
            iframe_attributes = ' style="border-color: transparent"'
            if iframe_width and iframe_height:
                iframe_attributes += ' width="%d" height="%d"' % (
                    iframe_width, iframe_height)
            add_footer_raw_chunk(
                '<iframe src="%s" frameborder="0"%s allowfullscreen="true"></iframe>'
                % (escape(iframe_url), iframe_attributes))

        def add_footer_video_chunk(
                video_variants, video_attributes, width=None, height=None):
            if width:
                video_attributes += (' width="%d" '
                    'style="width:100%%;max-width:%dpx"') % (width, width)
            add_footer_raw_chunk('<video %s>' % video_attributes)
            for variant in video_variants:
                if variant.url:
                    add_footer_raw_chunk('<source src="%s" type="%s"/>' % (
                        variant.url, variant.content_type or ''))
            add_footer_raw_chunk('</video>')

        def maybe_add_thumbnail_chunk(url):
            iframe_url, iframe_width, iframe_height = \
                thumbnails.get_iframe_info(url)
            if iframe_url:
                add_footer_iframe_chunk(
                    iframe_url, iframe_width, iframe_height)
                return

            thumb_url, thumb_width, thumb_height = \
                thumbnails.get_thumbnail_info(url, self._thumbnail_size)
            if thumb_url:
                add_footer_thumbnail_chunk(
                    url, thumb_url, thumb_width, thumb_height)

        def add_status(status):
            if status.retweeted_status:
                add_raw_chunk('RT: <a href="')
                add_escaped_chunk(status.retweeted_status.user.screen_name)
                add_raw_chunk('" %s>@' % _LINK_ATTRIBUTES)
                add_escaped_chunk(status.retweeted_status.user.screen_name)
                add_raw_chunk('</a>: ')
                add_status(status.retweeted_status)
            elif status.quoted_status:
                quoted_screen_name = status.quoted_status.user.screen_name
                add_status_chunks(status, skip_entity_urls=[
                    "https://twitter.com/%s/status/%s" %
                        (quoted_screen_name, status.quoted_status.id)
                ])
                add_raw_chunk('<div style="padding:10px;margin:5px 0;background:%s">' %
                    CONSTANTS.BUBBLE_QUOTED_COLOR)
                add_raw_chunk('<a href="')
                add_escaped_chunk(quoted_screen_name)
                add_raw_chunk('" %s>@' % _LINK_ATTRIBUTES)
                add_escaped_chunk(quoted_screen_name)
                add_raw_chunk('</a>: ')
                add_status(status.quoted_status)
                add_raw_chunk('</div>')
            else:
                add_status_chunks(status)
        add_status(status)

        result = ''.join(text_as_html)
        return result
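
At its core, body_as_html splices the tweet's entities back into the text: the entities are sorted by start offset, the plain text between the previous entity's end and the current entity's start is emitted, the entity itself is wrapped in an anchor, and the tail after the last entity is appended. Below is a stripped-down sketch of that splice, assuming hypothetical entity objects with start, end and url attributes rather than the python-twitter types used above.

import xml.sax.saxutils

def splice_entities(text, entities):
    # `entities` are assumed to be non-overlapping objects with
    # .start, .end and .url attributes (hypothetical, for illustration).
    escape = xml.sax.saxutils.escape
    parts = []
    last_end = 0
    for e in sorted(entities, key=lambda e: e.start):
        parts.append(escape(text[last_end:e.start]))   # plain text before the entity
        parts.append('<a href="%s">%s</a>' % (escape(e.url),
                                              escape(text[e.start:e.end])))
        last_end = e.end
    parts.append(escape(text[last_end:]))              # tail after the last entity
    return ''.join(parts)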

Example 165

Project: spym Source File: cpu.py
    def execute_single(self, instr):
        if instr.name == 'add':
            # add rd, rs, rt
            rd = instr.ops[0]
            rs = self.r.read(instr.ops[1])
            rt = self.r.read(instr.ops[2])
            self.r.write(rd, rs + rt)
        elif instr.name == 'addu':
            # TODO: make this actually work, because right now it's identical
            # to add
            # addu rd, rs, rt
            rd = instr.ops[0]
            rs = self.r.read(instr.ops[1])
            rt = self.r.read(instr.ops[2])
            self.r.write(rd, rs + rt)
        elif instr.name == 'addi':
            # addi rt, rs, imm
            rd = instr.ops[0]
            rs = self.r.read(instr.ops[1])
            imm = get_imm(instr.ops[2])
            self.r.write(rd, rs + imm)
        elif instr.name == 'addiu':
            # TODO: make this actually work, because right now it's identical
            # to addi
            # addiu rt, rs, imm
            rd = instr.ops[0]
            rs = self.r.read(instr.ops[1])
            imm = get_imm(instr.ops[2])
            self.r.write(rd, rs + imm)
        elif instr.name == 'sub':
            # sub rd, rs, rt
            rd = instr.ops[0]
            rs = self.r.read(instr.ops[1])
            rt = self.r.read(instr.ops[2])
            self.r.write(rd, rs - rt)
        elif instr.name == 'and':
            # and rd, rs, rt
            rd = instr.ops[0]
            rs = self.r.read(instr.ops[1])
            rt = self.r.read(instr.ops[2])
            self.r.write(rd, rs & rt)
        elif instr.name == 'andi':
            # andi rt, rs, imm
            rd = instr.ops[0]
            rs = self.r.read(instr.ops[1])
            imm = get_imm(instr.ops[2])
            self.r.write(rd, rs & imm)
        elif instr.name == 'or':
            # or rd, rs, rt
            rd = instr.ops[0]
            rs = self.r.read(instr.ops[1])
            rt = self.r.read(instr.ops[2])
            self.r.write(rd, rs | rt)
        elif instr.name == 'ori':
            # ori rt, rs, imm
            rd = instr.ops[0]
            rs = self.r.read(instr.ops[1])
            imm = get_imm(instr.ops[2])
            self.r.write(rd, rs | imm)
        elif instr.name == 'xor':
            # xor rd, rs, rt
            rd = instr.ops[0]
            rs = self.r.read(instr.ops[1])
            rt = self.r.read(instr.ops[2])
            self.r.write(rd, rs ^ rt)
        elif instr.name == 'xori':
            # xori rt, rs, imm
            rd = instr.ops[0]
            rs = self.r.read(instr.ops[1])
            imm = get_imm(instr.ops[2])
            self.r.write(rd, rs ^ imm)
        elif instr.name == 'sll':
            # sll rd, rt, shamt
            rd = instr.ops[0]
            rt = self.r.read(instr.ops[1])
            shamt = get_imm(instr.ops[2])
            self.r.write(rd, rt << shamt)
        elif instr.name == 'srl':
            # srl rd, rt, shamt
            rd = instr.ops[0]
            rt = self.r.read(instr.ops[1])
            shamt = get_imm(instr.ops[2])
            self.r.write(rd, rt >> shamt)
        elif instr.name == 'sllv':
            # sllv rd, rt, rs
            rd = instr.ops[0]
            rs = self.r.read(instr.ops[1])
            rt = self.r.read(instr.ops[2])
            self.r.write(rd, rt << rs)
        elif instr.name == 'srlv':
            # srlv rd, rs, rt
            rd = instr.ops[0]
            rs = self.r.read(instr.ops[1])
            rt = self.r.read(instr.ops[2])
            self.r.write(rd, rs >> rt)
        elif instr.name == 'slt':
            # slt rd, rs, rt
            tmp = 1 if self.r.read(instr.ops[1]) < self.r.read(instr.ops[2]) else 0
            self.r.write(instr.ops[0], tmp)
        elif instr.name == 'slti':
            # slti rd, rs, imm
            rs = instr.ops[1]
            imm = get_imm(instr.ops[2])
            tmp = 1 if self.r.read(rs) < imm else 0
            self.r.write(instr.ops[0], tmp)
        elif instr.name == 'beq':
            # beq rs, rt, label
            # TODO: the semantics aren't quite right here. branch instructions'
            # imm field contains the offset to the branch destination expressed
            # as the /number of words/
            if (self.r.read(instr.ops[0]) == self.r.read(instr.ops[1])):
                self._set_pc_label(instr.ops[2])
        elif instr.name == 'bne':
            # bne rs, rt, label
            if (self.r.read(instr.ops[0]) != self.r.read(instr.ops[1])):
                self._set_pc_label(instr.ops[2])
        elif instr.name == 'blt':
            # blt rs, rt, label
            if (self.r.read(instr.ops[0]) < self.r.read(instr.ops[1])):
                self._set_pc_label(instr.ops[2])
        elif instr.name == 'bgt':
            # bgt rs, rt, label
            if (self.r.read(instr.ops[0]) > self.r.read(instr.ops[1])):
                self._set_pc_label(instr.ops[2])
        elif instr.name == 'ble':
            # ble rs, rt, label
            if (self.r.read(instr.ops[0]) <= self.r.read(instr.ops[1])):
                self._set_pc_label(instr.ops[2])
        elif instr.name == 'bge':
            # bge rs, rt, label
            if (self.r.read(instr.ops[0]) >= self.r.read(instr.ops[1])):
                self._set_pc_label(instr.ops[2])
        elif instr.name == 'j':
            # j label
            self._set_pc_label(instr.ops[0])
        elif instr.name == 'jal':
            # jal label
            self.r.write('ra', self.r.pc)
            self._set_pc_label(instr.ops[0])
        elif instr.name == 'jr':
            # jr rs
            self._set_pc(self.r.read(instr.ops[0]))
        elif instr.name == 'jalr':
            # jalr rs
            self.r.write('ra', self.r.pc)
            self._set_pc(self.r.read(instr.ops[0]))
        elif instr.name == 'lb':
            # lb rt, offs(rs)
            rt = instr.ops[0]
            offs = get_imm(instr.ops[1])
            addr = self.r.read(instr.ops[2]) + offs
            read = struct.unpack('<b', self.dmem.read(addr, 1))[0]
            self.r.write(rt, read)
        elif instr.name == 'lbu':
            # lbu rt, offs(rs)
            rt = instr.ops[0]
            offs = get_imm(instr.ops[1])
            addr = self.r.read(instr.ops[2]) + offs
            read = struct.unpack('<B', self.dmem.read(addr, 1))[0]
            self.r.write(rt, read)
        elif instr.name == 'lh':
            # lh rt, offs(rs)
            rt = instr.ops[0]
            offs = get_imm(instr.ops[1])
            addr = self.r.read(instr.ops[2]) + offs
            read = struct.unpack('<h', self.dmem.read(addr, 2))[0]
            self.r.write(rt, read)
        elif instr.name == 'lhu':
            # lhu rt, offs(rs)
            rt = instr.ops[0]
            offs = get_imm(instr.ops[1])
            addr = self.r.read(instr.ops[2]) + offs
            read = struct.unpack('<H', self.dmem.read(addr, 2))[0]
            self.r.write(rt, read)
        elif instr.name == 'lw':
            # TODO: lw will always treat that memory as unsigned, even when it
            # potentially should be signed. With sw we can detect if they're
            # trying to write a negative number and change the struct.pack
            # arg accordingly, but with lw, we have no indication

            # lw rt, offs(rs)
            rd = instr.ops[0]
            offs = get_imm(instr.ops[1])
            addr = self.r.read(instr.ops[2]) + offs
            read = struct.unpack('<I', self.dmem.read(addr, 4))[0]
            self.r.write(rd, read)
        elif instr.name == 'lui':
            # lui rt, imm
            rt = instr.ops[0]
            imm = get_imm(instr.ops[1])
            self.r.write(rt, (imm << 16) & 0xffffffff)
        elif instr.name == 'li':
            # li rd, imm
            rd = instr.ops[0]
            imm = get_imm(instr.ops[1])
            self.r.write(rd, imm)
        elif instr.name == 'la':
            # la rd, label
            rd = instr.ops[0]
            label = instr.ops[1]
            self.r.write(rd, datatab[label])
        elif instr.name == 'sb':
            # sb rt, offs(rs)
            rt = self.r.read(instr.ops[0])
            offs = get_imm(instr.ops[1])
            addr = self.r.read(instr.ops[2]) + offs
            self.dmem.write(addr, struct.pack('<b' if rt < 0 else '<B', rt))
        elif instr.name == 'sh':
            # sh rt, offs(rs)
            rt = self.r.read(instr.ops[0])
            offs = get_imm(instr.ops[1])
            addr = self.r.read(instr.ops[2]) + offs
            self.dmem.write(addr, struct.pack('<h' if rt < 0 else '<H', rt))
        elif instr.name == 'sw':
            # sw rt, offs(rs)
            rt = self.r.read(instr.ops[0])
            offs = get_imm(instr.ops[1])
            addr = self.r.read(instr.ops[2]) + offs
            self.dmem.write(addr, struct.pack('<i' if rt < 0 else '<I', rt))
        elif instr.name == 'move':
            # move rd, rs
            self.r.write(instr.ops[0], self.r.read(instr.ops[1]))
        elif instr.name == 'div':
            # div rs, rt
            rs = self.r.read(instr.ops[0])
            rt = self.r.read(instr.ops[1])
            self.r.lo = rs / rt
            self.r.hi = rs % rt
        elif instr.name == 'mul':
            # mul rd, rs, rt
            rs = self.r.read(instr.ops[1])
            rt = self.r.read(instr.ops[2])
            mult = (rs * rt) & 0xffffffff
            self.r.write(instr.ops[0], mult)
        elif instr.name == 'mult':
            # mult rs, rt
            rs = self.r.read(instr.ops[0])
            rt = self.r.read(instr.ops[1])
            mult = (rs * rt) & 0xffffffffffffffff
            self.r.hi = mult >> 32
            self.r.lo = mult & 0xffffffff
        elif instr.name == 'mfhi':
            # mfhi rd
            self.r.write(instr.ops[0], self.r.hi)
        elif instr.name == 'mflo':
            # mflo rd
            self.r.write(instr.ops[0], self.r.lo)
        elif instr.name == 'syscall':
            # syscall
            id = self.r.read('v0')
            if id == 10:
                # exit
                log.info('\n*** exiting ***')
                raise Exception('exit syscall')
            elif id == 1:
                # print_int
                # not using log here because this will always show up and we
                # want to suppress newline
                print self.r.read('a0'),
                sys.stdout.flush()
            elif id == 4:
                # print_string
                ptr = self.r.read('a0')
                null = self.dmem.memory.find('\x00', ptr)
                # not using log here, see above
                print self.dmem.memory[ptr:None if null == -1 else null],
                sys.stdout.flush()
            elif id == 5:
                # read_int
                try:
                    inp = int(raw_input())
                    self.r.write('v0', inp)
                except Exception:
                    raise Exception('input not integer')
            else:
                raise Exception('bad syscall id')

        else:
            raise Exception('bad instruction: {}'.format(instr.name))
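
One common way to keep an interpreter like execute_single from growing into a single long if/elif chain is a dispatch table that maps instruction names to handler functions. The sketch below shows that alternative shape with just two handlers; it borrows the r.read/r.write register interface from the example above but is otherwise hypothetical, not spym's actual structure.

def op_add(cpu, instr):
    # add rd, rs, rt
    cpu.r.write(instr.ops[0],
                cpu.r.read(instr.ops[1]) + cpu.r.read(instr.ops[2]))

def op_sub(cpu, instr):
    # sub rd, rs, rt
    cpu.r.write(instr.ops[0],
                cpu.r.read(instr.ops[1]) - cpu.r.read(instr.ops[2]))

HANDLERS = {'add': op_add, 'sub': op_sub}

def execute_single(cpu, instr):
    try:
        HANDLERS[instr.name](cpu, instr)
    except KeyError:
        raise Exception('bad instruction: {}'.format(instr.name))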

Example 166

Project: conpot Source File: guardian_ast_server.py
    def handle(self, sock, addr):
        session = conpot_core.get_session('guardian_ast', addr[0], addr[1])
        logger.info('New GuardianAST connection from %s:%d. (%s)', addr[0], addr[1], session.id)
        session.add_event({'type': 'NEW_CONNECTION'})
        current_time = datetime.datetime.utcnow()
        fill_start = self.fill_offset_time - datetime.timedelta(minutes=313)
        fill_stop = self.fill_offset_time - datetime.timedelta(minutes=303)
        # Default Product names, change based off country needs
        product1 = self.databus.get_value('product1').ljust(22)
        product2 = self.databus.get_value('product2').ljust(22)
        product3 = self.databus.get_value('product3').ljust(22)
        product4 = self.databus.get_value('product4').ljust(22)

        # Create random numbers for the volumes
        #
        # This will create an initial volume and then a second value based
        # off the original value.
        vol1 = self.databus.get_value('vol1')
        vol1tc = random.randint(vol1, vol1+200)
        vol2 = self.databus.get_value('vol2')
        vol2tc = random.randint(vol2, vol2+200)
        vol3 = self.databus.get_value('vol3')
        vol3tc = random.randint(vol3, vol3+200)
        vol4 = self.databus.get_value('vol4')
        vol4tc = random.randint(vol4, vol4+200)

        # unfilled space ULLAGE
        ullage1 = str(self.databus.get_value('ullage1'))
        ullage2 = str(self.databus.get_value('ullage2'))
        ullage3 = str(self.databus.get_value('ullage3'))
        ullage4 = str(self.databus.get_value('ullage4'))

        # Height of tank
        height1 = str(self.databus.get_value('height1')).ljust(5, '0')
        height2 = str(self.databus.get_value('height2')).ljust(5, '0')
        height3 = str(self.databus.get_value('height3')).ljust(5, '0')
        height4 = str(self.databus.get_value('height4')).ljust(5, '0')

        # Water in tank, this is a variable that needs to be low
        h2o1 = str(self.databus.get_value('h2o1')).ljust(4, '0')
        h2o2 = str(self.databus.get_value('h2o2')).ljust(4, '0')
        h2o3 = str(self.databus.get_value('h2o3')).ljust(4, '0')
        h2o4 = str(self.databus.get_value('h2o4')).ljust(4, '0')

        # Temperature of the tank, this will need to be between 50 - 60
        temp1 = str(self.databus.get_value('temp1')).ljust(5, '0')
        temp2 = str(self.databus.get_value('temp2')).ljust(5, '0')
        temp3 = str(self.databus.get_value('temp3')).ljust(5, '0')
        temp4 = str(self.databus.get_value('temp4')).ljust(5, '0')

        station = self.databus.get_value('station_name')

        # This function sets up the message to be sent when a successful I20100 command is received.
        # The final message is sent with a current date/time stamp inside the main loop.
        def I20100():
            ret = '\nI20100\n' + str(current_time.strftime('%m/%d/%Y %H:%M'))
            ret += '\n\n' + station + '\n\n\n\nIN-TANK INVENTORY\n\n'
            ret += 'TANK PRODUCT             VOLUME TC VOLUME   ULLAGE   HEIGHT    WATER     TEMP'
            ret += '\n  1  ' + product1 + str(vol1) + '      ' + str(vol1tc) + '     ' + ullage1 + '    ' + height1 + '     ' + h2o1 + '    ' + temp1
            ret += '\n  2  ' + product2 + str(vol2) + '      ' + str(vol2tc) + '     ' + ullage2 + '    ' + height2 + '     ' + h2o2 + '    ' + temp2
            ret += '\n  3  ' + product3 + str(vol3) + '      ' + str(vol3tc) + '     ' + ullage3 + '    ' + height3 + '     ' + h2o3 + '    ' + temp3
            ret += '\n  4  ' + product4 + str(vol4) + '      ' + str(vol4tc) + '     ' + ullage4 + '    ' + height4 + '     ' + h2o4 + '    ' + temp4
            ret += '\n'
            return ret

        ###########################################################################
        #
        # Only one Tank is listed currently in the I20200 command
        #
        ###########################################################################
        def I20200():
            ret = '\nI20200\n' + str(current_time.strftime('%m/%d/%Y %H:%M'))
            ret += '\n\n' + station + '\n\n\n\nDELIVERY REPORT\n\n'
            ret += 'T 1:' + product1 + '\nINCREASE   DATE / TIME             GALLONS TC GALLONS WATER  TEMP DEG F  HEIGHT\n\n'

            ret += '      END: ' + str(fill_stop.strftime('%m/%d/%Y %H:%M')) + '         ' + str(vol1 + 300) + '       ' + str(vol1tc + 300) + '   ' + h2o1 + '      ' + temp1 + '    ' + height1 + '\n'
            ret += '    START: ' + str(fill_start.strftime('%m/%d/%Y %H:%M')) + '         ' + str(vol1 - 300) + '       ' + str(vol1tc - 300) + '   ' + h2o1 + '      ' + temp1 + '    ' + str(float(height1) - 23) + '\n'
            ret += '   AMOUNT:                          ' + str(vol1) + '       ' + str(vol1tc) + '\n\n'
            return ret

        ###########################################################################
        #
        # I20300 In-Tank Leak Detect Report
        #
        ###########################################################################
        def I20300():
            ret = '\nI20300\n' + str(current_time.strftime('%m/%d/%Y %H:%M'))
            ret += '\n\n' + station + '\n\n\n'
            ret += 'TANK 1    ' + product1 + '\n    TEST STATUS: OFF\nLEAK DATA NOT AVAILABLE ON THIS TANK\n\n'
            ret += 'TANK 2    ' + product2 + '\n    TEST STATUS: OFF\nLEAK DATA NOT AVAILABLE ON THIS TANK\n\n'
            ret += 'TANK 3    ' + product3 + '\n    TEST STATUS: OFF\nLEAK DATA NOT AVAILABLE ON THIS TANK\n\n'
            ret += 'TANK 4    ' + product4 + '\n    TEST STATUS: OFF\nLEAK DATA NOT AVAILABLE ON THIS TANK\n\n'
            return ret

        ###########################################################################
        # Shift report command I20400 only one item in report at this time,
        # but can always add more if needed
        ###########################################################################
        def I20400():
            ret = '\nI20400\n' + str(current_time.strftime('%m/%d/%Y %H:%M'))
            ret += '\n\n' + station + '\n\n\n\nSHIFT REPORT\n\n'
            ret += 'SHIFT 1 TIME: 12:00 AM\n\nTANK PRODUCT\n\n'
            ret += '  1  ' + product1 + ' VOLUME TC VOLUME  ULLAGE  HEIGHT  WATER   TEMP\n'
            ret += 'SHIFT  1 STARTING VALUES      ' + str(vol1) + '     ' + str(vol1tc) + '    ' + ullage1 + '   ' + height1 + '   ' + h2o1 + '    ' + temp1 + '\n'
            ret += '         ENDING VALUES        ' + str(vol1 + 940) + '     ' + str(vol1tc + 886) + '    ' + str(int(ullage1) + 345) + '   ' + str(float(height1) + 53) + '  ' + h2o1 + '    ' + temp1 + '\n'
            ret += '         DELIVERY VALUE          0\n'
            ret += '         TOTALS                940\n\n'
            return ret

        ###########################################################################
        # I20500 In-Tank Status Report
        ###########################################################################
        def I20500():
            ret = '\nI20500\n' + str(current_time.strftime('%m/%d/%Y %H:%M'))
            ret += '\n\n\n' + station + '\n\n\n'
            ret += 'TANK   PRODUCT                 STATUS\n\n'
            ret += '  1    ' + product1 + '  NORMAL\n\n'
            ret += '  2    ' + product2 + '  HIGH WATER ALARM\n'
            ret += '                               HIGH WATER WARNING\n\n'
            ret += '  3    ' + product3 + '  NORMAL\n\n'
            ret += '  4    ' + product4 + '  NORMAL\n\n'
            return ret

        while True:
            try:
                # Get the initial data
                response = sock.recv(4096)
                # The connection has been closed
                if not response:
                    break

                while not ('\n' in response or '00' in response):
                    response += sock.recv(4096)
                # if first value is not ^A then do nothing
                # thanks John(achillean) for the help
                if response[0] != '\x01':
                    logger.info('Non ^A command attempt %s:%d. (%s)', addr[0], addr[1], session.id)
                    break
                # if the response is shorter than 6 characters, then do nothing
                if len(response) < 6:
                    logger.info('Invalid command attempt %s:%d. (%s)', addr[0], addr[1], session.id)
                    break

                cmds = {"I20100": I20100, "I20200": I20200, "I20300": I20300, "I20400": I20400, "I20500": I20500}
                cmd = response[1:7]  # strip ^A and \n out
                session.add_event({'command': cmd})
                if cmd in cmds:
                    logger.info('%s command attempt %s:%d. (%s)', cmd, addr[0], addr[1], session.id)
                    sock.send(cmds[cmd]())
                elif cmd.startswith("S6020"):
                    # change the tank name
                    if cmd.startswith("S60201"):
                        # split string into two, the command, and the data
                        TEMP = response.split('S60201')
                        # if length is less than two, print error
                        if len(TEMP) < 2:
                            sock.send("9999FF1B\n")
                        # Else the command was entered correctly and continue
                        else:
                            # Strip off the carriage returns and newlines
                            TEMP1 = TEMP[1].rstrip("\r\n")
                            # if Length is less than 22
                            if len(TEMP1) < 22:
                                # pad the result to have 22 chars
                                product1 = TEMP1.ljust(22)
                            elif len(TEMP1) > 22:
                                # else only print 22 chars if the result was longer
                                product1 = TEMP1[:20] + "  "
                            else:
                                # else it fits fine (22 chars)
                                product1 = TEMP1
                        logger.info('S60201: %s command attempt %s:%d. (%s)', TEMP1, addr[0], addr[1], session.id)
                    # Follows format for S60201 for comments
                    elif cmd.startswith("S60202"):
                        TEMP = response.split('S60202')
                        if len(TEMP) < 2:
                            sock.send("9999FF1B\n")
                        else:
                            TEMP1 = TEMP[1].rstrip("\r\n")
                            if len(TEMP1) < 22:
                                product2 = TEMP1.ljust(22)
                            elif len(TEMP1) > 22:
                                product2 = TEMP1[:20] + "  "
                            else:
                                product2 = TEMP1
                        logger.info('S60202: %s command attempt %s:%d. (%s)', TEMP1, addr[0], addr[1], session.id)
                    # Follows format for S60201 for comments
                    elif cmd.startswith("S60203"):
                        TEMP = response.split('S60203')
                        if len(TEMP) < 2:
                            sock.send("9999FF1B\n")
                        else:
                            TEMP1 = TEMP[1].rstrip("\r\n")
                            if len(TEMP1) < 22:
                                product3 = TEMP1.ljust(22)
                            elif len(TEMP1) > 22:
                                product3 = TEMP1[:20] + "  "
                            else:
                                product3 = TEMP1
                        logger.info('S60203: %s command attempt %s:%d. (%s)', TEMP1, addr[0], addr[1], session.id)
                    # Follows format for S60201 for comments
                    elif cmd.startswith("S60204"):
                        TEMP = response.split('S60204')
                        if len(TEMP) < 2:
                            sock.send("9999FF1B\n")
                        else:
                            TEMP1 = TEMP[1].rstrip("\r\n")
                            if len(TEMP1) < 22:
                                product4 = TEMP1.ljust(22)
                            elif len(TEMP1) > 22:
                                product4 = TEMP1[:20] + "  "
                            else:
                                product4 = TEMP1
                        logger.info('S60204: %s command attempt %s:%d. (%s)', TEMP1, addr[0], addr[1], session.id)
                    # Follows format for S60201 for comments
                    elif cmd.startswith("S60200"):
                        TEMP = response.split('S60200')
                        if len(TEMP) < 2:
                            # 9999 indicates that the command was not understood and
                            # FF1B is the checksum for the 9999
                            sock.send("9999FF1B\n")
                        else:
                            TEMP1 = TEMP[1].rstrip("\r\n")
                            if len(TEMP1) < 22:
                                product1 = TEMP1.ljust(22)
                                product2 = TEMP1.ljust(22)
                                product3 = TEMP1.ljust(22)
                                product4 = TEMP1.ljust(22)
                            elif len(TEMP1) > 22:
                                product1 = TEMP1[:20] + "  "
                                product2 = TEMP1[:20] + "  "
                                product3 = TEMP1[:20] + "  "
                                product4 = TEMP1[:20] + "  "
                            else:
                                product1 = TEMP1
                                product2 = TEMP1
                                product3 = TEMP1
                                product4 = TEMP1
                        logger.info('S60200: %s command attempt %s:%d. (%s)', TEMP1, addr[0], addr[1], session.id)
                    else:
                        sock.send("9999FF1B\n")
                # Else it is a currently unsupported command so print the error message found in the manual
                # 9999 indicates that the command was not understood and FF1B is the checksum for the 9999
                else:
                    sock.send("9999FF1B\n")
                    # log what was entered
                    logger.info('%s command attempt %s:%d. (%s)', response, addr[0], addr[1], session.id)
            except Exception, e:
                print 'Unknown Error: {}'.format(str(e))
                raise
            except KeyboardInterrupt:
                break
        logger.info('GuardianAST client disconnected %s:%d. (%s)', addr[0], addr[1], session.id)
        session.add_event({'type': 'CONNECTION_LOST'})
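
Each S60200–S60204 branch above applies the same 22-character normalization to the submitted product name: pad short names with spaces, truncate long ones to 20 characters plus two trailing spaces, and keep exact 22-character names as-is. That repetition could be collapsed into one helper; a sketch follows (normalize_product_name is a hypothetical name, not part of conpot).

def normalize_product_name(raw, width=22):
    # Fit a submitted product/tank name into a fixed-width field.
    name = raw.rstrip('\r\n')
    if len(name) < width:
        return name.ljust(width)        # pad short names with spaces
    if len(name) > width:
        return name[:width - 2] + '  '  # truncate, keep two trailing spaces
    return name

# e.g. product1 = normalize_product_name(response.split('S60201')[1])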

Example 167

Project: dipy Source File: reconst.py
    def run(self, input_files, bvalues, bvectors, mask_files, b0_threshold=0.0,
            save_metrics=[],
            out_dir='', out_tensor='tensors.nii.gz', out_fa='fa.nii.gz',
            out_ga='ga.nii.gz', out_rgb='rgb.nii.gz', out_md='md.nii.gz',
            out_ad='ad.nii.gz', out_rd='rd.nii.gz', out_mode='mode.nii.gz',
            out_evec='evecs.nii.gz', out_eval='evals.nii.gz'):

        """ Workflow for tensor reconstruction and for computing DTI metrics.
        Performs a tensor reconstruction on the files by 'globing'
        ``input_files`` and saves the DTI metrics in a directory specified by
        ``out_dir``.

        Parameters
        ----------
        input_files : string
            Path to the input volumes. This path may contain wildcards to
            process multiple inputs at once.
        bvalues : string
            Path to the bvalues files. This path may contain wildcards to use
            multiple bvalues files at once.
        bvectors : string
            Path to the bvectors files. This path may contain wildcards to use
            multiple bvectors files at once.
        mask_files : string
            Path to the input masks. This path may contain wildcards to use
            multiple masks at once. (default: No mask used)
        b0_threshold : float, optional
            Threshold used to find b=0 directions (default 0.0)
        save_metrics : variable string, optional
            List of metrics to save.
            Possible values: fa, ga, rgb, md, ad, rd, mode, tensor, evec, eval
            (default [] (all))
        out_dir : string, optional
            Output directory (default input file directory)
        out_tensor : string, optional
            Name of the tensors volume to be saved (default 'tensors.nii.gz')
        out_fa : string, optional
            Name of the fractional anisotropy volume to be saved
            (default 'fa.nii.gz')
        out_ga : string, optional
            Name of the geodesic anisotropy volume to be saved
            (default 'ga.nii.gz')
        out_rgb : string, optional
            Name of the color fa volume to be saved (default 'rgb.nii.gz')
        out_md : string, optional
            Name of the mean diffusivity volume to be saved
            (default 'md.nii.gz')
        out_ad : string, optional
            Name of the axial diffusivity volume to be saved
            (default 'ad.nii.gz')
        out_rd : string, optional
            Name of the radial diffusivity volume to be saved
            (default 'rd.nii.gz')
        out_mode : string, optional
            Name of the mode volume to be saved (default 'mode.nii.gz')
        out_evec : string, optional
            Name of the eigenvectors volume to be saved
            (default 'evecs.nii.gz')
        out_eval : string, optional
            Name of the eigenvalues to be saved (default 'evals.nii.gz')
        """
        io_it = self.get_io_iterator()

        for dwi, bval, bvec, mask, otensor, ofa, oga, orgb, omd, oad, orad, \
            omode, oevecs, oevals in io_it:

            logging.info('Computing DTI metrics for {0}'.format(dwi))

            img = nib.load(dwi)
            data = img.get_data()
            affine = img.get_affine()

            if mask is not None:
                mask = nib.load(mask).get_data().astype(np.bool)

            tenfit, _ = self.get_fitted_tensor(data, mask, bval, bvec,
                                               b0_threshold)

            if not save_metrics:
                save_metrics = ['fa', 'md', 'rd', 'ad', 'ga', 'rgb', 'mode',
                                'evec', 'eval', 'tensor']

            FA = fractional_anisotropy(tenfit.evals)
            FA[np.isnan(FA)] = 0
            FA = np.clip(FA, 0, 1)

            if 'tensor' in save_metrics:
                tensor_vals = lower_triangular(tenfit.quadratic_form)
                correct_order = [0, 1, 3, 2, 4, 5]
                tensor_vals_reordered = tensor_vals[..., correct_order]
                fiber_tensors = nib.Nifti1Image(tensor_vals_reordered.astype(
                    np.float32), affine)
                nib.save(fiber_tensors, otensor)

            if 'fa' in save_metrics:
                fa_img = nib.Nifti1Image(FA.astype(np.float32), affine)
                nib.save(fa_img, ofa)

            if 'ga' in save_metrics:
                GA = geodesic_anisotropy(tenfit.evals)
                ga_img = nib.Nifti1Image(GA.astype(np.float32), affine)
                nib.save(ga_img, oga)

            if 'rgb' in save_metrics:
                RGB = color_fa(FA, tenfit.evecs)
                rgb_img = nib.Nifti1Image(np.array(255 * RGB, 'uint8'), affine)
                nib.save(rgb_img, orgb)

            if 'md' in save_metrics:
                MD = mean_diffusivity(tenfit.evals)
                md_img = nib.Nifti1Image(MD.astype(np.float32), affine)
                nib.save(md_img, omd)

            if 'ad' in save_metrics:
                AD = axial_diffusivity(tenfit.evals)
                ad_img = nib.Nifti1Image(AD.astype(np.float32), affine)
                nib.save(ad_img, oad)

            if 'rd' in save_metrics:
                RD = radial_diffusivity(tenfit.evals)
                rd_img = nib.Nifti1Image(RD.astype(np.float32), affine)
                nib.save(rd_img, orad)

            if 'mode' in save_metrics:
                MODE = get_mode(tenfit.quadratic_form)
                mode_img = nib.Nifti1Image(MODE.astype(np.float32), affine)
                nib.save(mode_img, omode)

            if 'evec' in save_metrics:
                evecs_img = nib.Nifti1Image(tenfit.evecs.astype(np.float32), affine)
                nib.save(evecs_img, oevecs)

            if 'eval' in save_metrics:
                evals_img = nib.Nifti1Image(tenfit.evals.astype(np.float32), affine)
                nib.save(evals_img, oevals)

            logging.info('DTI metrics saved in {0}'.
                         format(os.path.dirname(oevals)))
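
The chain of `if '<metric>' in save_metrics:` blocks could also be written as a table mapping metric names to compute functions, with one generic save-and-log loop. Below is a sketch under the simplifying assumption that every metric is stored as float32 (the real workflow stores the RGB map as uint8); metric_fns and out_paths are hypothetical arguments, not part of dipy's workflow API.

import logging
import nibabel as nib
import numpy as np

def save_selected_metrics(affine, save_metrics, metric_fns, out_paths):
    # metric_fns: dict of metric name -> zero-argument function returning an array
    # out_paths:  dict of metric name -> output filename
    for name in save_metrics:
        values = metric_fns[name]()
        nib.save(nib.Nifti1Image(values.astype(np.float32), affine),
                 out_paths[name])
        logging.info('Saved %s to %s', name, out_paths[name])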

Example 168

Project: openode Source File: notify_users.py
def immediately_notify_users(post):

    # we don't want to disturb the original routine
    try:
        # set default language TODO - language per user - add user attribute
        old_lang = get_language()
        activate(django_settings.LANGUAGE_CODE)

        DEBUG_THIS_COMMAND = getattr(django_settings, 'DEBUG_SEND_EMAIL_NOTIFICATIONS', True)

        # compose subject according to the post type
        subject_line = _('Notification')
        if post.post_type == const.POST_TYPE_QUESTION:
            subject_line += ': ' + _('Question')
        elif post.post_type == const.POST_TYPE_DOCUMENT:
            subject_line += ': ' + _('Document')
        elif post.post_type == const.POST_TYPE_COMMENT:
            subject_line += ': ' + _('Comment')
        elif post.post_type == const.POST_TYPE_THREAD_POST:
            if post.thread.thread_type == const.THREAD_TYPE_QUESTION:
                subject_line += ': ' + _('Answer')
            elif post.thread.thread_type == const.THREAD_TYPE_DISCUSSION:
                subject_line += ': ' + _('Discussion post')
        else:
            # post type is probably only a description, do nothing
            activate(old_lang)
            return False

        subject_line += ' - ' + post.thread.title

        # compose message according to post type
        url_prefix = openode_settings.APP_URL
        # link to node
        # text = u'<p>%s: <a href="%s">%s</a></p>' % (_('Node'), url_prefix + post.thread.node.get_absolute_url(), post.thread.node.full_title())
        text = u'<p>%s: %s</p>' % (_('Node'), post.thread.node.full_title())
        # title according to the post type
        text += '<h2>'
        if post.last_edited_by:
            # post was updated
            if post.post_type == const.POST_TYPE_QUESTION:
                text += _('Updated question')
            elif post.post_type == const.POST_TYPE_DOCUMENT:
                text += _('Updated document')
            elif post.post_type == const.POST_TYPE_COMMENT:
                text += _('Updated comment')
            elif post.post_type == const.POST_TYPE_THREAD_POST:
                if post.thread.thread_type == const.THREAD_TYPE_QUESTION:
                    text += _('Updated answer')
                elif post.thread.thread_type == const.THREAD_TYPE_DISCUSSION:
                    text += _('Updated discussion post')
        else:
            # post is new
            if post.post_type == const.POST_TYPE_QUESTION:
                text += _('New question')
            elif post.post_type == const.POST_TYPE_DOCUMENT:
                text += _('New document')
            elif post.post_type == const.POST_TYPE_COMMENT:
                text += _('New comment')
            elif post.post_type == const.POST_TYPE_THREAD_POST:
                if post.thread.thread_type == const.THREAD_TYPE_QUESTION:
                    text += _('New answer')
                elif post.thread.thread_type == const.THREAD_TYPE_DISCUSSION:
                    text += _('New discussion post')
        text += '</h2>'

        # link to post
        if post.post_type == const.POST_TYPE_DOCUMENT:
            url = url_prefix + post.thread.get_absolute_url()
        else:
            url = url_prefix + post.get_absolute_url()
        text += '<p><a href="%(url)s">%(url)s</a></p>' % {"url": url}

        # author
        text += '<p>'
        if post.last_edited_by:
            # post was updated
            text += _(u'%(datetime)s changed by <strong>%(user)s</strong>') % {'datetime': humanize_datetime(post.last_edited_at, 0), 'user': post.last_edited_by.screen_name}
        else:
            # post is new
            text += _(u'%(datetime)s created by <strong>%(user)s</strong>') % {'datetime': humanize_datetime(post.added_at, 0), 'user': post.author.screen_name}
        text += '</p>'

        # show post text
        text += post.html

        # show related post if convenient
        if post.post_type == const.POST_TYPE_THREAD_POST and post.thread.thread_type == const.THREAD_TYPE_QUESTION:
            text += '<h3>'
            text += _('Question')
            text += '</h3>'
            # text += '<p><a href="%s">%s</a></p>' % (url_prefix + post.thread._main_post().get_absolute_url(), url_prefix + post.thread._main_post().get_absolute_url())
            text += post.thread._main_post().html
        elif post.post_type == const.POST_TYPE_COMMENT:
            text += '<h3>'
            text += _('Commented post')
            text += '</h3>'
            # text += '<p><a href="%s">%s</a></p>' % (url_prefix + post.parent.get_absolute_url(), url_prefix + post.parent.get_absolute_url())
            text += post.parent.html

        # message bottom
        text += '<hr />'
        text += '<p>'
        text += _('Please remember that you can always adjust frequency of the email updates or turn them off entirely in your profile.')
        text += '</p>'
        text += '<p>'
        text += _('If you believe that this message was sent in an error, please contact us.')
        text += '</p>'

        # render email
        data = {
            'text': text,
            'site_name': openode_settings.APP_SHORT_NAME,
            'site_url': openode_settings.APP_URL
        }
        template = get_template('email/instant_notification.html')
        message = template.render(data)

        recipients = {}
        # get all thread followers
        for user in post.thread.followed_by.filter(notification_subscriptions__frequency='i', notification_subscriptions__feed_type='q_sel'):
            recipients[user.pk] = user

        # get all node followers
        for user in post.thread.node.followed_by.filter(notification_subscriptions__frequency='i', notification_subscriptions__feed_type='q_sel'):
            recipients[user.pk] = user

        # remove the author of this edit from the recipients
        if post.last_edited_by:
            # post was updated
            recipients.pop(post.last_edited_by.pk, None)
        else:
            # post is new
            recipients.pop(post.author.pk, None)

        # send all emails
        for user in recipients.values():
            if DEBUG_THIS_COMMAND:
                recipient_email = django_settings.ADMINS[0][1]
            else:
                recipient_email = user.email

            mail.send_mail(subject_line, message, django_settings.DEFAULT_FROM_EMAIL, [recipient_email], raise_on_failure=True)
            logging.info('Email notification sent: %s' % repr({
                "user": user.screen_name,
                "user_email": recipient_email,
                "user_pk": user.pk,
                "post_pk": post.pk
            }))

        activate(old_lang)
        return True

    except Exception, e:
        logging.error('Email notification - failed to send immediate notification for post: %s' % repr({
            "post_pk": post.pk,
            "error": e
        }))

    return False
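
One detail of the logging call above: the dict is rendered with repr() and interpolated with % before logging.info is invoked, so that work happens even when INFO is disabled. Passing the dict as an argument lets the logging module defer the formatting; a sketch of the equivalent call wrapped in a small helper (the helper name is hypothetical):

import logging

def log_notification_sent(user, recipient_email, post):
    # %r is interpolated by the logging module only if INFO is enabled,
    # instead of eagerly via the % operator as in the example above.
    logging.info('Email notification sent: %r', {
        'user': user.screen_name,
        'user_email': recipient_email,
        'user_pk': user.pk,
        'post_pk': post.pk,
    })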

Example 169

Project: monasca-agent Source File: daemon.py
def main():
    options, args = util.get_parsed_args()
    config = cfg.Config()
    collector_config = config.get_config(['Main', 'Api', 'Logging'])
    autorestart = collector_config.get('autorestart', False)

    collector_restart_interval = collector_config.get(
        'collector_restart_interval', 24)
    if collector_restart_interval not in range(1, 49):
        log.error("Collector_restart_interval = {0} is out of legal range"
                  " [1, 48]. Reset collector_restart_interval to 24".format(collector_restart_interval))
        collector_restart_interval = 24

    COMMANDS = [
        'start',
        'stop',
        'restart',
        'foreground',
        'status',
        'info',
        'check',
        'check_all',
        'configcheck',
        'jmx',
    ]

    if len(args) < 1:
        sys.stderr.write("Usage: %s %s\n" % (sys.argv[0], "|".join(COMMANDS)))
        return 2

    command = args[0]
    if command not in COMMANDS:
        sys.stderr.write("Unknown command: %s\n" % command)
        return 3

    pid_file = util.PidFile('monasca-agent')

    if options.clean:
        pid_file.clean()

    agent = CollectorDaemon(pid_file.get_path(), autorestart)

    if command in START_COMMANDS:
        log.info('Agent version %s' % config.get_version())

    if 'start' == command:
        log.info('Start daemon')
        agent.start()

    elif 'stop' == command:
        log.info('Stop daemon')
        agent.stop()

    elif 'restart' == command:
        log.info('Restart daemon')
        agent.restart()

    elif 'status' == command:
        agent.status()

    elif 'info' == command:
        return agent.info(verbose=options.verbose)

    elif 'foreground' == command:
        logging.info('Running in foreground')
        if autorestart:
            # Set-up the supervisor callbacks and fork it.
            logging.info('Running Agent with auto-restart ON')
        # Run in the standard foreground.
        agent.run(collector_config)

    elif 'check' == command:
        check_name = args[1]
        checks = util.load_check_directory()
        for check in checks['initialized_checks']:
            if check.name == check_name:
                run_check(check)

    elif 'check_all' == command:
        print("Loading check directory...")
        checks = util.load_check_directory()
        print("...directory loaded.\n")
        for check in checks['initialized_checks']:
            run_check(check)

    elif 'configcheck' == command or 'configtest' == command:
        all_valid = True
        paths = util.Paths()
        for conf_path in glob.glob(os.path.join(paths.get_confd_path(), "*.yaml")):
            basename = os.path.basename(conf_path)
            try:
                config.check_yaml(conf_path)
            except Exception as e:
                all_valid = False
                print("%s contains errors:\n    %s" % (basename, e))
            else:
                print("%s is valid" % basename)
        if all_valid:
            print("All yaml files passed. You can now run the Monitoring agent.")
            return 0
        else:
            print("Fix the invalid yaml files above in order to start the Monitoring agent. "
                  "A useful external tool for yaml parsing can be found at "
                  "http://yaml-online-parser.appspot.com/")
            return 1

    elif 'jmx' == command:

        if len(args) < 2 or args[1] not in jmxfetch.JMX_LIST_COMMANDS.keys():
            print("#" * 80)
            print("JMX tool to be used to help configure your JMX checks.")
            print("See http://docs.datadoghq.com/integrations/java/ for more information")
            print("#" * 80)
            print("\n")
            print("You have to specify one of the following commands:")
            for command, desc in jmxfetch.JMX_LIST_COMMANDS.iteritems():
                print("      - %s [OPTIONAL: LIST OF CHECKS]: %s" % (command, desc))
            print("Example: sudo /etc/init.d/monasca-agent jmx list_matching_attributes tomcat jmx solr")
            print("\n")

        else:
            jmx_command = args[1]
            checks_list = args[2:]
            paths = util.Paths()
            confd_path = paths.get_confd_path()
            # Start JMXFetch if needed
            should_run = jmxfetch.JMXFetch.init(confd_path,
                                                config,
                                                15,
                                                jmx_command,
                                                checks_list,
                                                reporter="console")
            if not should_run:
                print("Couldn't find any valid JMX configuration in your conf.d directory: %s" % confd_path)
                print("Have you enabled any JMX checks ?")

    return 0
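
The restart-interval check near the top of main() is a common guard: read a numeric option, confirm it falls inside a legal range, and fall back to the default with a logged error instead of aborting. A small stand-alone sketch of that guard (clamp_to_default is a hypothetical helper, not part of monasca-agent), using only the standard logging module:

import logging

def clamp_to_default(name, value, low, high, default):
    """Return value if low <= value <= high; otherwise log an error and return default."""
    if low <= value <= high:
        return value
    logging.error("%s = %s is out of the legal range [%s, %s]; resetting to %s",
                  name, value, low, high, default)
    return default

interval = clamp_to_default('collector_restart_interval', 72, 1, 48, 24)  # -> 24, with an error logged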

Example 170

Project: GoAgent-Always-Available Source File: gae.py
def application(environ, start_response):
    if environ['REQUEST_METHOD'] == 'GET' and 'HTTP_X_URLFETCH_PS1' not in environ:
        timestamp = long(os.environ['CURRENT_VERSION_ID'].split('.')[1])/2**28
        ctime = time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime(timestamp+8*3600))
        start_response('200 OK', [('Content-Type', 'text/plain')])
        yield 'GoAgent Python Server %s works, deployed at %s\n' % (__version__, ctime)
        if len(__password__) > 2:
            yield 'Password: %s%s%s' % (__password__[0], '*'*(len(__password__)-2), __password__[-1])
        raise StopIteration

    start_response('200 OK', [('Content-Type', 'image/gif')])

    if environ['REQUEST_METHOD'] == 'HEAD':
        raise StopIteration

    options = environ.get('HTTP_X_URLFETCH_OPTIONS', '')
    if 'rc4' in options and not __password__:
        yield format_response(400, {'Content-Type': 'text/html; charset=utf-8'}, message_html('400 Bad Request', 'Bad Request (options) - please set __password__ in gae.py', 'please set __password__ and upload gae.py again'))
        raise StopIteration

    try:
        if 'HTTP_X_URLFETCH_PS1' in environ:
            payload = inflate(base64.b64decode(environ['HTTP_X_URLFETCH_PS1']))
            body = inflate(base64.b64decode(environ['HTTP_X_URLFETCH_PS2'])) if 'HTTP_X_URLFETCH_PS2' in environ else ''
        else:
            wsgi_input = environ['wsgi.input']
            input_data = wsgi_input.read(int(environ.get('CONTENT_LENGTH', '0')))
            if 'rc4' in options:
                input_data = RC4Cipher(__password__).encrypt(input_data)
            payload_length, = struct.unpack('!h', input_data[:2])
            payload = inflate(input_data[2:2+payload_length])
            body = input_data[2+payload_length:]
        raw_response_line, payload = payload.split('\r\n', 1)
        method, url = raw_response_line.split()[:2]
        headers = {}
        for line in payload.splitlines():
            key, value = line.split(':', 1)
            headers[key.title()] = value.strip()
    except (zlib.error, KeyError, ValueError):
        import traceback
        yield format_response(500, {'Content-Type': 'text/html; charset=utf-8'}, message_html('500 Internal Server Error', 'Bad Request (payload) - Possible Wrong Password', '<pre>%s</pre>' % traceback.format_exc()))
        raise StopIteration

    kwargs = {}
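    # Move every X-Urlfetch-* request header into kwargs, keyed by its lower-cased suffix;
    # any() is used only to drive the generator, since dict.__setitem__ always returns None.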
    any(kwargs.__setitem__(x[len('x-urlfetch-'):].lower(), headers.pop(x)) for x in headers.keys() if x.lower().startswith('x-urlfetch-'))

    if 'Content-Encoding' in headers and body:
        if headers['Content-Encoding'] == 'deflate':
            body = inflate(body)
            headers['Content-Length'] = str(len(body))
            del headers['Content-Encoding']

    logging.info('%s "%s %s %s" - -', environ['REMOTE_ADDR'], method, url, 'HTTP/1.1')

    if __password__ and __password__ != kwargs.get('password', ''):
        yield format_response(403, {'Content-Type': 'text/html; charset=utf-8'}, message_html('403 Wrong password', 'Wrong password(%r)' % kwargs.get('password', ''), 'GoAgent proxy.ini password is wrong!'))
        raise StopIteration

    netloc = urlparse.urlparse(url).netloc

    if __hostsdeny__ and netloc.endswith(__hostsdeny__):
        yield format_response(403, {'Content-Type': 'text/html; charset=utf-8'}, message_html('403 Hosts Deny', 'Hosts Deny(%r)' % netloc, detail='url=%r' % url))
        raise StopIteration

    if len(url) > MAX_URL_LENGTH:
        yield format_response(400, {'Content-Type': 'text/html; charset=utf-8'}, message_html('400 Bad Request', 'length of URL too long(greater than %r)' % MAX_URL_LENGTH, detail='url=%r' % url))
        raise StopIteration

    if netloc.startswith(('127.0.0.', '::1', 'localhost')):
        yield format_response(400, {'Content-Type': 'text/html; charset=utf-8'}, message_html('GoAgent %s is Running' % __version__, 'Now you can visit some websites', ''.join('<a href="https://%s/">%s</a><br/>' % (x, x) for x in ('google.com', 'mail.google.com'))))
        raise StopIteration

    fetchmethod = getattr(urlfetch, method, None)
    if not fetchmethod:
        yield format_response(405, {'Content-Type': 'text/html; charset=utf-8'}, message_html('405 Method Not Allowed', 'Method Not Allowed: %r' % method, detail='Method Not Allowed URL=%r' % url))
        raise StopIteration

    timeout = int(kwargs.get('timeout', URLFETCH_TIMEOUT))
    validate_certificate = bool(int(kwargs.get('validate', 0)))
    maxsize = int(kwargs.get('maxsize', 0))
    # https://www.freebsdchina.org/forum/viewtopic.php?t=54269
    accept_encoding = headers.get('Accept-Encoding', '') or headers.get('Bccept-Encoding', '')
    errors = []
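    # Retry loop: a successful fetch breaks out; the for/else branch below runs only if every attempt fails.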
    for i in xrange(int(kwargs.get('fetchmax', URLFETCH_MAX))):
        try:
            response = urlfetch.fetch(url, body, fetchmethod, headers, allow_truncated=False, follow_redirects=False, deadline=timeout, validate_certificate=validate_certificate)
            break
        except apiproxy_errors.OverQuotaError as e:
            time.sleep(5)
        except urlfetch.DeadlineExceededError as e:
            errors.append('%r, timeout=%s' % (e, timeout))
            logging.error('DeadlineExceededError(timeout=%s, url=%r)', timeout, url)
            time.sleep(1)
            timeout *= 2
        except urlfetch.DownloadError as e:
            errors.append('%r, timeout=%s' % (e, timeout))
            logging.error('DownloadError(timeout=%s, url=%r)', timeout, url)
            time.sleep(1)
            timeout *= 2
        except urlfetch.ResponseTooLargeError as e:
            errors.append('%r, timeout=%s' % (e, timeout))
            response = e.response
            logging.error('ResponseTooLargeError(timeout=%s, url=%r) response(%r)', timeout, url, response)
            m = re.search(r'=\s*(\d+)-', headers.get('Range') or headers.get('range') or '')
            if m is None:
                headers['Range'] = 'bytes=0-%d' % (maxsize or URLFETCH_MAXSIZE)
            else:
                headers.pop('Range', '')
                headers.pop('range', '')
                start = int(m.group(1))
                headers['Range'] = 'bytes=%s-%d' % (start, start+(maxsize or URLFETCH_MAXSIZE))
            timeout *= 2
        except urlfetch.SSLCertificateError as e:
            errors.append('%r, should validate=0 ?' % e)
            logging.error('%r, timeout=%s', e, timeout)
        except Exception as e:
            errors.append(str(e))
            if i == 0 and method == 'GET':
                timeout *= 2
    else:
        error_string = '<br />\n'.join(errors)
        if not error_string:
            logurl = 'https://appengine.google.com/logs?&app_id=%s' % os.environ['APPLICATION_ID']
            error_string = 'Internal Server Error. <p/>try <a href="javascript:window.location.reload(true);">refresh</a> or goto <a href="%s" target="_blank">appengine.google.com</a> for details' % logurl
        yield format_response(502, {'Content-Type': 'text/html; charset=utf-8'}, message_html('502 Urlfetch Error', 'Python Urlfetch Error: %r' % method, error_string))
        raise StopIteration

    #logging.debug('url=%r response.status_code=%r response.headers=%r response.content[:1024]=%r', url, response.status_code, dict(response.headers), response.content[:1024])

    status_code = int(response.status_code)
    data = response.content
    response_headers = response.headers
    content_type = response_headers.get('content-type', '')
    if status_code == 200 and maxsize and len(data) > maxsize and response_headers.get('accept-ranges', '').lower() == 'bytes' and int(response_headers.get('content-length', 0)):
        status_code = 206
        response_headers['Content-Range'] = 'bytes 0-%d/%d' % (maxsize-1, len(data))
        data = data[:maxsize]
    if status_code == 200 and 'content-encoding' not in response_headers and 512 < len(data) < URLFETCH_DEFLATE_MAXSIZE and content_type.startswith(('text/', 'application/json', 'application/javascript')):
        if 'gzip' in accept_encoding:
            response_headers['Content-Encoding'] = 'gzip'
            compressobj = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION, zlib.DEFLATED, -zlib.MAX_WBITS, zlib.DEF_MEM_LEVEL, 0)
            dataio = io.BytesIO()
            dataio.write('\x1f\x8b\x08\x00\x00\x00\x00\x00\x02\xff')
            dataio.write(compressobj.compress(data))
            dataio.write(compressobj.flush())
            dataio.write(struct.pack('<LL', zlib.crc32(data) & 0xFFFFFFFFL, len(data) & 0xFFFFFFFFL))
            data = dataio.getvalue()
        elif 'deflate' in accept_encoding:
            response_headers['Content-Encoding'] = 'deflate'
            data = deflate(data)
    response_headers['Content-Length'] = str(len(data))
    if 'rc4' not in options:
        yield format_response(status_code, response_headers, '')
        yield data
    else:
        cipher = RC4Cipher(__password__)
        yield cipher.encrypt(format_response(status_code, response_headers, ''))
        yield cipher.encrypt(data)
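
The response-compression branch above builds a gzip member by hand: zlib.compressobj with negative wbits produces a raw DEFLATE stream, which is prefixed with a fixed 10-byte gzip header and followed by a CRC32/length trailer. The same trick restated as a small helper (a Python 3 sketch, not a drop-in for the Python 2 handler above):

import io
import struct
import zlib

def gzip_bytes(data):
    """Wrap data in a gzip member: fixed header + raw DEFLATE stream + CRC32/size trailer."""
    compressor = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION, zlib.DEFLATED, -zlib.MAX_WBITS)
    out = io.BytesIO()
    out.write(b'\x1f\x8b\x08\x00\x00\x00\x00\x00\x02\xff')   # magic, CM=deflate, no flags, mtime 0, XFL, OS=unknown
    out.write(compressor.compress(data))
    out.write(compressor.flush())
    out.write(struct.pack('<LL', zlib.crc32(data) & 0xFFFFFFFF, len(data) & 0xFFFFFFFF))
    return out.getvalue()

# gzip.decompress(gzip_bytes(b'hello world')) == b'hello world'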

Example 171

Project: owtf Source File: owtf.py
def process_options(user_args):
    try:
        db_plugin = ServiceLocator.get_component("db_plugin")
        valid_groups = db_plugin.GetAllGroups()
        valid_types = db_plugin.GetAllTypes() + ['all', 'quiet']
        arg = parse_options(user_args, valid_groups, valid_types)
    except KeyboardInterrupt as e:
        usage("Invalid OWTF option(s) %s" % e)

    # Default settings:
    profiles = {}
    plugin_group = arg.PluginGroup

    if arg.CustomProfile:  # Custom profiles specified
        # Quick pseudo-validation check
        for profile in arg.CustomProfile.split(','):
            chunks = profile.split(':')
            if len(chunks) != 2 or not os.path.exists(chunks[1]):
                usage("Invalid Profile")
            else:  # profile "ok" :)
                profiles[chunks[0]] = chunks[1]

    if arg.OnlyPlugins:
        arg.OnlyPlugins, plugin_groups = get_plugins_from_arg(arg.OnlyPlugins)
        try:
            # Set Plugin Group according to plugin list specified
            plugin_group = plugin_groups[0]
        except IndexError:
            usage("Please use either OWASP/OWTF codes or Plugin names")
        logging.info("Defaulting Plugin Group to '%s' based on list of plugins supplied" % plugin_group)

    if arg.ExceptPlugins:
        arg.ExceptPlugins, plugin_groups = get_plugins_from_arg(arg.ExceptPlugins)

    if arg.TOR_mode:
        arg.TOR_mode = arg.TOR_mode.split(":")
        if(arg.TOR_mode[0] == "help"):
            from framework.http.proxy.tor_manager import TOR_manager
            TOR_manager.msg_configure_tor()
            exit(0)
        if len(arg.TOR_mode) == 1:
            if arg.TOR_mode[0] != "help":
                usage("Invalid argument for TOR-mode")
        elif len(arg.TOR_mode) != 5:
            usage("Invalid argument for TOR-mode")
        else:
            # Enables OutboundProxy.
            if arg.TOR_mode[0] == '':
                outbound_proxy_ip = "127.0.0.1"
            else:
                outbound_proxy_ip = arg.TOR_mode[0]
            if arg.TOR_mode[1] == '':
                outbound_proxy_port = "9050"  # default TOR port
            else:
                outbound_proxy_port = arg.TOR_mode[1]
            arg.OutboundProxy = "socks://%s:%s" % (outbound_proxy_ip, outbound_proxy_port)

    if arg.Botnet_mode:  # Checking arguments
        arg.Botnet_mode = arg.Botnet_mode.split(":")
        if arg.Botnet_mode[0] == "miner" and len(arg.Botnet_mode) != 1:
            usage("Invalid argument for Botnet mode\n Mode must be miner or list")
        if arg.Botnet_mode[0] == "list":
            if len(arg.Botnet_mode) != 2:
                usage("Invalid argument for Botnet mode\n Mode must be miner or list")
            if not os.path.isfile(os.path.expanduser(arg.Botnet_mode[1])):
                usage("Error Proxy List not found! Please check the path.")

    if arg.OutboundProxy:
        arg.OutboundProxy = arg.OutboundProxy.split('://')
        if len(arg.OutboundProxy) == 2:
            arg.OutboundProxy = arg.OutboundProxy + arg.OutboundProxy.pop().split(':')
            if arg.OutboundProxy[0] not in ["socks", "http"]:
                usage("Invalid argument for Outbound Proxy")
        else:
            arg.OutboundProxy = arg.OutboundProxy.pop().split(':')
        # OutboundProxy should be type://ip:port
        if (len(arg.OutboundProxy) not in [2, 3]):
            usage("Invalid argument for Outbound Proxy")
        else:  # Check if the port is an int.
            try:
                int(arg.OutboundProxy[-1])
            except ValueError:
                usage("Invalid port provided for Outbound Proxy")

    if arg.InboundProxy:
        arg.InboundProxy = arg.InboundProxy.split(':')
        # InboundProxy should be (ip:)port:
        if len(arg.InboundProxy) not in [1, 2]:
            usage("Invalid argument for Inbound Proxy")
        else:
            try:
                int(arg.InboundProxy[-1])
            except ValueError:
                usage("Invalid port for Inbound Proxy")

    plugin_types_for_group = db_plugin.GetTypesForGroup(plugin_group)
    if arg.PluginType == 'all':
        arg.PluginType = plugin_types_for_group
    elif arg.PluginType == 'quiet':
        arg.PluginType = ['passive', 'semi_passive']

    scope = arg.Targets or []  # Arguments at the end are the URL target(s)
    num_targets = len(scope)
    if plugin_group != 'auxiliary' and num_targets == 0 and not arg.list_plugins:
        # TODO: Fix this
        pass
    elif num_targets == 1:  # Check if this is a file
        if os.path.isfile(scope[0]):
            logging.info("Scope file: trying to load targets from it ..")
            new_scope = []
            for target in open(scope[0]).read().split("\n"):
                CleanTarget = target.strip()
                if not CleanTarget:
                    continue  # Skip blank lines
                new_scope.append(CleanTarget)
            if len(new_scope) == 0:  # Bad file
                usage("Please provide a scope file (1 target x line)")
            scope = new_scope

    for target in scope:
        if target[0] == "-":
            usage("Invalid Target: " + target)

    args = ''
    if plugin_group == 'auxiliary':
        # For auxiliary plugins, the scope are the parameters.
        args = scope
        # auxiliary plugins do not have targets, they have metasploit-like parameters.
        scope = ['auxiliary']
    return {
        'list_plugins': arg.list_plugins,
        'Force_Overwrite': arg.ForceOverwrite,
        'Interactive': arg.Interactive == 'yes',
        'Simulation': arg.Simulation,
        'Scope': scope,
        'argv': sys.argv,
        'PluginType': arg.PluginType,
        'OnlyPlugins': arg.OnlyPlugins,
        'ExceptPlugins': arg.ExceptPlugins,
        'InboundProxy': arg.InboundProxy,
        'OutboundProxy': arg.OutboundProxy,
        'OutboundProxyAuth': arg.OutboundProxyAuth,
        'Profiles': profiles,
        'PluginGroup': plugin_group,
        'RPort': arg.RPort,
        'PortWaves': arg.PortWaves,
        'ProxyMode': arg.ProxyMode,
        'TOR_mode': arg.TOR_mode,
        'Botnet_mode': arg.Botnet_mode,
        'nowebui': arg.nowebui,
        'Args': args
    }
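
The OutboundProxy handling above accepts either "host:port" or "type://host:port" and validates both the scheme and the port before use. A compact equivalent is sketched below; parse_proxy is a hypothetical helper, not part of OWTF:

def parse_proxy(spec):
    """Split 'scheme://host:port' or 'host:port'; raise ValueError on anything malformed."""
    scheme = 'http'                                   # assume plain HTTP when no scheme is given
    if '://' in spec:
        scheme, _, spec = spec.partition('://')
        if scheme not in ('http', 'socks'):
            raise ValueError('unsupported proxy scheme: %r' % scheme)
    host, sep, port = spec.rpartition(':')
    if not sep or not host:
        raise ValueError('proxy must look like host:port, got %r' % spec)
    return scheme, host, int(port)                    # int() rejects a non-numeric port

# parse_proxy('socks://127.0.0.1:9050') -> ('socks', '127.0.0.1', 9050)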

Example 172

Project: GenomicConsensus Source File: arrow.py
def consensusAndVariantsForWindow(alnFile, refWindow, referenceContig,
                                  depthLimit, arrowConfig):
    """
    High-level routine for calling the consensus for a
    window of the genome given a cmp.h5.

    Identifies the coverage contours of the window in order to
    identify subintervals where a good consensus can be called.
    Creates the desired "no evidence consensus" where there is
    inadequate coverage.
    """
    winId, winStart, winEnd = refWindow
    logging.info("Arrow operating on %s" %
                 reference.windowToString(refWindow))

    if options.fancyChunking:
        # 1) identify the intervals with adequate coverage for arrow
        #    consensus; restrict to intervals of length > 10
        alnHits = U.readsInWindow(alnFile, refWindow,
                                  depthLimit=20000,
                                  minMapQV=arrowConfig.minMapQV,
                                  strategy="long-and-strand-balanced",
                                  stratum=options.readStratum,
                                  barcode=options.barcode)
        starts = np.fromiter((hit.tStart for hit in alnHits), np.int)
        ends   = np.fromiter((hit.tEnd   for hit in alnHits), np.int)
        intervals = kSpannedIntervals(refWindow, arrowConfig.minPoaCoverage,
                                      starts, ends, minLength=10)
        coverageGaps = holes(refWindow, intervals)
        allIntervals = sorted(intervals + coverageGaps)
        if len(allIntervals) > 1:
            logging.info("Usable coverage in %s: %r" %
                         (reference.windowToString(refWindow), intervals))

    else:
        allIntervals = [ (winStart, winEnd) ]

    # 2) pull out the reads we will use for each interval
    # 3) call consensusForAlignments on the interval
    subConsensi = []
    variants = []

    for interval in allIntervals:
        intStart, intEnd = interval
        intRefSeq = referenceContig[intStart:intEnd]
        subWin = subWindow(refWindow, interval)

        windowRefSeq = referenceContig[intStart:intEnd]
        alns = U.readsInWindow(alnFile, subWin,
                               depthLimit=depthLimit,
                               minMapQV=arrowConfig.minMapQV,
                               strategy="long-and-strand-balanced",
                               stratum=options.readStratum,
                               barcode=options.barcode)
        clippedAlns_ = [ aln.clippedTo(*interval) for aln in alns ]
        clippedAlns = U.filterAlns(subWin, clippedAlns_, arrowConfig)

        if len([ a for a in clippedAlns
                 if a.spansReferenceRange(*interval) ]) >= arrowConfig.minPoaCoverage:

            logging.debug("%s: Reads being used: %s" %
                          (reference.windowToString(subWin),
                           " ".join([str(hit.readName) for hit in alns])))

            alnsUsed = [] if options.reportEffectiveCoverage else None
            css = U.consensusForAlignments(subWin,
                                           intRefSeq,
                                           clippedAlns,
                                           arrowConfig,
                                           alnsUsed=alnsUsed)

            # Tabulate the coverage implied by these alignments, as
            # well as the post-filtering ("effective") coverage
            siteCoverage = U.coverageInWindow(subWin, alns)
            effectiveSiteCoverage = U.coverageInWindow(subWin, alnsUsed) if options.reportEffectiveCoverage else None

            variants_ = U.variantsFromConsensus(subWin, windowRefSeq,
                                                css.sequence, css.confidence, siteCoverage, effectiveSiteCoverage,
                                                options.aligner,
                                                ai=None)

            filteredVars =  filterVariants(options.minCoverage,
                                           options.minConfidence,
                                           variants_)
            # Annotate?
            if options.annotateGFF:
                annotateVariants(filteredVars, clippedAlns)

            variants += filteredVars

            # Dump?
            maybeDumpEvidence = \
                ((options.dumpEvidence == "all") or
                 (options.dumpEvidence == "outliers") or
                 (options.dumpEvidence == "variants") and (len(variants) > 0))
            if maybeDumpEvidence:
                refId, refStart, refEnd = subWin
                refName = reference.idToName(refId)
                windowDirectory = os.path.join(
                    options.evidenceDirectory,
                    refName,
                    "%d-%d" % (refStart, refEnd))
                ev = ArrowEvidence.fromConsensus(css)
                if options.dumpEvidence != "outliers":
                    ev.save(windowDirectory)
                elif (np.max(ev.delta) > 20):
                    # Mathematically I don't think we should be seeing
                    # deltas > 6 in magnitude, but let's just restrict
                    # attention to truly bonkers outliers.
                    ev.save(windowDirectory)

        else:
            css = ArrowConsensus.noCallConsensus(arrowConfig.noEvidenceConsensus,
                                                 subWin, intRefSeq)
        subConsensi.append(css)

    # 4) glue the subwindow consensus objects together to form the
    #    full window consensus
    css = join(subConsensi)

    # 5) Return
    return css, variants
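
coverageInWindow above tabulates per-position read depth inside a reference window. A standard way to compute that from read start/end arrays, such as the ones built with np.fromiter earlier in the function, is a difference array followed by a cumulative sum; the helper below sketches that idea and is not the GenomicConsensus implementation:

import numpy as np

def coverage_in_window(win_start, win_end, starts, ends):
    """Per-position read depth over [win_start, win_end), via a difference array."""
    n = win_end - win_start
    diff = np.zeros(n + 1, dtype=np.int64)
    for s, e in zip(starts, ends):
        s = max(int(s), win_start) - win_start        # clip each read to the window
        e = min(int(e), win_end) - win_start
        if e > s:
            diff[s] += 1
            diff[e] -= 1
    return np.cumsum(diff[:n])                        # depth at each position in the window

# coverage_in_window(100, 105, [99, 102], [103, 110]) -> array([1, 1, 2, 1, 1])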