six.moves.map

Here are examples of the Python API six.moves.map taken from open source projects.

178 Examples
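
six.moves.map resolves to itertools.imap on Python 2 and to the built-in map on Python 3, so it always behaves like the lazy Python 3 map: it returns an iterator, not a list. That is why most of the examples below wrap it in list(), set(), ''.join() or sum() when a materialized result is needed. A minimal sketch:

from six.moves import map

numbers = map(int, "1 2 3".split())   # a lazy iterator on both Python 2 and 3
print(list(numbers))                  # [1, 2, 3]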

Example 101

Project: anitya Source File: app.py
def modify_rst(rst):
    ''' Downgrade some of our rst directives if docutils is too old. '''

    try:
        # The rst features we need were introduced in this version
        minimum = [0, 9]
        version = list(map(int, docutils.__version__.split('.')))

        # If we're at or later than that version, no need to downgrade
        if version >= minimum:
            return rst
    except Exception:
        # If there was some error parsing or comparing versions, run the
        # substitutions just to be safe.
        pass

    # Otherwise, make code-blocks into just literal blocks.
    substitutions = {
        '.. code-block:: javascript': '::',
    }
    for old, new in substitutions.items():
        rst = rst.replace(old, new)

    return rst
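
The version check above relies on element-wise list comparison, which is why the map result is materialized with list(). A minimal sketch of the same idiom, with a literal version string standing in for docutils.__version__:

from six.moves import map

version = list(map(int, "0.8.1".split('.')))   # [0, 8, 1]
print(version >= [0, 9])                       # False -> the downgrade path would run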

Example 102

Project: jug Source File: hash.py
def hash_update(M, elems):
    '''
    M = hash_update(M, elems)

    Update the hash object ``M`` with the sequence ``elems``.

    Parameters
    ----------
    M : hashlib object
        An object on which the update method will be called
    elems : sequence of 2-tuples

    Returns
    -------
    M : hashlib object
        This is the same object as the argument
    '''
    from six.moves import cPickle as pickle
    from six.moves import map
    import six

    try:
        import numpy as np
    except ImportError:
        np = None
    for n,e in elems:
        M.update(pickle.dumps(n))
        if hasattr(e, '__jug_hash__'):
            M.update(e.__jug_hash__())
        elif type(e) in (list, tuple):
            M.update(repr(type(e)).encode('utf-8'))
            hash_update(M, enumerate(e))
        elif type(e) == set:
            M.update(six.b('set'))  # use bytes, matching the dict/ndarray tags below (a str would fail on Python 3)
            # With randomized hashing, different runs of Python might result in
            # different orders, so sort. We cannot trust that all the elements
            # in the set will be comparable, so we convert them to their hashes
            # beforehand.
            items = list(map(hash_one, e))
            items.sort()
            hash_update(M, enumerate(items))
        elif type(e) == dict:
            M.update(six.b('dict'))
            items = [(hash_one(k),v) for k,v in e.items()]
            items.sort(key=(lambda k_v:k_v[0]))

            hash_update(M, items)
        elif np is not None and type(e) == np.ndarray:
            M.update(six.b('np.ndarray'))
            M.update(pickle.dumps(e.dtype))
            M.update(pickle.dumps(e.shape))
            try:
                buffer = e.data
                M.update(buffer)
            except:
                M.update(e.copy().data)
        else:
            M.update(pickle.dumps(e))
    return M
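
A hedged usage sketch of the function above. It assumes jug is installed and that hash_update lives in jug.hash, as the source-file header suggests; simple picklable elements exercise only the pickle branch:

import hashlib
from jug.hash import hash_update   # assumed module path, per the header above

M = hashlib.sha1()
M = hash_update(M, enumerate(['spam', 42, (1, 2)]))
print(M.hexdigest())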

Example 103

Project: cabby Source File: abstract.py
Function: discover_services
    def discover_services(self, uri=None, cache=True):
        '''
        Discover services advertised by TAXII server.

        This method sends a discovery request to the service defined
        by ``uri`` or by the constructor's connection parameters.

        :param str uri: URI path to a specific TAXII service
        :param bool cache: whether discovered services should be cached

        :return: list of TAXII services
        :rtype: list of :py:class:`cabby.entities.DetailedServiceInstance`
                (or :py:class:`cabby.entities.InboxDetailedService`)

        :raises ValueError:
                if URI provided is invalid or schema is not supported
        :raises `cabby.exceptions.HTTPError`:
                if an HTTP error happened
        :raises `cabby.exceptions.UnsuccessfulStatusError`:
                if a Status Message was received and status_type is not `SUCCESS`
        :raises `cabby.exceptions.ServiceNotFoundError`:
                if no Discovery service is found
        :raises `cabby.exceptions.AmbiguousServicesError`:
                if more than one service of the specified type is found
        :raises `cabby.exceptions.NoURIProvidedError`:
                if no URI is provided and the client can't discover services
        '''

        uri = uri or self.discovery_path

        if not uri:
            raise NoURIProvidedError('Discovery service URI is not specified')

        response = self._discovery_request(uri)

        services = list(map(
            to_detailed_service_instance_entity,
            response.service_instances))

        self.log.info("%d services discovered", len(services))

        if cache:
            self.services = services

        return services
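
A hedged usage sketch for the method above. It assumes the cabby package and a reachable TAXII server; the host and discovery path are placeholders, and the attribute names follow cabby's documented entities:

from cabby import create_client

client = create_client('taxii.example.com', version='1.1',
                       discovery_path='/services/discovery')
for service in client.discover_services():
    print(service.type, service.address)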

Example 104

Project: ete Source File: scheduler.py
def schedule(workflow_task_processor, pending_tasks, schedule_time, execution, debug, norender):
    # Adjust debug mode
    if debug == "all":
        log.setLevel(10)
    pending_tasks = set(pending_tasks)

    ## ===================================
    ## INITIALIZE BASIC VARS
    execution, run_detached = execution
    thread2tasks = defaultdict(list)
    for task in pending_tasks:
        thread2tasks[task.configid].append(task)
    expected_threads = set(thread2tasks.keys())
    past_threads = {}
    thread_errors = defaultdict(list)
    ## END OF VARS AND SHORTCUTS
    ## ===================================

    cores_total = GLOBALS["_max_cores"]
    if cores_total > 0:
        job_queue = Queue()

        back_launcher = Process(target=background_job_launcher,
                                args=(job_queue, run_detached,
                                      GLOBALS["launch_time"], cores_total))
        back_launcher.start()
    else:
        job_queue = None
        back_launcher = None

    GLOBALS["_background_scheduler"] = back_launcher
    GLOBALS["_job_queue"] = job_queue
    # Captures Ctrl-C for debugging
    #signal.signal(signal.SIGINT, control_c)

    last_report_time = None

    BUG = set()
    try:
        # Enters into task scheduling
        while pending_tasks:
            wtime = schedule_time

            # ask SGE for running jobs
            if execution == "sge":
                #sgeid2jobs = db.get_sge_tasks()
                #qstat_jobs = sge.qstat()
                pass
            else:
                qstat_jobs = None

            # Show summary of pending tasks per thread
            thread2tasks = defaultdict(list)
            for task in pending_tasks:
                thread2tasks[task.configid].append(task)
            set_logindent(0)
            log.log(28, "@@13: Updating tasks status:@@1: (%s)" % (ctime()))
            info_lines = []
            for tid, tlist in six.iteritems(thread2tasks):
                threadname = GLOBALS[tid]["_name"]
                sizelist = ["%s" %getattr(_ts, "size", "?") for _ts in tlist]
                info = "Thread @@13:%s@@1:: pending tasks: @@8:%s@@1: of sizes: %s" %(
                    threadname, len(tlist), ', '.join(sizelist))
                info_lines.append(info)

            for line in info_lines:
                log.log(28, line)

            if GLOBALS["email"]  and last_report_time is None:
                last_report_time = time()
                send_mail(GLOBALS["email"], "Your NPR process has started", '\n'.join(info_lines))

            ## ================================
            ## CHECK AND UPDATE CURRENT TASKS
            checked_tasks = set()
            check_start_time = time()
            to_add_tasks = set()

            GLOBALS["cached_status"] = {}
            for task in sorted(pending_tasks, key=cmp_to_key(sort_tasks)):
                # Avoids endless periods without new job submissions
                elapsed_time = time() - check_start_time
                #if not back_launcher and pending_tasks and \
                #        elapsed_time > schedule_time * 2:
                #    log.log(26, "@@8:Interrupting task checks to schedule new jobs@@1:")
                #    db.commit()
                #    wtime = launch_jobs(sorted(pending_tasks, sort_tasks),
                #                        execution, run_detached)
                #    check_start_time = time()

                # Enter debugging mode if necessary
                if debug and log.level > 10 and task.taskid.startswith(debug):
                    log.setLevel(10)
                    log.debug("ENTERING IN DEBUGGING MODE")
                thread2tasks[task.configid].append(task)

                # Update tasks and job statuses

                if task.taskid not in checked_tasks:
                    try:
                        show_task_info(task)
                        task.status = task.get_status(qstat_jobs)
                        db.dataconn.commit()
                        if back_launcher and task.status not in set("DE"):
                            for j, cmd in task.iter_waiting_jobs():
                                j.status = "Q"
                                GLOBALS["cached_status"][j.jobid] = "Q"
                                if j.jobid not in BUG:
                                    if not os.path.exists(j.jobdir):
                                        os.makedirs(j.jobdir)
                                    for ifile, outpath in six.iteritems(j.input_files):
                                        try:
                                            _tid, _did = ifile.split(".")
                                            _did = int(_did)
                                        except (IndexError, ValueError):
                                            dataid = ifile
                                        else:
                                            dataid = db.get_dataid(_tid, _did)

                                        if not outpath:
                                            outfile = pjoin(GLOBALS["input_dir"], ifile)
                                        else:
                                            outfile = pjoin(outpath, ifile)

                                        if not os.path.exists(outfile):
                                            open(outfile, "w").write(db.get_data(dataid))

                                    log.log(24, "  @@8:Queueing @@1: %s from %s" %(j, task))
                                    if execution:
                                        with open(pjoin(GLOBALS[task.configid]["_outpath"], "commands.log"), "a") as CMD_LOGGER:
                                            print('\t'.join([task.tname, task.taskid, j.jobname, j.jobid, j.get_launch_cmd()]), file=CMD_LOGGER)
                                            
                                        job_queue.put([j.jobid, j.cores, cmd, j.status_file])
                                BUG.add(j.jobid)

                        update_task_states_recursively(task)
                        db.commit()
                        checked_tasks.add(task.taskid)
                    except TaskError as e:
                        log.error("Errors found in %s" %task)
                        import traceback
                        traceback.print_exc()
                        if GLOBALS["email"]:
                            threadname = GLOBALS[task.configid]["_name"]
                            send_mail(GLOBALS["email"], "Errors found in %s!" %threadname,
                                      '\n'.join(map(str, [task, e.value, e.msg])))
                        pending_tasks.discard(task)
                        thread_errors[task.configid].append([task, e.value, e.msg])
                        continue
                else:
                    # Set temporary Queued state to avoid launching
                    # jobs from clones
                    task.status = "Q"
                    if log.level < 24:
                        show_task_info(task)

                if task.status == "D":
                    #db.commit()
                    show_task_info(task)
                    logindent(3)


                    # Log commands of every task
                    # if 'cmd_log_file' not in GLOBALS[task.configid]:
                    #      GLOBALS[task.configid]['cmd_log_file'] = pjoin(GLOBALS[task.configid]["_outpath"], "cmd.log")
                    #      O = open(GLOBALS[task.configid]['cmd_log_file'], "w")
                    #      O.close()

                    # cmd_lines =  get_cmd_log(task)
                    # CMD_LOG = open(GLOBALS[task.configid]['cmd_log_file'], "a")
                    # print(task, file=CMD_LOG)
                    # for c in cmd_lines:
                    #     print('   '+'\t'.join(map(str, c)), file=CMD_LOG)
                    # CMD_LOG.close()
                    #

                    try:
                        #wkname = GLOBALS[task.configid]['_name']
                        create_tasks = workflow_task_processor(task, task.target_wkname)
                    except TaskError as e:
                        log.error("Errors found in %s" %task)
                        pending_tasks.discard(task)
                        thread_errors[task.configid].append([task, e.value, e.msg])
                        continue
                    else:
                        logindent(-3)

                        to_add_tasks.update(create_tasks)
                        pending_tasks.discard(task)

                elif task.status == "E":
                    log.error("task contains errors: %s " %task)
                    log.error("Errors found in %s" % task)
                    pending_tasks.discard(task)
                    thread_errors[task.configid].append([task, None, "Found (E) task status"])

            #db.commit()
            #if not back_launcher:
            #    wtime = launch_jobs(sorted(pending_tasks, sort_tasks),
            #                    execution, run_detached)

            # Update global task list with recently added jobs to be checked
            # during the next cycle
            pending_tasks.update(to_add_tasks)

            ## END CHECK AND UPDATE CURRENT TASKS
            ## ================================

            if wtime:
                set_logindent(0)
                log.log(28, "@@13:Waiting %s seconds@@1:" %wtime)
                sleep(wtime)
            else:
                sleep(schedule_time)

            # Dump / show ended threads
            error_lines = []
            for configid, etasks in six.iteritems(thread_errors):
                error_lines.append("Thread @@10:%s@@1: contains errors:" %\
                            (GLOBALS[configid]["_name"]))
                for error in etasks:
                    error_lines.append(" ** %s" %error[0])
                    e_obj = error[1] if error[1] else error[0]
                    error_path = e_obj.jobdir if isjob(e_obj) else e_obj.taskid
                    if e_obj is not error[0]:
                        error_lines.append("      -> %s" %e_obj)
                    error_lines.append("      -> %s" %error_path)
                    error_lines.append("        -> %s" %error[2])
            for eline in error_lines:
                log.error(eline)

            pending_threads = set([ts.configid for ts in pending_tasks])
            finished_threads = expected_threads - (pending_threads | set(thread_errors.keys()))
            just_finished_lines = []
            finished_lines = []
            for configid in finished_threads:
                # configid is the same as threadid in master tasks
                final_tree_file = pjoin(GLOBALS[configid]["_outpath"],
                                        GLOBALS["inputname"] + ".final_tree")
                threadname = GLOBALS[configid]["_name"]

                if configid in past_threads:
                    log.log(28, "Done thread @@12:%s@@1: in %d iteration(s)",
                            threadname, past_threads[configid])
                    finished_lines.append("Finished %s in %d iteration(s)" %(
                            threadname, past_threads[configid]))
                else:

                    log.log(28, "Assembling final tree...")
                    main_tree, treeiters =  assembly_tree(configid)
                    past_threads[configid] = treeiters - 1

                    log.log(28, "Done thread @@12:%s@@1: in %d iteration(s)",
                            threadname, past_threads[configid])


                    log.log(28, "Writing final tree for @@13:%s@@1:\n   %s\n   %s",
                            threadname, final_tree_file+".nw",
                            final_tree_file+".nwx (newick extended)")
                    main_tree.write(outfile=final_tree_file+".nw")
                    main_tree.write(outfile=final_tree_file+ ".nwx", features=[],
                                    format_root_node=True)

                    if hasattr(main_tree, "tree_phylip_alg"):
                        log.log(28, "Writing final tree alignment @@13:%s@@1:\n   %s",
                                threadname, final_tree_file+".used_alg.fa")

                        alg = SeqGroup(get_stored_data(main_tree.tree_phylip_alg), format="iphylip_relaxed")
                        OUT = open(final_tree_file+".used_alg.fa", "w")
                        for name, seq, comments in alg:
                            realname = db.get_seq_name(name)
                            print(">%s\n%s" %(realname, seq), file=OUT)
                        OUT.close()

                    
                    if hasattr(main_tree, "alg_path"):
                        log.log(28, "Writing root node alignment @@13:%s@@1:\n   %s",
                                threadname, final_tree_file+".fa")

                        alg = SeqGroup(get_stored_data(main_tree.alg_path))
                        OUT = open(final_tree_file+".fa", "w")
                        for name, seq, comments in alg:
                            realname = db.get_seq_name(name)
                            print(">%s\n%s" %(realname, seq), file=OUT)
                        OUT.close()

                    if hasattr(main_tree, "clean_alg_path"):
                        log.log(28, "Writing root node trimmed alignment @@13:%s@@1:\n   %s",
                                threadname, final_tree_file+".trimmed.fa")

                        alg = SeqGroup(get_stored_data(main_tree.clean_alg_path))
                        OUT = open(final_tree_file+".trimmed.fa", "w")
                        for name, seq, comments in alg:
                            realname = db.get_seq_name(name)
                            print(">%s\n%s" %(realname, seq), file=OUT)
                        OUT.close()

                    if norender == False:
                        log.log(28, "Generating tree image for @@13:%s@@1:\n   %s",
                                threadname, final_tree_file+".png")
                        for lf in main_tree:
                            lf.add_feature("sequence", alg.get_seq(lf.safename))
                        try:
                            from .visualize import draw_tree
                            draw_tree(main_tree, GLOBALS[configid], final_tree_file+".png")
                        except Exception as e:
                            log.warning('@@8:something went wrong when generating the tree image. Try manually :(@@1:')
                            if DEBUG:
                                import traceback, sys
                                traceback.print_exc(file=sys.stdout)

                    just_finished_lines.append("Finished %s in %d iteration(s)" %(
                            threadname, past_threads[configid]))
            if GLOBALS["email"]:
                if not pending_tasks:
                    all_lines = finished_lines + just_finished_lines + error_lines
                    send_mail(GLOBALS["email"], "Your NPR process has ended", '\n'.join(all_lines))

                elif GLOBALS["email_report_time"] and time() - last_report_time >= \
                        GLOBALS["email_report_time"]:
                    all_lines = info_lines + error_lines + just_finished_lines
                    send_mail(GLOBALS["email"], "Your NPR report", '\n'.join(all_lines))
                    last_report_time = time()

                elif just_finished_lines:
                    send_mail(GLOBALS["email"], "Finished threads!",
                              '\n'.join(just_finished_lines))

            log.log(26, "")
    except:
        raise

    if thread_errors:
        log.error("Done with ERRORS")
    else:
        log.log(28, "Done")

    return thread_errors

Example 105

Project: elasticsearch-dsl-py Source File: utils.py
Function: iter
    def __iter__(self):
        return map(lambda i: _wrap(i, self._obj_wrapper), self._l_)

Example 106

Project: dit Source File: distribution.py
def prepare_string(dist, digits=None, exact=False, tol=1e-9,
                   show_mask=False, str_outcomes=False):
    """
    Prepares a distribution for a string representation.

    Parameters
    ----------
    dist : distribution
        The distribution to be stringified.
    digits : int or None
        The probabilities will be rounded to the specified number of
        digits, using NumPy's around function. If `None`, then no rounding
        is performed. Note, if the number of digits is greater than the
        precision of the floats, then the resultant number of digits will
        match that smaller precision.
    exact : bool
        If `True`, then linear probabilities will be displayed, even if
        the underlying pmf contains log probabilities.  The closest
        rational fraction within a tolerance specified by `tol` is used
        as the display value.
    tol : float
        If `exact` is `True`, then the probabilities will be displayed
        as the closest rational fraction within `tol`.
    show_mask : bool
        If `True`, show the mask for marginal distributions.
    str_outcomes : bool
        If `True`, then attempt to convert outcomes which are tuples to just
        strings.  This is only a display technique.

    Returns
    -------
    pmf : sequence
        The formatted pmf.  This could be a NumPy array (possibly rounded)
        or a list of Fraction instances.
    outcomes : sequence
        The formatted outcomes.
    base : str or float
        The base of the formatted pmf.
    colsep : str
        The column separation for printing.
    max_length : int
        The length of the largest outcome, as a string.
    pstr : str
        An informative string representing the probability of an outcome.
        This will be either 'p(x)' or 'log p(x)'.

    """
    colsep = '   '

    # Create outcomes with wildcards, if desired and possible.
    if show_mask:
        if not dist.is_joint():
            msg = '`show_mask` can be `True` only for joint distributions'
            raise ditException(msg)

        if show_mask != True and show_mask != False:
            # The user is specifying what the mask should look like.
            wc = show_mask
        else:
            wc = '*'

        ctor = dist._outcome_ctor
        def outcome_wc(outcome):
            """
            Builds the wildcarded outcome.

            """
            i = 0
            e = []
            for is_masked in dist._mask:
                if is_masked:
                    symbol = wc
                else:
                    symbol = outcome[i]
                    i += 1
                e.append(symbol)

            e = ctor(e)
            return e
        outcomes = map(outcome_wc, dist.outcomes)
    else:
        outcomes = dist.outcomes

    # Convert outcomes to strings, if desired and possible.
    if str_outcomes:
        if not dist.is_joint():
            msg = '`str_outcomes` can be `True` only for joint distributions'
            raise ditException(msg)

        try:
            # First, convert the elements of the outcome to strings.
            outcomes_ = [map(str, outcome) for outcome in outcomes]
            # Now convert the entire outcome to a string
            outcomes_ = map(lambda o: ''.join(o), outcomes_)
            # Force the iterators to expand in case there are exceptions.
            outcomes = list(outcomes_)
        except:
            outcomes = map(str, outcomes)
    else:
        outcomes = map(str, outcomes)

    outcomes = list(outcomes)
    if len(outcomes):
        max_length = max(map(len, outcomes))
    else:
        max_length = 0

    # 1) Convert to linear probabilities, if necessary.
    if exact:
        # Copy to avoid precision loss
        d = dist.copy(base='linear')
    else:
        d = dist

    # 2) Round, if necessary, possibly after converting to linear probabilities.
    if digits is not None and digits is not False:
        pmf = d.pmf.round(digits)
    else:
        pmf = d.pmf

    # 3) Construct fractions, if necessary.
    if exact:
        pmf = [approximate_fraction(x, tol) for x in pmf]

    if d.is_log():
        pstr = 'log p(x)'
    else:
        pstr = 'p(x)'

    base = d.get_base()

    return pmf, outcomes, base, colsep, max_length, pstr
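
prepare_string chains several lazy map() calls over the outcomes and only materializes them once with list(outcomes), after which max(map(len, ...)) can be taken safely. A minimal sketch of that pattern, independent of dit:

from six.moves import map

outcomes = [('0', '1'), ('1', '1')]
as_strings = map(lambda o: ''.join(o), (map(str, o) for o in outcomes))
outcomes = list(as_strings)                       # force the lazy chain
max_length = max(map(len, outcomes)) if outcomes else 0
print(outcomes, max_length)                       # ['01', '11'] 2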

Example 107

Project: pywb Source File: cdxops.py
def cdx_filter(cdx_iter, filter_strings):
    """
    Filter CDX lines by regex. If a filter is in :samp:`{field}:{regex}` form,
    the filter is applied to :samp:`cdx[{field}]`.
    """
    # Support single strings as well
    if isinstance(filter_strings, str):
        filter_strings = [filter_strings]

    filters = []

    class Filter:
        def __init__(self, string):
            # invert filter
            self.invert = string.startswith('!')
            if self.invert:
                string = string[1:]

            # exact match
            if string.startswith('='):
                string = string[1:]
                self.compare_func = self.exact
            # contains match
            elif string.startswith('~'):
                string = string[1:]
                self.compare_func = self.contains
            else:
                self.compare_func = self.regex

            parts = string.split(':', 1)
            # no field set, apply filter to entire cdx
            if len(parts) == 1:
                self.field = ''
            # apply filter to cdx[field]
            else:
                self.field = parts[0]
                self.field = CDXObject.CDX_ALT_FIELDS.get(self.field,
                                                          self.field)
                string = parts[1]

            # make regex if regex mode
            if self.compare_func == self.regex:
                self.regex = re.compile(string)
            else:
                self.filter_str = string

        def __call__(self, cdx):
            if not self.field:
                val = str(cdx)
            else:
                val = cdx.get(self.field, '')

            matched = self.compare_func(val)

            return matched ^ self.invert

        def exact(self, val):
            return (self.filter_str == val)

        def contains(self, val):
            return (self.filter_str in val)

        def regex(self, val):
            return self.regex.match(val) is not None

    filters = list(map(Filter, filter_strings))

    for cdx in cdx_iter:
        if all(x(cdx) for x in filters):
            yield cdx
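
The key map() usage above is list(map(Filter, filter_strings)), which eagerly builds one callable filter object per filter string. The same construct-per-item pattern, shown with a standard-library constructor instead of pywb's Filter class:

import re
from six.moves import map

patterns = ['^warc/', 'text/html']
compiled = list(map(re.compile, patterns))         # one compiled regex per pattern
print(all(hasattr(c, 'match') for c in compiled))  # True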

Example 108

Project: CouchPotatoServer Source File: variable.py
Function: flattenlist
def flattenList(l):
    if isinstance(l, list):
        return sum(map(flattenList, l))
    else:
        return l
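
Despite its name, the function above returns the sum of all leaf values of a nested list (sum() is applied to the mapped results), not a flattened list. A self-contained check:

from six.moves import map

def flattenList(l):
    if isinstance(l, list):
        return sum(map(flattenList, l))
    return l

print(flattenList([[1, 2], [3, [4]]]))   # 10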

Example 109

Project: custodian Source File: handlers.py
    def check(self):
        if self.wall_time:
            run_time = datetime.datetime.now() - self.start_time
            total_secs = run_time.total_seconds()
            if not self.electronic_step_stop:
                try:
                    # Intelligently determine time per ionic step.
                    o = Oszicar("OSZICAR")
                    nsteps = len(o.ionic_steps)
                    time_per_step = total_secs / nsteps
                except Exception:
                    time_per_step = 0
            else:
                try:
                    # Intelligently determine approximate time per electronic
                    # step.
                    o = Oszicar("OSZICAR")
                    if len(o.ionic_steps) == 0:
                        nsteps = 0
                    else:
                        nsteps = sum(map(len, o.electronic_steps))
                    if nsteps > self.prev_check_nscf_steps:
                        steps_time = datetime.datetime.now() - \
                            self.prev_check_time
                        steps_secs = steps_time.total_seconds()
                        step_timing = self.buffer_time * ceil(
                            (steps_secs /
                             (nsteps - self.prev_check_nscf_steps)) /
                            self.buffer_time)
                        self.electronic_steps_timings.append(step_timing)
                        self.prev_check_nscf_steps = nsteps
                        self.prev_check_time = datetime.datetime.now()
                    time_per_step = max(self.electronic_steps_timings)
                except Exception as ex:
                    time_per_step = 0

            # If the remaining time is less than average time for 3 ionic
            # steps or buffer_time.
            time_left = self.wall_time - total_secs
            if time_left < max(time_per_step * 3, self.buffer_time):
                return True

        return False

Example 110

Project: debsources Source File: updater.py
def parse_stages(stages):
    return set(map(parse_stage, stages.split()))

Example 111

Project: ete Source File: ete_ncbiquery.py
Function: run
def run(args):
    # add lineage profiles/stats

    import re
    from .. import PhyloTree, NCBITaxa

    # dump tree by default
    if not args.tree and not args.info and not args.descendants:
        args.tree = True

        
    ncbi = NCBITaxa(args.dbfile, args.taxdumpfile)

    if args.create:
        sys.exit(0)
    all_taxids = {}
    all_names = set()
    queries = []

    if not args.search:
        log.error('Search terms should be provided (i.e. --search) ')
        sys.exit(-1)
    for n in args.search:
        queries.append(n)
        try:
            all_taxids[int(n)] = None
        except ValueError:
            all_names.add(n.strip())

    # translate names
    name2tax = ncbi.get_name_translator(all_names)
    for tids in name2tax.values():
        for tid in tids:
            all_taxids[tid] = None

    not_found_names = all_names - set(name2tax.keys())
    if args.fuzzy and not_found_names:
        log.warn("%s unknown names", len(not_found_names))
        for name in not_found_names:
            # enable extension loading
            tax, realname, sim = ncbi.get_fuzzy_name_translation(name, args.fuzzy)
            if tax:
                all_taxids[tax] = None
                name2tax[name] = [tax]
                name2realname[name] = realname
                name2score[name] = "Fuzzy:%0.2f" %sim

    if not_found_names:
        log.warn("[%s] could not be translated into taxids!" %','.join(not_found_names))

    if args.tree:
        if len(all_taxids) == 1:
            target_taxid = list(all_taxids.keys())[0]
            log.info("Dumping NCBI descendants tree for %s" %(target_taxid))
            t = ncbi.get_descendant_taxa(target_taxid, collapse_subspecies=args.collapse_subspecies, rank_limit=args.rank_limit, return_tree=True)
        else:
            log.info("Dumping NCBI taxonomy of %d taxa..." %(len(all_taxids)))
            t = ncbi.get_topology(list(all_taxids.keys()),
                              intermediate_nodes=args.full_lineage,
                              rank_limit=args.rank_limit,
                              collapse_subspecies=args.collapse_subspecies)

        id2name = ncbi.get_taxid_translator([n.name for n in t.traverse()])
        for n in t.traverse():
            n.add_features(taxid=n.name)
            n.add_features(sci_name=str(id2name.get(int(n.name), "?")))
            n.name = "%s - %s" %(id2name.get(int(n.name), n.name), n.name)
            lineage = ncbi.get_lineage(n.taxid)
            n.add_features(named_lineage = '|'.join(ncbi.translate_to_names(lineage)))
        dump(t, features=["taxid", "name", "rank", "bgcolor", "sci_name",
                          "collapse_subspecies", "named_lineage"])
    elif args.descendants:
        log.info("Dumping NCBI taxonomy of %d taxa..." %(len(all_taxids)))
        print('# ' + '\t'.join(["Taxid", "Sci.Name", "Rank", "descendant_taxids", "descendant_names"]))
        translator = ncbi.get_taxid_translator(all_taxids)
        ranks = ncbi.get_rank(all_taxids)
        for taxid in all_taxids:
            descendants = ncbi.get_descendant_taxa(taxid, collapse_subspecies=args.collapse_subspecies, rank_limit=args.rank_limit)
            print('\t'.join([str(taxid), translator.get(taxid, taxid), ranks.get(taxid, ''),
                             '|'.join(map(str, descendants)),
                             '|'.join(map(str, ncbi.translate_to_names(descendants)))]))

    elif args.info:
        print('# ' + '\t'.join(["Taxid", "Sci.Name", "Rank", "Named Lineage", "Taxid Lineage"]))
        translator = ncbi.get_taxid_translator(all_taxids)
        ranks = ncbi.get_rank(all_taxids)
        for taxid, name in six.iteritems(translator):
            lineage = ncbi.get_lineage(taxid)
            named_lineage = ','.join(ncbi.translate_to_names(lineage))
            lineage_string = ','.join(map(str, lineage))
            print('\t'.join([str(taxid), name, ranks.get(taxid, ''), named_lineage, lineage_string]))

Example 112

Project: ete Source File: codemlparser.py
def parse_paml (pamout, model):
    '''
    Parser function for codeml output files, extracting values of w, dN, dS,
    etc., depending on the model tested.
    '''
    # if there are multiple datasets in the same file, divide the outfile into model.name+x
    if not '*' in str (model.properties['params']['ndata']):
        divide_data (pamout, model)
        return
    all_lines = open (pamout).readlines()
    # if we do not have tree, load it
    if model._tree is None:
        from ..evol import EvolTree
        model._tree = EvolTree (re.findall ('\(.*\);', ''.join(all_lines))[2])
        model._tree._label_as_paml()
    # starts parsing
    for i, line in enumerate (all_lines):
        if line == '\n':
            continue
        # codon frequency
        if line.startswith('Codon frequencies under model'):
            model.stats ['codonFreq'] = []
            for j in range (16):
                line = list(map (float, re.findall ('\d\.\d+', all_lines [i+j+1])))
                model.stats ['codonFreq'] += [line]
            continue
        if line.startswith('Nei & Gojobori 1986'):
            model.stats ['codonFreq'] = []
        if 'codonFreq' not in model.stats:
            continue
        ######################
        # start serious stuff
        line = line.rstrip()
        # lnL and number of parameters
        if line.startswith ('lnL'):
            try:
                line = re.sub ('.* np: *(\d+)\): +(-\d+\.\d+).*',
                               '\\1 \\2', line)
                model.stats ['np' ] = int   (line.split()[0])
                model.stats ['lnL'] = float (line.split()[1])
            except ValueError:
                line = re.sub ('.* np: *(\d+)\): +(nan).*',
                               '\\1 \\2', line)
                model.stats ['np' ] = int   (line.split()[0])
                model.stats ['lnL'] = float ('-inf')
            continue
        # get labels of internal branches
        if line.count('..') >= 2:
            labels = re.findall ('\d+\.\.\d+', line + ' ')
            _check_paml_labels (model._tree, labels, pamout, model)
            continue
        # retrieve kappa
        if line.startswith ('kappa '):
            try:
                model.stats ['kappa'] = float (re.sub ('.*(\d+\.\d+).*',
                                                       '\\1', line))
            except ValueError:
                model.stats ['kappa'] = 'nan'
        # retrieve dS dN t w N S and if present, errors. from summary table
        if line.count('..') == 1 and line.startswith (' '):
            if not re.match (' +\d+\.\.\d+ +\d+\.\d+ ', line):
                if re.match (' +( +\d+\.\d+){8}', all_lines [i+1]):
                    _get_values (model, line.split ()[0]+'  '+all_lines [i+1])
                continue
            _get_values (model, line)
            continue
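
The codon-frequency block above pulls every float out of a line with re.findall and converts them in one pass with map. The same idiom on a literal line:

import re
from six.moves import map

line = " 0.25  0.25  0.30  0.20"
freqs = list(map(float, re.findall(r'\d\.\d+', line)))
print(freqs)   # [0.25, 0.25, 0.3, 0.2]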

Example 113

Project: dit Source File: distconst.py
def insert_rvf(d, func, index=-1):
    """
    Returns a new distribution with an added random variable at index `index`.

    The new random variable must be a function of the other random variables.
    By this, we mean that the entropy of the new random variable conditioned
    on the original random variables should be zero.

    Parameters
    ----------
    d : Distribution
        The distribution used to construct the new distribution.
    func : callable | list of callable
        A function which takes a single argument---the value of the previous
        random variables---and returns a new random variable. Note, the return
        value will be added to the outcome using `__add__`, and so it should be
        a hashable, orderable sequence (as every outcome must be). If a list of
        callables is provided, then multiple random variables are added
        simultaneously and will appear in the same order as the list.
    index : int
        The index at which to insert the random variable. A value of -1
        will append the random variable to the end.

    Returns
    -------
    d : Distribution
        The new distribution.

    Examples
    --------
    >>> d = dit.Distribution(['00', '01', '10', '11'], [1/4]*4)
    >>> def xor(outcome):
    ...    return str(int(outcome[0] != outcome[1]))
    ...
    >>> d2 = dit.insert_rvf(d, xor)
    >>> d2.outcomes
    ('000', '011', '101', '110')

    """
    try:
        func[0]
    except TypeError:
        funcs = [func]
    else:
        funcs = func

    partial_outcomes = [map(func, d.outcomes) for func in funcs]

    # Now "flatten" the new contributions.
    partial_outcomes = [d._outcome_ctor([o for o_list in outcome for o in o_list])
                        for outcome in zip(*partial_outcomes)]

    new_outcomes = zip(d.outcomes, partial_outcomes)
    if index == -1:
        outcomes = [old + new for old, new in new_outcomes]
    else:
        outcomes = [old[:index] + new + old[index:] for old, new in new_outcomes]

    d2 = Distribution(outcomes, d.pmf.copy(), base=d.get_base())
    return d2

Example 114

Project: ete Source File: cog_creator.py
def brh_cogs2(DB, species, missing_factor=0.0, seed_sp=None, min_score=0):
    """It scans all precalculated BRH relationships among the species
       passed as an argument, and detects Clusters of Orthologs
       according to several criteria:

       min_score: the min coverage/overlap value required for a
       blast to be a reliable hit.

       missing_factor: the min percentage of species in which a
       given seq must have orthologs.

    """
    def _sort_cogs(cogs1, cogs2):
        seed1, mx1, avg1, ncogs1 = cogs1
        seed2, mx2, avg2, ncogs2 = cogs2
        for i, j in ((mx1, mx2), (avg1, avg2), (ncogs1, ncogs2)):
            v = -1 * cmp(i, j)
            if v != 0:
                break
        return v

    log.log(26, "Searching BRH orthologs")
    species = set(map(str, species))

    min_species = len(species) - round(missing_factor * len(species))

    if seed_sp == "auto":
        sp_to_test = list(species)
    elif seed_sp == "largest":
        cmd = """SELECT taxid, size FROM species"""
        db.seqcursor.execute(cmd)
        sp2size = {}
        for tax, counter in db.seqcursor.fetchall():
            if tax in species:
                sp2size[tax] = counter

        sorted_sp = sorted(list(sp2size.items()), lambda x,y: cmp(x[1],y[1]))
        log.log(24, sorted_sp[:6])
        largest_sp = sorted_sp[-1][0]
        sp_to_test = [largest_sp]
        log.log(28, "Using %s as search seed. Proteome size=%s genes" %\
            (largest_sp, sp2size[largest_sp]))
    else:
        sp_to_test = [str(seed_sp)]

    analysis_txt = StringIO()
    if sp_to_test:
        log.log(26, "Finding best COG selection...")
        seed2size = get_sorted_seeds(seed_sp, species, sp_to_test, min_species, DB)
        size_analysis = []
        for seedname, content in six.iteritems(seed2size):
            cog_sizes = [size for seq, size in content]
            mx, avg = _max(cog_sizes), round(_mean(cog_sizes))
            size_analysis.append([seedname, mx, avg, len(content)])
        size_analysis.sort(_sort_cogs)
        #print '\n'.join(map(str, size_analysis))
        seed = size_analysis[0][0]
        print_as_table(size_analysis[:25], stdout=analysis_txt,
                   header=["Seed","largest COG", "avg COG size", "total COGs"])
        if size_analysis[0][1] < len(species)-1:
            print(size_analysis[0][1])
            raise ValueError("Current COG selection parameters do not permit to cover all species")

    log.log(28, analysis_txt.getvalue())
    # The following loop tests each possible seed if none is
    # specified.
    log.log(28, "Computing Clusters of Orthologs groups (COGs)")
    log.log(28, "Min number of species per COG: %d" %min_species)
    cogs_selection = []
    log.log(26,"Using seed species:%s", seed)
    species_side1 = ','.join(map(quote, [s for s in species if str(s)>str(seed)]))
    species_side2 = ','.join(map(quote, [s for s in species if str(s)<str(seed)]))
    pairs1 = []
    pairs2 = []
    # Select all ids with matches in the target species, and
    # return the total number of species covered by each of
    # such ids.
    if species_side1 != "":
        cmd = """SELECT seqid1, taxid1, seqid2, taxid2 from ortho_pair WHERE
            taxid1="%s" AND taxid2 IN (%s) """ % (seed, species_side1)
        DB.orthocursor.execute(cmd)
        pairs1 = DB.orthocursor.fetchall()

    if species_side2 != "":
        cmd = """SELECT seqid2, taxid2, seqid1, taxid1 from ortho_pair WHERE
            taxid1 IN (%s) AND taxid2 = "%s" """ % (species_side2, seed)
        DB.orthocursor.execute(cmd)
        pairs2 = DB.orthocursor.fetchall()

    cog_candidates = defaultdict(set)
    for seq1, sp1, seq2, sp2 in pairs1 + pairs2:
        s1 = (sp1, seq1)
        s2 = (sp2, seq2)
        cog_candidates[(sp1, seq1)].update([s1, s2])

    all_cogs = [cand for cand in list(cog_candidates.values()) if
                len(cand) >= min_species]

    # CHECK CONSISTENCY
    seqs = set()
    for cand in all_cogs:
        seqs.update([b for a,b  in cand if a == seed])
    pre_selected_seqs = set([v[0] for v in seed2size[seed]])
    if len(seqs & pre_selected_seqs) != len(set(seed2size[seed])) or\
            len(seqs & pre_selected_seqs) != len(seqs):
        print("old method seqs", len(seqs), "new seqs", len(set(seed2size[seed])), "Common", len(seqs & pre_selected_seqs))
        raise ValueError("ooops")

    cog_sizes = [len(cog) for cog in all_cogs]
    cog_spsizes = [len(set([e[0] for e in cog])) for cog in all_cogs]

    if [1 for i in range(len(cog_sizes)) if cog_sizes[i] != cog_spsizes[i]]:
        raise ValueError("Inconsistent COG found")

    if cog_sizes:
        cogs_selection.append([seed, all_cogs])
    log.log(26, "Found %d COGs" % len(all_cogs))

    recoded_cogs = []
    for cog in all_cogs:
        named_cog = ["%s%s%s" %(x[0], GLOBALS["spname_delimiter"],x[1]) for x in cog]
        recoded_cogs.append(named_cog)

    return recoded_cogs, analysis_txt.getvalue()

Example 115

Project: soupy Source File: soupy.py
Function: str
    def __str__(self):
        return ''.join(map(_uniquote, self._items))

Example 116

Project: orderedmultidict Source File: orderedmultidict.py
Function: str
    def __str__(self):
        return '{%s}' % ', '.join(
            map(lambda p: '%r: %r' % (p[0], p[1]), self.iterallitems()))
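
A minimal sketch of the formatting idiom above, using a plain list of pairs in place of iterallitems():

from six.moves import map

pairs = [('a', 1), ('b', 2)]
print('{%s}' % ', '.join(map(lambda p: '%r: %r' % (p[0], p[1]), pairs)))
# {'a': 1, 'b': 2}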

Example 117

Project: dit Source File: helpers.py
def construct_alphabets(outcomes):
    """
    Construct minimal alphabets for each random variable.

    In the process, it verifies that each outcome is a sequence and that all
    outcomes have the same length.

    Parameters
    ----------
    outcomes : sequence
        A nonempty sequence of outcomes.  Each outcome in `outcomes` should
        be a sequence---these are the elements which determine the alphabet
        for each random variable.

    Returns
    -------
    alphabets : tuple
        The constructed alphabet for each random variable.

    Examples
    --------
    >>> construct_alphabets([(0,1), (1,1)])
    ((0,1), (1,))

    Raises
    ------
    ditException
        When there are no outcomes.
        When not every outcome is a sequence.
        When not all outcomes have the same length.

    """
    # During validation, each outcome is checked to be of the proper class,
    # length, and also a sequence.  However, this function is called before
    # validation and will result in hard to decipher error messages if we
    # don't at least verify that each outcome is a container of the same
    # length.

    # Make sure outcomes is a sequence
    try:
        L = len(outcomes)
    except TypeError:
        raise TypeError('`outcomes` must be a sequence.')

    if L == 0:
        raise ditException('`outcomes` must not be empty.')

    # Make sure each outcome is sized.  They really should be sequences,
    # but this check is sufficient for now.
    try:
        lengths = list(map(len, outcomes))
    except TypeError:
        raise ditException('One or more outcomes is not sized. len() fails.')
    else:
        outcome_length = lengths[0]

    # Make sure each outcome has the same length.
    equal_lengths = np.alltrue(np.equal(lengths, outcome_length))
    if not equal_lengths:
        raise ditException('Not all outcomes have the same length.')

    alphabets = _construct_alphabets(outcomes)
    return alphabets

Example 118

Project: python-mode Source File: format.py
Function: process_tokens
    def process_tokens(self, tokens):
        """process tokens and search for :

         _ non strict indentation (i.e. not always using the <indent> parameter as
           indent unit)
         _ too long lines (i.e. longer than <max_chars>)
         _ optionally bad construct (if given, bad_construct must be a compiled
           regular expression).
        """
        self._bracket_stack = [None]
        indents = [0]
        check_equal = False
        line_num = 0
        self._lines = {}
        self._visited_lines = {}
        token_handlers = self._prepare_token_dispatcher()
        self._last_line_ending = None

        self._current_line = ContinuedLineState(tokens, self.config)
        for idx, (tok_type, token, start, _, line) in enumerate(tokens):
            if start[0] != line_num:
                line_num = start[0]
                # A tokenizer oddity: if an indented line contains a multi-line
                # docstring, the line member of the INDENT token does not contain
                # the full line; therefore we check the next token on the line.
                if tok_type == tokenize.INDENT:
                    self.new_line(TokenWrapper(tokens), idx-1, idx+1)
                else:
                    self.new_line(TokenWrapper(tokens), idx-1, idx)

            if tok_type == tokenize.NEWLINE:
                # a program statement, or ENDMARKER, will eventually follow,
                # after some (possibly empty) run of tokens of the form
                #     (NL | COMMENT)* (INDENT | DEDENT+)?
                # If an INDENT appears, setting check_equal is wrong, and will
                # be undone when we see the INDENT.
                check_equal = True
                self._process_retained_warnings(TokenWrapper(tokens), idx)
                self._current_line.next_logical_line()
                self._check_line_ending(token, line_num)
            elif tok_type == tokenize.INDENT:
                check_equal = False
                self.check_indent_level(token, indents[-1]+1, line_num)
                indents.append(indents[-1]+1)
            elif tok_type == tokenize.DEDENT:
                # there's nothing we need to check here!  what's important is
                # that when the run of DEDENTs ends, the indentation of the
                # program statement (or ENDMARKER) that triggered the run is
                # equal to what's left at the top of the indents stack
                check_equal = True
                if len(indents) > 1:
                    del indents[-1]
            elif tok_type == tokenize.NL:
                self._check_continued_indentation(TokenWrapper(tokens), idx+1)
                self._current_line.next_physical_line()
            elif tok_type != tokenize.COMMENT:
                self._current_line.handle_line_start(idx)
                # This is the first concrete token following a NEWLINE, so it
                # must be the first token of the next program statement, or an
                # ENDMARKER; the "line" argument exposes the leading whitespace
                # for this statement; in the case of ENDMARKER, line is an empty
                # string, so will properly match the empty string with which the
                # "indents" stack was seeded
                if check_equal:
                    check_equal = False
                    self.check_indent_level(line, indents[-1], line_num)

            if tok_type == tokenize.NUMBER and token.endswith('l'):
                self.add_message('lowercase-l-suffix', line=line_num)

            try:
                handler = token_handlers[token]
            except KeyError:
                pass
            else:
                handler(tokens, idx)

        line_num -= 1 # to be ok with "wc -l"
        if line_num > self.config.max_module_lines:
            # Get the line where the too-many-lines (or its message id)
            # was disabled or default to 1.
            symbol = self.linter.msgs_store.check_message_id('too-many-lines')
            names = (symbol.msgid, 'too-many-lines')
            line = next(filter(None,
                               map(self.linter._pragma_lineno.get, names)), 1)
            self.add_message('too-many-lines',
                             args=(line_num, self.config.max_module_lines),
                             line=line)
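
The final map() usage above picks the first name that has a recorded pragma line, falling back to 1. A standalone sketch with a hypothetical lookup table standing in for self.linter._pragma_lineno:

from six.moves import map, filter

pragma_lineno = {'too-many-lines': 12}            # hypothetical lookup table
names = ('C0302', 'too-many-lines')
line = next(filter(None, map(pragma_lineno.get, names)), 1)
print(line)   # 12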

Example 119

Project: box-python-sdk Source File: test_group.py
@pytest.fixture()
def mock_membership_responses(mock_membership_dict_stream):
    """
    Returns a generator function that takes params: total, page_size.
    The generator yields a sequence of 'group membership' mock_box_responses, each containing
    page_size items, until 'total' entries have been returned.
    """
    # pylint:disable=redefined-outer-name
    def number_entries_per_response(total, page_size, hidden_in_batch):
        if not hidden_in_batch:
            hidden_in_batch = repeat(0)
        quotient, remainder = divmod(total, page_size)
        max_items_in_batch = chain(repeat(page_size, quotient), (remainder,))
        return map(sub, max_items_in_batch, hidden_in_batch)

    def take(iterable, number):
        return list(islice(iterable, number))

    def membership_responses(total, page_size, hidden_in_batch=None):
        offset = 0
        for number_entries in number_entries_per_response(total, page_size, hidden_in_batch):
            entries = take(mock_membership_dict_stream, number_entries)

            mock_box_response = Mock(BoxResponse)
            mock_network_response = Mock(DefaultNetworkResponse)
            mock_box_response.network_response = mock_network_response
            mock_box_response.json.return_value = {
                'entries': entries,
                'total_count': total,
                'offset': offset,
                'limit': page_size,
            }
            offset += number_entries
            mock_box_response.status_code = 200
            mock_box_response.ok = True
            yield mock_box_response

    return membership_responses
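
number_entries_per_response feeds two iterables to map(sub, ...), subtracting the hidden counts from the per-page maxima element-wise. A standalone sketch with total=10, page_size=4 and no hidden entries:

from itertools import chain, repeat
from operator import sub
from six.moves import map

quotient, remainder = divmod(10, 4)
max_items_in_batch = chain(repeat(4, quotient), (remainder,))
print(list(map(sub, max_items_in_batch, repeat(0))))   # [4, 4, 2]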

Example 120

Project: PyEMMA Source File: patches.py
    def _random_access_generator(self, f):
        with f:
            curr_size = 0
            coords = []
            leftovers = []
            chunksize = self._chunksize
            if chunksize == 0:
                chunksize = np.iinfo(int).max
            for k, g in groupby(enumerate(self._stride), lambda a: a[0] - a[1]):
                grouped_stride = list(map(itemgetter(1), g))
                seek_to = grouped_stride[0] - f.tell()
                f.seek(seek_to, whence=1)
                group_size = len(grouped_stride)
                if curr_size + group_size > chunksize:
                    leftovers = grouped_stride
                else:
                    local_traj_data = _read_traj_data(self._atom_indices, f, group_size, **self._kwargs)
                    coords.append(local_traj_data)
                    curr_size += len(grouped_stride)
                if curr_size == chunksize:
                    yield _join_traj_data(coords, self._topology)
                    chunksize = self._chunksize
                    curr_size = 0
                    coords = []
                while leftovers:
                    local_chunk = leftovers[:min(chunksize, len(leftovers))]
                    local_traj_data = _read_traj_data(self._atom_indices, f, len(local_chunk), **self._kwargs)
                    coords.append(local_traj_data)
                    leftovers = leftovers[min(chunksize, len(leftovers)):]
                    curr_size += len(local_chunk)
                    if curr_size == chunksize:
                        yield _join_traj_data(coords, self._topology)
                        curr_size = 0
                        coords = []
            if coords:
                yield _join_traj_data(coords, self._topology)

            raise StopIteration("delivered all RA indices")
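
The stride handling above groups consecutive frame indices into runs by pairing groupby with map(itemgetter(1), g). The same idiom on a literal stride:

from itertools import groupby
from operator import itemgetter
from six.moves import map

stride = [2, 3, 4, 9, 10, 15]
runs = [list(map(itemgetter(1), g))
        for _, g in groupby(enumerate(stride), lambda a: a[0] - a[1])]
print(runs)   # [[2, 3, 4], [9, 10], [15]]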

Example 121

Project: skll Source File: writers.py
    def _write_header(self, feature_set, output_file, filter_features):
        """
        Called before lines are written to file, so that headers can be written
        for files that need them.

        :param feature_set: The FeatureSet being written to a file.
        :type feature_set: FeatureSet
        :param output_file: The file being written to.
        :type output_file: file
        :param filter_features: If only writing a subset of the features in the
                                FeatureSet to ``output_file``, these are the
                                features to include in this file.
        :type filter_features: set of str
        """
        fieldnames = self._get_fieldnames(filter_features)
        if self.label_col in fieldnames:
            fieldnames.remove(self.label_col)

        # Add relation to header
        print("@relation '{}'\n".format(self.relation), file=output_file)

        # Loop through fields writing the header info for the ARFF file
        for field in fieldnames:
            print("@attribute '{}' numeric".format(field.replace('\\', '\\\\')
                                                   .replace("'", "\\'")),
                  file=output_file)

        # Print class label header if necessary
        if self.regression:
            print("@attribute {} numeric".format(self.label_col),
                  file=output_file)
        else:
            if self.feat_set.has_labels:
                print("@attribute {} ".format(self.label_col) +
                      "{" + ','.join(map(str,
                                         sorted(set(self.feat_set.labels)))) +
                      "}", file=output_file)
        fieldnames.append(self.label_col)

        # Create CSV writer to handle missing values for lines in data section
        # and to ignore the instance values for non-numeric attributes
        self._dict_writer = DictWriter(output_file, fieldnames, restval=0,
                                       extrasaction='ignore', dialect='arff')

        # Finish header and start data section
        print("\n@data", file=output_file)

Example 122

Project: elasticsearch-dsl-py Source File: field.py
Function: serialize
    def serialize(self, data):
        if isinstance(data, (list, AttrList)):
            return list(map(self._serialize, data))
        return self._serialize(data)

Example 123

Project: soupy Source File: soupy.py
Function: each
    def each(self, *funcs):
        """
        Call `func` on each element in the collection.

        If multiple functions are provided, each item
        in the output will be a tuple of each
        func(item) in self.

        Returns a new Collection.

        Example:

            >>> col = Collection([Scalar(1), Scalar(2)])
            >>> col.each(Q * 10)
            Collection([Scalar(10), Scalar(20)])
            >>> col.each(Q * 10, Q - 1)
            Collection([Scalar((10, 0)), Scalar((20, 1))])
        """

        funcs = list(map(_make_callable, funcs))

        if len(funcs) == 1:
            return Collection(map(funcs[0], self._items))

        tupler = lambda item: Scalar(
            tuple(_unwrap(func(item)) for func in funcs))
        return Collection(map(tupler, self._items))

Example 124

Project: elasticsearch-dsl-py Source File: utils.py
Function: to_dict
    def to_dict(self):
        """
        Serialize the DSL object to plain dict
        """
        d = {}
        for pname, value in iteritems(self._params):
            pinfo = self._param_defs.get(pname)

            # typed param
            if pinfo and 'type' in pinfo:
                # don't serialize empty lists and dicts for typed fields
                if value in ({}, []):
                    continue

                # multi-values are serialized as list of dicts
                if pinfo.get('multi'):
                    value = list(map(lambda x: x.to_dict(), value))

                # squash all the hash values into one dict
                elif pinfo.get('hash'):
                    value = dict((k, v.to_dict()) for k, v in iteritems(value))

                # serialize single values
                else:
                    value = value.to_dict()

            # serialize anything with to_dict method
            elif hasattr(value, 'to_dict'):
                value = value.to_dict()

            d[pname] = value
        return {self.name: d}

Example 125

Project: dit Source File: lattice.py
def induced_sigalg(dist, rvs, rv_mode=None):
    """
    Returns the induced sigma-algebra of the random variable defined by `rvs`.

    Parameters
    ----------
    dist : Distribution
        The distribution which defines the base sigma-algebra.
    rvs : list
        The indexes of the random variable used to calculate the induced
        sigma algebra.
    rv_mode : str, None
        Specifies how to interpret the elements of `rvs`. Valid options are:
        {'indices', 'names'}. If equal to 'indices', then the elements of
        `rvs` are interpreted as random variable indices. If equal to 'names',
        the elements are interpreted as random variable names. If `None`,
        then the value of `dist._rv_mode` is consulted.

    Returns
    -------
    F : frozenset of frozensets
        The induced sigma-algebra.

    """
    # This is brute force and ugly.
    #
    # Implementation:
    #   1) Find induced atoms from atoms of new sigma-algebra:
    #           X^{-1}(A) = { w : X(w) \in A }
    #       where A = \{a\} and a is a nonzero outcome in the marginal.
    #   2) Generate sigma algebra from induced atoms.
    #
    # Step 2 may not be necessary.
    #
    indexes = parse_rvs(dist, rvs, rv_mode=rv_mode, unique=True, sort=True)[1]

    # This creates a mapping from new outcomes (defined by rvs) to the
    # original outcomes which map to those new outcomes. This defines a
    # partition of the original outcomes.
    d = defaultdict(list)
    ctor = dist._outcome_ctor
    for outcome, _ in dist.zipped(mode='atoms'):
        # Build a list of inner outcomes. "c" stands for "constructed".
        # We need to iterate over all atoms, not just those in pmf since
        # we are trying to partition the sample space.
        c_outcome = ctor([outcome[i] for i in indexes])
        d[c_outcome].append(outcome)

    atoms = frozenset(map(frozenset, d.values()))
    F = sigma_algebra(atoms)
    return F
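
The key step for six.moves.map here is frozenset(map(frozenset, d.values())), which freezes each block of the partition so the collection of atoms is itself hashable. A minimal sketch of that idiom, independent of dit:

from collections import defaultdict
from six.moves import map  # requires six

# Partition the outcomes of two coin flips by the value of the first flip.
outcomes = ['HH', 'HT', 'TH', 'TT']
partition = defaultdict(list)
for outcome in outcomes:
    partition[outcome[0]].append(outcome)

# Each block becomes a frozenset so the whole family of atoms is hashable.
atoms = frozenset(map(frozenset, partition.values()))
print(atoms)  # two atoms: {'HH', 'HT'} and {'TH', 'TT'}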

Example 126

Project: ete Source File: ete_compare.py
def run(args):
    from .. import Tree
    from ..utils import print_table

    def iter_differences(set1, set2, unrooted=False):
        for s1 in set1:
            pairs = []
            for r1 in set2:
                if unrooted:
                    d = euc_dist_unrooted(s1, r1)
                else:
                    d = euc_dist(s1, r1)
                if d < 1:
                    pairs.append((d,r1))
            yield s1, pairs


    col_sizes = [15, 15] + [7] * 8

    header = ['source', 'ref', 'E.size', 'nRF',
              'RF', 'maxRF', "src-branches",
              "ref-branches", "subtrees", "treekoD" ]

    if args.taboutput:
        print('# ' + '\t'.join(header))
    elif args.show_mismatches or args.show_matches:
        pass
    else:
        print_table([header,
                     ["=========================="] * 10],
                    fix_col_width=col_sizes, wrap_style="cut")


    if args.treeko:
        from .. import PhyloTree
        tree_class = PhyloTree
    else:
        tree_class = Tree

    for stree_name in src_tree_iterator(args):
        stree = tree_class(stree_name, format=args.src_newick_format)

        # Parses attrs if necessary
        src_tree_attr = args.src_tree_attr
        if args.src_attr_parser:
            for leaf in stree:
                leaf.add_feature('tempattr', re.search(
                    args.src_attr_parser, getattr(leaf, args.src_tree_attr)).groups()[0])
            src_tree_attr = 'tempattr'

        for rtree_name in ref_tree_iterator(args):
            rtree = tree_class(rtree_name, format=args.ref_newick_format)

            # Parses attrs if necessary
            ref_tree_attr = args.ref_tree_attr
            if args.ref_attr_parser:
                for leaf in rtree:
                    leaf.add_feature('tempattr', re.search(
                        args.ref_attr_parser, getattr(leaf, args.ref_tree_attr)).groups()[0])
                ref_tree_attr = 'tempattr'

            r = stree.compare(rtree,
                              ref_tree_attr=ref_tree_attr,
                              source_tree_attr=src_tree_attr,
                              min_support_ref=args.min_support_ref,
                              min_support_source = args.min_support_src,
                              unrooted=args.unrooted,
                              has_duplications=args.treeko)



            if args.show_mismatches or args.show_matches or args.show_edges:
                if args.show_mismatches:
                    src = r['source_edges'] - r['ref_edges']
                    ref = r['ref_edges'] - r['source_edges']
                elif args.show_matches:
                    src = r['source_edges'] & r['ref_edges']
                    ref = r['ref_edges'] & r['source_edges']
                elif args.show_edges:
                    src = r['source_edges']
                    ref = r['ref_edges']

                if args.unrooted:
                    for tag, part in [("src: %s"%stree_name, src), ("ref: %s"%rtree_name, ref)]:
                        print("%s\t%s" %(tag, '\t'.join(
                            map(lambda x: '%s|%s' %(','.join(x[0]), ','.join(x[1])), part))))
                else:
                    for tag, part in [("src: %s"%stree_name, src), ("ref: %s"%rtree_name, ref)]:
                        print("%s\t%s" %(tag, '\t'.join([','.join(p) for p in part])))
            else:
                data = [shorten_str(stree_name, 15, reverse=True),
                        shorten_str(rtree_name, 15, reverse=True),
                        r['effective_tree_size'],
                        r['norm_rf'],
                        r['rf'], r['max_rf'],
                        r["source_edges_in_ref"],
                        r["ref_edges_in_source"],
                        r['source_subtrees'],
                        r['treeko_dist']]

                if r['effective_tree_size'] == 0:
                    for i in range(3, len(data)):
                        data[i] = -1

                if args.taboutput:
                    print('\t'.join(map(str, data)))
                else:
                    print_table([list(map(as_str, data))],
                                fix_col_width = col_sizes, wrap_style='cut')

Example 127

Project: ete Source File: ncbiquery.py
    def get_topology(self, taxids, intermediate_nodes=False, rank_limit=None, collapse_subspecies=False, annotate=True):
        """Given a list of taxid numbers, return the minimal pruned NCBI taxonomy tree
        containing all of them.

        :param False intermediate_nodes: If True, single child nodes
        representing the complete lineage of leaf nodes are kept. Otherwise, the
        tree is pruned to contain the first common ancestor of each group.

        :param None rank_limit: If a valid NCBI rank name is provided, the tree is
        pruned at that level. For instance, use rank_limit="species" to get rid
        of sub-species or strain leaf nodes.

        :param False collapse_subspecies: If True, any item under the species
        rank will be collapsed into the species upper node.

        """
        from .. import PhyloTree
        taxids, merged_conversion = self._translate_merged(taxids)        
        if len(taxids) == 1:
            root_taxid = int(list(taxids)[0])
            with open(self.dbfile+".traverse.pkl", "rb") as CACHED_TRAVERSE:
                prepostorder = pickle.load(CACHED_TRAVERSE)
            descendants = {}
            found = 0
            nodes = {}
            hit = 0
            visited = set()            
            start = prepostorder.index(root_taxid)
            try:
                end = prepostorder.index(root_taxid, start+1)
                subtree = prepostorder[start:end+1]
            except ValueError:
                # If root taxid is not found in postorder, must be a tip node
                subtree = [root_taxid]
            leaves = set([v for v, count in Counter(subtree).items() if count == 1])
            nodes[root_taxid] = PhyloTree(name=str(root_taxid))
            current_parent = nodes[root_taxid]
            for tid in subtree:
                if tid in visited:
                    current_parent = nodes[tid].up
                else:
                    visited.add(tid)
                    nodes[tid] = PhyloTree(name=str(tid))
                    current_parent.add_child(nodes[tid])
                    if tid not in leaves:
                        current_parent = nodes[tid]
            root = nodes[root_taxid]
        else:
            taxids = set(map(int, taxids))
            sp2track = {}
            elem2node = {}
            id2lineage = self.get_lineage_translator(taxids)
            all_taxids = set()
            for lineage in id2lineage.values():
                all_taxids.update(lineage)                
            id2rank = self.get_rank(all_taxids)
            for sp in taxids:
                track = []
                lineage = id2lineage[sp]

                for elem in lineage:
                    if elem not in elem2node:
                        node = elem2node.setdefault(elem, PhyloTree())
                        node.name = str(elem)
                        node.taxid = elem
                        node.add_feature("rank", str(id2rank.get(int(elem), "no rank")))
                    else:
                        node = elem2node[elem]
                    track.append(node)
                sp2track[sp] = track
            # generate parent child relationships
            for sp, track in six.iteritems(sp2track):
                parent = None
                for elem in track:
                    if parent and elem not in parent.children:
                        parent.add_child(elem)
                    if rank_limit and elem.rank == rank_limit:
                        break
                    parent = elem
            root = elem2node[1]

        #remove onechild-nodes
        if not intermediate_nodes:
            for n in root.get_descendants():
                if len(n.children) == 1 and int(n.name) not in taxids:
                    n.delete(prevent_nondicotomic=False)

        if len(root.children) == 1:
            tree = root.children[0].detach()
        else:
            tree = root

        if collapse_subspecies:
            to_detach = []
            for node in tree.traverse():
                if node.rank == "species":
                    to_detach.extend(node.children)
            for n in to_detach:
                n.detach()

        if annotate:
            self.annotate_tree(tree)

        return tree

Example 128

Project: dit Source File: distconst.py
def uniform_distribution(outcome_length, alphabet_size, base=None):
    """
    Returns a uniform distribution.

    Parameters
    ----------
    outcome_length : int
        The length of the outcomes.

    alphabet_size : int, list of lists
        The alphabets used to construct the outcomes of the distribution. If an
        integer, then the alphabet for each random variable will be the same,
        consisting of integers from 0 to k-1 where k is the alphabet size.
        If a list, then the elements are used as the alphabet for each random
        variable.  If the list has a single element, then it will be used
        as the alphabet for each random variable.

    base : float, 'linear', 'e'
        The desired base for the distribution probabilities.

    Returns
    -------
    d : Distribution.
        A uniform distribution.

    Examples
    --------
    Each random variable has the same standardized alphabet: [0,1]
    >>> d = dit.uniform_distribution(2, 2)

    Each random variable has its own alphabet.
    >>> d = dit.uniform_distribution(2, [[0,1],[1,2]])

    Both random variables have ['H','T'] as an alphabet.
    >>> d = dit.uniform_distribution(2, [['H','T']])

    """
    try:
        int(alphabet_size)
    except TypeError:
        # Assume it is a list of lists.
        alphabet = alphabet_size

        # Autoextend if only one alphabet is provided.
        if len(alphabet) == 1:
            alphabet = [alphabet[0]] * outcome_length
        elif len(alphabet) != outcome_length:
            raise TypeError("outcome_length does not match number of rvs.")
    else:
        # Build the standard alphabet.
        alphabet = [tuple(range(alphabet_size))] * outcome_length

    try:
        Z = np.prod(list(map(len, alphabet)))
        try:
            # for some reason numpypy.prod returns a list, and pypy can't handle
            #   multiplying a list by a numpy float.
            Z = int(Z[0])
        except:
            pass
    except TypeError:
        raise TypeError("alphabet_size must be an int or list of lists.")

    pmf = [1/Z] * Z
    outcomes = tuple(product(*alphabet))
    d = Distribution(outcomes, pmf, base='linear')

    # Maybe we should use ditParams['base'] when base is None?
    if base is not None:
        d.set_base(base)

    return d
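
list(map(len, alphabet)) feeds the per-variable alphabet sizes into np.prod to get the total number of outcomes. The same count can be computed without NumPy; a sketch of that equivalent, sidestepping the pypy workaround above:

import operator
from functools import reduce
from six.moves import map  # requires six

alphabet = [(0, 1), (0, 1, 2)]  # two random variables with sizes 2 and 3
# Multiply the alphabet sizes together: 2 * 3 = 6 equally likely outcomes.
Z = reduce(operator.mul, map(len, alphabet), 1)
pmf = [1.0 / Z] * Z
print(Z, pmf)  # 6 and six probabilities of 1/6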

Example 129

Project: ete Source File: phylobuild.py
def main(args):
    """ Read and parse all configuration and command line options,
    setup global variables and data, and initialize the master task of
    all workflows. """

    global log
    log = logging.getLogger("main")

    base_dir = GLOBALS["basedir"]

    # -------------------------------------
    # READ CONFIG FILE AND PARSE WORKFLOWS
    # -------------------------------------

    # Load and check config file


    if args.custom_config:
        concat_config = open(args.base_config).readlines()
        concat_config += open(args.custom_config).readlines()
        base_config = check_config(concat_config)
    else:
        base_config = check_config(args.base_config)
        
    # Check for config file overwriting
    clearname = os.path.basename(args.base_config)
    local_conf_file = pjoin(base_dir, "ete_build.cfg")
    if pexist(base_dir):
        if hascontent(local_conf_file):
            if not args.clearall and not args.resume:
                raise ConfigError("Output directory seems to contain"
                                  " data from a previous run."
                                  " Use --clearall to restart the analysis or --resume to continue.")

    # Creates a tree splitter config block on the fly. In the future this
    # options should be more accessible by users.
    base_config['default_tree_splitter'] = {
        '_app' : 'treesplitter',
        '_max_outgroup_size' : '10%', # dynamic or fixed selection of out seqs.
        '_min_outgroup_support' : 0.9, # avoids fixing labile nodes as monophyletic
        '_outgroup_topology_dist' : False}


    # prepare workflow config dictionaries
    workflow_types = defaultdict(list)
    TARGET_CLADES = set()
    VALID_WORKFLOW_TYPES = set(['genetree', 'supermatrix'])
    # extract workflow filters


    def parse_workflows(names, target_wtype, parse_filters=False):
        parsed_workflows = []
        if not names:
            return parsed_workflows

        for wkname in names:
            if parse_filters:
                wfilters = {}
                fields = [_f.strip() for _f in wkname.split(",")]
                if len(fields) == 1:
                    wkname = fields[0]
                else:
                    wkname = fields[-1]
                    for f in fields[:-1]:
                        if f.startswith("size-range:"): # size filter
                            f = f.replace("size-range:",'')
                            try:
                                min_size, max_size = list(map(int, f.split('-')))
                                if min_size < 0 or min_size > max_size:
                                    raise ValueError

                            except ValueError:
                                raise ConfigError('size filter should consist of two integer numbers (i.e. 50-100). Found [%s] instead' %f)
                            wfilters["max_size"] = max_size
                            wfilters["min_size"] = min_size
                        elif f.startswith("seq-sim-range:"):
                            f = f.replace("seq-sim-range:",'')
                            try:
                                min_seq_sim, max_seq_sim  = map(float, f.split('-'))
                                if min_seq_sim > 1 or min_seq_sim < 0:
                                    raise ValueError
                                if max_seq_sim > 1 or max_seq_sim < 0:
                                    raise ValueError
                                if min_seq_sim > max_seq_sim:
                                    raise ValueError
                            except ValueError:
                                raise ConfigError('sequence similarity filter should consist of two float numbers between 0 and 1 (i.e. 0-0.95). Found [%s] instead' %f)
                            wfilters["min_seq_sim"] = min_seq_sim
                            wfilters["max_seq_sim"] = max_seq_sim
                        else:
                            raise ConfigError('Unknown workflow filter [%s]' %f)

            if target_wtype == "genetree" and wkname in base_config.get('genetree_meta_workflow', {}):
                temp_workflows = [x.lstrip('@') for x in base_config['genetree_meta_workflow'][wkname]]
            elif target_wtype == "supermatrix" and wkname in base_config.get('supermatrix_meta_workflow', {}):
                temp_workflows = [x.lstrip('@') for x in base_config['supermatrix_meta_workflow'][wkname]]
            else:
                temp_workflows = [wkname]

            # if wkname not in base_config and wkname in base_config.get('meta_workflow', {}):
            #     temp_workflows = [x.lstrip('@') for x in base_config['meta_workflow'][wkname]]
            # else:
            #     temp_workflows = [wkname]

            for _w in temp_workflows:
                if target_wtype == "genetree":
                    base_config.update(build_genetree_workflow(_w))
                elif target_wtype == "supermatrix":
                    base_config.update(build_supermatrix_workflow(_w))
                parse_block(_w, base_config)
                
                if _w not in base_config:
                    list_workflows(base_config)
                    raise ConfigError('[%s] workflow or meta-workflow name is not found in the config file.' %_w)
                wtype = base_config[_w]['_app']
                if wtype not in VALID_WORKFLOW_TYPES:
                    raise ConfigError('[%s] is not a valid workflow: %s?' %(_w, wtype))
                if wtype != target_wtype:
                    raise ConfigError('[%s] is not a valid %s workflow' %(wkname, target_wtype))

            if parse_filters:
                if len(temp_workflows) == 1:
                    parsed_workflows.extend([(temp_workflows[0], wfilters)])
                else:
                    raise ConfigError('Meta-workflows with multiple threads are not allowed as recursive workflows [%s]' %wkname)
            else:
                parsed_workflows.extend(temp_workflows)
        return parsed_workflows

    genetree_workflows = parse_workflows(args.workflow, "genetree")
    supermatrix_workflows = parse_workflows(args.supermatrix_workflow, "supermatrix")

    # Stop if mixing types of meta-workflows
    if supermatrix_workflows and len(genetree_workflows) > 1:
        raise ConfigError("A single genetree workflow must be specified when used in combination with super-matrix workflows.")

    # Sets master workflow type
    if supermatrix_workflows:
        WORKFLOW_TYPE = "supermatrix"
        master_workflows = supermatrix_workflows
    else:
        WORKFLOW_TYPE = "genetree"
        master_workflows = genetree_workflows

    # Parse npr workflows and filters
    npr_workflows = []
    use_npr = False
    if args.npr_workflows is not None:
        use_npr = True
        npr_workflows = parse_workflows(args.npr_workflows, WORKFLOW_TYPE, parse_filters=True)

    # setup workflows and create a separate config dictionary for each of them
    run2config = {}
    for wkname in master_workflows:
        config = dict(base_config)
        run2config[wkname] = config

        appset = config[config[wkname]['_appset'][1:]]

        # Initialized application command line commands for this workflow
        config['app'] = {}
        config['threading'] = {}

        apps_to_test = {}
        for k, (appsrc, cores) in six.iteritems(appset):
            cores = int(cores)
            if appsrc == "built-in":
                #cores = int(config["threading"].get(k, args.maxcores))
                cores = min(args.maxcores, cores)
                config["threading"][k] = cores
                cmd = apps.get_call(k, APPSPATH, base_dir, str(cores))
                config["app"][k] = cmd
                apps_to_test[k] = cmd

        # Copy config file
        config["_outpath"] = pjoin(base_dir, wkname)
        config["_nodeinfo"] = defaultdict(dict)
        try:
            os.makedirs(config["_outpath"])
        except OSError:
            pass

        # setup genetree workflow as the processor of concat alignment jobs
        if WORKFLOW_TYPE == "supermatrix":
            concatenator = config[wkname]["_alg_concatenator"][1:]
            config[concatenator]["_workflow"] = '@%s' % genetree_workflows[0]

        # setup npr options for master workflows
        if use_npr:
            config['_npr'] = {
                # register root workflow as the main workflow if the contrary not said
                "wf_type": WORKFLOW_TYPE,
                "workflows": npr_workflows if npr_workflows else [(wkname, {})],
                'nt_switch_thr': args.nt_switch_thr,
                'max_iters': args.max_iters,
                }

            #config[wkname]['_npr'] = '@'+npr_config
            #target_levels = config[npr_config].get('_target_levels', [])
            #target_dict = config['_optimized_levels'] = {}
            #for tg in target_levels:
                # If target level name starts with ~, we allow para and
                # poly-phyletic grouping of the species in such level
                #strict_monophyly = True
                #if tg.startswith("~"):
                    #tg = target_level.lstrip("~")
                    #strict_monophyly = False
                #tg = tg.lower()
                # We add the level as non-optimized
                #target_dict[target_level] = [False, strict_monophyly]
            #TARGET_CLADES.update(target_levels)
        else:
            config['_npr'] = {
                'nt_switch_thr': args.nt_switch_thr,
            }


    # dump log config file
    with open(local_conf_file, "w") as OUTPUT:
        with open(args.base_config) as INPUT:
            OUTPUT.write(INPUT.read()) # replace by simple copy?

    TARGET_CLADES.discard('')

    if WORKFLOW_TYPE == 'genetree':
        from .phylobuild_lib.workflow.genetree import pipeline
    elif WORKFLOW_TYPE == 'supermatrix':
        from .phylobuild_lib.workflow.supermatrix import pipeline

    #if args.arch == "auto":
    #    arch = "64 " if sys.maxsize > 2**32 else "32"
    #else:
    #    arch = args.arch

    arch = "64 " if sys.maxsize > 2**32 else "32"

    print(__DESCRIPTION__)

    # check application binary files
    if not args.nochecks:
        log.log(28, "Testing x86-%s portable applications..." % arch)
        apps.test_apps(apps_to_test)

    log.log(28, "Starting ETE-build execution at %s" %(ctime()))
    log.log(28, "Output directory %s" %(GLOBALS["output_dir"]))


    # -------------------------------------
    # PATH CONFIGs
    # -------------------------------------

    # Set up paths
    gallery_dir = os.path.join(base_dir, "gallery")
    sge_dir = pjoin(base_dir, "sge_jobs")
    tmp_dir = pjoin(base_dir, "tmp")
    tasks_dir = os.path.realpath(args.tasks_dir) if args.tasks_dir else  pjoin(base_dir, "tasks")
    input_dir = pjoin(base_dir, "input")
    db_dir = os.path.realpath(args.db_dir) if args.db_dir else  pjoin(base_dir, "db")

    GLOBALS["db_dir"] = db_dir
    GLOBALS["sge_dir"] = sge_dir
    GLOBALS["tmp"] = tmp_dir
    GLOBALS["gallery_dir"] = gallery_dir
    GLOBALS["tasks_dir"] = tasks_dir
    GLOBALS["input_dir"] = input_dir

    GLOBALS["nprdb_file"]  = pjoin(db_dir, "npr.db")
    GLOBALS["datadb_file"]  = pjoin(db_dir, "data.db")
    
    GLOBALS["seqdb_file"]  = pjoin(db_dir, "seq.db") if not args.seqdb else args.seqdb

    # Clear databases if necessary
    if args.clearall:
        log.log(28, "Erasing all existing npr data...")
        shutil.rmtree(GLOBALS["tasks_dir"]) if pexist(GLOBALS["tasks_dir"]) else None
        shutil.rmtree(GLOBALS["tmp"]) if pexist(GLOBALS["tmp"]) else None
        shutil.rmtree(GLOBALS["input_dir"]) if pexist(GLOBALS["input_dir"]) else None

        if not args.seqdb:
            silent_remove(GLOBALS["seqdb_file"])

        silent_remove(GLOBALS["datadb_file"])
        silent_remove(pjoin(base_dir, "nprdata.tar"))
        silent_remove(pjoin(base_dir, "nprdata.tar.gz"))
        #silent_remove(pjoin(base_dir, "npr.log"))
        silent_remove(pjoin(base_dir, "npr.log.gz"))
    else:
        if args.softclear:
            log.log(28, "Erasing precomputed data (reusing task directory)")
            shutil.rmtree(GLOBALS["tmp"]) if pexist(GLOBALS["tmp"]) else None
            shutil.rmtree(GLOBALS["input_dir"]) if pexist(GLOBALS["input_dir"]) else None
            os.remove(GLOBALS["datadb_file"]) if pexist(GLOBALS["datadb_file"]) else None
        if args.clearseqs and pexist(GLOBALS["seqdb_file"]) and not args.seqdb:
            log.log(28, "Erasing existing sequence database...")
            os.remove(GLOBALS["seqdb_file"])

    if not args.clearall and base_dir != GLOBALS["output_dir"]:
        log.log(24, "Copying previous output files to scratch directory: %s..." %base_dir)
        try:
            shutil.copytree(pjoin(GLOBALS["output_dir"], "db"), db_dir)
        except IOError as e:
            print(e)
            pass

        try:
            shutil.copytree(pjoin(GLOBALS["output_dir"], "tasks/"), pjoin(base_dir, "tasks/"))
        except IOError as e:
            try:
                shutil.copy(pjoin(GLOBALS["output_dir"], "nprdata.tar.gz"), base_dir)
            except IOError as e:
                pass

        # try: os.system("cp -a %s/* %s/" %(GLOBALS["output_dir"],  base_dir))
        # except Exception: pass


    # UnCompress packed execution data
    if pexist(os.path.join(base_dir,"nprdata.tar.gz")):
        log.warning("Compressed data found. Extracting content to start execution...")
        cmd = "cd %s && gunzip -f nprdata.tar.gz && tar -xf nprdata.tar && rm nprdata.tar" % base_dir
        os.system(cmd)

    # Create dir structure
    for dirname in [tmp_dir, tasks_dir, input_dir, db_dir]:
        try:
            os.makedirs(dirname)
        except OSError:
            log.warning("Using existing dir: %s", dirname)


    # -------------------------------------
    # DATA READING AND CHECKING
    # -------------------------------------

    # Set number of CPUs available

    if WORKFLOW_TYPE == "supermatrix" and not args.cogs_file:
        raise ConfigError("Species tree workflow requires a list of COGS"
                          " to be supplied through the --cogs"
                          " argument.")
    elif WORKFLOW_TYPE == "supermatrix":
        GLOBALS["cogs_file"] = os.path.abspath(args.cogs_file)

    GLOBALS["seqtypes"] = set()
    if args.nt_seed_file:
        GLOBALS["seqtypes"].add("nt")
        GLOBALS["inputname"] = os.path.split(args.nt_seed_file)[-1]

    if args.aa_seed_file:
        GLOBALS["seqtypes"].add("aa")
        GLOBALS["inputname"] = os.path.split(args.aa_seed_file)[-1]

    # Initialize db if necessary, otherwise extract basic info
    db.init_nprdb(GLOBALS["nprdb_file"])
    db.init_datadb(GLOBALS["datadb_file"])

    # Species filter
    if args.spfile:
        target_species = set([line.strip() for line in open(args.spfile)])
        target_species.discard("")
        log.log(28, "Enabling %d species", len(target_species))
    else:
        target_species = None
    
    # Load supermatrix data
    if WORKFLOW_TYPE == "supermatrix":
        observed_species= set()
        target_seqs = set()
        for cog_number, seq_cogs in iter_cog_seqs(args.cogs_file, args.spname_delimiter):
            for seqname, spcode, seqcode in seq_cogs:
                if target_species is None or spcode in target_species:
                    observed_species.add(spcode)
                    target_seqs.add(seqname)            
                
        if target_species is not None:
            if target_species - observed_species:
                raise DataError("The following target_species could not be found in COGs file: %s" %(','.join(target_species-observed_species)))
        else:
            target_species = observed_species
        log.warning("COG file restriction: %d sequences from %s species " %(len(target_seqs), len(target_species)))
    else:
        target_seqs = None

    GLOBALS["target_species"] = target_species
    
    # Check and load data
    ERROR = ""
    if not pexist(GLOBALS["seqdb_file"]):
        db.init_seqdb(GLOBALS["seqdb_file"])
        seqname2seqid = None
        if args.aa_seed_file:
            seqname2seqid = seqio.load_sequences(args, "aa", target_seqs, target_species, seqname2seqid)
            if not target_seqs:
                target_seqs = list(seqname2seqid.keys())
                
        if args.nt_seed_file:
            seqname2seqid = seqio.load_sequences(args, "nt", target_seqs, target_species, seqname2seqid)
        # Integrity checks?
        pass
            
    else:
        db.init_seqdb(GLOBALS["seqdb_file"])
        log.warning("Reusing sequences from existing database!")
        if target_seqs is None:
            seqname2seqid = db.get_seq_name_dict()
        else:
            seqname2seqid = db.get_seq_name_dict()
            if target_seqs - set(seqname2seqid.keys()):
                raise DataError("The following sequence names in COGs file"
                                " are not found in current database: %s" %(
                                    ','.join(target_seqs - set(seqname2seqid.keys()))))
                      
    log.warning("%d target sequences" %len(seqname2seqid))
    GLOBALS["target_sequences"] = seqname2seqid.values()
        
    if ERROR:
        with open(pjoin(base_dir, "error.log"), "w") as OUTPUT:
            OUTPUT.write(' '.join(arguments) + "\n\n" + ERROR)
        raise DataError("Errors were found while loading data. Please"
                        " check error file for details")

    # Prepare target taxa levels, if any
    if WORKFLOW_TYPE == "supermatrix" and args.lineages_file and TARGET_CLADES:
        sp2lin = {}
        lin2sp = defaultdict(set)
        all_sorted_levels = []
        for line in open(args.lineages_file):
            sp, lineage = line.split("\t")
            sp = sp.strip()
            if sp in target_species:
                sp2lin[sp] = [x.strip().lower() for x in lineage.split(",")]
                for lin in sp2lin[sp]:
                    if lin not in lin2sp:
                        all_sorted_levels.append(lin)
                    lin2sp[lin].add(sp)
        # any target species without lineage information?
        if target_species - set(sp2lin):
            missing = target_species - set(sp2lin)
            log.warning("%d species not found in lineages file" %len(missing))

        # So, the following levels (with at least 2 species) could be optimized
        avail_levels = [(lin, len(lin2sp[lin])) for lin in all_sorted_levels if len(lin2sp[lin])>=2]
        log.log(26, "Available levels for NPR optimization:\n%s", '\n'.join(["% 30s (%d spcs)"%x for x in avail_levels]))
        avail_levels = set([lv[0] for lv in avail_levels])
        GLOBALS["lineages"] = (sp2lin, lin2sp)
        
    # if no lineages file, raise an error
    elif WORKFLOW_TYPE == "supermatrix" and TARGET_CLADES:
        raise ConfigError("The use of target_levels requires a species lineage file provided through the --lineages option")

    # -------------------------------------
    # MISC
    # -------------------------------------

    GLOBALS["_max_cores"] = args.maxcores
    log.debug("Enabling %d CPU cores" %args.maxcores)


    # how task will be executed
    if args.no_execute:
        execution = (None, False)
    # elif args.sge_execute:
    #     execution = ("sge", False)
    else:
        if args.monitor:
            execution =("insitu", True) # True is for run-detached flag
        else:
            execution = ("insitu", False)

    # Scheduling starts here
    log.log(28, "ETE build starts now!")

    # This initialises all pipelines
    pending_tasks = []
    start_time = ctime()
    for wkname, config in six.iteritems(run2config):
        # Feeds pending task with the first task of the workflow
        config["_name"] = wkname
        new_tasks = pipeline(None, wkname, config)
        if not new_tasks:
            continue # skips pipelines not fitting workflow filters
        thread_id = new_tasks[0].threadid
        config["_configid"] = thread_id
        GLOBALS[thread_id] = config
        pending_tasks.extend(new_tasks)

        # Clear info from previous runs
        open(os.path.join(config["_outpath"], "runid"), "a").write('\t'.join([thread_id, GLOBALS["nprdb_file"]+"\n"]))
        # Write command line info
        cmd_info = '\t'.join([start_time, thread_id, str(args.monitor), GLOBALS["cmdline"]])
        open(pjoin(config["_outpath"], "command_lines"), "a").write(cmd_info+"\n")

    thread_errors = schedule(pipeline, pending_tasks, args.schedule_time,
                             execution, args.debug, args.noimg)
    db.close()

    if not thread_errors:
        if GLOBALS.get('_background_scheduler', None):
            GLOBALS['_background_scheduler'].terminate()

        if args.compress:
            log.log(28, "Compressing intermediate data...")
            cmd = "cd %s && tar --remove-files -cf nprdata.tar tasks/ && gzip -f nprdata.tar; if [ -e npr.log ]; then gzip -f npr.log; fi;" %\
              GLOBALS["basedir"]
            os.system(cmd)
        log.log(28, "Deleting temporal data...")
        cmd = "cd %s && rm -rf tmp/" %GLOBALS["basedir"]
        os.system(cmd)
        cmd = "cd %s && rm -rf input/" %GLOBALS["basedir"]
        os.system(cmd)
        GLOBALS["citator"].show()
    else:
        raise DataError("Errors found in some tasks")

Example 130

Project: soupy Source File: soupy.py
    def _wrap_multi(self, func):
        vals = func(self._value)
        return Collection(map(Node, vals))

Example 131

Project: ete Source File: cog_creator.py
def brh_cogs(DB, species, missing_factor=0.0, seed_sp=None, min_score=0):
    """It scans all precalculate BRH relationships among the species
       passed as an argument, and detects Clusters of Orthologs
       according to several criteria:

       min_score: the min coverage/overalp value required for a
       blast to be a reliable hit.

       missing_factor: the min percentage of species in which a
       given seq must have  orthologs.

    """
    log.log(26, "Searching BRH orthologs")
    species = set(map(str, species))

    min_species = len(species) - round(missing_factor * len(species))

    if seed_sp == "auto":
        # seed2size = get_sorted_seeds(seed_sp, species, species, min_species, DB)
        # sort_seeds =  sorted([(len(size), sp) for sp, size in seed2size.iteritems()])
        # sp_to_test = [sort_seeds[-1][1]]
        sp_to_test = list(species)
    elif seed_sp == "largest":
        cmd = """SELECT taxid, size FROM species"""
        db.seqcursor.execute(cmd)
        sp2size = {}
        for tax, counter in db.seqcursor.fetchall():
            if tax in species:
                sp2size[tax] = counter

        sorted_sp = sorted(sp2size.items(), key=lambda x: x[1])  # ascending by proteome size
        log.log(24, sorted_sp[:6])
        largest_sp = sorted_sp[-1][0]
        sp_to_test = [largest_sp]
        log.log(28, "Using %s as search seed. Proteome size=%s genes" %\
            (largest_sp, sp2size[largest_sp]))
    else:
        sp_to_test = [str(seed_sp)]

    # The following loop tests each possible seed if none is
    # specified.
    log.log(28, "Detecting Clusters of Orthologs groups (COGs)")
    log.log(28, "Min number of species per COG: %d" %min_species)
    cogs_selection = []

    for j, seed in enumerate(sp_to_test):
        log.log(26,"Testing new seed species:%s (%d/%d)", seed, j+1, len(sp_to_test))
        species_side1 = ','.join(map(quote, [s for s in species if str(s)>str(seed)]))
        species_side2 = ','.join(map(quote, [s for s in species if str(s)<str(seed)]))
        pairs1 = []
        pairs2 = []
        # Select all ids with matches in the target species, and
        # return the total number of species covered by each of
        # such ids.
        if species_side1 != "":
            cmd = """SELECT seqid1, taxid1, seqid2, taxid2 from ortho_pair WHERE
            taxid1="%s" AND taxid2 IN (%s) """ %\
            (seed, species_side1)
            DB.orthocursor.execute(cmd)
            pairs1 = DB.orthocursor.fetchall()

        if species_side2 != "":
            cmd = """SELECT seqid2, taxid2, seqid1, taxid1 from ortho_pair WHERE
            taxid1 IN (%s) AND taxid2 = "%s" """ %\
            (species_side2, seed)

            #taxid2="%s" AND taxid1 IN (%s) AND score >= %s""" %\
            #(seed, species_side2, min_score)
            DB.orthocursor.execute(cmd)
            pairs2 = DB.orthocursor.fetchall()

        cog_candidates = defaultdict(set)
        for seq1, sp1, seq2, sp2 in pairs1 + pairs2:
            s1 = (sp1, seq1)
            s2 = (sp2, seq2)
            cog_candidates[(sp1, seq1)].update([s1, s2])

        all_cogs = [cand for cand in list(cog_candidates.values()) if
                    len(cand) >= min_species]

        cog_sizes = [len(cog) for cog in all_cogs]
        cog_spsizes = [len(set([e[0] for e in cog])) for cog in all_cogs]

        if [1 for i in range(len(cog_sizes)) if cog_sizes[i] != cog_spsizes[i]]:
            # for i in xrange(len(cog_sizes)):
            #     if cog_sizes[i] != cog_spsizes[i]:
            #         print cog_sizes[i], cog_spsizes[i]
            #         raw_input()
            raise ValueError("Inconsistent COG found")

        if cog_sizes:
            cogs_selection.append([seed, all_cogs])
        log.log(26, "Found %d COGs" % len(all_cogs))

    def _cog_sort_key(entry):
        cogs = entry[1] # discard seed info
        cog_sizes = [len(cog) for cog in cogs]
        # we want to maximize, in this order: largest COG size, mean COG size
        # and total number of COGs, so sort ascending on the negated values.
        return (-_max(cog_sizes), -round(_mean(cog_sizes)), -len(cogs))

    log.log(26, "Finding best COG selection...")
    cogs_selection.sort(key=_cog_sort_key)
    lines = []
    for seed, all_cogs in cogs_selection:
        cog_sizes = [len(cog) for cog in all_cogs]
        mx, mn, avg = max(cog_sizes), min(cog_sizes), round(_mean(cog_sizes))
        lines.append([seed, mx, mn, avg, len(all_cogs)])
    analysis_txt = StringIO()
    print_as_table(lines[:25], stdout=analysis_txt,
                   header=["Seed","largest COG", "smallest COGs", "avg COG size", "total COGs"])
    log.log(28, "Analysis details:\n"+analysis_txt.getvalue())
    best_seed, best_cogs = cogs_selection[0]
    cog_sizes = [len(cog) for cog in best_cogs]

    # Not necessary since they will be sorted differently later on
    #best_cogs.sort(lambda x,y: cmp(len(x), len(y)), reverse=True)

    if max(cog_sizes) < len(species):
        raise ValueError("Current COG selection parameters do not permit to cover all species")

    recoded_cogs = []
    for cog in best_cogs:
        named_cog = ["%s%s%s" %(x[0], GLOBALS["spname_delimiter"],x[1]) for x in cog]
        recoded_cogs.append(named_cog)

    return recoded_cogs, analysis_txt.getvalue()

Example 132

Project: dit Source File: distconst.py
    def from_mapping(self, mapping, force=True):
        """
        Returns a callable implementing a random variable via a mapping.

        Parameters
        ----------
        mapping : dict
            A mapping from outcomes to values of the new random variable.

        force : bool
            Ideally, the values of `mapping` should satisfy the requirements
            of all outcomes (hashable, ordered sequences), but if `force` is
            `True`, we will attempt to use the distribution's outcome
            constructor and make sure that they are. If they are not, then
            the values will be placed into a 1-tuple. This is strictly
            a convenience for users. For example, if the outcomes are
            strings, the values of `mapping` can also be strings without issue.
            However, if the outcomes are tuples of integers, then the values
            *should* also be tuples. When `force` is `True`, then the values
            can be integers and then they will be transformed into 1-tuples.

        Returns
        -------
        func : function
            A callable implementing the desired function. It receives a single
            argument, the outcome, and returns an outcome for the calculation.

        Examples
        --------
        >>> d = dit.Distribution(['00', '01', '10', '11'], [1/4]*4)
        >>> bf = dit.RVFunctions(d)
        >>> mapping = {'00': '0', '01': '1', '10': '1', '11': '0'}
        >>> d = dit.insert_rvf(d, bf.from_mapping(mapping))
        >>> d.outcomes
        ('000', '011', '101', '110')

        Same example as above but now with tuples.

        >>> d = dit.Distribution([(0,0), (0,1), (1,0), (1,1)], [1/4]*4)
        >>> bf = dit.RVFunctions(d)
        >>> mapping = {(0,0): 0, (0,1): 1, (1,0): 1, (1,1): 0}
        >>> d = dit.insert_rvf(d, bf.from_mapping(mapping, force=True))
        >>> d.outcomes
        ((0, 0, 0), (0, 1, 1), (1, 0, 1), (1, 1, 0))

        See Also
        --------
        dit.modify_outcomes

        """
        ctor = self.ctor
        if force:
            try:
                list(map(ctor, mapping.values()))
            except (TypeError, ditException):
                values = [ctor([o]) for o in mapping.values()]
                mapping = dict(zip(mapping.keys(), values))

        def func(outcome):
            return mapping[outcome]

        return func
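
The list(map(ctor, mapping.values())) call is used purely for its side effect: if any value cannot be passed through the outcome constructor it raises, and the except branch falls back to wrapping each value in a 1-tuple. A small sketch of that probe-then-fallback pattern, with plain tuple standing in for the distribution's constructor:

from six.moves import map  # requires six

mapping = {(0, 0): 0, (0, 1): 1, (1, 0): 1, (1, 1): 0}
ctor = tuple  # stand-in for the distribution's outcome constructor
try:
    # Probe every value; list() is needed so the lazy map actually runs.
    list(map(ctor, mapping.values()))
except TypeError:
    # Plain ints are not sequences, so wrap each value in a 1-tuple instead.
    mapping = {k: ctor([v]) for k, v in mapping.items()}
print(mapping)  # each value is now a 1-tuple, e.g. (0, 0) -> (0,)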

Example 133

Project: ete Source File: webapp.py
Function: call
    def __call__(self, environ, start_response):
        """ This function is executed when the application is called
        by the WSGI apache module. It is, therefore, in charge of
        answering web requests."""
        path = environ['PATH_INFO'].split("/")
        start_response('202 OK', [('content-type', 'text/plain')])
        if environ['REQUEST_METHOD'].upper() == 'GET' and  environ['QUERY_STRING']:
            self.queries = cgi.parse_qs(environ['QUERY_STRING'])
        elif environ['REQUEST_METHOD'].upper() == 'POST' and environ['wsgi.input']:
            self.queries = cgi.parse_qs(environ['wsgi.input'].read())
        else:
            self.queries = {}

        method = path[1]
        treeid = self.queries.get("treeid", [None])[0]
        nodeid = self.queries.get("nid", [None])[0]
        textface = self.queries.get("textface", [None])[0]
        actions = self.queries.get("show_actions", [None])[0]
        tree = self.queries.get("tree", [None])[0]
        search_term = self.queries.get("search_term", [None])[0]
        aindex = self.queries.get("aindex", [None])[0]

        if method == "draw":
            # if not treeid is given, generate one
            if not treeid:
                treeid = md5(str(time.time()).encode('utf-8')).hexdigest()

            if not self._load_tree(treeid, tree):
                return "draw: Cannot load the tree: %s" %treeid

            t = self._treeid2tree[treeid]
            if self._custom_tree_renderer:
                return self._custom_tree_renderer(t, treeid, self)
            elif t and treeid:
                return self._get_tree_img(treeid=treeid)
            else:
                return "No tree to draw"

        elif method == "get_menu":
            if not self._load_tree(treeid):
                return "get_menu: Cannot load the tree: %s" %treeid

            if nodeid:
                tree_index = self._treeid2index[treeid]
                node = tree_index[nodeid]
            else:
                node = None

            if textface:
                header = str(textface).strip()
            else:
                header = "Menu"
            html = """<div id="ete_popup_header"><span id="ete_popup_header_text">%s</span><div id="ete_close_popup" onClick='hide_popup();'></div></div><ul>""" %\
                (header)
            for i in map(int, actions.split(",")):
                aname, target, handler, checker, html_generator = self.actions[i]
                if html_generator:
                    html += html_generator(i, treeid, nodeid, textface, node)
                else:
                    html += """<li><a  href='javascript:void(0);' onClick='hide_popup(); run_action("%s", "%s", "%s");'> %s </a></li> """ %\
                        (treeid, nodeid, i, aname)
            html += '</ul>'
            return html

        elif method == "action":
            if not self._load_tree(treeid):
                return "action: Cannot load the tree: %s" %treeid

            if aindex is None:
                # just refresh tree
                return self._get_tree_img(treeid=treeid)
            else:
                aname, target, handler, checker, html_generator = self.actions[int(aindex)]

            if target in set(["node", "face", "layout"]):
                return self._get_tree_img(treeid=treeid, pre_drawing_action=[target, handler, [nodeid]])
            elif target in set(["search"]):
                return self._get_tree_img(treeid=treeid, pre_drawing_action=[target, handler, [search_term]])
            elif target in set(["refresh"]):
                return self._get_tree_img(treeid=treeid)
            return "Bad guy"

        elif self._external_app_handler:
            return self._external_app_handler(environ, start_response, self.queries)
        else:
            return  '\n'.join(map(str, list(environ.items()))) + str(self.queries) + '\t\n'.join(environ['wsgi.input'])

Example 134

Project: box-python-sdk Source File: enum.py
Function: iter
    def __iter__(cls):
        return chain(super(ExtendableEnumMeta, cls).__iter__(), chain.from_iterable(map(iter, cls.__subclasses__())))
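
chain.from_iterable(map(iter, ...)) flattens "one iterator per subclass" into a single stream. The idiom works for any nested iterables; a minimal sketch without the enum machinery:

from itertools import chain
from six.moves import map  # requires six

groups = [[1, 2], (3, 4), {5}]
# map(iter, groups) yields one iterator per group; chain.from_iterable
# then walks them back to back as a single flat sequence.
flat = list(chain.from_iterable(map(iter, groups)))
print(flat)  # [1, 2, 3, 4, 5]

Strictly, chain.from_iterable(groups) alone would produce the same result, since chaining calls iter() on each element anyway; the explicit map(iter, ...) simply mirrors the metaclass example above.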

Example 135

Project: formencode Source File: schema.py
Function: convert_from_python
    def _convert_from_python(self, value_dict, state):
        chained = self.chained_validators[:]
        chained.reverse()
        finished = []
        for validator in chained:
            __traceback_info__ = (
                'for_python chained_validator %s (finished %s)') % (
                validator, ', '.join(map(repr, finished)) or 'none')
            finished.append(validator)
            value_dict = validator.from_python(value_dict, state)
        self.assert_dict(value_dict, state)
        new = {}
        errors = {}
        unused = list(self.fields.keys())
        if state is not None:
            previous_key = getattr(state, 'key', None)
            previous_full_dict = getattr(state, 'full_dict', None)
            state.full_dict = value_dict
        try:
            __traceback_info__ = None
            for name, value in six.iteritems(value_dict):
                __traceback_info__ = 'for_python in %s' % name
                try:
                    unused.remove(name)
                except ValueError:
                    if not self.allow_extra_fields:
                        raise Invalid(self.message('notExpected',
                            state, name=repr(name)), value_dict, state)
                    if not self.filter_extra_fields:
                        new[name] = value
                else:
                    if state is not None:
                        state.key = name
                    try:
                        new[name] = self.fields[name].from_python(value, state)
                    except Invalid as e:
                        errors[name] = e

            del __traceback_info__

            for name in unused:
                validator = self.fields[name]
                if state is not None:
                    state.key = name
                try:
                    new[name] = validator.from_python(None, state)
                except Invalid as e:
                    errors[name] = e

            if errors:
                raise Invalid(
                    format_compound_error(errors),
                    value_dict, state, error_dict=errors)

            pre = self.pre_validators[:]
            pre.reverse()
            if state is not None:
                state.key = previous_key

            for validator in pre:
                __traceback_info__ = 'for_python pre_validator %s' % validator
                new = validator.from_python(new, state)

            return new

        finally:
            if state is not None:
                state.key = previous_key
                state.full_dict = previous_full_dict

Example 136

Project: dit Source File: distribution.py
Function: to_string
    def to_string(self, digits=None, exact=None, tol=1e-9):
        """
        Returns a string representation of the distribution.

        Parameters
        ----------
        digits : int or None
            The probabilities will be rounded to the specified number of
            digits, using NumPy's around function. If `None`, then no rounding
            is performed. Note, if the number of digits is greater than the
            precision of the floats, then the resultant number of digits will
            match that smaller precision.
        exact : bool
            If `True`, then linear probabilities will be displayed, even if
            the underlying pmf contains log probabilities.  The closest
            rational fraction within a tolerance specified by `tol` is used
            as the display value.
        tol : float
            If `exact` is `True`, then the probabilities will be displayed
            as the closest rational fraction within `tol`.

        Returns
        -------
        s : str
            A string representation of the distribution.

        """
        from six import StringIO
        s = StringIO()

        if exact is None:
            exact = ditParams['print.exact']

        x = prepare_string(self, digits, exact, tol)
        pmf, outcomes, base, colsep, max_length, pstr = x

        headers = ["Class: ",
                   "Alphabet: ",
                   "Base: "]
        vals = [self.__class__.__name__,
                self.alphabet, # pylint: disable=no-member
                base]

        L = max(map(len, headers))
        for head, val in zip(headers, vals):
            s.write("{0}{1}\n".format(head.ljust(L), val))
        s.write("\n")

        s.write(''.join(['x'.ljust(max_length), colsep, pstr, "\n"]))
        for o, p in zip(outcomes, pmf):
            s.write(''.join([o.ljust(max_length), colsep, str(p), "\n"]))

        s.seek(0)
        s = s.read()
        # Remove the last \n
        s = s[:-1]

        return s
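
max(map(len, headers)) computes the width of the longest header so the values can be left-justified into a single column. A tiny sketch of that alignment trick, with made-up header and value strings:

from six.moves import map  # requires six

headers = ["Class: ", "Alphabet: ", "Base: "]
values = ["Distribution", "('H', 'T')", "linear"]

# Pad every header to the width of the longest one so the values line up.
width = max(map(len, headers))
for head, val in zip(headers, values):
    print("{0}{1}".format(head.ljust(width), val))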

Example 137

Project: orderedmultidict Source File: test_orderedmultidict.py
Function: set_up
    def setUp(self):
        self.inits = [
            {}, {1: 1}, {1: 1, 2: 2, 3: 3}, {None: None}, {
                None: None, 1: 1, 2: 2}, {False: False},
        ]
        self.inits += list(map(itemlist, [
            [], [(1, 1)], [(1, 1), (2, 2)], [(1, 1), (2, 2), (1, 1)],
            [(1, 1), (1, 1), (1, 1)], [(None, None), (None, None)],
            [(False, False)],
            [(None, 1), (1, None), (None, None), (None, 1), (1, None)],
        ]))

        # Updates to test update() and updateall().
        self.updates = [
            {}, {7: 7}, {7: 7, 8: 8, 9: 9}, {None: None}, {1: 1, 2: 2}]
        self.updates += list(map(itemlist, [
            [], [(7, 7)], [(7, 7), (8, 8), (9, 9)], [(None, 'none')],
            [(9, 9), (1, 2)], [(7, 7), (7, 7), (8, 8), (7, 77)],
            [(1, 11), (1, 111), (1, 1111), (2, 22),
                (2, 222), ('a', 'a'), ('a', 'aa')],
        ]))

        self.keyword_updates = [
            {}, {'1': 1}, {'1': 1, '2': 2}, {
                'sup': 'pumps', 'scewps': None}, {'aa': 'aa'},
        ]

        # Items not initially in any of the multidict inputs self.inits.
        self.nonitems = [
            (44, 44), (None, 44), (55, None), ('a', 'b'), (11, 11), (22, 22)]

        # Keys not initially in any of the multidict inputs self.inits or in
        # self.nonitems.
        self.nonkeys = [_unique, 'asdfasdosduf', 'oaisfiapsn', 'ioausopdaui']

        self.valuelist = [1, 2, 3, None, 'a', 'b', object()]

Example 138

Project: nzbToMedia Source File: eventlog.py
Function: format_records
	def format_records(self, records=None):
		if records is None:
			records = self.get_records()
		return map(self.format_record, records)
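
Note that under Python 3 (and therefore under six.moves.map on both interpreters) the value returned here is a lazy iterator, not a list, so it can be consumed only once. A sketch of the difference and the usual fix; the record dicts and formatter are invented for illustration:

from six.moves import map  # requires six

records = [{'event': 'start'}, {'event': 'stop'}]

def format_record(record):
    return "[{0}]".format(record['event'])

lazy = map(format_record, records)
print(list(lazy))  # ['[start]', '[stop]']
print(list(lazy))  # [] -- the iterator is already exhausted

eager = list(map(format_record, records))  # materialize once if callers reuse it
print(eager)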

Example 139

Project: pdfquery Source File: pdfquery.py
    def get_tree(self, *page_numbers):
        """
            Return lxml.etree.ElementTree for the entire document, or only for
            the page numbers given, if any.
        """
        cache_key = "_".join(map(str, _flatten(page_numbers)))
        tree = self._parse_tree_cacher.get(cache_key)
        if tree is None:
            # set up root
            root = parser.makeelement("pdfxml")
            if self.doc.info:
                for k, v in list(self.doc.info[0].items()):
                    k = obj_to_string(k)
                    v = obj_to_string(resolve1(v))
                    try:
                        root.set(k, v)
                    except ValueError as e:
                        # Sometimes keys have a character in them, like ':',
                        # that isn't allowed in XML attribute names.
                        # If that happens we just replace non-word characters
                        # with '_'.
                        if "Invalid attribute name" in e.args[0]:
                            k = re.sub(r'\W', '_', k)
                            root.set(k, v)

            # Parse pages and append to root.
            # If nothing was passed in for page_numbers, we do this for all
            # pages, but if None was explicitly passed in, we skip it.
            if not(len(page_numbers) == 1 and page_numbers[0] is None):
                if page_numbers:
                    pages = [[n, self.get_layout(self.get_page(n))] for n in
                             _flatten(page_numbers)]
                else:
                    pages = enumerate(self.get_layouts())
                for n, page in pages:
                    page = self._xmlize(page)
                    page.set('page_index', obj_to_string(n))
                    page.set('page_label', self.doc.get_page_number(n))
                    root.append(page)
                self._clean_text(root)

            # wrap root in ElementTree
            tree = etree.ElementTree(root)
            self._parse_tree_cacher.set(cache_key, tree)

        return tree
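
The cache key above is built by stringifying the flattened page numbers with map before joining them. A minimal sketch of that idiom in isolation (the helper name is illustrative, not part of pdfquery):

from six.moves import map

def make_cache_key(page_numbers):
    # join works on the lazy map object directly; no list() needed
    return "_".join(map(str, page_numbers))

assert make_cache_key([1, 2, 10]) == "1_2_10"
assert make_cache_key([]) == ""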

Example 140

Project: django-oscar-api Source File: basic.py
Function: get_queryset
    def get_queryset(self):
        qs = super(BasketList, self).get_queryset()
        return map(
            functools.partial(assign_basket_strategy, request=self.request),
            qs)
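
One caveat with this pattern: on Python 3, six.moves.map returns a lazy iterator, so the mapped "queryset" can be consumed only once and cannot be sliced like a list. A stand-alone sketch of the partial-plus-map combination (assign_strategy and the request value are stand-ins, not the project's helpers):

import functools
from six.moves import map

def assign_strategy(basket, request):
    # stand-in for assign_basket_strategy
    return (request, basket)

baskets = ["basket-1", "basket-2"]
result = list(map(functools.partial(assign_strategy, request="req"), baskets))
assert result == [("req", "basket-1"), ("req", "basket-2")]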

Example 141

Project: chalk Source File: __init__.py
def format_txt(fg, txt, bg, opts):
    fg, txt, bg, opts = map(convert_to_str, (fg, txt, bg, opts))
    return make_code(fg, bg, opts) + txt + _clear_formatting
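
The first line above normalizes four values in one pass by mapping a converter over a tuple and unpacking the result; this works because tuple unpacking consumes the map iterator. A sketch with a simple stand-in converter:

from six.moves import map

def convert_to_str(value):
    # stand-in for chalk's converter
    return "" if value is None else str(value)

fg, txt, bg, opts = map(convert_to_str, ("red", 42, None, "bold"))
assert (fg, txt, bg, opts) == ("red", "42", "", "bold")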

Example 142

Project: zulip Source File: users.py
def get_bots_backend(request, user_profile):
    # type: (HttpRequest, UserProfile) -> HttpResponse
    bot_profiles = UserProfile.objects.filter(is_bot=True, is_active=True,
                                              bot_owner=user_profile)
    bot_profiles = bot_profiles.select_related('default_sending_stream', 'default_events_register_stream')
    bot_profiles = bot_profiles.order_by('date_joined')

    def bot_info(bot_profile):
        # type: (UserProfile) -> Dict[str, Any]
        default_sending_stream = get_stream_name(bot_profile.default_sending_stream)
        default_events_register_stream = get_stream_name(bot_profile.default_events_register_stream)

        return dict(
            username=bot_profile.email,
            full_name=bot_profile.full_name,
            api_key=bot_profile.api_key,
            avatar_url=avatar_url(bot_profile),
            default_sending_stream=default_sending_stream,
            default_events_register_stream=default_events_register_stream,
            default_all_public_streams=bot_profile.default_all_public_streams,
        )

    return json_success({'bots': list(map(bot_info, bot_profiles))})
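
The final line wraps the mapped bot_info results in list() so the payload holds a concrete list (a lazy map object would not serialize to JSON). A minimal sketch of the same shape, with plain dicts standing in for UserProfile rows:

from six.moves import map

def bot_info(profile):
    # stand-in for the view's helper
    return {"username": profile["email"]}

profiles = [{"email": "a-bot@example.com"}, {"email": "b-bot@example.com"}]
payload = {"bots": list(map(bot_info, profiles))}
assert payload["bots"][0]["username"] == "a-bot@example.com"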

Example 143

Project: mapproxy Source File: sqlite.py
    def _tile_set_params_dict(self, tile_set):
        level = tile_set.level
        tile_width, tile_height = self.grid.tile_size
        matrix_width, matrix_height = self.grid.grid_sizes[level]
        params = {
            'layer_id': self.layer_id,
            'bbox': ', '.join(map(str, self.grid.bbox)),
            'srs': self.grid.srs.srs_code,
            'format': self.file_ext,
            'min_tile_col': tile_set.grid[0],
            'max_tile_col': tile_set.grid[2],
            'min_tile_row': tile_set.grid[1],
            'max_tile_row': tile_set.grid[3],
            'tile_width': tile_width,
            'tile_height': tile_height,
            'matrix_width': matrix_width,
            'matrix_height': matrix_height,
            'matrix_id': level,
            'matrix_set_name': self.grid.name,
            'table_name': tile_set.table_name,
        }
        return params

Example 144

Project: cabby Source File: converters.py
def to_content_binding_entities(raw_bindings):
    return list(map(to_content_binding_entity, raw_bindings))
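
This one-liner is the canonical "convert every element" idiom: list(map(f, xs)) is equivalent to the list comprehension [f(x) for x in xs]. A sketch with a stand-in converter in place of to_content_binding_entity:

from six.moves import map

def to_entity(raw):
    # stand-in converter, not cabby's implementation
    return {"binding": raw}

raw_bindings = ["urn:example:binding:1", "urn:example:binding:2"]
assert list(map(to_entity, raw_bindings)) == [to_entity(r) for r in raw_bindings]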

Example 145

Project: debsources Source File: mainlib.py
Function: add_arguments
def add_arguments(cmdline):
    """populate `cmdline` --- an `argpase.ArgumentParser` --- with cmdline
    options shared across several Debsources tools

    """
    cmdline.add_argument('--backend', '-b',
                         metavar='BACKEND',
                         action='append',
                         help='only affect a specific backend (one of: db, fs, '
                         'hooks, hooks.db, hooks.fs). By default all backends '
                         'are enabled; the special value "none" disables all '
                         'backends. Can be specified multiple times. Warning: '
                         'using this you can mess up the update logic, use at '
                         'your own risk.',
                         dest='backends')
    cmdline.add_argument('--config', '-c', dest='conffile',
                         help='alternate configuration file')
    cmdline.add_argument('--dburi', '-u', dest='dburi',
                         help='database URI, e.g. postgresql:///mydbname.'
                         'Override configuration file setting "db_uri"')
    cmdline.add_argument('--dry-run', '-d', dest='dry',
                         action='store_true',
                         help='enable dry run mode')
    cmdline.add_argument('--single-transaction', dest='single_transaction',
                         choices=['yes', 'no'],
                         help='use a single big DB transaction, instead of '
                         'smaller per-package transactions (default: yes)')
    cmdline.add_argument('--stage', '-s',
                         metavar='STAGE',
                         action='append',
                         help='only perform a specific update stage '
                         '(one of: %s). By default all update stages are '
                         'performed. Can be specified multiple times. Warning: '
                         'using this you can mess up the update logic, use at '
                         'your own risk.' %
                         list(map(updater.pp_stage, updater.UPDATE_STAGES)),
                         dest='stages')
    cmdline.add_argument('--trigger', '-t',
                         metavar='EVENT/HOOK',
                         action='append',
                         help='force trigger of (Python) HOOK for EVENT. By '
                         'default all registered hooks are triggered for all '
                         'changed packages. Event is one of: %s. Hook is one '
                         'of the available hooks. Can be specified multiple '
                         'times. Warning: if not used with "--backend none" '
                         'it might lead to multiple execution of the same '
                         'hook. E.g.: -t add-package/checksums' %
                         ', '.join(updater.KNOWN_EVENTS),
                         dest='force_triggers')
    cmdline.add_argument('--verbose', '-v',
                         action='count',
                         help='increase console verbosity')
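
The --stage help string interpolates list(map(updater.pp_stage, updater.UPDATE_STAGES)), i.e. it pretty-prints each stage constant before embedding the result in the help text. A sketch of that idiom with stand-in names:

from six.moves import map

UPDATE_STAGES = ["extract", "suites", "gc"]   # stand-in constants

def pp_stage(stage):
    # stand-in pretty-printer
    return stage.upper()

# joined for readability; the original interpolates the list() directly
stage_list = ", ".join(map(pp_stage, UPDATE_STAGES))
help_text = "only perform a specific update stage (one of: %s)" % stage_list
assert "EXTRACT" in help_text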

Example 146

Project: zulip Source File: messages.py
def exclude_muting_conditions(user_profile, narrow):
    # type: (UserProfile, Iterable[Dict[str, Any]]) -> List[Selectable]
    conditions = []
    stream_name = get_stream_name_from_narrow(narrow)

    if stream_name is None:
        rows = Subscription.objects.filter(
            user_profile=user_profile,
            active=True,
            in_home_view=False,
            recipient__type=Recipient.STREAM
        ).values('recipient_id')
        muted_recipient_ids = [row['recipient_id'] for row in rows]
        condition = not_(column("recipient_id").in_(muted_recipient_ids))
        conditions.append(condition)

    muted_topics = ujson.loads(user_profile.muted_topics)
    if muted_topics:
        if stream_name is not None:
            muted_topics = [m for m in muted_topics if m[0].lower() == stream_name]
            if not muted_topics:
                return conditions

        muted_streams = bulk_get_streams(user_profile.realm,
                                         [muted[0] for muted in muted_topics])
        muted_recipients = bulk_get_recipients(Recipient.STREAM,
                                               [stream.id for stream in six.itervalues(muted_streams)])
        recipient_map = dict((s.name.lower(), muted_recipients[s.id].id)
                             for s in six.itervalues(muted_streams))

        muted_topics = [m for m in muted_topics if m[0].lower() in recipient_map]

        if muted_topics:
            def mute_cond(muted):
                # type: (Tuple[str, str]) -> Selectable
                stream_cond = column("recipient_id") == recipient_map[muted[0].lower()]
                topic_cond = func.upper(column("subject")) == func.upper(muted[1])
                return and_(stream_cond, topic_cond)

            condition = not_(or_(*list(map(mute_cond, muted_topics))))
            return conditions + [condition]

    return conditions
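
Near the end, one condition is built per muted topic and the mapped results are star-expanded into or_(...). A stand-alone sketch of that shape, with a plain string function standing in for SQLAlchemy's or_:

from six.moves import map

def any_of(*clauses):
    # stand-in for sqlalchemy.or_
    return " OR ".join(clauses)

def mute_cond(muted):
    return "stream=%r AND subject=%r" % (muted[0], muted[1])

muted_topics = [("devel", "noise"), ("devel", "offtopic")]
clause = any_of(*list(map(mute_cond, muted_topics)))
assert "noise" in clause and "offtopic" in clause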

Example 147

Project: box-python-sdk Source File: enum.py
Function: __dir__
    def __dir__(cls):
        return list(set(super(ExtendableEnumMeta, cls).__dir__()).union(*map(dir, cls.__subclasses__())))
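
The one-liner above merges the dir() listings of every subclass into a single set via set.union(*map(dir, ...)). A minimal sketch of the same set-union idiom on plain classes:

from six.moves import map

class Base(object):
    pass

class A(Base):
    first = 1

class B(Base):
    second = 2

names = set(dir(Base)).union(*map(dir, Base.__subclasses__()))
assert {"first", "second"} <= names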

Example 148

Project: soupy Source File: soupy.py
Function: zip
    def zip(self, *others):
        """
        Zip the items of this collection with one or more
        other sequences, and wrap the result.

        Unlike Python's zip, all sequences must be the same length.

        Parameters:

            others: One or more iterables or Collections

        Returns:

            A new collection.

        Examples:

            >>> c1 = Collection([Scalar(1), Scalar(2)])
            >>> c2 = Collection([Scalar(3), Scalar(4)])
            >>> c1.zip(c2).val()
            [(1, 3), (2, 4)]
        """
        args = [_unwrap(item) for item in (self,) + others]
        ct = self.count()
        if not all(len(arg) == ct for arg in args):
            raise ValueError("Arguments are not all the same length")
        return Collection(map(Wrapper.wrap, zip(*args)))
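
The return line maps a wrapping callable over the zipped tuples; under six.moves both map and zip are lazy, and the Collection constructor is what finally consumes them. A sketch with a stand-in wrapper class:

from six.moves import map, zip

class Wrapped(object):
    # stand-in for soupy's wrapper type
    def __init__(self, value):
        self.value = value

pairs = list(map(Wrapped, zip([1, 2], [3, 4])))
assert [p.value for p in pairs] == [(1, 3), (2, 4)]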

Example 149

Project: dit Source File: lattice.py
def insert_rv(dist, idx, sigalg):
    """
    Returns a new distribution with a random variable inserted at index `idx`.

    The random variable is constructed according to its induced sigma-algebra.

    Parameters
    ----------
    dist : Distribution
        The distribution which defines the base sigma-algebra.
    idx : int
        The index at which to insert the random variable. To append, set `idx`
        to be equal to -1 or dist.outcome_length().
    sigalg : frozenset
        The sigma-algebra induced by the random variable.

    Returns
    -------
    d : Distribution
        The new distribution.

    """
    from itertools import chain

    if idx == -1:
        idx = dist.outcome_length()

    if not 0 <= idx <= dist.outcome_length():
        raise IndexError('Invalid insertion index.')

    # Provide sane sorting of atoms
    atoms = atom_set(sigalg)
    atoms = [sorted(atom) for atom in atoms]
    atoms.sort(key=quasilexico_key)
    labels = range(len(atoms))
    if dist._outcome_class == str:
        # Then the labels for the new random variable must be strings.
        labels = map(str, labels)

    # Create an index from outcomes to atoms.
    atom_of = {}
    for label, atom in zip(labels, atoms):
        for outcome in atom:
            atom_of[outcome] = label

    if idx == dist.outcome_length():
        def new_outcome_ctor(outcome, ctor=dist._outcome_ctor):
            """The end of the outcome"""
            new_outcome = [outcome, [atom_of[outcome]]]
            return ctor(chain.from_iterable(new_outcome))
    elif idx == 0:
        def new_outcome_ctor(outcome, ctor=dist._outcome_ctor):
            """The beginning of the outcome"""
            new_outcome = [[atom_of[outcome]], outcome]
            return ctor(chain.from_iterable(new_outcome))
    else:
        def new_outcome_ctor(outcome, ctor=dist._outcome_ctor):
            """In the middle of the outcome"""
            new_outcome = [outcome[:idx], [atom_of[outcome]], outcome[idx:]]
            return ctor(chain.from_iterable(new_outcome))

    d = dit.modify_outcomes(dist, new_outcome_ctor)
    return d
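
Note the labels = map(str, labels) line: under six.moves.map this is a lazy iterator, which is safe here only because labels is consumed exactly once by the zip() that follows. A minimal illustration of that single-pass behavior:

from six.moves import map, zip

labels = map(str, range(3))                 # lazy, single-pass
pairs = list(zip(labels, ["a", "b", "c"]))
assert pairs == [("0", "a"), ("1", "b"), ("2", "c")]
assert list(labels) == []                   # the iterator is now exhausted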

Example 150

Project: ete Source File: ncbiquery.py
    def annotate_tree(self, t, taxid_attr="name", tax2name=None, tax2track=None, tax2rank=None):
        """Annotate a tree containing taxids as leaf names by adding the  'taxid',
        'sci_name', 'lineage', 'named_lineage' and 'rank' additional attributes.

        :param t: a Tree (or Tree derived) instance.

        :param name taxid_attr: Allows to set a custom node attribute containing
        the taxid number associated to each node (i.e. species in PhyloTree instances).

        :param tax2name,tax2track,tax2rank: Use these arguments to provide
        pre-calculated dictionaries providing translation from taxid number and
        names,track lineages and ranks.
        """

        taxids = set()
        for n in t.traverse():
            try:
                tid = int(getattr(n, taxid_attr))
            except (ValueError,AttributeError):
                pass
            else:
                taxids.add(tid)
        merged_conversion = {}

        taxids, merged_conversion = self._translate_merged(taxids)
        
        if not tax2name or taxids - set(map(int, list(tax2name.keys()))):
            tax2name = self.get_taxid_translator(taxids)
        if not tax2track or taxids - set(map(int, list(tax2track.keys()))):
            tax2track = self.get_lineage_translator(taxids)

        all_taxid_codes = set([_tax for _lin in list(tax2track.values()) for _tax in _lin])
        extra_tax2name = self.get_taxid_translator(list(all_taxid_codes - set(tax2name.keys())))
        tax2name.update(extra_tax2name)

        tax2common_name = self.get_common_names(tax2name.keys())

        if not tax2rank:
            tax2rank = self.get_rank(list(tax2name.keys()))

        n2leaves = t.get_cached_content()

        for n in t.traverse('postorder'):
            try:
                node_taxid = int(getattr(n, taxid_attr))
            except (ValueError, AttributeError):
                node_taxid = None

            n.add_features(taxid = node_taxid)
            if node_taxid:
                if node_taxid in merged_conversion:
                    node_taxid = merged_conversion[node_taxid]
                n.add_features(sci_name = tax2name.get(node_taxid, getattr(n, taxid_attr, '')),
                               common_name = tax2common_name.get(node_taxid, ''),
                               lineage = tax2track[node_taxid],
                               rank = tax2rank.get(node_taxid, 'Unknown'),                               
                               named_lineage = [tax2name.get(tax, str(tax)) for tax in tax2track[node_taxid]])
            elif n.is_leaf():
                n.add_features(sci_name = getattr(n, taxid_attr, 'NA'),
                               common_name = '',
                               lineage = [],
                               rank = 'Unknown',
                               named_lineage = [])
            else:
                lineage = self._common_lineage([lf.lineage for lf in n2leaves[n]])
                ancestor = lineage[-1]
                n.add_features(sci_name = tax2name.get(ancestor, str(ancestor)),
                               common_name = tax2common_name.get(ancestor, ''),
                               taxid = ancestor,
                               lineage = lineage,
                               rank = tax2rank.get(ancestor, 'Unknown'),
                               named_lineage = [tax2name.get(tax, str(tax)) for tax in lineage])

        return tax2name, tax2track, tax2rank
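
The two cache checks near the top of the method coerce the cached dictionary keys to int with set(map(int, ...)) and ask whether any requested taxids are missing. A compact sketch of that check with made-up data:

from six.moves import map

tax2name = {"9606": "Homo sapiens"}          # cached keys may be strings
taxids = {9606, 10090}
missing = taxids - set(map(int, tax2name.keys()))
assert missing == {10090}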