re.sub

Here are examples of the Python API re.sub taken from open source projects. By voting up you can indicate which examples are most useful and appropriate.

200 Examples
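
For reference, a minimal self-contained sketch of the call itself; the pattern, replacement, and input strings below are purely illustrative:

import re

# re.sub(pattern, repl, string) returns a copy of `string` with every
# non-overlapping match of `pattern` replaced by `repl`.
print(re.sub(r"\s+", "_", "hello   world"))  # hello_world

# `repl` may also be a callable that receives the match object.
print(re.sub(r"[a-z]+", lambda m: m.group(0).upper(), "abc def"))  # ABC DEF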

Example 51

Project: raspberry_pwn
Source File: dump.py
    def dbTableValues(self, tableValues):
        replication = None
        rtable = None
        dumpFP = None
        appendToFile = False

        if tableValues is None:
            return

        db = tableValues["__infos__"]["db"]
        if not db:
            db = "All"
        table = tableValues["__infos__"]["table"]

        if hasattr(conf, "api"):
            self._write(tableValues, content_type=CONTENT_TYPE.DUMP_TABLE)
            return

        dumpDbPath = os.path.join(conf.dumpPath, re.sub(r"[^\w]", "_", unsafeSQLIdentificatorNaming(db)))

        if conf.dumpFormat == DUMP_FORMAT.SQLITE:
            replication = Replication(os.path.join(conf.dumpPath, "%s.sqlite3" % unsafeSQLIdentificatorNaming(db)))
        elif conf.dumpFormat in (DUMP_FORMAT.CSV, DUMP_FORMAT.HTML):
            if not os.path.isdir(dumpDbPath):
                os.makedirs(dumpDbPath, 0755)

            dumpFileName = os.path.join(dumpDbPath, "%s.%s" % (unsafeSQLIdentificatorNaming(table), conf.dumpFormat.lower()))
            appendToFile = os.path.isfile(dumpFileName) and any((conf.limitStart, conf.limitStop))
            dumpFP = openFile(dumpFileName, "wb" if not appendToFile else "ab")

        count = int(tableValues["__infos__"]["count"])
        separator = str()
        field = 1
        fields = len(tableValues) - 1

        columns = prioritySortColumns(tableValues.keys())

        if conf.col:
            cols = conf.col.split(',')
            columns = sorted(columns, key=lambda _: cols.index(_) if _ in cols else 0)

        for column in columns:
            if column != "__infos__":
                info = tableValues[column]
                lines = "-" * (int(info["length"]) + 2)
                separator += "+%s" % lines

        separator += "+"
        self._write("Database: %s\nTable: %s" % (unsafeSQLIdentificatorNaming(db) if db else "Current database", unsafeSQLIdentificatorNaming(table)))

        if conf.dumpFormat == DUMP_FORMAT.SQLITE:
            cols = []

            for column in columns:
                if column != "__infos__":
                    colType = Replication.INTEGER

                    for value in tableValues[column]['values']:
                        try:
                            if not value or value == " ":  # NULL
                                continue

                            int(value)
                        except ValueError:
                            colType = None
                            break

                    if colType is None:
                        colType = Replication.REAL

                        for value in tableValues[column]['values']:
                            try:
                                if not value or value == " ":  # NULL
                                    continue

                                float(value)
                            except ValueError:
                                colType = None
                                break

                    cols.append((unsafeSQLIdentificatorNaming(column), colType if colType else Replication.TEXT))

            rtable = replication.createTable(table, cols)
        elif conf.dumpFormat == DUMP_FORMAT.HTML:
            dataToDumpFile(dumpFP, "<!DOCTYPE html>\n<html>\n<head>\n")
            dataToDumpFile(dumpFP, "<meta http-equiv=\"Content-type\" content=\"text/html;charset=%s\">\n" % UNICODE_ENCODING)
            dataToDumpFile(dumpFP, "<title>%s</title>\n" % ("%s%s" % ("%s." % db if METADB_SUFFIX not in db else "", table)))
            dataToDumpFile(dumpFP, HTML_DUMP_CSS_STYLE)
            dataToDumpFile(dumpFP, "\n</head>\n<body>\n<table>\n<thead>\n<tr>\n")

        if count == 1:
            self._write("[1 entry]")
        else:
            self._write("[%d entries]" % count)

        self._write(separator)

        for column in columns:
            if column != "__infos__":
                info = tableValues[column]

                column = unsafeSQLIdentificatorNaming(column)
                maxlength = int(info["length"])
                blank = " " * (maxlength - len(column))

                self._write("| %s%s" % (column, blank), newline=False)

                if not appendToFile:
                    if conf.dumpFormat == DUMP_FORMAT.CSV:
                        if field == fields:
                            dataToDumpFile(dumpFP, "%s" % safeCSValue(column))
                        else:
                            dataToDumpFile(dumpFP, "%s%s" % (safeCSValue(column), conf.csvDel))
                    elif conf.dumpFormat == DUMP_FORMAT.HTML:
                        dataToDumpFile(dumpFP, "<th>%s</th>" % cgi.escape(column).encode("ascii", "xmlcharrefreplace"))

                field += 1

        if conf.dumpFormat == DUMP_FORMAT.HTML:
            dataToDumpFile(dumpFP, "\n</tr>\n</thead>\n<tbody>\n")

        self._write("|\n%s" % separator)

        if conf.dumpFormat == DUMP_FORMAT.CSV:
            dataToDumpFile(dumpFP, "\n" if not appendToFile else "")

        elif conf.dumpFormat == DUMP_FORMAT.SQLITE:
            rtable.beginTransaction()

        if count > TRIM_STDOUT_DUMP_SIZE:
            warnMsg = "console output will be trimmed to "
            warnMsg += "last %d rows due to " % TRIM_STDOUT_DUMP_SIZE
            warnMsg += "large table size"
            logger.warning(warnMsg)

        for i in xrange(count):
            console = (i >= count - TRIM_STDOUT_DUMP_SIZE)
            field = 1
            values = []

            if conf.dumpFormat == DUMP_FORMAT.HTML:
                dataToDumpFile(dumpFP, "<tr>")

            for column in columns:
                if column != "__infos__":
                    info = tableValues[column]

                    if len(info["values"]) <= i:
                        continue

                    if info["values"][i] is None:
                        value = u''
                    else:
                        value = getUnicode(info["values"][i])
                        value = DUMP_REPLACEMENTS.get(value, value)

                    values.append(value)
                    maxlength = int(info["length"])
                    blank = " " * (maxlength - len(value))
                    self._write("| %s%s" % (value, blank), newline=False, console=console)

                    if len(value) > MIN_BINARY_DISK_DUMP_SIZE and r'\x' in value:
                        try:
                            mimetype = magic.from_buffer(value, mime=True)
                            if any(mimetype.startswith(_) for _ in ("application", "image")):
                                if not os.path.isdir(dumpDbPath):
                                    os.makedirs(dumpDbPath, 0755)

                                filepath = os.path.join(dumpDbPath, "%s-%d.bin" % (unsafeSQLIdentificatorNaming(column), randomInt(8)))
                                warnMsg = "writing binary ('%s') content to file '%s' " % (mimetype, filepath)
                                logger.warn(warnMsg)

                                with open(filepath, "wb") as f:
                                    _ = safechardecode(value, True)
                                    f.write(_)
                        except magic.MagicException, err:
                            logger.debug(str(err))

                    if conf.dumpFormat == DUMP_FORMAT.CSV:
                        if field == fields:
                            dataToDumpFile(dumpFP, "%s" % safeCSValue(value))
                        else:
                            dataToDumpFile(dumpFP, "%s%s" % (safeCSValue(value), conf.csvDel))
                    elif conf.dumpFormat == DUMP_FORMAT.HTML:
                        dataToDumpFile(dumpFP, "<td>%s</td>" % cgi.escape(value).encode("ascii", "xmlcharrefreplace"))

                    field += 1

            if conf.dumpFormat == DUMP_FORMAT.SQLITE:
                try:
                    rtable.insert(values)
                except SqlmapValueException:
                    pass
            elif conf.dumpFormat == DUMP_FORMAT.CSV:
                dataToDumpFile(dumpFP, "\n")
            elif conf.dumpFormat == DUMP_FORMAT.HTML:
                dataToDumpFile(dumpFP, "</tr>\n")

            self._write("|", console=console)

        self._write("%s\n" % separator)

        if conf.dumpFormat == DUMP_FORMAT.SQLITE:
            rtable.endTransaction()
            logger.info("table '%s.%s' dumped to sqlite3 database '%s'" % (db, table, replication.dbpath))

        elif conf.dumpFormat in (DUMP_FORMAT.CSV, DUMP_FORMAT.HTML):
            if conf.dumpFormat == DUMP_FORMAT.HTML:
                dataToDumpFile(dumpFP, "</tbody>\n</table>\n</body>\n</html>")
            else:
                dataToDumpFile(dumpFP, "\n")
            dumpFP.close()
            logger.info("table '%s.%s' dumped to %s file '%s'" % (db, table, conf.dumpFormat, dumpFileName))
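
The re.sub call near the top of this example, re.sub(r"[^\w]", "_", ...), turns an arbitrary database name into a filesystem-safe directory name by replacing every non-word character with an underscore. A standalone sketch of the same idea, with a made-up name:

import re

db_name = "sales-db (prod)"
print(re.sub(r"[^\w]", "_", db_name))  # sales_db__prod_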

Example 52

Project: tp-qemu
Source File: multi_disk.py
@error.context_aware
def run(test, params, env):
    """
    Test multi disk support of the guest, this case will:
    1) Create disks image in configuration file.
    2) Start the guest with those disks.
    3) Checks qtree vs. test params. (Optional)
    4) Create partition on those disks.
    5) Get disk dev filenames in guest.
    6) Format those disks in guest.
    7) Copy file into / out of those disks.
    8) Compare the original file and the copied file using md5 or fc command.
    9) Repeat steps 3-5 if needed.

    :param test: QEMU test object
    :param params: Dictionary with the test parameters
    :param env: Dictionary with test environment.
    """
    def _add_param(name, value):
        """ Converts name+value to stg_params string """
        if value:
            value = re.sub(' ', '\\ ', value)
            return " %s:%s " % (name, value)
        else:
            return ''

    def _do_post_cmd(session):
        cmd = params.get("post_cmd")
        if cmd:
            session.cmd_status_output(cmd)
        session.close()

    def _get_disk_index(session, image_size, disk_indexs):
        list_disk_cmd = "echo list disk > disk && "
        list_disk_cmd += "echo exit >> disk && diskpart /s disk"
        disks = session.cmd_output(list_disk_cmd)
        size_type = image_size[-1] + "B"
        disk_size = ""

        if size_type == "MB":
            disk_size = image_size[:-1] + " MB"
        elif size_type == "GB" and int(image_size[:-1]) < 8:
            disk_size = str(int(image_size[:-1])*1024) + " MB"
        else:
            disk_size = image_size[:-1] + " GB"

        regex_str = 'Disk (\d+).*?%s.*?%s' % (disk_size, disk_size)
        for disk in disks.splitlines():
            if disk.startswith("  Disk"):
                o = re.findall(regex_str, disk, re.I | re.M)
                if o:
                    disk_indexs.append(o[0])

    error.context("Parsing test configuration", logging.info)
    stg_image_num = 0
    stg_params = params.get("stg_params", "")
    # Compatibility
    stg_params += _add_param("image_size", params.get("stg_image_size"))
    stg_params += _add_param("image_format", params.get("stg_image_format"))
    stg_params += _add_param("image_boot", params.get("stg_image_boot", "no"))
    stg_params += _add_param("drive_format", params.get("stg_drive_format"))
    stg_params += _add_param("drive_cache", params.get("stg_drive_cache"))
    if params.get("stg_assign_index") != "no":
        # Assume 0 and 1 are already occupied (hd0 and cdrom)
        stg_params += _add_param("drive_index", 'range(2,n)')
    param_matrix = {}

    stg_params = stg_params.split(' ')
    i = 0
    while i < len(stg_params) - 1:
        if not stg_params[i].strip():
            i += 1
            continue
        if stg_params[i][-1] == '\\':
            stg_params[i] = '%s %s' % (stg_params[i][:-1],
                                       stg_params.pop(i + 1))
        i += 1

    rerange = []
    has_name = False
    for i in xrange(len(stg_params)):
        if not stg_params[i].strip():
            continue
        (cmd, parm) = stg_params[i].split(':', 1)
        if cmd == "image_name":
            has_name = True
        if _RE_RANGE1.match(parm):
            parm = _range(parm)
            if parm is False:
                raise error.TestError("Incorrect cfg: stg_params %s looks "
                                      "like range(..) but doesn't contain "
                                      "numbers." % cmd)
            param_matrix[cmd] = parm
            if type(parm) is str:
                # When we know the stg_image_num, substitute it.
                rerange.append(cmd)
                continue
        else:
            # ',' separated list of values
            parm = parm.split(',')
            j = 0
            while j < len(parm) - 1:
                if parm[j][-1] == '\\':
                    parm[j] = '%s,%s' % (parm[j][:-1], parm.pop(j + 1))
                j += 1
            param_matrix[cmd] = parm
        stg_image_num = max(stg_image_num, len(parm))

    stg_image_num = int(params.get('stg_image_num', stg_image_num))
    for cmd in rerange:
        param_matrix[cmd] = _range(param_matrix[cmd], stg_image_num)
    # param_table* are for pretty print of param_matrix
    param_table = []
    param_table_header = ['name']
    if not has_name:
        param_table_header.append('image_name')
    for _ in param_matrix:
        param_table_header.append(_)

    stg_image_name = params.get('stg_image_name', 'images/%s')
    for i in xrange(stg_image_num):
        name = "stg%d" % i
        params['images'] += " %s" % name
        param_table.append([])
        param_table[-1].append(name)
        if not has_name:
            params["image_name_%s" % name] = stg_image_name % name
            param_table[-1].append(params.get("image_name_%s" % name))
        for parm in param_matrix.iteritems():
            params['%s_%s' % (parm[0], name)] = str(parm[1][i % len(parm[1])])
            param_table[-1].append(params.get('%s_%s' % (parm[0], name)))

    if params.get("multi_disk_params_only") == 'yes':
        # Only print the test param_matrix and finish
        logging.info('Newly added disks:\n%s',
                     utils.matrix_to_string(param_table, param_table_header))
        return

    # Always recreate VMs and disks
    error.context("Start the guest with new disks", logging.info)
    for vm_name in params.objects("vms"):
        vm_params = params.object_params(vm_name)
        env_process.process_images(env_process.preprocess_image, test,
                                   vm_params)

    error.context("Start the guest with those disks", logging.info)
    vm = env.get_vm(params["main_vm"])
    vm.create(timeout=max(10, stg_image_num), params=params)
    session = vm.wait_for_login(timeout=int(params.get("login_timeout", 360)))

    n_repeat = int(params.get("n_repeat", "1"))
    file_system = [_.strip() for _ in params.get("file_system").split()]
    cmd_timeout = float(params.get("cmd_timeout", 360))
    re_str = params["re_str"]
    black_list = params["black_list"].split()
    stg_image_size = params.get("stg_image_size")
    disk_indexs = []

    have_qtree = True
    out = vm.monitor.human_monitor_cmd("info qtree", debug=False)
    if "unknown command" in str(out):
        have_qtree = False

    if (params.get("check_guest_proc_scsi") == "yes") and have_qtree:
        error.context("Verifying qtree vs. test params")
        err = 0
        qtree = qemu_qtree.QtreeContainer()
        qtree.parse_info_qtree(vm.monitor.info('qtree'))
        disks = qemu_qtree.QtreeDisksContainer(qtree.get_nodes())
        (tmp1, tmp2) = disks.parse_info_block(vm.monitor.info_block())
        err += tmp1 + tmp2
        err += disks.generate_params()
        err += disks.check_disk_params(params)
        (tmp1, tmp2, _, _) = disks.check_guests_proc_scsi(
            session.cmd_output('cat /proc/scsi/scsi'))
        err += tmp1 + tmp2

        if err:
            raise error.TestFail("%s errors occurred while verifying"
                                 " qtree vs. params" % err)
        if params.get('multi_disk_only_qtree') == 'yes':
            return

    try:
        cmd = params.get("clean_cmd")
        if cmd:
            session.cmd_status_output(cmd)

        if params.get("os_type") == "windows":
            error.context("Create partition on those disks", logging.info)
            # Get the disk index
            _get_disk_index(session, stg_image_size, disk_indexs)
            if len(disk_indexs) < stg_image_num:
                err_msg = "Set disks num: %d" % stg_image_num
                err_msg += ", Get disks num in guest: %d" % len(disk_indexs)
                raise error.TestFail("Fail to list all the volumes, %s" % err_msg)

            # Random select one file system from file_system
            index = random.randint(0, (len(file_system) - 1))
            fs_type = file_system[index].strip()
            for i in xrange(stg_image_num):
                utils_misc.format_windows_disk(session, disk_indexs[i], None,
                                               None, fs_type)

        error.context("Get disks dev filenames in guest", logging.info)
        cmd = params["list_volume_command"]
        s, output = session.cmd_status_output(cmd, timeout=cmd_timeout)
        if s != 0:
            raise error.TestFail("List volume command failed with cmd '%s'.\n"
                                 "Output is: %s\n" % (cmd, output))

        output = session.cmd_output(cmd, timeout=cmd_timeout)
        disks = re.findall(re_str, output)
        disks = map(string.strip, disks)
        disks.sort()
        logging.debug("Volume list that meet regular expressions: %s",
                      " ".join(disks))

        images = params.get("images").split()
        if len(disks) < len(images):
            logging.debug("disks: %s , images: %s", len(disks), len(images))
            raise error.TestFail("Fail to list all the volumes!")

        if params.get("os_type") == "linux":
            output = session.cmd_output("mount")
            li = re.findall(r"^/dev/(%s)\d*" % re_str, output, re.M)
            if li:
                black_list.extend(li)
        else:
            black_list.extend(utils_misc.get_winutils_vol(session))
        disks = set(disks)
        black_list = set(black_list)
        logging.info("No need to check volume '%s'", (disks & black_list))
        disks = disks - black_list
    except Exception:
        _do_post_cmd(session)
        raise

    try:
        for i in range(n_repeat):
            logging.info("iterations: %s", (i + 1))
            error.context("Format those disks in guest", logging.info)
            for disk in disks:
                disk = disk.strip()
                error.context("Preparing disk: %s..." % disk)

                # Random select one file system from file_system
                index = random.randint(0, (len(file_system) - 1))
                fs = file_system[index].strip()
                cmd = params["format_command"] % (fs, disk)
                error.context("formatting test disk")
                session.cmd(cmd, timeout=cmd_timeout)
                cmd = params.get("mount_command")
                if cmd:
                    cmd = cmd % (disk, disk, disk)
                    session.cmd(cmd)

            error.context("Copy file into / out of those disks", logging.info)
            for disk in disks:
                disk = disk.strip()

                error.context("Performing I/O on disk: %s..." % disk)
                cmd_list = params["cmd_list"].split()
                for cmd_l in cmd_list:
                    cmd = params.get(cmd_l)
                    if cmd:
                        session.cmd(cmd % disk, timeout=cmd_timeout)

                cmd = params["compare_command"]
                key_word = params["check_result_key_word"]
                output = session.cmd_output(cmd)
                if key_word not in output:
                    raise error.TestFail("Files on guest os root fs and disk "
                                         "differ")

            if params.get("umount_command"):
                cmd = params.get("show_mount_cmd")
                output = session.cmd_output(cmd)
                disks = re.findall(re_str, output)
                disks.sort()
                for disk in disks:
                    disk = disk.strip()
                    error.context("Unmounting disk: %s..." % disk)
                    cmd = params.get("umount_command") % (disk, disk)
                    session.cmd(cmd)
    finally:
        cmd = params.get("show_mount_cmd")
        if cmd:
            try:
                output = session.cmd_output(cmd)
                disks = re.findall(re_str, output)
                disks.sort()
                for disk in disks:
                    error.context("Unmounting disk: %s..." % disk)
                    cmd = params["umount_command"] % (disk, disk)
                    session.cmd(cmd)
            except Exception, err:
                logging.warn("Error during cleanup: '%s'", err)

        _do_post_cmd(session)
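
The only re.sub call in this example lives in _add_param, where it backslash-escapes spaces so the value survives the later stg_params.split(' '). A standalone sketch with an illustrative value:

import re

value = re.sub(' ', '\\ ', "4096 MB")
print(value)  # 4096\ MB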

Example 53

Project: spladder
Source File: settings.py
def parse_args(options, identity='main'):

    ### load all default settings
    CFG = default_settings()

    ### general options
    if options.verbose in ['n', 'y']:
        CFG['verbose'] = (options.verbose == 'y')
    else:
        print >> sys.stderr, 'ERROR: option verbose should have value y or n, but has %s' % options.verbose
        sys.exit(1)

    if options.debug in ['n', 'y']:
        CFG['debug'] = (options.debug == 'y')
    else:
        print >> sys.stderr, 'ERROR: option debug should have value y or n, but has %s' % options.debug
        sys.exit(1)

    CFG['event_types'] = options.event_types.strip(',').split(',')

    if options.outdir == '-':
        print >> sys.stderr, 'ERROR: please provide the mandatory parameter: out directory\n\n'
        options.parser.print_help()
        sys.exit(2)
    else:
        if not os.path.exists(options.outdir):
            print >> sys.stderr, 'WARNING: Output directory %s does not exist - will be created\n\n' % options.outdir
            try:
                os.makedirs(options.outdir)
            except OSError:
                print >> sys.stderr, 'ERROR: Output directory %s can not be created.\n\n' % options.outdir
                sys.exit(2)
        CFG['out_dirname'] = options.outdir

    ### options specific for main program
    if identity == 'main':
        if options.insert_ir in ['n', 'y']:
            CFG['do_insert_intron_retentions'] = (options.insert_ir == 'y')
        else:
            print >> sys.stderr, 'ERROR: option insert_ir should have value y or n, but has %s' % options.insert_ir
            sys.exit(1)

        if options.insert_es in ['n', 'y']:
            CFG['do_insert_cassette_exons'] = (options.insert_es == 'y')
        else:
            print >> sys.stderr, 'ERROR: option insert_es should have value y or n, but has %s' % options.insert_es
            sys.exit(1)

        if options.insert_ni in ['n', 'y']:
            CFG['do_insert_intron_edges'] = (options.insert_ni == 'y')
        else:
            print >> sys.stderr, 'ERROR: option insert_ni should have value y or n, but has %s' % options.insert_ni
            sys.exit(1)

        if options.remove_se in ['n', 'y']:
            CFG['do_remove_short_exons'] = (options.remove_se == 'y')
        else:
            print >> sys.stderr, 'ERROR: option remove_se should have value y or n, but has %s' % options.remove_se
            sys.exit(1)

        if options.infer_sg in ['n', 'y']:
            CFG['do_infer_splice_graph'] = (options.infer_sg == 'y')
        else:
            print >> sys.stderr, 'ERROR: option infer_sg should have value y or n, but has %s' % options.infer_sg
            sys.exit(1)

        if options.var_aware in ['n', 'y']:
            CFG['var_aware'] = (options.var_aware == 'y')
        else:
            print >> sys.stderr, 'ERROR: option var_aware should have value y or n, but has %s' % options.var_aware
            sys.exit(1)

        if options.primary_only in ['n', 'y']:
            CFG['primary_only'] = (options.primary_only == 'y')
        else:
            print >> sys.stderr, 'ERROR: option primary_only should have value y or n, but has %s' % options.primary_only
            sys.exit(1)

        if options.intron_cov in ['n', 'y']:
            CFG['count_intron_cov'] = (options.intron_cov == 'y')
        else:
            print >> sys.stderr, 'ERROR: option intron_cov should have value y or n, but has %s' % options.intron_cov

        if options.quantify_graph in ['n', 'y']:
            CFG['count_segment_graph'] = (options.quantify_graph == 'y')
        else:
            print >> sys.stderr, 'ERROR: option quantify_graph should have value y or n, but has %s' % options.quantify_graph

        if options.ignore_mismatches in ['n', 'y']:
            CFG['ignore_mismatch_tag'] = (options.ignore_mismatches == 'y')
        else:
            print >> sys.stderr, 'ERROR: option ignore_mismatches should have value y or n, but has %s' % options.ignore_mismatches
    
        if options.output_struc in ['n', 'y']:
            CFG['output_struc'] = (options.output_struc == 'y')
            CFG['output_confirmed_struc'] = (options.output_struc == 'y')
        else:
            print >> sys.stderr, 'ERROR: option output_struc should have value y or n, but has %s' % options.output_struc
    
        ### option to store sparse BAM representation
        if options.sparse_bam in ['n', 'y']:
            CFG['bam_to_sparse'] = (options.sparse_bam == 'y')
        else:
            print >> sys.stderr, 'ERROR: option sparse_bam should have value y or n, but has %s' % options.sparse_bam

        CFG['insert_intron_iterations'] = options.iterations
        if options.spladderfile != '-':
            CFG['spladder_infile'] = options.spladderfile

        ### settings for the alt splice part
        CFG['same_genestruct_for_all_samples'] = (options.same_genome == 'y')
        if options.replicates != '-':
            CFG['replicate_idxs'] = [int(x) for x in options.replicates.split(',')]
        CFG['curate_alt_prime_events'] = (options.curate_alt_prime == 'y')

        ### open log file, if specified
        if options.logfile != '-':
            CFG['log_fname'] = options.logfile
            CFG['fd_log'] = open(options.logfile, 'w')
        else:
            CFG['log_fname'] = 'stdout'
            CFG['fd_log'] = sys.stdout

        #if options.user != '-':
        #    CFG['user_settings'] = options.user

        ### alt splice analysis
        CFG['run_as_analysis'] = (options.extract_as == 'y')
        
        ### mandatory parameters for main spladder
        if options.bams == '-':
            print >> sys.stderr, 'ERROR: please provide the mandatory parameter: bam files\n\n'
            options.parser.print_help()
            sys.exit(2)
        else:
            CFG['bam_fnames'] = options.bams.strip(',').split(',')
            ### check existence of files
            for fname in CFG['bam_fnames']:
                if not os.path.isfile(fname):
                    print >> sys.stderr, 'ERROR: Input file %s can not be found\n\n' % fname
                    sys.exit(2)

        if options.annotation == '-':
            print >> sys.stderr, 'ERROR: please provide the mandatory parameter: annotation\n\n'
            options.parser.print_help()
            sys.exit(2)
        elif not os.path.isfile(options.annotation):
            print >> sys.stderr, 'ERROR: Annotation file %s can not be found\n\n' % options.annotation
            sys.exit(2)
        else:
            CFG['anno_fname'] = options.annotation
        
        if options.refstrain != '-':
            CFG['reference_strain'] = options.refstrain
            ref_tag = '%s:' % options.refstrain
        else:
            ref_tag = ''

        ### rproc options
        if options.pyproc == 'y':
            CFG['rproc'] = (options.pyproc == 'y')
            CFG['options_rproc'] = dict()
            CFG['options_rproc']['mem_req_resubmit']  = [30000, 60000, 80000]
            CFG['options_rproc']['time_req_resubmit'] = [60*60, 80*60, 90*60]
            CFG['options_rproc']['resubmit'] = 3
            CFG['options_rproc']['priority'] = 100
            CFG['options_rproc']['addpaths'] = CFG['paths']


    if identity in ['main', 'test']:
        ### parallel processing
        CFG['parallel'] = options.parallel

        CFG['merge_strategy'] = options.merge
        CFG['read_length'] = options.readlen
        CFG['confidence_level'] = options.confidence

        if options.validate_sg in ['n', 'y']:
            CFG['validate_splicegraphs'] = (options.validate_sg == 'y')
        else:
            print >> sys.stderr, 'ERROR: option validate_sg should have value y or n, but has %s' % options.validate_sg
            sys.exit(1)
    
    if identity == 'test':
        CFG['multiTest'] = options.correction
        CFG['max_0_frac'] = options.max_0_frac
        CFG['min_count'] = options.min_count
        
        if options.non_alt_norm in ['n', 'y']:
            CFG['non_alt_norm'] = (options.non_alt_norm == 'y')
        else:
            print >> sys.stderr, 'ERROR: option non_alt_norm should have value y or n, but has %s' % options.non_alt_norm
            sys.exit(1)

        if options.matlab in ['n', 'y']:
            CFG['is_matlab'] = (options.matlab == 'y')
        else:
            print >> sys.stderr, 'ERROR: option matlab should have value y or n, but has %s' % options.matlab
            sys.exit(1)

        if options.conditionA == '-':
            print >> sys.stderr, 'ERROR: At least one sample for condition A required'
            sys.exit(1)
        if options.conditionB == '-':
            print >> sys.stderr, 'ERROR: At least one sample for condition B required'
            sys.exit(1)

        if options.diagnose_plots in ['n', 'y']:
            CFG['diagnose_plots'] = (options.diagnose_plots == 'y')
        else:
            print >> sys.stderr, 'ERROR: option diagnose_plots should have value y or n, but has %s' % options.diagnose_plots
            sys.exit(1)

        CFG['conditionA'] = [os.path.basename(x).replace('.bam', '') for x in options.conditionA.strip(',').split(',')]
        CFG['conditionB'] = [os.path.basename(x).replace('.bam', '') for x in options.conditionB.strip(',').split(',')]
        if len(CFG['conditionA']) > 0 and CFG['conditionA'][0].lower().endswith('txt'):
            CFG['conditionA'] = [str(x) for x in sp.loadtxt(CFG['conditionA'][0], dtype='str')]
        if len(CFG['conditionB']) > 0 and CFG['conditionB'][0].lower().endswith('txt'):
            CFG['conditionB'] = [str(x) for x in sp.loadtxt(CFG['conditionB'][0], dtype='str')]

    ### check if we got a list of bam files in a text file instead of a comma separated list
    if len(CFG['bam_fnames']) > 0 and CFG['bam_fnames'][0].split('.')[-1] == 'txt':
        CFG['bam_fnames'] = [str(x) for x in sp.atleast_1d(sp.loadtxt(CFG['bam_fnames'][0], dtype='str'))]

    ### assemble strain list
    CFG['samples'] = []
    CFG['strains'] = []
    for i in range(len(CFG['bam_fnames'])):
        if options.label != '-':
            CFG['samples'].append('%s_%s' % (options.label, re.sub(r'(.bam|.hdf5)$', '', CFG['bam_fnames'][i].split('/')[-1])))
        else:
            CFG['samples'].append(re.sub(r'(.bam|.hdf5)$', '', CFG['bam_fnames'][i].split('/')[-1]))
        CFG['strains'].append('%s%s' % (ref_tag, CFG['samples'][-1]))
    CFG['strains'] = sp.array(CFG['strains'])

    ### adapt graph validation requirement to max number of samples
    CFG['sg_min_edge_count'] = min(CFG['sg_min_edge_count'], len(CFG['samples']))

    return CFG
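
The re.sub calls near the end strip a trailing .bam or .hdf5 extension from each input filename when building the sample labels. A standalone sketch with an invented path:

import re

fname = "/data/sample_A.bam"
print(re.sub(r'(.bam|.hdf5)$', '', fname.split('/')[-1]))  # sample_A

Note that the dots in the pattern are unescaped and therefore match any character, so a name ending in, say, "xbam" would also lose its last four characters.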

Example 54

Project: flask-funnel
Source File: manager.py
@manager.command
def bundle_assets():
    """Compress and minify assets"""
    YUI_COMPRESSOR_BIN = current_app.config.get('YUI_COMPRESSOR_BIN')

    path_to_jar = YUI_COMPRESSOR_BIN

    tmp_files = []

    def get_path(item):
        """Get the static path of an item"""
        return os.path.join(current_app.static_folder, item)

    def fix_urls(filename, compressed_file):
        """Fix relative paths in URLs for bundles"""
        print("Fixing URLs in %s" % filename)

        def fix_urls_regex(url, relpath):
            """Callback to fix relative path"""
            url = url.group(1).strip('"\'')
            if url.startswith(('data:', 'http:', 'https:', 'attr(')):
                return url
            else:
                url = os.path.relpath(url, relpath)
                return 'url(%s)' % url

        css_content = ''
        with open(get_path(filename), 'r') as css_in:
            css_content = css_in.read()

        relpath = os.path.relpath(os.path.dirname(compressed_file),
                                  get_path(os.path.dirname(filename)))

        parse = lambda url: fix_urls_regex(url, relpath)

        css_parsed = re.sub('url\(([^)]*?)\)', parse, css_content)

        out_file = get_path(os.path.join(current_app.config.get('BUNDLES_DIR'),
                                         'tmp', '%s.tmp' % filename))

        if not os.path.exists(os.path.dirname(out_file)):
            os.makedirs(os.path.dirname(out_file))

        with open(out_file, 'w') as css_out:
            css_out.write(css_parsed)

        return os.path.relpath(out_file, get_path('.'))

    def preprocess_file(filename, compressed_file):
        """Preprocess the file"""
        if filename.startswith('//'):
            url = 'http:%s' % filename
        elif filename.startswith(('http:', 'https:')):
            url = filename
        else:
            url = None

        if url:
            ext_media_path = get_path('external')

            if not os.path.exists(ext_media_path):
                os.makedirs(ext_media_path)

            filename = os.path.basename(url)
            if filename.endswith(('.js', '.css', '.less')):
                fp = get_path(filename.lstrip('/'))
                file_path = os.path.join(ext_media_path, fp)

                try:
                    req = urlopen(url)
                    print(' - Fetching %s ...' % url)
                except HTTPError as e:
                    print(' - HTTP Error %s for %s, %s' % (url, filename,
                                                           str(e.code)))
                    return None
                except URLError as e:
                    print(' - Invalid URL %s for %s, %s' % (url, filename,
                                                            str(e.reason)))
                    return None

                with open(file_path, 'w+') as fp:
                    try:
                        shutil.copyfileobj(req, fp)
                    except shutil.Error:
                        print(' - Could not copy file %s' % filename)
                filename = os.path.join('external', filename)
            else:
                print(' - Not a valid remote file %s' % filename)
                return None

        filename = preprocess(filename.lstrip('/'))

        if url is None and filename.endswith('.css'):
            filename = fix_urls(filename, compressed_file)
            tmp_files.append(filename)

        return get_path(filename.lstrip('/'))

    def minify(ftype, file_in, file_out):
        """Minify the file"""
        if ftype == 'js' and 'UGLIFY_BIN' in current_app.config:
            o = {'method': 'UglifyJS',
                 'bin': current_app.config.get('UGLIFY_BIN')}
            subprocess.call("%s -o %s %s" % (o['bin'], file_out, file_in),
                            shell=True, stdout=subprocess.PIPE)
        elif ftype == 'css' and 'CLEANCSS_BIN' in current_app.config:
            o = {'method': 'clean-css',
                 'bin': current_app.config.get('CLEANCSS_BIN')}
            subprocess.call("%s -o %s %s" % (o['bin'], file_out, file_in),
                            shell=True, stdout=subprocess.PIPE)
        else:
            o = {'method': 'YUI Compressor',
                 'bin': current_app.config.get('JAVA_BIN')}
            variables = (o['bin'], path_to_jar, file_in, file_out)
            subprocess.call("%s -jar %s %s -o %s" % variables,
                            shell=True, stdout=subprocess.PIPE)

        print("Minifying %s (using %s)" % (file_in, o['method']))

    # Assemble bundles and process
    bundles = {
        'css': current_app.config.get('CSS_BUNDLES'),
        'js': current_app.config.get('JS_BUNDLES'),
    }

    for ftype, bundle in bundles.items():
        for name, files in bundle.items():
            concatenated_file = get_path(os.path.join(
                current_app.config.get('BUNDLES_DIR'), ftype,
                '%s-all.%s' % (name, ftype,)))
            compressed_file = get_path(os.path.join(
                current_app.config.get('BUNDLES_DIR'), ftype,
                '%s-min.%s' % (name, ftype,)))

            if not os.path.exists(os.path.dirname(concatenated_file)):
                os.makedirs(os.path.dirname(concatenated_file))

            all_files = []
            for fn in files:
                processed = preprocess_file(fn, compressed_file)
                print('Processed: %s' % processed)
                if processed is not None:
                    all_files.append(processed)

            # Concatenate
            if len(all_files) == 0:
                print("Warning: '%s' is an empty bundle." % name)

            all_files = ' '.join(all_files)

            subprocess.call("cat %s > %s" % (all_files, concatenated_file),
                            shell=True)

            # Minify
            minify(ftype, concatenated_file, compressed_file)

            # Post process
            postprocess(compressed_file, fix_path=False)

            # Remove concatenated file
            print('Remove concatenated file')
            os.remove(concatenated_file)

    # Cleanup
    print('Clean up temporary files')
    for file in tmp_files:
        try:
            os.remove(get_path(file))
            os.rmdir(os.path.dirname(get_path(file)))
        except OSError:
            pass

    try:
        os.rmdir(get_path(os.path.join(current_app.config.get('BUNDLES_DIR'),
                                       'tmp')))
    except OSError:
        pass
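
The interesting usage here is in fix_urls: the replacement argument to re.sub is a callable, so every url(...) occurrence in the CSS is rewritten through fix_urls_regex. A standalone sketch of the callable-replacement form; the helper and sample CSS are invented for illustration:

import re

def rewrite(match):
    # match.group(1) is whatever sat inside url(...)
    return 'url(static/%s)' % match.group(1).strip('"\'')

css = 'body { background: url("img/bg.png"); }'
print(re.sub(r'url\(([^)]*?)\)', rewrite, css))
# body { background: url(static/img/bg.png); }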

Example 55

Project: pyfpdf
Source File: ttfonts.py
    def extractInfo(self): 
        #################/
        # name - Naming table
        #################/
        self.sFamilyClass = 0
        self.sFamilySubClass = 0

        name_offset = self.seek_table("name")
        format = self.read_ushort()
        if (format != 0):
            die("Unknown name table format " + str(format))
        numRecords = self.read_ushort()
        string_data_offset = name_offset + self.read_ushort()
        names = {1:'',2:'',3:'',4:'',6:''}
        K = list(names.keys())
        nameCount = len(names)
        for i in range(numRecords): 
            platformId = self.read_ushort()
            encodingId = self.read_ushort()
            languageId = self.read_ushort()
            nameId = self.read_ushort()
            length = self.read_ushort()
            offset = self.read_ushort()
            if (nameId not in K): continue
            N = ''
            if (platformId == 3 and encodingId == 1 and languageId == 0x409):  # Microsoft, Unicode, US English, PS Name
                opos = self._pos
                self.seek(string_data_offset + offset)
                if (length % 2 != 0):
                    die("PostScript name is UTF-16BE string of odd length")
                length //= 2
                N = ''
                while (length > 0):
                    char = self.read_ushort()
                    N += (chr(char))
                    length -= 1
                self._pos = opos
                self.seek(opos)
            
            elif (platformId == 1 and encodingId == 0 and languageId == 0):  # Macintosh, Roman, English, PS Name
                opos = self._pos
                N = self.get_chunk(string_data_offset + offset, length).decode("latin1")
                self._pos = opos
                self.seek(opos)
            
            if (N and names[nameId]==''):
                names[nameId] = N
                nameCount -= 1
                if (nameCount==0): break
            
        
        if (names[6]):
            psName = names[6]
        elif (names[4]):
            psName = re.sub(' ','-',names[4])
        elif (names[1]):
            psName = re.sub(' ','-',names[1])
        else:
            psName = ''
        if (not psName):
            die("Could not find PostScript font name")
        self.name = psName
        if (names[1]):
            self.familyName = names[1]  
        else:  
            self.familyName = psName 
        if (names[2]):
            self.styleName = names[2]
        else:
            self.styleName = 'Regular' 
        if (names[4]):
            self.fullName = names[4]
        else:
            self.fullName = psName 
        if (names[3]):
            self.uniqueFontID = names[3]
        else:
            self.uniqueFontID = psName 
        if (names[6]):
            self.fullName = names[6] 

        #################/
        # head - Font header table
        #################/
        self.seek_table("head")
        self.skip(18) 
        self.unitsPerEm = unitsPerEm = self.read_ushort()
        scale = 1000 / float(unitsPerEm)
        self.skip(16)
        xMin = self.read_short()
        yMin = self.read_short()
        xMax = self.read_short()
        yMax = self.read_short()
        self.bbox = [(xMin*scale), (yMin*scale), (xMax*scale), (yMax*scale)]
        self.skip(3*2)
        indexToLocFormat = self.read_ushort()
        glyphDataFormat = self.read_ushort()
        if (glyphDataFormat != 0):
            die('Unknown glyph data format ' + str(glyphDataFormat))

        #################/
        # hhea metrics table
        #################/
        # ttf2t1 seems to use this value rather than the one in OS/2 - so put in for compatibility
        if ("hhea" in self.tables):
            self.seek_table("hhea")
            self.skip(4)
            hheaAscender = self.read_short()
            hheaDescender = self.read_short()
            self.ascent = (hheaAscender *scale)
            self.descent = (hheaDescender *scale)
        

        #################/
        # OS/2 - OS/2 and Windows metrics table
        #################/
        if ("OS/2" in self.tables): 
            self.seek_table("OS/2")
            version = self.read_ushort()
            self.skip(2)
            usWeightClass = self.read_ushort()
            self.skip(2)
            fsType = self.read_ushort()
            if (fsType == 0x0002 or (fsType & 0x0300) != 0): 
                die('ERROR - Font file ' + self.filename + ' cannot be embedded due to copyright restrictions.')
                self.restrictedUse = True
            
            self.skip(20)
            sF = self.read_short()
            self.sFamilyClass = (sF >> 8)
            self.sFamilySubClass = (sF & 0xFF)
            self._pos += 10  #PANOSE = 10 byte length
            panose = self.fh.read(10)
            self.skip(26)
            sTypoAscender = self.read_short()
            sTypoDescender = self.read_short()
            if (not self.ascent): 
                self.ascent = (sTypoAscender*scale)
            if (not self.descent): 
                self.descent = (sTypoDescender*scale)
            if (version > 1):
                self.skip(16)
                sCapHeight = self.read_short()
                self.capHeight = (sCapHeight*scale)
            else:
                self.capHeight = self.ascent            
        
        else:
            usWeightClass = 500
            if (not self.ascent): self.ascent = (yMax*scale)
            if (not self.descent): self.descent = (yMin*scale)
            self.capHeight = self.ascent
        
        self.stemV = 50 + int(pow((usWeightClass / 65.0),2))

        #################/
        # post - PostScript table
        #################/
        self.seek_table("post")
        self.skip(4) 
        self.italicAngle = self.read_short() + self.read_ushort() / 65536.0
        self.underlinePosition = self.read_short() * scale
        self.underlineThickness = self.read_short() * scale
        isFixedPitch = self.read_ulong()

        self.flags = 4

        if (self.italicAngle!= 0):
            self.flags = self.flags | 64
        if (usWeightClass >= 600):
            self.flags = self.flags | 262144
        if (isFixedPitch):
            self.flags = self.flags | 1

        #################/
        # hhea - Horizontal header table
        #################/
        self.seek_table("hhea")
        self.skip(32) 
        metricDataFormat = self.read_ushort()
        if (metricDataFormat != 0):
            die('Unknown horizontal metric data format ' + str(metricDataFormat))
        numberOfHMetrics = self.read_ushort()
        if (numberOfHMetrics == 0):
            die('Number of horizontal metrics is 0')

        #################/
        # maxp - Maximum profile table
        #################/
        self.seek_table("maxp")
        self.skip(4)
        numGlyphs = self.read_ushort()

        #################/
        # cmap - Character to glyph index mapping table
        #################/
        cmap_offset = self.seek_table("cmap")
        self.skip(2)
        cmapTableCount = self.read_ushort()
        unicode_cmap_offset = 0
        unicode_cmap_offset12 = 0
        
        for i in range(cmapTableCount):
            platformID = self.read_ushort()
            encodingID = self.read_ushort()
            offset = self.read_ulong()
            save_pos = self._pos
            if platformID == 3 and encodingID == 10:  # Microsoft, UCS-4
                format = self.get_ushort(cmap_offset + offset)
                if (format == 12):
                    if not unicode_cmap_offset12:
                        unicode_cmap_offset12 = cmap_offset + offset
                    break
            if ((platformID == 3 and encodingID == 1) or platformID == 0):  # Microsoft, Unicode
                format = self.get_ushort(cmap_offset + offset)
                if (format == 4):
                    if (not unicode_cmap_offset):
                        unicode_cmap_offset = cmap_offset + offset
                    break
                    
            self.seek(save_pos)
        
        if not unicode_cmap_offset and not unicode_cmap_offset12:
            die('Font (' + self.filename + ') does not have cmap for Unicode (platform 3, encoding 1, format 4, or platform 3, encoding 10, format 12, or platform 0, any encoding, format 4)')

        glyphToChar = {}
        charToGlyph = {}
        if unicode_cmap_offset12:
            self.getCMAP12(unicode_cmap_offset12, glyphToChar, charToGlyph)
        else:    
            self.getCMAP4(unicode_cmap_offset, glyphToChar, charToGlyph)

        #################/
        # hmtx - Horizontal metrics table
        #################/
        self.getHMTX(numberOfHMetrics, numGlyphs, glyphToChar, scale)
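
The re.sub calls in the naming section above simply turn spaces into hyphens when deriving a PostScript name from the family or full font name. A standalone sketch with an illustrative name:

import re

print(re.sub(' ', '-', "DejaVu Sans Condensed"))  # DejaVu-Sans-Condensed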

Example 56

Project: youtube-dl
Source File: dailymotion.py
    def _real_extract(self, url):
        video_id = self._match_id(url)

        webpage = self._download_webpage_no_ff(
            'https://www.dailymotion.com/video/%s' % video_id, video_id)

        age_limit = self._rta_search(webpage)

        description = self._og_search_description(webpage) or self._html_search_meta(
            'description', webpage, 'description')

        view_count_str = self._search_regex(
            (r'<meta[^>]+itemprop="interactionCount"[^>]+content="UserPlays:([\s\d,.]+)"',
             r'video_views_count[^>]+>\s+([\s\d\,.]+)'),
            webpage, 'view count', fatal=False)
        if view_count_str:
            view_count_str = re.sub(r'\s', '', view_count_str)
        view_count = str_to_int(view_count_str)
        comment_count = int_or_none(self._search_regex(
            r'<meta[^>]+itemprop="interactionCount"[^>]+content="UserComments:(\d+)"',
            webpage, 'comment count', fatal=False))

        player_v5 = self._search_regex(
            [r'buildPlayer\(({.+?})\);\n',  # See https://github.com/rg3/youtube-dl/issues/7826
             r'playerV5\s*=\s*dmp\.create\([^,]+?,\s*({.+?})\);',
             r'buildPlayer\(({.+?})\);',
             r'var\s+config\s*=\s*({.+?});'],
            webpage, 'player v5', default=None)
        if player_v5:
            player = self._parse_json(player_v5, video_id)
            metadata = player['metadata']

            self._check_error(metadata)

            formats = []
            for quality, media_list in metadata['qualities'].items():
                for media in media_list:
                    media_url = media.get('url')
                    if not media_url:
                        continue
                    type_ = media.get('type')
                    if type_ == 'application/vnd.lumberjack.manifest':
                        continue
                    ext = mimetype2ext(type_) or determine_ext(media_url)
                    if ext == 'm3u8':
                        formats.extend(self._extract_m3u8_formats(
                            media_url, video_id, 'mp4', preference=-1,
                            m3u8_id='hls', fatal=False))
                    elif ext == 'f4m':
                        formats.extend(self._extract_f4m_formats(
                            media_url, video_id, preference=-1, f4m_id='hds', fatal=False))
                    else:
                        f = {
                            'url': media_url,
                            'format_id': 'http-%s' % quality,
                            'ext': ext,
                        }
                        m = re.search(r'H264-(?P<width>\d+)x(?P<height>\d+)', media_url)
                        if m:
                            f.update({
                                'width': int(m.group('width')),
                                'height': int(m.group('height')),
                            })
                        formats.append(f)
            self._sort_formats(formats)

            title = metadata['title']
            duration = int_or_none(metadata.get('duration'))
            timestamp = int_or_none(metadata.get('created_time'))
            thumbnail = metadata.get('poster_url')
            uploader = metadata.get('owner', {}).get('screenname')
            uploader_id = metadata.get('owner', {}).get('id')

            subtitles = {}
            subtitles_data = metadata.get('subtitles', {}).get('data', {})
            if subtitles_data and isinstance(subtitles_data, dict):
                for subtitle_lang, subtitle in subtitles_data.items():
                    subtitles[subtitle_lang] = [{
                        'ext': determine_ext(subtitle_url),
                        'url': subtitle_url,
                    } for subtitle_url in subtitle.get('urls', [])]

            return {
                'id': video_id,
                'title': title,
                'description': description,
                'thumbnail': thumbnail,
                'duration': duration,
                'timestamp': timestamp,
                'uploader': uploader,
                'uploader_id': uploader_id,
                'age_limit': age_limit,
                'view_count': view_count,
                'comment_count': comment_count,
                'formats': formats,
                'subtitles': subtitles,
            }

        # vevo embed
        vevo_id = self._search_regex(
            r'<link rel="video_src" href="[^"]*?vevo.com[^"]*?video=(?P<id>[\w]*)',
            webpage, 'vevo embed', default=None)
        if vevo_id:
            return self.url_result('vevo:%s' % vevo_id, 'Vevo')

        # fallback old player
        embed_page = self._download_webpage_no_ff(
            'https://www.dailymotion.com/embed/video/%s' % video_id,
            video_id, 'Downloading embed page')

        timestamp = parse_iso8601(self._html_search_meta(
            'video:release_date', webpage, 'upload date'))

        info = self._parse_json(
            self._search_regex(
                r'var info = ({.*?}),$', embed_page,
                'video info', flags=re.MULTILINE),
            video_id)

        self._check_error(info)

        formats = []
        for (key, format_id) in self._FORMATS:
            video_url = info.get(key)
            if video_url is not None:
                m_size = re.search(r'H264-(\d+)x(\d+)', video_url)
                if m_size is not None:
                    width, height = map(int_or_none, (m_size.group(1), m_size.group(2)))
                else:
                    width, height = None, None
                formats.append({
                    'url': video_url,
                    'ext': 'mp4',
                    'format_id': format_id,
                    'width': width,
                    'height': height,
                })
        self._sort_formats(formats)

        # subtitles
        video_subtitles = self.extract_subtitles(video_id, webpage)

        title = self._og_search_title(webpage, default=None)
        if title is None:
            title = self._html_search_regex(
                r'(?s)<span\s+id="video_title"[^>]*>(.*?)</span>', webpage,
                'title')

        return {
            'id': video_id,
            'formats': formats,
            'uploader': info['owner.screenname'],
            'timestamp': timestamp,
            'title': title,
            'description': description,
            'subtitles': video_subtitles,
            'thumbnail': info['thumbnail_url'],
            'age_limit': age_limit,
            'view_count': view_count,
            'duration': info['duration']
        }
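
Here re.sub is used only to strip all whitespace from the scraped view-count string before it is parsed as an integer. A standalone sketch with an invented counter string:

import re

view_count_str = "1 234 567"
print(int(re.sub(r'\s', '', view_count_str)))  # 1234567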

Example 57

Project: attention-lvcsr
Source File: function.py
def function(inputs, outputs=None, mode=None, updates=None, givens=None,
             no_default_updates=False, accept_inplace=False, name=None,
             rebuild_strict=True, allow_input_downcast=None, profile=None,
             on_unused_input=None):
    """
    Return a callable object that will calculate `outputs` from `inputs`.

    Parameters
    ----------
    inputs : list of either Variable or In instances.
        Function parameters, these are not allowed to be shared variables.
    outputs : list or dict of Variables or Out instances.
        If it is a dict, the keys must be strings. Expressions to compute.
    mode : string or `Mode` instance.
        Compilation mode.
    updates : iterable over pairs (shared_variable, new_expression). List, tuple
              or OrderedDict.
        Updates the values for SharedVariable inputs according to these
        expressions.
    givens : iterable over pairs (Var1, Var2) of Variables. List, tuple or dict.
             The Var1 and Var2 in each pair must have the same Type.
        Specific substitutions to make in the computation graph (Var2 replaces
        Var1).
    no_default_updates: either bool or list of Variables
        If True, do not perform any automatic update on Variables. If False
        (default), perform them all. Else, perform automatic updates on all
        Variables that are neither in "updates" nor in "no_default_updates".
    name : str
        An optional name for this function. The profile mode will print the time
        spent in this function.
    rebuild_strict : bool
        True (Default) is the safer and better tested setting, in which case
        `givens` must substitute new variables with the same Type as the
        variables they replace.
        False is a you-better-know-what-you-are-doing setting, that permits
        `givens` to replace variables with new variables of any Type.
        The consequence of changing a Type is that all results depending on that
        variable may have a different Type too (the graph is rebuilt from inputs
        to outputs). If one of the new types does not make sense for one of the
        Ops in the graph, an Exception will be raised.
    allow_input_downcast: bool or None
        True means that the values passed as inputs when calling the function
        can be silently downcasted to fit the dtype of the corresponding
        Variable, which may lose precision. False means that it will only be
        cast to a more general, or precise, type. None (default) is almost like
        False, but allows downcasting of Python float scalars to floatX.
    profile: None, True, or ProfileStats instance
        Accumulate profiling information into a given ProfileStats instance.
        If argument is `True` then a new ProfileStats instance will be used.
        If argument is a string, a new ProfileStats instance will be created
        with that string as its ``message`` attribute.
        This profiling object will be available via self.profile.
    on_unused_input
        What to do if a variable in the 'inputs' list is not used in the graph.
        Possible values are 'raise', 'warn', 'ignore' and None.

    Returns
    -------
    Function instance
        A callable object that will compute the outputs (given the inputs) and
        update the implicit function arguments according to the `updates`.

    Notes
    -----
    Regarding givens: Be careful to make sure that these
    substitutions are independent--behaviour when Var1 of one pair
    appears in the graph leading to Var2 in another expression is
    undefined.  Replacements specified with givens are different
    from optimizations in that Var2 is not expected to be
    equivalent to Var1.


    Internal documentation:

        What happens when you call theano.function?
           1. RemoveShared: shared variables are just an abstraction to make
        things more convenient for the user. The shared variables are
        transformed into implicit inputs and implicit outputs. The
        optimizations don't see which variables are shared or not.
           2. FunctionGraph: determines whether a graph is valid. For example,
        suppose
        you merge the two apply nodes in our example above, ie, do the
        addition and the tanh at the same time. If you propose a merge that
        changes the resulting dtype or broadcastable pattern of V4, the fgraph
        will detect this.
                    inplace optimizations: say we have an apply node that
        does + on V1 and V2, with output V3. We can change the output to be
        V1, to use less memory. theano must be told that this optimization is
        happening though, so that other parts of the graph are given the
        correct (pre + or post + ) version of V1.
                  fgraph will raise an error if any of these types of
        modifications causes an error
                  fgraph also adds a field called "clients" to all variables.
        clients is a list of apply nodes that use the variable. this makes it
        possible to traverse the graph in both directions. this is useful for
        determining whether to do some optimizations. for example, a fusion
        operation that removes V3 is not very helpful if V3 is also needed for
        some other apply node. fusion operations result in a composite op that
        takes a minigraph of theano scalars and uses this to do elemwise
        operations on theano tensors
         3. Optimization
               How well do optimizations apply to new ops?
                 Usually there are no optimizations for new ops. In fact, new
        ops can disrupt patterns and break currently working optimizations.
        Since the Print op, for example, is not known by any optimization,
        setting a Print op in the middle of a pattern that is usually
        optimized out will block the optimization. for example, log(1+x)
        optimizes to log1p(x) but log(1+Print(x)) is unaffected by
        optimizations.
                 One exception is elemwise ops. If you implement your new op
        as a scalar op then it will automatically work with all the elemwise
        fusion machinery.

                 Local optimizations try to replace some node in the graph
        with a different node. In the case of log(1+x), we want to replace the
        log node.

                 def opt_log1p(node):
                    if not isinstance(node.op,Elemwise):
                       return
                    if not isinstance(node.op.scalar_op, log):
                       return
                    inp = node.inputs[0]
                    if not inp.owner:
                       return
                    if not isinstance(inp.owner.op, add):
                       return
                    inp2 = inp.owner.inputs
                    check that this has length 2, and that one of the inputs
        is 1. assign the other input to x
                    return log1p(x)


         4. Linker
               The linker uses a Python loop to execute the code associated
               with all the Apply nodes in the graph in the correct order.
               The CVM is a linker that replaces this Python loop with a C
               loop to avoid continuously changing between Python and C.
               The CVM is faster for 2 reasons:
                 1) Its internal logic is in C, so no Python interpreter
                    overhead.
                 2) It makes native calls from the VM logic into thunks that
                    have been compiled using the CLinker.
               The VM is a linker that was developed to prototype the CVM. it
        was easier to develop the VM in Python then translate it to C instead
        of just writing it in C from scratch.
               CVM stands for C Virtual Machine.

    """
    if isinstance(outputs, dict):
        output_items = list(outputs.items())

        for item_pair in output_items:
            assert isinstance(item_pair[0], string_types)

        output_items_sorted = sorted(output_items)

        output_keys = []
        outputs = []
        for pair in output_items_sorted:
            output_keys.append(pair[0])
            outputs.append(pair[1])

    else:
        output_keys = None

    if name is None:
        # Determine possible file names
        source_file = re.sub('\.pyc?', '.py', __file__)
        compiled_file = source_file + 'c'

        stack = tb.extract_stack()
        idx = len(stack) - 1

        last_frame = stack[idx]
        if (last_frame[0] == source_file or last_frame[0] == compiled_file):
            func_frame = stack[idx - 1]
            while "theano/gof" in func_frame[0] and idx > 0:
                idx -= 1
                # This can happen if we call var.eval()
                func_frame = stack[idx - 1]
            name = func_frame[0] + ':' + str(func_frame[1])

    if updates is None:
        updates = []

    if (isinstance(updates, dict) and
            not isinstance(updates, compat.OrderedDict) and
            len(updates) > 1):
        warnings.warn(
            "The parameter 'updates' of theano.function()"
            " expects an OrderedDict,"
            " got " + str(type(updates)) + ". Using "
            "a standard dictionary here results in "
            "non-deterministic behavior. You should use an OrderedDict"
            " if you are using Python 2.7 (theano.compat.OrderedDict"
            " for older python), or use a list of (shared, update)"
            " pairs. Do not just convert your dictionary to this type before"
            " the call as the conversion will still be non-deterministic.",
            stacklevel=2)

    if givens is None:
        givens = []
    if not isinstance(inputs, (list, tuple)):
        raise Exception("Input variables of a Theano function should be "
                        "contained in a list, even when there is a single "
                        "input.")

    # compute some features of the arguments:
    uses_tuple = any([isinstance(i, (list, tuple)) for i in inputs])
    uses_updates = bool(updates)
    uses_givens = bool(givens)

    # See if we have any mutable / borrow inputs
    check_for_aliased_inputs = False
    for i in inputs:
        if (isinstance(i, In) and ((hasattr(i, 'borrow') and i.borrow) or
                                   (hasattr(i, 'mutable') and i.mutable))):
            check_for_aliased_inputs = True

    if uses_tuple:
        # we must use old semantics in this case.
        if profile:
            raise NotImplementedError("profiling not supported in old-style "
                                      "function")
        if uses_updates or uses_givens:
            raise NotImplementedError(
                "In() instances and tuple inputs trigger the old "
                "semantics, which disallow using updates and givens")
        fn = orig_function(inputs, outputs,
                           mode=mode,
                           accept_inplace=accept_inplace, name=name)
    else:
        # note: pfunc will also call orig_function -- orig_function is
        #      a choke point that all compilation must pass through
        fn = pfunc(params=inputs,
                   outputs=outputs,
                   mode=mode,
                   updates=updates,
                   givens=givens,
                   no_default_updates=no_default_updates,
                   accept_inplace=accept_inplace, name=name,
                   rebuild_strict=rebuild_strict,
                   allow_input_downcast=allow_input_downcast,
                   on_unused_input=on_unused_input,
                   profile=profile,
                   output_keys=output_keys)
    # We need to add the flag check_aliased inputs if we have any mutable or
    # borrowed user-defined inputs
    fn._check_for_aliased_inputs = check_for_aliased_inputs
    return fn
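
Note on the re.sub call above: the automatic naming block uses re.sub('\.pyc?', '.py', __file__) so that a compiled-module path (.pyc) is mapped back to its .py source before it is compared against the traceback frames. A minimal standalone sketch of just that substitution (the paths below are invented for illustration):

import re

for path in ("theano/compile/function.py", "theano/compile/function.pyc"):
    # r'\.pyc?' matches a literal '.py' with an optional trailing 'c'
    print(re.sub(r'\.pyc?', '.py', path))
# both iterations print: theano/compile/function.py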

Example 58

Project: pyfrc
Source File: cli_deploy.py
View license
    def run(self, options, robot_class, **static_options):
        
        from .. import config
        config.mode = 'upload'
        
        # run the test suite before uploading
        if not options.skip_tests:
            from .cli_test import PyFrcTest
            
            tester = PyFrcTest()
            
            retval = tester.run_test([], robot_class, options.builtin, ignore_missing_test=True)
            if retval != 0:
                print_err("ERROR: Your robot tests failed, aborting upload.")
                if not sys.stdin.isatty():
                    print_err("- Use --skip-tests if you want to upload anyways")
                    return retval
                
                print()
                if not yesno('- Upload anyways?'):
                    return retval
                
                if not yesno('- Are you sure? Your robot code may crash!'):
                    return retval
                
                print()
                print("WARNING: Uploading code against my better judgement...")
        
        # upload all files in the robot.py source directory
        robot_file = abspath(inspect.getfile(robot_class))
        robot_path = dirname(robot_file)
        robot_filename = basename(robot_file)
        cfg_filename = join(robot_path, '.deploy_cfg')
        
        if not options.nonstandard and robot_filename != 'robot.py':
            print_err("ERROR: Your robot code must be in a file called robot.py (launched from %s)!" % robot_filename)
            print_err()
            print_err("If you really want to do this, then specify the --nonstandard argument")
            return 1
        
        # This probably should be configurable... oh well
        
        deploy_dir = '/home/lvuser'
        py_deploy_dir = '%s/py' % deploy_dir
        
        # note below: it appears deployed_cmd can only be a single line
        
        # In 2015, there were stdout/stderr issues. In 2016, they seem to
        # have been fixed, but need to use -u for it to really work properly
        
        if options.debug:
            deployed_cmd = 'env LD_LIBRARY_PATH=/usr/local/frc/rpath-lib/ /usr/local/frc/bin/netconsole-host /usr/local/bin/python3 -u %s/%s -v run' % (py_deploy_dir, robot_filename)
            deployed_cmd_fname = 'robotDebugCommand'
            extra_cmd = 'touch /tmp/frcdebug; chown lvuser:ni /tmp/frcdebug'
        else:
            deployed_cmd = 'env LD_LIBRARY_PATH=/usr/local/frc/rpath-lib/ /usr/local/frc/bin/netconsole-host /usr/local/bin/python3 -u -O %s/%s run' % (py_deploy_dir, robot_filename)
            deployed_cmd_fname = 'robotCommand'
            extra_cmd = ''

        if options.in_place:
            del_cmd = ''
        else:
            del_cmd = "[ -d %(py_deploy_dir)s ] && rm -rf %(py_deploy_dir)s"

        del_cmd %= {"py_deploy_dir": py_deploy_dir}
        
        check_version = '/usr/local/bin/python3 -c "exec(open(\\"$SITEPACKAGES/wpilib/version.py\\", \\"r\\").read(), globals()); print(\\"WPILib version on robot is \\" + __version__);exit(0) if __version__ == \\"%s\\" else exit(89)"' % wpilib.__version__
        if options.no_version_check:
            check_version = ''
        
        # This is a nasty bit of code now...
        sshcmd = inspect.cleandoc("""
            /bin/bash -ce '[ -x /usr/local/bin/python3 ] || exit 87
            SITEPACKAGES=$(/usr/local/bin/python3 -c "import site; print(site.getsitepackages()[0])")
            [ -f $SITEPACKAGES/wpilib/version.py ] || exit 88
            %(check_version)s
            %(del_cmd)s
            echo "%(cmd)s" > %(deploy_dir)s/%(cmd_fname)s
            %(extra_cmd)s'
        """)
              
        sshcmd %= {
            'del_cmd': del_cmd,
            'deploy_dir': deploy_dir,
            'cmd': deployed_cmd,
            'cmd_fname': deployed_cmd_fname,
            'extra_cmd': extra_cmd,
            'check_version': check_version
        }
        
        sshcmd = re.sub("\n+", ";", sshcmd)
        
        nc_thread = None
        
        try:
            controller = installer.ssh_from_cfg(cfg_filename,
                                                username='lvuser',
                                                password='',
                                                hostname=options.robot,
                                                allow_mitm=True,
                                                no_resolve=options.no_resolve)
            
            # Housekeeping first
            logger.debug('SSH: %s', sshcmd)
            controller.ssh(sshcmd)
            
            # Copy the files over, copy to a temporary directory first
            # -> this is inefficient, but it's easier in sftp
            tmp_dir = tempfile.mkdtemp()
            py_tmp_dir = join(tmp_dir, 'py')
                    
            try:
                self._copy_to_tmpdir(py_tmp_dir, robot_path)
                controller.sftp(py_tmp_dir, deploy_dir, mkdir=not options.in_place)
            finally:
                shutil.rmtree(tmp_dir)
            
            # start the netconsole listener now if requested, *before* we
            # actually start the robot code, so we can see all messages
            if options.nc:
                from netconsole import run
                nc_event = threading.Event()
                nc_thread = threading.Thread(target=run,
                                             kwargs={'init_event': nc_event},
                                             daemon=True)
                nc_thread.start()
                nc_event.wait(5)
                logger.info("Netconsole is listening...")
            
            if not options.in_place:
                # Restart the robot code and we're done!
                sshcmd = "/bin/bash -ce '" + \
                         '. /etc/profile.d/natinst-path.sh; ' + \
                         'chown -R lvuser:ni %s; ' + \
                         '/usr/local/frc/bin/frcKillRobot.sh -t -r' + \
                         "'"
            
                sshcmd %= (py_deploy_dir)
            
                logger.debug('SSH: %s', sshcmd)
                controller.ssh(sshcmd)
            
        except installer.SshExecError as e:
            if e.retval == 87:
                print_err("ERROR: python3 was not found on the roboRIO: have you installed robotpy?")
            elif e.retval == 88:
                print_err("ERROR: WPILib was not found on the roboRIO: have you installed robotpy?")
            elif e.retval == 89:
                print_err("ERROR: expected WPILib version %s" % wpilib.__version__)
                print_err()
                print_err("You should either:")
                print_err("- If the robot version is older, upgrade the RobotPy on your robot")
                print_err("- Otherwise, upgrade pyfrc on your computer")
                print_err()
                print_err("Alternatively, you can specify --no-version-check to skip this check")
            else:
                print_err("ERROR: %s" % e)
            return 1
        except installer.Error as e:
            print_err("ERROR: %s" % e)
            return 1
        else:
            print("\nSUCCESS: Deploy was successful!")
        
        if nc_thread is not None:
            nc_thread.join()
        
        return 0
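
Note on the re.sub call above: re.sub("\n+", ";", sshcmd) turns the cleandoc'd multi-line bash script into one ';'-separated command line, and the "+" also swallows the blank lines left behind when placeholders such as del_cmd or extra_cmd are empty. A small standalone sketch (the command strings are invented for illustration):

import re

sshcmd = "/bin/bash -ce 'echo housekeeping\n\necho deploy'"
# one or more consecutive newlines collapse into a single ';'
print(re.sub(r"\n+", ";", sshcmd))
# /bin/bash -ce 'echo housekeeping;echo deploy'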

Example 59

Project: rpmlint
Source File: BinariesCheck.py
View license
    def __init__(self, pkg, path, file, is_ar, is_shlib):
        self.readelf_error = False
        self.needed = []
        self.rpath = []
        self.undef = []
        self.unused = []
        self.comment = False
        self.soname = False
        self.non_pic = True
        self.stack = False
        self.exec_stack = False
        self.exit_calls = []
        self.forbidden_calls = []
        fork_called = False
        self.tail = ''

        self.setgid = False
        self.setuid = False
        self.setgroups = False
        self.chroot = False
        self.chdir = False
        self.chroot_near_chdir = False
        self.mktemp = False

        is_debug = path.endswith('.debug')

        cmd = ['env', 'LC_ALL=C', 'readelf', '-W', '-S', '-l', '-d', '-s']
        cmd.append(path)
        res = Pkg.getstatusoutput(cmd)
        if not res[0]:
            lines = res[1].splitlines()
            for l in lines:
                r = BinaryInfo.needed_regex.search(l)
                if r:
                    self.needed.append(r.group(1))
                    continue

                r = BinaryInfo.rpath_regex.search(l)
                if r:
                    for p in r.group(1).split(':'):
                        self.rpath.append(p)
                    continue

                if BinaryInfo.comment_regex.search(l):
                    self.comment = True
                    continue

                if BinaryInfo.pic_regex.search(l):
                    self.non_pic = False
                    continue

                r = BinaryInfo.soname_regex.search(l)
                if r:
                    self.soname = r.group(1)
                    continue

                r = BinaryInfo.stack_regex.search(l)
                if r:
                    self.stack = True
                    flags = r.group(1)
                    if flags and BinaryInfo.stack_exec_regex.search(flags):
                        self.exec_stack = True
                    continue

                if l.startswith("Symbol table"):
                    break

            for l in lines:
                r = BinaryInfo.call_regex.search(l)
                if not r:
                    continue
                l = r.group(1)

                if BinaryInfo.mktemp_call_regex.search(l):
                    self.mktemp = True

                if BinaryInfo.setgid_call_regex.search(l):
                    self.setgid = True

                if BinaryInfo.setuid_call_regex.search(l):
                    self.setuid = True

                if BinaryInfo.setgroups_call_regex.search(l):
                    self.setgroups = True

                if BinaryInfo.chdir_call_regex.search(l):
                    self.chdir = True

                if BinaryInfo.chroot_call_regex.search(l):
                    self.chroot = True

                if BinaryInfo.forbidden_functions:
                    for r_name, func in BinaryInfo.forbidden_functions.items():
                        ret = func['f_regex'].search(l)
                        if ret:
                            self.forbidden_calls.append(r_name)

                if is_shlib:
                    r = BinaryInfo.exit_call_regex.search(l)
                    if r:
                        self.exit_calls.append(r.group(1))
                        continue
                    r = BinaryInfo.fork_call_regex.search(l)
                    if r:
                        fork_called = True
                        continue

            # check if we don't have a string that will automatically
            # waive the presence of a forbidden call
            if self.forbidden_calls:
                cmd = ['env', 'LC_ALL=C', 'strings']
                cmd.append(path)
                res = Pkg.getstatusoutput(cmd)
                if not res[0]:
                    for l in res[1].splitlines():
                        # as we need to remove elements, iterate backwards
                        for i in range(len(self.forbidden_calls) - 1, -1, -1):
                            func = self.forbidden_calls[i]
                            f = BinaryInfo.forbidden_functions[func]
                            if 'waiver_regex' not in f:
                                continue
                            r = f['waiver_regex'].search(l)
                            if r:
                                del self.forbidden_calls[i]

            if self.non_pic:
                self.non_pic = 'TEXTREL' in res[1]

            # Ignore all exit() calls if fork() is being called.
            # Does not have any context at all but without this kludge, the
            # number of false positives would probably be intolerable.
            if fork_called:
                self.exit_calls = []

            # check if chroot is near chdir (since otherwise, chroot is called
            # without chdir)
            # Currently this implementation works only on x86_64 due to reliance
            # on x86_64 specific assembly. Skip it on other architectures
            if pkg.arch == 'x86_64' and self.chroot and self.chdir:
                p = subprocess.Popen(
                    ['env', 'LC_ALL=C', 'objdump', '-d', path],
                    stdout=subprocess.PIPE, bufsize=-1)
                with p.stdout:
                    index = 0
                    chroot_index = -99
                    chdir_index = -99
                    for line in p.stdout:
                        res = BinaryInfo.objdump_call_regex.search(line)
                        if not res:
                            continue
                        if b'@plt' not in res.group(1):
                            pass
                        elif b'chroot@plt' in res.group(1):
                            chroot_index = index
                            if abs(chroot_index - chdir_index) <= 2:
                                self.chroot_near_chdir = True
                                break
                        elif b'chdir@plt' in res.group(1):
                            chdir_index = index
                            if abs(chroot_index - chdir_index) <= 2:
                                self.chroot_near_chdir = True
                                break
                        index += 1
                if p.wait() and not self.chroot_near_chdir:
                    printWarning(pkg, 'binaryinfo-objdump-failed', file)
                    self.chroot_near_chdir = True  # avoid false positive

        else:
            self.readelf_error = True
            printWarning(pkg, 'binaryinfo-readelf-failed',
                         file, re.sub('\n.*', '', res[1]))

        try:
            with open(path, 'rb') as fobj:
                fobj.seek(-12, os.SEEK_END)
                self.tail = Pkg.b2s(fobj.read())
        except Exception as e:
            printWarning(pkg, 'binaryinfo-tail-failed %s: %s' % (file, e))

        # Undefined symbol and unused direct dependency checks make sense only
        # for installed packages.
        # skip debuginfo: https://bugzilla.redhat.com/190599
        if not is_ar and not is_debug and isinstance(pkg, Pkg.InstalledPkg):
            # We could do this with objdump, but it's _much_ simpler with ldd.
            res = Pkg.getstatusoutput(
                ('env', 'LC_ALL=C', 'ldd', '-d', '-r', path))
            if not res[0]:
                for l in res[1].splitlines():
                    undef = BinaryInfo.undef_regex.search(l)
                    if undef:
                        self.undef.append(undef.group(1))
                if self.undef:
                    cmd = self.undef[:]
                    cmd.insert(0, 'c++filt')
                    try:
                        res = Pkg.getstatusoutput(cmd)
                        if not res[0]:
                            self.undef = res[1].splitlines()
                    except:
                        pass
            else:
                printWarning(pkg, 'ldd-failed', file)
            res = Pkg.getstatusoutput(
                ('env', 'LC_ALL=C', 'ldd', '-r', '-u', path))
            if res[0]:
                # Either ldd doesn't grok -u (added in glibc 2.3.4) or we have
                # unused direct dependencies
                in_unused = False
                for l in res[1].splitlines():
                    if not l.rstrip():
                        pass
                    elif l.startswith('Unused direct dependencies'):
                        in_unused = True
                    elif in_unused:
                        unused = BinaryInfo.unused_regex.search(l)
                        if unused:
                            self.unused.append(unused.group(1))
                        else:
                            in_unused = False
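
Note on the re.sub call in the readelf error branch above: re.sub('\n.*', '', res[1]) reduces the failed readelf output to its first line for the warning message. Because '.' does not match newlines, every match covers a newline plus the remainder of that line, so substituting all of them leaves only the first line. A standalone sketch with invented output:

import re

readelf_output = "readelf: Error: Not an ELF file\nusage: readelf <option(s)> elf-file(s)"
print(re.sub(r'\n.*', '', readelf_output))
# readelf: Error: Not an ELF file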

Example 60

View license
    def _real_extract(self, url):
        video_id = self._match_id(url)

        webpage = self._download_webpage_no_ff(
            'https://www.dailymotion.com/video/%s' % video_id, video_id)

        age_limit = self._rta_search(webpage)

        description = self._og_search_description(webpage) or self._html_search_meta(
            'description', webpage, 'description')

        view_count_str = self._search_regex(
            (r'<meta[^>]+itemprop="interactionCount"[^>]+content="UserPlays:([\s\d,.]+)"',
             r'video_views_count[^>]+>\s+([\s\d\,.]+)'),
            webpage, 'view count', fatal=False)
        if view_count_str:
            view_count_str = re.sub(r'\s', '', view_count_str)
        view_count = str_to_int(view_count_str)
        comment_count = int_or_none(self._search_regex(
            r'<meta[^>]+itemprop="interactionCount"[^>]+content="UserComments:(\d+)"',
            webpage, 'comment count', fatal=False))

        player_v5 = self._search_regex(
            [r'buildPlayer\(({.+?})\);\n',  # See https://github.com/rg3/youtube-dl/issues/7826
             r'playerV5\s*=\s*dmp\.create\([^,]+?,\s*({.+?})\);',
             r'buildPlayer\(({.+?})\);',
             r'var\s+config\s*=\s*({.+?});'],
            webpage, 'player v5', default=None)
        if player_v5:
            player = self._parse_json(player_v5, video_id)
            metadata = player['metadata']

            self._check_error(metadata)

            formats = []
            for quality, media_list in metadata['qualities'].items():
                for media in media_list:
                    media_url = media.get('url')
                    if not media_url:
                        continue
                    type_ = media.get('type')
                    if type_ == 'application/vnd.lumberjack.manifest':
                        continue
                    ext = mimetype2ext(type_) or determine_ext(media_url)
                    if ext == 'm3u8':
                        formats.extend(self._extract_m3u8_formats(
                            media_url, video_id, 'mp4', preference=-1,
                            m3u8_id='hls', fatal=False))
                    elif ext == 'f4m':
                        formats.extend(self._extract_f4m_formats(
                            media_url, video_id, preference=-1, f4m_id='hds', fatal=False))
                    else:
                        f = {
                            'url': media_url,
                            'format_id': 'http-%s' % quality,
                            'ext': ext,
                        }
                        m = re.search(r'H264-(?P<width>\d+)x(?P<height>\d+)', media_url)
                        if m:
                            f.update({
                                'width': int(m.group('width')),
                                'height': int(m.group('height')),
                            })
                        formats.append(f)
            self._sort_formats(formats)

            title = metadata['title']
            duration = int_or_none(metadata.get('duration'))
            timestamp = int_or_none(metadata.get('created_time'))
            thumbnail = metadata.get('poster_url')
            uploader = metadata.get('owner', {}).get('screenname')
            uploader_id = metadata.get('owner', {}).get('id')

            subtitles = {}
            subtitles_data = metadata.get('subtitles', {}).get('data', {})
            if subtitles_data and isinstance(subtitles_data, dict):
                for subtitle_lang, subtitle in subtitles_data.items():
                    subtitles[subtitle_lang] = [{
                        'ext': determine_ext(subtitle_url),
                        'url': subtitle_url,
                    } for subtitle_url in subtitle.get('urls', [])]

            return {
                'id': video_id,
                'title': title,
                'description': description,
                'thumbnail': thumbnail,
                'duration': duration,
                'timestamp': timestamp,
                'uploader': uploader,
                'uploader_id': uploader_id,
                'age_limit': age_limit,
                'view_count': view_count,
                'comment_count': comment_count,
                'formats': formats,
                'subtitles': subtitles,
            }

        # vevo embed
        vevo_id = self._search_regex(
            r'<link rel="video_src" href="[^"]*?vevo.com[^"]*?video=(?P<id>[\w]*)',
            webpage, 'vevo embed', default=None)
        if vevo_id:
            return self.url_result('vevo:%s' % vevo_id, 'Vevo')

        # fallback old player
        embed_page = self._download_webpage_no_ff(
            'https://www.dailymotion.com/embed/video/%s' % video_id,
            video_id, 'Downloading embed page')

        timestamp = parse_iso8601(self._html_search_meta(
            'video:release_date', webpage, 'upload date'))

        info = self._parse_json(
            self._search_regex(
                r'var info = ({.*?}),$', embed_page,
                'video info', flags=re.MULTILINE),
            video_id)

        self._check_error(info)

        formats = []
        for (key, format_id) in self._FORMATS:
            video_url = info.get(key)
            if video_url is not None:
                m_size = re.search(r'H264-(\d+)x(\d+)', video_url)
                if m_size is not None:
                    width, height = map(int_or_none, (m_size.group(1), m_size.group(2)))
                else:
                    width, height = None, None
                formats.append({
                    'url': video_url,
                    'ext': 'mp4',
                    'format_id': format_id,
                    'width': width,
                    'height': height,
                })
        self._sort_formats(formats)

        # subtitles
        video_subtitles = self.extract_subtitles(video_id, webpage)

        title = self._og_search_title(webpage, default=None)
        if title is None:
            title = self._html_search_regex(
                r'(?s)<span\s+id="video_title"[^>]*>(.*?)</span>', webpage,
                'title')

        return {
            'id': video_id,
            'formats': formats,
            'uploader': info['owner.screenname'],
            'timestamp': timestamp,
            'title': title,
            'description': description,
            'subtitles': video_subtitles,
            'thumbnail': info['thumbnail_url'],
            'age_limit': age_limit,
            'view_count': view_count,
            'duration': info['duration']
        }
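
Note on the re.sub call above: re.sub(r'\s', '', view_count_str) strips every whitespace character from the scraped view counter (the interactionCount regex deliberately allows spaces, commas and dots) before it is handed to str_to_int. A standalone sketch; the plain int() conversion below is only a rough stand-in for youtube-dl's str_to_int helper:

import re

view_count_str = "1 234 567"
cleaned = re.sub(r'\s', '', view_count_str)   # '1234567'
view_count = int(cleaned)                     # rough stand-in for str_to_int
print(view_count)  # 1234567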

Example 61

Project: khard
Source File: config.py
View license
        def __init__(self):
            self.config = None
            self.address_book_list = []
            self.original_uid_dict = {}
            self.uid_dict = {}

            # set locale
            locale.setlocale(locale.LC_ALL, '')

            # load config file
            xdg_config_home = os.environ.get("XDG_CONFIG_HOME") or \
                os.path.expanduser("~/.config")
            config_file = os.environ.get("KHARD_CONFIG") or \
                os.path.join(xdg_config_home, "khard", "khard.conf")
            if not os.path.exists(config_file):
                print("Config file %s not available" % config_file)
                sys.exit(2)

            # parse config file contents
            try:
                self.config = configobj.ConfigObj(
                    config_file, interpolation=False)
            except configobj.ParseError as err:
                print("Error in config file\n%s" % err)
                sys.exit(2)

            # general settings
            if "general" not in self.config:
                print('Error in config file\n'
                      'Missing main section "[general]".')
                sys.exit(2)

            # debug
            if 'debug' not in self.config['general']:
                self.config['general']['debug'] = False
            elif self.config['general']['debug'] == "yes":
                self.config['general']['debug'] = True
            elif self.config['general']['debug'] == "no":
                self.config['general']['debug'] = False
            else:
                print("Error in config file\n"
                      "Invalid value for debug parameter\n"
                      "Possible values: yes, no")
                sys.exit(2)

            # editor
            self.config['general']['editor'] = \
                self.config['general'].get("editor") \
                or os.environ.get("EDITOR")
            if self.config['general']['editor'] is None:
                print("Error in config file\n"
                      "Set path to your preferred text editor in khard's "
                      "config file or the $EDITOR shell variable\n"
                      "Example for khard.conf: editor = vim")
                sys.exit(2)
            self.config['general']['editor'] = find_executable(
                os.path.expanduser(self.config['general']['editor']))
            if self.config['general']['editor'] is None:
                print("Error in config file\n"
                      "Invalid editor path or executable not found.")
                sys.exit(2)

            # merge editor
            self.config['general']['merge_editor'] = \
                self.config['general'].get("merge_editor") \
                or os.environ.get("MERGE_EDITOR")
            if self.config['general']['merge_editor'] is None:
                print("Error in config file\nSet path to your preferred text "
                      "merge editor in khard's config file or the "
                      "$MERGE_EDITOR shell variable\n"
                      "Example for khard.conf: merge_editor = vimdiff")
                sys.exit(2)
            self.config['general']['merge_editor'] = find_executable(
                os.path.expanduser(self.config['general']['merge_editor']))
            if self.config['general']['merge_editor'] is None:
                print("Error in config file\n"
                      "Invalid merge editor path or executable not found.")
                sys.exit(2)

            # default action
            if "default_action" not in self.config['general']:
                print("Error in config file\n"
                      "Missing default action parameter.")
                sys.exit(2)
            elif self.config['general']['default_action'] not in \
                    Actions.get_list_of_all_actions():
                print("Error in config file\nInvalid value for default_action "
                      "parameter\nPossible values: %s" % ', '.join(
                          sorted(Actions.get_list_of_all_actions())))
                sys.exit(2)

            # contact table settings
            if "contact table" not in self.config:
                self.config['contact table'] = {}

            # sort contact table by first or last name
            if "sort" not in self.config['contact table']:
                self.config['contact table']['sort'] = "first_name"
            elif self.config['contact table']['sort'] not in \
                    ["first_name", "last_name"]:
                print("Error in config file\n"
                      "Invalid value for sort parameter\n"
                      "Possible values: first_name, last_name")
                sys.exit(2)

            # display names in contact table by first or last name
            if "display" not in self.config['contact table']:
                # if display by name attribute is not present in the config
                # file use the sort attribute value for backwards compatibility
                self.config['contact table']['display'] = \
                        self.config['contact table']['sort']
            elif self.config['contact table']['display'] not in \
                    ["first_name", "last_name"]:
                print("Error in config file\n"
                      "Invalid value for display parameter\n"
                      "Possible values: first_name, last_name")
                sys.exit(2)

            # reverse contact table
            if 'reverse' not in self.config['contact table']:
                self.config['contact table']['reverse'] = False
            elif self.config['contact table']['reverse'] == "yes":
                self.config['contact table']['reverse'] = True
            elif self.config['contact table']['reverse'] == "no":
                self.config['contact table']['reverse'] = False
            else:
                print("Error in config file\n"
                      "Invalid value for reverse parameter\n"
                      "Possible values: yes, no")
                sys.exit(2)

            # group contact table by address book
            if "group_by_addressbook" not in self.config['contact table']:
                self.config['contact table']['group_by_addressbook'] = False
            elif self.config['contact table']['group_by_addressbook'] == "yes":
                self.config['contact table']['group_by_addressbook'] = True
            elif self.config['contact table']['group_by_addressbook'] == "no":
                self.config['contact table']['group_by_addressbook'] = False
            else:
                print("Error in config file\n"
                      "Invalid value for group_by_addressbook parameter\n"
                      "Possible values: yes, no")
                sys.exit(2)

            # nickname
            if "show_nicknames" not in self.config['contact table']:
                self.config['contact table']['show_nicknames'] = False
            elif self.config['contact table']['show_nicknames'] == "yes":
                self.config['contact table']['show_nicknames'] = True
            elif self.config['contact table']['show_nicknames'] == "no":
                self.config['contact table']['show_nicknames'] = False
            else:
                print("Error in config file\n"
                      "Invalid value for show_nicknames parameter\n"
                      "Possible values: yes, no")
                sys.exit(2)

            # show uids
            if "show_uids" not in self.config['contact table']:
                self.config['contact table']['show_uids'] = True
            elif self.config['contact table']['show_uids'] == "yes":
                self.config['contact table']['show_uids'] = True
            elif self.config['contact table']['show_uids'] == "no":
                self.config['contact table']['show_uids'] = False
            else:
                print("Error in config file\n"
                      "Invalid value for show_uids parameter\n"
                      "Possible values: yes, no")
                sys.exit(2)

            # vcard settings
            if "vcard" not in self.config:
                self.config['vcard'] = {}

            # get supported private objects
            if "private_objects" not in self.config['vcard']:
                self.config['vcard']['private_objects'] = []
            else:
                # check if object only contains letters, digits or -
                for object in self.config['vcard']['private_objects']:
                    if object != re.sub("[^a-zA-Z0-9-]", "", object):
                        print("Error in config file\n"
                              "private object %s may only contain letters, "
                              "digits and the \"-\" character." % object)
                        sys.exit(2)
                    if object == re.sub("[^-]", "", object) \
                            or object.startswith("-") \
                            or object.endswith("-"):
                        print("Error in config file\n"
                              "A \"-\" in a private object label must be "
                              "at least surrounded by one letter or digit.")
                        sys.exit(2)

            # preferred vcard version
            if "preferred_version" not in self.config['vcard']:
                self.config['vcard']['preferred_version'] = "3.0"
            elif self.config['vcard']['preferred_version'] not in \
                    self.get_supported_vcard_versions():
                print("Error in config file\n"
                      "Invalid value for preferred_version parameter\n"
                      "Possible values: %s"
                      % self.get_supported_vcard_versions())
                sys.exit(2)

            # speed up program by pre-searching in the vcard source files
            if 'search_in_source_files' not in self.config['vcard']:
                self.config['vcard']['search_in_source_files'] = False
            elif self.config['vcard']['search_in_source_files'] == "yes":
                self.config['vcard']['search_in_source_files'] = True
            elif self.config['vcard']['search_in_source_files'] == "no":
                self.config['vcard']['search_in_source_files'] = False
            else:
                print("Error in config file\n"
                      "Invalid value for search_in_source_files parameter\n"
                      "Possible values: yes, no")
                sys.exit(2)

            # skip unparsable vcards
            if 'skip_unparsable' not in self.config['vcard']:
                self.config['vcard']['skip_unparsable'] = False
            elif self.config['vcard']['skip_unparsable'] == "yes":
                self.config['vcard']['skip_unparsable'] = True
            elif self.config['vcard']['skip_unparsable'] == "no":
                self.config['vcard']['skip_unparsable'] = False
            else:
                print("Error in config file\n"
                      "Invalid value for skip_unparsable parameter\n"
                      "Possible values: yes, no")
                sys.exit(2)

            # load address books
            if "addressbooks" not in self.config:
                print('Error in config file\n'
                      'Missing main section "[addressbooks]".')
                sys.exit(2)
            if len(self.config['addressbooks'].keys()) == 0:
                print("Error in config file\n"
                      "No address book entries available.")
                sys.exit(2)
            for name in self.config['addressbooks'].keys():
                # create address book object
                try:
                    address_book = AddressBook(
                        name, self.config['addressbooks'][name]['path'])
                except KeyError as e:
                    print("Error in config file\n"
                          "Missing path to the \"%s\" address book." % name)
                    sys.exit(2)
                except IOError as e:
                    print("Error in config file\n%s" % e)
                    sys.exit(2)
                else:
                    # add address book to list
                    self.address_book_list.append(address_book)
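
Note on the two re.sub calls above: they are used as validators rather than transformers. A private object label passes only if deleting every character outside [a-zA-Z0-9-] leaves it unchanged, and it must not consist solely of '-' or start or end with one. A standalone sketch of the same checks:

import re

def is_valid_private_object(label):
    # label may only contain letters, digits and '-'
    if label != re.sub("[^a-zA-Z0-9-]", "", label):
        return False
    # a '-' must be surrounded by at least one letter or digit
    if label == re.sub("[^-]", "", label) \
            or label.startswith("-") or label.endswith("-"):
        return False
    return True

print(is_valid_private_object("Jabber"))    # True
print(is_valid_private_object("-broken-"))  # False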

Example 62

Project: SickRage
Source File: parser.py
View license
    def _parse_string(self, name):  # pylint: disable=too-many-locals, too-many-branches, too-many-statements
        if not name:
            return

        matches = []
        bestResult = None

        for (cur_regex_num, cur_regex_name, cur_regex) in self.compiled_regexes:
            match = cur_regex.match(name)

            if not match:
                continue

            result = ParseResult(name)
            result.which_regex = [cur_regex_name]
            result.score = 0 - cur_regex_num

            named_groups = match.groupdict().keys()

            if 'series_name' in named_groups:
                result.series_name = match.group('series_name')
                if result.series_name:
                    result.series_name = self.clean_series_name(result.series_name)
                    result.score += 1

            if 'series_num' in named_groups and match.group('series_num'):
                result.score += 1

            if 'season_num' in named_groups:
                tmp_season = int(match.group('season_num'))
                if cur_regex_name == 'bare' and tmp_season in (19, 20):
                    continue
                result.season_number = tmp_season
                result.score += 1

            if 'ep_num' in named_groups:
                ep_num = self._convert_number(match.group('ep_num'))
                if 'extra_ep_num' in named_groups and match.group('extra_ep_num'):
                    result.episode_numbers = range(ep_num, self._convert_number(match.group('extra_ep_num')) + 1)
                    result.score += 1
                else:
                    result.episode_numbers = [ep_num]
                result.score += 3

            if 'ep_ab_num' in named_groups:
                ep_ab_num = self._convert_number(match.group('ep_ab_num'))
                if 'extra_ab_ep_num' in named_groups and match.group('extra_ab_ep_num'):
                    result.ab_episode_numbers = range(ep_ab_num,
                                                      self._convert_number(match.group('extra_ab_ep_num')) + 1)
                    result.score += 1
                else:
                    result.ab_episode_numbers = [ep_ab_num]
                result.score += 1

            if 'air_date' in named_groups:
                air_date = match.group('air_date')
                try:
                    assert re.sub(r'[^\d]*', '', air_date) != '112263'
                    result.air_date = dateutil.parser.parse(air_date, fuzzy_with_tokens=True)[0].date()
                    result.score += 1
                except Exception:
                    continue

            if 'extra_info' in named_groups:
                tmp_extra_info = match.group('extra_info')

                # Show.S04.Special or Show.S05.Part.2.Extras is almost certainly not every episode in the season
                if tmp_extra_info and cur_regex_name == 'season_only' and re.search(
                        r'([. _-]|^)(special|extra)s?\w*([. _-]|$)', tmp_extra_info, re.I):
                    continue
                result.extra_info = tmp_extra_info
                result.score += 1

            if 'release_group' in named_groups:
                result.release_group = match.group('release_group')
                result.score += 1

            if 'version' in named_groups:
                # assigns version to anime file if detected using anime regex. Non-anime regex receives -1
                version = match.group('version')
                if version:
                    result.version = version
                else:
                    result.version = 1
            else:
                result.version = -1

            matches.append(result)

        if matches:
            # pick best match with highest score based on placement
            bestResult = max(sorted(matches, reverse=True, key=lambda x: x.which_regex), key=lambda x: x.score)

            show = None
            if not self.naming_pattern:
                # try and create a show object for this result
                show = helpers.get_show(bestResult.series_name, self.tryIndexers)

            # confirm passed in show object indexer id matches result show object indexer id
            if show:
                if self.showObj and show.indexerid != self.showObj.indexerid:
                    show = None
                bestResult.show = show
            elif not show and self.showObj:
                bestResult.show = self.showObj

            # if this is a naming pattern test or result doesn't have a show object then return best result
            if not bestResult.show or self.naming_pattern:
                return bestResult

            # get quality
            bestResult.quality = common.Quality.nameQuality(name, bestResult.show.is_anime)

            new_episode_numbers = []
            new_season_numbers = []
            new_absolute_numbers = []

            # if we have an air-by-date show then get the real season/episode numbers
            if bestResult.is_air_by_date:
                airdate = bestResult.air_date.toordinal()
                main_db_con = db.DBConnection()
                sql_result = main_db_con.select(
                    "SELECT season, episode FROM tv_episodes WHERE showid = ? and indexer = ? and airdate = ?",
                    [bestResult.show.indexerid, bestResult.show.indexer, airdate])

                season_number = None
                episode_numbers = []

                if sql_result:
                    season_number = int(sql_result[0][0])
                    episode_numbers = [int(sql_result[0][1])]

                if season_number is None or not episode_numbers:
                    try:
                        lINDEXER_API_PARMS = sickbeard.indexerApi(bestResult.show.indexer).api_params.copy()

                        lINDEXER_API_PARMS['language'] = bestResult.show.lang or sickbeard.INDEXER_DEFAULT_LANGUAGE

                        t = sickbeard.indexerApi(bestResult.show.indexer).indexer(**lINDEXER_API_PARMS)

                        epObj = t[bestResult.show.indexerid].airedOn(bestResult.air_date)[0]

                        season_number = int(epObj["seasonnumber"])
                        episode_numbers = [int(epObj["episodenumber"])]
                    except sickbeard.indexer_episodenotfound:
                        logger.log(u"Unable to find episode with date " + str(bestResult.air_date) + " for show " + bestResult.show.name + ", skipping", logger.WARNING)
                        episode_numbers = []
                    except sickbeard.indexer_error as e:
                        logger.log(u"Unable to contact " + sickbeard.indexerApi(bestResult.show.indexer).name + ": " + ex(e), logger.WARNING)
                        episode_numbers = []

                for epNo in episode_numbers:
                    s = season_number
                    e = epNo

                    if bestResult.show.is_scene:
                        (s, e) = scene_numbering.get_indexer_numbering(bestResult.show.indexerid,
                                                                       bestResult.show.indexer,
                                                                       season_number,
                                                                       epNo)
                    new_episode_numbers.append(e)
                    new_season_numbers.append(s)

            elif bestResult.show.is_anime and bestResult.ab_episode_numbers:
                scene_season = scene_exceptions.get_scene_exception_by_name(bestResult.series_name)[1]
                for epAbsNo in bestResult.ab_episode_numbers:
                    a = epAbsNo

                    if bestResult.show.is_scene:
                        a = scene_numbering.get_indexer_absolute_numbering(bestResult.show.indexerid,
                                                                           bestResult.show.indexer, epAbsNo,
                                                                           True, scene_season)

                    (s, e) = helpers.get_all_episodes_from_absolute_number(bestResult.show, [a])

                    new_absolute_numbers.append(a)
                    new_episode_numbers.extend(e)
                    new_season_numbers.append(s)

            elif bestResult.season_number and bestResult.episode_numbers:
                for epNo in bestResult.episode_numbers:
                    s = bestResult.season_number
                    e = epNo

                    if bestResult.show.is_scene:
                        (s, e) = scene_numbering.get_indexer_numbering(bestResult.show.indexerid,
                                                                       bestResult.show.indexer,
                                                                       bestResult.season_number,
                                                                       epNo)
                    if bestResult.show.is_anime:
                        a = helpers.get_absolute_number_from_season_and_episode(bestResult.show, s, e)
                        if a:
                            new_absolute_numbers.append(a)

                    new_episode_numbers.append(e)
                    new_season_numbers.append(s)

            # need to do a quick sanity check here.  It's possible that we now have episodes
            # from more than one season (by tvdb numbering), and this is just too much
            # for sickbeard, so we'd need to flag it.
            new_season_numbers = list(set(new_season_numbers))  # remove duplicates
            if len(new_season_numbers) > 1:
                raise InvalidNameException("Scene numbering results episodes from "
                                           "seasons %s, (i.e. more than one) and "
                                           "sickrage does not support this.  "
                                           "Sorry." % (str(new_season_numbers)))

            # I guess it's possible that we'd have duplicate episodes too, so let's
            # eliminate them
            new_episode_numbers = list(set(new_episode_numbers))
            new_episode_numbers.sort()

            # maybe even duplicate absolute numbers so why not do them as well
            new_absolute_numbers = list(set(new_absolute_numbers))
            new_absolute_numbers.sort()

            if new_absolute_numbers:
                bestResult.ab_episode_numbers = new_absolute_numbers

            if new_season_numbers and new_episode_numbers:
                bestResult.episode_numbers = new_episode_numbers
                bestResult.season_number = new_season_numbers[0]

            if bestResult.show.is_scene:
                logger.log(
                    u"Converted parsed result " + bestResult.original_name + " into " + str(bestResult).decode('utf-8',
                                                                                                               'xmlcharrefreplace'),
                    logger.DEBUG)

        # CPU sleep
        time.sleep(0.02)

        return bestResult
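
Note on the re.sub call in the air_date branch above: re.sub(r'[^\d]*', '', air_date) strips every non-digit from the candidate air date, and the assert rejects the digit string '112263', evidently so that the series title "11.22.63" is not mistaken for a date. A standalone sketch:

import re

air_date = "2016.03.25"
digits = re.sub(r'[^\d]*', '', air_date)
print(digits)               # 20160325
print(digits != '112263')   # True: not the digits of the title "11.22.63"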

Example 63

Project: nrvr-commander
Source File: ssh.py
View license
    def __init__(self,
                 fromPath, toPath,
                 fromSshParameters=None, toSshParameters=None,
                 recurseDirectories=False,
                 preserveTimes=True):
        """Create new ScpCommand instance.
        
        Will wait until completed.
        
        Captures returncode, and output.
        
        Either fromPath or toPath is expected to be local, i.e. without user and without IP address.
        Correspondingly either fromSshParameters or toSshParameters must NOT be assigned an SshParameters
        instance and remain default None.
        
        fromPath
            one path or a list of paths.
            
            Absolute paths strongly recommended.
        
        toPath
            one path.
            
            Absolute path strongly recommended.
            
            Must be directory if more than one fromPath.
        
        fromSshParameters
            an SshParameters instance.
        
        toSshParameters
            an SshParameters instance.
        
        recurseDirectories
            a hint for when fromSshParameters."""
        if not _gotPty:
            # cannot use scp if no pty
            raise Exception("must have module pty available to use scp command"
                            ", which is known to be available in Python 2.6 on Linux, but not on Windows")
        #
        if fromSshParameters and toSshParameters:
            raise Exception("cannot copy if both fromSshParameters and toSshParameters, only one or other")
        if not fromSshParameters and not toSshParameters:
            raise Exception("cannot copy if neither fromSshParameters nor toSshParameters, requires one or other")
        #
        if not isinstance(fromPath, (list, tuple)): # should be one string for one path to copy from
            fromPaths = [fromPath]
        else: # should be a list of strings for multiple paths to copy from
            fromPaths = fromPath
        if len(fromPaths) == 0:
            raise Exception("cannot copy zero files, requires at least one")
        if fromSshParameters: # get files from remote
            if len(fromPaths) > 1 or recurseDirectories:
                if not os.path.isdir(toPath):
                    raise Exception("cannot copy multiple files into a file, must copy into a directory, not into %s" % toPath)
            self._fromSpecification = \
                [fromSshParameters.user + "@" + IPAddress.asString(fromSshParameters.ipaddress) + ":" + " ".join(fromPaths)]
            self._toSpecification = toPath
            self._ipaddress = fromSshParameters.ipaddress
            self._pwd = fromSshParameters.pwd
        else: # put files to remote
            anyFromDirectory = False
            for path in fromPaths:
                if os.path.isdir(path):
                    anyFromDirectory = True
                    break
            if anyFromDirectory:
                recurseDirectories = True # mandatory in this case
            self._fromSpecification = fromPaths
            self._toSpecification = \
                toSshParameters.user + "@" + IPAddress.asString(toSshParameters.ipaddress) + ":" + toPath
            self._ipaddress = toSshParameters.ipaddress
            self._pwd = toSshParameters.pwd
        self._args = ["scp"]
        if preserveTimes:
            self._args.append("-p")
        if recurseDirectories:
            self._args.append("-r")
        self._args.extend(self._fromSpecification) # a list because possibly more than one
        self._args.append(self._toSpecification)
        #
        self._output = ""
        self._returncode = None
        #
        # fork and connect child to a pseudo-terminal
        self._pid, self._fd = pty.fork()
        if self._pid == 0:
            # in child process
            os.execvp("scp", self._args)
        else:
            # in parent process
            if self._pwd:
                # if given a password then apply
                promptedForPassword = False
                outputTillPrompt = ""
                # look for password prompt
                while not promptedForPassword:
                    try:
                        newOutput = os.read(self._fd, 1024)
                        if not len(newOutput):
                            # end has been reached
                            # was raise Exception("unexpected end of output from scp")
                            raise Exception("failing to connect for scp\n" + 
                                            outputTillPrompt)
                        # ssh has been observed returning "\r\n" for newline, but we want "\n"
                        newOutput = SshCommand._crLfRegex.sub("\n", newOutput)
                        outputTillPrompt += newOutput
                        if SshCommand._acceptPromptRegex.search(outputTillPrompt):
                            # e.g. "Are you sure you want to continue connecting (yes/no)? "
                            raise Exception("cannot proceed unless having accepted host key\n" +
                                            outputTillPrompt +
                                            '\nE.g. invoke SshCommand.acceptKnownHostKey(SshParameters("{0}",user,pwd)).'.format(self._ipaddress))
                        if SshCommand._pwdPromptRegex.search(outputTillPrompt):
                            # e.g. "10.123.45.67's password: "
                            promptedForPassword = True
                    except EnvironmentError:
                        # e.g. "@    WARNING: REMOTE HOST IDENTIFICATION HAS CHANGED!     @" and closing
                        raise Exception("failing to connect for scp\n" + 
                                        outputTillPrompt)
                os.write(self._fd, self._pwd + "\n")
            # look for output
            endOfOutput = False
            outputSincePrompt = ""
            try:
                while not endOfOutput:
                    try:
                        newOutput = os.read(self._fd, 1024)
                        if len(newOutput):
                            outputSincePrompt += newOutput
                        else:
                            # end has been reached
                            endOfOutput = True
                    except EnvironmentError as e:
                        # some ideas maybe at http://bugs.python.org/issue5380
                        if e.errno == 5: # errno.EIO:
                            # seen when pty closes OSError: [Errno 5] Input/output error
                            endOfOutput = True
                        else:
                            # we accept what we got so far, for now
                            endOfOutput = True
            finally:
                # remove any leading space (maybe there after "password:" prompt) and
                # remove first newline (is there after entering password and "\n")
                self._output = re.sub(r"^\s*?\n(.*)$", r"\1", outputSincePrompt)
                #
                # get returncode
                try:
                    ignorePidAgain, waitEncodedStatusIndication = os.waitpid(self._pid, 0)
                    if os.WIFEXITED(waitEncodedStatusIndication):
                        # normal exit(status) call
                        self._returncode = os.WEXITSTATUS(waitEncodedStatusIndication)
                        # raise an exception if there is a reason
                        exceptionMessage = ""
                        if self._returncode:
                            exceptionMessage += "returncode: " + str(self._returncode)
                        if exceptionMessage:
                            commandDescription = "scp from:\n\t" + str(self._fromSpecification)
                            commandDescription += "\nto:\n\t" + self._toSpecification
                            commandDescription += "\nargs:\n\t" + str(self._args)
                            exceptionMessage = commandDescription + "\n" + exceptionMessage
                            exceptionMessage += "\noutput:\n" + self._output
                            raise ScpCommandException(exceptionMessage)
                    else:
                        # e.g. os.WIFSIGNALED or os.WIFSTOPPED
                        self._returncode = -1
                        raise ScpCommandException("scp did not exit normally")
                except OSError:
                    # supposedly can occur
                    self._returncode = -1
                    raise ScpCommandException("scp did not exit normally")

Example 64

Project: utter-pool
Source File: __init__.py
View license
def linkify(text, callbacks=DEFAULT_CALLBACKS, skip_pre=False,
            parse_email=False, tokenizer=HTMLSanitizer):
    """Convert URL-like strings in an HTML fragment to links.

    linkify() converts strings that look like URLs or domain names in a
    blob of text that may be an HTML fragment to links, while preserving
    (a) links already in the string, (b) urls found in attributes, and
    (c) email addresses.
    """
    text = force_unicode(text)

    if not text:
        return u''

    parser = html5lib.HTMLParser(tokenizer=tokenizer)

    forest = parser.parseFragment(text)

    def replace_nodes(tree, new_frag, node):
        new_tree = parser.parseFragment(new_frag)
        for n in new_tree.childNodes:
            # Prevent us from re-parsing new links as existing links.
            if n.name == 'a':
                n._seen = True
            tree.insertBefore(n, node)
        tree.removeChild(node)
        # Return the number of new nodes.
        return len(new_tree.childNodes) - 1

    def strip_wrapping_parentheses(fragment):
        """Strips wrapping parentheses.

        Returns a tuple of the following format::

            (string stripped from wrapping parentheses,
             count of stripped opening parentheses,
             count of stripped closing parentheses)
        """
        opening_parentheses = closing_parentheses = 0
        # Count consecutive opening parentheses
        # at the beginning of the fragment (string).
        for char in fragment:
            if char == '(':
                opening_parentheses += 1
            else:
                break

        if opening_parentheses:
            newer_frag = ''
            # Cut the consecutive opening brackets from the fragment.
            fragment = fragment[opening_parentheses:]
            # Reverse the fragment for easier detection of parentheses
            # inside the URL.
            reverse_fragment = fragment[::-1]
            skip = False
            for char in reverse_fragment:
                # Remove the closing parenthesis if it has a matching
                # opening parenthesis (they are balanced).
                if (char == ')' and
                        closing_parentheses < opening_parentheses and
                        not skip):
                    closing_parentheses += 1
                    continue
                # Do not remove ')' from the URL itself.
                elif char != ')':
                    skip = True
                newer_frag += char
            fragment = newer_frag[::-1]

        return fragment, opening_parentheses, closing_parentheses

    def apply_callbacks(attrs, new):
        for cb in callbacks:
            attrs = cb(attrs, new)
            if attrs is None:
                return None
        return attrs

    def linkify_nodes(tree, parse_text=True):
        # I know this isn't Pythonic, but we're sometimes mutating
        # tree.childNodes, which ends up breaking the loop and causing us to
        # reparse code.
        children = len(tree.childNodes)
        current = 0  # A pointer to the "current" node.
        while current < children:
            node = tree.childNodes[current]
            if node.type == NODE_TEXT and parse_text:
                new_frag = _render(node)
                # Look for email addresses?
                if parse_email:
                    new_frag = re.sub(email_re, email_repl, new_frag)
                    if new_frag != _render(node):
                        adj = replace_nodes(tree, new_frag, node)
                        children += adj
                        current += adj
                        linkify_nodes(tree)
                        continue
                new_frag = re.sub(url_re, link_repl, new_frag)
                if new_frag != _render(node):
                    adj = replace_nodes(tree, new_frag, node)
                    children += adj
                    current += adj
            elif node.name == 'a' and not getattr(node, '_seen', False):
                if 'href' in node.attributes:
                    attrs = node.attributes
                    _text = attrs['_text'] = ''.join(_render(c) for
                                                     c in node.childNodes)
                    attrs = apply_callbacks(attrs, False)
                    if attrs is not None:
                        text = force_unicode(attrs.pop('_text'))
                        node.attributes = attrs
                        for n in node.childNodes:
                            node.removeChild(n)
                        node.insertText(text)
                        node._seen = True
                    else:
                        replace_nodes(tree, _text, node)
            elif skip_pre and node.name == 'pre':
                linkify_nodes(node, False)
            elif not getattr(node, '_seen', False):
                linkify_nodes(node)
            current += 1

    def email_repl(match):
        addr = match.group(0).replace('"', '&quot;')
        link = {
            '_text': addr,
            'href': 'mailto:%s' % addr,
        }
        link = apply_callbacks(link, True)

        if link is None:
            return addr

        _href = link.pop('href')
        _text = link.pop('_text')

        repl = '<a href="%s" %s>%s</a>'
        attribs = ' '.join('%s="%s"' % (k, v) for k, v in link.items())
        return repl % (_href, attribs, _text)

    def link_repl(match):
        url = match.group(0)
        open_brackets = close_brackets = 0
        if url.startswith('('):
            url, open_brackets, close_brackets = (
                    strip_wrapping_parentheses(url)
            )
        end = u''
        m = re.search(punct_re, url)
        if m:
            end = m.group(0)
            url = url[0:m.start()]
        if re.search(proto_re, url):
            href = url
        else:
            href = u''.join([u'http://', url])

        link = {
            '_text': url,
            'href': href,
        }

        link = apply_callbacks(link, True)

        if link is None:
            return url

        _text = link.pop('_text')
        _href = link.pop('href')

        repl = u'%s<a href="%s" %s>%s</a>%s%s'
        attribs = ' '.join('%s="%s"' % (k, v) for k, v in link.items())

        return repl % ('(' * open_brackets,
                       _href, attribs, _text, end,
                       ')' * close_brackets)

    try:
        linkify_nodes(forest)
    except (RECURSION_EXCEPTION), e:
        # If we hit the max recursion depth, just return what we've got.
        log.error('Probable recursion error: %r' % e, exc_info=sys.exc_info())

    return _render(forest)
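
The important re.sub detail in linkify() is that the replacement argument is a function, so each match can be rewritten individually. A minimal sketch of that pattern; the regex below is a simplified stand-in, not the url_re this module actually defines elsewhere:

import re

url_re = re.compile(r'\bhttps?://[^\s<>"]+', re.IGNORECASE)   # simplified stand-in

def link_repl(match):
    url = match.group(0)
    return '<a href="%s">%s</a>' % (url, url)

text = "see https://example.com for details"
print(re.sub(url_re, link_repl, text))
# see <a href="https://example.com">https://example.com</a> for details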

Example 65

Project: tahoe-lafs
Source File: fixups.py
View license
def initialize():
    global done
    import sys
    if sys.platform != "win32" or done:
        return True
    done = True

    import codecs, re
    from ctypes import WINFUNCTYPE, WinError, windll, POINTER, byref, c_int, get_last_error
    from ctypes.wintypes import BOOL, HANDLE, DWORD, UINT, LPWSTR, LPCWSTR, LPVOID

    from allmydata.util import log
    from allmydata.util.encodingutil import canonical_encoding

    # <https://msdn.microsoft.com/en-us/library/ms680621%28VS.85%29.aspx>
    SetErrorMode = WINFUNCTYPE(
        UINT,  UINT,
        use_last_error=True
    )(("SetErrorMode", windll.kernel32))

    SEM_FAILCRITICALERRORS = 0x0001
    SEM_NOOPENFILEERRORBOX = 0x8000

    SetErrorMode(SEM_FAILCRITICALERRORS | SEM_NOOPENFILEERRORBOX)

    original_stderr = sys.stderr

    # If any exception occurs in this code, we'll probably try to print it on stderr,
    # which makes for frustrating debugging if stderr is directed to our wrapper.
    # So be paranoid about catching errors and reporting them to original_stderr,
    # so that we can at least see them.
    def _complain(message):
        print >>original_stderr, isinstance(message, str) and message or repr(message)
        log.msg(message, level=log.WEIRD)

    # Work around <http://bugs.python.org/issue6058>.
    codecs.register(lambda name: name == 'cp65001' and codecs.lookup('utf-8') or None)

    # Make Unicode console output work independently of the current code page.
    # This also fixes <http://bugs.python.org/issue1602>.
    # Credit to Michael Kaplan <https://blogs.msdn.com/b/michkap/archive/2010/04/07/9989346.aspx>
    # and TZOmegaTZIOY
    # <http://stackoverflow.com/questions/878972/windows-cmd-encoding-change-causes-python-crash/1432462#1432462>.
    try:
        # <https://msdn.microsoft.com/en-us/library/ms683231(VS.85).aspx>
        # HANDLE WINAPI GetStdHandle(DWORD nStdHandle);
        # returns INVALID_HANDLE_VALUE, NULL, or a valid handle
        #
        # <https://msdn.microsoft.com/en-us/library/aa364960(VS.85).aspx>
        # DWORD WINAPI GetFileType(DWORD hFile);
        #
        # <https://msdn.microsoft.com/en-us/library/ms683167(VS.85).aspx>
        # BOOL WINAPI GetConsoleMode(HANDLE hConsole, LPDWORD lpMode);

        GetStdHandle = WINFUNCTYPE(
            HANDLE,  DWORD,
            use_last_error=True
        )(("GetStdHandle", windll.kernel32))

        STD_OUTPUT_HANDLE = DWORD(-11)
        STD_ERROR_HANDLE  = DWORD(-12)

        GetFileType = WINFUNCTYPE(
            DWORD,  DWORD,
            use_last_error=True
        )(("GetFileType", windll.kernel32))

        FILE_TYPE_CHAR   = 0x0002
        FILE_TYPE_REMOTE = 0x8000

        GetConsoleMode = WINFUNCTYPE(
            BOOL,  HANDLE, POINTER(DWORD),
            use_last_error=True
        )(("GetConsoleMode", windll.kernel32))

        INVALID_HANDLE_VALUE = DWORD(-1).value

        def not_a_console(handle):
            if handle == INVALID_HANDLE_VALUE or handle is None:
                return True
            return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
                    or GetConsoleMode(handle, byref(DWORD())) == 0)

        old_stdout_fileno = None
        old_stderr_fileno = None
        if hasattr(sys.stdout, 'fileno'):
            old_stdout_fileno = sys.stdout.fileno()
        if hasattr(sys.stderr, 'fileno'):
            old_stderr_fileno = sys.stderr.fileno()

        STDOUT_FILENO = 1
        STDERR_FILENO = 2
        real_stdout = (old_stdout_fileno == STDOUT_FILENO)
        real_stderr = (old_stderr_fileno == STDERR_FILENO)

        if real_stdout:
            hStdout = GetStdHandle(STD_OUTPUT_HANDLE)
            if not_a_console(hStdout):
                real_stdout = False

        if real_stderr:
            hStderr = GetStdHandle(STD_ERROR_HANDLE)
            if not_a_console(hStderr):
                real_stderr = False

        if real_stdout or real_stderr:
            # <https://msdn.microsoft.com/en-us/library/windows/desktop/ms687401%28v=vs.85%29.aspx>
            # BOOL WINAPI WriteConsoleW(HANDLE hOutput, LPWSTR lpBuffer, DWORD nChars,
            #                           LPDWORD lpCharsWritten, LPVOID lpReserved);

            WriteConsoleW = WINFUNCTYPE(
                BOOL,  HANDLE, LPWSTR, DWORD, POINTER(DWORD), LPVOID,
                use_last_error=True
            )(("WriteConsoleW", windll.kernel32))

            class UnicodeOutput:
                def __init__(self, hConsole, stream, fileno, name):
                    self._hConsole = hConsole
                    self._stream = stream
                    self._fileno = fileno
                    self.closed = False
                    self.softspace = False
                    self.mode = 'w'
                    self.encoding = 'utf-8'
                    self.name = name
                    if hasattr(stream, 'encoding') and canonical_encoding(stream.encoding) != 'utf-8':
                        log.msg("%s: %r had encoding %r, but we're going to write UTF-8 to it" %
                                (name, stream, stream.encoding), level=log.CURIOUS)
                    self.flush()

                def isatty(self):
                    return False
                def close(self):
                    # don't really close the handle, that would only cause problems
                    self.closed = True
                def fileno(self):
                    return self._fileno
                def flush(self):
                    if self._hConsole is None:
                        try:
                            self._stream.flush()
                        except Exception, e:
                            _complain("%s.flush: %r from %r" % (self.name, e, self._stream))
                            raise

                def write(self, text):
                    try:
                        if self._hConsole is None:
                            if isinstance(text, unicode):
                                text = text.encode('utf-8')
                            self._stream.write(text)
                        else:
                            if not isinstance(text, unicode):
                                text = str(text).decode('utf-8')
                            remaining = len(text)
                            while remaining > 0:
                                n = DWORD(0)
                                # There is a shorter-than-documented limitation on the length of the string
                                # passed to WriteConsoleW (see #1232).
                                retval = WriteConsoleW(self._hConsole, text, min(remaining, 10000), byref(n), None)
                                if retval == 0:
                                    raise IOError("WriteConsoleW failed with WinError: %s" % (WinError(get_last_error()),))
                                if n.value == 0:
                                    raise IOError("WriteConsoleW returned %r, n.value = 0" % (retval,))
                                remaining -= n.value
                                if remaining == 0: break
                                text = text[n.value:]
                    except Exception, e:
                        _complain("%s.write: %r" % (self.name, e))
                        raise

                def writelines(self, lines):
                    try:
                        for line in lines:
                            self.write(line)
                    except Exception, e:
                        _complain("%s.writelines: %r" % (self.name, e))
                        raise

            if real_stdout:
                sys.stdout = UnicodeOutput(hStdout, None, STDOUT_FILENO, '<Unicode console stdout>')
            else:
                sys.stdout = UnicodeOutput(None, sys.stdout, old_stdout_fileno, '<Unicode redirected stdout>')

            if real_stderr:
                sys.stderr = UnicodeOutput(hStderr, None, STDERR_FILENO, '<Unicode console stderr>')
            else:
                sys.stderr = UnicodeOutput(None, sys.stderr, old_stderr_fileno, '<Unicode redirected stderr>')
    except Exception, e:
        _complain("exception %r while fixing up sys.stdout and sys.stderr" % (e,))

    # This works around <http://bugs.python.org/issue2128>.

    # <https://msdn.microsoft.com/en-us/library/windows/desktop/ms683156%28v=vs.85%29.aspx>
    GetCommandLineW = WINFUNCTYPE(
        LPWSTR,
        use_last_error=True
    )(("GetCommandLineW", windll.kernel32))

    # <https://msdn.microsoft.com/en-us/library/windows/desktop/bb776391%28v=vs.85%29.aspx>
    CommandLineToArgvW = WINFUNCTYPE(
        POINTER(LPWSTR),  LPCWSTR, POINTER(c_int),
        use_last_error=True
    )(("CommandLineToArgvW", windll.shell32))

    argc = c_int(0)
    argv_unicode = CommandLineToArgvW(GetCommandLineW(), byref(argc))
    if argv_unicode is None:
        raise WinError(get_last_error())

    # Because of <http://bugs.python.org/issue8775> (and similar limitations in
    # twisted), the 'bin/tahoe' script cannot invoke us with the actual Unicode arguments.
    # Instead it "mangles" or escapes them using \x7F as an escape character, which we
    # unescape here.
    def unmangle(s):
        return re.sub(ur'\x7F[0-9a-fA-F]*\;', lambda m: unichr(int(m.group(0)[1:-1], 16)), s)

    try:
        argv = [unmangle(argv_unicode[i]).encode('utf-8') for i in xrange(0, argc.value)]
    except Exception, e:
        _complain("%s:  could not unmangle Unicode arguments.\n%r"
                  % (sys.argv[0], [argv_unicode[i] for i in xrange(0, argc.value)]))
        raise

    # Take only the suffix with the same number of arguments as sys.argv.
    # This accounts for anything that can cause initial arguments to be stripped,
    # for example, the Python interpreter or any options passed to it, or runner
    # scripts such as 'coverage run'. It works even if there are no such arguments,
    # as in the case of a frozen executable created by bb-freeze or similar.

    sys.argv = argv[-len(sys.argv):]
    if sys.argv[0].endswith('.pyscript'):
        sys.argv[0] = sys.argv[0][:-9]
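
The unmangle() helper above is a compact use of re.sub with a lambda replacement: each "\x7F<hex>;" escape is decoded back to the character it encodes. A minimal sketch of the same idea, written in Python 3 syntax (chr instead of unichr) with an invented mangled string:

import re

def unmangle(s):
    # decode "\x7F<hex>;" escape sequences back to the code points they encode
    return re.sub(u'\x7F([0-9a-fA-F]+);', lambda m: chr(int(m.group(1), 16)), s)

mangled = u"caf\x7Fe9; latte"      # hypothetical mangled argument for "café latte"
print(unmangle(mangled))           # café latte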

Example 66

Project: jcvi
Source File: ahrd.py
View license
def fix_text(s, ignore_sym_pat=False):

    if not ignore_sym_pat:
        # Fix descriptions like D7TDB1 (
        s = re.sub("([A-Z0-9]){6} \(", "", s)
        s = s.split(";")[0]

    # Fix parentheses containing names
    s = s.translate(None, "[]")
    s = s.replace("(-)", "[-]")
    s = s.replace("(+)", "[+]")
    s = s.replace("(Uncharacterized protein)", "")
    if not ignore_sym_pat:
        s = s.translate(None, "()")

    # fix minor typos, seen in `autonaming` output
    # change 'protei ' to 'protein '
    # change 'hypthetical' to 'hypothetical'
    # fix string starting with 'ytochrome'
    if 'protei ' in s: s = s.replace('protei ', 'protein ')
    if 'hypthetical' in s: s = s.replace('hypthetical', 'hypothetical')
    if s.startswith('ytochrome'): s = s.replace('ytochrome', 'cytochrome')

    # before trimming off at the first ";", check if name has glycosidic
    # linkage information (e.g. 1,3 or 1,4). If so, also check if multiple
    # linkages are separated by ";". If so, replace ";" by "-"
    m = re.findall(glycosidic_link_pat, s)
    if m and ";" in s:
        s = re.sub(";\s*", "-", s)

    # remove underscore from description
    s = re.sub("_", " ", s)

    # Cellular locations
    # Any word that matches e.g. AT5G54690
    # Any word that matches e.g. Os02g0234800
    # (fragment)
    # UPF
    # Remove 'DDB_G\d+' ID
    # '_At[0-9]+g[0-9]+' to ''
    for pat in (loc_pat, osg_pat, frag_pat, upf_pat, ddb_pat):
        # below is a hack since word boundaries don't work on /
        s = s.strip() + " "
        s = re.sub(pat, "", s)

    # &apos;? => '
    s = re.sub(apos_pat, "'", s)
    # &gt => none
    s = re.sub(gt_pat, "", s)
    # reduce runs such as -- '''
    s = re.sub(r"[-]+", "-", s)
    s = re.sub(r"[']+", "'", s)

    s = s.strip()

    # -like to -like protein
    s = re.sub(like_pat, "-like protein", s)

    # 'repeat$' to 'repeat protein'
    if re.search(repeat_pat, s):
        s += "-containing protein"

    # 'binding$' to 'binding protein'
    if re.search(binding_pat, s):
        s += " protein"
        if re.match(Protein_pat, s):
            s = re.sub(Protein_pat, "", s)

    # 'domain$' to 'domain-containing protein'
    if re.search(domain_pat, s):
        s += "-containing protein"
        if re.search(r"-domain", s):
            s = re.sub(r"-domain", " domain", s)
        if re.match(Protein_pat, s):
            s = re.sub(Protein_pat, "", s)

    # 'related$' to '-like protein'
    if re.search(related_pat, s):
        s = re.sub(related_pat, "-like protein", s)
        if re.match(Protein_pat, s) and not re.match(r"Protein kinase", s):
            s = re.sub(Protein_pat, "", s)

    # '[0-9]+ homolog' to '-like protein'
    if re.search(homolog_pat1, s):
        s = re.sub(homolog_pat1, "-like protein", s)
        if re.match(Protein_pat, s):
            s = re.sub(Protein_pat, "", s)

    # 'Protein\s+(.*)\s+homolog' to '$1-like protein'
    match = re.search(homolog_pat2, s)
    if match and not re.match(r"Protein kinase", s):
        ret = match.group(1)
        s = re.sub(homolog_pat2, ret + "-like protein", s)
        s = re.sub(r"^\s+", "", s)
        s = s.capitalize()

    # 'homolog protein' to '-like protein'
    # 'homologue$' to '-like protein'
    # 'homolog$' to '-like protein'
    for pat in (homolog_pat3, homolog_pat5, homolog_pat6):
        if re.search(pat, s):
            s = re.sub(pat, "-like protein", s)

    # 'Agenet domain-containing protein / bromo-adjacent homology (BAH) domain-containing protein'
    # to 'Agenet and bromo-adjacent homology (BAH) domain-containing protein'
    if re.search(agenet_pat, s):
        s = re.sub(agenet_pat, "Agenet and ", s)

    # plural to singular
    if re.search(plural_pat, s):
        if (s.find('biogenesis') == -1 and s.find('Topors') == -1) or (not re.search(with_and_pat, s)):
            s = re.sub(r"s$", "", s)

    # 'like_TBP' or 'likeTBP' to 'like TBP'
    if re.search(tbp_pat, s):
        s = re.sub(tbp_pat, "like TBP", s)

    # 'protein protein' to 'protein'
    if re.search(prot_pat, s):
        s = re.sub(prot_pat, "protein", s)

    # 'dimerisation' to 'dimerization'
    if re.search(dimer_pat, s):
        s = re.sub(dimer_pat, "dimerization", s)

    # Any AHRD that matches e.g. "AT5G54690-like protein"
    # Any AHRD that contains the words '^Belongs|^Encoded|^Expression|^highly'
    for pat in (atg_pat, athila_pat1):
        if re.search(pat, s):
            s = Unknown

    # remove 'arabidopsis[ thaliana]' and/or embedded Atg IDs
    for pat in (atg_id_pat, athila_pat2, athila_pat3, athila_pat4):
        # below is a hack since word boundaries don't work on /
        s = s.strip() + " "
        s = re.sub(pat, "", s)

    # remove "\s+LENGTH=\d+" from TAIR deflines
    if re.search(length_pat, s):
        s = re.sub(length_pat, "", s)

    # if name has a dot followed by a space (". ") in it and contains multiple
    # parts separated by a comma, strip name starting from first occurrence of ","
    if re.search(r"\. ", s):
        if re.search(r",", s):
            s = s.split(",")[0]

    # if name contains any of the disallowed words,
    # remove word occurrence from name
    # if name contains references to any other organism, trim name upto
    # that occurrence
    for pat in (disallow_pat, organism_pat):
        if re.search(pat, s):
            s = re.sub(pat, "", s)

    s = s.strip()

    if not ignore_sym_pat:
        # 'homolog \d+' to '-like protein'
        if re.search(homolog_pat4, s):
            s = re.sub(homolog_pat4, "", s)

        # Trailing protein numeric copy (e.g. Myb 1)
        if re.search(trail_pat, s):
            s = re.sub(trail_pat, "", s)

        # if name is entirely a gene symbol-like (all capital letters, maybe followed by numbers)
        # add a "-like protein" at the end
        if (re.search(sym_pat, s) or re.search(lc_sym_pat, s)) \
                and not re.search(spada_pat, s):
            s = s + "-like protein"

        # if gene symbol in parentheses at EOL, remove symbol
        if re.search(eol_sym_pat, s):
            s = re.sub(eol_sym_pat, "", s)

        # if name terminates at a symbol([^A-Za-z0-9_]), trim it off
        if re.search(r"\W{1,}$", s) and not re.search(r"\)$", s):
            s = re.sub("\W{1,}$", "", s)

        if "uncharacterized" in s:
            s = "uncharacterized protein"

    # change sulfer to sulfur
    if re.search(sulfer_pat, s):
        s = re.sub(sulfer_pat, "sulfur", s)

    # change sulph to sulf
    if re.search(sulph_pat, s):
        s = re.sub(sulph_pat, "sulf", s)

    # change monoxy to monooxy
    if re.search(monoxy_pat, s):
        s = re.sub(monoxy_pat, "monooxy", s)

    # change proteine to protein
    if re.search(proteine_pat, s):
        s = re.sub(proteine_pat, "protein", s)

    # change signalling to signaling
    if re.search(signalling_pat, s):
        s = re.sub(signalling_pat, "signaling", s)

    # change aluminium to aluminum
    if re.search(aluminium_pat, s):
        s = re.sub(aluminium_pat, "aluminum", s)

    # change haem to heme
    if re.search(haem_pat, s):
        s = re.sub(haem_pat, "heme", s)

    # change haemo to hemo
    if re.search(haemo_pat, s):
        s = re.sub(haemo_pat, "hemo", s)

    # change assessory to accessory
    if re.search(assessory_pat, s):
        s = re.sub(assessory_pat, "accessory", s)

    # change -ise/-ised/-isation to -ize/-ized/-ization
    match = re.search(ise_pat, s)
    if match:
        ret = match.group(1)
        if match.group(2):
            suff = match.group(2)
            s = re.sub(ise_pat, "{0}ize{1}".format(ret, suff), s)
        else:
            s = re.sub(ise_pat, "{0}ize".format(ret), s)

    match = re.search(isation_pat, s)
    if match:
        ret = match.group(1)
        s = re.sub(isation_pat, "{0}ization".format(ret), s)

    # change -bre to -ber
    match = re.search(bre_pat, s)
    if match:
        ret = match.group(1)
        s = re.sub(bre_pat, "{0}ber".format(ret), s)

    if not s.startswith(Hypothetical):
        # 'Candidate|Hypothetical|Novel|Predicted|Possible|Probable|Uncharacterized' to 'Putative'
        if s.startswith('Uncharacterized') and any(pat in s for pat in ('UCP', 'UPF', 'protein')):
            pass
        else:
            if re.search(put_pat, s):
                s = re.sub(put_pat, "Putative", s)

    """
    case (qr/^Histone-lysine/) { $ahrd =~ s/,\s+H\d{1}\s+lysine\-\d+//gs; }
    """
    sl = s.lower()

    # Any mention of `clone` or `contig` is not informative
    if "clone" in sl or "contig" in sl:
        s = Unknown

    # All that's left is `protein` is not informative
    if sl in ("protein", "protein, putative", ""):
        s = Unknown

    if Unknown.lower() in sl:
        s = Unknown

    if "FUNCTIONS IN".lower() in sl and "unknown" in sl:
        s = Unknown

    if "LOCATED IN".lower() in sl:
        s = Unknown

    s = re.sub(r"[,]*\s+putative$", "", s)

    if s == Unknown or s.strip() == "protein":
        s = Hypothetical

    # Compact all spaces
    s = ' '.join(s.split())

    assert s.strip()

    return s
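
Most of the cleanup steps above share one shape: re.search to test whether a pattern applies, then re.sub to rewrite, sometimes reusing a captured group in the replacement. A minimal sketch of that pattern; the regex below is only a stand-in for ise_pat, which is defined elsewhere in ahrd.py and not shown here:

import re

ise_pat = re.compile(r"\b(\w+)ise(d)?\b")     # stand-in: word stems ending in -ise or -ised

s = "a protein localised to the membrane"
match = re.search(ise_pat, s)
if match:
    stem, suffix = match.group(1), match.group(2) or ""
    s = re.sub(ise_pat, "{0}ize{1}".format(stem, suffix), s)
print(s)   # a protein localized to the membrane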

Example 67

Project: WIPSTER
Source File: crits.py
View license
def submit_to_crits(post_data, last_sample, crits_ta, savename=""):

    crits_result = {}
    crits_str_result = ""
    crits_upload_dict = {}
    data = {}
    final_data = {}
    search_res = {}

    for k, v in post_data.iteritems():
        if "chk" in k and v=="on":
            chk_input_key = re.sub("_chk", "", k)
            if chk_input_key in post_data:  # Make sure there's an input that matches the checkbox

                #Create or clear the dict if it already exists
                data, final_data = clear_upload_dicts(data, final_data)
                search_res.clear()

                if "_domain_" in chk_input_key:
                    data['type'] = "domain"
                elif "_ip_" in chk_input_key:
                    data['type'] = "ip"
                elif "_vt_" in chk_input_key:
                    data['type'] = "vt"
                elif "_command_" in chk_input_key:
                    data['type'] = "command"
                elif "_ua_" in chk_input_key:
                    data['type'] = "ua"
                else:
                    data['type'] = "event"

                data['val'] = post_data[chk_input_key]
                if not data['val']: #If the input is empty, check the next form box
                    continue

                if "ta_" in chk_input_key:
                    data['ta'] = True
                else:
                    data['ta'] = False

                # Search if the object already exists. If it does, pull in the JSON, otherwise, add it to CRITs

                data['search'] = data['val']

                search_res = search_crits(data)

                # Set types for relationships later on
                if data['type'] == "domain":
                    crits_type = "Domain"
                elif data['type'] == "ip":
                    crits_type = "IP"
                else:
                    crits_type = "Event"


                if search_res['objects']: # If result found

                    if data['type'] not in crits_upload_dict: # If a list for that type does not yet exist, create it
                        crits_upload_dict[data['type']] = []
 
                    crits_upload_dict[data['type']].append({"id": search_res['objects'][0]['_id'],
                                                                     "type": crits_type})


                else: # If no result found in search, add it to CRITs

                    final_data = build_data(data, last_sample)
                    crits_upload_res = upload_object(final_data)
                    crits_str_result += "uploaded " + data['type'] + "\r\n\r\n" + str(crits_upload_res) + "\r\n\r\n*************\r\n\r\n"


                    if data['type'] not in crits_upload_dict:  # If a list of that type does not yet exist, create it
                        crits_upload_dict[data['type']] = []

                    crits_upload_dict[data['type']].append({"id": crits_upload_res['id'],
                                                            "type": crits_type})
                    

    #################################################
    #### Handle Uploading the Ticket as an Event ####
    #################################################

    #Create or clear the dict if it already exists
    data, final_data = clear_upload_dicts(data, final_data)
    search_res.clear()

    data['type'] = "ticket"

    # Search if the ticket already exists. If it does, pull in the JSON, otherwise, add it to CRITs
    data['search'] = last_sample.ticket
    search_res = search_crits(data)

    if 'objects' in search_res: # If an event with this Ticket # is found to exist, use its existing ID
        if search_res['objects']:
            crits_upload_dict['ticket'] = [{'id': search_res['objects'][0]['_id'],
                                            'type': 'Event'}]
#        crits_upload_dict['ticket'][0]['id'] = search_res['objects'][0]['_id']
#        crits_upload_dict['ticket'][0]['type'] = 'Event'
        else: # Otherwise, upload it
            final_data = build_data(data, last_sample)
            crits_upload_res = upload_object(final_data)

            crits_upload_dict['ticket'] = [{'id': crits_upload_res['id'],
                                            'type': 'Event'}]
            crits_str_result += "\r\nUploaded Ticket: " + str(crits_upload_dict['ticket'][0]) + "\r\n\r\n***********************\r\n"


    ############################################
    #### Handle uploading the sample itself ####
    ############################################

    #Create or clear the dict if it already exists
    data, final_data = clear_upload_dicts(data, final_data)
    search_res.clear()

    data['type'] = "sample"

    # Search if the sample already exists. If it does, pull the JSON, otherwise, add it to CRITs
    data['search'] = last_sample.md5
    search_res = search_crits(data)

    if 'objects' in search_res:
        if search_res['objects']:
            crits_upload_dict['sample'] = [{'id': search_res['objects'][0]['_id'],
                                            'type': 'Sample'}]

        else:
            # Need to handle renaming the sample to remove the .MAL when adding to CRITs
            # Before calling build_data()

            savename = "sanalysis/static/"+savename
            newname = threatanalyzer.remove_mal(savename) # Copy the file without .MAL - Removed later in main method
            
            final_data = build_data(data, last_sample, newname=newname)
            crits_upload_res = upload_object(final_data)

            rem_tmp = threatanalyzer.remove_tmp_file(newname) # Remove the copy of the file that doesn't have .MAL


            crits_upload_dict['sample'] = [{'id': crits_upload_res['id'],
                                            'type': 'Sample'}]

            crits_str_result += "\r\nUploaded Sample: \r\n" + str(crits_upload_dict['sample'][0]) + "\r\n\r\n****************\r\n\r\n"

    ########################################################
    #### Handle uploading metadata of any dropped files ####
    ########################################################
    
    if 'crits_dropped' in crits_ta:
        crits_upload_dict['sample_metadata'] = []
        for dropped in crits_ta['crits_dropped']:
        
            data, final_data = clear_upload_dicts(data, final_data)
            search_res.clear()
            
            data['type'] = "sample_metadata"
            data['val'] = dropped
            
            data['search'] = dropped['md5']
            search_res = search_crits(data)
            
            if 'objects' in search_res:
                if search_res['objects']:
                    crits_upload_dict['sample_metadata'].append({'id': search_res['objects'][0]['_id'],
                                                             'type': 'Sample'})
                                                         
                else:
                    final_data = build_data(data, last_sample)
                    crits_upload_res = upload_object(final_data)
                    
                    crits_upload_dict['sample_metadata'].append({'id': crits_upload_res['id'],
                                                             'type': 'Sample'})
                    crits_str_result += "\r\nUploaded Sample MetaData: \r\n" + str(crits_upload_dict['sample_metadata'][-1]) + "\r\n\r\n****************\r\n\r\n"



    ##################################
    #### Handle all relationships ####
    ##################################

    data, final_data = clear_upload_dicts(data, final_data) #Clear dicts

    crits_str_result += "\r\n\r\n****************crits_upload_dict*****************\r\n\r\n" + str(crits_upload_dict)

    relation_res = relate_objects(crits_upload_dict, last_sample)

    crits_str_result += "\r\n\r\n****************relation_res*****************\r\n\r\n" + str(relation_res)

    return crits_str_result
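
The single re.sub call above only maps a checkbox field name back to the input field it toggles. A minimal sketch with invented form field names:

import re

post_data = {"src_domain_1_chk": "on", "src_domain_1": "example.com"}

for k, v in post_data.items():
    if "chk" in k and v == "on":
        chk_input_key = re.sub("_chk", "", k)      # "src_domain_1_chk" -> "src_domain_1"
        if chk_input_key in post_data:
            print(chk_input_key, post_data[chk_input_key])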

Example 68

View license
def _parse_single_sequence_example_raw(serialized,
                                       context_sparse_keys=None,
                                       context_sparse_types=None,
                                       context_dense_keys=None,
                                       context_dense_types=None,
                                       context_dense_defaults=None,
                                       context_dense_shapes=None,
                                       feature_list_sparse_keys=None,
                                       feature_list_sparse_types=None,
                                       feature_list_dense_keys=None,
                                       feature_list_dense_types=None,
                                       feature_list_dense_shapes=None,
                                       feature_list_dense_defaults=None,
                                       debug_name=None,
                                       name=None):
  """Parses a single `SequenceExample` proto.

  Args:
    serialized: A scalar (0-D Tensor) of type string, a single binary
      serialized `SequenceExample` proto.
    context_sparse_keys: A list of string keys in the `SequenceExample`'s
      features.  The results for these keys will be returned as
      `SparseTensor` objects.
    context_sparse_types: A list of `DTypes`, the same length as `sparse_keys`.
      Only `tf.float32` (`FloatList`), `tf.int64` (`Int64List`),
      and `tf.string` (`BytesList`) are supported.
    context_dense_keys: A list of string keys in the examples' features.
      The results for these keys will be returned as `Tensor`s
    context_dense_types: A list of DTypes, same length as `context_dense_keys`.
      Only `tf.float32` (`FloatList`), `tf.int64` (`Int64List`),
      and `tf.string` (`BytesList`) are supported.
    context_dense_defaults: A dict mapping string keys to `Tensor`s.
      The keys of the dict must match the context_dense_keys of the feature.
    context_dense_shapes: A list of tuples, same length as `context_dense_keys`.
      The shape of the data for each context_dense feature referenced by
      `context_dense_keys`.  Required for any input tensors identified by
      `context_dense_keys` whose shapes are anything other than `[]` or `[1]`.
    feature_list_sparse_keys: A list of string keys in the `SequenceExample`'s
      feature_lists.  The results for these keys will be returned as
      `SparseTensor` objects.
    feature_list_sparse_types: A list of `DTypes`, same length as `sparse_keys`.
      Only `tf.float32` (`FloatList`), `tf.int64` (`Int64List`),
      and `tf.string` (`BytesList`) are supported.
    feature_list_dense_keys: A list of string keys in the `SequenceExample`'s
      features_lists. The results for these keys will be returned as `Tensor`s.
    feature_list_dense_types: A list of `DTypes`, same length as
      `feature_list_dense_keys`.  Only `tf.float32` (`FloatList`),
      `tf.int64` (`Int64List`), and `tf.string` (`BytesList`) are supported.
    feature_list_dense_shapes: A list of tuples, same length as
      `feature_list_dense_keys`.  The shape of the data for each
      `FeatureList` feature referenced by `feature_list_dense_keys`.
    feature_list_dense_defaults: A dict mapping key strings to values.
      The only currently allowed value is `None`.  Any key appearing
      in this dict with value `None` is allowed to be missing from the
      `SequenceExample`.  If missing, the key is treated as zero-length.
    debug_name: A scalar (0-D Tensor) of strings (optional), the name of
      the serialized proto.
    name: A name for this operation (optional).

  Returns:
    A tuple of two `dict`s, each mapping keys to `Tensor`s and `SparseTensor`s.
    The first dict contains the context key/values.
    The second dict contains the feature_list key/values.

  Raises:
    ValueError: If context_sparse and context_dense key sets intersect,
      if input lengths do not match up, or if a value in
      feature_list_dense_defaults is not None.
    TypeError: if feature_list_dense_defaults is not either None or a dict.
  """
  with ops.name_scope(name, "ParseSingleSequenceExample", [serialized]):
    context_dense_defaults = (
        {} if context_dense_defaults is None else context_dense_defaults)
    context_sparse_keys = (
        [] if context_sparse_keys is None else context_sparse_keys)
    context_sparse_types = (
        [] if context_sparse_types is None else context_sparse_types)
    context_dense_keys = (
        [] if context_dense_keys is None else context_dense_keys)
    context_dense_types = (
        [] if context_dense_types is None else context_dense_types)
    context_dense_shapes = (
        [[]] * len(context_dense_keys)
        if context_dense_shapes is None else context_dense_shapes)
    feature_list_sparse_keys = (
        [] if feature_list_sparse_keys is None else feature_list_sparse_keys)
    feature_list_sparse_types = (
        [] if feature_list_sparse_types is None else feature_list_sparse_types)
    feature_list_dense_keys = (
        [] if feature_list_dense_keys is None else feature_list_dense_keys)
    feature_list_dense_types = (
        [] if feature_list_dense_types is None else feature_list_dense_types)
    feature_list_dense_shapes = (
        [[]] * len(feature_list_dense_keys)
        if feature_list_dense_shapes is None else feature_list_dense_shapes)
    feature_list_dense_defaults = (
        dict() if feature_list_dense_defaults is None
        else feature_list_dense_defaults)
    debug_name = "" if debug_name is None else debug_name

    # Internal
    feature_list_dense_missing_assumed_empty = []

    num_context_dense = len(context_dense_keys)
    num_feature_list_dense = len(feature_list_dense_keys)
    num_context_sparse = len(context_sparse_keys)
    num_feature_list_sparse = len(feature_list_sparse_keys)

    if len(context_dense_shapes) != num_context_dense:
      raise ValueError(
          "len(context_dense_shapes) != len(context_dense_keys): %d vs. %d"
          % (len(context_dense_shapes), num_context_dense))
    if len(context_dense_types) != num_context_dense:
      raise ValueError(
          "len(context_dense_types) != len(num_context_dense): %d vs. %d"
          % (len(context_dense_types), num_context_dense))
    if len(feature_list_dense_shapes) != num_feature_list_dense:
      raise ValueError(
          "len(feature_list_dense_shapes) != len(feature_list_dense_keys): "
          "%d vs. %d" % (len(feature_list_dense_shapes),
                         num_feature_list_dense))
    if len(feature_list_dense_types) != num_feature_list_dense:
      raise ValueError(
          "len(feature_list_dense_types) != len(num_feature_list_dense):"
          "%d vs. %d" % (len(feature_list_dense_types), num_feature_list_dense))
    if len(context_sparse_types) != num_context_sparse:
      raise ValueError(
          "len(context_sparse_types) != len(context_sparse_keys): %d vs. %d"
          % (len(context_sparse_types), num_context_sparse))
    if len(feature_list_sparse_types) != num_feature_list_sparse:
      raise ValueError(
          "len(feature_list_sparse_types) != len(feature_list_sparse_keys): "
          "%d vs. %d"
          % (len(feature_list_sparse_types), num_feature_list_sparse))
    if (num_context_dense + num_context_sparse
        + num_feature_list_dense + num_feature_list_sparse) == 0:
      raise ValueError(
          "Must provide at least one context_sparse key, context_dense key, "
          ", feature_list_sparse key, or feature_list_dense key")
    if not set(context_dense_keys).isdisjoint(set(context_sparse_keys)):
      raise ValueError(
          "context_dense and context_sparse keys must not intersect; "
          "intersection: %s" %
          set(context_dense_keys).intersection(set(context_sparse_keys)))
    if not set(feature_list_dense_keys).isdisjoint(
        set(feature_list_sparse_keys)):
      raise ValueError(
          "feature_list_dense and feature_list_sparse keys must not intersect; "
          "intersection: %s" %
          set(feature_list_dense_keys).intersection(
              set(feature_list_sparse_keys)))
    if not isinstance(feature_list_dense_defaults, dict):
      raise TypeError("feature_list_dense_defaults must be a dict")
    for k, v in feature_list_dense_defaults.items():
      if v is not None:
        raise ValueError("Value feature_list_dense_defaults[%s] must be None"
                         % k)
      feature_list_dense_missing_assumed_empty.append(k)

    context_dense_defaults_vec = []
    for i, key in enumerate(context_dense_keys):
      default_value = context_dense_defaults.get(key)
      if default_value is None:
        default_value = constant_op.constant([], dtype=context_dense_types[i])
      elif not isinstance(default_value, ops.Tensor):
        key_name = "key_" + re.sub("[^A-Za-z0-9_.\\-/]", "_", key)
        default_value = ops.convert_to_tensor(
            default_value, dtype=context_dense_types[i], name=key_name)
        default_value = array_ops.reshape(
            default_value, context_dense_shapes[i])

      context_dense_defaults_vec.append(default_value)

    context_dense_shapes = [tensor_shape.as_shape(shape).as_proto()
                            for shape in context_dense_shapes]
    feature_list_dense_shapes = [tensor_shape.as_shape(shape).as_proto()
                                 for shape in feature_list_dense_shapes]

    # pylint: disable=protected-access
    outputs = gen_parsing_ops._parse_single_sequence_example(
        serialized=serialized,
        debug_name=debug_name,
        context_dense_defaults=context_dense_defaults_vec,
        context_sparse_keys=context_sparse_keys,
        context_sparse_types=context_sparse_types,
        context_dense_keys=context_dense_keys,
        context_dense_shapes=context_dense_shapes,
        feature_list_sparse_keys=feature_list_sparse_keys,
        feature_list_sparse_types=feature_list_sparse_types,
        feature_list_dense_keys=feature_list_dense_keys,
        feature_list_dense_types=feature_list_dense_types,
        feature_list_dense_shapes=feature_list_dense_shapes,
        feature_list_dense_missing_assumed_empty=(
            feature_list_dense_missing_assumed_empty),
        name=name)
    # pylint: enable=protected-access

    (context_sparse_indices, context_sparse_values,
     context_sparse_shapes, context_dense_values,
     feature_list_sparse_indices, feature_list_sparse_values,
     feature_list_sparse_shapes, feature_list_dense_values) = outputs

    context_sparse_tensors = [
        sparse_tensor.SparseTensor(ix, val, shape) for (ix, val, shape)
        in zip(context_sparse_indices,
               context_sparse_values,
               context_sparse_shapes)]

    feature_list_sparse_tensors = [
        sparse_tensor.SparseTensor(ix, val, shape) for (ix, val, shape)
        in zip(feature_list_sparse_indices,
               feature_list_sparse_values,
               feature_list_sparse_shapes)]

    context_output = dict(
        zip(context_sparse_keys + context_dense_keys,
            context_sparse_tensors + context_dense_values))
    feature_list_output = dict(
        zip(feature_list_sparse_keys + feature_list_dense_keys,
            feature_list_sparse_tensors + feature_list_dense_values))

    return (context_output, feature_list_output)
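
The one re.sub in this function sanitizes a feature key so it can be used as part of an op name, replacing every character outside the allowed set with an underscore. A minimal sketch of just that substitution, with an invented key:

import re

key = "image/class label#1"
key_name = "key_" + re.sub(r"[^A-Za-z0-9_.\-/]", "_", key)
print(key_name)   # key_image/class_label_1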

Example 69

View license
def _parse_single_sequence_example_raw(serialized,
                                       context_sparse_keys=None,
                                       context_sparse_types=None,
                                       context_dense_keys=None,
                                       context_dense_types=None,
                                       context_dense_defaults=None,
                                       context_dense_shapes=None,
                                       feature_list_sparse_keys=None,
                                       feature_list_sparse_types=None,
                                       feature_list_dense_keys=None,
                                       feature_list_dense_types=None,
                                       feature_list_dense_shapes=None,
                                       feature_list_dense_defaults=None,
                                       debug_name=None,
                                       name=None):
  """Parses a single `SequenceExample` proto.

  Args:
    serialized: A scalar (0-D Tensor) of type string, a single binary
      serialized `SequenceExample` proto.
    context_sparse_keys: A list of string keys in the `SequenceExample`'s
      features.  The results for these keys will be returned as
      `SparseTensor` objects.
    context_sparse_types: A list of `DTypes`, the same length as `sparse_keys`.
      Only `tf.float32` (`FloatList`), `tf.int64` (`Int64List`),
      and `tf.string` (`BytesList`) are supported.
    context_dense_keys: A list of string keys in the examples' features.
      The results for these keys will be returned as `Tensor`s
    context_dense_types: A list of DTypes, same length as `context_dense_keys`.
      Only `tf.float32` (`FloatList`), `tf.int64` (`Int64List`),
      and `tf.string` (`BytesList`) are supported.
    context_dense_defaults: A dict mapping string keys to `Tensor`s.
      The keys of the dict must match the context_dense_keys of the feature.
    context_dense_shapes: A list of tuples, same length as `context_dense_keys`.
      The shape of the data for each context_dense feature referenced by
      `context_dense_keys`.  Required for any input tensors identified by
      `context_dense_keys` whose shapes are anything other than `[]` or `[1]`.
    feature_list_sparse_keys: A list of string keys in the `SequenceExample`'s
      feature_lists.  The results for these keys will be returned as
      `SparseTensor` objects.
    feature_list_sparse_types: A list of `DTypes`, same length as `sparse_keys`.
      Only `tf.float32` (`FloatList`), `tf.int64` (`Int64List`),
      and `tf.string` (`BytesList`) are supported.
    feature_list_dense_keys: A list of string keys in the `SequenceExample`'s
      features_lists. The results for these keys will be returned as `Tensor`s.
    feature_list_dense_types: A list of `DTypes`, same length as
      `feature_list_dense_keys`.  Only `tf.float32` (`FloatList`),
      `tf.int64` (`Int64List`), and `tf.string` (`BytesList`) are supported.
    feature_list_dense_shapes: A list of tuples, same length as
      `feature_list_dense_keys`.  The shape of the data for each
      `FeatureList` feature referenced by `feature_list_dense_keys`.
    feature_list_dense_defaults: A dict mapping key strings to values.
      The only currently allowed value is `None`.  Any key appearing
      in this dict with value `None` is allowed to be missing from the
      `SequenceExample`.  If missing, the key is treated as zero-length.
    debug_name: A scalar (0-D Tensor) of strings (optional), the name of
      the serialized proto.
    name: A name for this operation (optional).

  Returns:
    A tuple of two `dict`s, each mapping keys to `Tensor`s and `SparseTensor`s.
    The first dict contains the context key/values.
    The second dict contains the feature_list key/values.

  Raises:
    ValueError: If context_sparse and context_dense key sets intersect,
      if input lengths do not match up, or if a value in
      feature_list_dense_defaults is not None.
    TypeError: if feature_list_dense_defaults is not either None or a dict.
  """
  with ops.name_scope(name, "ParseSingleSequenceExample", [serialized]):
    context_dense_defaults = (
        {} if context_dense_defaults is None else context_dense_defaults)
    context_sparse_keys = (
        [] if context_sparse_keys is None else context_sparse_keys)
    context_sparse_types = (
        [] if context_sparse_types is None else context_sparse_types)
    context_dense_keys = (
        [] if context_dense_keys is None else context_dense_keys)
    context_dense_types = (
        [] if context_dense_types is None else context_dense_types)
    context_dense_shapes = (
        [[]] * len(context_dense_keys)
        if context_dense_shapes is None else context_dense_shapes)
    feature_list_sparse_keys = (
        [] if feature_list_sparse_keys is None else feature_list_sparse_keys)
    feature_list_sparse_types = (
        [] if feature_list_sparse_types is None else feature_list_sparse_types)
    feature_list_dense_keys = (
        [] if feature_list_dense_keys is None else feature_list_dense_keys)
    feature_list_dense_types = (
        [] if feature_list_dense_types is None else feature_list_dense_types)
    feature_list_dense_shapes = (
        [[]] * len(feature_list_dense_keys)
        if feature_list_dense_shapes is None else feature_list_dense_shapes)
    feature_list_dense_defaults = (
        dict() if feature_list_dense_defaults is None
        else feature_list_dense_defaults)
    debug_name = "" if debug_name is None else debug_name

    # Internal
    feature_list_dense_missing_assumed_empty = []

    num_context_dense = len(context_dense_keys)
    num_feature_list_dense = len(feature_list_dense_keys)
    num_context_sparse = len(context_sparse_keys)
    num_feature_list_sparse = len(feature_list_sparse_keys)

    if len(context_dense_shapes) != num_context_dense:
      raise ValueError(
          "len(context_dense_shapes) != len(context_dense_keys): %d vs. %d"
          % (len(context_dense_shapes), num_context_dense))
    if len(context_dense_types) != num_context_dense:
      raise ValueError(
          "len(context_dense_types) != len(num_context_dense): %d vs. %d"
          % (len(context_dense_types), num_context_dense))
    if len(feature_list_dense_shapes) != num_feature_list_dense:
      raise ValueError(
          "len(feature_list_dense_shapes) != len(feature_list_dense_keys): "
          "%d vs. %d" % (len(feature_list_dense_shapes),
                         num_feature_list_dense))
    if len(feature_list_dense_types) != num_feature_list_dense:
      raise ValueError(
          "len(feature_list_dense_types) != len(num_feature_list_dense):"
          "%d vs. %d" % (len(feature_list_dense_types), num_feature_list_dense))
    if len(context_sparse_types) != num_context_sparse:
      raise ValueError(
          "len(context_sparse_types) != len(context_sparse_keys): %d vs. %d"
          % (len(context_sparse_types), num_context_sparse))
    if len(feature_list_sparse_types) != num_feature_list_sparse:
      raise ValueError(
          "len(feature_list_sparse_types) != len(feature_list_sparse_keys): "
          "%d vs. %d"
          % (len(feature_list_sparse_types), num_feature_list_sparse))
    if (num_context_dense + num_context_sparse
        + num_feature_list_dense + num_feature_list_sparse) == 0:
      raise ValueError(
          "Must provide at least one context_sparse key, context_dense key, "
          ", feature_list_sparse key, or feature_list_dense key")
    if not set(context_dense_keys).isdisjoint(set(context_sparse_keys)):
      raise ValueError(
          "context_dense and context_sparse keys must not intersect; "
          "intersection: %s" %
          set(context_dense_keys).intersection(set(context_sparse_keys)))
    if not set(feature_list_dense_keys).isdisjoint(
        set(feature_list_sparse_keys)):
      raise ValueError(
          "feature_list_dense and feature_list_sparse keys must not intersect; "
          "intersection: %s" %
          set(feature_list_dense_keys).intersection(
              set(feature_list_sparse_keys)))
    if not isinstance(feature_list_dense_defaults, dict):
      raise TypeError("feature_list_dense_defaults must be a dict")
    for k, v in feature_list_dense_defaults.items():
      if v is not None:
        raise ValueError("Value feature_list_dense_defaults[%s] must be None"
                         % k)
      feature_list_dense_missing_assumed_empty.append(k)

    context_dense_defaults_vec = []
    for i, key in enumerate(context_dense_keys):
      default_value = context_dense_defaults.get(key)
      if default_value is None:
        default_value = constant_op.constant([], dtype=context_dense_types[i])
      elif not isinstance(default_value, ops.Tensor):
        key_name = "key_" + re.sub("[^A-Za-z0-9_.\\-/]", "_", key)
        default_value = ops.convert_to_tensor(
            default_value, dtype=context_dense_types[i], name=key_name)
        default_value = array_ops.reshape(
            default_value, context_dense_shapes[i])

      context_dense_defaults_vec.append(default_value)

    context_dense_shapes = [tensor_shape.as_shape(shape).as_proto()
                            for shape in context_dense_shapes]
    feature_list_dense_shapes = [tensor_shape.as_shape(shape).as_proto()
                                 for shape in feature_list_dense_shapes]

    # pylint: disable=protected-access
    outputs = gen_parsing_ops._parse_single_sequence_example(
        serialized=serialized,
        debug_name=debug_name,
        context_dense_defaults=context_dense_defaults_vec,
        context_sparse_keys=context_sparse_keys,
        context_sparse_types=context_sparse_types,
        context_dense_keys=context_dense_keys,
        context_dense_shapes=context_dense_shapes,
        feature_list_sparse_keys=feature_list_sparse_keys,
        feature_list_sparse_types=feature_list_sparse_types,
        feature_list_dense_keys=feature_list_dense_keys,
        feature_list_dense_types=feature_list_dense_types,
        feature_list_dense_shapes=feature_list_dense_shapes,
        feature_list_dense_missing_assumed_empty=(
            feature_list_dense_missing_assumed_empty),
        name=name)
    # pylint: enable=protected-access

    (context_sparse_indices, context_sparse_values,
     context_sparse_shapes, context_dense_values,
     feature_list_sparse_indices, feature_list_sparse_values,
     feature_list_sparse_shapes, feature_list_dense_values) = outputs

    context_sparse_tensors = [
        sparse_tensor.SparseTensor(ix, val, shape) for (ix, val, shape)
        in zip(context_sparse_indices,
               context_sparse_values,
               context_sparse_shapes)]

    feature_list_sparse_tensors = [
        sparse_tensor.SparseTensor(ix, val, shape) for (ix, val, shape)
        in zip(feature_list_sparse_indices,
               feature_list_sparse_values,
               feature_list_sparse_shapes)]

    context_output = dict(
        zip(context_sparse_keys + context_dense_keys,
            context_sparse_tensors + context_dense_values))
    feature_list_output = dict(
        zip(feature_list_sparse_keys + feature_list_dense_keys,
            feature_list_sparse_tensors + feature_list_dense_values))

    return (context_output, feature_list_output)
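
The one re.sub call in this example sanitizes a user-supplied feature key into a safe op-name suffix by replacing every character outside an allow-list with an underscore. A minimal standalone sketch of that pattern (the sanitize_key helper and the sample keys below are illustrative, not part of the TensorFlow API):

import re

def sanitize_key(key):
    # Replace anything that is not a letter, digit, underscore, dot,
    # hyphen or slash with "_" so the result is usable as a name component.
    return "key_" + re.sub(r"[^A-Za-z0-9_.\-/]", "_", key)

print(sanitize_key("image/encoded"))   # key_image/encoded
print(sanitize_key("label (raw)"))     # key_label__raw_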

Example 70

Project: Glyphs-Scripts
Source File: Batch Metric Keys.py
View license
	def BatchMetricKeyMain( self, sender ):
		try:
			thisFont = Glyphs.font
			thisFontMaster = thisFont.selectedFontMaster
			listOfSelectedLayers = thisFont.selectedLayers
			fieldKey = self.w.keyTextField.get()
			flatFieldKey = re.sub("@Base", "@base", fieldKey)
	
			if "@base" in fieldKey or "@Base" in fieldKey:
				# Checks if a given layer has a metrics key of a glyph that has another key. Checks the glyph once and returns its name.
				def nestHuntL( targetGlyphName ):
					try:
						# Sees if the glyphName exists in the font
						if thisFont.glyphs[ targetGlyphName ]:
							# If it exists, gets the left key of targetGlyph on the same layer
							targetGlyphL = thisFont.glyphs[ targetGlyphName ]
							targetLayerL = targetGlyphL.layers[ thisFontMaster.id ]
							targetLayerKeyL = targetLayerL.leftMetricsKeyUI()

							# If it's a plain number or calculation, returns the original glyph name
							a = ["=|", "+", "*", "/", "-1", "-2", "-3", "-4", "-5", "-6", "-7", "-8", "-9"]
							if targetLayerKeyL[0].isdigit() or "-" in targetLayerKeyL[0] or any([ x in targetLayerKeyL for x in a]):
								return targetGlyphName
		
							# Finds the first component and returns its name
							elif "auto" in targetLayerKeyL:
								firstComponent = targetLayerL.components[0]
								return firstComponent.componentName
		
							# This is a single-letter key, so clean it up
							else:
								cleanGlyphName = re.sub( "=", "", targetLayerKeyL )
								cleanGlyphName = re.sub( " .*", "", cleanGlyphName )
								return cleanGlyphName
		
						# If the glyph doesn't exist:
						else:
							print "Found invalid LSB key while checking the key of %s" % thisGlyph.name
					except Exception, e:
						Glyphs.showMacroWindow()
						print "nestHuntL Error: %s" % e
	
				def nestHuntR( targetGlyphName ):
					try:
						# Sees if the glyphName exists in the font
						if thisFont.glyphs[ targetGlyphName ]:
							# If it exists, gets the right key of targetGlyph on the same layer
							targetGlyphR = thisFont.glyphs[ targetGlyphName ]
							targetLayerR = targetGlyphR.layers[ thisFontMaster.id ]
							targetLayerKeyR = targetLayerR.rightMetricsKeyUI()
							# If it's a plain number or calculation, returns the original glyph name
							a = ["=|", "+", "*", "/", "-1", "-2", "-3", "-4", "-5", "-6", "-7", "-8", "-9"]
							if targetLayerKeyR[0].isdigit() or "-" in targetLayerKeyR[0] or any([ x in targetLayerKeyR for x in a]):
								return targetGlyphName
		
							# Finds the last "Letter" component and returns its name
							elif "auto" in targetLayerKeyR:
								allCompornents = thisLayer.components 
								numOfCompornents = len(allCompornents)
								lastCompornent = allCompornents[numOfCompornents-1]
								lastCompornentName = lastCompornent.componentName
								lastCompornentGlyph = thisFont.glyphs[lastCompornentName]
								while lastCompornentGlyph.category != "Letter":
									numOfCompornents = numOfCompornents-1
									lastCompornent = allCompornents[numOfCompornents]
									lastCompornentName = lastCompornent.componentName
									lastCompornentGlyph = thisFont.glyphs[lastCompornentName]
								return lastCompornentName
		
							# This is a single-letter key, so clean it up
							else:
								cleanGlyphName = re.sub( "=", "", targetLayerKeyR )
								cleanGlyphName = re.sub( " .*", "", cleanGlyphName )
								return cleanGlyphName
		
						# If the glyph doesn't exist:
						else:
							print "Found invalid RSB key while checking the key of %s" % thisGlyph.name
					except Exception, e:
						Glyphs.showMacroWindow()
						print "nestHuntR Error: %s" % e
	
				# Set baseGlyphName for further nest hunting.
				for thisLayer in thisFont.selectedLayers:
					# Checks case of base glyph.
					thisGlyph = thisLayer.parent
					baseGlyphName = re.sub("\..*", "", thisGlyph.name)
					baseGlyphName = re.sub("superior", "", baseGlyphName)
					if "@Base" in fieldKey:
						baseGlyphName = baseGlyphName.capitalize()
						if thisGlyph.script == "latin" and re.match("Ij|Ae|Oe", baseGlyphName):
							baseGlyphName = baseGlyphName[0:2].upper() + baseGlyphName[2:]
							baseGlyphNameL = baseGlyphName
							baseGlyphNameR = baseGlyphName
					# Detects ligatures and sets baseGlyphNameL and R
					if "_" in baseGlyphName:
						baseGlyphNameL = re.sub("_.*", "", baseGlyphName)
						baseGlyphNameR = re.sub(".*_", "", baseGlyphName)
					elif "ordfeminine" in thisGlyph.name:
						baseGlyphNameL = "a"
						baseGlyphNameR = "a"
					elif "ordmasculine" in thisGlyph.name:
						baseGlyphNameL = "o"
						baseGlyphNameR = "o"
					else:
						baseGlyphNameL = baseGlyphName
						baseGlyphNameR = baseGlyphName
					thisFont.disableUpdateInterface()
					thisGlyph.beginUndo()	
	
					# Runs nestHuntL multiple times until it finds the final glyph,
					# and then set the final left metrics key.
					if self.w.applyL.get():
						if self.w.avoidNest:
							dummyOldL = nestHuntL(baseGlyphNameL)
							dummyNewL = nestHuntL(dummyOldL)
							while dummyOldL != dummyNewL:
								dummyOldL = nestHuntL(dummyNewL)
								dummyNewL = nestHuntL(dummyOldL)
							finalKeyL = re.sub("@base", dummyNewL, flatFieldKey)
							thisGlyph.setLeftMetricsKey_(finalKeyL)
	
					# Runs nestHuntR multiple times until it finds the final glyph,
					# and then set the final right metrics key.
					if self.w.applyR.get():
						if self.w.avoidNest:
							dummyOldR = nestHuntR(baseGlyphNameR)
							dummyNewR = nestHuntR(dummyOldR)
							while dummyOldR != dummyNewR:
								dummyOldR = nestHuntR(dummyNewR)
								dummyNewR = nestHuntR(dummyOldR)
							finalKeyR = re.sub("@base", dummyNewR, flatFieldKey)
		
							
							# Processes as normal
							if baseGlyphName != "Q":
								thisGlyph.setRightMetricsKey_(finalKeyR)
							# Uses the width of O of the same group
							elif baseGlyphName == "Q" and self.w.radioQ.get() == 0:
								Qbefore = thisGlyph.name
								Qname = re.sub("Q", "O", Qbefore)
								Qname = re.sub("q", "o", Qbefore)
								glyphO = thisFont.glyphs[Qname]
								numOfMasters = len(thisFont.masters)
								thisGlyph.setWidth_(thisOWidth)
							# Uses RSB as normal
							elif baseGlyphName == "Q" and self.w.radioQ.get() == 1:
								thisGlyph.setRightMetricsKey_(finalKeyR)
	
					thisGlyph.endUndo()
					thisFont.enableUpdateInterface()
				self.w.close()
	
			else:
				pass
				for thisLayer in listOfSelectedLayers:
					thisGlyph = thisLayer.parent
					thisFont.disableUpdateInterface()
					thisGlyph.beginUndo()	
					for i in thisGlyph.layers:
						if self.w.applyL.get():
							i.setLeftMetricsKey_(fieldKey)
						if self.w.applyR.get():
							i.setRightMetricsKey_(fieldKey)
					thisGlyph.endUndo()
					thisFont.enableUpdateInterface()
				self.w.close()
		except Exception, e:
			Glyphs.showMacroWindow()
			print "BatchMetricKeyMain Error: %s" % e

Example 71

Project: TARDIS
Source File: TARDIS.py
View license
def main(vulnerability,vulnObject,sourceIP,sourceHost):
	#Create results and working directories
	if not os.path.exists('Results'):
		os.makedirs('Results')
	if not os.path.exists('Working'):
		os.makedirs('Working')
	
	#Make sure the vulnerability is valid
	if vulnerability != "":
		vulnCheck=0
		resultCount=0
		logsource=''
		print("Searching for evidence of \"" + vulnerability + "\"")
		print("  Host: " + sourceIP)
		
		try:
			configFile = 'config.xml'
			tree = ET.parse(configFile)
			root = tree.getroot()
		except:
			sys.exit("Not a valid config XML file")
		for settings in root.findall("./log_source"):
			logsource=settings.text
		cnx = getDBConnector()
		
		
		#check if vulnerability/asset combo exists in assetVulnerability Table
		cursor = cnx.cursor()
		query = ("SELECT count(*) as count from assetVulnerabilities where victim_ip = '" + str(ip2long(sourceIP)) + "' and threat_id = '" + vulnerability + "'")
		
		cursor.execute(query)
		for row in cursor:
			vulnCheck=row[0]
		cursor.close()
		
		if vulnCheck==0:
			#No combination exists, write data to DB
			
			cursor = cnx.cursor()
			add_vulnInstance = ("INSERT INTO assetVulnerabilities "
               "(victim_ip, threat_id, active) "
               "VALUES (%s, %s, %s)")
			vulnData = (ip2long(sourceIP), vulnerability, '1')
			
			# Insert new entry
			cursor.execute(add_vulnInstance , vulnData )
			
			cnx.commit()
			cursor.close()
			cnx.close()
		searchStringResults= findStixObservables.run(vulnerability)
		isExploitFound=False
		searchStringCount=0
		operator=searchStringResults[0]
		numResults=0
		if(searchStringResults[1]=="No search file found"):
			searchResults="0"
			print("  No search file found\n")
		elif(searchStringResults[1]=="No supported observables found"):
			searchResults="0"
			print("  No supported observables found\n")
		else:
			#run  search...
			#search should return number of results
			#Insert source host from arguments
			for entry in searchStringResults:
				if logsource=="splunk":
					if (searchStringCount == 1):
						searchString=entry + " AND (host=\"" + sourceHost + "\" OR s_ip=\"" + sourceIP + "\" OR d_host=\"" + sourceHost + "\")  | fields host, c_ip | fields - _bkt, _cd, _indextime, _kv, _serial, _si, _sourcetype | rename _raw as \"Raw Log\" | rename c_ip as clientip"
						numResults=splunk.searchVulnerability(searchString,vulnerability,sourceIP,sourceHost)
						if (numResults != "0"):
							data = json.load(numResults)
					
					if (operator=="AND"):
						if (searchStringCount > 1):
							resultCount=0
							for result in data["results"]:
								startTime =  dateutil.parser.parse(data["results"][resultCount]["_time"]) + datetime.timedelta(days =- 300)
								endTime =  dateutil.parser.parse(data["results"][resultCount]["_time"]) + datetime.timedelta(days = 300)
								searchString=entry + " AND (host=\"" + sourceHost + "\" OR s_ip=\"" + sourceIP + "\" OR d_host=\"" + sourceHost + "\") | fields host, clientip | fields - _bkt, _cd, _indextime, _kv, _serial, _si, _sourcetype | rename _raw as \"Raw Log\""
								newResults=splunk.searchVulnerabilityTimeRange(searchString,vulnerability,sourceIP,sourceHost,startTime.isoformat(),endTime.isoformat())
								if (newResults != "0"):
									#This is the result from search 1
									newData = json.load(newResults)
									newResultCount=0
									for result in newData["results"]:
										try:
											clientip=newData["results"][newResultCount]["clientip"]
										except:
											clientip="0"
										isExploitFound=True
										#These are the results from any further results proving the AND condition
										cnx = getDBConnector()
										cursor = cnx.cursor()
										query = ("SELECT count(*) as count from attackInventory where victim_ip = '" + str(ip2long(sourceIP)) + "' and threat_id = '" + vulnerability + "' and attack_time = '" + data["results"][resultCount]["_time"] + "'")
										cursor.execute(query)
										for row in cursor:
											logCheck=row[0]
										cursor.close()
										if logCheck==0:
											#Write data to DB
											cursor = cnx.cursor()
											add_logInstance = ("INSERT INTO attackInventory "
																"(victim_ip, attacker_ip, attack_time, attack_log, threat_id) "
																"VALUES (%s, %s, %s, %s, %s)")
											
											logData = (ip2long(sourceIP), ip2long(clientip), newData["results"][newResultCount]["_time"], newData["results"][newResultCount]["Raw Log"], vulnerability)
											# Insert new entry
											cursor.execute(add_logInstance , logData )
											cnx.commit()
											cursor.close()
										cnx.close()
										newResultCount=newResultCount+1
								else:
									newResultCount=0
							if (isExploitFound==True):
								try:
									clientip=data["results"][resultCount]["clientip"]
								except:
									clientip="0"
								cnx = getDBConnector()
								cursor = cnx.cursor()
								query = ("SELECT count(*) as count from attackInventory where victim_ip = '" + str(ip2long(sourceIP)) + "' and threat_id = '" + vulnerability + "' and attack_time = '" + data["results"][resultCount]["_time"] + "'")
								cursor.execute(query)
								for row in cursor:
									logCheck=row[0]
								cursor.close()
								if logCheck==0:
									#Write data to DB
									cursor = cnx.cursor()
									add_logInstance = ("INSERT INTO attackInventory "
														"(victim_ip, attacker_ip, attack_time, attack_log, threat_id) "
														"VALUES (%s, %s, %s, %s, %s)")
									
									logData = (ip2long(sourceIP), ip2long(clientip), data["results"][resultCount]["_time"], data["results"][resultCount]["Raw Log"], vulnerability)
									# Insert new entry
									cursor.execute(add_logInstance , logData )
									cnx.commit()
									cursor.close()
								cnx.close()
								resultCount=newResultCount+1
							else:
								resultCount=newResultCount
					elif (operator=="OR"):
						if (searchStringCount > 0):
							#only keep searching if there are more IOCS to look at...
							if len(searchStringResults)>2:
								searchString=entry + " AND (host=\"" + sourceHost + "\" OR s_ip=\"" + sourceIP + "\" OR d_host=\"" + sourceHost + "\")  | fields host, clientip | fields - _bkt, _cd, _indextime, _kv, _serial, _si, _sourcetype | rename _raw as \"Raw Log\""
								numResults=splunk.searchVulnerability(searchString,vulnerability,sourceIP,sourceHost)
								if (numResults != "0"):
									data = json.load(numResults)
									resultCount=0
									for result in data["results"]:
										isExploitFound=True
										cnx = getDBConnector()
										cursor = cnx.cursor()
										query = ("SELECT count(*) as count from attackInventory where victim_ip = '" + str(ip2long(sourceIP)) + "' and threat_id = '" + vulnerability + "' and attack_time = '" + data["results"][resultCount]["_time"] + "'")
										cursor.execute(query)
										for row in cursor:
											logCheck=row[0]
										cursor.close()
										if logCheck==0:
											#Write data to DB
											cursor = cnx.cursor()
											add_logInstance = ("INSERT INTO attackInventory "
																"(victim_ip, attacker_ip, attack_time, attack_log, threat_id) "
																"VALUES (%s, %s, %s, %s, %s)")
											logData = (ip2long(sourceIP), ip2long(data["results"][resultCount]["clientip"]), data["results"][resultCount]["_time"], data["results"][resultCount]["Raw Log"], vulnerability)
											
											# Insert new entry
											cursor.execute(add_logInstance , logData )
											
											cnx.commit()
											cursor.close()
										cnx.close()
										resultCount=resultCount+1
							elif len(searchStringResults)==2:
								searchString=entry + " AND (host=\"" + sourceHost + "\" OR host=\"" + sourceIP + "\" OR s_ip=\"" + sourceIP + "\" OR d_host=\"" + sourceHost + "\")  | fields host, clientip | fields - _bkt, _cd, _indextime, _kv, _serial, _si, _sourcetype | rename _raw as \"Raw Log\""
								numResults=splunk.searchVulnerability(searchString,vulnerability,sourceIP,sourceHost)
								if (numResults != "0"):
									data = json.load(numResults)
									resultCount=0
									for result in data["results"]:
										isExploitFound=True
										cnx = getDBConnector()
										cursor = cnx.cursor()
										query = ("SELECT count(*) as count from attackInventory where victim_ip = '" + str(ip2long(sourceIP)) + "' and threat_id = '" + vulnerability + "' and attack_time = '" + data["results"][resultCount]["_time"] + "'")
										cursor.execute(query)
										for row in cursor:
											logCheck=row[0]
										cursor.close()
										if logCheck==0:
											#Write data to DB
											cursor = cnx.cursor()
											add_logInstance = ("INSERT INTO attackInventory "
																"(victim_ip, attacker_ip, attack_time, attack_log, threat_id) "
																"VALUES (%s, %s, %s, %s, %s)")
											
											logData = (ip2long(sourceIP), ip2long(data["results"][resultCount]["clientip"]), data["results"][resultCount]["_time"], data["results"][resultCount]["Raw Log"], vulnerability)
											
											# Insert new entry
											cursor.execute(add_logInstance , logData )
											
											cnx.commit()
											cursor.close()
										cnx.close()
										resultCount=resultCount+1
					searchStringCount=searchStringCount+1
				elif logsource=="elastic_search":
					numResults=0
					startTime="-90d"
					endTime="now"
					#Insert source host from arguments
					entry = re.sub('\<source_host\>', sourceHost, entry)
					#Insert source IP from arguments
					entry = re.sub('\<source_ip\>', sourceIP, entry)
					if (searchStringCount == 1):
						#Insert startTime
						entry = re.sub('\<startTime\>', startTime, entry)
						#Insert endTime
						entry = re.sub('\<endTime\>', endTime, entry)
						if sourceIP == '*':
							entry = re.sub('\<min_count\>', '1', entry)
						else:
							entry = re.sub('\<min_count\>', '2', entry)
						#print entry
						searchResults = ElasticSearchQuery.searchVulnerability(entry,vulnerability,sourceIP,sourceHost)
						#print searchResults
						numResults = getElasticSearchResults(searchResults)
						#print numResults
					if (operator=="AND"):
						if (searchStringCount > 1):
							resultCount=0
							for hit in searchResults['hits']['hits']:
								startTime =  dateutil.parser.parse(hit["_source"]["@timestamp"]) + datetime.timedelta(days =- 1)
								
								endTime =  dateutil.parser.parse(hit["_source"]["@timestamp"]) + datetime.timedelta(days = 1)
								#Insert start time
								entry = re.sub('\<startTime\>', str(startTime.isoformat()), entry)
								#Insert end time
								entry = re.sub('\<endTime\>', str(endTime.isoformat()), entry)
								newSearchResults = ElasticSearchQuery.searchVulnerability(entry,vulnerability,sourceIP,sourceHost)
								newResults = getElasticSearchResults(newSearchResults)
								if (newResults != "0"):
									#This is the result from search 1
									newResultCount=0
									isExploitFound=True
									for newhit in newSearchResults['hits']['hits']:
										try:
											attackerIP=newhit["_source"]["evt_srcip"]
										except:
											attackerIP="0.0.0.0"
										#These are the results from any further results proving the AND condition
										cnx = getDBConnector()
										cursor = cnx.cursor()
										#Check original log hit
										query = ("SELECT count(*) as count from attackInventory where victim_ip = '" + str(ip2long(sourceIP)) + "' and threat_id = '" + vulnerability + "' and attack_log = '" + newhit["_source"]["message"] + "'")
										cursor.execute(query)
										for row in cursor:
											logCheck=row[0]
										cursor.close()
										if logCheck==0:
											#Write data to DB
											cursor = cnx.cursor()
											add_logInstance = ("INSERT INTO attackInventory "
																"(victim_ip, attacker_ip, attack_time, attack_log, threat_id) "
																"VALUES (%s, %s, %s, %s, %s)")
											
											logData = (ip2long(sourceIP), ip2long(attackerIP),hit["_source"]["@timestamp"], hit["_source"]["message"], vulnerability)
											# Insert new entry
											cursor.execute(add_logInstance , logData )
										cursor = cnx.cursor()
										#check new log hit
										query = ("SELECT count(*) as count from attackInventory where victim_ip = '" + str(ip2long(sourceIP)) + "' and threat_id = '" + vulnerability + "' and attack_log = '" + newhit["_source"]["message"] + "'")
										cursor.execute(query)
										for row in cursor:
											logCheck=row[0]
										cursor.close()
										if logCheck==0:
											#Write data to DB
											cursor = cnx.cursor()
											add_logInstance = ("INSERT INTO attackInventory "
																"(victim_ip, attacker_ip, attack_time, attack_log, threat_id) "
																"VALUES (%s, %s, %s, %s, %s)")
											
											logData = (ip2long(sourceIP), ip2long(attackerIP),newhit["_source"]["@timestamp"], newhit["_source"]["message"], vulnerability)
											# Insert new entry
											cursor.execute(add_logInstance , logData )
											
											cnx.commit()
											cursor.close()
										cnx.close()
										newResultCount=newResultCount+1
								else:
									newResultCount=0
								resultCount=newResultCount+1
								
								
								
					elif (operator=="OR"):
						if (searchStringCount == 1):
							if (int(numResults) > 0):
								resultCount = int(numResults)
								writeElasticSearchResults(searchResults,vulnObject,sourceIP,vulnerability)
								isExploitFound=True
						if (searchStringCount > 1):
							#Insert startTime
							entry = re.sub('\<startTime\>', startTime, entry)
							#Insert endTime
							entry = re.sub('\<endTime\>', endTime, entry)
							if sourceIP == '*':
								entry = re.sub('\<min_count\>', '1', entry)
							else:
								entry = re.sub('\<min_count\>', '2', entry)
							#only keep searching if there are more IOCS to look at...
							if len(searchStringResults)>1:
								searchResults = ElasticSearchQuery.searchVulnerability(entry,vulnerability,sourceIP,sourceHost)
								numResults = getElasticSearchResults(searchResults)
								if int(numResults) > 0:
									writeElasticSearchResults(searchResults,vulnObject,sourceIP,vulnerability)
								resultCount = resultCount + int(numResults)
					searchStringCount=searchStringCount+1
			if (isExploitFound==True):
				print("  Found " + str(resultCount) + " instances of exploitation!")
				print("  Generating attack logs") 
				#Parse through data list to get elastic timestamp for audit log times...
			else:
				print("  No instances of exploitation found.\n")
	else:
		resultCount=0
		print("Invalid vulnerability ID")
	return(resultCount)
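
In the elastic_search branch above, re.sub doubles as a tiny templating engine: the search string loaded for each observable carries placeholders such as <source_host>, <source_ip>, <startTime> and <endTime> that are swapped in per run. A minimal sketch of that substitution step (the query template below is invented for illustration):

import re

def fill_template(entry, source_host, source_ip, start_time="-90d", end_time="now"):
    # Replace each placeholder with the values for this host and time window.
    entry = re.sub(r"<source_host>", source_host, entry)
    entry = re.sub(r"<source_ip>", source_ip, entry)
    entry = re.sub(r"<startTime>", start_time, entry)
    entry = re.sub(r"<endTime>", end_time, entry)
    return entry

template = 'host:"<source_host>" AND src:"<source_ip>" AND @timestamp:[<startTime> TO <endTime>]'
print(fill_template(template, "web01", "10.0.0.5"))
# host:"web01" AND src:"10.0.0.5" AND @timestamp:[-90d TO now]

Note that re.sub treats backslashes in the replacement string specially, so plain str.replace would do just as well here if the substituted values were untrusted.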

Example 72

Project: bakthat
Source File: __init__.py
View license
@app.cmd(help="Backup a file or a directory, backup the current directory if no arg is provided.")
@app.cmd_arg('filename', type=str, default=os.getcwd(), nargs="?")
@app.cmd_arg('-d', '--destination', type=str, help="s3|glacier|swift", default=None)
@app.cmd_arg('--prompt', type=str, help="yes|no", default="yes")
@app.cmd_arg('-t', '--tags', type=str, help="space separated tags", default="")
@app.cmd_arg('-p', '--profile', type=str, default="default", help="profile name (default by default)")
@app.cmd_arg('-c', '--config', type=str, default=CONFIG_FILE, help="path to config file")
@app.cmd_arg('-k', '--key', type=str, default=None, help="Custom key for periodic backups (works only with BakManager.io hook.)")
@app.cmd_arg('--exclude-file', type=str, default=None)
@app.cmd_arg('--s3-reduced-redundancy', action="store_true")
def backup(filename=os.getcwd(), destination=None, profile="default", config=CONFIG_FILE, prompt="yes", tags=[], key=None, exclude_file=None, s3_reduced_redundancy=False, **kwargs):
    """Perform backup.

    :type filename: str
    :param filename: File/directory to backup.

    :type destination: str
    :param destination: s3|glacier|swift

    :type prompt: str
    :param prompt: Disable the password prompt, disable encryption,
        only useful when using bakthat in command line mode.

    :type tags: str or list
    :param tags: Tags, either as a space-separated string
        or directly as a list of str (if calling from Python).

    :type password: str
    :keyword password: Password, empty string to disable encryption.

    :type conf: dict
    :keyword conf: Override/set AWS configuration.

    :type custom_filename: str
    :keyword custom_filename: Override the original filename (only in metadata)

    :rtype: dict
    :return: A dict containing the following keys: stored_filename, size, metadata, backend and filename.

    """
    storage_backend, destination, conf = _get_store_backend(config, destination, profile)
    backup_file_fmt = "{0}.{1}.tgz"

    session_id = str(uuid.uuid4())
    events.before_backup(session_id)

    # Check if compression is disabled on the configuration.
    if conf:
        compress = conf.get("compress", True)
    else:
        compress = config.get(profile).get("compress", True)

    if not compress:
        backup_file_fmt = "{0}.{1}"

    log.info("Backing up " + filename)

    if exclude_file and os.path.isfile(exclude_file):
        EXCLUDE_FILES.insert(0, exclude_file)

    _exclude = lambda filename: False
    if os.path.isdir(filename):
        join = functools.partial(os.path.join, filename)
        for efile in EXCLUDE_FILES:
            efile = join(efile)
            if os.path.isfile(efile):
                _exclude = _get_exclude(efile)
                log.info("Using {0} to exclude files.".format(efile))

    arcname = filename.strip('/').split('/')[-1]
    now = datetime.utcnow()
    date_component = now.strftime("%Y%m%d%H%M%S")
    stored_filename = backup_file_fmt.format(arcname, date_component)

    backup_date = int(now.strftime("%s"))
    backup_data = dict(filename=kwargs.get("custom_filename", arcname),
                       backup_date=backup_date,
                       last_updated=backup_date,
                       backend=destination,
                       is_deleted=False)

    # Useful only when using bakmanager.io hook
    backup_key = key

    password = kwargs.get("password", os.environ.get("BAKTHAT_PASSWORD"))
    if password is None and prompt.lower() != "no":
        password = getpass("Password (blank to disable encryption): ")
        if password:
            password2 = getpass("Password confirmation: ")
            if password != password2:
                log.error("Password confirmation doesn't match")
                return

    if not compress:
        log.info("Compression disabled")
        outname = filename
        with open(outname) as outfile:
            backup_data["size"] = os.fstat(outfile.fileno()).st_size
        bakthat_compression = False

    # Check whether the file is already compressed
    elif mimetypes.guess_type(arcname) == ('application/x-tar', 'gzip'):
        log.info("File already compressed")
        outname = filename

        # removing extension to reformat filename
        new_arcname = re.sub(r'(\.t(ar\.)?gz)', '', arcname)
        stored_filename = backup_file_fmt.format(new_arcname, date_component)

        with open(outname) as outfile:
            backup_data["size"] = os.fstat(outfile.fileno()).st_size

        bakthat_compression = False
    else:
        # If not we compress it
        log.info("Compressing...")

        with tempfile.NamedTemporaryFile(delete=False) as out:
            with closing(tarfile.open(fileobj=out, mode="w:gz")) as tar:
                tar.add(filename, arcname=arcname, exclude=_exclude)
            outname = out.name
            out.seek(0)
            backup_data["size"] = os.fstat(out.fileno()).st_size
        bakthat_compression = True

    bakthat_encryption = False
    if password:
        bakthat_encryption = True
        log.info("Encrypting...")
        encrypted_out = tempfile.NamedTemporaryFile(delete=False)
        encrypt_file(outname, encrypted_out.name, password)
        stored_filename += ".enc"

        # We only remove the file if the archive is created by bakthat
        if bakthat_compression:
            os.remove(outname)  # remove non-encrypted tmp file

        outname = encrypted_out.name

        encrypted_out.seek(0)
        backup_data["size"] = os.fstat(encrypted_out.fileno()).st_size

    # Handling tags metadata
    if isinstance(tags, list):
        tags = " ".join(tags)

    backup_data["tags"] = tags

    backup_data["metadata"] = dict(is_enc=bakthat_encryption,
                                   client=socket.gethostname())
    backup_data["stored_filename"] = stored_filename

    access_key = storage_backend.conf.get("access_key")
    container_key = storage_backend.conf.get(storage_backend.container_key)
    backup_data["backend_hash"] = hashlib.sha512(access_key + container_key).hexdigest()

    log.info("Uploading...")
    storage_backend.upload(stored_filename, outname, s3_reduced_redundancy=s3_reduced_redundancy)

    # We only remove the file if the archive is created by bakthat
    if bakthat_compression or bakthat_encryption:
        os.remove(outname)

    log.debug(backup_data)

    # Insert backup metadata in SQLite
    backup = Backups.create(**backup_data)

    BakSyncer(conf).sync_auto()

    # bakmanager.io hook, enable with -k/--key parameter
    if backup_key:
        bakmanager_hook(conf, backup_data, backup_key)

    events.on_backup(session_id, backup)

    return backup
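
The re.sub call in this example only runs when the input is already a gzipped tarball: it strips the .tgz / .tar.gz extension so that the timestamped backup name is not doubled up. A small sketch of that renaming step (the stored_name helper and file names are illustrative; the end-of-string anchor is a small addition over the original pattern):

import re
from datetime import datetime

def stored_name(arcname, now=None):
    now = now or datetime.utcnow()
    date_component = now.strftime("%Y%m%d%H%M%S")
    # Drop an existing ".tgz"/".tar.gz" suffix before re-adding it, so
    # "site.tar.gz" becomes "site.<timestamp>.tgz" rather than
    # "site.tar.gz.<timestamp>.tgz".
    new_arcname = re.sub(r"(\.t(ar\.)?gz)$", "", arcname)
    return "{0}.{1}.tgz".format(new_arcname, date_component)

print(stored_name("site.tar.gz", datetime(2024, 1, 1, 12, 0, 0)))
# site.20240101120000.tgz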

Example 73

Project: QSTK
Source File: html_colorize.py
View license
    def handle_line(self, line):
        """
        Render a single logical line from the module, and write the
        generated HTML to C{self.out}.

        @param line: A single logical line, encoded as a list of
            C{(toktype,toktext)} pairs corresponding to the tokens in
            the line.
        """
        # def_name is the name of the function or class defined by
        # this line; or None if no function or class is defined.
        def_name = None

        # def_type is the type of the function or class defined by
        # this line; or None if no function or class is defined.
        def_type = None

        # does this line start a class/func def?
        starting_def_block = False 

        in_base_list = False
        in_param_list = False
        in_param_default = 0
        at_module_top = (self.lineno == 1)

        ended_def_blocks = 0

        # The html output.
        if self.ADD_LINE_NUMBERS:
            s = self.lineno_to_html()
            self.lineno += 1
        else:
            s = ''
        s += '  <tt class="py-line">'

        # Loop through each token, and colorize it appropriately.
        for i, (toktype, toktext) in enumerate(line):
            if type(s) is not str:
                if type(s) is unicode:
                    log.error('While colorizing %s -- got unexpected '
                              'unicode string' % self.module_name)
                    s = s.encode('ascii', 'xmlcharrefreplace')
                else:
                    raise ValueError('Unexpected value for s -- %s' % 
                                     type(s).__name__)

            # For each token, determine its css class and whether it
            # should link to a url.
            css_class = None
            url = None
            tooltip = None
            onclick = uid = targets = None # these 3 are used together.

            # Is this token the class name in a class definition?  If
            # so, then make it a link back into the API docs.
            if i>=2 and line[i-2][1] == 'class':
                in_base_list = True
                css_class = self.CSS_CLASSES['DEFNAME']
                def_name = toktext
                def_type = 'class'
                if 'func' not in self.context_types:
                    cls_name = self.context_name(def_name)
                    url = self.name2url(cls_name)
                    s = self.mark_def(s, cls_name)
                    starting_def_block = True

            # Is this token the function name in a function def?  If
            # so, then make it a link back into the API docs.
            elif i>=2 and line[i-2][1] == 'def':
                in_param_list = True
                css_class = self.CSS_CLASSES['DEFNAME']
                def_name = toktext
                def_type = 'func'
                if 'func' not in self.context_types:
                    cls_name = self.context_name()
                    func_name = self.context_name(def_name)
                    url = self.name2url(cls_name, def_name)
                    s = self.mark_def(s, func_name)
                    starting_def_block = True

            # For each indent, update the indents list (which we use
            # to keep track of indentation strings) and the context
            # list.  If this indent is the start of a class or
            # function def block, then self.def_name will be its name;
            # otherwise, it will be None.
            elif toktype == token.INDENT:
                self.indents.append(toktext)
                self.context.append(self.def_name)
                self.context_types.append(self.def_type)

            # When we dedent, pop the last elements off the indents
            # list and the context list.  If the last context element
            # is a name, then we're ending a class or function def
            # block; so write an end-div tag.
            elif toktype == token.DEDENT:
                self.indents.pop()
                self.context_types.pop()
                if self.context.pop():
                    ended_def_blocks += 1

            # If this token contains whitespace, then don't bother to
            # give it a css tag.
            elif toktype in (None, tokenize.NL, token.NEWLINE,
                             token.ENDMARKER):
                css_class = None

            # Check if the token is a keyword.
            elif toktype == token.NAME and keyword.iskeyword(toktext):
                css_class = self.CSS_CLASSES['KEYWORD']

            elif in_base_list and toktype == token.NAME:
                css_class = self.CSS_CLASSES['BASECLASS']

            elif (in_param_list and toktype == token.NAME and
                  not in_param_default):
                css_class = self.CSS_CLASSES['PARAM']

            # Class/function docstring.
            elif (self.def_name and line[i-1][0] == token.INDENT and
                  self.is_docstring(line, i)):
                css_class = self.CSS_CLASSES['DOCSTRING']

            # Module docstring.
            elif at_module_top and self.is_docstring(line, i):
                css_class = self.CSS_CLASSES['DOCSTRING']

            # check for decorators??
            elif (toktype == token.NAME and
                  ((i>0 and line[i-1][1]=='@') or
                   (i>1 and line[i-1][0]==None and line[i-2][1] == '@'))):
                css_class = self.CSS_CLASSES['DECORATOR']
                self.has_decorators = True

            # If it's a name, try to link it.
            elif toktype == token.NAME:
                css_class = self.CSS_CLASSES['NAME']
                # If we have a variable named `toktext` in the current
                # context, then link to that.  Note that if we're inside
                # a function, then that function is our context, not
                # the namespace that contains it. [xx] this isn't always
                # the right thing to do.
                if (self.GUESS_LINK_TARGETS and self.docindex is not None
                    and self.url_func is not None):
                    context = [n for n in self.context if n is not None]
                    container = self.docindex.get_vardoc(
                        DottedName(self.module_name, *context))
                    if isinstance(container, NamespaceDoc):
                        doc = container.variables.get(toktext)
                        if doc is not None:
                            url = self.url_func(doc)
                            tooltip = str(doc.canonical_name)
                # Otherwise, check the name_to_docs index to see what
                # else this name might refer to.
                if (url is None and self.name_to_docs is not None
                    and self.url_func is not None):
                    docs = self.name_to_docs.get(toktext)
                    if docs:
                        tooltip='\n'.join([str(d.canonical_name)
                                           for d in docs])
                        if len(docs) == 1 and self.GUESS_LINK_TARGETS:
                            url = self.url_func(docs[0])
                        else:
                            uid, onclick, targets = self.doclink(toktext, docs)

            # For all other tokens, look up the CSS class to use
            # based on the token's type.
            else:
                if toktype == token.OP and toktext in self.CSS_CLASSES:
                    css_class = self.CSS_CLASSES[toktext]
                elif token.tok_name[toktype] in self.CSS_CLASSES:
                    css_class = self.CSS_CLASSES[token.tok_name[toktype]]
                else:
                    css_class = None

            # update our status..
            if toktext == ':':
                in_base_list = False
                in_param_list = False
            if toktext == '=' and in_param_list:
                in_param_default = True
            if in_param_default:
                if toktext in ('(','[','{'): in_param_default += 1
                if toktext in (')',']','}'): in_param_default -= 1
                if toktext == ',' and in_param_default == 1:
                    in_param_default = 0
                
            # Write this token, with appropriate colorization.
            if tooltip and self.ADD_TOOLTIPS:
                tooltip_html = ' title="%s"' % tooltip
            else: tooltip_html = ''
            if css_class: css_class_html = ' class="%s"' % css_class
            else: css_class_html = ''
            if onclick:
                if targets: targets_html = ' targets="%s"' % targets
                else: targets_html = ''
                s += ('<tt id="%s"%s%s><a%s%s href="#" onclick="%s">' %
                      (uid, css_class_html, targets_html, tooltip_html,
                       css_class_html, onclick))
            elif url:
                if isinstance(url, unicode):
                    url = url.encode('ascii', 'xmlcharrefreplace')
                s += ('<a%s%s href="%s">' %
                      (tooltip_html, css_class_html, url))
            elif css_class_html or tooltip_html:
                s += '<tt%s%s>' % (tooltip_html, css_class_html)
            if i == len(line)-1:
                s += ' </tt>' # Closes <tt class="py-line">
                s += cgi.escape(toktext)
            else:
                try:
                    s += self.add_line_numbers(cgi.escape(toktext), css_class)
                except Exception, e:
                    print (toktext, css_class, toktext.encode('ascii'))
                    raise

            if onclick: s += "</a></tt>"
            elif url: s += '</a>'
            elif css_class_html or tooltip_html: s += '</tt>'

        if self.ADD_DEF_BLOCKS:
            for i in range(ended_def_blocks):
                self.out(self.END_DEF_BLOCK)

        # Strip any empty <tt>s.
        s = re.sub(r'<tt class="[\w+]"></tt>', '', s)

        # Write the line.
        self.out(s)

        if def_name and starting_def_block:
            self.out('</div>')

        # Add div's if we're starting a def block.
        if (self.ADD_DEF_BLOCKS and def_name and starting_def_block and
            (line[-2][1] == ':')):
            indentation = (''.join(self.indents)+'    ').replace(' ', '+')
            linenum_padding = '+'*self.linenum_size
            name=self.context_name(def_name)
            self.out(self.START_DEF_BLOCK % (name, linenum_padding,
                                             indentation, name))
            
        self.def_name = def_name
        self.def_type = def_type
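
The closing re.sub here is meant to delete empty <tt class="..."></tt> wrappers left over after all tokens have been written. As written, the pattern puts \w+ inside a character class, [\w+], which only matches a single name character; below is a sketch of the presumably intended cleanup, with invented sample HTML:

import re

def strip_empty_tts(html):
    # Remove <tt class="name"></tt> elements that ended up with no content.
    return re.sub(r'<tt class="[-\w]+"></tt>', '', html)

line = '<tt class="py-line"><tt class="py-keyword">def</tt><tt class="py-op"></tt> f():</tt>'
print(strip_empty_tts(line))
# <tt class="py-line"><tt class="py-keyword">def</tt> f():</tt>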

Example 74

Project: pelisalacarta
Source File: aquitorrent.py
View license
def fanart(item):
    logger.info("pelisalacarta.aquitorrent fanart")
    itemlist = []
    url = item.url
    data = scrapertools.cache_page(url)
    data = re.sub(r"\n|\r|\t|\s{2}|\(.*?\)|&nbsp;","",data)
    title = item.extra
    
    
    

    year=""
    item.title = re.sub(r"-|\(.*?\)|\d+x\d+","",item.title)
    if not "Series" in item.url:
        urlyear = item.url
        data = scrapertools.cache_page(urlyear)
        try:
            year =scrapertools.get_match(data,'<span style="text-align: justify;">.*?Año.*?(\d\d\d\d)')
        except:
            year = ""
        try:
            
            if "CLASICOS-DISNEY" in item.url:
                title = title + " "+"Disney"
            try:
                ###Search Tmdb for the movie by title and year
                title_tmdb = title.replace(" ","%20")
                url_tmdb="http://api.themoviedb.org/3/search/movie?api_key=2e2160006592024ba87ccdf78c28f49f&query=" + title_tmdb +"&year="+year+"&language=es&include_adult=false"
                
                data = scrapertools.cachePage(url_tmdb)
                data = re.sub(r"\n|\r|\t|\s{2}|&nbsp;","",data)
                id = scrapertools.get_match(data,'"page":1.*?,"id":(.*?),')
            
            except:
                if ":" in title or "(" in title:
                    title_tmdb = title.replace(" ","%20")
                    url_tmdb="http://api.themoviedb.org/3/search/movie?api_key=2e2160006592024ba87ccdf78c28f49f&query=" + title_tmdb +"&year="+year+"&language=es&include_adult=false"
                    data = scrapertools.cachePage(url_tmdb)
                    data = re.sub(r"\n|\r|\t|\s{2}|&nbsp;","",data)
                    id = scrapertools.get_match(data,'"page":1.*?,"id":(.*?),')
                    
                else:
                    title_tmdb = title.replace(" ","%20")
                    title_tmdb= re.sub(r"(:.*)|\(.*?\)","",title_tmdb)
                    url_tmdb="http://api.themoviedb.org/3/search/movie?api_key=2e2160006592024ba87ccdf78c28f49f&query=" + title_tmdb +"&year="+year+"&language=es&include_adult=false"
                    data = scrapertools.cachePage(url_tmdb)
                    data = re.sub(r"\n|\r|\t|\s{2}|&nbsp;","",data)
                    id = scrapertools.get_match(data,'"page":1.*?,"id":(.*?),')



        except:
            ###If there is no match, do a Bing search for the Imdb id
            urlbing_imdb = "http://www.bing.com/search?q=%s+%s+site:imdb.com" % (title.replace(' ', '+'),  year)
            data = browser (urlbing_imdb)
            
            try:
                subdata_imdb = scrapertools.get_match(data,'<li class="b_algo">(.*?)h="ID')
            except:
              pass
            
            try:
                url_imdb = scrapertools.get_match(subdata_imdb,'<a href="([^"]+)"')
        
            except:
                pass
            try:
                id_imdb = scrapertools.get_match(url_imdb,'.*?www.imdb.com/.*?/(.*?)/')
            except:
                pass
            try:
                ###Look up the Tmdb id via the Imdb id
                urltmdb_remote ="https://api.themoviedb.org/3/find/"+id_imdb+"?external_source=imdb_id&api_key=2e2160006592024ba87ccdf78c28f49f&language=es&include_adult=false"
                data = scrapertools.cachePage(urltmdb_remote)
                data = re.sub(r"\n|\r|\t|\s{2}|&nbsp;","",data)
                id = scrapertools.get_match(data,'"movie_results".*?,"id":(\d+)')
                
            except:
                id = ""
                 
        
        ###At this point we have the Tmdb id (or not); look for fanart_1
        urltmdb_fan1 ="http://api.themoviedb.org/3/movie/"+id+"?api_key=2e2160006592024ba87ccdf78c28f49f"
        data = scrapertools.cachePage( urltmdb_fan1 )
        data = re.sub(r"\n|\r|\t|\s{2}|&nbsp;","",data)
        patron = '"adult".*?"backdrop_path":"(.*?)"'
        matches = re.compile(patron,re.DOTALL).findall(data)
        try:
            ###Try the Tmdb poster
            posterdb = scrapertools.get_match(data,'"adult".*?"poster_path":"(.*?)"')
            posterdb =  "https://image.tmdb.org/t/p/original" + posterdb
        except:
            posterdb = item.thumbnail
    
        if len(matches)==0:
            fanart_info = item.fanart
            fanart= item.fanart
            fanart_2 = item.fanart
            itemlist.append( Item(channel=item.channel, title =item.title, url=item.url, action="findvideos", thumbnail=posterdb, fanart=fanart ,extra= fanart_2, folder=True) )
        for fan in matches:
    
            fanart="https://image.tmdb.org/t/p/original" + fan
            fanart_1= fanart
            
            ###Look in Tmdb for the info fanart, the trailer fanart and fanart_2 (findvideos)
            urltmdb_images ="http://api.themoviedb.org/3/movie/"+id+"/images?api_key=2e2160006592024ba87ccdf78c28f49f"
            data = scrapertools.cachePage(urltmdb_images)
            data = re.sub(r"\n|\r|\t|\s{2}|&nbsp;","",data)
            
            patron = '"backdrops".*?"file_path":".*?",.*?"file_path":"(.*?)",.*?"file_path":"(.*?)",.*?"file_path":"(.*?)"'
            matches = re.compile(patron,re.DOTALL).findall(data)
            
            if len(matches) == 0:
                patron = '"backdrops".*?"file_path":"(.*?)",.*?"file_path":"(.*?)",.*?"file_path":"(.*?)"'
                matches = re.compile(patron,re.DOTALL).findall(data)
                if len(matches) == 0:
                    fanart_info = fanart_1
                    fanart_trailer = fanart_1
                    fanart_2 = fanart_1
                    category =""
            for fanart_info, fanart_trailer, fanart_2 in matches:
                fanart_info = "https://image.tmdb.org/t/p/original" + fanart_info
                fanart_trailer = "https://image.tmdb.org/t/p/original" + fanart_trailer
                fanart_2 = "https://image.tmdb.org/t/p/original" + fanart_2
                category = ""
                
                if fanart_info == fanart:
                    ###Look for fanart_info on Imdb if it matches fanart
                    try:
                        url_imdbphoto = "http://www.imdb.com/title/"+id_imdb+"/mediaindex"
                        photo_imdb= scrapertools.get_match(url_imdbphoto,'<div class="media_index_thumb_list".*?src="([^"]+)"')
                        photo_imdb = photo_imdb.replace("@._V1_UY100_CR25,0,100,100_AL_.jpg","@._V1_SX1280_SY720_.jpg")
                        fanart_info = photo_imdb
                    except:
                        fanart_info = fanart_2
            itemlist.append( Item(channel=item.channel, title =item.title, url=item.url, action="findvideos", thumbnail=posterdb, fanart=fanart_1 ,extra= fanart_2, folder=True) )



    else:
        urlyear = item.url
        data = scrapertools.cache_page(urlyear)
        try:
            year =scrapertools.get_match(data,'<span style="text-align: justify;">.*?Año.*?(\d\d\d\d)')
        except:
              try:
                 year =scrapertools.get_match(data,'SINOPSIS.*? \((\d\d\d\d)')
              except:
                 year = ""
        #Bing search for the Imdb series id
        url_imdb = "http://www.bing.com/search?q=%s+%s+tv+series+site:imdb.com" % (title.replace(' ', '+'),  year)
        data = browser (url_imdb)
        data = re.sub(r"\n|\r|\t|\s{2}|&nbsp;","",data)
        
        try:
            subdata_imdb = scrapertools.get_match(data,'<li class="b_algo">(.*?)h="ID')
        except:
            pass
        try:
            imdb_id = scrapertools.get_match(subdata_imdb,'<a href=.*?http.*?imdb.com/title/(.*?)/.*?"')
        except:
            imdb_id = ""
        ### Look up the tvdb id via the imdb id
        urltvdb_remote="http://thetvdb.com/api/GetSeriesByRemoteID.php?imdbid="+imdb_id+"&language=es"
        data = scrapertools.cachePage(urltvdb_remote)
        data = re.sub(r"\n|\r|\t|\s{2}|&nbsp;","",data)
        patron = '<Data><Series><seriesid>([^<]+)</seriesid>.*?<Overview>(.*?)</Overview>'
        matches = re.compile(patron,re.DOTALL).findall(data)
        
        if len(matches)== 0:
            print "gooooooo"
            ###If there is no match, search tvdb directly
            if ":" in title or "(" in title:
                title= title.replace(" ","%20")
                url_tvdb="http://thetvdb.com/api/GetSeries.php?seriesname=" + title + "&language=es"
                data = scrapertools.cachePage(url_tvdb)
                data = re.sub(r"\n|\r|\t|\s{2}|&nbsp;","",data)
                patron = '<Data><Series><seriesid>([^<]+)</seriesid>.*?<Overview>(.*?)</Overview>'
                matches = re.compile(patron,re.DOTALL).findall(data)
                if len(matches)== 0:
                    title= re.sub(r"(:.*)|\(.*?\)","",title)
                    title= title.replace(" ","%20")
                    url_tvdb="http://thetvdb.com/api/GetSeries.php?seriesname=" + title + "&language=es"
                    data = scrapertools.cachePage(url_tvdb)
                    data = re.sub(r"\n|\r|\t|\s{2}|&nbsp;","",data)
                    patron = '<Data><Series><seriesid>([^<]+)</seriesid>.*?<Overview>(.*?)</Overview>'
                    matches = re.compile(patron,re.DOTALL).findall(data)
                        
                    if len(matches) == 0:
                        plot = ""
                        postertvdb = item.thumbnail
                        extra= "http://s6.postimg.org/rv2mu3pap/bityouthsinopsis2.png"
                        fanart_info = "http://s6.postimg.org/6ucl96lsh/bityouthnofan.jpg"
                        fanart_trailer = "http://s6.postimg.org/6ucl96lsh/bityouthnofan.jpg"
                        category= ""
                        show = title+"|"+year+"|"+"http://s6.postimg.org/mh3umjzkh/bityouthnofanventanuco.jpg"
                        itemlist.append( Item(channel=item.channel, title=item.title, url=item.url, action="findvideos", thumbnail=item.thumbnail, fanart="http://s6.postimg.org/6ucl96lsh/bityouthnofan.jpg" ,extra=extra, category= category,  show=show ,plot=plot, folder=True) )
        
            else:
                title= title.replace(" ","%20")
                url_tvdb="http://thetvdb.com/api/GetSeries.php?seriesname=" + title + "&language=es"
                data = scrapertools.cachePage(url_tvdb)
                data = re.sub(r"\n|\r|\t|\s{2}|&nbsp;","",data)
                patron = '<Data><Series><seriesid>([^<]+)</seriesid>.*?<Overview>(.*?)</Overview>'
                matches = re.compile(patron,re.DOTALL).findall(data)
                if len(matches) == 0:
                    plot = ""
                    postertvdb = item.thumbnail
                    extra= "http://s6.postimg.org/rv2mu3pap/bityouthsinopsis2.png"
                    show = title+"|"+year+"|"+"http://s6.postimg.org/mh3umjzkh/bityouthnofanventanuco.jpg"
                    fanart_info = "http://s6.postimg.org/6ucl96lsh/bityouthnofan.jpg"
                    fanart_trailer = "http://s6.postimg.org/6ucl96lsh/bityouthnofan.jpg"
                    category= ""
                    itemlist.append( Item(channel=item.channel, title=item.title, url=item.url, action="findvideos", thumbnail=item.thumbnail, fanart="http://s6.postimg.org/6ucl96lsh/bityouthnofan.jpg" ,extra=extra, category= category,  show=show ,plot= plot, folder=True) )
        #fanart
        for  id, info in matches:
            
            category = id
            plot = info
            id_serie = id
            
            url ="http://thetvdb.com/api/1D62F2F90030C444/series/"+id_serie+"/banners.xml"
            
            data = scrapertools.cachePage(url)
            data = re.sub(r"\n|\r|\t|\s{2}|&nbsp;","",data)
            patron = '<Banners><Banner>.*?<VignettePath>(.*?)</VignettePath>'
            matches = re.compile(patron,re.DOTALL).findall(data)
            try:
                postertvdb = scrapertools.get_match(data,'<Banners><Banner>.*?<BannerPath>posters/(.*?)</BannerPath>')
                postertvdb =  "http://thetvdb.com/banners/_cache/posters/" + postertvdb
            except:
                postertvdb = item.thumbnail
                                
            if len(matches)==0:
                extra="http://s6.postimg.org/rv2mu3pap/bityouthsinopsis2.png"
                show = title+"|"+year+"|"+"http://s6.postimg.org/mh3umjzkh/bityouthnofanventanuco.jpg"
                fanart_info = "http://s6.postimg.org/6ucl96lsh/bityouthnofan.jpg"
                fanart_trailer = "http://s6.postimg.org/6ucl96lsh/bityouthnofan.jpg"
                itemlist.append( Item(channel=item.channel, title=item.title, url=item.url, action="findvideos", thumbnail=postertvdb, fanart="http://s6.postimg.org/6ucl96lsh/bityouthnofan.jpg"  ,category = category, extra=extra, show=show,folder=True) )
                                                        
            for fan in matches:
                fanart="http://thetvdb.com/banners/" + fan
                fanart_1= fanart
                patron= '<Banners><Banner>.*?<BannerPath>.*?</BannerPath>.*?</Banner><Banner>.*?<BannerPath>(.*?)</BannerPath>.*?</Banner><Banner>.*?<BannerPath>(.*?)</BannerPath>.*?</Banner><Banner>.*?<BannerPath>(.*?)</BannerPath>'
                matches = re.compile(patron,re.DOTALL).findall(data)
                if len(matches)==0:
                   fanart_info= fanart_1
                   fanart_trailer = fanart_1
                   fanart_2 = fanart_1
                   show = title+"|"+year+"|"+fanart_1
                   extra=postertvdb
                   itemlist.append( Item(channel=item.channel, title=item.title, url=item.url, action="findvideos", thumbnail=postertvdb, fanart=fanart_1  ,category = category, extra=extra, show=show,folder=True) )
                for fanart_info, fanart_trailer, fanart_2 in matches:
                    fanart_info = "http://thetvdb.com/banners/" + fanart_info
                    fanart_trailer = "http://thetvdb.com/banners/" + fanart_trailer
                    fanart_2 = "http://thetvdb.com/banners/" + fanart_2
                
                    itemlist.append( Item(channel=item.channel, title =item.title, url=item.url, action="findvideos", thumbnail=postertvdb, fanart=fanart_1 , extra= fanart_2,folder=True) )
    title ="Info"
    title = title.replace(title,"[COLOR skyblue][B]"+title+"[/B][/COLOR]")
    if "Series" in item.url:
        thumbnail = postertvdb
    else:
        thumbnail = posterdb

    itemlist.append( Item(channel=item.channel, action="info" , title=title , url=item.url, thumbnail=thumbnail, fanart=fanart_info , folder=False ))

    return itemlist
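
Note: the scraper code above relies on re.sub mainly to flatten the downloaded HTML (stripping newlines, tabs, pairs of whitespace characters and &nbsp; entities) so that the DOTALL patterns that follow can match across what used to be several lines. A minimal, self-contained sketch of that cleanup step, using made-up markup:

import re

html = "<Data>\n\t<Series>\r\n  <seriesid>  12345 &nbsp;</seriesid></Series></Data>"
# Remove newlines, carriage returns, tabs, pairs of whitespace characters
# and &nbsp; entities, as the scraper does before running its patterns.
flat = re.sub(r"\n|\r|\t|\s{2}|&nbsp;", "", html)
print(flat)  # <Data><Series><seriesid>12345 </seriesid></Series></Data>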

Example 75

Project: pelisalacarta
Source File: zentorrents.py
View license
def fanart(item):
    logger.info("pelisalacarta.peliculasdk fanart")
    itemlist = []
    url = item.url
    data = scrapertools.cachePage(url)
    data = re.sub(r"\n|\r|\t|\s{2}|&nbsp;","",data)
    if "peliculas" in item.url:
    
        if "microhd" in url or "web" in url or "1080" in url or "bluray" in url or  "HDRip" in item.title:
            title= scrapertools.get_match(data,'<title>([^"]+) \[')
            title= re.sub(r"3D|[0-9]|SBS|\(.*?\)|\[.*?\]|","",title)
            title=title.replace('Perdón','perdon')
            title= title.replace(' ','%20')
        
    
        else:
                
            title= scrapertools.get_match(data,'<title>([^"]+) -')
            title= re.sub(r"3D|[0-9]|SBS|\(.*?\)|\[.*?\]|","",title)
            title= title.replace('á','a')
            title= title.replace('Á','A')
            title= title.replace('é','e')
            title= title.replace('í','i')
            title= title.replace('ó','o')
            title= title.replace('ú','u')
            title= title.replace('ñ','n')
            title= title.replace(' ','%20')

        url="http://api.themoviedb.org/3/search/movie?api_key=2e2160006592024ba87ccdf78c28f49f&query=" + title + "&language=es&include_adult=false"
        data = scrapertools.cachePage(url)
        data = re.sub(r"\n|\r|\t|\s{2}|&nbsp;","",data)
        patron = '"page":1.*?,"id":(.*?),.*?"backdrop_path":"\\\(.*?)"'
        matches = re.compile(patron,re.DOTALL).findall(data)
        if len(matches)==0:
            extra=item.thumbnail
            show= item.thumbnail
            posterdb = item.thumbnail
            fanart_info = item.thumbnail
            fanart_trailer = item.thumbnail
            category= item.thumbnail
            itemlist.append( Item(channel=item.channel, title=item.title, url=item.url, action="findvideos", thumbnail=item.thumbnail, fanart=item.thumbnail,extra = extra, show= show, category= category,folder=True) )
        
        for id, fan in matches:
            try:
                posterdb = scrapertools.get_match(data,'"page":1,.*?"poster_path":"\\\(.*?)"')
                posterdb =  "https://image.tmdb.org/t/p/original" + posterdb
            except:
                posterdb = item.thumbnail
            fanart="https://image.tmdb.org/t/p/original" + fan
            item.extra= fanart
            url ="http://api.themoviedb.org/3/movie/"+id+"/images?api_key=2e2160006592024ba87ccdf78c28f49f"
            data = scrapertools.cachePage(url)
            data = re.sub(r"\n|\r|\t|\s{2}|&nbsp;","",data)
            
            patron = '"backdrops".*?"file_path":".*?",.*?"file_path":"(.*?)",.*?"file_path":"(.*?)",.*?"file_path":"(.*?)"'
            matches = re.compile(patron,re.DOTALL).findall(data)
                    
            if len(matches) == 0:
                patron = '"backdrops".*?"file_path":"(.*?)",.*?"file_path":"(.*?)",.*?"file_path":"(.*?)"'
                matches = re.compile(patron,re.DOTALL).findall(data)
                if len(matches) == 0:
                    fanart_info = item.extra
                    fanart_trailer = item.extra
                    fanart_2 = item.extra
            for fanart_info, fanart_trailer, fanart_2 in matches:
                fanart_info = "https://image.tmdb.org/t/p/original" + fanart_info
                fanart_trailer = "https://image.tmdb.org/t/p/original" + fanart_trailer
                fanart_2 = "https://image.tmdb.org/t/p/original" + fanart_2
                        
            #clearart, fanart_2 and logo
            url ="http://webservice.fanart.tv/v3/movies/"+id+"?api_key=dffe90fba4d02c199ae7a9e71330c987"
            data = scrapertools.cachePage(url)
            data = re.sub(r"\n|\r|\t|\s{2}|&nbsp;","",data)
            patron = '"hdmovielogo":.*?"url": "([^"]+)"'
            matches = re.compile(patron,re.DOTALL).findall(data)
            if '"moviedisc"' in data:
                disc = scrapertools.get_match(data,'"moviedisc":.*?"url": "([^"]+)"')
            if '"movieposter"' in data:
                poster = scrapertools.get_match(data,'"movieposter":.*?"url": "([^"]+)"')
            if '"moviethumb"' in data:
                thumb = scrapertools.get_match(data,'"moviethumb":.*?"url": "([^"]+)"')
            if '"moviebanner"' in data:
                banner= scrapertools.get_match(data,'"moviebanner":.*?"url": "([^"]+)"')
            
            if len(matches)==0:
                extra=  posterdb
                show = fanart_2
                category = item.extra
                itemlist.append( Item(channel=item.channel, title = item.title , action="findvideos", url=item.url, server="torrent", thumbnail=item.thumbnail, fanart=item.extra, extra=extra, show=show, category= category, folder=True) )
        for logo in matches:
             if '"hdmovieclearart"' in data:
                  clear=scrapertools.get_match(data,'"hdmovieclearart":.*?"url": "([^"]+)"')
                  if '"moviebackground"' in data:
                      
                      extra=clear
                      show= fanart_2
                      if '"moviedisc"' in data:
                           category= disc
                      else:
                           category= clear
                      itemlist.append( Item(channel=item.channel, title = item.title , action="findvideos", url=item.url, server="torrent", thumbnail=logo, fanart=item.extra, extra=extra,show=show,  category= category,folder=True) )
                  else:
                        extra= clear
                        show=fanart_2
                        if '"moviedisc"' in data:
                             category= disc
                        else:
                            category= clear
                        itemlist.append( Item(channel=item.channel, title = item.title , action="findvideos", url=item.url, server="torrent", thumbnail=logo, fanart=item.extra, extra=extra,show=show,  category= category, folder=True) )
                    
             if '"moviebackground"' in data:
                 
                  if '"hdmovieclearart"' in data:
                       clear=scrapertools.get_match(data,'"hdmovieclearart":.*?"url": "([^"]+)"')
                       extra=clear
                       show= fanart_2
                       if '"moviedisc"' in data:
                            category= disc
                       else:
                            category= clear
                  else:
                        extra=logo
                        show= fanart_2
                        if '"moviedisc"' in data:
                            category= disc
                        else:
                            category= logo
                        itemlist.append( Item(channel=item.channel, title = item.title , action="findvideos", url=item.url, server="torrent", thumbnail=logo, fanart=item.extra, extra=extra,show=show,  category= category, folder=True) )
                    
                    
                    
                    
             if not '"hdmovieclearart"' in data and not '"moviebackground"' in data:
                      extra= logo
                      show=  fanart_2
                      if '"moviedisc"' in data:
                           category= disc
                      else:
                           category= item.extra
                      itemlist.append( Item(channel=item.channel, title = item.title , action="findvideos", url=item.url, server="torrent", thumbnail=logo, fanart=item.extra, extra=extra,show=show ,  category= category, folder=True) )
       
    else:
        if "series" in item.url:
            if "hdtv" in item.url or "720" in item.title or "1080p" in item.title:
                title= scrapertools.get_match(data,'<title>([^"]+) \[')
                title= re.sub(r"3D|'|,|[0-9]|#|;|\[.*?\]|SBS|-|","",title)
                title= title.replace('Temporada','')
                title= title.replace('Fin','')
                title= title.replace('x','')
                title= title.replace('Heli','Helix')
                title= title.replace('Anatomía','Anatomia')
                title= title.replace('á','a')
                title= title.replace('Á','A')
                title= title.replace('é','e')
                title= title.replace('í','i')
                title= title.replace('ó','o')
                title= title.replace('ú','u')
                title= title.replace('ñ','n')
                title= title.replace(' ','%20')
            
            
            else:
                title= scrapertools.get_match(data,'<title>([^"]+) -')
                title= re.sub(r"3D|'|,|[0-9]|#|;|´|VOSE|\[.*?\]|-|","",title)
                title= title.replace('Temporada','')
                title= title.replace('Fin','')
                title= title.replace('x','')
                title= title.replace('á','a')
                title= title.replace('Á','A')
                title= title.replace('é','e')
                title= title.replace('í','i')
                title= title.replace('ó','o')
                title= title.replace('ú','u')
                title= title.replace('ñ','n')
                title= title.replace('Anatomía','Anatomia')
                title= title.replace(' ','%20')
                
        url="http://thetvdb.com/api/GetSeries.php?seriesname=" + title + "&language=es"
        if "Erase%20una%20vez%20%20" in title:
            url ="http://thetvdb.com/api/GetSeries.php?seriesname=Erase%20una%20vez%20(2011)&language=es"
        data = scrapertools.cachePage(url)
        data = re.sub(r"\n|\r|\t|\s{2}|&nbsp;","",data)
        patron = '<Data><Series><seriesid>([^<]+)</seriesid>'
        matches = re.compile(patron,re.DOTALL).findall(data)
        if len(matches)==0:
            extra= item.thumbnail
            show=  item.thumbnail
            fanart_info = item.thumbnail
            fanart_trailer = item.thumbnail
            category= ""
            itemlist.append( Item(channel=item.channel, title=item.title, url=item.url, action="findvideos", thumbnail=item.thumbnail, fanart=item.thumbnail ,extra=extra, category= category,  show=show , folder=True) )
        else:
            for id in matches:
                category = id
                id_serie = id
                url ="http://thetvdb.com/api/1D62F2F90030C444/series/"+id_serie+"/banners.xml"
                data = scrapertools.cachePage(url)
                data = re.sub(r"\n|\r|\t|\s{2}|&nbsp;","",data)
                patron = '<Banners><Banner>.*?<VignettePath>(.*?)</VignettePath>'
                matches = re.compile(patron,re.DOTALL).findall(data)
                if len(matches)==0:
                    extra=item.thumbnail
                    show= item.thumbnail
                    fanart_info = item.thumbnail
                    fanart_trailer = item.thumbnail
                    itemlist.append( Item(channel=item.channel, title=item.title, url=item.url, action="findvideos", thumbnail=item.thumbnail, fanart=item.thumbnail ,category = category, extra=extra, show=show, folder=True) )
            for fan in matches:
                fanart="http://thetvdb.com/banners/" + fan
                item.extra= fanart
                patron= '<Banners><Banner>.*?<BannerPath>.*?</BannerPath>.*?</Banner><Banner>.*?<BannerPath>(.*?)</BannerPath>.*?</Banner><Banner>.*?<BannerPath>(.*?)</BannerPath>.*?</Banner><Banner>.*?<BannerPath>(.*?)</BannerPath>'
                matches = re.compile(patron,re.DOTALL).findall(data)
                if len(matches)==0:
                    fanart_info= item.extra
                    fanart_trailer = item.extra
                    fanart_2 = item.extra
                for fanart_info, fanart_trailer, fanart_2 in matches:
                    fanart_info = "http://thetvdb.com/banners/" + fanart_info
                    fanart_trailer = "http://thetvdb.com/banners/" + fanart_trailer
                    fanart_2 = "http://thetvdb.com/banners/" + fanart_2
            #clearart, fanart_2 and logo
            for id in matches:
                url ="http://webservice.fanart.tv/v3/tv/"+id_serie+"?api_key=dffe90fba4d02c199ae7a9e71330c987"
                if "Castle" in title:
                    url ="http://webservice.fanart.tv/v3/tv/83462?api_key=dffe90fba4d02c199ae7a9e71330c987"
                data = scrapertools.cachePage(url)
                data = re.sub(r"\n|\r|\t|\s{2}|&nbsp;","",data)
                patron = '"clearlogo":.*?"url": "([^"]+)"'
                matches = re.compile(patron,re.DOTALL).findall(data)
                if '"tvposter"' in data:
                    tvposter = scrapertools.get_match(data,'"tvposter":.*?"url": "([^"]+)"')
                if '"tvbanner"' in data:
                    tvbanner = scrapertools.get_match(data,'"tvbanner":.*?"url": "([^"]+)"')
                if '"tvthumb"' in data:
                    tvthumb = scrapertools.get_match(data,'"tvthumb":.*?"url": "([^"]+)"')
                if '"hdtvlogo"' in data:
                    hdtvlogo = scrapertools.get_match(data,'"hdtvlogo":.*?"url": "([^"]+)"')
                if '"hdclearart"' in data:
                    hdtvclear = scrapertools.get_match(data,'"hdclearart":.*?"url": "([^"]+)"')
                if len(matches)==0:
                    if '"hdtvlogo"' in data:
                        if "showbackground" in data:
                            
                            if '"hdclearart"' in data:
                                thumbnail = hdtvlogo
                                extra=  hdtvclear
                                show = fanart_2
                            else:
                                thumbnail = hdtvlogo
                                extra= thumbnail
                                show = fanart_2
                            itemlist.append( Item(channel=item.channel, title = item.title , action="findvideos", url=item.url, server="torrent", thumbnail=thumbnail, fanart=item.extra, category=category, extra=extra, show=show, folder=True) )
                                                                        
                                                                        
                        else:
                            if '"hdclearart"' in data:
                                thumbnail= hdtvlogo
                                extra= hdtvclear
                                show= fanart_2
                            else:
                                thumbnail= hdtvlogo
                                extra= thumbnail
                                show= fanart_2
                            itemlist.append( Item(channel=item.channel, title = item.title , action="findvideos", url=item.url, server="torrent", thumbnail=thumbnail, fanart=item.extra, extra=extra, show=show,  category= category, folder=True) )
                    else:
                         extra=  item.thumbnail
                         show = fanart_2
                         itemlist.append( Item(channel=item.channel, title = item.title , action="findvideos", url=item.url,  server="torrent", thumbnail=item.thumbnail, fanart=item.extra, extra=extra, show=show, category = category, folder=True) )
                                                                                                                                
            for logo in matches:
                if '"hdtvlogo"' in data:
                    thumbnail = hdtvlogo
                elif not '"hdtvlogo"' in data :
                           if '"clearlogo"' in data:
                            thumbnail= logo
                else:
                     thumbnail= item.thumbnail
                if '"clearart"' in data:
                    clear=scrapertools.get_match(data,'"clearart":.*?"url": "([^"]+)"')
                    if "showbackground" in data:
                                
                        extra=clear
                        show= fanart_2
                        itemlist.append( Item(channel=item.channel, title = item.title , action="findvideos", url=item.url, server="torrent", thumbnail=thumbnail, fanart=item.extra, extra=extra,show=show, category= category,  folder=True) )
                    else:
                         extra= clear
                         show=fanart_2
                         itemlist.append( Item(channel=item.channel, title = item.title , action="findvideos", url=item.url, server="torrent", thumbnail=thumbnail, fanart=item.extra, extra=extra,show=show, category= category, folder=True) )
                                     
                if "showbackground" in data:
                            
                    if '"clearart"' in data:
                        clear=scrapertools.get_match(data,'"clearart":.*?"url": "([^"]+)"')
                        extra=clear
                        show= fanart_2
                    else:
                        extra=logo
                        show= fanart_2
                        itemlist.append( Item(channel=item.channel, title = item.title , action="findvideos", url=item.url, server="torrent", thumbnail=thumbnail, fanart=item.extra, extra=extra,show=show,  category = category, folder=True) )
                                     
                if not '"clearart"' in data and not '"showbackground"' in data:
                        if '"hdclearart"' in data:
                            extra= hdtvclear
                            show= fanart_2
                        else:
                            extra= thumbnail
                            show=  fanart_2
                        itemlist.append( Item(channel=item.channel, title = item.title , action="findvideos", url=item.url, server="torrent", thumbnail=thumbnail, fanart=item.extra, extra=extra,show=show , category = category, folder=True) )
    

    title ="Info"
    title = title.replace(title,bbcode_kodi2html("[COLOR skyblue]"+title+"[/COLOR]"))
    if not "series" in item.url:
       thumbnail = posterdb
    if "series" in item.url:
        if '"tvposter"' in data:
            thumbnail= tvposter
        else:
            thumbnail = item.thumbnail
        
        if "tvbanner" in data:
            category = tvbanner
        else:
            
            category = show


    itemlist.append( Item(channel=item.channel, action="info" , title=title , url=item.url, thumbnail=thumbnail, fanart=fanart_info, extra= extra, category = category, show= show, folder=False ))

    title= "[COLOR cadetblue]Trailer[/COLOR]"
    if len(item.extra)==0:
        fanart=item.thumbnail
    else:
        fanart = item.extra
    if "series" in item.url:
        if '"tvthumb"' in data:
            thumbnail = tvthumb
        else:
            thumbnail = item.thumbnail
        if '"tvbanner"' in data:
            extra= tvbanner
        elif '"tvthumb"' in data:
            extra = tvthumb
        else:
            extra = item.thumbnail
    else:
        if '"moviethumb"' in data:
            thumbnail = thumb
        else:
            thumbnail = posterdb
        
        if '"moviebanner"' in data:
            extra= banner
        else:
            if '"hdmovieclearart"' in data:
                extra = clear
            
            else:
                extra = posterdb


    itemlist.append( Item(channel=item.channel, action="trailer", title=title , url=item.url , thumbnail=thumbnail , plot=item.plot , fanart=fanart_trailer, extra=extra, folder=True) )
    return itemlist
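
Note: in this example re.sub does two jobs: flattening the page HTML (as above) and scrubbing release tags out of the scraped <title> so the remainder can be used as a TheMovieDB/TheTVDB search term. A small sketch of the title-scrubbing step in isolation, with an invented title:

import re

raw_title = "Interstellar 2014 [BluRay 1080p] (microhd)"
# Drop "3D"/"SBS" markers, digits and anything in () or [] the way the code
# above does, then collapse leftover whitespace before building the search URL.
clean = re.sub(r"3D|[0-9]|SBS|\(.*?\)|\[.*?\]", "", raw_title)
clean = re.sub(r"\s+", " ", clean).strip()
query = clean.replace(" ", "%20")
print(query)  # Interstellar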

Example 76

Project: inspectors-general
Source File: doj.py
View license
def extract_info(content, directory, year_range):
  # goes through each agency or content bucket
  if directory in not_agency:
    agency = "doj"
    agency_name = "Department of Justice"
  elif directory[:11] == "Immigration":
    agency = "ins"
    agency_name = "Immigration and Naturalization Service"
  else:
    agency = agency_decoder[directory][1]
    agency_name = agency_decoder[directory][0]

  # there can be multiple reports per blurb
  blurbs = content[-1].find_all("p")
  report_count = 0

  for b in blurbs:
    # date
    # finding new dates that are just above the old ones

    # this is the format of the newest entries and the easiest to get
    x = b.previous_sibling
    y = b.previous_sibling.previous_sibling
    if isinstance(y, Tag) and y.get('class') == ['date']:
      date_string = y.string
    elif isinstance(x, Tag) and x.get('class') == ['date']:
      date_string = x.string
    else:
      date_string = None

    # finding older dates that are at the end of the text
    if date_string == None:
      try:
        date_string = b.get_text()
      except:
        date_string = None

    if date_string is not None:
      # get rid of extra stuff that is not the date
      date_text = re.sub(r'\([^)]*\)', '', date_string)
      date_text = re.sub(r'\[(.*?)\]', '', date_text)
      # chop up the string, the last part should be the date
      date_chopped = date_text.rsplit(',')
      day = date_chopped[-1]
      # ATF added dashes
      if "-" in day:
        date_chopped = date_text.rsplit('-')
        day = date_chopped[0]
      # cleaning
      date_string = day.strip()
      date_string = date_string.replace("  ", " ")
      day = day.strip()

      # this is a date written out with a comma
      if day.isdigit():
        date_string = date_chopped[-2] + "," + date_chopped[-1]

    # check for missing commas
    try:
      date_string = datetime.strptime(date_string, "%B %d %Y")
      date_string = datetime.strftime(date_string, "%B %d, %Y")
    except ValueError:
      pass

    # for dates without a day
    if date_string is not None:
      date_string = date_string.strip()
      if "," not in date_string:
        date_test = date_string.replace(" ", " 1, ")
        try:
          datetime.strptime(date_test, "%B %d, %Y")
          date_string = date_test
        except ValueError:
          pass

    # going through each link in a paragraph
    for l in b.find_all("a"):
      date = None
      real_title = None
      # most cases pass this test
      try:
        date = datetime.strptime(date_string, "%B %d, %Y")
      # these ones go to a coding purgatory called odd_link
      except ValueError:
        info = odd_link(b, date_string, l, directory, )
        # this should give better titles than "pdf" or "Press Release"
        real_title = info["real_title"]
        date_string = info["date_string"]
        # these are links to things that are not reports
        if real_title == False and date_string == False:
          break
        elif "," not in date_string:
          date_string = date_string.strip()
          date_string = date_string.replace(" ", " 1, ")
          date = datetime.strptime(date_string, "%B %d, %Y")

      if date is None:
        date = datetime.strptime(date_string, "%B %d, %Y")

      report_year = datetime.strftime(date, "%Y")
      published_on = datetime.strftime(date, "%Y-%m-%d")

      # trying to get the most descriptive title
      # I go from the best methods to fall back and override exceptions
      try:
        string_title = b.text
      except:
        string_title = b.string

      if string_title == None:
        string_title = b.contents
        if "<a href=" in str(string_title):
          string_title = b.contents[0]

      link = l.get("href")
      link = strip_url_fragment(link)
      if link != None:
        # title
        try:
          title = l.text
        except:
          title = l.string
        if title in ("HTML", "PDF", "Executive Summary", "Full Report"):
          title = string_title

        # in some cases the title is a heading a few elements up; this gets passed in via odd_link
        if real_title is not None:
          title = real_title

        if title == 'id="content" name="content">':
          title =  b.string
          if title == None:
            title = b.text

        try:
          title = title.strip()
          title = title.replace('\n', "")
          title = title.replace('\r', "")
        except:
          pass

        file_type = find_file_type(link)
        if file_type == None or title == False:
          break

        if title == None:
          title = b.string

        # formating links consistently
        link = urljoin(base_url, link)
        # id
        doc_id = os.path.splitext(urlparse(link).path)[0]

        #these docs are one report where the page has a table of contents with links to content
        if "/index" in link:
          indexed = True
        else:
          indexed = False

        # creating ids
        # there may be a better way to do this but I am just taking out all the things that are not the id
        url_extras = ( "/final", "/fullpdf", "/ins_response", "oig/special/", "USMS/", "plus/", "oig/grants/", "oig/reports/", "EOUSA/", "BOP/", "ATF/", "COPS/", "FBI/", "OJP/", "INS/", "DEA/", "OBD", "/analysis", "/report", "/PDF_list", "/full_report", "/full", "_redacted", "oig", "r-", "/response", "/listpdf", "/memo", "/fullreport", "/Final", "/extradition", "/oig", "/grants", "/index")
        for n in url_extras:
          if n in doc_id:
            doc_id = doc_id.replace(n, "")

        while doc_id[:1] == "/":
          doc_id = doc_id[1:]

        year_match = YEAR_RE.match(doc_id)
        if year_match:
          doc_id = year_match.group(1)

        ag_match = AG_RE.match(link)
        if ag_match:
          doc_id = ag_match.group(1)

        # if it's still got slashes, just turn them into dashes
        # the ol' slash and dash
        doc_id = doc_id.replace("/", "-")

        # some weird issues I hard coded
        special_cases = {"a0118/au0118":"a0118", "a0207/0207":"a0207"}
        if doc_id in special_cases:
          doc_id = special_cases[doc_id]

        if "spanish" in link:
          language = "Spanish"
        else:
          language = "English"

        report_count += 1

        # if we're filtering on a year, and this isn't in it, skip it
        if int(report_year) not in year_range:
          # print("Skipping report for %s..." % report_year)
          continue

        if doc_id in report:
          if file_type == "pdf":
            # current and previous file pdf
            if report[doc_id]["file_type"] == "pdf":
              report[doc_id]["categories"].append(directory)
            # current file a pdf, old file html
            else:
              report[doc_id]["file_type"] = "pdf"
              report[doc_id]["url"] = link
              report[doc_id]["categories"].append(directory)
          else:
            # current file html old file pdf OR both files html
            report[doc_id]["categories"].append(directory)

          # add url if new
          old_url = False
          for n in report[doc_id]["urls"]:
            if link in n:
              old_url = True
          if not old_url:
            report[doc_id]["urls"].append({
              "url": link,
              "file_type": file_type,
              "indexed": indexed,
            })

          # finding the most descriptive name for cross-listed docs
          if report[doc_id]["agency"] == "doj" and agency != "doj":
            report[doc_id]["agency"] = agency
            report[doc_id]["agency_name"] = agency_name

        # Adding new document
        else:
          report[doc_id] = {
            "report_id": doc_id,
            "inspector": "doj",
            "inspector_url": "https://oig.justice.gov/reports/",
            "agency": agency,
            "agency_name": agency_name,
            "url": link,
            "title": title,
            "file_type": file_type,
            "categories": [directory,],
            "urls": [{
                "url": link,
                "file_type": file_type,
                "indexed": indexed,
            }],
            "published_on": published_on,
            # perhaps elaborate on this later
            "type": type_for(title),
            "language": language,
          }

  if report_count == 0:
    raise inspector.NoReportsFoundError("DOJ (%s)" % directory)
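
Note: here re.sub is only used for date cleanup: parenthesized and bracketed asides are stripped from each blurb before the trailing "Month day, Year" portion is parsed. A minimal sketch of just that step, with a made-up report line:

import re
from datetime import datetime

raw = "Audit of Grant Funds [Redacted] (Report 14-01), September 3, 2014"
# Remove (...) and [...] asides, then keep the trailing date portion.
text = re.sub(r'\([^)]*\)', '', raw)
text = re.sub(r'\[(.*?)\]', '', text)
date_string = ", ".join(part.strip() for part in text.rsplit(',')[-2:])
published_on = datetime.strptime(date_string, "%B %d, %Y").strftime("%Y-%m-%d")
print(published_on)  # 2014-09-03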

Example 77

Project: termite-data-server
Source File: ttfonts.py
View license
    def extractInfo(self): 
        #################/
        # name - Naming table
        #################/
        self.sFamilyClass = 0
        self.sFamilySubClass = 0

        name_offset = self.seek_table("name")
        format = self.read_ushort()
        if (format != 0):
            die("Unknown name table format " + format)
        numRecords = self.read_ushort()
        string_data_offset = name_offset + self.read_ushort()
        names = {1:'',2:'',3:'',4:'',6:''}
        K = names.keys()
        nameCount = len(names)
        for i in range(numRecords): 
            platformId = self.read_ushort()
            encodingId = self.read_ushort()
            languageId = self.read_ushort()
            nameId = self.read_ushort()
            length = self.read_ushort()
            offset = self.read_ushort()
            if (nameId not in K): continue
            N = ''
            if (platformId == 3 and encodingId == 1 and languageId == 0x409):  # Microsoft, Unicode, US English, PS Name
                opos = self._pos
                self.seek(string_data_offset + offset)
                if (length % 2 != 0):
                    die("PostScript name is UTF-16BE string of odd length")
                length /= 2
                N = ''
                while (length > 0):
                    char = self.read_ushort()
                    N += (chr(char))
                    length -= 1
                self._pos = opos
                self.seek(opos)
            
            elif (platformId == 1 and encodingId == 0 and languageId == 0):  # Macintosh, Roman, English, PS Name
                opos = self._pos
                N = self.get_chunk(string_data_offset + offset, length)
                self._pos = opos
                self.seek(opos)
            
            if (N and names[nameId]==''):
                names[nameId] = N
                nameCount -= 1
                if (nameCount==0): break
            
        
        if (names[6]):
            psName = names[6]
        elif (names[4]):
            psName = re.sub(' ','-',names[4])
        elif (names[1]):
            psName = re.sub(' ','-',names[1])
        else:
            psName = ''
        if (not psName):
            die("Could not find PostScript font name")
        self.name = psName
        if (names[1]):
            self.familyName = names[1]  
        else:  
            self.familyName = psName 
        if (names[2]):
            self.styleName = names[2]
        else:
            self.styleName = 'Regular' 
        if (names[4]):
            self.fullName = names[4]
        else:
            self.fullName = psName 
        if (names[3]):
            self.uniqueFontID = names[3]
        else:
            self.uniqueFontID = psName 
        if (names[6]):
            self.fullName = names[6] 

        #################/
        # head - Font header table
        #################/
        self.seek_table("head")
        self.skip(18) 
        self.unitsPerEm = unitsPerEm = self.read_ushort()
        scale = 1000 / float(unitsPerEm)
        self.skip(16)
        xMin = self.read_short()
        yMin = self.read_short()
        xMax = self.read_short()
        yMax = self.read_short()
        self.bbox = [(xMin*scale), (yMin*scale), (xMax*scale), (yMax*scale)]
        self.skip(3*2)
        indexToLocFormat = self.read_ushort()
        glyphDataFormat = self.read_ushort()
        if (glyphDataFormat != 0):
            die('Unknown glyph data format ' + str(glyphDataFormat))

        #################/
        # hhea metrics table
        #################/
        # ttf2t1 seems to use this value rather than the one in OS/2 - so put in for compatibility
        if ("hhea" in self.tables):
            self.seek_table("hhea")
            self.skip(4)
            hheaAscender = self.read_short()
            hheaDescender = self.read_short()
            self.ascent = (hheaAscender *scale)
            self.descent = (hheaDescender *scale)
        

        #################/
        # OS/2 - OS/2 and Windows metrics table
        #################/
        if ("OS/2" in self.tables): 
            self.seek_table("OS/2")
            version = self.read_ushort()
            self.skip(2)
            usWeightClass = self.read_ushort()
            self.skip(2)
            fsType = self.read_ushort()
            if (fsType == 0x0002 or (fsType & 0x0300) != 0): 
                die('ERROR - Font file ' + self.filename + ' cannot be embedded due to copyright restrictions.')
                self.restrictedUse = True
            
            self.skip(20)
            sF = self.read_short()
            self.sFamilyClass = (sF >> 8)
            self.sFamilySubClass = (sF & 0xFF)
            self._pos += 10  #PANOSE = 10 byte length
            panose = self.fh.read(10)
            self.skip(26)
            sTypoAscender = self.read_short()
            sTypoDescender = self.read_short()
            if (not self.ascent): 
                self.ascent = (sTypoAscender*scale)
            if (not self.descent): 
                self.descent = (sTypoDescender*scale)
            if (version > 1):
                self.skip(16)
                sCapHeight = self.read_short()
                self.capHeight = (sCapHeight*scale)
            else:
                self.capHeight = self.ascent            
        
        else:
            usWeightClass = 500
            if (not self.ascent): self.ascent = (yMax*scale)
            if (not self.descent): self.descent = (yMin*scale)
            self.capHeight = self.ascent
        
        self.stemV = 50 + int(pow((usWeightClass / 65.0),2))

        #################/
        # post - PostScript table
        #################/
        self.seek_table("post")
        self.skip(4) 
        self.italicAngle = self.read_short() + self.read_ushort() / 65536.0
        self.underlinePosition = self.read_short() * scale
        self.underlineThickness = self.read_short() * scale
        isFixedPitch = self.read_ulong()

        self.flags = 4

        if (self.italicAngle!= 0):
            self.flags = self.flags | 64
        if (usWeightClass >= 600):
            self.flags = self.flags | 262144
        if (isFixedPitch):
            self.flags = self.flags | 1

        #################/
        # hhea - Horizontal header table
        #################/
        self.seek_table("hhea")
        self.skip(32) 
        metricDataFormat = self.read_ushort()
        if (metricDataFormat != 0):
            die('Unknown horizontal metric data format ' + str(metricDataFormat))
        numberOfHMetrics = self.read_ushort()
        if (numberOfHMetrics == 0):
            die('Number of horizontal metrics is 0')

        #################/
        # maxp - Maximum profile table
        #################/
        self.seek_table("maxp")
        self.skip(4)
        numGlyphs = self.read_ushort()

        #################/
        # cmap - Character to glyph index mapping table
        #################/
        cmap_offset = self.seek_table("cmap")
        self.skip(2)
        cmapTableCount = self.read_ushort()
        unicode_cmap_offset = 0
        unicode_cmap_offset12 = 0
        
        for i in range(cmapTableCount):
            platformID = self.read_ushort()
            encodingID = self.read_ushort()
            offset = self.read_ulong()
            save_pos = self._pos
            if platformID == 3 and encodingID == 10:  # Microsoft, UCS-4
                format = self.get_ushort(cmap_offset + offset)
                if (format == 12):
                    if not unicode_cmap_offset12:
                        unicode_cmap_offset12 = cmap_offset + offset
                    break
            if ((platformID == 3 and encodingID == 1) or platformID == 0):  # Microsoft, Unicode
                format = self.get_ushort(cmap_offset + offset)
                if (format == 4):
                    if (not unicode_cmap_offset):
                        unicode_cmap_offset = cmap_offset + offset
                    break
                    
            self.seek(save_pos)
        
        if not unicode_cmap_offset and not unicode_cmap_offset12:
            die('Font (' + self.filename + ') does not have cmap for Unicode (platform 3, encoding 1, format 4, or platform 3, encoding 10, format 12, or platform 0, any encoding, format 4)')

        glyphToChar = {}
        charToGlyph = {}
        if unicode_cmap_offset12:
            self.getCMAP12(unicode_cmap_offset12, glyphToChar, charToGlyph)
        else:    
            self.getCMAP4(unicode_cmap_offset, glyphToChar, charToGlyph)

        #################/
        # hmtx - Horizontal metrics table
        #################/
        self.getHMTX(numberOfHMetrics, numGlyphs, glyphToChar, scale)
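
Note: Example 77 reaches for re.sub only once, to derive a PostScript font name by replacing spaces with hyphens when the dedicated name record (nameId 6) is absent. A tiny sketch of that fallback with invented name-table values:

import re

# Simplified name table: 1 = family, 2 = subfamily, 4 = full name, 6 = PostScript name.
names = {1: 'DejaVu Sans', 2: 'Bold', 3: '', 4: 'DejaVu Sans Bold', 6: ''}
if names[6]:
    psName = names[6]
elif names[4]:
    psName = re.sub(' ', '-', names[4])
else:
    psName = re.sub(' ', '-', names[1])
print(psName)  # DejaVu-Sans-Bold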

Example 78

Project: termite-data-server
Source File: jsmin.py
View license
def _make_jsmin(extended=True, python_only=True):
    """
    Generate JS minifier based on `jsmin.c by Douglas Crockford`_

    .. _jsmin.c by Douglas Crockford:
       http://www.crockford.com/javascript/jsmin.c

    :Parameters:
      `extended` : ``bool``
        Extended Regexps? (using lookahead and lookbehind). This is faster,
        because it can be optimized way more. The regexps used with `extended`
        being false are only left here to allow easier porting to platforms
        without extended regex features (and for my own reference...)

      `python_only` : ``bool``
        Use only the python variant. If true, no attempt is made to load the
        C extension.

    :Return: Minifier
    :Rtype: ``callable``
    """
    # pylint: disable = R0912, R0914, W0612
    if not python_only:
        try:
            import _rjsmin
        except ImportError:
            pass
        else:
            return _rjsmin.jsmin
    try:
        xrange
    except NameError:
        xrange = range  # pylint: disable = W0622

    space_chars = r'[\000-\011\013\014\016-\040]'

    line_comment = r'(?://[^\r\n]*)'
    space_comment = r'(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)'
    string1 = \
        r'(?:\047[^\047\\\r\n]*(?:\\(?:[^\r\n]|\r?\n|\r)[^\047\\\r\n]*)*\047)'
    string2 = r'(?:"[^"\\\r\n]*(?:\\(?:[^\r\n]|\r?\n|\r)[^"\\\r\n]*)*")'
    strings = r'(?:%s|%s)' % (string1, string2)

    charclass = r'(?:\[[^\\\]\r\n]*(?:\\[^\r\n][^\\\]\r\n]*)*\])'
    nospecial = r'[^/\\\[\r\n]'
    if extended:
        regex = r'(?:/(?![\r\n/*])%s*(?:(?:\\[^\r\n]|%s)%s*)*/)' % (
            nospecial, charclass, nospecial
        )
    else:
        regex = (
            r'(?:/(?:[^*/\\\r\n\[]|%s|\\[^\r\n])%s*(?:(?:\\[^\r\n]|%s)%s*)*/)'
        )
        regex = regex % (charclass, nospecial, charclass, nospecial)

    space = r'(?:%s|%s)' % (space_chars, space_comment)
    newline = r'(?:%s?[\r\n])' % line_comment

    def fix_charclass(result):
        """ Fixup string of chars to fit into a regex char class """
        pos = result.find('-')
        if pos >= 0:
            result = r'%s%s-' % (result[:pos], result[pos + 1:])

        def sequentize(string):
            """
            Notate consecutive characters as sequence

            (1-4 instead of 1234)
            """
            first, last, result = None, None, []
            for char in map(ord, string):
                if last is None:
                    first = last = char
                elif last + 1 == char:
                    last = char
                else:
                    result.append((first, last))
                    first = last = char
            if last is not None:
                result.append((first, last))
            return ''.join(['%s%s%s' % (
                chr(first),
                last > first + 1 and '-' or '',
                last != first and chr(last) or ''
            ) for first, last in result])

        return _re.sub(r'([\000-\040\047])',  # for better portability
                       lambda m: '\\%03o' % ord(m.group(1)), (sequentize(result)
                                                              .replace('\\', '\\\\')
                                                              .replace('[', '\\[')
                                                              .replace(']', '\\]')
                                                              )
                       )

    def id_literal_(what):
        """ Make id_literal like char class """
        match = _re.compile(what).match
        result = ''.join([
            chr(c) for c in xrange(127) if not match(chr(c))
        ])
        return '[^%s]' % fix_charclass(result)

    def not_id_literal_(keep):
        """ Make negated id_literal like char class """
        match = _re.compile(id_literal_(keep)).match
        result = ''.join([
            chr(c) for c in xrange(127) if not match(chr(c))
        ])
        return r'[%s]' % fix_charclass(result)

    not_id_literal = not_id_literal_(r'[a-zA-Z0-9_$]')
    preregex1 = r'[(,=:\[!&|?{};\r\n]'
    preregex2 = r'%(not_id_literal)sreturn' % locals()

    if extended:
        id_literal = id_literal_(r'[a-zA-Z0-9_$]')
        id_literal_open = id_literal_(r'[a-zA-Z0-9_${\[(+-]')
        id_literal_close = id_literal_(r'[a-zA-Z0-9_$}\])"\047+-]')

        space_sub = _re.compile((
            r'([^\047"/\000-\040]+)'
            r'|(%(strings)s[^\047"/\000-\040]*)'
            r'|(?:(?<=%(preregex1)s)%(space)s*(%(regex)s[^\047"/\000-\040]*))'
            r'|(?:(?<=%(preregex2)s)%(space)s*(%(regex)s[^\047"/\000-\040]*))'
            r'|(?<=%(id_literal_close)s)'
            r'%(space)s*(?:(%(newline)s)%(space)s*)+'
            r'(?=%(id_literal_open)s)'
            r'|(?<=%(id_literal)s)(%(space)s)+(?=%(id_literal)s)'
            r'|%(space)s+'
            r'|(?:%(newline)s%(space)s*)+'
        ) % locals()).sub

        def space_subber(match):
            """ Substitution callback """
            # pylint: disable = C0321, R0911
            groups = match.groups()
            if groups[0]:
                return groups[0]
            elif groups[1]:
                return groups[1]
            elif groups[2]:
                return groups[2]
            elif groups[3]:
                return groups[3]
            elif groups[4]:
                return '\n'
            elif groups[5]:
                return ' '
            else:
                return ''

        def jsmin(script):  # pylint: disable = W0621
            r"""
            Minify javascript based on `jsmin.c by Douglas Crockford`_\.

            Instead of parsing the stream char by char, it uses a regular
            expression approach which minifies the whole script with one big
            substitution regex.

            .. _jsmin.c by Douglas Crockford:
               http://www.crockford.com/javascript/jsmin.c

            :Parameters:
              `script` : ``str``
                Script to minify

            :Return: Minified script
            :Rtype: ``str``
            """
            return space_sub(space_subber, '\n%s\n' % script).strip()

    else:
        pre_regex = r'(?:%(preregex1)s|%(preregex2)s)' % locals()
        not_id_literal_open = not_id_literal_(r'[a-zA-Z0-9_${\[(+-]')
        not_id_literal_close = not_id_literal_(r'[a-zA-Z0-9_$}\])"\047+-]')

        space_norm_sub = _re.compile((
            r'(%(strings)s)'
            r'|(?:(%(pre_regex)s)%(space)s*(%(regex)s))'
            r'|(%(space)s)+'
            r'|(?:(%(newline)s)%(space)s*)+'
        ) % locals()).sub

        def space_norm_subber(match):
            """ Substitution callback """
            # pylint: disable = C0321
            groups = match.groups()
            if groups[0]:
                return groups[0]
            elif groups[1]:
                return groups[1].replace('\r', '\n') + groups[2]
            elif groups[3]:
                return ' '
            elif groups[4]:
                return '\n'

        space_sub1 = _re.compile((
            r'[\040\n]?(%(strings)s|%(pre_regex)s%(regex)s)'
            r'|\040(%(not_id_literal)s)'
            r'|\n(%(not_id_literal_open)s)'
        ) % locals()).sub

        def space_subber1(match):
            """ Substitution callback """
            groups = match.groups()
            return groups[0] or groups[1] or groups[2]

        space_sub2 = _re.compile((
            r'(%(strings)s)\040?'
            r'|(%(pre_regex)s%(regex)s)[\040\n]?'
            r'|(%(not_id_literal)s)\040'
            r'|(%(not_id_literal_close)s)\n'
        ) % locals()).sub

        def space_subber2(match):
            """ Substitution callback """
            groups = match.groups()
            return groups[0] or groups[1] or groups[2] or groups[3]

        def jsmin(script):
            r"""
            Minify javascript based on `jsmin.c by Douglas Crockford`_\.

            Instead of parsing the stream char by char, it uses a regular
            expression approach. The script is minified with three passes:

            normalization
                Control characters are mapped to spaces, spaces and newlines
                are squeezed and comments are stripped.
            space removal 1
                Spaces before certain tokens are removed
            space removal 2
                Spaces after certain tokens are removed

            .. _jsmin.c by Douglas Crockford:
               http://www.crockford.com/javascript/jsmin.c

            :Parameters:
              `script` : ``str``
                Script to minify

            :Return: Minified script
            :Rtype: ``str``
            """
            return space_sub2(space_subber2,
                              space_sub1(space_subber1,
                                         space_norm_sub(space_norm_subber,
                                                        '\n%s\n' % script)
                                         )
                              ).strip()
    return jsmin
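
Note: the interesting re.sub detail in Example 78 is the callable replacement: fix_charclass() rewrites control characters (and the apostrophe) inside a generated character class as octal escapes by passing a lambda instead of a replacement string. A stripped-down sketch of that idiom:

import re

def escape_for_charclass(chars):
    # Escape backslashes and brackets first, then turn control characters and
    # the apostrophe into \NNN octal escapes via a callable replacement.
    chars = chars.replace('\\', '\\\\').replace('[', '\\[').replace(']', '\\]')
    return re.sub(r'([\000-\040\047])',
                  lambda m: '\\%03o' % ord(m.group(1)),
                  chars)

print(escape_for_charclass("a b\t'c"))  # a\040b\011\047c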

Example 79

Project: termite-data-server
Source File: template.py
View license
    def parse(self, text):

        # Basically, r_tag.split will split the text into
        # an array containing, 'non-tag', 'tag', 'non-tag', 'tag'
        # so if we alternate this variable, we know
        # what to look for. This is an alternative to
        # line.startswith("{{")
        in_tag = False
        extend = None
        pre_extend = True

        # Use a list to store everything in
        # This is because later the code will "look ahead"
        # for missing strings or brackets.
        ij = self.r_tag.split(text)
        # j = current index
        # i = current item
        stack = self.stack
        for j in range(len(ij)):
            i = ij[j]

            if i:
                if not stack:
                    self._raise_error('The "end" tag is unmatched, please check if you have a starting "block" tag')

                # Our current element in the stack.
                top = stack[-1]

                if in_tag:
                    line = i

                    # Get rid of delimiters
                    line = line[len(self.delimiters[0]):-len(self.delimiters[1])].strip()

                    # This is bad juju, but let's do it anyway
                    if not line:
                        continue

                    # We do not want to replace the newlines in code,
                    # only in block comments.
                    def remove_newline(re_val):
                        # Take the entire match and replace newlines with
                        # escaped newlines.
                        return re_val.group(0).replace('\n', '\\n')

                    # Perform block comment escaping.
                    # This performs escaping ON anything
                    # in between """ and """
                    line = sub(TemplateParser.r_multiline,
                               remove_newline,
                               line)

                    if line.startswith('='):
                        # IE: {{=response.title}}
                        name, value = '=', line[1:].strip()
                    else:
                        v = line.split(' ', 1)
                        if len(v) == 1:
                            # Example
                            # {{ include }}
                            # {{ end }}
                            name = v[0]
                            value = ''
                        else:
                            # Example
                            # {{ block pie }}
                            # {{ include "layout.html" }}
                            # {{ for i in range(10): }}
                            name = v[0]
                            value = v[1]

                    # This will replace newlines in block comments
                    # with the newline character. This is so that they
                    # retain their formatting, but squish down to one
                    # line in the rendered template.

                    # First check if we have any custom lexers
                    if name in self.lexers:
                        # Pass the information to the lexer
                        # and allow it to inject in the environment

                        # You can define custom names such as
                        # '{{<<variable}}' which could potentially
                        # write unescaped version of the variable.
                        self.lexers[name](parser=self,
                                          value=value,
                                          top=top,
                                          stack=stack)

                    elif name == '=':
                        # So we have a variable to insert into
                        # the template
                        buf = "\n%s(%s)" % (self.writer, value)
                        top.append(Node(buf, pre_extend=pre_extend))

                    elif name == 'block' and not value.startswith('='):
                        # Make a new node with name.
                        node = BlockNode(name=value.strip(),
                                         pre_extend=pre_extend,
                                         delimiters=self.delimiters)

                        # Append this node to our active node
                        top.append(node)

                        # Make sure to add the node to the stack.
                        # so anything after this gets added
                        # to this node. This allows us to
                        # "nest" nodes.
                        stack.append(node)

                    elif name == 'end' and not value.startswith('='):
                        # We are done with this node.

                        # Save an instance of it
                        self.blocks[top.name] = top

                        # Pop it.
                        stack.pop()

                    elif name == 'super' and not value.startswith('='):
                        # Get our correct target name
                        # If they just called {{super}} without a name
                        # attempt to assume the top blocks name.
                        if value:
                            target_node = value
                        else:
                            target_node = top.name

                        # Create a SuperNode instance
                        node = SuperNode(name=target_node,
                                         pre_extend=pre_extend)

                        # Add this to our list to be taken care of
                        self.super_nodes.append(node)

                        # And put it in the tree
                        top.append(node)

                    elif name == 'include' and not value.startswith('='):
                        # If we know the target file to include
                        if value:
                            self.include(top, value)

                        # Otherwise, make a temporary include node
                        # That the child node will know to hook into.
                        else:
                            include_node = BlockNode(
                                name='__include__' + self.name,
                                pre_extend=pre_extend,
                                delimiters=self.delimiters)
                            top.append(include_node)

                    elif name == 'extend' and not value.startswith('='):
                        # We need to extend the following
                        # template.
                        extend = value
                        pre_extend = False

                    else:
                        # If we don't know where it belongs
                        # we just add it anyway without formatting.
                        if line and in_tag:

                            # Split on the newlines >.<
                            tokens = line.split('\n')

                            # We need to look for any instances of
                            # for i in range(10):
                            #   = i
                            # pass
                            # So we can properly put a response.write() in place.
                            continuation = False
                            len_parsed = 0
                            for k, token in enumerate(tokens):

                                token = tokens[k] = token.strip()
                                len_parsed += len(token)

                                if token.startswith('='):
                                    if token.endswith('\\'):
                                        continuation = True
                                        tokens[k] = "\n%s(%s" % (
                                            self.writer, token[1:].strip())
                                    else:
                                        tokens[k] = "\n%s(%s)" % (
                                            self.writer, token[1:].strip())
                                elif continuation:
                                    tokens[k] += ')'
                                    continuation = False

                            buf = "\n%s" % '\n'.join(tokens)
                            top.append(Node(buf, pre_extend=pre_extend))

                else:
                    # It is HTML so just include it.
                    buf = "\n%s(%r, escape=False)" % (self.writer, i)
                    top.append(Node(buf, pre_extend=pre_extend))

            # Remember: tag, not tag, tag, not tag
            in_tag = not in_tag

        # Make a list of items to remove from child
        to_rm = []

        # Go through each of the children nodes
        for node in self.child_super_nodes:
            # If we declared a block that this node wants to include
            if node.name in self.blocks:
                # Go ahead and include it!
                node.value = self.blocks[node.name]
                # Since we processed this child, we don't need to
                # pass it along to the parent
                to_rm.append(node)

        # Remove some of the processed nodes
        for node in to_rm:
            # Since this is a pointer, it works beautifully.
            # Sometimes I miss C-Style pointers... I want my asterisk...
            self.child_super_nodes.remove(node)

        # If we need to extend a template.
        if extend:
            self.extend(extend)
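
The block / end branches above are the heart of the parser: an ordinary Python list is used as a stack, so a "block" tag pushes a new node and an "end" tag pops it, which is what lets blocks nest. A minimal standalone sketch of that idea (toy classes, not the Node/BlockNode classes used above):

class Block(object):
    def __init__(self, name):
        self.name = name
        self.children = []

def parse_tags(tags):
    root = Block('__root__')
    stack = [root]
    for tag in tags:
        name, _, value = tag.partition(' ')
        if name == 'block':
            node = Block(value)
            stack[-1].children.append(node)  # attach to the active node
            stack.append(node)               # later content nests inside it
        elif name == 'end':
            stack.pop()                      # close the innermost open block
        else:
            stack[-1].children.append(tag)   # plain content stays where it is
    return root

tree = parse_tags(['block outer', 'text', 'block inner', 'end', 'end'])
print(tree.children[0].name)  # outer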

Example 80

Project: termite-visualizations
Source File: ttfonts.py
View license
    def extractInfo(self): 
        #################/
        # name - Naming table
        #################/
        self.sFamilyClass = 0
        self.sFamilySubClass = 0

        name_offset = self.seek_table("name")
        format = self.read_ushort()
        if (format != 0):
            die("Unknown name table format " + format)
        numRecords = self.read_ushort()
        string_data_offset = name_offset + self.read_ushort()
        names = {1:'',2:'',3:'',4:'',6:''}
        K = names.keys()
        nameCount = len(names)
        for i in range(numRecords): 
            platformId = self.read_ushort()
            encodingId = self.read_ushort()
            languageId = self.read_ushort()
            nameId = self.read_ushort()
            length = self.read_ushort()
            offset = self.read_ushort()
            if (nameId not in K): continue
            N = ''
            if (platformId == 3 and encodingId == 1 and languageId == 0x409):  # Microsoft, Unicode, US English, PS Name
                opos = self._pos
                self.seek(string_data_offset + offset)
                if (length % 2 != 0):
                    die("PostScript name is UTF-16BE string of odd length")
                length /= 2
                N = ''
                while (length > 0):
                    char = self.read_ushort()
                    N += (chr(char))
                    length -= 1
                self._pos = opos
                self.seek(opos)
            
            elif (platformId == 1 and encodingId == 0 and languageId == 0):  # Macintosh, Roman, English, PS Name
                opos = self._pos
                N = self.get_chunk(string_data_offset + offset, length)
                self._pos = opos
                self.seek(opos)
            
            if (N and names[nameId]==''):
                names[nameId] = N
                nameCount -= 1
                if (nameCount==0): break
            
        
        if (names[6]):
            psName = names[6]
        elif (names[4]):
            psName = re.sub(' ','-',names[4])
        elif (names[1]):
            psName = re.sub(' ','-',names[1])
        else:
            psName = ''
        if (not psName):
            die("Could not find PostScript font name")
        self.name = psName
        if (names[1]):
            self.familyName = names[1]  
        else:  
            self.familyName = psName 
        if (names[2]):
            self.styleName = names[2]
        else:
            self.styleName = 'Regular' 
        if (names[4]):
            self.fullName = names[4]
        else:
            self.fullName = psName 
        if (names[3]):
            self.uniqueFontID = names[3]
        else:
            self.uniqueFontID = psName 
        if (names[6]):
            self.fullName = names[6] 

        #################/
        # head - Font header table
        #################/
        self.seek_table("head")
        self.skip(18) 
        self.unitsPerEm = unitsPerEm = self.read_ushort()
        scale = 1000 / float(unitsPerEm)
        self.skip(16)
        xMin = self.read_short()
        yMin = self.read_short()
        xMax = self.read_short()
        yMax = self.read_short()
        self.bbox = [(xMin*scale), (yMin*scale), (xMax*scale), (yMax*scale)]
        self.skip(3*2)
        indexToLocFormat = self.read_ushort()
        glyphDataFormat = self.read_ushort()
        if (glyphDataFormat != 0):
            die('Unknown glyph data format ' + str(glyphDataFormat))

        #################/
        # hhea metrics table
        #################/
        # ttf2t1 seems to use this value rather than the one in OS/2 - so put in for compatibility
        if ("hhea" in self.tables):
            self.seek_table("hhea")
            self.skip(4)
            hheaAscender = self.read_short()
            hheaDescender = self.read_short()
            self.ascent = (hheaAscender *scale)
            self.descent = (hheaDescender *scale)
        

        #################/
        # OS/2 - OS/2 and Windows metrics table
        #################/
        if ("OS/2" in self.tables): 
            self.seek_table("OS/2")
            version = self.read_ushort()
            self.skip(2)
            usWeightClass = self.read_ushort()
            self.skip(2)
            fsType = self.read_ushort()
            if (fsType == 0x0002 or (fsType & 0x0300) != 0): 
                die('ERROR - Font file ' + self.filename + ' cannot be embedded due to copyright restrictions.')
                self.restrictedUse = True
            
            self.skip(20)
            sF = self.read_short()
            self.sFamilyClass = (sF >> 8)
            self.sFamilySubClass = (sF & 0xFF)
            self._pos += 10  #PANOSE = 10 byte length
            panose = self.fh.read(10)
            self.skip(26)
            sTypoAscender = self.read_short()
            sTypoDescender = self.read_short()
            if (not self.ascent): 
                self.ascent = (sTypoAscender*scale)
            if (not self.descent): 
                self.descent = (sTypoDescender*scale)
            if (version > 1):
                self.skip(16)
                sCapHeight = self.read_short()
                self.capHeight = (sCapHeight*scale)
            else:
                self.capHeight = self.ascent            
        
        else:
            usWeightClass = 500
            if (not self.ascent): self.ascent = (yMax*scale)
            if (not self.descent): self.descent = (yMin*scale)
            self.capHeight = self.ascent
        
        self.stemV = 50 + int(pow((usWeightClass / 65.0),2))

        #################/
        # post - PostScript table
        #################/
        self.seek_table("post")
        self.skip(4) 
        self.italicAngle = self.read_short() + self.read_ushort() / 65536.0
        self.underlinePosition = self.read_short() * scale
        self.underlineThickness = self.read_short() * scale
        isFixedPitch = self.read_ulong()

        self.flags = 4

        if (self.italicAngle!= 0):
            self.flags = self.flags | 64
        if (usWeightClass >= 600):
            self.flags = self.flags | 262144
        if (isFixedPitch):
            self.flags = self.flags | 1

        #################/
        # hhea - Horizontal header table
        #################/
        self.seek_table("hhea")
        self.skip(32) 
        metricDataFormat = self.read_ushort()
        if (metricDataFormat != 0):
            die('Unknown horizontal metric data format ' + str(metricDataFormat))
        numberOfHMetrics = self.read_ushort()
        if (numberOfHMetrics == 0):
            die('Number of horizontal metrics is 0')

        #################/
        # maxp - Maximum profile table
        #################/
        self.seek_table("maxp")
        self.skip(4)
        numGlyphs = self.read_ushort()

        #################/
        # cmap - Character to glyph index mapping table
        #################/
        cmap_offset = self.seek_table("cmap")
        self.skip(2)
        cmapTableCount = self.read_ushort()
        unicode_cmap_offset = 0
        unicode_cmap_offset12 = 0
        
        for i in range(cmapTableCount):
            platformID = self.read_ushort()
            encodingID = self.read_ushort()
            offset = self.read_ulong()
            save_pos = self._pos
            if platformID == 3 and encodingID == 10:  # Microsoft, UCS-4
                format = self.get_ushort(cmap_offset + offset)
                if (format == 12):
                    if not unicode_cmap_offset12:
                        unicode_cmap_offset12 = cmap_offset + offset
                    break
            if ((platformID == 3 and encodingID == 1) or platformID == 0):  # Microsoft, Unicode
                format = self.get_ushort(cmap_offset + offset)
                if (format == 4):
                    if (not unicode_cmap_offset):
                        unicode_cmap_offset = cmap_offset + offset
                    break
                    
            self.seek(save_pos)
        
        if not unicode_cmap_offset and not unicode_cmap_offset12:
            die('Font (' + self.filename + ') does not have cmap for Unicode (platform 3, encoding 1, format 4, or platform 3, encoding 10, format 12, or platform 0, any encoding, format 4)')

        glyphToChar = {}
        charToGlyph = {}
        if unicode_cmap_offset12:
            self.getCMAP12(unicode_cmap_offset12, glyphToChar, charToGlyph)
        else:    
            self.getCMAP4(unicode_cmap_offset, glyphToChar, charToGlyph)

        #################/
        # hmtx - Horizontal metrics table
        #################/
        self.getHMTX(numberOfHMetrics, numGlyphs, glyphToChar, scale)
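
The re.sub calls in this example are the simplest kind: when the font carries no explicit PostScript name entry (names[6]), one is derived from the full or family name by replacing spaces with hyphens, since a PostScript name may not contain spaces. A quick standalone check of that pattern (the font name here is made up):

import re

full_name = "DejaVu Sans Bold"          # hypothetical names[4] value
ps_name = re.sub(' ', '-', full_name)   # fixed pattern, no metacharacters involved
print(ps_name)                          # DejaVu-Sans-Bold

For a fixed pattern like this, full_name.replace(' ', '-') would do the same job without the re module.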

Example 81

Project: termite-visualizations
Source File: jsmin.py
View license
def _make_jsmin(extended=True, python_only=True):
    """
    Generate JS minifier based on `jsmin.c by Douglas Crockford`_

    .. _jsmin.c by Douglas Crockford:
       http://www.crockford.com/javascript/jsmin.c

    :Parameters:
      `extended` : ``bool``
        Extended Regexps? (using lookahead and lookbehind). This is faster,
        because it can be optimized way more. The regexps used with `extended`
        being false are only left here to allow easier porting to platforms
        without extended regex features (and for my own reference...)

      `python_only` : ``bool``
        Use only the python variant. If true, no attempt is made to load
        the C extension.

    :Return: Minifier
    :Rtype: ``callable``
    """
    # pylint: disable = R0912, R0914, W0612
    if not python_only:
        try:
            import _rjsmin
        except ImportError:
            pass
        else:
            return _rjsmin.jsmin
    try:
        xrange
    except NameError:
        xrange = range  # pylint: disable = W0622

    space_chars = r'[\000-\011\013\014\016-\040]'

    line_comment = r'(?://[^\r\n]*)'
    space_comment = r'(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)'
    string1 = \
        r'(?:\047[^\047\\\r\n]*(?:\\(?:[^\r\n]|\r?\n|\r)[^\047\\\r\n]*)*\047)'
    string2 = r'(?:"[^"\\\r\n]*(?:\\(?:[^\r\n]|\r?\n|\r)[^"\\\r\n]*)*")'
    strings = r'(?:%s|%s)' % (string1, string2)

    charclass = r'(?:\[[^\\\]\r\n]*(?:\\[^\r\n][^\\\]\r\n]*)*\])'
    nospecial = r'[^/\\\[\r\n]'
    if extended:
        regex = r'(?:/(?![\r\n/*])%s*(?:(?:\\[^\r\n]|%s)%s*)*/)' % (
            nospecial, charclass, nospecial
        )
    else:
        regex = (
            r'(?:/(?:[^*/\\\r\n\[]|%s|\\[^\r\n])%s*(?:(?:\\[^\r\n]|%s)%s*)*/)'
        )
        regex = regex % (charclass, nospecial, charclass, nospecial)

    space = r'(?:%s|%s)' % (space_chars, space_comment)
    newline = r'(?:%s?[\r\n])' % line_comment

    def fix_charclass(result):
        """ Fixup string of chars to fit into a regex char class """
        pos = result.find('-')
        if pos >= 0:
            result = r'%s%s-' % (result[:pos], result[pos + 1:])

        def sequentize(string):
            """
            Notate consecutive characters as sequence

            (1-4 instead of 1234)
            """
            first, last, result = None, None, []
            for char in map(ord, string):
                if last is None:
                    first = last = char
                elif last + 1 == char:
                    last = char
                else:
                    result.append((first, last))
                    first = last = char
            if last is not None:
                result.append((first, last))
            return ''.join(['%s%s%s' % (
                chr(first),
                last > first + 1 and '-' or '',
                last != first and chr(last) or ''
            ) for first, last in result])

        return _re.sub(r'([\000-\040\047])',  # for better portability
                       lambda m: '\\%03o' % ord(m.group(1)), (sequentize(result)
                                                              .replace('\\', '\\\\')
                                                              .replace('[', '\\[')
                                                              .replace(']', '\\]')
                                                              )
                       )

    def id_literal_(what):
        """ Make id_literal like char class """
        match = _re.compile(what).match
        result = ''.join([
            chr(c) for c in xrange(127) if not match(chr(c))
        ])
        return '[^%s]' % fix_charclass(result)

    def not_id_literal_(keep):
        """ Make negated id_literal like char class """
        match = _re.compile(id_literal_(keep)).match
        result = ''.join([
            chr(c) for c in xrange(127) if not match(chr(c))
        ])
        return r'[%s]' % fix_charclass(result)

    not_id_literal = not_id_literal_(r'[a-zA-Z0-9_$]')
    preregex1 = r'[(,=:\[!&|?{};\r\n]'
    preregex2 = r'%(not_id_literal)sreturn' % locals()

    if extended:
        id_literal = id_literal_(r'[a-zA-Z0-9_$]')
        id_literal_open = id_literal_(r'[a-zA-Z0-9_${\[(+-]')
        id_literal_close = id_literal_(r'[a-zA-Z0-9_$}\])"\047+-]')

        space_sub = _re.compile((
            r'([^\047"/\000-\040]+)'
            r'|(%(strings)s[^\047"/\000-\040]*)'
            r'|(?:(?<=%(preregex1)s)%(space)s*(%(regex)s[^\047"/\000-\040]*))'
            r'|(?:(?<=%(preregex2)s)%(space)s*(%(regex)s[^\047"/\000-\040]*))'
            r'|(?<=%(id_literal_close)s)'
            r'%(space)s*(?:(%(newline)s)%(space)s*)+'
            r'(?=%(id_literal_open)s)'
            r'|(?<=%(id_literal)s)(%(space)s)+(?=%(id_literal)s)'
            r'|%(space)s+'
            r'|(?:%(newline)s%(space)s*)+'
        ) % locals()).sub

        def space_subber(match):
            """ Substitution callback """
            # pylint: disable = C0321, R0911
            groups = match.groups()
            if groups[0]:
                return groups[0]
            elif groups[1]:
                return groups[1]
            elif groups[2]:
                return groups[2]
            elif groups[3]:
                return groups[3]
            elif groups[4]:
                return '\n'
            elif groups[5]:
                return ' '
            else:
                return ''

        def jsmin(script):  # pylint: disable = W0621
            r"""
            Minify javascript based on `jsmin.c by Douglas Crockford`_\.

            Instead of parsing the stream char by char, it uses a regular
            expression approach which minifies the whole script with one big
            substitution regex.

            .. _jsmin.c by Douglas Crockford:
               http://www.crockford.com/javascript/jsmin.c

            :Parameters:
              `script` : ``str``
                Script to minify

            :Return: Minified script
            :Rtype: ``str``
            """
            return space_sub(space_subber, '\n%s\n' % script).strip()

    else:
        pre_regex = r'(?:%(preregex1)s|%(preregex2)s)' % locals()
        not_id_literal_open = not_id_literal_(r'[a-zA-Z0-9_${\[(+-]')
        not_id_literal_close = not_id_literal_(r'[a-zA-Z0-9_$}\])"\047+-]')

        space_norm_sub = _re.compile((
            r'(%(strings)s)'
            r'|(?:(%(pre_regex)s)%(space)s*(%(regex)s))'
            r'|(%(space)s)+'
            r'|(?:(%(newline)s)%(space)s*)+'
        ) % locals()).sub

        def space_norm_subber(match):
            """ Substitution callback """
            # pylint: disable = C0321
            groups = match.groups()
            if groups[0]:
                return groups[0]
            elif groups[1]:
                return groups[1].replace('\r', '\n') + groups[2]
            elif groups[3]:
                return ' '
            elif groups[4]:
                return '\n'

        space_sub1 = _re.compile((
            r'[\040\n]?(%(strings)s|%(pre_regex)s%(regex)s)'
            r'|\040(%(not_id_literal)s)'
            r'|\n(%(not_id_literal_open)s)'
        ) % locals()).sub

        def space_subber1(match):
            """ Substitution callback """
            groups = match.groups()
            return groups[0] or groups[1] or groups[2]

        space_sub2 = _re.compile((
            r'(%(strings)s)\040?'
            r'|(%(pre_regex)s%(regex)s)[\040\n]?'
            r'|(%(not_id_literal)s)\040'
            r'|(%(not_id_literal_close)s)\n'
        ) % locals()).sub

        def space_subber2(match):
            """ Substitution callback """
            groups = match.groups()
            return groups[0] or groups[1] or groups[2] or groups[3]

        def jsmin(script):
            r"""
            Minify javascript based on `jsmin.c by Douglas Crockford`_\.

            Instead of parsing the stream char by char, it uses a regular
            expression approach. The script is minified with three passes:

            normalization
                Control characters are mapped to spaces, spaces and newlines
                are squeezed and comments are stripped.
            space removal 1
                Spaces before certain tokens are removed
            space removal 2
                Spaces after certain tokens are removed

            .. _jsmin.c by Douglas Crockford:
               http://www.crockford.com/javascript/jsmin.c

            :Parameters:
              `script` : ``str``
                Script to minify

            :Return: Minified script
            :Rtype: ``str``
            """
            return space_sub2(space_subber2,
                              space_sub1(space_subber1,
                                         space_norm_sub(space_norm_subber,
                                                        '\n%s\n' % script)
                                         )
                              ).strip()
    return jsmin
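
The notable re.sub usage in this example sits in fix_charclass: the replacement argument is a callable, so every matched control character (and the single quote, \047) is rewritten as a portable \ooo octal escape before being embedded in a character class. A standalone sketch of that callable-replacement idiom:

import re

def octal_escape(text):
    # Rewrite control characters and the single quote as octal escapes,
    # mirroring the lambda passed to _re.sub in fix_charclass above.
    return re.sub(r'([\000-\040\047])',
                  lambda m: '\\%03o' % ord(m.group(1)),
                  text)

print(octal_escape("a'b\tc"))  # a\047b\011c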

Example 82

Project: termite-visualizations
Source File: template.py
View license
    def parse(self, text):

        # Basically, r_tag.split will split the text into
        # an array containing, 'non-tag', 'tag', 'non-tag', 'tag'
        # so if we alternate this variable, we know
        # what to look for. This is alternate to
        # line.startswith("{{")
        in_tag = False
        extend = None
        pre_extend = True

        # Use a list to store everything in
        # This is because later the code will "look ahead"
        # for missing strings or brackets.
        ij = self.r_tag.split(text)
        # j = current index
        # i = current item
        stack = self.stack
        for j in range(len(ij)):
            i = ij[j]

            if i:
                if not stack:
                    self._raise_error('The "end" tag is unmatched, please check if you have a starting "block" tag')

                # Our current element in the stack.
                top = stack[-1]

                if in_tag:
                    line = i

                    # Get rid of delimiters
                    line = line[len(self.delimiters[0]):-len(self.delimiters[1])].strip()

                    # This is bad juju, but let's do it anyway
                    if not line:
                        continue

                    # We do not want to replace the newlines in code,
                    # only in block comments.
                    def remove_newline(re_val):
                        # Take the entire match and replace newlines with
                        # escaped newlines.
                        return re_val.group(0).replace('\n', '\\n')

                    # Perform block comment escaping.
                    # This performs escaping ON anything
                    # in between """ and """
                    line = sub(TemplateParser.r_multiline,
                               remove_newline,
                               line)

                    if line.startswith('='):
                        # IE: {{=response.title}}
                        name, value = '=', line[1:].strip()
                    else:
                        v = line.split(' ', 1)
                        if len(v) == 1:
                            # Example
                            # {{ include }}
                            # {{ end }}
                            name = v[0]
                            value = ''
                        else:
                            # Example
                            # {{ block pie }}
                            # {{ include "layout.html" }}
                            # {{ for i in range(10): }}
                            name = v[0]
                            value = v[1]

                    # This will replace newlines in block comments
                    # with the newline character. This is so that they
                    # retain their formatting, but squish down to one
                    # line in the rendered template.

                    # First check if we have any custom lexers
                    if name in self.lexers:
                        # Pass the information to the lexer
                        # and allow it to inject in the environment

                        # You can define custom names such as
                        # '{{<<variable}}' which could potentially
                        # write an unescaped version of the variable.
                        self.lexers[name](parser=self,
                                          value=value,
                                          top=top,
                                          stack=stack)

                    elif name == '=':
                        # So we have a variable to insert into
                        # the template
                        buf = "\n%s(%s)" % (self.writer, value)
                        top.append(Node(buf, pre_extend=pre_extend))

                    elif name == 'block' and not value.startswith('='):
                        # Make a new node with name.
                        node = BlockNode(name=value.strip(),
                                         pre_extend=pre_extend,
                                         delimiters=self.delimiters)

                        # Append this node to our active node
                        top.append(node)

                        # Make sure to add the node to the stack.
                        # so anything after this gets added
                        # to this node. This allows us to
                        # "nest" nodes.
                        stack.append(node)

                    elif name == 'end' and not value.startswith('='):
                        # We are done with this node.

                        # Save an instance of it
                        self.blocks[top.name] = top

                        # Pop it.
                        stack.pop()

                    elif name == 'super' and not value.startswith('='):
                        # Get our correct target name
                        # If they just called {{super}} without a name
                        # attempt to assume the top block's name.
                        if value:
                            target_node = value
                        else:
                            target_node = top.name

                        # Create a SuperNode instance
                        node = SuperNode(name=target_node,
                                         pre_extend=pre_extend)

                        # Add this to our list to be taken care of
                        self.super_nodes.append(node)

                        # And put it in the tree
                        top.append(node)

                    elif name == 'include' and not value.startswith('='):
                        # If we know the target file to include
                        if value:
                            self.include(top, value)

                        # Otherwise, make a temporary include node
                        # That the child node will know to hook into.
                        else:
                            include_node = BlockNode(
                                name='__include__' + self.name,
                                pre_extend=pre_extend,
                                delimiters=self.delimiters)
                            top.append(include_node)

                    elif name == 'extend' and not value.startswith('='):
                        # We need to extend the following
                        # template.
                        extend = value
                        pre_extend = False

                    else:
                        # If we don't know where it belongs
                        # we just add it anyway without formatting.
                        if line and in_tag:

                            # Split on the newlines >.<
                            tokens = line.split('\n')

                            # We need to look for any instances of
                            # for i in range(10):
                            #   = i
                            # pass
                            # So we can properly put a response.write() in place.
                            continuation = False
                            len_parsed = 0
                            for k, token in enumerate(tokens):

                                token = tokens[k] = token.strip()
                                len_parsed += len(token)

                                if token.startswith('='):
                                    if token.endswith('\\'):
                                        continuation = True
                                        tokens[k] = "\n%s(%s" % (
                                            self.writer, token[1:].strip())
                                    else:
                                        tokens[k] = "\n%s(%s)" % (
                                            self.writer, token[1:].strip())
                                elif continuation:
                                    tokens[k] += ')'
                                    continuation = False

                            buf = "\n%s" % '\n'.join(tokens)
                            top.append(Node(buf, pre_extend=pre_extend))

                else:
                    # It is HTML so just include it.
                    buf = "\n%s(%r, escape=False)" % (self.writer, i)
                    top.append(Node(buf, pre_extend=pre_extend))

            # Remember: tag, not tag, tag, not tag
            in_tag = not in_tag

        # Make a list of items to remove from child
        to_rm = []

        # Go through each of the children nodes
        for node in self.child_super_nodes:
            # If we declared a block that this node wants to include
            if node.name in self.blocks:
                # Go ahead and include it!
                node.value = self.blocks[node.name]
                # Since we processed this child, we don't need to
                # pass it along to the parent
                to_rm.append(node)

        # Remove some of the processed nodes
        for node in to_rm:
            # Since this is a pointer, it works beautifully.
            # Sometimes I miss C-Style pointers... I want my asterisk...
            self.child_super_nodes.remove(node)

        # If we need to extend a template.
        if extend:
            self.extend(extend)
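
The re.sub call proper in this example is sub(TemplateParser.r_multiline, remove_newline, line): the replacement argument is a function, so each matched triple-quoted block is handed back with its newlines escaped, squeezing a multi-line comment onto one physical line of generated code. A small standalone version of that idea (the pattern below is only an assumed stand-in for r_multiline):

import re

# Assumed stand-in for TemplateParser.r_multiline: triple-quoted strings.
r_multiline = re.compile(r'(""".*?"""|\'\'\'.*?\'\'\')', re.DOTALL)

def remove_newline(match):
    # Keep the block's text but escape its newlines so it fits on one line.
    return match.group(0).replace('\n', '\\n')

line = 'doc = """first\nsecond"""'
print(re.sub(r_multiline, remove_newline, line))  # doc = """first\nsecond"""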

Example 83

Project: LTLMoP
Source File: specCompiler.py
View license
    def _writeLTLFile(self):

        self.LTL2SpecLineNumber = None

        #regionList = [r.name for r in self.parser.proj.rfi.regions]
        regionList = [r.name for r in self.proj.rfi.regions]
        sensorList = deepcopy(self.proj.enabled_sensors)
        robotPropList = self.proj.enabled_actuators + self.proj.all_customs

        text = self.proj.specText

        response = None

        # Create LTL using selected parser
        # TODO: rename decomposition object to something other than 'parser'
        if self.proj.compile_options["parser"] == "slurp":
            # default to no region tags if no simconfig is defined, so we can compile without
            if self.proj.current_config == "":
                region_tags = {}
            else:
                self.hsub = handlerSubsystem.HandlerSubsystem(None, self.proj.project_root)
                config, success = self.hsub.loadConfigFile(self.proj.current_config)
                if success: self.hsub.configs.append(config)
                self.hsub.setExecutingConfig(self.proj.current_config)

                region_tags = self.hsub.executing_config.region_tags

            # Hack: We need to make sure there's only one of these
            global _SLURP_SPEC_GENERATOR

            # Make a new specgenerator and have it process the text
            if not _SLURP_SPEC_GENERATOR:
                # Add SLURP to path for import
                p = os.path.dirname(os.path.abspath(__file__))
                sys.path.append(os.path.join(p, "..", "etc", "SLURP"))
                from ltlbroom.specgeneration import SpecGenerator
                _SLURP_SPEC_GENERATOR = SpecGenerator()

            # Filter out regions it shouldn't know about
            filtered_regions = [region.name for region in self.proj.rfi.regions
                                if not (region.isObstacle or region.name.lower() == "boundary")]
            LTLspec_env, LTLspec_sys, self.proj.internal_props, internal_sensors, results, responses, traceback = \
                _SLURP_SPEC_GENERATOR.generate(text, sensorList, filtered_regions, robotPropList, region_tags)

            oldspec_env = LTLspec_env
            oldspec_sys = LTLspec_sys

            for ln, result in enumerate(results):
                if not result:
                    logging.warning("Could not parse the sentence in line {0}".format(ln))

            # Abort compilation if there were any errors
            if not all(results):
                return None, None, responses

            # Add in the sensors so they go into the SMV and spec files
            for s in internal_sensors:
                if s not in sensorList:
                    sensorList.append(s)
                    self.proj.all_sensors.append(s)
                    self.proj.enabled_sensors.append(s)

            # Conjoin all the spec chunks
            LTLspec_env = '\t\t' + ' & \n\t\t'.join(LTLspec_env)
            LTLspec_sys = '\t\t' + ' & \n\t\t'.join(LTLspec_sys)

            if self.proj.compile_options["decompose"]:
                # substitute decomposed region names
                for r in self.proj.rfi.regions:
                    if not (r.isObstacle or r.name.lower() == "boundary"):
                        LTLspec_env = re.sub('\\bs\.' + r.name + '\\b', "("+' | '.join(["s."+x for x in self.parser.proj.regionMapping[r.name]])+")", LTLspec_env)
                        LTLspec_env = re.sub('\\be\.' + r.name + '\\b', "("+' | '.join(["e."+x for x in self.parser.proj.regionMapping[r.name]])+")", LTLspec_env)
                        LTLspec_sys = re.sub('\\bs\.' + r.name + '\\b', "("+' | '.join(["s."+x for x in self.parser.proj.regionMapping[r.name]])+")", LTLspec_sys)
                        LTLspec_sys = re.sub('\\be\.' + r.name + '\\b', "("+' | '.join(["e."+x for x in self.parser.proj.regionMapping[r.name]])+")", LTLspec_sys)

            response = responses

        elif self.proj.compile_options["parser"] == "ltl":
            # delete comments
            text = re.sub(r"#.*$", "", text, flags=re.MULTILINE)

            # split into env and sys parts (by looking for a line of just dashes in between)
            LTLspec_env, LTLspec_sys = re.split(r"^\s*-+\s*$", text, maxsplit=1, flags=re.MULTILINE)

            # split into subformulas
            LTLspec_env = re.split(r"(?:[ \t]*[\n\r][ \t]*)+", LTLspec_env)
            LTLspec_sys = re.split(r"(?:[ \t]*[\n\r][ \t]*)+", LTLspec_sys)

            # remove any empty initial entries (HACK?)
            while '' in LTLspec_env:
                LTLspec_env.remove('')
            while '' in LTLspec_sys:
                LTLspec_sys.remove('')

            # automatically conjoin all the subformulas
            LTLspec_env = '\t\t' + ' & \n\t\t'.join(LTLspec_env)
            LTLspec_sys = '\t\t' + ' & \n\t\t'.join(LTLspec_sys)

            if self.proj.compile_options["decompose"]:
                # substitute decomposed region
                for r in self.proj.rfi.regions:
                    if not (r.isObstacle or r.name.lower() == "boundary"):
                        LTLspec_env = re.sub('\\b(?:s\.)?' + r.name + '\\b', "("+' | '.join(["s."+x for x in self.parser.proj.regionMapping[r.name]])+")", LTLspec_env)
                        LTLspec_sys = re.sub('\\b(?:s\.)?' + r.name + '\\b', "("+' | '.join(["s."+x for x in self.parser.proj.regionMapping[r.name]])+")", LTLspec_sys)
            else:
                for r in self.proj.rfi.regions:
                    if not (r.isObstacle or r.name.lower() == "boundary"):
                        LTLspec_env = re.sub('\\b(?:s\.)?' + r.name + '\\b', "s."+r.name, LTLspec_env)
                        LTLspec_sys = re.sub('\\b(?:s\.)?' + r.name + '\\b', "s."+r.name, LTLspec_sys)

            traceback = [] # HACK: needs to be something other than None
        elif self.proj.compile_options["parser"] == "structured":
            import parseEnglishToLTL

            if self.proj.compile_options["decompose"]:
                # substitute the regions name in specs
                for m in re.finditer(r'near (?P<rA>\w+)', text):
                    text=re.sub(r'near (?P<rA>\w+)', "("+' or '.join(["s."+r for r in self.parser.proj.regionMapping['near$'+m.group('rA')+'$'+str(50)]])+")", text)
                for m in re.finditer(r'within (?P<dist>\d+) (from|of) (?P<rA>\w+)', text):
                    text=re.sub(r'within ' + m.group('dist')+' (from|of) '+ m.group('rA'), "("+' or '.join(["s."+r for r in self.parser.proj.regionMapping['near$'+m.group('rA')+'$'+m.group('dist')]])+")", text)
                for m in re.finditer(r'between (?P<rA>\w+) and (?P<rB>\w+)', text):
                    text=re.sub(r'between ' + m.group('rA')+' and '+ m.group('rB'),"("+' or '.join(["s."+r for r in self.parser.proj.regionMapping['between$'+m.group('rA')+'$and$'+m.group('rB')+"$"]])+")", text)

                # substitute decomposed region
                for r in self.proj.rfi.regions:
                    if not (r.isObstacle or r.name.lower() == "boundary"):
                        text = re.sub('\\b' + r.name + '\\b', "("+' | '.join(["s."+x for x in self.parser.proj.regionMapping[r.name]])+")", text)

                regionList = ["s."+x.name for x in self.parser.proj.rfi.regions]
            else:
                for r in self.proj.rfi.regions:
                    if not (r.isObstacle or r.name.lower() == "boundary"):
                        text = re.sub('\\b' + r.name + '\\b', "s."+r.name, text)

                regionList = ["s."+x.name for x in self.proj.rfi.regions]

            spec, traceback, failed, self.LTL2SpecLineNumber, self.proj.internal_props = parseEnglishToLTL.writeSpec(text, sensorList, regionList, robotPropList)

            # Abort compilation if there were any errors
            if failed:
                return None, None, None

            LTLspec_env = spec["EnvInit"] + spec["EnvTrans"] + spec["EnvGoals"]
            LTLspec_sys = spec["SysInit"] + spec["SysTrans"] + spec["SysGoals"]

        else:
            logging.error("Parser type '{0}' not currently supported".format(self.proj.compile_options["parser"]))
            return None, None, None

        if self.proj.compile_options["decompose"]:
            regionList = [x.name for x in self.parser.proj.rfi.regions]
        else:
            regionList = [x.name for x in self.proj.rfi.regions]

        if self.proj.compile_options["use_region_bit_encoding"]:
            # Define the number of bits needed to encode the regions
            numBits = int(math.ceil(math.log(len(regionList),2)))

            # creating the region bit encoding
            bitEncode = bitEncoding(len(regionList),numBits)
            currBitEnc = bitEncode['current']
            nextBitEnc = bitEncode['next']

            # switch to bit encodings for regions
            LTLspec_env = replaceRegionName(LTLspec_env, bitEncode, regionList)
            LTLspec_sys = replaceRegionName(LTLspec_sys, bitEncode, regionList)

            if self.LTL2SpecLineNumber is not None:
                for k in self.LTL2SpecLineNumber.keys():
                    new_k = replaceRegionName(k, bitEncode, regionList)
                    if new_k != k:
                        self.LTL2SpecLineNumber[new_k] = self.LTL2SpecLineNumber[k]
                        del self.LTL2SpecLineNumber[k]

        if self.proj.compile_options["decompose"]:
            adjData = self.parser.proj.rfi.transitions
        else:
            adjData = self.proj.rfi.transitions

        # Store some data needed for later analysis
        self.spec = {}
        if self.proj.compile_options["decompose"]:
            self.spec['Topo'] = createTopologyFragment(adjData, self.parser.proj.rfi.regions, use_bits=self.proj.compile_options["use_region_bit_encoding"])
        else:
            self.spec['Topo'] = createTopologyFragment(adjData, self.proj.rfi.regions, use_bits=self.proj.compile_options["use_region_bit_encoding"])

        # Substitute any macros that the parsers passed us
        LTLspec_env = self.substituteMacros(LTLspec_env)
        LTLspec_sys = self.substituteMacros(LTLspec_sys)

        # If we are not using bit-encoding, we need to
        # explicitly encode a mutex for regions
        if not self.proj.compile_options["use_region_bit_encoding"]:
            # DNF version (extremely slow for core-finding)
            #mutex = "\n\t&\n\t []({})".format(" | ".join(["({})".format(" & ".join(["s."+r2.name if r is r2 else "!s."+r2.name for r2 in self.parser.proj.rfi.regions])) for r in self.parser.proj.rfi.regions]))

            if self.proj.compile_options["decompose"]:
                region_list = self.parser.proj.rfi.regions
            else:
                region_list = self.proj.rfi.regions

            # Almost-CNF version
            exclusions = []
            for i, r1 in enumerate(region_list):
                for r2 in region_list[i+1:]:
                    exclusions.append("!(s.{} & s.{})".format(r1.name, r2.name))
            mutex = "\n&\n\t []({})".format(" & ".join(exclusions))
            LTLspec_sys += mutex

        self.spec.update(self.splitSpecIntoComponents(LTLspec_env, LTLspec_sys))

        # Add in a fragment to make sure that we start in a valid region
        if self.proj.compile_options["decompose"]:
            self.spec['InitRegionSanityCheck'] = createInitialRegionFragment(self.parser.proj.rfi.regions, use_bits=self.proj.compile_options["use_region_bit_encoding"])
        else:
            self.spec['InitRegionSanityCheck'] = createInitialRegionFragment(self.proj.rfi.regions, use_bits=self.proj.compile_options["use_region_bit_encoding"])
        LTLspec_sys += "\n&\n" + self.spec['InitRegionSanityCheck']

        LTLspec_sys += "\n&\n" + self.spec['Topo']

        createLTLfile(self.proj.getFilenamePrefix(), LTLspec_env, LTLspec_sys)

        if self.proj.compile_options["parser"] == "slurp":
            self.reversemapping = {self.postprocessLTL(line,sensorList,robotPropList).strip():line.strip() for line in oldspec_env + oldspec_sys}
            self.reversemapping[self.spec['Topo'].replace("\n","").replace("\t","").lstrip().rstrip("\n\t &")] = "TOPOLOGY"

        #for k,v in self.reversemapping.iteritems():
        #    print "{!r}:{!r}".format(k,v)

        return self.spec, traceback, response
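
Two re.sub idioms carry most of the weight in this example: comments are stripped with re.sub(r"#.*$", "", text, flags=re.MULTILINE), where the flag makes $ anchor at every line end instead of only at the end of the whole string, and region names are rewritten only between \b word boundaries so that one region's name is never replaced inside a longer one. A small illustration with made-up region names (re.escape is added here as a precaution the original code skips, since its region names are plain identifiers):

import re

spec = "go to kitchen  # then stop\nvisit kitchen2"

# MULTILINE makes $ match at each end of line, so every trailing comment is dropped.
spec = re.sub(r"#.*$", "", spec, flags=re.MULTILINE)

# \b...\b replaces kitchen only as a whole word; kitchen2 is left untouched.
mapping = {"kitchen": "(s.kitchen_a | s.kitchen_b)"}
for name, repl in mapping.items():
    spec = re.sub(r"\b" + re.escape(name) + r"\b", repl, spec)

print(spec)  # kitchen becomes the decomposed disjunction; kitchen2 survives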

Example 84

Project: viper
Source File: emailparse.py
View license
    def run(self, *args):

        def string_clean(value):
            if value:
                return re.sub('[\n\t\r]', '', value)
            return ""

        def parse_ole_msg(ole):
            stream_dirs = ole.listdir()
            for stream in stream_dirs:
                # get stream that contains the email header
                if stream[0].startswith('__substg1.0_007D'):
                    email_header = ole.openstream(stream).read()
                    if stream[0].endswith('001F'):  # Unicode probably needs something better than just stripping \x00
                        email_header = email_header.replace('\x00', '')
            # If it came from outlook we may need to trim some lines
            try:
                email_header = email_header.split('Version 2.0\x0d\x0a', 1)[1]
            except:
                pass

            # Leaving us an RFC compliant email to parse
            msg = email.message_from_string(email_header)
            return msg

        def parse_ole_attachments(ole):
            # Hard part now, each part of the attachment is in a separate stream

            # need to get a unique stream id for each att
            # it's in the stream name as an 8-digit number.
            for i in range(20):  # arbitrary count of attachments; I don't expect this many
                stream_number = str(i).zfill(8)
                stream_name = '__attach_version1.0_#' + stream_number
                # Unicode
                try:
                    att_filename = ole.openstream(stream_name + '/__substg1.0_3704001F').read()
                    att_mime = ole.openstream(stream_name + '/__substg1.0_370E001F').read()
                    att_data = ole.openstream(stream_name + '/__substg1.0_37010102').read()
                    att_size = len(att_data)
                    att_md5 = hashlib.md5(att_data).hexdigest()
                    print i, att_size, att_md5, att_filename, att_mime
                except:
                    pass
                # ASCII
                try:
                    att_filename = ole.openstream(stream_name + '/__substg1.0_3704001E').read()
                    att_mime = ole.openstream(stream_name + '/__substg1.0_370E001E').read()
                    att_data = ole.openstream(stream_name + '/__substg1.0_37010102').read()
                    att_size = len(att_data)
                    att_md5 = hashlib.md5(att_data).hexdigest()
                    print i, att_size, att_md5, att_filename, att_mime
                except:
                    pass

        def att_session(att_id, msg, ole_flag):
            att_count = 0
            if ole_flag:
                ole = msg
                # Hard part now, each part of the attachment is in a separate stream

                # need to get a unique stream id for each att
                # it's in the stream name as an 8-digit number.
                for i in range(20):  # arbitrary count of attachments; I don't expect this many
                    stream_number = str(i).zfill(8)
                    stream_name = '__attach_version1.0_#' + stream_number
                    # Unicode
                    try:
                        att_filename = ole.openstream(stream_name + '/__substg1.0_3704001F').read()
                        att_filename = att_filename.replace('\x00', '')
                        att_data = ole.openstream(stream_name + '/__substg1.0_37010102').read()
                    except:
                        pass
                    # ASCII
                    try:
                        att_filename = ole.openstream(stream_name + '/__substg1.0_3704001E').read()
                        att_data = ole.openstream(stream_name + '/__substg1.0_37010102').read()
                    except:
                        pass
                    if i == att_id:
                        self.log('info', "Switching session to {0}".format(att_filename))
                        tmp_path = os.path.join(tempfile.gettempdir(), att_filename)
                        with open(tmp_path, 'w') as tmp:
                            tmp.write(att_data)
                        __sessions__.new(tmp_path)
                        return

            else:
                for part in msg.walk():
                    if part.get_content_type() == 'message/rfc822':
                        rfc822 = True
                    else:
                        rfc822 = False

                    if part.get_content_maintype() == 'multipart' \
                        or not part.get('Content-Disposition') \
                            and not rfc822:
                        continue

                    att_count += 1
                    if att_count == att_id:
                        if rfc822:
                            data = part.as_string()
                            m = re.match("Content-Type: message/rfc822\r?\n\r?\n(.*)", data, flags=re.S)
                            if not m:
                                self.log('error', "Could not extract RFC822 formatted message")
                                return
                            data = m.group(1)
                            att_size = len(data)
                            filename = "rfc822msg_{0}.eml".format(att_size)
                        else:
                            data = part.get_payload(decode=True)
                            filename = part.get_filename()

                        self.log('info', "Switching session to {0}".format(filename))

                        if data:
                            tmp_path = os.path.join(tempfile.gettempdir(), filename)
                            with open(tmp_path, 'w') as tmp:
                                tmp.write(data)
                            __sessions__.new(tmp_path)
                            return

        def email_envelope(msg):
            # Envelope
            self.log('info', "Email envelope:")
            rows = [
                ['Subject', msg.get("Subject")],
                ['To', msg.get("To")],
                ['From', msg.get("From")],
                ['Cc', msg.get("Cc")],
                ['Bcc', msg.get("Bcc")],
                ['Date', msg.get("Date")]
            ]
            self.log('table', dict(header=['Key', 'Value'], rows=rows))
            return

        def email_header(msg):
            # Headers
            rows = []
            for x in msg.keys():
                # Adding Received to the ignore list; it has to be handled separately if there is more than one line
                if x not in ['Subject', 'From', 'To', 'Date', 'Cc', 'Bcc', 'DKIM-Signature', 'Received']:
                    rows.append([x, string_clean(msg.get(x))])
            for x in msg.get_all('Received'):
                rows.append(['Received', string_clean(x)])
            self.log('info', "Email headers:")
            rows = sorted(rows, key=lambda entry: entry[0])
            self.log('table', dict(header=['Key', 'Value'], rows=rows))
            return

        def email_trace(msg, verbose):
            rows = []
            if verbose:
                fields = ['from', 'by', 'with', 'id', 'for', 'timestamp']
            else:
                fields = ['from', 'by', 'timestamp']
            for x in msg.get_all('Received'):
                x = string_clean(x)
                cre = re.compile("""
                    (?: from \s+ (?P<from>.*?) (?=by|with|id|ID|for|;|$) )?
                    (?: by \s+ (?P<by>.*?) (?=with|id|ID|for|;|$) )?
                    (?: with \s+ (?P<with>.*?) (?=id|ID|for|;|$) )?
                    (?: (id|ID) \s+ (?P<id>.*?) (?=for|;|$) )?
                    (?: for \s+ (?P<for>.*?) (?=;|$) )?
                    (?: \s* ; \s* (?P<timestamp>.*) )?
                    """, flags=re.X | re.I)
                m = cre.search(x)
                if not m:
                    self.log('error', "Received header regex didn't match")
                    return
                t = []
                for groupname in fields:
                    t.append(string_clean(m.group(groupname)))
                rows.insert(0, t)
            self.log('info', "Email path trace:")
            self.log('table', dict(header=fields, rows=rows))
            return

        def email_spoofcheck(msg, dnsenabled):
            self.log('info', "Email spoof check:")

            # test 1: check if From address is the same as Sender, Reply-To, and Return-Path
            rows = [
                ['Sender', string_clean(msg.get("Sender"))],
                ['From', string_clean(msg.get("From"))],
                ['Reply-To', string_clean(msg.get("Reply-To"))],
                ['Return-Path', string_clean(msg.get("Return-Path"))]
            ]
            self.log('table', dict(header=['Key', 'Value'], rows=rows))
            addr = {
                'Sender': email.utils.parseaddr(string_clean(msg.get("Sender")))[1],
                'From': email.utils.parseaddr(string_clean(msg.get("From")))[1],
                'Reply-To': email.utils.parseaddr(string_clean(msg.get("Reply-To")))[1],
                'Return-Path': email.utils.parseaddr(string_clean(msg.get("Return-Path")))[1]
            }
            if (addr['From'] == ''):
                self.log('error', "No From address!")
                return
            elif addr['Sender'] and (addr['From'] != addr['Sender']):
                self.log('warning', "Email FAILED: From address different than Sender")
            elif addr['Reply-To'] and (addr['From'] != addr['Reply-To']):
                self.log('warning', "Email FAILED: From address different than Reply-To")
            elif addr['Return-Path'] and (addr['From'] != addr['Return-Path']):
                self.log('warning', "Email FAILED: From address different than Return-Path")
            else:
                self.log('success', "Email PASSED: From address the same as Sender, Reply-To, and Return-Path")

            # test 2: check to see if first Received: by domain matches sender MX domain
            if not dnsenabled:
                self.log('info', "Unable to run Received by / sender check without dnspython available")
            else:
                r = msg.get_all('Received')[-1]
                m = re.search("by\s+(\S*?)(?:\s+\(.*?\))?\s+with", r)
                if not m:
                    self.log('error', "Received header regex didn't match")
                    return
                byname = m.group(1)
                # this can be either a name or an IP
                m = re.search("(\w+\.\w+|\d+\.\d+\.\d+\.\d+)$", byname)
                if not m:
                    self.log('error', "Could not find domain or IP in Received by field")
                    return
                bydomain = m.group(1)
                domains = [['Received by', bydomain]]
                # if it's an IP, do the reverse lookup
                m = re.search("\.\d+$", bydomain)
                if m:
                    bydomain = str(dns.reversename.from_address(bydomain)).strip('.')
                    domains.append(['Received by reverse lookup', bydomain])
                # if the email has a Sender header, use that
                if (addr['Sender'] != ""):
                    m = re.search("(\w+\.\w+)$", addr['Sender'])
                    if not m:
                        self.log('error', "Sender header regex didn't match")
                        return
                    fromdomain = m.group(1)
                    domains.append(['Sender', fromdomain])
                # otherwise, use the From header
                else:
                    m = re.search("(\w+\.\w+)$", addr['From'])
                    if not m:
                        self.log('error', "From header regex didn't match")
                        return
                    fromdomain = m.group(1)
                    domains.append(['From', fromdomain])

                bymatch = False
                try:
                    mx = dns.resolver.query(fromdomain, 'MX')
                    if mx :
                        for rdata in mx:
                            m = re.search("(\w+\.\w+).$", str(rdata.exchange))
                            if not m:
                                self.log('error', "MX domain regex didn't match")
                                continue
                            domains.append(['MX for ' + fromdomain, m.group(1)])
                            if bydomain == m.group(1):
                                bymatch = True
                    self.log('table', dict(header=['Key', 'Value'], rows=domains))
                except:
                    domains.append(['MX for ' + fromdomain, "not registered in DNS"])
                    self.log('table', dict(header=['Key', 'Value'], rows=domains))
                if bymatch:
                    self.log('success', "Email PASSED: Received by domain found in Sender/From MX domains")
                else:
                    self.log('warning', "Email FAILED: Could not match Received by domain to Sender/From MX")

            # test 3: look at SPF records
            rspf = []
            results = set()
            allspf = msg.get_all('Received-SPF')
            if not allspf:
                return
            for spf in allspf:
                # self.log('info', string_clean(spf))
                m = re.search("\s*(\w+)\s+\((.*?):\s*(.*?)\)\s+(.*);", string_clean(spf))
                if not m:
                    self.log('error', "Received-SPF regex didn't match")
                    return
                rspf.append([m.group(2), m.group(1), m.group(3), m.group(4)])
                results = results | {m.group(1)}
            self.log('table', dict(header=['Domain', 'Action', 'Info', 'Additional'], rows=rspf))
            if results & {'fail', 'softfail'}:
                self.log('warning', "Email FAILED: Found fail or softfail SPF results")
            elif results & {'none', 'neutral'}:
                self.log('warning', "Email NEUTRAL: Found none or neutral SPF results")
            elif results & {'permerror', 'temperror'}:
                self.log('warning', "Email NEUTRAL: Found error condition")
            elif results & {'pass'}:
                self.log('success', "Email PASSED: Found SPF pass result")

            return

        def email_attachments(msg, ole_flag):
            # Attachments
            att_count = 0
            rows = []
            links = []
            if ole_flag:
                ole = msg
                # Hard part now, each part of the attachment is in a separate stream

                # need to get a unique stream id for each att
                # it's in the stream name as an 8-digit number.
                for i in range(20):  # arbitrary upper bound on attachments; don't expect this many
                    stream_number = str(i).zfill(8)
                    stream_name = '__attach_version1.0_#' + stream_number
                    # Unicode
                    try:
                        att_filename = ole.openstream(stream_name + '/__substg1.0_3704001F').read()
                        att_mime = ole.openstream(stream_name + '/__substg1.0_370E001F').read()
                        att_data = ole.openstream(stream_name + '/__substg1.0_37010102').read()
                        att_size = len(att_data)
                        att_md5 = hashlib.md5(att_data).hexdigest()
                        rows.append([i, att_filename, att_mime, att_size, att_md5])
                        att_count += 1
                    except:
                        pass
                    # ASCII
                    try:
                        att_filename = ole.openstream(stream_name + '/__substg1.0_3704001E').read()
                        att_mime = ole.openstream(stream_name + '/__substg1.0_370E001E').read()
                        att_data = ole.openstream(stream_name + '/__substg1.0_37010102').read()
                        att_size = len(att_data)
                        att_md5 = hashlib.md5(att_data).hexdigest()
                        rows.append([i, att_filename, att_mime, att_size, att_md5])
                        att_count += 1
                    except:
                        pass

            else:
                # Walk through email string.
                for part in msg.walk():
                    content_type = part.get_content_type()

                    if part.get_content_maintype() == 'multipart':
                        continue

                    if content_type in ('text/plain', 'text/html'):
                        part_content = part.get_payload(decode=True)
                        for link in re.findall(r'(https?://[^"<>\s]+)', part_content):
                            if link not in links:
                                links.append(link)

                    if content_type == 'message/rfc822':
                        part_content = part.as_string()
                        m = re.match("Content-Type: message/rfc822\r?\n\r?\n(.*)", part_content, flags=re.S)
                        if not m:
                            self.log('error', "Could not extract RFC822 formatted message")
                            return
                        part_content = m.group(1)
                        att_size = len(part_content)
                        att_file_name = "rfc822msg_{0}.eml".format(att_size)
                        att_md5 = hashlib.md5(part_content).hexdigest()
                        att_count += 1
                        rows.append([att_count, att_file_name, content_type, att_size, att_md5])
                        continue

                    if not part.get('Content-Disposition'):
                        # These are not attachments.
                        continue

                    att_file_name = part.get_filename()

                    if not att_file_name:
                        continue

                    att_data = part.get_payload(decode=True)
                    att_size = len(att_data)
                    att_md5 = hashlib.md5(att_data).hexdigest()
                    att_count += 1
                    rows.append([att_count, att_file_name, part.get_content_type(), att_size, att_md5])

            self.log('info', "Email attachments (total: {0}):".format(att_count))
            if att_count > 0:
                self.log('table', dict(header=['ID', 'FileName', 'Content Type', 'File Size', 'MD5'], rows=rows))

            self.log('info', "Email links:")
            for link in links:
                self.log('item', link)
            return

        # Start Here
        if not __sessions__.is_set():
            self.log('error', "No open session")
            return

        super(EmailParse, self).run(*args)
        if self.args is None:
            return

        # see if we can load the dns library for MX lookup spoof detection
        try:
            import dns.resolver
            import dns.reversename
            dnsenabled = True
        except ImportError:
            dnsenabled = False

        # Try to open as an ole msg, if not treat as email string
        try:
            ole = olefile.OleFileIO(__sessions__.current.file.path)
            ole_flag = True
        except:
            ole_flag = False
            email_handle = open(__sessions__.current.file.path)
            msg = email.message_from_file(email_handle)
            email_handle.close()

        if self.args.open is not None:
            if ole_flag:
                msg = ole
            att_session(self.args.open, msg, ole_flag)
        elif self.args.envelope:
            if ole_flag:
                msg = parse_ole_msg(ole)
            email_envelope(msg)
        elif self.args.attach:
            if ole_flag:
                msg = ole
            email_attachments(msg, ole_flag)
        elif self.args.header:
            if ole_flag:
                msg = parse_ole_msg(ole)
            email_header(msg)
        elif self.args.trace:
            if ole_flag:
                msg = parse_ole_msg(ole)
            email_trace(msg, False)
        elif self.args.traceall:
            if ole_flag:
                msg = parse_ole_msg(ole)
            email_trace(msg, True)
        elif self.args.spoofcheck:
            if ole_flag:
                msg = parse_ole_msg(ole)
            email_spoofcheck(msg, dnsenabled)
        elif self.args.all:
            if ole_flag:
                msg = parse_ole_msg(ole)
            email_envelope(msg)
            email_header(msg)
            email_trace(msg, True)
            email_spoofcheck(msg, dnsenabled)
            if ole_flag:
                msg = ole
            email_attachments(msg, ole_flag)
        else:
            self.log('error', 'At least one of the parameters is required')
            self.usage()
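
The spoof check above (test 2) leans on three re.search patterns to pull host and domain names out of the Received and Sender/From headers before the MX comparison. A minimal standalone sketch of what those patterns extract; the header line and address below are made up for illustration and are not part of the module:

import re

received = "from mail.example.org by mx1.example.net (Postfix) with ESMTP id ABC123"
sender = "bounces@example.org"

# host named after "by ... with" in the last Received header
m = re.search(r"by\s+(\S*?)(?:\s+\(.*?\))?\s+with", received)
byname = m.group(1)                                   # 'mx1.example.net'

# reduce the host (or a dotted IP) to its trailing domain part
m = re.search(r"(\w+\.\w+|\d+\.\d+\.\d+\.\d+)$", byname)
bydomain = m.group(1)

# domain of the envelope sender
m = re.search(r"(\w+\.\w+)$", sender)
fromdomain = m.group(1)

print(bydomain)     # example.net
print(fromdomain)   # example.org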

Example 85

Project: frescobaldi
Source File: vocal.py
View license
    def build(self, data, builder):
        # normalize voicing
        staves = self.voicing.currentText().upper()
        # remove unwanted characters
        staves = re.sub(r'[^SATB-]+', '', staves)
        # remove double hyphens, and from begin and end
        staves = re.sub('-+', '-', staves).strip('-')
        if not staves:
            return
        
        splitStaves = staves.split('-')
        numStaves = len(splitStaves)
        staffCIDs = collections.defaultdict(int)    # number same-name staff Context-IDs
        voiceCounter = collections.defaultdict(int) # dict to number same voice types
        maxNumVoices = max(map(len, splitStaves))   # largest number of voices
        numStanzas = self.stanzas.value()
        lyrics = collections.defaultdict(list)      # lyrics grouped by stanza number
        pianoReduction = collections.defaultdict(list)
        rehearsalMidis = []
        
        p = ly.dom.ChoirStaff()
        choir = ly.dom.Sim(p)
        data.nodes.append(p)
        
        # print main instrumentName if there are more choirs, and we
        # have more than one staff.
        if numStaves > 1 and data.num:
            builder.setInstrumentNames(p,
                builder.instrumentName(lambda _: _("Choir"), data.num),
                builder.instrumentName(lambda _: _("abbreviation for Choir", "Ch."), data.num))
        
        # get the preferred way of adding lyrics
        lyrAllSame, lyrEachSame, lyrEachDiff, lyrSpread = (
            self.lyrics.currentIndex() == i for i in range(4))
        lyrEach = lyrEachSame or lyrEachDiff
        
        # stanzas to print (0 = don't print stanza number):
        if numStanzas == 1:
            allStanzas = [0]
        else:
            allStanzas = list(range(1, numStanzas + 1))
        
        # Which stanzas to print where:
        if lyrSpread and numStanzas > 1 and numStaves > 2:
            spaces = numStaves - 1
            count, rest = divmod(max(numStanzas, spaces), spaces)
            stanzaSource = itertools.cycle(allStanzas)
            stanzaGroups = (itertools.islice(stanzaSource, num)
                            for num in itertools.chain(
                                itertools.repeat(count + 1, rest),
                                itertools.repeat(count, numStaves - rest)))
        else:
            stanzaGroups = itertools.repeat(allStanzas, numStaves)
        
        # a function to set staff affinity (in LilyPond 2.13.4 and above):
        if builder.lyVersion >= (2, 13, 4):
            def setStaffAffinity(context, affinity):
                ly.dom.Line("\\override VerticalAxisGroup "
                     "#'staff-affinity = #" + affinity, context.getWith())
        else:
            def setStaffAffinity(lyricsContext, affinity):
                pass
        
        # a function to make a column markup:
        if builder.lyVersion >= (2, 11, 57):
            columnCommand = 'center-column'
        else:
            columnCommand = 'center-align'
        def makeColumnMarkup(names):
            node = ly.dom.Markup()
            column = ly.dom.MarkupEnclosed(columnCommand, node)
            for name in names:
                ly.dom.QuotedString(name, column)
            return node
        
        stavesLeft = numStaves
        for staff, stanzas in zip(splitStaves, stanzaGroups):
            # are we in the last staff?
            stavesLeft -= 1
            # the number of voices in this staff
            numVoices = len(staff)
            # sort the letters in order SATB
            staff = ''.join(i * staff.count(i) for i in 'SATB')
            # Create the staff for the voices
            s = ly.dom.Staff(parent=choir)
            builder.setMidiInstrument(s, self.midiInstrument)
            
            # Build a list of the voices in this staff.
            # Each entry is a tuple(name, num).
            # name is one of 'S', 'A', 'T', or 'B'
            # num is an integer: 0 when a voice occurs only once, or >= 1 when
            # there are more voices of the same type (e.g. Soprano I and II)
            voices = []
            for voice in staff:
                if staves.count(voice) > 1:
                    voiceCounter[voice] += 1
                voices.append((voice, voiceCounter[voice]))
            
            # Add the instrument names to the staff:
            if numVoices == 1:
                voice, num = voices[0]
                longName = builder.instrumentName(voice2Voice[voice].title, num)
                shortName = builder.instrumentName(voice2Voice[voice].short, num)
                builder.setInstrumentNames(s, longName, shortName)
            else:
                # stack instrument names (long and short) in a markup column.
                # long names
                longNames = makeColumnMarkup(
                    builder.instrumentName(voice2Voice[voice].title, num) for voice, num in voices)
                shortNames = makeColumnMarkup(
                    builder.instrumentName(voice2Voice[voice].short, num) for voice, num in voices)
                builder.setInstrumentNames(s, longNames, shortNames)
            
            # Make the { } or << >> holder for this staff's children.
            # If *all* staves have only one voice, addlyrics is used.
            # In that case, don't remove the braces.
            staffMusic = (ly.dom.Seq if lyrEach and maxNumVoices == 1 else
                          ly.dom.Seqr if numVoices == 1 else ly.dom.Simr)(s)
            
            # Set the clef for this staff:
            if 'B' in staff:
                ly.dom.Clef('bass', staffMusic)
            elif 'T' in staff:
                ly.dom.Clef('treble_8', staffMusic)

            # Determine voice order (\voiceOne, \voiceTwo etc.)
            if numVoices == 1:
                order = (0,)
            elif numVoices == 2:
                order = 1, 2
            elif staff in ('SSA', 'TTB'):
                order = 1, 3, 2
            elif staff in ('SAA', 'TBB'):
                order = 1, 2, 4
            elif staff in ('SSAA', 'TTBB'):
                order = 1, 3, 2, 4
            else:
                order = range(1, numVoices + 1)
            
            # What name would the staff get if we need to refer to it?
            # If a name (like 's' or 'sa') is already in use in this part,
            # just add a number ('ss2' or 'sa2', etc.)
            staffCIDs[staff] += 1
            cid = ly.dom.Reference(staff.lower() +
                str(staffCIDs[staff] if staffCIDs[staff] > 1 else ""))
            
            # Create voices and their lyrics:
            for (voice, num), voiceNum in zip(voices, order):
                name = voice2id[voice]
                if num:
                    name += ly.util.int2text(num)
                a = data.assignMusic(name, voice2Voice[voice].octave)
                lyrName = name + 'Verse' if lyrEachDiff else 'verse'
            
                # Use \addlyrics if all staves have exactly one voice.
                if lyrEach and maxNumVoices == 1:
                    for verse in stanzas:
                        lyrics[verse].append((ly.dom.AddLyrics(s), lyrName))
                    ly.dom.Identifier(a.name, staffMusic)
                else:
                    voiceName = voice2id[voice] + str(num or '')
                    v = ly.dom.Voice(voiceName, parent=staffMusic)
                    voiceMusic = ly.dom.Seqr(v)
                    if voiceNum:
                        ly.dom.Text('\\voice' + ly.util.int2text(voiceNum), voiceMusic)
                    ly.dom.Identifier(a.name, voiceMusic)
                    
                    if stanzas and (lyrEach or (voiceNum <= 1 and
                                    (stavesLeft or numStaves == 1))):
                        # Create the lyrics. If they should be above the staff,
                        # give the staff a suitable name, and use alignAbove-
                        # Context to align the Lyrics above the staff.
                        above = voiceNum & 1 if lyrEach else False
                        if above and s.cid is None:
                            s.cid = cid

                        for verse in stanzas:
                            l = ly.dom.Lyrics(parent=choir)
                            if above:
                                l.getWith()['alignAboveContext'] = cid
                                setStaffAffinity(l, "DOWN")
                            elif not lyrEach and stavesLeft:
                                setStaffAffinity(l, "CENTER")
                            lyrics[verse].append((ly.dom.LyricsTo(voiceName, l), lyrName))

                # Add ambitus:
                if self.ambitus.isChecked():
                    ambitusContext = (s if numVoices == 1 else v).getWith()
                    ly.dom.Line('\\consists "Ambitus_engraver"', ambitusContext)
                    if voiceNum > 1:
                        ly.dom.Line("\\override Ambitus #'X-offset = #{0}".format(
                                 (voiceNum - 1) * 2.0), ambitusContext)
            
                pianoReduction[voice].append(a.name)
                rehearsalMidis.append((voice, num, a.name, lyrName))
            
        # Assign the lyrics, so their definitions come after the note defs.
        # (These refs are used again below in the midi rehearsal routine.)
        refs = {}
        for verse in allStanzas:
            for node, name in lyrics[verse]:
                if (name, verse) not in refs:
                    refs[(name, verse)] = self.assignLyrics(data, name, verse).name
                ly.dom.Identifier(refs[(name, verse)], node)

        # Create the piano reduction if desired
        if self.pianoReduction.isChecked():
            a = data.assign('pianoReduction')
            data.nodes.append(ly.dom.Identifier(a.name))
            piano = ly.dom.PianoStaff(parent=a)
            
            sim = ly.dom.Sim(piano)
            rightStaff = ly.dom.Staff(parent=sim)
            leftStaff = ly.dom.Staff(parent=sim)
            right = ly.dom.Seq(rightStaff)
            left = ly.dom.Seq(leftStaff)
            
            # Determine the ordering of voices in the staves
            upper = pianoReduction['S'] + pianoReduction['A']
            lower = pianoReduction['T'] + pianoReduction['B']
            
            preferUpper = 1
            if not upper:
                # Male choir
                upper = pianoReduction['T']
                lower = pianoReduction['B']
                ly.dom.Clef("treble_8", right)
                ly.dom.Clef("bass", left)
                preferUpper = 0
            elif not lower:
                # Female choir
                upper = pianoReduction['S']
                lower = pianoReduction['A']
            else:
                ly.dom.Clef("bass", left)

            # Otherwise accidentals can be confusing
            ly.dom.Line("#(set-accidental-style 'piano)", right)
            ly.dom.Line("#(set-accidental-style 'piano)", left)
            
            # Move voices if unevenly spread
            if abs(len(upper) - len(lower)) > 1:
                voices = upper + lower
                half = (len(voices) + preferUpper) // 2
                upper = voices[:half]
                lower = voices[half:]
            
            for staff, voices in (ly.dom.Simr(right), upper), (ly.dom.Simr(left), lower):
                if voices:
                    for v in voices[:-1]:
                        ly.dom.Identifier(v, staff)
                        ly.dom.VoiceSeparator(staff).after = 1
                    ly.dom.Identifier(voices[-1], staff)

            # Make the piano part somewhat smaller
            ly.dom.Line("fontSize = #-1", piano.getWith())
            ly.dom.Line("\\override StaffSymbol #'staff-space = #(magstep -1)",
                piano.getWith())
            
            # Nice to add Mark engravers
            ly.dom.Line('\\consists "Mark_engraver"', rightStaff.getWith())
            ly.dom.Line('\\consists "Metronome_mark_engraver"', rightStaff.getWith())
            
            # Keep piano reduction out of the MIDI output
            if builder.midi:
                ly.dom.Line('\\remove "Staff_performer"', rightStaff.getWith())
                ly.dom.Line('\\remove "Staff_performer"', leftStaff.getWith())
        
        # Create MIDI files if desired
        if self.rehearsalMidi.isChecked():
            a = data.assign('rehearsalMidi')
            rehearsalMidi = a.name
            
            func = ly.dom.SchemeList(a)
            func.pre = '#\n(' # hack
            ly.dom.Text('define-music-function', func)
            ly.dom.Line('(parser location name midiInstrument lyrics) '
                 '(string? string? ly:music?)', func)
            choir = ly.dom.Sim(ly.dom.Command('unfoldRepeats', ly.dom.SchemeLily(func)))
            
            data.afterblocks.append(ly.dom.Comment(_("Rehearsal MIDI files:")))
            
            for voice, num, ref, lyrName in rehearsalMidis:
                # Append voice to the rehearsalMidi function
                name = voice2id[voice] + str(num or '')
                seq = ly.dom.Seq(ly.dom.Voice(name, parent=ly.dom.Staff(name, parent=choir)))
                if builder.lyVersion < (2, 18, 0):
                    ly.dom.Text('<>\\f', seq) # add one dynamic
                ly.dom.Identifier(ref, seq) # add the reference to the voice
                
                book = ly.dom.Book()
                
                # Append score to the aftermath (stuff put below the main score)
                suffix = "choir{0}-{1}".format(data.num, name) if data.num else name
                if builder.lyVersion < (2, 12, 0):
                    data.afterblocks.append(
                        ly.dom.Line('#(define output-suffix "{0}")'.format(suffix)))
                else:
                    ly.dom.Line('\\bookOutputSuffix "{0}"'.format(suffix), book)
                data.afterblocks.append(book)
                data.afterblocks.append(ly.dom.BlankLine())
                score = ly.dom.Score(book)
                
                # TODO: make configurable
                midiInstrument = voice2Midi[voice]

                cmd = ly.dom.Command(rehearsalMidi, score)
                ly.dom.QuotedString(name, cmd)
                ly.dom.QuotedString(midiInstrument, cmd)
                ly.dom.Identifier(refs[(lyrName, allStanzas[0])], cmd)
                ly.dom.Midi(score)
            
            ly.dom.Text("\\context Staff = $name", choir)
            seq = ly.dom.Seq(choir)
            ly.dom.Line("\\set Score.midiMinimumVolume = #0.5", seq)
            ly.dom.Line("\\set Score.midiMaximumVolume = #0.5", seq)
            ly.dom.Line("\\set Score.tempoWholesPerMinute = #" + data.scoreProperties.schemeMidiTempo(), seq)
            ly.dom.Line("\\set Staff.midiMinimumVolume = #0.8", seq)
            ly.dom.Line("\\set Staff.midiMaximumVolume = #1.0", seq)
            ly.dom.Line("\\set Staff.midiInstrument = $midiInstrument", seq)
            lyr = ly.dom.Lyrics(parent=choir)
            lyr.getWith()['alignBelowContext'] = ly.dom.Text('$name')
            ly.dom.Text("\\lyricsto $name $lyrics", lyr)

Example 86

Project: ewrt
Source File: phonetics.py
View license
def metaphone (term):
    "returns metaphone code for a given string"

    # implementation of the original algorithm from Lawrence Philips
    # extended/rewritten by M. Kuhn
    # improvements with thanks to John Machin <[email protected]>

    # define return value
    code = ""

    i = 0
    term_length = len(term)

    if (term_length == 0):
        # empty string ?
        return code
    # end if

    # extension #1 (added 2005-01-28)
    # convert to lowercase
    term = term.lower()

    # extension #2 (added 2005-01-28)
    # remove all non-english characters, first
    term = re.sub(r'[^a-z]', '', term)
    if len(term) == 0:
        # nothing left
        return code
    # end if

    # extension #3 (added 2005-01-24)
    # conflate repeated letters
    firstChar = term[0]
    str2 = firstChar
    for x in term:
        if x != str2[-1]:
            str2 = str2 + x
        # end if
    # end for

    # extension #4 (added 2005-01-24)
    # remove any vowels unless a vowel is the first letter
    firstChar = str2[0]
    str3 = firstChar
    for x in str2[1:]:
        if (re.search(r'[^aeiou]', x)):
            str3 = str3 + x
        # end if
    # end for

    term = str3
    term_length = len(term)
    if term_length == 0:
        # nothing left
        return code
    # end if

    # check for exceptions
    if (term_length > 1):
        # get first two characters
        first_chars = term[0:2]

        # build translation table
        table = {
            "ae":"e",
            "gn":"n",
            "kn":"n",
            "pn":"n",
            "wr":"n",
            "wh":"w"
        }

        if first_chars in table.keys():
            term = term[2:]
            code = table[first_chars]
            term_length = len(term)
        # end if

    elif (term[0] == "x"):
        term = ""
        code = "s"
        term_length = 0
    # end if

    # define standard translation table
    st_trans = {
        "b":"b",
        "c":"k",
        "d":"t",
        "g":"k",
        "h":"h",
        "k":"k",
        "p":"p",
        "q":"k",
        "s":"s",
        "t":"t",
        "v":"f",
        "w":"w",
        "x":"ks",
        "y":"y",
        "z":"s"
    }

    i = 0
    while (i<term_length):
        # init character to add, init basic patterns
        add_char = ""
        part_n_2 = ""
        part_n_3 = ""
        part_n_4 = ""
        part_c_2 = ""
        part_c_3 = ""

        # extract a number of patterns, if possible
        if (i < (term_length - 1)):
            part_n_2 = term[i:i+2]

            if (i>0):
                part_c_2 = term[i-1:i+1]
                part_c_3 = term[i-1:i+2]
            # end if
        # end if

        if (i < (term_length - 2)):
            part_n_3 = term[i:i+3]
        # end if

        if (i < (term_length - 3)):
            part_n_4 = term[i:i+4]
        # end if

        # use table with conditions for translations
        if (term[i] == "b"):
            add_char = st_trans["b"]
            if (i == (term_length - 1)):
                if (i>0):
                    if (term[i-1] == "m"):
                        add_char = ""
                    # end if
                # end if
            # end if
        elif (term[i] == "c"):
            add_char = st_trans["c"]
            if (part_n_2 == "ch"):
                add_char = "x"
            elif (re.search(r'c[iey]', part_n_2)):
                add_char = "s"
            # end if

            if (part_n_3 == "cia"):
                add_char = "x"
            # end if

            if (re.search(r'sc[iey]', part_c_3)):
                add_char = ""
            # end if

        elif (term[i] == "d"):
            add_char = st_trans["d"]
            if (re.search(r'dg[eyi]', part_n_3)):
                add_char = "j"
            # end if

        elif (term[i] == "g"):
            add_char = st_trans["g"]

            if (part_n_2 == "gh"):
                if (i == (term_length - 2)):
                    add_char = ""
                # end if
            elif (re.search(r'gh[aeiouy]', part_n_3)):
                add_char = ""
            elif (part_n_2 == "gn"):
                add_char = ""
            elif (part_n_4 == "gned"):
                add_char = ""
            elif (re.search(r'dg[eyi]',part_c_3)):
                add_char = ""
            elif (part_n_2 == "gi"):
                if (part_c_3 != "ggi"):
                    add_char = "j"
                # end if
            elif (part_n_2 == "ge"):
                if (part_c_3 != "gge"):
                    add_char = "j"
                # end if
            elif (part_n_2 == "gy"):
                if (part_c_3 != "ggy"):
                    add_char = "j"
                # end if
            elif (part_n_2 == "gg"):
                add_char = ""
            # end if
        elif (term[i] == "h"):
            add_char = st_trans["h"]
            if (re.search(r'[aeiouy]h[^aeiouy]', part_c_3)):
                add_char = ""
            elif (re.search(r'[csptg]h', part_c_2)):
                add_char = ""
            # end if
        elif (term[i] == "k"):
            add_char = st_trans["k"]
            if (part_c_2 == "ck"):
                add_char = ""
            # end if
        elif (term[i] == "p"):
            add_char = st_trans["p"]
            if (part_n_2 == "ph"):
                add_char = "f"
            # end if
        elif (term[i] == "q"):
            add_char = st_trans["q"]
        elif (term[i] == "s"):
            add_char = st_trans["s"]
            if (part_n_2 == "sh"):
                add_char = "x"
            # end if

            if (re.search(r'si[ao]', part_n_3)):
                add_char = "x"
            # end if
        elif (term[i] == "t"):
            add_char = st_trans["t"]
            if (part_n_2 == "th"):
                add_char = "0"
            # end if

            if (re.search(r'ti[ao]', part_n_3)):
                add_char = "x"
            # end if
        elif (term[i] == "v"):
            add_char = st_trans["v"]
        elif (term[i] == "w"):
            add_char = st_trans["w"]
            if (re.search(r'w[^aeiouy]', part_n_2)):
                add_char = ""
            # end if
        elif (term[i] == "x"):
            add_char = st_trans["x"]
        elif (term[i] == "y"):
            add_char = st_trans["y"]
        elif (term[i] == "z"):
            add_char = st_trans["z"]
        else:
            # alternative
            add_char = term[i]
        # end if

        code = code + add_char
        i += 1
    # end while

    # return metaphone code
    return code
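
The three "extension" steps at the start of metaphone() are plain regex preprocessing: strip non-letters, conflate doubled letters, and drop vowels after the first position. A short sketch of the same steps on a sample word (the word and the compact rewrite of the loops are only illustrative):

import re

term = "O'Brien".lower()

# extension #2: keep only a-z
term = re.sub(r'[^a-z]', '', term)                  # 'obrien'

# extension #3: conflate repeated letters
dedup = term[0]
for ch in term[1:]:
    if ch != dedup[-1]:
        dedup += ch                                 # still 'obrien', no doubles here

# extension #4: remove vowels unless the vowel is the first letter
stripped = dedup[0] + re.sub(r'[aeiou]', '', dedup[1:])
print(stripped)                                     # obrn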

Example 87

Project: webrecorder
Source File: usercontroller.py
View license
    def init_routes(self):

        @self.app.get(['/api/v1/dashboard', '/api/v1/dashboard/'])
        @self.manager.admin_view()
        def api_dashboard():
            cache_key = self.cache_template.format('dashboard')
            expiry = 5 * 60  # 5 min

            cache = self.manager.redis.get(cache_key)

            if cache:
                return json.loads(cache.decode('utf-8'))

            users = self.manager.get_users().items()
            results = []

            # add username and get collections
            for user, data in users:
                data['username'] = user
                results.append(data)

            temp = self.manager.redis.hgetall(self.temp_usage_key)
            user = self.manager.redis.hgetall(self.user_usage_key)
            temp = [(k.decode('utf-8'), int(v)) for k, v in temp.items()]
            user = [(k.decode('utf-8'), int(v)) for k, v in user.items()]

            data = {
                'users': UserSchema().load(results, many=True).data,
                'collections': self.manager.get_collections(user='*', api=True),
                'temp_usage': sorted(temp, key=itemgetter(0)),
                'user_usage': sorted(user, key=itemgetter(0)),
            }

            self.manager.redis.setex(cache_key,
                                     expiry,
                                     json.dumps(data, cls=CustomJSONEncoder))

            return data


        @self.app.get(['/api/v1/users', '/api/v1/users/'])
        @self.manager.admin_view()
        def api_users():
            """Full admin API resource of all users.
               Containing user info and public collections

               - Provides basic (1 dimension) RESTful sorting
               - TODO: Pagination
            """
            sorting = request.query.getunicode('sort', None)
            sort_key = sub(r'^-{1}?', '', sorting) if sorting is not None else None
            reverse = sorting.startswith('-') if sorting is not None else False

            def dt(d):
                return datetime.strptime(d, '%Y-%m-%d %H:%M:%S.%f')

            # sortable fields, with optional key unpacking functions
            filters = {
                'created': {'key': lambda obj: dt(obj[1]['creation_date'])},
                'email': {'key': lambda obj: obj[1]['email_addr']},
                'last_login': {'key': lambda obj: dt(obj[1]['last_login'])},
                'name': {'key': lambda obj: json.loads(obj[1]['desc'] or '{}')['name']},
                'username': {},
            }

            if sorting is not None and sort_key not in filters:
                raise HTTPError(400, 'Bad Request')

            sort_by = filters[sort_key] if sorting is not None else {}
            users = sorted(self.manager.get_users().items(),
                           **sort_by,
                           reverse=reverse)

            results = []

            # add username and get collections
            for user, data in users:
                data['username'] = user
                # add space usage
                total = self.manager.get_size_allotment(user)
                used = self.manager.get_size_usage(user)
                data['space_utilization'] = {
                    'total': total,
                    'used': used,
                    'available': total - used,
                }
                results.append(data)

            return {
                # `results` is a list so will always read as `many`
                'users': UserSchema().load(results, many=True).data
            }

        @self.app.get('/api/v1/anon_user')
        def get_anon_user():
            return {'anon_user': self.manager.get_anon_user(True)}

        @self.app.get('/api/v1/temp-users')
        @self.manager.admin_view()
        def temp_users():
            """ Resource returning active temp users
            """
            temp_users_keys = self.manager.redis.keys('u:{0}*'.format(self.temp_user_key))
            temp_users = []

            if len(temp_users_keys):
                with self.manager.redis.pipeline() as pi:
                    for user in temp_users_keys:
                        pi.hgetall(user)
                    temp_users = pi.execute()

                for idx, user in enumerate(temp_users_keys):
                    temp_users[idx][b'username'] = user

                # convert bytestrings, skip over incomplete
                temp_users = [{k.decode('utf-8'): v.decode('utf-8') for k, v in d.items()}
                              for d in temp_users
                              if b'max_size' in d and b'created_at' in d]

                for user in temp_users:
                    total = int(user['max_size'])
                    used = int(user.get('size', 0))
                    creation = datetime.fromtimestamp(int(user['created_at']))
                    removal = creation + timedelta(seconds=self.config['session.durations']['short']['total'])

                    u = re.search(r'{0}\w+'.format(self.temp_user_key),
                                  user['username']).group()
                    user['username'] = u
                    user['removal'] = removal.isoformat()
                    user['space_utilization'] = {
                        'total': total,
                        'used': used,
                        'available': total - used,
                    }

                temp_users, err = TempUserSchema().load(temp_users, many=True)
                if err:
                    return {'errors': err}

            return {'users': temp_users}

        @self.app.post('/api/v1/users/<user>/desc')
        def update_desc(user):
            """legacy, eventually move to the patch endpoint"""
            desc = request.body.read().decode('utf-8')

            self.manager.set_user_desc(user, desc)
            return {}

        @self.app.post(['/api/v1/users', '/api/v1/users/'])
        @self.manager.admin_view()
        def api_create_user():
            """API enpoint to create a user with schema validation"""
            users = self.manager.get_users()
            emails = [u[1]['email_addr'] for u in users.items()]
            data = request.json
            err = NewUserSchema().validate(data)

            if 'username' in data and data['username'] in users:
                if not err:
                    return {'errors': 'Username already exists'}
                else:
                    err.update({'username': 'Username already exists'})

            if 'email' in data and data['email'] in emails:
                if not err:
                    return {'errors': 'Email already exists'}
                else:
                    err.update({'email': 'Email already exists'})

            # validate
            if len(err):
                return {'errors': err}

            # create user
            self.manager.cork._store.users[data['username']] = {
                'role': data['role'],
                'hash': self.manager.cork._hash(data['username'],
                                                data['password']).decode('ascii'),
                'email_addr': data['email'],
                'desc': '{{"name":"{name}"}}'.format(name=data.get('name', '')),
                'creation_date': str(datetime.utcnow()),
                'last_login': str(datetime.utcnow()),
            }
            self.manager.cork._store.save_users()

            # add user account defaults
            key = self.manager.user_key.format(user=data['username'])
            now = int(time.time())

            max_size, max_coll = self.manager.redis.hmget('h:defaults',
                                                          ['max_size', 'max_coll'])
            if not max_size:
                max_size = self.manager.default_max_size

            if not max_coll:
                max_coll = self.manager.default_max_coll

            with redis.utils.pipeline(self.manager.redis) as pi:
                pi.hset(key, 'max_size', max_size)
                pi.hset(key, 'max_coll', max_coll)
                pi.hset(key, 'created_at', now)
                pi.hset(key, 'name', data.get('name', ''))
                pi.hsetnx(key, 'size', '0')

            # create initial collection
            self.manager.create_collection(
                data['username'],
                coll=self.manager.default_coll['id'],
                coll_title=self.manager.default_coll['title'],
                desc=self.manager.default_coll['desc'].format(data['username']),
                public=False,
                synthetic=True
            )

            # Check for mailing list management
            if self.manager.mailing_list:
                self.manager.add_to_mailing_list(
                    data['username'],
                    data['email'],
                    data.get('name', ''),
                )

        @self.app.get(['/api/v1/users/<username>', '/api/v1/users/<username>/'])
        @self.manager.admin_view()
        def api_get_user(username):
            """API enpoint to return user info"""
            users = self.manager.get_users()

            if username not in users:
                self._raise_error(404, 'No such user')

            user = users[username]

            # assemble space usage
            total = self.manager.get_size_allotment(username)
            used = self.manager.get_size_usage(username)
            user['space_utilization'] = {
                'total': total,
                'used': used,
                'available': total - used,
            }

            user_data, err = UserSchema(exclude=('username',)).load(user)
            colls = self.manager.get_collections(username,
                                                 include_recs=True,
                                                 api=True)

            for coll in colls:
                for rec in coll['recordings']:
                    rec['pages'] = self.manager.list_pages(username,
                                                           coll['id'],
                                                           rec['id'])

            # colls is a list so will always be `many` even if one collection
            collections, err = CollectionSchema().load(colls, many=True)
            user_data['collections'] = collections

            return {'user': user_data}

        @self.app.put(['/api/v1/users/<username>', '/api/v1/users/<username>/'])
        @self.manager.auth_view()
        def api_update_user(username):
            """API enpoint to update user info

               See `UserUpdateSchema` for available fields.

               ** bottle 0.12.9 doesn't support `PATCH` methods; update to
                  PATCH once available.
            """
            users = self.manager.get_users()
            if username not in users:
                self._raise_error(404, 'No such user')

            # if not admin, check ownership
            if not self.manager.is_anon(username) and not self.manager.is_superuser():
                self.manager.assert_user_is_owner(username)

            user = users[username]
            try:
                json_data = json.loads(request.forms.json)
            except Exception as e:
                print(e)
                return {'errors': 'bad json data'}

            if len(json_data.keys()) == 0:
                return {'errors': 'empty payload'}

            data, err = UserUpdateSchema(only=json_data.keys()).load(json_data)

            if len(err):
                return {'errors': err}

            if 'name' in data:
                user['desc'] = '{{"name":"{name}"}}'.format(name=data.get('name', ''))

            #
            # restricted resources
            #
            if 'max_size' in data and self.manager.is_superuser():
                key = self.manager.user_key.format(user=username)
                max_size = data.get('max_size', self.manager.default_max_size)
                max_size = int(max_size) if type(max_size) is not int else max_size

                with redis.utils.pipeline(self.manager.redis) as pi:
                    pi.hset(key, 'max_size', max_size)

            if 'role' in data and self.manager.is_superuser():
                # set new role or default to base role
                user['role'] = data.get('role', 'archivist')

            #
            # return updated user data
            #
            total = self.manager.get_size_allotment(username)
            used = self.manager.get_size_usage(username)
            user['space_utilization'] = {
                'total': total,
                'used': used,
                'available': total - used,
            }

            user_data, err = UserSchema(exclude=('username',)).load(user)
            colls = self.manager.get_collections(username,
                                                 include_recs=True,
                                                 api=True)

            for coll in colls:
                for rec in coll['recordings']:
                    rec['pages'] = self.manager.list_pages(username,
                                                           coll['id'],
                                                           rec['id'])

            # colls is a list so will always be `many` even if one collection
            collections, err = CollectionSchema().load(colls, many=True)
            user_data['collections'] = collections

            return {'user': user_data}

        @self.app.delete(['/api/v1/users/<user>', '/api/v1/users/<user>/'])
        @self.manager.admin_view()
        def api_delete_user(user):
            """API enpoint to delete a user"""
            if user not in self.manager.get_users():
                self._raise_error(404, 'No such user')

            self.manager.delete_user(user)

        @self.app.get(['/<user>', '/<user>/'])
        @self.jinja2_view('user.html')
        def user_info(user):
            self.redir_host()

            if self.manager.is_anon(user):
                self.redirect('/' + user + '/temp')

            self.manager.assert_user_exists(user)

            result = {
                'user': user,
                'user_info': self.manager.get_user_info(user),
                'collections': self.manager.get_collections(user),
            }

            if not result['user_info'].get('desc'):
                result['user_info']['desc'] = self.default_user_desc.format(user)

            return result

        # User Account Settings
        @self.app.get('/<user>/_settings')
        @self.jinja2_view('account.html')
        def account_settings(user):
            self.manager.assert_user_is_owner(user)

            return {'user': user,
                    'user_info': self.manager.get_user_info(user),
                    'num_coll': self.manager.num_collections(user),
                   }

        # Delete User Account
        @self.app.post('/<user>/$delete')
        def delete_user(user):
            if self.manager.delete_user(user):
                self.flash_message('The user {0} has been permanently deleted!'.format(user), 'success')

                redir_to = '/'
                request.environ['webrec.delete_all_cookies'] = 'all'
                self.manager.cork.logout(success_redirect=redir_to, fail_redirect=redir_to)
            else:
                self.flash_message('There was an error deleting {0}'.format(user))
                self.redirect(self.get_path(user))

        # Expiry Message
        @self.app.route('/_expire')
        def expire():
            self.flash_message('Sorry, the anonymous collection has expired due to inactivity')
            self.redirect('/')

        @self.app.post('/_reportissues')
        def report_issues():
            useragent = request.headers.get('User-Agent')

            @self.jinja2_view('email_error.html')
            def error_email(params):
                ua = UserAgent(params.get('ua'))
                if ua.browser:
                    browser = '{0} {1} {2} {3}'
                    lang = ua.language or ''
                    browser = browser.format(ua.platform, ua.browser,
                                             ua.version, lang)

                    params['browser'] = browser
                else:
                    params['browser'] = ua.string

                params['time'] = params['time'][:19]
                return params

            self.manager.report_issues(request.POST, useragent, error_email)
            return {}

        # Skip POST request recording
        @self.app.get('/_skipreq')
        def skip_req():
            url = request.query.getunicode('url')
            user = self.manager.get_curr_user()
            if not user:
                user = self.manager.get_anon_user()

            self.manager.skip_post_req(user, url)
            return {}
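
In api_users() the optional sort query parameter is handled with one substitution: a leading '-' marks a reverse sort and is stripped before the field lookup. A minimal sketch of that parsing with made-up parameter values:

from re import sub

for sorting in ('created', '-last_login', None):
    sort_key = sub(r'^-{1}?', '', sorting) if sorting is not None else None  # '^-{1}?' behaves like '^-'
    reverse = sorting.startswith('-') if sorting is not None else False
    print((sort_key, reverse))
# ('created', False)
# ('last_login', True)
# (None, False)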

Example 88

Project: wikiteam
Source File: uploader.py
View license
def upload(wikis, config={}):
    headers = {'User-Agent': dumpgenerator.getUserAgent()}

    for wiki in wikis:
        print "#"*73
        print "# Uploading", wiki
        print "#"*73
        wiki = wiki.lower()
        prefix = dumpgenerator.domain2prefix(config={'api': wiki})

        wikiname = prefix.split('-')[0]
        dumps = []
        for dirname, dirnames, filenames in os.walk('.'):
            if dirname == '.':
                for f in filenames:
                    if f.startswith('%s-' % (wikiname)) and (f.endswith('-wikidump.7z') or f.endswith('-history.xml.7z')):
                        dumps.append(f)
                break

        c = 0
        for dump in dumps:
            wikidate = dump.split('-')[1]
            item = get_item('wiki-' + wikiname)
            if dump in uploadeddumps:
                if config['prune-directories']:
                    rmline='rm -rf %s-%s-wikidump/' % (wikiname, wikidate)
                    # With -f the deletion might have happened before and we won't know
                    if not os.system(rmline):
                        print 'DELETED %s-%s-wikidump/' % (wikiname, wikidate)
                if config['prune-wikidump'] and dump.endswith('wikidump.7z'):
                        # Simplistic quick&dirty check for the presence of this file in the item
                        stdout, stderr = subprocess.Popen(["md5sum", dump], stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
                        dumphash = re.sub(' +.+\n?', '', stdout)

                        if dumphash in map(lambda x: x['md5'], item.files):
                            log(wiki, dump, 'verified')
                            rmline='rm -rf %s' % dump
                            if not os.system(rmline):
                                print 'DELETED ' + dump
                            print '%s was uploaded before, skipping...' % (dump)
                            continue
                        else:
                            print 'ERROR: The online item misses ' + dump
                            log(wiki, dump, 'missing')
                            # We'll exit this if and go upload the dump
                else:
                    print '%s was uploaded before, skipping...' % (dump)
                    continue

            time.sleep(0.1)
            wikidate_text = wikidate[0:4]+'-'+wikidate[4:6]+'-'+wikidate[6:8]
            print wiki, wikiname, wikidate, dump

            # Does the item exist already?
            ismissingitem = not item.exists

            # Logo path
            logourl = ''

            if ismissingitem or config['update']:
                #get metadata from api.php
                #first sitename and base url
                params = {'action': 'query', 'meta': 'siteinfo', 'format': 'xml'}
                data = urllib.urlencode(params)
                req = urllib2.Request(url=wiki, data=data, headers=headers)
                xml = ''
                try:
                    f = urllib2.urlopen(req)
                    xml = f.read()
                    f.close()
                except:
                    pass

                sitename = ''
                baseurl = ''
                lang = ''
                try:
                    sitename = re.findall(ur"sitename=\"([^\"]+)\"", xml)[0]
                except:
                    pass
                try:
                    baseurl = re.findall(ur"base=\"([^\"]+)\"", xml)[0]
                except:
                    pass
                try:
                    lang = re.findall(ur"lang=\"([^\"]+)\"", xml)[0]
                except:
                    pass

                if not sitename:
                    sitename = wikiname
                if not baseurl:
                    baseurl = re.sub(ur"(?im)/api\.php", ur"", wiki)
                if lang:
                    lang = convertlang.has_key(lang.lower()) and convertlang[lang.lower()] or lang.lower()

                #now copyright info from API
                params = {'action': 'query', 'siprop': 'general|rightsinfo', 'format': 'xml'}
                data = urllib.urlencode(params)
                req = urllib2.Request(url=wiki, data=data, headers=headers)
                xml = ''
                try:
                    f = urllib2.urlopen(req)
                    xml = f.read()
                    f.close()
                except:
                    pass

                rightsinfourl = ''
                rightsinfotext = ''
                try:
                    rightsinfourl = re.findall(ur"rightsinfo url=\"([^\"]+)\"", xml)[0]
                    rightsinfotext = re.findall(ur"text=\"([^\"]+)\"", xml)[0]
                except:
                    pass

                raw = ''
                try:
                    f = urllib.urlopen(baseurl)
                    raw = f.read()
                    f.close()
                except:
                    pass

                #or copyright info from #footer in mainpage
                if baseurl and not rightsinfourl and not rightsinfotext:
                    rightsinfotext = ''
                    rightsinfourl = ''
                    try:
                        rightsinfourl = re.findall(ur"<link rel=\"copyright\" href=\"([^\"]+)\" />", raw)[0]
                    except:
                        pass
                    try:
                        rightsinfotext = re.findall(ur"<li id=\"copyright\">([^\n\r]*?)</li>", raw)[0]
                    except:
                        pass
                    if rightsinfotext and not rightsinfourl:
                        rightsinfourl = baseurl + '#footer'
                try:
                    logourl = re.findall(ur'p-logo["\'][^>]*>\s*<a [^>]*background-image:\s*(?:url\()?([^;)"]+)', raw)[0]
                except:
                    pass
                print logourl

                #retrieve some info from the wiki
                wikititle = "Wiki - %s" % (sitename) # Wiki - ECGpedia
                wikidesc = "<a href=\"%s\">%s</a> dumped with <a href=\"https://github.com/WikiTeam/wikiteam\" rel=\"nofollow\">WikiTeam</a> tools." % (baseurl, sitename)# "<a href=\"http://en.ecgpedia.org/\" rel=\"nofollow\">ECGpedia,</a>: a free electrocardiography (ECG) tutorial and textbook to which anyone can contribute, designed for medical professionals such as cardiac care nurses and physicians. Dumped with <a href=\"https://github.com/WikiTeam/wikiteam\" rel=\"nofollow\">WikiTeam</a> tools."
                wikikeys = ['wiki', 'wikiteam', 'MediaWiki', sitename, wikiname] # ecg; ECGpedia; wiki; wikiteam; MediaWiki
                if not rightsinfourl and not rightsinfotext:
                    wikikeys.append('unknowncopyright')

                wikilicenseurl = rightsinfourl # http://creativecommons.org/licenses/by-nc-sa/3.0/
                wikirights = rightsinfotext # e.g. http://en.ecgpedia.org/wiki/Frequently_Asked_Questions : hard to fetch automatically, could be the output of API's rightsinfo if it's not a usable licenseurl or "Unknown copyright status" if nothing is found.
                wikiurl = wiki # we use api here http://en.ecgpedia.org/api.php
            else:
                print 'Item already exists.'
                lang = 'foo'
                wikititle = 'foo'
                wikidesc = 'foo'
                wikikeys = 'foo'
                wikilicenseurl = 'foo'
                wikirights = 'foo'
                wikiurl = 'foo'

            if c == 0:
                # Item metadata
                md = {
                    'mediatype': 'web',
                    'collection': config['collection'],
                    'title': wikititle,
                    'description': wikidesc,
                    'language': lang,
                    'last-updated-date': wikidate_text,
                    'subject': '; '.join(wikikeys), # Keywords should be separated by ; but it doesn't matter much; the alternative is to set one per field with subject[0], subject[1], ...
                    'licenseurl': wikilicenseurl and urlparse.urljoin(wiki, wikilicenseurl),
                    'rights': wikirights,
                    'originalurl': wikiurl,
                }

            #Upload files and update metadata
            try:
                item.upload(dump, metadata=md, access_key=accesskey, secret_key=secretkey, verbose=True)
                item.modify_metadata(md) # update
                print 'You can find it in https://archive.org/details/wiki-%s' % (wikiname)
                if logourl:
                    logo = StringIO.StringIO(urllib.urlopen(urlparse.urljoin(wiki, logourl)).read())
                    logoextension = logourl.split('.')[-1] if logourl.split('.') else 'unknown'
                    logo.name = 'wiki-' + wikiname + '_logo.' + logoextension
                    item.upload(logo, access_key=accesskey, secret_key=secretkey, verbose=True)
                uploadeddumps.append(dump)
                log(wiki, dump, 'ok')
            except:
                print wiki, dump, 'error when uploading?'

            c += 1
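
Two substitutions in the uploader are easy to read past: trimming md5sum's output down to the bare hash, and deriving the wiki's base URL from its api.php endpoint. A small sketch with stand-in values (the URL and file name are illustrative only):

import re

# md5sum prints "<hash>  <filename>\n"; drop everything from the first run of spaces on
stdout = "d41d8cd98f00b204e9800998ecf8427e  wikidump.7z\n"
dumphash = re.sub(r' +.+\n?', '', stdout)
print(dumphash)                                     # d41d8cd98f00b204e9800998ecf8427e

# strip a trailing /api.php to recover the base URL
wiki = "http://wiki.example.org/w/api.php"
baseurl = re.sub(r"(?im)/api\.php", "", wiki)
print(baseurl)                                      # http://wiki.example.org/w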

Example 89

Project: pyxform
Source File: xls2json_backends.py
View license
def xls_to_dict(path_or_file):
    """
    Return a Python dictionary with a key for each worksheet
    name. For each sheet there is a list of dictionaries, each
    dictionary corresponds to a single row in the worksheet. A
    dictionary has keys taken from the column headers and values
    equal to the cell value for that row and column.
    All the keys and leaf elements are unicode text.
    """
    try:
        if isinstance(path_or_file, basestring):
            workbook = xlrd.open_workbook(filename=path_or_file)
        else:
            workbook = xlrd.open_workbook(file_contents=path_or_file.read())
    except XLRDError as e:
        raise PyXFormError("Error reading .xls file: %s" % e.message)

    def xls_value_to_unicode(value, value_type):
        """
        Take a xls formatted value and try to make a unicode string
        representation.
        """
        if value_type == xlrd.XL_CELL_BOOLEAN:
            return u"TRUE" if value else u"FALSE"
        elif value_type == xlrd.XL_CELL_NUMBER:
            # Try to display as an int if possible.
            int_value = int(value)
            if int_value == value:
                return unicode(int_value)
            else:
                return unicode(value)
        elif value_type is xlrd.XL_CELL_DATE:
            # Warn that it is better to single quote as a string.
            # error_location = cellFormatString % (ss_row_idx, ss_col_idx)
            # raise Exception(
            #   "Cannot handle excel formatted date at " + error_location)
            datetime_or_time_only = xlrd.xldate_as_tuple(
                value, workbook.datemode)
            if datetime_or_time_only[:3] == (0, 0, 0):
                # must be time only
                return unicode(datetime.time(*datetime_or_time_only[3:]))
            return unicode(datetime.datetime(*datetime_or_time_only))
        else:
            # ensure unicode and replace nbsp spaces with normal ones
            # to avoid this issue:
            # https://github.com/modilabs/pyxform/issues/83
            return unicode(value).replace(unichr(160), ' ')

    def xls_to_dict_normal_sheet(sheet):
        def iswhitespace(string):
            return (
                isinstance(string, basestring) and len(string.strip()) == 0)

        # Check for duplicate column headers
        column_header_list = list()
        for column in range(0, sheet.ncols):
            column_header = sheet.cell_value(0, column)
            if column_header in column_header_list:
                raise PyXFormError(
                    u"Duplicate column header: %s" % column_header)
            # xls files with 3 columns often carry a few extra columns that are
            # blank by default; skip those during this check
            if column_header is not None:
                if not iswhitespace(column_header):
                    column_header_list.append(column_header)

        result = []
        for row in range(1, sheet.nrows):
            row_dict = OrderedDict()
            for column in range(0, sheet.ncols):
                # Changing to cell_value function
                # convert to string, in case it is not string
                key = u"%s" % sheet.cell_value(0, column)
                key = key.strip()
                value = sheet.cell_value(row, column)
                # remove whitespace at the beginning and end of value
                if isinstance(value, basestring):
                    value = value.strip()
                value_type = sheet.cell_type(row, column)
                if value is not None:
                    if not iswhitespace(value):
                        row_dict[key] = xls_value_to_unicode(value, value_type)
                # Taking this condition out so I can get accurate row numbers.
                # TODO: Do the same for csvs
                # if row_dict != {}:
            result.append(row_dict)
        return result, _list_to_dict_list(column_header_list)

    def xls_value_from_sheet(sheet, row, column):
        value = sheet.cell_value(row, column)
        value_type = sheet.cell_type(row, column)
        if value is not None and value != "":
            return xls_value_to_unicode(value, value_type)
        else:
            raise PyXFormError("Empty Value")

    def _xls_to_dict_cascade_sheet(sheet):
        result = []
        rs_dict = OrderedDict()  # tmp dict to hold entire structure

        def slugify(s):
            return re.sub(r'\W+', '_', s.strip().lower())

        prefix = "$PREFIX$"
        # get col headers and position first, ignore first column
        for column in range(1, sheet.ncols):
            col_name = sheet.cell_value(0, column)
            rs_dict[col_name] = {
                'pos': column,
                'data': [],
                'itemset': col_name,
                'type': constants.SELECT_ONE,
                'name':
                    prefix if (column == sheet.ncols - 1) else u''.join(
                        [prefix, '_', col_name]),
                'label': sheet.cell_value(1, column)}
            if column > 1:
                rs_dict[col_name]['parent'] = sheet.cell_value(0, column - 1)
            else:
                rs_dict[col_name]['choices'] = []
            choice_filter = ''
            for a in range(1, column):
                prev_col_name = sheet.cell_value(0, a)
                if choice_filter != '':
                    choice_filter += ' and %s=${%s_%s}' % \
                                     (prev_col_name, prefix, prev_col_name)
                else:
                    choice_filter += '%s=${%s_%s}' % \
                                     (prev_col_name, prefix, prev_col_name)
            rs_dict[col_name]['choice_filter'] = choice_filter
        # get data, use new cascade dict structure, data starts on 3 row
        for row in range(2, sheet.nrows):
            # go through each header aka column
            for col_name in rs_dict:
                column = rs_dict[col_name]['pos']
                cell_data = xls_value_from_sheet(sheet, row, column)
                try:
                    rs_dict[col_name]['data'].index(slugify(cell_data))
                except ValueError:
                    rs_dict[col_name]['data'].append(slugify(cell_data))
                    if 'choices' in rs_dict[col_name]:
                        l = {'name': slugify(cell_data), 'label': cell_data}
                        rs_dict[col_name]['choices'].append(l)
                data = {
                    'name': slugify(cell_data),
                    'label': cell_data.strip(),
                    constants.LIST_NAME: col_name
                }
                for prev_column in range(1, column):
                    prev_col_name = sheet.cell_value(0, prev_column)
                    data[prev_col_name] = slugify(xls_value_from_sheet(
                        sheet, row, prev_column))
                result.append(data)
        # order
        kl = []
        for column in range(1, sheet.ncols):
            col_name = sheet.cell_value(0, column)
            if 'parent' in rs_dict[col_name]:
                rs_dict[col_name].pop('parent')
            if 'pos' in rs_dict[col_name]:
                rs_dict[col_name].pop('pos')
            if 'data' in rs_dict[col_name]:
                rs_dict[col_name].pop('data')
            kl.append(rs_dict[col_name])

            # create list with no duplicates
        choices = []
        for rec in result:
            c = 0
            for check in result:
                if rec == check:
                    c += 1
            if c == 1:
                choices.append(rec)
            else:
                try:
                    choices.index(rec)
                except ValueError:
                    choices.append(rec)
        return [{'choices': choices, 'questions': kl}]

    result = OrderedDict()
    for sheet in workbook.sheets():
        if sheet.name == constants.CASCADING_CHOICES:
            result[sheet.name] = _xls_to_dict_cascade_sheet(sheet)
        else:
            result[sheet.name], result[u"%s_header" % sheet.name] = \
                xls_to_dict_normal_sheet(sheet)
    return result
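
The slugify helper inside _xls_to_dict_cascade_sheet is where re.sub does the work here: every run of non-word characters in a cell value is collapsed into a single underscore so the text can be used as a choice name. A minimal standalone sketch with an invented cell value:

    import re

    def slugify(s):
        # Each run of non-word characters (spaces, slashes, punctuation)
        # collapses into a single underscore.
        return re.sub(r'\W+', '_', s.strip().lower())

    print(slugify("  New York / Kings County "))  # -> new_york_kings_county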

Example 90

Project: trackma
Source File: libmal.py
View license
    def _parse_xml(self, data):
        # For some reason MAL returns an XML file with HTML exclusive
        # entities like &aacute;, so we have to create a custom XMLParser
        # to convert these entities correctly.

        ENTITIES = {
            "nbsp":     u'\u00A0',
            "iexcl":    u'\u00A1',
            "cent":     u'\u00A2',
            "pound":    u'\u00A3',
            "curren":   u'\u00A4',
            "yen":      u'\u00A5',
            "brvbar":   u'\u00A6',
            "sect":     u'\u00A7',
            "uml":      u'\u00A8',
            "copy":     u'\u00A9',
            "ordf":     u'\u00AA',
            "laquo":    u'\u00AB',
            "not":      u'\u00AC',
            "shy":      u'\u00AD',
            "reg":      u'\u00AE',
            "macr":     u'\u00AF',
            "deg":      u'\u00B0',
            "plusmn":   u'\u00B1',
            "sup2":     u'\u00B2',
            "sup3":     u'\u00B3',
            "acute":    u'\u00B4',
            "micro":    u'\u00B5',
            "para":     u'\u00B6',
            "middot":   u'\u00B7',
            "cedil":    u'\u00B8',
            "sup1":     u'\u00B9',
            "ordm":     u'\u00BA',
            "raquo":    u'\u00BB',
            "frac14":   u'\u00BC',
            "frac12":   u'\u00BD',
            "frac34":   u'\u00BE',
            "iquest":   u'\u00BF',
            "Agrave":   u'\u00C0',
            "Aacute":   u'\u00C1',
            "Acirc":    u'\u00C2',
            "Atilde":   u'\u00C3',
            "Auml":     u'\u00C4',
            "Aring":    u'\u00C5',
            "AElig":    u'\u00C6',
            "Ccedil":   u'\u00C7',
            "Egrave":   u'\u00C8',
            "Eacute":   u'\u00C9',
            "Ecirc":    u'\u00CA',
            "Euml":     u'\u00CB',
            "Igrave":   u'\u00CC',
            "Iacute":   u'\u00CD',
            "Icirc":    u'\u00CE',
            "Iuml":     u'\u00CF',
            "ETH":      u'\u00D0',
            "Ntilde":   u'\u00D1',
            "Ograve":   u'\u00D2',
            "Oacute":   u'\u00D3',
            "Ocirc":    u'\u00D4',
            "Otilde":   u'\u00D5',
            "Ouml":     u'\u00D6',
            "times":    u'\u00D7',
            "Oslash":   u'\u00D8',
            "Ugrave":   u'\u00D9',
            "Uacute":   u'\u00DA',
            "Ucirc":    u'\u00DB',
            "Uuml":     u'\u00DC',
            "Yacute":   u'\u00DD',
            "THORN":    u'\u00DE',
            "szlig":    u'\u00DF',
            "agrave":   u'\u00E0',
            "aacute":   u'\u00E1',
            "acirc":    u'\u00E2',
            "atilde":   u'\u00E3',
            "auml":     u'\u00E4',
            "aring":    u'\u00E5',
            "aelig":    u'\u00E6',
            "ccedil":   u'\u00E7',
            "egrave":   u'\u00E8',
            "eacute":   u'\u00E9',
            "ecirc":    u'\u00EA',
            "euml":     u'\u00EB',
            "igrave":   u'\u00EC',
            "iacute":   u'\u00ED',
            "icirc":    u'\u00EE',
            "iuml":     u'\u00EF',
            "eth":      u'\u00F0',
            "ntilde":   u'\u00F1',
            "ograve":   u'\u00F2',
            "oacute":   u'\u00F3',
            "ocirc":    u'\u00F4',
            "otilde":   u'\u00F5',
            "ouml":     u'\u00F6',
            "divide":   u'\u00F7',
            "oslash":   u'\u00F8',
            "ugrave":   u'\u00F9',
            "uacute":   u'\u00FA',
            "ucirc":    u'\u00FB',
            "uuml":     u'\u00FC',
            "yacute":   u'\u00FD',
            "thorn":    u'\u00FE',
            "yuml":     u'\u00FF',
            "fnof":     u'\u0192',
            "Alpha":    u'\u0391',
            "Beta":     u'\u0392',
            "Gamma":    u'\u0393',
            "Delta":    u'\u0394',
            "Epsilon":  u'\u0395',
            "Zeta":     u'\u0396',
            "Eta":      u'\u0397',
            "Theta":    u'\u0398',
            "Iota":     u'\u0399',
            "Kappa":    u'\u039A',
            "Lambda":   u'\u039B',
            "Mu":       u'\u039C',
            "Nu":       u'\u039D',
            "Xi":       u'\u039E',
            "Omicron":  u'\u039F',
            "Pi":       u'\u03A0',
            "Rho":      u'\u03A1',
            "Sigma":    u'\u03A3',
            "Tau":      u'\u03A4',
            "Upsilon":  u'\u03A5',
            "Phi":      u'\u03A6',
            "Chi":      u'\u03A7',
            "Psi":      u'\u03A8',
            "Omega":    u'\u03A9',
            "alpha":    u'\u03B1',
            "beta":     u'\u03B2',
            "gamma":    u'\u03B3',
            "delta":    u'\u03B4',
            "epsilon":  u'\u03B5',
            "zeta":     u'\u03B6',
            "eta":      u'\u03B7',
            "theta":    u'\u03B8',
            "iota":     u'\u03B9',
            "kappa":    u'\u03BA',
            "lambda":   u'\u03BB',
            "mu":       u'\u03BC',
            "nu":       u'\u03BD',
            "xi":       u'\u03BE',
            "omicron":  u'\u03BF',
            "pi":       u'\u03C0',
            "rho":      u'\u03C1',
            "sigmaf":   u'\u03C2',
            "sigma":    u'\u03C3',
            "tau":      u'\u03C4',
            "upsilon":  u'\u03C5',
            "phi":      u'\u03C6',
            "chi":      u'\u03C7',
            "psi":      u'\u03C8',
            "omega":    u'\u03C9',
            "thetasym": u'\u03D1',
            "upsih":    u'\u03D2',
            "piv":      u'\u03D6',
            "bull":     u'\u2022',
            "hellip":   u'\u2026',
            "prime":    u'\u2032',
            "Prime":    u'\u2033',
            "oline":    u'\u203E',
            "frasl":    u'\u2044',
            "weierp":   u'\u2118',
            "image":    u'\u2111',
            "real":     u'\u211C',
            "trade":    u'\u2122',
            "alefsym":  u'\u2135',
            "larr":     u'\u2190',
            "uarr":     u'\u2191',
            "rarr":     u'\u2192',
            "darr":     u'\u2193',
            "harr":     u'\u2194',
            "crarr":    u'\u21B5',
            "lArr":     u'\u21D0',
            "uArr":     u'\u21D1',
            "rArr":     u'\u21D2',
            "dArr":     u'\u21D3',
            "hArr":     u'\u21D4',
            "forall":   u'\u2200',
            "part":     u'\u2202',
            "exist":    u'\u2203',
            "empty":    u'\u2205',
            "nabla":    u'\u2207',
            "isin":     u'\u2208',
            "notin":    u'\u2209',
            "ni":       u'\u220B',
            "prod":     u'\u220F',
            "sum":      u'\u2211',
            "minus":    u'\u2212',
            "lowast":   u'\u2217',
            "radic":    u'\u221A',
            "prop":     u'\u221D',
            "infin":    u'\u221E',
            "ang":      u'\u2220',
            "and":      u'\u2227',
            "or":       u'\u2228',
            "cap":      u'\u2229',
            "cup":      u'\u222A',
            "int":      u'\u222B',
            "there4":   u'\u2234',
            "sim":      u'\u223C',
            "cong":     u'\u2245',
            "asymp":    u'\u2248',
            "ne":       u'\u2260',
            "equiv":    u'\u2261',
            "le":       u'\u2264',
            "ge":       u'\u2265',
            "sub":      u'\u2282',
            "sup":      u'\u2283',
            "nsub":     u'\u2284',
            "sube":     u'\u2286',
            "supe":     u'\u2287',
            "oplus":    u'\u2295',
            "otimes":   u'\u2297',
            "perp":     u'\u22A5',
            "sdot":     u'\u22C5',
            "lceil":    u'\u2308',
            "rceil":    u'\u2309',
            "lfloor":   u'\u230A',
            "rfloor":   u'\u230B',
            "lang":     u'\u2329',
            "rang":     u'\u232A',
            "loz":      u'\u25CA',
            "spades":   u'\u2660',
            "clubs":    u'\u2663',
            "hearts":   u'\u2665',
            "diams":    u'\u2666',
            "quot":     u'\"'    ,
            "amp":      u'&'     ,
            "lt":       u'<'     ,
            "gt":       u'>'     ,
            "OElig":    u'\u0152',
            "oelig":    u'\u0153',
            "Scaron":   u'\u0160',
            "scaron":   u'\u0161',
            "Yuml":     u'\u0178',
            "circ":     u'\u02C6',
            "tilde":    u'\u02DC',
            "ensp":     u'\u2002',
            "emsp":     u'\u2003',
            "thinsp":   u'\u2009',
            "zwnj":     u'\u200C',
            "zwj":      u'\u200D',
            "lrm":      u'\u200E',
            "rlm":      u'\u200F',
            "ndash":    u'\u2013',
            "mdash":    u'\u2014',
            "lsquo":    u'\u2018',
            "rsquo":    u'\u2019',
            "sbquo":    u'\u201A',
            "ldquo":    u'\u201C',
            "rdquo":    u'\u201D',
            "bdquo":    u'\u201E',
            "dagger":   u'\u2020',
            "Dagger":   u'\u2021',
            "permil":   u'\u2030',
            "lsaquo":   u'\u2039',
            "rsaquo":   u'\u203A',
            "euro":     u'\u20AC',
        }

        # http://stackoverflow.com/a/35591479/2016221
        magic = '''<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
            "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd" [\n'''
        magic += ''.join("<!ENTITY %s '&#%d;'>\n" % (key, ord(value)) for key, value in ENTITIES.items())
        magic += '\n]>'

        # strip xml declaration since we're concatenating something before it
        data = re.sub('<\?.*?\?>', '', data)

        return ET.fromstring(magic + data)
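
The re.sub near the end is what makes the entity trick work: the original <?xml ... ?> declaration must be removed before the DOCTYPE that defines the HTML entities can be concatenated in front of the document. A minimal sketch of that substitution with an invented response snippet:

    import re

    data = '<?xml version="1.0" encoding="utf-8"?>\n<anime><title>Bakemonogatari</title></anime>'
    # The non-greedy .*? keeps the match from running past the first "?>".
    data = re.sub(r'<\?.*?\?>', '', data)
    print(data.lstrip())  # -> <anime><title>Bakemonogatari</title></anime>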

Example 91

Project: system-config-printer
Source File: ppds.py
View license
    def getPPDNamesFromDeviceID (self, mfg, mdl, description="",
                                 commandsets=None, uri=None,
                                 make_and_model=None):
        """
	Obtain a best-effort PPD match for an IEEE 1284 Device ID.

	@param mfg: MFG or MANUFACTURER field
	@type mfg: string
	@param mdl: MDL or MODEL field
	@type mdl: string
	@param description: DES or DESCRIPTION field, optional
	@type description: string
	@param commandsets: CMD or COMMANDSET field, optional
	@type commandsets: string
	@param uri: device URI, optional (only needed for debugging)
	@type uri: string
        @param make_and_model: device-make-and-model string
        @type make_and_model: string
	@returns: a dict of fit (string) indexed by PPD name
	"""
        _debugprint ("\n%s %s" % (mfg, mdl))
        orig_mfg = mfg
        orig_mdl = mdl
        self._init_ids ()

        if commandsets is None:
            commandsets = []

        # Start with an empty result list and build it up using
        # several search methods, in increasing order of fuzziness.
        fit = {}

        # First, try looking up the device using the manufacturer and
        # model fields from the Device ID exactly as they appear (but
        # case-insensitively).
        mfgl = mfg.lower ()
        mdll = mdl.lower ()

        id_matched = False
        try:
            for each in self.ids[mfgl][mdll]:
                fit[each] = self.FIT_EXACT
            id_matched = True
        except KeyError:
            pass

        # The HP PPDs say "HP" not "Hewlett-Packard", so try that.
        if mfgl == "hewlett-packard":
            try:
                for each in self.ids["hp"][mdll]:
                    fit[each] = self.FIT_EXACT
                print ("**** Incorrect IEEE 1284 Device ID: %s" %
                       self.ids["hp"][mdll])
                print ("**** Actual ID is MFG:%s;MDL:%s;" % (mfg, mdl))
                print ("**** Please report a bug against the HPLIP component")
                id_matched = True
            except KeyError:
                pass

        # Now try looking up the device by ppd-make-and-model.
        _debugprint ("Trying make/model names")
        mdls = None
        self._init_makes ()
        make = None
        if mfgl == "":
            (mfg, mdl) = ppdMakeModelSplit (mdl)
            mfgl = normalize (mfg)
            mdll = normalize (mdl)

        _debugprint ("mfgl: %s" % mfgl)
        _debugprint ("mdll: %s" % mdll)
        mfgrepl = {"hewlett-packard": "hp",
                   "lexmark international": "lexmark",
                   "kyocera": "kyocera mita"}
        if mfgl in self.lmakes:
            # Found manufacturer.
            make = self.lmakes[mfgl]
        elif mfgl in mfgrepl:
            rmfg = mfgrepl[mfgl]
            if rmfg in self.lmakes:
                mfg = rmfg
                mfgl = mfg
                # Found manufacturer (after mapping to canonical name)
                _debugprint ("remapped mfgl: %s" % mfgl)
                make = self.lmakes[mfgl]

        _debugprint ("make: %s" % make)
        if make is not None:
            mdls = self.makes[make]
            mdlsl = self.lmodels[normalize(make)]

            # Remove manufacturer name from model field
            for prefix in [mfgl, 'hewlett-packard', 'hp']:
                if mdll.startswith (prefix + ' '):
                    mdl = mdl[len (prefix) + 1:]
                    mdll = normalize (mdl)
                    _debugprint ("unprefixed mdll: %s" % mdll)

            if mdll in self.lmodels[mfgl]:
                model = mdlsl[mdll]
                for each in mdls[model].keys ():
                    fit[each] = self.FIT_EXACT
                    _debugprint ("%s: %s" % (fit[each], each))
            else:
                # Make use of the model name clean-up in the
                # ppdMakeModelSplit () function
                (mfg2, mdl2) = ppdMakeModelSplit (mfg + " " + mdl)
                mdl2l = normalize (mdl2)
                _debugprint ("re-split mdll: %s" % mdl2l)
                if mdl2l in self.lmodels[mfgl]:
                    model = mdlsl[mdl2l]
                    for each in list(mdls[model].keys ()):
                        fit[each] = self.FIT_EXACT
                        _debugprint ("%s: %s" % (fit[each], each))
      
        if not fit and mdls:
            (s, ppds) = self._findBestMatchPPDs (mdls, mdl)
            if s != self.FIT_NONE:
                for each in ppds:
                    fit[each] = s
                    _debugprint ("%s: %s" % (fit[each], each))

        if commandsets:
            if type (commandsets) != list:
                commandsets = commandsets.split (',')

            _debugprint ("Checking CMD field")
            generic = self._getPPDNameFromCommandSet (commandsets)
            if generic:
                for driver in generic:
                    fit[driver] = self.FIT_GENERIC
                    _debugprint ("%s: %s" % (fit[driver], driver))

        # What about the CMD field of the Device ID?  Some devices
        # have optional units for page description languages, such as
        # PostScript, and they will report different CMD strings
        # accordingly.
        #
        # By convention, if a PPD contains a Device ID with a CMD
        # field, that PPD can only be used whenever any of the
        # comma-separated words in the CMD field appear in the
        # device's ID.
        # (See Red Hat bug #630058).
        #
        # We'll do that check now, and any PPDs that fail
        # (e.g. PostScript PPD for non-PostScript printer) can be
        # eliminated from the list.
        #
        # The reason we don't do this check any earlier is that we
        # don't want to eliminate PPDs only to have the fuzzy matcher
        # add them back in afterwards.
        #
        # While doing this, any drivers that we can positively confirm
        # as using a command set understood by the printer will be
        # converted from FIT_EXACT to FIT_EXACT_CMD.
        if id_matched and len (commandsets) > 0:
            failed = set()
            exact_cmd = set()
            for ppdname in fit.keys ():
                ppd_cmd_field = None
                ppd = self.ppds[ppdname]
                ppd_device_id = _singleton (ppd.get ('ppd-device-id'))
                if ppd_device_id:
                    ppd_device_id_dict = parseDeviceID (ppd_device_id)
                    ppd_cmd_field = ppd_device_id_dict["CMD"]

                if (not ppd_cmd_field and
                    # ppd-type is not reliable for driver-generated
                    # PPDs (see CUPS STR #3720).  Neither gutenprint
                    # nor foomatic specify ppd-type in their CUPS
                    # drivers.
                    ppdname.find (":") == -1):
                    # If this is a PostScript PPD we know which
                    # command set it will use.
                    ppd_type = _singleton (ppd.get ('ppd-type'))
                    if ppd_type == "postscript":
                        ppd_cmd_field = ["POSTSCRIPT"]

                if not ppd_cmd_field:
                    # We can't be sure which command set this driver
                    # uses.
                    continue

                usable = False
                for pdl in ppd_cmd_field:
                    if pdl in commandsets:
                        usable = True
                        break

                if usable:
                    exact_cmd.add (ppdname)
                else:
                    failed.add (ppdname)

            # Assign the more specific fit "exact-cmd" to those that
            # positively matched the CMD field.
            for each in exact_cmd:
                if fit[each] == self.FIT_EXACT:
                    fit[each] = self.FIT_EXACT_CMD
                    _debugprint (self.FIT_EXACT_CMD + ": %s" % each)

            if len (failed) < len ([d for (d, m) in fit.items ()
                                    if m != 'generic']):
                _debugprint ("Removed %s due to CMD mis-match" % failed)
                for each in failed:
                    del fit[each]
            else:
                _debugprint ("Not removing %s " % failed +
                             "due to CMD mis-match as it would "
                             "leave nothing good")

        if not fit:
            fallbacks = ["textonly.ppd", "postscript.ppd"]
            found = False
            for fallback in fallbacks:
                _debugprint ("'%s' fallback" % fallback)
                fallbackgz = fallback + ".gz"
                for ppdpath in self.ppds.keys ():
                    if (ppdpath.endswith (fallback) or
                        ppdpath.endswith (fallbackgz)):
                        fit[ppdpath] = self.FIT_NONE
                        found = True
                        break

                if found:
                    break

                _debugprint ("Fallback '%s' not available" % fallback)

            if not found:
                _debugprint ("No fallback available; choosing any")
                fit[list(self.ppds.keys ())[0]] = self.FIT_NONE

        if not id_matched:
            sanitised_uri = re.sub (pattern="//[^@]*@/?", repl="//",
                                    string=str (uri))
            try:
                cmd = reduce (lambda x, y: x + ","+ y, commandsets)
            except TypeError:
                cmd = ""
            id = "MFG:%s;MDL:%s;" % (orig_mfg, orig_mdl)
            if cmd:
                id += "CMD:%s;" % cmd
            if description:
                id += "DES:%s;" % description

            print ("No ID match for device %s:" % sanitised_uri)
            print (id)

        return fit
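
The re.sub at the end of this method strips any user:password@ credentials out of the device URI before it is printed, so the diagnostic output does not leak authentication details. A minimal sketch with an invented URI:

    import re

    uri = "smb://user:secret@printserver/laser1"
    # Everything from "//" up to the first "@" (plus an optional trailing "/")
    # is replaced with a bare "//", removing the credentials.
    print(re.sub(pattern="//[^@]*@/?", repl="//", string=uri))
    # -> smb://printserver/laser1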

Example 92

View license
def create_model_data(dmd):
    '''
    Return an Endpoint suitable for Impact functional testing.
    '''
    # DeviceClass
    dc = dmd.Devices.createOrganizer('/OpenStack/Infrastructure')
    dc.setZenProperty('zPythonClass', 'ZenPacks.zenoss.OpenStackInfrastructure.Endpoint')

    # OSProcessClasses
    osc = dmd.Processes.createOrganizer('/OpenStack')
    for binary in ['nova-cert', 'nova-conductor', 'nova-consoleauth', 'nova-scheduler', 'nova-compute', 'nova-api']:
        osc.manage_addOSProcessClass(binary)

    # Endpoint
    endpoint = dc.createInstance('endpoint')

    # Org Structure
    from ZenPacks.zenoss.OpenStackInfrastructure.Region import Region
    from ZenPacks.zenoss.OpenStackInfrastructure.AvailabilityZone import AvailabilityZone
    region = addContained(endpoint, "components", Region("region"))
    zone1 = addContained(endpoint, "components", AvailabilityZone("zone1"))
    zone2 = addContained(endpoint, "components", AvailabilityZone("zone2"))
    addNonContained(region, "childOrgs", zone1)
    addNonContained(region, "childOrgs", zone2)

    # Tenants
    from ZenPacks.zenoss.OpenStackInfrastructure.Tenant import Tenant
    tenant1 = addContained(endpoint, "components", Tenant("tenant-tenant1"))
    tenant2 = addContained(endpoint, "components", Tenant("tenant-tenant2"))

    # Flavor
    from ZenPacks.zenoss.OpenStackInfrastructure.Flavor import Flavor
    flavor1 = addContained(endpoint, "components", Flavor("flavor1"))

    # Image
    from ZenPacks.zenoss.OpenStackInfrastructure.Image import Image
    image1 = addContained(endpoint, "components", Image("image1"))

    # Host
    from ZenPacks.zenoss.OpenStackInfrastructure.Host import Host
    computehost1 = addContained(endpoint, "components", Host("computehost1"))
    addNonContained(computehost1, "orgComponent", zone1)
    computehost2 = addContained(endpoint, "components", Host("computehost2"))
    addNonContained(computehost2, "orgComponent", zone2)
    controllerhost = addContained(endpoint, "components", Host("controllerhost"))
    addNonContained(controllerhost, "orgComponent", zone1)

    # SoftwareComponents
    from ZenPacks.zenoss.OpenStackInfrastructure.NovaService import NovaService
    from ZenPacks.zenoss.OpenStackInfrastructure.NovaApi import NovaApi
    nova_consoleauth = addContained(endpoint, "components", NovaService("nova-consoleauth"))
    nova_consoleauth.binary = 'nova-consoleauth'
    addNonContained(nova_consoleauth, "hostedOn", controllerhost)
    addNonContained(nova_consoleauth, "orgComponent", zone1)
    nova_scheduler = addContained(endpoint, "components", NovaService("nova-scheduler"))
    nova_scheduler.binary = 'nova-scheduler'
    addNonContained(nova_scheduler, "hostedOn", controllerhost)
    addNonContained(nova_scheduler, "orgComponent", zone1)
    nova_conductor1 = addContained(endpoint, "components", NovaService("nova-conductor1"))
    nova_conductor1.binary = 'nova-conductor'
    nova_conductor2 = addContained(endpoint, "components", NovaService("nova-conductor2"))
    nova_conductor2.binary = 'nova-conductor'
    addNonContained(nova_conductor1, "hostedOn", computehost1)
    addNonContained(nova_conductor1, "orgComponent", zone1)
    addNonContained(nova_conductor2, "hostedOn", computehost2)
    addNonContained(nova_conductor2, "orgComponent", zone2)
    nova_compute1 = addContained(endpoint, "components", NovaService("nova-compute1"))
    nova_compute1.binary = 'nova-compute'
    nova_compute2 = addContained(endpoint, "components", NovaService("nova-compute2"))
    nova_compute2.binary = 'nova-compute'
    addNonContained(nova_compute1, "hostedOn", computehost1)
    addNonContained(nova_compute1, "orgComponent", zone1)
    addNonContained(nova_compute2, "hostedOn", computehost2)
    addNonContained(nova_compute2, "orgComponent", zone2)
    nova_cert = addContained(endpoint, "components", NovaService("nova-cert"))
    nova_cert.binary = 'nova-cert'
    addNonContained(nova_cert, "hostedOn", controllerhost)
    addNonContained(nova_cert, "orgComponent", zone1)
    nova_api = addContained(endpoint, "components", NovaApi("nova-api"))
    nova_api.binary = 'nova-api'
    addNonContained(nova_api, "hostedOn", controllerhost)
    addNonContained(nova_api, "orgComponent", region)

    # Hypervisor
    from ZenPacks.zenoss.OpenStackInfrastructure.Hypervisor import Hypervisor
    hypervisor1 = addContained(endpoint, "components", Hypervisor("hypervisor1"))
    hypervisor2 = addContained(endpoint, "components", Hypervisor("hypervisor2"))
    addNonContained(hypervisor1, "host", computehost1)
    addNonContained(hypervisor2, "host", computehost2)

    # Instance
    from ZenPacks.zenoss.OpenStackInfrastructure.Instance import Instance
    instance1 = addContained(endpoint, "components", Instance("instance1"))
    instance2 = addContained(endpoint, "components", Instance("instance2"))
    instance3 = addContained(endpoint, "components", Instance("instance3"))
    instance4 = addContained(endpoint, "components", Instance("instance4"))
    addNonContained(instance1, "flavor", flavor1)
    addNonContained(instance2, "flavor", flavor1)
    addNonContained(instance3, "flavor", flavor1)
    addNonContained(instance4, "flavor", flavor1)
    addNonContained(instance1, "image", image1)
    addNonContained(instance2, "image", image1)
    addNonContained(instance3, "image", image1)
    addNonContained(instance4, "image", image1)
    addNonContained(instance1, "hypervisor", hypervisor1)
    addNonContained(instance2, "hypervisor", hypervisor1)
    addNonContained(instance3, "hypervisor", hypervisor2)
    addNonContained(instance4, "hypervisor", hypervisor2)
    addNonContained(instance1, "tenant", tenant1)
    addNonContained(instance2, "tenant", tenant2)
    addNonContained(instance3, "tenant", tenant1)
    addNonContained(instance4, "tenant", tenant2)

    # Vnic
    from ZenPacks.zenoss.OpenStackInfrastructure.Vnic import Vnic
    instance1vnic1 = addContained(instance1, "vnics", Vnic("instance1_vnic1"))
    instance1vnic1.macaddress = 'de:ad:be:ef:01:01'
    instance1vnic1.index_object()
    instance1vnic2 = addContained(instance1, "vnics", Vnic("instance1_vnic2"))
    instance1vnic2.macaddress = 'de:ad:be:ef:01:02'
    instance1vnic2.index_object()
    instance2vnic1 = addContained(instance2, "vnics", Vnic("instance2_vnic1"))
    instance2vnic1.macaddress = 'de:ad:be:ef:02:01'
    instance2vnic1.index_object()
    instance2vnic2 = addContained(instance2, "vnics", Vnic("instance2_vnic2"))
    instance2vnic2.macaddress = 'de:ad:be:ef:02:02'
    instance2vnic2.index_object()
    instance3vnic1 = addContained(instance3, "vnics", Vnic("instance3_vnic1"))
    instance3vnic1.macaddress = 'de:ad:be:ef:03:01'
    instance3vnic1.index_object()
    instance3vnic2 = addContained(instance3, "vnics", Vnic("instance3_vnic2"))
    instance3vnic2.macaddress = 'de:ad:be:ef:03:02'
    instance3vnic2.index_object()
    instance4vnic1 = addContained(instance4, "vnics", Vnic("instance4_vnic1"))
    instance4vnic1.macaddress = 'de:ad:be:ef:04:01'
    instance4vnic1.index_object()
    instance4vnic2 = addContained(instance4, "vnics", Vnic("instance4_vnic2"))
    instance4vnic2.macaddress = 'de:ad:be:ef:04:02'
    instance4vnic2.index_object()

    # Linux guest devices (Virtual)
    # make sure that the interfaces line up.
    guest_dc = dmd.Devices.createOrganizer('/Server/SSH/Linux')
    guest_dc.setZenProperty('zPythonClass', 'Products.ZenModel.Device')
    guest_instance1 = guest_dc.createInstance("g-instance1")
    guest_instance2 = guest_dc.createInstance("g-instance2")
    guest_instance3 = guest_dc.createInstance("g-instance3")
    # instance4 is not monitored by zenoss.

    from Products.ZenModel.IpInterface import IpInterface

    def add_linux_interface_mac(device, interface_name, macaddress):
        eth_if = IpInterface(interface_name)
        device.os.interfaces._setObject(eth_if.id, eth_if)
        eth_if = device.os.interfaces._getOb(eth_if.id)
        eth_if.macaddress = macaddress
        eth_if.index_object()
        device.index_object()

    add_linux_interface_mac(guest_instance1, 'eth0', 'de:ad:be:ef:01:01')
    add_linux_interface_mac(guest_instance1, 'eth1', 'de:ad:be:ef:01:02')
    add_linux_interface_mac(guest_instance2, 'eth0', 'de:ad:be:ef:02:01')
    add_linux_interface_mac(guest_instance2, 'eth1', 'de:ad:be:ef:02:02')
    add_linux_interface_mac(guest_instance3, 'eth0', 'de:ad:be:ef:03:01')
    add_linux_interface_mac(guest_instance3, 'eth1', 'de:ad:be:ef:03:02')

    # Linux devices (Physical)
    # (link to host1 and host2)
    phys_dc = dmd.Devices.createOrganizer('/Server/SSH/Linux/NovaHost')
    phys_dc.setZenProperty('zPythonClass', 'Products.ZenModel.Device')
    phys_computehost1 = phys_dc.createInstance("p-computehost1")
    phys_computehost2 = phys_dc.createInstance("p-computehost2")
    phys_controllerhost = phys_dc.createInstance("p-controllerhost")

    # Link the host components to the physical hosts.
    computehost1.claim_proxy_device(phys_computehost1)
    computehost2.claim_proxy_device(phys_computehost2)
    controllerhost.claim_proxy_device(phys_controllerhost)

    # Add OSprocesses for each of the software components.
    from ZenPacks.zenoss.OpenStackInfrastructure.SoftwareComponent import SoftwareComponent
    from Products.ZenModel.OSProcess import OSProcess
    for component in endpoint.components():
        if isinstance(component, SoftwareComponent):
            binary = component.binary
            linux_device = component.hostedOn().proxy_device()

            process_id = '%s_%s' % (linux_device.id, binary)
            process = OSProcess(process_id)
            linux_device.os.processes._setObject(process_id, process)
            process = linux_device.os.processes._getOb(process_id)

            process_class = re.sub(r'\d+$', '', binary)
            process.setOSProcessClass("Processes/OpenStack/osProcessClasses/%s" % process_class)


    # Cinder
    from ZenPacks.zenoss.OpenStackInfrastructure.Volume import Volume
    from ZenPacks.zenoss.OpenStackInfrastructure.VolSnapshot import VolSnapshot
    volume1 = addContained(endpoint, "components", Volume("volume1"))
    volsnap1 = addContained(endpoint, "components", VolSnapshot("volsnap1"))
    addNonContained(instance1, "volumes", volume1)
    addNonContained(volume1, "volSnapshots", volsnap1)

    return {
        'endpoint': endpoint,
        'phys_dc': phys_dc,
        'guest_dc': guest_dc
    }
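
The re.sub inside the OS process loop strips a trailing instance number off each binary name so it can be mapped back to the OSProcessClass created at the top of the function (e.g. a hypothetical "nova-conductor1" maps to the "nova-conductor" class). A minimal sketch with illustrative names:

    import re

    for binary in ('nova-conductor1', 'nova-compute2', 'nova-api'):
        # \d+$ is anchored to the end of the string, so only a trailing
        # run of digits is removed; names without one pass through unchanged.
        print(re.sub(r'\d+$', '', binary))
    # -> nova-conductor
    # -> nova-compute
    # -> nova-api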

Example 93

Project: bitcurator
Source File: ttfonts.py
View license
    def extractInfo(self):
        #################/
        # name - Naming table
        #################/
        self.sFamilyClass = 0
        self.sFamilySubClass = 0

        name_offset = self.seek_table("name")
        format = self.read_ushort()
        if format != 0:
            die("Unknown name table format " + str(format))
        numRecords = self.read_ushort()
        string_data_offset = name_offset + self.read_ushort()
        names = {1: '', 2: '', 3: '', 4: '', 6: ''}
        K = list(names.keys())
        nameCount = len(names)
        for i in range(numRecords):
            platformId = self.read_ushort()
            encodingId = self.read_ushort()
            languageId = self.read_ushort()
            nameId = self.read_ushort()
            length = self.read_ushort()
            offset = self.read_ushort()
            if (nameId not in K):
                continue
            N = ''
            if (platformId == 3 and encodingId == 1 and languageId == 0x409):  # Microsoft, Unicode, US English, PS Name
                opos = self._pos
                self.seek(string_data_offset + offset)
                if (length % 2 != 0):
                    die("PostScript name is UTF-16BE string of odd length")
                length /= 2
                N = ''
                while (length > 0):
                    char = self.read_ushort()
                    N += (chr(char))
                    length -= 1
                self._pos = opos
                self.seek(opos)

            elif (platformId == 1 and encodingId == 0 and languageId == 0):  # Macintosh, Roman, English, PS Name
                opos = self._pos
                N = self.get_chunk(string_data_offset + offset, length)
                self._pos = opos
                self.seek(opos)

            if (N and names[nameId] == ''):
                names[nameId] = N
                nameCount -= 1
                if (nameCount == 0):
                    break

        if names[6]:
            psName = names[6]
        elif names[4]:
            psName = re.sub(' ', '-', names[4])
        elif names[1]:
            psName = re.sub(' ', '-', names[1])
        else:
            psName = ''
            
        if not psName:
            die("Could not find PostScript font name")
            
        self.name = psName
        if names[1]:
            self.familyName = names[1]
        else:
            self.familyName = psName
            
        if names[2]:
            self.styleName = names[2]
        else:
            self.styleName = 'Regular'
            
        if names[4]:
            self.fullName = names[4]
        else:
            self.fullName = psName
            
        if names[3]:
            self.uniqueFontID = names[3]
        else:
            self.uniqueFontID = psName
            
        if names[6]:
            self.fullName = names[6]

        #################/
        # head - Font header table
        #################/
        self.seek_table("head")
        self.skip(18)
        self.unitsPerEm = unitsPerEm = self.read_ushort()
        scale = 1000 / float(unitsPerEm)
        self.skip(16)
        xMin = self.read_short()
        yMin = self.read_short()
        xMax = self.read_short()
        yMax = self.read_short()
        self.bbox = [(xMin * scale), (yMin * scale),
                     (xMax * scale), (yMax * scale)]
        self.skip(3 * 2)
        
        indexToLocFormat = self.read_ushort()
        glyphDataFormat = self.read_ushort()
        
        if glyphDataFormat != 0:
            die('Unknown glyph data format ' + str(glyphDataFormat))

        #################/
        # hhea metrics table
        #################/
        # ttf2t1 seems to use this value rather than the one in OS/2 - so put in for compatibility
        if "hhea" in self.tables:
            self.seek_table("hhea")
            self.skip(4)
            hheaAscender = self.read_short()
            hheaDescender = self.read_short()
            self.ascent = hheaAscender * scale
            self.descent = hheaDescender * scale

        #################/
        # OS/2 - OS/2 and Windows metrics table
        #################/
        if "OS/2" in self.tables:
            self.seek_table("OS/2")
            version = self.read_ushort()
            self.skip(2)
            usWeightClass = self.read_ushort()
            self.skip(2)
            fsType = self.read_ushort()
            if fsType == 0x0002 or (fsType & 0x0300) != 0:
                die('ERROR - Font file ' + self.filename + ' cannot be embedded due to copyright restrictions.')
                self.restrictedUse = True

            self.skip(20)
            sF = self.read_short()
            self.sFamilyClass = (sF >> 8)
            self.sFamilySubClass = (sF & 0xFF)
            self._pos += 10  # PANOSE = 10 byte length
            panose = self.fh.read(10)
            self.skip(26)
            sTypoAscender = self.read_short()
            sTypoDescender = self.read_short()
            
            if not self.ascent:
                self.ascent = (sTypoAscender * scale)
            if not self.descent:
                self.descent = (sTypoDescender * scale)
            if version > 1:
                self.skip(16)
                sCapHeight = self.read_short()
                self.capHeight = (sCapHeight * scale)
            else:
                self.capHeight = self.ascent

        else:
            usWeightClass = 500
            if not self.ascent:
                self.ascent = (yMax * scale)
            if not self.descent:
                self.descent = (yMin * scale)
            self.capHeight = self.ascent

        self.stemV = 50 + int(pow((usWeightClass / 65.0), 2))

        #################/
        # post - PostScript table
        #################/
        self.seek_table("post")
        self.skip(4)
        self.italicAngle = self.read_short() + self.read_ushort() / 65536.0
        self.underlinePosition = self.read_short() * scale
        self.underlineThickness = self.read_short() * scale
        isFixedPitch = self.read_ulong()

        self.flags = 4

        if self.italicAngle != 0:
            self.flags = self.flags | 64
            
        if usWeightClass >= 600:
            self.flags = self.flags | 262144
            
        if isFixedPitch:
            self.flags = self.flags | 1

        #################/
        # hhea - Horizontal header table
        #################/
        self.seek_table("hhea")
        self.skip(32)
        metricDataFormat = self.read_ushort()
        if (metricDataFormat != 0):
            die('Unknown horizontal metric data format ' + str(metricDataFormat))
        numberOfHMetrics = self.read_ushort()
        if (numberOfHMetrics == 0):
            die('Number of horizontal metrics is 0')

        #################/
        # maxp - Maximum profile table
        #################/
        self.seek_table("maxp")
        self.skip(4)
        numGlyphs = self.read_ushort()

        #################/
        # cmap - Character to glyph index mapping table
        #################/
        cmap_offset = self.seek_table("cmap")
        self.skip(2)
        cmapTableCount = self.read_ushort()
        unicode_cmap_offset = 0
        for i in range(cmapTableCount):
            platformID = self.read_ushort()
            encodingID = self.read_ushort()
            offset = self.read_ulong()
            save_pos = self._pos

            if (platformID == 3 and encodingID == 1) or platformID == 0:  # Microsoft, Unicode
                format_ = self.get_ushort(cmap_offset + offset)
                print('Format is %s' % format_)
                if format_ == 4:
                    if not unicode_cmap_offset:
                        unicode_cmap_offset = cmap_offset + offset
                    break
            self.seek(save_pos)

        if not unicode_cmap_offset:
            die('Font (' + self.filename + ') does not have cmap for Unicode (platform 3, encoding 1, format 4, or platform 0, any encoding, format 4)')

        glyphToChar = {}
        charToGlyph = {}
        self.getCMAP4(unicode_cmap_offset, glyphToChar, charToGlyph)

        #################/
        # hmtx - Horizontal metrics table
        #################/
        self.getHMTX(numberOfHMetrics, numGlyphs, glyphToChar, scale)
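
Both re.sub calls in the naming-table section simply turn spaces into hyphens when a dedicated PostScript name (name ID 6) is missing, since PostScript font names may not contain spaces. A minimal sketch with an invented full name (for a fixed literal like this, str.replace would work just as well):

    import re

    full_name = "DejaVu Sans Bold Oblique"
    # PostScript font names may not contain spaces, so they become hyphens.
    print(re.sub(' ', '-', full_name))  # -> DejaVu-Sans-Bold-Oblique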

Example 94

Project: brython
Source File: markdown.py
View license
def mark(src):

    global refs
    t0 = time.time()
    refs = {}
    # split source in sections
    # sections can be :
    # - a block-level HTML element (markdown syntax will not be processed)
    # - a script
    # - a span-level HTML tag (markdown syntax will be processed)
    # - a code block
    
    # normalise line feeds
    src = src.replace('\r\n','\n')
    
    # lines followed by dashes
    src = re.sub(r'(.*?)\n=+\n', '\n# \\1\n', src)
    src = re.sub(r'(.*?)\n-+\n', '\n## \\1\n', src) 

    lines = src.split('\n')+['']
    
    i = bq = 0
    ul = ol = 0
    
    while i<len(lines):

        # enclose lines starting by > in a blockquote
        if lines[i].startswith('>'):
            nb = 1
            while nb<len(lines[i]) and lines[i][nb]=='>':
                nb += 1
            lines[i] = lines[i][nb:]
            if nb>bq:
                lines.insert(i,'<blockquote>'*(nb-bq))
                i += 1
                bq = nb
            elif nb<bq:
                lines.insert(i,'</blockquote>'*(bq-nb))
                i += 1
                bq = nb
        elif bq>0:
            lines.insert(i,'</blockquote>'*bq)
            i += 1
            bq = 0

        # unordered lists
        if lines[i].strip() and lines[i].lstrip()[0] in '-+*' \
            and len(lines[i].lstrip())>1 \
            and lines[i].lstrip()[1]==' ' \
            and (i==0 or ul or not lines[i-1].strip()):
            # line indentation indicates nesting level
            nb = 1+len(lines[i])-len(lines[i].lstrip())
            lines[i] = '<li>'+lines[i][nb:]
            if nb>ul:
                lines.insert(i,'<ul>'*(nb-ul))
                i += 1
            elif nb<ul:
                lines.insert(i,'</ul>'*(ul-nb))
                i += 1
            ul = nb
        elif ul and not lines[i].strip():
            if i<len(lines)-1 and lines[i+1].strip() \
                and not lines[i+1].startswith(' '):
                    nline = lines[i+1].lstrip()
                    if nline[0] in '-+*' and len(nline)>1 and nline[1]==' ':
                        pass
                    else:
                        lines.insert(i,'</ul>'*ul)
                        i += 1
                        ul = 0

        # ordered lists
        mo = re.search(r'^(\d+\.)',lines[i])
        if mo:
            if not ol:
                lines.insert(i,'<ol>')
                i += 1
            lines[i] = '<li>'+lines[i][len(mo.groups()[0]):]
            ol = 1
        elif ol and not lines[i].strip() and i<len(lines)-1 \
            and not lines[i+1].startswith(' ') \
            and not re.search(r'^(\d+\.)',lines[i+1]):
            lines.insert(i,'</ol>')
            i += 1
            ol = 0
        
        i += 1
    
    if ul:
        lines.append('</ul>'*ul)
    if ol:
        lines.append('</ol>'*ol)
    if bq:
        lines.append('</blockquote>'*bq)

    t1 = time.time()
    #print('part 1', t1-t0)    
    sections = []
    scripts = []
    section = Marked()

    i = 0
    while i<len(lines):
        line = lines[i]
        if line.strip() and line.startswith('    '):
            if isinstance(section,Marked) and section.line:
                sections.append(section)
            section = CodeBlock(line[4:])
            j = i+1
            while j<len(lines) and lines[j].startswith('    '):
                section.lines.append(lines[j][4:])
                j += 1
            sections.append(section)
            section = Marked()
            i = j   
            continue

        elif line.strip() and line.startswith("```"):
            # fenced code blocks à la Github Flavoured Markdown
            if isinstance(section,Marked) and section.line:
                sections.append(section)
            section = CodeBlock(line)
            j = i+1
            while j<len(lines) and not lines[j].startswith("```"):
                section.lines.append(lines[j])
                j += 1
            sections.append(section)
            section = Marked()
            i = j+1
            continue

        elif line.lower().startswith('<script'):
            if isinstance(section,Marked) and section.line:
                sections.append(section)
                section = Marked()
            j = i+1
            while j<len(lines):
                if lines[j].lower().startswith('</script>'):
                    scripts.append('\n'.join(lines[i+1:j]))
                    for k in range(i,j+1):
                        lines[k] = ''
                    break
                j += 1
            i = j
            continue

        # atext header
        elif line.startswith('#'):
            level = 1
            line = lines[i]
            while level<len(line) and line[level]=='#' and level<=6:
                level += 1
            if not line[level+1:].strip():
                if level==1:
                    i += 1
                    continue
                else:
                    lines[i] = '<H%s>%s</H%s>\n' %(level-1,'#',level-1)
            else:
                lines[i] = '<H%s>%s</H%s>\n' %(level,line[level+1:],level)

        else:
            mo = re.search(ref_pattern,line)
            if mo is not None:
                if isinstance(section,Marked) and section.line:
                    sections.append(section)
                    section = Marked()
                key = mo.groups()[0]
                value = URL(mo.groups()[1])
                refs[key.lower()] = value
            else:
                if not line.strip():
                    line = '<p></p>'
                if section.line:
                    section.line += '\n'
                section.line += line
                    
            i += 1
    t2 = time.time()
    #print('section 2', t2-t1)
    if isinstance(section,Marked) and section.line:
        sections.append(section)

    res = ''
    for section in sections:
        mk,_scripts = section.to_html()
        res += mk
        scripts += _scripts
    #print('end mark', time.time()-t2)
    return res,scripts
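
The two re.sub calls at the top of mark() convert setext-style headers (a line underlined with "=" or "-") into ATX-style "#" headers before the line-by-line pass, using the \1 backreference to keep the title text. A minimal sketch with invented source text:

    import re

    src = "Title\n=====\nSubtitle\n--------\nbody text\n"
    src = re.sub(r'(.*?)\n=+\n', '\n# \\1\n', src)   # "=" underline -> "# " (h1)
    src = re.sub(r'(.*?)\n-+\n', '\n## \\1\n', src)  # "-" underline -> "## " (h2)
    print(src)  # -> "\n# Title\n\n## Subtitle\nbody text\n"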

Example 95

Project: KaraKara
Source File: __init__.py
View license
def main(global_config, **settings):
    """
        This function returns a Pyramid WSGI application.
    """
    # Setup --------------------------------------------------------------------

    # Db
    init_DBSession(settings)

    # Pyramid Global Settings
    config = Configurator(settings=settings)  # , autocommit=True

    # Register Additional Includes ---------------------------------------------
    config.include('pyramid_mako')  # The mako.directories value is updated in the scan for addons. We trigger the import here to include the correct folders.

    # Reload on template change
    template_filenames = map(operator.attrgetter('absolute'), file_scan(config.registry.settings['mako.directories']))
    add_file_callback(lambda: template_filenames)

    # Parse/Convert setting keys that have specified datatypes
    for key in config.registry.settings.keys():
        config.registry.settings[key] = convert_str_with_type(config.registry.settings[key])

    # i18n
    config.add_translation_dirs(config.registry.settings['i18n.translation_dirs'])

    # Session Manager
    session_settings = extract_subkeys(config.registry.settings, 'session.')
    session_factory = SignedCookieSessionFactory(serializer=json_serializer, **session_settings)
    config.set_session_factory(session_factory)

    # Cachebust etags ----------------------------------------------------------
    #  crude implementation; count the number of tags in db, if that's changed, the etags will invalidate
    if not config.registry.settings['server.etag.cache_buster']:
        from .model.actions import last_update
        config.registry.settings['server.etag.cache_buster'] = 'last_update:{0}'.format(str(last_update()))

    # Search Config ------------------------------------------------------------
    import karakara.views.search
    karakara.views.search.search_config = read_json(config.registry.settings['karakara.search.view.config'])

    # WebSocket ----------------------------------------------------------------

    class NullAuthEchoServerManager(object):
        def recv(self, *args, **kwargs):
            pass
    socket_manager = NullAuthEchoServerManager()

    # Do not activate websocket if in community mode
    if config.registry.settings.get('karakara.server.mode') == 'comunity':
        config.registry.settings['karakara.websocket.port'] = None

    if config.registry.settings.get('karakara.websocket.port'):
        def authenicator(key):
            """Only admin authenticated keys can connect to the websocket"""
            request = Request({'HTTP_COOKIE':'{0}={1}'.format(config.registry.settings['session.cookie_name'],key)})
            session_data = session_factory(request)
            return session_data and session_data.get('admin')
        try:
            _socket_manager = AuthEchoServerManager(
                authenticator=authenicator,
                websocket_port=config.registry.settings['karakara.websocket.port'],
                tcp_port=config.registry.settings.get('karakara.tcp.port'),
            )
            _socket_manager.start()
            socket_manager = _socket_manager
        except OSError:
            log.warn('Unable to setup websocket')

    config.registry['socket_manager'] = socket_manager


    # Login Providers ----------------------------------------------------------

    from .views.comunity_login import social_login
    social_login.user_store = ComunityUserStore()
    login_providers = config.registry.settings.get('login.provider.enabled')
    # Facebook
    if 'facebook' in login_providers:
        for settings_key in ('facebook.appid', 'facebook.secret'):
            assert config.registry.settings.get(settings_key), 'To use facebook as a login provider appid and secret must be provided'
        social_login.add_login_provider(FacebookLogin(
            appid=config.registry.settings.get('facebook.appid'),
            secret=config.registry.settings.get('facebook.secret'),
            permissions=config.registry.settings.get('facebook.permissions'),
        ))
    # Firefox Persona (Deprecated technology but a useful reference)
    #if 'persona' in login_providers:
    #    social_login.add_login_provider(PersonaLogin(
    #        site_url=config.registry.settings.get('server.url')
    #    ))
    # No login provider
    if not login_providers and config.registry.settings.get('karakara.server.mode') == 'development':
        # Auto login if no service keys are provided
        social_login.add_login_provider(NullLoginProvider())
        social_login.user_store = NullComunityUserStore()
    template_helpers.javascript_inline['comunity'] = social_login.html_includes

    # Renderers ----------------------------------------------------------------

    # AllanC - currently the auto_format decorator does all the formatting work
    #          it would be far preferable to use the pyramid renderer framework
    #          issue is, we want to set the renderer to be dynamic based on the url given
    #          I don't want to define EVERY method with loads of renderer tags
    #          and I don't want to define 5+ routes for every view callable with differnt formats
    #          We need a nice way of doing this in pyramid, and right now, after HOURS of trawling
    #          the doc and experimenting, I can't find one.
    #from .renderers.auto_render_factory import AutoRendererFactory, handle_before_render
    #config.add_renderer(None   , AutoRendererFactory) #'renderers.auto_render_factory.auto_renderer_factory'
    #config.add_renderer('.html', 'pyramid.mako_templating.renderer_factory')
    #config.add_subscriber(handle_before_render , pyramid.events.BeforeRender) # maybe use this to set renderer?
    # closest I've seen
    #   http://zhuoqiang.me/a/restful-pyramid
    #   http://stackoverflow.com/questions/4633320/is-there-a-better-way-to-switch-between-html-and-json-output-in-pyramid


    # Routes -------------------------------------------------------------------

    # Static Routes
    config.add_static_view(name='ext'   , path='../externals/static') #cache_max_age=3600 # settings["static.assets"]
    config.add_static_view(name='static', path='karakara:{0}'.format(settings["static.assets"])) #cache_max_age=3600 # settings["static.assets"]
    config.add_static_view(name='player', path=settings["static.player"])

    # AllanC - it's official ... static route setup and generation is a mess in pyramid
    #config.add_static_view(name=settings["static.media" ], path="karakara:media" )
    config.add_static_view(name='files' , path=settings["static.processed"])

    # Routes
    def append_format_pattern(route):
        return re.sub(r'{(.*)}', r'{\1:[^/\.]+}', route) + r'{spacer:[.]?}{format:(%s)?}' % '|'.join(registered_formats())

    config.add_route('home'          , append_format_pattern('/')              )
    config.add_route('track'         , append_format_pattern('/track/{id}')    )
    config.add_route('track_list'    , append_format_pattern('/track_list')    )
    config.add_route('queue'         , append_format_pattern('/queue')         )
    config.add_route('priority_tokens', append_format_pattern('/priority_tokens'))
    config.add_route('fave'          , append_format_pattern('/fave')          )
    config.add_route('message'       , append_format_pattern('/message')          )
    config.add_route('admin_toggle'  , append_format_pattern('/admin')         )
    config.add_route('admin_lock'    , append_format_pattern('/admin_lock')    )
    config.add_route('remote'        , append_format_pattern('/remote')        )
    config.add_route('feedback'      , append_format_pattern('/feedback')      )
    config.add_route('settings'      , append_format_pattern('/settings')      )
    config.add_route('random_images' , append_format_pattern('/random_images') )
    config.add_route('inject_testdata' , append_format_pattern('/inject_testdata') )
    config.add_route('stats'         , append_format_pattern('/stats')         )
    config.add_route('comunity'      , append_format_pattern('/comunity')      )
    config.add_route('comunity_login', append_format_pattern('/comunity/login'))
    config.add_route('comunity_logout', append_format_pattern('/comunity/logout'))
    config.add_route('comunity_list' , append_format_pattern('/comunity/list') )
    config.add_route('comunity_track', append_format_pattern('/comunity/track/{id}'))
    config.add_route('comunity_upload', append_format_pattern('/comunity/upload'))
    config.add_route('comunity_settings', append_format_pattern('/comunity/settings'))
    config.add_route('comunity_processmedia_log', append_format_pattern('/comunity/processmedia_log'))

    config.add_route('search_tags'   , '/search_tags/{tags:.*}')
    config.add_route('search_list'   , '/search_list/{tags:.*}')

    # Upload extras -----
    #config.add_static_view(name=settings['upload.route.uploaded'], path=settings['upload.path'])  # the 'upload' route above always matchs first
    config.add_route('upload', '/upload{sep:/?}{name:.*}')

    # Events -------------------------------------------------------------------
    config.add_subscriber(add_localizer_to_request, pyramid.events.NewRequest)
    config.add_subscriber(add_render_globals_to_template, pyramid.events.BeforeRender)

    # Return -------------------------------------------------------------------
    config.scan(ignore='.tests')
    config.scan('externals.lib.pyramid_helpers.views')
    return config.make_wsgi_app()
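
The re.sub worth noting in this example sits inside append_format_pattern: it rewrites every {placeholder} in a Pyramid route so the placeholder cannot swallow a slash or a dot, then appends an optional format suffix built from the registered renderers. A minimal sketch of the same substitution, with a hard-coded format tuple standing in for registered_formats():

import re

def append_format_pattern(route, formats=('json', 'html')):
    # 'formats' is a hypothetical stand-in for registered_formats() above.
    # {name} becomes {name:[^/\.]+} so the placeholder stops at '/' or '.',
    # then an optional '.json' / '.html' style suffix is allowed.
    # The pattern is greedy, so it only behaves for routes with a single
    # placeholder, which is all this configuration uses.
    return re.sub(r'{(.*)}', r'{\1:[^/\.]+}', route) + r'{spacer:[.]?}{format:(%s)?}' % '|'.join(formats)

print(append_format_pattern('/track/{id}'))
# /track/{id:[^/\.]+}{spacer:[.]?}{format:(json|html)?}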

Example 96

Project: canvas
Source File: rjsmin.py
View license
def _make_jsmin(extended=True, python_only=False):
    """
    Generate JS minifier based on `jsmin.c by Douglas Crockford`_

    .. _jsmin.c by Douglas Crockford:
       http://www.crockford.com/javascript/jsmin.c

    :Parameters:
      `extended` : ``bool``
        Extended Regexps? (using lookahead and lookbehind). This is faster,
        because it can be optimized way more. The regexps used with `extended`
        being false are only left here to allow easier porting to platforms
        without extended regex features (and for my own reference...)

      `python_only` : ``bool``
        Use only the Python variant. If true, no attempt is made to load the
        C extension.

    :Return: Minifier
    :Rtype: ``callable``
    """
    # pylint: disable = R0912, R0914, W0612
    if not python_only:
        try:
            import _rjsmin
        except ImportError:
            pass
        else:
            return _rjsmin.jsmin
    try:
        xrange
    except NameError:
        xrange = range # pylint: disable = W0622

    space_chars = r'[\000-\011\013\014\016-\040]'

    line_comment = r'(?://[^\r\n]*)'
    space_comment = r'(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)'
    string1 = \
        r'(?:\047[^\047\\\r\n]*(?:\\(?:[^\r\n]|\r?\n|\r)[^\047\\\r\n]*)*\047)'
    string2 = r'(?:"[^"\\\r\n]*(?:\\(?:[^\r\n]|\r?\n|\r)[^"\\\r\n]*)*")'
    strings = r'(?:%s|%s)' % (string1, string2)

    charclass = r'(?:\[[^\\\]\r\n]*(?:\\[^\r\n][^\\\]\r\n]*)*\])'
    nospecial = r'[^/\\\[\r\n]'
    if extended:
        regex = r'(?:/(?![\r\n/*])%s*(?:(?:\\[^\r\n]|%s)%s*)*/)' % (
            nospecial, charclass, nospecial
        )
    else:
        regex = (
            r'(?:/(?:[^*/\\\r\n\[]|%s|\\[^\r\n])%s*(?:(?:\\[^\r\n]|%s)%s*)*/)'
        )
        regex = regex % (charclass, nospecial, charclass, nospecial)
    pre_regex = r'[(,=:\[!&|?{};\r\n]'

    space = r'(?:%s|%s)' % (space_chars, space_comment)
    newline = r'(?:%s?[\r\n])' % line_comment

    def fix_charclass(result):
        """ Fixup string of chars to fit into a regex char class """
        pos = result.find('-')
        if pos >= 0:
            result = r'%s%s-' % (result[:pos], result[pos + 1:])

        def sequentize(string):
            """
            Notate consecutive characters as sequence

            (1-4 instead of 1234)
            """
            first, last, result = None, None, []
            for char in map(ord, string):
                if last is None:
                    first = last = char
                elif last + 1 == char:
                    last = char
                else:
                    result.append((first, last))
                    first = last = char
            if last is not None:
                result.append((first, last))
            return ''.join(['%s%s%s' % (
                chr(first),
                last > first + 1 and '-' or '',
                last != first and chr(last) or ''
            ) for first, last in result])

        return _re.sub(r'([\000-\040\047])', # for better portability
            lambda m: '\\%03o' % ord(m.group(1)), (sequentize(result)
                .replace('\\', '\\\\')
                .replace('[', '\\[')
                .replace(']', '\\]')
            )
        )

    def id_literal_(what):
        """ Make id_literal like char class """
        match = _re.compile(what).match
        result = ''.join([
            chr(c) for c in xrange(127) if not match(chr(c))
        ])
        return '[^%s]' % fix_charclass(result)

    def not_id_literal_(keep):
        """ Make negated id_literal like char class """
        match = _re.compile(id_literal_(keep)).match
        result = ''.join([
            chr(c) for c in xrange(127) if not match(chr(c))
        ])
        return r'[%s]' % fix_charclass(result)

    if extended:
        id_literal = id_literal_(r'[a-zA-Z0-9_$]')
        id_literal_open = id_literal_(r'[a-zA-Z0-9_${\[(+-]')
        id_literal_close = id_literal_(r'[a-zA-Z0-9_$}\])"\047+-]')

        space_sub = _re.compile((
            r'([^\047"/\000-\040]+)'
            r'|(%(strings)s[^\047"/\000-\040]*)'
            r'|(?:(?<=%(pre_regex)s)%(space)s*(%(regex)s[^\047"/\000-\040]*))'
            r'|(?<=%(id_literal_close)s)'
                r'%(space)s*(?:(%(newline)s)%(space)s*)+'
                r'(?=%(id_literal_open)s)'
            r'|(?<=%(id_literal)s)(%(space)s)+(?=%(id_literal)s)'
            r'|%(space)s+'
            r'|(?:%(newline)s%(space)s*)+'
        ) % locals()).sub
        def space_subber(match):
            """ Substitution callback """
            # pylint: disable = C0321
            groups = match.groups()
            if groups[0]: return groups[0]
            elif groups[1]: return groups[1]
            elif groups[2]: return groups[2]
            elif groups[3]: return '\n'
            elif groups[4]: return ' '
            return ''

        def jsmin(script): # pylint: disable = W0621
            r"""
            Minify javascript based on `jsmin.c by Douglas Crockford`_\.

            Instead of parsing the stream char by char, it uses a regular
            expression approach which minifies the whole script with one big
            substitution regex.

            .. _jsmin.c by Douglas Crockford:
               http://www.crockford.com/javascript/jsmin.c

            :Parameters:
              `script` : ``str``
                Script to minify

            :Return: Minified script
            :Rtype: ``str``
            """
            return space_sub(space_subber, '\n%s\n' % script).strip()

    else:
        not_id_literal = not_id_literal_(r'[a-zA-Z0-9_$]')
        not_id_literal_open = not_id_literal_(r'[a-zA-Z0-9_${\[(+-]')
        not_id_literal_close = not_id_literal_(r'[a-zA-Z0-9_$}\])"\047+-]')

        space_norm_sub = _re.compile((
            r'(%(strings)s)'
            r'|(?:(%(pre_regex)s)%(space)s*(%(regex)s))'
            r'|(%(space)s)+'
            r'|(?:(%(newline)s)%(space)s*)+'
        ) % locals()).sub
        def space_norm_subber(match):
            """ Substitution callback """
            # pylint: disable = C0321
            groups = match.groups()
            if groups[0]: return groups[0]
            elif groups[1]: return groups[1].replace('\r', '\n') + groups[2]
            elif groups[3]: return ' '
            elif groups[4]: return '\n'

        space_sub1 = _re.compile((
            r'[\040\n]?(%(strings)s|%(pre_regex)s%(regex)s)'
            r'|\040(%(not_id_literal)s)'
            r'|\n(%(not_id_literal_open)s)'
        ) % locals()).sub
        def space_subber1(match):
            """ Substitution callback """
            groups = match.groups()
            return groups[0] or groups[1] or groups[2]

        space_sub2 = _re.compile((
            r'(%(strings)s)\040?'
            r'|(%(pre_regex)s%(regex)s)[\040\n]?'
            r'|(%(not_id_literal)s)\040'
            r'|(%(not_id_literal_close)s)\n'
        ) % locals()).sub
        def space_subber2(match):
            """ Substitution callback """
            groups = match.groups()
            return groups[0] or groups[1] or groups[2] or groups[3]

        def jsmin(script):
            r"""
            Minify javascript based on `jsmin.c by Douglas Crockford`_\.

            Instead of parsing the stream char by char, it uses a regular
            expression approach. The script is minified with three passes:

            normalization
                Control characters are mapped to spaces, spaces and newlines
                are squeezed and comments are stripped.
            space removal 1
                Spaces before certain tokens are removed
            space removal 2
                Spaces after certain tokens are removed

            .. _jsmin.c by Douglas Crockford:
               http://www.crockford.com/javascript/jsmin.c

            :Parameters:
              `script` : ``str``
                Script to minify

            :Return: Minified script
            :Rtype: ``str``
            """
            return space_sub2(space_subber2,
                space_sub1(space_subber1,
                    space_norm_sub(space_norm_subber, '\n%s\n' % script)
                )
            ).strip()
    return jsmin
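
Two re.sub patterns stand out in this example: the big single-pass minifier regex whose replacement is the space_subber callback, and the callable replacement in fix_charclass that escapes awkward characters as octal sequences. Below is a self-contained sketch of the latter; escape_for_charclass is a hypothetical name covering just the escaping steps (the sequentize pass is omitted):

import re

def escape_for_charclass(chars):
    # Escape regex char-class metacharacters first, then map control chars
    # and the single quote (octal 047) to \ooo escapes via a callable
    # replacement, the same kind of _re.sub call used in fix_charclass.
    escaped = chars.replace('\\', '\\\\').replace('[', '\\[').replace(']', '\\]')
    return re.sub(r'([\000-\040\047])',
                  lambda m: '\\%03o' % ord(m.group(1)),
                  escaped)

print(escape_for_charclass("a-z' \t"))   # a-z\047\040\011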

Example 97

Project: cgat
Source File: IndexedFasta.py
View license
def createDatabase(db, iterator,
                   force=False,
                   synonyms=None,
                   compression=None,
                   random_access_points=None,
                   regex_identifier=None,
                   clean_sequence=False,
                   ignore_duplicates=False,
                   allow_duplicates=False,
                   translator=None):
    """index files in filenames to create database.

    Two new files are created - db.fasta and db_name.idx

    If compression is enabled, provide random access points
    every # bytes.

    Dictzip is treated as an uncompressed file.

    regex_identifier: pattern to extract identifier from description line.
    If None, the part until the first white-space character is used.

    translator: specify a translator
    """

    if db.endswith(".fasta"):
        db = db[:-len(".fasta")]

    if compression:
        if compression == "lzo":
            import lzo

            def lzo_mangler(s):
                return lzo.compress(s, 9)
            mangler = lzo_mangler
            db_name = db + ".lzo"
            write_chunks = True
        elif compression == "zlib":
            def zlib_mangler(s):
                return zlib.compress(s, 9)
            mangler = zlib_mangler
            db_name = db + ".zlib"
            write_chunks = True
        elif compression == "gzip":
            mangler = gzip_mangler
            db_name = db + ".gz"
            write_chunks = True
        elif compression == "dictzip":
            from . import dictzip

            def mangler(x):
                return x

            db_name = db + ".dz"
            write_chunks = False
        elif compression == "bzip2":
            import bz2

            def bzip_mangler(x):
                return bz2.compress(x, 9)

            mangler = bzip_mangler
            db_name = db + ".bz2"
            write_chunks = True
        elif compression == "debug":
            def mangler(x):
                return x
            db_name = db + ".debug"
            write_chunks = True
        elif compression == "rle":
            from . import RLE
            mangler = RLE.compress
            db_name = db + ".rle"
            write_chunks = True
        else:
            raise ValueError("unknown compression library: %s" % compression)

        index_name = db + ".cdx"

        if write_chunks and (random_access_points is None
                             or random_access_points <= 0):
            raise ValueError("specify chunksize in --random-access-points")

    else:
        def mangler(x):
            return x
        db_name = db + ".fasta"
        write_chunks = False
        index_name = db + ".idx"

    if os.path.exists(db_name) and not force:
        raise ValueError("database %s already exists." % db_name)

    if os.path.exists(index_name) and not force:
        raise ValueError("database index %s already exists." % index_name)

    outfile_index = open(index_name, "w")
    if compression == "dictzip":
        if random_access_points is None or random_access_points <= 0:
            raise ValueError(
                "specify dictzip chunksize in --random-access-points")
        outfile_fasta = dictzip.open(
            db_name, "wb", buffersize=1000000, chunksize=random_access_points)
        compression = None
    else:
        outfile_fasta = open(db_name, "w")

    identifiers = {}
    lsequence = 0
    identifier_pos, sequence_pos = 0, 0

    if sys.version_info.major >= 3:
        translation = str.maketrans("xX", "nN")
    else:
        translation = string.maketrans("xX", "nN")

    fragments = []
    lfragment = 0

    last_identifier = None

    while 1:

        try:
            result = next(iterator)
        except StopIteration:
            break

        if not result:
            break

        is_new, identifier, fragment = result

        if is_new:
            # check for duplicate identifiers
            if identifier in identifiers:
                if ignore_duplicates:
                    raise ValueError("ignore duplicates not implemented")
                elif allow_duplicates:
                    # the current implementation will fail if the same
                    # identifiers
                    # are directly succeeding each other
                    # better: add return to iterator that indicates a new
                    # identifier
                    out_identifier = identifier + \
                        "_%i" % (identifiers[identifier])
                    identifiers[identifier] += 1
                    identifiers[out_identifier] = 1
                else:
                    raise ValueError("%s occurs more than once" %
                                     (identifier,))
            else:
                identifiers[identifier] = 1
                out_identifier = identifier

            if last_identifier:
                if write_chunks:
                    writeFragments(outfile_fasta, outfile_index,
                                   fragments, mangler,
                                   size=random_access_points,
                                   write_all=True)

                    fragments = []
                    lfragment = 0
                else:
                    outfile_fasta.write("\n")

                outfile_index.write("\t%i\n" % lsequence)

            identifier_pos = outfile_fasta.tell()
            outfile_fasta.write(mangler(">%s\n" % out_identifier))
            sequence_pos = outfile_fasta.tell()

            outfile_index.write("%s\t%i" % (out_identifier,
                                            identifier_pos))
            if write_chunks:
                outfile_index.write("\t%i" % random_access_points)
            else:
                outfile_index.write("\t%i" % sequence_pos)

            fragments = []
            lsequence = 0
            last_identifier = identifier

        if translator:
            s = translator(fragment)
        else:
            s = re.sub("\s", "", fragment.strip())
            if clean_sequence:
                s = s.translate(translation)

        lsequence += len(s)

        if write_chunks:
            fragments.append(s)
            lfragment += len(s)
            if lfragment > random_access_points:
                rest = writeFragments(outfile_fasta,
                                      outfile_index,
                                      fragments,
                                      mangler,
                                      size=random_access_points,
                                      write_all=False)
                fragments = [rest]
                lfragment = len(rest)
        else:
            outfile_fasta.write(mangler(s))

    if write_chunks:
        writeFragments(outfile_fasta, outfile_index, fragments, mangler,
                       size=random_access_points, write_all=True)
    else:
        outfile_fasta.write("\n")

    outfile_index.write("\t%i\n" % lsequence)

    # add synonyms for the table
    if synonyms:
        for key, vals in list(synonyms.items()):
            for val in vals:
                outfile_index.write("%s\t%s\n" % (key, val))
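
The re.sub here is small but central: every whitespace character is stripped from a sequence fragment before it is written to the indexed FASTA database. A stand-alone sketch (clean_fragment is a hypothetical helper name):

import re

def clean_fragment(fragment):
    # Remove every whitespace character from a FASTA sequence fragment,
    # mirroring the re.sub("\s", "", ...) call in createDatabase above.
    return re.sub(r"\s", "", fragment.strip())

print(clean_fragment("ACGT ACGT\nacgt\t"))   # ACGTACGTacgt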

Example 98

Project: cgat
Source File: cgat2rdf.py
View license
def processScript(script_name, outfile, options):
    '''process one script.'''

    # call other script
    dirname = os.path.dirname(script_name)
    basename = os.path.basename(script_name)[:-3]

    if options.src_dir:
        dirname = options.src_dir
        script_name = os.path.join(dirname, basename) + ".py"

    sys.path.insert(0, dirname)
    module = __import__(basename)

    E.Start = LocalStart
    E.info("loaded modules %s" % module)
    try:
        module.main(argv=["--help"])
    except DummyError:
        pass

    # get script's docstring
    docstring = module.__doc__

    # for k in dir(PARSER):
    #     print k, getattr(PARSER, k)
    # for option in PARSER.option_list:
    # print option, option.type, option.help, option._short_opts,
    # option._long_opts, option.default

    # @prefix clp: <http://www.humgen.nl/climate/ontologies/clp#> .
    # @prefix co: <http://www.isi.edu/ikcap/Wingse/componentOntology.owl#> .
    # @prefix dcterms: <http://purl.org/dc/terms/> .

    # n = Namespace("http://example.org/people/")
    g = Generator()

    data = collections.defaultdict(str)

    data['meta_title'] = 'Interface generator for CGAT scripts'
    data['meta_author'] = 'Andreas Heger'
    data['meta_version'] = 0.1

    data['name'] = basename
    data['interpreter'] = 'python'
    data['property_bag'] = {}
    data['description'] = getDescription(basename, docstring)
    data['help'] = docstring
    data['version'] = "1.0"
    data['owner'] = "CGAT"
    data['email'] = "[email protected]"
    data['binary'] = script_name

    # does not output multiple files
    data['multiple_output_files'] = False

    input_format, output_format = guessFormats(basename, docstring)

    stdin = {}
    stdin['name'] = 'input_file'
    stdin['ns_name'] = 'input_file'
    stdin['type'] = 'stdin'
    stdin['label'] = 'input file'
    stdin['description'] = 'input file'
    stdin['choices'] = None
    stdin['format'] = MAP_TYPE2FORMAT.get(input_format, input_format)
    stdin['rank'] = 1
    stdin['display'] = 'show'
    stdin['min_occurrence'] = 1
    stdin['max_occurrence'] = 1
    stdin['value'] = ""
    stdin['arg'] = "<"
    stdin['arg_long'] = ""
    stdin['property_bag'] = {}
    stdin['dependencies'] = {}

    stdout = {}
    stdout['name'] = 'tsvfile'
    stdout['ns_name'] = 'tsvfile'
    stdout['type'] = 'stdout'
    stdout['label'] = 'table'
    stdout['description'] = 'bam file'
    stdout['choices'] = None
    stdout['format'] = MAP_TYPE2FORMAT.get(output_format, output_format)
    stdout['rank'] = 1
    stdout['display'] = 'show'
    stdout['min_occurrence'] = 1
    stdout['max_occurrence'] = 1
    stdout['value'] = ""
    stdout['arg'] = ">"
    stdout['arg_long'] = ""
    stdout['property_bag'] = {}
    stdout['dependencies'] = {}

    outputs = [stdout]

    data['parameters'] = [stdin, stdout]

    defaults = PARSER.get_default_values()

    # flag to indicate whether script needs to go through cgat_wrapper.py
    use_wrapper = False

    for option in PARSER.option_list:
        # ignore options added by optparse
        if option.dest is None:
            continue

        # ignore benchmarking options
        if option.dest.startswith("timeit"):
            continue

        # ignore options related to forcing output
        if "force" in option.dest:
            continue

        # ignore some special options:
        # if option.dest in ("output_filename_pattern", ):
        #    continue

        # ignore output options
        if option.dest in ("stdin", "stdout", "stdlog", "stderr", "loglevel"):
            continue

        # remove default from help string
        option.help = re.sub("\[[^\]]*%default[^\]]*\]", "", option.help)

        param = buildParam()

        # get command line option call (long/short option)
        try:
            param['arg'] = option._short_opts[0]
        except IndexError:
            pass

        try:
            param['arg_long'] = option._long_opts[0]
        except IndexError:
            pass

        assert 'arg' in param or 'arg_long' in param

        # print "----------------------------------"
        # print [(x,getattr(option,x)) for x in dir( option )]

        param['name'] = option.dest
        param['ns_name'] = option.dest
        if option.type == "int":
            param['type'] = "integer"
        elif option.type == "float":
            param['type'] = "float"
        elif option.type == "string":
            param['type'] = "text"
            if option.metavar:
                mvar = option.metavar.lower()
                if mvar in MAP_TYPE2FORMAT:
                    param['format'] = MAP_TYPE2FORMAT[mvar]
                    param['type'] = "data"
                if mvar == "bam":
                    use_wrapper = True
                    data['parameters'].append(buildParam(
                        name='wrapper_bam_file',
                        ns_name='wrapper_bam_file',
                        arg_long='--wrapper-bam-file',
                        label=option.dest,
                        type='data',
                        format='bam',
                        help=option.help,
                        value=getattr(defaults,  option.dest)))

                    data['parameters'].append(buildParam(
                        name='wrapper_bam_index',
                        ns_name='wrapper_bam_index',
                        arg_long='--wrapper-bai-file',
                        type='data',
                        value='${wrapper_bam_file.metadata.bam_index}',
                        display='hidden'))

                    # use long argument
                    data['parameters'].append(buildParam(
                        name='wrapper_bam_option',
                        ns_name='wrapper_bam_option',
                        arg_long='--wrapper-bam-option',
                        value=param[
                            'arg_long'],
                        display='hidden'))

                    continue

        elif option.type == "choice":
            param['type'] = "select"
            param['choices'] = option.choices
            if option.action == "append":
                param['multiple'] = True
        elif option.action.startswith("store"):
            param['type'] = "boolean"
        else:
            raise ValueError("unknown type for %s" % str(option))

        param['label'] = option.dest
        param['description'] = option.help
        param['rank'] = 1
        param['display'] = 'show'
        param['min_occurrence'] = 0
        param['max_occurrence'] = 1

        # get default value
        param['value'] = getattr(defaults,  option.dest)

        param['dependencies'] = {}
        param['property_bag'] = {}

        if option.dest == "genome_file":
            param['property_bag'] = {'from_loc': 'path',
                                     'loc_id': 'sam_fa',
                                     'loc_id_filter': '1'}

        # deal with multiple output files:
        if option.dest == "output_filename_pattern":
            use_wrapper = True
            data['parameters'].append(buildParam(
                name='wrapper_html_file',
                ns_name='wrapper_html_file',
                arg_long='--wrapper-html-file',
                value='$html_file',
                display='hidden'))

            data['parameters'].append(buildParam(
                name='wrapper_html_dir',
                ns_name='wrapper_html_dir',
                arg_long='--wrapper-html-dir',
                value='$html_file.files_path',
                display='hidden'))

            outputs.append(buildParam(name='html_file',
                                      ns_name='html_file',
                                      format='html',
                                      label='html'),
                           )
            continue

        data['parameters'].append(param)

    if options.output_format == "rdf":
        outfile.write(g.serialize(data, format='turtle') + "\n")

    elif options.output_format == "galaxy":

        if use_wrapper:

            # add hidden option for wrapper
            param = buildParam(
                name='wrapper-command',
                ns_name='wrapper-command',
                display='hidden',
                type='text',
                value=data['binary'],
                label='wrapper',
                description='wrapper',
                arg_long="--wrapper-command")

            data['parameters'].append(param)

            # point to wrapper
            data['binary'] = os.path.join(dirname, "cgat_galaxy_wrapper.py")

        displayMap = collections.defaultdict(list)

        for param in data['parameters']:
            displayMap[param['display']].append(param)

        displayMap['normal'] = displayMap['show']

        target = Template(
           IOTools.openFile('/ifs/devel/andreas/cgat/scripts/cgat2rdf/galaxy.xml').read())
        outfile.write(target.render(data=data,
                                    displayMap=displayMap,
                                    outputs=outputs) + "\n")
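
The re.sub in this example removes the optparse "[default=%default]" bracket from each option's help string before the option metadata is exported. A minimal sketch with a hypothetical helper name:

import re

def strip_default_marker(help_text):
    # Drop any "[... %default ...]" bracket from an optparse help string,
    # as processScript does before building the parameter description.
    return re.sub(r"\[[^\]]*%default[^\]]*\]", "", help_text)

print(strip_default_marker("number of iterations [default=%default]."))
# number of iterations .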

Example 99

Project: cgat
Source File: run_nubiscan.py
View license
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.
    """

    if not argv:
        argv = sys.argv

    # setup command line parser
    parser = E.OptionParser(
        version="%prog version: $Id: run_nubiscan.py 2861 2010-02-23 17:36:32Z andreas $", usage=globals()["__doc__"])

    parser.add_option("-i", "--iterations", dest="iterations", type="int",
                      help="number of iterations for sampling [default=%default].")

    parser.add_option("-q", "--qvalue", dest="qvalue_threshold", type="float",
                      help="qvalue threshold [default=%default].")

    parser.add_option("--without-combine", dest="combine", action="store_false",
                      help="combine overlapping motifs [default=%default].")

    parser.add_option("-f", "--fdr-control", dest="fdr_control", type="choice",
                      choices=("per-sequence", "all", "xall"),
                      help="qvalue threshold [default=%default].")

    parser.add_option("-m", "--motif", dest="motif", type="choice",
                      choices=("rxrvdr", "rxrvdr1", "rxrvdr2", "nr"),
                      help="qvalue threshold [default=%default].")

    parser.add_option("-a", "--arrangements", dest="arrangements", type="string",
                      help="',' separated list of repeat arrangements [default=%default]")

    parser.add_option("-x", "--mask", dest="mask", type="choice",
                      choices=("dust", "repeatmasker"),
                      help ="mask sequences before scanning [default=%default]")

    parser.add_option("--output-stats", dest="output_stats", action="store_true",
                      help="output stats [default=%default].")

    parser.add_option("--add-sequence", dest="add_sequence", action="store_true",
                      help="add sequence information [default=%default].")

    parser.set_defaults(
        iterations=100,
        qvalue_threshold=0.05,
        motif="rxrvdr",
        fdr_control="all",
        combine=True,
        arrangements=None,
        mask=None,
        output_stats=False,
        add_sequence=False,
    )

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv, add_output_options=True)

    # do sth
    ninput, nskipped, noutput = 0, 0, 0

    if options.arrangements is None:
        options.arrangements = [
            "DR%s" % x for x in range(0, 15)] + ["ER%s" % x for x in range(0, 15)]
    else:
        options.arrangements = options.arrangements.split(",")

    options.stdout.write("%s" % "\t".join(Nubiscan.NubiscanMatch._fields))
    if options.add_sequence:
        options.stdout.write("\tsequence")
    options.stdout.write("\n")

    if options.motif == 'nr':
        sense_matrix = NR
    elif options.motif == "rxrvdr":
        sense_matrix = RXRVDR
    elif options.motif == "rxrvdr1":
        sense_matrix = RXRVDR1
    elif options.motif == "rxrvdr2":
        sense_matrix = RXRVDR2
    else:
        raise ValueError("unknown matrix %s" % options.motif)

    if options.fdr_control == "all":

        seqs = list(FastaIterator.iterate(options.stdin))

        if options.mask:
            masked_seqs = maskSequences(
                [x.sequence for x in seqs], options.mask)
        else:
            masked_seqs = [x.sequence for x in seqs]

        ninput = len(seqs)
        map_id2title = dict(
            enumerate([re.sub("\s.*", "", x.title) for x in seqs]))
        matcher = Nubiscan.MatcherRandomisationSequences(sense_matrix,
                                                         samples=options.iterations)

        results = matcher.run(masked_seqs,
                              options.arrangements,
                              qvalue_threshold=options.qvalue_threshold)

        if options.combine:
            results = Nubiscan.combineMotifs(results)

        for r in results:

            if r.alternatives:
                alternatives = ",".join(
                    [x.arrangement for x in r.alternatives])
            else:
                alternatives = ""

            options.stdout.write("\t".join((
                map_id2title[r.id],
                "%i" % r.start,
                "%i" % r.end,
                r.strand,
                r.arrangement,
                "%6.4f" % r.score,
                "%6.4f" % r.zscore,
                "%6.4e" % r.pvalue,
                "%6.4e" % r.qvalue,
                alternatives)))

            if options.add_sequence:
                s = masked_seqs[int(r.id)][r.start:r.end]
                if r.strand == "-":
                    s = Genomics.complement(s)
                s = s[:6].upper() + s[6:-6].lower() + s[-6:].upper()
                options.stdout.write("\t%s" % s)

            options.stdout.write("\n")
            noutput += 1

        # output stats
        if options.output_stats:
            outfile = E.openOutputFile("fdr")
            outfile.write("bin\thist\tnobserved\n")
            for bin, hist, nobs in zip(matcher.bin_edges, matcher.hist, matcher.nobservations):
                outfile.write("%f\t%f\t%f\n" % (bin, hist, nobs))
            outfile.close()

    elif options.fdr_control == "xall":

        matcher = Nubiscan.MatcherRandomisationSequence(sense_matrix,
                                                        samples=options.iterations)

        # collect all results
        matches = []
        for seq in FastaIterator.iterate(options.stdin):
            ninput += 1
            mm = matcher.run(seq.sequence,
                             options.arrangements,
                             qvalue_threshold=None)
            for m in mm:
                matches.append(m._replace(sequence=seq.title))

        # estimate qvalues for all matches across all sequences
        pvalues = [x.pvalue for x in matches]
        fdr = Stats.doFDR(pvalues)
        qvalues = fdr.mQValues
        results = []
        for m, qvalue in zip(matches, qvalues):
            if qvalue > options.qvalue_threshold:
                continue
            results.append(m._replace(qvalue=qvalue))

        if options.combine:
            results = Nubiscan.combineMotifs(results)

        # output
        for r in results:
            options.stdout.write("\t".join((
                r.id,
                "%i" % r.start,
                "%i" % r.end,
                r.strand,
                r.arrangement,
                "%6.4f" % r.score,
                "%6.4f" % r.zscore,
                "%6.4e" % r.pvalue,
                "%6.4e" % r.qvalue)) + "\n")

            noutput += 1

    elif options.fdr_control == "per-sequence":
        matcher = Nubiscan.MatcherRandomisationSequence(sense_matrix,
                                                        samples=options.iterations)

        for seq in FastaIterator.iterate(options.stdin):
            ninput += 1
            result = matcher.run(seq.sequence,
                                 options.arrangements,
                                 qvalue_threshold=options.qvalue_threshold)

            if options.combine:
                result = Nubiscan.combineMotifs(result)

            t = re.sub(" .*", "",  seq.title)
            for r in result:
                options.stdout.write("\t".join((
                    t,
                    "%i" % r.start,
                    "%i" % r.end,
                    r.strand,
                    r.arrangement,
                    "%6.4f" % r.score,
                    "%6.4f" % r.zscore,
                    "%f" % r.pvalue,
                    "%f" % r.qvalue)) + "\n")

            noutput += 1

    E.info("ninput=%i, noutput=%i, nskipped=%i" % (ninput, noutput, nskipped))

    # write footer and output benchmark information.
    E.Stop()
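
re.sub appears twice in this script with the same purpose: truncating a FASTA title at the first whitespace so only the identifier is kept (re.sub("\s.*", ...) and re.sub(" .*", ...)). A stand-alone sketch, with short_title as a hypothetical name:

import re

def short_title(title):
    # Keep only the identifier part of a FASTA title, i.e. everything up to
    # the first whitespace character.
    return re.sub(r"\s.*", "", title)

print(short_title("chr1 assembled from contigs"))   # chr1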

Example 100

Project: cgstudiomap
Source File: escpos.py
View license
    def receipt(self,xml):
        """
        Prints an xml based receipt definition
        """

        def strclean(string):
            if not string:
                string = ''
            string = string.strip()
            string = re.sub('\s+',' ',string)
            return string

        def format_value(value, decimals=3, width=0, decimals_separator='.', thousands_separator=',', autoint=False, symbol='', position='after'):
            decimals = max(0,int(decimals))
            width    = max(0,int(width))
            value    = float(value)

            if autoint and math.floor(value) == value:
                decimals = 0
            if width == 0:
                width = ''

            if thousands_separator:
                formatstr = "{:"+str(width)+",."+str(decimals)+"f}"
            else:
                formatstr = "{:"+str(width)+"."+str(decimals)+"f}"


            ret = formatstr.format(value)
            ret = ret.replace(',','COMMA')
            ret = ret.replace('.','DOT')
            ret = ret.replace('COMMA',thousands_separator)
            ret = ret.replace('DOT',decimals_separator)

            if symbol:
                if position == 'after':
                    ret = ret + symbol
                else:
                    ret = symbol + ret
            return ret

        def print_elem(stylestack, serializer, elem, indent=0):

            elem_styles = {
                'h1': {'bold': 'on', 'size':'double'},
                'h2': {'size':'double'},
                'h3': {'bold': 'on', 'size':'double-height'},
                'h4': {'size': 'double-height'},
                'h5': {'bold': 'on'},
                'em': {'font': 'b'},
                'b':  {'bold': 'on'},
            }

            stylestack.push()
            if elem.tag in elem_styles:
                stylestack.set(elem_styles[elem.tag])
            stylestack.set(elem.attrib)

            if elem.tag in ('p','div','section','article','receipt','header','footer','li','h1','h2','h3','h4','h5'):
                serializer.start_block(stylestack)
                serializer.text(elem.text)
                for child in elem:
                    print_elem(stylestack,serializer,child)
                    serializer.start_inline(stylestack)
                    serializer.text(child.tail)
                    serializer.end_entity()
                serializer.end_entity()

            elif elem.tag in ('span','em','b','left','right'):
                serializer.start_inline(stylestack)
                serializer.text(elem.text)
                for child in elem:
                    print_elem(stylestack,serializer,child)
                    serializer.start_inline(stylestack)
                    serializer.text(child.tail)
                    serializer.end_entity()
                serializer.end_entity()

            elif elem.tag == 'value':
                serializer.start_inline(stylestack)
                serializer.pre(format_value( 
                                              elem.text,
                                              decimals=stylestack.get('value-decimals'),
                                              width=stylestack.get('value-width'),
                                              decimals_separator=stylestack.get('value-decimals-separator'),
                                              thousands_separator=stylestack.get('value-thousands-separator'),
                                              autoint=(stylestack.get('value-autoint') == 'on'),
                                              symbol=stylestack.get('value-symbol'),
                                              position=stylestack.get('value-symbol-position') 
                                            ))
                serializer.end_entity()

            elif elem.tag == 'line':
                width = stylestack.get('width')
                if stylestack.get('size') in ('double', 'double-width'):
                    width = width / 2

                lineserializer = XmlLineSerializer(stylestack.get('indent')+indent,stylestack.get('tabwidth'),width,stylestack.get('line-ratio'))
                serializer.start_block(stylestack)
                for child in elem:
                    if child.tag == 'left':
                        print_elem(stylestack,lineserializer,child,indent=indent)
                    elif child.tag == 'right':
                        lineserializer.start_right()
                        print_elem(stylestack,lineserializer,child,indent=indent)
                serializer.pre(lineserializer.get_line())
                serializer.end_entity()

            elif elem.tag == 'ul':
                serializer.start_block(stylestack)
                bullet = stylestack.get('bullet')
                for child in elem:
                    if child.tag == 'li':
                        serializer.style(stylestack)
                        serializer.raw(' ' * indent * stylestack.get('tabwidth') + bullet)
                    print_elem(stylestack,serializer,child,indent=indent+1)
                serializer.end_entity()

            elif elem.tag == 'ol':
                cwidth = len(str(len(elem))) + 2
                i = 1
                serializer.start_block(stylestack)
                for child in elem:
                    if child.tag == 'li':
                        serializer.style(stylestack)
                        serializer.raw(' ' * indent * stylestack.get('tabwidth') + ' ' + (str(i)+')').ljust(cwidth))
                        i = i + 1
                    print_elem(stylestack,serializer,child,indent=indent+1)
                serializer.end_entity()

            elif elem.tag == 'pre':
                serializer.start_block(stylestack)
                serializer.pre(elem.text)
                serializer.end_entity()

            elif elem.tag == 'hr':
                width = stylestack.get('width')
                if stylestack.get('size') in ('double', 'double-width'):
                    width = width / 2
                serializer.start_block(stylestack)
                serializer.text('-'*width)
                serializer.end_entity()

            elif elem.tag == 'br':
                serializer.linebreak()

            elif elem.tag == 'img':
                if 'src' in elem.attrib and 'data:' in elem.attrib['src']:
                    self.print_base64_image(elem.attrib['src'])

            elif elem.tag == 'barcode' and 'encoding' in elem.attrib:
                serializer.start_block(stylestack)
                self.barcode(strclean(elem.text),elem.attrib['encoding'])
                serializer.end_entity()

            elif elem.tag == 'cut':
                self.cut()
            elif elem.tag == 'partialcut':
                self.cut(mode='part')
            elif elem.tag == 'cashdraw':
                self.cashdraw(2)
                self.cashdraw(5)

            stylestack.pop()

        try:
            stylestack      = StyleStack() 
            serializer      = XmlSerializer(self)
            root            = ET.fromstring(xml.encode('utf-8'))

            self._raw(stylestack.to_escpos())

            print_elem(stylestack,serializer,root)

            if 'open-cashdrawer' in root.attrib and root.attrib['open-cashdrawer'] == 'true':
                self.cashdraw(2)
                self.cashdraw(5)
            if not 'cut' in root.attrib or root.attrib['cut'] == 'true' :
                self.cut()

        except Exception as e:
            errmsg = str(e)+'\n'+'-'*48+'\n'+traceback.format_exc() + '-'*48+'\n'
            self.text(errmsg)
            self.cut()

            raise e
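
The re.sub in this example lives in the nested strclean helper: text destined for the receipt printer has its whitespace normalised before printing or barcode encoding. A self-contained sketch of just that helper:

import re

def strclean(string):
    # Normalise receipt text: treat None as empty, trim the ends, and
    # collapse any run of whitespace to a single space.
    if not string:
        string = ''
    string = string.strip()
    return re.sub(r'\s+', ' ', string)

print(strclean("  Total :\n\t 42.00  "))   # Total : 42.00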