os.path.splitext

Here are examples of the Python API os.path.splitext taken from open source projects. By voting up, you can indicate which examples are most useful and appropriate.

200 Examples
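
Before the project code, here is a minimal sketch of what os.path.splitext itself does (standard-library behavior only; the example paths are made up for illustration):

import os.path

# splitext() splits the last extension off a path, keeping the dot
# on the extension part.
root, ext = os.path.splitext('/media/css/style.css')
# root == '/media/css/style', ext == '.css'

# Only the final extension is split off.
print(os.path.splitext('archive.tar.gz'))      # ('archive.tar', '.gz')

# No extension, or a leading-dot filename, yields an empty extension.
print(os.path.splitext('/media/js/Makefile'))  # ('/media/js/Makefile', '')
print(os.path.splitext('.bashrc'))             # ('.bashrc', '')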

Example 1

Project: django-static
Source File: django_static.py
def _static_file(filename,
                 optimize_if_possible=False,
                 symlink_if_possible=False,
                 warn_no_file=True):
    """
    """
    if not settings.DJANGO_STATIC:
        return file_proxy(filename, disabled=True)

    def wrap_up(filename):
        if settings.DJANGO_STATIC_MEDIA_URL_ALWAYS:
            return settings.DJANGO_STATIC_MEDIA_URL + filename
        elif settings.DJANGO_STATIC_MEDIA_URL:
            return settings.DJANGO_STATIC_MEDIA_URL + filename
        return filename

    is_combined_files = isinstance(filename, list)
    if is_combined_files and len(filename) == 1:
        # e.g. we were passed a list of files, but only one, so treat it
        # like a single file
        filename = filename[0]
        is_combined_files = False

    if is_combined_files:
        map_key = ';'.join(filename)
    else:
        map_key = filename

    if settings.DJANGO_STATIC_USE_MANIFEST_FILE:
        new_filename, m_time = _get(_MANIFEST_PATH, map_key)
    else:
        new_filename, m_time = _FILE_MAP.get(map_key, (None, None))


    # We might already have done a conversion, but the question is
    # whether the file has changed since. We only want to bother with
    # this check in DEBUG mode because it adds one more otherwise
    # unnecessary operation.
    if new_filename:
        if settings.DEBUG:
            # need to check if the original has changed
            old_new_filename = new_filename
            new_filename = None
        else:
            # This is really fast and only happens when NOT in DEBUG mode
            # since it doesn't do any comparison
            return file_proxy(wrap_up(new_filename), **fp_default_kwargs)
    else:
        # This is important so that we can know that there wasn't an
        # old file which will help us know we don't need to delete
        # the old one
        old_new_filename = None


    if not new_filename:
        if is_combined_files:
            # It's a list! We have to combine it into one file
            new_file_content = StringIO()
            each_m_times = []
            extension = None
            for each in filename:
                filepath, path = _find_filepath_in_roots(each)
                if not filepath:
                    raise OSError("Failed to find %s in %s" % (each,
                        ",".join(settings.DJANGO_STATIC_MEDIA_ROOTS)))

                if extension:
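                    # splitext()[1] is the extension including its leading
                    # dot (e.g. '.css'); every file in the combo must share it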
                    if os.path.splitext(filepath)[1] != extension:
                        raise ValueError("Mismatching file extension in combo %r" % \
                          each)
                else:
                    extension = os.path.splitext(filepath)[1]
                each_m_times.append(os.stat(filepath)[stat.ST_MTIME])
                new_file_content.write(open(filepath, 'r').read().strip())
                new_file_content.write('\n')

            filename = _combine_filenames(filename, settings.DJANGO_STATIC_NAME_MAX_LENGTH)
            # Set the root path of the combined files to the first entry
            # in the MEDIA_ROOTS list. This way django-static behaves a
            # little more predictably.
            path = settings.DJANGO_STATIC_MEDIA_ROOTS[0]
            new_m_time = max(each_m_times)

        else:
            filepath, path = _find_filepath_in_roots(filename)
            if not filepath:
                if warn_no_file:
                    msg = "Can't find file %s in %s" % \
                      (filename, ",".join(settings.DJANGO_STATIC_MEDIA_ROOTS))
                    warnings.warn(msg)
                return file_proxy(wrap_up(filename),
                                  **dict(fp_default_kwargs,
                                         filepath=filepath,
                                         notfound=True))

            new_m_time = os.stat(filepath)[stat.ST_MTIME]

        if m_time:
            # we had the filename in the map
            if m_time != new_m_time:
                # ...but it has changed!
                m_time = None
            else:
                # ...and it hasn't changed!
                return file_proxy(wrap_up(old_new_filename))

        if not m_time:
            # We did not have the filename in the map OR it has changed
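            # splitext() gives a ('name', '.ext') pair; presumably so
            # _generate_filename() can splice the mtime between the two parts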
            apart = os.path.splitext(filename)
            new_filename = _generate_filename(apart, new_m_time)
            fileinfo = (settings.DJANGO_STATIC_NAME_PREFIX + new_filename,
                        new_m_time)


            if settings.DJANGO_STATIC_USE_MANIFEST_FILE:
                _set(_MANIFEST_PATH, map_key, fileinfo)
            else:
                _FILE_MAP[map_key] = fileinfo


            if old_new_filename:
                old_new_filename = old_new_filename.replace(
                                      settings.DJANGO_STATIC_NAME_PREFIX, '')
                old_new_filepath = _filename2filepath(old_new_filename,
                        settings.DJANGO_STATIC_SAVE_PREFIX or path)
                if not os.path.isdir(os.path.dirname(old_new_filepath)):
                    _mkdir(os.path.dirname(old_new_filepath))

                if os.path.isfile(old_new_filepath):
                    os.remove(old_new_filepath)
    new_filepath = _filename2filepath(new_filename,
            settings.DJANGO_STATIC_SAVE_PREFIX or path)

    if not os.path.isdir(os.path.dirname(new_filepath)):
        _mkdir(os.path.dirname(new_filepath))


    # Files are either slimmered, symlinked, or just copied. Basically, only
    # .css and .js files can be slimmered, but not all of them are. For
    # example, an already minified file such as jquery.min.js doesn't need
    # to be slimmered, nor does it need to be copied.
    # If you're on Windows, it will always have to do a copy.
    # When symlinking, the point is that it gives the file a unique name
    # that is different from the original.
    #
    # The caller of this method is responsible for dictating whether we
    # should slimmer and whether we can symlink.
    if optimize_if_possible:
        # Then we expect to be able to modify the content and we will
        # definitely need to write a new file.
        if is_combined_files:
            content = new_file_content.getvalue().decode('utf-8')
        else:
            #content = open(filepath).read()
            content = codecs.open(filepath, 'r', 'utf-8').read()
        if new_filename.endswith('.js') and has_optimizer(JS):
            content = optimize(content, JS)
        elif new_filename.endswith('.css') and has_optimizer(CSS):
            content = optimize(content, CSS)

            # and _static_file() all the images referred to in the CSS file itself
            def replacer(match):
                this_filename = match.groups()[0]

                if (this_filename.startswith('"') and this_filename.endswith('"')) or \
                  (this_filename.startswith("'") and this_filename.endswith("'")):
                    this_filename = this_filename[1:-1]
                # It's really quite common for the CSS file to refer to a
                # file that doesn't exist, because if you refer to an image
                # in CSS for a selector you never use, you simply don't
                # suffer. That's why we say not to warn on nonexistent files

                replace_with = this_filename

                if not (this_filename.startswith('/') or \
                  (this_filename.startswith('http') and '://' in this_filename)):
                    # if the referenced filename is something like
                    # 'images/foo.jpg' or 'sub/module.css' then we need to copy the
                    # current relative directory
                    replace_with = this_filename
                    this_filename = os.path.join(os.path.dirname(filename), this_filename)
                optimize_again = optimize_if_possible and \
                                 this_filename.lower().endswith('.css') or False
                new_filename = _static_file(this_filename,
                                            symlink_if_possible=symlink_if_possible,
                                            optimize_if_possible=optimize_again,
                                            warn_no_file=settings.DEBUG and True or False)
                return match.group().replace(replace_with, new_filename)

            content = REFERRED_CSS_URLS_REGEX.sub(replacer, content)
            content = REFERRED_CSS_URLLESS_IMPORTS_REGEX.sub(replacer, content)

        elif slimmer or cssmin:
            raise ValueError(
              "Unable to slimmer file %s. Unrecognized extension" % new_filename)
        #print "** STORING:", new_filepath
        codecs.open(new_filepath, 'w', 'utf-8').write(content)
    elif symlink_if_possible and not is_combined_files:
        #print "** SYMLINK:", filepath, '-->', new_filepath

        # The reason we have to do this strange while loop is that, in the
        # window between destroying the symlink and being able to create it
        # again, another thread or process might be trying to do the exact
        # same thing just a fraction of a second apart. So by the time we
        # create the symlink it may already be there, which raises an
        # OSError.
        #
        # This is quite possible when Django, for example, starts multiple
        # fcgi threads roughly all at the same time. An alternative approach
        # would be to store the global variable _FILE_MAP in a cache or
        # something, which would effectively make it thread safe, but that
        # has the annoying disadvantage that it remains in the cache between
        # server restarts. In a production environment, server restarts
        # happen very often because you have upgraded the code (and the
        # static files). So an alternative is to use a cache so that threads
        # number 2, number 3, etc. get the file mappings of the first
        # thread, and then to let this cache last only a brief amount of
        # time. That amount of time would basically be equivalent to the
        # time the sys admin or developer would have to wait between a new
        # code deployment and refreshed symlinks for the static files. That
        # feels convoluted and complex, so I've decided to instead use this
        # rather obtuse while loop, which is basically built to try X number
        # of times. If it still fails after X attempts, something else is
        # wrong with the IO and it needs to bubble up.
        _max_attempts = 10
        while True:
            try:
                if os.path.lexists(new_filepath):
                    # since in the other cases we write a new file, it doesn't matter
                    # that the file existed before.
                    # That's not the case with symlinks
                    os.unlink(new_filepath)

                os.symlink(filepath, new_filepath)
                break
            except OSError:
                _max_attempts -= 1
                if _max_attempts <= 0:
                    raise
    elif is_combined_files:
        #print "** STORING COMBO:", new_filepath
        open(new_filepath, 'w').write(new_file_content.getvalue())
    else:
        # straight copy
        #print "** STORING COPY:", new_filepath
        shutil.copyfile(filepath, new_filepath)

    return file_proxy(wrap_up(settings.DJANGO_STATIC_NAME_PREFIX + new_filename),
                      **dict(fp_default_kwargs, new=True,
                             filepath=new_filepath, checked=True))

Example 2

Project: project_generator
Source File: iar.py
    def _export_single_project(self):
        """ A single project export """
        expanded_dic = self.workspace.copy()

        self._fix_paths(expanded_dic)

        # generic tool template specified or project
        if expanded_dic['template']:
            template_ewp = False
            template_ewd = False
            # process each template file
            for template in expanded_dic['template']:
                template = join(getcwd(), template)
                # we support .ewp or .ewp.tmpl templates
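                # NB: os.path.splitext() only peels off the final extension,
                # so 'proj.ewp.tmpl' yields '.tmpl'; hence the extra regex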
                if os.path.splitext(template)[1] == '.ewp' or re.match('.*\.ewp.tmpl$', template):
                    try:
                        ewp_dic = xmltodict.parse(open(template), dict_constructor=dict)
                        template_ewp = True
                    except IOError:
                        logger.info("Template file %s not found" % template)
                        ewp_dic = xmltodict.parse(open(self.ewp_file).read())
                if os.path.splitext(template)[1] == '.ewd' or re.match('.*\.ewd.tmpl$', template):
                    try:
                        ewd_dic = xmltodict.parse(open(template), dict_constructor=dict)
                        template_ewd = True
                    except IOError:
                        logger.info("Template file %s not found" % template)
                        ewd_dic = xmltodict.parse(open(self.ewd_file).read())
                # handle invalid or unspecified template files
                if not template_ewp and template_ewd:
                    ewp_dic, _ = self._get_default_templates()
                elif not template_ewd and template_ewp:
                    _, ewd_dic = self._get_default_templates()
                elif not template_ewp and not template_ewd:
                    # fall back to defaults for both only when neither
                    # template file was parsed successfully
                    ewp_dic, ewd_dic = self._get_default_templates()
        elif 'iar' in self.env_settings.templates.keys():
            template_ewp = False
            template_ewd = False
            # template overrides what is set in the yaml files
            for template in self.env_settings.templates['iar']:
                template = join(getcwd(), template)
                if os.path.splitext(template)[1] == '.ewp' or re.match('.*\.ewp.tmpl$', template):
                    try:
                        ewp_dic = xmltodict.parse(open(template), dict_constructor=dict)
                        template_ewp = True
                    except IOError:
                        logger.info("Template file %s not found" % template)
                        ewp_dic = xmltodict.parse(open(self.ewp_file).read())
                if os.path.splitext(template)[1] == '.ewd' or re.match('.*\.ewd.tmpl$', template):
                    # get ewd template
                    try:
                        ewd_dic = xmltodict.parse(open(template), dict_constructor=dict)
                        template_ewd = True
                    except IOError:
                        logger.info("Template file %s not found" % template)
                        ewd_dic = xmltodict.parse(open(self.ewd_file).read())
                # handle invalid or unspecified template files
                if not template_ewp and template_ewd:
                    ewp_dic, _ = self._get_default_templates()
                elif not template_ewd and template_ewp:
                    _, ewd_dic = self._get_default_templates()
                elif not template_ewp and not template_ewd:
                    # fall back to defaults for both only when neither
                    # template file was parsed successfully
                    ewp_dic, ewd_dic = self._get_default_templates()
        else:
            ewp_dic, ewd_dic = self._get_default_templates()

        eww = None
        if self.workspace['singular']:
            # TODO 0xc0170: if we use self.definitions.eww here, travis fails. I can't reproduce it and don't see
            # eww used anywhere prior to exporting this.
            eww_dic = {u'workspace': {u'project': {u'path': u''}, u'batchBuild': None}}
            # set eww
            self._eww_set_path_single_project(eww_dic, expanded_dic['name'])
            eww_xml = xmltodict.unparse(eww_dic, pretty=True)
            project_path, eww = self.gen_file_raw(eww_xml, '%s.eww' % expanded_dic['name'], expanded_dic['output_dir']['path'])


        try:
            ewp_configuration = ewp_dic['project']['configuration'][0]
            logging.debug("Provided .ewp file has multiple configurations; we use only the first one")
            ewp_dic['project']['configuration'] = ewp_dic['project']['configuration'][0]
        except KeyError:
            ewp_configuration = ewp_dic['project']['configuration']

        # replace all None with empty strings ''
        self._clean_xmldict_ewp(ewp_configuration)
        #self._clean_xmldict_ewd(ewd_dic)

        try:
            self._ewp_set_name(ewp_configuration, expanded_dic['name'])
        except KeyError:
            raise RuntimeError("The IAR template is not valid .ewp file")

        # set ARM toolchain and project name
        self._ewp_set_toolchain(ewp_configuration, 'ARM')

        # set common things we have for IAR
        self._ewp_general_set(ewp_configuration['settings'], expanded_dic)
        self._ewp_iccarm_set(ewp_configuration['settings'], expanded_dic)
        self._ewp_aarm_set(ewp_configuration['settings'], expanded_dic)
        self._ewp_ilink_set(ewp_configuration['settings'], expanded_dic)
        self._ewp_files_set(ewp_dic, expanded_dic)

        # set target only if defined, otherwise use from template/default one
        if expanded_dic['target']:
            # get target definition (target + mcu)
            proj_def = ProGenDef('iar')
            if not proj_def.is_supported(expanded_dic['target'].lower()):
                raise RuntimeError("Target %s is not supported." % expanded_dic['target'].lower())
            mcu_def_dic = proj_def.get_tool_definition(expanded_dic['target'].lower())
            if not mcu_def_dic:
                raise RuntimeError(
                    "Mcu definitions were not found for %s. Please add them to https://github.com/project-generator/project_generator_definitions" % expanded_dic['target'].lower())
            self._normalize_mcu_def(mcu_def_dic)
            logger.debug("Mcu definitions: %s" % mcu_def_dic)
            self._ewp_set_target(ewp_configuration['settings'], mcu_def_dic)

            try:
                debugger = proj_def.get_debugger(expanded_dic['target'])
                self._ewd_set_debugger(ewd_dic['project']['configuration']['settings'], debugger)
            except KeyError as err:
                # TODO: worth reporting?
                pass

        # overwrite debugger only if defined in the project file, otherwise use either default or from template
        if expanded_dic['debugger']:
            try:
                self._ewd_set_debugger(ewd_dic['project']['configuration']['settings'], expanded_dic['debugger'])
            except KeyError:
                raise RuntimeError("Debugger %s is not supported" % expanded_dic['debugger'])

        self._ewd_set_name(ewd_dic['project']['configuration'], expanded_dic['name'])

        # IAR uses ident 2 spaces, encoding iso-8859-1
        ewp_xml = xmltodict.unparse(ewp_dic, encoding='iso-8859-1', pretty=True, indent='  ')
        project_path, ewp = self.gen_file_raw(ewp_xml, '%s.ewp' % expanded_dic['name'], expanded_dic['output_dir']['path'])

        ewd_xml = xmltodict.unparse(ewd_dic, encoding='iso-8859-1', pretty=True, indent='  ')
        project_path, ewd = self.gen_file_raw(ewd_xml, '%s.ewd' % expanded_dic['name'], expanded_dic['output_dir']['path'])
        return project_path, [ewp, eww, ewd]

Example 3

Project: pyblosxom
Source File: blosxom.py
def blosxom_process_path_info(args):
    """Process HTTP ``PATH_INFO`` for URI according to path
    specifications, fill in data dict accordingly.

    The paths specification looks like this:

    - ``/foo.html`` and ``/cat/foo.html`` - file foo.* in / and /cat
    - ``/cat`` - category
    - ``/2002`` - category or year
    - ``/2002/Feb`` and ``/2002/02`` - Year and Month
    - ``/cat/2002/Feb/31`` and ``/cat/2002/02/31`` - year, month, and day
      in category.

    :param args: dict containing the incoming Request object
    """
    request = args['request']
    config = request.get_configuration()
    data = request.get_data()
    py_http = request.get_http()

    form = request.get_form()

    # figure out which flavour to use.  the flavour is determined by
    # looking at the "flav" post-data variable, the "flav" query
    # string variable, the "default_flavour" setting in the config.py
    # file, or "html"
    flav = config.get("default_flavour", "html")
    if form.has_key("flav"):
        flav = form["flav"].value

    data['flavour'] = flav

    data['pi_yr'] = ''
    data['pi_mo'] = ''
    data['pi_da'] = ''

    path_info = py_http.get("PATH_INFO", "")

    data['root_datadir'] = config['datadir']

    data["pi_bl"] = path_info

    # first we check to see if this is a request for an index and we
    # can pluck the extension (which is certainly a flavour) right
    # off.
    new_path, ext = os.path.splitext(path_info)
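    # e.g. '/cat/index.html' -> ('/cat/index', '.html'); the extension,
    # minus its dot, names the flavour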
    if new_path.endswith("/index") and ext:
        # there is a flavour-like thing, so that's our new flavour and
        # we adjust the path_info to the new filename
        data["flavour"] = ext[1:]
        path_info = new_path

    while path_info and path_info.startswith("/"):
        path_info = path_info[1:]

    absolute_path = os.path.join(config["datadir"], path_info)

    path_info = path_info.split("/")

    if os.path.isdir(absolute_path):

        # this is an absolute path

        data['root_datadir'] = absolute_path
        data['bl_type'] = 'dir'

    elif absolute_path.endswith("/index") and \
            os.path.isdir(absolute_path[:-6]):

        # this is an absolute path with /index at the end of it

        data['root_datadir'] = absolute_path[:-6]
        data['bl_type'] = 'dir'

    else:
        # this is either a file or a date

        ext = tools.what_ext(data["extensions"].keys(), absolute_path)
        if not ext:
            # it's possible we didn't find the file because it's got a
            # flavour thing at the end--so try removing it and
            # checking again.
            new_path, flav = os.path.splitext(absolute_path)
            if flav:
                ext = tools.what_ext(data["extensions"].keys(), new_path)
                if ext:
                    # there is a flavour-like thing, so that's our new
                    # flavour and we adjust the absolute_path and
                    # path_info to the new filename
                    data["flavour"] = flav[1:]
                    absolute_path = new_path
                    path_info, flav = os.path.splitext("/".join(path_info))
                    path_info = path_info.split("/")

        if ext:
            # this is a file
            data["bl_type"] = "file"
            data["root_datadir"] = absolute_path + "." + ext

        else:
            data["bl_type"] = "dir"

            # it's possible to have category/category/year/month/day
            # (or something like that) so we pluck off the categories
            # here.
            pi_bl = ""
            while len(path_info) > 0 and \
                    not (len(path_info[0]) == 4 and path_info[0].isdigit()):
                pi_bl = os.path.join(pi_bl, path_info.pop(0))

            # handle the case where we do in fact have a category
            # preceding the date.
            if pi_bl:
                pi_bl = pi_bl.replace("\\", "/")
                data["pi_bl"] = pi_bl
                data["root_datadir"] = os.path.join(config["datadir"], pi_bl)

            if len(path_info) > 0:
                item = path_info.pop(0)
                # handle a year token
                if len(item) == 4 and item.isdigit():
                    data['pi_yr'] = item
                    item = ""

                    if len(path_info) > 0:
                        item = path_info.pop(0)
                        # handle a month token
                        if item in tools.MONTHS:
                            data['pi_mo'] = item
                            item = ""

                            if len(path_info) > 0:
                                item = path_info.pop(0)
                                # handle a day token
                                if len(item) == 2 and item.isdigit():
                                    data["pi_da"] = item
                                    item = ""

                                    if len(path_info) > 0:
                                        item = path_info.pop(0)

                # if the last item we picked up was "index", then we
                # just ditch it because we don't need it.
                if item == "index":
                    item = ""

                # if we picked off an item we don't recognize and/or
                # there is still stuff in path_info to pluck out, then
                # it's likely this wasn't a date.
                if item or len(path_info) > 0:
                    data["bl_type"] = "dir"
                    data["root_datadir"] = absolute_path

    # construct our final URL
    url = config['base_url']
    if data['pi_bl'].startswith("/") and url.endswith("/"):
        url = url[:-1] + data['pi_bl']
    elif data['pi_bl'].startswith("/") or url.endswith("/"):
        url = url + data["pi_bl"]
    else:
        url = url + "/" + data['pi_bl']
    data['url'] = url

    # set path_info to our latest path_info
    data['path_info'] = path_info

    if data.get("pi_yr"):
        data["truncate"] = config.get("truncate_date", False)
    elif data.get("bl_type") == "dir":
        if data["path_info"] == [''] or data["path_info"] == ['index']:
            data["truncate"] = config.get("truncate_frontpage", True)
        else:
            data["truncate"] = config.get("truncate_category", True)
    else:
        data["truncate"] = False

Example 4

Project: pyblosxom
Source File: paginate.py
def page(request, num_entries, entry_list):
    http = request.get_http()
    config = request.get_configuration()
    data = request.get_data()

    first_text = config.get("paginate_first_text", "<<<")
    previous_text = config.get("paginate_previous_text", "<<")
    next_text = config.get("paginate_next_text", ">>")
    last_text = config.get("paginate_last_text", ">>>")

    first_last = config.get("paginate_first_last", 0)
    if first_last > 1:
        first_last = 1

    link_style = config.get("paginate_linkstyle", 1)
    if link_style > 1:
        link_style = 1

    entries_per_page = num_entries
    count_from = config.get("paginate_count_from", 0)

    if isinstance(entry_list, list) and 0 < entries_per_page < len(entry_list):

        page = count_from
        url = http.get("REQUEST_URI", http.get("HTTP_REQUEST_URI", ""))
        url_template = url
        if not data.get("STATIC"):
            form = request.get_form()

            if form:
                try:
                    page = int(form.getvalue("page"))
                except (TypeError, ValueError):
                    page = count_from

            # Restructure the querystring so that page= is at the end
            # where we can fill in the next/previous pages.
            if url_template.find("?") != -1:
                query = url_template[url_template.find("?") + 1:]
                url_template = url_template[:url_template.find("?")]

                query = query.split("&")
                query = [m for m in query if not m.startswith("page=")]
                if len(query) == 0:
                    url_template = url_template + "?" + "page=%d"
                else:
                    # Note: We're using &amp; here because it needs to
                    # be url_templateencoded.
                    url_template = (url_template + "?" + "&amp;".join(query) +
                                    "&amp;page=%d")
            else:
                url_template += "?page=%d"

        else:
            try:
                page = data["paginate_page"]
            except KeyError:
                page = count_from

            # The REQUEST_URI isn't the full url here--it's only the
            # path and so we need to add the base_url.
            base_url = config["base_url"].rstrip("/")
            url_template = base_url + url_template

            url_template = url_template.split("/")
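            # the last component looks like 'index.html' or 'index_page2.html';
            # splitext() peels '.html' off either piece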
            ret = url_template[-1].rsplit("_", 1)
            if len(ret) == 1:
                fn, ext = os.path.splitext(ret[0])
                pageno = "_page%d"
            else:
                fn, pageno = ret
                pageno, ext = os.path.splitext(pageno)
                pageno = "_page%d"
            url_template[-1] = fn + pageno + ext
            url_template = "/".join(url_template)
            url_first_page = url_template.split("_page")
            url_first_page = url_first_page[0] + ext

        begin = (page - count_from) * entries_per_page
        end = (page + 1 - count_from) * entries_per_page
        if end > len(entry_list):
            end = len(entry_list)

        max_pages = ((len(entry_list) - 1) / entries_per_page) + 1 + count_from

        data["entry_list"] = entry_list[begin:end]

        data["page_navigation"] = PageDisplay(
            url_template, url_first_page, page, max_pages, count_from, previous_text,
            next_text, link_style, first_last, first_text, last_text, request)

        # If we're static rendering and there wasn't a page specified
        # and this is one of the flavours to statically render, then
        # this is the first page and we need to render all the rest of
        # the pages, so we do that here.
        static_flavours = config.get("static_flavours", ["html"])
        if ((data.get("STATIC") and page == count_from
             and data.get("flavour") in static_flavours)):
            # Turn http://example.com/index.html into
            # http://example.com/index_page5.html for each page.
            url = url.split('/')
            fn = url[-1]
            fn, ext = os.path.splitext(fn)
            template = '/'.join(url[:-1]) + '/' + fn + '_page%d'
            if ext:
                template = template + ext

            for i in range(count_from + 1, max_pages):
                print "   rendering page %s ..." % (template % i)
                render_url_statically(dict(config), template % i, '')

Example 5

Project: pyNastran
Source File: test_op2.py
def run_op2(op2_filename, make_geom=False, write_bdf=False,
            write_f06=True, write_op2=False, write_xlsx=False,
            is_mag_phase=False, is_sort2=False,
            delete_f06=False,
            subcases=None, exclude=None, short_stats=False,
            compare=True, debug=False, binary_debug=False,
            quiet=False, check_memory=False, stop_on_failure=True, dev=False):
    """
    Runs an OP2

    Parameters
    ----------
    op2_filename : str
        path of file to test
    make_geom : bool; default=False
        should the GEOMx, EPT, MPT, DYNAMIC, DIT, etc. tables be read
    write_bdf : bool; default=False
        should a BDF be written based on the geometry tables
    write_f06 : bool; default=True
        should an F06 be written based on the results
    write_op2 : bool; default=False
        should an OP2 be written based on the results
    write_xlsx : bool; default=False
        should an XLSX be written based on the results
    is_mag_phase : bool; default=False
        False : write real/imag results
        True : write mag/phase results
        For static results, does nothing
    is_sort2 : bool; default=False
        False : writes "transient" data in SORT1
        True : writes "transient" data in SORT2
    delete_f06 : bool; default=False
        deletes the F06 (assumes write_f06 is True)
    subcases : List[int, ...]; default=None
        limits subcases to specified values; default=None -> no limiting
    exclude : List[str, ...]; default=None
        limits result types; (remove what's listed)
    short_stats : bool; default=False
        print a short version of the op2 stats
    compare : bool
        True : compares vectorized result to slow vectorized result
        False : doesn't run slow vectorized result
    debug : bool; default=False
        dunno???
    binary_debug : bool; default=False
        creates a very cryptic developer debug file showing exactly what was parsed
    quiet : bool; default=False
        dunno???
    stop_on_failure : bool; default=True
        is this used???
    """
    op2 = None
    op2_nv = None
    if subcases is None:
        subcases = []
    if exclude is None:
        exclude = []
    assert '.op2' in op2_filename.lower(), 'op2_filename=%s is not an OP2' % op2_filename
    is_passed = False

    fname_base = os.path.splitext(op2_filename)[0]
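    # splitext(...)[0] is the path with its extension stripped,
    # e.g. 'model.op2' -> 'model'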
    bdf_filename = fname_base + '.test_op2.bdf'

    if isinstance(subcases, string_types):
        if '_' in subcases:
            subcases = [int(i) for i in subcases.split('_')]
        else:
            subcases = [int(subcases)]
    if not quiet:
        print('subcases = %s' % subcases)

    debug_file = None
    model = os.path.splitext(op2_filename)[0]
    if binary_debug or write_op2:
        debug_file = model + '.debug.out'
    #print('debug_file = %r' % debug_file, os.getcwd())

    if make_geom and not is_geom:
        raise RuntimeError('make_geom=%s is not supported' % make_geom)
    if make_geom:
        op2 = OP2Geom(debug=debug)
        op2_nv = OP2Geom(debug=debug, debug_file=debug_file)
        op2_bdf = OP2Geom(debug=debug)
        op2_bdf.set_error_storage(nparse_errors=0, stop_on_parsing_error=True,
                                  nxref_errors=0, stop_on_xref_error=True)
    else:
        op2 = OP2(debug=debug)
        op2_nv = OP2(debug=debug, debug_file=debug_file) # have to double write this until
    op2_nv.use_vector = False

    op2.set_subcases(subcases)
    op2_nv.set_subcases(subcases)
    op2.remove_results(exclude)
    op2_nv.remove_results(exclude)

    if is_memory and check_memory:
        if is_linux: # linux
            kb = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
        else: # windows
            kb = get_memory_usage() / 1024
        mb = kb / 1024.
        print("Memory usage start: %s (KB); %.2f (MB)" % (kb, mb))

    try:
        #op2.read_bdf(op2.bdf_filename, includeDir=None, xref=False)
        if compare:
            op2_nv.read_op2(op2_filename)
        op2.read_op2(op2_filename)

        #op2a.get_op2_stats()
        if quiet:
            op2.get_op2_stats()
            op2.object_attributes()
            op2.object_methods()
        else:
            print("---stats for %s---" % op2_filename)
            print(op2.get_op2_stats(short=short_stats))
            op2.print_subcase_key()

        if write_bdf:
            assert make_geom, 'make_geom=%s' % make_geom
            op2._nastran_format = 'msc'
            op2.executive_control_lines = ['CEND\n']
            op2.write_bdf(bdf_filename, size=8)
            op2.log.debug('bdf_filename = %s' % bdf_filename)
            try:
                op2_bdf.read_bdf(bdf_filename)
            except:
                if dev and len(op2_bdf.card_count) == 0:
                    pass
                else:
                    raise
            #os.remove(bdf_filename)
        if compare:
            assert op2 == op2_nv

        if is_memory and check_memory:
            if is_linux: # linux
                kb = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
            else: # windows
                kb = get_memory_usage() / 1024
            mb = kb / 1024.
            print("Memory usage     end: %s (KB); %.2f (MB)" % (kb, mb))

        if write_f06:
            op2.write_f06(model + '.test_op2.f06', is_mag_phase=is_mag_phase,
                          is_sort1=not is_sort2, quiet=quiet, repr_check=True)
            if delete_f06:
                try:
                    os.remove(model + '.test_op2.f06')
                except:
                    pass

        # we put it down here so we don't blame the dataframe for real errors
        if is_pandas:
            op2.build_dataframe()
        #if compare:
            #op2_nv.build_dataframe()

        if write_op2:
            model = os.path.splitext(op2_filename)[0]
            op2.write_op2(model + '.test_op2.op2', is_mag_phase=is_mag_phase)
            if delete_f06:
                try:
                    os.remove(model + '.test_op2.op2')
                except:
                    pass

        if write_xlsx:
            model = os.path.splitext(op2_filename)[0]
            op2.write_xlsx(model + '.test_op2.xlsx', is_mag_phase=is_mag_phase)
            if delete_f06:
                try:
                    os.remove(model + '.test_op2.xlsx')
                except:
                    pass

        if is_memory and check_memory:
            op2 = None
            del op2_nv
            if is_linux: # linux
                kb = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
            else: # windows
                kb = get_memory_usage() / 1024
            mb = kb / 1024.
            print("Memory usage cleanup: %s (KB); %.2f (MB)" % (kb, mb))


        #table_names_f06 = parse_table_names_from_F06(op2.f06FileName)
        #table_names_op2 = op2.getTableNamesFromOP2()
        #print("subcases = ", op2.subcases)

        #if table_names_f06 != table_names_op2:
            #msg = 'table_names_f06=%s table_names_op2=%s' % (table_names_f06, table_names_op2)
            #raise RuntimeError(msg)
        #op2.case_control_deck.sol = op2.sol
        #print(op2.case_control_deck.get_op2_data())
        #print(op2.case_control_deck.get_op2_data())
        is_passed = True
    except KeyboardInterrupt:
        sys.stdout.flush()
        print_exc(file=sys.stdout)
        sys.stderr.write('**file=%s\n' % op2_filename)
        sys.exit('keyboard stop...')
    #except SortCodeError: # inherits from Runtime; comment this
        #is_passed = True

    #except RuntimeError: # the op2 is bad, not my fault; comment this
        #is_passed = True
        #if stop_on_failure:
            #raise
        #else:
            #is_passed = True
    #except RuntimeError:
        #pass
    #except ValueError:
        #pass
    #except FortranMarkerError:
        #pass
    except IOError: # missing file; this block should be uncommented
        #if stop_on_failure:
            #raise
        if not dev:
            raise
        is_passed = True
    #except UnicodeDecodeError:  # this block should be commented
        #is_passed = True
    #except NotImplementedError:  # this block should be commented
        #is_passed = True
    except FatalError:  # this block should be commented
        #if stop_on_failure:
            #raise
        if not dev:
            raise
        is_passed = True
    #except KeyError:  # this block should be commented
        #is_passed = True
    #except DeviceCodeError:  # this block should be commented
        #is_passed = True
    #except AssertionError:  # this block should be commented
        #is_passed = True
    #except RuntimeError: #invalid analysis code; this block should be commented
        #is_passed = True
    except SystemExit:
        #print_exc(file=sys.stdout)
        #sys.exit('stopping on sys.exit')
        raise
    #except NameError:  # variable isnt defined
    #    if stop_on_failure:
    #        raise
    #    else:
    #        is_passed = True
    #except IndexError: # this block should be commented
        #is_passed = True
    #except SyntaxError: #Param Parse; this block should be commented
        #if stop_on_failure:
            #raise
        #is_passed = True
    except:
        #print(e)
        if stop_on_failure:
            raise
        else:
            print_exc(file=sys.stdout)
            is_passed = False

    return op2, is_passed

Example 6

Project: KiCost
Source File: __main__.py
def main():

    parser = ap.ArgumentParser(
        description='Build cost spreadsheet for a KiCAD project.')
    parser.add_argument('-v', '--version',
                        action='version',
                        version='KiCost ' + __version__)
    parser.add_argument('-i', '--input',
                        nargs='?',
                        type=str,
                        metavar='file.xml',
                        help='Schematic BOM XML file.')
    parser.add_argument('-o', '--output',
                        nargs='?',
                        type=str,
                        metavar='file.xlsx',
                        help='Generated cost spreadsheet.')
    parser.add_argument('-f', '--fields',
                        nargs='+',
                        type=str,
                        default=[],
                        metavar='name',
                        help='''Specify the names of additional part fields to 
                            extract and insert in the global data section of 
                            the spreadsheet.''')
    parser.add_argument('-var', '--variant',
                        nargs='?',
                        type=str,
                        default='',
                        help='schematic variant name filter')
    parser.add_argument('-w', '--overwrite',
                        action='store_true',
                        help='Allow overwriting of an existing spreadsheet.')
    parser.add_argument('-s', '--serial',
                        action='store_true',
                        help='Do web scraping of part data using a single process.')
    parser.add_argument('-q', '--quiet',
                        action='store_true',
                        help='Enable quiet mode with no warnings.')
    parser.add_argument('-np', '--num_processes',
                        nargs='?',
                        type=int,
                        default=NUM_PROCESSES,
                        const=NUM_PROCESSES,
                        metavar='NUM_PROCESSES',
                        help='''Set the number of parallel 
                            processes used for web scraping part data.''')
    parser.add_argument('-ign', '--ignore_fields',
                        nargs='+',
                        default=[],
                        help='Declare part fields to ignore when grouping parts.',
                        metavar='name',
                        type=str)
    parser.add_argument('-d', '--debug',
                        nargs='?',
                        type=int,
                        default=None,
                        metavar='LEVEL',
                        help='Print debugging info. (Larger LEVEL means more info.)')

    args = parser.parse_args()

    # Set up logging.
    if args.debug is not None:
        log_level = logging.DEBUG + 1 - args.debug
    elif args.quiet is True:
        log_level = logging.ERROR
    else:
        log_level = logging.WARNING
    handler = logging.StreamHandler(sys.stdout)
    handler.setLevel(log_level)
    logger.addHandler(handler)
    logger.setLevel(log_level)

    # Set up spreadsheet output file.
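    # splitext(...)[0] strips whatever extension the user supplied, so
    # '.xlsx' can be appended uniformly below.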
    if args.output is None:
        # If no output file is given...
        if args.input is not None:
            # Send output to spreadsheet with name of input file.
            args.output = os.path.splitext(args.input)[0] + '.xlsx'
        else:
            # Send output to spreadsheet with name of this application.
            args.output = os.path.splitext(sys.argv[0])[0] + '.xlsx'
    else:
        # Output file was given. Make sure it has spreadsheet extension.
        args.output = os.path.splitext(args.output)[0] + '.xlsx'

    # Handle case where output is going into an existing spreadsheet file.
    if os.path.isfile(args.output):
        if not args.overwrite:
            logger.critical('''Output file {} already exists! Use the
                --overwrite option to replace it.'''.format(args.output))
            sys.exit(1)

    # Set XML input source.
    if args.input is None:
        # Get XML from the STDIN if no input file is given.
        args.input = sys.stdin
    else:
        # Otherwise get XML from the given file.
        args.input = os.path.splitext(args.input)[0] + '.xml'
        args.input = open(args.input)

    # Set number of processes to use for web scraping.
    if args.serial:
        num_processes = 1
    else:
        num_processes = args.num_processes

    kicost(in_file=args.input, out_filename=args.output,
        user_fields=args.fields, ignore_fields=args.ignore_fields, 
        variant=args.variant, num_processes=num_processes)

Example 7

Project: KiPart
Source File: __main__.py
def main():
    parser = ap.ArgumentParser(
        description=
        'Generate single & multi-unit schematic symbols for KiCad from a CSV file.')

    parser.add_argument('-v', '--version',
        action='version',
        version='KiPart ' + __version__)
    parser.add_argument(
        'input_files',
        nargs='+',
        type=str,
        metavar='file1.[csv|zip] file2.[csv|zip] ...',
        help='Files for parts in CSV format or as CSV files in .zip archives.')
    parser.add_argument('-r', '--reader',
        nargs='?',
        type=str.lower,
        choices=['generic', 'xilinxultra', 'xilinx7', 'xilinx6s', 'xilinx6v', 'psoc5lp', 'stm32cube'],
        default='generic',
        help='Name of function for reading the CSV file.')
    parser.add_argument(
        '-s', '--sort',
        nargs='?',
        type=str.lower,
        choices=['row', 'num', 'name'],
        default='row',
        help=
        'Sort the part pins by their entry order in the CSV file, their pin number, or their pin name.')
    parser.add_argument(
        '--reverse',
        action = 'store_true',
        help='Sort pins in reverse order.'
    )
    parser.add_argument(
        '--side',
        nargs='?',
        type=str.lower,
        choices=['left', 'right', 'top', 'bottom'],
        default='left',
        help='Which side to place the pins by default.'
    )
    parser.add_argument('-o', '--output',
        nargs='?',
        type=str,
        metavar='file.lib',
        help='Generated KiCad library for part.')
    parser.add_argument(
        '-f', '--fuzzy_match',
        action='store_true',
        help=
        'Use approximate string matching when looking-up the pin type, style and orientation.')
    parser.add_argument(
        '-b', '--bundle',
        action='store_true',
        help=
        'Bundle multiple, identically-named power, ground and no-connect pins each into a single schematic pin.')
    parser.add_argument('-a', '--append',
        action='store_true',
        help='Append to an existing part library.')
    parser.add_argument('-w', '--overwrite',
        action='store_true',
        help='Allow overwriting of an existing part library.')
    parser.add_argument(
        '-d', '--debug',
        nargs='?',
        type=int,
        default=0,
        metavar='LEVEL',
        help='Print debugging info. (Larger LEVEL means more info.)')

    args = parser.parse_args()

    if args.output is None:
        args.output = os.path.splitext(sys.argv[0])[0] + '.lib'
    else:
        args.output = os.path.splitext(args.output)[0] + '.lib'

    if os.path.isfile(args.output):
        if not args.overwrite and not args.append:
            print('Output file {} already exists! Use the --overwrite option to replace it or the --append option to append to it.'.format(
                args.output))
            sys.exit(1)

    def call_kipart(part_data_file):
        '''Helper routine for calling kipart.'''
        return kipart(reader_type=args.reader,
                   part_data_file=part_data_file,
                   lib_filename=args.output,
                   append_to_lib=append_to_lib,
                   sort_type=args.sort,
                   reverse=args.reverse,
                   fuzzy_match=args.fuzzy_match,
                   bundle=args.bundle,
                   debug_level=args.debug)

    DEFAULT_PIN.side = args.side

    append_to_lib = args.append
    for input_file in args.input_files:
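        # splitext(...)[-1] is the same as [1]: the extension with its
        # leading dot, or '' if there is none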
        file_ext = os.path.splitext(input_file)[-1]
        if file_ext == '.zip':
            zip_file = zipfile.ZipFile(input_file, 'r')
            zipped_files = zip_file.infolist()
            for zipped_file in zipped_files:
                if os.path.splitext(zipped_file.filename)[-1] in ['.csv', '.txt']:
                    with zip_file.open(zipped_file, 'r') as part_data_file:
                        part_data_file = io.TextIOWrapper(part_data_file)
                        append_to_lib = call_kipart(part_data_file)
        elif file_ext in ['.csv', '.txt']:
            with open(input_file, 'r') as part_data_file:
                append_to_lib = call_kipart(part_data_file)
        else:
            continue

Example 8

Project: youtube-dl-GUI
Source File: generic.py
    def _real_extract(self, url):
        if url.startswith('//'):
            return {
                '_type': 'url',
                'url': self.http_scheme() + url,
            }

        parsed_url = compat_urlparse.urlparse(url)
        if not parsed_url.scheme:
            default_search = self._downloader.params.get('default_search')
            if default_search is None:
                default_search = 'fixup_error'

            if default_search in ('auto', 'auto_warning', 'fixup_error'):
                if '/' in url:
                    self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
                    return self.url_result('http://' + url)
                elif default_search != 'fixup_error':
                    if default_search == 'auto_warning':
                        if re.match(r'^(?:url|URL)$', url):
                            raise ExtractorError(
                                'Invalid URL:  %r . Call youtube-dl like this:  youtube-dl -v "https://www.youtube.com/watch?v=BaW_jenozKc"  ' % url,
                                expected=True)
                        else:
                            self._downloader.report_warning(
                                'Falling back to youtube search for  %s . Set --default-search "auto" to suppress this warning.' % url)
                    return self.url_result('ytsearch:' + url)

            if default_search in ('error', 'fixup_error'):
                raise ExtractorError(
                    '%r is not a valid URL. '
                    'Set --default-search "ytsearch" (or run  youtube-dl "ytsearch:%s" ) to search YouTube'
                    % (url, url), expected=True)
            else:
                if ':' not in default_search:
                    default_search += ':'
                return self.url_result(default_search + url)

        url, smuggled_data = unsmuggle_url(url)
        force_videoid = None
        is_intentional = smuggled_data and smuggled_data.get('to_generic')
        if smuggled_data and 'force_videoid' in smuggled_data:
            force_videoid = smuggled_data['force_videoid']
            video_id = force_videoid
        else:
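            # derive the video id from the last URL path segment,
            # with any extension stripped off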
            video_id = os.path.splitext(url.rstrip('/').split('/')[-1])[0]

        self.to_screen('%s: Requesting header' % video_id)

        head_req = HEADRequest(url)
        head_response = self._request_webpage(
            head_req, video_id,
            note=False, errnote='Could not send HEAD request to %s' % url,
            fatal=False)

        if head_response is not False:
            # Check for redirect
            new_url = head_response.geturl()
            if url != new_url:
                self.report_following_redirect(new_url)
                if force_videoid:
                    new_url = smuggle_url(
                        new_url, {'force_videoid': force_videoid})
                return self.url_result(new_url)

        full_response = None
        if head_response is False:
            full_response = self._request_webpage(url, video_id)
            head_response = full_response

        # Check for direct link to a video
        content_type = head_response.headers.get('Content-Type', '')
        m = re.match(r'^(?P<type>audio|video|application(?=/ogg$))/(?P<format_id>.+)$', content_type)
        if m:
            upload_date = unified_strdate(
                head_response.headers.get('Last-Modified'))
            return {
                'id': video_id,
                'title': os.path.splitext(url_basename(url))[0],
                'direct': True,
                'formats': [{
                    'format_id': m.group('format_id'),
                    'url': url,
                    'vcodec': 'none' if m.group('type') == 'audio' else None
                }],
                'upload_date': upload_date,
            }

        if not self._downloader.params.get('test', False) and not is_intentional:
            self._downloader.report_warning('Falling back on generic information extractor.')

        if not full_response:
            full_response = self._request_webpage(url, video_id)

        # Maybe it's a direct link to a video?
        # Be careful not to download the whole thing!
        first_bytes = full_response.read(512)
        if not re.match(r'^\s*<', first_bytes.decode('utf-8', 'replace')):
            self._downloader.report_warning(
                'URL could be a direct video link, returning it as such.')
            upload_date = unified_strdate(
                head_response.headers.get('Last-Modified'))
            return {
                'id': video_id,
                'title': os.path.splitext(url_basename(url))[0],
                'direct': True,
                'url': url,
                'upload_date': upload_date,
            }

        webpage = self._webpage_read_content(
            full_response, url, video_id, prefix=first_bytes)

        self.report_extraction(video_id)

        # Is it an RSS feed?
        try:
            doc = parse_xml(webpage)
            if doc.tag == 'rss':
                return self._extract_rss(url, video_id, doc)
        except compat_xml_parse_error:
            pass

        # Is it a Camtasia project?
        camtasia_res = self._extract_camtasia(url, video_id, webpage)
        if camtasia_res is not None:
            return camtasia_res

        # Sometimes the embedded video player is hidden behind percent
        # encoding (e.g. https://github.com/rg3/youtube-dl/issues/2448).
        # Unescaping the whole page allows us to handle those cases in a
        # generic way.
        webpage = compat_urllib_parse.unquote(webpage)

        # it's tempting to parse this further, but you would
        # have to take into account all the variations like
        #   Video Title - Site Name
        #   Site Name | Video Title
        #   Video Title - Tagline | Site Name
        # and so on and so forth; it's just not practical
        video_title = self._html_search_regex(
            r'(?s)<title>(.*?)</title>', webpage, 'video title',
            default='video')

        # Try to detect age limit automatically
        age_limit = self._rta_search(webpage)
        # And then there are the jokers who advertise that they use RTA,
        # but actually don't.
        AGE_LIMIT_MARKERS = [
            r'Proudly Labeled <a href="http://www.rtalabel.org/" title="Restricted to Adults">RTA</a>',
        ]
        if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS):
            age_limit = 18

        # video uploader is domain name
        video_uploader = self._search_regex(
            r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')

        # Helper method
        def _playlist_from_matches(matches, getter=None, ie=None):
            urlrs = orderedSet(
                self.url_result(self._proto_relative_url(getter(m) if getter else m), ie)
                for m in matches)
            return self.playlist_result(
                urlrs, playlist_id=video_id, playlist_title=video_title)

        # Look for BrightCove:
        bc_urls = BrightcoveIE._extract_brightcove_urls(webpage)
        if bc_urls:
            self.to_screen('Brightcove video detected.')
            entries = [{
                '_type': 'url',
                'url': smuggle_url(bc_url, {'Referer': url}),
                'ie_key': 'Brightcove'
            } for bc_url in bc_urls]

            return {
                '_type': 'playlist',
                'title': video_title,
                'id': video_id,
                'entries': entries,
            }

        # Look for embedded (iframe) Vimeo player
        mobj = re.search(
            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/.+?)\1', webpage)
        if mobj:
            player_url = unescapeHTML(mobj.group('url'))
            surl = smuggle_url(player_url, {'Referer': url})
            return self.url_result(surl)

        # Look for embedded (swf embed) Vimeo player
        mobj = re.search(
            r'<embed[^>]+?src="((?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage)
        if mobj:
            return self.url_result(mobj.group(1))

        # Look for embedded YouTube player
        matches = re.findall(r'''(?x)
            (?:
                <iframe[^>]+?src=|
                data-video-url=|
                <embed[^>]+?src=|
                embedSWF\(?:\s*|
                new\s+SWFObject\(
            )
            (["\'])
                (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
                (?:embed|v|p)/.+?)
            \1''', webpage)
        if matches:
            return _playlist_from_matches(
                matches, lambda m: unescapeHTML(m[1]))

        # Look for lazyYT YouTube embed
        matches = re.findall(
            r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
        if matches:
            return _playlist_from_matches(matches, lambda m: unescapeHTML(m))

        # Look for embedded Dailymotion player
        matches = re.findall(
            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/embed/video/.+?)\1', webpage)
        if matches:
            return _playlist_from_matches(
                matches, lambda m: unescapeHTML(m[1]))

        # Look for embedded Dailymotion playlist player (#3822)
        m = re.search(
            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.[a-z]{2,3}/widget/jukebox\?.+?)\1', webpage)
        if m:
            playlists = re.findall(
                r'list\[\]=/playlist/([^/]+)/', unescapeHTML(m.group('url')))
            if playlists:
                return _playlist_from_matches(
                    playlists, lambda p: '//dailymotion.com/playlist/%s' % p)

        # Look for embedded Wistia player
        match = re.search(
            r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
        if match:
            embed_url = self._proto_relative_url(
                unescapeHTML(match.group('url')))
            return {
                '_type': 'url_transparent',
                'url': embed_url,
                'ie_key': 'Wistia',
                'uploader': video_uploader,
                'title': video_title,
                'id': video_id,
            }

        match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
        if match:
            return {
                '_type': 'url_transparent',
                'url': 'http://fast.wistia.net/embed/iframe/{0:}'.format(match.group('id')),
                'ie_key': 'Wistia',
                'uploader': video_uploader,
                'title': video_title,
                'id': match.group('id')
            }

        # Look for embedded blip.tv player
        mobj = re.search(r'<meta\s[^>]*https?://api\.blip\.tv/\w+/redirect/\w+/(\d+)', webpage)
        if mobj:
            return self.url_result('http://blip.tv/a/a-' + mobj.group(1), 'BlipTV')
        mobj = re.search(r'<(?:iframe|embed|object)\s[^>]*(https?://(?:\w+\.)?blip\.tv/(?:play/|api\.swf#)[a-zA-Z0-9_]+)', webpage)
        if mobj:
            return self.url_result(mobj.group(1), 'BlipTV')

        # Look for embedded condenast player
        matches = re.findall(
            r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="(https?://player\.cnevids\.com/embed/[^"]+")',
            webpage)
        if matches:
            return {
                '_type': 'playlist',
                'entries': [{
                    '_type': 'url',
                    'ie_key': 'CondeNast',
                    'url': ma,
                } for ma in matches],
                'title': video_title,
                'id': video_id,
            }

        # Look for Bandcamp pages with custom domain
        mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
        if mobj is not None:
            burl = unescapeHTML(mobj.group(1))
            # Don't set the extractor because it can be a track url or an album
            return self.url_result(burl)

        # Look for embedded Vevo player
        mobj = re.search(
            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1', webpage)
        if mobj is not None:
            return self.url_result(mobj.group('url'))

        # Look for Ooyala videos
        mobj = (re.search(r'player.ooyala.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
                re.search(r'OO.Player.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage))
        if mobj is not None:
            return OoyalaIE._build_url_result(mobj.group('ec'))

        # Look for Aparat videos
        mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
        if mobj is not None:
            return self.url_result(mobj.group(1), 'Aparat')

        # Look for MPORA videos
        mobj = re.search(r'<iframe .*?src="(http://mpora\.(?:com|de)/videos/[^"]+)"', webpage)
        if mobj is not None:
            return self.url_result(mobj.group(1), 'Mpora')

        # Look for embedded NovaMov-based player
        mobj = re.search(
            r'''(?x)<(?:pagespeed_)?iframe[^>]+?src=(["\'])
                    (?P<url>http://(?:(?:embed|www)\.)?
                        (?:novamov\.com|
                           nowvideo\.(?:ch|sx|eu|at|ag|co)|
                           videoweed\.(?:es|com)|
                           movshare\.(?:net|sx|ag)|
                           divxstage\.(?:eu|net|ch|co|at|ag))
                        /embed\.php.+?)\1''', webpage)
        if mobj is not None:
            return self.url_result(mobj.group('url'))

        # Look for embedded Facebook player
        mobj = re.search(
            r'<iframe[^>]+?src=(["\'])(?P<url>https://www\.facebook\.com/video/embed.+?)\1', webpage)
        if mobj is not None:
            return self.url_result(mobj.group('url'), 'Facebook')

        # Look for embedded VK player
        mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
        if mobj is not None:
            return self.url_result(mobj.group('url'), 'VK')

        # Look for embedded ivi player
        mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
        if mobj is not None:
            return self.url_result(mobj.group('url'), 'Ivi')

        # Look for embedded Huffington Post player
        mobj = re.search(
            r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
        if mobj is not None:
            return self.url_result(mobj.group('url'), 'HuffPost')

        # Look for embed.ly
        mobj = re.search(r'class=["\']embedly-card["\'][^>]href=["\'](?P<url>[^"\']+)', webpage)
        if mobj is not None:
            return self.url_result(mobj.group('url'))
        mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
        if mobj is not None:
            return self.url_result(compat_urllib_parse.unquote(mobj.group('url')))

        # Look for funnyordie embed
        matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
        if matches:
            return _playlist_from_matches(
                matches, getter=unescapeHTML, ie='FunnyOrDie')

        # Look for BBC iPlayer embed
        matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
        if matches:
            return _playlist_from_matches(matches, ie='BBCCoUk')

        # Look for embedded RUTV player
        rutv_url = RUTVIE._extract_url(webpage)
        if rutv_url:
            return self.url_result(rutv_url, 'RUTV')

        # Look for embedded TED player
        mobj = re.search(
            r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage)
        if mobj is not None:
            return self.url_result(mobj.group('url'), 'TED')

        # Look for embedded Ustream videos
        mobj = re.search(
            r'<iframe[^>]+?src=(["\'])(?P<url>http://www\.ustream\.tv/embed/.+?)\1', webpage)
        if mobj is not None:
            return self.url_result(mobj.group('url'), 'Ustream')

        # Look for embedded arte.tv player
        mobj = re.search(
            r'<script [^>]*?src="(?P<url>http://www\.arte\.tv/playerv2/embed[^"]+)"',
            webpage)
        if mobj is not None:
            return self.url_result(mobj.group('url'), 'ArteTVEmbed')

        # Look for embedded smotri.com player
        smotri_url = SmotriIE._extract_url(webpage)
        if smotri_url:
            return self.url_result(smotri_url, 'Smotri')

        # Look for embedded soundcloud player
        mobj = re.search(
            r'<iframe\s+(?:[a-zA-Z0-9_-]+="[^"]+"\s+)*src="(?P<url>https?://(?:w\.)?soundcloud\.com/player[^"]+)"',
            webpage)
        if mobj is not None:
            url = unescapeHTML(mobj.group('url'))
            return self.url_result(url)

        # Look for embedded vulture.com player
        mobj = re.search(
            r'<iframe src="(?P<url>https?://video\.vulture\.com/[^"]+)"',
            webpage)
        if mobj is not None:
            url = unescapeHTML(mobj.group('url'))
            return self.url_result(url, ie='Vulture')

        # Look for embedded mtvservices player
        mobj = re.search(
            r'<iframe src="(?P<url>https?://media\.mtvnservices\.com/embed/[^"]+)"',
            webpage)
        if mobj is not None:
            url = unescapeHTML(mobj.group('url'))
            return self.url_result(url, ie='MTVServicesEmbedded')

        # Look for embedded yahoo player
        mobj = re.search(
            r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:screen|movies)\.yahoo\.com/.+?\.html\?format=embed)\1',
            webpage)
        if mobj is not None:
            return self.url_result(mobj.group('url'), 'Yahoo')

        # Look for embedded sbs.com.au player
        mobj = re.search(
            r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:www\.)sbs\.com\.au/ondemand/video/single/.+?)\1',
            webpage)
        if mobj is not None:
            return self.url_result(mobj.group('url'), 'SBS')

        # Look for embedded Cinchcast player
        mobj = re.search(
            r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.cinchcast\.com/.+?)\1',
            webpage)
        if mobj is not None:
            return self.url_result(mobj.group('url'), 'Cinchcast')

        mobj = re.search(
            r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
            webpage)
        if mobj is not None:
            return self.url_result(mobj.group('url'), 'MLB')

        mobj = re.search(
            r'<iframe[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL,
            webpage)
        if mobj is not None:
            return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')

        mobj = re.search(
            r'<iframe[^>]+src="(?P<url>https?://new\.livestream\.com/[^"]+/player[^"]+)"',
            webpage)
        if mobj is not None:
            return self.url_result(mobj.group('url'), 'Livestream')

        def check_video(vurl):
            vpath = compat_urlparse.urlparse(vurl).path
            vext = determine_ext(vpath)
            return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml')

        def filter_video(urls):
            return list(filter(check_video, urls))

        # Start with something easy: JW Player in SWFObject
        found = filter_video(re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage))
        if not found:
            # Look for gorilla-vid style embedding
            found = filter_video(re.findall(r'''(?sx)
                (?:
                    jw_plugins|
                    JWPlayerOptions|
                    jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup
                )
                .*?file\s*:\s*["\'](.*?)["\']''', webpage))
        if not found:
            # Broaden the search a little bit
            found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage))
        if not found:
            # Broaden the findall a little bit: JWPlayer JS loader
            found = filter_video(re.findall(
                r'[^A-Za-z0-9]?file["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage))
        if not found:
            # Flow player
            found = filter_video(re.findall(r'''(?xs)
                flowplayer\("[^"]+",\s*
                    \{[^}]+?\}\s*,
                    \s*{[^}]+? ["']?clip["']?\s*:\s*\{\s*
                        ["']?url["']?\s*:\s*["']([^"']+)["']
            ''', webpage))
        if not found:
            # Try to find twitter cards info
            found = filter_video(re.findall(
                r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage))
        if not found:
            # We look for Open Graph info:
            # We have to match any number of spaces between elements; some sites try to align them (e.g. statigr.am)
            m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
            # Only look in og:video if the MIME type is a video; don't try if it's a Flash player:
            if m_video_type:
                found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
        if not found:
            # HTML5 video
            found = re.findall(r'(?s)<video[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage)
        if not found:
            found = re.search(
                r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
                r'(?:[a-z-]+="[^"]+"\s+)*?content="[0-9]{,2};url=\'?([^\'"]+)',
                webpage)
            if found:
                new_url = found.group(1)
                self.report_following_redirect(new_url)
                return {
                    '_type': 'url',
                    'url': new_url,
                }
        if not found:
            raise UnsupportedError(url)

        entries = []
        for video_url in found:
            video_url = compat_urlparse.urljoin(url, video_url)
            video_id = compat_urllib_parse.unquote(os.path.basename(video_url))

            # Sometimes, jwplayer extraction will result in a YouTube URL
            if YoutubeIE.suitable(video_url):
                entries.append(self.url_result(video_url, 'Youtube'))
                continue

            # here's a fun little line of code for you:
            video_id = os.path.splitext(video_id)[0]

            entries.append({
                'id': video_id,
                'url': video_url,
                'uploader': video_uploader,
                'title': video_title,
                'age_limit': age_limit,
            })

        if len(entries) == 1:
            return entries[0]
        else:
            for num, e in enumerate(entries, start=1):
                e['title'] = '%s (%d)' % (e['title'], num)
            return {
                '_type': 'playlist',
                'entries': entries,
            }
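
A minimal standalone sketch of the splitext idiom used twice above (stripping an extension to get a clean title or id). The helper name and sample URL are illustrative, and youtube-dl's url_basename helper is approximated here with the Python 3 standard library:

import os.path
import posixpath
from urllib.parse import urlparse

def title_from_url(url):
    # Drop the query string and fragment, take the last path component,
    # then split off the extension -- as in the 'direct link' branch above.
    path = urlparse(url).path
    return os.path.splitext(posixpath.basename(path))[0]

print(title_from_url('http://example.com/media/clip.mp4?token=abc'))  # clip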

Example 9

Project: orange
Source File: orngDoc.py
View license
    def saveDocumentAsApp(self, asTabs = 1):
        # get filename
        extension = sys.platform == "win32" and ".pyw" or ".py"
        appName = (os.path.splitext(self.schemaName)[0] or "Schema") + extension
        appPath = os.path.exists(self.canvasDlg.settings["saveApplicationDir"]) and self.canvasDlg.settings["saveApplicationDir"] or self.schemaPath
        qname = QFileDialog.getSaveFileName(self, "Save Orange Schema as Application", os.path.join(appPath, appName) , "Orange Scripts (*%s)" % extension)
        if qname.isEmpty(): return
        qname = unicode(qname)
        (appPath, appName) = os.path.split(qname)
        appNameWithoutExt = os.path.splitext(appName)[0]
        if os.path.splitext(appName)[1].lower() not in [".py", ".pyw"]: appName = appNameWithoutExt + extension
        self.canvasDlg.settings["saveApplicationDir"] = appPath

        saveDlg = saveApplicationDlg(None)

        # add widget captions
        for instance in self.signalManager.widgets:
            widget = None
            for i in range(len(self.widgets)):
                if self.widgets[i].instance == instance: saveDlg.insertWidgetName(self.widgets[i].caption)

        if saveDlg.exec_() == QDialog.Rejected:
            return

        #format string with file content
        t = "    "  # instead of tab
        n = "\n"

        start = """#!/usr/bin/env python

# This file is automatically created by Orange Canvas and contains an Orange schema

import orngEnviron
import orngDebugging
import sys, os, cPickle, orange, orngSignalManager, OWGUI
from OWBaseWidget import *

class GUIApplication(OWBaseWidget):
    def __init__(self,parent=None):
        self.signalManager = orngSignalManager.SignalManager()
        OWBaseWidget.__init__(self, title = '%s', signalManager = self.signalManager)
        self.widgets = {}
        self.loadSettings()
        """ % (appNameWithoutExt)

        if asTabs == 1:
            start += """
        self.tabs = QTabWidget(self)
        self.setLayout(QVBoxLayout())
        self.layout().addWidget(self.tabs)
        self.resize(800,600)"""
        else:
            start += """
        self.setLayout(QVBoxLayout())
        self.box = OWGUI.widgetBox(self, 'Widgets')"""


        links = "# add widget signals\n"+t+t + "self.signalManager.setFreeze(1)\n" +t+t
        widgetParameters = ""

        # gui for shown widgets
        for widgetName in saveDlg.shownWidgetList:    # + saveDlg.hiddenWidgetList
            if widgetName != "[Separator]":
                widget = None
                for i in range(len(self.widgets)):
                    if self.widgets[i].caption == widgetName: widget = self.widgets[i]

                shown = widgetName in saveDlg.shownWidgetList
                widgetParameters += "self.createWidget('%s', '%s', '%s', %d, self.signalManager)\n" % ("%s.%s" % (widget.widgetInfo.module, widget.widgetInfo.fileName) if widget.widgetInfo.module else widget.widgetInfo.fileName, widget.widgetInfo.icon, widget.caption, shown) +t+t
            else:
                if not asTabs:
                    widgetParameters += "self.box.layout().addSpacing(10)\n" +t+t

        for line in self.lines:
            if not line.getEnabled(): continue
            for (outName, inName) in line.getSignals():
                links += "self.signalManager.addLink( self.widgets['" + line.outWidget.caption+ "'], self.widgets['" + line.inWidget.caption+ "'], '" + outName + "', '" + inName + "', 1)\n" +t+t

        links += "self.signalManager.setFreeze(0)\n" +t+t
        if not asTabs:
            widgetParameters += """
        box2 = OWGUI.widgetBox(self, 1)
        exitButton = OWGUI.button(box2, self, "Exit", callback = self.accept)
        self.layout().addStretch(100)"""

        if asTabs:
            guiText = "OWGUI.createTabPage(self.tabs, caption, widget)"
        else:
            guiText = "OWGUI.button(self.box, self, caption, callback = widget.reshow)"

        progress = """
        statusBar = QStatusBar(self)
        self.layout().addWidget(statusBar)
        self.caption = QLabel('', statusBar)
        self.caption.setMaximumWidth(230)
        self.progress = QProgressBar(statusBar)
        self.progress.setMaximumWidth(100)
        self.status = QLabel("", statusBar)
        self.status.setSizePolicy(QSizePolicy(QSizePolicy.Preferred, QSizePolicy.Preferred))
        statusBar.addWidget(self.progress)
        statusBar.addWidget(self.caption)
        statusBar.addWidget(self.status)"""

        handlerFuncts = """
    def createWidget(self, fname, iconName, caption, shown, signalManager):
        widgetSettings = cPickle.loads(self.strSettings[caption])
        m = __import__(fname)
        widget = m.__dict__[fname].__new__(m.__dict__[fname], _settingsFromSchema = widgetSettings)
        widget.__init__(signalManager=signalManager)
        widget.setEventHandler(self.eventHandler)
        widget.setProgressBarHandler(self.progressHandler)
        widget.setWidgetIcon(iconName)
        widget.setWindowTitle(caption)
        self.signalManager.addWidget(widget)
        self.widgets[caption] = widget
        if shown: %s
        for dlg in getattr(widget, "wdChildDialogs", []):
            dlg.setEventHandler(self.eventHandler)
            dlg.setProgressBarHandler(self.progressHandler)

    def eventHandler(self, text, eventVerbosity = 1):
        if orngDebugging.orngVerbosity >= eventVerbosity:
            self.status.setText(text)

    def progressHandler(self, widget, val):
        if val < 0:
            self.caption.setText("<nobr>Processing: <b>" + str(widget.captionTitle) + "</b></nobr>")
            self.progress.setValue(0)
        elif val >100:
            self.caption.setText("")
            self.progress.reset()
        else:
            self.progress.setValue(val)
            self.update()

    def loadSettings(self):
        try:
            file = open("%s", "r")
            self.strSettings = cPickle.load(file)
            file.close()

        except:
            print "unable to load settings"
            pass

    def closeEvent(self, ev):
        OWBaseWidget.closeEvent(self, ev)
        if orngDebugging.orngDebuggingEnabled: return
        strSettings = {}
        for (name, widget) in self.widgets.items():
            widget.synchronizeContexts()
            strSettings[name] = widget.saveSettingsStr()
            widget.close()
        file = open("%s", "w")
        cPickle.dump(strSettings, file)
        file.close()

if __name__ == "__main__":
    application = QApplication(sys.argv)
    ow = GUIApplication()
    ow.show()
    # comment the next line if in debugging mode and are interested only in output text in 'signalManagerOutput.txt' file
    application.exec_()
        """ % (guiText, appNameWithoutExt + ".sav", appNameWithoutExt + ".sav")


        #save app
        f = open(os.path.join(appPath, appName), "wt")
        f.write(start + n+n+t+t+ widgetParameters + n+t+t + progress + n+n+t+t + links + n + handlerFuncts)
        f.close()

        # save widget settings
        list = {}
        for widget in self.widgets:
            list[widget.caption] = widget.instance.saveSettingsStr()

        f = open(os.path.join(appPath, appNameWithoutExt) + ".sav", "wt")
        cPickle.dump(list, f)
        f.close()
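
saveDocumentAsApp uses splitext both to build a default application name and to force a .py/.pyw extension on whatever the user typed. A short sketch of the second idiom; ensure_extension is a hypothetical helper, not part of Orange:

import os.path

def ensure_extension(filename, default_ext, allowed=('.py', '.pyw')):
    root, ext = os.path.splitext(filename)
    if ext.lower() not in allowed:
        return root + default_ext  # swap in the expected extension
    return filename

print(ensure_extension('MySchema.txt', '.pyw'))  # MySchema.pyw
print(ensure_extension('MySchema.py', '.pyw'))   # MySchema.py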

Example 11

Project: cgat
Source File: tophat_segment_juncs.py
View license
def main( argv = None ):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.
    """

    if DISABLE:
        print "# tophat_segment_juncs.py disabled"
        argv[0] = "segment_juncs.original"
        runCommand( argv , "segment_juncs.log" )
        return 0

    E.Start( no_parsing = True )

    # collect arguments
    parser = argparse.ArgumentParser(description='Process tophat options.')
    parser.add_argument('-p', '--num-threads', metavar='N', type=int, dest='nthreads',
                         help='number of threads')
    parser.add_argument('--version', action='version', version='%(prog)s')
    options, args = parser.parse_known_args( argv[1:] )

    E.info( "parallelizing segment juncs with %i threads" % options.nthreads )
    
    x = argv.index("--ium-reads") + 1
    
    all_options = argv[1:x]

    (input_missing_reads, input_genome, 
     output_junctions, 
     output_insertions, output_deletions,
     input_left_all_reads,
     input_left_all_map,
     input_left_segments_maps ) = argv[x:x + 8]

    input_left_segments_maps = input_left_segments_maps.split(",")

    if len(argv) > x + 8:
        ( input_right_all_reads,
          input_right_all_map,
          input_right_segments_maps ) = argv[x+8:x+11]
        input_right_segments_maps = input_right_segments_maps.split(",")
    else:
        input_right_all_reads = ""
        input_right_all_map = ""
        input_right_segments_maps = []

    keys = set()
    
    # some filenames might appear multiple times
    files_to_split = set([input_left_all_map, \
                              input_right_all_map ] +\
                             input_left_segments_maps +\
                             input_right_segments_maps )

    E.info( "splitting %i files" % len(files_to_split))

    ## split all map files by chromosome
    for filename in files_to_split:
        if filename == "": continue
        E.info("splitting %s" % filename )
        base, ext = os.path.splitext( filename )

        f = glob.glob( "%s.input.*%s" % (filename, ext) )
        if f:
            E.info("files already exist - skipping" )
            keys.update( [ re.match("%s.input.(\S+)%s" % (filename,ext), x ).groups()[0] for x in f ] )
            continue
        
        infile = IOTools.openFile( filename )

        outfiles = IOTools.FilePool( filename + ".input.%s" + ext )

        for line in infile:
            key = line.split("\t")[2]
            keys.add( key )
            outfiles.write( key, line )

        outfiles.close()

    # keys = set( ["chr1", "chr2", "chr3", "chr4", "chr5",
    #              "chr6", "chr7", "chr8", "chr9", "chr10",
    #              "chr11", "chr12", "chr13", "chr14", "chr15",
    #              "chr16", "chr17", "chr18", "chr19", "chr20",
    #              "chr21", "chr22", "chrX", "chrY", "chrM" ] )

    E.info( "working on %i contigs: %s" % (len(keys), list(keys)))

    pool = multiprocessing.pool.ThreadPool( options.nthreads )
    #pool = threadpool.ThreadPool( THREADS )

    tmpdir = os.path.dirname( input_left_all_reads )
    logdir = os.path.join( tmpdir[:-len("tmp")], "logs" )

    if not os.path.exists(logdir):
        raise IOError( "can not find logdir %s" % logdir )

    args = []
    for key in keys:

        def modout( old, key ):
            if not old:return ""
            _, ext = os.path.splitext( old )
            return old + ".output.%s%s" % (key, ext)

        def modin( old, key ):
            if not old:return ""
            _, ext = os.path.splitext( old )
            return old + ".input.%s%s" % (key,ext)

        def modgenome( old, key ):
            dirname, filename = os.path.split(old)
            genome, ext = os.path.splitext( filename )
            if genome.lower().endswith("_cs"): genome = genome[:-3]
            new = os.path.join( dirname, genome + ".perchrom", key + ext )
            if not os.path.exists(new):
                raise ValueError( "can not find chromoseme file %s" % new )
            return new

        cmd = ["segment_juncs"] +\
            all_options +\
            [input_missing_reads,  \
                 modgenome(input_genome,key), \
                 modout(output_junctions,key),\
                 modout(output_insertions,key),\
                 modout(output_deletions,key),\
                 input_left_all_reads,\
                 modin( input_left_all_map, key ),\
                 ",".join( [ modin( x, key ) for x in input_left_segments_maps ] ),\
                 input_right_all_reads,\
                 modin( input_right_all_map, key ),\
                 ",".join( [ modin( x, key ) for x in input_right_segments_maps ] ) ]


        logfile = os.path.join(logdir, "segment_juncs_%s.log" % key )
        args.append( (cmd,logfile) )

    E.info( "submitting %i jobs" % len(keys) )

    pool.map( runCommand, args, chunksize = 1 )
    pool.close()
    pool.join()

    E.info("all jobs finished successfully" )

    E.info("merging results")
    ## merge results
    for filename in (output_junctions, output_insertions, output_deletions):
        outfile = open(filename, "w")
        for inf in glob.glob( filename + ".output.*" ):
            infile = open( inf, "r" )
            outfile.write( infile.read() )
            infile.close()
        outfile.close()
        
    E.info("results merged")

    ## cleaning up is done automatically by tophat
    E.info("cleaning up" )
    for f in glob.glob( os.path.join( tmpdir, "*.output.*") ) +\
            glob.glob( os.path.join( tmpdir, "*.input.*") ):
        os.remove(f)

    ## write footer and output benchmark information.
    E.Stop()
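
The modin/modout helpers above splice a per-chromosome key in front of the original extension so each split file stays recognizable. The same naming scheme in isolation; per_key_filename and the sample name are illustrative:

import os.path

def per_key_filename(filename, key, tag='input'):
    _, ext = os.path.splitext(filename)
    # 'reads.map' + 'chr1' -> 'reads.map.input.chr1.map'
    return '%s.%s.%s%s' % (filename, tag, key, ext)

print(per_key_filename('reads.map', 'chr1'))  # reads.map.input.chr1.map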

Example 12

Project: avclass
Source File: avclass_labeler.py
View license
def main(args):
    # Select hash used to identify sample, by default MD5
    hash_type = args.hash if args.hash else 'md5'

    # If ground truth provided, read it from file
    gt_dict = {}
    if args.gt:
        with open(args.gt, 'r') as gt_fd:
            for line in gt_fd:
                gt_hash, family = map(str.lower, line.strip().split('\t', 1))
                gt_dict[gt_hash] = family

        # Guess type of hash in ground truth file
        hash_type = guess_hash(gt_dict.keys()[0])

    # Create AvLabels object
    av_labels = AvLabels(args.gen, args.alias, args.av)

    # Select input file with AV labels
    ifile = args.vt if args.vt else args.lb

    # If verbose, open log file
    if args.verbose:
        log_filename = os.path.basename(os.path.splitext(ifile)[0]) + \
                            '.verbose'
        verb_fd = open(log_filename, 'w+')

    # Process each JSON
    vt_all = 0
    vt_empty = 0
    singletons = 0
    with open(ifile, 'r') as fd:
        first_token_dict = {}
        token_count_map = {}
        pair_count_map = {}
        token_family_map = {}

        for line in fd:

            # If blank line, skip
            if line == '\n':
                continue

            # Debug info
            if vt_all % 100 == 0:
                sys.stderr.write('\r[-] %d JSON read' % vt_all)
                sys.stderr.flush()
            vt_all += 1

            # Read JSON line and extract sample info (i.e., hashes and labels)
            vt_rep = json.loads(line)
            sample_info = av_labels.get_sample_info(vt_rep, args.vt)
            name = getattr(sample_info, hash_type)

            # If the VT report has no AV labels, continue
            if not sample_info[3]:
                vt_empty += 1
                sys.stderr.write('\nNo AV labels for %s\n' % name)
                sys.stderr.flush()
                continue
            
            # Get the distinct tokens from all the av labels in the report
            # And print them. If not verbose, print the first token.
            # If verbose, print the whole list
            try:
                # Get distinct tokens from AV labels
                tokens = av_labels.get_family_ranking(sample_info).items()

                # If alias detection, populate maps
                if args.aliasdetect:
                    prev_tok = ""
                    for entry in tokens:
                        curr_tok = entry[0]
                        curr_count = token_count_map.get(curr_tok)
                        if curr_count:
                            token_count_map[curr_tok] = curr_count + 1
                        else:
                            token_count_map[curr_tok] = 1
                        if prev_tok != "":
                            if prev_tok < curr_tok:
                                pair = (prev_tok,curr_tok) 
                            else: 
                                pair = (curr_tok,prev_tok)
                            pair_count = pair_count_map.get(pair)
                            if pair_count:
                                pair_count_map[pair] = pair_count + 1
                            else:
                                pair_count_map[pair] = 1
                        prev_tok = curr_tok

                # If generic token detection, populate map
                if args.gendetect and args.gt:
                    for entry in tokens:
                        curr_tok = entry[0]
                        curr_fam_set = token_family_map.get(curr_tok)
                        family = gt_dict[name] if name in gt_dict else None
                        if curr_fam_set and family:
                            curr_fam_set.add(family)
                        elif family:
                            token_family_map[curr_tok] = set([family])  # one family name; set(family) would split the string into characters

                # Top candidate is most likely family name
                if tokens:
                    family = tokens[0][0]
                else:
                    family = "SINGLETON:" + name
                    singletons += 1

                # Check if sample is PUP, if requested
                if args.pup:
                    if av_labels.is_pup(sample_info[3]):
                        is_pup_str = "\t1"
                    else:
                        is_pup_str = "\t0"
                else:
                    is_pup_str =  ""

                # Build family map for precision, recall, computation
                first_token_dict[name] = family

                # Get ground truth family, if available
                if args.gt:
                    gt_family = '\t' + gt_dict[name] if name in gt_dict else ""
                else:
                    gt_family = ""

                # Print family (and ground truth if available) to stdout
                print '%s\t%s%s%s' % (name, family, gt_family, is_pup_str)

                # If verbose, print tokens (and ground truth if available) 
                # to log file
                if args.verbose:
                    verb_fd.write('%s\t%s%s%s\n' % (
                        name, tokens, gt_family, is_pup_str))

            except:
                traceback.print_exc(file=sys.stderr)
                continue

        # Debug info
        sys.stderr.write('\r[-] %d JSON read' % vt_all)
        sys.stderr.flush()
        sys.stderr.write('\n')

    # Print statistics
    sys.stderr.write(
            "[-] Samples: %d NoLabels: %d Singletons: %d "
            "GroundTruth: %d\n" % (
                vt_all, vt_empty, singletons, len(gt_dict)))

    # If ground truth, print precision, recall, and F1-measure
    if args.gt and args.eval:
        precision, recall, fmeasure = \
                    ec.eval_precision_recall_fmeasure(gt_dict,
                                                      first_token_dict)
        sys.stderr.write( \
            "Precision: %.2f\tRecall: %.2f\tF1-Measure: %.2f\n" % \
                          (precision, recall, fmeasure))

    # If generic token detection, print map
    if args.gendetect:
        # Open generic tokens file
        gen_filename = os.path.basename(os.path.splitext(ifile)[0]) + \
                            '.gen'
        gen_fd = open(gen_filename, 'w+')
        # Output header line
        gen_fd.write("Token\t#Families\n")
        sorted_pairs = sorted(token_family_map.iteritems(), 
                              key=lambda x: len(x[1]) if x[1] else 0, 
                              reverse=True)
        for (t,fset) in sorted_pairs:
            gen_fd.write("%s\t%d\n" % (t, len(fset)))

        # Close generic tokens file
        gen_fd.close()

    # If alias detection, print map
    if args.aliasdetect:
        # Open alias file
        alias_filename = os.path.basename(os.path.splitext(ifile)[0]) + \
                            '.alias'
        alias_fd = open(alias_filename, 'w+')
        # Sort token pairs by number of times they appear together
        sorted_pairs = sorted(
                pair_count_map.items(), key=itemgetter(1))
        # Output header line
        alias_fd.write("# t1\tt2\t|t1|\t|t2|\t|t1^t2|\t|t1^t2|/|t1|\n")
        # Compute token pair statistic and output to alias file
        for (t1,t2),c in sorted_pairs:
            n1 = token_count_map[t1]
            n2 = token_count_map[t2]
            if (n1 < n2):
                x = t1
                y = t2
                xn = n1
                yn = n2
            else:
                x = t2
                y = t1
                xn = n2
                yn = n1
            f = float(c) / float(xn)
            alias_fd.write("%s\t%s\t%d\t%d\t%d\t%0.2f\n" % (
                x,y,xn,yn,c,f))
        # Close alias file
        alias_fd.close()

    # Close log file
    if args.verbose:
        sys.stderr.write('[-] Verbose output in %s\n' % (log_filename))
        verb_fd.close()
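
The .verbose, .gen and .alias filenames above are all derived the same way: strip the directory and extension from the input file, then attach a new suffix. As a standalone sketch (derived_name and the sample path are illustrative):

import os.path

def derived_name(input_path, suffix):
    # '/data/vt_labels.json' + '.verbose' -> 'vt_labels.verbose'
    return os.path.basename(os.path.splitext(input_path)[0]) + suffix

print(derived_name('/data/vt_labels.json', '.verbose'))  # vt_labels.verbose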

Example 13

Project: machine
Source File: conform.py
View license
def find_source_path(source_definition, source_paths):
    "Figure out which of the possible paths is the actual source"
    conform = source_definition["conform"]
    if conform["type"] in ("shapefile", "shapefile-polygon"):
        # TODO this code is too complicated; see XML variant below for simpler option
        # Shapefiles are named *.shp
        candidates = []
        for fn in source_paths:
            basename, ext = os.path.splitext(fn)
            if ext.lower() == ".shp":
                candidates.append(fn)
        if len(candidates) == 0:
            _L.warning("No shapefiles found in %s", source_paths)
            return None
        elif len(candidates) == 1:
            _L.debug("Selected %s for source", candidates[0])
            return candidates[0]
        else:
            # Multiple candidates; look for the one named by the file attribute
            if "file" not in conform:
                _L.warning("Multiple shapefiles found, but source has no file attribute.")
                return None
            source_file_name = conform["file"]
            for c in candidates:
                if source_file_name == os.path.basename(c):
                    return c
            _L.warning("Source names file %s but could not find it", source_file_name)
            return None
    elif conform["type"] == "geojson" and source_definition["type"] != "ESRI":
        candidates = []
        for fn in source_paths:
            basename, ext = os.path.splitext(fn)
            if ext.lower() in (".json", ".geojson"):
                candidates.append(fn)
        if len(candidates) == 0:
            _L.warning("No JSON found in %s", source_paths)
            return None
        elif len(candidates) == 1:
            _L.debug("Selected %s for source", candidates[0])
            return candidates[0]
        else:
            _L.warning("Found more than one JSON file in source, can't pick one")
            # geojson spec currently doesn't include a file attribute. Maybe it should?
            return None
    elif conform["type"] == "geojson" and source_definition["type"] == "ESRI":
        # Old style ESRI conform: ESRI downloader should only give us a single cache.csv file
        return source_paths[0]
    elif conform["type"] == "csv":
        # Return file if it's specified, else return the first file we find
        if "file" in conform:
            for fn in source_paths:
                # Consider it a match if the basename matches; directory names are a mess
                if os.path.basename(conform["file"]) == os.path.basename(fn):
                    return fn
            _L.warning("Conform named %s as file but we could not find it." % conform["file"])
            return None
        else:
            return source_paths[0]
    elif conform["type"] == "gdb":
        candidates = []
        for fn in source_paths:
            fn = re.sub('\.gdb.*', '.gdb', fn)
            basename, ext = os.path.splitext(fn)
            if ext.lower() == ".gdb" and fn not in candidates:
                candidates.append(fn)
        if len(candidates) == 0:
            _L.warning("No GDB found in %s", source_paths)
            return None
        elif len(candidates) == 1:
            _L.debug("Selected %s for source", candidates[0])
            return candidates[0]
        else: 
            # Multiple candidates; look for the one named by the file attribute
            if "file" not in conform:
                _L.warning("Multiple GDBs found, but source has no file attribute.")
                return None
            source_file_name = conform["file"]
            for c in candidates:
                if source_file_name == os.path.basename(c):
                    return c
            _L.warning("Source names file %s but could not find it", source_file_name)
            return None
    elif conform["type"] == "xml":
        # Return file if it's specified, else return the first .gml file we find
        if "file" in conform:
            for fn in source_paths:
                # Consider it a match if the basename matches; directory names are a mess
                if os.path.basename(conform["file"]) == os.path.basename(fn):
                    return fn
            _L.warning("Conform named %s as file but we could not find it." % conform["file"])
            return None
        else:
            for fn in source_paths:
                _, ext = os.path.splitext(fn)
                if ext == ".gml":
                    return fn
            _L.warning("Could not find a .gml file")
            return None
    else:
        _L.warning("Unknown source type %s", conform["type"])
        return None
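
Each branch of find_source_path filters candidate paths with a case-insensitive comparison of the splitext extension. That filter in isolation; the function name and sample paths are illustrative:

import os.path

def candidates_with_ext(paths, extensions):
    # Compare extensions case-insensitively, as the shapefile branch does.
    return [p for p in paths
            if os.path.splitext(p)[1].lower() in extensions]

paths = ['a/parcels.SHP', 'a/parcels.dbf', 'b/readme.txt']
print(candidates_with_ext(paths, ('.shp',)))  # ['a/parcels.SHP']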

Example 14

Project: openelections-core
Source File: load.py
View license
    def run(self, mapping):
        """
        generated_filename will return a filename similar to this:
            `20101107__wa__general__precinct.csv`

        election will return a filename similar to this:
            `20101102__wa__general__precinct`

        """

        generated_filename = mapping['generated_filename']
        election = mapping['election']

        """
        bad_filenames[] holds the list of files whose content is
        hard to use (e.g. an .xls file with 10 sheets).

        The edge cases will be taken care of later. The cases where there is
        zero actual usable data will have to be rectified outside of the
        loader module.

        """

        bad_filenames = [
            # The below are Excel (.xls) files that have results spread across
            # multiple worksheets and in different structures from each other

            '20070821__wa__primary.xls',
            '20070821__wa__primary__county.xls',
            '20080219__wa__primary__adams__precinct.xls',
            '20080219__wa__primary__benton__precinct.xls',
            '20080219__wa__primary__congressional_district_state_legislative.xls',
            '20080219__wa__primary__douglas__precinct.xls',
            '20080219__wa__primary__kitsap__precinct.xls',
            '20080819__wa__primary__kitsap__precinct.xls',
            '20080819__wa__primary__pierce__precinct.xls',
            '20081104__wa__general__congressional_district.xls',
            '20081104__wa__general__adams__precinct.xls',
            '20091103__wa__general__clark__precinct.xls',
            '20081104__wa__general__franklin__precinct.xls',
            '20081104__wa__general__kittitas__precinct.xls',
            '20081104__wa__general__kitsap__precinct.xls',
            '20081104__wa__general__pierce__precinct.xls',
            '20081104__wa__general__precinct.xls',
            '20081104__wa__general__state_legislative.xls',
            '20091103__wa__general__kitsap__precinct.xls',
            '20091103__wa__general__pierce__precinct.xls',
            '20101102__wa__general__kittitas___precinct.xls',
            '20101102__wa__general__san_juan___precinct.xls',
            '20100817__wa__primary__state_legislative.xls',
            '20100817__wa__primary__congressional_district.xls',
            '20111108__wa__general__clark___precinct.xlsx',
            '20111108__wa__general__spokane___precinct.xlsx',
            '20120807__wa__primary__congressional_district.xls',
            '20120807__wa__primary__state_legislative.xls',
            '20121106__wa__general__congressional_district.xls',
            '20121106__wa__general__state_legislative.xls',
        ]

        """
        Could try using `generated_filename.split('.')[-1]` instead of
        os.path.splitext(election)[-1], since all filenames are
        standardized. This would, of course, break if the file path includes
        a full stop (period).

        """

        # If files are 'bad', skip them
        if any(x in generated_filename for x in bad_filenames):
            loader = SkipLoader()

        # If files are .xls(x), use the correct loader
        elif os.path.splitext(
                generated_filename)[-1].lower() in ('.xls', '.xlsx'):
            loader = WALoaderExcel()

        elif os.path.splitext(generated_filename)[-1].lower() == '.txt':

            """
            We run into issues where King County provides > 1 million line
            .txt files that break my machine's memory. We definitely need to
            refactor, but for the moment we'll pass over said files.

            """

            logger.info(
                'Cannot do anything with {0}'.format(generated_filename))
            loader = SkipLoader()

        elif 'precinct' in generated_filename:
            loader = WALoaderPrecincts()

        elif any(s in election for s in [
                '2000',
                '2001',
                '2002',
                '2003',
                '2004',
                '2005',
                '2006']):
            loader = WALoaderPre2007()

        elif os.path.splitext(
                generated_filename)[-1].lower() in ('.csv', '.txt'):
            loader = WALoaderPost2007()

        else:
            loader = SkipLoader()

        """
        * UnboundLocalError: the file passes through the elif statements but
          is not one we have a loader class set up to handle at this point, so
          loader.run(mapping) is called before loader is assigned

        * IOError: the file in question does not exist. Seen when the mapping
          names a file path that received a 404 error

        * unicodecsv.Error: Similar to UnboundLocalError, this error means
          that the loader tried running but the csv parser could not parse
          the file because of a null byte. See:
          https://github.com/jdunck/python-unicodecsv/blob/master/unicodecsv/test.py#L222

        * errors.InvalidOperation: When a file has no useful data, RawResult
          is empty and mongodb refuses to load it.

        Because of the if/else flow, sometimes we'll end up with multiple
        UnboundLocalErrors. This should be changed so we only get the error
        once.

        """

        try:
            loader.run(mapping)
        except UnboundLocalError:
            logger.error(
                '\tUnsupported file type ({0})'
                .format('UnboundLocalError'))
        except IOError:
            logger.error(
                '\tFile "{0}" does not exist'
                .format(generated_filename))
        except unicodecsv.Error:
            logger.error(
                '\tUnsupported file type "({0})"'
                .format('unicodecsv.Error'))
        except errors.InvalidOperation:
            logger.error('\tNo raw results loaded')
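
The docstring above notes that `generated_filename.split('.')[-1]` would break on paths containing a full stop. A quick demonstration of why splitext is the safer choice (paths are illustrative):

import os.path

path = 'results.2010/20101107__wa__general__precinct'
print(path.split('.')[-1])          # '2010/20101107__wa__general__precinct'
print(os.path.splitext(path)[-1])   # '' -- correctly reports no extension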

Example 15

Project: pecan
Source File: core.py
View license
    def find_controller(self, state):
        '''
        The main request handler for Pecan applications.
        '''
        # get a sorted list of hooks, by priority (no controller hooks yet)
        req = state.request
        pecan_state = req.pecan

        # store the routing path for the current application to allow hooks to
        # modify it
        pecan_state['routing_path'] = path = req.path_info

        # handle "on_route" hooks
        self.handle_hooks(self.hooks, 'on_route', state)

        # lookup the controller, respecting content-type as requested
        # by the file extension on the URI
        pecan_state['extension'] = None

        # attempt to guess the content type based on the file extension
        if self.guess_content_type_from_ext \
                and not pecan_state['content_type'] \
                and '.' in path:
            _, extension = splitext(path.rstrip('/'))

            # preface with a letter to ensure compat for 2.5
            potential_type = guess_type('x' + extension)[0]

            if extension and potential_type is not None:
                path = ''.join(path.rsplit(extension, 1))
                pecan_state['extension'] = extension
                pecan_state['content_type'] = potential_type

        controller, remainder = self.route(req, self.root, path)
        cfg = _cfg(controller)

        if cfg.get('generic_handler'):
            raise exc.HTTPNotFound

        # handle generic controllers
        im_self = None
        if cfg.get('generic'):
            im_self = six.get_method_self(controller)
            handlers = cfg['generic_handlers']
            controller = handlers.get(req.method, handlers['DEFAULT'])
            handle_security(controller, im_self)
            cfg = _cfg(controller)

        # add the controller to the state so that hooks can use it
        state.controller = controller

        # if unsure ask the controller for the default content type
        content_types = cfg.get('content_types', {})
        if not pecan_state['content_type']:
            # attempt to find a best match based on accept headers (if they
            # exist)
            accept = getattr(req.accept, 'header_value', '*/*')
            if accept == '*/*' or (
                    accept.startswith('text/html,') and
                    list(content_types.keys()) in self.SIMPLEST_CONTENT_TYPES):
                pecan_state['content_type'] = cfg.get(
                    'content_type',
                    'text/html'
                )
            else:
                best_default = acceptparse.MIMEAccept(
                    accept
                ).best_match(
                    content_types.keys()
                )

                if best_default is None:
                    msg = "Controller '%s' defined does not support " + \
                          "content_type '%s'. Supported type(s): %s"
                    logger.error(
                        msg % (
                            controller.__name__,
                            pecan_state['content_type'],
                            content_types.keys()
                        )
                    )
                    raise exc.HTTPNotAcceptable()

                pecan_state['content_type'] = best_default
        elif cfg.get('content_type') is not None and \
                pecan_state['content_type'] not in content_types:

            msg = "Controller '%s' defined does not support content_type " + \
                  "'%s'. Supported type(s): %s"
            logger.error(
                msg % (
                    controller.__name__,
                    pecan_state['content_type'],
                    content_types.keys()
                )
            )
            raise exc.HTTPNotFound

        # fetch any parameters
        if req.method == 'GET':
            params = req.GET
        elif req.content_type in ('application/json',
                                  'application/javascript'):
            try:
                if not isinstance(req.json, dict):
                    raise TypeError('%s is not a dict' % req.json)
                params = NestedMultiDict(req.GET, req.json)
            except (TypeError, ValueError):
                params = req.params
        else:
            params = req.params

        # fetch the arguments for the controller
        args, varargs, kwargs = self.get_args(
            state,
            params.mixed(),
            remainder,
            cfg['argspec'],
            im_self
        )
        state.arguments = Arguments(args, varargs, kwargs)

        # handle "before" hooks
        self.handle_hooks(self.determine_hooks(controller), 'before', state)

        return controller, args + varargs, kwargs

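The extension-driven content-type lookup in Example 15 reduces to os.path.splitext plus mimetypes.guess_type. A minimal standalone sketch; the 'x' prefix mirrors the compatibility trick in the code above, since guess_type expects a filename rather than a bare extension:

import os
from mimetypes import guess_type

def guess_content_type(path):
    # strip a trailing slash before peeling off the extension
    _, ext = os.path.splitext(path.rstrip('/'))
    content_type, _ = guess_type('x' + ext)
    return ext, content_type

print(guess_content_type('/api/users.json'))  # ('.json', 'application/json')
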
Example 16

Project: PHEnix
Source File: __init__.py
View license
def get_dist_mat(aSampleNames, avail_pos, dArgs):
    """
    Calculates the distance matrix, optionally removes recombination
    from it and optionally normalises it

    Parameters
    ----------
    aSampleNames: list
        list of sample names
    avail_pos: dict
        information on all available positions
        {'gi|194097589|ref|NC_011035.1|':
            FastRBTree({2329: {'stats': <vcf2distancematrix.BaseStats object at 0x40fb590>,
                               'reference': 'A',
                               '211700_H15498026501': 'C',
                               '211701_H15510030401': 'C',
                               '211702_H15522021601': 'C'},
                        3837: {'211700_H15498026501': 'G',
                               'stats': <vcf2distancematrix.BaseStats object at 0x40fbf90>,
                               '211701_H15510030401': 'G',
                               'reference': 'T',
                               '211702_H15522021601': 'G'},
                        4140: {'211700_H15498026501': 'A',
                               'stats': <vcf2distancematrix.BaseStats object at 0x40fb790>,
                               '211701_H15510030401': 'A',
                               'reference': 'G',
                               '211702_H15522021601': 'A'}})}
    dArgs: dict
        input parameter dictionary as created by get_args()

    Returns
    -------
    dist_mat: dict
        the calculated distance matrix as a dict of dicts keyed by sample
        name, or None if recombination removal was requested but scipy is
        unavailable
    """

    dDen = None
    dRemovals = None
    if dArgs['remove_recombination'] == True:
        if HAVE_SCIPY == False:
            logging.error("Cannot import scipy requied for recombination removal.")
            return None
        dDen = precompute_snp_densities(avail_pos, aSampleNames, dArgs)
        dRemovals = {}
        for i, sample_1 in enumerate(aSampleNames):
            dRemovals[sample_1] = {}
            for j, sample_2 in enumerate(aSampleNames):
                if j <= i:
                    dRemovals[sample_1][sample_2] = 0

    # initialise empty matrix
    dist_mat = {}
    for i, sample_1 in enumerate(aSampleNames):
        dist_mat[sample_1] = {}
        for j, sample_2 in enumerate(aSampleNames):
            if j <= i:
                if dArgs['substitution'] == 'k80' or dArgs['substitution'] == 't93':
                    dist_mat[sample_1][sample_2] = [0.0, 0.0]
                elif dArgs['substitution'] == 'tn84':
                    dist_mat[sample_1][sample_2] = {'A': {'A': 0.0, 'C': 0.0, 'G': 0.0, 'T': 0.0},
                                                    'C': {'A': 0.0, 'C': 0.0, 'G': 0.0, 'T': 0.0},
                                                    'G': {'A': 0.0, 'C': 0.0, 'G': 0.0, 'T': 0.0},
                                                    'T': {'A': 0.0, 'C': 0.0, 'G': 0.0, 'T': 0.0}}
                else:
                    dist_mat[sample_1][sample_2] = 0.0
            else:  # j > i
                pass

    flNofWins = 0.0
    if dDen != None:
        (_, flGenLen) = get_ref_freqs(dArgs['refgenome'], len_only=True)
        flNofWins = flGenLen / dArgs['winsize']

    aStats = []
    for sContig in avail_pos.keys():
        for pos in avail_pos[sContig]:
            ref_base = avail_pos[sContig][pos].get("reference")
            for i, sample_1 in enumerate(aSampleNames):
                s1_base = avail_pos[sContig][pos].get(sample_1, ref_base)
                # consider only differences between valid characters -> pairwise deletion
                if dValChars.get(s1_base.upper(), None) == None:
                    continue
                for j, sample_2 in enumerate(aSampleNames):
                    if j < i:
                        s2_base = avail_pos[sContig][pos].get(sample_2, ref_base)
                        # consider only differences between valid characters -> pairwise deletion
                        if dValChars.get(s2_base.upper(), None) == None:
                            continue

                        # Recombination removal happens here
                        if dDen != None and s1_base != s2_base:

                            iDiffsInWin = dDen[sContig][sample_1][sample_2][pos]
                            iTotalDiffs = dDen['diffs'][sample_1][sample_2]
                            p_hitting_window = 1.0 / flNofWins
                            p_ok = 1.0

                            # only do binomial test if there are 'too many differences'
                            #  => do not exclude diffs because there are 'not enough'
                            if iDiffsInWin > 1 and iDiffsInWin > iTotalDiffs / float(flNofWins):
                                # binomial test:
                                # what is the probability that I have x successes (i.e. diffs in the current window)
                                # given n trials (i.e. the total number of differences between the two samples)
                                # and given that the probabilty of success is 1/total_num_windows
                                p_ok = binom_test(iDiffsInWin, iTotalDiffs, p_hitting_window)

                            corr_p_thresh = (0.01 / iTotalDiffs)
                            aStats.append("%s\t%s\t%i\t%i\t%i\t%e\t%e\n" % (sample_1,
                                                                            sample_2,
                                                                            pos,
                                                                            iDiffsInWin,
                                                                            iTotalDiffs,
                                                                            p_ok,
                                                                            corr_p_thresh))

                            # null-hypothesis: probability of hitting it is equal for all windows, i.e.
                            # the diffs between the samples are uniformly distributed
                            # Bonferroni corrected p-value threshold: (0.01 / iTotalDiffs)
                            if p_ok <= corr_p_thresh:
                                # likely to be a recombinant site (for a given definition of 'likely' and 'recombinant')
                                # aStats.append("%s\t%s\t%i\t%i\t%i\t%e\n" % (sample_1, sample_2, pos, iDiffsInWin, iTotalDiffs, p_ok))
                                dRemovals[sample_1][sample_2] += 1
                                continue

                        if dArgs['substitution'] == 'k80' or dArgs['substitution'] == 't93':
                            k = get_difference_value(s1_base.upper(), s2_base.upper(), dArgs['substitution'])
                            dist_mat[sample_1][sample_2][0] += k[0]
                            dist_mat[sample_1][sample_2][1] += k[1]
                        elif dArgs['substitution'] == 'tn84':

                            if s1_base > s2_base:
                                # don't do this:
                                # s1_base, s2_base = s2_base, s1_base
                                # dist_mat[sample_1][sample_2][s1_base][s2_base] += 1.0
                                # => messes up the references
                                dist_mat[sample_1][sample_2][s2_base][s1_base] += 1.0
                            else:
                                dist_mat[sample_1][sample_2][s1_base][s2_base] += 1.0

                        elif dArgs['substitution'] == 'number_of_differences' or dArgs['substitution'] == 'jc69':

                            k = get_difference_value(s1_base.upper(), s2_base.upper(), dArgs['substitution'])
                            dist_mat[sample_1][sample_2] += k
                        else:
                            raise NotImplementedError
                    else:  # j >= i
                        pass

    # write additional stats if required
    if dArgs['remove_recombination'] == True and dArgs['with_stats'] == True:
        sOutBase = os.path.splitext(dArgs['out'])[0]
        with open("%s.removals.tsv" % (sOutBase), 'w') as fOut:
            for i, sample_1 in enumerate(aSampleNames):
                row = sample_1
                for j, sample_2 in enumerate(aSampleNames):
                    if j < i:
                        row += "%s%i" % ('\t', dRemovals[sample_1][sample_2])
                fOut.write("%s\n" % row)
        with open("%s.proportion_removed.tsv" % (sOutBase), 'w') as fOut:
            for i, sample_1 in enumerate(aSampleNames):
                row = sample_1
                for j, sample_2 in enumerate(aSampleNames):
                    if j < i:
                        try:
                            row += "%s%f" % ('\t', dRemovals[sample_1][sample_2] \
                                                 / (dist_mat[sample_1][sample_2] \
                                                 + dRemovals[sample_1][sample_2]))
                        except ZeroDivisionError:
                            row += "\tNAN"
                fOut.write("%s\n" % row)
        with open("%s.all_snps.tsv" % (sOutBase), 'w') as fOut:
            fOut.write("sample_1\tsample_2\tposition\tdiffs_in_win\ttotal_diffs\tp_ok\tthreshold\n")
            for sLine in aStats:
                fOut.write(sLine)

    # 'normalise' distance matrix according to model requested
    if dArgs['substitution'] == 'jc69':
        dist_mat = normalise_jc69(dist_mat, dArgs['refgenome'], aSampleNames)
    elif dArgs['substitution'] == 'k80':
        dist_mat = normalise_k80(dist_mat, dArgs['refgenome'], aSampleNames)
    elif dArgs['substitution'] == 'tn84':
        dist_mat = normalise_tn84(dist_mat, dArgs['refgenome'], aSampleNames)
    elif dArgs['substitution'] == 't93':
        dist_mat = normalise_t93(dist_mat, dArgs['refgenome'], aSampleNames)
    elif dArgs['substitution'] == 'number_of_differences':
        pass
    else:
        raise NotImplementedError

    return dist_mat

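Example 16 uses os.path.splitext(dArgs['out'])[0] to derive the stats files that sit next to the main output. The idiom in isolation, as a hedged sketch with hypothetical names:

import os

def companion_paths(out_path):
    # drop the extension once, then build sibling filenames from the base
    base = os.path.splitext(out_path)[0]
    return ["%s.removals.tsv" % base,
            "%s.proportion_removed.tsv" % base,
            "%s.all_snps.tsv" % base]

print(companion_paths('results/dist_matrix.mat'))
# ['results/dist_matrix.removals.tsv',
#  'results/dist_matrix.proportion_removed.tsv',
#  'results/dist_matrix.all_snps.tsv']
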
Example 17

Project: BioSPPy
Source File: plotting.py
View license
def plot_eeg(ts=None,
             raw=None,
             filtered=None,
             labels=None,
             features_ts=None,
             theta=None,
             alpha_low=None,
             alpha_high=None,
             beta=None,
             gamma=None,
             plf_pairs=None,
             plf=None,
             path=None,
             show=False):
    """Create a summary plot from the output of signals.eeg.eeg.

    Parameters
    ----------
    ts : array
        Signal time axis reference (seconds).
    raw : array
        Raw EEG signal.
    filtered : array
        Filtered EEG signal.
    labels : list
        Channel labels.
    features_ts : array
        Features time axis reference (seconds).
    theta : array
        Average power in the 4 to 8 Hz frequency band; each column is one
        EEG channel.
    alpha_low : array
        Average power in the 8 to 10 Hz frequency band; each column is one
        EEG channel.
    alpha_high : array
        Average power in the 10 to 13 Hz frequency band; each column is one
        EEG channel.
    beta : array
        Average power in the 13 to 25 Hz frequency band; each column is one
        EEG channel.
    gamma : array
        Average power in the 25 to 40 Hz frequency band; each column is one
        EEG channel.
    plf_pairs : list
        PLF pair indices.
    plf : array
        PLF matrix; each column is a channel pair.
    path : str, optional
        If provided, the plot will be saved to the specified file.
    show : bool, optional
        If True, show the plot immediately.

    """

    nrows = MAX_ROWS
    alpha = 2.

    figs = []

    # raw
    fig = _plot_multichannel(ts=ts,
                             signal=raw,
                             labels=labels,
                             nrows=nrows,
                             alpha=alpha,
                             title='EEG Summary - Raw',
                             xlabel='Time (s)',
                             ylabel='Amplitude')
    figs.append(('_Raw', fig))

    # filtered
    fig = _plot_multichannel(ts=ts,
                             signal=filtered,
                             labels=labels,
                             nrows=nrows,
                             alpha=alpha,
                             title='EEG Summary - Filtered',
                             xlabel='Time (s)',
                             ylabel='Amplitude')
    figs.append(('_Filtered', fig))

    # band-power
    names = ('Theta Band', 'Lower Alpha Band', 'Higher Alpha Band',
             'Beta Band', 'Gamma Band')
    args = (theta, alpha_low, alpha_high, beta, gamma)
    for n, a in zip(names, args):
        fig = _plot_multichannel(ts=features_ts,
                                 signal=a,
                                 labels=labels,
                                 nrows=nrows,
                                 alpha=alpha,
                                 title='EEG Summary - %s' % n,
                                 xlabel='Time (s)',
                                 ylabel='Power')
        figs.append(('_' + n.replace(' ', '_'), fig))

    # PLF
    plf_labels = ['%s vs %s' % (labels[p[0]], labels[p[1]]) for p in plf_pairs]
    fig = _plot_multichannel(ts=features_ts,
                             signal=plf,
                             labels=plf_labels,
                             nrows=nrows,
                             alpha=alpha,
                             title='EEG Summary - Phase-Locking Factor',
                             xlabel='Time (s)',
                             ylabel='PLF')
    figs.append(('_PLF', fig))

    # save to file
    if path is not None:
        path = utils.normpath(path)
        root, ext = os.path.splitext(path)
        ext = ext.lower()
        if ext not in ('.png', '.jpg'):  # splitext keeps the leading dot
            ext = '.png'

        for n, fig in figs:
            path = root + n + ext
            fig.savefig(path, dpi=200, bbox_inches='tight')

    # show
    if show:
        plt.show()
    else:
        # close
        for _, fig in figs:
            plt.close(fig)

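One detail Example 17 relies on, and which is easy to get wrong: os.path.splitext returns the separator together with the extension, so extension whitelists must include the leading dot (a dotless 'png'/'jpg' check would never match and would silently rewrite every extension to '.png'). A short illustration:

import os

root, ext = os.path.splitext('summary.JPG')
ext = ext.lower()                 # '.jpg' -- note the leading dot
if ext not in ('.png', '.jpg'):   # dotless 'png'/'jpg' would never match
    ext = '.png'
print(root + '_Raw' + ext)        # 'summary_Raw.jpg'
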
Example 18

Project: picrust
Source File: predict_metagenomes.py
View license
def main():
    option_parser, opts, args =\
       parse_command_line_parameters(**script_info)

    if opts.verbose:
        print "Loading OTU table: ",opts.input_otu_table

    otu_table = load_table(opts.input_otu_table)
    ids_to_load = otu_table.ids(axis='observation').tolist()

    if opts.verbose:
        print "Done loading OTU table containing %i samples and %i OTUs." \
          %(len(otu_table.ids()),len(otu_table.ids(axis='observation')))

    #Hardcoded location of the precalculated datasets for PICRUSt,
    #relative to the project directory
    precalc_data_dir=join(get_picrust_project_dir(),'picrust','data')

    # Load a table of gene counts by OTUs.
    #This can be either user-specified or precalculated
    genome_table_fp = determine_data_table_fp(precalc_data_dir,\
      opts.type_of_prediction,opts.gg_version,\
      user_specified_table=opts.input_count_table,verbose=opts.verbose)

    if opts.verbose:
        print "Loading gene count data from file: %s" %genome_table_fp

    genome_table= load_data_table(genome_table_fp,\
      load_data_table_in_biom=opts.load_precalc_file_in_biom,\
      suppress_subset_loading=opts.suppress_subset_loading,\
      ids_to_load=ids_to_load,verbose=opts.verbose,transpose=True)

    if opts.verbose:
        print "Loaded %i genes across %i OTUs from gene count table" \
          %(len(genome_table.ids(axis='observation')),len(genome_table.ids()))

    if opts.with_confidence:
        if opts.input_variance_table:
            variance_table_fp = opts.input_variance_table
        else:
            variance_table_fp = determine_data_table_fp(precalc_data_dir,\
              opts.type_of_prediction,opts.gg_version,\
              precalc_file_suffix='precalculated_variances.tab.gz',\
              user_specified_table=opts.input_count_table)

        if opts.verbose:
            print "Loading variance information from table: %s" \
            %variance_table_fp

        variance_table= load_data_table(variance_table_fp,\
          load_data_table_in_biom=opts.load_precalc_file_in_biom,\
          suppress_subset_loading=opts.suppress_subset_loading,\
          ids_to_load=ids_to_load,transpose=True)

        if opts.verbose:
            print "Loaded %i genes across %i OTUs from variance table" \
              %(len(variance_table.ids(axis='observation')),len(variance_table.ids()))
        #Raise an error if the genome table and variance table differ
        #in the genomes they contain.
        #better to find out now than have something obscure happen later on
        if opts.verbose:
            print "Checking that genome table and variance table are consistent"
        try:
            assert set(variance_table.ids(axis='observation')) == set(genome_table.ids(axis='observation'))
        except AssertionError,e:
            for var_id in variance_table.ids(axis='observation'):
                if var_id not in genome_table.ids(axis='observation'):
                    print "Variance table ObsId %s not in genome_table ObsIds" %var_id
            raise AssertionError("Variance table and genome table contain different gene ids")
        try:
            assert set(variance_table.ids()) == set(genome_table.ids())
        except AssertionError,e:
            for var_id in variance_table.ids():
                if var_id not in genome_table.ids():
                    print "Variance table SampleId %s not in genome_table SampleIds" %var_id
            raise AssertionError("Variance table and genome table contain different OTU ids")

        #sort the ObservationIds and SampleIds to be in the same order
        variance_table=variance_table.sort_order(genome_table.ids(axis='observation'), axis='observation')
        variance_table=variance_table.sort_order(genome_table.ids(), axis='sample')

    make_output_dir_for_file(opts.output_metagenome_table)

    if opts.accuracy_metrics:
        # Calculate accuracy metrics
        weighted_nsti = calc_nsti(otu_table,genome_table,weighted=True)
        samples= weighted_nsti[0]
        nstis = list(weighted_nsti[1])
        samples_and_nstis = zip(samples,nstis)
        if opts.verbose:
            print "Writing NSTI information to file:", opts.accuracy_metrics
        accuracy_output_fh = open(opts.accuracy_metrics,'w')
        accuracy_output_fh.write("#Sample\tMetric\tValue\n")
        for sample,nsti in samples_and_nstis:
            line = "%s\tWeighted NSTI\t%s\n" %(sample,str(nsti))
            accuracy_output_fh.write(line)

    if opts.with_confidence:
        #If we are calculating variance, we get the prediction as part
        #of the process

        if opts.verbose:
            print "Predicting the metagenome, metagenome variance and confidence intervals for the metagenome..."

        predicted_metagenomes,predicted_metagenome_variances,\
        predicted_metagenomes_lower_CI_95,predicted_metagenomes_upper_CI_95=\
          predict_metagenome_variances(otu_table,genome_table,variance_table)
    else:
        #If we don't need confidence intervals, we can do a faster pure numpy prediction

        if opts.verbose:
            print "Predicting the metagenome..."
        predicted_metagenomes = predict_metagenomes(otu_table,genome_table)

    if opts.normalize_by_otu:
        #normalize (e.g. divide) the abundances by the sum of the OTUs per sample
        if opts.verbose:
            print "Normalizing functional abundances by sum of OTUs per sample"
        inverse_otu_sums = [1/x for x in otu_table.sum(axis='sample')]
        scaling_factors = dict(zip(otu_table.ids(),inverse_otu_sums))
        predicted_metagenomes = scale_metagenomes(predicted_metagenomes,scaling_factors)

    if opts.normalize_by_function:
        #normalize (e.g. divide) the abundances by the sum of the functions per sample
        #Sum of functional abundances per sample will equal 1 (e.g. relative abundance).
        if opts.verbose:
            print "Normalizing functional abundances by sum of functions per sample"
        predicted_metagenomes = predicted_metagenomes.norm(axis='sample', inplace=False)


    write_metagenome_to_file(predicted_metagenomes,opts.output_metagenome_table,\
        opts.format_tab_delimited,"metagenome prediction",verbose=opts.verbose)

    if opts.with_confidence:
        output_path,output_filename = split(opts.output_metagenome_table)
        base_output_filename,ext = splitext(output_filename)
        variance_output_fp =\
          join(output_path,"%s_variances%s" %(base_output_filename,ext))
        upper_CI_95_output_fp =\
          join(output_path,"%s_upper_CI_95%s" %(base_output_filename,ext))
        lower_CI_95_output_fp =\
          join(output_path,"%s_lower_CI_95%s" %(base_output_filename,ext))

        write_metagenome_to_file(predicted_metagenome_variances,\
          variance_output_fp,opts.format_tab_delimited,\
          "metagenome prediction variance",verbose=opts.verbose)

        write_metagenome_to_file(predicted_metagenomes_upper_CI_95,\
          upper_CI_95_output_fp,opts.format_tab_delimited,\
          "metagenome prediction upper 95% confidence interval",\
          verbose=opts.verbose)

        write_metagenome_to_file(predicted_metagenomes_lower_CI_95,\
          lower_CI_95_output_fp,opts.format_tab_delimited,\
          "metagenome prediction lower 95% confidence interval",\
          verbose=opts.verbose)

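Example 18 derives its variance and confidence-interval outputs by splitting the filename once and re-inserting a suffix before the extension. The same idiom as a small standalone helper (names hypothetical):

from os.path import join, split, splitext

def with_suffix(path, suffix):
    # 'out/meta.biom' + '_variances' -> 'out/meta_variances.biom'
    directory, filename = split(path)
    base, ext = splitext(filename)
    return join(directory, "%s%s%s" % (base, suffix, ext))

print(with_suffix('out/meta.biom', '_variances'))  # 'out/meta_variances.biom'
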
Example 19

Project: picrust
Source File: predict_traits.py
View license
def main():
    option_parser, opts, args =\
       parse_command_line_parameters(**script_info)

    #if we specify we want NSTI only then we have to calculate it first
    if opts.output_accuracy_metrics_only:
        opts.calculate_accuracy_metrics=True

    if opts.verbose:
        print "Loading tree from file:", opts.tree

    # Load Tree
    #tree = LoadTree(opts.tree)
    tree = load_picrust_tree(opts.tree, opts.verbose)

    table_headers =[]
    traits={}
    #load the asr trait table using the previous list of functions to order the arrays
    if opts.reconstructed_trait_table:
        table_headers,traits =\
                update_trait_dict_from_file(opts.reconstructed_trait_table)

        #Only load confidence intervals on the reconstruction
        #If we actually have ASR values in the analysis
        if opts.reconstruction_confidence:
            if opts.verbose:
                print "Loading ASR confidence data from file:",\
                opts.reconstruction_confidence
                print "Assuming confidence data is of type:",opts.confidence_format

            asr_confidence_output = open(opts.reconstruction_confidence)
            asr_min_vals,asr_max_vals, params,column_mapping =\
              parse_asr_confidence_output(asr_confidence_output,format=opts.confidence_format)
            if 'sigma' in params:
                brownian_motion_parameter = params['sigma'][0]
            else:
                brownian_motion_parameter = None

            if opts.verbose:
                print "Done. Loaded %i confidence interval values." %(len(asr_max_vals))
                print "Brownian motion parameter:",brownian_motion_parameter
        else:
            brownian_motion_parameter = None

    #load the trait table into a dict with organism names as keys and trait arrays as values
    table_headers,genome_traits =\
            update_trait_dict_from_file(opts.observed_trait_table,table_headers)


    #Combine the trait tables overwriting the asr ones if they exist in the genome trait table.
    traits.update(genome_traits)

    # Specify the attribute where we'll store the reconstructions
    trait_label = "Reconstruction"

    if opts.verbose:
        print "Assigning traits to tree..."

    # Decorate tree using the traits
    tree = assign_traits_to_tree(traits,tree, trait_label=trait_label)


    if opts.reconstruction_confidence:
        if opts.verbose:
            print "Assigning trait confidence intervals to tree..."
        tree = assign_traits_to_tree(asr_min_vals,tree,\
            trait_label="lower_bound")

        tree = assign_traits_to_tree(asr_max_vals,tree,\
            trait_label="upper_bound")

        if brownian_motion_parameter is None:

             if opts.verbose:
                 print "No Brownian motion parameters loaded. Inferring these from 95% confidence intervals..."
             brownian_motion_parameter = get_brownian_motion_param_from_confidence_intervals(tree,\
                      upper_bound_trait_label="upper_bound",\
                      lower_bound_trait_label="lower_bound",\
                      trait_label=trait_label,\
                      confidence=0.95)
             if opts.verbose:
                 print "Inferred the following rate parameters:",brownian_motion_parameter
    if opts.verbose:
        print "Collecting list of nodes to predict..."

    #Start by predicting all tip nodes.
    nodes_to_predict = [tip.Name for tip in tree.tips()]

    if opts.verbose:
        print "Found %i nodes to predict." % len(nodes_to_predict)

    if opts.limit_predictions_to_organisms:
        organism_id_str = opts.limit_predictions_to_organisms
        ok_organism_ids = organism_id_str.split(',')
        ok_organism_ids = [n.strip() for n in ok_organism_ids]
        for f in set_label_conversion_fns(True,True):
            ok_organism_ids = [f(i) for i in ok_organism_ids]

        if opts.verbose:
            print "Limiting predictions to user-specified ids:",\
              ",".join(ok_organism_ids)


        if not ok_organism_ids:
            raise RuntimeError(\
              "Found no valid ids in input: %s. Were comma-separated ids specified on the command line?"\
              % opts.limit_predictions_to_organisms)

        nodes_to_predict =\
          [n for n in nodes_to_predict if n in ok_organism_ids]

        if not nodes_to_predict:
            raise RuntimeError(\
              "Filtering by user-specified ids resulted in an empty set of nodes to predict.   Are the ids on the commmand-line and tree ids in the same format?  Example tree tip name: %s, example OTU id name: %s" %([tip.Name for tip in tree.tips()][0],ok_organism_ids[0]))

        if opts.verbose:
            print "After filtering organisms to predict by the ids specified on the commandline, %i nodes remain to be predicted" %(len(nodes_to_predict))

    if opts.limit_predictions_by_otu_table:
        if opts.verbose:
            print "Limiting predictions to ids in user-specified OTU table:",\
              opts.limit_predictions_by_otu_table
        otu_table = open(opts.limit_predictions_by_otu_table,"U")
        #Parse OTU table for ids

        otu_ids =\
          extract_ids_from_table(otu_table.readlines(),delimiter="\t")

        if not otu_ids:
            raise RuntimeError(\
              "Found no valid ids in input OTU table: %s.  Is the path correct?"\
              % opts.limit_predictions_by_otu_table)

        nodes_to_predict =\
          [n for n in nodes_to_predict if n in otu_ids]

        if not nodes_to_predict:
            raise RuntimeError(\
              "Filtering by OTU table resulted in an empty set of nodes to predict.   Are the OTU ids and tree ids in the same format?  Example tree tip name: %s, example OTU id name: %s" %([tip.Name for tip in tree.tips()][0],otu_ids[0]))

        if opts.verbose:
            print "After filtering by OTU table, %i nodes remain to be predicted" %(len(nodes_to_predict))

    # Calculate accuracy of PICRUST for the given tree, sequenced genomes
    # and set of nodes to predict
    accuracy_metrics = ['NSTI']
    accuracy_metric_results = None
    if opts.calculate_accuracy_metrics:
        if opts.verbose:
            print "Calculating accuracy metrics: %s" %([",".join(accuracy_metrics)])
        accuracy_metric_results = {}
        if 'NSTI' in accuracy_metrics:

            nsti_result,min_distances =\
                calc_nearest_sequenced_taxon_index(tree,\
                limit_to_tips = nodes_to_predict,\
                trait_label = trait_label, verbose=opts.verbose)

            #accuracy_metric_results['NSTI'] = nsti_result
            for organism in min_distances.keys():
                accuracy_metric_results[organism] = {'NSTI': min_distances[organism]}

            if opts.verbose:
                print "NSTI:", nsti_result

        if opts.output_accuracy_metrics_only:
            #Write accuracy metrics to file
            if opts.verbose:
                print "Writing accuracy metrics to file:",opts.output_accuracy_metrics

            f = open(opts.output_accuracy_metrics_only,'w+')
            f.write("metric\torganism\tvalue\n")
            lines =[]
            for organism in accuracy_metric_results.keys():
                for metric in accuracy_metric_results[organism].keys():
                    lines.append('\t'.join([metric,organism,\
                      str(accuracy_metric_results[organism][metric])])+'\n')
            f.writelines(sorted(lines))
            f.close()
            exit()


    if opts.verbose:
        print "Generating predictions using method:",opts.prediction_method

    if opts.weighting_method == 'exponential':
        #For now, use exponential weighting
        weight_fn = make_neg_exponential_weight_fn(e)

    variances=None #Overwritten by methods that calc variance
    confidence_intervals=None #Overwritten by methods that calc variance

    if opts.prediction_method == 'asr_and_weighting':
        # Perform predictions using reconstructed ancestral states

        if opts.reconstruction_confidence:
            predictions,variances,confidence_intervals =\
              predict_traits_from_ancestors(tree,nodes_to_predict,\
              trait_label=trait_label,\
              lower_bound_trait_label="lower_bound",\
              upper_bound_trait_label="upper_bound",\
              calc_confidence_intervals = True,\
              brownian_motion_parameter=brownian_motion_parameter,\
              weight_fn =weight_fn,verbose=opts.verbose)

        else:
             predictions =\
              predict_traits_from_ancestors(tree,nodes_to_predict,\
              trait_label=trait_label,\
              weight_fn =weight_fn,verbose=opts.verbose)

    elif opts.prediction_method == 'weighting_only':
        #Ignore ancestral information
        predictions =\
          weighted_average_tip_prediction(tree,nodes_to_predict,\
          trait_label=trait_label,\
          weight_fn =weight_fn,verbose=opts.verbose)



    elif opts.prediction_method == 'nearest_neighbor':

        predictions = predict_nearest_neighbor(tree,nodes_to_predict,\
          trait_label=trait_label,tips_only = True)

    elif opts.prediction_method == 'random_neighbor':

        predictions = predict_random_neighbor(tree,\
          nodes_to_predict,trait_label=trait_label)

    if opts.verbose:
        print "Done making predictions."

    make_output_dir_for_file(opts.output_trait_table)

    out_fh=open(opts.output_trait_table,'w')
    #Generate the table of biom predictions
    if opts.verbose:
        print "Converting results to .biom format for output..."

    biom_predictions=biom_table_from_predictions(predictions,table_headers,\
                                                         observation_metadata=None,\
                                                         sample_metadata=accuracy_metric_results,convert_to_int=False)
    if opts.verbose:
        print "Writing prediction results to file: ",opts.output_trait_table

    if opts.output_precalc_file_in_biom:

        #write biom table to file
        write_biom_table(biom_predictions, opts.output_trait_table)

    else:
        #convert to precalc (tab-delimited) format

        out_fh = open(opts.output_trait_table, 'w')
        out_fh.write(convert_biom_to_precalc(biom_predictions))
        out_fh.close()

    #Write out variance information to file
    if variances:

        if opts.verbose:
            print "Converting variances to BIOM format"

        if opts.output_precalc_file_in_biom:
            suffix='.biom'
        else:
            suffix='.tab'

        biom_prediction_variances=biom_table_from_predictions({k:v['variance'] for k,v in variances.iteritems()},table_headers,\
        observation_metadata=None,\
        sample_metadata=None,convert_to_int=False)
        outfile_base,extension = splitext(opts.output_trait_table)
        variance_outfile = outfile_base+"_variances"+suffix
        make_output_dir_for_file(variance_outfile)

        if opts.verbose:
            print "Writing variance information to file:",variance_outfile

        if opts.output_precalc_file_in_biom:
            write_biom_table(biom_prediction_variances, variance_outfile)
        else:
            open(variance_outfile,'w').write(\
                convert_biom_to_precalc(biom_prediction_variances))


    if confidence_intervals:

        if opts.verbose:
            print "Converting upper confidence interval values to BIOM format"

        biom_prediction_upper_CI=biom_table_from_predictions({k:v['upper_CI'] for k,v in confidence_intervals.iteritems()},table_headers,\
          observation_metadata=None,\
          sample_metadata=None,convert_to_int=False)

        outfile_base,extension = splitext(opts.output_trait_table)
        upper_CI_outfile = outfile_base+"_upper_CI"+suffix
        make_output_dir_for_file(upper_CI_outfile)

        if opts.verbose:
            print "Writing upper confidence limit information to file:",upper_CI_outfile

        if opts.output_precalc_file_in_biom:
            write_biom_table(biom_prediction_upper_CI, upper_CI_outfile)
        else:
            open(upper_CI_outfile,'w').write(\
                convert_biom_to_precalc(biom_prediction_upper_CI))

        biom_prediction_lower_CI=biom_table_from_predictions({k:v['lower_CI'] for k,v in confidence_intervals.iteritems()},table_headers,\
          observation_metadata=None,\
          sample_metadata=None,convert_to_int=False)

        outfile_base,extension = splitext(opts.output_trait_table)
        lower_CI_outfile = outfile_base+"_lower_CI"+suffix
        make_output_dir_for_file(lower_CI_outfile)

        if opts.verbose:
            print "Writing lower confidence limit information to file",lower_CI_outfile

        if opts.output_precalc_file_in_biom:
            write_biom_table(biom_prediction_lower_CI, lower_CI_outfile)
        else:
            open(lower_CI_outfile,'w').write(\
                convert_biom_to_precalc(biom_prediction_lower_CI))

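Example 19 repeats the suffix idiom but swaps the extension based on the output format instead of preserving it. A compact sketch of that variant (function name hypothetical):

from os.path import splitext

def variance_outfile(output_trait_table, as_biom):
    # choose the extension by format, then reattach it to the stripped base
    suffix = '.biom' if as_biom else '.tab'
    outfile_base, _ = splitext(output_trait_table)
    return outfile_base + "_variances" + suffix

print(variance_outfile('traits.tab', as_biom=True))  # 'traits_variances.biom'
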
Example 20

Project: SecureCRT
Source File: ConfigLoad.py
View license
def main():
    # Get configfile to load
    configfile = get_configfile()

    # See if we got our input file
    if configfile != "":
        #Do we continue on errors or not?
        result = crt.Dialog.MessageBox("Stop on Errors?", "Error", ICON_QUESTION | BUTTON_YESNO | DEFBUTTON2)
        if result == IDYES:
            error_stop = True
        else:
            error_stop = False

        # Run session start commands and save session information into a dictionary
        session = StartSession(crt)

        # Get CRT Tab for sending commands
        tab = session['tab']

        # Define the line endings we are going to look for while entering commands
        endings = ["\r\n", ")#"]

        # Get the current date in the format supplied in date_format
        my_date = GetDateString(settings['date_format'])

        # Define variable to be used to identify if we are sending banner config lines and end mark
        banner_lines = False
        end_of_banner = ""

        # Define my_line_count variable to tell where we found an error, if we do.
        my_line_count = 0

        # Break out the input file path and name minus the extension so we can create output filename
        c_filename, c_extension = os.path.splitext(configfile)

        # Create Filename
        file_bits = [c_filename, "Config_Load", my_date + c_extension]
        output_file = '-'.join(file_bits)

        # Open the output file for writing
        out_file = open(output_file, 'w')

        # Write header to output file
        out_file.write('Output of all Configuration Commands\r\n')
        out_file.write('Error Lines will start with *** Invalid Command :\r\n')
        out_file.write('\r\n========\r\n')

        # Enter configuration mode
        tab.Send("config term\n")

        # Loop through each line of the input config file.
        with open(configfile, "rU") as InputFile:
            for line in InputFile:
                try:
                    # Increment line count
                    my_line_count += 1
                    # Strip line endings so as not to get double spacing
                    line = line.strip()
                    # Send line to device
                    tab.Send(line + "\n")
                    # Check to see if it was a banner line as we won't get the prompt back
                    if "banner" in line:
                        banner_lines = True
                        # Determine what the end of banner character is going to be
                        end_of_banner = line[-1]
                    # If we're still processing banner lines continue
                    elif banner_lines:
                        # Check if end of Banner
                        if line == end_of_banner:
                            banner_lines = False
                        # Wait for echo of banner line
                        tab.WaitForString(line.strip())
                    else:
                        # Wait for echo of config command
                        tab.WaitForString(line.strip())

                    # Loop to capture every line of output.  If we get CR/LF (first entry
                    # in our "endings" list), then write that line to the file.  If we get
                    # our prompt back (which won't have CR/LF), break the loop b/c we found the
                    # end of the output.
                    while True:
                        next_line = tab.ReadString(endings)
                        # If the match was the 1st index in the endings list -> \r\n
                        if tab.MatchIndex == 1:
                            # Strip newlines from front and back of line.
                            next_line = next_line.strip('\r\n')
                            # If there is something left, check for Invalid command.
                            if "% Invalid" in next_line:
                                # Strip line endings from line.  Also re-encode line as ASCII
                                # and ignore the character if it can't be done (rare error on
                                # Nexus)
                                out_file.write(
                                    '*** Invalid Command : ' + line.strip('\r\n').encode('ascii', 'ignore') + '\r\n'
                                )
                                # If we're stopping on errors, raise an exception so we'll stop on next iteration
                                if error_stop:
                                    raise NameError('InvalidCommand')
                                break
                            elif banner_lines:
                                # write out banner lines as a special case
                                out_file.write(line.strip('\r\n').encode('ascii', 'ignore') + '\r\n')
                                break
                        else:
                            # We got our prompt (MatchIndex is 2), so break the loop
                            out_file.write(line.strip('\r\n').encode('ascii', 'ignore') + '\r\n')
                            break
                # If we've raised an exception for an Invalid command and are supposed to stop
                # present a dialog box with a message indicating which line had the invalid command.
                except NameError:
                    crt.Dialog.MessageBox("Invalid Command Found\n On line: " + str(my_line_count), "Error", 16)
                    break

        # End configuration mode
        tab.Send("end\n")

        # Close the output file (the input file was already closed by the
        # with block)
        out_file.close()
        # Clean up before closing session
        EndSession(session)

        # Show dialog with completion message
        crt.Dialog.MessageBox("Config Load Complete", "Script Complete", 64)

    else:
        crt.Dialog.MessageBox("No Configfile Provided", "Error", 16)

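The filename construction in Example 20 splits the input config path once so the generated log keeps the original extension. The same steps in isolation, with a hypothetical date string:

import os

def output_name(configfile, date_str):
    # keep the original extension; inject a marker and the date before it
    c_filename, c_extension = os.path.splitext(configfile)
    return '-'.join([c_filename, "Config_Load", date_str + c_extension])

print(output_name('router1.txt', '2016-01-31'))
# 'router1-Config_Load-2016-01-31.txt'
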
Example 21

Project: mpop
Source File: viirs_compact.py
View license
def load(satscene, *args, **kwargs):
    del args

    files_to_load = []
    files_to_delete = []

    try:
        filename = kwargs.get("filename")
        logger.debug("reading %s", str(filename))
        if filename is not None:
            if isinstance(filename, (list, set, tuple)):
                files = filename
            else:
                files = [filename]
            files_to_load = []
            for filename in files:
                pathname, ext = os.path.splitext(filename)
                if ext == ".bz2":
                    zipfile = bz2.BZ2File(filename)
                    newname = os.path.join("/tmp", os.path.basename(pathname))
                    if not os.path.exists(newname):
                        with open(newname, "wb") as fp_:
                            fp_.write(zipfile.read())
                    zipfile.close()
                    files_to_load.append(newname)
                    files_to_delete.append(newname)
                else:
                    files_to_load.append(filename)
        else:
            time_start, time_end = kwargs.get("time_interval",
                                              (satscene.time_slot, None))

            conf = ConfigParser()
            conf.read(os.path.join(CONFIG_PATH, satscene.fullname + ".cfg"))
            options = {}
            for option, value in conf.items(satscene.instrument_name + "-level2",
                                            raw=True):
                options[option] = value

            template = os.path.join(options["dir"], options["filename"])

            second = timedelta(seconds=1)
            files_to_load = []

            if time_end is not None:
                time = time_start - second * 85
                files_to_load = []
                while time <= time_end:
                    fname = time.strftime(template)
                    flist = glob.glob(fname)
                    try:
                        files_to_load.append(flist[0])
                        time += second * 80
                    except IndexError:
                        pass
                    time += second

            else:
                files_to_load = glob.glob(time_start.strftime(template))

        chan_dict = {"M01": "M1",
                     "M02": "M2",
                     "M03": "M3",
                     "M04": "M4",
                     "M05": "M5",
                     "M06": "M6",
                     "M07": "M7",
                     "M08": "M8",
                     "M09": "M9",
                     "M10": "M10",
                     "M11": "M11",
                     "M12": "M12",
                     "M13": "M13",
                     "M14": "M14",
                     "M15": "M15",
                     "M16": "M16",
                     "DNB": "DNB"}

        channels = [(chn, chan_dict[chn])
                    for chn in satscene.channels_to_load
                    if chn in chan_dict]
        try:
            channels_to_load, chans = zip(*channels)
        except ValueError:
            return

        m_chans = []
        dnb_chan = []
        for chn in chans:
            if chn.startswith('M'):
                m_chans.append(chn)
            elif chn.startswith('DNB'):
                dnb_chan.append(chn)
            else:
                raise ValueError("Reading of channel %s not implemented", chn)

        m_datas = []
        m_lonlats = []
        dnb_datas = []
        dnb_lonlats = []

        for fname in files_to_load:
            is_dnb = os.path.basename(fname).startswith('SVDNBC')
            logger.debug("Reading %s", fname)
            if is_dnb:
                if tables:
                    h5f = tables.open_file(fname, "r")
                else:
                    logger.warning("DNB data could not be read from %s, "
                                   "PyTables not available.", fname)
                    continue
            else:
                h5f = h5py.File(fname, "r")
            if m_chans and not is_dnb:
                try:
                    arr, m_units = read_m(h5f, m_chans)
                    m_datas.append(arr)
                    m_lonlats.append(navigate_m(h5f, m_chans[0]))
                except KeyError:
                    pass
            if dnb_chan and is_dnb and tables:
                try:
                    arr, dnb_units = read_dnb(h5f)
                    dnb_datas.append(arr)
                    dnb_lonlats.append(navigate_dnb(h5f))
                except KeyError:
                    pass
            h5f.close()

        if len(m_lonlats) > 0:
            m_lons = np.ma.vstack([lonlat[0] for lonlat in m_lonlats])
            m_lats = np.ma.vstack([lonlat[1] for lonlat in m_lonlats])
        if len(dnb_lonlats) > 0:
            dnb_lons = np.ma.vstack([lonlat[0] for lonlat in dnb_lonlats])
            dnb_lats = np.ma.vstack([lonlat[1] for lonlat in dnb_lonlats])

        m_i = 0
        dnb_i = 0
        for chn in channels_to_load:
            if m_datas and chn.startswith('M'):
                m_data = np.ma.vstack([dat[m_i] for dat in m_datas])
                satscene[chn] = m_data
                satscene[chn].info["units"] = m_units[m_i]
                m_i += 1
            if dnb_datas and chn.startswith('DNB'):
                dnb_data = np.ma.vstack([dat[dnb_i] for dat in dnb_datas])
                satscene[chn] = dnb_data
                satscene[chn].info["units"] = dnb_units[dnb_i]
                dnb_i += 1

        if m_datas:
            m_area_def = SwathDefinition(np.ma.masked_where(m_data.mask, m_lons),
                                         np.ma.masked_where(m_data.mask, m_lats))
        else:
            logger.warning("No M channel data available.")

        if dnb_datas:
            dnb_area_def = SwathDefinition(np.ma.masked_where(dnb_data.mask,
                                                              dnb_lons),
                                           np.ma.masked_where(dnb_data.mask,
                                                              dnb_lats))
        else:
            logger.warning("No DNB data available.")

        for chn in channels_to_load:
            if "DNB" not in chn and m_datas:
                satscene[chn].area = m_area_def

        if dnb_datas:
            for chn in dnb_chan:
                satscene[chn].area = dnb_area_def

    finally:
        for fname in files_to_delete:
            if os.path.exists(fname):
                os.remove(fname)

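Example 21 keys its on-the-fly decompression off the extension. Note that splitext only peels the last extension, which is exactly what is wanted for '.h5.bz2'-style names. A hedged sketch of the same detection, using tempfile rather than the hardcoded /tmp:

import bz2
import os
import tempfile

def maybe_decompress(filename):
    # 'granule.h5.bz2' -> ('granule.h5', '.bz2')
    pathname, ext = os.path.splitext(filename)
    if ext != '.bz2':
        return filename, False
    newname = os.path.join(tempfile.gettempdir(), os.path.basename(pathname))
    if not os.path.exists(newname):
        with bz2.BZ2File(filename) as zipfile, open(newname, 'wb') as fp_:
            fp_.write(zipfile.read())
    return newname, True  # caller should delete the temporary copy when done
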
Example 22

Project: rdfextras
Source File: test.py
View license
def generictest(testFile):
    func_name = __name__ = __doc__ = id = 'test_sparql.' + \
                os.path.splitext(testFile)[0][8:].translate(
                                                    maketrans('-/','__'))
    store = plugin.get(STORE,Store)()
    bootStrapStore(store)
    store.commit()
    prefix = testFile.split('.rq')[-1]
    manifestPath = '/'.join(testFile.split('/')[:-1]+['manifest.n3'])
    manifestPath2 = '/'.join(testFile.split('/')[:-1]+['manifest.ttl'])
    queryFileName = testFile.split('/')[-1]
    store = plugin.get(STORE,Store)()
    store.open(configString,create=False)
    assert len(store) == 0
    manifestG=ConjunctiveGraph(store)
    if not os.path.exists(manifestPath):
        assert os.path.exists(manifestPath2)
        manifestPath = manifestPath2
    manifestG.default_context.parse(open(manifestPath),
                                    publicID=URIRef(TEST_BASE),
                                    format='n3')
    manifestData = manifestG.query(
                      MANIFEST_QUERY,
                      processor='sparql',
                      initBindings={'query' : TEST_BASE[queryFileName]},
                      initNs=manifestNS,
                      DEBUG = False)
    store.rollback()
    store.close()
    for source,testCaseName,testCaseComment,expectedRT in manifestData:
        if expectedRT:
            expectedRT = '/'.join(testFile.split('/')[:-1] + \
                                    [expectedRT.replace(TEST_BASE,'')])
        if source:
            source = '/'.join(testFile.split('/')[:-1] + \
                                    [source.replace(TEST_BASE,'')])
        testCaseName = testCaseComment and testCaseComment or testCaseName
        # log.debug("## Source: %s ##"%source)
        # log.debug("## Test: %s ##"%testCaseName)
        # log.debug("## Result: %s ##"%expectedRT)
        #Expected results
        if expectedRT:
            store = plugin.get(STORE,Store)()
            store.open(configString,create=False)
            resultG=ConjunctiveGraph(store).default_context
            log.debug("###"*10)
            log.debug("parsing: %s" % open(expectedRT).read())
            log.debug("###"*10)
            assert len(store) == 0
            # log.debug("## Parsing (%s) ##"%(expectedRT))
            if not trialAndErrorRTParse(resultG,expectedRT,DEBUG):
                log.debug(
                    "Unexpected result format (for %s), skipping" % \
                                                    (expectedRT))
                store.rollback()
                store.close()
                continue
            log.debug("## Done .. ##")
            rtVars = [rtVar for rtVar in 
                        resultG.objects(None,RESULT_NS.resultVariable)]
            bindings = []
            resultSetNode = resultG.value(predicate=RESULT_NS.value,
                                          object=RESULT_NS.ResultSet)
            for solutionNode in resultG.objects(resultSetNode,
                                                RESULT_NS.solution):
                bindingDict = dict([(key,None) for key in rtVars])
                for bindingNode in resultG.objects(solutionNode,
                                                   RESULT_NS.binding):
                    value = resultG.value(subject=bindingNode,
                                          predicate=RESULT_NS.value)
                    name  = resultG.value(subject=bindingNode,
                                          predicate=RESULT_NS.variable)
                    bindingDict[name] = value
                rbinds = [bindingDict[vName] for vName in rtVars]
                # print("Rbinds", rbinds)
                if len(rbinds) > 1 and (
                    isinstance(rbinds, list) or isinstance(rbinds, tuple)):
                    bindings.append(frozenset(rbinds))
                elif len(rbinds) == 1 and (
                    isinstance(rbinds, list) or isinstance(rbinds, tuple)):
                    bindings.append(rbinds[0])
                else:
                    bindings.append(rbinds)
                # bindings.append(tuple([bindingDict[vName] for vName in rtVars]))
            log.debug(open(expectedRT).read())
            store.rollback()
            store.close()
        if testFile in tests2Skip.keys():
            log.debug("Skipping test (%s) %s\n" % \
                        (testFile, tests2Skip[testFile]))
            raise SkipTest("Skipping test (%s) %s\n" % \
                        (testFile, tests2Skip[testFile]))
        query = open(testFile).read()
        log.debug("### %s (%s) ###" % (testCaseName,testFile))
        log.debug(query)
        p = parse(query)#,DEBUG_PARSE)
        log.debug(p)
        if EVALUATE and source:
            log.debug("### Source Graph: ###")
            log.debug(open(source).read())
            store = plugin.get(STORE,Store)()
            store.open(configString,create=False)
            g = ConjunctiveGraph(store)
            try:
                g.parse(open(source),format='n3')
            except:
                log.debug("Unexpected data format (for %s), skipping" % \
                                                                (source))
                store.rollback()
                store.close()
                continue
            rt = g.query(query,
                         processor='sparql',
                         DEBUG = False)
            if expectedRT:
                try:
                    result = rt.result
                except AttributeError:
                    result = rt
                if isinstance(result, Graph):
                    resgraph = open(graphtests[testFile]).read()
                    store = plugin.get(STORE,Store)()
                    store.open(configString,create=False)
                    g = ConjunctiveGraph(store)
                    g.parse(data=resgraph,format="n3")
                    assert result == g, \
                            "### Test Failed: ###\n\nB:\n%s\n\nR:\n%s\n\n" % \
                                    (g.serialize(format="n3"), 
                                     result.serialize(format="n3"))
                else:
                    # result = [r[0] for r in result if isinstance(r, (tuple, list))]
                    def stab(r):
                        if isinstance(r, (tuple, list)):
                            return frozenset(r)
                        else:
                            return r
                    results = set(
                        [stab(r) for r in result])
                    assert set(bindings).difference(results) == set([]) or set(bindings) == results, \
                            "### Test Failed: ###\n\nB:\n%s\n\nR:\n%s\n\n" % \
                                    (set(bindings), results)
                log.debug("### Test Passed: ###")
            store.rollback()
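
The interesting move in this harness is how it compares result rows without caring about variable order. A minimal sketch of that normalization, assuming rows arrive as tuples, lists, or bare values (the helper name normalize_row is ours, purely illustrative):

def normalize_row(row):
    # Sequence rows are frozen so comparison ignores ordering;
    # scalar bindings pass through unchanged.
    if isinstance(row, (tuple, list)):
        return frozenset(row)
    return row

expected = set(normalize_row(r) for r in [("a", "b"), "c"])
got = set(normalize_row(r) for r in [("b", "a"), "c"])
assert expected == got  # order within a row no longer matters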

Example 23

Project: headphones
Source File: registerapp.py
View license
def registerapp(app):

    # don't do any of this unless >= 10.8
    if [int(n) for n in platform.mac_ver()[0].split('.')] < [10, 8]:
        return None, 'Registering requires OS X version >= 10.8'

    app_path = None

    # check an app with our bundle identifier isn't already registered
    app_path = subprocess.check_output(['/usr/bin/mdfind', 'kMDItemCFBundleIdentifier == "ade.headphones.osxnotify"']).strip()
    if app_path:
        return app_path, 'App previously registered'

    # validate the supplied name and make sure the bundle doesn't already exist
    app = app.strip()
    if not app:
        return None, 'Path/Application not entered'
    if os.path.splitext(app)[1] == ".app":
        app_path = app
    else:
        app_path = app + '.app'
    if os.path.exists(app_path):
        return None, 'App %s already exists, choose a different name' % app_path

    # generate app
    try:
        os.mkdir(app_path)
        os.mkdir(app_path + "/Contents")
        os.mkdir(app_path + "/Contents/MacOS")
        os.mkdir(app_path + "/Contents/Resources")
        shutil.copy(os.path.join(os.path.dirname(__file__), "appIcon.icns"), app_path + "/Contents/Resources/")

        version = "1.0.0"
        bundleName = "OSXNotify"
        bundleIdentifier = "ade.headphones.osxnotify"

        f = open(app_path + "/Contents/Info.plist", "w")
        f.write("""<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
    <key>CFBundleDevelopmentRegion</key>
    <string>English</string>
    <key>CFBundleExecutable</key>
    <string>main.py</string>
    <key>CFBundleGetInfoString</key>
    <string>%s</string>
    <key>CFBundleIconFile</key>
    <string>appIcon.icns</string>
    <key>CFBundleIdentifier</key>
    <string>%s</string>
    <key>CFBundleInfoDictionaryVersion</key>
    <string>6.0</string>
    <key>CFBundleName</key>
    <string>%s</string>
    <key>CFBundlePackageType</key>
    <string>APPL</string>
    <key>CFBundleShortVersionString</key>
    <string>%s</string>
    <key>CFBundleSignature</key>
    <string>????</string>
    <key>CFBundleVersion</key>
    <string>%s</string>
    <key>NSAppleScriptEnabled</key>
    <string>YES</string>
    <key>NSMainNibFile</key>
    <string>MainMenu</string>
    <key>NSPrincipalClass</key>
    <string>NSApplication</string>
</dict>
</plist>
"""     % (bundleName + " " + version, bundleIdentifier, bundleName, bundleName + " " + version, version))
        f.close()

        f = open(app_path + "/Contents/PkgInfo", "w")
        f.write("APPL????")
        f.close()

        f = open(app_path + "/Contents/MacOS/main.py", "w")
        f.write("""#!/usr/bin/python

objc = None

def swizzle(cls, SEL, func):
    old_IMP = cls.instanceMethodForSelector_(SEL)
    def wrapper(self, *args, **kwargs):
        return func(self, old_IMP, *args, **kwargs)
    new_IMP = objc.selector(wrapper, selector=old_IMP.selector,
        signature=old_IMP.signature)
    objc.classAddMethod(cls, SEL, new_IMP)

def notify(title, subtitle=None, text=None, sound=True):
    global objc
    objc = __import__("objc")
    swizzle(objc.lookUpClass('NSBundle'),
        b'bundleIdentifier',
        swizzled_bundleIdentifier)
    NSUserNotification = objc.lookUpClass('NSUserNotification')
    NSUserNotificationCenter = objc.lookUpClass('NSUserNotificationCenter')
    NSAutoreleasePool = objc.lookUpClass('NSAutoreleasePool')
    pool = NSAutoreleasePool.alloc().init()
    notification = NSUserNotification.alloc().init()
    notification.setTitle_(title)
    notification.setSubtitle_(subtitle)
    notification.setInformativeText_(text)
    notification.setSoundName_("NSUserNotificationDefaultSoundName")
    notification_center = NSUserNotificationCenter.defaultUserNotificationCenter()
    notification_center.deliverNotification_(notification)
    del pool

def swizzled_bundleIdentifier(self, original):
    return 'ade.headphones.osxnotify'

if __name__ == '__main__':
    notify('Half Man Half Biscuit', 'Back in the DHSS', '99% Of Gargoyles Look Like Bob Todd')
""")
        f.close()

        oldmode = os.stat(app_path + "/Contents/MacOS/main.py").st_mode
        os.chmod(app_path + "/Contents/MacOS/main.py", oldmode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)

        return app_path, 'App registered'

    except Exception as e:
        return None, 'Error creating App %s. %s' % (app_path, e)
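
Stripped of the bundle-creation details, the extension handling above is the usual splitext guard. A minimal sketch (ensure_extension is an illustrative name, not part of headphones):

import os.path

def ensure_extension(path, ext='.app'):
    # Append ext only when os.path.splitext reports a different
    # (or missing) extension, mirroring the check in registerapp.
    if os.path.splitext(path)[1] == ext:
        return path
    return path + ext

print(ensure_extension('Headphones'))      # Headphones.app
print(ensure_extension('Headphones.app'))  # Headphones.app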

Example 24

Project: youtube-dl
Source File: test_download.py
View license
def generator(test_case):

    def test_template(self):
        ie = youtube_dl.extractor.get_info_extractor(test_case['name'])
        other_ies = [get_info_extractor(ie_key) for ie_key in test_case.get('add_ie', [])]
        is_playlist = any(k.startswith('playlist') for k in test_case)
        test_cases = test_case.get(
            'playlist', [] if is_playlist else [test_case])

        def print_skipping(reason):
            print('Skipping %s: %s' % (test_case['name'], reason))
        if not ie.working():
            print_skipping('IE marked as not _WORKING')
            return

        for tc in test_cases:
            info_dict = tc.get('info_dict', {})
            if not (info_dict.get('id') and info_dict.get('ext')):
                raise Exception('Test definition incorrect. The output file cannot be known. Are both \'id\' and \'ext\' keys present?')

        if 'skip' in test_case:
            print_skipping(test_case['skip'])
            return
        for other_ie in other_ies:
            if not other_ie.working():
                print_skipping('test depends on %sIE, marked as not WORKING' % other_ie.ie_key())
                return

        params = get_params(test_case.get('params', {}))
        if is_playlist and 'playlist' not in test_case:
            params.setdefault('extract_flat', 'in_playlist')
            params.setdefault('skip_download', True)

        ydl = YoutubeDL(params, auto_init=False)
        ydl.add_default_info_extractors()
        finished_hook_called = set()

        def _hook(status):
            if status['status'] == 'finished':
                finished_hook_called.add(status['filename'])
        ydl.add_progress_hook(_hook)
        expect_warnings(ydl, test_case.get('expected_warnings', []))

        def get_tc_filename(tc):
            return ydl.prepare_filename(tc.get('info_dict', {}))

        res_dict = None

        def try_rm_tcs_files(tcs=None):
            if tcs is None:
                tcs = test_cases
            for tc in tcs:
                tc_filename = get_tc_filename(tc)
                try_rm(tc_filename)
                try_rm(tc_filename + '.part')
                try_rm(os.path.splitext(tc_filename)[0] + '.info.json')
        try_rm_tcs_files()
        try:
            try_num = 1
            while True:
                try:
                    # We're not using .download here since that is just a shim
                    # for outside error handling, and returns the exit code
                    # instead of the result dict.
                    res_dict = ydl.extract_info(
                        test_case['url'],
                        force_generic_extractor=params.get('force_generic_extractor', False))
                except (DownloadError, ExtractorError) as err:
                    # Check if the exception is not a network related one
                    if not err.exc_info[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError, compat_http_client.BadStatusLine) or (err.exc_info[0] == compat_HTTPError and err.exc_info[1].code == 503):
                        raise

                    if try_num == RETRIES:
                        report_warning('Failed due to network errors, skipping...')
                        return

                    print('Retrying: {0} failed tries\n\n##########\n\n'.format(try_num))

                    try_num += 1
                else:
                    break

            if is_playlist:
                self.assertTrue(res_dict['_type'] in ['playlist', 'multi_video'])
                self.assertTrue('entries' in res_dict)
                expect_info_dict(self, res_dict, test_case.get('info_dict', {}))

            if 'playlist_mincount' in test_case:
                assertGreaterEqual(
                    self,
                    len(res_dict['entries']),
                    test_case['playlist_mincount'],
                    'Expected at least %d in playlist %s, but got only %d' % (
                        test_case['playlist_mincount'], test_case['url'],
                        len(res_dict['entries'])))
            if 'playlist_count' in test_case:
                self.assertEqual(
                    len(res_dict['entries']),
                    test_case['playlist_count'],
                    'Expected %d entries in playlist %s, but got %d.' % (
                        test_case['playlist_count'],
                        test_case['url'],
                        len(res_dict['entries']),
                    ))
            if 'playlist_duration_sum' in test_case:
                got_duration = sum(e['duration'] for e in res_dict['entries'])
                self.assertEqual(
                    test_case['playlist_duration_sum'], got_duration)

            for tc in test_cases:
                tc_filename = get_tc_filename(tc)
                if not test_case.get('params', {}).get('skip_download', False):
                    self.assertTrue(os.path.exists(tc_filename), msg='Missing file ' + tc_filename)
                    self.assertTrue(tc_filename in finished_hook_called)
                    expected_minsize = tc.get('file_minsize', 10000)
                    if expected_minsize is not None:
                        if params.get('test'):
                            expected_minsize = max(expected_minsize, 10000)
                        got_fsize = os.path.getsize(tc_filename)
                        assertGreaterEqual(
                            self, got_fsize, expected_minsize,
                            'Expected %s to be at least %s, but it\'s only %s ' %
                            (tc_filename, format_bytes(expected_minsize),
                                format_bytes(got_fsize)))
                    if 'md5' in tc:
                        md5_for_file = _file_md5(tc_filename)
                        self.assertEqual(md5_for_file, tc['md5'])
                info_json_fn = os.path.splitext(tc_filename)[0] + '.info.json'
                self.assertTrue(
                    os.path.exists(info_json_fn),
                    'Missing info file %s' % info_json_fn)
                with io.open(info_json_fn, encoding='utf-8') as infof:
                    info_dict = json.load(infof)

                expect_info_dict(self, info_dict, tc.get('info_dict', {}))
        finally:
            try_rm_tcs_files()
            if is_playlist and res_dict is not None and res_dict.get('entries'):
                # Remove all other files that may have been extracted if the
                # extractor returns full results even with extract_flat
                res_tcs = [{'info_dict': e} for e in res_dict['entries']]
                try_rm_tcs_files(res_tcs)

    return test_template
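
The test relies on splitext to derive sidecar filenames from whatever media filename the output template produced. Roughly, assuming a single media extension:

import os.path

def info_json_filename(media_filename):
    # Drop the media extension, keep the rest of the path, and
    # attach the .info.json suffix, as try_rm_tcs_files does.
    return os.path.splitext(media_filename)[0] + '.info.json'

print(info_json_filename('video-abc123.mp4'))  # video-abc123.info.json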

Example 25

Project: youtube-dl
Source File: __init__.py
View license
def _real_main(argv=None):
    # Compatibility fixes for Windows
    if sys.platform == 'win32':
        # https://github.com/rg3/youtube-dl/issues/820
        codecs.register(lambda name: codecs.lookup('utf-8') if name == 'cp65001' else None)

    workaround_optparse_bug9161()

    setproctitle('youtube-dl')

    parser, opts, args = parseOpts(argv)

    # Set user agent
    if opts.user_agent is not None:
        std_headers['User-Agent'] = opts.user_agent

    # Set referer
    if opts.referer is not None:
        std_headers['Referer'] = opts.referer

    # Custom HTTP headers
    if opts.headers is not None:
        for h in opts.headers:
            if ':' not in h:
                parser.error('wrong header formatting, it should be key:value, not "%s"' % h)
            key, value = h.split(':', 1)
            if opts.verbose:
                write_string('[debug] Adding header from command line option %s:%s\n' % (key, value))
            std_headers[key] = value

    # Dump user agent
    if opts.dump_user_agent:
        write_string(std_headers['User-Agent'] + '\n', out=sys.stdout)
        sys.exit(0)

    # Batch file verification
    batch_urls = []
    if opts.batchfile is not None:
        try:
            if opts.batchfile == '-':
                batchfd = sys.stdin
            else:
                batchfd = io.open(
                    compat_expanduser(opts.batchfile),
                    'r', encoding='utf-8', errors='ignore')
            batch_urls = read_batch_urls(batchfd)
            if opts.verbose:
                write_string('[debug] Batch file urls: ' + repr(batch_urls) + '\n')
        except IOError:
            sys.exit('ERROR: batch file could not be read')
    all_urls = batch_urls + args
    all_urls = [url.strip() for url in all_urls]
    _enc = preferredencoding()
    all_urls = [url.decode(_enc, 'ignore') if isinstance(url, bytes) else url for url in all_urls]

    if opts.list_extractors:
        for ie in list_extractors(opts.age_limit):
            write_string(ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie._WORKING else '') + '\n', out=sys.stdout)
            matchedUrls = [url for url in all_urls if ie.suitable(url)]
            for mu in matchedUrls:
                write_string('  ' + mu + '\n', out=sys.stdout)
        sys.exit(0)
    if opts.list_extractor_descriptions:
        for ie in list_extractors(opts.age_limit):
            if not ie._WORKING:
                continue
            desc = getattr(ie, 'IE_DESC', ie.IE_NAME)
            if desc is False:
                continue
            if hasattr(ie, 'SEARCH_KEY'):
                _SEARCHES = ('cute kittens', 'slithering pythons', 'falling cat', 'angry poodle', 'purple fish', 'running tortoise', 'sleeping bunny', 'burping cow')
                _COUNTS = ('', '5', '10', 'all')
                desc += ' (Example: "%s%s:%s" )' % (ie.SEARCH_KEY, random.choice(_COUNTS), random.choice(_SEARCHES))
            write_string(desc + '\n', out=sys.stdout)
        sys.exit(0)
    if opts.ap_list_mso:
        table = [[mso_id, mso_info['name']] for mso_id, mso_info in MSO_INFO.items()]
        write_string('Supported TV Providers:\n' + render_table(['mso', 'mso name'], table) + '\n', out=sys.stdout)
        sys.exit(0)

    # Conflicting, missing and erroneous options
    if opts.usenetrc and (opts.username is not None or opts.password is not None):
        parser.error('using .netrc conflicts with giving username/password')
    if opts.password is not None and opts.username is None:
        parser.error('account username missing\n')
    if opts.ap_password is not None and opts.ap_username is None:
        parser.error('TV Provider account username missing\n')
    if opts.outtmpl is not None and (opts.usetitle or opts.autonumber or opts.useid):
        parser.error('using output template conflicts with using title, video ID or auto number')
    if opts.usetitle and opts.useid:
        parser.error('using title conflicts with using video ID')
    if opts.username is not None and opts.password is None:
        opts.password = compat_getpass('Type account password and press [Return]: ')
    if opts.ap_username is not None and opts.ap_password is None:
        opts.ap_password = compat_getpass('Type TV provider account password and press [Return]: ')
    if opts.ratelimit is not None:
        numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
        if numeric_limit is None:
            parser.error('invalid rate limit specified')
        opts.ratelimit = numeric_limit
    if opts.min_filesize is not None:
        numeric_limit = FileDownloader.parse_bytes(opts.min_filesize)
        if numeric_limit is None:
            parser.error('invalid min_filesize specified')
        opts.min_filesize = numeric_limit
    if opts.max_filesize is not None:
        numeric_limit = FileDownloader.parse_bytes(opts.max_filesize)
        if numeric_limit is None:
            parser.error('invalid max_filesize specified')
        opts.max_filesize = numeric_limit
    if opts.sleep_interval is not None:
        if opts.sleep_interval < 0:
            parser.error('sleep interval must be positive or 0')
    if opts.max_sleep_interval is not None:
        if opts.max_sleep_interval < 0:
            parser.error('max sleep interval must be positive or 0')
        if opts.max_sleep_interval < opts.sleep_interval:
            parser.error('max sleep interval must be greater than or equal to min sleep interval')
    else:
        opts.max_sleep_interval = opts.sleep_interval
    if opts.ap_mso and opts.ap_mso not in MSO_INFO:
        parser.error('Unsupported TV Provider, use --ap-list-mso to get a list of supported TV Providers')

    def parse_retries(retries):
        if retries in ('inf', 'infinite'):
            parsed_retries = float('inf')
        else:
            try:
                parsed_retries = int(retries)
            except (TypeError, ValueError):
                parser.error('invalid retry count specified')
        return parsed_retries
    if opts.retries is not None:
        opts.retries = parse_retries(opts.retries)
    if opts.fragment_retries is not None:
        opts.fragment_retries = parse_retries(opts.fragment_retries)
    if opts.buffersize is not None:
        numeric_buffersize = FileDownloader.parse_bytes(opts.buffersize)
        if numeric_buffersize is None:
            parser.error('invalid buffer size specified')
        opts.buffersize = numeric_buffersize
    if opts.playliststart <= 0:
        raise ValueError('Playlist start must be positive')
    if opts.playlistend not in (-1, None) and opts.playlistend < opts.playliststart:
        raise ValueError('Playlist end must be greater than playlist start')
    if opts.extractaudio:
        if opts.audioformat not in ['best', 'aac', 'mp3', 'm4a', 'opus', 'vorbis', 'wav']:
            parser.error('invalid audio format specified')
    if opts.audioquality:
        opts.audioquality = opts.audioquality.strip('k').strip('K')
        if not opts.audioquality.isdigit():
            parser.error('invalid audio quality specified')
    if opts.recodevideo is not None:
        if opts.recodevideo not in ['mp4', 'flv', 'webm', 'ogg', 'mkv', 'avi']:
            parser.error('invalid video recode format specified')
    if opts.convertsubtitles is not None:
        if opts.convertsubtitles not in ['srt', 'vtt', 'ass']:
            parser.error('invalid subtitle format specified')

    if opts.date is not None:
        date = DateRange.day(opts.date)
    else:
        date = DateRange(opts.dateafter, opts.datebefore)

    # Do not download videos when there are audio-only formats
    if opts.extractaudio and not opts.keepvideo and opts.format is None:
        opts.format = 'bestaudio/best'

    # --all-sub automatically sets --write-sub if --write-auto-sub is not given
    # this was the old behaviour if only --all-sub was given.
    if opts.allsubtitles and not opts.writeautomaticsub:
        opts.writesubtitles = True

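    # Resolve the output template: an explicit --output wins, then the
    # legacy flag combinations map to fixed templates, falling back to
    # DEFAULT_OUTTMPL.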
    outtmpl = ((opts.outtmpl is not None and opts.outtmpl) or
               (opts.format == '-1' and opts.usetitle and '%(title)s-%(id)s-%(format)s.%(ext)s') or
               (opts.format == '-1' and '%(id)s-%(format)s.%(ext)s') or
               (opts.usetitle and opts.autonumber and '%(autonumber)s-%(title)s-%(id)s.%(ext)s') or
               (opts.usetitle and '%(title)s-%(id)s.%(ext)s') or
               (opts.useid and '%(id)s.%(ext)s') or
               (opts.autonumber and '%(autonumber)s-%(id)s.%(ext)s') or
               DEFAULT_OUTTMPL)
    if not os.path.splitext(outtmpl)[1] and opts.extractaudio:
        parser.error('Cannot download a video and extract audio into the same'
                     ' file! Use "{0}.%(ext)s" instead of "{0}" as the output'
                     ' template'.format(outtmpl))

    any_getting = opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.getduration or opts.dumpjson or opts.dump_single_json
    any_printing = opts.print_json
    download_archive_fn = compat_expanduser(opts.download_archive) if opts.download_archive is not None else opts.download_archive

    # PostProcessors
    postprocessors = []
    # Add the metadata pp first, the other pps will copy it
    if opts.metafromtitle:
        postprocessors.append({
            'key': 'MetadataFromTitle',
            'titleformat': opts.metafromtitle
        })
    if opts.addmetadata:
        postprocessors.append({'key': 'FFmpegMetadata'})
    if opts.extractaudio:
        postprocessors.append({
            'key': 'FFmpegExtractAudio',
            'preferredcodec': opts.audioformat,
            'preferredquality': opts.audioquality,
            'nopostoverwrites': opts.nopostoverwrites,
        })
    if opts.recodevideo:
        postprocessors.append({
            'key': 'FFmpegVideoConvertor',
            'preferedformat': opts.recodevideo,
        })
    if opts.convertsubtitles:
        postprocessors.append({
            'key': 'FFmpegSubtitlesConvertor',
            'format': opts.convertsubtitles,
        })
    if opts.embedsubtitles:
        postprocessors.append({
            'key': 'FFmpegEmbedSubtitle',
        })
    if opts.embedthumbnail:
        already_have_thumbnail = opts.writethumbnail or opts.write_all_thumbnails
        postprocessors.append({
            'key': 'EmbedThumbnail',
            'already_have_thumbnail': already_have_thumbnail
        })
        if not already_have_thumbnail:
            opts.writethumbnail = True
    # XAttrMetadataPP should be run after post-processors that may change file
    # contents
    if opts.xattrs:
        postprocessors.append({'key': 'XAttrMetadata'})
    # Please keep ExecAfterDownload towards the bottom as it allows the user to modify the final file in any way.
    # So if the user is able to remove the file before your postprocessor runs, it might cause a few problems.
    if opts.exec_cmd:
        postprocessors.append({
            'key': 'ExecAfterDownload',
            'exec_cmd': opts.exec_cmd,
        })
    external_downloader_args = None
    if opts.external_downloader_args:
        external_downloader_args = compat_shlex_split(opts.external_downloader_args)
    postprocessor_args = None
    if opts.postprocessor_args:
        postprocessor_args = compat_shlex_split(opts.postprocessor_args)
    match_filter = (
        None if opts.match_filter is None
        else match_filter_func(opts.match_filter))

    ydl_opts = {
        'usenetrc': opts.usenetrc,
        'username': opts.username,
        'password': opts.password,
        'twofactor': opts.twofactor,
        'videopassword': opts.videopassword,
        'ap_mso': opts.ap_mso,
        'ap_username': opts.ap_username,
        'ap_password': opts.ap_password,
        'quiet': (opts.quiet or any_getting or any_printing),
        'no_warnings': opts.no_warnings,
        'forceurl': opts.geturl,
        'forcetitle': opts.gettitle,
        'forceid': opts.getid,
        'forcethumbnail': opts.getthumbnail,
        'forcedescription': opts.getdescription,
        'forceduration': opts.getduration,
        'forcefilename': opts.getfilename,
        'forceformat': opts.getformat,
        'forcejson': opts.dumpjson or opts.print_json,
        'dump_single_json': opts.dump_single_json,
        'simulate': opts.simulate or any_getting,
        'skip_download': opts.skip_download,
        'format': opts.format,
        'listformats': opts.listformats,
        'outtmpl': outtmpl,
        'autonumber_size': opts.autonumber_size,
        'restrictfilenames': opts.restrictfilenames,
        'ignoreerrors': opts.ignoreerrors,
        'force_generic_extractor': opts.force_generic_extractor,
        'ratelimit': opts.ratelimit,
        'nooverwrites': opts.nooverwrites,
        'retries': opts.retries,
        'fragment_retries': opts.fragment_retries,
        'skip_unavailable_fragments': opts.skip_unavailable_fragments,
        'buffersize': opts.buffersize,
        'noresizebuffer': opts.noresizebuffer,
        'continuedl': opts.continue_dl,
        'noprogress': opts.noprogress,
        'progress_with_newline': opts.progress_with_newline,
        'playliststart': opts.playliststart,
        'playlistend': opts.playlistend,
        'playlistreverse': opts.playlist_reverse,
        'noplaylist': opts.noplaylist,
        'logtostderr': opts.outtmpl == '-',
        'consoletitle': opts.consoletitle,
        'nopart': opts.nopart,
        'updatetime': opts.updatetime,
        'writedescription': opts.writedescription,
        'writeannotations': opts.writeannotations,
        'writeinfojson': opts.writeinfojson,
        'writethumbnail': opts.writethumbnail,
        'write_all_thumbnails': opts.write_all_thumbnails,
        'writesubtitles': opts.writesubtitles,
        'writeautomaticsub': opts.writeautomaticsub,
        'allsubtitles': opts.allsubtitles,
        'listsubtitles': opts.listsubtitles,
        'subtitlesformat': opts.subtitlesformat,
        'subtitleslangs': opts.subtitleslangs,
        'matchtitle': decodeOption(opts.matchtitle),
        'rejecttitle': decodeOption(opts.rejecttitle),
        'max_downloads': opts.max_downloads,
        'prefer_free_formats': opts.prefer_free_formats,
        'verbose': opts.verbose,
        'dump_intermediate_pages': opts.dump_intermediate_pages,
        'write_pages': opts.write_pages,
        'test': opts.test,
        'keepvideo': opts.keepvideo,
        'min_filesize': opts.min_filesize,
        'max_filesize': opts.max_filesize,
        'min_views': opts.min_views,
        'max_views': opts.max_views,
        'daterange': date,
        'cachedir': opts.cachedir,
        'youtube_print_sig_code': opts.youtube_print_sig_code,
        'age_limit': opts.age_limit,
        'download_archive': download_archive_fn,
        'cookiefile': opts.cookiefile,
        'nocheckcertificate': opts.no_check_certificate,
        'prefer_insecure': opts.prefer_insecure,
        'proxy': opts.proxy,
        'socket_timeout': opts.socket_timeout,
        'bidi_workaround': opts.bidi_workaround,
        'debug_printtraffic': opts.debug_printtraffic,
        'prefer_ffmpeg': opts.prefer_ffmpeg,
        'include_ads': opts.include_ads,
        'default_search': opts.default_search,
        'youtube_include_dash_manifest': opts.youtube_include_dash_manifest,
        'encoding': opts.encoding,
        'extract_flat': opts.extract_flat,
        'mark_watched': opts.mark_watched,
        'merge_output_format': opts.merge_output_format,
        'postprocessors': postprocessors,
        'fixup': opts.fixup,
        'source_address': opts.source_address,
        'call_home': opts.call_home,
        'sleep_interval': opts.sleep_interval,
        'max_sleep_interval': opts.max_sleep_interval,
        'external_downloader': opts.external_downloader,
        'list_thumbnails': opts.list_thumbnails,
        'playlist_items': opts.playlist_items,
        'xattr_set_filesize': opts.xattr_set_filesize,
        'match_filter': match_filter,
        'no_color': opts.no_color,
        'ffmpeg_location': opts.ffmpeg_location,
        'hls_prefer_native': opts.hls_prefer_native,
        'hls_use_mpegts': opts.hls_use_mpegts,
        'external_downloader_args': external_downloader_args,
        'postprocessor_args': postprocessor_args,
        'cn_verification_proxy': opts.cn_verification_proxy,
        'geo_verification_proxy': opts.geo_verification_proxy,

    }

    with YoutubeDL(ydl_opts) as ydl:
        # Update version
        if opts.update_self:
            update_self(ydl.to_screen, opts.verbose, ydl._opener)

        # Remove cache dir
        if opts.rm_cachedir:
            ydl.cache.remove()

        # Maybe do nothing
        if (len(all_urls) < 1) and (opts.load_info_filename is None):
            if opts.update_self or opts.rm_cachedir:
                sys.exit()

            ydl.warn_if_short_id(sys.argv[1:] if argv is None else argv)
            parser.error(
                'You must provide at least one URL.\n'
                'Type youtube-dl --help to see a list of all options.')

        try:
            if opts.load_info_filename is not None:
                retcode = ydl.download_with_info_file(compat_expanduser(opts.load_info_filename))
            else:
                retcode = ydl.download(all_urls)
        except MaxDownloadsReached:
            ydl.to_screen('--max-download limit reached, aborting.')
            retcode = 101

    sys.exit(retcode)
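
The splitext use here is a validation: os.path.splitext(outtmpl)[1] is the empty string exactly when the template carries no extension slot, in which case the extracted audio would land on top of the downloaded video. A minimal sketch of the check (template_has_extension is our name):

import os.path

def template_has_extension(outtmpl):
    # splitext returns ('root', '') when there is no extension,
    # so the second element doubles as a truth test.
    return bool(os.path.splitext(outtmpl)[1])

print(template_has_extension('%(title)s.%(ext)s'))  # True
print(template_has_extension('%(title)s'))          # False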

Example 26

Project: CythonCTypesBackend
Source File: Annotate.py
View license
    def save_annotation(self, source_filename, target_filename):
        self.mark_pos(None)
        f = Utils.open_source_file(source_filename)
        lines = f.readlines()
        for k in range(len(lines)):
            line = lines[k]
            for c, cc, html in special_chars:
                line = line.replace(c, cc)
            lines[k] = line
        f.close()
        all = []
        if False:
            for pos, item in self.annotations:
                if pos[0].filename == source_filename:
                    start = item.start()
                    size, end = item.end()
                    if size:
                        all.append((pos, start))
                        all.append(((source_filename, pos[1], pos[2]+size), end))
                    else:
                        all.append((pos, start+end))

        all.sort()
        all.reverse()
        for pos, item in all:
            _, line_no, col = pos
            line_no -= 1
            col += 1
            line = lines[line_no]
            lines[line_no] = line[:col] + item + line[col:]

        html_filename = os.path.splitext(target_filename)[0] + ".html"
        f = codecs.open(html_filename, "w", encoding="UTF-8")
        f.write(u'<!-- Generated by Cython %s on %s -->\n' % (Version.version, time.asctime()))
        f.write(u'<html>\n')
        f.write(u"""
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<style type="text/css">

body { font-family: courier; font-size: 12; }

.code  { font-size: 9; color: #444444; display: none; margin-left: 20px; }
.py_c_api  { color: red; }
.py_macro_api  { color: #FF7000; }
.pyx_c_api  { color: #FF3000; }
.pyx_macro_api  { color: #FF7000; }
.refnanny  { color: #FFA000; }

.error_goto  { color: #FFA000; }

.tag  {  }

.coerce  { color: #008000; border: 1px dotted #008000 }

.py_attr { color: #FF0000; font-weight: bold; }
.c_attr  { color: #0000FF; }

.py_call { color: #FF0000; font-weight: bold; }
.c_call  { color: #0000FF; }

.line { margin: 0em }

</style>
<script>
function toggleDiv(id) {
    theDiv = document.getElementById(id);
    if (theDiv.style.display == 'none') theDiv.style.display = 'block';
    else theDiv.style.display = 'none';
}
</script>
</head>
        """)
        f.write(u'<body>\n')
        f.write(u'<p>Generated by Cython %s on %s\n' % (Version.version, time.asctime()))
        c_file = Utils.decode_filename(os.path.basename(target_filename))
        f.write(u'<p>Raw output: <a href="%s">%s</a>\n' % (c_file, c_file))
        k = 0

        py_c_api = re.compile(u'(Py[A-Z][a-z]+_[A-Z][a-z][A-Za-z_]+)\(')
        py_macro_api = re.compile(u'(Py[A-Z][a-z]+_[A-Z][A-Z_]+)\(')
        pyx_c_api = re.compile(u'(__Pyx_[A-Z][a-z_][A-Za-z_]+)\(')
        pyx_macro_api = re.compile(u'(__Pyx_[A-Z][A-Z_]+)\(')
        error_goto = re.compile(ur'((; *if .*)? \{__pyx_filename = .*goto __pyx_L\w+;\})')
        refnanny = re.compile(u'(__Pyx_X?(GOT|GIVE)REF|__Pyx_RefNanny[A-Za-z]+)')

        code_source_file = self.code[source_filename]
        for line in lines:

            k += 1
            try:
                code = code_source_file[k]
            except KeyError:
                code = ''

            code = code.replace('<', '<code><</code>')

            code, py_c_api_calls = py_c_api.subn(ur"<span class='py_c_api'>\1</span>(", code)
            code, pyx_c_api_calls = pyx_c_api.subn(ur"<span class='pyx_c_api'>\1</span>(", code)
            code, py_macro_api_calls = py_macro_api.subn(ur"<span class='py_macro_api'>\1</span>(", code)
            code, pyx_macro_api_calls = pyx_macro_api.subn(ur"<span class='pyx_macro_api'>\1</span>(", code)
            code, refnanny_calls = refnanny.subn(ur"<span class='refnanny'>\1</span>", code)
            code, error_goto_calls = error_goto.subn(ur"<span class='error_goto'>\1</span>", code)

            code = code.replace(u"<span class='error_goto'>;", u";<span class='error_goto'>")

            score = 5*py_c_api_calls + 2*pyx_c_api_calls + py_macro_api_calls + pyx_macro_api_calls - refnanny_calls
            color = u"FFFF%02x" % int(255/(1+score/10.0))
            f.write(u"<pre class='line' style='background-color: #%s' onclick='toggleDiv(\"line%s\")'>" % (color, k))

            f.write(u" %d: " % k)
            for c, cc, html in special_chars:
                line = line.replace(cc, html)
            f.write(line.rstrip())

            f.write(u'</pre>\n')
            code = re.sub(line_pos_comment, '', code) # inline annotations are redundant
            f.write(u"<pre id='line%s' class='code' style='background-color: #%s'>%s</pre>" % (k, color, code))
        f.write(u'</body></html>\n')
        f.close()
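
Here splitext is the "swap the extension" idiom: the annotation report takes the C file's path and replaces its suffix with .html. A minimal sketch (with_extension is illustrative):

import os.path

def with_extension(path, new_ext):
    # Keep everything up to the final extension and attach the
    # new suffix, as save_annotation does for its .html output.
    return os.path.splitext(path)[0] + new_ext

print(with_extension('build/module.c', '.html'))  # build/module.html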

Example 27

Project: attention-lvcsr
Source File: main.py
View license
def initialize_all(config, save_path, bokeh_name,
                   params, bokeh_server, bokeh, test_tag, use_load_ext,
                   load_log, fast_start):
    root_path, extension = os.path.splitext(save_path)

    data = Data(**config['data'])
    train_conf = config['training']
    recognizer = create_model(config, data, test_tag)

    # Separate attention_params to be handled differently
    # when regularization is applied
    attention = recognizer.generator.transition.attention
    attention_params = Selector(attention).get_parameters().values()

    logger.info(
        "Initialization schemes for all bricks.\n"
        "Works well only in my branch with __repr__ added to all them,\n"
        "there is an issue #463 in Blocks to do that properly.")

    def show_init_scheme(cur):
        result = dict()
        for attr in dir(cur):
            if attr.endswith('_init'):
                result[attr] = getattr(cur, attr)
        for child in cur.children:
            result[child.name] = show_init_scheme(child)
        return result
    logger.info(pprint.pformat(show_init_scheme(recognizer)))

    prediction, prediction_mask = add_exploration(recognizer, data, train_conf)

    #
    # Observables:
    #
    primary_observables = []  # monitored each batch
    secondary_observables = []  # monitored every 10 batches
    validation_observables = []  # monitored on the validation set

    cg = recognizer.get_cost_graph(
        batch=True, prediction=prediction, prediction_mask=prediction_mask)
    labels, = VariableFilter(
        applications=[recognizer.cost], name='labels')(cg)
    labels_mask, = VariableFilter(
        applications=[recognizer.cost], name='labels_mask')(cg)

    gain_matrix = VariableFilter(
        theano_name=RewardRegressionEmitter.GAIN_MATRIX)(cg)
    if len(gain_matrix):
        gain_matrix, = gain_matrix
        primary_observables.append(
            rename(gain_matrix.min(), 'min_gain'))
        primary_observables.append(
            rename(gain_matrix.max(), 'max_gain'))

    batch_cost = cg.outputs[0].sum()
    batch_size = rename(recognizer.labels.shape[1], "batch_size")
    # Assumes constant batch size. `aggregation.mean` is not used because
    # of Blocks #514.
    cost = batch_cost / batch_size
    cost.name = "sequence_total_cost"
    logger.info("Cost graph is built")

    # Fetch variables useful for debugging.
    # It is important not to use any aggregation schemes here,
    # as it's currently impossible to spread the effect of
    # regularization on their variables, see Blocks #514.
    cost_cg = ComputationGraph(cost)
    r = recognizer
    energies, = VariableFilter(
        applications=[r.generator.readout.readout], name="output_0")(
            cost_cg)
    bottom_output = VariableFilter(
        # We need name_regex instead of name because LookupTable calls its output output_0
        applications=[r.bottom.apply], name_regex="output")(
            cost_cg)[-1]
    attended, = VariableFilter(
        applications=[r.generator.transition.apply], name="attended")(
            cost_cg)
    attended_mask, = VariableFilter(
        applications=[r.generator.transition.apply], name="attended_mask")(
            cost_cg)
    weights, = VariableFilter(
        applications=[r.generator.evaluate], name="weights")(
            cost_cg)
    max_recording_length = rename(bottom_output.shape[0],
                                  "max_recording_length")
    # To exclude subsampling related bugs
    max_attended_mask_length = rename(attended_mask.shape[0],
                                      "max_attended_mask_length")
    max_attended_length = rename(attended.shape[0],
                                 "max_attended_length")
    max_num_phonemes = rename(labels.shape[0],
                              "max_num_phonemes")
    min_energy = rename(energies.min(), "min_energy")
    max_energy = rename(energies.max(), "max_energy")
    mean_attended = rename(abs(attended).mean(),
                           "mean_attended")
    mean_bottom_output = rename(abs(bottom_output).mean(),
                                "mean_bottom_output")
    weights_penalty = rename(monotonicity_penalty(weights, labels_mask),
                             "weights_penalty")
    weights_entropy = rename(entropy(weights, labels_mask),
                             "weights_entropy")
    mask_density = rename(labels_mask.mean(),
                          "mask_density")
    cg = ComputationGraph([
        cost, weights_penalty, weights_entropy,
        min_energy, max_energy,
        mean_attended, mean_bottom_output,
        batch_size, max_num_phonemes,
        mask_density])
    # Regularization. It is applied explicitly to all variables
    # of interest, it could not be applied to the cost only as it
    # would not have effect on auxiliary variables, see Blocks #514.
    reg_config = config.get('regularization', dict())
    regularized_cg = cg
    if reg_config.get('dropout'):
        logger.info('apply dropout')
        regularized_cg = apply_dropout(cg, [bottom_output], 0.5)
    if reg_config.get('noise'):
        logger.info('apply noise')
        noise_subjects = [p for p in cg.parameters if p not in attention_params]
        regularized_cg = apply_noise(cg, noise_subjects, reg_config['noise'])

    train_cost = regularized_cg.outputs[0]
    if reg_config.get("penalty_coof", .0) > 0:
        # big warning!!!
        # here we assume that:
        # regularized_weights_penalty = regularized_cg.outputs[1]
        train_cost = (train_cost +
                      reg_config.get("penalty_coof", .0) *
                      regularized_cg.outputs[1] / batch_size)
    if reg_config.get("decay", .0) > 0:
        train_cost = (train_cost + reg_config.get("decay", .0) *
                      l2_norm(VariableFilter(roles=[WEIGHT])(cg.parameters)) ** 2)

    train_cost = rename(train_cost, 'train_cost')

    gradients = None
    if reg_config.get('adaptive_noise'):
        logger.info('apply adaptive noise')
        if ((reg_config.get("penalty_coof", .0) > 0) or
                (reg_config.get("decay", .0) > 0)):
            logger.error('using adaptive noise with alignment weight penalty '
                         'or weight decay is probably stupid')
        train_cost, regularized_cg, gradients, noise_brick = apply_adaptive_noise(
            cg, cg.outputs[0],
            variables=cg.parameters,
            num_examples=data.get_dataset('train').num_examples,
            parameters=Model(regularized_cg.outputs[0]).get_parameter_dict().values(),
            **reg_config.get('adaptive_noise')
        )
        train_cost.name = 'train_cost'
        adapt_noise_cg = ComputationGraph(train_cost)
        model_prior_mean = rename(
            VariableFilter(applications=[noise_brick.apply],
                           name='model_prior_mean')(adapt_noise_cg)[0],
            'model_prior_mean')
        model_cost = rename(
            VariableFilter(applications=[noise_brick.apply],
                           name='model_cost')(adapt_noise_cg)[0],
            'model_cost')
        model_prior_variance = rename(
            VariableFilter(applications=[noise_brick.apply],
                           name='model_prior_variance')(adapt_noise_cg)[0],
            'model_prior_variance')
        regularized_cg = ComputationGraph(
            [train_cost, model_cost] +
            regularized_cg.outputs +
            [model_prior_mean, model_prior_variance])
        primary_observables += [
            regularized_cg.outputs[1],  # model cost
            regularized_cg.outputs[2],  # task cost
            regularized_cg.outputs[-2],  # model prior mean
            regularized_cg.outputs[-1]]  # model prior variance

    model = Model(train_cost)
    if params:
        logger.info("Load parameters from " + params)
        # please note: we cannot use recognizer.load_params
        # as it builds a new computation graph that does not have
        # the shared variables added by adaptive weight noise
        with open(params, 'r') as src:
            param_values = load_parameters(src)
        model.set_parameter_values(param_values)

    parameters = model.get_parameter_dict()
    logger.info("Parameters:\n" +
                pprint.pformat(
                    [(key, parameters[key].get_value().shape) for key
                     in sorted(parameters.keys())],
                    width=120))

    # Define the training algorithm.
    clipping = StepClipping(train_conf['gradient_threshold'])
    clipping.threshold.name = "gradient_norm_threshold"
    rule_names = train_conf.get('rules', ['momentum'])
    core_rules = []
    if 'momentum' in rule_names:
        logger.info("Using scaling and momentum for training")
        core_rules.append(Momentum(train_conf['scale'], train_conf['momentum']))
    if 'adadelta' in rule_names:
        logger.info("Using AdaDelta for training")
        core_rules.append(AdaDelta(train_conf['decay_rate'], train_conf['epsilon']))
    max_norm_rules = []
    if reg_config.get('max_norm', 0) > 0:
        logger.info("Apply MaxNorm")
        maxnorm_subjects = VariableFilter(roles=[WEIGHT])(cg.parameters)
        if reg_config.get('max_norm_exclude_lookup', False):
            maxnorm_subjects = [v for v in maxnorm_subjects
                                if not isinstance(get_brick(v), LookupTable)]
        logger.info("Parameters covered by MaxNorm:\n"
                    + pprint.pformat([name for name, p in parameters.items()
                                      if p in maxnorm_subjects]))
        logger.info("Parameters NOT covered by MaxNorm:\n"
                    + pprint.pformat([name for name, p in parameters.items()
                                      if p not in maxnorm_subjects]))
        max_norm_rules = [
            Restrict(VariableClipping(reg_config['max_norm'], axis=0),
                     maxnorm_subjects)]
    burn_in = []
    if train_conf.get('burn_in_steps', 0):
        burn_in.append(
            BurnIn(num_steps=train_conf['burn_in_steps']))
    algorithm = GradientDescent(
        cost=train_cost,
        parameters=parameters.values(),
        gradients=gradients,
        step_rule=CompositeRule(
            [clipping] + core_rules + max_norm_rules +
            # Parameters are not changed at all
            # when nans are encountered.
            [RemoveNotFinite(0.0)] + burn_in),
        on_unused_sources='warn')

    logger.debug("Scan Ops in the gradients")
    gradient_cg = ComputationGraph(algorithm.gradients.values())
    for op in ComputationGraph(gradient_cg).scans:
        logger.debug(op)

    # More variables for debugging: some of them can be added only
    # after the `algorithm` object is created.
    secondary_observables += list(regularized_cg.outputs)
    if 'train_cost' not in [v.name for v in secondary_observables]:
        secondary_observables += [train_cost]
    secondary_observables += [
        algorithm.total_step_norm, algorithm.total_gradient_norm,
        clipping.threshold]
    for name, param in parameters.items():
        num_elements = numpy.product(param.get_value().shape)
        norm = param.norm(2) / num_elements ** 0.5
        grad_norm = algorithm.gradients[param].norm(2) / num_elements ** 0.5
        step_norm = algorithm.steps[param].norm(2) / num_elements ** 0.5
        stats = tensor.stack(norm, grad_norm, step_norm, step_norm / grad_norm)
        stats.name = name + '_stats'
        secondary_observables.append(stats)

    primary_observables += [
        train_cost,
        algorithm.total_gradient_norm,
        algorithm.total_step_norm, clipping.threshold,
        max_recording_length,
        max_attended_length, max_attended_mask_length]

    validation_observables += [
        rename(aggregation.mean(batch_cost, batch_size), cost.name),
        rename(aggregation.sum_(batch_size), 'num_utterances'),
        weights_entropy, weights_penalty]

    def attach_aggregation_schemes(variables):
        # Aggregation specification has to be factored out as a separate
        # function as it has to be applied at the very last stage
        # separately to training and validation observables.
        result = []
        for var in variables:
            if var.name == 'weights_penalty':
                result.append(rename(aggregation.mean(var, batch_size),
                                     'weights_penalty_per_recording'))
            elif var.name == 'weights_entropy':
                result.append(rename(aggregation.mean(var, labels_mask.sum()),
                                     'weights_entropy_per_label'))
            else:
                result.append(var)
        return result

    mon_conf = config['monitoring']

    # Build main loop.
    logger.info("Initialize extensions")
    extensions = []
    if use_load_ext and params:
        extensions.append(Load(params, load_iteration_state=True, load_log=True))
    if load_log and params:
        extensions.append(LoadLog(params))
    extensions += [
        Timing(after_batch=True),
        CGStatistics(),
        #CodeVersion(['lvsr']),
    ]
    extensions.append(TrainingDataMonitoring(
        primary_observables, after_batch=True))
    average_monitoring = TrainingDataMonitoring(
        attach_aggregation_schemes(secondary_observables),
        prefix="average", every_n_batches=10)
    extensions.append(average_monitoring)
    validation = DataStreamMonitoring(
        attach_aggregation_schemes(validation_observables),
        data.get_stream("valid", shuffle=False), prefix="valid").set_conditions(
            before_first_epoch=not fast_start,
            every_n_epochs=mon_conf['validate_every_epochs'],
            every_n_batches=mon_conf['validate_every_batches'],
            after_training=False)
    extensions.append(validation)
    per = PhonemeErrorRate(recognizer, data,
                           **config['monitoring']['search'])
    per_monitoring = DataStreamMonitoring(
        [per], data.get_stream("valid", batches=False, shuffle=False),
        prefix="valid").set_conditions(
            before_first_epoch=not fast_start,
            every_n_epochs=mon_conf['search_every_epochs'],
            every_n_batches=mon_conf['search_every_batches'],
            after_training=False)
    extensions.append(per_monitoring)
    track_the_best_per = TrackTheBest(
        per_monitoring.record_name(per)).set_conditions(
            before_first_epoch=True, after_epoch=True)
    track_the_best_cost = TrackTheBest(
        validation.record_name(cost)).set_conditions(
            before_first_epoch=True, after_epoch=True)
    extensions += [track_the_best_cost, track_the_best_per]
    extensions.append(AdaptiveClipping(
        algorithm.total_gradient_norm.name,
        clipping, train_conf['gradient_threshold'],
        decay_rate=0.998, burnin_period=500))
    extensions += [
        SwitchOffLengthFilter(
            data.length_filter,
            after_n_batches=train_conf.get('stop_filtering')),
        FinishAfter(after_n_batches=train_conf.get('num_batches'),
                    after_n_epochs=train_conf.get('num_epochs'))
            .add_condition(["after_batch"], _gradient_norm_is_none),
    ]
    channels = [
        # Plot 1: training and validation costs
        [average_monitoring.record_name(train_cost),
         validation.record_name(cost)],
        # Plot 2: gradient norm,
        [average_monitoring.record_name(algorithm.total_gradient_norm),
         average_monitoring.record_name(clipping.threshold)],
        # Plot 3: phoneme error rate
        [per_monitoring.record_name(per)],
        # Plot 4: training and validation mean weight entropy
        [average_monitoring._record_name('weights_entropy_per_label'),
         validation._record_name('weights_entropy_per_label')],
        # Plot 5: training and validation monotonicity penalty
        [average_monitoring._record_name('weights_penalty_per_recording'),
         validation._record_name('weights_penalty_per_recording')]]
    if bokeh:
        extensions += [
            Plot(bokeh_name if bokeh_name
                 else os.path.basename(save_path),
                 channels,
                 every_n_batches=10,
                 server_url=bokeh_server),]
    extensions += [
        Checkpoint(save_path,
                   before_first_epoch=not fast_start, after_epoch=True,
                   every_n_batches=train_conf.get('save_every_n_batches'),
                   save_separately=["model", "log"],
                   use_cpickle=True)
        .add_condition(
            ['after_epoch'],
            OnLogRecord(track_the_best_per.notification_name),
            (root_path + "_best" + extension,))
        .add_condition(
            ['after_epoch'],
            OnLogRecord(track_the_best_cost.notification_name),
            (root_path + "_best_ll" + extension,)),
        ProgressBar()]
    extensions.append(EmbedIPython(use_main_loop_run_caller_env=True))
    if config['net']['criterion']['name'].startswith('mse'):
        extensions.append(
            LogInputsGains(
                labels, cg, recognizer.generator.readout.emitter, data))

    if train_conf.get('patience'):
        patience_conf = train_conf['patience']
        if not patience_conf.get('notification_names'):
            # setdefault will not work for empty list
            patience_conf['notification_names'] = [
                track_the_best_per.notification_name,
                track_the_best_cost.notification_name]
        extensions.append(Patience(**patience_conf))

    extensions.append(Printing(every_n_batches=1,
                               attribute_filter=PrintingFilterList()))

    return model, algorithm, data, extensions
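
The splitext call at the top of initialize_all pays off at the bottom: root_path and extension are recombined with a tag in between so the best checkpoints sit next to the main one. A minimal sketch of that recombination (tagged_path is our name):

import os.path

def tagged_path(save_path, tag):
    # Split once, then rebuild with the tag wedged between the
    # root and the original extension, as the Checkpoint
    # conditions do for the "_best" and "_best_ll" files.
    root, ext = os.path.splitext(save_path)
    return root + tag + ext

print(tagged_path('run1/model.zip', '_best'))     # run1/model_best.zip
print(tagged_path('run1/model.zip', '_best_ll'))  # run1/model_best_ll.zip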

Example 28

Project: rootpy
Source File: gen_rst.py
View license
def generate_file_rst(fname, target_dir, src_dir, plot_gallery):
    """ Generate the rst file for a given example.
    """
    base_image_name = os.path.splitext(fname)[0]
    image_fname = '%s_%%s.png' % base_image_name
    root_image_fname = 'root_%s_%%s.png' % base_image_name
    root_fig_num = 1

    this_template = rst_template
    last_dir = os.path.split(src_dir)[-1]
    # to avoid leading . in file names, and wrong names in links
    if last_dir == '.' or last_dir == 'examples':
        last_dir = ''
    else:
        last_dir += '_'
    short_fname = last_dir + fname
    src_file = os.path.join(src_dir, fname)
    example_file = os.path.join(target_dir, fname)
    shutil.copyfile(src_file, example_file)

    # The following is a list containing all the figure names
    figure_list = []

    image_dir = os.path.join(target_dir, 'images')
    thumb_dir = os.path.join(image_dir, 'thumb')
    if not os.path.exists(image_dir):
        os.makedirs(image_dir)
    if not os.path.exists(thumb_dir):
        os.makedirs(thumb_dir)
    image_path = os.path.join(image_dir, image_fname)
    root_image_path = os.path.join(image_dir, root_image_fname)

    stdout_path = os.path.join(image_dir,
                               'stdout_%s.txt' % base_image_name)
    time_path = os.path.join(image_dir,
                               'time_%s.txt' % base_image_name)
    thumb_file = os.path.join(thumb_dir, fname[:-3] + '.png')
    time_elapsed = 0
    if plot_gallery and fname.startswith('plot'):
        # generate the plot as png image if file name
        # starts with plot and if it is more recent than an
        # existing image.
        first_image_file = image_path % 1
        first_root_image_file = root_image_path % 1
        if os.path.exists(stdout_path):
            stdout = open(stdout_path).read()
        else:
            stdout = ''
        if os.path.exists(time_path):
            time_elapsed = float(open(time_path).read())

        if (not os.path.exists(first_image_file) or
            not os.path.exists(first_root_image_file) or
                os.stat(first_image_file).st_mtime <=
                                    os.stat(src_file).st_mtime):
            # We need to execute the code
            print 'plotting %s' % fname
            t0 = time()
            import matplotlib.pyplot as plt
            plt.close('all')
            cwd = os.getcwd()
            try:
                # First CD in the original example dir, so that any file
                # created by the example gets created in this directory
                orig_stdout = sys.stdout
                os.chdir(os.path.dirname(src_file))
                my_buffer = StringIO()
                my_stdout = Tee(sys.stdout, my_buffer)
                sys.stdout = my_stdout
                my_globals = {'pl': plt}
                execfile(os.path.basename(src_file), my_globals)
                time_elapsed = time() - t0
                sys.stdout = orig_stdout
                my_stdout = my_buffer.getvalue()
                if '__doc__' in my_globals:
                    # The __doc__ is often printed in the example; we
                    # don't wish to echo it
                    my_stdout = my_stdout.replace(
                                            my_globals['__doc__'],
                                            '')
                my_stdout = my_stdout.strip()
                if my_stdout:
                    stdout = '**Script output**::\n\n  %s\n\n' % (
                        '\n  '.join(my_stdout.split('\n')))
                open(stdout_path, 'w').write(stdout)
                open(time_path, 'w').write('%f' % time_elapsed)
                os.chdir(cwd)

                # In order to save every figure we have two solutions :
                # * iterate from 1 to infinity and call plt.fignum_exists(n)
                #   (this requires the figures to be numbered
                #    incrementally: 1, 2, 3 and not 1, 2, 5)
                # * iterate over [fig_mngr.num for fig_mngr in
                #   matplotlib._pylab_helpers.Gcf.get_all_fig_managers()]
                for fig_num in (fig_mngr.num for fig_mngr in
                        matplotlib._pylab_helpers.Gcf.get_all_fig_managers()):
                    # Set the fig_num figure as the current figure as we can't
                    # save a figure that's not the current figure.
                    plt.figure(fig_num)
                    plt.savefig(image_path % fig_num)
                    figure_list.append(image_fname % fig_num)
                for canvas in ROOT.gROOT.GetListOfCanvases():
                    canvas.SaveAs(root_image_path % root_fig_num)
                    canvas.Close()
                    figure_list.append(root_image_fname % root_fig_num)
                    root_fig_num += 1
            except Exception:
                print 80 * '_'
                print '%s is not compiling:' % fname
                traceback.print_exc()
                print 80 * '_'
            finally:
                os.chdir(cwd)
                sys.stdout = orig_stdout

            print " - time elapsed : %.2g sec" % time_elapsed
        else:
            figure_list = [f[len(image_dir):]
                            for f in glob.glob(image_path % '[1-9]')]
                            #for f in glob.glob(image_path % '*')]

        # generate thumb file
        this_template = plot_rst_template
        from matplotlib import image
        if os.path.exists(first_image_file):
            image.thumbnail(first_image_file, thumb_file, 0.2)
        elif os.path.exists(first_root_image_file):
            image.thumbnail(first_root_image_file, thumb_file, 0.2)

    if not os.path.exists(thumb_file):
        # create a placeholder to use as the thumbnail
        shutil.copy('images/blank_image.png', thumb_file)

    docstring, short_desc, end_row = extract_docstring(example_file)

    # Depending on whether we have one or more figures, we're using a
    # horizontal list or a single rst call to 'image'.
    if len(figure_list) == 1:
        figure_name = figure_list[0]
        image_list = SINGLE_IMAGE % figure_name.lstrip('/')
    else:
        image_list = HLIST_HEADER
        for figure_name in figure_list:
            image_list += HLIST_IMAGE_TEMPLATE % figure_name.lstrip('/')

    f = open(os.path.join(target_dir, fname[:-2] + 'rst'), 'w')
    f.write(this_template % locals())
    f.flush()

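The ``fname[:-2] + 'rst'`` slice above only works because every example file
name ends in '.py' (the slice drops the trailing 'py' and keeps the dot). An
os.path.splitext-based rename handles any extension. A minimal standalone
sketch, with hypothetical file names:

import os.path

def swap_extension(filename, new_ext):
    # Replace whatever extension 'filename' has with 'new_ext'.
    root, _old_ext = os.path.splitext(filename)
    return root + new_ext

print(swap_extension('plot_hist.py', '.rst'))   # plot_hist.rst
print(swap_extension('plot_hist.pyw', '.rst'))  # plot_hist.rst
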
Example 29

Project: rootpy
Source File: plot_contour_matrix.py
View license
def plot_contour_matrix(arrays,
                        fields,
                        filename,
                        weights=None,
                        sample_names=None,
                        sample_lines=None,
                        sample_colors=None,
                        color_map=None,
                        num_bins=20,
                        num_contours=3,
                        cell_width=2,
                        cell_height=2,
                        cell_margin_x=0.05,
                        cell_margin_y=0.05,
                        dpi=100,
                        padding=0,
                        animate_field=None,
                        animate_steps=10,
                        animate_delay=20,
                        animate_loop=0):
    """
    Create a matrix of contour plots showing all possible 2D projections of a
    multivariate dataset. You may optionally animate the contours as a cut on
    one of the fields is increased. ImageMagick must be installed to produce
    animations.

    Parameters
    ----------

    arrays : list of arrays of shape [n_samples, n_fields]
        A list of 2D NumPy arrays for each sample. All arrays must have the
        same number of columns.

    fields : list of strings
        A list of the field names.

    filename : string
        The output filename. If animation is enabled
        (``animate_field is not None``) then ``filename`` must have the .gif
        extension.

    weights : list of arrays, optional (default=None)
        List of 1D NumPy arrays of sample weights corresponding to the arrays
        in ``arrays``.

    sample_names : list of strings, optional (default=None)
        A list of the sample names for the legend. If None, then no legend will
        be shown.

    sample_lines : list of strings, optional (default=None)
        A list of matplotlib line styles for each sample. If None then line
        styles will cycle through 'dashed', 'solid', 'dashdot', and 'dotted'.
        Elements of this list may also be a list of line styles which will be
        cycled through for the contour lines of the corresponding sample.

    sample_colors : list of matplotlib colors, optional (default=None)
        The color of the contours for each sample. If None, then colors will be
        selected according to regular intervals along the ``color_map``.

    color_map : a matplotlib color map, optional (default=None)
        If ``sample_colors is None`` then select colors according to regular
        intervals along this matplotlib color map. If ``color_map`` is None,
        then the spectral color map is used.

    num_bins : int, optional (default=20)
        The number of bins along both axes of the 2D histograms.

    num_contours : int, optional (default=3)
        The number of contour lines to show for each sample.

    cell_width : float, optional (default=2)
        The width, in inches, of each subplot in the matrix.

    cell_height : float, optional (default=2)
        The height, in inches, of each subplot in the matrix.

    cell_margin_x : float, optional (default=0.05)
        The horizontal margin between adjacent subplots, as a fraction
        of the subplot size.

    cell_margin_y : float, optional (default=0.05)
        The vertical margin between adjacent subplots, as a fraction
        of the subplot size.

    dpi : int, optional (default=100)
        The number of pixels per inch.

    padding : float, optional (default=0)
        The padding to guarantee around each sample's contour plot, as a
        fraction of the range of values along each axis.

    animate_field : string, optional (default=None)
        The field to animate a cut along. By default no animation is produced.
        If ``animate_field is not None`` then ``filename`` must end in the .gif
        extension and an animated GIF is produced.

    animate_steps : int, optional (default=10)
        The number of frames in the animation, corresponding to the number of
        regularly spaced cut values to show along the range of the
        ``animate_field``.

    animate_delay : int, optional (default=20)
        The duration that each frame is shown in the animation as a multiple of
        1 / 100 of a second.

    animate_loop : int, optional (default=0)
        The number of times to loop the animation. If zero, then loop forever.

    Notes
    -----

    NumPy and matplotlib are required.

    """
    import numpy as np
    from .. import root2matplotlib as r2m
    import matplotlib.pyplot as plt
    from matplotlib.ticker import MaxNLocator
    from matplotlib import cm
    from matplotlib.lines import Line2D

    # we must have at least two fields (columns)
    num_fields = len(fields)
    if num_fields < 2:
        raise ValueError(
            "record arrays must have at least two fields")
    # check that all arrays have the same number of columns
    for array in arrays:
        if array.shape[1] != num_fields:
            raise ValueError(
                "number of array columns does not match number of fields")

    if sample_colors is None:
        if color_map is None:
            color_map = cm.spectral
        steps = np.linspace(0, 1, len(arrays) + 2)[1:-1]
        sample_colors = [color_map(s) for s in steps]

    # determine range of each field
    low = np.vstack([a.min(axis=0) for a in arrays]).min(axis=0)
    high = np.vstack([a.max(axis=0) for a in arrays]).max(axis=0)
    width = np.abs(high - low)
    width *= padding
    low -= width
    high += width

    def single_frame(arrays, filename, label=None):

        # create the canvas and divide into matrix
        fig, axes = plt.subplots(
            nrows=num_fields,
            ncols=num_fields,
            figsize=(cell_width * num_fields, cell_height * num_fields))
        fig.subplots_adjust(hspace=cell_margin_y, wspace=cell_margin_x)

        for ax in axes.flat:
            # only show the left and bottom axes ticks and labels
            if ax.is_last_row() and not ax.is_last_col():
                ax.xaxis.set_visible(True)
                ax.xaxis.set_ticks_position('bottom')
                ax.xaxis.set_major_locator(MaxNLocator(4, prune='both'))
                for tick in ax.xaxis.get_major_ticks():
                    tick.label.set_rotation('vertical')
            else:
                ax.xaxis.set_visible(False)

            if ax.is_first_col() and not ax.is_first_row():
                ax.yaxis.set_visible(True)
                ax.yaxis.set_ticks_position('left')
                ax.yaxis.set_major_locator(MaxNLocator(4, prune='both'))
            else:
                ax.yaxis.set_visible(False)

        # turn off axes frames in upper triangular matrix
        for ix, iy in zip(*np.triu_indices_from(axes, k=0)):
            axes[ix, iy].axis('off')

        levels = np.linspace(0, 1, num_contours + 2)[1:-1]

        # plot the data
        for iy, ix in zip(*np.tril_indices_from(axes, k=-1)):
            ymin = float(low[iy])
            ymax = float(high[iy])
            xmin = float(low[ix])
            xmax = float(high[ix])
            for isample, a in enumerate(arrays):
                hist = Hist2D(
                    num_bins, xmin, xmax,
                    num_bins, ymin, ymax)
                if weights is not None:
                    hist.fill_array(a[:, [ix, iy]], weights[isample])
                else:
                    hist.fill_array(a[:, [ix, iy]])
                # normalize so maximum is 1.0
                _max = hist.GetMaximum()
                if _max != 0:
                    hist /= _max
                r2m.contour(hist,
                    axes=axes[iy, ix],
                    levels=levels,
                    linestyles=sample_lines[isample] if sample_lines else LINES,
                    colors=sample_colors[isample])

        # label the diagonal subplots
        for i, field in enumerate(fields):
            axes[i, i].annotate(field,
                (0.1, 0.2),
                rotation=45,
                xycoords='axes fraction',
                ha='left', va='center')

        # make proxy artists for legend
        lines = []
        for color in sample_colors:
            lines.append(Line2D([0, 0], [0, 0], color=color))

        if sample_names is not None:
            # draw the legend
            leg = fig.legend(lines, sample_names, loc=(0.65, 0.8))
            leg.set_frame_on(False)

        if label is not None:
            axes[0, 0].annotate(label, (0, 1),
                ha='left', va='top',
                xycoords='axes fraction')

        fig.savefig(filename, bbox_inches='tight', dpi=dpi)
        plt.close(fig)

    if animate_field is not None:
        _, ext = os.path.splitext(filename)
        if ext != '.gif':
            raise ValueError(
                "animation is only supported for .gif files")
        field_idx = fields.index(animate_field)
        cuts = np.linspace(
            low[field_idx],
            high[field_idx],
            animate_steps + 1)[:-1]
        gif = GIF()
        temp_dir = tempfile.mkdtemp()
        for i, cut in enumerate(cuts):
            frame_filename = os.path.join(temp_dir, 'frame_{0:d}.png'.format(i))
            label = '{0} > {1:.2f}'.format(animate_field, cut)
            log.info("creating frame for {0} ...".format(label))
            new_arrays = []
            for array in arrays:
                new_arrays.append(array[array[:, field_idx] > cut])
            single_frame(new_arrays,
                filename=frame_filename,
                label=label)
            gif.add_frame(frame_filename)
        gif.write(filename, delay=animate_delay, loop=animate_loop)
        shutil.rmtree(temp_dir)
    else:
        single_frame(arrays, filename=filename)

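A minimal standalone sketch of the extension check used above; lower-casing
the extension before comparing is an addition here, not part of the original:

import os.path

def require_gif(filename):
    # Refuse any output filename that does not end in .gif.
    _root, ext = os.path.splitext(filename)
    if ext.lower() != '.gif':
        raise ValueError("animation is only supported for .gif files")
    return filename

require_gif('contours.gif')    # returns 'contours.gif'
# require_gif('contours.png')  # would raise ValueError
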
Example 30

Project: root_numpy
Source File: gen_rst.py
View license
def generate_file_rst(fname, target_dir, src_dir, plot_gallery):
    """ Generate the rst file for a given example.
    """
    base_image_name = os.path.splitext(fname)[0]
    image_fname = '%s_%%s.png' % base_image_name
    root_image_fname = 'root_%s_%%s.png' % base_image_name
    root_fig_num = 1

    this_template = rst_template
    last_dir = os.path.split(src_dir)[-1]
    # to avoid leading . in file names, and wrong names in links
    if last_dir == '.' or last_dir == 'examples':
        last_dir = ''
    else:
        last_dir += '_'
    short_fname = last_dir + fname
    src_file = os.path.join(src_dir, fname)
    example_file = os.path.join(target_dir, fname)
    shutil.copyfile(src_file, example_file)

    # The following is a list containing all the figure names
    figure_list = []

    image_dir = os.path.join(target_dir, 'images')
    thumb_dir = os.path.join(image_dir, 'thumb')
    if not os.path.exists(image_dir):
        os.makedirs(image_dir)
    if not os.path.exists(thumb_dir):
        os.makedirs(thumb_dir)
    image_path = os.path.join(image_dir, image_fname)
    root_image_path = os.path.join(image_dir, root_image_fname)

    stdout_path = os.path.join(image_dir,
                               'stdout_%s.txt' % base_image_name)
    time_path = os.path.join(image_dir,
                               'time_%s.txt' % base_image_name)
    thumb_file = os.path.join(thumb_dir, fname[:-3] + '.png')
    time_elapsed = 0
    if plot_gallery and fname.startswith('plot'):
        # generate the plot as png image if file name
        # starts with plot and if it is more recent than an
        # existing image.
        first_image_file = image_path % 1
        first_root_image_file = root_image_path % 1
        if os.path.exists(stdout_path):
            stdout = open(stdout_path).read()
        else:
            stdout = ''
        if os.path.exists(time_path):
            time_elapsed = float(open(time_path).read())

        if (not os.path.exists(first_image_file) or
            not os.path.exists(first_root_image_file) or
                os.stat(first_image_file).st_mtime <=
                                    os.stat(src_file).st_mtime):
            # We need to execute the code
            print 'plotting %s' % fname
            t0 = time()
            import matplotlib.pyplot as plt
            plt.close('all')
            cwd = os.getcwd()
            try:
                # First cd into the original example dir, so that any file
                # created by the example gets created in this directory
                orig_stdout = sys.stdout
                os.chdir(os.path.dirname(src_file))
                my_buffer = StringIO()
                my_stdout = Tee(sys.stdout, my_buffer)
                sys.stdout = my_stdout
                my_globals = {'pl': plt}
                execfile(os.path.basename(src_file), my_globals)
                time_elapsed = time() - t0
                sys.stdout = orig_stdout
                my_stdout = my_buffer.getvalue()
                if '__doc__' in my_globals:
                    # The __doc__ is often printed in the example; we
                    # don't wish to echo it
                    my_stdout = my_stdout.replace(
                                            my_globals['__doc__'],
                                            '')
                my_stdout = my_stdout.strip()
                if my_stdout:
                    stdout = '**Script output**::\n\n  %s\n\n' % (
                        '\n  '.join(my_stdout.split('\n')))
                open(stdout_path, 'w').write(stdout)
                open(time_path, 'w').write('%f' % time_elapsed)
                os.chdir(cwd)

                # In order to save every figure we have two solutions:
                # * iterate from 1 to infinity and call plt.fignum_exists(n)
                #   (this requires the figures to be numbered
                #    incrementally: 1, 2, 3 and not 1, 2, 5)
                # * iterate over [fig_mngr.num for fig_mngr in
                #   matplotlib._pylab_helpers.Gcf.get_all_fig_managers()]
                for fig_num in (fig_mngr.num for fig_mngr in
                        matplotlib._pylab_helpers.Gcf.get_all_fig_managers()):
                    # Set the fig_num figure as the current figure as we can't
                    # save a figure that's not the current figure.
                    plt.figure(fig_num)
                    plt.savefig(image_path % fig_num)
                    figure_list.append(image_fname % fig_num)
                for canvas in ROOT.gROOT.GetListOfCanvases():
                    canvas.SaveAs(root_image_path % root_fig_num)
                    canvas.Close()
                    figure_list.append(root_image_fname % root_fig_num)
                    root_fig_num += 1
            except:
                print 80 * '_'
                print '%s is not compiling:' % fname
                traceback.print_exc()
                print 80 * '_'
            finally:
                os.chdir(cwd)
                sys.stdout = orig_stdout

            print " - time elapsed : %.2g sec" % time_elapsed
        else:
            figure_list = [f[len(image_dir):]
                            for f in glob.glob(image_path % '[1-9]')]
                            #for f in glob.glob(image_path % '*')]

        # generate thumb file
        this_template = plot_rst_template
        from matplotlib import image
        if os.path.exists(first_image_file):
            image.thumbnail(first_image_file, thumb_file, 0.2)
        elif os.path.exists(first_root_image_file):
            image.thumbnail(first_root_image_file, thumb_file, 0.2)

    if not os.path.exists(thumb_file):
        # create a placeholder to use as the thumbnail
        shutil.copy('images/blank_image.png', thumb_file)

    docstring, short_desc, end_row = extract_docstring(example_file)

    # Depending on whether we have one or more figures, we're using a
    # horizontal list or a single rst call to 'image'.
    if len(figure_list) == 1:
        figure_name = figure_list[0]
        image_list = SINGLE_IMAGE % figure_name.lstrip('/')
    else:
        image_list = HLIST_HEADER
        for figure_name in figure_list:
            image_list += HLIST_IMAGE_TEMPLATE % figure_name.lstrip('/')

    f = open(os.path.join(target_dir, fname[:-2] + 'rst'), 'w')
    f.write(this_template % locals())
    f.flush()

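generate_file_rst derives every artifact name from one splitext call on the
example's file name. A minimal sketch of that naming scheme, with hypothetical
directory and file names:

import os.path

def sibling_paths(fname, image_dir):
    # Strip the extension once, then build every related artifact name.
    base = os.path.splitext(fname)[0]  # 'plot_hist.py' -> 'plot_hist'
    return {
        'image':  os.path.join(image_dir, '%s_%%s.png' % base),
        'stdout': os.path.join(image_dir, 'stdout_%s.txt' % base),
        'time':   os.path.join(image_dir, 'time_%s.txt' % base),
    }

paths = sibling_paths('plot_hist.py', 'images')
print(paths['image'] % 1)  # images/plot_hist_1.png
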
Example 31

Project: roundware-server
Source File: api.py
View license
def save_asset_from_request(request, session, asset=None):
    log_event("start_upload", session.id, request.GET)
    fileitem = asset.file if asset else request.FILES.get('file')
    if fileitem is None or not fileitem.name:
        raise RoundException("No file in request")

    # get mediatype from the GET request
    mediatype = get_parameter_from_request(
        request, 'mediatype') if not asset else asset.mediatype
    # also observe the properly underscored version of the same field
    if mediatype is None:
        mediatype = get_parameter_from_request(request, 'media_type')
    # if mediatype parameter not passed, set to 'audio'
    # this ensures backwards compatibility
    if mediatype is None:
        mediatype = "audio"

    # copy the file to a unique name (current time and date)
    logger.debug("Session %s - Processing:%s", session.id, fileitem.name)
    (filename_prefix, filename_extension) = os.path.splitext(fileitem.name)

    dest_file = time.strftime("%Y%m%d-%H%M%S-") + str(session.id)
    dest_filename = dest_file + filename_extension
    dest_filepath = os.path.join(settings.MEDIA_ROOT, dest_filename)
    count = 0
    # If the file exists, add an underscore and a number until it doesn't.
    while (os.path.isfile(dest_filepath)):
        dest_filename = "%s_%d%s" % (dest_file, count, filename_extension)
        dest_filepath = os.path.join(settings.MEDIA_ROOT, dest_filename)
        count += 1

    fileout = open(dest_filepath, 'wb')
    fileout.write(fileitem.file.read())
    fileout.close()

    # Delete the uploaded original after the copy has been made.
    if asset:
        asset.file.delete()
        asset.file.name = dest_filename
        asset.filename = dest_filename
        asset.save()
    # Make sure everything is in wav form only if media type is audio.
    if mediatype == "audio":
        newfilename = convertaudio.convert_uploaded_file(dest_filename)
    else:
        newfilename = dest_filename
    if not newfilename:
        raise RoundException("File not converted successfully: " + newfilename)

    # if the request comes from the django admin interface
    # update the Asset with the right information
    if asset:
        asset.session = session
        asset.filename = newfilename
    # create the new asset if request comes in from a source other
    # than the django admin interface
    else:
        # get location data from request
        latitude = get_parameter_from_request(request, 'latitude')
        longitude = get_parameter_from_request(request, 'longitude')
        # if no location data in request, default to project latitude
        # and longitude
        if not latitude:
            latitude = session.project.latitude
        if not longitude:
            longitude = session.project.longitude
        tagset = []
        tags = get_parameter_from_request(request, 'tags')
        if tags is None:
            tags = get_parameter_from_request(request, 'tag_ids')
        if tags is not None:
            ids = tags.rstrip(',').split(',')
            try:
                tagset = models.Tag.objects.filter(id__in=ids)
            except:
                raise RoundException("Could not decode tag list")

        # get optional submitted parameter from request (Y, N or blank
        # string are only acceptable values)
        submitted = get_parameter_from_request(request, 'submitted')
        # set submitted variable to proper boolean value if it is
        # passed as parameter
        if submitted == "N":
            submitted = False
        elif submitted == "Y":
            submitted = True
        # if blank string or not included as parameter, check if in range of project and if so
        # set asset.submitted based on project.auto_submit boolean
        # value
        elif submitted is None or len(submitted) == 0:
            submitted = False
            if is_listener_in_range_of_stream(request.GET, session.project):
                submitted = session.project.auto_submit

        # save description if provided, null is not allowed
        description = get_parameter_from_request(request, 'description')
        if description is None:
            description = ""

        asset = models.Asset(latitude=latitude,
                             longitude=longitude,
                             filename=newfilename,
                             session=session,
                             submitted=submitted,
                             mediatype=mediatype,
                             description=description,
                             volume=1.0,
                             language=session.language,
                             project=session.project)
        asset.file.name = dest_filename
        asset.save()
        for tag in tagset:
            asset.tags.add(tag)

    # get the audiolength of the file only if mediatype is audio and
    # update the Asset
    if mediatype == "audio":
        discover_audiolength.discover_and_set_audiolength(
            asset, newfilename)
        asset.save()

    return asset

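A minimal standalone sketch of the upload-renaming pattern above: keep the
client's extension, replace the stem with a timestamp plus the session id,
and append a counter until the path is free. The media root and file name
below are hypothetical:

import os.path
import time

def unique_destination(media_root, original_name, session_id):
    # Keep only the extension of the uploaded name; the stem is replaced
    # by a timestamp plus the session id.
    _prefix, ext = os.path.splitext(original_name)
    stem = time.strftime("%Y%m%d-%H%M%S-") + str(session_id)
    candidate = os.path.join(media_root, stem + ext)
    count = 0
    while os.path.isfile(candidate):
        candidate = os.path.join(media_root, "%s_%d%s" % (stem, count, ext))
        count += 1
    return candidate

print(unique_destination('/tmp', 'voice memo.wav', 42))
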
Example 32

Project: youtube-dl
Source File: __init__.py
View license
def _real_main(argv=None):
    # Compatibility fixes for Windows
    if sys.platform == 'win32':
        # https://github.com/rg3/youtube-dl/issues/820
        codecs.register(lambda name: codecs.lookup('utf-8') if name == 'cp65001' else None)

    workaround_optparse_bug9161()

    setproctitle('youtube-dl')

    parser, opts, args = parseOpts(argv)

    # Set user agent
    if opts.user_agent is not None:
        std_headers['User-Agent'] = opts.user_agent

    # Set referer
    if opts.referer is not None:
        std_headers['Referer'] = opts.referer

    # Custom HTTP headers
    if opts.headers is not None:
        for h in opts.headers:
            if ':' not in h:
                parser.error('wrong header formatting, it should be key:value, not "%s"' % h)
            key, value = h.split(':', 1)
            if opts.verbose:
                write_string('[debug] Adding header from command line option %s:%s\n' % (key, value))
            std_headers[key] = value

    # Dump user agent
    if opts.dump_user_agent:
        write_string(std_headers['User-Agent'] + '\n', out=sys.stdout)
        sys.exit(0)

    # Batch file verification
    batch_urls = []
    if opts.batchfile is not None:
        try:
            if opts.batchfile == '-':
                batchfd = sys.stdin
            else:
                batchfd = io.open(
                    compat_expanduser(opts.batchfile),
                    'r', encoding='utf-8', errors='ignore')
            batch_urls = read_batch_urls(batchfd)
            if opts.verbose:
                write_string('[debug] Batch file urls: ' + repr(batch_urls) + '\n')
        except IOError:
            sys.exit('ERROR: batch file could not be read')
    all_urls = batch_urls + args
    all_urls = [url.strip() for url in all_urls]
    _enc = preferredencoding()
    all_urls = [url.decode(_enc, 'ignore') if isinstance(url, bytes) else url for url in all_urls]

    if opts.list_extractors:
        for ie in list_extractors(opts.age_limit):
            write_string(ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie._WORKING else '') + '\n', out=sys.stdout)
            matchedUrls = [url for url in all_urls if ie.suitable(url)]
            for mu in matchedUrls:
                write_string('  ' + mu + '\n', out=sys.stdout)
        sys.exit(0)
    if opts.list_extractor_descriptions:
        for ie in list_extractors(opts.age_limit):
            if not ie._WORKING:
                continue
            desc = getattr(ie, 'IE_DESC', ie.IE_NAME)
            if desc is False:
                continue
            if hasattr(ie, 'SEARCH_KEY'):
                _SEARCHES = ('cute kittens', 'slithering pythons', 'falling cat', 'angry poodle', 'purple fish', 'running tortoise', 'sleeping bunny', 'burping cow')
                _COUNTS = ('', '5', '10', 'all')
                desc += ' (Example: "%s%s:%s" )' % (ie.SEARCH_KEY, random.choice(_COUNTS), random.choice(_SEARCHES))
            write_string(desc + '\n', out=sys.stdout)
        sys.exit(0)
    if opts.ap_list_mso:
        table = [[mso_id, mso_info['name']] for mso_id, mso_info in MSO_INFO.items()]
        write_string('Supported TV Providers:\n' + render_table(['mso', 'mso name'], table) + '\n', out=sys.stdout)
        sys.exit(0)

    # Conflicting, missing and erroneous options
    if opts.usenetrc and (opts.username is not None or opts.password is not None):
        parser.error('using .netrc conflicts with giving username/password')
    if opts.password is not None and opts.username is None:
        parser.error('account username missing\n')
    if opts.ap_password is not None and opts.ap_username is None:
        parser.error('TV Provider account username missing\n')
    if opts.outtmpl is not None and (opts.usetitle or opts.autonumber or opts.useid):
        parser.error('using output template conflicts with using title, video ID or auto number')
    if opts.usetitle and opts.useid:
        parser.error('using title conflicts with using video ID')
    if opts.username is not None and opts.password is None:
        opts.password = compat_getpass('Type account password and press [Return]: ')
    if opts.ap_username is not None and opts.ap_password is None:
        opts.ap_password = compat_getpass('Type TV provider account password and press [Return]: ')
    if opts.ratelimit is not None:
        numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
        if numeric_limit is None:
            parser.error('invalid rate limit specified')
        opts.ratelimit = numeric_limit
    if opts.min_filesize is not None:
        numeric_limit = FileDownloader.parse_bytes(opts.min_filesize)
        if numeric_limit is None:
            parser.error('invalid min_filesize specified')
        opts.min_filesize = numeric_limit
    if opts.max_filesize is not None:
        numeric_limit = FileDownloader.parse_bytes(opts.max_filesize)
        if numeric_limit is None:
            parser.error('invalid max_filesize specified')
        opts.max_filesize = numeric_limit
    if opts.sleep_interval is not None:
        if opts.sleep_interval < 0:
            parser.error('sleep interval must be positive or 0')
    if opts.max_sleep_interval is not None:
        if opts.max_sleep_interval < 0:
            parser.error('max sleep interval must be positive or 0')
        if opts.max_sleep_interval < opts.sleep_interval:
            parser.error('max sleep interval must be greater than or equal to min sleep interval')
    else:
        opts.max_sleep_interval = opts.sleep_interval
    if opts.ap_mso and opts.ap_mso not in MSO_INFO:
        parser.error('Unsupported TV Provider, use --ap-list-mso to get a list of supported TV Providers')

    def parse_retries(retries):
        if retries in ('inf', 'infinite'):
            parsed_retries = float('inf')
        else:
            try:
                parsed_retries = int(retries)
            except (TypeError, ValueError):
                parser.error('invalid retry count specified')
        return parsed_retries
    if opts.retries is not None:
        opts.retries = parse_retries(opts.retries)
    if opts.fragment_retries is not None:
        opts.fragment_retries = parse_retries(opts.fragment_retries)
    if opts.buffersize is not None:
        numeric_buffersize = FileDownloader.parse_bytes(opts.buffersize)
        if numeric_buffersize is None:
            parser.error('invalid buffer size specified')
        opts.buffersize = numeric_buffersize
    if opts.playliststart <= 0:
        raise ValueError('Playlist start must be positive')
    if opts.playlistend not in (-1, None) and opts.playlistend < opts.playliststart:
        raise ValueError('Playlist end must be greater than playlist start')
    if opts.extractaudio:
        if opts.audioformat not in ['best', 'aac', 'mp3', 'm4a', 'opus', 'vorbis', 'wav']:
            parser.error('invalid audio format specified')
    if opts.audioquality:
        opts.audioquality = opts.audioquality.strip('k').strip('K')
        if not opts.audioquality.isdigit():
            parser.error('invalid audio quality specified')
    if opts.recodevideo is not None:
        if opts.recodevideo not in ['mp4', 'flv', 'webm', 'ogg', 'mkv', 'avi']:
            parser.error('invalid video recode format specified')
    if opts.convertsubtitles is not None:
        if opts.convertsubtitles not in ['srt', 'vtt', 'ass']:
            parser.error('invalid subtitle format specified')

    if opts.date is not None:
        date = DateRange.day(opts.date)
    else:
        date = DateRange(opts.dateafter, opts.datebefore)

    # Do not download videos when there are audio-only formats
    if opts.extractaudio and not opts.keepvideo and opts.format is None:
        opts.format = 'bestaudio/best'

    # --all-sub automatically sets --write-sub if --write-auto-sub is not given
    # this was the old behaviour if only --all-sub was given.
    if opts.allsubtitles and not opts.writeautomaticsub:
        opts.writesubtitles = True

    outtmpl = ((opts.outtmpl is not None and opts.outtmpl) or
               (opts.format == '-1' and opts.usetitle and '%(title)s-%(id)s-%(format)s.%(ext)s') or
               (opts.format == '-1' and '%(id)s-%(format)s.%(ext)s') or
               (opts.usetitle and opts.autonumber and '%(autonumber)s-%(title)s-%(id)s.%(ext)s') or
               (opts.usetitle and '%(title)s-%(id)s.%(ext)s') or
               (opts.useid and '%(id)s.%(ext)s') or
               (opts.autonumber and '%(autonumber)s-%(id)s.%(ext)s') or
               DEFAULT_OUTTMPL)
    if not os.path.splitext(outtmpl)[1] and opts.extractaudio:
        parser.error('Cannot download a video and extract audio into the same'
                     ' file! Use "{0}.%(ext)s" instead of "{0}" as the output'
                     ' template'.format(outtmpl))

    any_getting = opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.getduration or opts.dumpjson or opts.dump_single_json
    any_printing = opts.print_json
    download_archive_fn = compat_expanduser(opts.download_archive) if opts.download_archive is not None else opts.download_archive

    # PostProcessors
    postprocessors = []
    # Add the metadata pp first, the other pps will copy it
    if opts.metafromtitle:
        postprocessors.append({
            'key': 'MetadataFromTitle',
            'titleformat': opts.metafromtitle
        })
    if opts.addmetadata:
        postprocessors.append({'key': 'FFmpegMetadata'})
    if opts.extractaudio:
        postprocessors.append({
            'key': 'FFmpegExtractAudio',
            'preferredcodec': opts.audioformat,
            'preferredquality': opts.audioquality,
            'nopostoverwrites': opts.nopostoverwrites,
        })
    if opts.recodevideo:
        postprocessors.append({
            'key': 'FFmpegVideoConvertor',
            'preferedformat': opts.recodevideo,
        })
    if opts.convertsubtitles:
        postprocessors.append({
            'key': 'FFmpegSubtitlesConvertor',
            'format': opts.convertsubtitles,
        })
    if opts.embedsubtitles:
        postprocessors.append({
            'key': 'FFmpegEmbedSubtitle',
        })
    if opts.embedthumbnail:
        already_have_thumbnail = opts.writethumbnail or opts.write_all_thumbnails
        postprocessors.append({
            'key': 'EmbedThumbnail',
            'already_have_thumbnail': already_have_thumbnail
        })
        if not already_have_thumbnail:
            opts.writethumbnail = True
    # XAttrMetadataPP should be run after post-processors that may change file
    # contents
    if opts.xattrs:
        postprocessors.append({'key': 'XAttrMetadata'})
    # Please keep ExecAfterDownload towards the bottom as it allows the user to modify the final file in any way.
    # So if the user is able to remove the file before your postprocessor runs it might cause a few problems.
    if opts.exec_cmd:
        postprocessors.append({
            'key': 'ExecAfterDownload',
            'exec_cmd': opts.exec_cmd,
        })
    external_downloader_args = None
    if opts.external_downloader_args:
        external_downloader_args = compat_shlex_split(opts.external_downloader_args)
    postprocessor_args = None
    if opts.postprocessor_args:
        postprocessor_args = compat_shlex_split(opts.postprocessor_args)
    match_filter = (
        None if opts.match_filter is None
        else match_filter_func(opts.match_filter))

    ydl_opts = {
        'usenetrc': opts.usenetrc,
        'username': opts.username,
        'password': opts.password,
        'twofactor': opts.twofactor,
        'videopassword': opts.videopassword,
        'ap_mso': opts.ap_mso,
        'ap_username': opts.ap_username,
        'ap_password': opts.ap_password,
        'quiet': (opts.quiet or any_getting or any_printing),
        'no_warnings': opts.no_warnings,
        'forceurl': opts.geturl,
        'forcetitle': opts.gettitle,
        'forceid': opts.getid,
        'forcethumbnail': opts.getthumbnail,
        'forcedescription': opts.getdescription,
        'forceduration': opts.getduration,
        'forcefilename': opts.getfilename,
        'forceformat': opts.getformat,
        'forcejson': opts.dumpjson or opts.print_json,
        'dump_single_json': opts.dump_single_json,
        'simulate': opts.simulate or any_getting,
        'skip_download': opts.skip_download,
        'format': opts.format,
        'listformats': opts.listformats,
        'outtmpl': outtmpl,
        'autonumber_size': opts.autonumber_size,
        'restrictfilenames': opts.restrictfilenames,
        'ignoreerrors': opts.ignoreerrors,
        'force_generic_extractor': opts.force_generic_extractor,
        'ratelimit': opts.ratelimit,
        'nooverwrites': opts.nooverwrites,
        'retries': opts.retries,
        'fragment_retries': opts.fragment_retries,
        'skip_unavailable_fragments': opts.skip_unavailable_fragments,
        'buffersize': opts.buffersize,
        'noresizebuffer': opts.noresizebuffer,
        'continuedl': opts.continue_dl,
        'noprogress': opts.noprogress,
        'progress_with_newline': opts.progress_with_newline,
        'playliststart': opts.playliststart,
        'playlistend': opts.playlistend,
        'playlistreverse': opts.playlist_reverse,
        'noplaylist': opts.noplaylist,
        'logtostderr': opts.outtmpl == '-',
        'consoletitle': opts.consoletitle,
        'nopart': opts.nopart,
        'updatetime': opts.updatetime,
        'writedescription': opts.writedescription,
        'writeannotations': opts.writeannotations,
        'writeinfojson': opts.writeinfojson,
        'writethumbnail': opts.writethumbnail,
        'write_all_thumbnails': opts.write_all_thumbnails,
        'writesubtitles': opts.writesubtitles,
        'writeautomaticsub': opts.writeautomaticsub,
        'allsubtitles': opts.allsubtitles,
        'listsubtitles': opts.listsubtitles,
        'subtitlesformat': opts.subtitlesformat,
        'subtitleslangs': opts.subtitleslangs,
        'matchtitle': decodeOption(opts.matchtitle),
        'rejecttitle': decodeOption(opts.rejecttitle),
        'max_downloads': opts.max_downloads,
        'prefer_free_formats': opts.prefer_free_formats,
        'verbose': opts.verbose,
        'dump_intermediate_pages': opts.dump_intermediate_pages,
        'write_pages': opts.write_pages,
        'test': opts.test,
        'keepvideo': opts.keepvideo,
        'min_filesize': opts.min_filesize,
        'max_filesize': opts.max_filesize,
        'min_views': opts.min_views,
        'max_views': opts.max_views,
        'daterange': date,
        'cachedir': opts.cachedir,
        'youtube_print_sig_code': opts.youtube_print_sig_code,
        'age_limit': opts.age_limit,
        'download_archive': download_archive_fn,
        'cookiefile': opts.cookiefile,
        'nocheckcertificate': opts.no_check_certificate,
        'prefer_insecure': opts.prefer_insecure,
        'proxy': opts.proxy,
        'socket_timeout': opts.socket_timeout,
        'bidi_workaround': opts.bidi_workaround,
        'debug_printtraffic': opts.debug_printtraffic,
        'prefer_ffmpeg': opts.prefer_ffmpeg,
        'include_ads': opts.include_ads,
        'default_search': opts.default_search,
        'youtube_include_dash_manifest': opts.youtube_include_dash_manifest,
        'encoding': opts.encoding,
        'extract_flat': opts.extract_flat,
        'mark_watched': opts.mark_watched,
        'merge_output_format': opts.merge_output_format,
        'postprocessors': postprocessors,
        'fixup': opts.fixup,
        'source_address': opts.source_address,
        'call_home': opts.call_home,
        'sleep_interval': opts.sleep_interval,
        'max_sleep_interval': opts.max_sleep_interval,
        'external_downloader': opts.external_downloader,
        'list_thumbnails': opts.list_thumbnails,
        'playlist_items': opts.playlist_items,
        'xattr_set_filesize': opts.xattr_set_filesize,
        'match_filter': match_filter,
        'no_color': opts.no_color,
        'ffmpeg_location': opts.ffmpeg_location,
        'hls_prefer_native': opts.hls_prefer_native,
        'hls_use_mpegts': opts.hls_use_mpegts,
        'external_downloader_args': external_downloader_args,
        'postprocessor_args': postprocessor_args,
        'cn_verification_proxy': opts.cn_verification_proxy,
        'geo_verification_proxy': opts.geo_verification_proxy,

    }

    with YoutubeDL(ydl_opts) as ydl:
        # Update version
        if opts.update_self:
            update_self(ydl.to_screen, opts.verbose, ydl._opener)

        # Remove cache dir
        if opts.rm_cachedir:
            ydl.cache.remove()

        # Maybe do nothing
        if (len(all_urls) < 1) and (opts.load_info_filename is None):
            if opts.update_self or opts.rm_cachedir:
                sys.exit()

            ydl.warn_if_short_id(sys.argv[1:] if argv is None else argv)
            parser.error(
                'You must provide at least one URL.\n'
                'Type youtube-dl --help to see a list of all options.')

        try:
            if opts.load_info_filename is not None:
                retcode = ydl.download_with_info_file(compat_expanduser(opts.load_info_filename))
            else:
                retcode = ydl.download(all_urls)
        except MaxDownloadsReached:
            ydl.to_screen('--max-download limit reached, aborting.')
            retcode = 101

    sys.exit(retcode)

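The splitext call above only asks whether the output template ends in any
extension at all, which is why '%(title)s.%(ext)s' is accepted while
'%(title)s' is rejected when extracting audio. A minimal sketch:

import os.path

def has_extension(template):
    # splitext splits at the last dot, so an empty second element
    # means the template carries no extension.
    return bool(os.path.splitext(template)[1])

print(has_extension('%(title)s.%(ext)s'))  # True
print(has_extension('%(title)s'))          # False
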
Example 33

Project: pylon
Source File: main.py
View license
def main():
    """ Parses the command line and call Pylon with the correct data.
    """
    parser = optparse.OptionParser(usage="usage: pylon [options] input_file",
                                   version="%prog 0.4.4")

    parser.add_option("-o", "--output", dest="output", metavar="FILE",
        help="Write the solution report to FILE.")

#    parser.add_option("-q", "--quiet", action="store_true", dest="quiet",
#        default=False, help="Print less information.")

    parser.add_option("-v", "--verbose", action="store_true", dest="verbose",
        default=False, help="Print more information.")

#    parser.add_option("-g", "--gui", action="store_true", dest="gui",
#        default=False, help="Use the portable graphical interface to Pylon.")

#    parser.add_option("-n", "--no-report", action="store_true",
#        dest="no_report", default=False, help="Suppress report output.")

    parser.add_option("-d", "--debug", action="store_true", dest="debug",
        default=False, help="Print debug information.")

    parser.add_option("-t", "--input-type", dest="type", metavar="TYPE",
        default="any", help="The argument following the -t is used to "
        "indicate the format type of the input data file. The types which are "
        "currently supported include: matpower, psse [default: %default]"
        " If not specified Pylon will try to determine the type according to "
        "the file name extension and the file header.")

    parser.add_option("-s", "--solver", dest="solver", metavar="SOLVER",
        default="acpf", help="The argument following the -s is used to"
        "indicate the type of routine to use in solving. The types which are "
        "currently supported are: 'dcpf', 'acpf', 'dcopf', 'acopf', 'udopf' "
        "and 'none' [default: %default].")

    parser.add_option("-a", "--algorithm", action="store_true",
        metavar="ALGORITHM", dest="algorithm", default="newton",
        help="Indicates the algorithm type to be used for AC power flow. The "
        "types which are currently supported are: 'newton' and 'fdpf' "
        "[default: %default].")

    parser.add_option("-T", "--output-type", dest="output_type",
        metavar="OUTPUT_TYPE", default="rst", help="Indicates the output "
        "format type.  The type swhich are currently supported include: rst, "
        "matpower, csv, excel and none [default: %default].")

    (options, args) = parser.parse_args()

    if options.verbose:
        logger.setLevel(logging.INFO)
    elif options.debug:
        logger.setLevel(logging.DEBUG)
    else:
        logger.setLevel(logging.ERROR)

    # Output.
    outext = {'psse': '.raw', 'matpower': '.m'}
    if options.output:
        if options.output == "-":
            outfile = sys.stdout
            logger.setLevel(logging.CRITICAL) # must stay quiet
#            options.output_type = "none"
        else:
            outfile = open(options.output, "wb")
    elif options.output_type is not None:
        if options.output_type in outext.keys():
            inname, ext = os.path.splitext(args[0])
            outfile = inname + outext[options.output_type]
        else:
            outfile = sys.stdout
    else:
        outfile = sys.stdout
#        if not options.no_report:
#            logger.setLevel(logging.CRITICAL) # must stay quiet

    # Input.
    if len(args) > 1:
        parser.print_help()
        sys.exit(1)
    elif (len(args) == 0) or (args[0] == "-"):
        filename = ""
        if sys.stdin.isatty():
            # True if the file is connected to a tty device, and False
            # otherwise (pipeline or file redirection).
            parser.print_help()
            sys.exit(1)
        else:
            # Handle piped input ($ cat ehv3.raw | pylon | rst2pdf -o ans.pdf).
            infile = sys.stdin
    else:
        filename = args[0]
        infile = open(filename, "rb")

    if options.type == "any":
        type = detect_data_file(infile, filename)
    else:
        type = options.type

    # Get the case from the input file-like object.
    case = read_case(infile, type)

    if case is not None:
        # Routine (and algorithm) selection.
        if options.solver == "dcpf":
            solver = DCPF(case)
        elif options.solver == "acpf":
            if options.algorithm == "newton":
                solver = NewtonPF(case)
            elif options.algorithm == "fdpf":
                solver = FastDecoupledPF(case)
            else:
                logger.critical("Invalid algorithm [%s]." % options.algorithm)
                sys.exit(1)
        elif options.solver == "dcopf":
            solver = OPF(case, True)
        elif options.solver == "acopf":
            solver = OPF(case, False)
        elif options.solver == "udopf":
            solver = UDOPF(case)
        elif options.solver == "none":
            solver = None
        else:
            logger.critical("Invalid solver [%s]." % options.solver)
#            sys.exit(1)
            solver = None

        # Output writer selection.
        if options.output_type == "matpower":
            writer = MATPOWERWriter(case)
        elif options.output_type == "psse":
            writer = PSSEWriter(case)
        elif options.output_type == "rst":
            writer = ReSTWriter(case)
        elif options.output_type == "csv":
            from pylon.io.excel import CSVWriter
            writer = CSVWriter(case)
        elif options.output_type == "excel":
            from pylon.io.excel import ExcelWriter
            writer = ExcelWriter(case)
        elif options.output_type == "pickle":
            writer = PickleWriter(case)
        else:
            logger.critical("Invalid output type [%s]." % options.output_type)
            sys.exit(1)

        if solver is not None:
            solver.solve()
        if options.output_type != "none":
            writer.write(outfile)
            print('Output file {0} written'.format(outfile))
    else:
        logger.critical("Unable to read case data.")

    # Don't close stdin or stdout.
    if len(args) == 1:
        infile.close()
    if options.output and not (options.output == "-"):
        outfile.close()

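A minimal standalone sketch of the default-output-name logic above: keep the
input's root and swap in the extension that matches the requested writer. It
assumes output_type is one of the mapped writers; the case file name is
hypothetical:

import os.path

OUTEXT = {'psse': '.raw', 'matpower': '.m'}

def default_output_name(input_path, output_type):
    # Drop the input's extension and append the writer's.
    inname, _ext = os.path.splitext(input_path)
    return inname + OUTEXT[output_type]

print(default_output_name('case30.raw', 'matpower'))  # case30.m
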
Example 34

Project: PYPOWER
Source File: loadcase.py
View license
def loadcase(casefile,
        return_as_obj=True, expect_gencost=True, expect_areas=True):
    """Returns the individual data matrices or an dict containing them
    as values.

    Here C{casefile} is either a dict containing the keys C{baseMVA}, C{bus},
    C{gen}, C{branch}, C{areas}, C{gencost}, or a string containing the name
    of the file. If C{casefile} contains the extension '.mat' or '.py', then
    the explicit file is searched. If C{casefile} contains no extension, then
    L{loadcase} looks for a '.mat' file first, then for a '.py' file.  If the
    file does not exist or doesn't define all matrices, the function returns
    an exit code as follows:

        0.  all variables successfully defined
        1.  input argument is not a string or dict
        2.  specified extension-less file name does not exist
        3.  specified .mat file does not exist
        4.  specified .py file does not exist
        5.  specified file fails to define all matrices or contains syntax
            error

    If the input data is not a dict containing a 'version' key, it is
    assumed to be a PYPOWER case file in version 1 format, and will be
    converted to version 2 format.

    @author: Carlos E. Murillo-Sanchez (PSERC Cornell & Universidad
    Autonoma de Manizales)
    @author: Ray Zimmerman (PSERC Cornell)
    """
    if return_as_obj == True:
        expect_gencost = False
        expect_areas = False

    info = 0

    # read data into case object
    if isinstance(casefile, basestring):
        # check for explicit extension
        if casefile.endswith(('.py', '.mat')):
            rootname, extension = splitext(casefile)
            fname = basename(rootname)
        else:
            # set extension if not specified explicitly
            rootname = casefile
            if exists(casefile + '.mat'):
                extension = '.mat'
            elif exists(casefile + '.py'):
                extension = '.py'
            else:
                info = 2
            fname = basename(rootname)

        lasterr = ''

        ## attempt to read file
        if info == 0:
            if extension == '.mat':       ## from MAT file
                try:
                    d = loadmat(rootname + extension, struct_as_record=True)
                    if 'ppc' in d or 'mpc' in d:    ## it's a MAT/PYPOWER dict
                        if 'ppc' in d:
                            struct = d['ppc']
                        else:
                            struct = d['mpc']
                        val = struct[0, 0]

                        s = {}
                        for a in val.dtype.names:
                            s[a] = val[a]
                    else:                 ## individual data matrices
                        d['version'] = '1'

                        s = {}
                        for k, v in d.items():
                            s[k] = v

                    s['baseMVA'] = s['baseMVA'][0]  # convert array to float

                except IOError as e:
                    info = 3
                    lasterr = str(e)
            elif extension == '.py':      ## from Python file
                try:
                    if PY2:
                        execfile(rootname + extension)
                    else:
                        exec(compile(open(rootname + extension).read(),
                                     rootname + extension, 'exec'))

                    try:                      ## assume it returns an object
                        s = eval(fname)()
                    except ValueError as e:
                        info = 4
                        lasterr = str(e)
                    ## if not try individual data matrices
                    if info == 0 and not isinstance(s, dict):
                        s = {}
                        s['version'] = '1'
                        if expect_gencost:
                            try:
                                s['baseMVA'], s['bus'], s['gen'], s['branch'], \
                                s['areas'], s['gencost'] = eval(fname)()
                            except IOError as e:
                                info = 4
                                lasterr = str(e)
                        else:
                            if return_as_obj:
                                try:
                                    s['baseMVA'], s['bus'], s['gen'], \
                                        s['branch'], s['areas'], \
                                        s['gencost'] = eval(fname)()
                                except ValueError as e:
                                    try:
                                        s['baseMVA'], s['bus'], s['gen'], \
                                            s['branch'] = eval(fname)()
                                    except ValueError as e:
                                        info = 4
                                        lasterr = str(e)
                            else:
                                try:
                                    s['baseMVA'], s['bus'], s['gen'], \
                                        s['branch'] = eval(fname)()
                                except ValueError as e:
                                    info = 4
                                    lasterr = str(e)

                except IOError as e:
                    info = 4
                    lasterr = str(e)


                if info == 4 and exists(rootname + '.py'):
                    info = 5
                    err5 = lasterr

    elif isinstance(casefile, dict):
        s = deepcopy(casefile)
    else:
        info = 1

    # check contents of dict
    if info == 0:
        # check for required keys
        if (s['baseMVA'] is None or s['bus'] is None \
            or s['gen'] is None or s['branch'] is None) or \
            (expect_gencost and s['gencost'] is None) or \
            (expect_areas and s['areas'] is None):
            info = 5  ## missing some expected fields
            err5 = 'missing data'
        else:
            ## remove empty areas if not needed
            if hasattr(s, 'areas') and (len(s['areas']) == 0) and (not expect_areas):
                del s['areas']

            ## all fields present, copy to ppc
            ppc = deepcopy(s)
            if not hasattr(ppc, 'version'):  ## hmm, struct with no 'version' field
                if ppc['gen'].shape[1] < 21:    ## version 2 has 21 or 25 cols
                    ppc['version'] = '1'
                else:
                    ppc['version'] = '2'

            if (ppc['version'] == '1'):
                # convert from version 1 to version 2
                ppc['gen'], ppc['branch'] = ppc_1to2(ppc['gen'], ppc['branch']);
                ppc['version'] = '2'

    if info == 0:  # no errors
        if return_as_obj:
            return ppc
        else:
            result = [ppc['baseMVA'], ppc['bus'], ppc['gen'], ppc['branch']]
            if expect_gencost:
                if expect_areas:
                    result.extend([ppc['areas'], ppc['gencost']])
                else:
                    result.extend([ppc['gencost']])
            return result
    else:  # error encountered
        if info == 1:
            sys.stderr.write('Input arg should be a case or a string '
                             'containing a filename\n')
        elif info == 2:
            sys.stderr.write('Specified case not a valid file\n')
        elif info == 3:
            sys.stderr.write('Specified MAT file does not exist\n')
        elif info == 4:
            sys.stderr.write('Specified Python file does not exist\n')
        elif info == 5:
            sys.stderr.write('Syntax error or undefined data '
                             'matrix(ices) in the file\n')
        else:
            sys.stderr.write('Unknown error encountered loading case.\n')

        sys.stderr.write(lasterr + '\n')

        return info

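A simplified standalone sketch of loadcase's file lookup: honour an explicit
'.mat' or '.py' extension, otherwise probe the candidate extensions in order.
It returns None where loadcase would set an error code instead:

import os.path

def resolve_case_file(casefile, candidates=('.mat', '.py')):
    _root, ext = os.path.splitext(casefile)
    if ext in candidates:
        # Explicit extension: use the file as given, if it exists.
        return casefile if os.path.exists(casefile) else None
    for cand in candidates:
        # No extension given: try .mat first, then .py.
        if os.path.exists(casefile + cand):
            return casefile + cand
    return None
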
Example 35

Project: scikit-image
Source File: plot2rst.py
View license
def write_example(src_name, src_dir, rst_dir, cfg):
    """Write rst file from a given python example.

    Parameters
    ----------
    src_name : str
        Name of example file.
    src_dir : 'str'
        Source directory for python examples.
    rst_dir : 'str'
        Destination directory for rst files generated from python examples.
    cfg : config object
        Sphinx config object created by Sphinx.
    """
    last_dir = src_dir.psplit()[-1]
    # to avoid leading . in file names, and wrong names in links
    if last_dir == '.' or last_dir == 'examples':
        last_dir = Path('')
    else:
        last_dir += '_'

    src_path = src_dir.pjoin(src_name)
    example_file = rst_dir.pjoin(src_name)
    shutil.copyfile(src_path, example_file)

    image_dir = rst_dir.pjoin('images')
    thumb_dir = image_dir.pjoin('thumb')
    notebook_dir = rst_dir.pjoin('notebook')
    image_dir.makedirs()
    thumb_dir.makedirs()
    notebook_dir.makedirs()

    base_image_name = os.path.splitext(src_name)[0]
    image_path = image_dir.pjoin(base_image_name + '_{0}.png')

    basename, py_ext = os.path.splitext(src_name)

    rst_path = rst_dir.pjoin(basename + cfg.source_suffix_str)
    notebook_path = notebook_dir.pjoin(basename + '.ipynb')

    if _plots_are_current(src_path, image_path) and rst_path.exists and \
        notebook_path.exists:
        return

    print('plot2rst: %s' % basename)

    blocks = split_code_and_text_blocks(example_file)
    if blocks[0][2].startswith('#!'):
        blocks.pop(0) # don't add shebang line to rst file.

    rst_link = '.. _example_%s:\n\n' % (last_dir + src_name)
    figure_list, rst = process_blocks(blocks, src_path, image_path, cfg)

    has_inline_plots = any(cfg.plot2rst_plot_tag in b[2] for b in blocks)
    if has_inline_plots:
        example_rst = ''.join([rst_link, rst])
    else:
        # print first block of text, display all plots, then display code.
        first_text_block = [b for b in blocks if b[0] == 'text'][0]
        label, (start, end), content = first_text_block
        figure_list = save_all_figures(image_path)
        rst_blocks = [IMAGE_TEMPLATE % f.lstrip('/') for f in figure_list]

        example_rst = rst_link
        example_rst += eval(content)
        example_rst += ''.join(rst_blocks)
        code_info = dict(src_name=src_name, code_start=end)
        example_rst += LITERALINCLUDE.format(**code_info)

    example_rst += CODE_LINK.format(src_name)
    ipnotebook_name = src_name.replace('.py', '.ipynb')
    ipnotebook_name = './notebook/' + ipnotebook_name
    example_rst += NOTEBOOK_LINK.format(ipnotebook_name)

    with open(rst_path, 'w') as f:
        f.write(example_rst)

    thumb_path = thumb_dir.pjoin(src_name[:-3] + '.png')
    first_image_file = image_dir.pjoin(figure_list[0].lstrip('/'))
    if first_image_file.exists:
        first_image = io.imread(first_image_file)
        save_thumbnail(first_image, thumb_path, cfg.plot2rst_thumb_shape)

    if not thumb_path.exists:
        if cfg.plot2rst_default_thumb is None:
            print("WARNING: No plots found and default thumbnail not defined.")
            print("Specify 'plot2rst_default_thumb' in Sphinx config file.")
        else:
            shutil.copy(cfg.plot2rst_default_thumb, thumb_path)

    # Export example to IPython notebook
    nb = Notebook()

    # Add sphinx roles to the examples, otherwise docutils
    # cannot compile the ReST for the notebook
    sphinx_roles = PythonDomain.roles.keys()
    preamble = '\n'.join('.. role:: py:{0}(literal)\n'.format(role)
                         for role in sphinx_roles)

    # Grab all references to inject them in cells where needed
    ref_regexp = re.compile(r'\n(\.\. \[(\d+)\].*(?:\n[ ]{7,8}.*)+)')
    math_role_regexp = re.compile(':math:`(.*?)`')

    text = '\n'.join((content for (cell_type, _, content) in blocks
                     if cell_type != 'code'))

    references = re.findall(ref_regexp, text)

    for (cell_type, _, content) in blocks:
        if cell_type == 'code':
            nb.add_cell(content, cell_type='code')
        else:
            if content.startswith('r'):
                content = content.replace('r"""', '')
                escaped = False
            else:
                content = content.replace('"""', '')
                escaped = True

            if not escaped:
                content = content.replace("\\", "\\\\")

            content = content.replace('.. seealso::', '**See also:**')
            content = re.sub(math_role_regexp, r'$\1$', content)

            # Remove math directive when rendering notebooks
            # until we implement a smarter way of capturing and replacing
            # its content
            content = content.replace('.. math::', '')

            if not content.strip():
                continue

            content = (preamble + content).rstrip('\n')
            content = '\n'.join([line for line in content.split('\n') if
                                 not line.startswith('.. image')])

            # Remove reference links until we can figure out a better way to
            # preserve them
            for (reference, ref_id) in references:
                ref_tag = '[{0}]_'.format(ref_id)
                if ref_tag in content:
                    content = content.replace(ref_tag, ref_tag[:-1])

            html = publish_parts(content, writer_name='html')['html_body']
            nb.add_cell(html, cell_type='markdown')

    with open(notebook_path, 'w') as f:
        f.write(nb.json())
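
Note how the example calls os.path.splitext(src_name) to get a base name, then derives every output artifact (numbered .png images, the .rst page, the .ipynb notebook) from that single root. A minimal sketch of the naming scheme, using a hypothetical file name:

import os

src_name = 'plot_demo.py'                      # hypothetical example script
basename, py_ext = os.path.splitext(src_name)  # ('plot_demo', '.py')
image_name = basename + '_{0}.png'             # 'plot_demo_{0}.png'
rst_name = basename + '.rst'                   # 'plot_demo.rst'
notebook_name = basename + '.ipynb'            # 'plot_demo.ipynb'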

Example 36

Project: polylearn
Source File: gen_rst.py
View license
def generate_file_rst(fname, target_dir, src_dir, root_dir, plot_gallery):
    """ Generate the rst file for a given example.

    Returns the set of sklearn functions/classes imported in the example.
    """
    base_image_name = os.path.splitext(fname)[0]
    image_fname = '%s_%%03d.png' % base_image_name

    this_template = rst_template
    last_dir = os.path.split(src_dir)[-1]
    # to avoid leading . in file names, and wrong names in links
    if last_dir == '.' or last_dir == 'examples':
        last_dir = ''
    else:
        last_dir += '_'
    short_fname = last_dir + fname
    src_file = os.path.join(src_dir, fname)
    example_file = os.path.join(target_dir, fname)
    shutil.copyfile(src_file, example_file)

    # The following is a list containing all the figure names
    figure_list = []

    image_dir = os.path.join(target_dir, 'images')
    thumb_dir = os.path.join(image_dir, 'thumb')
    if not os.path.exists(image_dir):
        os.makedirs(image_dir)
    if not os.path.exists(thumb_dir):
        os.makedirs(thumb_dir)
    image_path = os.path.join(image_dir, image_fname)
    stdout_path = os.path.join(image_dir,
                               'stdout_%s.txt' % base_image_name)
    time_path = os.path.join(image_dir,
                             'time_%s.txt' % base_image_name)
    thumb_file = os.path.join(thumb_dir, base_image_name + '.png')
    time_elapsed = 0
    if plot_gallery and fname.startswith('plot'):
        # generate the plot as png image if file name
        # starts with plot and if it is more recent than an
        # existing image.
        first_image_file = image_path % 1
        if os.path.exists(stdout_path):
            stdout = open(stdout_path).read()
        else:
            stdout = ''
        if os.path.exists(time_path):
            time_elapsed = float(open(time_path).read())

        if not os.path.exists(first_image_file) or \
           os.stat(first_image_file).st_mtime <= os.stat(src_file).st_mtime:
            # We need to execute the code
            print('plotting %s' % fname)
            t0 = time()
            import matplotlib.pyplot as plt
            plt.close('all')
            cwd = os.getcwd()
            try:
                # First CD in the original example dir, so that any file
                # created by the example get created in this directory
                orig_stdout = sys.stdout
                os.chdir(os.path.dirname(src_file))
                my_buffer = StringIO()
                my_stdout = Tee(sys.stdout, my_buffer)
                sys.stdout = my_stdout
                my_globals = {'pl': plt}
                execfile(os.path.basename(src_file), my_globals)
                time_elapsed = time() - t0
                sys.stdout = orig_stdout
                my_stdout = my_buffer.getvalue()

                if '__doc__' in my_globals:
                    # The __doc__ is often printed in the example, we
                    # don't wish to echo it
                    my_stdout = my_stdout.replace(
                        my_globals['__doc__'],
                        '')
                my_stdout = my_stdout.strip().expandtabs()
                if my_stdout:
                    stdout = '**Script output**::\n\n  %s\n\n' % (
                        '\n  '.join(my_stdout.split('\n')))
                open(stdout_path, 'w').write(stdout)
                open(time_path, 'w').write('%f' % time_elapsed)
                os.chdir(cwd)

                # In order to save every figure we have two solutions :
                # * iterate from 1 to infinity and call plt.fignum_exists(n)
                #   (this requires the figures to be numbered
                #    incrementally: 1, 2, 3 and not 1, 2, 5)
                # * iterate over [fig_mngr.num for fig_mngr in
                #   matplotlib._pylab_helpers.Gcf.get_all_fig_managers()]
                fig_managers = matplotlib._pylab_helpers.Gcf.get_all_fig_managers()
                for fig_mngr in fig_managers:
                    # Set the fig_num figure as the current figure as we can't
                    # save a figure that's not the current figure.
                    fig = plt.figure(fig_mngr.num)
                    kwargs = {}
                    to_rgba = matplotlib.colors.colorConverter.to_rgba
                    for attr in ['facecolor', 'edgecolor']:
                        fig_attr = getattr(fig, 'get_' + attr)()
                        default_attr = matplotlib.rcParams['figure.' + attr]
                        if to_rgba(fig_attr) != to_rgba(default_attr):
                            kwargs[attr] = fig_attr

                    fig.savefig(image_path % fig_mngr.num, **kwargs)
                    figure_list.append(image_fname % fig_mngr.num)
            except:
                print(80 * '_')
                print('%s is not compiling:' % fname)
                traceback.print_exc()
                print(80 * '_')
            finally:
                os.chdir(cwd)
                sys.stdout = orig_stdout

            print(" - time elapsed : %.2g sec" % time_elapsed)
        else:
            figure_list = [f[len(image_dir):]
                           for f in glob.glob(image_path.replace("%03d",
                                                '[0-9][0-9][0-9]'))]
        figure_list.sort()

        # generate thumb file
        this_template = plot_rst_template
        car_thumb_path = os.path.join(os.path.split(root_dir)[0], '_build/html/stable/_images/')
        # Note: normally, make_thumbnail is used to write to the path contained in `thumb_file`
        # which is within `auto_examples/../images/thumbs` depending on the example.
        # Because the carousel has different dimensions than those of the examples gallery,
        # I did not simply reuse them all as some contained whitespace due to their default gallery
        # thumbnail size. Below, for a few cases, separate thumbnails are created (the originals can't
        # just be overwritten with the carousel dimensions as it messes up the examples gallery layout).
        # The special carousel thumbnails are written directly to _build/html/stable/_images/,
        # as for some reason unknown to me, Sphinx refuses to copy my 'extra' thumbnails from the
        # auto examples gallery to the _build folder. This works fine as is, but it would be cleaner to
        # have it happen with the rest. Ideally they should be written to 'thumb_file' as well, and then
        # copied to the _images folder during the `Copying Downloadable Files` step like the rest.
        if not os.path.exists(car_thumb_path):
            os.makedirs(car_thumb_path)
        if os.path.exists(first_image_file):
            # We generate extra special thumbnails for the carousel
            carousel_tfile = os.path.join(car_thumb_path, base_image_name + '_carousel.png')
            first_img = image_fname % 1
            if first_img in carousel_thumbs:
                make_thumbnail((image_path % carousel_thumbs[first_img][0]),
                               carousel_tfile, carousel_thumbs[first_img][1], 190)
            make_thumbnail(first_image_file, thumb_file, 400, 280)

    if not os.path.exists(thumb_file):
        # create something to replace the thumbnail
        make_thumbnail('images/no_image.png', thumb_file, 200, 140)

    docstring, short_desc, end_row = extract_docstring(example_file)

    # Depending on whether we have one or more figures, we're using a
    # horizontal list or a single rst call to 'image'.
    if len(figure_list) == 1:
        figure_name = figure_list[0]
        image_list = SINGLE_IMAGE % figure_name.lstrip('/')
    else:
        image_list = HLIST_HEADER
        for figure_name in figure_list:
            image_list += HLIST_IMAGE_TEMPLATE % figure_name.lstrip('/')

    time_m, time_s = divmod(time_elapsed, 60)
    f = open(os.path.join(target_dir, base_image_name + '.rst'), 'w')
    f.write(this_template % locals())
    f.flush()

    # save variables so we can later add links to the documentation
    if six.PY2:
        example_code_obj = identify_names(open(example_file).read())
    else:
        example_code_obj = \
            identify_names(open(example_file, encoding='utf-8').read())
    if example_code_obj:
        codeobj_fname = example_file[:-3] + '_codeobj.pickle'
        with open(codeobj_fname, 'wb') as fid:
            pickle.dump(example_code_obj, fid, pickle.HIGHEST_PROTOCOL)

    backrefs = set('{module_short}.{name}'.format(**entry)
                   for entry in example_code_obj.values()
                   if entry['module'].startswith('sklearn'))
    return backrefs
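
The '%s_%%03d.png' pattern above is worth a second look: the doubled percent survives the first interpolation, leaving a '%03d' slot for the zero-padded figure number filled in later by fig.savefig. A quick illustration with a hypothetical name:

import os

fname = 'plot_demo.py'                         # hypothetical example script
base_image_name = os.path.splitext(fname)[0]   # 'plot_demo'
image_fname = '%s_%%03d.png' % base_image_name
print(image_fname)        # plot_demo_%03d.png
print(image_fname % 1)    # plot_demo_001.png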

Example 37

Project: scikit-video
Source File: gen_rst.py
View license
def generate_file_rst(fname, target_dir, src_dir, root_dir, plot_gallery):
    """ Generate the rst file for a given example.

    Returns the set of sklearn functions/classes imported in the example.
    """
    base_image_name = os.path.splitext(fname)[0]
    image_fname = '%s_%%03d.png' % base_image_name

    this_template = rst_template
    last_dir = os.path.split(src_dir)[-1]
    # to avoid leading . in file names, and wrong names in links
    if last_dir == '.' or last_dir == 'examples':
        last_dir = ''
    else:
        last_dir += '_'
    short_fname = last_dir + fname
    src_file = os.path.join(src_dir, fname)
    example_file = os.path.join(target_dir, fname)
    shutil.copyfile(src_file, example_file)

    # The following is a list containing all the figure names
    figure_list = []

    image_dir = os.path.join(target_dir, 'images')
    thumb_dir = os.path.join(image_dir, 'thumb')
    if not os.path.exists(image_dir):
        os.makedirs(image_dir)
    if not os.path.exists(thumb_dir):
        os.makedirs(thumb_dir)
    image_path = os.path.join(image_dir, image_fname)
    stdout_path = os.path.join(image_dir,
                               'stdout_%s.txt' % base_image_name)
    time_path = os.path.join(image_dir,
                             'time_%s.txt' % base_image_name)
    thumb_file = os.path.join(thumb_dir, base_image_name + '.png')
    time_elapsed = 0
    if plot_gallery and fname.startswith('plot'):
        # generate the plot as png image if file name
        # starts with plot and if it is more recent than an
        # existing image.
        first_image_file = image_path % 1
        if os.path.exists(stdout_path):
            stdout = open(stdout_path).read()
        else:
            stdout = ''
        if os.path.exists(time_path):
            time_elapsed = float(open(time_path).read())

        if not os.path.exists(first_image_file) or \
           os.stat(first_image_file).st_mtime <= os.stat(src_file).st_mtime:
            # We need to execute the code
            print('plotting %s' % fname)
            t0 = time()
            import matplotlib.pyplot as plt
            plt.close('all')
            cwd = os.getcwd()
            try:
                # First CD in the original example dir, so that any file
                # created by the example get created in this directory
                orig_stdout = sys.stdout
                os.chdir(os.path.dirname(src_file))
                my_buffer = StringIO()
                my_stdout = Tee(sys.stdout, my_buffer)
                sys.stdout = my_stdout
                my_globals = {'pl': plt}
                execfile(os.path.basename(src_file), my_globals)
                time_elapsed = time() - t0
                sys.stdout = orig_stdout
                my_stdout = my_buffer.getvalue()

                if '__doc__' in my_globals:
                    # The __doc__ is often printed in the example, we
                    # don't wish to echo it
                    my_stdout = my_stdout.replace(
                        my_globals['__doc__'],
                        '')
                my_stdout = my_stdout.strip().expandtabs()
                if my_stdout:
                    stdout = '**Script output**::\n\n  %s\n\n' % (
                        '\n  '.join(my_stdout.split('\n')))
                open(stdout_path, 'w').write(stdout)
                open(time_path, 'w').write('%f' % time_elapsed)
                os.chdir(cwd)

                # In order to save every figure we have two solutions :
                # * iterate from 1 to infinity and call plt.fignum_exists(n)
                #   (this requires the figures to be numbered
                #    incrementally: 1, 2, 3 and not 1, 2, 5)
                # * iterate over [fig_mngr.num for fig_mngr in
                #   matplotlib._pylab_helpers.Gcf.get_all_fig_managers()]
                fig_managers = matplotlib._pylab_helpers.Gcf.get_all_fig_managers()
                for fig_mngr in fig_managers:
                    # Set the fig_num figure as the current figure as we can't
                    # save a figure that's not the current figure.
                    fig = plt.figure(fig_mngr.num)
                    kwargs = {}
                    to_rgba = matplotlib.colors.colorConverter.to_rgba
                    for attr in ['facecolor', 'edgecolor']:
                        fig_attr = getattr(fig, 'get_' + attr)()
                        default_attr = matplotlib.rcParams['figure.' + attr]
                        if to_rgba(fig_attr) != to_rgba(default_attr):
                            kwargs[attr] = fig_attr

                    fig.savefig(image_path % fig_mngr.num, **kwargs)
                    figure_list.append(image_fname % fig_mngr.num)
            except:
                print(80 * '_')
                print('%s is not compiling:' % fname)
                traceback.print_exc()
                print(80 * '_')
            finally:
                os.chdir(cwd)
                sys.stdout = orig_stdout

            print(" - time elapsed : %.2g sec" % time_elapsed)
        else:
            figure_list = [f[len(image_dir):]
                           for f in glob.glob(image_path.replace("%03d",
                                                '[0-9][0-9][0-9]'))]
        figure_list.sort()

        # generate thumb file
        this_template = plot_rst_template
        car_thumb_path = os.path.join(os.path.split(root_dir)[0], '_build/html/stable/_images/')
        # Note: normally, make_thumbnail is used to write to the path contained in `thumb_file`
        # which is within `auto_examples/../images/thumbs` depending on the example.
        # Because the carousel has different dimensions than those of the examples gallery,
        # I did not simply reuse them all as some contained whitespace due to their default gallery
        # thumbnail size. Below, for a few cases, separate thumbnails are created (the originals can't
        # just be overwritten with the carousel dimensions as it messes up the examples gallery layout).
        # The special carousel thumbnails are written directly to _build/html/stable/_images/,
        # as for some reason unknown to me, Sphinx refuses to copy my 'extra' thumbnails from the
        # auto examples gallery to the _build folder. This works fine as is, but it would be cleaner to
        # have it happen with the rest. Ideally they should be written to 'thumb_file' as well, and then
        # copied to the _images folder during the `Copying Downloadable Files` step like the rest.
        if not os.path.exists(car_thumb_path):
            os.makedirs(car_thumb_path)
        if os.path.exists(first_image_file):
            # We generate extra special thumbnails for the carousel
            carousel_tfile = os.path.join(car_thumb_path, base_image_name + '_carousel.png')
            first_img = image_fname % 1
            if first_img in carousel_thumbs:
                make_thumbnail((image_path % carousel_thumbs[first_img][0]),
                               carousel_tfile, carousel_thumbs[first_img][1], 190)
            make_thumbnail(first_image_file, thumb_file, 400, 280)

    if not os.path.exists(thumb_file):
        # create something to replace the thumbnail
        make_thumbnail('images/no_image.png', thumb_file, 200, 140)

    docstring, short_desc, end_row = extract_docstring(example_file)

    # Depending on whether we have one or more figures, we're using a
    # horizontal list or a single rst call to 'image'.
    if len(figure_list) == 1:
        figure_name = figure_list[0]
        image_list = SINGLE_IMAGE % figure_name.lstrip('/')
    else:
        image_list = HLIST_HEADER
        for figure_name in figure_list:
            image_list += HLIST_IMAGE_TEMPLATE % figure_name.lstrip('/')

    time_m, time_s = divmod(time_elapsed, 60)
    f = open(os.path.join(target_dir, base_image_name + '.rst'), 'w')
    f.write(this_template % locals())
    f.flush()

    # save variables so we can later add links to the documentation
    if six.PY2:
        example_code_obj = identify_names(open(example_file).read())
    else:
        example_code_obj = \
            identify_names(open(example_file, encoding='utf-8').read())
    if example_code_obj:
        codeobj_fname = example_file[:-3] + '_codeobj.pickle'
        with open(codeobj_fname, 'wb') as fid:
            pickle.dump(example_code_obj, fid, pickle.HIGHEST_PROTOCOL)

    backrefs = set('{module_short}.{name}'.format(**entry)
                   for entry in example_code_obj.values()
                   if entry['module'].startswith('sklearn'))
    return backrefs

Example 38

Project: scikit-video
Source File: avconv.py
View license
    def __init__(self, filename, inputdict=None, outputdict=None, verbosity=0):
        """Initializes libav in reading mode with the given parameters

        During initialization, additional parameters about the video file
        are parsed using :func:`skvideo.io.avprobe`. Then avconv is launched
        as a subprocess. Parameters passed into inputdict are parsed and
        used to set as internal variables about the video. If the parameter,
        such as "Height" is not found in the inputdict, it is found through
        scanning the file's header information. If not in the header, avprobe
        is used to decode the file to determine the information. In the case
        that the information is not supplied and cannot be inferred from the
        input file, a ValueError exception is thrown.

        Parameters
        ----------
        filename : string
            Video file path

        inputdict : dict
            Input dictionary parameters, i.e. how to interpret the input file.

        outputdict : dict
            Output dictionary parameters, i.e. how to encode the data 
            when sending back to the python process.

        Returns
        -------
        none

        """
        # check if avconv exists in the path
        assert _HAS_AVCONV, "Cannot find installation of libav (which comes with avprobe)."


        israw = 0

        if not inputdict:
            inputdict = {}

        if not outputdict:
            outputdict = {}

        # General information
        _, self.extension = os.path.splitext(filename)
        self.size = os.path.getsize(filename)
        self.probeInfo = avprobe(filename)

        viddict = {}
        if "video" in self.probeInfo:
            viddict = self.probeInfo["video"]

        self.inputfps = -1
        if ("-r" in inputdict):
            self.inputfps = np.int(inputdict["-r"])
        elif "avg_frame_rate" in viddict:
            # check for the slash
            frtxt = viddict["avg_frame_rate"]
            parts = frtxt.split('/') 
            if len(parts) > 1:
                self.inputfps = np.float(parts[0])/np.float(parts[1])
            else:
                self.inputfps = np.float(frtxt)
        else:
            # simply default to a common 25 fps and warn
            self.inputfps = 25
            # No input frame rate detected. Assuming 25 fps. Consult documentation on I/O if this is not desired.

        # if we don't have width or height at all, raise exception
        if ("-s" in inputdict):
            widthheight = inputdict["-s"].split('x')
            self.inputwidth = np.int(widthheight[0])
            self.inputheight = np.int(widthheight[1])
        elif (("width" in viddict) and ("height" in viddict)):
            self.inputwidth = np.int(viddict["width"])
            self.inputheight = np.int(viddict["height"])
        else:
            raise ValueError("No way to determine width or height from video. Need `-s` in `inputdict`. Consult documentation on I/O.")

        self.bpp = -1 # bits per pixel
        self.pix_fmt = ""
        # completely unsure of this:
        if ("-pix_fmt" in inputdict):
            self.pix_fmt = inputdict["-pix_fmt"]
        elif ("pix_fmt" in viddict):
            # parse this bpp
            self.pix_fmt = viddict["pix_fmt"]
        else:
            self.pix_fmt = "yuvj444p"
            if verbosity != 0:
                warnings.warn("No input color space detected. Assuming yuvj444p.", UserWarning)

        self.inputdepth = np.int(bpplut[self.pix_fmt][0])
        self.bpp = np.int(bpplut[self.pix_fmt][1])

        if (self.extension == ".yuv"):
            israw = 1

        if ("-vframes" in outputdict):
            self.inputframenum = np.int(outputdict["-vframes"])
        elif ("nb_frames" in viddict):
            self.inputframenum = np.int(viddict["nb_frames"])
        elif israw == 1:
            # we can compute it based on the input size and color space
            self.inputframenum = np.int(self.size / (self.inputwidth * self.inputheight * (self.bpp/8.0)))
        else:
            self.inputframenum = -1
            if verbosity != 0:
                warnings.warn("Cannot determine frame count. Scanning input file, this is slow when repeated many times. Need `-vframes` in inputdict. Consult documentation on I/O.", UserWarning) 

        if israw != 0:
            inputdict['-pix_fmt'] = self.pix_fmt

        self._filename = filename

        if '-f' not in outputdict:
            outputdict['-f'] = "rawvideo"

        if '-pix_fmt' not in outputdict:
            outputdict['-pix_fmt'] = "rgb24"

        if '-s' in outputdict:
            widthheight = outputdict["-s"].split('x')
            self.outputwidth = np.int(widthheight[0])
            self.outputheight = np.int(widthheight[1])
        else:
            self.outputwidth = self.inputwidth
            self.outputheight = self.inputheight

        self.outputdepth = np.int(bpplut[outputdict['-pix_fmt']][0])
        self.outputbpp = np.int(bpplut[outputdict['-pix_fmt']][1])


        # Create input args
        iargs = []
        for key in inputdict.keys():
            iargs.append(key)
            iargs.append(inputdict[key])

        oargs = []
        for key in outputdict.keys():
            oargs.append(key)
            oargs.append(outputdict[key])

        if self.inputframenum == -1:
            # open process with supplied arguments,
            # grabbing number of frames using ffprobe
            probecmd = [_AVCONV_PATH + "/avprobe"] + ["-v", "error", "-count_frames", "-select_streams", "v:0", "-show_entries", "stream=nb_read_frames", "-of", "default=nokey=1:noprint_wrappers=1", self._filename]
            self.inputframenum = np.int(check_output(probecmd).split('\n')[0])

        # Create process

        if verbosity == 0:
            cmd = [_AVCONV_PATH + "/avconv", "-nostats", "-loglevel", "0"] + iargs + ['-i', self._filename] + oargs + ['pipe:']
            self._proc = sp.Popen(cmd, stdin=sp.PIPE,
                                  stdout=sp.PIPE, stderr=sp.PIPE)
        else:
            cmd = [_AVCONV_PATH + "/avconv"] + iargs + ['-i', self._filename] + oargs + ['pipe:']
            print(cmd)
            self._proc = sp.Popen(cmd, stdin=sp.PIPE,
                                  stdout=sp.PIPE, stderr=None)
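
Here os.path.splitext is used purely for format sniffing: the extension (dot included) is kept on the instance and compared against '.yuv' to flag raw input that needs an explicit size and pixel format. A minimal sketch of that check, on a hypothetical path:

import os

_, extension = os.path.splitext('/videos/clip.yuv')
print(extension)                  # '.yuv' -- splitext keeps the dot
israw = 1 if extension == '.yuv' else 0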

Example 39

Project: scikit-video
Source File: ffmpeg.py
View license
    def __init__(self, filename, inputdict=None, outputdict=None, verbosity=0):
        """Initializes FFmpeg in reading mode with the given parameters

        During initialization, additional parameters about the video file
        are parsed using :func:`skvideo.io.ffprobe`. Then FFmpeg is launched
        as a subprocess. Parameters passed into inputdict are parsed and
        used to set as internal variables about the video. If the parameter,
        such as "Height" is not found in the inputdict, it is found through
        scanning the file's header information. If not in the header, ffprobe
        is used to decode the file to determine the information. In the case
        that the information is not supplied and cannot be inferred from the
        input file, a ValueError exception is thrown.

        Parameters
        ----------
        filename : string
            Video file path

        inputdict : dict
            Input dictionary parameters, i.e. how to interpret the input file.

        outputdict : dict
            Output dictionary parameters, i.e. how to encode the data 
            when sending back to the python process.

        Returns
        -------
        none

        """
        # check if FFMPEG exists in the path
        assert _HAS_FFMPEG, "Cannot find installation of real FFmpeg (which comes with ffprobe)."

        israw = 0

        if not inputdict:
            inputdict = {}

        if not outputdict:
            outputdict = {}

        # General information
        _, self.extension = os.path.splitext(filename)


        self.size = os.path.getsize(filename)
        self.probeInfo = ffprobe(filename)

        viddict = {}
        if "video" in self.probeInfo:
            viddict = self.probeInfo["video"]

        self.inputfps = -1
        if ("-r" in inputdict):
            self.inputfps = np.int(inputdict["-r"])
        elif "@r_frame_rate" in viddict:
            # check for the slash
            frtxt = viddict["@r_frame_rate"]
            parts = frtxt.split('/')
            if len(parts) > 1:
                self.inputfps = np.float(parts[0])/np.float(parts[1])
            else:
                self.inputfps = np.float(frtxt)
        else:
            # simply default to a common 25 fps and warn
            self.inputfps = 25
            # No input frame rate detected. Assuming 25 fps. Consult documentation on I/O if this is not desired.

        # if we don't have width or height at all, raise exception
        if ("-s" in inputdict):
            widthheight = inputdict["-s"].split('x')
            self.inputwidth = np.int(widthheight[0])
            self.inputheight = np.int(widthheight[1])
        elif (("@width" in viddict) and ("@height" in viddict)):
            self.inputwidth = np.int(viddict["@width"])
            self.inputheight = np.int(viddict["@height"])
        else:
            raise ValueError("No way to determine width or height from video. Need `-s` in `inputdict`. Consult documentation on I/O.")

        self.bpp = -1 # bits per pixel
        self.pix_fmt = ""
        # completely unsure of this:
        if ("-pix_fmt" in inputdict):
            self.pix_fmt = inputdict["-pix_fmt"]
        elif ("@pix_fmt" in viddict):
            # parse this bpp
            self.pix_fmt = viddict["@pix_fmt"]
        else:
            self.pix_fmt = "yuvj444p"
            if verbosity != 0:
                warnings.warn("No input color space detected. Assuming yuvj420p.", UserWarning)

        self.inputdepth = np.int(bpplut[self.pix_fmt][0])
        self.bpp = np.int(bpplut[self.pix_fmt][1])

        if (str.encode(self.extension) in [b".raw", b".yuv"]):
            israw = 1

        if ("-vframes" in outputdict):
            self.inputframenum = np.int(outputdict["-vframes"])
        elif ("@nb_frames" in viddict):
            self.inputframenum = np.int(viddict["@nb_frames"])
        elif israw == 1:
            # we can compute it based on the input size and color space
            self.inputframenum = np.int(self.size / (self.inputwidth * self.inputheight * (self.bpp/8.0)))
        else:
            self.inputframenum = -1
            if verbosity != 0:
                warnings.warn("Cannot determine frame count. Scanning input file, this is slow when repeated many times. Need `-vframes` in inputdict. Consult documentation on I/O.", UserWarning) 

        if israw != 0:
            inputdict['-pix_fmt'] = self.pix_fmt
        else:
            # check that the extension makes sense
            assert str.encode(self.extension).lower() in _FFMPEG_SUPPORTED_DECODERS, "Unknown decoder extension: " + self.extension.lower()

        self._filename = filename

        if '-f' not in outputdict:
            outputdict['-f'] = "image2pipe"

        if '-pix_fmt' not in outputdict:
            outputdict['-pix_fmt'] = "rgb24"

        if '-s' in outputdict:
            widthheight = outputdict["-s"].split('x')
            self.outputwidth = np.int(widthheight[0])
            self.outputheight = np.int(widthheight[1])
        else:
            self.outputwidth = self.inputwidth
            self.outputheight = self.inputheight


        self.outputdepth = np.int(bpplut[outputdict['-pix_fmt']][0])
        self.outputbpp = np.int(bpplut[outputdict['-pix_fmt']][1])

        if '-vcodec' not in outputdict:
            outputdict['-vcodec'] = "rawvideo"

        # Create input args
        iargs = []
        for key in inputdict.keys():
            iargs.append(key)
            iargs.append(inputdict[key])

        oargs = []
        for key in outputdict.keys():
            oargs.append(key)
            oargs.append(outputdict[key])

        if self.inputframenum == -1:
            # open process with supplied arguments,
            # grabbing number of frames using ffprobe
            probecmd = [_FFMPEG_PATH + "/ffprobe"] + ["-v", "error", "-count_frames", "-select_streams", "v:0", "-show_entries", "stream=nb_read_frames", "-of", "default=nokey=1:noprint_wrappers=1", self._filename]
            self.inputframenum = np.int(check_output(probecmd).split('\n')[0])

        # Create process

        if verbosity == 0:
            cmd = [_FFMPEG_PATH + "/ffmpeg", "-nostats", "-loglevel", "0"] + iargs + ['-i', self._filename] + oargs + ['-']
            self._proc = sp.Popen(cmd, stdin=sp.PIPE,
                                  stdout=sp.PIPE, stderr=sp.PIPE)
        else:
            cmd = [_FFMPEG_PATH + "/ffmpeg"] + iargs + ['-i', self._filename] + oargs + ['-']
            print(cmd)
            self._proc = sp.Popen(cmd, stdin=sp.PIPE,
                                  stdout=sp.PIPE, stderr=None)
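
This FFmpeg variant adds one more extension-based guard: non-raw inputs must appear in the supported-decoder table, so the splitext result is byte-encoded and lowercased before the membership test. A sketch with a stand-in table (the real _FFMPEG_SUPPORTED_DECODERS holds byte strings like these):

import os

SUPPORTED_DECODERS = [b'.mp4', b'.avi', b'.mkv']   # stand-in values
_, extension = os.path.splitext('CLIP.MP4')
assert str.encode(extension).lower() in SUPPORTED_DECODERS, \
    "Unknown decoder extension: " + extension.lower()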

Example 40

Project: gmusicapi
Source File: musicmanager.py
View license
    @classmethod
    def fill_track_info(cls, filepath):
        """Given the path and contents of a track, return a filled locker_pb2.Track.
        On problems, raise ValueError."""
        track = locker_pb2.Track()

        # The track protobuf message supports an additional metadata list field.
        # ALBUM_ART_HASH has been observed being sent in this field so far.
        # Append locker_pb2.AdditionalMetadata objects to additional_metadata.
        # AdditionalMetadata objects consist of two fields, 'tag_name' and 'value'.
        additional_metadata = []

        track.client_id = cls.get_track_clientid(filepath)

        extension = os.path.splitext(filepath)[1].upper()

        if isinstance(extension, bytes):
            extension = extension.decode('utf8')

        if extension:
            # Trim leading period if it exists (i.e. extension not empty).
            extension = extension[1:]

        if extension.upper() == 'M4B':
            # M4B are supported by the music manager, and transcoded like normal.
            extension = 'M4A'

        if not hasattr(locker_pb2.Track, extension):
            raise ValueError("unsupported filetype")

        track.original_content_type = getattr(locker_pb2.Track, extension)

        track.estimated_size = os.path.getsize(filepath)
        track.last_modified_timestamp = int(os.path.getmtime(filepath))

        # These are typically zeroed in my examples.
        track.play_count = 0
        track.client_date_added = 0
        track.recent_timestamp = 0
        track.rating = locker_pb2.Track.NOT_RATED  # star rating

        # Populate information about the encoding.
        audio = mutagen.File(filepath, easy=True)
        if audio is None:
            raise ValueError("could not open to read metadata")
        elif isinstance(audio, mutagen.asf.ASF):
            # WMA entries store more info than just the value.
            # Monkeypatch in a dict {key: value} to keep interface the same for all filetypes.
            asf_dict = dict((k, [ve.value for ve in v]) for (k, v) in audio.tags.as_dict().items())
            audio.tags = asf_dict

        track.duration_millis = int(audio.info.length * 1000)

        try:
            bitrate = audio.info.bitrate // 1000
        except AttributeError:
            # mutagen doesn't provide bitrate for some lossless formats (eg FLAC), so
            # provide an estimation instead. This shouldn't matter too much;
            # the bitrate will always be > 320, which is the highest scan and match quality.
            bitrate = (track.estimated_size * 8) // track.duration_millis

        track.original_bit_rate = bitrate

        # Populate metadata.

        def track_set(field_name, val, msg=track):
            """Returns result of utils.pb_set and logs on failures.
            Should be used when setting directly from metadata."""
            success = utils.pb_set(msg, field_name, val)

            if not success:
                log.info("could not pb_set track.%s = %r for '%r'", field_name, val, filepath)

            return success

        # Title is required.
        # If it's not in the metadata, the filename will be used.
        if "title" in audio:
            title = audio['title'][0]
            if isinstance(title, mutagen.asf.ASFUnicodeAttribute):
                title = title.value

            track_set('title', title)
        else:
            # Assume ascii or unicode.
            track.title = os.path.basename(filepath)

        if "date" in audio:
            date_val = str(audio['date'][0])
            try:
                datetime = dateutil.parser.parse(date_val, fuzzy=True)
            except (ValueError, TypeError) as e:
                # TypeError provides compatibility with:
                #  https://bugs.launchpad.net/dateutil/+bug/1247643
                log.warning("could not parse date md for '%r': (%s)", filepath, e)
            else:
                track_set('year', datetime.year)

        for null_field in ['artist', 'album']:
            # If these fields aren't provided, they'll render as "undefined" in the web interface;
            # see https://github.com/simon-weber/gmusicapi/issues/236.
            # Defaulting them to an empty string fixes this.
            if null_field not in audio:
                track_set(null_field, '')

        # Mass-populate the rest of the simple fields.
        # Merge shared and unshared fields into {mutagen: Track}.
        fields = dict(
            itertools.chain(
                ((shared, shared) for shared in cls.shared_fields),
                cls.field_map.items()))

        for mutagen_f, track_f in fields.items():
            if mutagen_f in audio:
                track_set(track_f, audio[mutagen_f][0])

        for mutagen_f, (track_f, track_total_f) in cls.count_fields.items():
            if mutagen_f in audio:
                numstrs = str(audio[mutagen_f][0]).split("/")
                track_set(track_f, numstrs[0])

                if len(numstrs) == 2 and numstrs[1]:
                    track_set(track_total_f, numstrs[1])

        if additional_metadata:
            track.track_extras.additional_metadata.extend(additional_metadata)

        return track
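
The extension handling at the top of fill_track_info shows the usual follow-up to os.path.splitext: the returned extension carries a leading period, so it is sliced off before being used as a lookup key. A short illustration with a hypothetical path:

import os

extension = os.path.splitext('/music/song.mp3')[1].upper()  # '.MP3'
if extension:
    extension = extension[1:]   # drop the leading period -> 'MP3'
# the bare name is then resolved as an attribute of the Track message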

Example 41

Project: speedtest-cli
Source File: speedtest_cli.py
View license
def speedtest():
    """Run the full speedtest.net test"""

    global shutdown_event, source, scheme
    shutdown_event = threading.Event()

    signal.signal(signal.SIGINT, ctrl_c)

    description = (
        'Command line interface for testing internet bandwidth using '
        'speedtest.net.\n'
        '------------------------------------------------------------'
        '--------------\n'
        'https://github.com/sivel/speedtest-cli')

    parser = ArgParser(description=description)
    # Give optparse.OptionParser an `add_argument` method for
    # compatibility with argparse.ArgumentParser
    try:
        parser.add_argument = parser.add_option
    except AttributeError:
        pass
    parser.add_argument('--bytes', dest='units', action='store_const',
                        const=('byte', 1), default=('bit', 8),
                        help='Display values in bytes instead of bits. Does '
                             'not affect the image generated by --share')
    parser.add_argument('--share', action='store_true',
                        help='Generate and provide a URL to the speedtest.net '
                             'share results image')
    parser.add_argument('--simple', action='store_true',
                        help='Suppress verbose output, only show basic '
                             'information')
    parser.add_argument('--list', action='store_true',
                        help='Display a list of speedtest.net servers '
                             'sorted by distance')
    parser.add_argument('--server', help='Specify a server ID to test against')
    parser.add_argument('--mini', help='URL of the Speedtest Mini server')
    parser.add_argument('--source', help='Source IP address to bind to')
    parser.add_argument('--timeout', default=10, type=int,
                        help='HTTP timeout in seconds. Default 10')
    parser.add_argument('--secure', action='store_true',
                        help='Use HTTPS instead of HTTP when communicating '
                             'with speedtest.net operated servers')
    parser.add_argument('--version', action='store_true',
                        help='Show the version number and exit')

    options = parser.parse_args()
    if isinstance(options, tuple):
        args = options[0]
    else:
        args = options
    del options

    # Print the version and exit
    if args.version:
        version()

    socket.setdefaulttimeout(args.timeout)

    # Pre-cache the user agent string
    build_user_agent()

    # If specified bind to a specific IP address
    if args.source:
        source = args.source
        socket.socket = bound_socket

    if args.secure:
        scheme = 'https'

    if not args.simple:
        print_('Retrieving speedtest.net configuration...')
    try:
        config = getConfig()
    except URLError:
        print_('Cannot retrieve speedtest configuration')
        sys.exit(1)

    if not args.simple:
        print_('Retrieving speedtest.net server list...')
    if args.list or args.server:
        servers = closestServers(config['client'], True)
        if args.list:
            serverList = []
            for server in servers:
                line = ('%(id)4s) %(sponsor)s (%(name)s, %(country)s) '
                        '[%(d)0.2f km]' % server)
                serverList.append(line)
            print_('\n'.join(serverList).encode('utf-8', 'ignore'))
            sys.exit(0)
    else:
        servers = closestServers(config['client'])

    if not args.simple:
        print_('Testing from %(isp)s (%(ip)s)...' % config['client'])

    if args.server:
        try:
            best = getBestServer(filter(lambda x: x['id'] == args.server,
                                        servers))
        except IndexError:
            print_('Invalid server ID')
            sys.exit(1)
    elif args.mini:
        name, ext = os.path.splitext(args.mini)
        if ext:
            url = os.path.dirname(args.mini)
        else:
            url = args.mini
        urlparts = urlparse(url)
        try:
            request = build_request(args.mini)
            f = urlopen(request)
        except:
            print_('Invalid Speedtest Mini URL')
            sys.exit(1)
        else:
            text = f.read()
            f.close()
        extension = re.findall('upload_extension: "([^"]+)"', text.decode())
        if not extension:
            for ext in ['php', 'asp', 'aspx', 'jsp']:
                try:
                    request = build_request('%s/speedtest/upload.%s' %
                                            (args.mini, ext))
                    f = urlopen(request)
                except:
                    pass
                else:
                    data = f.read().strip()
                    if (f.code == 200 and
                            len(data.splitlines()) == 1 and
                            re.match('size=[0-9]', data)):
                        extension = [ext]
                        break
        if not urlparts or not extension:
            print_('Please provide the full URL of your Speedtest Mini server')
            sys.exit(1)
        servers = [{
            'sponsor': 'Speedtest Mini',
            'name': urlparts[1],
            'd': 0,
            'url': '%s/speedtest/upload.%s' % (url.rstrip('/'), extension[0]),
            'latency': 0,
            'id': 0
        }]
        try:
            best = getBestServer(servers)
        except:
            best = servers[0]
    else:
        if not args.simple:
            print_('Selecting best server based on latency...')
        best = getBestServer(servers)

    if not args.simple:
        print_(('Hosted by %(sponsor)s (%(name)s) [%(d)0.2f km]: '
               '%(latency)s ms' % best).encode('utf-8', 'ignore'))
    else:
        print_('Ping: %(latency)s ms' % best)

    sizes = [350, 500, 750, 1000, 1500, 2000, 2500, 3000, 3500, 4000]
    urls = []
    for size in sizes:
        for i in range(0, 4):
            urls.append('%s/random%sx%s.jpg' %
                        (os.path.dirname(best['url']), size, size))
    if not args.simple:
        print_('Testing download speed', end='')
    dlspeed = downloadSpeed(urls, args.simple)
    if not args.simple:
        print_()
    print_('Download: %0.2f M%s/s' %
           ((dlspeed / 1000 / 1000) * args.units[1], args.units[0]))

    sizesizes = [int(.25 * 1000 * 1000), int(.5 * 1000 * 1000)]
    sizes = []
    for size in sizesizes:
        for i in range(0, 25):
            sizes.append(size)
    if not args.simple:
        print_('Testing upload speed', end='')
    ulspeed = uploadSpeed(best['url'], sizes, args.simple)
    if not args.simple:
        print_()
    print_('Upload: %0.2f M%s/s' %
           ((ulspeed / 1000 / 1000) * args.units[1], args.units[0]))

    if args.share and args.mini:
        print_('Cannot generate a speedtest.net share results image while '
               'testing against a Speedtest Mini server')
    elif args.share:
        dlspeedk = int(round((dlspeed / 1000) * 8, 0))
        ping = int(round(best['latency'], 0))
        ulspeedk = int(round((ulspeed / 1000) * 8, 0))

        # Build the request to send results back to speedtest.net
        # We use a list instead of a dict because the API expects parameters
        # in a certain order
        apiData = [
            'download=%s' % dlspeedk,
            'ping=%s' % ping,
            'upload=%s' % ulspeedk,
            'promo=',
            'startmode=%s' % 'pingselect',
            'recommendedserverid=%s' % best['id'],
            'accuracy=%s' % 1,
            'serverid=%s' % best['id'],
            'hash=%s' % md5(('%s-%s-%s-%s' %
                             (ping, ulspeedk, dlspeedk, '297aae72'))
                            .encode()).hexdigest()]

        headers = {'Referer': 'http://c.speedtest.net/flash/speedtest.swf'}
        request = build_request('://www.speedtest.net/api/api.php',
                                data='&'.join(apiData).encode(),
                                headers=headers)
        f, e = catch_request(request)
        if e:
            print_('Could not submit results to speedtest.net: %s' % e)
            sys.exit(1)
        response = f.read()
        code = f.code
        f.close()

        if int(code) != 200:
            print_('Could not submit results to speedtest.net')
            sys.exit(1)

        qsargs = parse_qs(response.decode())
        resultid = qsargs.get('resultid')
        if not resultid or len(resultid) != 1:
            print_('Could not submit results to speedtest.net')
            sys.exit(1)

        print_('Share results: %s://www.speedtest.net/result/%s.png' %
               (scheme, resultid[0]))
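
The --mini handling above leans on os.path.splitext to tell a file URL from a directory URL: if splitting yields a non-empty extension, the path component is assumed to name a file and its dirname is used instead. A sketch with a hypothetical URL:

import os

mini = 'http://mini.example.com/speedtest/index.html'   # hypothetical URL
name, ext = os.path.splitext(mini)
url = os.path.dirname(mini) if ext else mini
print(url)   # http://mini.example.com/speedtest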

Example 42

Project: sklearn-theano
Source File: gen_rst.py
View license
def generate_file_rst(fname, target_dir, src_dir, root_dir, plot_gallery):
    """ Generate the rst file for a given example.

    Returns the set of sklearn functions/classes imported in the example.
    """
    base_image_name = os.path.splitext(fname)[0]
    image_fname = '%s_%%03d.png' % base_image_name

    this_template = rst_template
    last_dir = os.path.split(src_dir)[-1]
    # to avoid leading . in file names, and wrong names in links
    if last_dir == '.' or last_dir == 'examples':
        last_dir = ''
    else:
        last_dir += '_'
    short_fname = last_dir + fname
    src_file = os.path.join(src_dir, fname)
    example_file = os.path.join(target_dir, fname)
    shutil.copyfile(src_file, example_file)

    # The following is a list containing all the figure names
    figure_list = []

    image_dir = os.path.join(target_dir, 'images')
    thumb_dir = os.path.join(image_dir, 'thumb')
    if not os.path.exists(image_dir):
        os.makedirs(image_dir)
    if not os.path.exists(thumb_dir):
        os.makedirs(thumb_dir)
    image_path = os.path.join(image_dir, image_fname)
    stdout_path = os.path.join(image_dir,
                               'stdout_%s.txt' % base_image_name)
    time_path = os.path.join(image_dir,
                             'time_%s.txt' % base_image_name)
    thumb_file = os.path.join(thumb_dir, fname[:-3] + '.png')
    time_elapsed = 0
    time_m = 0
    time_s = 0
    if plot_gallery and fname.startswith('plot'):
        # generate the plot as png image if file name
        # starts with plot and if it is more recent than an
        # existing image.
        first_image_file = image_path % 1
        if os.path.exists(stdout_path):
            stdout = open(stdout_path).read()
        else:
            stdout = ''
        if os.path.exists(time_path):
            time_elapsed = float(open(time_path).read())

        if not os.path.exists(first_image_file) or \
           os.stat(first_image_file).st_mtime <= os.stat(src_file).st_mtime:
            # We need to execute the code
            print('plotting %s' % fname)
            t0 = time()
            import matplotlib.pyplot as plt
            plt.close('all')
            cwd = os.getcwd()
            try:
                # First CD in the original example dir, so that any file
                # created by the example get created in this directory
                orig_stdout = sys.stdout
                os.chdir(os.path.dirname(src_file))
                my_buffer = StringIO()
                my_stdout = Tee(sys.stdout, my_buffer)
                sys.stdout = my_stdout
                my_globals = {'pl': plt}
                execfile(os.path.basename(src_file), my_globals)
                time_elapsed = time() - t0
                sys.stdout = orig_stdout
                my_stdout = my_buffer.getvalue()

                if '__doc__' in my_globals:
                    # The __doc__ is often printed in the example, we
                    # don't wish to echo it
                    my_stdout = my_stdout.replace(
                        my_globals['__doc__'],
                        '')
                my_stdout = my_stdout.strip()
                if my_stdout:
                    stdout = '**Script output**::\n\n  %s\n\n' % (
                        '\n  '.join(my_stdout.split('\n')))
                open(stdout_path, 'w').write(stdout)
                open(time_path, 'w').write('%f' % time_elapsed)
                os.chdir(cwd)

                # In order to save every figure we have two solutions :
                # * iterate from 1 to infinity and call plt.fignum_exists(n)
                #   (this requires the figures to be numbered
                #    incrementally: 1, 2, 3 and not 1, 2, 5)
                # * iterate over [fig_mngr.num for fig_mngr in
                #   matplotlib._pylab_helpers.Gcf.get_all_fig_managers()]
                fig_managers = matplotlib._pylab_helpers.Gcf.get_all_fig_managers()
                for fig_mngr in fig_managers:
                    # Set the fig_num figure as the current figure as we can't
                    # save a figure that's not the current figure.
                    plt.figure(fig_mngr.num)
                    plt.savefig(image_path % fig_mngr.num)
                    figure_list.append(image_fname % fig_mngr.num)
            except:
                print(80 * '_')
                print('%s is not compiling:' % fname)
                traceback.print_exc()
                print(80 * '_')
            finally:
                os.chdir(cwd)
                sys.stdout = orig_stdout

            print(" - time elapsed : %.2g sec" % time_elapsed)
        else:
            figure_list = [f[len(image_dir):]
                           for f in glob.glob(image_path.replace("%03d",
                                                '[0-9][0-9][0-9]'))]
        figure_list.sort()

        # generate thumb file
        this_template = plot_rst_template
        car_thumb_path = os.path.join(os.path.split(root_dir)[0], '_build/html/dev/_images/')
        # Note: normally, make_thumbnail is used to write to the path contained in `thumb_file`
        # which is within `auto_examples/../images/thumbs` depending on the example.
        # Because the carousel has different dimensions than those of the examples gallery,
        # I did not simply reuse them all as some contained whitespace due to their default gallery
        # thumbnail size. Below, for a few cases, separate thumbnails are created (the originals can't
        # just be overwritten with the carousel dimensions as it messes up the examples gallery layout).
        # The special carousel thumbnails are written directly to
        # _build/html/dev/_images/,
        # as for some reason unknown to me, Sphinx refuses to copy my 'extra' thumbnails from the
        # auto examples gallery to the _build folder. This works fine as is, but it would be cleaner to
        # have it happen with the rest. Ideally they should be written to 'thumb_file' as well, and then
        # copied to the _images folder during the `Copying Downloadable Files` step like the rest.
        if not os.path.exists(car_thumb_path):
            os.makedirs(car_thumb_path)
        if os.path.exists(first_image_file):
            # We generate extra special thumbnails for the carousel
            carousel_tfile = os.path.join(car_thumb_path, fname[:-3] + '_carousel.png')
            first_img = image_fname % 1
            if first_img in carousel_thumbs:
                make_thumbnail((image_path % carousel_thumbs[first_img][0]),
                               carousel_tfile, carousel_thumbs[first_img][1], 190)
            make_thumbnail(first_image_file, thumb_file, 400, 280)

    if not os.path.exists(thumb_file):
        # create something to replace the thumbnail
        make_thumbnail('images/no_image.png', thumb_file, 200, 140)

    docstring, short_desc, end_row = extract_docstring(example_file)

    # Depending on whether we have one or more figures, we're using a
    # horizontal list or a single rst call to 'image'.
    if len(figure_list) == 1:
        figure_name = figure_list[0]
        image_list = SINGLE_IMAGE % figure_name.lstrip('/')
    else:
        image_list = HLIST_HEADER
        for figure_name in figure_list:
            image_list += HLIST_IMAGE_TEMPLATE % figure_name.lstrip('/')

    time_m, time_s = divmod(time_elapsed, 60)
    f = open(os.path.join(target_dir, fname[:-2] + 'rst'), 'w')
    f.write(this_template % locals())
    f.flush()

    # save variables so we can later add links to the documentation
    example_code_obj = identify_names(open(example_file).read())
    if example_code_obj:
        codeobj_fname = example_file[:-3] + '_codeobj.pickle'
        with open(codeobj_fname, 'wb') as fid:
            pickle.dump(example_code_obj, fid, pickle.HIGHEST_PROTOCOL)

    backrefs = set('{module_short}.{name}'.format(**entry)
                   for entry in example_code_obj.values()
                   if entry['module'].startswith('sklearn'))
    return backrefs

Example 43

Project: tumblr-utils
Source File: tumblr_backup.py
View license
    def backup(self, account):
        """makes single files and an index for every post on a public Tumblr blog account"""

        self.index = defaultdict(lambda: defaultdict(list))
        self.archives = []

        base = get_api_url(account)

        # make sure there are folders to save in
        global save_folder, media_folder, post_ext, post_dir, save_dir, have_custom_css
        if options.blosxom:
            save_folder = root_folder
            post_ext = '.txt'
            post_dir = os.curdir
            post_class = BlosxomPost
        else:
            save_folder = join(root_folder, options.outdir or account)
            media_folder = path_to(media_dir)
            if options.dirs:
                post_ext = ''
                save_dir = '../../'
                mkdir(path_to(post_dir), True)
            else:
                mkdir(save_folder, True)
            post_class = TumblrPost
            have_custom_css = os.access(path_to(custom_css), os.R_OK)

        self.post_count = 0

        # get the highest post id already saved
        ident_max = None
        if options.incremental:
            try:
                ident_max = max(
                    long(splitext(split(f)[1])[0])
                    for f in glob(path_to(post_dir, '*' + post_ext))
                )
                log(account, "Backing up posts after %d\r" % ident_max)
            except ValueError:  # max() arg is an empty sequence
                pass
        else:
            log(account, "Getting basic information\r")

        # start by calling the API with just a single post
        soup = apiparse(base, 1)
        if not soup:
            self.errors = True
            return

        # collect all the meta information
        resp = soup['response']
        blog = resp['blog']
        try:
            self.title = escape(blog['title'])
        except KeyError:
            self.title = account
        self.subtitle = blog['description']

        # use the meta information to create an HTML header
        TumblrPost.post_header = self.header(body_class='post')

        # find the post number limit to back up
        last_post = blog['posts']
        if options.count:
            last_post = min(last_post, options.count + options.skip)

        def _backup(posts):
            for p in sorted(posts, key=lambda x: x['id'], reverse=True):
                post = post_class(p)
                if ident_max and long(post.ident) <= ident_max:
                    return False
                if options.period:
                    if post.date >= options.p_stop:
                        continue
                    if post.date < options.p_start:
                        return False
                if options.request:
                    if post.typ not in options.request:
                        continue
                    tags = options.request[post.typ]
                    if not (TAG_ANY in tags or tags & post.tags_lower):
                        continue
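                # --no-reblog: skip posts whose reblog trail does not end in the
                # blog's own content (the last trail item lacks 'is_current_item')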
                if options.no_reblog:
                    if 'reblogged_from_name' in p or 'reblogged_root_name' in p:
                        if 'trail' in p and not p['trail']:
                            continue
                        elif 'trail' in p and 'is_current_item' not in p['trail'][-1]:
                            continue
                    elif 'trail' in p and p['trail'] and 'is_current_item' not in p['trail'][-1]:
                        continue
                backup_pool.add_work(post.save_content)
                self.post_count += 1
            return True

        # start the thread pool
        backup_pool = ThreadPool()
        try:
            # Get the JSON entries from the API, which we can only do for at most 50 posts at a time.
            # Posts "arrive" in reverse chronological order. Post #0 is the most recent one.
            last_batch = MAX_POSTS
            i = options.skip
            while i < last_post:
                # find the upper bound
                j = min(i + MAX_POSTS, last_post)
                log(account, "Getting posts %d to %d of %d\r" % (i, j - 1, last_post))

                soup = apiparse(base, j - i, i)
                if soup is None:
                    i += last_batch     # try the next batch
                    self.errors = True
                    continue

                posts = soup['response']['posts']
                if not _backup(posts):
                    break

                last_batch = len(posts)
                i += last_batch
        except:
            # ensure proper thread pool termination
            backup_pool.cancel()
            raise

        # wait until all posts have been saved
        backup_pool.wait()

        # postprocessing
        if not options.blosxom and self.post_count:
            get_avatar()
            get_style()
            if not have_custom_css:
                save_style()
            self.build_index()
            self.save_index()

        log(account, "%d posts backed up\n" % self.post_count)
        self.total_count += self.post_count
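
The incremental branch above recovers the highest saved post id from the filenames themselves: split(f)[1] drops the directory, splitext(...)[0] drops the extension, and the remaining stem is the numeric id. A minimal standalone sketch of the same pattern (directory and extension are hypothetical):

from glob import glob
from os.path import split, splitext

try:
    # e.g. posts/12345.txt, posts/67890.txt -> ident_max == 67890
    ident_max = max(int(splitext(split(f)[1])[0])
                    for f in glob('posts/*.txt'))
except ValueError:  # max() arg is an empty sequence: nothing saved yet
    ident_max = None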

Example 44

Project: networkx
Source File: decorators.py
View license
def open_file(path_arg, mode='r'):
    """Decorator to ensure clean opening and closing of files.

    Parameters
    ----------
    path_arg : int
        Location of the path argument in args.  Even if the argument is a
        named positional argument (with a default value), you must specify its
        index as a positional argument.
    mode : str
        String for opening mode.

    Returns
    -------
    _open_file : function
        Function which cleanly executes the io.

    Examples
    --------
    Decorate functions like this::

       @open_file(0,'r')
       def read_function(pathname):
           pass

       @open_file(1,'w')
       def write_function(G,pathname):
           pass

       @open_file(1,'w')
       def write_function(G, pathname='graph.dot'):
           pass

       @open_file('path', 'w+')
       def another_function(arg, **kwargs):
           path = kwargs['path']
           pass
    """
    # Note that this decorator solves the problem when a path argument is
    # specified as a string, but it does not handle the situation when the
    # function wants to accept a default of None (and then handle it).
    # Here is an example:
    #
    # @open_file('path')
    # def some_function(arg1, arg2, path=None):
    #    if path is None:
    #        fobj = tempfile.NamedTemporaryFile(delete=False)
    #        close_fobj = True
    #    else:
    #        # `path` could have been a string or file object or something
    #        # similar. In any event, the decorator has given us a file object
    #        # and it will close it for us, if it should.
    #        fobj = path
    #        close_fobj = False
    #
    #    try:
    #        fobj.write('blah')
    #    finally:
    #        if close_fobj:
    #            fobj.close()
    #
    # Normally, we'd want to use "with" to ensure that fobj gets closed.
    # However, recall that the decorator will make `path` a file object for
    # us, and using "with" would undesirably close that file object. Instead,
    # you use a try block, as shown above. When we exit the function, fobj will
    # be closed, if it should be, by the decorator.

    @decorator
    def _open_file(func, *args, **kwargs):

        # Note that since we have used @decorator, *args and **kwargs have
        # already been resolved to match the function signature of func. This
        # means default values have been propagated. For example, the function
        # func(x, y, a=1, b=2, **kwargs) if called as func(0,1,b=5,c=10) would
        # have args=(0,1,1,5) and kwargs={'c':10}.

        # First we parse the arguments of the decorator. The path_arg could
        # be a positional argument or a keyword argument; we try the
        # positional lookup first and fall back to the keyword lookup.
        try:
            # path_arg is a required positional argument
            # This works precisely because we are using @decorator
            path = args[path_arg]
        except TypeError:
            # path_arg is a keyword argument. It is "required" in the sense
            # that it must exist, according to the decorator specification.
            # It can exist in `kwargs` by a developer-specified default value
            # or it could have been explicitly set by the user.
            try:
                path = kwargs[path_arg]
            except KeyError:
                # Could not find the keyword. Thus, no default was specified
                # in the function signature and the user did not provide it.
                msg = 'Missing required keyword argument: {0}'
                raise nx.NetworkXError(msg.format(path_arg))
            else:
                is_kwarg = True
        except IndexError:
            # A "required" argument was missing. This can only happen if
            # the decorator of the function was incorrectly specified.
            # So this probably is not a user error, but a developer error.
            msg = "path_arg of open_file decorator is incorrect"
            raise nx.NetworkXError(msg)
        else:
            is_kwarg = False

        # Now we have the path_arg. There are two types of input to consider:
        #   1) string representing a path that should be opened
        #   2) an already opened file object
        if is_string_like(path):
            ext = splitext(path)[1]
            fobj = _dispatch_dict[ext](path, mode=mode)
            close_fobj = True
        elif hasattr(path, 'read'):
            # path is already a file-like object
            fobj = path
            close_fobj = False
        else:
            # could be None, in which case the algorithm will deal with it
            fobj = path
            close_fobj = False

        # Insert file object into args or kwargs.
        if is_kwarg:
            new_args = args
            kwargs[path_arg] = fobj
        else:
            # args is a tuple, so we must convert to list before modifying it.
            new_args = list(args)
            new_args[path_arg] = fobj

        # Finally, we call the original function, making sure to close the fobj.
        try:
            result = func(*new_args, **kwargs)
        finally:
            if close_fobj:
                fobj.close()

        return result

    return _open_file
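
splitext carries the real decision here: the extension picked off the path indexes _dispatch_dict to select an opener. The library's table is not shown in this excerpt; a minimal sketch of how such a dispatch table can look (assumed, not networkx's exact definition):

import bz2
import gzip
from collections import defaultdict
from os.path import splitext

# Unknown extensions fall back to the built-in open().
_dispatch_dict = defaultdict(lambda: open)
_dispatch_dict['.gz'] = gzip.open
_dispatch_dict['.bz2'] = bz2.BZ2File

def smart_open(path, mode='r'):
    ext = splitext(path)[1]  # '' for extensionless paths -> plain open()
    return _dispatch_dict[ext](path, mode=mode)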

Example 45

Project: maltrail
Source File: httpd.py
View license
def start_httpd(address=None, port=None, join=False, pem=None):
    """
    Starts HTTP server
    """

    class ThreadingServer(SocketServer.ThreadingMixIn, BaseHTTPServer.HTTPServer):
        def server_bind(self):
            self.socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
            BaseHTTPServer.HTTPServer.server_bind(self)

        def finish_request(self, *args, **kwargs):
            try:
                BaseHTTPServer.HTTPServer.finish_request(self, *args, **kwargs)
            except:
                if config.SHOW_DEBUG:
                    traceback.print_exc()

    class SSLThreadingServer(ThreadingServer):
        def __init__(self, server_address, pem, HandlerClass):
            import OpenSSL  # python-openssl

            ThreadingServer.__init__(self, server_address, HandlerClass)
            ctx = OpenSSL.SSL.Context(OpenSSL.SSL.TLSv1_METHOD)
            ctx.use_privatekey_file(pem)
            ctx.use_certificate_file(pem)
            self.socket = OpenSSL.SSL.Connection(ctx, socket.socket(self.address_family, self.socket_type))
            self.server_bind()
            self.server_activate()

        def shutdown_request(self, request):
            try:
                request.shutdown()
            except:
                if config.SHOW_DEBUG:
                    traceback.print_exc()

    class ReqHandler(BaseHTTPServer.BaseHTTPRequestHandler):
        def do_GET(self):
            path, query = self.path.split('?', 1) if '?' in self.path else (self.path, "")
            params = {}
            content = None
            skip = False

            if hasattr(self, "data"):
                params.update(urlparse.parse_qs(self.data))

            if query:
                params.update(urlparse.parse_qs(query))

            for key in params:
                if params[key]:
                    params[key] = params[key][-1]

            if path == '/':
                path = "index.html"

            path = path.strip('/')
            extension = os.path.splitext(path)[-1].lower()

            if hasattr(self, "_%s" % path):
                content = getattr(self, "_%s" % path)(params)

            else:
                path = path.replace('/', os.path.sep)
                path = os.path.abspath(os.path.join(HTML_DIR, path)).strip()

                if not os.path.isfile(path) and os.path.isfile("%s.html" % path):
                    path = "%s.html" % path

                if ".." not in os.path.relpath(path, HTML_DIR) and os.path.isfile(path) and (extension not in DISABLED_CONTENT_EXTENSIONS or os.path.split(path)[-1] in CONTENT_EXTENSIONS_EXCLUSIONS):
                    mtime = time.gmtime(os.path.getmtime(path))
                    if_modified_since = self.headers.get(HTTP_HEADER.IF_MODIFIED_SINCE)

                    if if_modified_since and extension not in (".htm", ".html"):
                        if_modified_since = [_ for _ in if_modified_since.split(';') if _.upper().endswith("GMT")][0]
                        if time.mktime(mtime) <= time.mktime(time.strptime(if_modified_since, HTTP_TIME_FORMAT)):
                            self.send_response(httplib.NOT_MODIFIED)
                            self.send_header(HTTP_HEADER.CONNECTION, "close")
                            skip = True

                    if not skip:
                        content = open(path, "rb").read()
                        last_modified = time.strftime(HTTP_TIME_FORMAT, mtime)
                        self.send_response(httplib.OK)
                        self.send_header(HTTP_HEADER.CONNECTION, "close")
                        self.send_header(HTTP_HEADER.CONTENT_TYPE, mimetypes.guess_type(path)[0] or "application/octet-stream")
                        self.send_header(HTTP_HEADER.LAST_MODIFIED, last_modified)
                        if extension not in (".htm", ".html"):
                            self.send_header(HTTP_HEADER.EXPIRES, "Sun, 17-Jan-2038 19:14:07 GMT")        # Reference: http://blog.httpwatch.com/2007/12/10/two-simple-rules-for-http-caching/
                            self.send_header(HTTP_HEADER.CACHE_CONTROL, "max-age=3600, must-revalidate")  # Reference: http://stackoverflow.com/a/5084555
                        else:
                            self.send_header(HTTP_HEADER.CACHE_CONTROL, "no-cache")

                else:
                    self.send_response(httplib.NOT_FOUND)
                    self.send_header(HTTP_HEADER.CONNECTION, "close")
                    content = '<!DOCTYPE html><html lang="en"><head><title>404 Not Found</title></head><body><h1>Not Found</h1><p>The requested URL %s was not found on this server.</p></body></html>' % self.path.split('?')[0]

            if content is not None:
                for match in re.finditer(r"<\!(\w+)\!>", content):
                    name = match.group(1)
                    _ = getattr(self, "_%s" % name.lower(), None)
                    if _:
                        content = self._format(content, **{ name: _() })

                if "gzip" in self.headers.getheader(HTTP_HEADER.ACCEPT_ENCODING, ""):
                    self.send_header(HTTP_HEADER.CONTENT_ENCODING, "gzip")
                    _ = cStringIO.StringIO()
                    compress = gzip.GzipFile("", "w+b", 9, _)
                    compress._stream = _
                    compress.write(content)
                    compress.flush()
                    compress.close()
                    content = compress._stream.getvalue()

                self.send_header(HTTP_HEADER.CONTENT_LENGTH, str(len(content)))

            self.end_headers()

            if content:
                self.wfile.write(content)

            self.wfile.flush()
            self.wfile.close()

        def do_POST(self):
            length = self.headers.getheader(HTTP_HEADER.CONTENT_LENGTH)
            data = self.rfile.read(int(length))
            data = urllib.unquote_plus(data)
            self.data = data
            self.do_GET()

        def get_session(self):
            retval = None
            cookie = self.headers.get(HTTP_HEADER.COOKIE)

            if cookie:
                match = re.search(r"%s\s*=\s*([^;]+)" % SESSION_COOKIE_NAME, cookie)
                if match:
                    session = match.group(1)
                    if session in SESSIONS:
                        if SESSIONS[session].client_ip != self.client_address[0]:
                            pass
                        elif SESSIONS[session].expiration > time.time():
                            retval = SESSIONS[session]
                        else:
                            del SESSIONS[session]

            return retval

        def delete_session(self):
            cookie = self.headers.get(HTTP_HEADER.COOKIE)

            if cookie:
                match = re.search(r"%s=(.+)" % SESSION_COOKIE_NAME, cookie)
                if match:
                    session = match.group(1)
                    if session in SESSIONS:
                        del SESSIONS[session]

        def version_string(self):
            return SERVER_HEADER

        def end_headers(self):
            if not hasattr(self, "_headers_ended"):
                BaseHTTPServer.BaseHTTPRequestHandler.end_headers(self)
                self._headers_ended = True

        def log_message(self, format, *args):
            return

        def finish(self):
            try:
                BaseHTTPServer.BaseHTTPRequestHandler.finish(self)
            except:
                if config.SHOW_DEBUG:
                    traceback.print_exc()

        def _version(self):
            return VERSION

        def _format(self, content, **params):
            if content:
                for key, value in params.items():
                    content = content.replace("<!%s!>" % key, value)

            return content

        def _login(self, params):
            valid = False

            if params.get("username") and params.get("hash") and params.get("nonce"):
                if params.get("nonce") not in DISPOSED_NONCES:
                    DISPOSED_NONCES.add(params.get("nonce"))
                    for entry in (config.USERS or []):
                        entry = re.sub(r"\s", "", entry)
                        username, stored_hash, uid, netfilter = entry.split(':')
                        if username == params.get("username"):
                            try:
                                if params.get("hash") == hashlib.sha256(stored_hash.strip() + params.get("nonce")).hexdigest():
                                    valid = True
                                    break
                            except:
                                if config.SHOW_DEBUG:
                                    traceback.print_exc()

            if valid:
                session_id = os.urandom(SESSION_ID_LENGTH).encode("hex")
                expiration = time.time() + 3600 * SESSION_EXPIRATION_HOURS

                self.send_response(httplib.OK)
                self.send_header(HTTP_HEADER.CONNECTION, "close")
                self.send_header(HTTP_HEADER.SET_COOKIE, "%s=%s; expires=%s; path=/; HttpOnly" % (SESSION_COOKIE_NAME, session_id, time.strftime(HTTP_TIME_FORMAT, time.gmtime(expiration))))

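                # expand the user's netfilter spec into concrete addresses and netmasks;
                # an empty or catch-all ("0.0.0.0/0") filter means no filtering at all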
                if netfilter in ("", "0.0.0.0/0"):
                    netfilters = None
                else:
                    addresses = set()
                    netmasks = set()

                    for item in set(re.split(r"[;,]", netfilter)):
                        item = item.strip()
                        if '/' in item:
                            _ = item.split('/')[-1]
                            if _.isdigit() and int(_) >= 16:
                                lower = addr_to_int(item.split('/')[0])
                                mask = make_mask(int(_))
                                upper = lower | (0xffffffff ^ mask)
                                while lower <= upper:
                                    addresses.add(int_to_addr(lower))
                                    lower += 1
                            else:
                                netmasks.add(item)
                        elif '-' in item:
                            _ = item.split('-')
                            lower, upper = addr_to_int(_[0]), addr_to_int(_[1])
                            while lower <= upper:
                                addresses.add(int_to_addr(lower))
                                lower += 1
                        elif re.search(r"\d+\.\d+\.\d+\.\d+", item):
                            addresses.add(item)

                    netfilters = netmasks
                    if addresses:
                        netfilters.add(get_regex(addresses))

                SESSIONS[session_id] = AttribDict({"username": username, "uid": uid, "netfilters": netfilters, "expiration": expiration, "client_ip": self.client_address[0]})
            else:
                time.sleep(UNAUTHORIZED_SLEEP_TIME)
                self.send_response(httplib.UNAUTHORIZED)
                self.send_header(HTTP_HEADER.CONNECTION, "close")

            self.send_header(HTTP_HEADER.CONTENT_TYPE, "text/plain")
            content = "Login %s" % ("success" if valid else "failed")

            if not subprocess.mswindows:
                try:
                    subprocess.check_output("logger -p auth.info -t \"%s[%d]\" \"%s password for %s from %s port %s\"" % (NAME.lower(), os.getpid(), "Accepted" if valid else "Failed", params.get("username"), self.client_address[0], self.client_address[1]), stderr=subprocess.STDOUT, shell=True)
                except Exception:
                    if config.SHOW_DEBUG:
                        traceback.print_exc()

            return content

        def _logout(self, params):
            self.delete_session()
            self.send_response(httplib.FOUND)
            self.send_header(HTTP_HEADER.CONNECTION, "close")
            self.send_header(HTTP_HEADER.LOCATION, "/")

        def _whoami(self, params):
            session = self.get_session()
            username = session.username if session else ""

            self.send_response(httplib.OK)
            self.send_header(HTTP_HEADER.CONNECTION, "close")
            self.send_header(HTTP_HEADER.CONTENT_TYPE, "text/plain")

            return username

        def _check_ip(self, params):
            session = self.get_session()

            if session is None:
                self.send_response(httplib.UNAUTHORIZED)
                self.send_header(HTTP_HEADER.CONNECTION, "close")
                return None

            self.send_response(httplib.OK)
            self.send_header(HTTP_HEADER.CONNECTION, "close")
            self.send_header(HTTP_HEADER.CONTENT_TYPE, "text/plain")

            try:
                result_worst = worst_asns(params.get("address"))
                if result_worst:
                    result_ipcat = result_worst
                else:
                    _ = (ipcat_lookup(params.get("address")) or "").lower().split(' ')
                    result_ipcat = _[1] if _[0] == 'the' else _[0]
                return ("%s" if not params.get("callback") else "%s(%%s)" % params.get("callback")) % json.dumps({"ipcat": result_ipcat, "worst_asns": str(result_worst is not None).lower()})
            except:
                if config.SHOW_DEBUG:
                    traceback.print_exc()

        def _trails(self, params):
            self.send_response(httplib.OK)
            self.send_header(HTTP_HEADER.CONNECTION, "close")
            self.send_header(HTTP_HEADER.CONTENT_TYPE, "text/plain")

            return open(TRAILS_FILE, "rb").read()

        def _ping(self, params):
            self.send_response(httplib.OK)
            self.send_header(HTTP_HEADER.CONNECTION, "close")
            self.send_header(HTTP_HEADER.CONTENT_TYPE, "text/plain")

            return PING_RESPONSE

        def _events(self, params):
            session = self.get_session()

            if session is None:
                self.send_response(httplib.UNAUTHORIZED)
                self.send_header(HTTP_HEADER.CONNECTION, "close")
                return None

            start, end, size, total = None, None, -1, None
            content = None
            log_exists = False
            dates = params.get("date", "")

            if ".." in dates:
                pass
            elif '_' not in dates:
                try:
                    date = datetime.datetime.strptime(dates, "%Y-%m-%d").strftime("%Y-%m-%d")
                    event_log_path = os.path.join(config.LOG_DIR, "%s.log" % date)
                    if os.path.exists(event_log_path):
                        range_handle = open(event_log_path, "rb")
                        log_exists = True
                except ValueError:
                    print "[!] invalid date format in request"
                    log_exists = False
            else:
                logs_data = ""
                date_interval = dates.split("_", 1)
                try:
                    start_date = datetime.datetime.strptime(date_interval[0], "%Y-%m-%d").date()
                    end_date = datetime.datetime.strptime(date_interval[1], "%Y-%m-%d").date()
                    for i in xrange(int((end_date - start_date).days) + 1):
                        date = start_date + datetime.timedelta(i)
                        event_log_path = os.path.join(config.LOG_DIR, "%s.log" % date.strftime("%Y-%m-%d"))
                        if os.path.exists(event_log_path):
                            log_handle = open(event_log_path, "rb")
                            logs_data += log_handle.read()
                            log_handle.close()

                    range_handle = io.BytesIO(logs_data)
                    log_exists = True
                except ValueError:
                    print "[!] invalid date format in request"
                    log_exists = False

            if log_exists:
                range_handle.seek(0, 2)
                total = range_handle.tell()
                range_handle.seek(0)

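                # clients page through the log with HTTP Range requests; sessions with
                # netfilters get a filtered view assembled line by line instead of a raw slice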
                if self.headers.get(HTTP_HEADER.RANGE):
                    match = re.search(r"bytes=(\d+)-(\d+)", self.headers[HTTP_HEADER.RANGE])
                    if match:
                        start, end = int(match.group(1)), int(match.group(2))
                        max_size = end - start + 1
                        end = min(total - 1, end)
                        size = end - start + 1

                        if start == 0 or not session.range_handle:
                            session.range_handle = range_handle

                        if session.netfilters is None:
                            session.range_handle.seek(start)
                            self.send_response(httplib.PARTIAL_CONTENT)
                            self.send_header(HTTP_HEADER.CONNECTION, "close")
                            self.send_header(HTTP_HEADER.CONTENT_TYPE, "text/plain")
                            self.send_header(HTTP_HEADER.CONTENT_RANGE, "bytes %d-%d/%d" % (start, end, total))
                            content = session.range_handle.read(size)
                        else:
                            self.send_response(httplib.OK)
                            self.send_header(HTTP_HEADER.CONNECTION, "close")
                            self.send_header(HTTP_HEADER.CONTENT_TYPE, "text/plain")

                            buffer, addresses, netmasks, regex = cStringIO.StringIO(), set(), [], ""
                            for netfilter in session.netfilters:
                                if not netfilter:
                                    continue
                                if '/' in netfilter:
                                    netmasks.append(netfilter)
                                elif re.search(r"\A[\d.]+\Z", netfilter):
                                    addresses.add(netfilter)
                                elif '\.' in netfilter:
                                    regex = r"\b(%s)\b" % netfilter
                                else:
                                    print "[!] invalid network filter '%s'" % netfilter
                                    return

                            for line in session.range_handle:
                                display = False
                                ip = None

                                if regex:
                                    match = re.search(regex, line)
                                    if match:
                                        ip = match.group(1)
                                        display = True

                                if not display and (addresses or netmasks):
                                    for match in re.finditer(r"\b(\d+\.\d+\.\d+\.\d+)\b", line):
                                        if not display:
                                            ip = match.group(1)
                                        else:
                                            break

                                        if ip in addresses:
                                            display = True
                                            break
                                        elif netmasks:
                                            for _ in netmasks:
                                                prefix, mask = _.split('/')
                                                if addr_to_int(ip) & make_mask(int(mask)) == addr_to_int(prefix):
                                                    addresses.add(ip)
                                                    display = True
                                                    break

                                if display:
                                    if ",%s" % ip in line or "%s," % ip in line:
                                        line = re.sub(r" ([\d.,]+,)?%s(,[\d.,]+)? " % re.escape(ip), " %s " % ip, line)
                                    buffer.write(line)
                                    if buffer.tell() >= max_size:
                                        break

                            content = buffer.getvalue()
                            end = start + len(content) - 1
                            self.send_header(HTTP_HEADER.CONTENT_RANGE, "bytes %d-%d/%d" % (start, end, end + 1 + max_size * (len(content) >= max_size)))

                        if len(content) < max_size:
                            session.range_handle.close()
                            session.range_handle = None

                if size == -1:
                    self.send_response(httplib.OK)
                    self.send_header(HTTP_HEADER.CONNECTION, "close")
                    self.send_header(HTTP_HEADER.CONTENT_TYPE, "text/plain")
                    self.end_headers()

                    with range_handle as f:
                        while True:
                            data = f.read(io.DEFAULT_BUFFER_SIZE)
                            if not data:
                                break
                            else:
                                self.wfile.write(data)

            else:
                self.send_response(httplib.OK)  # instead of httplib.NO_CONTENT (compatibility reasons)
                self.send_header(HTTP_HEADER.CONNECTION, "close")
                if self.headers.get(HTTP_HEADER.RANGE):
                    self.send_header(HTTP_HEADER.CONTENT_RANGE, "bytes 0-0/0")

            return content

        def _counts(self, params):
            counts = {}

            session = self.get_session()

            if session is None:
                self.send_response(httplib.UNAUTHORIZED)
                self.send_header(HTTP_HEADER.CONNECTION, "close")
                return None

            self.send_response(httplib.OK)
            self.send_header(HTTP_HEADER.CONNECTION, "close")
            self.send_header(HTTP_HEADER.CONTENT_TYPE, "application/json")

            match = re.search(r"\d+\-\d+\-\d+", params.get("from", ""))
            if match:
                min_ = datetime.datetime.strptime(match.group(0), DATE_FORMAT)
            else:
                min_ = datetime.datetime.fromtimestamp(0)

            match = re.search(r"\d+\-\d+\-\d+", params.get("to", ""))
            if match:
                max_ = datetime.datetime.strptime(match.group(0), DATE_FORMAT)
            else:
                max_ = datetime.datetime.now()

            min_ = min_.replace(hour=0, minute=0, second=0, microsecond=0)
            max_ = max_.replace(hour=23, minute=59, second=59, microsecond=999999)

            for filepath in sorted(glob.glob(os.path.join(config.LOG_DIR, "*.log"))):
                filename = os.path.basename(filepath)
                if not re.search(r"\A\d{4}-\d{2}-\d{2}\.log\Z", filename):
                    continue
                try:
                    current = datetime.datetime.strptime(os.path.splitext(filename)[0], DATE_FORMAT)
                except:
                    if config.SHOW_DEBUG:
                        traceback.print_exc()
                else:
                    if min_ <= current <= max_:
                        timestamp = int(time.mktime(current.timetuple()))
                        size = os.path.getsize(filepath)
                        with open(filepath, "rb") as f:
                            content = f.read(io.DEFAULT_BUFFER_SIZE)
                            if size >= io.DEFAULT_BUFFER_SIZE:
                                total = 1.0 * content.count('\n') * size / io.DEFAULT_BUFFER_SIZE
                                counts[timestamp] = int(round(total / 100) * 100)
                            else:
                                counts[timestamp] = content.count('\n')

            return json.dumps(counts)

    class SSLReqHandler(ReqHandler):
        def setup(self):
            self.connection = self.request
            self.rfile = socket._fileobject(self.request, "rb", self.rbufsize)
            self.wfile = socket._fileobject(self.request, "wb", self.wbufsize)

    try:
        if pem:
            server = SSLThreadingServer((address or '', int(port) if str(port or "").isdigit() else 0), pem, SSLReqHandler)
        else:
            server = ThreadingServer((address or '', int(port) if str(port or "").isdigit() else 0), ReqHandler)
    except Exception as ex:
        if "Address already in use" in str(ex):
            exit("[!] another instance already running")
        elif "Name or service not known" in str(ex):
            exit("[!] invalid configuration value for 'HTTP_ADDRESS' ('%s')" % config.HTTP_ADDRESS)
        elif "Cannot assign requested address" in str(ex):
            exit("[!] can't use configuration value for 'HTTP_ADDRESS' ('%s')" % config.HTTP_ADDRESS)
        else:
            raise

    print "[i] starting HTTP%s server at 'http%s://%s:%d/'" % ('S' if pem else "", 's' if pem else "", server.server_address[0], server.server_address[1])

    print "[o] running..."

    if join:
        server.serve_forever()
    else:
        thread = threading.Thread(target=server.serve_forever)
        thread.daemon = True
        thread.start()
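
do_GET above derives the extension once (os.path.splitext(path)[-1].lower()) and reuses it to block disabled content, to decide whether If-Modified-Since applies, and to pick cache headers. A minimal sketch of that gating idea (the extension sets are illustrative placeholders, not maltrail's actual values):

import os.path

DISABLED_CONTENT_EXTENSIONS = ('.py', '.pyc')   # never served
STATIC_EXTENSIONS = ('.css', '.js', '.png')     # long-lived cache headers

def classify(path):
    extension = os.path.splitext(path)[-1].lower()
    if extension in DISABLED_CONTENT_EXTENSIONS:
        return 'forbidden'
    if extension in STATIC_EXTENSIONS:
        return 'cacheable'
    return 'no-cache'  # .htm/.html and everything else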

Example 46

Project: maltrail
Source File: sensor.py
View license
def _process_packet(packet, sec, usec, ip_offset):
    """
    Processes single (raw) IP layer data
    """

    global _connect_sec
    global _last_syn
    global _last_logged_syn
    global _last_udp
    global _last_logged_udp
    global _last_dns_exhaustion
    global _subdomains_sec

    try:
        if len(_result_cache) > MAX_RESULT_CACHE_ENTRIES:
            _result_cache.clear()

        if config.USE_HEURISTICS:
            if _locks.connect_sec:
                _locks.connect_sec.acquire()

            connect_sec = _connect_sec
            _connect_sec = sec

            if _locks.connect_sec:
                _locks.connect_sec.release()

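            # once per second: flag sources that hit too many distinct ports on a
            # destination, then reset the per-second connection bookkeeping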
            if sec > connect_sec:
                for key in _connect_src_dst:
                    if len(_connect_src_dst[key]) > PORT_SCANNING_THRESHOLD:
                        _src_ip, _dst_ip = key.split('~')
                        if not check_whitelisted(_src_ip):
                            for _ in _connect_src_details[key]:
                                log_event((sec, usec, _src_ip, _[2], _dst_ip, _[3], PROTO.TCP, TRAIL.IP, _src_ip, "potential port scanning", "(heuristic)"), packet)

                _connect_src_dst.clear()
                _connect_src_details.clear()

        ip_data = packet[ip_offset:]
        ip_version = ord(ip_data[0]) >> 4
        localhost_ip = LOCALHOST_IP[ip_version]

        if ip_version == 0x04:  # IPv4
            ip_header = struct.unpack("!BBHHHBBH4s4s", ip_data[:20])
            iph_length = (ip_header[0] & 0xf) << 2
            protocol = ip_header[6]
            src_ip = socket.inet_ntoa(ip_header[8])
            dst_ip = socket.inet_ntoa(ip_header[9])
        elif ip_version == 0x06:  # IPv6
            # Reference: http://chrisgrundemann.com/index.php/2012/introducing-ipv6-understanding-ipv6-addresses/
            ip_header = struct.unpack("!BBHHBB16s16s", ip_data[:40])
            iph_length = 40
            protocol = ip_header[4]
            src_ip = inet_ntoa6(ip_header[6])
            dst_ip = inet_ntoa6(ip_header[7])
        else:
            return

        if protocol == socket.IPPROTO_TCP:  # TCP
            src_port, dst_port, _, _, doff_reserved, flags = struct.unpack("!HHLLBB", ip_data[iph_length:iph_length+14])

            if flags != 2 and config.plugin_functions:
                if dst_ip in trails:
                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.IP, dst_ip, trails[dst_ip][0], trails[dst_ip][1]), packet, skip_write=True)
                elif src_ip in trails and dst_ip != localhost_ip:
                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.IP, src_ip, trails[src_ip][0], trails[src_ip][1]), packet, skip_write=True)

            if flags == 2:  # SYN set (only)
                _ = _last_syn
                _last_syn = (sec, src_ip, src_port, dst_ip, dst_port)
                if _ == _last_syn:  # skip bursts
                    return

                if dst_ip in trails:
                    _ = _last_logged_syn
                    _last_logged_syn = _last_syn
                    if _ != _last_logged_syn:
                        log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.IP, dst_ip, trails[dst_ip][0], trails[dst_ip][1]), packet)

                elif src_ip in trails and dst_ip != localhost_ip:
                    _ = _last_logged_syn
                    _last_logged_syn = _last_syn
                    if _ != _last_logged_syn:
                        log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.IP, src_ip, trails[src_ip][0], trails[src_ip][1]), packet)

                if config.USE_HEURISTICS:
                    if dst_ip != localhost_ip:
                        key = "%s~%s" % (src_ip, dst_ip)
                        if key not in _connect_src_dst:
                            _connect_src_dst[key] = set()
                            _connect_src_details[key] = set()
                        _connect_src_dst[key].add(dst_port)
                        _connect_src_details[key].add((sec, usec, src_port, dst_port))

            else:
                tcph_length = doff_reserved >> 4
                h_size = iph_length + (tcph_length << 2)
                tcp_data = ip_data[h_size:]

                if tcp_data.startswith("HTTP/"):
                    if any(_ in tcp_data[:tcp_data.find("\r\n\r\n")] for _ in ("X-Sinkhole:", "X-Malware-Sinkhole:", "Server: You got served", "Server: Apache 1.0/SinkSoft", "sinkdns.org")) or "\r\n\r\nsinkhole" in tcp_data:
                        log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.IP, src_ip, "sinkhole response (malware)", "(heuristic)"), packet)
                    else:
                        index = tcp_data.find("<title>")
                        if index >= 0:
                            title = tcp_data[index + len("<title>"):tcp_data.find("</title>", index)]
                            if all(_ in title.lower() for _ in ("this domain", "has been seized")):
                                log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.IP, title, "seized domain (suspicious)", "(heuristic)"), packet)

                    content_type = None
                    first_index = tcp_data.find("\r\nContent-Type:")
                    if first_index >= 0:
                        first_index = first_index + len("\r\nContent-Type:")
                        last_index = tcp_data.find("\r\n", first_index)
                        if last_index >= 0:
                            content_type = tcp_data[first_index:last_index].strip().lower()

                    if content_type and content_type in SUSPICIOUS_CONTENT_TYPES:
                        log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.HTTP, content_type, "content type (suspicious)", "(heuristic)"), packet)

                method, path = None, None
                index = tcp_data.find("\r\n")
                if index >= 0:
                    line = tcp_data[:index]
                    if line.count(' ') == 2 and " HTTP/" in line:
                        method, path, _ = line.split(' ')

                if method and path:
                    post_data = None
                    host = dst_ip
                    first_index = tcp_data.find("\r\nHost:")

                    if first_index >= 0:
                        first_index = first_index + len("\r\nHost:")
                        last_index = tcp_data.find("\r\n", first_index)
                        if last_index >= 0:
                            host = tcp_data[first_index:last_index]
                            host = host.strip().lower()
                            if host.endswith(":80"):
                                host = host[:-3]
                            if host and host[0].isalpha() and dst_ip in trails:
                                log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.IP, "%s (%s)" % (dst_ip, host.split(':')[0]), trails[dst_ip][0], trails[dst_ip][1]), packet)
                            elif config.CHECK_HOST_DOMAINS and not host.replace('.', "").isdigit():
                                _check_domain(host, sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, packet)
                    elif config.USE_HEURISTICS and config.CHECK_MISSING_HOST:
                        log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.HTTP, "%s%s" % (host, path), "missing host header (suspicious)", "(heuristic)"), packet)

                    index = tcp_data.find("\r\n\r\n")
                    if index >= 0:
                        post_data = tcp_data[index + 4:]

                    if config.USE_HEURISTICS and dst_port == 80 and path.startswith("http://") and not _check_domain_whitelisted(urlparse.urlparse(path).netloc.split(':')[0]):
                        log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.HTTP, path, "potential proxy probe (suspicious)", "(heuristic)"), packet)
                        return
                    elif "://" in path:
                        url = path.split("://", 1)[1]

                        if '/' not in url:
                            url = "%s/" % url

                        host, path = url.split('/', 1)
                        if host.endswith(":80"):
                            host = host[:-3]
                        path = "/%s" % path
                        proxy_domain = host.split(':')[0]
                        _check_domain(proxy_domain, sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, packet)
                    elif method == "CONNECT":
                        if '/' in path:
                            host, path = path.split('/', 1)
                            path = "/%s" % path
                        else:
                            host, path = path, '/'
                        if host.endswith(":80"):
                            host = host[:-3]
                        url = "%s%s" % (host, path)
                        proxy_domain = host.split(':')[0]
                        _check_domain(proxy_domain, sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, packet)
                    else:
                        url = "%s%s" % (host, path)

                    if config.USE_HEURISTICS:
                        user_agent, result = None, None

                        first_index = tcp_data.find("\r\nUser-Agent:")
                        if first_index >= 0:
                            first_index = first_index + len("\r\nUser-Agent:")
                            last_index = tcp_data.find("\r\n", first_index)
                            if last_index >= 0:
                                user_agent = tcp_data[first_index:last_index]
                                user_agent = urllib.unquote(user_agent).strip()

                        if user_agent:
                            result = _result_cache.get(user_agent)
                            if result is None:
                                if not any(_ in user_agent for _ in WHITELIST_UA_KEYWORDS):
                                    match = re.search(SUSPICIOUS_UA_REGEX, user_agent)
                                    if match:
                                        def _(value):
                                            return value.replace('(', "\\(").replace(')', "\\)")

                                        parts = user_agent.split(match.group(0), 1)

                                        if len(parts) > 1 and parts[0] and parts[-1]:
                                            result = _result_cache[user_agent] = "%s (%s)" % (_(match.group(0)), _(user_agent))
                                        else:
                                            result = _result_cache[user_agent] = _(match.group(0)).join(("(%s)" if part else "%s") % _(part) for part in parts)
                                if not result:
                                    _result_cache[user_agent] = False

                            if result:
                                log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.UA, result, "user agent (suspicious)", "(heuristic)"), packet)

                    checks = [path.rstrip('/')]
                    if '?' in path:
                        checks.append(path.split('?')[0].rstrip('/'))

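                    # also check the last variant with its extension stripped
                    # (e.g. "/dir/gate.php" additionally yields "/dir/gate")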
                    _ = os.path.splitext(checks[-1])
                    if _[1]:
                        checks.append(_[0])

                    if checks[-1].count('/') > 1:
                        checks.append(checks[-1][:checks[-1].rfind('/')])
                        checks.append(checks[0][checks[0].rfind('/'):].split('?')[0])

                    for check in filter(None, checks):
                        for _ in ("", host):
                            check = "%s%s" % (_, check)
                            if check in trails:
                                parts = url.split(check)
                                other = ("(%s)" % _ if _ else _ for _ in parts)
                                trail = check.join(other)
                                log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.URL, trail, trails[check][0], trails[check][1]))
                                return

                    if config.USE_HEURISTICS:
                        unquoted_path = urllib.unquote(path)
                        unquoted_post_data = urllib.unquote(post_data or "")
                        for char in SUSPICIOUS_HTTP_REQUEST_FORCE_ENCODE_CHARS:
                            replacement = SUSPICIOUS_HTTP_REQUEST_FORCE_ENCODE_CHARS[char]
                            path = path.replace(char, replacement)
                            if post_data:
                                post_data = post_data.replace(char, replacement)

                        if not _check_domain_whitelisted(host):
                            if not any(_ in unquoted_path.lower() for _ in WHITELIST_HTTP_REQUEST_PATHS):
                                if any(_ in unquoted_path for _ in SUSPICIOUS_HTTP_REQUEST_PRE_CONDITION):
                                    found = _result_cache.get(unquoted_path)
                                    if found is None:
                                        for desc, regex in SUSPICIOUS_HTTP_REQUEST_REGEXES:
                                            if re.search(regex, unquoted_path, re.I | re.DOTALL):
                                                found = desc
                                                break
                                        _result_cache[unquoted_path] = found or ""
                                    if found:
                                        trail = "%s(%s)" % (host, path)
                                        log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.URL, trail, "%s (suspicious)" % found, "(heuristic)"), packet)
                                        return

                                if any(_ in unquoted_post_data for _ in SUSPICIOUS_HTTP_REQUEST_PRE_CONDITION):
                                    found = _result_cache.get(unquoted_post_data)
                                    if found is None:
                                        for desc, regex in SUSPICIOUS_HTTP_REQUEST_REGEXES:
                                            if re.search(regex, unquoted_post_data, re.I | re.DOTALL):
                                                found = desc
                                                break
                                        _result_cache[unquoted_post_data] = found or ""
                                    if found:
                                        trail = "%s(%s \(%s %s\))" % (host, path, method, post_data.strip())
                                        log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.HTTP, trail, "%s (suspicious)" % found, "(heuristic)"), packet)
                                        return

                            if '.' in path:
                                _ = urlparse.urlparse("http://%s" % url)  # dummy scheme
                                path = path.lower()
                                filename = _.path.split('/')[-1]
                                name, extension = os.path.splitext(filename)
                                trail = "%s(%s)" % (host, path)
                                if extension and extension in SUSPICIOUS_DIRECT_DOWNLOAD_EXTENSIONS and not any(_ in path for _ in WHITELIST_DIRECT_DOWNLOAD_KEYWORDS) and '=' not in _.query and len(name) < 10:
                                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.URL, trail, "direct %s download (suspicious)" % extension, "(heuristic)"), packet)
                                elif filename in WEB_SHELLS:
                                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.URL, trail, "potential web shell (suspicious)", "(heuristic)"), packet)
                                else:
                                    for desc, regex in SUSPICIOUS_HTTP_PATH_REGEXES:
                                        if re.search(regex, filename, re.I):
                                            log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.URL, trail, "%s (suspicious)" % desc, "(heuristic)"), packet)
                                            break

        elif protocol == socket.IPPROTO_UDP:  # UDP
            _ = ip_data[iph_length:iph_length + 4]
            if len(_) < 4:
                return

            src_port, dst_port = struct.unpack("!HH", _)

            _ = _last_udp
            _last_udp = (sec, src_ip, src_port, dst_ip, dst_port)
            if _ == _last_udp:  # skip bursts
                return

            if src_port != 53 and dst_port != 53:  # not DNS
                if dst_ip in trails:
                    trail = dst_ip
                elif src_ip in trails:
                    trail = src_ip
                else:
                    trail = None

                if trail:
                    _ = _last_logged_udp
                    _last_logged_udp = _last_udp
                    if _ != _last_logged_udp:
                        log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.IP, trail, trails[trail][0], trails[trail][1]), packet)

            else:
                dns_data = ip_data[iph_length + 8:]

                # Reference: http://www.ccs.neu.edu/home/amislove/teaching/cs4700/fall09/handouts/project1-primer.pdf
                if len(dns_data) > 6:
                    qdcount = struct.unpack("!H", dns_data[4:6])[0]
                    if qdcount > 0:
                        offset = 12
                        query = ""

                        while len(dns_data) > offset:
                            length = ord(dns_data[offset])
                            if not length:
                                query = query[:-1]
                                break
                            query += dns_data[offset + 1:offset + length + 1] + '.'
                            offset += length + 1

                        query = query.lower()

                        if not query or '.' not in query or not all(_ in VALID_DNS_CHARS for _ in query) or any(_ in query for _ in (".intranet.",)) or any(query.endswith(_) for _ in IGNORE_DNS_QUERY_SUFFIXES):
                            return

                        parts = query.split('.')

                        if ord(dns_data[2]) == 0x01:  # standard query
                            type_, class_ = struct.unpack("!HH", dns_data[offset + 1:offset + 5])

                            if len(parts) > 2:
                                domain = '.'.join(parts[-2:])

                                if not _check_domain_whitelisted(domain):  # e.g. <hash>.hashserver.cs.trendmicro.com
                                    if (sec - (_subdomains_sec or 0)) > DAILY_SECS:
                                        _subdomains.clear()
                                        _dns_exhausted_domains.clear()
                                        _subdomains_sec = sec

                                    subdomains = _subdomains.get(domain)

                                    if not subdomains:
                                        subdomains = _subdomains[domain] = set()

                                    if len(subdomains) < DNS_EXHAUSTION_THRESHOLD:
                                        subdomains.add('.'.join(parts[:-2]))
                                    else:
                                        if (sec - (_last_dns_exhaustion or 0)) > 60:
                                            trail = "(%s).%s" % ('.'.join(parts[:-2]), '.'.join(parts[-2:]))
                                            log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.DNS, trail, "potential dns exhaustion (suspicious)", "(heuristic)"), packet)
                                            _dns_exhausted_domains.add(domain)
                                            _last_dns_exhaustion = sec

                                        return

                            # Reference: http://en.wikipedia.org/wiki/List_of_DNS_record_types
                            if type_ not in (12, 28) and class_ == 1:  # Type not in (PTR, AAAA), Class IN
                                if dst_ip in trails:
                                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.IP, "%s (%s)" % (dst_ip, query), trails[dst_ip][0], trails[dst_ip][1]), packet)
                                elif src_ip in trails:
                                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.IP, src_ip, trails[src_ip][0], trails[src_ip][1]), packet)

                                _check_domain(query, sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, packet)

                        elif config.USE_HEURISTICS:
                            if ord(dns_data[2]) & 0x80:  # standard response
                                if ord(dns_data[3]) == 0x80:  # recursion available, no error
                                    _ = offset + 5
                                    try:
                                        while _ < len(dns_data):
                                            if ord(dns_data[_]) & 0xc0 != 0 and dns_data[_ + 2] == "\00" and dns_data[_ + 3] == "\x01":  # Type A
                                                break
                                            else:
                                                _ += 12 + struct.unpack("!H", dns_data[_ + 10: _ + 12])[0]

                                        _ = dns_data[_ + 12:_ + 16]
                                        if _:
                                            answer = socket.inet_ntoa(_)
                                            if answer in trails:
                                                _ = trails[answer]
                                                if "sinkhole" in _[0]:
                                                    trail = "(%s).%s" % ('.'.join(parts[:-1]), '.'.join(parts[-1:]))
                                                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.DNS, trail, "sinkholed by %s (malware)" % _[0].split(" ")[1], "(heuristic)"), packet)  # (e.g. kitro.pl, devomchart.com, jebena.ananikolic.su, vuvet.cn)
                                                elif "parking" in _[0]:
                                                    trail = "(%s).%s" % ('.'.join(parts[:-1]), '.'.join(parts[-1:]))
                                                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.DNS, trail, "parked site (suspicious)", "(heuristic)"), packet)
                                    except IndexError:
                                        pass

                                elif ord(dns_data[3]) == 0x83:  # recursion available, no such name
                                    if '.'.join(parts[-2:]) not in _dns_exhausted_domains and not _check_domain_whitelisted(query) and not _check_domain_member(query, trails):
                                        if parts[-1].isdigit():
                                            return

                                        if not (len(parts) > 4 and all(_.isdigit() and int(_) < 256 for _ in parts[:4])):  # generic check for DNSBL IP lookups
                                            for _ in filter(None, (query, "*.%s" % '.'.join(parts[-2:]) if query.count('.') > 1 else None)):
                                                if _ not in NO_SUCH_NAME_COUNTERS or NO_SUCH_NAME_COUNTERS[_][0] != sec / 3600:
                                                    NO_SUCH_NAME_COUNTERS[_] = [sec / 3600, 1, set()]
                                                else:
                                                    NO_SUCH_NAME_COUNTERS[_][1] += 1
                                                    NO_SUCH_NAME_COUNTERS[_][2].add(query)

                                                    if NO_SUCH_NAME_COUNTERS[_][1] > NO_SUCH_NAME_PER_HOUR_THRESHOLD:
                                                        if _.startswith("*."):
                                                            log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.DNS, "%s%s" % ("(%s)" % ','.join(item.replace(_[1:], "") for item in NO_SUCH_NAME_COUNTERS[_][2]), _[1:]), "excessive no such domain (suspicious)", "(heuristic)"), packet)
                                                            for item in NO_SUCH_NAME_COUNTERS[_][2]:
                                                                try:
                                                                    del NO_SUCH_NAME_COUNTERS[item]
                                                                except KeyError:
                                                                    pass
                                                        else:
                                                            log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.DNS, _, "excessive no such domain (suspicious)", "(heuristic)"), packet)

                                                        try:
                                                            del NO_SUCH_NAME_COUNTERS[_]
                                                        except KeyError:
                                                            pass

                                                        break

                                            if len(parts) > 2:
                                                part = parts[0] if parts[0] != "www" else parts[1]
                                                trail = "(%s).%s" % ('.'.join(parts[:-2]), '.'.join(parts[-2:]))
                                            elif len(parts) == 2:
                                                part = parts[0]
                                                trail = "(%s).%s" % (parts[0], parts[1])
                                            else:
                                                part = query
                                                trail = query

                                            if part and '-' not in part:
                                                result = _result_cache.get(part)

                                                if result is None:
                                                    # Reference: https://github.com/exp0se/dga_detector
                                                    probabilities = (float(part.count(c)) / len(part) for c in set(part))
                                                    entropy = -sum(p * math.log(p) / math.log(2.0) for p in probabilities)
                                                    if entropy > SUSPICIOUS_DOMAIN_ENTROPY_THRESHOLD:
                                                        result = "entropy threshold no such domain (suspicious)"

                                                    if not result:
                                                        if sum(_ in CONSONANTS for _ in part) > SUSPICIOUS_DOMAIN_CONSONANT_THRESHOLD:
                                                            result = "consonant threshold no such domain (suspicious)"

                                                    _result_cache[part] = result or False

                                                if result:
                                                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.DNS, trail, result, "(heuristic)"), packet)

        elif protocol in IPPROTO_LUT:  # non-TCP/UDP (e.g. ICMP)
            if protocol == socket.IPPROTO_ICMP:
                if ord(ip_data[iph_length]) != 0x08:  # Non-echo request
                    return
            elif protocol == socket.IPPROTO_ICMPV6:
                if ord(ip_data[iph_length]) != 0x80:  # Non-echo request
                    return

            if dst_ip in trails:
                log_event((sec, usec, src_ip, '-', dst_ip, '-', IPPROTO_LUT[protocol], TRAIL.IP, dst_ip, trails[dst_ip][0], trails[dst_ip][1]), packet)
            elif src_ip in trails:
                log_event((sec, usec, src_ip, '-', dst_ip, '-', IPPROTO_LUT[protocol], TRAIL.IP, src_ip, trails[src_ip][0], trails[src_ip][1]), packet)

    except struct.error:
        pass

    except Exception:
        if config.SHOW_DEBUG:
            traceback.print_exc()
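
The "no such name" branch above applies a Shannon-entropy heuristic (see the dga_detector reference) to flag algorithmically generated domain labels. A minimal standalone sketch of that calculation (the helper name and threshold value below are illustrative, not the project's SUSPICIOUS_DOMAIN_ENTROPY_THRESHOLD):

import math

def shannon_entropy(label):
    """Shannon entropy, in bits per character, of a domain label."""
    probabilities = (float(label.count(c)) / len(label) for c in set(label))
    return -sum(p * math.log(p, 2) for p in probabilities)

ENTROPY_THRESHOLD = 3.5  # illustrative cutoff, not the project's constant

for label in ("google", "xq3kz9vt2rfj8w"):
    entropy = shannon_entropy(label)
    print("%s -> %.2f bits (suspicious: %s)" % (label, entropy, entropy > ENTROPY_THRESHOLD))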

Example 47

Project: dql
Source File: engine.py
View license
    def _select(self, tree, allow_select_scan):
        """ Run a SELECT statement """
        tablename = tree.table
        desc = self.describe(tablename, require=True)
        kwargs = {}
        if tree.consistent:
            kwargs['consistent'] = True

        visitor = Visitor(self.reserved_words)

        selection = SelectionExpression.from_selection(tree.attrs)
        if selection.is_count:
            kwargs['select'] = 'COUNT'

        if tree.keys_in:
            if tree.limit:
                raise SyntaxError("Cannot use LIMIT with KEYS IN")
            elif tree.using:
                raise SyntaxError("Cannot use USING with KEYS IN")
            elif tree.order:
                raise SyntaxError("Cannot use DESC/ASC with KEYS IN")
            elif tree.where:
                raise SyntaxError("Cannot use WHERE with KEYS IN")
            keys = list(self._iter_where_in(tree))
            kwargs['attributes'] = selection.build(visitor)
            kwargs['alias'] = visitor.attribute_names
            return self.connection.batch_get(tablename, keys=keys, **kwargs)

        if tree.limit:
            if tree.scan_limit:
                kwargs['limit'] = Limit(scan_limit=resolve(tree.scan_limit[2]),
                                        item_limit=resolve(tree.limit[1]),
                                        strict=True)
            else:
                kwargs['limit'] = Limit(item_limit=resolve(tree.limit[1]),
                                        strict=True)
        elif tree.scan_limit:
            kwargs['limit'] = Limit(scan_limit=resolve(tree.scan_limit[2]))

        (action, query_kwargs, index) = self._build_query(desc, tree, visitor)
        if action == 'scan' and not allow_select_scan:
            raise SyntaxError(
                "No index found for query. Please use a SCAN query, or "
                "set allow_select_scan=True\nopt allow_select_scan true")
        order_by = None
        if tree.order_by:
            order_by = tree.order_by[0]
        reverse = tree.order == 'DESC'
        if tree.order:
            if action == 'scan' and not tree.order_by:
                raise SyntaxError("No index found for query, "
                                  "cannot use ASC or DESC without "
                                  "ORDER BY <field>")
            if action == 'query':
                if order_by is None or order_by == index.range_key:
                    kwargs['desc'] = reverse

        kwargs.update(query_kwargs)

        # This is a special case for when we're querying an index and selecting
        # fields that aren't projected into the index.
        # We will change the query to only fetch the primary keys, and then
        # fill in the selected attributes after the fact.
        fetch_attrs_after = False
        if (index is not None and
                not index.projects_all_attributes(selection.all_fields)):
            kwargs['attributes'] = [visitor.get_field(a) for a in
                                    desc.primary_key_attributes]
            fetch_attrs_after = True
        else:
            kwargs['attributes'] = selection.build(visitor)
        kwargs['expr_values'] = visitor.expression_values
        kwargs['alias'] = visitor.attribute_names

        method = getattr(self.connection, action + '2')
        result = method(tablename, **kwargs)

        # If the queried index didn't project the selected attributes, we need
        # to do a BatchGetItem to fetch all the data.
        if fetch_attrs_after:
            if not isinstance(result, list):
                result = list(result)
            # If no results, no need to batch_get
            if not result:
                return result
            visitor = Visitor(self.reserved_words)
            kwargs = {
                'keys': [desc.primary_key(item) for item in result],
            }
            kwargs['attributes'] = selection.build(visitor)
            kwargs['alias'] = visitor.attribute_names
            result = self.connection.batch_get(tablename, **kwargs)

        def order(items):
            """ Sort the items by the specified keys """
            if order_by is None:
                return items
            if index is None or order_by != index.range_key:
                if not isinstance(items, list):
                    items = list(items)
                items.sort(key=lambda x: x.get(order_by), reverse=reverse)
            return items

        # Save the data to a file
        if tree.save_file:
            if selection.is_count:
                raise Exception("Cannot use count(*) with SAVE")
            count = 0
            result = order(selection.convert(item, True) for item in result)
            filename = tree.save_file[0]
            if filename[0] in ['"', "'"]:
                filename = unwrap(filename)
            # If it's still an iterator, convert to a list so we can iterate
            # multiple times.
            if not isinstance(result, list):
                result = list(result)
            remainder, ext = os.path.splitext(filename)
            if ext.lower() in ['.gz', '.gzip']:
                ext = os.path.splitext(remainder)[1]
                opened = gzip.open(filename, 'wb')
            else:
                opened = open(filename, 'wb')
            if ext.lower() == '.csv':
                if selection.all_keys:
                    headers = selection.all_keys
                else:
                    # Have to do this to get all the headers :(
                    result = list(result)
                    all_headers = set()
                    for item in result:
                        all_headers.update(item.keys())
                    headers = list(all_headers)
                with opened as ofile:
                    writer = csv.DictWriter(ofile, fieldnames=headers,
                                            extrasaction='ignore')
                    writer.writeheader()
                    for item in result:
                        count += 1
                        writer.writerow(item)
            elif ext.lower() == '.json':
                with opened as ofile:
                    for item in result:
                        count += 1
                        ofile.write(self._encoder.encode(item))
                        ofile.write('\n')
            else:
                with opened as ofile:
                    for item in result:
                        count += 1
                        pickle.dump(item, ofile)
            return count
        elif not selection.is_count:
            result = order(selection.convert(item) for item in result)

        return result
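
The SAVE branch above calls os.path.splitext twice: once to detect a .gz/.gzip wrapper, and once more on the remainder to recover the inner format extension (.csv, .json, or the pickle fallback). A sketch of that pattern in isolation (the helper name and filenames are illustrative):

import os

def format_ext(filename):
    """Return the data-format extension, looking through a .gz/.gzip wrapper."""
    remainder, ext = os.path.splitext(filename)
    if ext.lower() in ('.gz', '.gzip'):
        ext = os.path.splitext(remainder)[1]
    return ext.lower()

print(format_ext('dump.json'))      # '.json'
print(format_ext('dump.json.gz'))   # '.json'
print(format_ext('dump.CSV.gzip'))  # '.csv'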

Example 48

Project: pyNastran
Source File: surf_io.py
View license
    def _fill_surf_case(self, surf_filename, cases, ID, nnodes, nelements, model):
        base, ext = os.path.splitext(surf_filename)
        assert ext == '.surf', surf_filename

        tag_filename = base + '.tags'

        cases_new = []
        has_tag_data = False
        results_form = []
        geometry_form = [
            #('Region', 0, []),
            ('ElementID', 0, []),
            ('NodeID', 1, []),
            ('SurfaceID', 2, []),
            ('ReconFlag', 3, []),
            ('GridBC', 4, []),

            ('NormalX', 5, []),
            ('NormalY', 6, []),
            ('NormalZ', 7, []),

            ('normSpacing', 8, []),
            ('BL_thick', 9, []),
        ]
        nids = arange(1, nnodes + 1)
        norm_spacing = model.node_props[:, 0]
        bl_thickness = model.node_props[:, 1]

        ntris = model.tris.shape[0]
        nquads = model.quads.shape[0]
        #nelements = ntris + nquads
        eids = arange(1, nelements + 1)

        if ntris and nquads:
            element_props = vstack([model.tri_props, model.quad_props])
        elif ntris:
            element_props = model.tri_props
        elif nquads:
            element_props = model.quad_props

        surf_ids = element_props[:, 0]
        recon_flags = element_props[:, 1]
        grid_bcs = element_props[:, 2]
        #print(unique(grid_bcs))

        normals = model.get_normals()
        eid_res = GuiResult(0, header='ElementID', title='ElementID',
                            location='centroid', scalar=eids)
        nid_res = GuiResult(0, header='NodeID', title='NodeID',
                            location='node', scalar=nids)
        surface_res = GuiResult(0, header='SurfaceID', title='SurfaceID',
                                location='centroid', scalar=surf_ids)

        recon_res = GuiResult(0, header='ReconFlag', title='ReconFlag',
                              location='centroid', scalar=recon_flags)
        gridbc_res = GuiResult(0, header='GridBC', title='GridBC',
                               location='centroid', scalar=grid_bcs)
        normalx_res = GuiResult(0, header='NormalX', title='NormalX',
                                location='centroid', scalar=normals[:, 0])
        normaly_res = GuiResult(0, header='NormalY', title='NormalY',
                                location='centroid', scalar=normals[:, 1])
        normalz_res = GuiResult(0, header='NormalZ', title='NormalZ',
                                location='centroid', scalar=normals[:, 2])

        normspacing_res = GuiResult(0, header='NormSpacing', title='NormSpacing',
                                    location='node', scalar=norm_spacing)
        blthick_res = GuiResult(0, header='BL_thick', title='BL_thick',
                                location='node', scalar=bl_thickness)

        icase = 0
        cases[icase] = (eid_res, (0, 'ElementID'))
        cases[icase + 1] = (nid_res, (0, 'NodeID'))
        cases[icase + 2] = (surface_res, (0, 'SurfaceID'))

        cases[icase + 3] = (recon_res, (0, 'ReconFlag'))
        cases[icase + 4] = (gridbc_res, (0, 'GridBC'))
        cases[icase + 5] = (normalx_res, (0, 'NormalX'))
        cases[icase + 6] = (normaly_res, (0, 'NormalY'))
        cases[icase + 7] = (normalz_res, (0, 'NormalZ'))

        cases[icase + 8] = (normspacing_res, (0, 'NormSpacing'))
        cases[icase + 9] = (blthick_res, (0, 'BL_thick'))
        icase += 10

        if os.path.exists(tag_filename):
            tagger = TagReader()
            data = tagger.read_tag_filename(tag_filename)

            int_data = ones((nelements, 8), dtype='int32') * -10  # integer literal keeps the int32 dtype
            float_data = zeros((nelements, 2), dtype='float64')
            for key, datai in sorted(iteritems(data)):
                #self.log.info(datai)
                [name, is_visc, is_recon, is_rebuild, is_fixed, is_source,
                 is_trans, is_delete, bl_spacing, bl_thickness, nlayers] = datai
                i = where(surf_ids == key)[0]
                int_data[i, :] = [is_visc, is_recon, is_rebuild, is_fixed,
                                  is_source, is_trans, is_delete, nlayers]
                float_data[i, :] = [bl_spacing, bl_thickness]
                self.log.info('data[%i] = %s' % (key, name))

            has_tag_data = True
            tag_form = []
            tag_form.append(('is_visc', icase, []))
            tag_form.append(('is_recon', icase + 1, []))
            tag_form.append(('is_rebuild', icase + 2, []))
            tag_form.append(('is_fixed', icase + 3, []))
            tag_form.append(('is_source', icase + 4, []))
            tag_form.append(('is_trans', icase + 5, []))
            tag_form.append(('is_delete', icase + 6, []))
            tag_form.append(('nlayers', icase + 7, []))
            tag_form.append(('bl_spacing', icase + 8, []))
            tag_form.append(('bl_thickness', icase + 9, []))

            visc_res = GuiResult(0, header='is_visc', title='is_visc',
                                 location='node', scalar=int_data[:, 0])
            recon_res = GuiResult(0, header='is_recon', title='is_recon',
                                  location='node', scalar=int_data[:, 1])
            rebuild_res = GuiResult(0, header='is_rebuild', title='is_rebuild',
                                    location='node', scalar=int_data[:, 2])
            fixed_res = GuiResult(0, header='is_fixed', title='is_fixed',
                                  location='node', scalar=int_data[:, 3])
            source_res = GuiResult(0, header='is_source', title='is_source',
                                   location='node', scalar=int_data[:, 4])
            trans_res = GuiResult(0, header='is_trans', title='is_trans',
                                  location='node', scalar=int_data[:, 5])
            delete_res = GuiResult(0, header='is_delete', title='is_delete',
                                   location='node', scalar=int_data[:, 6])
            nlayers_res = GuiResult(0, header='nlayers', title='nlayers',
                                    location='node', scalar=int_data[:, 7])

            spacing_res = GuiResult(0, header='bl_spacing', title='bl_spacing',
                                    location='centroid', scalar=float_data[:, 0])
            blthickness_res = GuiResult(0, header='bl_thickness', title='bl_thickness',
                                        location='centroid', scalar=float_data[:, 1])

            cases[icase] = (visc_res, (0, 'is_visc'))
            cases[icase + 1] = (recon_res, (0, 'is_recon'))
            cases[icase + 2] = (rebuild_res, (0, 'is_rebuild'))
            cases[icase + 3] = (fixed_res, (0, 'is_fixed'))
            cases[icase + 4] = (source_res, (0, 'is_source'))
            cases[icase + 5] = (trans_res, (0, 'is_trans'))
            cases[icase + 6] = (delete_res, (0, 'is_delete'))
            cases[icase + 7] = (nlayers_res, (0, 'nlayers'))
            cases[icase + 8] = (spacing_res, (0, 'bl_spacing'))
            cases[icase + 9] = (blthickness_res, (0, 'bl_thickness'))

        form = [
            ('Geometry', None, geometry_form),
        ]
        if has_tag_data:
            form.append(('Tag Data', None, tag_form),)

        results_form = []
        if len(results_form):
            form.append(('Results', None, results_form))
        return form, cases
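
This example opens by splitting off the .surf extension, both to validate it and to derive a sibling .tags file next to the input. The companion-file idiom on its own (helper name and path are illustrative):

import os

def sibling_path(filename, new_ext):
    """Swap a file's extension to locate a companion file."""
    base, _ = os.path.splitext(filename)
    return base + new_ext

print(sibling_path('/models/wing.surf', '.tags'))  # /models/wing.tags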

Example 49

Project: streamondemand
Source File: library.py
View license
def set_infolabels_from_library(itemlist, tipo):
    """
    Saves the data to display (thumbnail, fanart, plot, cast, etc.) from the Kodi library.
    @type itemlist: list
    @param itemlist: item
    @type tipo: str
    @param tipo:
    @rtype:   infoLabels
    @return:  result of saving.
    """
    logger.info("streamondemand.platformcode.library set_infoLabels_from_library")

    # Method 1: From the pelisalacarta library
    if tipo == 'Movies':
        for item in itemlist:
            if item.path.endswith(".strm"):
                data_file = item.path
                if filetools.exists(data_file):
                    infolabels = Item().fromurl(filetools.read(data_file)).infoLabels
                    item.infoLabels = infolabels
            else:
                data_file = os.path.splitext(item.path)[0] + ".json"
                if filetools.exists(data_file):
                    infolabels = Item().fromjson(filetools.read(data_file)).infoLabels
                    item.infoLabels = infolabels

            item.title = item.contentTitle
            item.plot = item.contentPlot
            item.thumbnail = item.contentThumbnail

    elif tipo == 'TVShows':
        for item in itemlist:
            data_file = filetools.join(item.path, "tvshow.json")
            if filetools.exists(data_file):
                infolabels = Item().fromjson(filetools.read(data_file)).infoLabels
                item.infoLabels = infolabels

            item.title = item.contentSerieName
            item.thumbnail = item.contentThumbnail
            item.plot = item.contentPlot

    elif tipo == 'Episodes':
        for item in itemlist:
            if item.path.endswith(".strm"):
                data_file = item.path
                if filetools.exists(data_file):
                    infolabels = Item().fromurl(filetools.read(data_file)).infoLabels
                    item.infoLabels = infolabels
            # TODO: should the else branch exist?
            else:
                data_file = os.path.splitext(item.path)[0] + ".json"
                if filetools.exists(data_file):
                    infolabels = Item().fromjson(filetools.read(data_file)).infoLabels
                    item.infoLabels = infolabels

            item.plot = item.contentPlot
            item.thumbnail = item.contentThumbnail

            if item.contentTitle:
                if len(str(item.contentEpisodeNumber)) == 1:
                    item.title = "{0}x0{1}".format(item.contentSeason, item.contentEpisodeNumber)
                else:
                    item.title = "{0}x{1}".format(item.contentSeason, item.contentEpisodeNumber)

                item.title = "{0} - {1}".format(item.title, item.contentTitle.strip())
            else:
                if "fulltitle" in item:
                    item.title = item.fulltitle
                else:
                    if len(str(item.contentEpisodeNumber)) == 1:
                        item.title = "{0}x0{1}".format(item.contentSeason, item.contentEpisodeNumber)
                    else:
                        item.title = "{0}x{1}".format(item.contentSeason, item.contentEpisodeNumber)

                    item.title = "{0} - {1}".format(item.title, "Episodio {0}".format(item.contentEpisodeNumber))

    if config.get_setting("get_metadata_from_kodi") == "true":
        # Method 2: From the Kodi library
        payload = dict()
        result = list()

        if tipo == 'Movies':
            payload = {"jsonrpc": "2.0",
                       "method": "VideoLibrary.GetMovies",
                       "params": {"properties": ["title", "year", "rating", "trailer", "tagline", "plot", "plotoutline",
                                                 "originaltitle", "lastplayed", "playcount", "writer", "mpaa", "cast",
                                                 "imdbnumber", "runtime", "set", "top250", "votes", "fanart", "tag",
                                                 "thumbnail", "file", "director", "country", "studio", "genre",
                                                 "sorttitle", "setid", "dateadded"
                                                 ]},
                       "id": "libMovies"}

        elif tipo == 'TVShows':
            payload = {"jsonrpc": "2.0",
                       "method": "VideoLibrary.GetTVShows",
                       "params": {"properties": ["title", "genre", "year", "rating", "plot", "studio", "mpaa", "cast",
                                                 "playcount", "episode", "imdbnumber", "premiered", "votes",
                                                 "lastplayed", "fanart", "thumbnail", "file", "originaltitle",
                                                 "sorttitle", "episodeguide", "season", "watchedepisodes", "dateadded",
                                                 "tag"]},
                       "id": "libTvShows"}

        elif tipo == 'Episodes' and 'tvshowid' in itemlist[0].infoLabels and itemlist[0].infoLabels['tvshowid']:
            tvshowid = itemlist[0].infoLabels['tvshowid']
            payload = {"jsonrpc": "2.0",
                       "method": "VideoLibrary.GetEpisodes",
                       "params": {"tvshowid": tvshowid,
                                  "properties": ["title", "plot", "votes", "rating", "writer", "firstaired",
                                                 "playcount", "runtime", "director", "productioncode", "season",
                                                 "episode", "originaltitle", "showtitle", "cast", "lastplayed",
                                                 "fanart", "thumbnail", "file", "dateadded", "tvshowid"]},
                       "id": 1}

        data = get_data(payload)
        logger.debug("JSON-RPC: {0}".format(data))

        if 'error' in data:
            logger.error("JSON-RPC: {0}".format(data))

        elif 'movies' in data['result']:
            result = data['result']['movies']

        elif 'tvshows' in data['result']:
            result = data['result']['tvshows']

        elif 'episodes' in data['result']:
            result = data['result']['episodes']

        if result:
            for i in itemlist:
                for r in result:

                    if r['file'].endswith(os.sep) or r['file'].endswith('/'):
                        r_filename_aux = r['file'][:-1]
                    else:
                        r_filename_aux = r['file']

                    r_filename = os.path.basename(r_filename_aux)
                    # logger.debug(os.path.basename(i.path) + '\n' + r_filename)
                    i_filename = os.path.basename(i.path)
                    if i_filename == r_filename:
                        infolabels = r

                        # Get the images and assign them to the item
                        if 'thumbnail' in infolabels:

                            infolabels['thumbnail'] = urllib.unquote_plus(infolabels['thumbnail']).replace('image://',
                                                                                                           '')

                            if infolabels['thumbnail'].endswith('/'):
                                i.thumbnail = infolabels['thumbnail'][:-1]
                            else:
                                i.thumbnail = infolabels['thumbnail']

                        if 'fanart' in infolabels:

                            infolabels['fanart'] = urllib.unquote_plus(infolabels['fanart']).replace('image://', '')

                            if infolabels['fanart'].endswith('/'):
                                i.fanart = infolabels['fanart'][:-1]
                            else:
                                i.fanart = infolabels['fanart']

                        # Adapt some fields to the infoLabels format
                        if 'cast' in infolabels:
                            l_castandrole = list()
                            for c in sorted(infolabels['cast'], key=lambda _c: _c["order"]):
                                l_castandrole.append((c['name'], c['role']))
                            infolabels.pop('cast')
                            infolabels['castandrole'] = l_castandrole
                        if 'genre' in infolabels:
                            infolabels['genre'] = ', '.join(infolabels['genre'])
                        if 'writer' in infolabels:
                            infolabels['writer'] = ', '.join(infolabels['writer'])
                        if 'director' in infolabels:
                            infolabels['director'] = ', '.join(infolabels['director'])
                        if 'country' in infolabels:
                            infolabels['country'] = ', '.join(infolabels['country'])
                        if 'studio' in infolabels:
                            infolabels['studio'] = ', '.join(infolabels['studio'])
                        if 'runtime' in infolabels:
                            infolabels['duration'] = infolabels.pop('runtime')

                        # Set the title if present and add infoLabels to the item
                        if 'label' in infolabels:
                            i.title = infolabels['label']
                        i.infoLabels = infolabels
                        result.remove(r)
                        break
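
The example above keeps per-item metadata either inside the .strm file itself or in a .json sidecar that shares the video's base name, which os.path.splitext derives. A sketch of just that path selection, assuming plain paths instead of the project's filetools wrapper (the helper name is illustrative):

import os

def metadata_path(item_path):
    """Return the path that holds an item's metadata."""
    if item_path.endswith('.strm'):
        return item_path  # metadata embedded in the .strm itself
    return os.path.splitext(item_path)[0] + '.json'  # .json sidecar

print(metadata_path('/videos/show/1x01.strm'))  # the .strm itself
print(metadata_path('/videos/movie.mp4'))       # /videos/movie.json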

Example 50

Project: gyp
Source File: xcode_ninja.py
View license
def CreateWrapper(target_list, target_dicts, data, params):
  """Initialize targets for the ninja wrapper.

  This sets up the necessary variables in the targets to generate Xcode projects
  that use ninja as an external builder.
  Arguments:
    target_list: List of target pairs: 'base/base.gyp:base'.
    target_dicts: Dict of target properties keyed on target pair.
    data: Dict of flattened build files keyed on gyp path.
    params: Dict of global options for gyp.
  """
  orig_gyp = params['build_files'][0]
  for gyp_name, gyp_dict in data.iteritems():
    if gyp_name == orig_gyp:
      depth = gyp_dict['_DEPTH']

  # Check for custom main gyp name, otherwise use the default CHROMIUM_GYP_FILE
  # and prepend .ninja before the .gyp extension.
  generator_flags = params.get('generator_flags', {})
  main_gyp = generator_flags.get('xcode_ninja_main_gyp', None)
  if main_gyp is None:
    (build_file_root, build_file_ext) = os.path.splitext(orig_gyp)
    main_gyp = build_file_root + ".ninja" + build_file_ext

  # Create new |target_list|, |target_dicts| and |data| data structures.
  new_target_list = []
  new_target_dicts = {}
  new_data = {}

  # Set base keys needed for |data|.
  new_data[main_gyp] = {}
  new_data[main_gyp]['included_files'] = []
  new_data[main_gyp]['targets'] = []
  new_data[main_gyp]['xcode_settings'] = \
      data[orig_gyp].get('xcode_settings', {})

  # Normally the xcode-ninja generator includes only valid executable targets.
  # If |xcode_ninja_executable_target_pattern| is set, that list is reduced to
  # executable targets that match the pattern. (Default all)
  executable_target_pattern = \
      generator_flags.get('xcode_ninja_executable_target_pattern', None)

  # For including other non-executable targets, add the matching target name
  # to the |xcode_ninja_target_pattern| regular expression. (Default none)
  target_extras = generator_flags.get('xcode_ninja_target_pattern', None)

  for old_qualified_target in target_list:
    spec = target_dicts[old_qualified_target]
    if IsValidTargetForWrapper(target_extras, executable_target_pattern, spec):
      # Add to new_target_list.
      target_name = spec.get('target_name')
      new_target_name = '%s:%s#target' % (main_gyp, target_name)
      new_target_list.append(new_target_name)

      # Add to new_target_dicts.
      new_target_dicts[new_target_name] = _TargetFromSpec(spec, params)

      # Add to new_data.
      for old_target in data[old_qualified_target.split(':')[0]]['targets']:
        if old_target['target_name'] == target_name:
          new_data_target = {}
          new_data_target['target_name'] = old_target['target_name']
          new_data_target['toolset'] = old_target['toolset']
          new_data[main_gyp]['targets'].append(new_data_target)

  # Create sources target.
  sources_target_name = 'sources_for_indexing'
  sources_target = _TargetFromSpec(
    { 'target_name' : sources_target_name,
      'toolset': 'target',
      'default_configuration': 'Default',
      'mac_bundle': '0',
      'type': 'executable'
    }, None)

  # Tell Xcode to look everywhere for headers.
  sources_target['configurations'] = {'Default': { 'include_dirs': [ depth ] } }

  sources = []
  for target, target_dict in target_dicts.iteritems():
    base = os.path.dirname(target)
    files = target_dict.get('sources', []) + \
            target_dict.get('mac_bundle_resources', [])
    # Remove files starting with $. These are mostly intermediate files for the
    # build system.
    files = [ file for file in files if not file.startswith('$')]

    # Make sources relative to root build file.
    relative_path = os.path.dirname(main_gyp)
    sources += [ os.path.relpath(os.path.join(base, file), relative_path)
                    for file in files ]

  sources_target['sources'] = sorted(set(sources))

  # Put sources_to_index in its own gyp.
  sources_gyp = \
      os.path.join(os.path.dirname(main_gyp), sources_target_name + ".gyp")
  fully_qualified_target_name = \
      '%s:%s#target' % (sources_gyp, sources_target_name)

  # Add to new_target_list, new_target_dicts and new_data.
  new_target_list.append(fully_qualified_target_name)
  new_target_dicts[fully_qualified_target_name] = sources_target
  new_data_target = {}
  new_data_target['target_name'] = sources_target['target_name']
  new_data_target['_DEPTH'] = depth
  new_data_target['toolset'] = "target"
  new_data[sources_gyp] = {}
  new_data[sources_gyp]['targets'] = []
  new_data[sources_gyp]['included_files'] = []
  new_data[sources_gyp]['xcode_settings'] = \
      data[orig_gyp].get('xcode_settings', {})
  new_data[sources_gyp]['targets'].append(new_data_target)

  # Write workspace to file.
  _WriteWorkspace(main_gyp, sources_gyp, params)
  return (new_target_list, new_target_dicts, new_data)
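
CreateWrapper derives the wrapper project's name by splicing ".ninja" in front of the original .gyp extension. The naming step alone, as a short sketch (the helper name and path are illustrative):

import os

def ninja_gyp_name(orig_gyp):
    """Insert '.ninja' before the .gyp extension."""
    root, ext = os.path.splitext(orig_gyp)
    return root + '.ninja' + ext

print(ninja_gyp_name('build/all.gyp'))  # build/all.ninja.gyp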