re.compile

Here are examples of the Python API re.compile, taken from open source projects. By voting up you can indicate which examples are most useful and appropriate.

165 Examples

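Before the project examples, a minimal standalone sketch of re.compile itself: compile a pattern once, then reuse the returned pattern object's match/search/sub methods. This snippet is illustrative and not taken from any of the projects below.

import re

# Compile once, reuse many times.
hex_id = re.compile(r'^m_([0-9a-f]+)$')

m = hex_id.match('m_deadbeef')
if m:
    print(m.group(1))          # -> deadbeef

# Flags are passed the same way as to the module-level functions.
word = re.compile(r'\bpython\b', re.IGNORECASE)
print(word.search('I like Python.') is not None)   # -> True
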
Example 1

Project: django-rosetta Source File: views.py
@never_cache
@user_passes_test(lambda user: can_translate(user), settings.LOGIN_URL)
def home(request):
    """
    Displays a list of messages to be translated
    """

    def fix_nls(in_, out_):
        """Fixes submitted translations by filtering carriage returns and pairing
        newlines at the beginning and end of the translated string with the original
        """
        if 0 == len(in_) or 0 == len(out_):
            return out_

        if "\r" in out_ and "\r" not in in_:
            out_ = out_.replace("\r", '')

        if "\n" == in_[0] and "\n" != out_[0]:
            out_ = "\n" + out_
        elif "\n" != in_[0] and "\n" == out_[0]:
            out_ = out_.lstrip()
        if 0 == len(out_):
            pass
        elif "\n" == in_[-1] and "\n" != out_[-1]:
            out_ = out_ + "\n"
        elif "\n" != in_[-1] and "\n" == out_[-1]:
            out_ = out_.rstrip()
        return out_

    def _request_request(key, default=None):
        if key in request.GET:
            return request.GET.get(key)
        elif key in request.POST:
            return request.POST.get(key)
        return default

    storage = get_storage(request)
    query = ''
    if storage.has('rosetta_i18n_fn'):
        rosetta_i18n_fn = storage.get('rosetta_i18n_fn')

        rosetta_i18n_app = get_app_name(rosetta_i18n_fn)
        rosetta_i18n_lang_code = storage.get('rosetta_i18n_lang_code')
        rosetta_i18n_lang_bidi = rosetta_i18n_lang_code.split('-')[0] in settings.LANGUAGES_BIDI
        rosetta_i18n_write = storage.get('rosetta_i18n_write', True)
        if rosetta_i18n_write:
            rosetta_i18n_pofile = pofile(rosetta_i18n_fn, wrapwidth=rosetta_settings.POFILE_WRAP_WIDTH)
            for entry in rosetta_i18n_pofile:
                entry.md5hash = hashlib.md5(
                    (six.text_type(entry.msgid) +
                        six.text_type(entry.msgstr) +
                        six.text_type(entry.msgctxt or "")).encode('utf8')
                ).hexdigest()

        else:
            rosetta_i18n_pofile = storage.get('rosetta_i18n_pofile')

        if 'filter' in request.GET:
            if request.GET.get('filter') in ('untranslated', 'translated', 'fuzzy', 'all'):
                filter_ = request.GET.get('filter')
                storage.set('rosetta_i18n_filter', filter_)
                return HttpResponseRedirect(reverse('rosetta-home'))

        rosetta_i18n_filter = storage.get('rosetta_i18n_filter', 'all')

        if '_next' in request.POST:
            rx = re.compile(r'^m_([0-9a-f]+)')
            rx_plural = re.compile(r'^m_([0-9a-f]+)_([0-9]+)')
            file_change = False
            for key, value in request.POST.items():
                md5hash = None
                plural_id = None

                if rx_plural.match(key):
                    md5hash = str(rx_plural.match(key).groups()[0])
                    # polib parses .po files into unicode strings, but
                    # doesn't bother to convert plural indexes to int,
                    # so we need unicode here.
                    plural_id = six.text_type(rx_plural.match(key).groups()[1])

                    # Above no longer true as of Polib 1.0.4
                    if plural_id and plural_id.isdigit():
                        plural_id = int(plural_id)

                elif rx.match(key):
                    md5hash = str(rx.match(key).groups()[0])

                if md5hash is not None:
                    entry = rosetta_i18n_pofile.find(md5hash, 'md5hash')
                    # If someone did a makemessage, some entries might
                    # have been removed, so we need to check.
                    if entry:
                        old_msgstr = entry.msgstr
                        if plural_id is not None:
                            plural_string = fix_nls(entry.msgid_plural, value)
                            entry.msgstr_plural[plural_id] = plural_string
                        else:
                            entry.msgstr = fix_nls(entry.msgid, value)

                        is_fuzzy = bool(request.POST.get('f_%s' % md5hash, False))
                        old_fuzzy = 'fuzzy' in entry.flags

                        if old_fuzzy and not is_fuzzy:
                            entry.flags.remove('fuzzy')
                        elif not old_fuzzy and is_fuzzy:
                            entry.flags.append('fuzzy')

                        file_change = True

                        if old_msgstr != value or old_fuzzy != is_fuzzy:
                            entry_changed.send(sender=entry,
                                               user=request.user,
                                               old_msgstr=old_msgstr,
                                               old_fuzzy=old_fuzzy,
                                               pofile=rosetta_i18n_fn,
                                               language_code=rosetta_i18n_lang_code,
                                               )

                    else:
                        storage.set('rosetta_last_save_error', True)

            if file_change and rosetta_i18n_write:
                try:
                    rosetta_i18n_pofile.metadata['Last-Translator'] = unicodedata.normalize('NFKD', u"%s %s <%s>" % (
                        getattr(request.user, 'first_name', 'Anonymous'),
                        getattr(request.user, 'last_name', 'User'),
                        getattr(request.user, 'email', '[email protected]')
                    )).encode('ascii', 'ignore')
                    rosetta_i18n_pofile.metadata['X-Translated-Using'] = u"django-rosetta %s" % rosetta.get_version(False)
                    rosetta_i18n_pofile.metadata['PO-Revision-Date'] = timestamp_with_timezone()
                except UnicodeDecodeError:
                    pass

                try:
                    rosetta_i18n_pofile.save()
                    po_filepath, ext = os.path.splitext(rosetta_i18n_fn)

                    if rosetta_settings.AUTO_COMPILE:
                        save_as_mo_filepath = po_filepath + '.mo'
                        rosetta_i18n_pofile.save_as_mofile(save_as_mo_filepath)

                    post_save.send(sender=None, language_code=rosetta_i18n_lang_code, request=request)
                    # Try auto-reloading via the WSGI daemon mode reload mechanism
                    if rosetta_settings.WSGI_AUTO_RELOAD and \
                        'mod_wsgi.process_group' in request.environ and \
                        request.environ.get('mod_wsgi.process_group', None) and \
                        'SCRIPT_FILENAME' in request.environ and \
                            int(request.environ.get('mod_wsgi.script_reloading', '0')):
                            try:
                                os.utime(request.environ.get('SCRIPT_FILENAME'), None)
                            except OSError:
                                pass
                    # Try auto-reloading via uwsgi daemon reload mechanism
                    if rosetta_settings.UWSGI_AUTO_RELOAD:
                        try:
                            import uwsgi
                            # pretty easy right?
                            uwsgi.reload()
                        except:
                            # we may not be running under uwsgi :P
                            pass

                except Exception as e:
                    messages.error(request, e)
                    storage.set('rosetta_i18n_write', False)
                storage.set('rosetta_i18n_pofile', rosetta_i18n_pofile)

                # Retain query arguments
                query_arg = '?_next=1'
                if _request_request('query', False):
                    query_arg += '&query=%s' % _request_request('query')
                if 'page' in request.GET:
                    query_arg += '&page=%d&_next=1' % int(request.GET.get('page'))
                return HttpResponseRedirect(reverse('rosetta-home') + iri_to_uri(query_arg))
        rosetta_i18n_lang_code = storage.get('rosetta_i18n_lang_code')

        if _request_request('query', False) and _request_request('query', '').strip():
            query = _request_request('query', '').strip()
            rx = re.compile(re.escape(query), re.IGNORECASE)
            paginator = Paginator([e_ for e_ in rosetta_i18n_pofile if not e_.obsolete and rx.search(six.text_type(e_.msgstr) + six.text_type(e_.msgid) + u''.join([o[0] for o in e_.occurrences]))], rosetta_settings.MESSAGES_PER_PAGE)
        else:
            if rosetta_i18n_filter == 'untranslated':
                paginator = Paginator(rosetta_i18n_pofile.untranslated_entries(), rosetta_settings.MESSAGES_PER_PAGE)
            elif rosetta_i18n_filter == 'translated':
                paginator = Paginator(rosetta_i18n_pofile.translated_entries(), rosetta_settings.MESSAGES_PER_PAGE)
            elif rosetta_i18n_filter == 'fuzzy':
                paginator = Paginator([e_ for e_ in rosetta_i18n_pofile.fuzzy_entries() if not e_.obsolete], rosetta_settings.MESSAGES_PER_PAGE)
            else:
                paginator = Paginator([e_ for e_ in rosetta_i18n_pofile if not e_.obsolete], rosetta_settings.MESSAGES_PER_PAGE)

        if rosetta_settings.ENABLE_REFLANG:
            ref_lang = storage.get('rosetta_i18n_ref_lang_code', 'msgid')
            ref_pofile = None
            if ref_lang != 'msgid':
                ref_fn = re.sub('/locale/[a-z]{2}/', '/locale/%s/' % ref_lang, rosetta_i18n_fn)
                try:
                    ref_pofile = pofile(ref_fn)
                except IOError:
                    # there's a syntax error in the PO file and polib can't open it. Let's just
                    # do nothing and thus display msgids.
                    pass

            for o in paginator.object_list:
                # default
                o.ref_txt = o.msgid
                if ref_pofile is not None:
                    ref_entry = ref_pofile.find(o.msgid)
                    if ref_entry is not None and ref_entry.msgstr:
                        o.ref_txt = ref_entry.msgstr
            LANGUAGES = list(settings.LANGUAGES) + [('msgid', 'MSGID')]
        else:
            ref_lang = None
            LANGUAGES = settings.LANGUAGES

        page = 1
        if 'page' in request.GET:
            try:
                get_page = int(request.GET.get('page'))
            except ValueError:
                page = 1  # fall back to page 1
            else:
                if 0 < get_page <= paginator.num_pages:
                    page = get_page

        if '_next' in request.GET or '_next' in request.POST:
            page += 1
            if page > paginator.num_pages:
                page = 1
            query_arg = '?page=%d' % page
            return HttpResponseRedirect(reverse('rosetta-home') + iri_to_uri(query_arg))

        rosetta_messages = paginator.page(page).object_list
        main_language = None
        if rosetta_settings.MAIN_LANGUAGE and rosetta_settings.MAIN_LANGUAGE != rosetta_i18n_lang_code:
            for language in settings.LANGUAGES:
                if language[0] == rosetta_settings.MAIN_LANGUAGE:
                    main_language = _(language[1])
                    break

            fl = ("/%s/" % rosetta_settings.MAIN_LANGUAGE).join(rosetta_i18n_fn.split("/%s/" % rosetta_i18n_lang_code))
            po = pofile(fl)

            for message in rosetta_messages:
                message.main_lang = po.find(message.msgid).msgstr

        needs_pagination = paginator.num_pages > 1
        if needs_pagination:
            if paginator.num_pages >= 10:
                page_range = pagination_range(1, paginator.num_pages, page)
            else:
                page_range = range(1, 1 + paginator.num_pages)
        try:
            ADMIN_MEDIA_PREFIX = settings.ADMIN_MEDIA_PREFIX
            ADMIN_IMAGE_DIR = ADMIN_MEDIA_PREFIX + 'img/admin/'
        except AttributeError:
            ADMIN_MEDIA_PREFIX = settings.STATIC_URL + 'admin/'
            ADMIN_IMAGE_DIR = ADMIN_MEDIA_PREFIX + 'img/'

        if storage.has('rosetta_last_save_error'):
            storage.delete('rosetta_last_save_error')
            rosetta_last_save_error = True
        else:
            rosetta_last_save_error = False

        try:
            rosetta_i18n_lang_name = force_text(_(storage.get('rosetta_i18n_lang_name')))
        except:
            rosetta_i18n_lang_name = force_text(storage.get('rosetta_i18n_lang_name'))

        return render(request, 'rosetta/pofile.html', dict(
            version=rosetta.get_version(True),
            ADMIN_MEDIA_PREFIX=ADMIN_MEDIA_PREFIX,
            ADMIN_IMAGE_DIR=ADMIN_IMAGE_DIR,
            ENABLE_REFLANG=rosetta_settings.ENABLE_REFLANG,
            LANGUAGES=LANGUAGES,
            rosetta_settings=rosetta_settings,
            rosetta_i18n_lang_name=rosetta_i18n_lang_name,
            rosetta_i18n_lang_code=rosetta_i18n_lang_code,
            rosetta_i18n_lang_bidi=rosetta_i18n_lang_bidi,
            rosetta_last_save_error=rosetta_last_save_error,
            rosetta_i18n_filter=rosetta_i18n_filter,
            rosetta_i18n_write=rosetta_i18n_write,
            rosetta_messages=rosetta_messages,
            page_range=needs_pagination and page_range,
            needs_pagination=needs_pagination,
            main_language=main_language,
            rosetta_i18n_app=rosetta_i18n_app,
            page=page,
            query=query,
            paginator=paginator,
            rosetta_i18n_pofile=rosetta_i18n_pofile,
            ref_lang=ref_lang,
        ))
    else:
        return list_languages(request, do_session_warn=True)
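
Distilled from the example above, a minimal sketch of the key-matching idiom: two compiled patterns pick translation values out of submitted form keys. The keys and values here are made up for illustration.

import re

rx = re.compile(r'^m_([0-9a-f]+)')
rx_plural = re.compile(r'^m_([0-9a-f]+)_([0-9]+)')

# Hypothetical form data: md5-hash keyed messages; plural forms carry an index suffix.
post_items = {'m_a3f1': 'Hello', 'm_a3f1_1': 'Hellos', 'csrfmiddlewaretoken': 'ignored'}

for key, value in post_items.items():
    plural = rx_plural.match(key)
    if plural:
        print('plural', plural.group(1), int(plural.group(2)), value)
    elif rx.match(key):
        print('singular', rx.match(key).group(1), value)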

Example 2

Project: cpe Source File: cpecomp2_3.py
Function: is_valid_language
    def _is_valid_language(self):
        """
        Return True if the value of the component in the "language" attribute
        is valid, and False otherwise.

        :returns: True if value is valid, False otherwise
        :rtype: boolean

        CASE 1: Language part with/without region part
        CASE 2: Language part without region part
        CASE 3: Region part with language part
        CASE 4: Region part without language part
        """

        def check_generic_language(self, value):
            """
            Check possible values in the language part,
            whether or not a region part exists in the language value.

            Possible values of language attribute: a=letter
            | *a
            | *aa
            | aa
            | aaa
            | ?a
            | ?aa
            | ??
            | ??a
            | ???
            """
            lang_pattern = []
            lang_pattern.append("^(\\")
            lang_pattern.append(self.WILDCARD_MULTI)
            lang_pattern.append("[a-z]{1,2}")
            lang_pattern.append("|\\")
            lang_pattern.append(self.WILDCARD_ONE)
            lang_pattern.append("(([a-z][a-z]?)|(\\")
            lang_pattern.append(self.WILDCARD_ONE)
            lang_pattern.append("(\\")
            lang_pattern.append(self.WILDCARD_ONE)
            lang_pattern.append("|[a-z])?))")
            lang_pattern.append("|([a-z]{2,3}))$")

            lang_rxc = re.compile("".join(lang_pattern))

            return lang_rxc.match(value)

        def check_language_without_region(self, value):
            """
            Check possible values in the language part
            when no region part exists in the language value.

            Possible values of language attribute: a=letter
            | a?
            | aa?
            | a??
            | a*
            | aa*
            | aaa*
            | *a*
            | *a?
            | ?a*
            | ?a?
            """
            lang_pattern = []
            lang_pattern.append("^([a-z]")
            lang_pattern.append("([a-z](\\")
            lang_pattern.append(self.WILDCARD_MULTI)
            lang_pattern.append("|\\")
            lang_pattern.append(self.WILDCARD_ONE)
            lang_pattern.append("|")
            lang_pattern.append("([a-z]\\")
            lang_pattern.append(self.WILDCARD_MULTI)
            lang_pattern.append("))")
            lang_pattern.append("|")
            lang_pattern.append("\\")
            lang_pattern.append(self.WILDCARD_ONE)
            lang_pattern.append("(\\")
            lang_pattern.append(self.WILDCARD_ONE)
            lang_pattern.append(")?")
            lang_pattern.append("|\\")
            lang_pattern.append(self.WILDCARD_MULTI)
            lang_pattern.append(")|\\")
            lang_pattern.append(self.WILDCARD_ONE)
            lang_pattern.append("[a-z](\\")
            lang_pattern.append(self.WILDCARD_MULTI)
            lang_pattern.append("|\\")
            lang_pattern.append(self.WILDCARD_ONE)
            lang_pattern.append(")")
            lang_pattern.append("|\\")
            lang_pattern.append(self.WILDCARD_MULTI)
            lang_pattern.append("[a-z](\\")
            lang_pattern.append(self.WILDCARD_MULTI)
            lang_pattern.append("|\\")
            lang_pattern.append(self.WILDCARD_ONE)
            lang_pattern.append(")")
            lang_pattern.append(")$")

            lang_rxc = re.compile("".join(lang_pattern))

            return lang_rxc.match(value)

        def check_region_with_language(self, value):
            """
            Check possible values in region part when language part exists.

            Possible values of language attribute: a=letter, 1=digit
            | *
            | a*
            | a?
            | aa
            | ??
            | 1*
            | 1??
            | 11*
            | 11?
            | 111
            | ???
            """
            region_pattern = []
            region_pattern.append("^(")
            region_pattern.append("(\\")
            region_pattern.append(self.WILDCARD_MULTI)
            region_pattern.append(")|((\\")
            region_pattern.append(self.WILDCARD_ONE)
            region_pattern.append("){2,3})|([a-z]([a-z]|\\")
            region_pattern.append(self.WILDCARD_MULTI)
            region_pattern.append("|\\")
            region_pattern.append(self.WILDCARD_ONE)
            region_pattern.append("))|([0-9](\\")
            region_pattern.append(self.WILDCARD_MULTI)
            region_pattern.append("|\\")
            region_pattern.append(self.WILDCARD_ONE)
            region_pattern.append("(\\")
            region_pattern.append(self.WILDCARD_ONE)
            region_pattern.append(")?|[0-9][0-9\\")
            region_pattern.append(self.WILDCARD_MULTI)
            region_pattern.append("\\")
            region_pattern.append(self.WILDCARD_ONE)
            region_pattern.append("])))$")

            region_rxc = re.compile("".join(region_pattern))
            return region_rxc.match(value)

        def check_region_without_language(self, value):
            """
            Check possible values in the region part when no language part exists.

            Possible values of language attribute: 1=digit
            | *111
            | *11
            | *1
            """
            region_pattern = []
            region_pattern.append("^(")
            region_pattern.append("(\\")
            region_pattern.append(self.WILDCARD_MULTI)
            region_pattern.append("[0-9])")
            region_pattern.append("([0-9]([0-9])?)?")
            region_pattern.append(")$")

            region_rxc = re.compile("".join(region_pattern))
            return region_rxc.match(value)

        comp_str = self._encoded_value.lower()

        # Value with wildcards; separate language and region of value
        parts = comp_str.split(self.SEPARATOR_LANG)
        language = parts[0]
        region_exists = len(parts) == 2

        # Check the language part
        if check_generic_language(self, language) is not None:
            # Valid language, check region part
            if region_exists:
                # Region part exists; check it
                region = parts[1]
                return (check_region_with_language(self, region) is not None)
            else:
                # No region part
                return True
        elif check_language_without_region(self, language) is not None:
            # Language without region; region part should not exist
            return not region_exists
        else:
            # Language part does not exist; check region part
            region = parts[0]
            return check_region_without_language(self, region) is not None
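
The example above assembles each regex from a list of fragments and compiles the joined string once. A much-reduced sketch of the same idiom, assuming the CPE wildcard characters are '*' and '?':

import re

WILDCARD_MULTI = '*'   # assumed value of self.WILDCARD_MULTI

lang_pattern = []
lang_pattern.append('^(\\')
lang_pattern.append(WILDCARD_MULTI)
lang_pattern.append('[a-z]{1,2}')
lang_pattern.append('|[a-z]{2,3})$')

# The joined string is ^(\*[a-z]{1,2}|[a-z]{2,3})$
lang_rxc = re.compile(''.join(lang_pattern))

print(lang_rxc.match('*en') is not None)   # -> True
print(lang_rxc.match('eng') is not None)   # -> True
print(lang_rxc.match('e') is not None)     # -> False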

Example 3

Project: geoinference Source File: geocoder.py
    def geocode_noisy(self, location_name):
        """
        Returns the latitude and longitude (tuple) of a noisy location name
        (e.g., the location field of a social media user's profile).  If your
        input isn't cleaned, you probably want this method instead of geocode().
        """

        usaRegex = re.compile("\\bUSA\\b")
        usRegex = re.compile("\\bUS\\b")
        ukRegex = re.compile("\\bUK\\b")
        
        name = location_name
        name = name.strip()

        # Correct for a few common noisy prefixes
        if name.startswith("the city of "):
            name = name[12:] #.substring("the city of ".length())
        if name.startswith("downtown "):
            name = name[9:] #.substring("downtown ".length())

        # Swap out the three common country abbreviations
        name = re.sub(usaRegex, "United States", name)
        name = re.sub(usRegex, "United States", name)
        name = re.sub(ukRegex, "United Kingdom", name)

        # Substitute out state names from the US
        matches = re.search(self.state_abbv_regex, name)
        if not matches is None:
            abbv = matches.group(0)
            expanded = name[:matches.start(0)] + self.abbv_to_state[abbv] + name[matches.end(0):]
            #print "%s:: %s -> %s" % (abbv, name, expanded)
            name = expanded

        # Once we've matched abbreviations, lower case for all further
        # comparisons
        name = name.lower();

        if name == "nyc":
            name = "new york, new york"

        # Strip off all the cruft on either side
        name = re.sub(ur'^[\W+]+', " ", name);
        name = re.sub(ur'[\W+]+$', " ", name);
        name = name.strip();

        # Rename the dict for brevity since we're going to be referencing it a lot
        # in the next section
        locs = self.lc_name_to_location
        lat_lon = None

#        print "SEACHING %s..." % (name)

        # Look for some name delimiters in the name to try matching on
        # city/state, etc.
        if name.find(',') >= 0 or name.find('-') >= 0 or name.find('|') >= 0:
            parts = re.split(r'[,\-|]+', name)

            if len(parts) == 2:
                p1 = parts[0].strip()
                p2 = parts[1].strip()
                # print "CASE1: (%s) (%s)" % (p1, p2)
                if p1 + '\t' + p2 in locs:
                    lat_lon = locs[p1 + '\t' + p2]
                elif p2 + '\t' + p1 in locs:
                    lat_lon = locs[p2 + '\t' + p1]
                elif p1 in locs:
                    lat_lon = locs[p1]

                if lat_lon is None and p1.find("st.") >= 0:
                    p1 = re.sub("st.", "saint", p1)
                    if p1 + '\t' + p2 in locs:
                        lat_lon = locs[p1 + '\t' + p2]
                    elif p2 + '\t' + p1 in locs:
                        lat_lon = locs[p2 + '\t' + p1]
                    elif p1 in locs:
                        lat_lon = locs[p1]

                elif lat_lon is None and p1.find("saint") >= 0:
                    p1 = re.sub("saint", "st.", p1)
                    if p1 + '\t' + p2 in locs:
                        lat_lon = locs[p1 + '\t' + p2]
                    elif p2 + '\t' + p1 in locs:
                        lat_lon = locs[p2 + '\t' + p1]
                    elif p1 in locs:
                        lat_lon = locs[p1]

            elif len(parts) == 3:
                p1 = parts[0].strip()
                p2 = parts[1].strip()
                p3 = parts[2].strip()
                # print "CASE2: (%s) (%s) (%s)" % (p1, p2, p3)
                if p1 + '\t' + p2 in locs:
                    lat_lon = locs[p1 + '\t' + p2]
                elif p1 + '\t' + p3 in locs:
                    lat_lon = locs[p1 + '\t' + p3]
                elif p1 in locs:
                    lat_lon = locs[p1]

                if lat_lon is None and p1.find("st.") >= 0:
                    p1 = re.sub("st.", "saint", p1)
                    if p1 + '\t' + p2 in locs:
                        lat_lon = locs[p1 + '\t' + p2]
                    elif p1 + '\t' + p3 in locs:
                        lat_lon = locs[p1 + '\t' + p3]
                    elif p1 in locs:
                        lat_lon = locs[p1]
                if lat_lon is None and p1.find("saint") >= 0:
                    p1 = re.sub("saint", "st.", p1)
                    if p1 + '\t' + p2 in locs:
                        lat_lon = locs[p1 + '\t' + p2]
                    elif p1 + '\t' + p3 in locs:
                        lat_lon = locs[p1 + '\t' + p3]
                    elif p1 in locs:
                        lat_lon = locs[p1]

            else:
                pass #print "CASE5: %s" % (parts)            

        # Otherwise no delimiters so we're left to guess at where the name
        # breaks
        else:
            parts = re.split(r'[ \t\n\r]+', name)
            if len(parts) == 2:
                p1 = parts[0]
                p2 = parts[1]
                #print "CASE3: (%s) (%s)" % (p1, p2)
                if p1 + '\t' + p2 in locs:
                    lat_lon = locs[p1 + '\t' + p2]
                elif p2 + '\t' + p1 in locs:
                    lat_lon = locs[p2 + '\t' + p1]
                elif p1 in locs:
                    lat_lon = locs[p1]
                
                if lat_lon is None and p1.find("st.") >= 0:
                    p1 = re.sub("st.", "saint", p1)
                    if p1 + '\t' + p2 in locs:
                        lat_lon = locs[p1 + '\t' + p2]
                    elif p2 + '\t' + p1 in locs:
                        lat_lon = locs[p2 + '\t' + p1]
                    elif p1 in locs:
                        lat_lon = locs[p1]

                elif lat_lon is None and p1.find("saint") >= 0:
                    p1 = re.sub("saint", "st.", p1)
                    if p1 + '\t' + p2 in locs:
                        lat_lon = locs[p1 + '\t' + p2]
                    elif p2 + '\t' + p1 in locs:
                        lat_lon = locs[p2 + '\t' + p1]
                    elif p1 in locs:
                        lat_lon = locs[p1]


            elif len(parts) > 2:
                # Guess that the last name is a country/state and try
                # city/<whatever>
                #print "CASE4: %s" % (parts)                
                last = parts[-1]
                city = ' '.join(parts[:-1])
            else:
                pass #print "CASE6: %s" % (parts)

        # Last ditch effort: just try matching the whole name and hope it's
        # a single unambiguous city match
        if lat_lon is None and name in locs:
            lat_lon = locs[name]                              

        #print "FOUND? %s ('%s') -> %s" % (location_name, name, lat_lon)

            

        return lat_lon
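
The three compiled patterns at the top of this example rely on \b word boundaries so the short abbreviations only match whole tokens. A minimal sketch of that substitution step; calling sub on the compiled pattern is equivalent to the re.sub(pattern, ...) form used above.

import re

usa_regex = re.compile(r'\bUSA\b')
us_regex = re.compile(r'\bUS\b')
uk_regex = re.compile(r'\bUK\b')

name = 'Austin, TX, USA'
name = usa_regex.sub('United States', name)
name = us_regex.sub('United States', name)
name = uk_regex.sub('United Kingdom', name)
print(name)   # -> Austin, TX, United States

# The word boundary keeps "US" from matching inside longer tokens.
print(us_regex.search('USeful things') is None)   # -> True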

Example 4

Project: lair-drones-version1-deprecated Source File: nexpose.py
def parse(project, nexpose_file, include_informational=False):
    """Parses a Nexpose XMLv2 file and updates the Lair database

    :param project: The project id
    :param nexpose_file: The Nexpose xml file to be parsed
    :param include_informational: Whether to include info findings in data. Default False
    """

    cve_pattern = re.compile(r'(CVE-|CAN-)')
    html_tag_pattern = re.compile(r'<.*?>')
    white_space_pattern = re.compile(r'\s+', re.MULTILINE)

    # Used to create unique notes in DB
    note_id = 1

    tree = et.parse(nexpose_file)
    root = tree.getroot()
    if root is None or \
            root.tag != "NexposeReport" or \
            root.attrib['version'] != "2.0":
        raise IncompatibleDataVersionError("Nexpose XML 2.0")

    # Create the project dictionary which acts as the foundation of the document
    project_dict = dict(models.project_model)
    project_dict['commands'] = list()
    project_dict['vulnerabilities'] = list()
    project_dict['project_id'] = project
    project_dict['commands'].append({'tool': TOOL, 'command': 'scan'})

    # Used to maintain a running list of host:port vulnerabilities by plugin
    vuln_host_map = dict()

    for vuln in root.iter('vulnerability'):
        v = copy.deepcopy(models.vulnerability_model)
        v['cves'] = list()
        v['plugin_ids'] = list()
        v['identified_by'] = list()
        v['hosts'] = list()

        v['cvss'] = float(vuln.attrib['cvssScore'])
        v['title'] = vuln.attrib['title']
        plugin_id = vuln.attrib['id'].lower()

        # Set plugin id
        plugin_dict = dict(models.plugin_id_model)
        plugin_dict['tool'] = TOOL
        plugin_dict['id'] = plugin_id
        v['plugin_ids'].append(plugin_dict)

        # Set identified by information
        identified_dict = dict(models.identified_by_model)
        identified_dict['tool'] = TOOL
        identified_dict['id'] = plugin_id
        v['identified_by'].append(identified_dict)

        # Search for exploits
        for exploit in vuln.iter('exploit'):
            v['flag'] = True
            note_dict = copy.deepcopy(models.note_model)
            note_dict['title'] = "{0} ({1})".format(
                exploit.attrib['type'],
                exploit.attrib['id']
            )
            note_dict['content'] = "{0}\n{1}".format(
                exploit.attrib['title'].encode('ascii', 'replace'),
                exploit.attrib['link'].encode('ascii', 'replace')
            )
            note_dict['last_modified_by'] = TOOL
            v['notes'].append(note_dict)

        # Search for CVE references
        for reference in vuln.iter('reference'):
            if reference.attrib['source'] == 'CVE':
                cve = cve_pattern.sub('', reference.text)
                v['cves'].append(cve)

        # Search for solution
        solution = vuln.find('solution')
        if solution is not None:
            for text in solution.itertext():
                s = text.encode('ascii', 'replace').strip()
                v['solution'] += white_space_pattern.sub(" ", s)

        # Search for description
        description = vuln.find('description')
        if description is not None:
            for text in description.itertext():
                s = text.encode('ascii', 'replace').strip()
                v['description'] += white_space_pattern.sub(" ", s)

        # Build mapping of plugin-id to host to vuln dictionary
        vuln_host_map[plugin_id] = dict()
        vuln_host_map[plugin_id]['vuln'] = v
        vuln_host_map[plugin_id]['hosts'] = set()

    for node in root.iter('node'):

        host_dict = dict(models.host_model)
        host_dict['os'] = list()
        host_dict['ports'] = list()
        host_dict['hostnames'] = list()

        # Set host status
        if node.attrib['status'] != 'alive':
            host_dict['alive'] = False

        # Set IP address
        host_dict['string_addr'] = node.attrib['address']
        host_dict['long_addr'] = helper.ip2long(node.attrib['address'])

        # Set the OS fingerprint
        certainty = 0
        for os in node.iter('os'):
            if float(os.attrib['certainty']) > certainty:
                certainty = float(os.attrib['certainty'])
                os_dict = dict(models.os_model)
                os_dict['tool'] = TOOL
                os_dict['weight'] = OS_WEIGHT

                fingerprint = ''
                if 'vendor' in os.attrib:
                    fingerprint += os.attrib['vendor'] + " "

                # Make an extra check to limit duplication of data in the
                # event that the product name was already in the vendor name
                if 'product' in os.attrib and \
                        os.attrib['product'] not in fingerprint:
                    fingerprint += os.attrib['product'] + " "

                fingerprint = fingerprint.strip()
                os_dict['fingerprint'] = fingerprint

                host_dict['os'] = list()
                host_dict['os'].append(os_dict)

        # Test for general, non-port related vulnerabilities
        # Add them as tcp, port 0
        tests = node.find('tests')
        if tests is not None:
            port_dict = dict(models.port_model)
            port_dict['service'] = "general"

            for test in tests.findall('test'):
                # vulnerable-since attribute is used to flag
                # confirmed vulns
                if 'vulnerable-since' in test.attrib:
                    plugin_id = test.attrib['id'].lower()

                    # This is used to track evidence for the host/port
                    # and plugin
                    h = "{0}:{1}:{2}".format(
                        host_dict['string_addr'],
                        "0",
                        models.PROTOCOL_TCP
                    )
                    vuln_host_map[plugin_id]['hosts'].add(h)

            host_dict['ports'].append(port_dict)

        # Use the endpoint elements to populate port data
        for endpoint in node.iter('endpoint'):
            port_dict = copy.deepcopy(models.port_model)
            port_dict['port'] = int(endpoint.attrib['port'])
            port_dict['protocol'] = endpoint.attrib['protocol']
            if endpoint.attrib['status'] != 'open':
                port_dict['alive'] = False

            # Use the service elements to identify service
            for service in endpoint.iter('service'):

                # Ignore unknown services
                if 'unknown' not in service.attrib['name'].lower():
                    if not port_dict['service']:
                        port_dict['service'] = service.attrib['name'].lower()

                # Use the test elements to identify vulnerabilities for
                # the host
                for test in service.iter('test'):
                    # vulnerable-since attribute is used to flag
                    # confirmed vulns
                    if 'vulnerable-since' in test.attrib:
                        plugin_id = test.attrib['id'].lower()

                        # Add service notes for evidence
                        note_dict = copy.deepcopy(models.note_model)
                        note_dict['title'] = "{0} (ID{1})".format(plugin_id,
                                                              str(note_id))
                        for evidence in test.iter():
                            if evidence.text:
                                for line in evidence.text.split("\n"):
                                    line = line.strip()
                                    if line:
                                        note_dict['content'] += "    " + \
                                                                line + "\n"
                            elif evidence.tag == "URLLink":
                                note_dict['content'] += "    "
                                note_dict['content'] += evidence.attrib[
                                                            'LinkURL'
                                                        ] + "\n"

                        note_dict['last_modified_by'] = TOOL
                        port_dict['notes'].append(note_dict)
                        note_id += 1

                        # This is used to track evidence for the host/port
                        # and plugin
                        h = "{0}:{1}:{2}".format(
                            host_dict['string_addr'],
                            str(port_dict['port']),
                            port_dict['protocol']
                        )
                        vuln_host_map[plugin_id]['hosts'].add(h)

            # Use the fingerprint elements to identify product
            certainty = 0
            for fingerprint in endpoint.iter('fingerprint'):
                if float(fingerprint.attrib['certainty']) > certainty:
                    certainty = float(fingerprint.attrib['certainty'])
                    prod = ''
                    if 'vendor' in fingerprint.attrib:
                        prod += fingerprint.attrib['vendor'] + " "

                    if 'product' in fingerprint.attrib:
                        prod += fingerprint.attrib['product'] + " "

                    if 'version' in fingerprint.attrib:
                        prod += fingerprint.attrib['version'] + " "

                    prod = prod.strip()
                    port_dict['product'] = prod

            host_dict['ports'].append(port_dict)

        project_dict['hosts'].append(host_dict)

    # This code block uses the plugin/host/vuln mapping to associate
    # all vulnerable hosts to their vulnerability data within the
    # context of the expected Lair schema structure.
    for plugin_id, data in vuln_host_map.items():

        # Build list of host and ports affected by vulnerability and
        # assign that list to the vulnerability model
        for key in data['hosts']:
            (string_addr, port, protocol) = key.split(':')

            host_key_dict = dict(models.host_key_model)
            host_key_dict['string_addr'] = string_addr
            host_key_dict['port'] = int(port)
            host_key_dict['protocol'] = protocol
            data['vuln']['hosts'].append(host_key_dict)

        # By default, don't include informational findings unless
        # explicitly told to do so.
        if data['vuln']['cvss'] == 0 and not include_informational:
            continue

        project_dict['vulnerabilities'].append(data['vuln'])

    return project_dict
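
This example compiles its cleanup patterns up front and applies them with sub while walking the XML. A minimal sketch of two of them, with made-up input text:

import re

cve_pattern = re.compile(r'(CVE-|CAN-)')
white_space_pattern = re.compile(r'\s+', re.MULTILINE)

# Strip the reference-source prefix, keeping only the identifier.
print(cve_pattern.sub('', 'CVE-2014-0160'))   # -> 2014-0160

# Collapse runs of whitespace (including newlines) into single spaces.
description = 'Update the  affected\npackages   to the\nlatest version.'
print(white_space_pattern.sub(' ', description))
# -> Update the affected packages to the latest version.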

Example 5

Project: gramps Source File: importgeneweb.py
    def parse_person(self,fields,idx,gender,father_surname):

        if not father_surname:
            if not idx < len(fields):
                LOG.warning("Missing surname of person in line %d!" % self.lineno)
                surname =""
            else:
                surname = self.decode(fields[idx])
            idx += 1
        else:
            surname = father_surname

        if not idx < len(fields):
            LOG.warning("Missing firstname of person in line %d!" % self.lineno)
            firstname = ""
        else:
            firstname = self.decode(fields[idx])
        idx += 1
        if idx < len(fields) and father_surname:
            noSurnameRe = re.compile(r"^[({\[~><?0-9#].*$")
            if not noSurnameRe.match(fields[idx]):
                surname = self.decode(fields[idx])
                idx += 1

        LOG.debug("Person: %s %s" % (firstname, surname))
        person = self.get_or_create_person(firstname,surname)
        name = Name()
        name.set_type( NameType(NameType.BIRTH))
        name.set_first_name(firstname)
        surname_obj = name.get_primary_surname()
        surname_obj.set_surname(surname)
        person.set_primary_name(name)
        if person.get_gender() == Person.UNKNOWN and gender is not None:
            person.set_gender(gender)
        self.db.commit_person(person,self.trans)
        personDataRe = re.compile(r"^[kmes0-9<>~#\[({!].*$")
        dateRe = re.compile("^[kmes0-9~<>?]+.*$")

        source = None
        birth_parsed = False
        birth_date = None
        birth_place = None
        birth_source = None

        bapt_date = None
        bapt_place = None
        bapt_source = None

        death_date = None
        death_place = None
        death_source = None
        death_cause = None

        crem_date = None
        bur_date = None
        bur_place = None
        bur_source = None

        public_name = None
        firstname_aliases = []
        nick_names = []
        name_aliases = []
        surname_aliases = []

        while idx < len(fields) and personDataRe.match(fields[idx]):
            field = fields[idx]
            idx += 1
            if field.startswith('('):
                LOG.debug("Public Name: %s" % field)
                public_name = self.decode(field[1:-1])
            elif field.startswith('{'):
                LOG.debug("Firstsname Alias: %s" % field)
                firstname_aliases.append(self.decode(field[1:-1]))
            elif field.startswith('['):
                LOG.debug("Title: %s" % field)
                titleparts = self.decode(field[1:-1]).split(":")
                tname = ttitle = tplace = tstart = tend = tnth = None
                try:
                    tname =  titleparts[0]
                    ttitle = titleparts[1]
                    if titleparts[2]:
                        tplace = self.get_or_create_place(titleparts[2])
                    tstart = self.parse_date(titleparts[3])
                    tend =   self.parse_date(titleparts[4])
                    tnth =   titleparts[5]
                except IndexError:  # not all parts are written all the time
                    pass
                if tnth:    # Append title number to title
                    ttitle += ", " + tnth
                title = self.create_event(
                           EventType.NOB_TITLE, ttitle, tstart, tplace)
                # TODO: Geneweb has a start date and an end date, and therefore
                # supports stuff like: FROM about 1955 TO between 1998 and 1999
                # gramps only supports one single date or range.
                if tname and tname != "*":
                    n = Note()
                    n.set(tname)
                    self.db.add_note(n,self.trans)
                    title.add_note( n.handle)
                title_ref = EventRef()
                title_ref.set_reference_handle(title.get_handle())
                person.add_event_ref(title_ref)
            elif field == '#nick' and idx < len(fields):
                LOG.debug("Nick Name: %s" % fields[idx])
                nick_names.append(self.decode(fields[idx]))
                idx += 1
            elif field == '#occu' and idx < len(fields):
                LOG.debug("Occupation: %s" % fields[idx])
                occu = self.create_event(
                        EventType.OCCUPATION, self.decode(fields[idx]))
                occu_ref = EventRef()
                occu_ref.set_reference_handle(occu.get_handle())
                person.add_event_ref(occu_ref)
                idx += 1
            elif field == '#alias' and idx < len(fields):
                LOG.debug("Name Alias: %s" % fields[idx])
                name_aliases.append(self.decode(fields[idx]))
                idx += 1
            elif field == '#salias' and idx < len(fields):
                LOG.debug("Surname Alias: %s" % fields[idx])
                surname_aliases.append(self.decode(fields[idx]))
                idx += 1
            elif field == '#image' and idx < len(fields):
                LOG.debug("Image: %s" % fields[idx])
                idx += 1
            elif field == '#src' and idx < len(fields):
                LOG.debug("Source: %s" % fields[idx])
                source = self.get_or_create_source(self.decode(fields[idx]))
                idx += 1
            elif field == '#bs' and idx < len(fields):
                LOG.debug("Birth Source: %s" % fields[idx])
                birth_source = self.get_or_create_source(self.decode(fields[idx]))
                idx += 1
            elif field[0] == '!':
                LOG.debug("Baptize at: %s" % field[1:])
                bapt_date = self.parse_date(self.decode(field[1:]))
            elif field == '#bp' and idx < len(fields):
                LOG.debug("Birth Place: %s" % fields[idx])
                birth_place = self.get_or_create_place(self.decode(fields[idx]))
                idx += 1
            elif field == '#pp' and idx < len(fields):
                LOG.debug("Baptize Place: %s" % fields[idx])
                bapt_place = self.get_or_create_place(self.decode(fields[idx]))
                idx += 1
            elif field == '#ps' and idx < len(fields):
                LOG.debug("Baptize Source: %s" % fields[idx])
                bapt_source = self.get_or_create_source(self.decode(fields[idx]))
                idx += 1
            elif field == '#dp' and idx < len(fields):
                LOG.debug("Death Place: %s" % fields[idx])
                death_place = self.get_or_create_place(self.decode(fields[idx]))
                idx += 1
            elif field == '#ds' and idx < len(fields):
                LOG.debug("Death Source: %s" % fields[idx])
                death_source = self.get_or_create_source(self.decode(fields[idx]))
                idx += 1
            elif field == '#buri' and idx < len(fields):
                if fields[idx][0]!='#': # bug in GeneWeb: empty #buri fields
                    LOG.debug("Burial Date: %s" % fields[idx])
                    bur_date = self.parse_date(self.decode(fields[idx]))
                    idx += 1
            elif field == '#crem' and idx < len(fields):
                LOG.debug("Cremention Date: %s" % fields[idx])
                crem_date = self.parse_date(self.decode(fields[idx]))
                idx += 1
            elif field == '#rp' and idx < len(fields):
                LOG.debug("Burial Place: %s" % fields[idx])
                bur_place = self.get_or_create_place(self.decode(fields[idx]))
                idx += 1
            elif field == '#rs' and idx < len(fields):
                LOG.debug("Burial Source: %s" % fields[idx])
                bur_source = self.get_or_create_source(self.decode(fields[idx]))
                idx += 1
            elif field == '#apubl':
                LOG.debug("This is a public record")
            elif field == '#apriv':
                LOG.debug("This is a private record")
                person.set_privacy(True)
            elif field == '#h':
                LOG.debug("This is a restricted record")
                #TODO: Gramps does currently not feature this level
                person.set_privacy(True)
            elif dateRe.match(field):
                if not birth_parsed:
                    LOG.debug("Birth Date: %s" % field)
                    birth_date = self.parse_date(self.decode(field))
                    birth_parsed = True
                else:
                    LOG.debug("Death Date: %s" % field)
                    death_date = self.parse_date(self.decode(field))
                    if field == "mj":
                        death_cause = "Died joung"
                    elif field.startswith("k"):
                        death_cause = "Killed"
                    elif field.startswith("m"):
                        death_cause = "Murdered"
                    elif field.startswith("e"):
                        death_cause = "Executed"
                    elif field.startswith("d"):
                        death_cause = "Disappeared"
                    #TODO: Set special death types more properly
            else:
                LOG.warning(("parse_person(): Unknown field " +
                          "'%s' for person in line %d!") % (field, self.lineno))

        if public_name:
            name = person.get_primary_name()
            name.set_type(NameType(NameType.BIRTH))
            person.add_alternate_name(name)
            name = Name()
            name.set_type(NameType(NameType.AKA))
            name.set_first_name(public_name)
            surname_obj = name.get_primary_surname()
            surname_obj.set_surname(surname)
            person.set_primary_name(name)

        for aka in nick_names:
            name = Attribute()
            name.set_type(AttributeType(AttributeType.NICKNAME))
            name.set_value(aka)
            person.add_attribute(name)

        for aka in firstname_aliases:
            name = Name()
            name.set_type(NameType(NameType.AKA))
            name.set_first_name(aka)
            surname_obj = name.get_primary_surname()
            surname_obj.set_surname(surname)
            person.add_alternate_name(name)

        for aka in name_aliases:
            name = Name()
            name.set_type(NameType(NameType.AKA))
            name.set_first_name(aka)
            surname_obj = name.get_primary_surname()
            surname_obj.set_surname(surname)
            person.add_alternate_name(name)

        for aka in surname_aliases:
            name = Name()
            name.set_type(NameType(NameType.AKA))
            if public_name:
                name.set_first_name(public_name)
            else:
                name.set_first_name(firstname)
            surname_obj = name.get_primary_surname()
            surname_obj.set_surname(aka)
            person.add_alternate_name(name)

        if source:
            person.add_citation(source.get_handle())

        if birth_date or birth_place or birth_source:
            birth = self.create_event(EventType.BIRTH, None, birth_date, birth_place, birth_source)
            birth_ref = EventRef()
            birth_ref.set_reference_handle( birth.get_handle())
            person.set_birth_ref( birth_ref)

        if bapt_date or bapt_place or bapt_source:
            babt = self.create_event(EventType.BAPTISM, None, bapt_date, bapt_place, bapt_source)
            babt_ref = EventRef()
            babt_ref.set_reference_handle( babt.get_handle())
            person.add_event_ref( babt_ref)

        if death_date or death_place or death_source or death_cause:
            death = self.create_event(EventType.DEATH, None, death_date, death_place, death_source)
            if death_cause:
                death.set_description(death_cause)
                self.db.commit_event(death,self.trans)
            death_ref = EventRef()
            death_ref.set_reference_handle( death.get_handle())
            person.set_death_ref( death_ref)

        if bur_date:
            bur = self.create_event(EventType.BURIAL, None, bur_date, bur_place, bur_source)
            bur_ref = EventRef()
            bur_ref.set_reference_handle( bur.get_handle())
            person.add_event_ref( bur_ref)

        if crem_date:
            crem = self.create_event(EventType.CREMATION, None, crem_date, bur_place, bur_source)
            crem_ref = EventRef()
            crem_ref.set_reference_handle( crem.get_handle())
            person.add_event_ref(crem_ref)

        self.db.commit_person(person,self.trans)

        return (idx,person)
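
In this example, personDataRe is an anchored character-class pattern whose only job is to decide whether the next field still belongs to the current person; the while loop stops at the first field that does not match. Reduced to a minimal sketch with a made-up field list:

import re

# Fields belonging to the current person start with one of these marker characters.
person_data_re = re.compile(r'^[kmes0-9<>~#\[({!].*$')

fields = ['#occu', '1852', 'NextSurname']
idx = 0
while idx < len(fields) and person_data_re.match(fields[idx]):
    print('person field:', fields[idx])
    idx += 1
print('stopped at:', fields[idx])   # -> stopped at: NextSurname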

Example 6

Project: uberwriter Source File: UberwriterInlinePreview.py
    def populate_popup(self, editor, menu, data=None):
        # popover = Gtk.Popover.new(editor)
        # pop_cont = Gtk.Container.new()
        # popover.add(pop_cont)
        # popover.show_all()

        item = Gtk.MenuItem.new()
        item.set_name("PreviewMenuItem")
        separator = Gtk.SeparatorMenuItem.new()

        # table_item = Gtk.MenuItem.new()
        # table_item.set_label('Fix that table')

        # table_item.connect('activate', self.fix_table)
        # table_item.show()
        # menu.prepend(table_item)
        # menu.show()

        start_iter = self.TextBuffer.get_iter_at_mark(self.ClickMark)
        # Line offset of click mark
        line_offset = start_iter.get_line_offset()
        end_iter = start_iter.copy()
        start_iter.set_line_offset(0)
        end_iter.forward_to_line_end()

        text = self.TextBuffer.get_text(start_iter, end_iter, False)

        math = MarkupBuffer.regex["MATH"]
        link = MarkupBuffer.regex["LINK"]

        footnote = re.compile(r'\[\^([^\s]+?)\]')
        image = re.compile(r"!\[(.+?)\]\((.+?)\)")

        buf = self.TextBuffer
        context_offset = 0

        matchlist = []

        found_match = False

        matches = re.finditer(math, text)
        for match in matches:
            logger.debug(match.group(1))
            if match.start() < line_offset and match.end() > line_offset:
                success, result = self.LatexConverter.generatepng(match.group(1))
                if success:
                    image = Gtk.Image.new_from_file(result)
                    image.show()
                    logger.debug("logging image")
                    # item.add(image)
                    self.open_popover_with_widget(image)
                else:
                    label = Gtk.Label()
                    msg = 'Formula looks incorrect:\n' + result
                    label.set_alignment(0.0, 0.5)
                    label.set_text(msg)
                    label.show()
                    item.add(label)
                item.show()
                menu.prepend(separator)
                separator.show()
                menu.prepend(item)
                menu.show()
                found_match = True
                break

        if not found_match:
            # Links
            matches = re.finditer(link, text)
            for match in matches:
                if match.start() < line_offset and match.end() > line_offset:
                    text = text[text.find("http://"):-1]

                    item.connect("activate", lambda w: webbrowser.open(text))

                    logger.debug(text)

                    statusitem = Gtk.MenuItem.new()
                    statusitem.show()

                    spinner = Gtk.Spinner.new()
                    spinner.start()
                    statusitem.add(spinner)
                    spinner.show()
                    
                    thread = threading.Thread(target=check_url, 
                        args=(text, statusitem, spinner))
                    thread.start()

                    webphoto_item = Gtk.MenuItem.new()
                    webphoto_item.show()
                    spinner_2 = Gtk.Spinner.new()
                    spinner_2.start()
                    webphoto_item.add(spinner_2)
                    spinner_2.show()

                    thread_image = threading.Thread(target=get_web_thumbnail, 
                        args=(text, webphoto_item, spinner_2))

                    thread_image.start()

                    item.set_label(_("Open Link in Webbrowser"))
                    item.show()
    
                    menu.prepend(separator)
                    separator.show()

                    menu.prepend(webphoto_item)
                    menu.prepend(statusitem)
                    menu.prepend(item)
                    menu.show()


                    found_match = True
                    break

        if not found_match:
            matches = re.finditer(image, text)
            for match in matches:
                if match.start() < line_offset and match.end() > line_offset:
                    path = match.group(2)
                    if path.startswith("file://"):
                        path = path[7:]
                    logger.info(path)
                    pb = GdkPixbuf.Pixbuf.new_from_file_at_size(path, 400, 300)
                    image = Gtk.Image.new_from_pixbuf(pb)
                    image.show()
                    self.open_popover_with_widget(image)
                    item.set_property('width-request', 50)

                    # item.add(image)
                    # item.set_property('width-request', 50)
                    # item.show()
                    # menu.prepend(separator)
                    # separator.show()
                    # menu.prepend(item)
                    # menu.show()
                    found_match = True
                    break

        if not found_match:
            matches = re.finditer(footnote, text)
            for match in matches:
                if match.start() < line_offset and match.end() > line_offset:
                    logger.debug(match.group(1))
                    footnote_match = re.compile(r"\[\^" + match.group(1) + r"\]: (.+(?:\n|\Z)(?:^[\t].+(?:\n|\Z))*)", re.MULTILINE)
                    replace = re.compile(r"^\t", re.MULTILINE)
                    start, end = self.TextBuffer.get_bounds()
                    fn_match = re.search(footnote_match, self.TextBuffer.get_text(start, end, False))
                    label = Gtk.Label()
                    label.set_alignment(0.0, 0.5)
                    logger.debug(fn_match)
                    if fn_match:
                        result = re.sub(replace, "", fn_match.group(1))
                        if result.endswith("\n"):
                            result = result[:-1]
                    else:
                        result = _("No matching footnote found")
                    label.set_max_width_chars(40)
                    label.set_line_wrap(True)
                    label.set_text(result)
                    label.show()
                    item.add(label)
                    item.show()

                    menu.prepend(separator)
                    separator.show()
                    menu.prepend(item)
                    menu.show()
                    found_match = True
                    break

        if not found_match:
            start_iter = self.TextBuffer.get_iter_at_mark(self.ClickMark)
            start_iter.backward_word_start()
            end_iter = start_iter.copy()
            end_iter.forward_word_end()
            word = self.TextBuffer.get_text(start_iter, end_iter, False)
            terms = get_dictionary(word)
            if terms:
                sc = Gtk.ScrolledWindow.new()
                sc.add(fill_lexikon_bubble(word, terms))
                sc.props.width_request = 500
                sc.props.height_request = 400
                sc.show_all()
                self.open_popover_with_widget(sc)

        return

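The footnote branch above composes its pattern at runtime from the clicked footnote's identifier and searches the whole buffer for the matching definition. A minimal standalone sketch of that lookup, using a plain string in place of the Gtk text buffer (the sample text and the "[^1]" identifier are made up for illustration):

import re

text = "See the note.[^1]\n\n[^1]: This is the footnote body.\n\tIt continues on an indented line.\n"

# Same shape as the pattern built above: the definition line plus any
# tab-indented continuation lines, matched in MULTILINE mode.
footnote_match = re.compile(r"\[\^1\]: (.+(?:\n|\Z)(?:^[\t].+(?:\n|\Z))*)", re.MULTILINE)
replace = re.compile(r"^\t", re.MULTILINE)

fn_match = footnote_match.search(text)
if fn_match:
    # Strip the leading tab from continuation lines, as the editor does.
    result = re.sub(replace, "", fn_match.group(1)).rstrip("\n")
    print(result)
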
Example 7

Project: Arelle Source File: DTS.py
def checkFilingDTS(val, modelDocument, visited):
    global targetNamespaceDatePattern, efmFilenamePattern, roleTypePattern, arcroleTypePattern, \
            arcroleDefinitionPattern, namePattern, linkroleDefinitionBalanceIncomeSheet, \
            namespacesConflictPattern
    if targetNamespaceDatePattern is None:
        targetNamespaceDatePattern = re.compile(r"/([12][0-9]{3})-([01][0-9])-([0-3][0-9])|"
                                            r"/([12][0-9]{3})([01][0-9])([0-3][0-9])|")
        efmFilenamePattern = re.compile(r"^[a-z0-9][a-zA-Z0-9_\.\-]*(\.xsd|\.xml)$")
        roleTypePattern = re.compile(r"^.*/role/[^/\s]+$")
        arcroleTypePattern = re.compile(r"^.*/arcrole/[^/\s]+$")
        arcroleDefinitionPattern = re.compile(r"^.*\S.*$")  # at least one non-whitespace character
        namePattern = re.compile("[][()*+?\\\\/^{}|@#%^=~`\"';:,<>&$\u00a3\u20ac]") # u20ac=Euro, u00a3=pound sterling 
        linkroleDefinitionBalanceIncomeSheet = re.compile(r"[^-]+-\s+Statement\s+-\s+.*(income|balance|financial\W+position)",
                                                          re.IGNORECASE)
        namespacesConflictPattern = re.compile(r"http://(xbrl\.us|fasb\.org|xbrl\.sec\.gov)/(dei|us-types|us-roles|rr)/([0-9]{4}-[0-9]{2}-[0-9]{2})$")
        
    visited.append(modelDocument)
    for referencedDocument, modelDocumentReference in modelDocument.referencesDocument.items():
        #6.07.01 no includes
        if modelDocumentReference.referenceType == "include":
            val.modelXbrl.error("SBR.NL.2.2.0.18",
                _("Taxonomy schema %(schema)s includes %(include)s, only import is allowed"),
                modelObject=modelDocumentReference.referringModelObject,
                    schema=os.path.basename(modelDocument.uri),
                    include=os.path.basename(referencedDocument.uri))
        if referencedDocument not in visited:
            checkFilingDTS(val, referencedDocument, visited)
            
    if val.disclosureSystem.standardTaxonomiesDict is None:
        pass

    if (modelDocument.type == ModelDocument.Type.SCHEMA and
        modelDocument.targetNamespace not in val.disclosureSystem.baseTaxonomyNamespaces and
        modelDocument.uri.startswith(val.modelXbrl.uriDir)):
        
        # check schema contents types
        definesLinkroles = False
        definesArcroles = False
        definesLinkParts = False
        definesAbstractItems = False
        definesNonabstractItems = False
        definesConcepts = False
        definesTuples = False
        definesPresentationTuples = False
        definesSpecificationTuples = False
        definesTypes = False
        definesEnumerations = False
        definesDimensions = False
        definesDomains = False
        definesHypercubes = False
                
        genrlSpeclRelSet = val.modelXbrl.relationshipSet(XbrlConst.generalSpecial)
        for modelConcept in modelDocument.xmlRootElement.iterdescendants(tag="{http://www.w3.org/2001/XMLSchema}element"):
            if isinstance(modelConcept,ModelConcept):
                # 6.7.16 name not duplicated in standard taxonomies
                name = modelConcept.get("name")
                if name is None: 
                    name = ""
                    if modelConcept.get("ref") is not None:
                        continue    # don't validate ref's here
                for c in val.modelXbrl.nameConcepts.get(name, []):
                    if c.modelDocument != modelDocument:
                        if not (genrlSpeclRelSet.isRelated(modelConcept, "child", c) or genrlSpeclRelSet.isRelated(c, "child", modelConcept)):
                            val.modelXbrl.error("SBR.NL.2.2.2.02",
                                _("Concept %(concept)s is also defined in standard taxonomy schema %(standardSchema)s without a general-special relationship"),
                                modelObject=c, concept=modelConcept.qname, standardSchema=os.path.basename(c.modelDocument.uri))
                ''' removed RH 2011-12-23 corresponding set up of table in ValidateFiling
                if val.validateSBRNL and name in val.nameWordsTable:
                    if not any( any( genrlSpeclRelSet.isRelated(c, "child", modelConcept)
                                     for c in val.modelXbrl.nameConcepts.get(partialWordName, []))
                                for partialWordName in val.nameWordsTable[name]):
                        val.modelXbrl.error("SBR.NL.2.3.2.01",
                            _("Concept %(specialName)s is appears to be missing a general-special relationship to %(generalNames)s"),
                            modelObject=c, specialName=modelConcept.qname, generalNames=', or to '.join(val.nameWordsTable[name]))
                '''

                if modelConcept.isTuple:
                    if modelConcept.substitutionGroupQname.localName == "presentationTuple" and modelConcept.substitutionGroupQname.namespaceURI.endswith("/basis/sbr/xbrl/xbrl-syntax-extension"): # namespace may change each year
                        definesPresentationTuples = True
                    elif modelConcept.substitutionGroupQname.localName == "specificationTuple" and modelConcept.substitutionGroupQname.namespaceURI.endswith("/basis/sbr/xbrl/xbrl-syntax-extension"): # namespace may change each year
                        definesSpecificationTuples = True
                    else:
                        definesTuples = True
                    definesConcepts = True
                    if modelConcept.isAbstract:
                        val.modelXbrl.error("SBR.NL.2.2.2.03",
                            _("Concept %(concept)s is an abstract tuple"),
                            modelObject=modelConcept, concept=modelConcept.qname)
                    if tupleCycle(val,modelConcept):
                        val.modelXbrl.error("SBR.NL.2.2.2.07",
                            _("Tuple %(concept)s has a tuple cycle"),
                            modelObject=modelConcept, concept=modelConcept.qname)
                    if modelConcept.get("nillable") != "false" and modelConcept.isRoot:
                        val.modelXbrl.error("SBR.NL.2.2.2.17", #don't want default, just what was really there
                            _("Tuple %(concept)s must have nillable='false'"),
                            modelObject=modelConcept, concept=modelConcept.qname)
                elif modelConcept.isItem:
                    definesConcepts = True
                if modelConcept.abstract == "true":
                    if modelConcept.isRoot:
                        if modelConcept.get("nillable") != "false": #don't want default, just what was really there
                            val.modelXbrl.error("SBR.NL.2.2.2.16",
                                _("Abstract root concept %(concept)s must have nillable='false'"),
                            modelObject=modelConcept, concept=modelConcept.qname)
                        if modelConcept.typeQname != XbrlConst.qnXbrliStringItemType:
                            val.modelXbrl.error("SBR.NL.2.2.2.21",
                                _("Abstract root concept %(concept)s must have type='xbrli:stringItemType'"),
                            modelObject=modelConcept, concept=modelConcept.qname)
                    if modelConcept.balance:
                        val.modelXbrl.error("SBR.NL.2.2.2.22",
                            _("Abstract concept %(concept)s must not have a balance attribute"),
                            modelObject=modelConcept, concept=modelConcept.qname)
                    if modelConcept.isHypercubeItem:
                        definesHypercubes = True
                    elif modelConcept.isDimensionItem:
                        definesDimensions = True
                    elif modelConcept.substitutionGroupQname and modelConcept.substitutionGroupQname.localName in ("domainItem","domainMemberItem"):
                        definesDomains = True
                    elif modelConcept.isItem:
                        definesAbstractItems = True
                else:   # not abstract
                    if modelConcept.isItem:
                        definesNonabstractItems = True
                        if not (modelConcept.label(preferredLabel=XbrlConst.documentationLabel,fallbackToQname=False,lang="nl") or
                                val.modelXbrl.relationshipSet(XbrlConst.conceptReference).fromModelObject(c) or
                                modelConcept.genLabel(role=XbrlConst.genDocumentationLabel,lang="nl") or
                                val.modelXbrl.relationshipSet(XbrlConst.elementReference).fromModelObject(c)):
                            val.modelXbrl.error("SBR.NL.2.2.2.28",
                                _("Concept %(concept)s must have a docuementation label or reference"),
                                modelObject=modelConcept, concept=modelConcept.qname)
                if modelConcept.balance and not modelConcept.instanceOfType(XbrlConst.qnXbrliMonetaryItemType):
                    val.modelXbrl.error("SBR.NL.2.2.2.24",
                        _("Non-monetary concept %(concept)s must not have a balance attribute"),
                        modelObject=modelConcept, concept=modelConcept.qname)
                if modelConcept.isLinkPart:
                    definesLinkParts = True
                    val.modelXbrl.error("SBR.NL.2.2.5.01",
                        _("Link:part concept %(concept)s is not allowed"),
                        modelObject=modelConcept, concept=modelConcept.qname)
                    if not modelConcept.genLabel(fallbackToQname=False,lang="nl"):
                        val.modelXbrl.error("SBR.NL.2.2.5.02",
                            _("Link part definition %(concept)s must have a generic label in language 'nl'"),
                            modelObject=modelConcept, concept=modelConcept.qname)

        # 6.7.9 role types authority
        for e in modelDocument.xmlRootElement.iterdescendants(tag="{http://www.xbrl.org/2003/linkbase}roleType"):
            if isinstance(e,ModelObject):
                roleURI = e.get("roleURI")
                # 6.7.10 only one role type declaration in DTS
                modelRoleTypes = val.modelXbrl.roleTypes.get(roleURI)
                if modelRoleTypes is not None:
                    modelRoleType = modelRoleTypes[0]
                    definition = modelRoleType.definitionNotStripped
                    usedOns = modelRoleType.usedOns
                    if usedOns & XbrlConst.standardExtLinkQnames or XbrlConst.qnGenLink in usedOns:
                        definesLinkroles = True
                        if not e.genLabel():
                            val.modelXbrl.error("SBR.NL.2.2.3.03",
                                _("Link RoleType %(roleType)s missing a generic standard label"),
                                modelObject=e, roleType=roleURI)
                        nlLabel = e.genLabel(lang="nl")
                        if definition != nlLabel:
                            val.modelXbrl.error("SBR.NL.2.2.3.04",
                                _("Link RoleType %(roleType)s definition does not match NL standard generic label, \ndefinition: %(definition)s \nNL label: %(label)s"),
                                modelObject=e, roleType=roleURI, definition=definition, label=nlLabel)
                    if definition and (definition[0].isspace() or definition[-1].isspace()):
                        val.modelXbrl.error("SBR.NL.2.2.3.07",
                            _('Link RoleType %(roleType)s definition has leading or trailing spaces: "%(definition)s"'),
                            modelObject=e, roleType=roleURI, definition=definition)

        # 6.7.13 arcrole types authority
        for e in modelDocument.xmlRootElement.iterdescendants(tag="{http://www.xbrl.org/2003/linkbase}arcroleType"):
            if isinstance(e,ModelObject):
                arcroleURI = e.get("arcroleURI")
                definesArcroles = True
                val.modelXbrl.error("SBR.NL.2.2.4.01",
                    _("Arcrole type definition is not allowed: %(arcroleURI)s"),
                    modelObject=e, arcroleURI=arcroleURI)
                    
        for appinfoElt in modelDocument.xmlRootElement.iter(tag="{http://www.w3.org/2001/XMLSchema}appinfo"):
            for nonLinkElt in appinfoElt.iterdescendants():
                if isinstance(nonLinkElt, ModelObject) and nonLinkElt.namespaceURI != XbrlConst.link:
                    val.modelXbrl.error("SBR.NL.2.2.11.05",
                        _("Appinfo contains disallowed non-link element %(element)s"),
                        modelObject=nonLinkElt, element=nonLinkElt.qname)

        for cplxTypeElt in modelDocument.xmlRootElement.iter(tag="{http://www.w3.org/2001/XMLSchema}complexType"):
            choiceElt = cplxTypeElt.find("{http://www.w3.org/2001/XMLSchema}choice")
            if choiceElt is not None:
                val.modelXbrl.error("SBR.NL.2.2.11.09",
                    _("ComplexType contains disallowed xs:choice element"),
                    modelObject=choiceElt)
                
        for cplxContentElt in modelDocument.xmlRootElement.iter(tag="{http://www.w3.org/2001/XMLSchema}complexContent"):
            if XmlUtil.descendantAttr(cplxContentElt, "http://www.w3.org/2001/XMLSchema", ("extension","restriction"), "base") != "sbr:placeholder":
                val.modelXbrl.error("SBR.NL.2.2.11.10",
                    _("ComplexContent is disallowed"),
                    modelObject=cplxContentElt)

        for typeEltTag in ("{http://www.w3.org/2001/XMLSchema}complexType",
                            "{http://www.w3.org/2001/XMLSchema}simpleType"):
            for typeElt in modelDocument.xmlRootElement.iter(tag=typeEltTag):
                definesTypes = True
                name = typeElt.get("name")
                if name:
                    if not name[0].islower() or not name.isalnum():
                        val.modelXbrl.error("SBR.NL.3.2.8.09",
                            _("Type name attribute must be lower camelcase: %(name)s."),
                            modelObject=typeElt, name=name)
        
        for enumElt in modelDocument.xmlRootElement.iter(tag="{http://www.w3.org/2001/XMLSchema}enumeration"):
            definesEnumerations = True
            if any(not valueElt.genLabel(lang="nl")
                   for valueElt in enumElt.iter(tag="{http://www.w3.org/2001/XMLSchema}value")):
                val.modelXbrl.error("SBR.NL.2.2.7.05",
                    _("Enumeration element has value(s) without generic label."),
                    modelObject=enumElt)

        if (definesLinkroles + definesArcroles + definesLinkParts +
            definesAbstractItems + definesNonabstractItems + 
            definesTuples + definesPresentationTuples + definesSpecificationTuples + definesTypes +
            definesEnumerations + definesDimensions + definesDomains + 
            definesHypercubes) != 1:
            schemaContents = []
            if definesLinkroles: schemaContents.append(_("linkroles"))
            if definesArcroles: schemaContents.append(_("arcroles"))
            if definesLinkParts: schemaContents.append(_("link parts"))
            if definesAbstractItems: schemaContents.append(_("abstract items"))
            if definesNonabstractItems: schemaContents.append(_("nonabstract items"))
            if definesTuples: schemaContents.append(_("tuples"))
            if definesPresentationTuples: schemaContents.append(_("sbrPresentationTuples"))
            if definesSpecificationTuples: schemaContents.append(_("sbrSpecificationTuples"))
            if definesTypes: schemaContents.append(_("types"))
            if definesEnumerations: schemaContents.append(_("enumerations"))
            if definesDimensions: schemaContents.append(_("dimensions"))
            if definesDomains: schemaContents.append(_("domains"))
            if definesHypercubes: schemaContents.append(_("hypercubes"))
            if schemaContents:
                if not ((definesTuples or definesPresentationTuples or definesSpecificationTuples) and
                        not (definesLinkroles or definesArcroles or definesLinkParts or definesAbstractItems or
                             definesTypes or definesDimensions or definesDomains or definesHypercubes)):
                    val.modelXbrl.error("SBR.NL.2.2.1.01",
                        _("Taxonomy schema may only define one of these: %(contents)s"),
                        modelObject=modelDocument, contents=', '.join(schemaContents))
            elif not any(refDoc.inDTS and refDoc.targetNamespace not in val.disclosureSystem.baseTaxonomyNamespaces
                         for refDoc in modelDocument.referencesDocument.keys()): # no linkbase ref or includes
                val.modelXbrl.error("SBR.NL.2.2.1.01",
                    _("Taxonomy schema must be a DTS entrypoint OR define linkroles OR arcroles OR link:parts OR context fragments OR abstract items OR tuples OR non-abstract elements OR types OR enumerations OR dimensions OR domains OR hypercubes"),
                    modelObject=modelDocument)
        if definesConcepts ^ any(  # xor so either concepts and no label LB or no concepts and has label LB
                   (refDoc.type == ModelDocument.Type.LINKBASE and
                    XmlUtil.descendant(refDoc.xmlRootElement, XbrlConst.link, "labelLink") is not None)
                   for refDoc in modelDocument.referencesDocument.keys()): # no label linkbase
            val.modelXbrl.error("SBR.NL.2.2.1.02",
                _("A schema that defines concepts MUST have a linked 2.1 label linkbase"),
                modelObject=modelDocument)
        if (definesNonabstractItems or definesTuples) and not any(  # was xor but changed to and not per RH 1/11/12
                   (refDoc.type == ModelDocument.Type.LINKBASE and
                   (XmlUtil.descendant(refDoc.xmlRootElement, XbrlConst.link, "referenceLink") is not None or
                    XmlUtil.descendant(refDoc.xmlRootElement, XbrlConst.link, "label", "{http://www.w3.org/1999/xlink}role", "http://www.xbrl.org/2003/role/documentation" ) is not None))
                    for refDoc in modelDocument.referencesDocument.keys()):
            val.modelXbrl.error("SBR.NL.2.2.1.03",
                _("A schema that defines non-abstract items MUST have a linked (2.1) reference linkbase AND/OR a label linkbase with @xlink:role=documentation"),
                modelObject=modelDocument)

    elif modelDocument.type == ModelDocument.Type.LINKBASE:
        pass
    visited.remove(modelDocument)

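checkFilingDTS compiles its validation patterns only once, into module-level globals, so recursive and repeated calls reuse the same compiled objects. A small sketch of that lazy-compilation idiom using one of the patterns above (the wrapper function name and the sample namespace URI are illustrative):

import re

namespacesConflictPattern = None

def check_namespace(uri):
    # Compile on first use and reuse afterwards, mirroring the lazy
    # global initialisation at the top of checkFilingDTS.
    global namespacesConflictPattern
    if namespacesConflictPattern is None:
        namespacesConflictPattern = re.compile(
            r"http://(xbrl\.us|fasb\.org|xbrl\.sec\.gov)/"
            r"(dei|us-types|us-roles|rr)/([0-9]{4}-[0-9]{2}-[0-9]{2})$")
    return namespacesConflictPattern.match(uri)

m = check_namespace("http://xbrl.sec.gov/dei/2011-01-31")
if m:
    print(m.group(2), m.group(3))  # dei 2011-01-31
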
Example 8

Project: autospec Source File: tarball.py
def download_tarball(url_argument, name_argument, archives, target_dir):
    global name
    global rawname
    global version
    global url
    global path
    global tarball_prefix
    global gcov_file
    # go naming
    global golibpath
    global go_pkgname

    url = url_argument
    tarfile = os.path.basename(url)
    pattern_options = [
        r"(.*?)[\-_](v*[0-9]+[alpha\+_spbfourcesigedsvstableP0-9\.\-\~]*)\.src\.(tgz|tar|zip)",
        r"(.*?)[\-_](v*[0-9]+[alpha\+_sbpfourcesigedsvstableP0-9\.\-\~]*)\.(tgz|tar|zip)",
        r"(.*?)[\-_](v*[0-9]+[a-zalpha\+_spbfourcesigedsvstableP0-9\.\-\~]*)\.orig\.tar",
        r"(.*?)[\-_](v*[0-9]+[\+_spbfourcesigedsvstableP0-9\.\~]*)(-.*?)?\.tar",
    ]
    for pattern in pattern_options:
        p = re.compile(pattern)
        m = p.search(tarfile)
        if m:
            name = m.group(1).strip()
            version = m.group(2).strip()
            b = version.find("-")
            if b >= 0:
                version = version[:b]
            break

    rawname = name
    # R package
    if url_argument.find("cran.r-project.org") > 0 or url_argument.find("cran.rstudio.com") > 0:
        buildpattern.set_build_pattern("R", 10)
        files.want_dev_split = 0
        buildreq.add_buildreq("clr-R-helpers")
        p = re.compile(r"([A-Za-z0-9]+)_(v*[0-9]+[\+_spbfourcesigedsvstableP0-9\.\~\-]*)\.tar\.gz")
        m = p.search(tarfile)
        if m:
            name = "R-" + m.group(1).strip()
            rawname = m.group(1).strip()
            version = m.group(2).strip()
            b = version.find("-")
            if b >= 0:
                version = version[:b]

    if url_argument.find("pypi.python.org") > 0:
        buildpattern.set_build_pattern("distutils", 10)
        url_argument = "http://pypi.debian.net/" + name + "/" + tarfile
    if url_argument.find("pypi.debian.net") > 0:
        buildpattern.set_build_pattern("distutils", 10)

    if url_argument.find(".cpan.org/CPAN/") > 0:
        buildpattern.set_build_pattern("cpan", 10)
        if name:
            name = "perl-" + name
    if url_argument.find(".metacpan.org/") > 0:
        buildpattern.set_build_pattern("cpan", 10)
        if name:
            name = "perl-" + name

    if "github.com" in url_argument:
        # golibpath = golang_libpath(url_argument)
        # go_pkgname = golang_name(url_argument)
        # define regex accepted for valid packages
        github_patterns = [r"https://github.com/.*/(.*?)/archive/(.*)-final.tar",
                           r"https://github.com/.*/.*/archive/[0-9a-fA-F]{1,40}\/(.*)\-(.*).tar",
                           r"https://github.com/.*/(.*?)/archive/(.*).zip",
                           r"https://github.com/.*/(.*?)/archive/v?(.*).tar"]

        for pattern in github_patterns:
            p = re.compile(pattern)
            m = p.search(url_argument)
            if m:
                name = m.group(1).strip()
                version = m.group(2).strip()
                b = version.find("-")
                if b > 0:
                    version = version[:b]
                break

    if url_argument.find("bitbucket.org") > 0:
        p = re.compile(r"https://bitbucket.org/.*/(.*?)/get/[a-zA-Z_-]*([0-9][0-9_.]*).tar")
        m = p.search(url_argument)
        if m:
            name = m.group(1).strip()
            version = m.group(2).strip().replace('_', '.')
        else:
            version = "1"

    # ruby
    if url_argument.find("rubygems.org/") > 0:
        buildpattern.set_build_pattern("ruby", 10)
        p = re.compile(r"(.*?)[\-_](v*[0-9]+[alpha\+_spbfourcesigedsvstableP0-9\.\-\~]*)\.gem")
        m = p.search(tarfile)
        if m:
            buildreq.add_buildreq("ruby")
            buildreq.add_buildreq("rubygem-rdoc")
            name = "rubygem-" + m.group(1).strip()
            rawname = m.group(1).strip()
            version = m.group(2).strip()
            b = version.find("-")
            if b >= 0:
                version = version[:b]

    # override from commandline
    if name_argument and name_argument[0] != name:
        pattern = name_argument[0] + r"[\-]*(.*)\.(tgz|tar|zip)"
        p = re.compile(pattern)
        m = p.search(tarfile)
        if m:
            name = name_argument[0]
            rawname = name
            version = m.group(1).strip()
            b = version.find("-")
            if b >= 0 and version.find("-beta") < 0:
                version = version[:b]
            if version.startswith('.'):
                version = version[1:]
        else:
            name = name_argument[0]

    if not name:
        split = url_argument.split('/')
        if len(split) > 3 and split[-2] in ('archive', 'tarball'):
            name = split[-3]
            version = split[-1]
            if version.startswith('v'):
                version = version[1:]
            # remove extension
            version = '.'.join(version.split('.')[:-1])
            if version.endswith('.tar'):
                version = '.'.join(version.split('.')[:-1])

    b = version.find("-")
    if b >= 0 and version.find("-beta") < 0:
        b = b + 1
        version = version[b:]

    if len(version) > 0 and version[0] in ['v', 'r']:
        version = version[1:]

    assert name != ""

    if not target_dir:
        build.download_path = os.getcwd() + "/" + name
    else:
        build.download_path = target_dir
    call("mkdir -p %s" % build.download_path)

    gcov_path = build.download_path + "/" + name + ".gcov"
    if os.path.isfile(gcov_path):
        gcov_file = name + ".gcov"

    tarball_path = check_or_get_file(url, tarfile)
    sha1 = get_sha1sum(tarball_path)
    with open(build.download_path + "/upstream", "w") as file:
        file.write(sha1 + "/" + tarfile + "\n")

    tarball_prefix = name + "-" + version
    if tarfile.lower().endswith('.zip'):
        tarball_contents = subprocess.check_output(
            ["unzip", "-l", tarball_path], universal_newlines=True)
        if tarball_contents and len(tarball_contents.splitlines()) > 3:
            tarball_prefix = tarball_contents.splitlines()[3].rsplit("/")[0].split()[-1]
        extract_cmd = "unzip -d {0} {1}".format(build.base_path, tarball_path)

    elif tarfile.lower().endswith('.gem'):
        tarball_contents = subprocess.check_output(
            ["gem", "unpack", "--verbose", tarball_path], universal_newlines=True)
        extract_cmd = "gem unpack --target={0} {1}".format(build.base_path, tarball_path)
        if tarball_contents:
            tarball_prefix = tarball_contents.splitlines()[-1].rsplit("/")[-1]
            if tarball_prefix.endswith("'"):
                tarball_prefix = tarball_prefix[:-1]
    else:
        extract_cmd, tarball_prefix = build_untar(tarball_path)

    if version == "":
        version = "1"

    print("\n")

    print("Processing", url_argument)
    print(
        "=============================================================================================")
    print("Name        :", name)
    print("Version     :", version)
    print("Prefix      :", tarball_prefix)

    with open(build.download_path + "/Makefile", "w") as file:
        file.write("PKG_NAME := " + name + "\n")
        file.write("URL := " + url_argument + "\n")
        file.write("ARCHIVES :=")
        for archive in archives:
            file.write(" {}".format(archive))
        file.write("\n")
        file.write("\n")
        file.write("include ../common/Makefile.common\n")

    shutil.rmtree("{}".format(build.base_path), ignore_errors=True)
    os.makedirs("{}".format(build.output_path))
    call("mkdir -p %s" % build.download_path)
    call(extract_cmd)

    path = build.base_path + tarball_prefix

    for archive, destination in zip(archives[::2], archives[1::2]):
        source_tarball_path = check_or_get_file(archive, os.path.basename(archive))
        if source_tarball_path.lower().endswith('.zip'):
            tarball_contents = subprocess.check_output(
                ["unzip", "-l", source_tarball_path], universal_newlines=True)
            if tarball_contents and len(tarball_contents.splitlines()) > 3:
                source_tarball_prefix = tarball_contents.splitlines()[3].rsplit("/")[0].split()[-1]
            extract_cmd = "unzip -d {0} {1}".format(build.base_path, source_tarball_path)
        else:
            extract_cmd, source_tarball_prefix = build_untar(source_tarball_path)
        buildpattern.archive_details[archive + "prefix"] = source_tarball_prefix
        call(extract_cmd)
        tar_files = glob.glob("{0}{1}/*".format(build.base_path, source_tarball_prefix))
        move_cmd = "mv "
        for tar_file in tar_files:
            move_cmd += tar_file + " "
        move_cmd += '{0}/{1}'.format(path, destination)

        mkdir_cmd = "mkdir -p "
        mkdir_cmd += '{0}/{1}'.format(path, destination)

        print("mkdir " + mkdir_cmd)
        call(mkdir_cmd)
        call(move_cmd)

        sha1 = get_sha1sum(source_tarball_path)
        with open(build.download_path + "/upstream", "a") as file:
            file.write(sha1 + "/" + os.path.basename(archive) + "\n")

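download_tarball uses re.compile the same way throughout: a list of candidate patterns is compiled and searched in order, and the first match supplies the package name and version groups. A trimmed-down sketch of that loop (the character classes are simplified from the ones above):

import re

pattern_options = [
    r"(.*?)[\-_](v*[0-9]+[0-9\.\-\~]*)\.src\.(tgz|tar|zip)",
    r"(.*?)[\-_](v*[0-9]+[0-9\.\-\~]*)\.(tgz|tar|zip)",
]

def name_and_version(tarfile):
    # Try each compiled pattern in turn; the first hit wins.
    for pattern in pattern_options:
        m = re.compile(pattern).search(tarfile)
        if m:
            return m.group(1), m.group(2)
    return None, None

print(name_and_version("foo-1.2.3.tar.gz"))  # ('foo', '1.2.3')
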
Example 9

Project: trelby Source File: myimport.py
def importFountain(fileName, frame):
    # regular expressions for fountain markdown.
    # https://github.com/vilcans/screenplain/blob/master/screenplain/richstring.py
    ire = re.compile(
            # one star
            r'\*'
            # anything but a space, then text
            r'([^\s].*?)'
            # finishing with one star
            r'\*'
            # must not be followed by star
            r'(?!\*)'
        )
    bre = re.compile(
            # two stars
            r'\*\*'
            # must not be followed by space
            r'(?=\S)'
            # inside text
            r'(.+?[*_]*)'
            # finishing with two stars
            r'(?<=\S)\*\*'
        )
    ure = re.compile(
            # underline
            r'_'
            # must not be followed by space
            r'(?=\S)'
            # inside text
            r'([^_]+)'
            # finishing with underline
            r'(?<=\S)_'
        )
    boneyard_re = re.compile('/\\*.*?\\*/', flags=re.DOTALL)

    # random magicstring used to escape literal star '\*'
    literalstar = "Aq7RR"

    # returns s with markdown formatting removed.
    def unmarkdown(s):
        s = s.replace("\\*", literalstar)
        for style in (bre, ire, ure):
            s = style.sub(r'\1', s)
        return s.replace(literalstar, "*")

    data = util.loadFile(fileName, frame, 1000000)

    if data == None:
        return None

    if len(data) == 0:
        wx.MessageBox("File is empty.", "Error", wx.OK, frame)
        return None

    inf = []
    inf.append(misc.CheckBoxItem("Import titles as action lines."))
    inf.append(misc.CheckBoxItem("Remove unsupported formatting markup."))
    inf.append(misc.CheckBoxItem("Import section/synopsis as notes."))

    dlg = misc.CheckBoxDlg(frame, "Fountain import options", inf,
        "Import options:", False)

    if dlg.ShowModal() != wx.ID_OK:
        dlg.Destroy()
        return None

    importTitles = inf[0].selected
    removeMarkdown = inf[1].selected
    importSectSyn = inf[2].selected

    # pre-process data - fix newlines, remove boneyard.
    data = util.fixNL(data)
    data = boneyard_re.sub('', data)
    prelines = data.split("\n")
    for i in xrange(len(prelines)):
        try:
            util.toLatin1(prelines[i])
        except:
            prelines[i] = util.cleanInput(u"" + prelines[i].decode('UTF-8', "ignore"))
    lines = []

    tabWidth = 4
    lns = []
    sceneStartsList = ("INT", "EXT", "EST", "INT./EXT", "INT/EXT", "I/E", "I./E")
    TWOSPACE = "  "
    skipone = False

    # First check if title lines are present:
    c = 0
    while c < len(prelines):
        if prelines[c] != "":
            c = c+1
        else:
            break

    # prelines[0:i] are the first bunch of lines, that could be titles.
    # Our check for title is simple:
    #   - the line does not start with 'fade'
    #   - the first line has a single ':'

    if c > 0:
        l = util.toInputStr(prelines[0].expandtabs(tabWidth).lstrip().lower())
        if not l.startswith("fade") and l.count(":") == 1:
            # these are title lines. Now do what the user requested.
            if importTitles:
                # add TWOSPACE to all the title lines.
                for i in xrange(c):
                    prelines[i] += TWOSPACE
            else:
                #remove these lines
                prelines = prelines[c+1:]

    for l in prelines:
        if l != TWOSPACE:
            lines.append(util.toInputStr(l.expandtabs(tabWidth)))
        else:
            lines.append(TWOSPACE)

    linesLen = len(lines)

    def isPrevEmpty():
        if lns and lns[-1].text == "":
            return True
        return False

    def isPrevType(ltype):
        return (lns and lns[-1].lt == ltype)

    # looks ahead to check if next line is not empty
    def isNextEmpty(i):
        return  (i+1 < len(lines) and lines[i+1] == "")

    def getPrevType():
        if lns:
            return lns[-1].lt
        else:
            return screenplay.ACTION

    def isParen(s):
        return (s.startswith('(') and s.endswith(')'))

    def isScene(s):
        if s.endswith(TWOSPACE):
            return False
        if s.startswith(".") and not s.startswith(".."):
            return True
        tmp = s.upper()
        if (re.match(r'^(INT|EXT|EST)[ .]', tmp) or
            re.match(r'^(INT\.?/EXT\.?)[ .]', tmp) or
            re.match(r'^I/E[ .]', tmp)):
            return True
        return False

    def isTransition(s):
        return ((s.isupper() and s.endswith("TO:")) or
                (s.startswith(">") and not s.endswith("<")))

    def isCentered(s):
        return s.startswith(">") and s.endswith("<")

    def isPageBreak(s):
        return s.startswith('===') and s.lstrip('=') == ''

    def isNote(s):
        return s.startswith("[[") and s.endswith("]]")

    def isSection(s):
        return s.startswith("#")

    def isSynopsis(s):
        return s.startswith("=") and not s.startswith("==")

    # first pass - identify linetypes
    for i in range(linesLen):
        if skipone:
            skipone = False
            continue

        s = lines[i]
        sl = s.lstrip()
        # mark as ACTION by default.
        line = screenplay.Line(screenplay.LB_FORCED, screenplay.ACTION, s)

        # Start testing lines for element type. Go in order:
        # Scene Character, Paren, Dialog, Transition, Note.

        if s == "" or isCentered(s) or isPageBreak(s):
            # do nothing - import as action.
            pass

        elif s == TWOSPACE:
            line.lt = getPrevType()

        elif isScene(s):
            line.lt = screenplay.SCENE
            if sl.startswith('.'):
                line.text = sl[1:]
            else:
                line.text = sl

        elif isTransition(sl) and isPrevEmpty() and isNextEmpty(i):
            line.lt = screenplay.TRANSITION
            if line.text.startswith('>'):
                line.text = sl[1:].lstrip()

        elif s.isupper() and isPrevEmpty() and not isNextEmpty(i):
            line.lt = screenplay.CHARACTER
            if s.endswith(TWOSPACE):
                line.lt = screenplay.ACTION

        elif isParen(sl) and (isPrevType(screenplay.CHARACTER) or
                                isPrevType(screenplay.DIALOGUE)):
            line.lt = screenplay.PAREN

        elif (isPrevType(screenplay.CHARACTER) or
             isPrevType(screenplay.DIALOGUE) or
             isPrevType(screenplay.PAREN)):
            line.lt = screenplay.DIALOGUE

        elif isNote(sl):
            line.lt = screenplay.NOTE
            line.text = sl.strip('[]')

        elif isSection(s) or isSynopsis(s):
            if not importSectSyn:
                if isNextEmpty(i):
                    skipone = True
                continue

            line.lt = screenplay.NOTE
            line.text = sl.lstrip('=#')

        if line.text == TWOSPACE:
            pass

        elif line.lt != screenplay.ACTION:
            line.text = line.text.lstrip()

        else:
            tmp = line.text.rstrip()
            # we don't support center align, so simply add required indent.
            if isCentered(tmp):
                tmp = tmp[1:-1].strip()
                width = frame.panel.ctrl.sp.cfg.getType(screenplay.ACTION).width
                if len(tmp) < width:
                    tmp = ' ' * ((width - len(tmp)) // 2) + tmp
            line.text = tmp

        if removeMarkdown:
            line.text = unmarkdown(line.text)
            if line.lt == screenplay.CHARACTER and line.text.endswith('^'):
                line.text = line.text[:-1]

        lns.append(line)

    ret = []

    # second pass helper functions.
    def isLastLBForced():
        return ret and ret[-1].lb == screenplay.LB_FORCED

    def makeLastLBLast():
        if ret:
            ret[-1].lb = screenplay.LB_LAST

    def isRetPrevType(t):
        return ret and ret[-1].lt == t

    # second pass - remove unneeded empty lines, and fix the linebreaks.
    for ln in lns:
        if ln.text == '':
            if isLastLBForced():
                makeLastLBLast()
            else:
                ret.append(ln)

        elif not isRetPrevType(ln.lt):
            makeLastLBLast()
            ret.append(ln)

        else:
            ret.append(ln)

    makeLastLBLast()
    return ret

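importFountain's emphasis patterns are compiled once from concatenated raw-string fragments and then used only through sub() to strip the markup. The same three patterns run standalone like this (the sample sentence is made up; the literal-star escaping is omitted for brevity):

import re

ire = re.compile(r'\*([^\s].*?)\*(?!\*)')             # *italic*
bre = re.compile(r'\*\*(?=\S)(.+?[*_]*)(?<=\S)\*\*')  # **bold**
ure = re.compile(r'_(?=\S)([^_]+)(?<=\S)_')           # _underline_

def unmarkdown(s):
    # Replace each marked-up span with its captured text.
    for style in (bre, ire, ure):
        s = style.sub(r'\1', s)
    return s

print(unmarkdown("He said **loudly** that _everything_ was *fine*."))
# He said loudly that everything was fine.
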
Example 10

Project: ru Source File: default.py
def ListSeries(params):
	prtitle = ''; infoSet = {}; vhost_marks = []
	global layout_marks
	layout_marks = []
	common_list = ['ФИЛЬМ', 'СМОТРЕТЬ', 'ТЕЛЕШОУ', 'МУЛЬТСЕРИАЛ', '\n', 'СЕРИАЛ', 'Полный Фильм', 'Фильм']
	common_titles_list = ['фильм', 'Фильм', 'документальный фильм', 'мультфильм', 'Телешоу', 'Концерт']

	http = GET(params['url'])
	http = clean_html(http, 'ext')

	#diagnose(http.decode('utf-8').encode('ascii','replace'))
	try: soup = bs(http, 'html5lib', from_encoding = "utf-8")
	except Exception, e:
		print "BS load error: " + str(e)
		ShowMessage(addon_name, "BS error")
		return True
	#print soup.prettify('utf-8')
	content = soup.find('div', class_ = 'full-item')
	#content = soup.find('div', id = 'allEntries')
	#print content.prettify('utf-8')
	if not content:
		print "Content container is not found, used uncut html"
		content = soup
	try: 
		videos = content.find_all(video_conditions)
	except Exception, e:
		print "BS exception: " + str(e)
		ShowMessage(addon_name, "Exception in BS module")
		return True

	if len(videos) == 0:
		removedmes = content.find(removed_message_conditions, attrs = {"style": "color:red"})
		if removedmes:
			ShowMessage("Cinema-hd.ru", removedmes.string.encode('utf-8'), times = 55000)
			return True
		else:
			print "Failed to parse"
			ShowMessage(addon_name, "неизвестный тип верстки")
			return True
	#print videos

	#plot = content.find('span', itemprop = "description")
	try: plot = content.find('div', class_ = "item-info inline")
	except Exception, e: print str(e)
	if plot:
		try:
			imdata = plot.find_parent('div', class_ = 'full-item-content')
			plot = ' '.join(plot.stripped_strings).encode('utf-8')
			infoSet['plot'] = plot
			#imdata = imdata.find('a', target = "_blank", class_ = "ulightbox")
			imdata = imdata.find('img', itemprop = "image")
			img = imdata['src']
			#print img
		except Exception, e:
			print str(e)
			img = params['image']
	else:
		img = params['image']

	#Metadata
	try:
		metadata = content.find('ul', class_ = 'film-tech-info')
		director = metadata.find('strong', itemprop = "director").next_sibling.strip().encode('utf-8')
		genre = content.find('span', itemprop = "genre").string.strip().encode('utf-8')
		actors = content.find('strong', itemprop = "actor").next_sibling.strip().encode('utf-8').split(', ')
		year = content.find('strong', itemprop = "dateCreated").next_sibling.encode('utf-8')
		infoSet.update({
			'genre': genre,
			'year': int(year),
			'director': director,
			'cast': actors
			})
	except Exception, e: print str(e)

	#Fanart
	fanartcontlist = content.find_all('a', attrs = {"class": "ulightbox", "data-fancybox-group": "screenshots"})
	if fanartcontlist: fanartlist = [i['href'] for i in fanartcontlist]
	else: fanartlist = None
	#print fanartlist

	for iframe in videos:
		#Layout 1
		title = iframe.find_previous_sibling('span', style = re.compile("color\:.?(#ff9900|orange|yellow)|font-size\:.?(14|13)pt"))
		if title:
			#print "Layout 1"
			layout_marks.append('1')
		#Layout 2
		if not title:
			title = iframe.find_parent('span', style = re.compile("color\:.?(#ff9900|orange|yellow)|font-size\:.?(14|13)pt"))
			if title:
				#print "Layout 2"
				layout_marks.append('2')
		#Layout 3
		if not title:
			title = iframe.find_previous('font', color = "ff9900")
			if title: 
				titlecont = list(title.stripped_strings)
				if len(titlecont) == 0:
					title = title.find_previous('font', color = "ff9900")
					if title:
						#print "Layout 3b"
						layout_marks.append('3b')
				elif not title.font:
					#print "Layout 3"
					layout_marks.append('3')
			#Layout 3a
			if title and title.font:
				titlecontalt = list(title.stripped_strings)
				title.font.decompose()
				titlecont = list(title.stripped_strings)
				if len(titlecont) == 0:
					if len(titlecontalt) > 0:
						title = titlecontalt[0].encode('utf-8')
						#print "Layout 3a1"
						layout_marks.append('3a1')
					else: title = None
				else:
					#print "Layout 3a"
					layout_marks.append('3a')
		#Layout 4
		if not title:
			title = iframe.find_previous('span', style = re.compile("color\:.?(#ff9900|orange|yellow)|font-size\:.?(14|13)pt"))
			#print title
			#print str(type(title.contents[0]))
			if title and str(type(title.contents[0])) == "<class 'bs4.element.Tag'>":
				#if title.contents[0].has_attr('style') and title.contents[0]['style']=='font-size:13pt':
				title = None
			else:
				if title:
					#print "Layout 4"
					layout_marks.append('4')

		#print type(title)
		if str(type(title)) == "<class 'bs4.element.Tag'>":
			titlecont = list(title.stripped_strings)
			title = titlecont[0].encode('utf-8')

		#Layout 5
		if not title or title in common_titles_list:
			title = content.find('meta', itemprop = "name")
			if title:
				title = title['content'].encode('utf-8')
				#print "Layout 5"
				layout_marks.append('5')

		#Layout N
		if not title:
			title = params['title']
			#print "Layout N"
			layout_marks.append('N')

		for common in common_list:
			if title and common in title:
				title = title.replace(common, '',  1).strip()

		#don't add trailer with the same name
		#if len(videos) == 2 and title == prtitle: break
		prtitle = title

		#print title, url
		#if title == 'трейлер' or title == 'Трейлер': continue

		url = iframe['src']
		#print url
		vhost = re.findall(r'(?:www\.)?(?:[\w\-]+\.)*([\w\-]+)\.\w+/', url)
		if vhost:
			vhost = vhost[0]
			vhost_marks.append(vhost)
		else:
			layout_marks.pop()
			continue

		'''if 'moonwalk.cc/serial' in url:
			ListMWSeasons(url, params['url'])
			continue'''
		
		li = xbmcgui.ListItem(title, iconImage = addon_icon, thumbnailImage = img)
		li.setInfo(type = "video", infoLabels = infoSet)
		if fanartlist:
			import random
			fanart = random.choice(fanartlist)
			if xbmcver >= 13: li.setArt({'fanart': fanart})
			else: li.setProperty('fanart_image', fanart)
		IF = False; IP = True
		uri = {'url': url};
		if re.search('moonwalk\.cc\/serial|serpens\.nl\/serial', url):
			uri['func'] = 'ListMWSeasons'
			uri['ref'] = params['url']
			uri['tvshowtitle'] = title
			uri['img'] = img
			IF = True; IP = False
		else:
			uri['func'] = 'Play'
			uri['title'] = title
		if 'moonwalk.cc/video' in url and use_ahds:
			#IP = False
			IP = True
		uri = construct_request(uri)
		if IP: li.setProperty('IsPlayable', 'true')
		xbmcplugin.addDirectoryItem(hos, uri, li, IF)
	
	if debug_mode:
		ShowMessage(addon_name, "[COLOR bisque]" + "-".join(layout_marks) + "[/COLOR] " + ", ".join(vhost_marks), times = 8000)
	
	xbmcplugin.setContent(hos, 'movies')
	#skin = xbmc.getSkinDir()
	#if skin == 'skin.aeonmq5':
	#	print xbmc.getInfoLabel('Container.Viewmode')
	#	xbmc.executebuiltin('Container.SetViewMode(55)')
	xbmcplugin.endOfDirectory(hos)

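Most of the re.compile calls in this listing are not applied to strings directly: BeautifulSoup accepts a compiled pattern as an attribute filter, which is how the title spans are selected by their inline CSS (style = re.compile(...)). The host-name extraction near the end is ordinary re usage and runs on its own (the URL below is illustrative):

import re

url = "http://player.moonwalk.cc/serial/12345/iframe"
# Capture the second-level domain, skipping an optional www. prefix
# and any deeper subdomains.
vhost = re.findall(r'(?:www\.)?(?:[\w\-]+\.)*([\w\-]+)\.\w+/', url)
print(vhost[0] if vhost else None)  # moonwalk
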
Example 11

Project: disco-dop Source File: runexp.py
def getgrammars(trees, sents, stages, testmaxwords, resultdir,
		numproc, lexmodel, simplelexsmooth, top):
	"""Read off the requested grammars."""
	tbfanout, n = treetransforms.treebankfanout(trees)
	logging.info('binarized treebank fan-out: %d #%d', tbfanout, n)
	mappings = [None for _ in stages]
	for n, stage in enumerate(stages):
		traintrees = trees
		stage.mapping = None
		prevn = 0
		if n and stage.prune:
			prevn = [a.name for a in stages].index(stage.prune)
		if stage.split:
			traintrees = [treetransforms.binarize(
					treetransforms.splitdiscnodes(
						tree.copy(True),
						stage.markorigin),
					childchar=':', dot=True, ids=grammar.UniqueIDs())
					for tree in traintrees]
			logging.info('splitted discontinuous nodes')
		if stage.collapse:
			traintrees, mappings[n] = treebanktransforms.collapselabels(
					[tree.copy(True) for tree in traintrees],
					tbmapping=treebanktransforms.MAPPINGS[
						stage.collapse[0]][stage.collapse[1]])
			logging.info('collapsed phrase labels for multilevel '
					'coarse-to-fine parsing to %s level %d',
					*stage.collapse)
		if n and mappings[prevn] is not None:
			# Given original labels A, convert CTF mapping1 A => C,
			# and mapping2 A => B to a mapping B => C.
			mapping1, mapping2 = mappings[prevn], mappings[n]
			if mappings[n] is None:
				stage.mapping = {a: mapping1[a] for a in mapping1}
			else:
				stage.mapping = {mapping2[a]: mapping1[a] for a in mapping2}
		if stage.mode.startswith('pcfg'):
			if tbfanout != 1 and not stage.split:
				raise ValueError('Cannot extract PCFG from treebank '
						'with discontinuities.')
		backtransform = extrarules = None
		if lexmodel and simplelexsmooth:
			extrarules = lexicon.simplesmoothlexicon(lexmodel)
		if stage.mode == 'mc-rerank':
			from . import _fragments
			gram = parser.DictObj(_fragments.getctrees(zip(trees, sents)))
			tree = gram.trees1.extract(0, gram.vocab)
			gram.start = tree[:tree.index(' ')].lstrip('(')
			with gzip.open('%s/%s.train.pickle.gz' % (resultdir, stage.name),
					'wb') as out:
				out.write(pickle.dumps(gram, protocol=-1))
		elif stage.dop:
			if stage.dop in ('doubledop', 'dop1'):
				if stage.dop == 'doubledop':
					(xgrammar, backtransform,
							altweights, fragments) = grammar.doubledop(
							traintrees, sents, binarized=stage.binarized,
							iterate=stage.iterate, complement=stage.complement,
							numproc=numproc, maxdepth=stage.maxdepth,
							maxfrontier=stage.maxfrontier,
							extrarules=extrarules)
				elif stage.dop == 'dop1':
					(xgrammar, backtransform,
							altweights, fragments) = grammar.dop1(
							traintrees, sents, binarized=stage.binarized,
							maxdepth=stage.maxdepth,
							maxfrontier=stage.maxfrontier,
							extrarules=extrarules)
				# dump fragments
				with codecs.getwriter('utf8')(gzip.open('%s/%s.fragments.gz' %
						(resultdir, stage.name), 'w')) as out:
					out.writelines('%s\t%d\n' % (a, len(b))
							for a, b in fragments)
			elif stage.dop == 'reduction':
				xgrammar, altweights = grammar.dopreduction(
						traintrees, sents, packedgraph=stage.packedgraph,
						extrarules=extrarules)
			else:
				raise ValueError('unrecognized DOP model: %r' % stage.dop)
			nodes = sum(len(list(a.subtrees())) for a in traintrees)
			if lexmodel and not simplelexsmooth:  # FIXME: altweights?
				xgrammar = lexicon.smoothlexicon(xgrammar, lexmodel)
			msg = grammar.grammarinfo(xgrammar)
			rules, lex = grammar.writegrammar(
					xgrammar, bitpar=stage.mode.startswith('pcfg-bitpar'))
			with codecs.getwriter('utf8')(gzip.open('%s/%s.rules.gz' % (
					resultdir, stage.name), 'wb')) as rulesfile:
				rulesfile.write(rules)
			with codecs.getwriter('utf8')(gzip.open('%s/%s.lex.gz' % (
					resultdir, stage.name), 'wb')) as lexiconfile:
				lexiconfile.write(lex)
			gram = Grammar(rules, lex, start=top,
					binarized=stage.binarized)
			for name in altweights:
				gram.register('%s' % name, altweights[name])
			logging.info('DOP model based on %d sentences, %d nodes, '
				'%d nonterminals', len(traintrees), nodes, len(gram.toid))
			logging.info(msg)
			if stage.estimator != 'rfe':
				gram.switch('%s' % stage.estimator)
			logging.info(gram.testgrammar()[1])
			if stage.dop in ('doubledop', 'dop1'):
				# backtransform keys are line numbers to rules file;
				# to see them together do:
				# $ paste <(zcat dop.rules.gz) <(zcat dop.backtransform.gz)
				with codecs.getwriter('utf8')(gzip.open(
						'%s/%s.backtransform.gz' % (resultdir, stage.name),
						'wb')) as out:
					out.writelines('%s\n' % a for a in backtransform)
				if n and stage.prune:
					msg = gram.getmapping(stages[prevn].grammar,
							striplabelre=None if stages[prevn].dop
								else re.compile('@.+$'),
							neverblockre=re.compile('.+}<'),
							splitprune=stage.splitprune and stages[prevn].split,
							markorigin=stages[prevn].markorigin,
							mapping=stage.mapping)
				else:
					# recoverfragments() relies on this mapping to identify
					# binarization nodes
					msg = gram.getmapping(None,
							striplabelre=None,
							neverblockre=re.compile('.+}<'),
							splitprune=False, markorigin=False,
							mapping=stage.mapping)
				logging.info(msg)
			elif n and stage.prune:  # dop reduction
				msg = gram.getmapping(stages[prevn].grammar,
						striplabelre=None if stages[prevn].dop
							and stages[prevn].dop not in ('doubledop', 'dop1')
							else re.compile('@[-0-9]+$'),
						neverblockre=re.compile(stage.neverblockre)
							if stage.neverblockre else None,
						splitprune=stage.splitprune and stages[prevn].split,
						markorigin=stages[prevn].markorigin,
						mapping=stage.mapping)
				if stage.mode == 'dop-rerank':
					gram.getrulemapping(
							stages[prevn].grammar, re.compile(r'@[-0-9]+\b'))
				logging.info(msg)
			# write prob models
			np.savez_compressed('%s/%s.probs.npz' % (resultdir, stage.name),
					**{name: mod for name, mod
						in zip(gram.modelnames, gram.models)})
		else:  # not stage.dop
			xgrammar = grammar.treebankgrammar(traintrees, sents,
					extrarules=extrarules)
			logging.info('induced %s based on %d sentences',
				('PCFG' if tbfanout == 1 or stage.split else 'PLCFRS'),
				len(traintrees))
			if stage.split or os.path.exists('%s/pcdist.txt' % resultdir):
				logging.info(grammar.grammarinfo(xgrammar))
			else:
				logging.info(grammar.grammarinfo(xgrammar,
						dump='%s/pcdist.txt' % resultdir))
			if lexmodel and not simplelexsmooth:
				xgrammar = lexicon.smoothlexicon(xgrammar, lexmodel)
			rules, lex = grammar.writegrammar(
					xgrammar, bitpar=stage.mode.startswith('pcfg-bitpar'))
			with codecs.getwriter('utf8')(gzip.open('%s/%s.rules.gz' % (
					resultdir, stage.name), 'wb')) as rulesfile:
				rulesfile.write(rules)
			with codecs.getwriter('utf8')(gzip.open('%s/%s.lex.gz' % (
					resultdir, stage.name), 'wb')) as lexiconfile:
				lexiconfile.write(lex)
			gram = Grammar(rules, lex, start=top)
			logging.info(gram.testgrammar()[1])
			if n and stage.prune:
				msg = gram.getmapping(stages[prevn].grammar,
					striplabelre=None,
					neverblockre=re.compile(stage.neverblockre)
						if stage.neverblockre else None,
					splitprune=stage.splitprune and stages[prevn].split,
					markorigin=stages[prevn].markorigin,
					mapping=stage.mapping)
				logging.info(msg)
		logging.info('wrote grammar to %s/%s.{rules,lex%s}.gz',
				resultdir, stage.name,
				',backtransform' if stage.dop in ('doubledop', 'dop1') else '')

		outside = None
		if stage.estimates in ('SX', 'SXlrgaps'):
			if stage.estimates == 'SX' and tbfanout != 1 and not stage.split:
				raise ValueError('SX estimate requires PCFG.')
			elif stage.mode != 'plcfrs':
				raise ValueError('estimates require parser w/agenda.')
			begin = time.clock()
			logging.info('computing %s estimates', stage.estimates)
			if stage.estimates == 'SX':
				outside = estimates.getpcfgestimates(gram, testmaxwords,
						gram.toid[trees[0].label])
			elif stage.estimates == 'SXlrgaps':
				outside = estimates.getestimates(gram, testmaxwords,
						gram.toid[trees[0].label])
			logging.info('estimates done. cpu time elapsed: %gs',
					time.clock() - begin)
			np.savez_compressed('%s/%s.outside.npz' % (
					resultdir, stage.name), outside=outside)
			logging.info('saved %s estimates', stage.estimates)
		elif stage.estimates:
			raise ValueError('unrecognized value; specify SX or SXlrgaps.')

		stage.update(grammar=gram, backtransform=backtransform,
				outside=outside)

	if any(stage.mapping is not None for stage in stages):
		with codecs.getwriter('utf8')(gzip.open('%s/mapping.json.gz' % (
				resultdir), 'wb')) as mappingfile:
			mappingfile.write(json.dumps([stage.mapping for stage in stages]))

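Here the compiled patterns never scan raw text; they are handed to getmapping and getrulemapping as label filters: '@.+$' (or '@[-0-9]+$') strips DOP fragment annotations off nonterminal labels, and '.+}<' marks binarization nodes that must never be pruned. A rough sketch of what such a strip pattern does to a label (the label format shown is illustrative):

import re

striplabelre = re.compile('@.+$')

# An annotated DOP nonterminal maps back to its coarse label once the
# '@...' suffix is removed; labels without the suffix pass through.
print(striplabelre.sub('', 'NP@1-23'))  # NP
print(striplabelre.sub('', 'VP'))       # VP
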
Example 12

Project: FanFicFare Source File: adapter_fanfictionnet.py
    def doExtractChapterUrlsAndMetadata(self,get_cover=True):

        # fetch the chapter.  From that we will get almost all the
        # metadata and chapter list

        url = self.origurl
        logger.debug("URL: "+url)

        # use BeautifulSoup HTML parser to make everything easier to find.
        try:
            data = self._fetchUrl(url)
            #logger.debug("\n===================\n%s\n===================\n"%data)
            soup = self.make_soup(data)
        except urllib2.HTTPError as e:
            if e.code == 404:
                raise exceptions.StoryDoesNotExist(url)
            else:
                raise e

        if "Unable to locate story" in data:
            raise exceptions.StoryDoesNotExist(url)

        # some times "Chapter not found...", sometimes "Chapter text not found..."
        if "not found. Please check to see you are not using an outdated url." in data:
            raise exceptions.FailedToDownload("Error downloading Chapter: %s!  'Chapter not found. Please check to see you are not using an outdated url.'" % url)

        if self.getConfig('check_next_chapter'):
            try:
                ## ffnet used to have a tendency to send out update
                ## notices in email before all their servers were
                ## showing the update on the first chapter.  It
                ## generates another server request and doesn't seem
                ## to be needed lately, so now default it to off.
                try:
                    chapcount = len(soup.find('select', { 'name' : 'chapter' } ).findAll('option'))
                # get chapter part of url.
                except:
                    chapcount = 1
                chapter = url.split('/',)[5]
                tryurl = "https://%s/s/%s/%d/"%(self.getSiteDomain(),
                                                self.story.getMetadata('storyId'),
                                                chapcount+1)
                logger.debug('=Trying newer chapter: %s' % tryurl)
                newdata = self._fetchUrl(tryurl)
                if "not found. Please check to see you are not using an outdated url." not in newdata \
                        and "This request takes too long to process, it is timed out by the server." not in newdata:
                    logger.debug('=======Found newer chapter: %s' % tryurl)
                    soup = self.make_soup(newdata)
            except urllib2.HTTPError as e:
                if e.code == 503:
                    raise e
            except Exception as e:
                logger.warn("Caught an exception reading URL: %s sleeptime(%s) Exception %s."%(unicode(url),sleeptime,unicode(e)))
                pass

        # Find authorid and URL from... author url.
        a = soup.find('a', href=re.compile(r"^/u/\d+"))
        self.story.setMetadata('authorId',a['href'].split('/')[2])
        self.story.setMetadata('authorUrl','https://'+self.host+a['href'])
        self.story.setMetadata('author',a.string)

        ## Pull some additional data from html.

        ## ffnet shows category two ways
        ## 1) class(Book, TV, Game,etc) >> category(Harry Potter, Sailor Moon, etc)
        ## 2) cat1_cat2_Crossover
        ## For 1, use the second link.
        ## For 2, fetch the crossover page and pull the two categories from there.

        categories = soup.find('div',{'id':'pre_story_links'}).findAll('a',{'class':'xcontrast_txt'})
        #print("xcontrast_txt a:%s"%categories)
        if len(categories) > 1:
            # Strangely, the ones with *two* links are the
            # non-crossover categories.  Each is in a category itself
            # of Book, Movie, etc.
            self.story.addToList('category',stripHTML(categories[1]))
        elif 'Crossover' in categories[0]['href']:
            caturl = "https://%s%s"%(self.getSiteDomain(),categories[0]['href'])
            catsoup = self.make_soup(self._fetchUrl(caturl))
            found = False
            for a in catsoup.findAll('a',href=re.compile(r"^/crossovers/.+?/\d+/")):
                self.story.addToList('category',stripHTML(a))
                found = True
            if not found:
                # Fall back.  I ran across a story with a Crossover
                # category link to a broken page once.
                # http://www.fanfiction.net/s/2622060/1/
                # Naruto + Harry Potter Crossover
                logger.info("Fall back category collection")
                for c in stripHTML(categories[0]).replace(" Crossover","").split(' + '):
                    self.story.addToList('category',c)

        a = soup.find('a', href=re.compile(r'https?://www\.fictionratings\.com/'))
        rating = a.string
        if 'Fiction' in rating: # if rating has 'Fiction ', strip that out for consistency with past.
            rating = rating[8:]

        self.story.setMetadata('rating',rating)

        # After the rating, the same bit of text that contains id:123456 also
        # contains Complete--if the story is completed.
        gui_table1i = soup.find('div',{'id':'content_wrapper_inner'})

        self.story.setMetadata('title', stripHTML(gui_table1i.find('b'))) # title appears to be the only (or at least the first) bold tag in gui_table1i

        summarydiv = gui_table1i.find('div',{'style':'margin-top:2px'})
        if summarydiv:
            self.setDescription(url,stripHTML(summarydiv))


        grayspan = gui_table1i.find('span', {'class':'xgray xcontrast_txt'})
        # for b in grayspan.findAll('button'):
        #     b.extract()
        metatext = stripHTML(grayspan).replace('Hurt/Comfort','Hurt-Comfort')
        #logger.debug("metatext:(%s)"%metatext)

        if 'Status: Complete' in metatext:
            self.story.setMetadata('status', 'Completed')
        else:
            self.story.setMetadata('status', 'In-Progress')

        metalist = metatext.split(" - ")
        #logger.debug("metalist:(%s)"%metalist)

        # Rated: Fiction K - English - Words: 158,078 - Published: 02-04-11
        # Rated: Fiction T - English - Adventure/Sci-Fi - Naruto U. - Chapters: 22 - Words: 114,414 - Reviews: 395 - Favs: 779 - Follows: 835 - Updated: 03-21-13 - Published: 04-28-12 - id: 8067258

        # rating is obtained above more robustly.
        if metalist[0].startswith('Rated:'):
            metalist=metalist[1:]

        # next is assumed to be language.
        self.story.setMetadata('language',metalist[0])
        metalist=metalist[1:]

        # next might be genre.
        genrelist = metalist[0].split('/') # Hurt/Comfort already changed above.
        goodgenres=True
        for g in genrelist:
            #logger.debug("g:(%s)"%g)
            if g.strip() not in ffnetgenres:
                #logger.info("g not in ffnetgenres")
                goodgenres=False
        if goodgenres:
            self.story.extendList('genre',genrelist)
            metalist=metalist[1:]

        # Updated: <span data-xutime='1368059198'>5/8</span> - Published: <span data-xutime='1278984264'>7/12/2010</span>
        # Published: <span data-xutime='1384358726'>8m ago</span>
        dates = soup.findAll('span',{'data-xutime':re.compile(r'^\d+$')})
        if len(dates) > 1 :
            # updated gets set to the same as published upstream if not found.
            self.story.setMetadata('dateUpdated',datetime.fromtimestamp(float(dates[0]['data-xutime'])))
        self.story.setMetadata('datePublished',datetime.fromtimestamp(float(dates[-1]['data-xutime'])))

        # Meta key titles and the metadata they go into, if any.
        metakeys = {
            # These are already handled separately.
            'Chapters':False,
            'Status':False,
            'id':False,
            'Updated':False,
            'Published':False,
            'Reviews':'reviews',
            'Favs':'favs',
            'Follows':'follows',
            'Words':'numWords',
            }

        chars_ships_list=[]
        while len(metalist) > 0:
            m = metalist.pop(0)
            if ':' in m:
                key = m.split(':')[0].strip()
                if key in metakeys:
                    if metakeys[key]:
                        self.story.setMetadata(metakeys[key],m.split(':')[1].strip())
                    continue
            # no ':' or not found in metakeys
            chars_ships_list.append(m)

        # all because sometimes chars can have ' - ' in them.
        chars_ships_text = (' - ').join(chars_ships_list)
        # print("chars_ships_text:%s"%chars_ships_text)
        # with 'pairing' support, pairings are bracketed w/o comma after
        # [Caspian X, Lucy Pevensie] Edmund Pevensie, Peter Pevensie
        self.story.extendList('characters',chars_ships_text.replace('[','').replace(']',',').split(','))

        l = chars_ships_text
        while '[' in l:
            self.story.addToList('ships',l[l.index('[')+1:l.index(']')].replace(', ','/'))
            l = l[l.index(']')+1:]

        if get_cover:
            # Try the larger image first.
            cover_url = ""
            try:
                img = soup.select('img.lazy.cimage')
                cover_url=img[0]['data-original']
            except:
                img = soup.select('img.cimage')
                if img:
                    cover_url=img[0]['src']
            logger.debug("cover_url:%s"%cover_url)

            authimg_url = ""
            if cover_url and self.getConfig('skip_author_cover'):
                authsoup = self.make_soup(self._fetchUrl(self.story.getMetadata('authorUrl')))
                try:
                    img = authsoup.select('img.lazy.cimage')
                    authimg_url=img[0]['data-original']
                except:
                    img = authsoup.select('img.cimage')
                    if img:
                        authimg_url=img[0]['src']

                logger.debug("authimg_url:%s"%authimg_url)

                ## ffnet uses different sizes on auth & story pages, but same id.
                ## //ffcdn2012t-fictionpressllc.netdna-ssl.com/image/1936929/150/
                ## //ffcdn2012t-fictionpressllc.netdna-ssl.com/image/1936929/180/
                try:
                    cover_id = cover_url.split('/')[4]
                except:
                    cover_id = None
                try:
                    authimg_id = authimg_url.split('/')[4]
                except:
                    authimg_id = None

                ## don't use cover if it matches the auth image.
                if cover_id and authimg_id and cover_id == authimg_id:
                    cover_url = None

            if cover_url:
                self.setCoverImage(url,cover_url)


        # Find the chapter selector
        select = soup.find('select', { 'name' : 'chapter' } )

        if select is None:
            # no selector found, so it's a one-chapter story.
            self.chapterUrls.append((self.story.getMetadata('title'),url))
        else:
            allOptions = select.findAll('option')
            for o in allOptions:
                url = u'https://%s/s/%s/%s/' % ( self.getSiteDomain(),
                                                 self.story.getMetadata('storyId'),
                                                 o['value'])
                # just in case there are tags, like <i>, in chapter titles.
                title = u"%s" % o
                title = re.sub(r'<[^>]+>','',title)
                self.chapterUrls.append((title,url))

        self.story.setMetadata('numChapters',len(self.chapterUrls))

        return
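
For reference, the metadata loop near the end of this example can be exercised on its own. Below is a minimal, self-contained sketch (not part of the adapter): it assumes the rating, language and genre tokens have already been stripped, uses a made-up sample line modeled on the comments above, and copies the metakeys mapping from the example.

# Illustrative sketch only -- not part of the adapter above.
sample = ("Naruto U. - Chapters: 22 - Words: 114,414 - Reviews: 395 - "
          "Favs: 779 - Follows: 835 - Updated: 03-21-13 - "
          "Published: 04-28-12 - id: 8067258")

# same mapping as in the adapter: False means "handled separately, skip here"
metakeys = {'Chapters': False, 'Status': False, 'id': False,
            'Updated': False, 'Published': False,
            'Reviews': 'reviews', 'Favs': 'favs',
            'Follows': 'follows', 'Words': 'numWords'}

parsed = {}
chars_ships_list = []
for m in sample.split(" - "):
    if ':' in m:
        key = m.split(':')[0].strip()
        if key in metakeys:
            if metakeys[key]:
                parsed[metakeys[key]] = m.split(':')[1].strip()
            continue
    # no ':' or key not found in metakeys
    chars_ships_list.append(m)

print(parsed)            # e.g. {'numWords': '114,414', 'reviews': '395', 'favs': '779', 'follows': '835'}
print(chars_ships_list)  # ['Naruto U.']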

Example 13

Project: pelisalacarta Source File: _Util.py
Function: is_leap
def isleap(year):
    return year % 4 == 0 and (year % 100 <> 0 or year % 400 == 0)

# Return number of leap years in range [y1, y2)
# Assume y1 <= y2 and no funny (non-leap century) years
def leapdays(y1, y2):
    return (y2+3)/4 - (y1+3)/4

EPOCH = 1970
def timegm(tuple):
    """Unrelated but handy function to calculate Unix timestamp from GMT."""
    year, month, day, hour, minute, second = tuple[:6]
    assert year >= EPOCH
    assert 1 <= month <= 12
    days = 365*(year-EPOCH) + leapdays(EPOCH, year)
    for i in range(1, month):
        days = days + mdays[i]
    if month > 2 and isleap(year):
        days = days + 1
    days = days + day - 1
    hours = days*24 + hour
    minutes = hours*60 + minute
    seconds = minutes*60L + second
    return seconds


# Date/time conversion routines for formats used by the HTTP protocol.

EPOCH = 1970
def my_timegm(tt):
    year, month, mday, hour, min, sec = tt[:6]
    if ((year >= EPOCH) and (1 <= month <= 12) and (1 <= mday <= 31) and
        (0 <= hour <= 24) and (0 <= min <= 59) and (0 <= sec <= 61)):
        return timegm(tt)
    else:
        return None

days = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
months = ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
          "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
months_lower = []
for month in months: months_lower.append(string.lower(month))


def time2isoz(t=None):
    """Return a string representing time in seconds since epoch, t.

    If the function is called without an argument, it will use the current
    time.

    The format of the returned string is like "YYYY-MM-DD hh:mm:ssZ",
    representing Universal Time (UTC, aka GMT).  An example of this format is:

    1994-11-24 08:49:37Z

    """
    if t is None: t = time.time()
    year, mon, mday, hour, min, sec = time.gmtime(t)[:6]
    return "%04d-%02d-%02d %02d:%02d:%02dZ" % (
        year, mon, mday, hour, min, sec)

def time2netscape(t=None):
    """Return a string representing time in seconds since epoch, t.

    If the function is called without an argument, it will use the current
    time.

    The format of the returned string is like this:

    Wed, DD-Mon-YYYY HH:MM:SS GMT

    """
    if t is None: t = time.time()
    year, mon, mday, hour, min, sec, wday = time.gmtime(t)[:7]
    return "%s %02d-%s-%04d %02d:%02d:%02d GMT" % (
        days[wday], mday, months[mon-1], year, hour, min, sec)


UTC_ZONES = {"GMT": None, "UTC": None, "UT": None, "Z": None}

timezone_re = re.compile(r"^([-+])?(\d\d?):?(\d\d)?$")
def offset_from_tz_string(tz):
    offset = None
    if UTC_ZONES.has_key(tz):
        offset = 0
    else:
        m = timezone_re.search(tz)
        if m:
            offset = 3600 * int(m.group(2))
            if m.group(3):
                offset = offset + 60 * int(m.group(3))
            if m.group(1) == '-':
                offset = -offset
    return offset

def _str2time(day, mon, yr, hr, min, sec, tz):
    # translate month name to number
    # month numbers start with 1 (January)
    try:
        mon = months_lower.index(string.lower(mon))+1
    except ValueError:
        # maybe it's already a number
        try:
            imon = int(mon)
        except ValueError:
            return None
        if 1 <= imon <= 12:
            mon = imon
        else:
            return None

    # make sure clock elements are defined
    if hr is None: hr = 0
    if min is None: min = 0
    if sec is None: sec = 0

    yr = int(yr)
    day = int(day)
    hr = int(hr)
    min = int(min)
    sec = int(sec)

    if yr < 1000:
        # find "obvious" year
        cur_yr = time.localtime(time.time())[0]
        m = cur_yr % 100
        tmp = yr
        yr = yr + cur_yr - m
        m = m - tmp
        if abs(m) > 50:
            if m > 0: yr = yr + 100
            else: yr = yr - 100

    # convert UTC time tuple to seconds since epoch (not timezone-adjusted)
    t = my_timegm((yr, mon, day, hr, min, sec, tz))

    if t is not None:
        # adjust time using timezone string, to get absolute time since epoch
        if tz is None:
            tz = "UTC"
        tz = string.upper(tz)
        offset = offset_from_tz_string(tz)
        if offset is None:
            return None
        t = t - offset

    return t


strict_re = re.compile(r"^[SMTWF][a-z][a-z], (\d\d) ([JFMASOND][a-z][a-z]) (\d\d\d\d) (\d\d):(\d\d):(\d\d) GMT$")
wkday_re = re.compile(
    r"^(?:Sun|Mon|Tue|Wed|Thu|Fri|Sat)[a-z]*,?\s*", re.I)
loose_http_re = re.compile(
    r"""^
    (\d\d?)            # day
       (?:\s+|[-\/])
    (\w+)              # month
        (?:\s+|[-\/])
    (\d+)              # year
    (?:
      (?:\s+|:)    # separator before clock
       (\d\d?):(\d\d)  # hour:min
       (?::(\d\d))?    # optional seconds
    )?                 # optional clock
       \s*
    ([-+]?\d{2,4}|(?![APap][Mm]\b)[A-Za-z]+)? # timezone
       \s*
    (?:\(\w+\))?       # ASCII representation of timezone in parens.
       \s*$""", re.X)
def http2time(text):
    """Returns time in seconds since epoch of time represented by a string.

    Return value is an integer.

    None is returned if the format of the string is unrecognized, the time is outside
    the representable range, or the timezone string is not recognized.  If the
    string contains no timezone, UTC is assumed.

    The timezone in the string may be numerical (like "-0800" or "+0100") or a
    string timezone (like "UTC", "GMT", "BST" or "EST").  Currently, only the
    timezone strings equivalent to UTC (zero offset) are known to the function.

    The function loosely parses the following formats:

    Wed, 09 Feb 1994 22:23:32 GMT       -- HTTP format
    Tuesday, 08-Feb-94 14:15:29 GMT     -- old rfc850 HTTP format
    Tuesday, 08-Feb-1994 14:15:29 GMT   -- broken rfc850 HTTP format
    09 Feb 1994 22:23:32 GMT            -- HTTP format (no weekday)
    08-Feb-94 14:15:29 GMT              -- rfc850 format (no weekday)
    08-Feb-1994 14:15:29 GMT            -- broken rfc850 format (no weekday)

    The parser ignores leading and trailing whitespace.  The time may be
    absent.

    If the year is given with only 2 digits, the function will select the
    century that makes the year closest to the current date.

    """
    # fast exit for strictly conforming string
    m = strict_re.search(text)
    if m:
        g = m.groups()
        mon = months_lower.index(string.lower(g[1])) + 1
        tt = (int(g[2]), mon, int(g[0]),
              int(g[3]), int(g[4]), float(g[5]))
        return my_timegm(tt)

    # No, we need some messy parsing...

    # clean up
    text = string.lstrip(text)
    text = wkday_re.sub("", text, 1)  # Useless weekday

    # tz is time zone specifier string
    day, mon, yr, hr, min, sec, tz = [None]*7

    # loose regexp parse
    m = loose_http_re.search(text)
    if m is not None:
        day, mon, yr, hr, min, sec, tz = m.groups()
    else:
        return None  # bad format

    return _str2time(day, mon, yr, hr, min, sec, tz)


iso_re = re.compile(
    """^
    (\d{4})              # year
       [-\/]?
    (\d\d?)              # numerical month
       [-\/]?
    (\d\d?)              # day
   (?:
         (?:\s+|[-:Tt])  # separator before clock
      (\d\d?):?(\d\d)    # hour:min
      (?::?(\d\d(?:\.\d*)?))?  # optional seconds (and fractional)
   )?                    # optional clock
      \s*
   ([-+]?\d\d?:?(:?\d\d)?
    |Z|z)?               # timezone  (Z is "zero meridian", i.e. GMT)
      \s*$""", re.X)
def iso2time(text):
    """
    As for http2time, but parses the ISO 8601 formats:

    1994-02-03 14:15:29 -0100    -- ISO 8601 format
    1994-02-03 14:15:29          -- zone is optional
    1994-02-03                   -- only date
    1994-02-03T14:15:29          -- Use T as separator
    19940203T141529Z             -- ISO 8601 compact format
    19940203                     -- only date

    """
    # clean up
    text = string.lstrip(text)

    # tz is time zone specifier string
    day, mon, yr, hr, min, sec, tz = [None]*7

    # loose regexp parse
    m = iso_re.search(text)
    if m is not None:
        # XXX there's an extra bit of the timezone I'm ignoring here: is
        #   this the right thing to do?
        yr, mon, day, hr, min, sec, tz, _ = m.groups()
    else:
        return None  # bad format

    return _str2time(day, mon, yr, hr, min, sec, tz)



# XXX Andrew Dalke kindly sent me a similar class in response to my request on
# comp.lang.python, which I then proceeded to lose.  I wrote this class
# instead, but I think he's released his code publicly since, could pinch the
# tests from it, at least...
class seek_wrapper:
    """Adds a seek method to a file object.

    This is only designed for seeking on readonly file-like objects.

    Wrapped file-like object must have a read method.  The readline method is
    only supported if that method is present on the wrapped object.  The
    readlines method is always supported.  xreadlines and iteration are
    supported only for Python 2.2 and above.

    Public attribute: wrapped (the wrapped file object).

    WARNING: All other attributes of the wrapped object (ie. those that are not
    one of wrapped, read, readline, readlines, xreadlines, __iter__ and next)
    are passed through unaltered, which may or may not make sense for your
    particular file object.

    """
    # General strategy is to check that cache is full enough, then delegate
    # everything to the cache (self.__cache, which is a StringIO.StringIO
    # instance).  Seems to be some cStringIO.StringIO problem on 1.5.2 -- I
    # get a StringO object, with no readlines method.

    # Invariant: the end of the cache is always at the same place as the
    # end of the wrapped file:
    # self.wrapped.tell() == self.__cache.tell()

    def __init__(self, wrapped):
        self.wrapped = wrapped
        self.__have_readline = hasattr(self.wrapped, "readline")
        self.__cache = StringIO()

    def __getattr__(self, name):
        wrapped = self.__dict__.get("wrapped")
        if wrapped:
            return getattr(wrapped, name)
        return getattr(self.__class__, name)

    def seek(self, offset, whence=0):
        # make sure we have read all data up to the point we are seeking to
        pos = self.__cache.tell()
        if whence == 0:  # absolute
            to_read = offset - pos
        elif whence == 1:  # relative to current position
            to_read = offset
        elif whence == 2:  # relative to end of *wrapped* file
            # since we don't know yet where the end of that file is, we must
            # read everything
            to_read = None
        if to_read is None or to_read >= 0:
            if to_read is None:
                self.__cache.write(self.wrapped.read())
            else:
                self.__cache.write(self.wrapped.read(to_read))
            self.__cache.seek(pos)

        return self.__cache.seek(offset, whence)

    def tell(self):
        return self.__cache.tell()

    def read(self, size=-1):
        pos = self.__cache.tell()

        self.__cache.seek(pos)

        end = len(self.__cache.getvalue())
        available = end - pos

        # enough data already cached?
        if size <= available and size != -1:
            return self.__cache.read(size)

        # no, so read sufficient data from wrapped file and cache it
        to_read = size - available
        assert to_read > 0 or size == -1
        self.__cache.seek(0, 2)
        if size == -1:
            self.__cache.write(self.wrapped.read())
        else:
            self.__cache.write(self.wrapped.read(to_read))
        self.__cache.seek(pos)

        return self.__cache.read(size)

    def readline(self, size=-1):
        if not self.__have_readline:
            raise NotImplementedError("no readline method on wrapped object")

        # line we're about to read might not be complete in the cache, so
        # read another line first
        pos = self.__cache.tell()
        self.__cache.seek(0, 2)
        self.__cache.write(self.wrapped.readline())
        self.__cache.seek(pos)

        data = self.__cache.readline()
        if size != -1:
            r = data[:size]
            self.__cache.seek(pos+size)
        else:
            r = data
        return r

    def readlines(self, sizehint=-1):
        pos = self.__cache.tell()
        self.__cache.seek(0, 2)
        self.__cache.write(self.wrapped.read())
        self.__cache.seek(pos)
        try:
            return self.__cache.readlines(sizehint)
        except TypeError:  # 1.5.2 hack
            return self.__cache.readlines()

    def __iter__(self): return self
    def next(self):
        line = self.readline()
        if line == "": raise StopIteration
        return line

    xreadlines = __iter__

    def __repr__(self):
        return ("<%s at %s whose wrapped object = %s>" %
                (self.__class__.__name__, `id(self)`, `self.wrapped`))

    def close(self):
        self._cache = None
        self.read = None
        self.readline = None
        self.readlines = None
        self.seek = None
        if self.wrapped: self.wrapped.close()
        self.wrapped = None

class eoffile:
    # file-like object that always claims to be at end-of-file
    def read(self, size=-1): return ""
    def readline(self, size=-1): return ""

class response_seek_wrapper(seek_wrapper):
    """Avoids unnecessarily clobbering methods on .close().

    Also supports pickling.

    Class name is for historical reasons.

    """

    def close(self):
        self.wrapped.close()
        self.wrapped = eoffile()

    def __getstate__(self):
        # There are three obvious options here:
        # 1. truncate
        # 2. read to end
        # 3. close socket, pickle state including read position, then open
        #    again on unpickle and use Range header

        # 2 breaks pickle protocol, because one expects the original object
        # to be left unscathed by pickling.  3 is too complicated and
        # surprising (and too much work ;-) to happen in a sane __getstate__.
        # So we do 1.

        state = self.__dict__.copy()
        state["wrapped"] = eoffile()
        return state
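
The timezone_re pattern defined earlier in this example is the piece that turns a numeric offset into seconds. Here is a small, self-contained sketch of that idea; it is not part of _Util.py, and the function name is made up (the original handles named UTC zones separately via UTC_ZONES).

# Illustrative sketch only -- not part of _Util.py.
import re

tz_re = re.compile(r"^([-+])?(\d\d?):?(\d\d)?$")   # same pattern as timezone_re above

def tz_offset_seconds(tz):
    m = tz_re.search(tz)
    if not m:
        return None
    offset = 3600 * int(m.group(2))
    if m.group(3):
        offset = offset + 60 * int(m.group(3))
    if m.group(1) == '-':
        offset = -offset
    return offset

print(tz_offset_seconds("+0530"))   # 19800
print(tz_offset_seconds("-08:00"))  # -28800
print(tz_offset_seconds("EST"))     # None (only numeric offsets match this pattern)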

Example 14

Project: pymatgen Source File: nwchem.py
    def _parse_job(self, output):
        energy_patt = re.compile("Total \w+ energy\s+=\s+([\.\-\d]+)")
        energy_gas_patt = re.compile("gas phase energy\s+=\s+([\.\-\d]+)")
        energy_sol_patt = re.compile("sol phase energy\s+=\s+([\.\-\d]+)")
        coord_patt = re.compile("\d+\s+(\w+)\s+[\.\-\d]+\s+([\.\-\d]+)\s+"
                                "([\.\-\d]+)\s+([\.\-\d]+)")
        lat_vector_patt = re.compile("a[123]=<\s+([\.\-\d]+)\s+"
                                     "([\.\-\d]+)\s+([\.\-\d]+)\s+>")
        corrections_patt = re.compile("([\w\-]+ correction to \w+)\s+="
                                      "\s+([\.\-\d]+)")
        preamble_patt = re.compile("(No. of atoms|No. of electrons"
                                   "|SCF calculation type|Charge|Spin "
                                   "multiplicity)\s*:\s*(\S+)")
        force_patt = re.compile("\s+(\d+)\s+(\w+)" + 6 * "\s+([0-9\.\-]+)")

        time_patt = re.compile("\s+ Task \s+ times \s+ cpu: \s+   ([\.\d]+)s .+ ", re.VERBOSE)

        error_defs = {
            "calculations not reaching convergence": "Bad convergence",
            "Calculation failed to converge": "Bad convergence",
            "geom_binvr: #indep variables incorrect": "autoz error",
            "dft optimize failed": "Geometry optimization failed"}

        fort2py = lambda x : x.replace("D", "e")
        # note: despite its name, this returns True when the token does NOT
        # contain "." (i.e. it is not a float-looking string)
        isfloatstring = lambda s : s.find(".") == -1

        parse_hess = False
        parse_proj_hess = False
        hessian = None
        projected_hessian = None
        parse_force = False
        all_forces = []
        forces = []

        data = {}
        energies = []
        frequencies = None
        normal_frequencies = None
        corrections = {}
        molecules = []
        structures = []
        species = []
        coords = []
        lattice = []
        errors = []
        basis_set = {}
        bset_header = []
        parse_geom = False
        parse_freq = False
        parse_bset = False
        parse_projected_freq = False
        job_type = ""
        parse_time = False
        time = 0
        for l in output.split("\n"):
            for e, v in error_defs.items():
                if l.find(e) != -1:
                    errors.append(v)
            if parse_time:
                m = time_patt.search(l)
                if m:
                    time = m.group(1)
                    parse_time = False
            if parse_geom:
                if l.strip() == "Atomic Mass":
                    if lattice:
                        structures.append(Structure(lattice, species, coords,
                                                     coords_are_cartesian=True))
                    else:
                        molecules.append(Molecule(species, coords))
                    species = []
                    coords = []
                    lattice = []
                    parse_geom = False
                else:
                    m = coord_patt.search(l)
                    if m:
                        species.append(m.group(1).capitalize())
                        coords.append([float(m.group(2)), float(m.group(3)),
                                       float(m.group(4))])
                    m = lat_vector_patt.search(l)
                    if m:
                        lattice.append([float(m.group(1)), float(m.group(2)),
                                        float(m.group(3))])

            if parse_force:
                m = force_patt.search(l)
                if m:
                    forces.extend(map(float, m.groups()[5:]))
                elif len(forces) > 0:
                    all_forces.append(forces)
                    forces = []
                    parse_force = False

            elif parse_freq:
                if len(l.strip()) == 0:
                    if len(normal_frequencies[-1][1]) == 0:
                        continue
                    else:
                        parse_freq = False
                else:
                    vibs = [float(vib) for vib in l.strip().split()[1:]]
                    num_vibs = len(vibs)
                    for mode, dis in zip(normal_frequencies[-num_vibs:], vibs):
                        mode[1].append(dis)

            elif parse_projected_freq:
                if len(l.strip()) == 0:
                    if len(frequencies[-1][1]) == 0:
                        continue
                    else:
                        parse_projected_freq = False
                else:
                    vibs = [float(vib) for vib in l.strip().split()[1:]]
                    num_vibs = len(vibs)
                    for mode, dis in zip(
                            frequencies[-num_vibs:], vibs):
                        mode[1].append(dis)

            elif parse_bset:
                if l.strip() == "":
                    parse_bset = False
                else:
                    toks = l.split()
                    if toks[0] != "Tag" and not re.match("\-+", toks[0]):
                        basis_set[toks[0]] = dict(zip(bset_header[1:],
                                                      toks[1:]))
                    elif toks[0] == "Tag":
                        bset_header = toks
                        bset_header.pop(4)
                        bset_header = [h.lower() for h in bset_header]

            elif parse_hess:
                if l.strip() == "":
                    continue
                if len(hessian) > 0 and l.find("----------") != -1:
                    parse_hess = False
                    continue
                toks = l.strip().split()
                if len(toks) > 1:
                    try:
                        row = int(toks[0])
                    except Exception as e:
                        continue
                    if isfloatstring(toks[1]):
                        continue
                    vals = [float(fort2py(x)) for x in toks[1:]]
                    if len(hessian) < row:
                        hessian.append(vals)
                    else:
                        hessian[row - 1].extend(vals)

            elif parse_proj_hess:
                if l.strip() == "":
                    continue
                nat3 = len(hessian)
                toks = l.strip().split()
                if len(toks) > 1:
                    try:
                        row = int(toks[0])
                    except Exception as e:
                        continue
                    if isfloatstring(toks[1]):
                        continue
                    vals = [float(fort2py(x)) for x in toks[1:]]
                    if len(projected_hessian) < row:
                        projected_hessian.append(vals)
                    else:
                        projected_hessian[row - 1].extend(vals)
                    if len(projected_hessian[-1]) == nat3:
                        parse_proj_hess = False

            else:
                m = energy_patt.search(l)
                if m:
                    energies.append(Energy(m.group(1), "Ha").to("eV"))
                    parse_time = True
                    continue

                m = energy_gas_patt.search(l)
                if m:
                    cosmo_scf_energy = energies[-1]
                    energies[-1] = dict()
                    energies[-1].update({"cosmo scf": cosmo_scf_energy})
                    energies[-1].update({"gas phase":
                                         Energy(m.group(1), "Ha").to("eV")})


                m = energy_sol_patt.search(l)
                if m:
                    energies[-1].update(
                        {"sol phase": Energy(m.group(1), "Ha").to("eV")})

                m = preamble_patt.search(l)
                if m:
                    try:
                        val = int(m.group(2))
                    except ValueError:
                        val = m.group(2)
                    k = m.group(1).replace("No. of ", "n").replace(" ", "_")
                    data[k.lower()] = val
                elif l.find("Geometry \"geometry\"") != -1:
                    parse_geom = True
                elif l.find("Summary of \"ao basis\"") != -1:
                    parse_bset = True
                elif l.find("P.Frequency") != -1:
                    parse_projected_freq = True
                    if frequencies is None:
                        frequencies = []
                    toks = l.strip().split()[1:]
                    frequencies.extend([(float(freq), []) for freq in toks])

                elif l.find("Frequency") != -1:
                    toks = l.strip().split()
                    if len(toks) > 1 and toks[0] == "Frequency":
                        parse_freq = True
                        if normal_frequencies is None:
                            normal_frequencies = []
                        normal_frequencies.extend([(float(freq), []) for freq
                                                   in l.strip().split()[1:]])

                elif l.find("MASS-WEIGHTED NUCLEAR HESSIAN") != -1:
                    parse_hess = True
                    if not hessian:
                        hessian = []
                elif l.find("MASS-WEIGHTED PROJECTED HESSIAN") != -1:
                    parse_proj_hess = True
                    if not projected_hessian:
                        projected_hessian = []

                elif l.find("atom               coordinates                        gradient") != -1:
                    parse_force = True

                elif job_type == "" and l.strip().startswith("NWChem"):
                    job_type = l.strip()
                    if job_type == "NWChem DFT Module" and \
                            "COSMO solvation results" in output:
                        job_type += " COSMO"
                else:
                    m = corrections_patt.search(l)
                    if m:
                        corrections[m.group(1)] = FloatWithUnit(
                            m.group(2), "kJ mol^-1").to("eV atom^-1")

        if frequencies:
            for freq, mode in frequencies:
                mode[:] = zip(*[iter(mode)]*3)
        if normal_frequencies:
            for freq, mode in normal_frequencies:
                mode[:] = zip(*[iter(mode)]*3)
        if hessian:
            n = len(hessian)
            for i in range(n):
                for j in range(i + 1, n):
                    hessian[i].append(hessian[j][i])
        if projected_hessian:
            n = len(projected_hessian)
            for i in range(n):
                for j in range(i + 1, n):
                    projected_hessian[i].append(projected_hessian[j][i])

        data.update({"job_type": job_type, "energies": energies,
                     "corrections": corrections,
                     "molecules": molecules,
                     "structures": structures,
                     "basis_set": basis_set,
                     "errors": errors,
                     "has_error": len(errors) > 0,
                     "frequencies": frequencies,
                     "normal_frequencies": normal_frequencies,
                     "hessian": hessian,
                     "projected_hessian": projected_hessian,
                     "forces": all_forces,
                     "task_time": time})

        return data
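
As a quick illustration of the patterns above, energy_patt pulls the numeric value out of a total-energy line. The snippet below is not part of nwchem.py, and the sample line is made up for demonstration.

# Illustrative sketch only -- not part of nwchem.py.
import re

energy_patt = re.compile(r"Total \w+ energy\s+=\s+([\.\-\d]+)")

line = "         Total DFT energy =     -76.419737"
m = energy_patt.search(line)
if m:
    print(m.group(1))   # -76.419737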

Example 15

Project: tp-qemu Source File: flag_check.py
@error.context_aware
def run(test, params, env):
    """
    flag_check test:
    steps:
    1. boot guest with -cpu model,+extra_flags (extra_flags is optional)
       a. no defined model_name in cfg file
          guest_model = host_model
       b. model_name defined in cfg file
          guest_model = params.get("cpu_model")
    2. get guest flags
    3. get expected model flags from dump file
       a. -cpu host: qemu_model = host_model
       b. guest_model > host_model --> expected_model = host_model
          e.g. guest_model = Haswell, host_model = Sandybridge
              expected_model = Sandybridge
       c. guest_model < host_model --> expected_model = guest_model
    4. get extra flags
       a. add_flags = +flag
          1). flag is exposed to guest if it's supported in host
          2). flag is not exposed to guest if it's unknown in host
          3). ignore "check", "enforce" which are params, not flags
       b. del_flags = -flag
          flag is removed if it's supported in guest
       c. params check: check for flags lacking in host, including unknown flags
    5. compare expected flag with flags in guest
       a. out_flags: not supported with some conf; this kind of flag
          will be displayed in dump file, but not in guest.
          e.g. tsc-deadline is not supported with -M rhel6.3.0
       b. option_flags: some flags are generated by kernel and are not
          defined in dump file. it's acceptable when they are displayed in guest.
          e.g. rep_good
       expected_flags = expected_model_flags + add_flags - del_flags
                        - out_flags
       miss_flag = expected_flags - guest_flags
       unexpect_flag = guest_flags - expected_flags - option_flags

    :param test: Kvm test object
    :param params: Dictionary with the test parameters
    :param env: Dictionary with test environment.
    """

    def qemu_model_info(models_list, cpumodel):
        """
        Get cpumodel info from models_list
        :param models_list: all models info
        :param cpumodel: model name
        :return: model info of cpumodel
        """
        for model in models_list:
            if cpumodel in model:
                return model
        return None

    def qemu_support_flag(model_info, reg):
        """
        Get register's supported flags from model_info
        :param model_info: model_info get from dump file
        :param reg: reg name, e.g feature_edx
        """
        reg_re = re.compile(r".*%s.*\((.*)\)\n" % reg)
        flag = reg_re.search(model_info)
        try:
            if flag:
                return flag.groups()[0]
        except Exception as e:
            logging.error("Failed to get support flag %s" % e)

    def get_all_support_flags():
        """
        Get all supported flags with qemu query cmd.
        """
        qemu_binary = utils_misc.get_qemu_binary(params)
        cmd = qemu_binary + params.get("query_cmd", " -cpu ?")
        output = utils.system_output(cmd)
        flags_re = re.compile(params.get("pattern", "flags:(.*)"))
        flag_list = flags_re.search(output)
        flags = []
        if flag_list:
            for flag in flag_list.groups():
                flags += flag
        return set(map(utils_misc.Flag, flags))

    def get_extra_flag(extra_flags, symbol, lack_check=False):
        """
        Get added/removed flags
        :param extra_flags: exposed/removed flags. e.g "+sse4.1,+sse4.2"
        :param symbol: "+","-"
        :return: return all extra_flags if lack_check is true
                 return host supported flags if lack_check is false
        """
        flags = []
        re_flags = [_[1:] for _ in extra_flags.split(",")
                    if _ and symbol == _[0]]
        for flag in re_flags:
            if lack_check:
                flags.append(flag)
            elif flag in host_flags:
                flags.append(flag)
        return set(map(utils_misc.Flag, flags))

    def get_guest_cpuflags(vm_session):
        """
        Get guest system cpuflags.

        :param vm_session: session to the checked vm.
        :return: [corresponding flags]
        """
        flags_re = re.compile(r'^flags\s*:(.*)$', re.MULTILINE)
        out = vm_session.cmd_output("cat /proc/cpuinfo")
        try:
            flags = flags_re.search(out).groups()[0].split()
            return set(map(utils_misc.Flag, flags))
        except Exception as e:
            logging.error("Failed to get guest cpu flags %s" % e)

    utils_misc.Flag.aliases = utils_misc.kvm_map_flags_aliases

    # Get all models' info from dump file
    dump_file = params.get("dump_file")
    default_dump_path = os.path.join(data_dir.get_deps_dir(), "cpuid")
    dump_path = params.get("dump_path", default_dump_path)
    cpuinfo_file = utils.unmap_url(dump_path, dump_file, dump_path)
    host_flags = utils_misc.get_cpu_flags()

    vm = env.get_vm(params["main_vm"])
    guest_cpumodel = vm.cpuinfo.model
    extra_flags = params.get("cpu_model_flags", " ")

    error.context("Boot guest with -cpu %s,%s" %
                  (guest_cpumodel, extra_flags), logging.info)

    if params.get("start_vm") == "no" and "unknown,check" in extra_flags:
        params["start_vm"] = "yes"
        try:
            vm.create(params=params)
            vm.verify_alive()
            output = vm.process.get_output()
            vm.destroy()
        except virt_vm.VMCreateError as detail:
            output = str(detail)
        if params["qemu_output"] not in output:
            raise error.TestFail("no qemu output: %s" % params["qemu_output"])
    else:
        vm.verify_alive()
        timeout = float(params.get("login_timeout", 240))
        session = vm.wait_for_login(timeout=timeout)

        # Get qemu model
        host_cpumodel = utils_misc.get_host_cpu_models()
        if guest_cpumodel not in host_cpumodel:
            qemu_model = host_cpumodel[0]
        else:
            qemu_model = guest_cpumodel
        error.context("Get model %s support flags" % qemu_model, logging.info)

        # Get flags for every reg from model's info
        models_info = utils.system_output("cat %s" % cpuinfo_file).split("x86")
        model_info = qemu_model_info(models_info, qemu_model)
        reg_list = params.get("reg_list", "feature_edx ").split()
        model_support_flags = " "
        if model_info:
            for reg in reg_list:
                reg_flags = qemu_support_flag(model_info, reg)
                if reg_flags:
                    model_support_flags += " %s" % reg_flags
        model_support_flags = set(map(utils_misc.Flag,
                                      model_support_flags.split()))

        error.context("Get guest flags", logging.info)
        guest_flags = get_guest_cpuflags(session)

        error.context("Get expected flag list", logging.info)

        # out_flags is defined in dump file, but not in guest
        out_flags = params.get("out_flags", " ").split()
        out_flags = set(map(utils_misc.Flag, out_flags))
        # no_check_flags is defined in all_support_flags, but not in guest or host
        no_check_flags = params.get("no_check_flags", " ").split()
        no_check_flags = set(map(utils_misc.Flag, no_check_flags))
        # option_flags are generated by kernel or kvm, which are not defined in
        # dump file, but can be displayed in guest
        option_flags = params.get("option_flags", " ").split()
        if params['smp'] == '1' and 'up' not in option_flags:
            option_flags.append('up')
        option_flags = set(map(utils_misc.Flag, option_flags))
        # add_flags are exposed by +flag
        add_flags = get_extra_flag(extra_flags, "+")
        # del_flags are disabled by -flag
        del_flags = get_extra_flag(extra_flags, "-", lack_check=True)
        expected_flags = ((model_support_flags | add_flags) -
                          del_flags - out_flags)
        # get all flags for host lack flag checking
        check_flags = get_extra_flag(extra_flags, "+", lack_check=True)
        check_flags = check_flags - no_check_flags
        host_flags = set(map(utils_misc.Flag, host_flags))
        lack_flags = set(expected_flags | check_flags) - host_flags

        if "check" in extra_flags and "unknown" not in extra_flags:
            error.context("Check lack flag in host", logging.info)
            process_output = vm.process.get_output()
            miss_warn = []
            if lack_flags:
                for flag in lack_flags:
                    if flag not in process_output:
                        miss_warn.extend(flag.split())
            if miss_warn:
                raise error.TestFail("no warning for lack flag %s" % miss_warn)

        error.context("Compare guest flags with expected flags", logging.info)
        all_support_flags = get_all_support_flags()
        missing_flags = expected_flags - guest_flags
        unexpect_flags = (guest_flags - expected_flags -
                          all_support_flags - option_flags)
        if missing_flags or unexpect_flags:
            raise error.TestFail("missing flags:\n %s\n"
                                 "more flags than expected:\n %s\n"
                                 "expected flags:\n %s\n"
                                 "guest flags:\n %s\n"
                                 % (missing_flags, unexpect_flags, expected_flags,
                                    guest_flags))
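
The comparison performed at the end of this test is plain set arithmetic, as the docstring describes. Below is a standalone sketch of that arithmetic with made-up flag names; it is not part of flag_check.py.

# Illustrative sketch only -- flag names are invented for demonstration.
model_flags  = {"sse4_1", "sse4_2", "aes", "tsc-deadline"}
add_flags    = {"x2apic"}          # exposed via +flag
del_flags    = {"aes"}             # removed via -flag
out_flags    = {"tsc-deadline"}    # in dump file but not shown in guest
option_flags = {"rep_good", "up"}  # kernel-generated, acceptable in guest
guest_flags  = {"sse4_1", "sse4_2", "x2apic", "rep_good"}

expected_flags = (model_flags | add_flags) - del_flags - out_flags
missing_flags  = expected_flags - guest_flags
unexpect_flags = guest_flags - expected_flags - option_flags

print(sorted(missing_flags))    # []
print(sorted(unexpect_flags))   # []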

Example 16

Project: strsync Source File: strsync.py
def main():
    parser = argparse.ArgumentParser(description='Automatically translate and synchronize .strings files from defined base language.')
    parser.add_argument('-b','--base-lang-name', help='A base(or source) localizable resource name.(default=\'Base\'), (e.g. "Base" via \'Base.lproj\', "en" via \'en.lproj\')', default='Base', required=False)
    parser.add_argument('-x','--excluding-lang-names', type=str, help='A localizable resource name that you want to exclude. (e.g. "Base" via \'Base.lproj\', "en" via \'en.lproj\')', default=[], required=False, nargs='+')
    parser.add_argument('-c','--client-id', help='Client ID for MS Translation API', required=True)
    parser.add_argument('-s','--client-secret', help='Client Secret key for MS Translation API', required=True)
    parser.add_argument('-f','--force-translate-keys', type=str, help='Keys in the strings to update and translate by force. (input nothing for all keys.)', default=[], required=False, nargs='*')
    parser.add_argument('-fb','--following-base-keys', type=str, help='Keys in the strings to follow from "Base".', default=[], required=False, nargs='+')
    parser.add_argument('-fbl','--following-base-keys-if-length-longer', type=str, help='Keys in the strings to follow from "Base" if its length longer than length of "Base" value.', default=[], required=False, nargs='+')
    parser.add_argument('-ic','--ignore-comments', help='Allows to ignore comment synchronization.', default=None, required=False, nargs='*')
    parser.add_argument('target path', help='Target localizable resource path. (root path of Base.lproj, default=./)', default='./', nargs='?')
    args = vars(parser.parse_args())

    reload(sys)
    sys.setdefaultencoding('utf-8')

    # configure arguments
    __LANG_SEP__ = '-'
    __DIR_SUFFIX__ = ".lproj"
    __FILE_SUFFIX__ = ".strings"
    __RESOURCE_PATH__ = expanduser(args['target path'])
    __BASE_LANG__ = args['base_lang_name']
    __EXCLUDING_LANGS__ = args['excluding_lang_names']
    __KEYS_FORCE_TRANSLATE__ = args['force_translate_keys']
    __KEYS_FORCE_TRANSLATE_ALL__ = ('--force-translate-keys' in sys.argv or '-f' in sys.argv) and not __KEYS_FORCE_TRANSLATE__
    __KEYS_FOLLOW_BASE__ = args['following_base_keys']
    __KEYS_FOLLOW_BASE_IF_LENGTH_LONGER__ = args['following_base_keys_if_length_longer']
    __IGNORE_COMMENTS__ = args['ignore_comments'] is not None
    __BASE_RESOUCE_DIR__ = None

    __LITERNAL_FORMAT__ = "%@"
    __LITERNAL_FORMAT_RE__ = re.compile(r"(%\s{1,}@)|(@\s{0,}%)")
    __LITERNAL_REPLACEMENT__ = "**"
    __LITERNAL_REPLACEMENT_RE__ = re.compile(r"\*\s{0,}\*")

    __QUOTES_RE__ = re.compile(r"\"")
    __QUOTES_REPLACEMENT__ = "'"

    if __BASE_LANG__.endswith(__DIR_SUFFIX__):
        __BASE_RESOUCE_DIR__ = __BASE_LANG__
        __BASE_LANG__ = __BASE_LANG__.split(__DIR_SUFFIX__)[0]
    else:
        __BASE_RESOUCE_DIR__ = __BASE_LANG__+__DIR_SUFFIX__

    # setup Translator & langs

    # read ios langs
    print '(i) Fetching supported locale codes for ios9 ...'
    __IOS9_CODES__ = [lang_row[0] for lang_row in csv.reader(open(resolve_file_path('lc_ios9.tsv'),'rb'), delimiter='\t')]
    print '(i) Supported numbers of locale code :', len(__IOS9_CODES__)

    __MS_CODE_ALIASES__ = {
        # MS API Supported : ios9 supported ISO639 1-2 codes
        'zh-CHS' : ['zh-Hans', 'zh-CN', 'zh-SG'],
        'zh-CHT' : ['zh-Hant', 'zh-MO', 'zh-HK', 'zh-TW'],
        'en' : ['en-AU', 'en-GB'],
        'es' : ['es-MX'],
        'fr' : ['fr-CA'],
        'pt' : ['pt-BR','pt-PT']
    }

    # read mst langs
    print '(i) Fetching supported locales from Microsoft Translation API...'
    trans = Translator(args['client_id'], args['client_secret'])

    __MS_LANG_FILE__ = resolve_file_path('lc_ms.cached.tsv')
    __MS_SUPPORTED_CODES__ = None
    if os.path.exists(__MS_LANG_FILE__):
        __MS_SUPPORTED_CODES__ = [l.strip() for l in open(__MS_LANG_FILE__,'rb').readlines()]
    else:
        __MS_SUPPORTED_CODES__ = trans.get_languages()
        cfile = open(__MS_LANG_FILE__,'w')
        codes = ''
        for code in __MS_SUPPORTED_CODES__:
            codes += code+'\n'
        cfile.write(codes)
        cfile.close()
    print '(i) Supported numbers of locale code :', len(__MS_SUPPORTED_CODES__)
    
    #
    global_result_logs = {}

    # methods
    def supported_lang(code):
        alias = [ms for ms, ios in __MS_CODE_ALIASES__.items() if code in ios]
        # check es-{Custom defined alias}
        if len(alias)==1:
            return alias[0]
        # check es-MX
        elif code in __MS_SUPPORTED_CODES__:
            return code
        # check es
        elif code.split(__LANG_SEP__)[0] in __MS_SUPPORTED_CODES__:
            return code.split(__LANG_SEP__)[0]
        else:
            return None

    def preprocessing_translate_strs(strs):
        return [__LITERNAL_FORMAT_RE__.sub(__LITERNAL_FORMAT__, s.strip()).replace(__LITERNAL_FORMAT__, __LITERNAL_REPLACEMENT__) for s in strs]

    def postprocessing_translate_str(str):
        str = str.strip()
        # remove Quotes
        str = __QUOTES_RE__.sub(__QUOTES_REPLACEMENT__, str)
        # normalize the literal replacement marker
        str = validate_liternal_replacement(str)
        # turn the literal replacement back into the literal format
        str = str.replace(__LITERNAL_REPLACEMENT__, __LITERNAL_FORMAT__)
        return str

    def validate_liternal_format(str):
        return __LITERNAL_FORMAT_RE__.sub(__LITERNAL_FORMAT__, str)

    def validate_liternal_replacement(str):
        return __LITERNAL_REPLACEMENT_RE__.sub(__LITERNAL_FORMAT__, str)

    def translate_ms(strs, to):
        lang = supported_lang(to)
        strs = preprocessing_translate_strs(strs)
        return [postprocessing_translate_str(r['TranslatedText']) for r in trans.translate_array(strs, lang)] if lang else strs

    def strings_obj_from_file(file):
        return strsparser.parse_strings(filename=file)

    def merge_two_dicts(x, y):
        '''Given two dicts, merge them into a new dict as a shallow copy.'''
        z = x.copy()
        z.update(y)
        return z

    # core function
    def insert_or_translate(target_file, lc):
        #parse target file
        target_kv = {}
        target_kc = {}
        target_error_lines = []
        if not notexist_or_empty_file(target_file):
            parsed_strings = strsparser.parse_strings(filename=target_file)
            for item in parsed_strings:
                k, e = item['key'], item['error']
                # line error
                if e:
                    target_error_lines.append(e)
                if not target_error_lines:
                    target_kv[k] = item['value']
                    target_kc[k] = item['comment']

        #parsing complete or return.
        if target_error_lines:
            print '(!) Syntax error - Skip'
            return False, None, None, target_error_lines

        #base
        base_content = base_dict[os.path.basename(target_file)]
        base_kv = {}
        base_kc = {}
        for item in base_content:
            k, e = item['key'], item['error']
            # line error
            if e:
                print '(!) WARNING : Syntax error from Base -> ', k, ':' , e
            base_kv[k] = item['value']
            base_kc[k] = item['comment']

        force_adding_keys = base_kv.keys() if __KEYS_FORCE_TRANSLATE_ALL__ else __KEYS_FORCE_TRANSLATE__
        adding_keys = list(((set(base_kv.keys()) - set(target_kv.keys())) | (set(base_kv.keys()) & set(force_adding_keys))) - set(__KEYS_FOLLOW_BASE__))
        removing_keys = list(set(target_kv.keys()) - set(base_kv.keys()))
        existing_keys = list(set(base_kv.keys()) - (set(adding_keys) | set(removing_keys)))
        updated_keys = []

        """
        perform translate
        """
        translated_kv = {}
        if len(adding_keys):
            print 'Translating...'
            translated_kv = dict(zip(adding_keys, translate_ms([base_kv[k] for k in adding_keys], lc)))

        updated_content = []
        for item in base_content:
            k = item['key']
            newitem = dict.fromkeys(item.keys())
            newitem['key'] = k
            target_value, target_comment = target_kv.get(k), target_kc.get(k)
            newitem['comment'] = target_comment if __IGNORE_COMMENTS__ else target_comment or base_kc[k]
            needs_update_comment = False if __IGNORE_COMMENTS__ else not target_comment and base_kc[k]
            
            #added
            if k in adding_keys:
                if k in translated_kv:
                    newitem['value'] = translated_kv[k]
                    if not newitem['comment']:
                        newitem['comment'] = 'Translated from: {0}'.format(base_kv[k])
                    print '[Add] "{0}" = "{1}" <- {2}'.format(k, newitem['value'], base_kv[k])
                else:
                    newitem['value'] = target_kv[k]
                    if not newitem['comment']:
                        newitem['comment'] = 'Translate failed from: {0}'.format(base_kv[k])
                    print '[Error] "{0}" = "{1}" X <- {2}'.format(k, newitem['value'], base_kv[k])
            #exists
            elif k in existing_keys:
                
                if k in __KEYS_FOLLOW_BASE_IF_LENGTH_LONGER__:
                    if target_value != base_kv[k] and len(target_value) > len(base_kv[k]) or needs_update_comment:
                        print '(!) Length of "', target_value, '" is longer than"', base_kv[k], '" as', len(target_value), '>', len(base_kv[k])
                        newitem['value'] = base_kv[k]
                        updated_keys.append(k)
                        
                        if not lc in global_result_logs:                            
                            global_result_logs[lc] = {}
                        global_result_logs[lc][k] = (target_value, base_kv[k])
                    else:
                        newitem['value'] = target_value or base_kv[k]
                        
                elif k in __KEYS_FOLLOW_BASE__:
                    newitem['value'] = base_kv[k]
                    if target_value != base_kv[k] or needs_update_comment:
                        updated_keys.append(k)
                        
                else:
                    newitem['value'] = target_value or base_kv[k]
                    if not target_value or needs_update_comment:
                        updated_keys.append(k)

            updated_content.append(newitem)

        #removed or wrong
        for k in removing_keys:
            print '[Remove]', k

        if len(adding_keys) or len(removing_keys):
            print '(i) Changed Keys: Added {0}, Updated {1}, Removed {2}'.format(len(adding_keys), len(updated_keys), len(removing_keys))

        return updated_content and (len(adding_keys)>0 or len(updated_keys)>0 or len(removing_keys)>0), updated_content, translated_kv, target_error_lines

    def write_file(target_file, list_of_content):
        suc = False
        try:
            f = codecs.open(target_file, "w", "utf-8")
            contents = ''
            for content in list_of_content:
                if content['comment']:
                    contents += '/*{0}*/'.format(content['comment']) + '\n'
                contents += '"{0}" = "{1}";'.format(content['key'], content['value']) + '\n'
            f.write(contents)
            suc = True
        except IOError:
            print 'IOError to open', target_file
        finally:
            f.close()
        return suc

    def remove_file(target_file):
        try:
            os.rename(target_file, target_file+'.deleted')
            return True
        except IOError:
            print 'IOError to rename', target_file
            return False

    def create_file(target_file):
        open(target_file, 'a').close()

    def notexist_or_empty_file(target_file):
        return not os.path.exists(target_file) or os.path.getsize(target_file)==0

    def resolve_file_names(target_file_names):
        return map(lambda f: f.decode('utf-8'), filter(lambda f: f.endswith(__FILE_SUFFIX__), target_file_names))

    base_dict = {}
    results_dict = {}

    # Get Base Language Specs

    walked = list(os.walk(__RESOURCE_PATH__, topdown=True))

    for dir, subdirs, files in walked:
        if os.path.basename(dir)==__BASE_RESOUCE_DIR__:
            for _file in resolve_file_names(files):
                f = os.path.join(dir, _file)
                if notexist_or_empty_file(f):
                    continue

                base_dict[_file] = strings_obj_from_file(f)

    if not base_dict:
        print '[!] Not found "{0}" in target path "{1}"'.format(__BASE_RESOUCE_DIR__, __RESOURCE_PATH__)
        sys.exit(0)

    print 'Start synchronizing...'
    for file in base_dict:
        print 'Target:', file

    for dir, subdirs, files in walked:
        files = resolve_file_names(files)

        if dir.endswith((__DIR_SUFFIX__)):
            lc = os.path.basename(dir).split(__DIR_SUFFIX__)[0]
            if lc.find('_'): lc = lc.replace('_', __LANG_SEP__)
            if lc == __BASE_LANG__:
                continue

            if lc in __EXCLUDING_LANGS__:
                print 'Skip: ', lc
                continue

            # lc = supported_lang(lc)
            results_dict[lc] = {
                'deleted_files' : [],
                'added_files' : [],
                'updated_files' : [],
                'skipped_files' : [],
                'translated_files_lines' : {},
                'error_lines_kv' : {}
            }

            if not supported_lang(lc):
                print 'Does not supported: ', lc
                results_dict[lc]['skipped_files'] = join_path_all(dir, files)
                continue

            print '\n', 'Analyzing localizables... {1} (at {0})'.format(dir, lc)

            added_files = list(set(base_dict.keys()) - set(files))
            removed_files = list(set(files) - set(base_dict.keys()))
            existing_files = list(set(files) - (set(added_files) | set(removed_files)))

            added_files = join_path_all(dir, added_files)
            removed_files = join_path_all(dir, removed_files)
            existing_files = join_path_all(dir, existing_files)

            added_cnt, updated_cnt, removed_cnt = 0, 0, 0
            translated_files_lines = results_dict[lc]['translated_files_lines']
            error_files = results_dict[lc]['error_lines_kv']

            #remove - file
            for removed_file in removed_files:
                print 'Removing File... {0}'.format(removed_file)
                if remove_file(removed_file):
                    removed_cnt+=1

            #add - file
            for added_file in added_files:
                print 'Adding File... {0}'.format(added_file)
                create_file(added_file)
                u, c, t, e = insert_or_translate(added_file, lc)
                #error
                if e:
                    error_files[added_file] = e
                #normal
                elif u and write_file(added_file, c):
                    added_cnt+=1
                    translated_files_lines[added_file] = t

            #exist - lookup lines
            for ext_file in existing_files:
                u, c, t, e = insert_or_translate(ext_file, lc)
                #error
                if e:
                    error_files[ext_file] = e
                #normal
                elif u:
                    print 'Updating File... {0}'.format(ext_file)
                    if write_file(ext_file, c):
                        updated_cnt += 1
                        translated_files_lines[ext_file] = t

            if added_cnt or updated_cnt or removed_cnt or error_files:
                print '(i) Changed Files : Added {0}, Updated {1}, Removed {2}, Error {3}'.format(added_cnt, updated_cnt, removed_cnt, len(error_files.keys()))
            else:
                print 'Nothing to translate or add.'

            """
            Results
            """
            results_dict[lc]['deleted_files'] = removed_files
            results_dict[lc]['added_files'] = list(set(added_files) & set(translated_files_lines.keys()))
            results_dict[lc]['updated_files'] = list(set(existing_files) & set(translated_files_lines.keys()))
            if error_files:
                print error_files
            results_dict[lc]['error_lines_kv'] = error_files

    # print total Results
    print ''
    t_file_cnt, t_line_cnt = 0, 0
    file_add_cnt, file_remove_cnt, file_update_cnt, file_skip_cnt = 0,0,0,0

    for lc in results_dict.keys():
        result_lc = results_dict[lc]

        file_add_cnt += len(result_lc['added_files'])
        file_remove_cnt += len(result_lc['deleted_files'])
        file_update_cnt += len(result_lc['updated_files'])
        file_skip_cnt += len(result_lc['skipped_files'])

        for f in result_lc['added_files']: print 'Added',f
        for f in result_lc['deleted_files']: print 'Removed',f
        for f in result_lc['updated_files']: print 'Updated',f
        for f in result_lc['skipped_files']: print 'Skipped',f

        tfiles = result_lc['translated_files_lines']
        if tfiles:
            # print '============ Results for langcode : {0} ============='.format(lc)
            for f in tfiles:
                t_file_cnt += 1
                if len(tfiles[f]):
                    # print '', f
                    for key in tfiles[f]:
                        t_line_cnt += 1
                        # print key, ' = ', tfiles[f][key]
          
    for lc in global_result_logs.keys():
        print lc
        for t in global_result_logs[lc].keys():
            o, b = global_result_logs[lc][t]
            print o.decode('utf-8'), ' -> ', b

    print ''
    found_warning = filter(lambda i: i or None, rget(results_dict, 'error_lines_kv'))

    if file_add_cnt or file_update_cnt or file_remove_cnt or file_skip_cnt or found_warning:
        print 'Total New Translated Strings : {0}'.format(t_line_cnt)
        print 'Changed Files Total : Added {0}, Updated {1}, Removed {2}, Skipped {3}'.format(file_add_cnt, file_update_cnt, file_remove_cnt, file_skip_cnt)
        print "Synchronized."

        if found_warning:
            print '\n[!!] WARNING: Found strings that contain syntax errors. Please confirm.'
            for a in found_warning:
                for k in a:
                    print 'at', k
                    for i in a[k]:
                        print ' ', i
    else:
        print "All strings are already synchronized. Nothing to translate or add."

    return
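
The write_file helper above serializes each entry as a '"key" = "value";' line, optionally preceded by a /*comment*/ line. As a small hedged sketch in the spirit of this listing, the pattern and parse_strings_line helper below are illustrative assumptions (not part of the project) showing how such a line could be read back with re.compile:

import re

# Illustrative only: capture the quoted key and value of a .strings entry.
STRINGS_LINE_RE = re.compile(r'^\s*"(?P<key>.*?)"\s*=\s*"(?P<value>.*?)"\s*;\s*$')

def parse_strings_line(line):
    match = STRINGS_LINE_RE.match(line)
    return (match.group('key'), match.group('value')) if match else None

print(parse_strings_line('"greeting" = "Hello";'))   # ('greeting', 'Hello')
print(parse_strings_line('not a strings entry'))     # None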

Example 17

Project: cgat Source File: gff_compare.py
Function: main
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.
    """

    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(
        version="%prog version: $Id: gff_compare.py 2781 2009-09-10 11:33:14Z andreas $", usage=globals()["__doc__"])

    parser.add_option("-f", "--output-full", dest="write_full",
                      help="write full gff entries.", action="store_true")
    parser.add_option("-e", "--output-matched-exons", dest="write_matched_exons",
                      help="write matched exons.", action="store_true")
    parser.add_option("-o", "--output-missed-exons", dest="write_missed_exons", action="store_true",
                      help="write missed exons.")
    parser.add_option("-g", "--output-missed-genes", dest="write_missed_genes", action="store_true",
                      help="write missed genes.")
    parser.add_option("-r", "--regex-reference", dest="regex_reference", type="string",
                      help="regular expression mapping exon to transcript in reference.")
    parser.add_option("-t", "--regex-target", dest="regex_target", type="string",
                      help="regular expression mapping exon to transcript in target.")
    parser.add_option("--no-nucleotides", dest="do_nucleotides", action="store_false",
                      help="skip nucleotide benchmark.")
    parser.add_option("--no-exons", dest="do_exons", action="store_false",
                      help="skip exon benchmark.")
    parser.add_option("--no-genes", dest="do_genes", action="store_false",
                      help="skip gene benchmark.")
    parser.add_option("--output-filename-pattern", dest="outfile_pattern", type="string",
                      help="output filename pattern for extra info (%s will be substituted with reference,target).")

    parser.set_defaults(
        remove_redundancy=False,
        max_exon_slippage=9,
        write_missed_exons=False,
        write_matched_exons=False,
        write_missed_genes=False,
        write_wrong_exons=False,
        write_wrong_genes=False,
        do_nucleotides=True,
        do_exons=True,
        do_genes=True,
        regex_reference=None,
        regex_target=None,
        outfile_pattern="%s.info",
    )

    (options, args) = E.Start(parser)

    if len(args) != 2:
        print(USAGE)
        print("two arguments required")
        sys.exit(1)

    input_filename_target, input_filename_reference = args

    if options.loglevel >= 1:
        print("# target entries from %s" % input_filename_target)
        print("# reading target entries ...", end=' ')
        sys.stdout.flush()

    gff_targets = GTF.readFromFile(open(input_filename_target, "r"))

    if options.loglevel >= 1:
        print("finished: %i" % (len(gff_targets)))
        sys.stdout.flush()

    if options.loglevel >= 1:
        print("# reference entries from %s" % input_filename_reference)
        print("# reading reference entries ...", end=' ')
        sys.stdout.flush()

    gff_references = GTF.readFromFile(open(input_filename_reference, "r"))

    if options.loglevel >= 1:
        print("finished: %i" % (len(gff_references)))
        sys.stdout.flush()

    if options.remove_redundancy:
        gff_targets = GTF.CombineOverlaps(gff_targets)
        gff_references = GTF.CombineOverlaps(gff_references)

        if options.loglevel >= 1:
            print("# after filtering: targets=%i, references=%i" % (len(gff_targets), len(gff_references)))

    ##########################################################################
    # sort exons
    if options.loglevel >= 1:
        print("# sorting exons ...", end=' ')
        sys.stdout.flush()

    gff_targets.sort(lambda x, y: cmp((x.mName, x.strand, x.start, x.end),
                                      (y.mName, y.strand, y.start, y.end)))

    gff_references.sort(lambda x, y: cmp((x.mName, x.strand, x.start, x.end),
                                         (y.mName, y.strand, y.start, y.end)))

    ntargets = len(gff_targets)
    nreferences = len(gff_references)

    if options.loglevel >= 1:
        print("finished")
        sys.stdout.flush()

    ##########################################################################
    # get nucleotide level accuracy
    # process each fragment separately
    if options.do_nucleotides:
        print("""############################################################""")

        headers = (
            "contig", "strand", "tp", "fp", "tn", "fn", "sp", "sn", "cc")

        print("\t".join(headers))

        first_r, first_t = 0, 0
        r, t = 0, 0

        ttp, tfp, ttn, tfn = 0, 0, 0, 0

        # this only works, if all contigs in reference are present in target.
        while r < nreferences and t < ntargets:

            this_name = gff_references[r].mName
            this_strand = gff_references[r].strand

            # get all in references
            while r < nreferences and \
                    gff_references[r].mName == this_name and \
                    gff_references[r].strand == this_strand:
                r += 1

            # skip over extra contigs in target
            while t < ntargets and \
                (gff_targets[t].mName != this_name or
                 gff_targets[t].strand != this_strand):
                t += 1
            first_t = t

            # get all in targets
            while t < ntargets and \
                    gff_targets[t].mName == this_name and \
                    gff_targets[t].strand == this_strand:
                t += 1

            tp, fp, tn, fn = AnalyseOverlaps(gff_references[first_r:r],
                                             gff_targets[first_t:t])

            spec, sens = CalculateSpecificitySensitivity(tp, fp, tn, fn)
            cc = CalculateCorrelationCoefficient(tp, fp, tn, fn)
            print("%s\t%s\t%i\t%i\t%i\t%i\t%5.2f\t%5.2f\t%5.2f" % (this_name, this_strand, tp, fp, tn, fn, spec, sens, cc))

            ttp += tp
            tfp += fp
            ttn += tn
            tfn += fn
            first_r, first_t = r, t

        spec, sens = CalculateSpecificitySensitivity(ttp, tfp, ttn, tfn)
        cc = CalculateCorrelationCoefficient(ttp, tfp, ttn, tfn)
        print("%s\t%s\t%i\t%i\t%i\t%i\t%5.2f\t%5.2f\t%5.2f" % ("all", "all", ttp, tfp, ttn, tfn, spec, sens, cc))

        sys.stdout.flush()

    ##########################################################################
    if options.do_exons or options.do_genes:

        print("""############################################################""")

        headers = ("category", "contig", "strand", "tp", "fp", "tn",
                   "fn", "sp", "sn", "cc", "me", "we", "me", "we")

        print("\t".join(headers))

        r, t = 0, 0
        next_r, next_t = r, t

        # strict false positves/negatives
        tp, fp, tn, fn = 0, 0, 0, 0
        ttp, tfp, ttn, tfn = 0, 0, 0, 0
        # partial false positives/negatives
        ptp, pfp, ptn, pfn = 0, 0, 0, 0
        tptp, tpfp, tptn, tpfn = 0, 0, 0, 0

        # missed and wrong exons
        missed_exons, wrong_exons = 0, 0
        tmissed_exons, twrong_exons = 0, 0

        # Flag set, if partial overlap in previous pair
        last_partial_overlap = False
        # Flag set, if partial overlap and reference was last increased
        last_increased_ref = False

        while r < nreferences and t < ntargets:

            this_name = gff_references[r].mName
            this_strand = gff_references[r].strand

            # get overlap segments
            if next_r == r:
                ref_overlaps, next_r, ref_start, ref_end = GetFirstOverlaps(
                    gff_references, r)
            if next_t == t:
                target_overlaps, next_t, target_start, target_end = GetFirstOverlaps(
                    gff_targets, t)

            if options.loglevel >= 3:
                print("########################################################")
                for x in ref_overlaps:
                    print("#", str(x))
                for x in target_overlaps:
                    print("#", str(x))

            do_summary = False
            # check strand switch in reference
            if next_r < nreferences and \
                (this_name != gff_references[next_r].mName or
                    this_strand != gff_references[next_r].strand):
                if options.loglevel >= 3:
                    print("# target advance")
                do_summary = True

                last_increased_ref = False
                last_partial_overlap = False

                # advance in target until next name is found
                next_name = gff_references[next_r].mName
                next_strand = gff_references[next_r].strand
                while next_t < ntargets and \
                        (next_name != gff_targets[next_t].mName or
                         next_strand != gff_targets[next_t].strand):
                    fp += 1
                    pfp += 1
                    target_overlaps, next_t, target_start, target_end = GetFirstOverlaps(
                        gff_targets, next_t)

                for x in gff_targets[t:next_t]:
                    x.mStatus = "extra"
                for x in gff_references[r:next_r]:
                    x.mStatus = "extra"

                r, t = next_r, next_t
            # check strand switch in target
            elif next_t < ntargets and \
                (this_name != gff_targets[next_t].mName or
                 this_strand != gff_targets[next_t].strand):
                    # advance in reference until next name is found
                if options.loglevel >= 3:
                    print("# reference advance")
                do_summary = True

                last_increased_ref = False
                last_partial_overlap = False

                next_name = gff_targets[next_t].mName
                next_strand = gff_targets[next_t].strand
                while next_r < nreferences and \
                        (next_name != gff_references[next_r].mName or
                         next_strand != gff_references[next_r].strand):
                    fn += 1
                    pfn += 1
                    reference_overlaps, next_r, references_start, references_end = GetFirstOverlaps(
                        gff_references, next_r)

                for x in gff_targets[t:next_t]:
                    x.mStatus = "extra"
                for x in gff_references[r:next_r]:
                    x.mStatus = "extra"

                r, t = next_r, next_t
            # otherwise
            else:

                ref_status, target_status = None, None

                if options.loglevel >= 3:
                    print("# same chromosome")

                # overlap between segments
                if min(ref_end, target_end) - max(ref_start, target_start) > 0:

                    # clear flags
                    last_increased_ref = False
                    last_partial_overlap = False
                    found = False

                    for rr in ref_overlaps:
                        xfound = False
                        for tt in target_overlaps:
                            if GTF.Identity(rr, tt, max_slippage=options.max_exon_slippage):
                                xfound = True
                                break
                        if xfound:
                            found = True
                            break

                    if found:
                        ref_status = "match"
                        target_status = "match"
                        tp += 1
                        ptp += 1
                        if options.write_matched_exons:
                            print("############# matching exons ###########################")
                            for x in ref_overlaps:
                                print("#", str(x))
                            for x in target_overlaps:
                                print("#", str(x))
                    else:
                        fn += 1

                        # check for one-sided matches
                        for rr in ref_overlaps:
                            xfound = False
                            for tt in target_overlaps:
                                if GTF.HalfIdentity(rr, tt, max_slippage=options.max_exon_slippage):
                                    xfound = True
                                    break
                            if xfound:
                                found = True
                                break

                        if found:
                            ptp += 1
                            code = "partial"
                            ref_status = "partial"
                            target_status = "partial"
                        else:
                            pfn += 1
                            code = "complete"
                            ref_status = "mismatch"
                            target_status = "mismatch"

                        if options.write_missed_exons:
                            print("############# %s non-overlapping exons ###########################" % code)
                            for x in ref_overlaps:
                                print("#", str(x))
                            for x in target_overlaps:
                                print("#", str(x))

                    ###########################################################
                    # r, t = next_r, next_t
                    if ref_end == target_end:
                        r, t = next_r, next_t
                    elif ref_end < target_end:
                        r = next_r
                        last_increased_ref = True
                        last_partial_overlap = True
                    else:
                        t = next_t
                        last_increased_ref = False
                        last_partial_overlap = True

                # non-overlap between segments
                else:

                    if ref_end < target_start:

                        # for non-overlap, check whether there was partial overlap before
                        # and reference was not increased.
                        # if there was, just increment reference, but do not
                        # count.

                        if not (last_partial_overlap and not last_increased_ref):

                            if options.write_missed_exons:
                                print("############# missed exon ###########################")
                                for x in ref_overlaps:
                                    print("#", str(x))
                            missed_exons += 1
                            fn += 1
                            pfn += 1
                            ref_status = "extra"

                        r = next_r

                    else:

                        # for non-overlap, check whether there was partial overlap before
                        # and target was not increased.
                        # if there was, just increment target, but do not
                        # count.

                        if not (last_partial_overlap and last_increased_ref):
                            if options.write_wrong_exons:
                                print("############# wrong exon ###########################")
                                for x in target_overlaps:
                                    print("#", str(x))

                            wrong_exons += 1
                            fp += 1
                            pfp += 1
                            target_status = "extra"

                        t = next_t

                    last_partial_overlap = False

                if options.loglevel >= 3:
                    print("# ref_status=%s, target_status=%s" % (ref_status, target_status))

                if ref_status:
                    for rr in ref_overlaps:
                        rr.mStatus = ref_status

                    if ref_status in ("match", "partial") and options.do_genes:
                        for rr in ref_overlaps:
                            rr.mMatches = target_overlaps

                if target_status:
                    for tt in target_overlaps:
                        tt.mStatus = target_status

                    if target_status in ("match", "partial") and options.do_genes:
                        for tt in target_overlaps:
                            tt.mMatches = ref_overlaps

            if do_summary or r >= nreferences or t >= ntargets:
                ttp += tp
                tfp += fp
                ttn += tn
                tfn += fn

                tptp += ptp
                tpfp += pfp
                tptn += ptn
                tpfn += pfn

                tmissed_exons += missed_exons
                twrong_exons += wrong_exons

                if tp + fn != 0:
                    pmissed_exons = "%5.2f" % (float(missed_exons) / (tp + fn))
                else:
                    pmissed_exons = "0"

                if tp + fp != 0:
                    pwrong_exons = "%5.2f" % (float(wrong_exons) / (tp + fp))
                else:
                    pwrong_exons = "na"

                spec, sens = CalculateSpecificitySensitivity(tp, fp, tn, fn)
                cc = (spec + sens) / 2.0
                print("full\t%s\t%s\t%i\t%i\t%i\t%i\t%5.2f\t%5.2f\t%5.2f\t%i\t%i\t%s\t%s" %
                      (this_name, this_strand,
                       tp, fp, tn, fn,
                       spec, sens, cc,
                       missed_exons, wrong_exons,
                       pmissed_exons, pwrong_exons))

                spec, sens = CalculateSpecificitySensitivity(
                    ptp, pfp, ptn, pfn)
                cc = (spec + sens) / 2.0
                print("half\t%s\t%s\t%i\t%i\t%i\t%i\t%5.2f\t%5.2f\t%5.2f\t%i\t%i\t%s\t%s" %
                      (this_name, this_strand,
                       ptp, pfp, ptn, pfn,
                       spec, sens, cc,
                       missed_exons, wrong_exons,
                       pmissed_exons, pwrong_exons))

                tp, fp, tn, fn = 0, 0, 0, 0
                ptp, pfp, ptn, pfn = 0, 0, 0, 0
                missed_exons, wrong_exons = 0, 0

        if t < ntargets:
            for x in gff_targets[t:ntargets]:
                x.mStatus = "extra"
        if r < nreferences:
            for x in gff_references[r:nreferences]:
                x.mStatus = "extra"

        spec, sens = CalculateSpecificitySensitivity(ttp, tfp, ttn, tfn)
        cc = (spec + sens) / 2.0
        print("full\t%s\t%s\t%i\t%i\t%i\t%i\t%5.2f\t%5.2f\t%5.2f\t%i\t%i\t%5.2f\t%5.2f" %
              ("all", "all", ttp, tfp, ttn, tfn,
               spec, sens, cc,
               tmissed_exons, twrong_exons,
               float(tmissed_exons) / (ttp + tfn),
               float(twrong_exons) / (ttp + tfp)))

        spec, sens = CalculateSpecificitySensitivity(tptp, tpfp, tptn, tpfn)
        cc = (spec + sens) / 2.0
        print("half\t%s\t%s\t%i\t%i\t%i\t%i\t%5.2f\t%5.2f\t%5.2f\t%i\t%i\t%5.2f\t%5.2f" %
              ("all", "all", tptp, tpfp, tptn, tpfn,
               spec, sens, cc,
               tmissed_exons, twrong_exons,
               float(tmissed_exons) / (ttp + tfn),
               float(twrong_exons) / (ttp + tfp)))

    if options.do_genes and \
            options.regex_reference and \
            options.regex_target:

        print("""###############################################################""")

        out_options = []
        if options.write_missed_genes:
            out_options.append("missed")

        if options.loglevel >= 2:
            print("# counting matches for reference.")
            sys.stdout.flush()

        (ref_total, ref_match, ref_partial, ref_extra) =\
            CountMatchesPerGene(gff_references,
                                re.compile(options.regex_reference),
                                re.compile(options.regex_target),
                                write=out_options,
                                outfile=open(options.outfile_pattern % "reference", "w"))

        if options.loglevel >= 2:
            print("# counting matches for target.")
            sys.stdout.flush()

        (target_total, target_match, target_partial, target_extra) =\
            CountMatchesPerGene(gff_targets,
                                re.compile(options.regex_target),
                                re.compile(
                                    options.regex_reference),
                                write=out_options,
                                outfile=open(options.outfile_pattern % "target", "w"))

        if options.loglevel >= 1:
            print("# reference: genes=%6i, matches=%6i, partial=%6i, extra=%6i" %
                  (ref_total, ref_match, ref_partial, ref_extra))
            print("# target   : genes=%6i, matches=%6i, partial=%6i, extra=%6i" %
                  (target_total, target_match, target_partial, target_extra))

        headers = ("category", "tp", "fp", "tn", "fn",
                   "sp", "sn", "cc", "mg", "wg", "mg", "wg")
        print("\t".join(headers))

        tp = ref_match
        fp = target_extra
        tn = 0
        fn = ref_total - ref_match
        wrong_genes = target_extra
        missed_genes = ref_extra

        spec, sens = CalculateSpecificitySensitivity(tp, fp, tn, fn)
        cc = (spec + sens) / 2.0

        if tp + fp == 0:
            fp = nreferences

        print("full\t%i\t%i\t%i\t%i\t%5.2f\t%5.2f\t%5.2f\t%i\t%i\t%5.2f\t%5.2f" %
              (tp, fp, tn, fn,
               spec, sens, cc,
               missed_genes, wrong_genes,
               float(missed_genes) / (tp + fn),
               float(wrong_genes) / (tp + fp)))

        tp = ref_match + ref_partial
        fp = target_extra
        tn = 0
        fn = ref_total - ref_match - ref_partial
        wrong_genes = target_extra
        missed_genes = ref_extra

        spec, sens = CalculateSpecificitySensitivity(tp, fp, tn, fn)
        cc = (spec + sens) / 2.0
        print("half\t%i\t%i\t%i\t%i\t%5.2f\t%5.2f\t%5.2f\t%i\t%i\t%5.2f\t%5.2f" %
              (tp, fp, tn, fn,
               spec, sens, cc,
               missed_genes, wrong_genes,
               float(missed_genes) / (tp + fn),
               float(wrong_genes) / (tp + fp)))

    E.Stop()
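
The --regex-reference and --regex-target options above are compiled with re.compile and handed to CountMatchesPerGene to map exons to transcripts. As a hedged illustration of the kind of expression those options expect, the GTF-style attribute string and pattern below are invented, not taken from the cgat code:

import re

# Hypothetical value for --regex-reference / --regex-target: pull the
# transcript id out of a GTF-style attribute field.
rx_transcript = re.compile(r'transcript_id "(?P<id>[^"]+)"')

attributes = 'gene_id "ENSG000001"; transcript_id "ENST000001";'
match = rx_transcript.search(attributes)
if match:
    print(match.group('id'))   # ENST000001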

Example 18

Project: dashman Source File: tx.py
def main():
    parser = argparse.ArgumentParser(
        description="Manipulate bitcoin (or alt coin) transactions.",
        epilog=EPILOG)

    parser.add_argument('-t', "--transaction-version", type=int,
                        help='Transaction version, either 1 (default) or 3 (not yet supported).')

    parser.add_argument('-l', "--lock-time", type=parse_locktime, help='Lock time; either a block '
                        'index, or a date/time (example: "2014-01-01T15:00:00")')

    parser.add_argument('-n', "--network", default="BTC",
                        help='Define network code (M=Bitcoin mainnet, T=Bitcoin testnet).')

    parser.add_argument('-a', "--augment", action='store_true',
                        help='augment tx by adding any missing spendable metadata by fetching'
                             ' inputs from cache and/or web services')

    parser.add_argument('-s', "--verbose-signature", action='store_true',
                        help='Display technical signature details.')

    parser.add_argument("-i", "--fetch-spendables", metavar="address", action="append",
                        help='Add all unspent spendables for the given bitcoin address. This information'
                        ' is fetched from web services.')

    parser.add_argument('-f', "--private-key-file", metavar="path-to-private-keys", action="append",
                        help='file containing WIF or BIP0032 private keys. If file name ends with .gpg, '
                        '"gpg -d" will be invoked automatically. File is read one line at a time, and if '
                        'the file contains only one WIF per line, it will also be scanned for a bitcoin '
                        'address, and any addresses found will be assumed to be public keys for the given'
                        ' private key.',
                        type=argparse.FileType('r'))

    parser.add_argument('-g', "--gpg-argument", help='argument to pass to gpg (besides -d).', default='')

    parser.add_argument("--remove-tx-in", metavar="tx_in_index_to_delete", action="append", type=int,
                        help='remove a tx_in')

    parser.add_argument("--remove-tx-out", metavar="tx_out_index_to_delete", action="append", type=int,
                        help='remove a tx_out')

    parser.add_argument('-F', "--fee", help='fee, in satoshis, to pay on transaction, or '
                        '"standard" to auto-calculate. This is only useful if the "split pool" '
                        'is used; otherwise, the fee is automatically set to the unclaimed funds.',
                        default="standard", metavar="transaction-fee", type=parse_fee)

    parser.add_argument('-C', "--cache", help='force the resultant transaction into the transaction cache.'
                        ' Mostly for testing.', action='store_true')

    parser.add_argument('-u', "--show-unspents", action='store_true',
                        help='show TxOut items for this transaction in Spendable form.')

    parser.add_argument('-b', "--bitcoind-url",
                        help='URL to bitcoind instance to validate against (http://user:pass@host:port).')

    parser.add_argument('-o', "--output-file", metavar="path-to-output-file", type=argparse.FileType('wb'),
                        help='file to write transaction to. This suppresses most other output.')

    parser.add_argument('-p', "--pay-to-script", metavar="pay-to-script", action="append",
                        help='a hex version of a script required for a pay-to-script input (a bitcoin address that starts with 3)')

    parser.add_argument('-P', "--pay-to-script-file", metavar="pay-to-script-file", nargs=1, type=argparse.FileType('r'),
                        help='a file containing hex scripts (one per line) corresponding to pay-to-script inputs')

    parser.add_argument("argument", nargs="+", help='generic argument: can be a hex transaction id '
                        '(exactly 64 characters) to be fetched from cache or a web service;'
                        ' a transaction as a hex string; a path name to a transaction to be loaded;'
                        ' a spendable 4-tuple of the form tx_id/tx_out_idx/script_hex/satoshi_count '
                        'to be added to TxIn list; an address/satoshi_count to be added to the TxOut '
                        'list; an address to be added to the TxOut list and placed in the "split'
                        ' pool".')

    args = parser.parse_args()

    # defaults

    txs = []
    spendables = []
    payables = []

    key_iters = []

    TX_ID_RE = re.compile(r"^[0-9a-fA-F]{64}$")

    # there are a few warnings we might optionally print out, but only if
    # they are relevant. We don't want to print them out multiple times, so we
    # collect them here and print them at the end if they ever kick in.

    warning_tx_cache = None
    warning_get_tx = None
    warning_spendables = None

    if args.private_key_file:
        wif_re = re.compile(r"[1-9a-km-zA-LMNP-Z]{51,111}")
        # address_re = re.compile(r"[1-9a-kmnp-zA-KMNP-Z]{27-31}")
        for f in args.private_key_file:
            if f.name.endswith(".gpg"):
                gpg_args = ["gpg", "-d"]
                if args.gpg_argument:
                    gpg_args.extend(args.gpg_argument.split())
                gpg_args.append(f.name)
                popen = subprocess.Popen(gpg_args, stdout=subprocess.PIPE)
                f = popen.stdout
            for line in f.readlines():
                # decode
                if isinstance(line, bytes):
                    line = line.decode("utf8")
                # look for WIFs
                possible_keys = wif_re.findall(line)

                def make_key(x):
                    try:
                        return Key.from_text(x)
                    except Exception:
                        return None

                keys = [make_key(x) for x in possible_keys]
                for key in keys:
                    if key:
                        key_iters.append((k.wif() for k in key.subkeys("")))

                # if len(keys) == 1 and key.hierarchical_wallet() is None:
                #    # we have exactly 1 WIF. Let's look for an address
                #   potential_addresses = address_re.findall(line)

    # update p2sh_lookup
    p2sh_lookup = {}
    if args.pay_to_script:
        for p2s in args.pay_to_script:
            try:
                script = h2b(p2s)
                p2sh_lookup[hash160(script)] = script
            except Exception:
                print("warning: error parsing pay-to-script value %s" % p2s)

    if args.pay_to_script_file:
        hex_re = re.compile(r"[0-9a-fA-F]+")
        for f in args.pay_to_script_file:
            count = 0
            for l in f:
                try:
                    m = hex_re.search(l)
                    if m:
                        p2s = m.group(0)
                        script = h2b(p2s)
                        p2sh_lookup[hash160(script)] = script
                        count += 1
                except Exception:
                    print("warning: error parsing pay-to-script file %s" % f.name)
            if count == 0:
                print("warning: no scripts found in %s" % f.name)

    # we create the tx_db lazily
    tx_db = None

    for arg in args.argument:

        # hex transaction id
        if TX_ID_RE.match(arg):
            if tx_db is None:
                warning_tx_cache = message_about_tx_cache_env()
                warning_get_tx = message_about_get_tx_env()
                tx_db = get_tx_db()
            tx = tx_db.get(h2b_rev(arg))
            if not tx:
                for m in [warning_tx_cache, warning_get_tx, warning_spendables]:
                    if m:
                        print("warning: %s" % m, file=sys.stderr)
                parser.error("can't find Tx with id %s" % arg)
            txs.append(tx)
            continue

        # hex transaction data
        try:
            tx = Tx.from_hex(arg)
            txs.append(tx)
            continue
        except Exception:
            pass

        is_valid = is_address_valid(arg, allowable_netcodes=[args.network])
        if is_valid:
            payables.append((arg, 0))
            continue

        try:
            key = Key.from_text(arg)
            # TODO: check network
            if key.wif() is None:
                payables.append((key.address(), 0))
                continue
            # TODO: support paths to subkeys
            key_iters.append((k.wif() for k in key.subkeys("")))
            continue
        except Exception:
            pass

        if os.path.exists(arg):
            try:
                with open(arg, "rb") as f:
                    if f.name.endswith("hex"):
                        f = io.BytesIO(codecs.getreader("hex_codec")(f).read())
                    tx = Tx.parse(f)
                    txs.append(tx)
                    try:
                        tx.parse_unspents(f)
                    except Exception as ex:
                        pass
                    continue
            except Exception:
                pass

        parts = arg.split("/")
        if len(parts) == 4:
            # spendable
            try:
                spendables.append(Spendable.from_text(arg))
                continue
            except Exception:
                pass

        if len(parts) == 2 and is_address_valid(parts[0], allowable_netcodes=[args.network]):
            try:
                payables.append(parts)
                continue
            except ValueError:
                pass

        parser.error("can't parse %s" % arg)

    if args.fetch_spendables:
        warning_spendables = message_about_spendables_for_address_env()
        for address in args.fetch_spendables:
            spendables.extend(spendables_for_address(address))

    for tx in txs:
        if tx.missing_unspents() and args.augment:
            if tx_db is None:
                warning_tx_cache = message_about_tx_cache_env()
                warning_get_tx = message_about_get_tx_env()
                tx_db = get_tx_db()
            tx.unspents_from_db(tx_db, ignore_missing=True)

    txs_in = []
    txs_out = []
    unspents = []
    # we use a clever trick here to keep each tx_in corresponding with its tx_out
    for tx in txs:
        smaller = min(len(tx.txs_in), len(tx.txs_out))
        txs_in.extend(tx.txs_in[:smaller])
        txs_out.extend(tx.txs_out[:smaller])
        unspents.extend(tx.unspents[:smaller])
    for tx in txs:
        smaller = min(len(tx.txs_in), len(tx.txs_out))
        txs_in.extend(tx.txs_in[smaller:])
        txs_out.extend(tx.txs_out[smaller:])
        unspents.extend(tx.unspents[smaller:])
    for spendable in spendables:
        txs_in.append(spendable.tx_in())
        unspents.append(spendable)
    for address, coin_value in payables:
        script = standard_tx_out_script(address)
        txs_out.append(TxOut(coin_value, script))

    lock_time = args.lock_time
    version = args.transaction_version

    # if no lock_time is explicitly set, inherit from the first tx or use default
    if lock_time is None:
        if txs:
            lock_time = txs[0].lock_time
        else:
            lock_time = DEFAULT_LOCK_TIME

    # if no version is explicitly set, inherit from the first tx or use default
    if version is None:
        if txs:
            version = txs[0].version
        else:
            version = DEFAULT_VERSION

    if args.remove_tx_in:
        s = set(args.remove_tx_in)
        txs_in = [tx_in for idx, tx_in in enumerate(txs_in) if idx not in s]

    if args.remove_tx_out:
        s = set(args.remove_tx_out)
        txs_out = [tx_out for idx, tx_out in enumerate(txs_out) if idx not in s]

    tx = Tx(txs_in=txs_in, txs_out=txs_out, lock_time=lock_time, version=version, unspents=unspents)

    fee = args.fee
    try:
        distribute_from_split_pool(tx, fee)
    except ValueError as ex:
        print("warning: %s" % ex.args[0], file=sys.stderr)

    unsigned_before = tx.bad_signature_count()
    if unsigned_before > 0 and key_iters:
        def wif_iter(iters):
            while len(iters) > 0:
                for idx, iter in enumerate(iters):
                    try:
                        wif = next(iter)
                        yield wif
                    except StopIteration:
                        iters = iters[:idx] + iters[idx+1:]
                        break

        print("signing...", file=sys.stderr)
        sign_tx(tx, wif_iter(key_iters), p2sh_lookup=p2sh_lookup)

    unsigned_after = tx.bad_signature_count()
    if unsigned_after > 0 and key_iters:
        print("warning: %d TxIn items still unsigned" % unsigned_after, file=sys.stderr)

    if len(tx.txs_in) == 0:
        print("warning: transaction has no inputs", file=sys.stderr)

    if len(tx.txs_out) == 0:
        print("warning: transaction has no outputs", file=sys.stderr)

    include_unspents = (unsigned_after > 0)
    tx_as_hex = tx.as_hex(include_unspents=include_unspents)

    if args.output_file:
        f = args.output_file
        if f.name.endswith(".hex"):
            f.write(tx_as_hex.encode("utf8"))
        else:
            tx.stream(f)
            if include_unspents:
                tx.stream_unspents(f)
        f.close()
    elif args.show_unspents:
        for spendable in tx.tx_outs_as_spendable():
            print(spendable.as_text())
    else:
        if not tx.missing_unspents():
            check_fees(tx)
        dump_tx(tx, args.network, args.verbose_signature)
        if include_unspents:
            print("including unspents in hex dump since transaction not fully signed")
        print(tx_as_hex)

    if args.cache:
        if tx_db is None:
            warning_tx_cache = message_about_tx_cache_env()
            warning_get_tx = message_about_get_tx_env()
            tx_db = get_tx_db()
        tx_db.put(tx)

    if args.bitcoind_url:
        if tx_db is None:
            warning_tx_cache = message_about_tx_cache_env()
            warning_get_tx = message_about_get_tx_env()
            tx_db = get_tx_db()
        validate_bitcoind(tx, tx_db, args.bitcoind_url)

    if tx.missing_unspents():
        print("\n** can't validate transaction as source transactions missing", file=sys.stderr)
    else:
        try:
            if tx_db is None:
                warning_tx_cache = message_about_tx_cache_env()
                warning_get_tx = message_about_get_tx_env()
                tx_db = get_tx_db()
            tx.validate_unspents(tx_db)
            print('all incoming transaction values validated')
        except BadSpendableError as ex:
            print("\ncuem ERROR: FEES INCORRECTLY STATED: %s" % ex.args[0], file=sys.stderr)
        except Exception as ex:
            print("\n*** can't validate source transactions as untampered: %s" %
                  ex.args[0], file=sys.stderr)

    # print warnings
    for m in [warning_tx_cache, warning_get_tx, warning_spendables]:
        if m:
            print("warning: %s" % m, file=sys.stderr)

Example 19

Project: helloworld Source File: users.py
def get_remote_user_info(handler, user_url, profile):
  # get host-meta first
  lrdd_link = None
  try:
    lrdd_link = get_lrdd_link(user_url)
  except:
    pass
  salmon_url = ''
  webmention_url = ''
  magic_key = ''
  alias = ''
  webfinger_doc = None

  user_response = urllib2.urlopen(user_url)
  user_doc = BeautifulSoup(user_response.read())

  if not lrdd_link:
    atom_url = user_doc.find('link', rel=re.compile(r"\balternate\b"),
        type='application/atom+xml')
    rss_url = user_doc.find('link', rel=re.compile(r"\balternate\b"),
        type='application/rss+xml')

    feed_url = atom_url or rss_url
  else:
    # get webfinger
    try:
      webfinger_doc = get_webfinger(lrdd_link, user_url)
      feed_url = webfinger_doc.find('link',
          rel='http://schemas.google.com/g/2010#updates-from')
      salmon_url = webfinger_doc.find('link', rel='salmon')
      if salmon_url:
        salmon_url = salmon_url['href']
      webmention_url = webfinger_doc.find('link', rel='webmention')
      if webmention_url:
        webmention_url = webmention_url['href']
      magic_key = webfinger_doc.find('link', rel='magic-public-key')
      if magic_key:
        magic_key = magic_key['href']
        magic_key = magic_key.replace('data:application/magic-public-key,', '')
      alias = webfinger_doc.find('alias')
      if alias:
        alias = alias.string
    except:
      feed_url = None

  if not webmention_url:
    webmention_url = user_doc.find('link', rel=re.compile(r"\bwebmention\b"))
    if webmention_url:
        webmention_url = webmention_url['href']

  if not feed_url:
    feed_url = user_url
  else:
    feed_url = feed_url['href']
  base_url = None

  if (not feed_url.startswith('/') and not (feed_url.startswith('http://') or
      feed_url.startswith('https://'))):
    base_url = user_doc.find('base')
    if base_url:
      base_url = base_url['href']
    else:
      base_url = ''
    feed_url = base_url + feed_url

  parsed_url = urlparse.urlparse(user_url)
  if not (feed_url.startswith('http://') or feed_url.startswith('https://')):
    if (not feed_url.startswith('/')):
      feed_url = '/' + feed_url
    feed_url = parsed_url.scheme + '://' + parsed_url.hostname + feed_url

  feed_response = urllib2.urlopen(feed_url)
  feed_doc = BeautifulSoup(feed_response.read())
  author = feed_doc.find('author')

  alias = None
  if author:
    uri = author.find('uri')
    if uri:
      alias = uri.string  # alias or user_url

  if not alias:
    alt_link = feed_doc.find('link', rel=re.compile(r"\balternate\b"))
    if alt_link:
      alias = alt_link['href']
    else:
      # XXX UGH, BeautifulSoup treats <link> as self-closing tag
      # LAMESAUCE for rss
      alias = feed_doc.find('link').nextSibling
  if not alias or not alias.strip():
    raise tornado.web.HTTPError(400)

  alias = alias.strip()
  user_remote = handler.models.users_remote.get(local_username=profile,
      profile_url=alias)[0]
  hub_url = feed_doc.find(re.compile('.+:link$'), rel='hub')

  if not user_remote:
    user_remote = handler.models.users_remote()

  favicon = None
  favicon = user_doc.find('link', rel='shortcut icon')
  if favicon:
    if (favicon['href'].startswith('http://') or
        favicon['href'].startswith('https://')):
      favicon = favicon['href']
    else:
      if base_url:
        favicon = base_url + favicon['href']
      else:
        favicon = (parsed_url.scheme + '://' + parsed_url.hostname +
            ('' if favicon['href'].startswith('/') else '/') + favicon['href'])
  else:
    favicon = parsed_url.scheme + '://' + parsed_url.hostname + '/favicon.ico'
  user_remote.favicon = favicon

  user_remote.local_username = profile
  logo = feed_doc.find('logo')
  if logo and logo.parent.name == 'source':
    logo = None
  image = feed_doc.find('image')
  if logo:
    user_remote.avatar = logo.string
  elif image:
    image = feed_doc.find('image')
    url = image.find('url')
    user_remote.avatar = url.string
  else:
    user_remote.avatar = favicon

  if not favicon:
    user_remote.favicon = user_remote.avatar

  preferred_username = None
  display_name = None
  if author:
    preferred_username = author.find(re.compile('.+:preferredusername$'))
    display_name = author.find(re.compile('.+:displayname$'))
  if author and preferred_username and display_name:
    #user_remote.avatar = author.find('link', rel='avatar')['href']
    user_remote.username = preferred_username.string
    user_remote.name = display_name.string
  elif webfinger_doc:
    user_remote.username = webfinger_doc.find('Property',
        type="http://apinamespace.org/atom/username").string
  else:
    user_remote.username = feed_doc.find('title').string
  user_remote.profile_url = alias
  user_remote.magic_key = magic_key
  user_remote.salmon_url = salmon_url
  user_remote.webmention_url = webmention_url
  user_remote.feed_url = feed_url
  if hub_url:
    user_remote.hub_url = hub_url['href']
  user_remote.save()

  try:
    # TODO(mime): Add hub.secret
    if user_remote.hub_url:
      callback_url = handler.nav_url(host=True, username=profile,
          section='push')
      pubsubhubbub_subscribe.subscribe_topic(user_remote.hub_url,
          user_remote.feed_url, callback_url, verify="sync")
  except:
    import logging
    logging.error("couldn't subscribe on the hub!")

  return user_remote
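
Several lookups above rely on BeautifulSoup accepting a compiled pattern wherever it accepts a string: rel=re.compile(r"\balternate\b") matches one token of a multi-valued rel attribute, and re.compile('.+:link$') as the tag name matches any namespaced ...:link element. A minimal sketch of the same idea against invented markup, assuming the bs4 package (the original example uses an older BeautifulSoup import):

import re
from bs4 import BeautifulSoup

html = ('<link rel="alternate stylesheet" href="/feed.atom"/>'
        '<atom:link rel="hub" href="https://hub.example.com/"/>')
doc = BeautifulSoup(html, 'html.parser')

# A compiled pattern can stand in for an attribute value...
alt_link = doc.find('link', rel=re.compile(r"\balternate\b"))
# ...or for the tag name itself, e.g. any namespaced <...:link> element.
hub_link = doc.find(re.compile(r'.+:link$'), rel='hub')

print(alt_link['href'], hub_link['href'])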

Example 20

Project: quickbot_bbb Source File: base.py
Function: parse_cmd
def parse_cmd(self):
    """ Command parser """
    try:
        while self.run_flag:
            try:
                line = self.robotSocket.recv(1024)
            except socket.error as msg:
                continue

            self.cmdBuffer += line

            # String contained within $ and * (with no $ or * symbols in it)
            buf_pattern = r'\$[^\$\*]*?\*'
            buf_regex = re.compile(buf_pattern)
            buf_result = buf_regex.search(self.cmdBuffer)

            if buf_result:
                msg = buf_result.group()
                print msg
                self.cmdBuffer = ''

                cmd_pattern = r'(?P<CMD>[A-Z]{3,})'
                set_pattern = r'(?P<SET>=?)'
                query_pattern = r'(?P<QUERY>\??)'
                arg_pattern = r'(?(2)(?P<ARGS>.*))'
                msg_pattern = r'\$' + \
                    cmd_pattern + \
                    set_pattern + \
                    query_pattern + \
                    arg_pattern + \
                    r'.*\*'

                msg_regex = re.compile(msg_pattern)
                msg_result = msg_regex.search(msg)

                if msg_result.group('CMD') == 'CHECK':
                    self.robotSocket.sendto(
                        'Hello from QuickBot\n', (self.base_ip, self.port))

                elif msg_result.group('CMD') == 'PWM':
                    if msg_result.group('QUERY'):
                        if VERBOSE:
                            print str(self.get_pwm())
                        self.robotSocket.sendto(str(self.get_pwm()) + '\n',
                                                (self.base_ip, self.port))

                    elif msg_result.group('SET') and msg_result.group('ARGS'):
                        args = msg_result.group('ARGS')
                        pwm_pattern = r'(?P<LEFT>[-]?\d+),(?P<RIGHT>[-]?\d+)'
                        pwm_regex = re.compile(pwm_pattern)
                        pwm_result = pwm_regex.match(args)
                        if pwm_result:
                            pwm = [int(pwm_result.group('LEFT')), \
                                    int(pwm_result.group('RIGHT'))]
                            self.set_pwm(pwm)

                    self.robotSocket.sendto(str(self.get_pwm()) + '\n',
                                            (self.base_ip, self.port))

                elif msg_result.group('CMD') == 'IRVAL':
                    if msg_result.group('QUERY'):
                        reply = '[' + ', '.join(map(str, self.get_ir())) + ']'
                        print 'Sending: ' + reply
                        self.robotSocket.sendto(
                            reply + '\n', (self.base_ip, self.port))

                elif msg_result.group('CMD') == 'ULTRAVAL':
                    if msg_result.group('QUERY'):
                        reply = '[' + ', '.join(map(str, self.ultraVal)) + ']'
                        print 'Sending: ' + reply
                        self.robotSocket.sendto(
                            reply + '\n', (self.base_ip, self.port))

                elif msg_result.group('CMD') == 'WHEELANG':
                    if msg_result.group('QUERY'):
                        print 'Sending: ' + str(self.get_wheel_ang())
                        self.robotSocket.sendto(
                            str(self.get_wheel_ang()) +
                            '\n', (self.base_ip, self.port))

                    elif msg_result.group('SET') and msg_result.group('ARGS'):
                        args = msg_result.group('ARGS')
                        arg_pattern = \
                        r'(?P<LEFT>[-]?\d+[\.]?\d*),(?P<RIGHT>[-]?\d+[\.]?\d*)'
                        regex = re.compile(arg_pattern)
                        result = regex.match(args)
                        if result:
                            pos = [float(regex.match(args).group('LEFT')), \
                                float(regex.match(args).group('RIGHT'))]
                            self.set_wheel_ang(pos)

                elif msg_result.group('CMD') == 'ENVAL':
                    if msg_result.group('QUERY'):
                        reply = \
                            '[' + ', '.join(map(str, self.get_enc_val())) + ']'
                        print 'Sending: ' + reply
                        self.robotSocket.sendto(
                            reply + '\n', (self.base_ip, self.port))

                    elif msg_result.group('SET') and msg_result.group('ARGS'):
                        args = msg_result.group('ARGS')
                        arg_pattern = \
                        r'(?P<LEFT>[-]?\d+[\.]?\d*),(?P<RIGHT>[-]?\d+[\.]?\d*)'
                        regex = re.compile(arg_pattern)
                        result = regex.match(args)
                        if result:
                            enc_pos = [float(regex.match(args).group('LEFT')), \
                                     float(regex.match(args).group('RIGHT'))]
                            self.set_enc_val(enc_pos)

                elif msg_result.group('CMD') == 'ENRAW':
                    if msg_result.group('QUERY'):
                        reply = \
                            '[' + ', '.join(map(str, self.get_enc_raw())) + ']'
                        print 'Sending: ' + reply
                        self.robotSocket.sendto(
                            reply + '\n', (self.base_ip, self.port))

                elif msg_result.group('CMD') == 'ENOFFSET':
                    if msg_result.group('QUERY'):
                        reply = '[' + \
                            ', '.join(map(str, self.get_enc_offset())) + ']'
                        print 'Sending: ' + reply
                        self.robotSocket.sendto(
                            reply + '\n', (self.base_ip, self.port))

                    elif msg_result.group('SET') and msg_result.group('ARGS'):
                        args = msg_result.group('ARGS')
                        arg_pattern = \
                        r'(?P<LEFT>[-]?\d+[\.]?\d*),(?P<RIGHT>[-]?\d+[\.]?\d*)'
                        regex = re.compile(arg_pattern)
                        result = regex.match(args)
                        if result:
                            offset = [float(regex.match(args).group('LEFT')), \
                                     float(regex.match(args).group('RIGHT'))]
                            self.set_enc_offset(offset)

                elif msg_result.group('CMD') == 'ENVEL':
                    if msg_result.group('QUERY'):
                        reply = \
                            '[' + ', '.join(map(str, self.get_enc_vel())) + ']'
                        print 'Sending: ' + reply
                        self.robotSocket.sendto(
                            reply + '\n', (self.base_ip, self.port))

                    elif msg_result.group('SET') and msg_result.group('ARGS'):
                        args = msg_result.group('ARGS')
                        arg_pattern = \
                        r'(?P<LEFT>[-]?\d+[\.]?\d*),(?P<RIGHT>[-]?\d+[\.]?\d*)'
                        regex = re.compile(arg_pattern)
                        result = regex.match(args)
                        if result:
                            enc_vel = [float(regex.match(args).group('LEFT')), \
                            float(regex.match(args).group('RIGHT'))]
                            self.set_enc_vel(enc_vel)

                elif msg_result.group('CMD') == 'WHEELANGVEL':
                    if msg_result.group('QUERY'):
                        reply = \
                            '[' + ', '.join(map(str, self.get_wheel_ang_vel())) + ']'
                        print 'Sending: ' + reply
                        self.robotSocket.sendto(
                            reply + '\n', (self.base_ip, self.port))

                    elif msg_result.group('SET') and msg_result.group('ARGS'):
                        args = msg_result.group('ARGS')
                        arg_pattern = \
                        r'(?P<LEFT>[-]?\d+[\.]?\d*),(?P<RIGHT>[-]?\d+[\.]?\d*)'
                        regex = re.compile(arg_pattern)
                        result = regex.match(args)
                        if result:
                            wheel_ang_vel = [float(regex.match(args).group('LEFT')), \
                                     float(regex.match(args).group('RIGHT'))]
                            self.set_wheel_ang_vel(wheel_ang_vel)

                elif msg_result.group('CMD') == 'ENRESET':
                    self.reset_enc_val()
                    reply = \
                            '[' + ', '.join(map(str, self.get_enc_val())) + ']'
                    print 'Encoder values reset to ' + reply

                elif msg_result.group('CMD') == 'UPDATE':
                    if msg_result.group('SET') and msg_result.group('ARGS'):
                        args = msg_result.group('ARGS')
                        pwm_pattern = r'(?P<LEFT>[-]?\d+),(?P<RIGHT>[-]?\d+)'
                        pwm_regex = re.compile(pwm_pattern)
                        pwm_result = pwm_regex.match(args)
                        if pwm_result:
                            pwm = [int(pwm_result.group('LEFT')),
                                   int(pwm_result.group('RIGHT'))]
                            self.set_pwm(pwm)

                        reply = '[' + ', '.join(map(str, self.enc_pos)) + ', ' \
                            + ', '.join(map(str, self.encVel)) + ']'
                        print 'Sending: ' + reply
                        self.robotSocket.sendto(
                            reply + '\n', (self.base_ip, self.port))

                elif msg_result.group('CMD') == 'END':
                    self.end_run()

                else:
                    print 'Invalid: ' + msg
    except:
        self.end_run()
        raise
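
The three handlers above (ENOFFSET, ENVEL, WHEELANGVEL) all reuse the same named-group pattern to parse a "left,right" pair of numbers. A minimal standalone sketch of that parsing idiom (not part of the project; names and example inputs are illustrative):

import re

# Compile once, match once, and read both fields from the resulting match object.
pair_pattern = re.compile(r'(?P<LEFT>[-]?\d+[\.]?\d*),(?P<RIGHT>[-]?\d+[\.]?\d*)')

def parse_pair(args):
    """Return [left, right] as floats, or None if the argument string does not match."""
    result = pair_pattern.match(args)
    if result:
        return [float(result.group('LEFT')), float(result.group('RIGHT'))]
    return None

print(parse_pair('1.5,-2.0'))     # [1.5, -2.0]
print(parse_pair('not numbers'))  # None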

Example 21

Project: WikiDAT Source File: logitem.py
def process_logitem(log_iter):
    """
    Processor for LogItem objects extracted from the 'logging' DB table in
    Wikipedia
    """
    ip_pat = re.compile(r"^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$")
    time_unit_ft = re.compile(r"sec|min|h|d|week|fortnight|month|year|"
                              r"indefinite|infinite")
    lead_zero_pat = re.compile(r"(0\d\d)")
    triple_zero_pat = re.compile(r"000")
    # Case 'month', rounded to 30 days per month
    # Case 'year', multiply by 365.25 days per year
    # Case 'fortnight' is equivalent to 2 weeks
    # Case 'infinite' will default to timedelta.max
    time_units = {'sec': 'seconds',
                  'min': 'minutes',
                  'h': 'hours',
                  'd': 'days',
                  'week': 'weeks',
                  'fortnight': 'weeks',
                  'month': 'days',
                  'year': 'days'
                  }
    time_fac = {'sec': 1,
                'min': 1,
                'h': 1,
                'd': 1,
                'week': 1,
                'fortnight': 2,
                'month': 30,
                'year': 365.25
                }

    for logitem in log_iter:
        # Clean timestamp string
        logitem['timestamp'] = (logitem['timestamp'].
                                replace('Z', '').replace('T', ' '))
        # INFO FLAGGED REVISIONS
        # Content of log_old_flag and log_new_flag
        # for languages with flagged revisions
        if (logitem['type'] == 'review' and
            (logitem['action'] == 'approve' or
             logitem['action'] == 'approve-a' or
             logitem['action'] == 'unapprove' or
             logitem['action'] == 'approve-ia' or
             logitem['action'] == 'approve-i')):

            logitem['flagged'] = True
            # Check presence of params
            # TODO: Investigate review items without params
            if 'params' in logitem:
                flags = logitem['params'].split('\n')

                # Standard case before March 2010
                # Only new stable version if no previous stable version
                # is available
                if (len(flags) == 1):
                    logitem['new_flag'] = flags[0]
                    logitem['old_flag'] = '0'
                # Standard case before March 2010
                # 2 params: new stable revision and old stable revision
                # ----
                # Case after March 2010
                # Timestamp of new stable version was introduced
                # as a third param. This is redundant with info from
                # table revision. Thus, we only get the first two params:
                # rev_id of new stable revision and rev_id of
                # previous stable revision
                elif (len(flags) == 2 or len(flags) == 3):
                    logitem['new_flag'] = flags[0]
                    logitem['old_flag'] = flags[1]

            # TODO: Evaluate the possibility of extracting flagged-revs
            # related data to an independent DB table

        # INFO BLOCKED USERS
        if (logitem['type'] == 'block' and
            (logitem['action'] == 'block' or
             logitem['action'] == 'unblock' or
             logitem['action'] == 'reblock')):

            logitem['block'] = {}  # Flag block action for later
            # Identify target user from log_title field
            title = logitem['logtitle'].split(':')
            if len(title) == 2:
                target = title[1]
                if re.search(ip_pat, target):
                    # Case of IP addresses
                    # Fix malformed records: del leading 0s if present
                    target = re.sub(triple_zero_pat, '0', target)
                    target = re.sub(lead_zero_pat,
                                    lambda x: x.group().lstrip('0'), target)
                    try:
                        logitem['block']['target_ip'] = int(ipaddress.ip_address(target))
                    except ValueError:
                        print("Invalid IP address to block: ", target)
                        logitem['block']['target_ip'] = 0
                else:
                    # Case of logged user
                    logitem['block']['target'] = target

            # Calculate duration of block action from log_params field
            # This field might be blank
            # Case 1: Figure + range (e.g. '1 week', '2 days', '6 months')
            # Case 2: Timestamp with expiration date for block
            # e.g. Wed, 22 Jan 2014 10:14:10 GMT
            if 'params' in logitem and logitem['params']:
                # Identify formation of duration param
                par_dur = logitem['params'].split('\n')[0]
                par_dur = par_dur.replace('Z', '').replace('T', ' ')
                try:
                    # exp = dateutil.parser.parse(par_dur.rsplit(' ', 1)[0])
                    exp = dateutil.parser.parse(par_dur)
                    if re.search('GMT', par_dur):
                        ts = dateutil.parser.parse(logitem['timestamp']+'GMT')
                    else:
                        ts = dateutil.parser.parse(logitem['timestamp'])
                    logitem['block']['duration'] = (exp-ts).total_seconds()
                # Try automated detection of block duration, expressed
                # in "natural language" units
                except Exception:
                    exp_par = re.split(r'(\D+)', par_dur)
                    try:
                        duration = exp_par[0]
                        units = exp_par[1].lower()
                    except IndexError:
                        print("No valid pair duration/units found!")
                        print("params:", logitem['params'])
                        logitem['block']['duration'] = 0.0
                        # Avoid a NameError below when the split did not yield both parts
                        duration, units = '', ''

                    if (units == 'infinite' or units == 'indefinite'):
                        logitem['block']['duration'] = (datetime.timedelta.max.total_seconds())
                    elif duration:
                        try:
                            time_unit = re.search(time_unit_ft,
                                                  units).group()
                            delta_args = {time_units[time_unit]:
                                          int(duration) * time_fac[time_unit]}
                            logitem['block']['duration'] = datetime.timedelta(**delta_args).total_seconds()
                        except AttributeError:
                            print("params:", logitem['params'])
                            logitem['block']['duration'] = 0.0
                        except OverflowError:
                            logitem['block']['duration'] = (datetime.timedelta.max.total_seconds())
                    else:
                        # TODO: Inspect this case later on
                        # Address case of empty duration
                        logitem['block']['duration'] = 0.0
            else:
                # TODO: Inspect this case later on
                # Address case of empty duration
                logitem['block']['duration'] = 0.0

        # INFO DELETIONS
        # TODO:

        # INFO PROTECTIONS
        # TODO:

        # INFO USER REGISTRATIONS
        if (logitem['type'] == 'newusers' and
            (logitem['action'] == 'newusers' or
             logitem['action'] == 'create' or
             logitem['action'] == 'create2' or
             logitem['action'] == 'autocreate' or
             logitem['action'] == 'byemail')):

                # TODO: Evaluate if we need additional info about newusers
                logitem['newuser'] = {}  # Flag new user for later

        # INFO RIGHTS GRANTING
        if (logitem['type'] == 'rights' and logitem['action'] == 'rights'):

            logitem['rights'] = {}  # Flag new rights granting for later
            try:
                logitem['rights']['username'] = logitem['logtitle'].split(':')[1]
            except IndexError:
                print("No user name info in change of user level.")
                if 'params' in logitem:
                    print("params:", logitem['params'])
                logitem['rights']['username'] = ""

            if 'params' in logitem and logitem['params']:
                pars = logitem['params'].split('\n')
                # Case of old format for parameters, with previous status
                # in first line, then new list of privileges in new line
                if len(pars) > 1:
                    logitem['rights']['right_old'] = pars[0]
                    logitem['rights']['right_new'] = pars[1]
                else:
                    # Case of new single-line format oldgroups --> new groups
                    if (re.search('"4::oldgroups"', pars[0])):
                        priv_list = (pars[0].partition('"4::oldgroups"')[2].
                                     partition('"5::newgroups"'))
                        priv_old = re.findall(r'\"(.+?)\"', priv_list[0])
                        priv_new = re.findall(r'\"(.+?)\"', priv_list[2])
                        logitem['rights']['right_old'] = str(priv_old)
                        logitem['rights']['right_new'] = str(priv_new)

                    # Case of primitive free format
                    else:
                        logitem['rights']['right_old'] = ""
                        logitem['rights']['right_new'] = pars[0]
            elif logitem['comment']:
                logitem['rights']['right_old'] = ""
                logitem['rights']['right_new'] = logitem['comment']
            else:
                # No information recorded about new user levels
                logitem['rights']['right_old'] = ""
                logitem['rights']['right_new'] = ""
        yield(logitem)
        del logitem
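
In process_logitem(), all four patterns are compiled once before the loop and then reused for every log record, both through re.search/re.sub and through the compiled objects directly. A small self-contained sketch (not from WikiDAT; the input string is made up) of the leading-zero cleanup applied to blocked IP addresses:

import re

# Patterns compiled once, then reused for every record.
triple_zero_pat = re.compile(r"000")
lead_zero_pat = re.compile(r"(0\d\d)")

def clean_ip(target):
    """Strip zero-padding from a dotted-quad string, e.g. '010.000.001.020' -> '10.0.1.20'."""
    target = triple_zero_pat.sub('0', target)
    return lead_zero_pat.sub(lambda m: m.group().lstrip('0'), target)

print(clean_ip('010.000.001.020'))  # 10.0.1.20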

Example 22

Project: alignak Source File: install_hooks.py
def fix_alignak_cfg(config):
    """
    Fix paths, user and group in alignak.cfg and daemons/*.ini
    Called once all files are copied.

    :param config:
    :return:
    """
    default_paths = {
        'workdir': '/var/run/alignak',
        'logdir': '/var/log/alignak',
        # TODO: confirm this is unuseful...
        'modules_dir': '/var/lib/alignak/modules',
        'plugins_dir': '/var/libexec/alignak',

        'lock_file': '/var/run/alignak/arbiterd.pid',
        'local_log': '/var/log/alignak/arbiterd.log',
        'pidfile': '/var/run/alignak/arbiterd.pid',

        'pack_distribution_file': '/var/lib/alignak/pack_distribution.dat'
    }

    default_macros = {
        'LOGSDIR': '/var/log/alignak',
        'PLUGINSDIR': '/var/libexec/alignak',
    }

    default_ssl = {
        'ca_cert': '/etc/alignak/certs/ca.pem',
        'server_cert': '/etc/alignak/certs/server.cert',
        'server_key': '/etc/alignak/certs/server.key',
    }

    # Changing default user/group if root
    default_users = {}
    if getpass.getuser() == 'root':
        default_users['alignak_user'] = 'alignak'
        default_users['alignak_group'] = 'alignak'
        default_users['user'] = 'alignak'
        default_users['group'] = 'alignak'
        default_users['ALIGNAKUSER'] = 'alignak'
        default_users['ALIGNAKGROUP'] = 'alignak'
        default_users['HOME'] = '`grep ^$ALIGNAKUSER: /etc/passwd | cut -d: -f 6`'

    # Prepare pattern for alignak.cfg
    pattern = "|".join(default_paths.keys())
    changing_path = re.compile("^(%s) *= *" % pattern)
    pattern = "|".join(default_users.keys())
    changing_user = re.compile("^#(%s) *= *" % pattern)
    pattern = "|".join(default_ssl.keys())
    changing_ssl = re.compile("^#(%s) *= *" % pattern)
    pattern = "|".join(default_macros.keys())
    changing_mac = re.compile(r"^\$(%s)\$ *= *" % pattern)

    # Fix resource paths
    alignak_file = os.path.join(
        config.install_dir, "etc", "alignak", "arbiter", "resource.d", "paths.cfg"
    )
    if not os.path.exists(alignak_file):
        print(
            "\n"
            "================================================================================\n"
            "==  The configuration file '%s' is missing.                                   ==\n"
            "================================================================================\n"
            % alignak_file
        )

    for line in fileinput.input(alignak_file, inplace=True):
        line = line.strip()
        mac_attr_name = changing_mac.match(line)
        if mac_attr_name:
            new_path = os.path.join(config.install_dir,
                                    default_macros[mac_attr_name.group(1)].strip("/"))
            print("$%s$=%s" % (mac_attr_name.group(1),
                             new_path))
        else:
            print(line)

    # Fix alignak.cfg
    alignak_file = os.path.join(config.install_dir, "etc", "alignak", "alignak.cfg")
    if not os.path.exists(alignak_file):
        print(
            "\n"
            "================================================================================\n"
            "==  The configuration file '%s' is missing.                                   ==\n"
            "================================================================================\n"
            % alignak_file
        )

    for line in fileinput.input(alignak_file, inplace=True):
        line = line.strip()
        path_attr_name = changing_path.match(line)
        user_attr_name = changing_user.match(line)
        ssl_attr_name = changing_ssl.match(line)
        if path_attr_name:
            new_path = os.path.join(config.install_dir,
                                    default_paths[path_attr_name.group(1)].strip("/"))
            print("%s=%s" % (path_attr_name.group(1),
                             new_path))
        elif user_attr_name:
            print("#%s=%s" % (user_attr_name.group(1),
                             default_users[user_attr_name.group(1)]))
        elif ssl_attr_name:
            new_path = os.path.join(config.install_dir,
                                    default_ssl[ssl_attr_name.group(1)].strip("/"))
            print("#%s=%s" % (ssl_attr_name.group(1),
                             new_path))
        else:
            print(line)

    # Handle daemons ini files
    for ini_file in ["arbiterd.ini", "brokerd.ini", "schedulerd.ini",
                     "pollerd.ini", "reactionnerd.ini", "receiverd.ini"]:
        # Prepare pattern for ini files
        daemon_name = ini_file.strip(".ini")
        default_paths['lock_file'] = '/var/run/alignak/%s.pid' % daemon_name
        default_paths['local_log'] = '/var/log/alignak/%s.log' % daemon_name
        default_paths['pidfile'] = '/var/run/alignak/%s.pid' % daemon_name
        pattern = "|".join(default_paths.keys())
        changing_path = re.compile("^(%s) *= *" % pattern)

        # Fix ini file
        alignak_file = os.path.join(config.install_dir, "etc", "alignak", "daemons", ini_file)
        if not os.path.exists(alignak_file):
            print(
                "\n"
                "================================================================================\n"
                "==  The configuration file '%s' is missing.                                   ==\n"
                "================================================================================\n"
                % alignak_file
            )

        for line in fileinput.input(alignak_file, inplace=True):
            line = line.strip()
            path_attr_name = changing_path.match(line)
            user_attr_name = changing_user.match(line)
            ssl_attr_name = changing_ssl.match(line)
            if path_attr_name:
                new_path = os.path.join(config.install_dir,
                                        default_paths[path_attr_name.group(1)].strip("/"))
                print("%s=%s" % (path_attr_name.group(1),
                                 new_path))
            elif user_attr_name:
                print("#%s=%s" % (user_attr_name.group(1),
                                 default_users[user_attr_name.group(1)]))
            elif ssl_attr_name:
                new_path = os.path.join(config.install_dir,
                                        default_ssl[ssl_attr_name.group(1)].strip("/"))
                print("#%s=%s" % (ssl_attr_name.group(1),
                                 new_path))
            else:
                print(line)

    # Handle default/alignak
    if 'linux' in sys.platform or 'sunos5' in sys.platform:
        old_name = os.path.join(config.install_dir, "etc", "default", "alignak.in")
        if not os.path.exists(old_name):
            print("\n"
                  "=======================================================================================================\n"
                  "==  The configuration file '%s' is missing.\n"
                  "=======================================================================================================\n"
                  % old_name)

        new_name = os.path.join(config.install_dir, "etc", "default", "alignak")
        try:
            os.rename(old_name, new_name)
        except OSError as e:
            print("\n"
                  "=======================================================================================================\n"
                  "==  The configuration file '%s' could not be renamed to '%s'.\n"
                  "==  The newly installed configuration will not be up-to-date.\n"
                  "=======================================================================================================\n"
                  % (old_name, new_name))

        default_paths = {
            'ETC': '/etc/alignak',
            'VAR': '/var/lib/alignak',
            'BIN': '/bin',
            'RUN': '/var/run/alignak',
            'LOG': '/var/log/alignak',
            'LIB': '/var/libexec/alignak',
        }
        pattern = "|".join(default_paths.keys())
        changing_path = re.compile("^(%s) *= *" % pattern)
        for line in fileinput.input(new_name,  inplace=True):
            line = line.strip()
            path_attr_name = changing_path.match(line)
            user_attr_name = changing_user.match(line)
            if path_attr_name:
                new_path = os.path.join(config.install_dir,
                                        default_paths[path_attr_name.group(1)].strip("/"))
                print("%s=%s" % (path_attr_name.group(1),
                                 new_path))
            elif user_attr_name:
                print("#%s=%s" % (user_attr_name.group(1),
                                 default_users[user_attr_name.group(1)]))

            else:
                print(line)

    # Alignak run script
    alignak_run = ''
    if 'win' in sys.platform:
        pass
    elif 'linux' in sys.platform or 'sunos5' in sys.platform:
        alignak_run = os.path.join(config.install_dir, "etc", "init.d", "alignak start")
    elif 'bsd' in sys.platform or 'dragonfly' in sys.platform:
        alignak_run = os.path.join(config.install_dir, "etc", "rc.d", "alignak start")

    # Alignak configuration root directory
    alignak_etc = os.path.join(config.install_dir, "etc", "alignak")

    # Add ENV vars only if we are in virtualenv
    # in order to get init scripts working
    if 'VIRTUAL_ENV' in os.environ:
        activate_file = os.path.join(os.environ.get("VIRTUAL_ENV"), 'bin', 'activate')
        try:
            afd = open(activate_file, 'r+')
        except Exception as exp:
            print(exp)
            raise Exception("Virtual environment error")

        env_config = ("""export PYTHON_EGG_CACHE=.\n"""
                      """export ALIGNAK_DEFAULT_FILE=%s/etc/default/alignak\n"""
                      % os.environ.get("VIRTUAL_ENV"))
        alignak_etc = "%s/etc/alignak" % os.environ.get("VIRTUAL_ENV")
        alignak_run = "%s/etc/init.d alignak start" % os.environ.get("VIRTUAL_ENV")

        if afd.read().find(env_config) == -1:
            afd.write(env_config)
            print(
                "\n"
                "================================================================================\n"
                "==                                                                            ==\n"
                "==  You need to REsource env/bin/activate in order to set appropriate         ==\n"
                "== variables to use init scripts                                              ==\n"
                "==                                                                            ==\n"
                "================================================================================\n"
            )

    print("\n"
          "================================================================================\n"
          "==                                                                            ==\n"
          "==  The installation succeded.                                                ==\n"
          "==                                                                            ==\n"
          "== -------------------------------------------------------------------------- ==\n"
          "==                                                                            ==\n"
          "== You can run Alignak with:                                                  ==\n"
          "==   %s\n"
          "==                                                                            ==\n"
          "== The default installed configuration is located here:                       ==\n"
          "==   %s\n"
          "==                                                                            ==\n"
          "== You will find more information about Alignak configuration here:           ==\n"
          "==   http://alignak-doc.readthedocs.io/en/latest/04_configuration/index.html  ==\n"
          "==                                                                            ==\n"
          "== -------------------------------------------------------------------------- ==\n"
          "==                                                                            ==\n"
          "== You should grant the write permissions on the configuration directory to   ==\n"
          "== the user alignak:                                                          ==\n"
          "==   find %s -type f -exec chmod 664 {} +\n"
          "==   find %s -type d -exec chmod 775 {} +\n"
          "== -------------------------------------------------------------------------- ==\n"
          "==                                                                            ==\n"
          "== You should also grant ownership on those directories to the user alignak:  ==\n"
          "==   chown -R alignak:alignak /usr/local/var/run/alignak                      ==\n"
          "==   chown -R alignak:alignak /usr/local/var/log/alignak                      ==\n"
          "==   chown -R alignak:alignak /usr/local/var/libexec/alignak                  ==\n"
          "==                                                                            ==\n"
          "== -------------------------------------------------------------------------- ==\n"
          "==                                                                            ==\n"
          "== Please note that installing Alignak with the setup.py script is not the    ==\n"
          "== recommended way. You'd rather use the packaging built for your OS          ==\n"
          "== distribution that you can find here:                                       ==\n"
          "==   http://alignak-monitoring.github.io/download/                            ==\n"
          "==                                                                            ==\n"
          "================================================================================\n"
          % (alignak_run, alignak_etc, alignak_etc, alignak_etc)
          )

    # Check Alignak recommended user existence
    if not user_exists('alignak'):
        print(
            "\n"
            "================================================================================\n"
            "==                                                                            ==\n"
            "== The user account 'alignak' does not exist on your system.                  ==\n"
            "==                                                                            ==\n"
            "================================================================================\n"
        )

    if not group_exists('alignak'):
        print(
            "\n"
            "================================================================================\n"
            "==                                                                            ==\n"
            "== The user group 'alignak' does not exist on your system.                    ==\n"
            "==                                                                            ==\n"
            "================================================================================\n"
        )
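
fix_alignak_cfg() builds each of its patterns by joining dictionary keys into a single alternation, compiling it once, and then classifying every configuration line with one match() call per compiled pattern. A reduced sketch of that idea (a hypothetical two-key dict, not alignak's real configuration; re.escape is added here as a precaution, while the project relies on its keys being regex-safe):

import re
import os

default_paths = {'workdir': '/var/run/alignak', 'logdir': '/var/log/alignak'}
pattern = "|".join(re.escape(key) for key in default_paths)
changing_path = re.compile(r"^(%s) *= *" % pattern)

def rewrite(line, install_dir='/opt/alignak'):
    """Rewrite a 'key = value' line to point below install_dir, or return it unchanged."""
    match = changing_path.match(line)
    if match:
        key = match.group(1)
        new_path = os.path.join(install_dir, default_paths[key].strip("/"))
        return "%s=%s" % (key, new_path)
    return line

print(rewrite('workdir = /var/run/alignak'))  # workdir=/opt/alignak/var/run/alignak
print(rewrite('# just a comment'))            # unchanged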

Example 23

Project: TADbit Source File: fastq_utils.py
def quality_plot(fnam, r_enz=None, nreads=None, axe=None, savefig=None, paired=False):
    """
    Plots the sequencing quality of a given FASTQ file. If a restriction enzyme
    (RE) name is provided, it can also represent the distribution of digested and
    undigested RE sites and estimate an expected proportion of dangling-ends.

    The proportion of dangling-ends is inferred by counting the number of times a
    dangling-end site is found at the beginning of any of the reads (divided by
    the number of reads).

    :param fnam: path to FASTQ file
    :param None nreads: max number of reads to read; not necessary to read all
    :param None savefig: path to a file where to save the image generated;
       if None, the image will be shown using matplotlib GUI (the extension
       of the file name will determine the desired format).
    :param False paired: whether the input FASTQ contains both ends

    :returns: the percentage of dangling-ends (sensu stricto) and the percentage of
       reads with at least a ligation site.
    """
    phred = dict([(c, i) for i, c in enumerate(
        '!"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~')])
    quals = []
    henes = []
    sites = []
    fixes = []
    liges = []
    ligep = 0
    tkw = dict(size=4, width=1.5)
    if fnam.endswith('.gz'):
        fhandler = gopen(fnam)
    elif fnam.endswith('.dsrc'):
        proc = Popen(['dsrc', 'd', '-t8', '-s', fnam], stdout=PIPE)
        fhandler = proc.stdout
    else:
        fhandler = open(fnam)
    if not r_enz:
        if nreads:
            while True:
                try:
                    next(fhandler)
                except StopIteration:
                    break
                seq = next(fhandler)
                if 'N' in seq:
                    henes.extend([i for i, s in enumerate(seq) if s == 'N'])
                next(fhandler)
                line = next(fhandler)
                quals.append([phred[i] for i in line.strip()])
                if len(quals) > nreads:
                    break
        else: # do this because it's faster
            while True:
                try:
                    next(fhandler)
                except StopIteration:
                    break
                seq = next(fhandler)
                if 'N' in seq:
                    henes.extend([i for i, s in enumerate(seq) if s == 'N'])
                next(fhandler)
                line = next(fhandler)
                quals.append([phred[i] for i in line.strip()])
    else:
        r_site = RESTRICTION_ENZYMES[r_enz].replace('|', '')
        l_site = religated(r_enz)
        d_site = repaired(r_enz)
        if r_site*2 == l_site:
            # in case the religated site equals 2 restriction sites (like DnpII)
            site = re.compile('(?<!%s)' % r_site + r_site + '(?!%s)' % r_site)
            fixe = re.compile('(?<!%s)' % d_site + d_site + '(?!%s)' % d_site)
        else:
            site = re.compile(r_site)
            fixe = re.compile(d_site)
        lige = re.compile(l_site)
        if nreads:
            while True:
                try:
                    next(fhandler)
                except StopIteration:
                    break
                seq = next(fhandler)
                sites.extend([m.start() for m in site.finditer(seq)])
                fixes.extend([m.start() for m in fixe.finditer(seq)])
                liges.extend([m.start() for m in lige.finditer(seq)])
                ligep += l_site in seq
                if 'N' in seq:
                    henes.extend([i for i, s in enumerate(seq) if s == 'N'])
                next(fhandler)
                line = next(fhandler)
                quals.append([phred[i] for i in line.strip()])
                if len(quals) > nreads:
                    break
        else: # do this because it's faster
            while True:
                try:
                    next(fhandler)
                except StopIteration:
                    break
                seq = next(fhandler)
                sites.extend([m.start() for m in site.finditer(seq)])
                fixes.extend([m.start() for m in fixe.finditer(seq)])
                liges.extend([m.start() for m in lige.finditer(seq)])
                ligep += l_site in seq
                if 'N' in seq:
                    henes.extend([i for i, s in enumerate(seq) if s == 'N'])
                next(fhandler)
                line = next(fhandler)
                quals.append([phred[i] for i in line.strip()])
    fhandler.close()
    if not nreads:
        nreads = len(quals)
    quals = zip(*quals)
    meanquals = [np.mean(q) for q in quals]
    errorquals = [np.std(q) for q in quals]

    if axe:
        ax = axe
        fig = axe.get_figure()
        ax2 = fig.add_subplot(212)
    else:
        if r_enz:
            _, (ax, ax2) = plt.subplots(2,1, figsize=(15, 12))
        else:
            _, ax = plt.subplots(1,1, figsize=(15, 6))
        ax.patch.set_facecolor('lightgrey')
        ax.patch.set_alpha(0.4)
        ax.grid(ls='-', color='w', lw=1.5, alpha=0.6, which='major')
        ax.grid(ls='-', color='w', lw=1, alpha=0.3, which='minor')
        ax.set_axisbelow(True)
        # remove tick marks
        ax.tick_params(axis='both', direction='out', top=False, right=False,
                       left=False, bottom=False)
        ax.tick_params(axis='both', direction='out', top=False, right=False,
                       left=False, bottom=False, which='minor')
    ax.errorbar(range(len(line.strip())), meanquals,
                linewidth=1, elinewidth=1, color='darkblue',
                yerr=errorquals, ecolor='orange')

    ax.set_xlim((0, len(line)))
    ax.set_xlabel('Nucleotidic position')
    ax.set_ylabel('PHRED score')
    ax.set_title('Sequencing Quality (%d reads)' % (nreads))
    ax.yaxis.label.set_color('darkblue')
    ax.tick_params(axis='y', colors='darkblue', **tkw)
    axb = ax.twinx()
    axb.plot([henes.count(i) for i in xrange(len(line))], linewidth=1,
             color='black', linestyle='--')
    axb.yaxis.label.set_color('black')
    axb.tick_params(axis='y', colors='black', **tkw)
    axb.set_ylabel('Number of "N" per position')
    try: # no Ns found (yes... it happens)
        axb.set_yscale('log')
        axb.set_ylim((0, axb.get_ylim()[1] * 1000))
    except ValueError:
        axb.set_yscale('linear')
    ax.set_ylim((0, ax.get_ylim()[1]))
    ax.set_xlim((0, len(line)))

    if r_enz:
        ax.set_title('Sequencing Quality and deconvolution (%s %d reads)' % (
            r_enz, nreads))
        ax.set_xlabel('')
        plt.setp(ax.get_xticklabels(), visible=False)
        ax2.patch.set_facecolor('lightgrey')
        ax2.patch.set_alpha(0.4)
        ax2.grid(ls='-', color='w', lw=1.5, alpha=0.6, which='major')
        ax2.grid(ls='-', color='w', lw=1, alpha=0.3, which='minor')
        ax2.set_axisbelow(True)
        ax2.set_xlabel('Nucleotidic position')
        seq_len = len(line) - max((len(r_site), len(l_site), len(d_site)))
        sites = [sites.count(k) for k in xrange(seq_len)] # Undigested
        liges = [liges.count(k) for k in xrange(seq_len)] # OK
        fixes = [fixes.count(k) for k in xrange(seq_len)] # DE
        if d_site in r_site:
            pos = r_site.find(d_site)
            fixes = (fixes[:pos] +
                     [fixes[k] - sites[k-pos] for k in xrange(pos, seq_len)])
        if d_site in l_site:
            pos = l_site.find(d_site)
            fixes = (fixes[:pos] +
                     [fixes[k] - liges[k-pos] for k in xrange(pos, seq_len)])
        site_len = max((len(r_site), len(l_site), len(d_site)))
        if paired:
            sites[len(line) / 2 - site_len:
                  len(line) / 2] = [float('nan')] * site_len
            liges[len(line) / 2 - site_len:
                  len(line) / 2] = [float('nan')] * site_len
            fixes[len(line) / 2 - site_len:
                  len(line) / 2] = [float('nan')] * site_len
        ax2.plot(sites, linewidth=2, color='darkred')
        ax2.set_ylabel('Undigested RE site (%s)' % r_site)
        ax2.yaxis.label.set_color('darkred')
        ax2.tick_params(axis='y', colors='darkred', **tkw)
        ax3 = ax2.twinx()
        ax3.plot(liges, linewidth=2, color='darkblue')
        ax3.yaxis.label.set_color('darkblue')
        ax3.tick_params(axis='y', colors='darkblue', **tkw)
        ax3.set_ylabel('Religated (%s)' % l_site)
        if any([f > 0 for f in fixes]):
            ax4 = ax2.twinx()
            ax4.spines["right"].set_position(("axes", 1.07))
            make_patch_spines_invisible(ax4)
            ax4.spines["right"].set_visible(True)        
            ax4.plot(fixes, linewidth=2, color='darkorange')
            ax4.yaxis.label.set_color('darkorange')
            ax4.tick_params(axis='y', colors='darkorange', **tkw)
            ax4.set_ylabel('Dangling-ends (%s)' % d_site)
        else:
            ax2.set_ylabel('RE site & Dangling-ends  (%s)' % r_site)
        ax2.set_xlim((0, len(line)))
        lig_cnt = (np.nansum(liges) - liges[0] - liges[len(line) / 2])
        sit_cnt = (np.nansum(sites) - sites[0] - sites[len(line) / 2])
        des = ((100. * (fixes[0] + (fixes[(len(line) / 2)]
                                            if paired else 0)))
                       / nreads) if any([f > 0 for f in fixes]) else (
            100. * (sites[0] + (sites[(len(line) / 2)] if paired else 0))) / nreads
        plt.title(('Percentage of digested sites: %.0f%%, of dangling-ends: %.0f%%\n' +
                   'Percentage of reads with ligation site: %.0f%%') %(
                      (100. * lig_cnt) / (lig_cnt + sit_cnt),
                      des,
                      (ligep * 100.) / nreads))
        plt.subplots_adjust(right=0.85)
    if savefig:
        tadbit_savefig(savefig)
        plt.close('all')
    elif not axe:
        plt.show()
    return des, (ligep * 100.) / nreads
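
The DpnII-style branch above is the notable re.compile usage: lookbehind and lookahead assertions ensure a restriction site is only counted when it is not half of a doubled (religated) occurrence. A hedged, self-contained illustration of the same trick ('GATC' stands in for a DpnII-like site; the sequence is made up):

import re

r_site = 'GATC'
# Match GATC only when it is neither preceded nor followed by another GATC.
site = re.compile('(?<!%s)' % r_site + r_site + '(?!%s)' % r_site)

seq = 'TTGATCAA' + 'GATCGATC' + 'TTGATCAA'
print([m.start() for m in site.finditer(seq)])  # [2, 18] -- the doubled GATCGATC is skipped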

Example 24

Project: corpkit Source File: interrogator.py
def interrogator(corpus, 
    search='w', 
    query='any',
    show='w',
    exclude=False,
    excludemode='any',
    searchmode='all',
    case_sensitive=False,
    save=False,
    subcorpora=False,
    just_metadata=False,
    skip_metadata=False,
    preserve_case=False,
    lemmatag=False,
    files_as_subcorpora=False,
    only_unique=False,
    only_format_match=True,
    multiprocess=False,
    spelling=False,
    regex_nonword_filter=r'[A-Za-z0-9]',
    gramsize=1,
    conc=False,
    maxconc=9999,
    window=None,
    no_closed=False,
    no_punct=True,
    discard=False,
    **kwargs):
    """
    Interrogate corpus, corpora, subcorpus and file objects.
    See corpkit.interrogation.interrogate() for docstring
    """
    
    conc = kwargs.get('do_concordancing', conc)
    quiet = kwargs.get('quiet', False)
    coref = kwargs.pop('coref', False)
    show_conc_metadata = kwargs.pop('show_conc_metadata', False)
    fsi_index = kwargs.pop('fsi_index', True)
    dep_type = kwargs.pop('dep_type', 'collapsed-ccprocessed-dependencies')

    nosubmode = subcorpora is None
    #todo: temporary
    #if getattr(corpus, '_dlist', False):
    #    subcorpora = 'file'

    # store kwargs and locs
    locs = locals().copy()
    locs.update(kwargs)
    locs.pop('kwargs', None)

    # so you can do corpus.interrogate('features/postags/wordclasses/lexicon')
    if search == 'features':
        search = 'v'
        query = 'any'
    if search in ['postags', 'wordclasses']:
        query = 'any'
        preserve_case = True
        show = 'p' if search == 'postags' else 'x'
        # use tregex if simple because it's faster
        # but use dependencies otherwise
        search = 't' if subcorpora else {'w': 'any'}
    if search == 'lexicon':
        search = {'w': 'any'}

    if not kwargs.get('cql') and isinstance(search, STRINGTYPE) and len(search) > 3:
        raise ValueError('search argument not recognised.')

    import codecs
    import signal
    import os
    from time import localtime, strftime
    from collections import Counter

    import pandas as pd
    from pandas import DataFrame, Series

    from corpkit.interrogation import Interrogation, Interrodict
    from corpkit.corpus import Datalist, Corpora, Corpus, File, Subcorpus
    from corpkit.process import (tregex_engine, get_deps, unsplitter, sanitise_dict, 
                                 animator, filtermaker, fix_search,
                                 pat_format, auto_usecols, format_tregex,
                                 make_conc_lines_from_whole_mid)
    from corpkit.other import as_regex
    from corpkit.dictionaries.process_types import Wordlist
    from corpkit.build import check_jdk
    from corpkit.conll import pipeline

    import re
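    # Pre-compile the non-word filter once up front; the match-anything fallback below
    # keeps the same compiled-pattern interface when no filter string is given.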
    if regex_nonword_filter:
        is_a_word = re.compile(regex_nonword_filter)
    else:
        is_a_word = re.compile(r'.*')

    from traitlets import TraitError
    
    have_java = check_jdk()

    # convert cql-style queries---pop for the sake of multiprocessing
    cql = kwargs.pop('cql', None)
    if cql:
        from corpkit.cql import to_corpkit
        search, exclude = to_corpkit(search)

    def signal_handler(signal, _):
        """
        Allow pausing and restarting when not in the GUI
        """
        if root:
            return  
        import signal
        import sys
        from time import localtime, strftime
        signal.signal(signal.SIGINT, original_sigint)
        thetime = strftime("%H:%M:%S", localtime())
        INPUTFUNC('\n\n%s: Paused. Press any key to resume, or ctrl+c to quit.\n' % thetime)
        time = strftime("%H:%M:%S", localtime())
        print('%s: Interrogation resumed.\n' % time)
        signal.signal(signal.SIGINT, signal_handler)

    def add_adj_for_ngram(show, gramsize):
        """
        If there's a gramsize of more than 1, remake show
        for ngramming
        """
        if gramsize == 1:
            return show
        out = []
        for i in show:
            out.append(i)
        for i in range(1, gramsize):
            for bit in show:
                out.append('+%d%s' % (i, bit))
        return out

    def fix_show_bit(show_bit):
        """
        Take a single search/show_bit type, return match
        """
        ends = ['w', 'l', 'i', 'n', 'f', 'p', 'x', 's', 'a', 'e']
        starts = ['d', 'g', 'm', 'b', 'h', '+', '-', 'r']
        show_bit = show_bit.lstrip('n')
        show_bit = show_bit.lstrip('b')
        show_bit = list(show_bit)
        if show_bit[-1] not in ends:
            show_bit.append('w')
        if show_bit[0] not in starts:
            show_bit.insert(0, 'm')
        return ''.join(show_bit)

    def fix_show(show, gramsize):
        """
        Lowercase anything in show and turn into list
        """
        if isinstance(show, list):
            show = [i.lower() for i in show]
        elif isinstance(show, STRINGTYPE):
            show = show.lower()
            show = [show]
        show = [fix_show_bit(i) for i in show]
        return add_adj_for_ngram(show, gramsize)

    def is_multiquery(corpus, search, query, outname):
        """
        Determine if multiprocessing is needed/possible, and
        do some retyping if need be as well
        """
        is_mul = False
        from collections import OrderedDict
        from corpkit.dictionaries.process_types import Wordlist
        
        if isinstance(query, Wordlist):
            query = list(query)

        if subcorpora and multiprocess:
            is_mul = 'subcorpora'

        if isinstance(subcorpora, (list, tuple)):
            is_mul = 'subcorpora'

        if isinstance(query, (dict, OrderedDict)):
            is_mul = 'namedqueriessingle'
        
        if isinstance(search, dict):
            if all(isinstance(i, dict) for i in list(search.values())):
                is_mul = 'namedqueriesmultiple'
        return is_mul, corpus, search, query

    def ispunct(s):
        import string
        return all(c in string.punctuation for c in s)

    def uniquify(conc_lines):
        """get unique concordance lines"""
        from collections import OrderedDict
        unique_lines = []
        checking = []
        for index, (_, speakr, start, middle, end) in enumerate(conc_lines):
            joined = ' '.join([speakr, start, 'MIDDLEHERE:', middle, ':MIDDLEHERE', end])
            if joined not in checking:
                unique_lines.append(conc_lines[index])
            checking.append(joined)
        return unique_lines

    def compiler(pattern):
        """
        Compile regex or fail gracefully
        """
        if hasattr(pattern, 'pattern'):
            return pattern
        import re
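        # Respect the interrogation's case_sensitive flag; already-compiled patterns are
        # passed through untouched (checked via the 'pattern' attribute above).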
        try:
            if case_sensitive:
                comped = re.compile(pattern)
            else:
                comped = re.compile(pattern, re.IGNORECASE)
            return comped
        except:
            import traceback
            import sys
            from time import localtime, strftime
            exc_type, exc_value, exc_traceback = sys.exc_info()
            lst = traceback.format_exception(exc_type, exc_value, exc_traceback)
            error_message = lst[-1]
            thetime = strftime("%H:%M:%S", localtime())
            print('%s: Query %s' % (thetime, error_message))
            if root:
                return 'Bad query'
            else:
                raise ValueError('%s: Query %s' % (thetime, error_message))

    def determine_search_func(show):
        """Figure out what search function we're using"""

        simple_tregex_mode = False
        statsmode = False
        tree_to_text = False
        search_trees = False
            
        simp_crit = all(not i for i in [kwargs.get('tgrep'),
                                        files_as_subcorpora,
                                        subcorpora,
                                        just_metadata,
                                        skip_metadata])

        if search.get('t') and simp_crit:
            if have_java:
                simple_tregex_mode = True
            else:
                search_trees = 'tgrep'
            optiontext = 'Searching parse trees'

        elif datatype == 'conll':
        
            if any(i.endswith('t') for i in search.keys()):
                if have_java and not kwargs.get('tgrep'):
                    search_trees = 'tregex'
                else:
                    search_trees = 'tgrep'
                optiontext = 'Searching parse trees'
            elif any(i.endswith('v') for i in search.keys()):
                # either of these searchers now seems to work
                #seacher = get_stats_conll
                statsmode = True
                optiontext = 'General statistics'
            elif any(i.endswith('r') for i in search.keys()):
                optiontext = 'Distance from root'
            else:
                optiontext = 'Querying CONLL data'

        return optiontext, simple_tregex_mode, statsmode, tree_to_text, search_trees

    def get_tregex_values(show):
        """If using Tregex, set appropriate values

        - Check for valid query
        - Make 'any' query
        - Make list query
        """

        translated_option = 't'
        if isinstance(search['t'], Wordlist):
            search['t'] = list(search['t'])
        q = tregex_engine(corpus=False,
                          query=search.get('t'),
                          options=['-t'],
                          check_query=True,
                          root=root,
                          preserve_case=preserve_case
                         )

        # so many of these bad fixing loops!
        nshow = []
        for i in show:
            if i == 'm':
                nshow.append('w')
            else:
                nshow.append(i.lstrip('m'))
        show = nshow

        if q is False:
            if root:
                return 'Bad query', None
            else:
                return 'Bad query', None

        if isinstance(search['t'], list):
            regex = as_regex(search['t'], boundaries='line', case_sensitive=case_sensitive)
        else:
            regex = ''

        # listquery, anyquery, translated_option
        treg_dict = {'p': [r'__ < (/%s/ !< __)' % regex, r'__ < (/.?[A-Za-z0-9].?/ !< __)', 'u'],
                     'pl': [r'__ < (/%s/ !< __)' % regex, r'__ < (/.?[A-Za-z0-9].?/ !< __)', 'u'],
                     'x': [r'__ < (/%s/ !< __)' % regex, r'__ < (/.?[A-Za-z0-9].?/ !< __)', 'u'],
                     't': [r'__ < (/%s/ !< __)' % regex, r'__ < (/.?[A-Za-z0-9].?/ !< __)', 'o'],
                     'w': [r'/%s/ !< __' % regex, r'/.?[A-Za-z0-9].?/ !< __', 't'],
                     'c': [r'/%s/ !< __'  % regex, r'/.?[A-Za-z0-9].?/ !< __', 'C'],
                     'l': [r'/%s/ !< __'  % regex, r'/.?[A-Za-z0-9].?/ !< __', 't'],
                     'u': [r'/%s/ !< __'  % regex, r'/.?[A-Za-z0-9].?/ !< __', 'v']
                    }

        newshow = []

        listq, anyq, translated_option = treg_dict.get(show[0][-1].lower())
        newshow.append(translated_option)
        for item in show[1:]:
            _, _, noption = treg_dict.get(item.lower())
            newshow.append(noption)

        if isinstance(search['t'], list):
            search['t'] = listq
        elif search['t'] == 'any':   
            search['t'] = anyq
        return search['t'], newshow

    def correct_spelling(a_string):
        """correct spelling within a string"""
        if not spelling:
            return a_string
        from corpkit.dictionaries.word_transforms import usa_convert
        if spelling.lower() == 'uk':
            usa_convert = {v: k for k, v in list(usa_convert.items())}
        bits = a_string.split('/')
        for index, i in enumerate(bits):
            converted = usa_convert.get(i.lower(), i)
            if i.islower() or preserve_case is False:
                converted = converted.lower()
            elif i.isupper() and preserve_case:
                converted = converted.upper()
            elif i.istitle() and preserve_case:
                converted = converted.title()
            bits[index] = converted
        r = '/'.join(bits)
        return r

    def make_search_iterable(corpus):
        """determine how to structure the corpus for interrogation"""
        # skip file definitions if they are not needed
        if getattr(corpus, '_dlist', False):

            return {(i.name, i.path): [i] for i in list(corpus.files)}
            #return {('Sample', 'Sample'): list(corpus.files)}

        if simple_tregex_mode:
            if corpus.level in ['s', 'f', 'd']:
                return {(corpus.name, corpus.path): False}
            else:
                return {(os.path.basename(i), os.path.join(corpus.path, i)): False
                    for i in os.listdir(corpus.path)
                    if os.path.isdir(os.path.join(corpus.path, i))}

        if isinstance(corpus, Datalist):
            to_iterate_over = {}
            # it could be files or subcorpus objects
            if corpus[0].level in ['s', 'd']:
                if files_as_subcorpora:
                    for subc in corpus:
                        for f in subc.files:
                            to_iterate_over[(f.name, f.path)] = [f]
                else:
                    for subc in corpus:
                        to_iterate_over[(subc.name, subc.path)] = subc.files
            elif corpus[0].level == 'f':
                for f in corpus:
                    to_iterate_over[(f.name, f.path)] = [f]
        elif corpus.singlefile:
            to_iterate_over = {(corpus.name, corpus.path): [corpus]}
        elif not hasattr(corpus, 'subcorpora') or not corpus.subcorpora:
            # just files in a directory
            if files_as_subcorpora:
                to_iterate_over = {}
                for f in corpus.files:
                    to_iterate_over[(f.name, f.path)] = [f]
            else:
                to_iterate_over = {(corpus.name, corpus.path): corpus.files}
        else:
            to_iterate_over = {}
            if files_as_subcorpora:
                # don't know if possible: has subcorpora but also .files
                if hasattr(corpus, 'files') and corpus.files is not None:
                    for f in corpus.files:
                        to_iterate_over[(f.name, f.path)] = [f]
                # has subcorpora with files in those
                elif hasattr(corpus, 'files') and corpus.files is None:
                    for subc in corpus.subcorpora:
                        for f in subc.files:
                            to_iterate_over[(f.name, f.path)] = [f]
            else:
                if corpus[0].level == 's':
                    for subcorpus in corpus:
                        to_iterate_over[(subcorpus.name, subcorpus.path)] = subcorpus.files
                elif corpus[0].level == 'f':
                    for f in corpus:
                        to_iterate_over[(f.name, f.path)] = [f]
                else:
                    for subcorpus in corpus.subcorpora:
                        to_iterate_over[(subcorpus.name, subcorpus.path)] = subcorpus.files
        return to_iterate_over

    def welcome_printer(return_it=False):
        """Print welcome message"""
        if no_conc:
            message = 'Interrogating'
        else:
            message = 'Interrogating and concordancing'
        if only_conc:
            message = 'Concordancing'
        if kwargs.get('printstatus', True):
            thetime = strftime("%H:%M:%S", localtime())
            from corpkit.process import dictformat
            sformat = dictformat(search)
            welcome = ('\n%s: %s %s ...\n          %s\n          ' \
                        'Query: %s\n          %s corpus ... \n' % \
                      (thetime, message, cname, optiontext, sformat, message))
            if return_it:
                return welcome
            else:
                print(welcome)

    def goodbye_printer(return_it=False, only_conc=False):
        """Say goodbye before exiting"""
        if not kwargs.get('printstatus', True):
            return
        thetime = strftime("%H:%M:%S", localtime())
        if only_conc:
            
            show_me = (thetime, len(conc_df))
            finalstring = '\n\n%s: Concordancing finished! %d results.' % show_me
        else:
            finalstring = '\n\n%s: Interrogation finished!' % thetime
            if countmode:
                finalstring += ' %d matches.' % tot
            else:
                dat = (numentries, total_total)
                finalstring += ' %d unique results, %d total occurrences.' % dat
        if return_it:
            return finalstring
        else:
            print(finalstring)

    def get_conc_colnames(corpus,
                          fsi_index=False,
                          simple_tregex_mode=False):
    
        fields = []
        base = 'c f s l m r'
        
        if simple_tregex_mode:
            base = base.replace('f ', '')

        if fsi_index and not simple_tregex_mode:
            base = 'i ' + base
        
        if PYTHON_VERSION == 2:
            base = base.encode('utf-8').split()
        else:
            base = base.split() 

        if show_conc_metadata:
            from corpkit.build import get_all_metadata_fields
            meta = get_all_metadata_fields(corpus.path)

            if isinstance(show_conc_metadata, list):
                meta = [i for i in meta if i in show_conc_metadata]
            #elif show_conc_metadata is True:
            #    pass
            for i in sorted(meta):
                if i in ['speaker', 'sent_id', 'parse']:
                    continue
                if PYTHON_VERSION == 2:
                    base.append(i.encode('utf-8'))
                else:
                    base.append(i)
        return base

    def make_conc_obj_from_conclines(conc_results, fsi_index=False):
        """
        Turn conclines into DataFrame
        """
        from corpkit.interrogation import Concordance
        #fsi_place = 2 if fsi_index else 0

        all_conc_lines = []
        for sc_name, resu in sorted(conc_results.items()):
            if only_unique:
                unique_results = uniquify(resu)
            else:
                unique_results = resu
            #make into series
            for lin in unique_results:
                #spkr = str(spkr, errors = 'ignore')
                #if not subcorpora:
                #    lin[fsi_place] = lin[fsi_place]
                #lin.insert(fsi_place, sc_name)

                if len(lin) < len(conc_col_names):
                    diff = len(conc_col_names) - len(lin)
                    lin.extend(['none'] * diff)

                all_conc_lines.append(Series(lin, index=conc_col_names))

        conc_df = pd.concat(all_conc_lines, axis=1).T

        if all(x == '' for x in list(conc_df['s'].values)) or \
           all(x == 'none' for x in list(conc_df['s'].values)):
            conc_df.drop('s', axis=1, inplace=True)
        
        # count each thing that occurs in the middle col
        # remove things that only appear once?!
        # i have no idea what this was doing in here.
        #if not language_model:
        #    counted = Counter(conc_df['m'])
        #    indices = [l for l in list(conc_df.index) if counted[conc_df.ix[l]['m']] > 1] 
        #    conc_df = conc_df.ix[indices]
        #    conc_df = conc_df.reset_index(drop=True)

        locs['corpus'] = corpus.name

        if maxconc:
            conc_df = Concordance(conc_df[:maxconc])
        else:
            conc_df = Concordance(conc_df)
        try:
            conc_df.query = locs
        except AttributeError:
            pass
        return conc_df

    def lowercase_result(res):
        """      
        Take any result and do spelling/lowercasing if need be

        todo: remove lowercase and change name
        """
        if not res or statsmode:
            return res
        # this is likely broken, but spelling in interrogate is deprecated anyway
        if spelling:
            res = [correct_spelling(r) for r in res]
        return res

    def postprocess_concline(line, fsi_index=False, conc=False):
        # todo: are these right?
        if not conc:
            return line
        subc, star, en = 0, 2, 5
        if fsi_index:
            subc, star, en = 2, 4, 7
        if not preserve_case:
            line[star:en] = [str(x).lower() for x in line[star:en]]
        if spelling:
            line[star:en] = [correct_spelling(str(b)) for b in line[star:en]]
        return line

    def make_progress_bar():
        """generate a progress bar"""

        if simple_tregex_mode:
            total_files = len(list(to_iterate_over.keys()))
        else:
            total_files = sum(len(x) for x in list(to_iterate_over.values()))

        par_args = {'printstatus': kwargs.get('printstatus', True),
                    'root': root, 
                    'note': note,
                    'quiet': quiet,
                    'length': total_files,
                    'startnum': kwargs.get('startnum'),
                    'denom': kwargs.get('denominator', 1)}

        term = None
        if kwargs.get('paralleling', None) is not None:
            from blessings import Terminal
            term = Terminal()
            par_args['terminal'] = term
            par_args['linenum'] = kwargs.get('paralleling')

        if in_notebook:
            par_args['welcome_message'] = welcome_message

        outn = kwargs.get('outname', '')
        if outn:
            outn = getattr(outn, 'name', outn)
            outn = outn + ': '

        tstr = '%s%d/%d' % (outn, current_iter, total_files)
        p = animator(None, None, init=True, tot_string=tstr, **par_args)
        tstr = '%s%d/%d' % (outn, current_iter + 1, total_files)
        animator(p, current_iter, tstr, **par_args)
        return p, outn, total_files, par_args

    # find out if using gui
    root = kwargs.get('root')
    note = kwargs.get('note')
    language_model = kwargs.get('language_model')

    # set up pause method
    original_sigint = signal.getsignal(signal.SIGINT)
    if kwargs.get('paralleling', None) is None:
        if not root:
            original_sigint = signal.getsignal(signal.SIGINT)
            signal.signal(signal.SIGINT, signal_handler)

    # find out about concordancing
    only_conc = False
    no_conc = False
    if conc is False:
        no_conc = True
    if isinstance(conc, str) and conc.lower() == 'only':
        only_conc = True
        no_conc = False
    numconc = 0

    # wipe non essential class attributes to not bloat query attrib
    if isinstance(corpus, Corpus):
        import copy
        corpus = copy.copy(corpus)
        # take a copy of the items so we can pop from __dict__ while iterating (needed on Python 3)
        for k, v in list(corpus.__dict__.items()):
            if isinstance(v, (Interrogation, Interrodict)):
                corpus.__dict__.pop(k, None)

    # convert path to corpus object
    if not isinstance(corpus, (Corpus, Corpora, Subcorpus, File, Datalist)):
        if not multiprocess and not kwargs.get('outname'):
            corpus = Corpus(corpus, print_info=False)

    # figure out how the user has entered the query and show, and normalise
    from corpkit.process import searchfixer
    search = searchfixer(search, query)
    show = fix_show(show, gramsize)

    # instantiate lemmatiser if need be
    lem_instance = False
    if any(i.endswith('l') for i in show) and isinstance(search, dict) and search.get('t'):
        from nltk.stem.wordnet import WordNetLemmatizer
        lem_instance = WordNetLemmatizer()

    # do multiprocessing if need be
    im, corpus, search, query = is_multiquery(corpus, search, query,
                                              kwargs.get('outname', False))

    # figure out if we can multiprocess the corpus
    if hasattr(corpus, '__iter__') and im:
        corpus = Corpus(corpus, print_info=False)
    if hasattr(corpus, '__iter__') and not im:
        im = 'datalist'
    if isinstance(corpus, Corpora):
        im = 'multiplecorpora'

    # split corpus if the user wants multiprocessing but no other iterable
    if not im and multiprocess:
        im = 'datalist'
        if hasattr(corpus, 'subcorpora') and corpus.subcorpora:
            corpus = corpus[:]
        else:
            corpus = corpus.files

    search = fix_search(search, case_sensitive=case_sensitive, root=root)
    exclude = fix_search(exclude, case_sensitive=case_sensitive, root=root)

    # if it's already been through pmultiquery, don't do it again
    locs['search'] = search
    locs['exclude'] = exclude
    locs['query'] = query
    locs['corpus'] = corpus
    locs['multiprocess'] = multiprocess
    locs['print_info'] = kwargs.get('printstatus', True)
    locs['multiple'] = im
    locs['subcorpora'] = subcorpora

    # send to multiprocess function
    if im:
        signal.signal(signal.SIGINT, original_sigint)
        from corpkit.multiprocess import pmultiquery
        return pmultiquery(**locs)

    # get corpus metadata
    cname = corpus.name
    if isinstance(save, STRINGTYPE):
        savename = corpus.name + '-' + save
    if save is True:
        raise ValueError('save must be str, not bool.')


    datatype = getattr(corpus, 'datatype', 'conll')
    singlefile = getattr(corpus, 'singlefile', False)
    level = getattr(corpus, 'level', 'c')
        
    # store all results in here
    from collections import defaultdict
    results = defaultdict(Counter)
    count_results = defaultdict(list)
    conc_results = defaultdict(list)

    # check if just counting, turn off conc if so
    countmode = 'c' in show or 'mc' in show
    if countmode:
        no_conc = True
        only_conc = False
    # where we are at in interrogation
    current_iter = 0

    # multiprocessing progress bar
    denom = kwargs.get('denominator', 1)
    startnum = kwargs.get('startnum', 0)

    # Determine the search function to be used #
    optiontext, simple_tregex_mode, statsmode, tree_to_text, search_trees = determine_search_func(show)
    
    # no conc for statsmode
    if statsmode:
        no_conc = True
        only_conc = False
        conc = False

    # Set some Tregex-related values
    translated_option = False
    if search.get('t'):
        query, translated_option = get_tregex_values(show)
        if query == 'Bad query' and translated_option is None:
            if root:
                return 'Bad query'
            else:
                return
    # more tregex options
    if tree_to_text:
        treg_q = r'ROOT << __'
        op = ['-o', '-t', '-w', '-f']
    elif simple_tregex_mode:
        treg_q = search['t']
        op = ['-%s' % i for i in translated_option] + ['-o', '-f']

    # make iterable object for corpus interrogation
    to_iterate_over = make_search_iterable(corpus)

    try:
        from ipywidgets import IntProgress
        _ = IntProgress(min=0, max=10, value=1)
        in_notebook = True
    except TraitError:
        in_notebook = False
    except ImportError:
        in_notebook = False
    # caused in newest ipython
    except AttributeError:
        in_notebook = False

    lemtag = False
    if search.get('t'):
        from corpkit.process import gettag
        lemtag = gettag(search.get('t'), lemmatag)

    usecols = auto_usecols(search, exclude, show, kwargs.pop('usecols', None), coref=coref)

    # print welcome message
    welcome_message = welcome_printer(return_it=in_notebook)

    # create a progress bar
    p, outn, total_files, par_args = make_progress_bar()

    if conc:
        conc_col_names = get_conc_colnames(corpus,
                                           fsi_index=fsi_index,
                                           simple_tregex_mode=False)

 

    # Iterate over data, doing interrogations
    for (subcorpus_name, subcorpus_path), files in sorted(to_iterate_over.items()):
        if nosubmode:
            subcorpus_name = '_nosubmode'

        # results for subcorpus go here
        #conc_results[subcorpus_name] = []
        #count_results[subcorpus_name] = []
        #results[subcorpus_name] = Counter()

        # get either everything (tree_to_text) or the search['t'] query
        if tree_to_text or simple_tregex_mode:
            result = tregex_engine(query=treg_q,
                                   options=op,
                                   corpus=subcorpus_path,
                                   root=root,
                                   preserve_case=preserve_case)

            # format search results with slashes etc
            if not countmode and not tree_to_text:
                result = format_tregex(result, show, translated_option=translated_option,
                            exclude=exclude, excludemode=excludemode, lemtag=lemtag,
                            lem_instance=lem_instance, countmode=countmode, speaker_data=False)

            # if concordancing, do the query again with 'whole' sent and fname
            if not no_conc:
                ops = ['-w'] + op
                #ops = [i for i in ops if i != '-n']
                whole_result = tregex_engine(query=search['t'],
                                             options=ops,
                                             corpus=subcorpus_path,
                                             root=root,
                                             preserve_case=preserve_case
                                            )

                # format match too depending on option
                if not only_format_match:
                    whole_result = format_tregex(whole_result, show, translated_option=translated_option,
                                                 exclude=exclude, excludemode=excludemode, lemtag=lemtag,
                                                 lem_instance=lem_instance, countmode=countmode, speaker_data=False, whole=True)

                # make conc lines from conc results
                conc_result = make_conc_lines_from_whole_mid(whole_result, result, show=show)
                for lin in conc_result:
                    if maxconc is False or numconc < maxconc:
                        conc_results[subcorpus_name].append(lin)
                    numconc += 1

            # add matches to ongoing counts
            if countmode:
                count_results[subcorpus_name] += [result]            
            else:
                if result:
                    results[subcorpus_name] += Counter([i[-1] for i in result])
                else:
                    results[subcorpus_name] += Counter()

            # update progress bar
            current_iter += 1
            tstr = '%s%d/%d' % (outn, current_iter + 1, total_files)
            animator(p, current_iter, tstr, **par_args)
            continue

        # todo: move this
        kwargs.pop('by_metadata', None)
        
        # conll querying goes by file, not subcorpus
        for f in files:
            slow_treg_speaker_guess = kwargs.get('outname', '') if kwargs.get('multispeaker') else ''
            filepath, corefs = f.path, coref
            res, conc_res = pipeline(filepath, search=search, show=show,
                                     dep_type=dep_type,
                                     exclude=exclude,
                                     excludemode=excludemode,
                                     searchmode=searchmode,
                                     case_sensitive=case_sensitive,
                                     conc=conc,
                                     only_format_match=only_format_match,
                                     speaker=slow_treg_speaker_guess,
                                     gramsize=gramsize,
                                     no_punct=no_punct,
                                     no_closed=no_closed,
                                     window=window,
                                     filename=f.path,
                                     coref=corefs,
                                     countmode=countmode,
                                     maxconc=(maxconc, numconc),
                                     is_a_word=is_a_word,
                                     by_metadata=subcorpora,
                                     show_conc_metadata=show_conc_metadata,
                                     just_metadata=just_metadata,
                                     skip_metadata=skip_metadata,
                                     fsi_index=fsi_index,
                                     category=subcorpus_name,
                                     translated_option=translated_option,
                                     statsmode=statsmode,
                                     preserve_case=preserve_case,
                                     usecols=usecols,
                                     search_trees=search_trees,
                                     lem_instance=lem_instance,
                                     lemtag=lemtag,
                                     **kwargs)

            if res is None and conc_res is None:
                current_iter += 1
                tstr = '%s%d/%d' % (outn, current_iter + 1, total_files)
                animator(p, current_iter, tstr, **par_args)
                continue

            # deal with symbolic structures---that is, rather than adding
            # results by subcorpora, add them by metadata value
            # todo: sorting?
            if subcorpora:
                for (k, v), concl in zip(res.items(), conc_res.values()):                            
                    v = lowercase_result(v)
                    results[k] += Counter(v)
                    for line in concl:
                        if maxconc is False or numconc < maxconc:
                            line = postprocess_concline(line,
                                fsi_index=fsi_index, conc=conc)
                            conc_results[k].append(line)
                            numconc += 1
                
                current_iter += 1
                tstr = '%s%d/%d' % (outn, current_iter + 1, total_files)
                animator(p, current_iter, tstr, **par_args)
                continue

            # garbage collection needed?
            sents = None
            corefs = None
                
            if res == 'Bad query':
                return 'Bad query'

            if countmode:
                count_results[subcorpus_name] += [res]

            else:
                # add filename and do lowercasing for conc
                if not no_conc:
                    for line in conc_res:
                        line = postprocess_concline(line,
                            fsi_index=fsi_index, conc=conc)
                        if maxconc is False or numconc < maxconc:
                            conc_results[subcorpus_name].append(line)
                            numconc += 1

                # do lowercasing and spelling
                if not only_conc:
                    res = lowercase_result(res)
                    # discard removes low results, helping with 
                    # curse of dimensionality
                    countres = Counter(res)
                    if isinstance(discard, float):
                        nkeep = len(countres) - len(countres) * discard
                        countres = Counter({k: v for i, (k, v) in enumerate(countres.most_common()) if i <= nkeep})
                    elif isinstance(discard, int):
                        countres = Counter({k: v for k, v in countres.most_common() if v >= discard})
                    results[subcorpus_name] += countres
                    #else:
                    #results[subcorpus_name] += res

            # update progress bar
            current_iter += 1
            tstr = '%s%d/%d' % (outn, current_iter + 1, total_files)
            animator(p, current_iter, tstr, **par_args)

    # Get concordances into DataFrame, return if just conc
    if not no_conc:
        # fail on this line with typeerror if no results?
        conc_df = make_conc_obj_from_conclines(conc_results, fsi_index=fsi_index)
        if only_conc and conc_df is None:
            return
        elif only_conc:
            locs = sanitise_dict(locs)
            try:
                conc_df.query = locs
            except AttributeError:
                return conc_df
            if save and not kwargs.get('outname'):
                print('\n')
                conc_df.save(savename)
            goodbye_printer(only_conc=True)
            if not root:
                signal.signal(signal.SIGINT, original_sigint)            
            return conc_df
    else:
        conc_df = None

    # Get interrogation into DataFrame
    if countmode:
        df = Series({k: sum(v) for k, v in sorted(count_results.items())})
        tot = df.sum()
    else:
        the_big_dict = {}
        unique_results = set(item for sublist in list(results.values()) for item in sublist)
        sortres = sorted(results.items(), key=lambda x: x[0])
        for word in unique_results:
            the_big_dict[word] = [subcorp_result[word] for _, subcorp_result in sortres]
        # turn master dict into dataframe, sorted
        df = DataFrame(the_big_dict, index=sorted(results.keys()))

        # for ngrams, remove hapaxes
        #if show_ngram or show_collocates:
        #    if not language_model:
        #        df = df[[i for i in list(df.columns) if df[i].sum() > 1]]

        numentries = len(df.columns)
        tot = df.sum(axis=1)
        total_total = df.sum().sum()

    # turn df into series if all conditions met
    conds = [countmode,
             files_as_subcorpora,
             subcorpora,
             kwargs.get('df1_always_df')]
    anyxs = [level == 's',
             singlefile,
             nosubmode]
    if all(not x for x in conds) and any(x for x in anyxs):
        df = Series(df.ix[0])
        df.sort_values(ascending=False, inplace=True)
        tot = df.sum()
        numentries = len(df.index)
        total_total = tot

    # turn data into DF for GUI if need be
    if isinstance(df, Series) and kwargs.get('df1_always_df'):
        total_total = df.sum()
        df = DataFrame(df)
        tot = Series(total_total, index=['Total'])

    # if we're doing files as subcorpora,  we can remove the extension etc
    if isinstance(df, DataFrame) and files_as_subcorpora:
        cname = corpus.name.replace('-stripped', '').replace('-parsed', '')
        edits = [(r'(-[0-9][0-9][0-9])?\.txt\.conll', ''),
                 (r'-%s(-stripped)?(-parsed)?' % cname, '')]
        from corpkit.editor import editor
        df = editor(df, replace_subcorpus_names=edits).results
        tot = df.sum(axis=1)
        total_total = df.sum().sum()

    if conc_df is not None and conc_df is not False:
        # removed 'f' from here for now
        for col in ['c']:
            for pat in ['.txt', '.conll']:
                conc_df[col] = conc_df[col].str.replace(pat, '')
            conc_df[col] = conc_df[col].str.replace(r'-[0-9][0-9][0-9]$', '')

        #df.index = df.index.str.replace('w', 'this')

    # make interrogation object
    locs['corpus'] = corpus.path
    locs = sanitise_dict(locs)
    interro = Interrogation(results=df, totals=tot, query=locs, concordance=conc_df)

    # save it
    if save and not kwargs.get('outname'):
        print('\n')
        interro.save(savename)
    
    goodbye = goodbye_printer(return_it=in_notebook)
    if in_notebook:
        try:
            p.children[2].value = goodbye.replace('\n', '')
        except AttributeError:
            pass
    if not root:
        signal.signal(signal.SIGINT, original_sigint)
    return interro
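
The interrogator above tidies file-derived subcorpus names with two regex substitutions passed to editor(). Below is a rough standalone sketch of the same idea using re.compile; the corpus name and file names are invented for illustration only.

import re

# Hypothetical corpus name and file-derived subcorpus names, for illustration only.
cname = 'mycorpus'
names = ['intro-mycorpus-parsed-001.txt.conll', 'chapter2-mycorpus-002.txt.conll']

# The same kinds of patterns the interrogator builds dynamically (see the `edits` list above).
strip_ext = re.compile(r'(-[0-9][0-9][0-9])?\.txt\.conll')
strip_corpus = re.compile(r'-%s(-stripped)?(-parsed)?' % cname)

# Remove the numbered extension first, then the corpus-name suffix.
cleaned = [strip_corpus.sub('', strip_ext.sub('', n)) for n in names]
print(cleaned)  # ['intro', 'chapter2']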

Example 25

Project: pySecurityCenter Source File: population.py
Function: gen
def gen(sc, asset, expire):
    '''
    Database population function.

    What we are doing here is trying to interpret the output of plugin ID 20811
    and use that information to help populate the database with individualized
    entries of the software that is installed on the host.  This information will
    later be used to build the report.
    '''

    # The following regex patterns are used to pull out the needed fields from
    # Plugin ID 20811
    redate = re.compile(r'\[installed on (\d{4})/(\d{1,2})/(\d{1,2})\]')
    reinvdate = re.compile(r'\[installed on (\d{1,2})/(\d{1,2})/(\d{4})\]')
    rever = re.compile(r'\[version (.*?)\]')
    resw = re.compile(r'^([\w\s\.\(\-\)\+]*)')
    s = Session()
    ts = datetime.datetime.now()
    for vuln in sc.analysis(('pluginID','=','20811,22869'),
                            ('asset', '=', {'id': str(asset)}),
                            tool='vulndetails'):


        # First we need to get the host information...
        nh = False
        host = s.query(Host).filter_by(ip=vuln['ip']).first()
        if not host:
            host = Host()
            nh = True
        hdata = sc.analysis(('ip', '=', vuln['ip']),tool='sumip')[0]
        host.ip = vuln['ip']
        host.name = vuln['netbiosName']
        host.cpe = hdata['osCPE']
        host.dns = hdata['dnsName']
        host.asset_id = asset
        if nh:
            s.add(host)
        else:
            s.merge(host)
        s.commit()
        sys.stdout.write('%4d\t%-16s\t%-40s' % (host.id, host.ip, host.dns))
        sys.stdout.flush()

        if vuln['pluginID'] == '22869':
            if 'CentOS Linux system' in vuln['pluginText'] or 'Red Hat Linux system' in vuln['pluginText']:
                software = re.findall('  ([a-zA-Z0-9\.\-]*)\|',vuln['pluginText'])
                for item in software:
                    entry = Entry()
                    entry.name = item
                    entry.timestamp = ts
                    entry.host_id = host.id
                    s.add(entry)
                    s.commit()
            elif 'SunOS 5.10' in vuln['pluginText']:
                software = re.findall('Patch: ([^ ]*)', vuln['pluginText'])
                for item in software:
                    entry = Entry()
                    entry.name = item
                    entry.timestamp = ts
                    entry.host_id = host.id
                    s.add(entry)
                    s.commit()
            elif 'Solaris 11 system' in vuln['pluginText']:
                software = re.findall('([\w\/]+)\W+([0-9\.\-]+).*\n',vuln['pluginText'])
                for item in software:
                    entry = Entry()
                    entry.name = item[0]
                    entry.version = item[1]
                    entry.timestamp = ts
                    entry.host_id = host.id
                    s.add(entry)
                    s.commit()
            elif 'Mac OS X system' in vuln['pluginText']:
                software = re.findall('  ([a-zA-Z0-9\.\-\_]*\.pkg)\n',vuln['pluginText'])
                for item in software:
                    entry = Entry()
                    entry.name = item
                    entry.timestamp = ts
                    entry.host_id = host.id
                    s.add(entry)
                    s.commit()
            else:
                sys.stdout.write('\t[NO FORMATTER]')
                sys.stdout.flush()

        if vuln['pluginID'] == '20811':
            software = False
            patches = False
            sw = None
            nh = False
            s.commit()
            for line in vuln['pluginText'].split('\n'):
                if '</plugin_output>' in line:
                    continue
                if line == u'The following software are installed on the remote host :':
                    software = True
                    patches = False
                    continue
                if line == u'The following updates are installed :':
                    patches = True
                    continue

                if software and line != '':
                    names = resw.findall(line)
                    vers = rever.findall(line)
                    dates = redate.findall(line)
                    new = Entry()
                    if len(names) > 0: new.name = names[0].strip()
                    if len(vers) > 0: new.version = vers[0]
                    try:
                        if len(dates) > 0:
                            date = datetime.date(year=int(dates[0][0]),
                                                 month=int(dates[0][1]),
                                                 day=int(dates[0][2]))
                            new.date = date
                        else:
                            dates = reinvdate.findall(line)
                            if len(dates) > 0:
                                date = datetime.date(year=int(dates[0][2]),
                                                     month=int(dates[0][0]),
                                                     day=int(dates[0][1]))
                                new.date = date
                    except:
                        pass
                    if patches:
                        if line[:2] != '  ':
                            sw = line.strip(':').strip()
                            continue
                        else:
                            new.name = '%s (%s)' % (new.name, sw)

                    new.timestamp = ts
                    new.host_id = host.id
                    s.add(new)
        s.commit()
        sys.stdout.write('\tdone\n')
        sys.stdout.flush()
    s.commit()

    # Now to expire the old data out...
    exp = datetime.datetime.now() - datetime.timedelta(days=expire)
    print exp

    # First to delete the aged out entries
    for entry in s.query(Entry).filter(Entry.timestamp < exp).all():
        s.delete(entry)
    s.commit()

    # Next to delete any hosts that we aren't pulling info for anymore...
    for host in s.query(Host).all():
        if len(host.entries) == 0:
            s.delete(host)
    s.commit()
    s.close()
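
gen() compiles its patterns once, before looping over every vulnerability record, and then reuses them line by line. As a minimal sketch of how those compiled patterns behave, here they are applied to a single made-up plugin 20811 line (the sample line is invented, not real plugin output).

import datetime
import re

# Same patterns as in gen() above, applied to an invented plugin 20811 line.
redate = re.compile(r'\[installed on (\d{4})/(\d{1,2})/(\d{1,2})\]')
rever = re.compile(r'\[version (.*?)\]')
resw = re.compile(r'^([\w\s\.\(\-\)\+]*)')

line = 'Adobe Reader  [version 11.0.10]  [installed on 2015/3/14]'
name = resw.findall(line)[0].strip()                      # 'Adobe Reader'
version = rever.findall(line)[0]                          # '11.0.10'
year, month, day = (int(x) for x in redate.findall(line)[0])
installed = datetime.date(year, month, day)               # datetime.date(2015, 3, 14)
print(name, version, installed)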

Example 26

Project: Flowgen Source File: makeflows.py
def process_find_functions(node, MAX_diagram_zoomlevel):
    # \s --> [ \t\r\f\v] : avoids newlines \n
    # (?! ): negative lookahead
    # ()?: optional group
    regextextActionComment = r'^\s*//\$(?!\s+\[)(\s+(?P<tag><\w+>))?\s+(?P<action>.+)$'
    regextextActionComment1 = r'^\s*//\$1(?!\s+\[)\s+(?P<action>.+)$'
    regextextAnyActionComment1 = r'^\s*//\$1?(?!\s+\[)\s+(?P<action>.+)$'
    regextextAnyActionComment = r'^\s*//\$(?P<zoomlevel>[0-9])?(?!\s+\[)\s+(?P<action>.+)$'
    regexActionComment = re.compile(regextextActionComment)
    regexActionComment1 = re.compile(regextextActionComment1)
    regexAnyActionCommentZoomArray = [regexActionComment, re.compile(regextextAnyActionComment1)]
    #anycomment_previousline = regexAnyActionCommentZoomArray[zoom].match(enum_file[i-1-1][1])
    def regexActionComment(zoom):
        if zoom == 0:
            zoom = ''
        regextextActionComment_zoom = r'^\s*//\$' + str(zoom) + r'(?!\s+\[)\s+(?P<action>.+)$'
        return re.compile(regextextActionComment_zoom)

    regexContextualComment = re.compile(r'^\s*//\$\s+\[(?P<condition>.+)\]\s*$')
    regexHighlightComment = re.compile(r'^\s*(?P<commandline>.+?)\s+//\$\s*(?:$|//.+$)')
    #regexIf = re.compile(r'^\s*if\s*\((?P<condition>.*)\)\s*{?\s*(?:$|//.*$)')
    #regexElseIf = re.compile(r'^\s*}?\s*else if\s*\((?P<condition>.*)\)\s*{\s*(?:$|//.*$)')
    #this only works in a one line
    #regexIf1line = re.compile(r'^\s*if\s*\((?P<condition>.*)\)\s*{\s*(?:$|//.*$)')

    start_line = node.extent.start.line
    end_line = node.extent.end.line
    infile_clang = node.location.file
    global infile_str
    infile_str = node.location.file.name.decode("utf-8")
    infile = open(infile_str, 'r')
    #lines enumerated starting from 1
    enum_file = list(enumerate(infile, start=1))
    infile.close()

    ##look for comment inside function/method
    #comment_inside_method = False
    #if lookfor_lowestZoomactionAnnotation_inNode(node,0):
    #   comment_inside_method = True

    ##if ActionComment inside function/method:
    #if comment_inside_method == True :            
    print('Processing %s of kind %s [start_line=%s, end_line=%s. At "%s"]' % (
        node.spelling.decode("utf-8"), node.kind.name, node.extent.start.line, node.extent.end.line,
        node.location.file))

    # TO DO: zoom loop generates all possible zoom levels. Instead, only relevant zoom for each diagram should be generated.
    zoom_str_Array = ['', '1', '2']
    for diagram_zoomlevel in range(0, MAX_diagram_zoomlevel + 1):

        class_name = ''
        if node.kind.name == 'CXX_METHOD':
            class_name = str(node.semantic_parent.spelling.decode("utf8")) + '_'
            #also see node.lexical_parent.spelling
        outfile_str = str(node.get_usr().decode("utf8")) + zoom_str_Array[diagram_zoomlevel]
        #remove special characters from outfile_str 
        outfile_str = ''.join(e for e in outfile_str if e.isalnum())
        #outfile= open(outfile_str+'.txt', "w+")  

        # find if statements inside the function
        ifbeginlineArray, ifendlineArray, ifnodeArray = find_ifstmt(node)
        # print (ifbeginlineArray, ifendlineArray, ifnodeArray)

        # find loop statements inside the function
        loopbeginlineArray, loopendlineArray, loopnodeArray, looptypeArray = find_loopstmt(node)
        # print (loopbeginlineArray, loopendlineArray, loopnodeArray)

        #variables for conditional statements ('Nested' means nested inside another conditional statement)
        elseifbeginlineArray = []
        elsebeginline = None
        ifstructurenodeArray = []
        ifbeginlineNestedArray = []
        ifendlineNestedArray = []
        ifnodeNestedArray = []
        ifstructureelseifnodeArray = []
        elseifbeginlineNestedArray = []
        elsebeginlineNested = None
        ifstructurenodeNestedArray = []
        ifstructureelseifnodeNestedArray = []
        endifWrite = False
        endifNestedWrite = False
        elseifNum = 0
        elseifNumNested = 0
        IdxIfbeginlineArray = None
        IdxIfbeginlineArrayNested = None
        #write_zoomlevel_beforeifstmt=None
        ifstmt_write_zoomlevel = None
        ifstmtNested_write_zoomlevel = None

        #variables for loop statements
        endloopWrite = False
        IdxLoopbeginlineArray = None
        loopstmt_write_zoomlevel = None
        loopdescription_flag=False

        #find return statements inside the function
        returnlineArray, returnTypeArray = find_returnstmt(node, diagram_zoomlevel)

        #other variables
        #TO DO: use depthlevel
        depthlevel = 0
        #flagparallelactions=(flag TRUE/FALSE,depthlevel)
        #TO DO: change array for another more transparent structure, like an object with attributes
        flagparallelactions = [False, 0]
        lastcommentlinematched = [0, 0, 0]
        tab = '   '
        indentation_level = 0
        last_comment_str = ["", "", ""]
        string_notes = ["", "", ""]
        string = ''
        string_tmp = ["", "", ""]
        inside_comment_flag = [False, False, False]
        actioncallsdefArray = []
        write_zoomlevel = None


        def increase_depthlevel():
            nonlocal depthlevel
            depthlevel += 1
            write_strings(write_zoomlevel)
            return

        def decrease_depthlevel():
            nonlocal flagparallelactions, depthlevel, string, indentation_level
            depthlevel -= 1
            write_strings(write_zoomlevel)
            ##if activated parallelflag
            #if flagparallelactions[0]==True and depthlevel==flagparallelactions[1]:
            #   string+= indentation_level*tab+'end fork\n'
            #   flagparallelactions[0]=False
            #   flagparallelactions[1]=None
            return


        def add_note(stringIN):
            nonlocal string_notes
            string_notes[write_zoomlevel] += stringIN + '\n'
            return

            #taken from http://stackoverflow.com/questions/2657693/insert-a-newline-character-every-64-characters-using-python

        #def insert_newlines(string, every=75):
        #    lines = []
        #    for i in range(0, len(string), every):
        #       lines.append(string[i:i+every])
        #    return '\n'.join(lines)     

        def color(zoomlevel_IN):
            if zoomlevel_IN == 0:
                return '#84add6'
            elif zoomlevel_IN == 1:
                return '#b2cce5'
            elif zoomlevel_IN == 2:
                return '#e0eaf4'


        def write_strings(write_zoomlevelMIN):
            nonlocal string, string_tmp, diagram_zoomlevel
            write_zoomlevelMAX = -100  #initialize variable to absurd value
            #write_zoomlevelMIN: the MIN zoomlevel annotations that will be written. Specified as an entry to the function.
            #write_zoomlevelMAX: the MAX zoomlevel annotations that will be written. Found out inside this function.
            #diagram_zoomlevel: the diagram zoomlevel. write_zoomlevelMAX is lower or equal.

            def write_string_container(write_zoomlevelIN):
                nonlocal string_tmp, last_comment_str, inside_comment_flag

                string_tmp[write_zoomlevelIN] += indentation_level * tab + 'partition ' + color(
                    write_zoomlevelIN) + ' "' + last_comment_str[write_zoomlevelIN] + '" {\n' + string_tmp[
                                                     write_zoomlevelIN + 1] + indentation_level * tab + '}\n'
                last_comment_str[write_zoomlevelIN] = ""
                inside_comment_flag[write_zoomlevelIN] = False
                string_tmp[write_zoomlevelIN + 1] = ""
                return

            def write_string_normal(write_zoomlevelIN):
                nonlocal string_notes
                nonlocal string_tmp
                nonlocal last_comment_str
                nonlocal inside_comment_flag
                nonlocal actioncallsdefArray
                if inside_comment_flag[write_zoomlevelIN]:
                    #write action comment
                    last_comment_str[write_zoomlevelIN] = indentation_level * tab + ':' + color(
                        write_zoomlevelIN) + ':' + last_comment_str[write_zoomlevelIN] + ';\n'
                    #write extra if there are calls
                    if actioncallsdefArray:
                        last_comment_str[write_zoomlevelIN] = last_comment_str[write_zoomlevelIN][:-2] + "\n----"
                        for it7 in actioncallsdefArray:
                            usr_id_str = str(it7.get_usr().decode("utf-8"))
                            usr_id_str = ''.join(e for e in usr_id_str if e.isalnum())
                            classname = ''
                            if it7.kind.name == 'CXX_METHOD':
                                classname = str(it7.semantic_parent.spelling.decode("utf-8")) + '::'
                            if read_flowdbs(it7.get_usr().decode("utf8")):
                                call_in_filename_str = read_flowdbs.file + '.html'
                                last_comment_str[write_zoomlevelIN] += '\n' + str(
                                    it7.result_type.kind.name) + ' ' + classname + str(it7.displayname.decode(
                                    "utf-8")) + ' -- [[' + call_in_filename_str + '#' + usr_id_str + ' link]]'
                            else:
                                last_comment_str[write_zoomlevelIN] += '\n' + str(
                                    it7.result_type.kind.name) + ' ' + classname + str(it7.displayname.decode("utf-8"))

                                #last_comment_str+=str(it7.result_type.kind.name)+' '+str()+str(it7.displayname.decode("utf-8"))+' -- [[http://www.google.es]]'+'\\n'
                        last_comment_str[write_zoomlevelIN] += ';\n'
                    #write extra if there are notes
                    if string_notes[write_zoomlevelIN] != "":
                        last_comment_str[write_zoomlevelIN] += "note right\n" + string_notes[
                            write_zoomlevelIN] + "end note\n"
                        string_notes[write_zoomlevelIN] = ""
                    #write in temporal string
                    string_tmp[write_zoomlevelIN] += last_comment_str[write_zoomlevelIN]
                    last_comment_str[write_zoomlevelIN] = ''
                    #reinitialize flags
                    inside_comment_flag[write_zoomlevelIN] = False
                    actioncallsdefArray = []
                return


            #reverse loop to find write_zoomlevelMAX and call write_string_normal(write_zoomlevelMAX) if necessary
            for zoom_it in range(diagram_zoomlevel, write_zoomlevelMIN - 1, -1):
                #annotation exists at this level and is not written in temporal string yet
                if inside_comment_flag[zoom_it]:
                    write_zoomlevelMAX = zoom_it
                    write_string_normal(write_zoomlevelMAX)
                    break
                #the temporal string exists at this level
                elif string_tmp[zoom_it] != "":
                    write_zoomlevelMAX = zoom_it
                    break

            #reverse loop from ( write_zoomlevelMAX - 1 ) to write_zoomlevelMIN, where write_string_container() is called
            for zoom_it2 in range(write_zoomlevelMAX - 1, write_zoomlevelMIN - 1, -1):
                write_string_container(zoom_it2)


            #if zoomlevelMIN=0 write temporal string to main string
            if write_zoomlevelMIN == 0:
                string += string_tmp[0]
                string_tmp[0] = ''

            return

            ##write last action annotations for current zoom level and all possible higher ones in their corresponding temporal string
            #for zoom_it in range(write_zoomlevelMIN, diagram_zoomlevel+1):
            #   write_string_normal(zoom_it)
            ##write temporal strings of higher level zooms in the current zoomlevel temporal string
            #for zoom_it2 in range(write_zoomlevelMIN+1,diagram_zoomlevel+1):
            #   string_tmp[write_zoomlevelMIN]+=string_tmp[zoom_it2]
            #   string_tmp[zoom_it2]=''


        # Functions for the if statements.
        # TO DO: reuse parent-if-statement functions as nested-if-statement functions

        def ifbeginlineArray_method():
            nonlocal elseifbeginlineArray, elsebeginline, ifstructurenodeArray, ifstructureelseifnodeArray
            nonlocal ifbeginlineNestedArray, ifendlineNestedArray, ifnodeNestedArray
            nonlocal string_tmp, indentation_level, depthlevel
            nonlocal endifWrite, IdxIfbeginlineArray, write_zoomlevel, ifstmt_write_zoomlevel
            # look for comment inside if statement
            IdxIfbeginlineArray = ifbeginlineArray.index(i)
            node = ifnodeArray[IdxIfbeginlineArray]
            #if comment inside if statement:
            if lookfor_lowestZoomactionAnnotation_inNode(node, diagram_zoomlevel):
                #adjust zoomlevel
                ifstmt_write_zoomlevel = lookfor_lowestZoomactionAnnotation_inNode.write_zoomlevel
                #write_zoomlevel_beforeifstmt=write_zoomlevel
                write_zoomlevel = ifstmt_write_zoomlevel
                #increase depthlevel
                increase_depthlevel()
                #write 'if' in string
                description = regexContextualComment.match(enum_file[i - 1 - 1][1])
                if description:
                    string_tmp[write_zoomlevel] += '\n' + indentation_level * tab + 'if (' + description.group(
                        'condition') + ') then(yes)''\n'
                else:
                    string_condition = ' '.join(
                        t.spelling.decode("utf-8") for t in list(node.get_children())[0].get_tokens())
                    string_condition = string_condition[:-1]
                    string_tmp[
                        write_zoomlevel] += '\n' + indentation_level * tab + 'if (' + string_condition + ' ?) then(yes)''\n'
                #mark } endif to be written in string
                endifWrite = True
                indentation_level += 1
                #explore substructure: then / else if/ else: elseifbeginlineArray, elsebeginline, ifstructurenodeArray, ifstructureelseifnodeArray
                elseifbeginlineArray, elsebeginline, ifstructurenodeArray, ifstructureelseifnodeArray = find_elsestmt(
                    ifnodeArray[IdxIfbeginlineArray])
                #explore then and update ifbeginlineNestedArray, ifendlineNestedArray, ifnodeNestedArray
                ifbeginlineNestedArray, ifendlineNestedArray, ifnodeNestedArray = find_ifstmt(ifstructurenodeArray[0])
            return

        def elseifbeginlineArray_method():
            nonlocal ifbeginlineNestedArray, ifendlineNestedArray, ifnodeNestedArray
            nonlocal elseifNum, string_tmp, indentation_level, write_zoomlevel
            write_zoomlevel = ifstmt_write_zoomlevel
            decrease_depthlevel()
            increase_depthlevel()
            elseifNum += 1
            node = ifstructureelseifnodeArray[elseifNum - 1]
            #write 'else if' in string
            description = regexContextualComment.match(enum_file[i - 1 - 1][1])
            if description:
                string_tmp[write_zoomlevel] += (indentation_level - 1) * tab + 'elseif (' + description.group(
                    'condition') + ') then (yes)' + '\n'
            else:
                string_condition = ' '.join(
                    t.spelling.decode("utf-8") for t in list(node.get_children())[0].get_tokens())
                string_condition = string_condition[:-1]
                string_tmp[write_zoomlevel] += (
                                                   indentation_level - 1) * tab + 'elseif (' + string_condition + ' ?) then (yes)' + '\n'
                #explore elseif and update ifbeginlineNestedArray, ifendlineNestedArray, ifnodeNestedArray
            ifbeginlineNestedArray, ifendlineNestedArray, ifnodeNestedArray = find_ifstmt(
                ifstructurenodeArray[elseifNum])
            return

        def elsebeginline_method():
            nonlocal ifbeginlineNestedArray, ifendlineNestedArray, ifnodeNestedArray
            nonlocal string_tmp, indentation_level, write_zoomlevel
            write_zoomlevel = ifstmt_write_zoomlevel
            decrease_depthlevel()
            increase_depthlevel()
            #write 'else' in string
            string_tmp[write_zoomlevel] += (indentation_level - 1) * tab + 'else(no)' + '\n'
            #explore else and update ifbeginlineNestedArray, ifendlineNestedArray, ifnodeNestedArray
            ifbeginlineNestedArray, ifendlineNestedArray, ifnodeNestedArray = find_ifstmt(ifstructurenodeArray[-1])
            return

        def ifendlineArray_method():
            nonlocal string_tmp, indentation_level, depthlevel
            nonlocal endifWrite, elsebeginline, elseifNum, ifstmt_write_zoomlevel, write_zoomlevel
            write_zoomlevel = ifstmt_write_zoomlevel
            decrease_depthlevel()
            #is the else condition explicitly written? Otherwise write now
            if elsebeginline is None:
                string_tmp[write_zoomlevel] += (indentation_level - 1) * tab + 'else(no)' + '\n'
            #write endif's in string
            string_tmp[write_zoomlevel] += (indentation_level - 1) * tab + 'endif' + '\n' + '\n'
            indentation_level -= 1

            #reset all variables
            depthlevel -= 1
            endifWrite = False
            elseifNum = 0
            del elseifbeginlineArray[:]
            elsebeginline = None
            ifstmt_write_zoomlevel = None
            #write_zoomlevel=write_zoomlevel_beforeifstmt
            #write_zoomlevel_before_ifstmt=None
            return

        ##
        def ifbeginlineNestedArray_method():
            nonlocal IdxIfbeginlineArrayNested, string_tmp, indentation_level, depthlevel, endifNestedWrite
            nonlocal elseifbeginlineNestedArray, elsebeginlineNested, ifstructurenodeNestedArray, ifstructureelseifnodeNestedArray, ifstmtNested_write_zoomlevel, write_zoomlevel
            #look for comment inside Nested if statement
            IdxIfbeginlineArrayNested = ifbeginlineNestedArray.index(i)
            node = ifnodeArray[IdxIfbeginlineArrayNested]
            #if comment inside if statement:
            if lookfor_lowestZoomactionAnnotation_inNode(node, diagram_zoomlevel):
                #adjust zoomlevel
                ifstmtNested_write_zoomlevel = lookfor_lowestZoomactionAnnotation_inNode.write_zoomlevel
                write_zoomlevel = ifstmtNested_write_zoomlevel
                #increase depthlevel
                increase_depthlevel()
                #write 'if' in string
                description = regexContextualComment.match(enum_file[i - 1 - 1][1])
                if description:
                    string_tmp[write_zoomlevel] += '\n' + indentation_level * tab + 'if (' + description.group(
                        'condition') + ') then(yes)''\n'
                else:
                    string_condition = ' '.join(
                        t.spelling.decode("utf-8") for t in list(node.get_children())[0].get_tokens())
                    string_condition = string_condition[:-1]
                    string_tmp[
                        write_zoomlevel] += '\n' + indentation_level * tab + 'if (' + string_condition + ' ?) then(yes)''\n'
                #mark } Nested endif to be written in string
                endifNestedWrite = True
                indentation_level += 1
                #explore substructure: then / else if/ else: elseifbeginlineNestedArray, elsebeginlineNested, ifstructurenodeNestedArray
                elseifbeginlineNestedArray, elsebeginlineNested, ifstructurenodeNestedArray, ifstructureelseifnodeNestedArray = find_elsestmt(
                    ifnodeNestedArray[IdxIfbeginlineArrayNested])
            return

        def elseifbeginlineNestedArray_method():
            nonlocal string, indentation_level, elseifNumNested, write_zoomlevel
            elseifNumNested += 1
            node = ifstructureelseifnodeNestedArray[elseifNumNested - 1]
            write_zoomlevel = ifstmtNested_write_zoomlevel
            decrease_depthlevel()
            increase_depthlevel()
            #write 'else if' in string
            description = regexContextualComment.match(enum_file[i - 1 - 1][1])
            if description:
                string_tmp[write_zoomlevel] += (
                                                   indentation_level - 1) * tab + 'else(no)' + '\n' + indentation_level * tab + 'if (' + description.group(
                    'condition') + ') then (yes)' + '\n'
            else:
                string_condition = ' '.join(
                    t.spelling.decode("utf-8") for t in list(node.get_children())[0].get_tokens())
                string_condition = string_condition[:-1]
                string_tmp[write_zoomlevel] += (
                                                   indentation_level - 1) * tab + 'else(no)' + '\n' + indentation_level * tab + 'if (' + string_condition + ' ?) then (yes)' + '\n'
            indentation_level += 1
            return

        def elsebeginlineNested_method():
            nonlocal string_tmp, indentation_level, write_zoomlevel
            write_zoomlevel = ifstmtNested_write_zoomlevel
            decrease_depthlevel()
            increase_depthlevel()
            #write 'else' in string
            string_tmp[write_zoomlevel] += (indentation_level - 1) * tab + 'else(no)' + '\n'
            return

        def ifendlineNestedArray_method():
            nonlocal string_tmp, indentation_level, depthlevel
            nonlocal endifNestedWrite, elsebeginlineNested, elseifNumNested, ifstmtNested_write_zoomlevel, write_zoomlevel
            write_zoomlevel = ifstmtNested_write_zoomlevel
            decrease_depthlevel()
            #is the else condition explicitly written? Otherwise write now
            if elsebeginlineNested is None:
                string_tmp[write_zoomlevel] += (indentation_level - 1) * tab + 'else(no)' + '\n'
                #write endif's in string
            for n in range(elseifNumNested):
                string_tmp[write_zoomlevel] += (indentation_level - 1) * tab + 'endif' + '\n'
                indentation_level -= 1
            string_tmp[write_zoomlevel] += (indentation_level - 1) * tab + 'endif' + '\n' + '\n'
            indentation_level -= 1

            #reset all variables
            depthlevel -= 1
            endifNestedWrite = False
            elseifNumNested = 0
            del elseifbeginlineNestedArray[:]
            elsebeginlineNested = None
            ifstmtNested_write_zoomlevel = None
            return


        # Functions for the loop statements.
        def loopbeginlineArray_method():
            nonlocal string_tmp, indentation_level, depthlevel
            nonlocal endloopWrite, IdxLoopbeginlineArray, write_zoomlevel, loopstmt_write_zoomlevel, loopdescription_flag
            IdxLoopbeginlineArray = loopbeginlineArray.index(i)
            node = loopnodeArray[IdxLoopbeginlineArray]
            # if comment inside loop statement with the adequate zoom level:
            if lookfor_lowestZoomactionAnnotation_inNode(node, diagram_zoomlevel):
                # adjust zoomlevels and depthlevel
                loopstmt_write_zoomlevel = lookfor_lowestZoomactionAnnotation_inNode.write_zoomlevel
                write_zoomlevel = loopstmt_write_zoomlevel
                increase_depthlevel()
                # write 'loop' in string
                description = regexContextualComment.match(enum_file[i - 1 - 1][1])
                if description:
                    string_tmp[write_zoomlevel] += '\n' + indentation_level * tab + 'while (' + description.group(
                        'condition') + ')''\n'
                    loopdescription_flag=True
                else:
                    # depends on the loop type
                    # 207: A while statement.
                    if looptypeArray[IdxLoopbeginlineArray] == 207:
                        string_condition = ' '.join(
                            t.spelling.decode("utf-8") for t in list(node.get_children())[0].get_tokens())[:-1]
                        string_tmp[
                            write_zoomlevel] += '\n' + indentation_level * tab + 'while (' + string_condition + '? )''\n'
                    # 208: A do statement.
                    elif looptypeArray[IdxLoopbeginlineArray] == 208:
                        string_tmp[write_zoomlevel] += '\n' + indentation_level * tab + 'repeat''\n'
                    # 209: A for statement.
                    elif looptypeArray[IdxLoopbeginlineArray] == 209:
                        #the '0','1','2' children of the node contain the spellings of the three elements of the FOR loop. 
                        #We have to call the command get_tokens, which produces an iterator over all tokens and then join them into the same string.
                        #However, for the '0' and '2' children, we don't want the last token. We have first to convert the iterator into a list and then use [:-1]
                        string_condition = 'FOR ('+' '.join(
                            t.spelling.decode("utf-8") for t in list(list(node.get_children())[0].get_tokens())[:-1])+' '+' '.join(
                            t.spelling.decode("utf-8") for t in list(node.get_children())[1].get_tokens())+' '+' '.join(
                            t.spelling.decode("utf-8") for t in list(list(node.get_children())[2].get_tokens())[:-1])+' )'
                        string_tmp[
                            write_zoomlevel] += '\n' + indentation_level * tab + 'while (' + string_condition + ')''\n'
                # mark } endloop to be written in string
                endloopWrite = True
                indentation_level += 1
            return

        def loopendlineArray_method():
            nonlocal string_tmp, indentation_level, depthlevel, IdxLoopbeginlineArray
            nonlocal endloopWrite, loopstmt_write_zoomlevel, write_zoomlevel, loopdescription_flag
            write_zoomlevel = loopstmt_write_zoomlevel
            decrease_depthlevel()
            #write 'loop end' in string; it depends on the loop type
            # 207: A while statement.
            if loopdescription_flag:
                string_tmp[write_zoomlevel] += (indentation_level - 1) * tab + 'endwhile' + '\n' + '\n'
            else:
                if looptypeArray[IdxLoopbeginlineArray]==207:
                    string_tmp[write_zoomlevel] += (indentation_level - 1) * tab + 'endwhile' + '\n' + '\n'
                # 208: A do statement.
                elif looptypeArray[IdxLoopbeginlineArray]==208:
                    node = loopnodeArray[IdxLoopbeginlineArray]
                    string_condition = ' '.join(
                            t.spelling.decode("utf-8") for t in list(node.get_children())[1].get_tokens())[:-1]
                    string_tmp[write_zoomlevel] += (indentation_level - 1) * tab + 'repeat while ('+ string_condition+ '? )''\n' + '\n'
                # 209: A for statement.
                elif looptypeArray[IdxLoopbeginlineArray]==209:
                    string_tmp[write_zoomlevel] += (indentation_level - 1) * tab + 'endwhile' + '\n' + '\n'
            indentation_level -= 1
            #reset all variables
            depthlevel -= 1
            endloopWrite = False
            loopstmt_write_zoomlevel = None
            loopdescription_flag=False
            return


        string += '@startuml\n\nstart\n skinparam activityBackgroundColor #white \n'

        #main loop over source code lines
        #TO DO: optimization
        for i, line in enum_file:
            if i in range(start_line, end_line):

                #look for an annotated action and set zoomlevel if found
                for zoom_it2 in range(0, diagram_zoomlevel + 1):
                    anyactionannotation = regexActionComment(zoom_it2).match(line)
                    if anyactionannotation:
                        write_zoomlevel = zoom_it2
                        break
                #look for highlight annotation
                comment_highlight = regexHighlightComment.match(line)
                #actions
                if anyactionannotation:
                    #this line continues a previous multi-line action annotation
                    if lastcommentlinematched[write_zoomlevel] == i - 1:
                        last_comment_str[write_zoomlevel] += '\\n' + anyactionannotation.group('action')
                    #first line of action annotation
                    else:
                        write_strings(write_zoomlevel)
                        #new comment at the given zoom level
                        inside_comment_flag[write_zoomlevel] = True
                        ##if <parallel>
                        ##TO DO combine parallel and if statements. paralell inside parallel
                        #if comment.group('tag'):
                        #   if comment.group('tag')=="<parallel>":
                        #      #if begin of parallel actions:
                        #      if flagparallelactions[0]==False:
                        #         string+= indentation_level*tab+'fork\n'
                        #         flagparallelactions[0]=True
                        #         flagparallelactions[1]=depthlevel
                        #      #else
                        #      else:
                        #         if depthlevel==flagparallelactions[1]:
                        #            string+= indentation_level*tab+'fork again\n'
                        ##if not <parallel> but activated parallelflag
                        #else:
                        #   if flagparallelactions[0]==True and depthlevel==flagparallelactions[1]:
                        #      string+= indentation_level*tab+'end fork\n'
                        #      flagparallelactions[0]=False
                        #      flagparallelactions[1]=None
                        #add line to current action annotation
                        last_comment_str[write_zoomlevel] += anyactionannotation.group('action')

                    lastcommentlinematched[write_zoomlevel] = i

                else:

                    #  calls,...
                    if comment_highlight:
                        scan_column_start = 1 + comment_highlight.start('commandline')
                        #the end character is -1. There is an offset of +1 with respect to the file
                        scan_column_end = 1 + comment_highlight.end('commandline') - 1
                        scan_file = infile_clang
                        scan_line = i
                        print('LOOKING FOR CALLS AT: ', scan_file, scan_line, scan_column_start, scan_column_end)
                        singlelinecallsdefArray = find_calls(scan_file, scan_line, scan_column_start, scan_column_end)
                        #for it4 in singlelinecallsdefArray:
                        #print ('singlelinecallsdefArray',it4.displayname.decode("utf-8"))
                        for it5 in singlelinecallsdefArray:
                            if it5 not in actioncallsdefArray:
                                actioncallsdefArray.append(it5)

                    #### ...,OR if statements,...
                    elif i in ifbeginlineArray:
                        ifbeginlineArray_method()
                    #if i in elseifbeginlineArray
                    elif i in elseifbeginlineArray:
                        elseifbeginlineArray_method()
                        #if i in elsebeginline
                    elif i == elsebeginline:
                        elsebeginline_method()
                    #if i is ifendlineArray[IdxIfbeginlineArray] and } is marked to be written in string:
                    elif endifWrite and (i == ifendlineArray[IdxIfbeginlineArray]):
                        ifendlineArray_method()
                        #### Nested if statements
                    elif i in ifbeginlineNestedArray:
                        ifbeginlineNestedArray_method()
                        #if i in elseifbeginlineNestedArray
                    elif i in elseifbeginlineNestedArray:
                        elseifbeginlineNestedArray_method()
                        #if i in elsebeginlineNested
                    elif i == elsebeginlineNested:
                        elsebeginlineNested_method()
                    #if i is ifendlineNestedArray[IdxIfbeginlineArrayNested] and } is marked to be written in string:
                    elif endifNestedWrite and (i == ifendlineNestedArray[IdxIfbeginlineArrayNested]):
                        ifendlineNestedArray_method()

                    #### ...,OR loops,...
                    elif i in loopbeginlineArray:
                        loopbeginlineArray_method()
                    # if i is loopendlineArray[IdxLoopbeginlineArray] and } is marked to be written in string:
                    elif endloopWrite and (i == loopendlineArray[IdxLoopbeginlineArray]):
                        loopendlineArray_method()

                    # ...,OR return statements):
                    elif i in returnlineArray:
                        #print('RETURN:',i,line)
                        if returnTypeArray[returnlineArray.index(i)] == True:
                            #if pending flags, finish them
                            #write_zoomlevel=0
                            #print('write_zoomlevel',write_zoomlevel)
                            #decrease_depthlevel()
                            #print('write_zoomlevel2',write_zoomlevel)
                            write_strings(write_zoomlevel)
                            string_tmp[write_zoomlevel] += "\nstop\n"
                        if returnTypeArray[returnlineArray.index(i)] == False:
                            #print('possible stop', i, line)
                            add_note("possible STOP")

        write_strings(0)
        string += '\n@enduml'
        #print (string)

        write_htmlonline(string, outfile_str)
        write_txt(string, outfile_str)

    return
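
The loop above matches each source line against regexActionComment(zoom_it2), a helper defined earlier in this project that returns one compiled pattern per zoom level. A minimal sketch of that idea, using a purely hypothetical comment marker (one '$' per zoom level; the real syntax may differ):

import re

def regexActionComment(zoomlevel):
    # hypothetical marker: "//$ text" at level 0, "//$$ text" at level 1, and so on
    return re.compile(r'^\s*//' + r'\$' * (zoomlevel + 1) + r'\s*(?P<action>.*)$')

m = regexActionComment(1).match('  //$$ compute checksum')
# m.group('action') -> 'compute checksum'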

Example 27

Project: FanFicFare Source File: adapter_fimfictionnet.py
Function: doextractchapterurlsandmetadata
    def doExtractChapterUrlsAndMetadata(self,get_cover=True):

        if self.is_adult or self.getConfig("is_adult"):
            self.set_adult_cookie()

        ##---------------------------------------------------------------------------------------------------
        ## Get the story's title page. Check if it exists.

        try:
            # don't use cache if manual is_adult--should only happen
            # if it's an adult story and they don't have is_adult in ini.
            data = self.do_fix_blockquotes(self._fetchUrl(self.url,
                                                          usecache=(not self.is_adult)))
            soup = self.make_soup(data)
        except urllib2.HTTPError, e:
            if e.code == 404:
                raise exceptions.StoryDoesNotExist(self.url)
            else:
                raise e

        if "Warning: mysql_fetch_array(): supplied argument is not a valid MySQL result resource" in data:
            raise exceptions.StoryDoesNotExist(self.url)

        if "This story has been marked as having adult content. Please click below to confirm you are of legal age to view adult material in your country." in data:
            raise exceptions.AdultCheckRequired(self.url)

        if self.password:
            params = {}
            params['password'] = self.password
            data = self._postUrl(self.url, params)
            soup = self.make_soup(data)

        if not (soup.find('form', {'id' : 'password_form'}) == None):
            if self.getConfig('fail_on_password'):
                raise exceptions.FailedToDownload("%s requires story password and fail_on_password is true."%self.url)
            else:
                raise exceptions.FailedToLogin(self.url,"Story requires individual password",passwdonly=True)

        ##----------------------------------------------------------------------------------------------------
        ## Extract metadata

        storyContentBox = soup.find('div', {'class':'story_content_box'})

        # Title
        title = storyContentBox.find('a', {'class':re.compile(r'.*\bstory_name\b.*')})
        self.story.setMetadata('title',stripHTML(title))

        # Author
        author = storyContentBox.find('div', {'class':'author'}).find('a')
        self.story.setMetadata("author", stripHTML(author))
        #No longer seems to be a way to access Fimfiction's internal author ID
        self.story.setMetadata("authorId", self.story.getMetadata("author"))
        self.story.setMetadata("authorUrl", "http://%s/user/%s" % (self.getSiteDomain(), stripHTML(author)))

        #Rating text is replaced with full words for historical compatibility after the site changed
        #on 2014-10-27
        rating = stripHTML(storyContentBox.find('a', {'class':re.compile(r'.*\bcontent-rating-.*')}))
        rating = rating.replace("E", "Everyone").replace("T", "Teen").replace("M", "Mature")
        self.story.setMetadata("rating", rating)

        # Chapters
        for chapter in storyContentBox.find_all('a',{'class':'chapter_link'}):
            self.chapterUrls.append((stripHTML(chapter), 'http://'+self.host+chapter['href']))

        self.story.setMetadata('numChapters',len(self.chapterUrls))

        # Status
        # In the case of Fimfiction, possible statuses are 'Completed', 'Incomplete', 'On Hiatus' and 'Cancelled'
        # For the sake of bringing it in line with the other adapters, 'Incomplete' becomes 'In-Progress'
        # and 'Complete' becomes 'Completed'. 'Cancelled' and 'On Hiatus' are passed through, it's easy now for users
        # to change/remove if they want with replace_metadata
        status = stripHTML(storyContentBox.find('span', {'class':re.compile(r'.*\bcompleted-status-.*')}))
        status = status.replace("Incomplete", "In-Progress").replace("Complete", "Completed")
        self.story.setMetadata("status", status)

        # Genres and Warnings
        # warnings were folded into general categories in the 2014-10-27 site update
        categories = storyContentBox.find_all('a', {'class':re.compile(r'.*\bstory_category\b.*')})
        for category in categories:
            category = stripHTML(category)
            if category == "Gore" or category == "Sex":
                self.story.addToList('warnings', category)
            else:
                self.story.addToList('genre', category)

        # Word count
        wordCountText = stripHTML(storyContentBox.find('li', {'class':'bottom'}).find('div', {'class':'word_count'}))
        self.story.setMetadata("numWords", re.sub(r'[^0-9]', '', wordCountText))

        # Cover image
        storyImage = storyContentBox.find('div', {'class':'story_image'})
        if storyImage:
            coverurl = storyImage.find('a')['href']
            if coverurl.startswith('//'): # fix for img urls missing 'http:'
                coverurl = "http:"+coverurl
            if get_cover:
                # try setting from href, if fails, try using the img src
                if self.setCoverImage(self.url,coverurl)[0] == "failedtoload":
                    img = storyImage.find('img')
                    # try src, then data-src, then leave None.
                    coverurl = img.get('src',img.get('data-src',None))
                    if coverurl:
                        self.setCoverImage(self.url,coverurl)

            coverSource = storyImage.find('a', {'class':'source'})
            if coverSource:
                self.story.setMetadata('coverSourceUrl', coverSource['href'])
                #There's no text associated with the cover source link, so just
                #reuse the URL. Makes it clear it's an external link leading
                #outside of the fanfic site, at least.
                self.story.setMetadata('coverSource', coverSource['href'])

        # fimf has started including extra stuff inside the description div.
        descdivstr = u"%s"%storyContentBox.find("div", {"class":"description"})
        hrstr=u"<hr/>"
        descdivstr = u'<div class="description">'+descdivstr[descdivstr.index(hrstr)+len(hrstr):]
        self.setDescription(self.url,descdivstr)

        # Find the newest and oldest chapter dates
        storyData = storyContentBox.find('div', {'class':'story_data'})
        oldestChapter = None
        newestChapter = None
        self.newestChapterNum = None # save for comparing during update.
        # Scan all chapters to find the oldest and newest, on
        # FiMFiction it's possible for authors to insert new chapters
        # out-of-order or change the dates of earlier ones by editing
        # them--That WILL break epub update.
        for index, chapterDate in enumerate(storyData.find_all('span', {'class':'date'})):
            chapterDate = self.ordinal_date_string_to_date(chapterDate.contents[1])
            if oldestChapter == None or chapterDate < oldestChapter:
                oldestChapter = chapterDate
            if newestChapter == None or chapterDate > newestChapter:
                newestChapter = chapterDate
                self.newestChapterNum = index

        if newestChapter is None:
            #this will only be true when updating metadata for stories that have 0 chapters
            #there is a "last modified" date given on the page, extract it and use that.
            moddatetag = storyContentBox.find('span', {'class':'last_modified'})
            if not moddatetag is None:
                newestChapter = self.ordinal_date_string_to_date(moddatetag('span')[1].text)

        # Date updated
        self.story.setMetadata("dateUpdated", newestChapter)

        # Date published
        # falls back to oldest chapter date for stories that haven't been officially published yet
        pubdatetag = storyContentBox.find('span', {'class':'date_approved'})
        if pubdatetag is None:
            if oldestChapter is None:
                #this will only be true when updating metadata for stories that have 0 chapters
                #and that have never been officially published - a rare occurrence. Fall back to last
                #modified date as the publication date, it's all that we've got.
                self.story.setMetadata("datePublished", newestChapter)
            else:
                self.story.setMetadata("datePublished", oldestChapter)
        else:
            pubDate = self.ordinal_date_string_to_date(pubdatetag('span')[1].text)
            self.story.setMetadata("datePublished", pubDate)

        # Characters
        chars = storyContentBox.find("div", {"class":"extra_story_data"})
        for character in chars.find_all("a", {"class":"character_icon"}):
            self.story.addToList("characters", character['title'])

        # Likes and dislikes
        storyToolbar = soup.find('div', {'class':'story-toolbar'})
        likes = storyToolbar.find('span', {'class':'likes'})
        if not likes is None:
            self.story.setMetadata("likes", stripHTML(likes))
        dislikes = storyToolbar.find('span', {'class':'dislikes'})
        if not dislikes is None:
            self.story.setMetadata("dislikes", stripHTML(dislikes))

        # Highest view for a chapter and total views
        viewSpan = storyToolbar.find('span', {'title':re.compile(r'.*\btotal views\b.*')})
        self.story.setMetadata("views", re.sub(r'[^0-9]', '', stripHTML(viewSpan)))
        self.story.setMetadata("total_views", re.sub(r'[^0-9]', '', viewSpan['title']))

        # Comment count
        commentSpan = storyToolbar.find('span', {'title':re.compile(r'.*\bcomments\b.*')})
        self.story.setMetadata("comment_count", re.sub(r'[^0-9]', '', stripHTML(commentSpan)))

        # Short description
        descriptionMeta = soup.find('meta', {'property':'og:description'})
        self.story.setMetadata("short_description", stripHTML(descriptionMeta['content']))

        #groups
        if soup.find('button', {'id':'button-view-all-groups'}):
            groupResponse = self._fetchUrl("https://www.fimfiction.net/ajax/stories/%s/groups" % (self.story.getMetadata("storyId")))
            groupData = json.loads(groupResponse)
            groupList = self.make_soup(groupData["content"])
        else:
            groupList = soup.find('ul', {'id':'story-groups-list'})

        if not (groupList == None):
            for groupName in groupList.find_all('a'):
                self.story.addToList("groupsUrl", 'http://'+self.host+groupName["href"])
                self.story.addToList("groups",stripHTML(groupName).replace(',', ';'))

        #sequels
        for header in soup.find_all('h1', {'class':'header-stories'}):
            # I don't know why using text=re.compile with find() wouldn't work, but it didn't.
            if header.text.startswith('Sequels'):
                sequelContainer = header.parent
                for sequel in sequelContainer.find_all('a', {'class':'story_link'}):
                    self.story.addToList("sequelsUrl", 'http://'+self.host+sequel["href"])
                    self.story.addToList("sequels", stripHTML(sequel).replace(',', ';'))

        #author last login
        userPageHeader = soup.find('div', {'class':re.compile(r'\buser-page-header\b')})
        if not userPageHeader == None:
            infoContainer = userPageHeader.find('div', {'class':re.compile(r'\binfo-container\b')})
            listItems = infoContainer.find_all('li')
            lastLoginString = stripHTML(listItems[1])
            lastLogin = None
            if "online" in lastLoginString:
                lastLogin = date.today()
            elif "offline" in lastLoginString:
                #this regex extracts the number of weeks and the number of days from the last login string.
                #durations under a day are ignored.
                #group 1 is weeks, group 2 is days
                durationGroups = re.match(r"(?:[^0-9]*(\d+?)w)?[^0-9]*(?:(\d+?)d)?", lastLoginString)
                lastLogin = date.today() - timedelta(days=int(durationGroups.group(2) or 0), weeks=int(durationGroups.group(1) or 0))
            self.story.setMetadata("authorLastLogin", lastLogin)

        #The link to the prequel is embedded in the description text, so erring
        #on the side of caution and wrapping this whole thing in a try block.
        #If anything goes wrong this probably wasn't a valid prequel link.
        try:
            description = soup.find('div', {'class':'description'})
            firstHR = description.find("hr")
            nextSib = firstHR.nextSibling
            if "This story is a sequel to" in nextSib.string:
                link = nextSib.nextSibling
                if link.name == "a":
                    self.story.setMetadata("prequelUrl", 'http://'+self.host+link["href"])
                    self.story.setMetadata("prequel", stripHTML(link))
        except:
            pass
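
The author-last-login block above leans on a small duration regex; group 1 is the number of weeks and group 2 the number of days, both optional. A standalone check with an illustrative string:

import re
from datetime import date, timedelta

durationGroups = re.match(r"(?:[^0-9]*(\d+?)w)?[^0-9]*(?:(\d+?)d)?", "Offline for 2w, 3d")
# durationGroups.groups() -> ('2', '3'); missing parts come back as None and fall back to 0
lastLogin = date.today() - timedelta(days=int(durationGroups.group(2) or 0),
                                     weeks=int(durationGroups.group(1) or 0))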

Example 28

Project: MakerDroid Source File: STLImporter.py
def ImportModel(filename, use_kdtree=True, callback=None, **kwargs):
    global vertices, edges, kdtree
    vertices = 0
    edges = 0
    kdtree = None

    normal_conflict_warning_seen = False

    if hasattr(filename, "read"):
        f = filename
        # useful for later error messages
        filename = "input stream"
    else:
        try:
            url_file = pycam.Utils.URIHandler(filename).open()
            # urllib.urlopen objects do not support "seek" - so we need to read
            # the whole file at once. This is ugly - anyone with a better idea?
            f = StringIO.StringIO(url_file.read())
            url_file.close()
        except IOError, err_msg:
            log.error("STLImporter: Failed to read file (%s): %s" \
                    % (filename, err_msg))
            return None
    # Read the first two lines of (potentially non-binary) input - they should
    # contain "solid" and "facet".
    header_lines = []
    while len(header_lines) < 2:
        line = f.readline(200)
        if len(line) == 0:
            # empty line (not even a line-feed) -> EOF
            log.error("STLImporter: No valid lines found in '%s'" % filename)
            return None
        # ignore comment lines
        # note: partial comments (starting within a line) are not handled
        if not line.startswith(";"):
            header_lines.append(line)
    header = "".join(header_lines)
    # read byte 80 to 83 - they contain the "numfacets" value in binary format
    f.seek(80)
    numfacets = unpack("<I", f.read(4))[0]
    binary = False

    if f.len == (84 + 50*numfacets):
        binary = True
    elif header.find("solid") >= 0 and header.find("facet") >= 0:
        binary = False
        f.seek(0)
    else:
        log.error("STLImporter: STL binary/ascii detection failed")
        return None

    if use_kdtree:
        kdtree = PointKdtree([], 3, 1, epsilon)
    model = Model(use_kdtree)

    t = None
    p1 = None
    p2 = None
    p3 = None

    if binary:
        for i in range(1, numfacets + 1): 
            if callback and callback():
                log.warn("STLImporter: load model operation cancelled")
                return None
            a1 = unpack("<f", f.read(4))[0] 
            a2 = unpack("<f", f.read(4))[0] 
            a3 = unpack("<f", f.read(4))[0] 

            n = Vector(float(a1), float(a2), float(a3))
            
            v11 = unpack("<f", f.read(4))[0] 
            v12 = unpack("<f", f.read(4))[0] 
            v13 = unpack("<f", f.read(4))[0] 

            p1 = UniqueVertex(float(v11), float(v12), float(v13))
            
            v21 = unpack("<f", f.read(4))[0] 
            v22 = unpack("<f", f.read(4))[0] 
            v23 = unpack("<f", f.read(4))[0] 

            p2 = UniqueVertex(float(v21), float(v22), float(v23))
            
            v31 = unpack("<f", f.read(4))[0] 
            v32 = unpack("<f", f.read(4))[0] 
            v33 = unpack("<f", f.read(4))[0] 
            
            p3 = UniqueVertex(float(v31), float(v32), float(v33))

            # not used
            attribs = unpack("<H", f.read(2)) 
            
            dotcross = n.dot(p2.sub(p1).cross(p3.sub(p1)))
            if a1 == a2 == a3 == 0:
                dotcross = p2.sub(p1).cross(p3.sub(p1)).z
                n = None

            if dotcross > 0:
                # Triangle expects the vertices in clockwise order
                t = Triangle(p1, p3, p2)
            elif dotcross < 0:
                if not normal_conflict_warning_seen:
                    log.warn(("Inconsistent normal/vertices found in facet " + \
                            "definition %d of '%s'. Please validate the " + \
                            "STL file!") % (i, filename))
                    normal_conflict_warning_seen = True
                t = Triangle(p1, p2, p3)
            else:
                # the three points are in a line - or two points are identical
                # usually this is caused by points that are too close together
                # check the tolerance value in pycam/Geometry/PointKdtree.py
                log.warn("Skipping invalid triangle: %s / %s / %s " \
                        % (p1, p2, p3) + "(maybe the resolution of the model " \
                        + "is too high?)")
                continue
            if n:
                t.normal = n

            model.append(t)
    else:
        solid = re.compile(r"\s*solid\s+(\w+)\s+.*")
        endsolid = re.compile(r"\s*endsolid\s*")
        facet = re.compile(r"\s*facet\s*")
        normal = re.compile(r"\s*facet\s+normal" \
                + r"\s+(?P<x>[-+]?(\d+(\.\d*)?|\.\d+)([eE][-+]?\d+)?)" \
                + r"\s+(?P<y>[-+]?(\d+(\.\d*)?|\.\d+)([eE][-+]?\d+)?)" \
                + r"\s+(?P<z>[-+]?(\d+(\.\d*)?|\.\d+)([eE][-+]?\d+)?)\s+")
        endfacet = re.compile(r"\s*endfacet\s+")
        loop = re.compile(r"\s*outer\s+loop\s+")
        endloop = re.compile(r"\s*endloop\s+")
        vertex = re.compile(r"\s*vertex" \
                + r"\s+(?P<x>[-+]?(\d+(\.\d*)?|\.\d+)([eE][-+]?\d+)?)" \
                + r"\s+(?P<y>[-+]?(\d+(\.\d*)?|\.\d+)([eE][-+]?\d+)?)" \
                + r"\s+(?P<z>[-+]?(\d+(\.\d*)?|\.\d+)([eE][-+]?\d+)?)\s+")

        current_line = 0

        for line in f:
            if callback and callback():
                log.warn("STLImporter: load model operation cancelled")
                return None
            current_line += 1
            m = solid.match(line)
            if m:
                model.name = m.group(1)
                continue

            m = facet.match(line)
            if m:
                m = normal.match(line)
                if m:
                    n = Vector(float(m.group('x')), float(m.group('y')),
                            float(m.group('z')))
                else:
                    n = None
                continue
            m = loop.match(line)
            if m:
                continue
            m = vertex.match(line)
            if m:
                p = UniqueVertex(float(m.group('x')), float(m.group('y')),
                        float(m.group('z')))
                if p1 is None:
                    p1 = p
                elif p2 is None:
                    p2 = p
                elif p3 is None:
                    p3 = p
                else:
                    log.error("STLImporter: more then 3 points in facet " \
                            + "(line %d)" % current_line)
                continue
            m = endloop.match(line)
            if m:
                continue
            m = endfacet.match(line)
            if m:
                if p1 is None or p2 is None or p3 is None:
                    log.warn(("Invalid facet definition in line " \
                            + "%d of '%s'. Please validate the STL file!") \
                            % (current_line, filename))
                    n, p1, p2, p3 = None, None, None, None
                    continue
                if not n:
                    n = p2.sub(p1).cross(p3.sub(p1)).normalized()

                # validate the normal
                # The three vertices of a triangle in an STL file are supposed
                # to be in counter-clockwise order. This should match the
                # direction of the normal.
                if n is None:
                    # invalid triangle (zero-length vector)
                    dotcross = 0
                else:
                    # make sure the points are in ClockWise order
                    dotcross = n.dot(p2.sub(p1).cross(p3.sub(p1)))
                if dotcross > 0:
                    # Triangle expects the vertices in clockwise order
                    t = Triangle(p1, p3, p2, n)
                elif dotcross < 0:
                    if not normal_conflict_warning_seen:
                        log.warn(("Inconsistent normal/vertices found in " + \
                                "line %d of '%s'. Please validate the STL " + \
                                "file!") % (current_line, filename))
                        normal_conflict_warning_seen = True
                    t = Triangle(p1, p2, p3, n)
                else:
                    # The three points are in a line - or two points are
                    # identical. Usually this is caused by points that are too
                    # close together. Check the tolerance value in
                    # pycam/Geometry/PointKdtree.py.
                    log.warn("Skipping invalid triangle: %s / %s / %s " \
                            % (p1, p2, p3) + "(maybe the resolution of the " \
                            + "model is too high?)")
                    n, p1, p2, p3 = (None, None, None, None)
                    continue
                n, p1, p2, p3 = (None, None, None, None)
                model.append(t)
                continue
            m = endsolid.match(line)
            if m:
                continue

    log.info("Imported STL model: %d vertices, %d edges, %d triangles" \
            % (vertices, edges, len(model.triangles())))
    vertices = 0
    edges = 0
    kdtree = None

    if not model:
        # no valid items added to the model
        return None
    else:
        return model
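
The ASCII branch above compiles one pattern per STL keyword; the vertex pattern, for example, exposes the three coordinates as named groups. A quick standalone check (the sample line is illustrative):

import re

vertex = re.compile(r"\s*vertex"
        + r"\s+(?P<x>[-+]?(\d+(\.\d*)?|\.\d+)([eE][-+]?\d+)?)"
        + r"\s+(?P<y>[-+]?(\d+(\.\d*)?|\.\d+)([eE][-+]?\d+)?)"
        + r"\s+(?P<z>[-+]?(\d+(\.\d*)?|\.\d+)([eE][-+]?\d+)?)\s+")

m = vertex.match("  vertex 1.0 -2.5e-3 0.0\n")
# m.group('x'), m.group('y'), m.group('z') -> '1.0', '-2.5e-3', '0.0'
# the trailing \s+ means the line must end in whitespace (here the newline)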

Example 29

Project: calibre Source File: preprocess.py
    def __call__(self, html, remove_special_chars=None,
            get_preprocess_html=False):
        if remove_special_chars is not None:
            html = remove_special_chars.sub('', html)
        html = html.replace('\0', '')
        is_pdftohtml = self.is_pdftohtml(html)
        if self.is_baen(html):
            rules = []
        elif self.is_book_designer(html):
            rules = self.BOOK_DESIGNER
        elif is_pdftohtml:
            rules = self.PDFTOHTML
        else:
            rules = []

        start_rules = []
        if is_pdftohtml:
            # Remove non breaking spaces
            start_rules.append((re.compile(ur'\u00a0'), lambda match : ' '))

        if not getattr(self.extra_opts, 'keep_ligatures', False):
            html = _ligpat.sub(lambda m:LIGATURES[m.group()], html)

        user_sr_rules = {}
        # Function for processing search and replace

        def do_search_replace(search_pattern, replace_txt):
            try:
                search_re = re.compile(search_pattern)
                if not replace_txt:
                    replace_txt = ''
                rules.insert(0, (search_re, replace_txt))
                user_sr_rules[(search_re, replace_txt)] = search_pattern
            except Exception as e:
                self.log.error('Failed to parse %r regexp because %s' %
                        (search_pattern, as_unicode(e)))

        # search / replace using the sr?_search / sr?_replace options
        for i in range(1, 4):
            search, replace = 'sr%d_search'%i, 'sr%d_replace'%i
            search_pattern = getattr(self.extra_opts, search, '')
            replace_txt = getattr(self.extra_opts, replace, '')
            if search_pattern:
                do_search_replace(search_pattern, replace_txt)

        # multi-search / replace using the search_replace option
        search_replace = getattr(self.extra_opts, 'search_replace', None)
        if search_replace:
            search_replace = json.loads(search_replace)
            for search_pattern, replace_txt in reversed(search_replace):
                do_search_replace(search_pattern, replace_txt)

        end_rules = []
        # delete soft hyphens - moved here so it's executed after header/footer removal
        if is_pdftohtml:
            # unwrap/delete soft hyphens
            end_rules.append((re.compile(u'[­](</p>\s*<p>\s*)+\s*(?=[[a-z\d])'), lambda match: ''))
            # unwrap/delete soft hyphens with formatting
            end_rules.append((re.compile(u'[­]\s*(</(i|u|b)>)+(</p>\s*<p>\s*)+\s*(<(i|u|b)>)+\s*(?=[[a-z\d])'), lambda match: ''))

        length = -1
        if getattr(self.extra_opts, 'unwrap_factor', 0.0) > 0.01:
            docanalysis = DocAnalysis('pdf', html)
            length = docanalysis.line_length(getattr(self.extra_opts, 'unwrap_factor'))
            if length:
                # print "The pdf line length returned is " + str(length)
                # unwrap em/en dashes
                end_rules.append((re.compile(u'(?<=.{%i}[–—])\s*<p>\s*(?=[[a-z\d])' % length), lambda match: ''))
                end_rules.append(
                    # Un wrap using punctuation
                    (re.compile(u'(?<=.{%i}([a-zäëïöüàèìòùáćéíĺóŕńśúýâêîôûçąężıãõñæøþðßěľščťžňďřů,:)\IA\u00DF]|(?<!\&\w{4});))\s*(?P<ital></(i|b|u)>)?\s*(</p>\s*<p>\s*)+\s*(?=(<(i|b|u)>)?\s*[\w\d$(])' % length, re.UNICODE), wrap_lines),  # noqa
                )

        for rule in self.PREPROCESS + start_rules:
            html = rule[0].sub(rule[1], html)

        if self.regex_wizard_callback is not None:
            self.regex_wizard_callback(self.current_href, html)

        if get_preprocess_html:
            return html

        def dump(raw, where):
            import os
            dp = getattr(self.extra_opts, 'debug_pipeline', None)
            if dp and os.path.exists(dp):
                odir = os.path.join(dp, 'input')
                if os.path.exists(odir):
                    odir = os.path.join(odir, where)
                    if not os.path.exists(odir):
                        os.makedirs(odir)
                    name, i = None, 0
                    while not name or os.path.exists(os.path.join(odir, name)):
                        i += 1
                        name = '%04d.html'%i
                    with open(os.path.join(odir, name), 'wb') as f:
                        f.write(raw.encode('utf-8'))

        # dump(html, 'pre-preprocess')

        for rule in rules + end_rules:
            try:
                html = rule[0].sub(rule[1], html)
            except re.error as e:
                if rule in user_sr_rules:
                    self.log.error(
                        'User supplied search & replace rule: %s -> %s '
                        'failed with error: %s, ignoring.'%(
                            user_sr_rules[rule], rule[1], e))
                else:
                    raise

        if is_pdftohtml and length > -1:
            # Dehyphenate
            dehyphenator = Dehyphenator(self.extra_opts.verbose, self.log)
            html = dehyphenator(html,'html', length)

        if is_pdftohtml:
            from calibre.ebooks.conversion.utils import HeuristicProcessor
            pdf_markup = HeuristicProcessor(self.extra_opts, None)
            totalwords = 0
            if pdf_markup.get_word_count(html) > 7000:
                html = pdf_markup.markup_chapters(html, totalwords, True)

        # dump(html, 'post-preprocess')

        # Handle broken XHTML w/ SVG (ugh)
        if 'svg:' in html and SVG_NS not in html:
            html = html.replace(
                '<html', '<html xmlns:svg="%s"' % SVG_NS, 1)
        if 'xlink:' in html and XLINK_NS not in html:
            html = html.replace(
                '<html', '<html xmlns:xlink="%s"' % XLINK_NS, 1)

        html = XMLDECL_RE.sub('', html)

        if getattr(self.extra_opts, 'asciiize', False):
            from calibre.utils.localization import get_udc
            from calibre.utils.mreplace import MReplace
            unihandecoder = get_udc()
            mr = MReplace(data={u'«':u'<'*3, u'»':u'>'*3})
            html = mr.mreplace(html)
            html = unihandecoder.decode(html)

        if getattr(self.extra_opts, 'enable_heuristics', False):
            from calibre.ebooks.conversion.utils import HeuristicProcessor
            preprocessor = HeuristicProcessor(self.extra_opts, self.log)
            html = preprocessor(html)

        if getattr(self.extra_opts, 'smarten_punctuation', False):
            html = smarten_punctuation(html, self.log)

        try:
            unsupported_unicode_chars = self.extra_opts.output_profile.unsupported_unicode_chars
        except AttributeError:
            unsupported_unicode_chars = u''
        if unsupported_unicode_chars:
            from calibre.utils.localization import get_udc
            unihandecoder = get_udc()
            for char in unsupported_unicode_chars:
                asciichar = unihandecoder.decode(char)
                html = html.replace(char, asciichar)

        return html
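
do_search_replace above simply pairs each compiled user pattern with its replacement and prepends the pair to the rule list that is later applied with sub(). Stripped of calibre's plumbing, the mechanism is just (pattern and input are illustrative):

import re

rules = []
search_re = re.compile(r'\s+(?=</p>)')   # hypothetical user rule: drop whitespace before </p>
rules.insert(0, (search_re, ''))

html = '<p>hello   </p>'
for pat, repl in rules:
    html = pat.sub(repl, html)
# html -> '<p>hello</p>'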

Example 30

Project: AWS-Lambda-ML-Microservice-Skeleton Source File: f2py2e.py
def run_compile():
    """
    Do it all in one call!
    """
    import tempfile

    i = sys.argv.index('-c')
    del sys.argv[i]

    remove_build_dir = 0
    try:
        i = sys.argv.index('--build-dir')
    except ValueError:
        i = None
    if i is not None:
        build_dir = sys.argv[i + 1]
        del sys.argv[i + 1]
        del sys.argv[i]
    else:
        remove_build_dir = 1
        build_dir = tempfile.mkdtemp()

    _reg1 = re.compile(r'[-][-]link[-]')
    sysinfo_flags = [_m for _m in sys.argv[1:] if _reg1.match(_m)]
    sys.argv = [_m for _m in sys.argv if _m not in sysinfo_flags]
    if sysinfo_flags:
        sysinfo_flags = [f[7:] for f in sysinfo_flags]

    _reg2 = re.compile(
        r'[-][-]((no[-]|)(wrap[-]functions|lower)|debug[-]capi|quiet)|[-]include')
    f2py_flags = [_m for _m in sys.argv[1:] if _reg2.match(_m)]
    sys.argv = [_m for _m in sys.argv if _m not in f2py_flags]
    f2py_flags2 = []
    fl = 0
    for a in sys.argv[1:]:
        if a in ['only:', 'skip:']:
            fl = 1
        elif a == ':':
            fl = 0
        if fl or a == ':':
            f2py_flags2.append(a)
    if f2py_flags2 and f2py_flags2[-1] != ':':
        f2py_flags2.append(':')
    f2py_flags.extend(f2py_flags2)

    sys.argv = [_m for _m in sys.argv if _m not in f2py_flags2]
    _reg3 = re.compile(
        r'[-][-]((f(90)?compiler([-]exec|)|compiler)=|help[-]compiler)')
    flib_flags = [_m for _m in sys.argv[1:] if _reg3.match(_m)]
    sys.argv = [_m for _m in sys.argv if _m not in flib_flags]
    _reg4 = re.compile(
        r'[-][-]((f(77|90)(flags|exec)|opt|arch)=|(debug|noopt|noarch|help[-]fcompiler))')
    fc_flags = [_m for _m in sys.argv[1:] if _reg4.match(_m)]
    sys.argv = [_m for _m in sys.argv if _m not in fc_flags]

    if 1:
        del_list = []
        for s in flib_flags:
            v = '--fcompiler='
            if s[:len(v)] == v:
                from numpy.distutils import fcompiler
                fcompiler.load_all_fcompiler_classes()
                allowed_keys = list(fcompiler.fcompiler_class.keys())
                nv = ov = s[len(v):].lower()
                if ov not in allowed_keys:
                    vmap = {}  # XXX
                    try:
                        nv = vmap[ov]
                    except KeyError:
                        if ov not in vmap.values():
                            print('Unknown vendor: "%s"' % (s[len(v):]))
                    nv = ov
                i = flib_flags.index(s)
                flib_flags[i] = '--fcompiler=' + nv
                continue
        for s in del_list:
            i = flib_flags.index(s)
            del flib_flags[i]
        assert len(flib_flags) <= 2, repr(flib_flags)

    _reg5 = re.compile(r'[-][-](verbose)')
    setup_flags = [_m for _m in sys.argv[1:] if _reg5.match(_m)]
    sys.argv = [_m for _m in sys.argv if _m not in setup_flags]

    if '--quiet' in f2py_flags:
        setup_flags.append('--quiet')

    modulename = 'untitled'
    sources = sys.argv[1:]

    for optname in ['--include_paths', '--include-paths']:
        if optname in sys.argv:
            i = sys.argv.index(optname)
            f2py_flags.extend(sys.argv[i:i + 2])
            del sys.argv[i + 1], sys.argv[i]
            sources = sys.argv[1:]

    if '-m' in sys.argv:
        i = sys.argv.index('-m')
        modulename = sys.argv[i + 1]
        del sys.argv[i + 1], sys.argv[i]
        sources = sys.argv[1:]
    else:
        from numpy.distutils.command.build_src import get_f2py_modulename
        pyf_files, sources = filter_files('', '[.]pyf([.]src|)', sources)
        sources = pyf_files + sources
        for f in pyf_files:
            modulename = get_f2py_modulename(f)
            if modulename:
                break

    extra_objects, sources = filter_files('', '[.](o|a|so)', sources)
    include_dirs, sources = filter_files('-I', '', sources, remove_prefix=1)
    library_dirs, sources = filter_files('-L', '', sources, remove_prefix=1)
    libraries, sources = filter_files('-l', '', sources, remove_prefix=1)
    undef_macros, sources = filter_files('-U', '', sources, remove_prefix=1)
    define_macros, sources = filter_files('-D', '', sources, remove_prefix=1)
    for i in range(len(define_macros)):
        name_value = define_macros[i].split('=', 1)
        if len(name_value) == 1:
            name_value.append(None)
        if len(name_value) == 2:
            define_macros[i] = tuple(name_value)
        else:
            print('Invalid use of -D:', name_value)

    from numpy.distutils.system_info import get_info

    num_info = {}
    if num_info:
        include_dirs.extend(num_info.get('include_dirs', []))

    from numpy.distutils.core import setup, Extension
    ext_args = {'name': modulename, 'sources': sources,
                'include_dirs': include_dirs,
                'library_dirs': library_dirs,
                'libraries': libraries,
                'define_macros': define_macros,
                'undef_macros': undef_macros,
                'extra_objects': extra_objects,
                'f2py_options': f2py_flags,
                }

    if sysinfo_flags:
        from numpy.distutils.misc_util import dict_append
        for n in sysinfo_flags:
            i = get_info(n)
            if not i:
                outmess('No %s resources found in system'
                        ' (try `f2py --help-link`)\n' % (repr(n)))
            dict_append(ext_args, **i)

    ext = Extension(**ext_args)
    sys.argv = [sys.argv[0]] + setup_flags
    sys.argv.extend(['build',
                     '--build-temp', build_dir,
                     '--build-base', build_dir,
                     '--build-platlib', '.'])
    if fc_flags:
        sys.argv.extend(['config_fc'] + fc_flags)
    if flib_flags:
        sys.argv.extend(['build_ext'] + flib_flags)

    setup(ext_modules=[ext])

    if remove_build_dir and os.path.exists(build_dir):
        import shutil
        outmess('Removing build directory %s\n' % (build_dir))
        shutil.rmtree(build_dir)
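
Each of the _reg1.._reg5 patterns above exists only to partition sys.argv into flag groups. For example, the '--link-' filter (the argv below is illustrative):

import re

_reg1 = re.compile(r'[-][-]link[-]')
argv = ['f2py', '--link-lapack_opt', '-c', '-m', 'mymod', 'code.f90']
sysinfo_flags = [a for a in argv[1:] if _reg1.match(a)]
# sysinfo_flags -> ['--link-lapack_opt']; run_compile then strips the 7-character
# '--link-' prefix with f[7:], leaving 'lapack_opt' for numpy's get_info()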

Example 31

Project: androwarn Source File: core.py
def match_current_instruction(current_instruction, registers_found) :
	"""
		@param current_instruction : the current instruction to be analyzed
		@param registers_found : a dictionary of registers recovered so far
	
		@rtype : the instruction name from the constants above, the local register number and its value, an updated version of the registers_found
	"""
	p_const 				= re.compile('^const(?:\/4|\/16|\/high16|-wide(?:\/16|\/32)|-wide\/high16|)? v([0-9]+), \#\+?(-?[0-9]+(?:\.[0-9]+)?)$')
	p_const_string			= re.compile("^const-string(?:||-jumbo) v([0-9]+), '(.*)'$")
	p_move					= re.compile('^move(?:|\/from16|-wide(?:\/from16|\/16)|-object(?:|\/from16|\/16))? v([0-9]+), (v[0-9]+)$')
	p_move_result			= re.compile('^move(?:-result(?:|-wide|-object)|-exception)? v([0-9]+)$')
	p_aput					= re.compile('^aput(?:-wide|-object|-boolean|-byte|-char|-short|) v([0-9]+), v([0-9]+), v([0-9]+)$')
	p_invoke 				= re.compile('^invoke-(?:static|virtual|direct|super|interface|interface-range|virtual-quick|super-quick) v([0-9]+), (L(?:.*);->.*)$')
	p_invoke_2_registers 	= re.compile('^invoke-(?:static|virtual|direct|super|interface|interface-range|virtual-quick|super-quick) v([0-9]+), v([0-9]+), (L(?:.*);->.*)$')
	p_invoke_no_register	= re.compile('^invoke-(?:static|virtual|direct|super|interface|interface-range|virtual-quick|super-quick) (L(?:.*);->.*)$')
	p_new_instance 			= re.compile('^new-instance v([0-9]+), (L(?:.*);)$')
	
	
	# String concat
	current_instruction = "%s %s" % (current_instruction.get_name(), current_instruction.get_output())
	
	# Returned values init
	instruction_name = ''
	local_register_number = -1
	local_register_value = -1
	
	
	if p_const_string.match(current_instruction) :
		#print p_const_string.match(current_instruction).groups()
		
		instruction_name = CONST_STRING
		
		register_number = p_const_string.match(current_instruction).groups()[0]
		register_value = p_const_string.match(current_instruction).groups()[1]
		
		if not(register_number in registers_found) :
			registers_found[register_number] = register_value
		else :
			old_string = registers_found[register_number]
			new_string = "%s %s" % (str(register_value), str(old_string))
			registers_found[register_number] = new_string
		
		local_register_number = register_number
		local_register_value = register_value


	if p_const.match(current_instruction) :
		#print p_const.match(current_instruction).groups()
		
		instruction_name = CONST
		
		register_number = p_const.match(current_instruction).groups()[0]
		register_value = p_const.match(current_instruction).groups()[1]
		
		if not(register_number in registers_found) :
			registers_found[register_number] = register_value
		
		local_register_number = register_number
		local_register_value = register_value


	if p_move.match(current_instruction) :
		#print p_move.match(current_instruction).groups()
		
		instruction_name = MOVE
		
		register_number = p_move.match(current_instruction).groups()[0]
		register_value = p_move.match(current_instruction).groups()[1]
		
		if not(register_number in registers_found) :
			registers_found[register_number] = register_value				
		
		local_register_number = register_number
		local_register_value = register_value


	if p_move_result.match(current_instruction) :
		#print p_move_result.match(current_instruction).groups()
		
		instruction_name = MOVE_RESULT
		
		register_number = p_move_result.match(current_instruction).groups()[0]
		register_value = ''
		
		if not(register_number in registers_found) :
			registers_found[register_number] = register_value		
		
		local_register_number = register_number
		local_register_value = register_value	
		#print "number returned %s" % local_register_number
		#print "value returned %s" % local_register_value	

	if p_invoke.match(current_instruction) :
		#print p_invoke.match(current_instruction).groups()
		
		instruction_name = INVOKE
		
		register_number = p_invoke.match(current_instruction).groups()[0]
		register_value = p_invoke.match(current_instruction).groups()[1]
		
		if not(register_number in registers_found) :
			registers_found[register_number] = register_value		
		
		local_register_number = register_number
		local_register_value = register_value		
	
	if p_invoke_no_register.match(current_instruction) :
		#print p_invoke.match(current_instruction).groups()
		
		instruction_name = INVOKE_NO_REGISTER
		
		register_number = ''
		register_value = p_invoke_no_register.match(current_instruction).groups()[0]
		
		local_register_number = register_number
		local_register_value = register_value
	
	if p_invoke_2_registers.match(current_instruction) :
		#print p_invoke.match(current_instruction).groups()
		
		instruction_name = INVOKE_NO_REGISTER
		
		register_number = p_invoke_2_registers.match(current_instruction).groups()[0]
		register_value = p_invoke_2_registers.match(current_instruction).groups()[1]
		
		local_register_number = register_number
		local_register_value = register_value		
		
	if p_new_instance.match(current_instruction) :
		#print p_new_instance.match(current_instruction).groups()
		
		instruction_name = NEW_INSTANCE
		
		register_number = p_new_instance.match(current_instruction).groups()[0]
		register_value = p_new_instance.match(current_instruction).groups()[1]
		
		if not(register_number in registers_found) :
			registers_found[register_number] = register_value		
		
		local_register_number = register_number
		local_register_value = register_value
	
	if p_aput.match(current_instruction) :
		#print p_aput.match(current_instruction).groups()
		
		instruction_name = APUT
		
		register_object_reference = p_aput.match(current_instruction).groups()[0]
		register_array_reference = p_aput.match(current_instruction).groups()[1]
		register_element_index = p_aput.match(current_instruction).groups()[2]

		local_register_number = register_object_reference 
		local_register_value =  register_array_reference
		
	
	return instruction_name, local_register_number, local_register_value, registers_found	
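
Each pattern above targets one Dalvik instruction shape; the const-string pattern, for instance, captures the destination register and the string literal. A standalone check (the instruction text is illustrative):

import re

p_const_string = re.compile("^const-string(?:||-jumbo) v([0-9]+), '(.*)'$")
m = p_const_string.match("const-string v5, 'http://example.com/'")
# m.groups() -> ('5', 'http://example.com/')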

Example 32

Project: feaTools Source File: parser.py
def _parseUnknown(writer, text):
    text = text.strip()
    ## extract all table names
    tableNames = table_findAll_RE.findall(text)
    for precedingMark, tableName in tableNames:
        # a regular expression specific to this lookup must
        # be created so that nested lookups are safely handled
        thisTableContentRE = list(tableContentRE)
        thisTableContentRE.insert(2, tableName)
        thisTableContentRE.insert(6, tableName)
        thisTableContentRE = re.compile("".join(thisTableContentRE))
        found = thisTableContentRE.search(text)
        tableText = found.group(2)
        start, end = found.span()
        precedingText = text[:start]
        if precedingMark:
            precedingText += precedingMark
        _parseUnknown(writer, precedingText)
        _parseTable(writer, tableName, tableText)
        text = text[end:]
    ## extract all feature names
    featureTags = feature_findAll_RE.findall(text)
    for precedingMark, featureTag in featureTags:
        # a regular expression specific to this lookup must
        # be created so that nested lookups are safely handled
        thisFeatureContentRE = list(featureContentRE)
        thisFeatureContentRE.insert(2, featureTag)
        thisFeatureContentRE.insert(6, featureTag)
        thisFeatureContentRE = re.compile("".join(thisFeatureContentRE))
        found = thisFeatureContentRE.search(text)
        featureText = found.group(2)
        start, end = found.span()
        precedingText = text[:start]
        if precedingMark:
            precedingText += precedingMark
        _parseUnknown(writer, precedingText)
        _parseFeature(writer, featureTag, featureText)
        text = text[end:]
    ## extract all lookup names
    lookupNames = lookup_findAll_RE.findall(text)
    for precedingMark, lookupName in lookupNames:
        # a regular expression specific to this lookup must
        # be created so that nested lookups are safely handled
        thisLookupContentRE = list(lookupContentRE)
        thisLookupContentRE.insert(2, lookupName)
        thisLookupContentRE.insert(6, lookupName)
        thisLookupContentRE = re.compile("".join(thisLookupContentRE))
        found = thisLookupContentRE.search(text)
        lookupText = found.group(2)
        start, end = found.span()
        precedingText = text[:start]
        if precedingMark:
            precedingText += precedingMark
        _parseUnknown(writer, precedingText)
        _parseLookup(writer, lookupName, lookupText)
        text = text[end:]
    ## extract all class data
    classes = classDefinitionRE.findall(text)
    for precedingMark, className, classContent in classes:
        text = _executeSimpleSlice(precedingMark, text, classDefinitionRE, writer)
        className = "@" + className
        _parseClass(writer, className, classContent)
    ## extract substitutions
    # sub type 1 and 4
    subType1s = subType1And2And4RE.findall(text)
    for precedingMark, target, replacement in subType1s:
        text = _executeSimpleSlice(precedingMark, text, subType1And2And4RE, writer)
        _parseSubType1And2And4(writer, target, replacement)
    # sub type 3
    subType3s = subType3RE.findall(text)
    for precedingMark, target, replacement in subType3s:
        text = _executeSimpleSlice(precedingMark, text, subType3RE, writer)
        _parseSubType3(writer, target, replacement)
    # sub type 6
    subType6s = subType6RE.findall(text)
    for precedingMark, target, replacement in subType6s:
        text = _executeSimpleSlice(precedingMark, text, subType6RE, writer)
        _parseSubType6(writer, target, replacement)
    # ignore sub type 6
    ignoreSubType6s = ignoreSubType6RE.findall(text)
    for precedingMark, target in ignoreSubType6s:
        text = _executeSimpleSlice(precedingMark, text, ignoreSubType6RE, writer)
        _parseSubType6(writer, target, replacement=None, ignore=True)
    ## extract positions
    # pos type 1
    posType1s = posType1RE.findall(text)
    for precedingMark, target, value in posType1s:
        text = _executeSimpleSlice(precedingMark, text, posType1RE, writer)
        _parsePosType1(writer, target, value)
    # pos type 2
    posType2s = posType2RE.findall(text)
    for precedingMark, enumTag, posTag, targetAndValue in posType2s:
        text = _executeSimpleSlice(precedingMark, text, posType2RE, writer)
        _parsePosType2(writer, targetAndValue, needEnum=enumTag.strip())
    ## extract other data
    # XXX look at FDK spec. sometimes a language tag of dflt will be passed
    # it should be handled differently than the other tags.
    # languagesystem
    languagesystems = languagesystemRE.findall(text)
    for precedingMark, scriptTag, languageTag in languagesystems:
        text = _executeSimpleSlice(precedingMark, text, languagesystemRE, writer)
        writer.languageSystem(languageTag, scriptTag)
    # script
    scripts = scriptRE.findall(text)
    for precedingMark, scriptTag in scripts:
        text = _executeSimpleSlice(precedingMark, text, scriptRE, writer)
        writer.script(scriptTag)
    # language
    languages = languageRE.findall(text)
    for precedingMark, languageTag, otherKeyword in languages:
        text = _executeSimpleSlice(precedingMark, text, languageRE, writer)
        if not otherKeyword or otherKeyword == "include_dflt":
            writer.language(languageTag)
        elif otherKeyword == "exclude_dflt":
            writer.language(languageTag, includeDefault=False)
    # include
    inclusions = includeRE.findall(text)
    for precedingMark, path in inclusions:
        text = _executeSimpleSlice(precedingMark, text, includeRE, writer)
        writer.include(path)
    # feature reference
    featureReferences = featureReferenceRE.findall(text)
    for precedingMark, featureTag in featureReferences:
        text = _executeSimpleSlice(precedingMark, text, featureReferenceRE, writer)
        writer.featureReference(featureTag)
    # lookup reference
    lookupReferences = lookupReferenceRE.findall(text)
    for precedingMark, lookupName in lookupReferences:
        text = _executeSimpleSlice(precedingMark, text, lookupReferenceRE, writer)
        writer.lookupReference(lookupName)
    # lookupflag
    lookupflags = lookupflagRE.findall(text)
    for precedingMark, lookupflagValues in lookupflags:
        text = _executeSimpleSlice(precedingMark, text, lookupflagRE, writer)
        _parseLookupFlag(writer, lookupflagValues)
    # subtable break
    subtables = subtableRE.findall(text)
    for precedingMark in subtables:
        text = _executeSimpleSlice(precedingMark, text, subtableRE, writer)
        writer.subtableBreak()
    ## extract all featureNames
    featureNames = featureNamesRE.findall(text)
    for precedingMark in featureNames:
        text = _executeSimpleSlice(precedingMark, text, featureNamesRE, writer)
    # empty instructions
    terminators = terminatorRE.findall(text)
    for terminator in terminators:
        text = _executeSimpleSlice(None, text, terminatorRE, writer)
        writer.rawText(terminator)
    text = text.strip()
    if text:
        raise FeaToolsParserSyntaxError("Invalid Syntax: %s" % text)
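
All three extraction loops above use the same trick: splice the block name found by the findall pass into a pattern template before compiling, so a nested block with a different name cannot terminate the match early. A minimal sketch of that splice (the template pieces and group numbering here are simplified assumptions, not feaTools' actual tableContentRE):

import re

template = [r"table\s+", None, r"\s*\{", r"(.*?)", r"\}\s*", None, r"\s*;"]
pieces = list(template)
pieces[1] = re.escape("GDEF")   # the block name appears at the start...
pieces[5] = re.escape("GDEF")   # ...and again after the closing brace
block_re = re.compile("".join(pieces), re.DOTALL)

m = block_re.search("table GDEF { GlyphClassDef ...; } GDEF;")
# m.group(1) -> ' GlyphClassDef ...; '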

Example 33

Project: pyCAF Source File: analyze_packages.py
Function: analyze
    def analyze(self):
        """
        Core function which is called in the run
        Do the effective work
        """
        start_time = time.time()

        path = self.config.ressources_path

        if path is None:
            path = "/tmp/"
        
        # Create the directory containing data if it does not exist
        if not os.path.exists(path):
            os.makedirs(path)
        
        # Check packages algorithm
        # Step 1 : check packages in updates packages
        # Ok -> uptodate    KO -> undetermined (uptodate or obsolete or unchecked)
        # Step 2 : check name with updates packages
        # Ok -> obsolete    KO -> undetermined (uptodate or obsolete or unchecked)
        # Step 3 : check packages with centos release
        # Ok -> uptodate    KO -> undetermined (obsolete or unchecked)
        # Step 4 : check name with centos release
        # Ok -> obsolete    KO -> unchecked
        
        # List of package object to analyze
        packages_list_to_analyze = self.packages.dict.values()
        
        # Temporary lists to manage packages
        packages_tmp_uptodate = []
        packages_tmp_obsolete = []
        packages_tmp_unchecked = []
            
        # Download packages list if necessary 
        # "http://mirrors.atosworldline.com/public/centos/filelist.gz"
        url = self.config_server.centos["packages_url_stable_version"]
        print "url : " + url
        file_name = "centos_packages.gz"
        
        pkg_string_header = "./" + self.server.osversion + "/os/" + self.server.osarchitecture + "/CentOS/"
        pkg_string_header_update = "./" + self.server.osversion + "/updates/" + self.server.osarchitecture + "/RPMS/"        
        pkg_release_expression = pkg_string_header + "(?P<pkg_release>[A-Za-z0-9:\-~.+_]+)"
        pkg_update_expression = pkg_string_header_update + "(?P<pkg_update>[A-Za-z0-9:\-~.+_]+)" 
        #pkg_obsolete_expression = "(?P<pkg_name_start>[A-Za-z0-9:+~\-\._]+)(?P<pkg_version>(\-))"
        pkg_obsolete_expression = "(?P<pkg_name_start>[A-Za-z0-9:+~\-\._]+)"
        
        pkg_release_reg = re.compile(pkg_release_expression)
        pkg_update_reg = re.compile(pkg_update_expression)
        pkg_obsolete_reg = re.compile(pkg_obsolete_expression)
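        # Illustrative note (values are made up): with osversion "6.4" and
        # osarchitecture "x86_64", pkg_release_reg turns a filelist line such as
        # "./6.4/os/x86_64/CentOS/bash-4.1.2-15.el6_4.x86_64.rpm" into the bare
        # package file name "bash-4.1.2-15.el6_4.x86_64.rpm" (group 'pkg_release')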
        
        # List of release packages
        release_list = []
        # List of updates packages
        updates_list = []
        
        if self.server.osversion in self.config_server.centos["stable_versions"]:
            self._lock.acquire()
            download_boolean = tools.downloadFile(url, file_name, self.config, path)
            self._lock.release()
            if not download_boolean:
                self._logger.error("Download file error")
                return False
                
            # Read the downloaded file containing packages list 
            file_path = path + file_name
            f_1 = gzip.open(file_path,'rb')
            rawtext = f_1.read()
            f_1.close()
            
            # Split text in lines
            releases = rawtext.split('\n')
            
            # Read the lines of packages and fill the release and update packages lists
            for l_1 in releases:
                if pkg_release_reg.match(l_1) is not None:
                    # Fill the release list
                    result_re = pkg_release_reg.match(l_1)
                    pkg_name = result_re.group('pkg_release')
                    release_list.append(pkg_name)
                elif pkg_update_reg.match(l_1) is not None:
                    # Fill the updates list
                    result_re = pkg_update_reg.match(l_1)
                    pkg_name = result_re.group('pkg_update')
                    updates_list.append(pkg_name)
        else:
            # url_os = "http://vault.centos.org/" + str(self.server.osversion) + "/os/" + str(self.server.osarchitecture) + "/CentOS/"
            url_os = "http://vault.centos.org/" + str(self.server.osversion) + "/os/SRPMS/"
            url_updates = "http://vault.centos.org/" + str(self.server.osversion) + "/updates/SRPMS/" # + str(self.server.osarchitecture) + "/RPMS/"
            pattern = r"(?P<var1>.*)(?P<var2><a href=\")(?P<pkg>.+)(?P<var3>\.src.rpm\">)"
            
            reg = re.compile(pattern)
            
            lines_os = urllib2.urlopen(url_os).read().split('\n')
            lines_updates = urllib2.urlopen(url_updates).read().split('\n')
            
            for line in lines_os:
                if reg.match(line):
                    result_re = reg.match(line)
                    # release_list.append(result_re.group('pkg') + ".rpm")
                    release_list.append(result_re.group('pkg'))
                    
            for line in lines_updates:
                if reg.match(line):
                    result_re = reg.match(line)
                    # updates_list.append(result_re.group('pkg') + ".rpm")
                    updates_list.append(result_re.group('pkg'))
        # --------------------------------------------
        # ------------------ Step 1 ------------------
        # --------------------------------------------
    
        # tmp_list which will be the future packages_list_to_analyze after the loop
        tmp_list = []
        for pkg in packages_list_to_analyze:
            # Add the suffix .osarchitecture.rpm
            # ex : package.x86_64.rpm
            pkg_with_arch = pkg.name + "." + self.server.osarchitecture + ".rpm"

            if pkg_with_arch in updates_list or pkg_with_arch.replace(self.server.osarchitecture + ".rpm", "noarch.rpm") in updates_list:
                packages_tmp_uptodate.append(sf.Package(pkg.name, pkg.version))

            else:
                tmp_list.append(pkg)
        packages_list_to_analyze = tmp_list
        
        # --------------------------------------------
        # ------------------ Step 2 ------------------
        # --------------------------------------------
        
        # Dictionary which maps the start of each package name to the full line
        updates_dict_start={}
        for pkg_update in updates_list:
            if pkg_obsolete_reg.match(pkg_update) is not None:
                result_re = pkg_obsolete_reg.match(pkg_update)
                updates_dict_start[result_re.group('pkg_name_start')] = pkg_update
            else:
                self._logger.error("Regular expression parsing error : step 2 - a")
        
        tmp_list = []
        for pkg in packages_list_to_analyze:
            pkg_found = False    
            if pkg_obsolete_reg.match(pkg.name) is not None:
                result_re = pkg_obsolete_reg.match(pkg.name)
                pkg_name_start = result_re.group('pkg_name_start')
                
                for pkg_update_start in updates_dict_start.keys():
                    if pkg_name_start == pkg_update_start:
                        pkg_found = True
                        packages_tmp_obsolete.append(sf.Package(pkg.name, pkg.version, updates_dict_start[pkg_update_start]))
            else:
                self._logger.error("Regular expression parsing error : step 2 - b")
                
            if not pkg_found:
                tmp_list.append(pkg)

        packages_list_to_analyze = tmp_list
        
        # --------------------------------------------
        # ------------------ Step 3 ------------------
        # --------------------------------------------            
        
        tmp_list = []
        for pkg in packages_list_to_analyze:
            # Add the suffix .osarchitecture.rpm
            # ex : package.x86_64.rpm
            pkg_with_arch = pkg.name + "." + self.server.osarchitecture + ".rpm"

            if pkg_with_arch in release_list or pkg_with_arch.replace(self.server.osarchitecture + ".rpm", "noarch.rpm") in release_list:
                packages_tmp_uptodate.append(sf.Package(pkg.name, pkg.version))

            else:
                tmp_list.append(pkg)
        packages_list_to_analyze = tmp_list
        
        # --------------------------------------------
        # ------------------ Step 4 ------------------
        # --------------------------------------------
        
        release_dict_start={}
        print len(release_list)
        for pkg_release in release_list:
            if pkg_obsolete_reg.match(pkg_release) is not None:
                result_re = pkg_obsolete_reg.match(pkg_release)
                release_dict_start[result_re.group('pkg_name_start')] = pkg_release 
            else:
                self._logger.error("Regular expression parsing error : step 4 - a")
        
        for pkg in packages_list_to_analyze:
            # print "Package  :: " + pkg.name
            pkg_found = False
            if pkg_obsolete_reg.match(pkg.name) is not None:
                result_re = pkg_obsolete_reg.match(pkg.name)
                
                pkg_name_start = result_re.group('pkg_name_start')
                # print "info :: " + pkg_name_start
                if pkg.name == pkg_name_start:
                    pkg_found = True
                    packages_tmp_uptodate.append(sf.Package(pkg.name, pkg.version, ""))
                    print " Package installé : " + pkg.name + "// Package attendu : " + pkg_name_start
                else:
                    print "Package installé : " + pkg.name
                    print "Package attendu  :" + pkg_name_start
                #for pkg_release_start in release_dict_start.keys():
                #    if pkg_name_start == pkg_release_start:
                #        pkg_found = True
                #        packages_tmp_obsolete.append(sf.Package(pkg.name, pkg.version, release_dict_start[pkg_release_start]))
            else:
                self._logger.error("Regular expression parsing error : step 4 - b")
                print "plop"
            if not pkg_found:
                packages_tmp_unchecked.append(sf.Package(pkg.name, pkg.version))

            
#
        # Store the results and sort the lists in alphabetical order by name
        self.server.packages_uptodate.push_package_list(packages_tmp_uptodate)
        self.server.packages_obsolete.push_package_list(packages_tmp_obsolete)
        self.server.packages_unchecked.push_package_list(packages_tmp_unchecked)
#    
        # Fill the reporting
        self.server.nb_packages = self.packages.get_number()
        self.server.nb_packages_uptodate = self.server.packages_uptodate.get_number()
        self.server.nb_packages_obsolete = self.server.packages_obsolete.get_number()
        self.server.nb_packages_unchecked = self.server.packages_unchecked.get_number()
        
        self._logger.debug("Nb Packages : "+str(self.server.nb_packages))
        self._logger.debug("Nb up to date : "+str(self.server.nb_packages_uptodate))
        self._logger.debug("Nb Packages obsolete : "+str(self.server.nb_packages_obsolete))
        self._logger.debug("Nb Packages unchecked : "+str(self.server.nb_packages_unchecked) )
    
        
#        
        end_time = time.time()
        self._logger.info("Elapsed time: "+str((end_time - start_time) * 1000)+" msecs")
        self._logger.info("CentOS packages successfully analyzed !")

        
            
        return True
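
A minimal standalone sketch of the directory-listing pattern compiled in the vault.centos.org branch above. Only the regular expression itself comes from the example; the sample HTML line is invented for illustration.

import re

# Pattern from the example: pull the source-package name out of an
# '<a href="...src.rpm">' link in an Apache directory index page.
pattern = r"(?P<var1>.*)(?P<var2><a href=\")(?P<pkg>.+)(?P<var3>\.src.rpm\">)"
reg = re.compile(pattern)

# Hypothetical listing line, roughly what vault.centos.org serves:
line = '<img src="/icons/unknown.gif"> <a href="bash-4.1.2-33.el6.src.rpm">bash-4.1.2-33.el6.src.rpm</a>'

m = reg.match(line)
if m:
    print(m.group('pkg'))  # -> bash-4.1.2-33.el6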

Example 34

Project: dolo Source File: modfile.py
def parse_dynare_text(txt,add_model=True,full_output=False, debug=False):
    '''
    Imports the content of a modfile into the current interpreter scope
    '''
    # here we call an "instruction group" a string ending with a semicolon
    # an "instruction group" can have several lines
    # a line can be
    # - a comment //...
    # - an old-style tag //$...
    # - a new-style tag [key1='value1',..]
    # - macro-instruction @#...
    # A Modfile contains several blocks (in this order) :
    # - an initblock defining variables, exovariables, parameters, initialization
    #   inside the initblock the order of declaration doesn't matter
    # - a model block with two special lines (model; end;)
    # - optional blocks (like endval, shocks)
    #    separated by free matlab instructions in any order;
    # - all other instructions are ignored

    otxt = txt
    otxt = otxt.replace("\r\n","\n")
    otxt = otxt.replace("^","**")

    # first, we remove end-of-line comments : they are definitely lost
    regex = re.compile("(.+)//[^#](.*)")
    def remove_end_comment(line):
        res = regex.search(line)
        if res:
            l = res.groups(1)[0]
            return(l)
        else:
            return line
    txt = str.join("\n",map(remove_end_comment,otxt.split("\n")))

    name_regex = re.compile("//\s*fname\s*=\s*'(.*)'")
    m = name_regex.search(txt)
    if m:
        fname = m.group(1)
    else:
        fname = None


    instruction_groups = [Instruction_group(s) for s in txt.split(";")]

    instructions = [ig.instruction for ig in instruction_groups]

    if debug:
        print('Elementary instructions')
        for i in instruction_groups:
            print(i)

    try:
        imodel = [re.compile('model(\(.*\)|)').match(e) is not None for e in instructions]
        imodel = imodel.index(True)
        #imodel = instructions.index("model") #this doesn't work for "MODEL"
        iend = instructions.index("end")
        model_block = instruction_groups[imodel:(iend+1)]
        init_block = instruction_groups[0:imodel]
    except:
        raise Exception('Model block could not be found.')

    next_instructions = instructions[(iend+1):]
    next_instruction_groups = instruction_groups[(iend+1):]

    if 'initval' in next_instructions:
        iinitval = next_instructions.index('initval')
        iend = next_instructions.index('end',iinitval)
        matlab_block_1 = next_instruction_groups[0:iinitval]
        initval_block = next_instruction_groups[iinitval:(iend+1)]
        next_instruction_groups = next_instruction_groups[(iend+1):]
        next_instructions = next_instructions[(iend+1):]
    else:
        initval_block = None
        matlab_block_1 = None

    if 'endval' in next_instructions:
        iendval = next_instructions.index('endval')
        iend = next_instructions.index('end',iendval)
        matlab_block_2 = next_instruction_groups[0:iendval]
        endval_block = next_instruction_groups[iendval:(iend+1)]
        next_instruction_groups = next_instruction_groups[(iend+1):]
        next_instructions = next_instructions[(iend+1):]
    else:
        endval_block = None
        matlab_block_2 = None

    # TODO : currently shocks block needs to follow initval, this restriction should be removed
    if 'shocks' in next_instructions:
        ishocks = next_instructions.index('shocks')
        iend = next_instructions.index('end',ishocks)
        matlab_block_3 = next_instruction_groups[0:ishocks]
        shocks_block = next_instruction_groups[ishocks:(iend+1)]
        next_instruction_groups = next_instruction_groups[(iend+1):]
        next_instructions = next_instructions[(iend+1):]
    else:
        shocks_block = None
        matlab_block_3 = None

    try:
        init_regex = re.compile("(parameters |var |varexo |)(.*)")
        var_names = []
        varexo_names = []
        parameters_names = []
        declarations = {}
        for ig in init_block:
            if ig.instruction != '':
                m = init_regex.match(ig.instruction)
                if not m:
                    raise Exception("Unexpected instruction in init block : " + str(ig.instruction))
                if m.group(1) == '':
                    [lhs,rhs] = m.group(2).split("=")
                    lhs = lhs.strip()
                    rhs = rhs.strip()
                    declarations[lhs] = rhs
                else:
                    arg = m.group(2).replace(","," ")
                    names = [vn.strip() for vn in arg.split()]
                    if m.group(1).strip() == 'var':
                        dest = var_names
                    elif m.group(1).strip() == 'varexo':
                        dest = varexo_names
                    elif m.group(1).strip() == 'parameters':
                        dest = parameters_names
                    for n in names:
                        if not n in dest:
                            dest.append(n)
                        else:
                            raise Exception("symbol %s has already been defined".format(n))
    except Exception as e:
        raise Exception('Init block could not be read : ' + str(e) )
    # the following instruction set the variables "variables","shocks","parameters"


    variables = []
    for vn in var_names:
        v = Variable(vn)
        variables.append(v)

    shocks = []
    for vn in varexo_names:
        s = Shock(vn)
        shocks.append(s)

    parameters = []
    for vn in parameters_names:
        p = Parameter(vn)
        parameters.append(p)

    parse_dict = dict()
    for v in variables + shocks + parameters:
        parse_dict[v.name] = v

    special_symbols = [sympy.exp,sympy.log,sympy.sin,sympy.cos, sympy.atan, sympy.tan]
    for s in special_symbols:
        parse_dict[str(s)] = s
    parse_dict['sqrt'] = sympy.sqrt


    # Read parameters values
    parameters_values = {}
    for p in declarations:
        try:
            rhs = eval(declarations[p], parse_dict)
        except Exception as e:
            Exception("Impossible to evaluate parameter value : " + str(e))
        try:
            lhs = eval(p,parse_dict)
        except Exception as e:
            # here we could declare p
            raise e
        parameters_values[lhs] = rhs


    # Now we read the model block
    model_tags = model_block[0].tags
    equations = []
    for ig in model_block[1:-1]:
        if ig.instruction != '':
            teq = ig.instruction.replace('^',"**")
            if '=' in teq:
                teqlhs,teqrhs = teq.split("=")
            else:
                teqlhs = teq
                teqrhs = '0'
            eqlhs = eval(teqlhs, parse_dict)
            eqrhs = eval(teqrhs, parse_dict)
            eq = Equation(eqlhs,eqrhs)
            eq.tags.update(ig.tags)
    #        if eq.tags.has_key('name'):
    #            eq.tags[] = ig.tags['name']
            equations.append(eq)

    # Now we read the initval block
    init_values = {}
    if initval_block != None:
        for ig in initval_block[1:-1]:
            if len(ig.instruction.strip()) >0:
                try:
                    [lhs,rhs] = ig.instruction.split("=")
                except Exception as e:
                    print(ig.instruction)
                    raise e
                init_values[eval(lhs,parse_dict)] = eval(rhs,parse_dict)

    # Now we read the endval block
    # I don't really care about the endval block !

    end_values = {}
    if endval_block != None:
        for ig in endval_block[1:-1]:
            [lhs,rhs] = ig.instruction.split("=")
            end_values[eval(lhs)] = eval(rhs)

    # Now we read the shocks block
    covariances = None
    if shocks_block != None:
        covariances = sympy.zeros(len(shocks))
        regex1 = re.compile("var (.*?),(.*?)=(.*)|var (.*?)=(.*)")
        for ig in shocks_block[1:-1]:
            m = regex1.match(ig.instruction)
            if not m:
                raise Exception("unrecognized instruction in block shocks : " + str(ig.instruction))
            if m.group(1) != None:
                varname1 = m.group(1).strip()
                varname2 = m.group(2).strip()
                value = m.group(3).strip().replace("^","**")
            elif m.group(4) != None:
                varname1 = m.group(4).strip()
                varname2 = varname1
                value = m.group(5).strip().replace("^","**")
            i = varexo_names.index(varname1)
            j = varexo_names.index(varname2)
            covariances[i,j] = eval(value,parse_dict)
            covariances[j,i] = eval(value,parse_dict)

    calibration = {}
    calibration.update(parameters_values)
    calibration.update(init_values)
    symbols = {'variables': variables, 'shocks': shocks, 'parameters': parameters}

    from trash.dolo.symbolic.model import SModel
    model = SModel({'dynare_block': equations}, symbols, calibration, covariances)
    return model
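
A small standalone illustration of two of the helper patterns compiled above while pre-processing a Dynare modfile: stripping end-of-line comments and extracting the optional fname tag. The patterns are the ones used in parse_dynare_text (written as raw strings here); the sample lines are invented.

import re

comment_re = re.compile(r"(.+)//[^#](.*)")          # strip '// ...' comments; [^#] leaves '//#' untouched
fname_re = re.compile(r"//\s*fname\s*=\s*'(.*)'")   # "// fname = '...'" declaration

line = "y = alpha*k + beta*c; // an end-of-line comment"
m = comment_re.search(line)
print(m.group(1) if m else line)   # -> "y = alpha*k + beta*c; "

txt = "// fname = 'rbc_model'\nvar y c k;"
m = fname_re.search(txt)
print(m.group(1) if m else None)   # -> rbc_model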

Example 35

Project: carml Source File: downloadbundle.py
    @defer.inlineCallbacks
    def run(self, options, mainoptions, connection):
        # NOTE the middle cert changed on April 10 or thereabouts;
        # still need to confirm this is legitimate?
        chain = [ssl.Certificate.loadPEM(pkg_resources.resource_string('carml', 'keys/torproject.pem')),
                 ssl.Certificate.loadPEM(pkg_resources.resource_string('carml', 'keys/digicert-sha2.pem')),
                 ssl.Certificate.loadPEM(pkg_resources.resource_string('carml', 'keys/digicert-root-ca.pem')),
                 ]
        cf = VerifyCertChainContextFactory(chain)

        error_wrapper = None
        if options['use-clearnet']:
            print(util.colors.red('WARNING') + ': downloading over plain Internet (not via Tor).')
            agent = Agent(reactor, contextFactory=cf)

        else:
            try:
                import txsocksx.http
                conn = "tcp:127.0.0.1:9050"
                tor_ep = endpoints.clientFromString(reactor, conn)
                agent = txsocksx.http.SOCKS5Agent(reactor,
                                                  proxyEndpoint=tor_ep,
                                                  contextFactory=cf)

                def nicer_error(fail):
                    if fail.trap(error.ConnectError):
                        m = fail.getErrorMessage()
                        raise RuntimeError("Couldn't contact Tor on SOCKS5 (via \"%s\"): %s" % (conn, m))
                    return fail
                error_wrapper = nicer_error
            except ImportError:
                raise RuntimeError('You need "txsocksx" installed to download via Tor.')

        uri = 'https://www.torproject.org/projects/torbrowser/RecommendedTBBVersions'
        data = StringIO()
        print('Getting recommended versions from "%s".' % uri)
        d = download(agent, uri, data)

        def ssl_errors(fail):
            if hasattr(fail.value, 'reasons'):
                msg = ''
                for r in fail.value.reasons:
                    msg += str(r.value.args[-1])
                raise RuntimeError(msg)
            return fail
        d.addErrback(ssl_errors)
        if error_wrapper is not None:
            d.addErrback(error_wrapper)
        yield d

        # valid platforms from check.torproject.org can be one of:
        # 'Linux', 'MacOS' or 'Windows'
        plat = platform.system().lower()
        arch = platform.uname()[-2]
        plat_to_tor = dict(linux='Linux', darwin='MacOS', windows='Win')
        if plat not in plat_to_tor:
            print('Unknown platform "%s".' % plat)
            raise RuntimeError('Unknown platform "%s".' % plat)
        tor_plat = plat_to_tor[plat]

        try:
            versions = json.loads(data.getvalue())

        except:
            print('Error getting versions; invalid JSON:')
            print(data.getvalue())
            raise RuntimeError('Invalid JSON:\n%s' % data.getvalue())

        alpha_re = re.compile(r'[0-9]*.[0-9]*a[0-9]-(Windows|MacOS|Linux)')
        beta_re = re.compile(r'[0-9]*.[0-9]*b[0-9]-(Windows|MacOS|Linux)')
        hardened_re = re.compile(r'(.*)-hardened-(.*)')

        print(util.wrap(', '.join(versions), 60, '  '))
        alphas = filter(lambda x: alpha_re.match(x), versions)
        betas = filter(lambda x: beta_re.match(x), versions)
        # the 'hardened' browser names don't follow the pattern of the
        # others; for now, just ignoring them... (XXX FIXME)
        hardened = filter(lambda x: hardened_re.match(x), versions)
        others = set(versions).difference(alphas, betas, hardened)
        if options['alpha']:
            versions = alphas
        elif options['beta']:
            versions = betas
        else:
            versions = others

        if alphas:
            print(util.colors.yellow("Note: there are alpha versions available; use --alpha to download."))
        if betas:
            print(util.colors.yellow("Note: there are beta versions available; use --beta to download."))
        if hardened:
            print(util.colors.yellow("Note: there are hardened versions available but we don't support downloading them yet."))

        target_version = None
        for v in versions:
            if v.endswith(tor_plat):
                target_version = v[:v.rfind('-')]

        if target_version is None:
            print("Can't find a version to download")
            print("          My platform is: %s (%s)" % (plat, plat_to_tor[plat]))
            print("  Potential versions are: %s" % ', '.join(versions))
            if options['beta']:
                print("(Try without --beta)")
            elif options['alpha']:
                print("(Try without --alpha)")
            raise RuntimeError("Nothing to download found.")

        # download the signature, then browser-bundle (if they don't
        # already exist locally).
        sig_fname, dist_fname = get_download_urls(plat, arch, target_version)
        for to_download in [sig_fname, dist_fname]:
            uri = bytes('https://www.torproject.org/dist/torbrowser/%s/%s' % (target_version, to_download))
            if os.path.exists(to_download):
                print(util.colors.red(to_download) + ': already exists, so not downloading.')
            else:
                def cleanup(failure, fname):
                    print('removing "%s"...' % fname)
                    os.unlink(fname)
                    return failure

                f = open(to_download, 'w')
                print('Downloading "%s".' % to_download)
                d = download(agent, uri, f)
                d.addErrback(cleanup, to_download)
                yield d
                f.close()

        # ensure the signature matches
        if verify_signature(sig_fname, system_gpg=bool(options['system-keychain'])):
            print(util.colors.green("Signature is good."))

            if options['no-extract']:
                print("Download and signature check of the Tor Browser Bundle")
                print("has SUCCEEDED.\n")
                print("It is here: %s\n" % os.path.realpath(dist_fname))
                extraction_instructions(dist_fname)
                print("and then:")

            else:
                try:
                    extract_7zip(dist_fname)
                    print("Tor Browser Bundle downloaded and extracted.")

                except ImportError:
                    msg = 'You need "backports.lzma" installed to do 7zip extraction.'
                    print(util.colors.red('Error: ') + msg, isError=True)
                    extraction_instructions(dist_fname)

                print("To run:")

            # running instructions
            lang = dist_fname[-12:-7]
            tbb_path = './tor-browser_%s/Browser/start-tor-browser' % lang
            if options['no-launch']:
                print("To run: %s" % tbb_path)
            else:
                print("running: %s" % tbb_path)
                os.execl(tbb_path, tbb_path)

        else:
            print(util.colors.bold('Deleting tarball; signature verification failed.'))
            os.unlink(dist_fname)
            print('...however signature file is being kept for reference (%s).' % sig_fname)
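
A standalone sketch of how the three version patterns compiled above partition the RecommendedTBBVersions list into alpha, beta, hardened and stable entries. The version strings are invented for illustration; the patterns are the ones from the example.

import re

alpha_re = re.compile(r'[0-9]*.[0-9]*a[0-9]-(Windows|MacOS|Linux)')
beta_re = re.compile(r'[0-9]*.[0-9]*b[0-9]-(Windows|MacOS|Linux)')
hardened_re = re.compile(r'(.*)-hardened-(.*)')

versions = ['6.0.4-Linux', '6.5a2-Linux', '6.5b1-MacOS', '6.5a3-hardened-Linux']

alphas = [v for v in versions if alpha_re.match(v)]
betas = [v for v in versions if beta_re.match(v)]
hardened = [v for v in versions if hardened_re.match(v)]
others = set(versions).difference(alphas, betas, hardened)

print(alphas)    # ['6.5a2-Linux']
print(betas)     # ['6.5b1-MacOS']
print(hardened)  # ['6.5a3-hardened-Linux']
print(others)    # {'6.0.4-Linux'}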

Example 36

Project: pyxform Source File: xls2json.py
def workbook_to_json(
        workbook_dict, form_name=None,
        default_language=u"default", warnings=None):
    """
    workbook_dict -- nested dictionaries representing a spreadsheet.
                    should be similar to those returned by xls_to_dict
    form_name -- The spreadsheet's filename
    default_language -- default_language does two things:
    1. In the xform the default language is the language reverted to when
       there is no translation available for some itext element. Because
       of this every itext element must have a default language translation.
    2. In the workbook, media/labels/hints that do not have a
       language suffix will be treated as though their suffix is the
       default language.
       If the default language is used as a suffix for media/labels/hints,
       then the suffixless version will be overwritten.
    warnings -- an optional list which warnings will be appended to

    returns a nested dictionary equivalent to the format specified in the
    json form spec.
    """
    # ensure required headers are present
    if warnings is None:
        warnings = []
    is_valid = False
    for row in workbook_dict.get('survey', []):
        is_valid = 'type' in row
        if is_valid:
            break
    if not is_valid:
        raise PyXFormError(
            u"The survey sheet is either empty or missing important "
            u"column headers.")

    row_format_string = '[row : %s]'

    # Make sure the passed in vars are unicode
    form_name = unicode(form_name)
    default_language = unicode(default_language)

    # We check for double columns to determine whether to use them
    # or single colons to delimit grouped headers.
    # Single colons are bad because they conflict with the xform namespace
    # syntax (i.e. jr:constraintMsg),
    # so we only use them if we have to for backwards compatibility.
    use_double_colons = has_double_colon(workbook_dict)

    # Break the spreadsheet dict into easier to access objects
    # (settings, choices, survey_sheet):
    # ########## Settings sheet ##########
    settings_sheet = dealias_and_group_headers(
        workbook_dict.get(constants.SETTINGS, []),
        aliases.settings_header, use_double_colons)
    settings = settings_sheet[0] if len(settings_sheet) > 0 else {}

    default_language = settings.get(
        constants.DEFAULT_LANGUAGE, default_language)

    # add_none_option is a boolean that when true,
    # indicates a none option should automatically be added to selects.
    # It should probably be deprecated but I haven't checked yet.
    if u"add_none_option" in settings:
        settings[u"add_none_option"] = aliases.yes_no.get(
            settings[u"add_none_option"], False)

    # Here we create our json dict root with default settings:
    id_string = settings.get(constants.ID_STRING, form_name)
    sms_keyword = settings.get(constants.SMS_KEYWORD, id_string)
    json_dict = {
        constants.TYPE: constants.SURVEY,
        constants.NAME: form_name,
        constants.TITLE: id_string,
        constants.ID_STRING: id_string,
        constants.SMS_KEYWORD: sms_keyword,
        constants.DEFAULT_LANGUAGE: default_language,
        # By default the version is based on the date and time yyyymmddhh
        # Leaving default version out for now since it might cause
        # problems for formhub.
        # constants.VERSION : datetime.datetime.now().strftime("%Y%m%d%H"),
        constants.CHILDREN: []
    }
    # Here the default settings are overridden by those in the settings sheet
    json_dict.update(settings)

    # ########## Choices sheet ##########
    # Columns and "choices and columns" sheets are deprecated,
    # but we combine them with the choices sheet for backwards-compatibility.
    choices_and_columns_sheet = workbook_dict.get(
        constants.CHOICES_AND_COLUMNS, {})
    choices_and_columns_sheet = dealias_and_group_headers(
        choices_and_columns_sheet, aliases.list_header,
        use_double_colons, default_language)

    columns_sheet = workbook_dict.get(constants.COLUMNS, [])
    columns_sheet = dealias_and_group_headers(
        columns_sheet, aliases.list_header,
        use_double_colons, default_language)

    choices_sheet = workbook_dict.get(constants.CHOICES, [])
    choices_sheet = dealias_and_group_headers(
        choices_sheet, aliases.list_header, use_double_colons,
        default_language)
    # ########## Cascading Select sheet ###########
    cascading_choices = workbook_dict.get(constants.CASCADING_CHOICES, [])
    if len(cascading_choices):
        if 'choices' in cascading_choices[0]:
            choices_sheet = choices_sheet + cascading_choices[0]['choices']

    combined_lists = group_dictionaries_by_key(
        choices_and_columns_sheet + choices_sheet + columns_sheet,
        constants.LIST_NAME)

    choices = combined_lists
    # Make sure all the options have the required properties:
    warnedabout = set()
    for list_name, options in choices.items():
        for option in options:
            if 'name' not in option:
                info = "[list_name : " + list_name + ']'
                raise PyXFormError("On the choices sheet there is "
                                   "a option with no name. " + info)
            if 'label' not in option:
                info = "[list_name : " + list_name + ']'
                warnings.append(
                    "On the choices sheet there is a option with no label. " +
                    info)
            # chrislrobert's fix for a cryptic error message:
            # see: https://code.google.com/p/opendatakit/issues/detail?id=832&start=200 # noqa
            option_keys = list(option.keys())
            for headername in option_keys:
                # Using warnings and removing the bad columns
                # instead of throwing errors because some forms
                # use choices column headers for notes.
                if ' ' in headername:
                    if headername not in warnedabout:
                        warnedabout.add(headername)
                        warnings.append("On the choices sheet there is " +
                                        "a column (\"" +
                                        headername +
                                        "\") with an illegal header. " +
                                        "Headers cannot include spaces.")
                    del option[headername]
                elif headername == '':
                    warnings.append("On the choices sheet there is a value" +
                                    " in a column with no header.")
                    del option[headername]
    # ########## Survey sheet ###########
    if constants.SURVEY not in workbook_dict:
        raise PyXFormError(
            "You must have a sheet named (case-sensitive): " +
            constants.SURVEY)
    survey_sheet = workbook_dict[constants.SURVEY]
    # Process the headers:
    clean_text_values_enabled = aliases.yes_no.get(
        settings.get("clean_text_values", "true()"))
    if clean_text_values_enabled:
        survey_sheet = clean_text_values(survey_sheet)
    survey_sheet = dealias_and_group_headers(
        survey_sheet, aliases.survey_header,
        use_double_colons, default_language)
    survey_sheet = dealias_types(survey_sheet)

    osm_sheet = workbook_dict.get(constants.OSM, [])
    osm_tags = group_dictionaries_by_key(osm_sheet, constants.LIST_NAME)
    # #################################

    # Parse the survey sheet while generating a survey in our json format:

    row_number = 1  # We start at 1 because the column header row is not
    #                 included in the survey sheet (presumably).
    # A stack is used to keep track of begin/end expressions
    stack = [(None, json_dict.get(constants.CHILDREN))]
    # If a group has a table-list appearance flag
    # this will be set to the name of the list
    table_list = None
    # For efficiency we compile all the regular expressions
    # that will be used to parse types:
    end_control_regex = re.compile(r"^(?P<end>end)(\s|_)(?P<type>(" +
                                   '|'.join(aliases.control.keys()) + r"))$")
    begin_control_regex = re.compile(r"^(?P<begin>begin)(\s|_)(?P<type>(" +
                                     '|'.join(aliases.control.keys()) +
                                     r"))( (over )?(?P<list_name>\S+))?$")
    select_regexp = re.compile(
        r"^(?P<select_command>(" + '|'.join(aliases.select.keys()) +
        r")) (?P<list_name>\S+)" +
        "( (?P<specify_other>(or specify other|or_other|or other)))?$")
    cascading_regexp = re.compile(
        r"^(?P<cascading_command>(" +
        '|'.join(aliases.cascading.keys()) +
        r")) (?P<cascading_level>\S+)?$")
    osm_regexp = re.compile(
        r"(?P<osm_command>(" + '|'.join(aliases.osm.keys()) +
        ')) (?P<list_name>\S+)')

    for row in survey_sheet:
        row_number += 1
        prev_control_type, parent_children_array = stack[-1]
        # Disabled should probably be first
        # so the attributes below can be disabled.
        if u"disabled" in row:
            warnings.append(
                row_format_string % row_number +
                " The 'disabled' column header is not part of the current" +
                " spec. We recommend using relevant instead.")
            disabled = row.pop(u"disabled")
            if aliases.yes_no.get(disabled):
                continue

        # skip empty rows
        if len(row) == 0:
            continue

        # Get question type
        question_type = row.get(constants.TYPE)
        if not question_type:
            # if name and label are also missing,
            # then its a comment row, and we skip it with warning
            if not ((constants.NAME in row) or (constants.LABEL in row)):
                warnings.append(
                    row_format_string % row_number +
                    " Row without name, text, or label is being skipped:\n" +
                    str(row))
                continue
            raise PyXFormError(
                row_format_string % row_number +
                " Question with no type.\n" + str(row))

        if question_type == 'calculate':
            calculation = row.get('bind', {}).get('calculate')
            if not calculation:
                raise PyXFormError(
                    row_format_string % row_number + " Missing calculation.")

        # Check if the question is actually a setting specified
        # on the survey sheet
        settings_type = aliases.settings_header.get(question_type)
        if settings_type:
            json_dict[settings_type] = unicode(row.get(constants.NAME))
            continue

        # Try to parse question as a end control statement
        # (i.e. end loop/repeat/group):
        end_control_parse = end_control_regex.search(question_type)
        if end_control_parse:
            parse_dict = end_control_parse.groupdict()
            if parse_dict.get("end") and "type" in parse_dict:
                control_type = aliases.control[parse_dict["type"]]
                if prev_control_type != control_type or len(stack) == 1:
                    raise PyXFormError(
                        row_format_string % row_number +
                        " Unmatched end statement. Previous control type: " +
                        str(prev_control_type) +
                        ", Control type: " + str(control_type))
                stack.pop()
                table_list = None
                continue

        # Make sure the row has a valid name
        if constants.NAME not in row:
            if row['type'] == 'note':
                # autogenerate names for notes without them
                row['name'] = "generated_note_name_" + str(row_number)
            # elif 'group' in row['type'].lower():
            #     # autogenerate names for groups without them
            #     row['name'] = "generated_group_name_" + str(row_number)
            else:
                raise PyXFormError(row_format_string % row_number +
                                   " Question or group with no name.")
        question_name = unicode(row[constants.NAME])
        if not is_valid_xml_tag(question_name):
            error_message = row_format_string % row_number
            error_message += " Invalid question name [" + \
                             question_name.encode('utf-8') + "] "
            error_message += "Names must begin with a letter, colon,"\
                             + " or underscore."
            error_message += "Subsequent characters can include numbers," \
                             + " dashes, and periods."
            raise PyXFormError(error_message)

        if constants.LABEL not in row and \
           row.get(constants.MEDIA) is None and \
           question_type not in aliases.label_optional_types:
            # TODO: Should there be a default label?
            #      Not sure if we should throw warnings for groups...
            #      Warnings can be ignored so I'm not too concerned
            #      about false positives.
            warnings.append(
                row_format_string % row_number +
                " Question has no label: " + str(row))

        # Try to parse question as begin control statement
        # (i.e. begin loop/repeat/group):
        begin_control_parse = begin_control_regex.search(question_type)
        if begin_control_parse:
            parse_dict = begin_control_parse.groupdict()
            if parse_dict.get("begin") and "type" in parse_dict:
                # Create a new json dict with children, and the proper type,
                # and add it to parent_children_array in place of a question.
                # parent_children_array will then be set to its children array
                # (so following questions are nested under it)
                # until an end command is encountered.
                control_type = aliases.control[parse_dict["type"]]
                new_json_dict = row.copy()
                new_json_dict[constants.TYPE] = control_type
                child_list = list()
                new_json_dict[constants.CHILDREN] = child_list
                if control_type is constants.LOOP:
                    if not parse_dict.get("list_name"):
                        # TODO: Perhaps warn and make repeat into a group?
                        raise PyXFormError(
                            row_format_string % row_number +
                            " Repeat loop without list name.")
                    list_name = parse_dict["list_name"]
                    if list_name not in choices:
                        raise PyXFormError(
                            row_format_string % row_number +
                            " List name not in columns sheet: " + list_name)
                    new_json_dict[constants.COLUMNS] = choices[list_name]

                # Generate a new node for the jr:count column so
                # xpath expressions can be used.
                repeat_count_expression = new_json_dict.get(
                    'control', {}).get('jr:count')
                if repeat_count_expression:
                    generated_node_name = new_json_dict['name'] + "_count"
                    parent_children_array.append({
                        "name": generated_node_name,
                        "bind": {
                            "readonly": "true()",
                            "calculate": repeat_count_expression,
                        },
                        "type": "calculate",
                    })
                    new_json_dict['control']['jr:count'] = \
                        "${" + generated_node_name + "}"

                # Code to deal with table_list appearance flags
                # (for groups of selects)
                ctrl_ap = new_json_dict.get(u"control", {}).get(u"appearance")
                if ctrl_ap == constants.TABLE_LIST:
                    table_list = True
                    new_json_dict[u"control"][u"appearance"] = u"field-list"
                    # Generate a note label element so hints and labels
                    # work as expected in table-lists.
                    # see https://github.com/modilabs/pyxform/issues/62
                    if 'label' in new_json_dict or 'hint' in new_json_dict:
                        generated_label_element = {
                            "type": "note",
                            "name":
                                "generated_table_list_label_" + str(row_number)
                        }
                        if 'label' in new_json_dict:
                            generated_label_element[constants.LABEL] = \
                                new_json_dict[constants.LABEL]
                            del new_json_dict[constants.LABEL]
                        if 'hint' in new_json_dict:
                            generated_label_element['hint'] = \
                                new_json_dict['hint']
                            del new_json_dict['hint']
                        child_list.append(generated_label_element)
                if 'intent' in new_json_dict:
                    new_json_dict['control'] = \
                        new_json_dict.get(u"control", {})
                    new_json_dict['control']['intent'] = \
                        new_json_dict['intent']

                parent_children_array.append(new_json_dict)
                stack.append((control_type, child_list))
                continue

        # try to parse as a cascading select
        cascading_parse = cascading_regexp.search(question_type)
        if cascading_parse:
            parse_dict = cascading_parse.groupdict()
            if parse_dict.get("cascading_command"):
                cascading_level = parse_dict["cascading_level"]
                cascading_prefix = row.get(constants.NAME)
                if not cascading_prefix:
                    raise PyXFormError(
                        row_format_string % row_number +
                        " Cascading select needs a name.")
                # cascading_json = get_cascading_json(
                # cascading_choices, cascading_prefix, cascading_level)
                if len(cascading_choices) <= 0 or \
                   'questions' not in cascading_choices[0]:
                    raise PyXFormError(
                        "Found a cascading_select " + cascading_level +
                        ", but could not find " + cascading_level +
                        "in cascades sheet.")
                cascading_json = cascading_choices[0]['questions']
                json_dict['choices'] = choices
                include_bindings = False
                if 'bind' in row:
                    include_bindings = True
                for cq in cascading_json:
                    # include bindings
                    if include_bindings:
                        cq['bind'] = row['bind']

                    def replace_prefix(d, prefix):
                        for k, v in d.items():
                            if isinstance(v, basestring):
                                d[k] = v.replace('$PREFIX$', prefix)
                            elif isinstance(v, dict):
                                d[k] = replace_prefix(v, prefix)
                            elif isinstance(v, list):
                                d[k] = map(
                                    lambda x: replace_prefix(x, prefix), v)
                        return d

                    parent_children_array.append(
                        replace_prefix(cq, cascading_prefix))
                continue  # so the row isn't put in as is

        # Try to parse question as a select:
        select_parse = select_regexp.search(question_type)
        if select_parse:
            parse_dict = select_parse.groupdict()
            if parse_dict.get("select_command"):
                select_type = aliases.select[parse_dict["select_command"]]
                if select_type == 'select one external' \
                        and 'choice_filter' not in row:
                    warnings.append(
                        row_format_string % row_number +
                        u" select one external is only meant for"
                        u" filtered selects.")
                    select_type = aliases.select['select_one']
                list_name = parse_dict["list_name"]
                list_file_name, file_extension = os.path.splitext(list_name)

                if list_name not in choices \
                        and select_type != 'select one external' \
                        and file_extension not in ['.csv', '.xml']:
                    if not choices:
                        raise PyXFormError(
                            u"There should be a choices sheet in this xlsform."
                            u" Please ensure that the choices sheet name is "
                            u"all in small caps and has columns 'list name', "
                            u"'name', and 'label' (or aliased column names).")
                    raise PyXFormError(
                        row_format_string % row_number +
                        " List name not in choices sheet: " + list_name)

                # Validate select_multiple choice names by making sure
                # they have no spaces (will cause errors in exports).
                if select_type == constants.SELECT_ALL_THAT_APPLY \
                        and file_extension not in ['.csv', '.xml']:
                    for choice in choices[list_name]:
                        if ' ' in choice[constants.NAME]:
                            raise PyXFormError(
                                "Choice names with spaces cannot be added "
                                "to multiple choice selects. See [" +
                                choice[constants.NAME] + "] in [" +
                                list_name + "]")

                specify_other_question = None
                if parse_dict.get("specify_other") is not None:
                    select_type += u" or specify other"
                    # With this code we no longer need to handle or_other
                    # questions in survey builder.
                    # However, it depends on being able to use choice filters
                    # and xpath expressions that return empty sets.
                    # choices[list_name].append(
                    # {
                    #     'name': 'other',
                    #     'label': {default_language : 'Other'},
                    #     'orOther': 'true',
                    # })
                    # or_other_xpath = 'isNull(orOther)'
                    # if 'choice_filter' in row:
                    #   row['choice_filter'] += ' or ' + or_other_xpath
                    # else:
                    #   row['choice_filter'] = or_other_xpath

                    # specify_other_question = \
                    # {
                    #       'type':'text',
                    #       'name': row['name'] + '_specify_other',
                    #       'label':
                    #        'Specify Other for:\n"' + row['label'] + '"',
                    #       'bind' : {'relevant':
                    #                "selected(../%s, 'other')" % row['name']},
                    #     }

                new_json_dict = row.copy()
                new_json_dict[constants.TYPE] = select_type

                if row.get('choice_filter'):
                    if select_type == 'select one external':
                        new_json_dict['query'] = list_name
                    else:
                        new_json_dict['itemset'] = list_name
                        json_dict['choices'] = choices
                elif file_extension in ['.csv', '.xml']:
                    new_json_dict['itemset'] = list_name
                else:
                    new_json_dict[constants.CHOICES] = choices[list_name]

                # Code to deal with table_list appearance flags
                # (for groups of selects)
                if table_list is not None:
                    # Then this row is the first select in a table list
                    if not isinstance(table_list, basestring):
                        table_list = list_name
                        table_list_header = {
                            constants.TYPE: select_type,
                            constants.NAME:
                                "reserved_name_for_field_list_labels_" +
                                str(row_number),
                            # Adding row number for uniqueness # noqa
                            constants.CONTROL: {u"appearance": u"label"},
                            constants.CHOICES: choices[list_name],
                            # Do we care about filtered selects in table lists?
                            # 'itemset' : list_name,
                        }
                        parent_children_array.append(table_list_header)

                    if table_list != list_name:
                        error_message = row_format_string % row_number
                        error_message += " Badly formatted table list," \
                                         " list names don't match: " + \
                                         table_list + " vs. " + list_name
                        raise PyXFormError(error_message)

                    control = new_json_dict[u"control"] = \
                        new_json_dict.get(u"control", {})
                    control[u"appearance"] = "list-nolabel"
                parent_children_array.append(new_json_dict)
                if specify_other_question:
                    parent_children_array.append(specify_other_question)
                continue

        # Try to parse question as osm:
        osm_parse = osm_regexp.search(question_type)
        if osm_parse:
            parse_dict = osm_parse.groupdict()
            new_dict = row.copy()
            new_dict['type'] = constants.OSM

            if parse_dict.get('list_name') is not None:
                tags = osm_tags.get(parse_dict.get('list_name'))
                for tag in tags:
                    if osm_tags.get(tag.get('name')):
                        tag['choices'] = osm_tags.get(tag.get('name'))
                new_dict['tags'] = tags

            parent_children_array.append(new_dict)

            continue

        # TODO: Consider adding some question_type validation here.

        # Put the row in the json dict as is:
        parent_children_array.append(row)

    if len(stack) != 1:
        raise PyXFormError("Unmatched begin statement: " + str(stack[-1][0]))

    if settings.get('flat', False):
        # print "Generating flattened instance..."
        add_flat_annotations(stack[0][1])

    meta_children = []

    if aliases.yes_no.get(settings.get("omit_instanceID")):
        if settings.get("public_key"):
            raise PyXFormError(
                "Cannot omit instanceID, it is required for encryption.")
    else:
        # Automatically add an instanceID element:
        meta_children.append({
            "name": "instanceID",
            "bind": {
                "readonly": "true()",
                "calculate": settings.get(
                    "instance_id", "concat('uuid:', uuid())"),
            },
            "type": "calculate",
        })

    if 'instance_name' in settings:
        # Automatically add an instanceName element:
        meta_children.append({
            "name": "instanceName",
            "bind": {
                "calculate": settings['instance_name']
            },
            "type": "calculate",
        })

    if len(meta_children) > 0:
        meta_element = \
            {
                "name": "meta",
                "type": "group",
                "control": {
                    "bodyless": True
                },
                "children": meta_children
            }
        noop, survey_children_array = stack[0]
        survey_children_array.append(meta_element)

    # print_pyobj_to_json(json_dict)
    return json_dict
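
The control and select patterns above are assembled from pyxform's alias tables at run time. The sketch below rebuilds two of them with a small, hypothetical alias set (the real keys live in pyxform's aliases module) just to show what the named groups capture.

import re

control_aliases = ['group', 'repeat', 'loop']        # assumed subset of aliases.control
select_aliases = ['select_one', 'select_multiple']   # assumed subset of aliases.select

begin_control_regex = re.compile(
    r"^(?P<begin>begin)(\s|_)(?P<type>(" + '|'.join(control_aliases) +
    r"))( (over )?(?P<list_name>\S+))?$")
select_regexp = re.compile(
    r"^(?P<select_command>(" + '|'.join(select_aliases) +
    r")) (?P<list_name>\S+)" +
    "( (?P<specify_other>(or specify other|or_other|or other)))?$")

m = begin_control_regex.search("begin repeat household_members")
print(m.groupdict())
# {'begin': 'begin', 'type': 'repeat', 'list_name': 'household_members'}

m = select_regexp.search("select_one yes_no or_other")
print(m.group('list_name'), m.group('specify_other'))
# yes_no or_other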

Example 37

Project: CVE-Scan Source File: TermDisplay.py
  @classmethod
  def start(self, scan=None):
    systems = scan['systems'] if scan and 'systems' in scan else None
    colors = {'vulnerable': ('red', 'black', False)}
    screen = specter.Specter(markupSet=colors)

    # Functions
    def product(banner):
      if banner:
        r=make_dict(banner)
        return r['product']
      else:
        return 'Unknown'


    def getSystemInfo(s):
      cpes    =s['cpes']      if 'cpes'       in s else ['Not Detected']
      mac     =s['mac']       if s['mac']          else 'Unknown'
      marked  ='vulnerable'   if 'cves' in cpes[0] else 'normal'
      hosts   =s['hostnames'] if 'hostnames'  in s else ['None']
      services=s['services']  if 'services'   in s else [_NoServ]
      serv    =services[0]

      cont=[        {'tn':'i',
                     'tc':[{'t': 'IP',           'm': 'title'},
                           {'t': s['ip']                     } ]},
                    {'tn':'i',
                     'tc':[{'t': 'MAC',          'm': 'title'},
                           {'t': mac                         } ]},
                    {'tn':'i',
                     'tc':[{'t': 'Status',       'm': 'title'},
                           {'t': s['status']                 } ]},
                    {'tn':'i',
                     'tc':[{'t': 'CPEs',         'm': 'title'},
                           {'t': cpes[0]['cpe'], 'm': marked } ]}]
      for cpe in cpes[1:]:
        marked='vulnerable' if 'cves' in cpe else 'normal'
        cont.append({'tn':'i',
                     'tc':[{'t': ' '                         },
                           {'t': cpe['cpe'],     'm':marked  } ]})
      cont.append(  {'tn':'i',
                     'tc':[{'t': 'Vendor',       'm': 'title'},
                           {'t': s['vendor']                 } ]})
      cont.append(  {'tn':'i',
                     'tc':[{'t': 'Hostnames',    'm': 'title'},
                           {'t': hosts[0]                    } ]})
      for host in hosts[1:]:
        cont.append({'tn':'i',
                     'tc':[{'t': ' ',            'm': 'title'},
                           {'t': host                        } ]})
      cont.append(  {'tn':'i',
                     'tc':[{'t': 'Distance',     'm': 'title'},
                           {'t': s['distance']               } ]})
      ser='%s (%s/%s) is %s'%(serv['name'],serv['port'],serv['protocol'],serv['state'])
      marked='vulnerable' if len(serv['cves'])>0 else 'normal'
      cont.extend( [{'tn':'i',
                     'tc':[{'t': 'Services',     'm': 'title'},
                           {'t': ser,            'm':marked  } ]},
                    {'tn':'i',
                     'tc':[{'t': ' ',            'm': 'title'},
                           {'t': ' > %s'%product(serv['banner']),
                                                 'm': marked } ]},
                    {'tn':'i',
                     'tc':[{'t': ' ',            'm': 'title'},
                           {'t': ' > %s'%serv['cpe'],
                                                 'm':marked  } ]} ])

      for serv in services[1:]:
        marked='vulnerable' if len(serv['cves'])>0 else 'normal'
        ser='%s (%s/%s) is %s'%(serv['name'],serv['port'],serv['protocol'],serv['state'])
        cont.extend([{'tn':'i',
                      'tc':[{'t': ' '                        },
                            {'t': ser,           'm':marked  } ]},
                     {'tn':'i',
                      'tc':[{'t': ' '                        },
                            {'t': ' > %s'%product(serv['banner']),
                                                 'm':marked  } ]},
                    {'tn':'i',
                     'tc':[{'t': ' '                         },
                           {'t': ' > %s'%serv['cpe'],
                                                 'm':marked  } ]} ])
      return cont

    def cvesForcpe(line,sys,args=None):
      if type(line) == dict:
        if 'tc' in line: line = line['tc'][1]
        if 't'  in line: line = line['t']
      if type(line) is not str: line = str(line)

      # Clean out columns
      if line.startswith("CPEs"):     line = line[4:].strip()
      if line.startswith("Services"): line = line[8:].strip()
      # reset variables
      service=None
      cves=None
      # make sure we're dealing with strings
      line=str(line)
      # handle args if present, else use current line
      if args:
        line=str(args[0])
      else:
        if line.strip().startswith('> '): # We're either working by cpe or product
          line=line.strip('> ')
        elif line.startswith('cpe:'):     # We're working by cpe
          pass
        else:                             # We're working by port
          line=rePortLine.search(line)
          if line: line=line.group()[1:-5]
          else:    return
      # see if we match on port
      if rePort.match(line):
        for s in sys['services']:
          if str(s['port'])==line:
            service=s;
            cves=s['cves']
            break
      # see if we match on cpe
      elif reCPE.match(line):
        for s in sys['services']:
          if str(s['cpe'])==line:
            service=s;
            cves=s['cves']
            break
        if not service:
          for c in sys['cpes']:
            if c['cpe']==line:cves=c['cves'];break
      # see if we match on product name
      else:
        for s in sys['services']:
          if product(s['banner'])==line:
            service=s;
            cves=s['cves']
            break
      if cves:
        cveList(cves,service)

    # Windows
    def splash():
      x, y = screen.getMaxXY()
      if y<10: raise Exception("Please make sure your terminal has at least 10 rows")
      screen.splash(tSplash)

    def help():
      screen.scroll(tHelp, footer=tDefFoot, nav=extendedNav)

    def info():
      text=[{'t': "Scan", 'm': "title"},
            {'t': "  Date: %s"%fromEpoch(scan['scan']['time'])},
            {'t': "  Type: %s"%scan['scan']['type']},
            {'t': "Enhancement", 'm': "title"},
            {'t': "  Date: %s"%fromEpoch(scan['enhanced']['time'])}]
      screen.scroll(text, footer=tDefFoot, nav=extendedNav)

    def cveList(cves, service=None):
      navSet=copy.deepcopy(extendedNav)
      navSet['enter'] = ['o']
      text=[]
      for cve in cves:
        C = cve[_I][_IC][0] if _I in cve and _IC in cve[_I] else "?"
        I = cve[_I][_II][0] if _I in cve and _II in cve[_I] else "?"
        A = cve[_I][_IA][0] if _I in cve and _IA in cve[_I] else "?"
        V = cve[_A][_AV][0] if _A in cve and _AV in cve[_A] else "?"
        Co= cve[_A][_AC][0] if _A in cve and _AC in cve[_A] else "?"
        text.append({'t': "%s - %s%s%s - %s %s"%(cve['id'], C, I, A, V, Co),
                     'a': cveDetails, 'p': cve})
      screen.scroll(text,header=tServiceHead,footer=tServiceFoot,
                    cursor=True, nav=navSet)

    def cveDetails(cve):
      maxx, maxy = screen.getMaxXY()
      summary=splitByLength(cve['summary'],maxx-18)

      text=      ["CVE id    %s"%cve['id']]
      text.append("Summary   %s"%summary[0])
      for i, x in enumerate(summary[1:]):
        text.append("          %s"%summary[i+1])
      text.append(" ")
      text.append("CVSS      Base:             %s"%(cve['cvss']))
      text.append("          Exploitability:   %s"%(cve['exploitCVSS'] if 'exploitCVSS' in cve else ' -'))
      text.append("          Impact:           %s"%(cve['impactCVSS']  if 'impactCVSS'  in cve else ' -'))
      text.append(" ")
      text.append("Access    Vector:           %s"%cve['access']['vector'])
      text.append("          Complexity:       %s"%cve['access']['complexity'])
      text.append("          Authentication:   %s"%cve['access']['authentication'])
      text.append(" ")
      text.append("Impact    Confidentiality:  %s"%cve['impact']['confidentiality'])
      text.append("          Integrity:        %s"%cve['impact']['integrity'])
      text.append("          Availability:     %s"%cve['impact']['availability'])
      screen.scroll(text, footer=tDefFoot, nav=extendedNav)

    def home():
      index = 0
      lineNr = 0
      while True:
        system = systems[index]
        content = getSystemInfo(system)
        foot=copy.deepcopy(tNavFoot)
        foot[1]=foot[1]%(index+1,len(systems))
        key, lineNr = screen.scroll(content, footer=foot, cursor=lineNr,
                                    blocking=False, functions=keyFuncts,
                                    nav=sysNav)
        if   key in ['n']:
          index+=1
          if index>=len(systems):index=0
        elif key in ['p']:
          index-=1
          if index<0:index=len(systems)-1
        elif key in ['o']:
          cvesForcpe(content[lineNr], system)
        elif key in ['c']:
          parts = screen.userInput("Enter your command").lower().split()
          if parts:
            command = parts[0]
            args    = parts[1:]
            if   command in ['h', 'help']: help()
            elif command in ['c', 'cve' ]:
              line = content[lineNr]
              if type(line) == dict and 't' in line: line=line['t']
              if type(line) == str: cvesForcpe(line,system,args)
            elif command in ['i', 'info']: info()
            else: screen.popup(tInvalidCommand)
        elif key in ['q', chr(specter.KEY_ESC)]:
          break


    tSplash=[{'t': " _____ _   _ _____      _____                 ", 'm': 'header'},
             {'t': "/  __ \ | | |  ___|    /  ___|                ", 'm': 'header'},
             {'t': "| /  \/ | | | |__ _____\ `--.  ___ __ _ _ __  ", 'm': 'header'},
             {'t': "| |   | | | |  __|_____|`--. \/ __/ _` | '_ \ ", 'm': 'header'},
             {'t': "| \__/\ \_/ / |___     /\__/ / (_| (_| | | | |", 'm': 'header'},
             {'t': " \____/\___/\____/     \____/ \___\__,_|_| |_|", 'm': 'header'},
             {'t': "                            (c) NorthernSec   ", 'm': 'header'},
             {'t': "             [Press the any key]              ", 'm': 'title'}]

    tNavFoot=["(u)p   | (n)ext    | (p)revious | (q)uit |",
              "(d)own | (j)ump to | (c)ommand  | (o)pen | [%s/%s]"]

    tServiceHead=['CVE           - CIA - Vector Complexity']

    tServiceFoot=['Vector:     N(etwork) - A(djacent network) - L(ocal)',
                  'CIA Impact: L(ow)     - M(edium)           - H(igh)',
                  'Press Enter or o for more info']

    tHelp=[{'t': '----------', 'm': 'title'},
           {'t': '|  HELP  |', 'm': 'title'},
           {'t': '----------', 'm': 'title'},
           {'t': ' '},
           {'t':'Navigating','m': 'title'},
           {'t':' * You can navigate through the scanned systems with p and n, or the left and right arrow keys'},
           {'t':' * You can scroll through the current system with u and d, or the up and down arrow keys'},
           {'t':' * You can jump directly to a scanned system by pressing j and entering its page number.'},
           {'t':' '},
           {'t':'Commands','m': 'title'},
           {'t':'By pressing c, you can enter commands:'},
           {'t':'  h/help                  - Displays this menu'},
           {'t':'  c/cve [port/cpe/banner] - If found, displays CVEs of the current line, or service with the parameter'},
           {'t':'  i/info                  - Display info of the scan'}]
    tDefFoot=[" - press q or ESC to return to the previous page -"]

    tInvalidCommand = [{'t': 'Invalid command', 'm': 'title'}]

    extendedNav={'esc': ["q"], 'up':["u"], 'down':["d"]}
    sysNav={'esc': ["q"], 'up':["u"], 'down':["d"], 'left':["p"], 'right':["n"]}
    keyFuncts={'i': info, 'h': help}

    rePort=re.compile('^([0-9]{1,4}|[1-5][0-9]{4}|6[0-4][0-9]{3}|65[0-4][0-9]{2}|655[0-2][0-9]|6553[0-5])$')
    reCPE=re.compile('^((%s|%s)[aoh]:.*)$'%(re.escape('cpe:/'),re.escape('cpe:2.3:')))
    rePortLine=re.compile('\\([0-9]*/(tcp|udp)\\)')
    _I ="impact"
    _IC="confidentiality"
    _II="integrity"
    _IA="availability"
    _A ="access"
    _AV="vector"
    _AC="complexity"

    _NoServ="No services found"
    try:
      splash()
      home()
      screen.stop()
    except Exception as ex:
      screen.stop()
      raise(ex)
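
As a quick illustration (not part of the source above; the sample strings are made up), the three patterns compiled in this example can be exercised like this:

import re

rePort = re.compile('^([0-9]{1,4}|[1-5][0-9]{4}|6[0-4][0-9]{3}|65[0-4][0-9]{2}|655[0-2][0-9]|6553[0-5])$')
reCPE = re.compile('^((%s|%s)[aoh]:.*)$' % (re.escape('cpe:/'), re.escape('cpe:2.3:')))
rePortLine = re.compile(r'\([0-9]*/(tcp|udp)\)')

print(bool(rePort.match('443')))                        # True  - valid port number (0-65535)
print(bool(rePort.match('70000')))                      # False - out of range
print(bool(reCPE.match('cpe:/a:openbsd:openssh:6.7')))  # True for cpe:/ and cpe:2.3: URIs
m = rePortLine.search('ssh (22/tcp) is open')
print(m.group() if m else None)                         # '(22/tcp)'; the code above trims it with [1:-5]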

Example 38

Project: pycoin Source File: tx.py
def parse_context(args, parser):
    # defaults

    txs = []
    spendables = []
    payables = []

    key_iters = []

    TX_ID_RE = re.compile(r"^[0-9a-fA-F]{64}$")

    # there are a few warnings we might optionally print out, but only if
    # they are relevant. We don't want to print them out multiple times, so we
    # collect them here and print them at the end if they ever kick in.

    warning_tx_cache = None
    warning_tx_for_tx_hash = None
    warning_spendables = None

    if args.private_key_file:
        wif_re = re.compile(r"[1-9a-km-zA-LMNP-Z]{51,111}")
        # address_re = re.compile(r"[1-9a-kmnp-zA-KMNP-Z]{27-31}")
        for f in args.private_key_file:
            if f.name.endswith(".gpg"):
                gpg_args = ["gpg", "-d"]
                if args.gpg_argument:
                    gpg_args.extend(args.gpg_argument.split())
                gpg_args.append(f.name)
                popen = subprocess.Popen(gpg_args, stdout=subprocess.PIPE)
                f = popen.stdout
            for line in f.readlines():
                # decode
                if isinstance(line, bytes):
                    line = line.decode("utf8")
                # look for WIFs
                possible_keys = wif_re.findall(line)

                def make_key(x):
                    try:
                        return Key.from_text(x)
                    except Exception:
                        return None

                keys = [make_key(x) for x in possible_keys]
                for key in keys:
                    if key:
                        key_iters.append((k.wif() for k in key.subkeys("")))

                # if len(keys) == 1 and key.hierarchical_wallet() is None:
                #    # we have exactly 1 WIF. Let's look for an address
                #   potential_addresses = address_re.findall(line)

    # update p2sh_lookup
    p2sh_lookup = {}
    if args.pay_to_script:
        for p2s in args.pay_to_script:
            try:
                script = h2b(p2s)
                p2sh_lookup[hash160(script)] = script
            except Exception:
                print("warning: error parsing pay-to-script value %s" % p2s)

    if args.pay_to_script_file:
        hex_re = re.compile(r"[0-9a-fA-F]+")
        for f in args.pay_to_script_file:
            count = 0
            for l in f:
                try:
                    m = hex_re.search(l)
                    if m:
                        p2s = m.group(0)
                        script = h2b(p2s)
                        p2sh_lookup[hash160(script)] = script
                        count += 1
                except Exception:
                    print("warning: error parsing pay-to-script file %s" % f.name)
            if count == 0:
                print("warning: no scripts found in %s" % f.name)

    # we create the tx_db lazily
    tx_db = None

    for arg in args.argument:

        # hex transaction id
        if TX_ID_RE.match(arg):
            if tx_db is None:
                warning_tx_cache = message_about_tx_cache_env()
                warning_tx_for_tx_hash = message_about_tx_for_tx_hash_env(args.network)
                tx_db = get_tx_db(args.network)
            tx = tx_db.get(h2b_rev(arg))
            if not tx:
                for m in [warning_tx_cache, warning_tx_for_tx_hash, warning_spendables]:
                    if m:
                        print("warning: %s" % m, file=sys.stderr)
                parser.error("can't find Tx with id %s" % arg)
            txs.append(tx)
            continue

        # hex transaction data
        try:
            tx = Tx.from_hex(arg)
            txs.append(tx)
            continue
        except Exception:
            pass

        is_valid = is_address_valid(arg, allowable_netcodes=[args.network])
        if is_valid:
            payables.append((arg, 0))
            continue

        try:
            key = Key.from_text(arg)
            # TODO: check network
            if key.wif() is None:
                payables.append((key.address(), 0))
                continue
            # TODO: support paths to subkeys
            key_iters.append((k.wif() for k in key.subkeys("")))
            continue
        except Exception:
            pass

        if os.path.exists(arg):
            try:
                with open(arg, "rb") as f:
                    if f.name.endswith("hex"):
                        f = io.BytesIO(codecs.getreader("hex_codec")(f).read())
                    tx = Tx.parse(f)
                    txs.append(tx)
                    try:
                        tx.parse_unspents(f)
                    except Exception as ex:
                        pass
                    continue
            except Exception:
                pass

        parts = arg.split("/")
        if len(parts) == 4:
            # spendable
            try:
                spendables.append(Spendable.from_text(arg))
                continue
            except Exception:
                pass

        if len(parts) == 2 and is_address_valid(parts[0], allowable_netcodes=[args.network]):
            try:
                payables.append(parts)
                continue
            except ValueError:
                pass

        parser.error("can't parse %s" % arg)

    if args.fetch_spendables:
        warning_spendables = message_about_spendables_for_address_env(args.network)
        for address in args.fetch_spendables:
            spendables.extend(spendables_for_address(address))

    for tx in txs:
        if tx.missing_unspents() and args.augment:
            if tx_db is None:
                warning_tx_cache = message_about_tx_cache_env()
                warning_tx_for_tx_hash = message_about_tx_for_tx_hash_env(args.network)
                tx_db = get_tx_db(args.network)
            tx.unspents_from_db(tx_db, ignore_missing=True)

    return (txs, spendables, payables, key_iters, p2sh_lookup, tx_db, warning_tx_cache,
            warning_tx_for_tx_hash, warning_spendables)
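
For a quick, self-contained illustration (not part of the pycoin source; the inputs below are dummies), the two main patterns behave like this:

import re

TX_ID_RE = re.compile(r"^[0-9a-fA-F]{64}$")
wif_re = re.compile(r"[1-9a-km-zA-LMNP-Z]{51,111}")

print(bool(TX_ID_RE.match("ab" * 32)))        # True  - exactly 64 hex digits
print(bool(TX_ID_RE.match("ab" * 31)))        # False - wrong length
dummy_wif = "5" + "J" * 50                    # 51 chars drawn from the base58-style class
print(wif_re.findall("wif: %s" % dummy_wif))  # one 51-character candidate key string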

Example 39

Project: stopstalk-deployment Source File: appadmin.py
Function: select
def select():
    import re
    db = get_database(request)
    dbname = request.args[0]
    try:
        is_imap = db._uri.startswith("imap://")
    except (KeyError, AttributeError, TypeError):
        is_imap = False
    regex = re.compile(r'(?P<table>\w+)\.(?P<field>\w+)=(?P<value>\d+)')
    if len(request.args) > 1 and hasattr(db[request.args[1]], '_primarykey'):
        regex = re.compile(r'(?P<table>\w+)\.(?P<field>\w+)=(?P<value>.+)')
    if request.vars.query:
        match = regex.match(request.vars.query)
        if match:
            request.vars.query = '%s.%s.%s==%s' % (request.args[0],
                                                   match.group('table'), match.group('field'),
                                                   match.group('value'))
    else:
        request.vars.query = session.last_query
    query = get_query(request)
    if request.vars.start:
        start = int(request.vars.start)
    else:
        start = 0
    nrows = 0

    step = 100
    fields = []

    if is_imap:
        step = 3

    stop = start + step

    table = None
    rows = []
    orderby = request.vars.orderby
    if orderby:
        orderby = dbname + '.' + orderby
        if orderby == session.last_orderby:
            if orderby[0] == '~':
                orderby = orderby[1:]
            else:
                orderby = '~' + orderby
    session.last_orderby = orderby
    session.last_query = request.vars.query
    form = FORM(DIV(DIV(INPUT(_style='width:400px',
                              _name='query',
                              _id='query_inp',
                              _value=request.vars.query or '',
                              requires=IS_NOT_EMPTY(
                                    error_message=T("Cannot be empty"))),
                        LABEL("Query:", _for='query_inp'),
                        _class="input-field col offset-s3 s6"),
                    _class="row"),
                DIV(DIV(INPUT(_name='update_check',
                              _id='update_inp',
                              _type='checkbox',
                              value=False),
                        LABEL("Update", _for='update_inp'),
                        _class="input-field col offset-s3 s6"),
                    _class="row"),
                DIV(DIV(INPUT(_style='width:400px',
                              _name='update_fields',
                              _id='update_field_inp',
                              _value=request.vars.update_fields or ''),
                        LABEL("Update Fields:", _for='update_fields_inp'),
                        _class="input-field col offset-s3 s6"),
                    _class="row"),
                DIV(DIV(INPUT(_name='delete_check',
                              _class='delete',
                              _id='delete_inp',
                              _type='checkbox',
                              value=False),
                        LABEL("Delete:", _for='delete_inp'),
                        _class="input-field col offset-s3 s6"),
                    _class="row"),
                DIV(DIV(INPUT(_type='submit',
                              _value=T('submit')),
                        _class="input-field col offset-s3 s6"),
                    _class="row"),
                _action=URL(r=request, args=request.args),
                _class="row col s12")

    tb = None
    if form.accepts(request.vars, formname=None):
        regex = re.compile(request.args[0] + r'\.(?P<table>\w+)\..+')
        match = regex.match(form.vars.query.strip())
        if match:
            table = match.group('table')
        try:
            nrows = db(query, ignore_common_filters=True).count()
            if form.vars.update_check and form.vars.update_fields:
                db(query, ignore_common_filters=True).update(
                    **eval_in_global_env('dict(%s)' % form.vars.update_fields))
                response.flash = T('%s %%{row} updated', nrows)
            elif form.vars.delete_check:
                db(query, ignore_common_filters=True).delete()
                response.flash = T('%s %%{row} deleted', nrows)
            nrows = db(query, ignore_common_filters=True).count()

            if is_imap:
                fields = [db[table][name] for name in
                    ("id", "uid", "created", "to",
                     "sender", "subject")]
            if orderby:
                rows = db(query, ignore_common_filters=True).select(
                              *fields, limitby=(start, stop),
                              orderby=eval_in_global_env(orderby))
            else:
                rows = db(query, ignore_common_filters=True).select(
                    *fields, limitby=(start, stop))
        except Exception, e:
            import traceback
            tb = traceback.format_exc()
            (rows, nrows) = ([], 0)
            response.flash = DIV(T('Invalid Query'), PRE(str(e)))
    # begin handle upload csv
    csv_table = table or request.vars.table
    if csv_table:
        formcsv = FORM(DIV(str(T('or import from csv file')) + " ", _class="row"),
                       DIV(DIV(DIV(SPAN("File"),
                                   INPUT(_type='file', _name='csvfile'),
                                         _class="btn"),
                               DIV(INPUT(_class="file-path", _type="text"),
                                   _class="file-path-wrapper"),
                               _class="col offset-s4 s4 file-field input-field"),
                           _class="row"),
                       DIV(DIV(INPUT(_type='hidden', _value=csv_table, _name='table'),
                               INPUT(_type='submit', _value=T('import')),
                               _class="col offset-s4 s4"),
                           _class="row"),
                       _class="row center")
    else:
        formcsv = None
    if formcsv and formcsv.process().accepted:
        try:
            import_csv(db[request.vars.table],
                       request.vars.csvfile.file)
            response.flash = T('data uploaded')
        except Exception, e:
            response.flash = DIV(T('unable to parse csv file'), PRE(str(e)))
    # end handle upload csv

    return dict(
        form=form,
        table=table,
        start=start,
        stop=stop,
        step=step,
        nrows=nrows,
        rows=rows,
        query=request.vars.query,
        formcsv=formcsv,
        tb=tb
    )
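
As a small illustration (outside the appadmin source; 'db' and 'auth_user' are just placeholder names), the first query pattern turns a simple "table.field=value" string into the query string the view builds:

import re

regex = re.compile(r'(?P<table>\w+)\.(?P<field>\w+)=(?P<value>\d+)')
match = regex.match('auth_user.id=3')
if match:
    print('%s.%s.%s==%s' % ('db', match.group('table'),
                            match.group('field'), match.group('value')))
    # -> db.auth_user.id==3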

Example 40

Project: FanFicFare Source File: adapter_storiesonlinenet.py
Function: doextractchapterurlsandmetadata
    def doExtractChapterUrlsAndMetadata(self, get_cover=True):

        # index=1 makes sure we see the story chapter index.  Some
        # sites skip that for one-chapter stories.
        url = self.url
        logger.debug("URL: "+url)

        self.needToLogin = False
        try:
            data = self._fetchUrl(url+":i")
        except urllib2.HTTPError, e:
            if e.code in (404, 410):
                raise exceptions.StoryDoesNotExist("Code: %s: %s"%(e.code,self.url))
            elif e.code == 401:
                self.needToLogin = True
                data = ''
            else:
                raise e

        if self.needToLoginCheck(data):
            # need to log in for this one.
            self.performLogin(url)
            try:
                data = self._fetchUrl(url+":i",usecache=False)
            except urllib2.HTTPError, e:
                if e.code in (404, 410):
                    raise exceptions.StoryDoesNotExist("Code: %s: %s"%(e.code,self.url))
                elif e.code == 401:
                    self.needToLogin = True
                    data = ''
                else:
                    raise e

        if "Access denied. This story has not been validated by the adminstrators of this site." in data:
            raise exceptions.AccessDenied(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")
        elif "Error! The story you're trying to access is being filtered by your choice of contents filtering." in data:
            raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Error! The story you're trying to access is being filtered by your choice of contents filtering.")
        elif "Error! Daily Limit Reached" in data:
            raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Error! Daily Limit Reached")

        # use BeautifulSoup HTML parser to make everything easier to find.
        soup = self.make_soup(data)
        #print data

        # Now go hunting for all the meta data and the chapter list.

        ## Title
        a = soup.find('h1')
        self.story.setMetadata('title',stripHTML(a))

        notice = soup.find('div', {'class' : 'notice'})
        if notice:
            self.story.setMetadata('notice',unicode(notice))

        # Find authorid and URL from... author url.
        for a in soup.findAll('a', href=re.compile(r"/a/\w+")):
            self.story.addToList('authorId',a['href'].split('/')[2])
            self.story.addToList('authorUrl','http://'+self.host+a['href'])
            self.story.addToList('author',stripHTML(a).replace("'s Page",""))

        # Find the chapters:
        chapters = soup.findAll('a', href=re.compile(r'^/s/'+self.story.getMetadata('storyId')+r":\d+(/.*)?$"))
        if len(chapters) != 0:
            for chapter in chapters:
                # just in case there's tags, like <i> in chapter titles.
                self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+chapter['href']))
        else:
            self.chapterUrls.append((self.story.getMetadata('title'),'http://'+self.host+'/s/'+self.story.getMetadata('storyId')))

        self.story.setMetadata('numChapters',len(self.chapterUrls))

        # surprisingly, the detailed page does not give enough details, so go to author's page
        page=0
        i=0
        while i == 0:
            data = self._fetchUrl(self.story.getList('authorUrl')[0]+"/"+unicode(page))
            asoup = self.make_soup(data)

            a = asoup.findAll('td', {'class' : 'lc2'})
            for lc2 in a:
                if lc2.find('a', href=re.compile(r'^/s/'+self.story.getMetadata('storyId'))):
                    i=1
                    break
                if a[len(a)-1] == lc2:
                    page=page+1

        for cat in lc2.findAll('div', {'class' : 'typediv'}):
            self.story.addToList('genre',cat.text)

        # in lieu of word count.
        self.story.setMetadata('size', lc2.findNext('td', {'class' : 'num'}).text)

        score = lc2.findNext('th', {'class' : 'ynum'}).text
        if score != '-':
            self.story.setMetadata('score', score)

        lc4 = lc2.findNext('td', {'class' : 'lc4'})
        desc = lc4.contents[0]

        try:
            a = lc4.find('a', href=re.compile(r"/series/\d+/.*"))
            # logger.debug("Looking for series - a='{0}'".format(a))
            if a:
                # if there's a number after the series name, series_contents is a two element list:
                # [<a href="...">Title</a>, u' (2)']
                series_contents = a.parent.contents
                i = 0 if len(series_contents) == 1 else series_contents[1].strip(' ()')
                seriesUrl = 'http://'+self.host+a['href']
                self.story.setMetadata('seriesUrl',seriesUrl)
                series_name = stripHTML(a)
                # logger.debug("Series name= %s" % series_name)
                series_soup = self.make_soup(self._fetchUrl(seriesUrl))
                if series_soup:
                    # logger.debug("Retrieving Series - looking for name")
                    series_name = stripHTML(series_soup.find('span', {'id' : 'ptitle'}))
                    series_name = re.sub(r' . a series by.*$','',series_name)
                    # logger.debug("Series name: '%s'" % series_name)
                self.setSeries(series_name, i)
                desc = lc4.contents[2]
                # Check if series is in a universe
                if "/universes" in data:
                    universe_url = self.story.getList('authorUrl')[0]  + "&type=uni"
                    universes_soup = self.make_soup(self._fetchUrl(universe_url) )
                    # logger.debug("Universe url='{0}'".format(universe_url))
                    if universes_soup:
                        universes = universes_soup.findAll('div', {'class' : 'ser-box'})
                        # logger.debug("Number of Universes: %d" % len(universes))
                        for universe in universes:
                            # logger.debug("universe.find('a')={0}".format(universe.find('a')))
                            # The universe id is in an "a" tag that has an id but nothing else. It is the first tag.
                            # The id is prefixed with the letter "u".
                            universe_id = universe.find('a')['id'][1:]
                            # logger.debug("universe_id='%s'" % universe_id)
                            universe_name = stripHTML(universe.find('div', {'class' : 'ser-name'})).partition(' ')[2]
                            # logger.debug("universe_name='%s'" % universe_name)
                            # If there is link to the story, we have the right universe
                            story_a = universe.find('a', href=re.compile('/s/'+self.story.getMetadata('storyId')))
                            if story_a:
                                # logger.debug("Story is in a series that is in a universe! The universe is '%s'" % universe_name)
                                self.story.setMetadata("universe", universe_name)
                                self.story.setMetadata('universeUrl','http://'+self.host+ '/library/universe.php?id=' + universe_id)
                                break
                    else:
                        logger.debug("No universe page")
        except:
            raise
            pass
        try:
            a = lc4.find('a', href=re.compile(r"/universe/\d+/.*"))
            # logger.debug("Looking for universe - a='{0}'".format(a))
            if a:
                self.story.setMetadata("universe",stripHTML(a))
                desc = lc4.contents[2]
                # Assumed only one universe, but it does have a URL--use universeHTML
                universe_name = stripHTML(a)
                universeUrl = 'http://'+self.host+a['href']
                # logger.debug("Retrieving Universe - about to get page - universeUrl='{0}".format(universeUrl))
                universe_soup = self.make_soup(self._fetchUrl(universeUrl))
                logger.debug("Retrieving Universe - have page")
                if universe_soup:
                    logger.debug("Retrieving Universe - looking for name")
                    universe_name = stripHTML(universe_soup.find('h1', {'id' : 'ptitle'}))
                    universe_name = re.sub(r' . A Universe from the Mind.*$','',universe_name)
                    # logger.debug("Universes name: '{0}'".format(universe_name))

                self.story.setMetadata('universeUrl',universeUrl)
                # logger.debug("Setting universe name: '{0}'".format(universe_name))
                self.story.setMetadata('universe',universe_name)
                if self.getConfig("universe_as_series"):
                    self.setSeries(universe_name, 0)
                    self.story.setMetadata('seriesUrl',universeUrl)
            else:
                logger.debug("Do not have a universe")
        except:
            raise
            pass

        self.setDescription('http://'+self.host+'/s/'+self.story.getMetadata('storyId'),desc)

        for b in lc4.findAll('b'):
            #logger.debug('Getting metadata: "%s"' % b)
            label = b.text
            if label in ['Posted:', 'Concluded:', 'Updated:']:
                value = b.findNext('noscript').text
                #logger.debug('Have a date field label: "%s", value: "%s"' % (label, value))
            else:
                value = b.nextSibling
            #logger.debug('label: "%s", value: "%s"' % (label, value))

            if 'Sex' in label:
                self.story.setMetadata('rating', value)

            if 'Tags' in label or 'Codes' in label:
                for code in re.split(r'\s*,\s*', value.strip()):
                    self.story.addToList('sitetags',code)

            if 'Posted' in label:
                self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat))
                self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))

            if 'Concluded' in label:
                self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))

            if 'Updated' in label:
                self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))

        # Some books have a cover in the index page.
        # Samples are:
        #     http://storiesonline.net/s/11999
        #     http://storiesonline.net/s/10823
        if get_cover:
            # logger.debug("Looking for the cover image...")
            cover_url = ""
            img = soup.find('img')
            if img:
                cover_url=img['src']
            # logger.debug("cover_url: %s"%cover_url)
            if cover_url:
                self.setCoverImage(url,cover_url)

        status = lc4.find('span', {'class' : 'ab'})
        if  status != None:
            if 'Incomplete and Inactive' in status.text:
                self.story.setMetadata('status', 'Incomplete')
            else:
                self.story.setMetadata('status', 'In-Progress')
            if "Last Activity" in status.text:
                # date is passed as a timestamp and converted in JS.
                value = status.findNext('noscript').text
                self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))
        else:
            self.story.setMetadata('status', 'Completed')
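
A minimal sketch of the chapter-link pattern used above (not from the adapter itself; the story id and hrefs are invented):

import re

story_id = '12345'
chapter_re = re.compile(r'^/s/' + story_id + r':\d+(/.*)?$')

for href in ['/s/12345:2', '/s/12345:10/chapter-title', '/s/12345', '/s/99999:1']:
    print(href, bool(chapter_re.match(href)))
# /s/12345:2 True, /s/12345:10/chapter-title True, /s/12345 False, /s/99999:1 False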

Example 41

Project: auto-sklearn Source File: ensemble_builder.py
    def main(self):

        watch = StopWatch()
        watch.start_task('ensemble_builder')

        used_time = 0
        time_iter = 0
        index_run = 0
        num_iteration = 0
        current_num_models = 0
        last_hash = None
        current_hash = None

        dir_ensemble = os.path.join(self.backend.temporary_directory,
                                    '.auto-sklearn',
                                    'predictions_ensemble')
        dir_valid = os.path.join(self.backend.temporary_directory,
                                 '.auto-sklearn',
                                 'predictions_valid')
        dir_test = os.path.join(self.backend.temporary_directory,
                                '.auto-sklearn',
                                'predictions_test')
        paths_ = [dir_ensemble, dir_valid, dir_test]

        dir_ensemble_list_mtimes = []

        self.logger.debug('Starting main loop with %f seconds and %d iterations '
                          'left.' % (self.limit - used_time, num_iteration))
        while used_time < self.limit or (self.max_iterations > 0 and
                                         self.max_iterations >= num_iteration):
            num_iteration += 1
            self.logger.debug('Time left: %f', self.limit - used_time)
            self.logger.debug('Time last ensemble building: %f', time_iter)

            # Reload the ensemble targets every iteration, important, because cv may
            # update the ensemble targets in the cause of running auto-sklearn
            # TODO update cv in order to not need this any more!
            targets_ensemble = self.backend.load_targets_ensemble()

            # Load the predictions from the models
            exists = [os.path.isdir(dir_) for dir_ in paths_]
            if not exists[0]:  # all(exists):
                self.logger.debug('Prediction directory %s does not exist!' %
                              dir_ensemble)
                time.sleep(2)
                used_time = watch.wall_elapsed('ensemble_builder')
                continue

            if self.shared_mode is False:
                dir_ensemble_list = sorted(glob.glob(os.path.join(
                    dir_ensemble, 'predictions_ensemble_%s_*.npy' % self.seed)))
                if exists[1]:
                    dir_valid_list = sorted(glob.glob(os.path.join(
                        dir_valid, 'predictions_valid_%s_*.npy' % self.seed)))
                else:
                    dir_valid_list = []
                if exists[2]:
                    dir_test_list = sorted(glob.glob(os.path.join(
                        dir_test, 'predictions_test_%s_*.npy' % self.seed)))
                else:
                    dir_test_list = []
            else:
                dir_ensemble_list = sorted(os.listdir(dir_ensemble))
                dir_valid_list = sorted(os.listdir(dir_valid)) if exists[1] else []
                dir_test_list = sorted(os.listdir(dir_test)) if exists[2] else []

            # Check the modification times because predictions can be updated
            # over time!
            old_dir_ensemble_list_mtimes = dir_ensemble_list_mtimes
            dir_ensemble_list_mtimes = []
            # The ensemble dir can contain non-model files. We filter them and
            # use the following list instead
            dir_ensemble_model_files = []

            for dir_ensemble_file in dir_ensemble_list:
                if dir_ensemble_file.endswith("/"):
                    dir_ensemble_file = dir_ensemble_file[:-1]
                if not dir_ensemble_file.endswith(".npy"):
                    self.logger.warning('Error loading file (not .npy): %s', dir_ensemble_file)
                    continue

                dir_ensemble_model_files.append(dir_ensemble_file)
                basename = os.path.basename(dir_ensemble_file)
                dir_ensemble_file = os.path.join(dir_ensemble, basename)
                mtime = os.path.getmtime(dir_ensemble_file)
                dir_ensemble_list_mtimes.append(mtime)

            if len(dir_ensemble_model_files) == 0:
                self.logger.debug('Directories are empty')
                time.sleep(2)
                used_time = watch.wall_elapsed('ensemble_builder')
                continue

            if len(dir_ensemble_model_files) <= current_num_models and \
                    old_dir_ensemble_list_mtimes == dir_ensemble_list_mtimes:
                self.logger.debug('Nothing has changed since the last time')
                time.sleep(2)
                used_time = watch.wall_elapsed('ensemble_builder')
                continue

            with warnings.catch_warnings():
                warnings.simplefilter('ignore')
                # TODO restructure time management in the ensemble builder,
                # what is the time of index_run actually needed for?
                watch.start_task('index_run' + str(index_run))
            watch.start_task('ensemble_iter_' + str(num_iteration))

            # List of num_runs (which are in the filename) which will be included
            #  later
            include_num_runs = []
            backup_num_runs = []
            model_and_automl_re = re.compile(r'_([0-9]*)_([0-9]*)\.npy$')
            if self.ensemble_nbest is not None:
                # Keeps track of the single scores of each model in our ensemble
                scores_nbest = []
                # The indices of the model that are currently in our ensemble
                indices_nbest = []
                # The names of the models
                model_names = []

            model_names_to_scores = dict()

            model_idx = 0
            for model_name in dir_ensemble_model_files:
                if model_name.endswith("/"):
                    model_name = model_name[:-1]
                basename = os.path.basename(model_name)

                try:
                    if self.precision == "16":
                        predictions = np.load(os.path.join(dir_ensemble, basename)).astype(dtype=np.float16)
                    elif self.precision == "32":
                        predictions = np.load(os.path.join(dir_ensemble, basename)).astype(dtype=np.float32)
                    elif self.precision == "64":
                        predictions = np.load(os.path.join(dir_ensemble, basename)).astype(dtype=np.float64)
                    else:
                        predictions = np.load(os.path.join(dir_ensemble, basename))

                    score = calculate_score(targets_ensemble, predictions,
                                            self.task_type, self.metric,
                                            predictions.shape[1])

                except Exception as e:
                    self.logger.warning('Error loading %s: %s - %s',
                                        basename, type(e), e)
                    score = -1

                model_names_to_scores[model_name] = score
                match = model_and_automl_re.search(model_name)
                automl_seed = int(match.group(1))
                num_run = int(match.group(2))

                if self.ensemble_nbest is not None:
                    if score <= 0.001:
                        self.logger.info('Model only predicts at random: ' +
                                         model_name + ' has score: ' + str(score))
                        backup_num_runs.append((automl_seed, num_run))
                    # If we have less models in our ensemble than ensemble_nbest add
                    # the current model if it is better than random
                    elif len(scores_nbest) < self.ensemble_nbest:
                        scores_nbest.append(score)
                        indices_nbest.append(model_idx)
                        include_num_runs.append((automl_seed, num_run))
                        model_names.append(model_name)
                    else:
                        # Take the worst performing model in our ensemble so far
                        idx = np.argmin(np.array([scores_nbest]))

                        # If the current model is better than the worst model in
                        # our ensemble replace it by the current model
                        if scores_nbest[idx] < score:
                            self.logger.info(
                                'Worst model in our ensemble: %s with score %f '
                                'will be replaced by model %s with score %f',
                                model_names[idx], scores_nbest[idx], model_name,
                                score)
                            # Exclude the old model
                            del scores_nbest[idx]
                            scores_nbest.append(score)
                            del include_num_runs[idx]
                            del indices_nbest[idx]
                            indices_nbest.append(model_idx)
                            include_num_runs.append((automl_seed, num_run))
                            del model_names[idx]
                            model_names.append(model_name)

                        # Otherwise exclude the current model from the ensemble
                        else:
                            # include_num_runs.append(True)
                            pass

                else:
                    # Load all predictions that are better than random
                    if score <= 0.001:
                        # include_num_runs.append(True)
                        self.logger.info('Model only predicts at random: ' +
                                         model_name + ' has score: ' +
                                         str(score))
                        backup_num_runs.append((automl_seed, num_run))
                    else:
                        include_num_runs.append((automl_seed, num_run))

                model_idx += 1

            # If there is no model better than random guessing, we have to use
            # all models which do random guessing
            if len(include_num_runs) == 0:
                include_num_runs = backup_num_runs

            indices_to_model_names = dict()
            indices_to_run_num = dict()
            for i, model_name in enumerate(dir_ensemble_model_files):
                match = model_and_automl_re.search(model_name)
                automl_seed = int(match.group(1))
                num_run = int(match.group(2))
                if (automl_seed, num_run) in include_num_runs:
                    num_indices = len(indices_to_model_names)
                    indices_to_model_names[num_indices] = model_name
                    indices_to_run_num[num_indices] = (automl_seed, num_run)

            try:
                all_predictions_train, all_predictions_valid, all_predictions_test =\
                    self.get_all_predictions(dir_ensemble,
                                             dir_ensemble_model_files,
                                             dir_valid, dir_valid_list,
                                             dir_test, dir_test_list,
                                             include_num_runs,
                                             model_and_automl_re,
                                             self.precision)
            except IOError:
                self.logger.error('Could not load the predictions.')
                continue

            if len(include_num_runs) == 0:
                self.logger.error('All models do just random guessing')
                time.sleep(2)
                continue

            else:
                ensemble = EnsembleSelection(ensemble_size=self.ensemble_size,
                                             task_type=self.task_type,
                                             metric=self.metric)

                try:
                    ensemble.fit(all_predictions_train, targets_ensemble,
                                 include_num_runs)
                    self.logger.info(ensemble)

                except ValueError as e:
                    self.logger.error('Caught ValueError: ' + str(e))
                    used_time = watch.wall_elapsed('ensemble_builder')
                    time.sleep(2)
                    continue
                except IndexError as e:
                    self.logger.error('Caught IndexError: ' + str(e))
                    used_time = watch.wall_elapsed('ensemble_builder')
                    time.sleep(2)
                    continue
                except Exception as e:
                    self.logger.error('Caught error! %s', str(e))
                    used_time = watch.wall_elapsed('ensemble_builder')
                    time.sleep(2)
                    continue

                # Output the score
                self.logger.info('Training performance: %f' % ensemble.train_score_)

                self.logger.info('Building the ensemble took %f seconds' %
                            watch.wall_elapsed('ensemble_iter_' + str(num_iteration)))

            # Set this variable here to avoid re-running the ensemble builder
            # every two seconds in case the ensemble did not change
            current_num_models = len(dir_ensemble_model_files)

            ensemble_predictions = ensemble.predict(all_predictions_train)
            if sys.version_info[0] == 2:
                ensemble_predictions.flags.writeable = False
                current_hash = hash(ensemble_predictions.data)
            else:
                current_hash = hash(ensemble_predictions.data.tobytes())

            # Only output a new ensemble and new predictions if the output of the
            # ensemble would actually change!
            # TODO this is neither safe (collisions, tests only with the ensemble
            #  prediction, but not the ensemble), implement a hash function for
            # each possible ensemble builder.
            if last_hash is not None:
                if current_hash == last_hash:
                    self.logger.info('Ensemble output did not change.')
                    time.sleep(2)
                    continue
                else:
                    last_hash = current_hash
            else:
                last_hash = current_hash

            # Save the ensemble for later use in the main auto-sklearn module!
            self.backend.save_ensemble(ensemble, index_run, self.seed)

            # Save predictions for valid and test data set
            if len(dir_valid_list) == len(dir_ensemble_model_files):
                all_predictions_valid = np.array(all_predictions_valid)
                ensemble_predictions_valid = ensemble.predict(all_predictions_valid)
                if self.task_type == BINARY_CLASSIFICATION:
                    ensemble_predictions_valid = ensemble_predictions_valid[:, 1]
                if self.low_precision:
                    if self.task_type in [BINARY_CLASSIFICATION, MULTICLASS_CLASSIFICATION, MULTILABEL_CLASSIFICATION]:
                        ensemble_predictions_valid[ensemble_predictions_valid < 1e-4] = 0.
                    if self.metric in [BAC_METRIC, F1_METRIC]:
                        bin_array = np.zeros(ensemble_predictions_valid.shape, dtype=np.int32)
                        if (self.task_type != MULTICLASS_CLASSIFICATION) or (
                            ensemble_predictions_valid.shape[1] == 1):
                            bin_array[ensemble_predictions_valid >= 0.5] = 1
                        else:
                            sample_num = ensemble_predictions_valid.shape[0]
                            for i in range(sample_num):
                                j = np.argmax(ensemble_predictions_valid[i, :])
                                bin_array[i, j] = 1
                        ensemble_predictions_valid = bin_array
                    if self.task_type in CLASSIFICATION_TASKS:
                        if ensemble_predictions_valid.size < (20000 * 20):
                            precision = 3
                        else:
                            precision = 2
                    else:
                        if ensemble_predictions_valid.size > 1000000:
                            precision = 4
                        else:
                            # File size maximally 2.1MB
                            precision = 6

                self.backend.save_predictions_as_txt(ensemble_predictions_valid,
                                                'valid', index_run, prefix=self.dataset_name,
                                                precision=precision)
            else:
                self.logger.info('Could not find as many validation set predictions (%d) '
                                 'as ensemble predictions (%d)!',
                                 len(dir_valid_list), len(dir_ensemble_model_files))

            del all_predictions_valid

            if len(dir_test_list) == len(dir_ensemble_model_files):
                all_predictions_test = np.array(all_predictions_test)
                ensemble_predictions_test = ensemble.predict(all_predictions_test)
                if self.task_type == BINARY_CLASSIFICATION:
                    ensemble_predictions_test = ensemble_predictions_test[:, 1]
                if self.low_precision:
                    if self.task_type in [BINARY_CLASSIFICATION, MULTICLASS_CLASSIFICATION, MULTILABEL_CLASSIFICATION]:
                        ensemble_predictions_test[ensemble_predictions_test < 1e-4] = 0.
                    if self.metric in [BAC_METRIC, F1_METRIC]:
                        bin_array = np.zeros(ensemble_predictions_test.shape,
                                             dtype=np.int32)
                        if (self.task_type != MULTICLASS_CLASSIFICATION) or (
                                    ensemble_predictions_test.shape[1] == 1):
                            bin_array[ensemble_predictions_test >= 0.5] = 1
                        else:
                            sample_num = ensemble_predictions_test.shape[0]
                            for i in range(sample_num):
                                j = np.argmax(ensemble_predictions_test[i, :])
                                bin_array[i, j] = 1
                        ensemble_predictions_test = bin_array
                    if self.task_type in CLASSIFICATION_TASKS:
                        if ensemble_predictions_test.size < (20000 * 20):
                            precision = 3
                        else:
                            precision = 2
                    else:
                        if ensemble_predictions_test.size > 1000000:
                            precision = 4
                        else:
                            precision = 6

                self.backend.save_predictions_as_txt(ensemble_predictions_test,
                                                     'test', index_run, prefix=self.dataset_name,
                                                     precision=precision)
            else:
                self.logger.info('Could not find as many test set predictions (%d) as '
                             'ensemble predictions (%d)!',
                            len(dir_test_list), len(dir_ensemble_model_files))

            del all_predictions_test

            current_num_models = len(dir_ensemble_model_files)
            watch.stop_task('index_run' + str(index_run))
            time_iter = watch.get_wall_dur('index_run' + str(index_run))
            used_time = watch.wall_elapsed('ensemble_builder')
            index_run += 1
        return
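
For reference (not part of auto-sklearn; the filename is a made-up example following the naming scheme above), model_and_automl_re pulls the automl seed and run number out of a prediction file name:

import re

model_and_automl_re = re.compile(r'_([0-9]*)_([0-9]*)\.npy$')
match = model_and_automl_re.search('predictions_ensemble_1_00023.npy')
if match:
    automl_seed = int(match.group(1))   # 1
    num_run = int(match.group(2))       # 23
    print(automl_seed, num_run)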

Example 42

Project: tp-qemu Source File: cpuid.py
def run(test, params, env):
    """
    Boot guest with different cpu_models and cpu flags and check if guest works correctly.

    :param test: kvm test object.
    :param params: Dictionary with the test parameters.
    :param env: Dictionary with test environment.
    """
    qemu_binary = utils_misc.get_qemu_binary(params)

    cpu_model = params.get("cpu_model", "qemu64")

    xfail = False
    if (params.get("xfail") is not None) and (params.get("xfail") == "yes"):
        xfail = True

    def cpu_models_to_test():
        """Return the list of CPU models to be tested, based on the
        cpu_models and cpu_model config options.

        Config option "cpu_model" may be used to ask a single CPU model
        to be tested. Config option "cpu_models" may be used to ask
        multiple CPU models to be tested.

        If cpu_models is "*", all CPU models reported by QEMU will be tested.
        """
        models_opt = params.get("cpu_models")
        model_opt = params.get("cpu_model")

        if (models_opt is None and model_opt is None):
            raise error.TestError("No cpu_models or cpu_model option is set")

        cpu_models = set()

        if models_opt == '*':
            cpu_models.update(utils_misc.get_qemu_cpu_models(qemu_binary))
        elif models_opt:
            cpu_models.update(models_opt.split())

        if model_opt:
            cpu_models.add(model_opt)

        return cpu_models

    def test_qemu_cpu_models_list(self):
        """
        Check that the CPU models returned by <qemu> -cpu '?' are what is expected.
        """
        cpu_models = cpu_models_to_test()
        qemu_models = utils_misc.get_qemu_cpu_models(qemu_binary)
        missing = set(cpu_models) - set(qemu_models)
        if missing:
            raise error.TestFail(
                "Some CPU models not in QEMU CPU model list: %r" % (missing))
        added = set(qemu_models) - set(cpu_models)
        if added:
            logging.info("Extra CPU models in QEMU CPU listing: %s", added)

    def compare_cpuid_output(a, b):
        """
        Generates a list of (bit, va, vb) tuples for
        each bit that is different between a and b.
        """
        for bit in range(32):
            ba = (a & (1 << bit)) >> bit
            if b is not None:
                bb = (b & (1 << bit)) >> bit
            else:
                bb = None
            if ba != bb:
                yield (bit, ba, bb)

    def parse_cpuid_dump(output):
        dbg("parsing cpuid dump: %r", output)
        cpuid_re = re.compile(
            "^ *(0x[0-9a-f]+) +0x([0-9a-f]+): +eax=0x([0-9a-f]+) ebx=0x([0-9a-f]+) ecx=0x([0-9a-f]+) edx=0x([0-9a-f]+)$")
        output_match = re.search('(==START TEST==.*==END TEST==)', output, re.M | re.DOTALL)
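        # Illustrative note (not in the original source): a line that cpuid_re
        # above accepts looks like
        #   0x00000001 0x00: eax=0x000306c3 ebx=0x00100800 ecx=0x7ffafbff edx=0xbfebfbff
        # group(1) keeps the "0x" prefix of the input eax leaf (int(..., 16)
        # accepts it), group(2) is the input ecx without the prefix, and
        # groups 3-6 are the four output registers parsed below.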
        if output_match is None:
            dbg("cpuid dump doesn't follow expected pattern")
            return None
        output = output_match.group(1)
        out_lines = output.splitlines()
        if out_lines[0] != '==START TEST==' or out_lines[-1] != '==END TEST==':
            dbg("cpuid dump doesn't have expected delimiters")
            return None
        if out_lines[1] != 'CPU:':
            dbg("cpuid dump doesn't start with 'CPU:' line")
            return None
        result = {}
        for l in out_lines[2:-1]:
            m = cpuid_re.match(l)
            if m is None:
                dbg("invalid cpuid dump line: %r", l)
                return None
            in_eax = int(m.group(1), 16)
            in_ecx = int(m.group(2), 16)
            result[in_eax, in_ecx, 'eax'] = int(m.group(3), 16)
            result[in_eax, in_ecx, 'ebx'] = int(m.group(4), 16)
            result[in_eax, in_ecx, 'ecx'] = int(m.group(5), 16)
            result[in_eax, in_ecx, 'edx'] = int(m.group(6), 16)
        return result

    def get_test_kernel_cpuid(self, vm):
        vm.resume()

        timeout = float(params.get("login_timeout", 240))
        logging.debug("Will wait for CPUID serial output at %r",
                      vm.serial_console)
        if not utils_misc.wait_for(lambda:
                                   re.search("==END TEST==",
                                             vm.serial_console.get_output()),
                                   timeout, 1):
            raise error.TestFail("Could not get test complete message.")

        test_output = parse_cpuid_dump(vm.serial_console.get_output())
        logging.debug("Got CPUID serial output: %r", test_output)
        if test_output is None:
            raise error.TestFail("Test output signature not found in "
                                 "output:\n %s" % vm.serial_console.get_output())
        vm.destroy(gracefully=False)
        return test_output

    def find_cpu_obj(vm):
        """Find path of a valid VCPU object"""
        roots = ['/machine/icc-bridge/icc', '/machine/unattached/device']
        for root in roots:
            for child in vm.monitor.cmd('qom-list', dict(path=root)):
                logging.debug('child: %r', child)
                if child['type'].rstrip('>').endswith('-cpu'):
                    return root + '/' + child['name']

    def get_qom_cpuid(self, vm):
        assert vm.monitor.protocol == "qmp"
        cpu_path = find_cpu_obj(vm)
        logging.debug('cpu path: %r', cpu_path)
        r = {}
        for prop in 'feature-words', 'filtered-features':
            words = vm.monitor.cmd('qom-get', dict(path=cpu_path, property=prop))
            logging.debug('%s property: %r', prop, words)
            for w in words:
                reg = w['cpuid-register'].lower()
                key = (w['cpuid-input-eax'], w.get('cpuid-input-ecx', 0), reg)
                r.setdefault(key, 0)
                r[key] |= w['features']
        return r

    def get_guest_cpuid(self, cpu_model, feature=None, extra_params=None, qom_mode=False):
        if not qom_mode:
            test_kernel_dir = os.path.join(data_dir.get_deps_dir(), "cpuid", "src")
            os.chdir(test_kernel_dir)
            utils.make("cpuid_dump_kernel.bin")

        vm_name = params['main_vm']
        params_b = params.copy()
        if not qom_mode:
            params_b["kernel"] = os.path.join(
                test_kernel_dir, "cpuid_dump_kernel.bin")
        params_b["cpu_model"] = cpu_model
        params_b["cpu_model_flags"] = feature
        del params_b["images"]
        del params_b["nics"]
        if extra_params:
            params_b.update(extra_params)
        env_process.preprocess_vm(self, params_b, env, vm_name)
        vm = env.get_vm(vm_name)
        dbg('is dead: %r', vm.is_dead())
        vm.create()
        self.vm = vm
        if qom_mode:
            return get_qom_cpuid(self, vm)
        else:
            return get_test_kernel_cpuid(self, vm)

    def cpuid_to_vendor(cpuid_dump, idx):
        # The vendor string is the 12 ASCII characters packed into EBX, EDX
        # and ECX (in that order), least-significant byte first.
        dst = []
        for reg in ('ebx', 'edx', 'ecx'):
            for i in range(4):
                dst.append(chr(cpuid_dump[idx, 0, reg] >> (8 * i) & 0xff))
        return ''.join(dst)

    def default_vendor(self):
        """
        Boot qemu with specified cpu models and
        verify that CPU vendor matches requested
        """
        cpu_models = cpu_models_to_test()

        vendor = params.get("vendor")
        if vendor is None or vendor == "host":
            cmd = "grep 'vendor_id' /proc/cpuinfo | head -n1 | awk '{print $3}'"
            cmd_result = utils.run(cmd, ignore_status=True)
            vendor = cmd_result.stdout.strip()

        ignore_cpus = set(params.get("ignore_cpu_models", "").split(' '))
        cpu_models = cpu_models - ignore_cpus

        for cpu_model in cpu_models:
            out = get_guest_cpuid(self, cpu_model)
            guest_vendor = cpuid_to_vendor(out, 0x00000000)
            logging.debug("Guest's vendor: " + guest_vendor)
            if guest_vendor != vendor:
                raise error.TestFail("Guest vendor [%s], doesn't match "
                                     "required vendor [%s] for CPU [%s]" %
                                     (guest_vendor, vendor, cpu_model))

    def custom_vendor(self):
        """
        Boot qemu with specified vendor
        """
        has_error = False
        vendor = params["vendor"]

        try:
            out = get_guest_cpuid(self, cpu_model, "vendor=" + vendor)
            guest_vendor0 = cpuid_to_vendor(out, 0x00000000)
            guest_vendor80000000 = cpuid_to_vendor(out, 0x80000000)
            logging.debug("Guest's vendor[0]: " + guest_vendor0)
            logging.debug("Guest's vendor[0x80000000]: " +
                          guest_vendor80000000)
            if guest_vendor0 != vendor:
                raise error.TestFail("Guest vendor[0] [%s], doesn't match "
                                     "required vendor [%s] for CPU [%s]" %
                                     (guest_vendor0, vendor, cpu_model))
            if guest_vendor80000000 != vendor:
                raise error.TestFail("Guest vendor[0x80000000] [%s], "
                                     "doesn't match required vendor "
                                     "[%s] for CPU [%s]" %
                                     (guest_vendor80000000, vendor,
                                      cpu_model))
        except:
            has_error = True
            if xfail is False:
                raise
        if (has_error is False) and (xfail is True):
            raise error.TestFail("Test was expected to fail, but it didn't")

    def cpuid_to_level(cpuid_dump):
        r = cpuid_dump[0, 0]
        return r['eax']

    def custom_level(self):
        """
        Boot qemu with specified level
        """
        has_error = False
        level = params["level"]
        try:
            out = get_guest_cpuid(self, cpu_model, "level=" + level)
            guest_level = str(cpuid_to_level(out))
            if guest_level != level:
                raise error.TestFail("Guest's level [%s], doesn't match "
                                     "required level [%s]" %
                                     (guest_level, level))
        except:
            has_error = True
            if xfail is False:
                raise
        if (has_error is False) and (xfail is True):
            raise error.TestFail("Test was expected to fail, but it didn't")

    def cpuid_to_family(cpuid_dump):
        # Intel Processor Identification and the CPUID Instruction
        # http://www.intel.com/Assets/PDF/appnote/241618.pdf
        # 5.1.2 Feature Information (Function 01h)
        eax = cpuid_dump[1, 0]['eax']
        family = (eax >> 8) & 0xf
        if family == 0xf:
            # extract extended family
            return family + ((eax >> 20) & 0xff)
        return family

    def custom_family(self):
        """
        Boot qemu with specified family
        """
        has_error = False
        family = params["family"]
        try:
            out = get_guest_cpuid(self, cpu_model, "family=" + family)
            guest_family = str(cpuid_to_family(out))
            if guest_family != family:
                raise error.TestFail("Guest's family [%s], doesn't match "
                                     "required family [%s]" %
                                     (guest_family, family))
        except:
            has_error = True
            if xfail is False:
                raise
        if (has_error is False) and (xfail is True):
            raise error.TestFail("Test was expected to fail, but it didn't")

    def cpuid_to_model(cpuid_dump):
        # Intel Processor Identification and the CPUID Instruction
        # http://www.intel.com/Assets/PDF/appnote/241618.pdf
        # 5.1.2 Feature Information (Function 01h)
        eax = cpuid_dump[1, 0]['eax']
        model = (eax >> 4) & 0xf
        # extended model
        model |= (eax >> 12) & 0xf0
        return model

    def custom_model(self):
        """
        Boot qemu with specified model
        """
        has_error = False
        model = params["model"]
        try:
            out = get_guest_cpuid(self, cpu_model, "model=" + model)
            guest_model = str(cpuid_to_model(out))
            if guest_model != model:
                raise error.TestFail("Guest's model [%s], doesn't match "
                                     "required model [%s]" %
                                     (guest_model, model))
        except:
            has_error = True
            if xfail is False:
                raise
        if (has_error is False) and (xfail is True):
            raise error.TestFail("Test was expected to fail, but it didn't")

    def cpuid_to_stepping(cpuid_dump):
        # Intel Processor Identification and the CPUID Instruction
        # http://www.intel.com/Assets/PDF/appnote/241618.pdf
        # 5.1.2 Feature Information (Function 01h)
        eax = cpuid_dump[1, 0]['eax']
        stepping = eax & 0xf
        return stepping

    def custom_stepping(self):
        """
        Boot qemu with specified stepping
        """
        has_error = False
        stepping = params["stepping"]
        try:
            out = get_guest_cpuid(self, cpu_model, "stepping=" + stepping)
            guest_stepping = str(cpuid_to_stepping(out))
            if guest_stepping != stepping:
                raise error.TestFail("Guest's stepping [%s], doesn't match "
                                     "required stepping [%s]" %
                                     (guest_stepping, stepping))
        except:
            has_error = True
            if xfail is False:
                raise
        if (has_error is False) and (xfail is True):
            raise error.TestFail("Test was expected to fail, but it didn't")

    def cpuid_to_xlevel(cpuid_dump):
        # Intel Processor Identification and the CPUID Instruction
        # http://www.intel.com/Assets/PDF/appnote/241618.pdf
        # 5.2.1 Largest Extended Function # (Function 80000000h)
        return cpuid_dump[0x80000000, 0x00]['eax']

    def custom_xlevel(self):
        """
        Boot qemu with specified xlevel
        """
        has_error = False
        xlevel = params["xlevel"]
        if params.get("expect_xlevel") is not None:
            xlevel = params.get("expect_xlevel")

        try:
            out = get_guest_cpuid(self, cpu_model, "xlevel=" +
                                  params.get("xlevel"))
            guest_xlevel = str(cpuid_to_xlevel(out))
            if guest_xlevel != xlevel:
                raise error.TestFail("Guest's xlevel [%s], doesn't match "
                                     "required xlevel [%s]" %
                                     (guest_xlevel, xlevel))
        except:
            has_error = True
            if xfail is False:
                raise
        if (has_error is False) and (xfail is True):
            raise error.TestFail("Test was expected to fail, but it didn't")

    def cpuid_to_model_id(cpuid_dump):
        # Intel Processor Identification and the CPUID Instruction
        # http://www.intel.com/Assets/PDF/appnote/241618.pdf
        # 5.2.3 Processor Brand String (Functions 80000002h, 80000003h,
        # 80000004h)
        m_id = ""
        for idx in (0x80000002, 0x80000003, 0x80000004):
            regs = cpuid_dump[idx, 0]
            for name in ('eax', 'ebx', 'ecx', 'edx'):
                for shift in range(4):
                    c = ((regs[name] >> (shift * 8)) & 0xff)
                    if c == 0:  # drop trailing \0-s
                        break
                    m_id += chr(c)
        return m_id

    def custom_model_id(self):
        """
        Boot qemu with specified model_id
        """
        has_error = False
        model_id = params["model_id"]

        try:
            out = get_guest_cpuid(self, cpu_model, "model_id='%s'" %
                                  model_id)
            guest_model_id = cpuid_to_model_id(out)
            if guest_model_id != model_id:
                raise error.TestFail("Guest's model_id [%s], doesn't match "
                                     "required model_id [%s]" %
                                     (guest_model_id, model_id))
        except:
            has_error = True
            if xfail is False:
                raise
        if (has_error is False) and (xfail is True):
            raise error.TestFail("Test was expected to fail, but it didn't")

    def cpuid_regs_to_string(cpuid_dump, leaf, idx, regs):
        r = cpuid_dump[leaf, idx]
        signature = ""
        for i in regs:
            for shift in range(0, 4):
                c = chr((r[i] >> (shift * 8)) & 0xFF)
                if c in string.printable:
                    signature = signature + c
                else:
                    signature = "%s\\x%02x" % (signature, ord(c))
        logging.debug("(%s.%s:%s: signature: %s" % (leaf, idx, str(regs),
                                                    signature))
        return signature

    def cpuid_signature(self):
        """
        test signature in specified leaf:index:regs
        """
        has_error = False
        flags = params.get("flags", "")
        leaf = int(params.get("leaf", "0x40000000"), 0)
        idx = int(params.get("index", "0x00"), 0)
        regs = params.get("regs", "ebx ecx edx").split()
        signature = params["signature"]
        try:
            out = get_guest_cpuid(self, cpu_model, flags)
            _signature = cpuid_regs_to_string(out, leaf, idx, regs)
            if _signature != signature:
                raise error.TestFail("Guest's signature [%s], doesn't"
                                     "match required signature [%s]" %
                                     (_signature, signature))
        except:
            has_error = True
            if xfail is False:
                raise
        if (has_error is False) and (xfail is True):
            raise error.TestFail("Test was expected to fail, but it didn't")

    def cpuid_bit_test(self):
        """
        test bits in specified leaf:func:reg
        """
        has_error = False
        flags = params.get("flags", "")
        leaf = int(params.get("leaf", "0x40000000"), 0)
        idx = int(params.get("index", "0x00"), 0)
        reg = params.get("reg", "eax")
        bits = params["bits"].split()
        try:
            out = get_guest_cpuid(self, cpu_model, flags)
            r = out[leaf, idx][reg]
            logging.debug("CPUID(%s.%s).%s=0x%08x" % (leaf, idx, reg, r))
            for i in bits:
                if (r & (1 << int(i))) == 0:
                    raise error.TestFail("CPUID(%s.%s).%s[%s] is not set" %
                                         (leaf, idx, reg, i))
        except:
            has_error = True
            if xfail is False:
                raise
        if (has_error is False) and (xfail is True):
            raise error.TestFail("Test was expected to fail, but it didn't")

    def cpuid_reg_test(self):
        """
        test register value in specified leaf:index:reg
        """
        has_error = False
        flags = params.get("flags", "")
        leaf = int(params.get("leaf", "0x00"), 0)
        idx = int(params.get("index", "0x00"), 0)
        reg = params.get("reg", "eax")
        val = int(params["value"], 0)
        try:
            out = get_guest_cpuid(self, cpu_model, flags)
            r = out[leaf, idx][reg]
            logging.debug("CPUID(%s.%s).%s=0x%08x" % (leaf, idx, reg, r))
            if r != val:
                raise error.TestFail("CPUID(%s.%s).%s is not 0x%08x" %
                                     (leaf, idx, reg, val))
        except:
            has_error = True
            if xfail is False:
                raise
        if (has_error is False) and (xfail is True):
            raise error.TestFail("Test was expected to fail, but it didn't")

    def check_cpuid_dump(self):
        """
        Compare full CPUID dump data
        """
        machine_type = params.get("machine_type_to_check", "")
        kvm_enabled = params.get("enable_kvm", "yes") == "yes"

        ignore_cpuid_leaves = params.get("ignore_cpuid_leaves", "")
        ignore_cpuid_leaves = ignore_cpuid_leaves.split()
        whitelist = []
        for l in ignore_cpuid_leaves:
            l = l.split(',')
            # syntax of ignore_cpuid_leaves:
            # <in_eax>[,<in_ecx>[,<register>[ ,<bit>]]] ...
            for i in 0, 1, 3:  # integer fields:
                if len(l) > i:
                    l[i] = int(l[i], 0)
            whitelist.append(tuple(l))

        if not machine_type:
            raise error.TestNAError("No machine_type_to_check defined")
        cpu_model_flags = params.get('cpu_model_flags', '')
        full_cpu_model_name = cpu_model
        if cpu_model_flags:
            full_cpu_model_name += ','
            full_cpu_model_name += cpu_model_flags.lstrip(',')
        ref_file = os.path.join(data_dir.get_deps_dir(), 'cpuid',
                                "cpuid_dumps",
                                kvm_enabled and "kvm" or "nokvm",
                                machine_type, '%s-dump.txt' % (full_cpu_model_name))
        if not os.path.exists(ref_file):
            raise error.TestNAError("no cpuid dump file: %s" % (ref_file))
        reference = open(ref_file, 'r').read()
        if not reference:
            raise error.TestNAError(
                "no cpuid dump data on file: %s" % (ref_file))
        reference = parse_cpuid_dump(reference)
        if reference is None:
            raise error.TestNAError(
                "couldn't parse reference cpuid dump from file; %s" % (ref_file))
        qom_mode = params.get('qom_mode', "no").lower() == 'yes'
        if not qom_mode:
            cpu_model_flags += ',enforce'
        try:

            out = get_guest_cpuid(
                self, cpu_model, cpu_model_flags,
                extra_params=dict(machine_type=machine_type, smp=1),
                qom_mode=qom_mode)
        except (virt_vm.VMStartError, virt_vm.VMCreateError) as e:
            output = getattr(e, 'reason', getattr(e, 'output', ''))
            if "host doesn't support requested feature:" in output \
                or ("host cpuid" in output and
                    ("lacks requested flag" in output or
                     "flag restricted to guest" in output)) \
                    or ("Unable to find CPU definition:" in output):
                raise error.TestNAError(
                    "Can't run CPU model %s on this host" % (full_cpu_model_name))
            else:
                raise
        dbg('ref_file: %r', ref_file)
        dbg('ref: %r', reference)
        dbg('out: %r', out)
        ok = True
        for k in reference.keys():
            in_eax, in_ecx, reg = k
            diffs = compare_cpuid_output(reference[k], out.get(k))
            for d in diffs:
                bit, vreference, vout = d
                whitelisted = (in_eax,) in whitelist \
                    or (in_eax, in_ecx) in whitelist \
                    or (in_eax, in_ecx, reg) in whitelist \
                    or (in_eax, in_ecx, reg, bit) in whitelist
                silent = False

                if vout is None and params.get('ok_missing', 'no') == 'yes':
                    whitelisted = True
                    silent = True

                if not silent:
                    info(
                        "Non-matching bit: CPUID[0x%x,0x%x].%s[%d]: found %s instead of %s%s",
                        in_eax, in_ecx, reg, bit, vout, vreference,
                        whitelisted and " (whitelisted)" or "")

                if not whitelisted:
                    ok = False
        if not ok:
            raise error.TestFail("Unexpected CPUID data")

    # subtests runner
    test_type = params["test_type"]
    if test_type not in locals():
        raise error.TestError("Test function '%s' is not defined in"
                              " test" % test_type)

    test_func = locals()[test_type]
    return test_func(test)
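
The parse_cpuid_dump() helper used above consumes match objects whose groups 1 and 2 are the input EAX/ECX values and groups 3-6 the four output registers, all read as hexadecimal. Below is a minimal, self-contained sketch of that idea; the dump-line format and the CPUID_LINE pattern are illustrative assumptions, not the pattern the test itself compiles:

import re

# Hypothetical dump-line format; the real pattern lives earlier in the test.
CPUID_LINE = re.compile(r"^CPUID\s+([0-9a-f]{8})\s+([0-9a-f]{8}):"
                        r"\s+eax=([0-9a-f]{8})\s+ebx=([0-9a-f]{8})"
                        r"\s+ecx=([0-9a-f]{8})\s+edx=([0-9a-f]{8})$")

result = {}
line = "CPUID 00000001 00000000: eax=000306c3 ebx=00100800 ecx=80982203 edx=078bfbfd"
m = CPUID_LINE.match(line)
if m:
    in_eax = int(m.group(1), 16)
    in_ecx = int(m.group(2), 16)
    result[in_eax, in_ecx, 'eax'] = int(m.group(3), 16)
    result[in_eax, in_ecx, 'ebx'] = int(m.group(4), 16)
    result[in_eax, in_ecx, 'ecx'] = int(m.group(5), 16)
    result[in_eax, in_ecx, 'edx'] = int(m.group(6), 16)
print(result[1, 0, 'eax'])  # 198339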

Example 43

Project: python-beaver Source File: config.py
    def _parse(self, args):
        def _main_parser(config):
            transpose = ['config', 'confd_path', 'debug', 'daemonize', 'files', 'format', 'fqdn', 'hostname', 'path', 'pid', 'transport']
            namespace_dict = vars(args)
            for key in transpose:
                if key not in namespace_dict or namespace_dict[key] is None or namespace_dict[key] == '':
                    continue

                config[key] = namespace_dict[key]

            if args.mode:
                config['zeromq_bind'] = args.mode

            # HACK: Python 2.6 ConfigParser does not properly
            #       handle non-string values
            for key in config:
                if config[key] == '':
                    config[key] = None

            require_bool = ['debug', 'daemonize', 'fqdn', 'rabbitmq_exchange_durable', 'rabbitmq_queue_durable',
                            'rabbitmq_ha_queue', 'rabbitmq_ssl', 'tcp_ssl_enabled', 'tcp_ssl_verify']

            for key in require_bool:
                config[key] = bool(int(config[key]))

            require_int = [
                'max_failure',
                'max_queue_size',
                'queue_timeout',
                'rabbitmq_port',
                'rabbitmq_timeout',
                'rabbitmq_delivery_mode',
                'respawn_delay',
                'subprocess_poll_sleep',
                'refresh_worker_process',
                'tcp_port',
                'udp_port',
                'wait_timeout',
                'zeromq_hwm',
                'logstash_version',
                'kafka_batch_n',
                'kafka_batch_t',
                'kafka_ack_timeout',
                'number_of_consumer_processes',
                'ignore_old_files'
            ]
            for key in require_int:
                if config[key] is not None:
                    config[key] = int(config[key])

            require_float = [
                'update_file_mapping_time',
                'discover_interval',
            ]

            for key in require_float:
                if config[key] is not None:
                    config[key] = float(config[key])

            if config.get('format') == 'null':
                config['format'] = 'raw'

            if config['files'] is not None and type(config['files']) == str:
                config['files'] = config['files'].split(',')

            if config['path'] is not None:
                config['path'] = os.path.realpath(config['path'])
                if not os.path.isdir(config['path']):
                    raise LookupError('{0} does not exist'.format(config['path']))

            if config.get('hostname') is None:
                if config.get('fqdn') is True:
                    config['hostname'] = socket.getfqdn()
                else:
                    config['hostname'] = socket.gethostname()

            if config.get('sincedb_path'):
                config['sincedb_path'] = os.path.realpath(config.get('sincedb_path'))

            if config['zeromq_address'] and type(config['zeromq_address']) == str:
                config['zeromq_address'] = [x.strip() for x in config.get('zeromq_address').split(',')]

            if config.get('ssh_options') is not None:
                csv = config.get('ssh_options')
                config['ssh_options'] = []
                if type(csv) == str:
                    for opt in csv.split(','):
                        config['ssh_options'].append('-o %s' % opt.strip())
            else:
                config['ssh_options'] = []

            config['globs'] = {}

            return config

        def _section_parser(config, raise_exceptions=True):
            '''Parse a given INI-style config file using ConfigParser module.
            Stanza's names match file names, and properties are defaulted as in
            http://logstash.net/docs/1.1.1/inputs/file

            Config file example:

            [/var/log/syslog]
            type: syslog
            tags: sys,main

            [/var/log/auth]
            type: syslog
            ;tags: auth,main
            '''

            fields = config.get('add_field', '')
            if type(fields) != dict:
                try:
                    if type(fields) == str:
                        fields = filter(None, fields.split(','))
                    if len(fields) == 0:
                        config['fields'] = {}
                    elif (len(fields) % 2) == 1:
                        if raise_exceptions:
                            raise Exception('Wrong number of values for add_field')
                    else:
                        fieldkeys = fields[0::2]
                        fieldvalues = [[x] for x in fields[1::2]]
                        config['fields'] = dict(zip(fieldkeys, fieldvalues))
                except TypeError:
                    config['fields'] = {}

            if 'add_field' in config:
                del config['add_field']

            envFields = config.get('add_field_env', '')
            if type(envFields) != dict:
                try:
                    if type(envFields) == str:
                        envFields = envFields.replace(" ","")
                        envFields = filter(None, envFields.split(','))
                    if len(envFields) == 0:
                        config['envFields'] = {}
                    elif (len(envFields) % 2) == 1:
                        if raise_exceptions:
                            raise Exception('Wrong number of values for add_field_env')
                    else:
                        envFieldkeys = envFields[0::2]
                        envFieldvalues = []
                        for x in envFields[1::2]:
                            envFieldvalues.append(os.environ.get(x))
                        config['fields'].update(dict(zip(envFieldkeys, envFieldvalues)))
                except TypeError:
                    config['envFields'] = {}

            if 'add_field_env' in config:
                del config['add_field_env']

            try:
                tags = config.get('tags', '')
                if type(tags) == str:
                    tags = filter(None, tags.split(','))
                if len(tags) == 0:
                    tags = []
                config['tags'] = tags
            except TypeError:
                config['tags'] = []

            if config.get('format') == 'null':
                config['format'] = 'raw'

            file_type = config.get('type', None)
            if not file_type:
                config['type'] = 'file'

            require_bool = ['debug', 'ignore_empty', 'ignore_truncate']
            for k in require_bool:
                config[k] = bool(int(config[k]))

            config['delimiter'] = config['delimiter'].decode('string-escape')

            if config['multiline_regex_after']:
                config['multiline_regex_after'] = re.compile(config['multiline_regex_after'])
            if config['multiline_regex_before']:
                config['multiline_regex_before'] = re.compile(config['multiline_regex_before'])

            require_int = ['sincedb_write_interval', 'stat_interval', 'tail_lines']
            for k in require_int:
                config[k] = int(config[k])

            return config

        conf = Configuration(
            name='beaver',
            path=self._configfile,
            main_defaults=self._main_defaults,
            section_defaults=self._section_defaults,
            main_parser=_main_parser,
            section_parser=_section_parser,
            path_from_main='confd_path',
            config_parser=self._config_parser
        )

        config = conf.raw()
        self._beaver_config = config['beaver']
        self._file_config = config['sections']

        self._main_parser = _main_parser(self._main_defaults)
        self._section_defaults = _section_parser(self._section_defaults, raise_exceptions=False)

        self._files = {}
        for section in config['sections']:
            globs = eglob(section, config['sections'][section].get('exclude', ''))
            if not globs:
                self._logger.debug('Skipping glob due to no files found: %s' % section)
                continue

            for globbed_file in globs:
                self._files[os.path.realpath(globbed_file)] = config['sections'][section]
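
The section parser above compiles the user-supplied multiline_regex_after and multiline_regex_before strings once, so every tailed line can be matched against pattern objects instead of re-parsing the expression each time. A minimal sketch of the same idea, using a hypothetical section dict and example patterns (the real values come from the per-file INI sections):

import re

# Hypothetical per-section config values.
section = {
    'multiline_regex_after': r'\\$',    # previous line ends with a backslash
    'multiline_regex_before': r'^\s+',  # continuation lines start with whitespace
}

for key in ('multiline_regex_after', 'multiline_regex_before'):
    if section[key]:
        section[key] = re.compile(section[key])

print(bool(section['multiline_regex_before'].match('    continued line')))  # True
print(bool(section['multiline_regex_after'].search('wrapped line \\')))     # True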

Example 44

Project: powerline Source File: bat.py
def _fetch_battery_info(pl):
	try:
		import dbus
	except ImportError:
		pl.debug('Not using DBUS+UPower as dbus is not available')
	else:
		try:
			bus = dbus.SystemBus()
		except Exception as e:
			pl.exception('Failed to connect to system bus: {0}', str(e))
		else:
			interface = 'org.freedesktop.UPower'
			try:
				up = bus.get_object(interface, '/org/freedesktop/UPower')
			except dbus.exceptions.DBusException as e:
				if getattr(e, '_dbus_error_name', '').endswith('ServiceUnknown'):
					pl.debug('Not using DBUS+UPower as UPower is not available via dbus')
				else:
					pl.exception('Failed to get UPower service with dbus: {0}', str(e))
			else:
				devinterface = 'org.freedesktop.DBus.Properties'
				devtype_name = interface + '.Device'
				devices = []
				for devpath in up.EnumerateDevices(dbus_interface=interface):
					dev = bus.get_object(interface, devpath)
					devget = lambda what: dev.Get(
						devtype_name,
						what,
						dbus_interface=devinterface
					)
					if int(devget('Type')) != 2:
						pl.debug('Not using DBUS+UPower with {0}: invalid type', devpath)
						continue
					if not bool(devget('IsPresent')):
						pl.debug('Not using DBUS+UPower with {0}: not present', devpath)
						continue
					if not bool(devget('PowerSupply')):
						pl.debug('Not using DBUS+UPower with {0}: not a power supply', devpath)
						continue
					devices.append(devpath)
					pl.debug('Using DBUS+UPower with {0}', devpath)
				if devices:
					def _flatten_battery(pl):
						energy = 0.0
						energy_full = 0.0
						state = True
						for devpath in devices:
							dev = bus.get_object(interface, devpath)
							energy_full += float(
								dbus.Interface(dev, dbus_interface=devinterface).Get(
									devtype_name,
									'EnergyFull'
								),
							)
							energy += float(
								dbus.Interface(dev, dbus_interface=devinterface).Get(
									devtype_name,
									'Energy'
								),
							)
							state &= dbus.Interface(dev, dbus_interface=devinterface).Get(
								devtype_name,
								'State'
							) != 2
						return (energy * 100.0 / energy_full), state
					return _flatten_battery
				pl.debug('Not using DBUS+UPower as no batteries were found')

	if os.path.isdir('/sys/class/power_supply'):
		linux_energy_full_fmt = '/sys/class/power_supply/{0}/energy_full'
		linux_energy_fmt = '/sys/class/power_supply/{0}/energy_now'
		linux_status_fmt = '/sys/class/power_supply/{0}/status'
		devices = []
		for linux_supplier in os.listdir('/sys/class/power_supply'):
			energy_path = linux_energy_fmt.format(linux_supplier)
			if not os.path.exists(energy_path):
				continue
			pl.debug('Using /sys/class/power_supply with battery {0}', linux_supplier)
			devices.append(linux_supplier)
		if devices:
			def _get_battery_status(pl):
				energy = 0.0
				energy_full = 0.0
				state = True
				for device in devices:
					with open(linux_energy_full_fmt.format(device), 'r') as f:
						energy_full += int(float(f.readline().split()[0]))
					with open(linux_energy_fmt.format(device), 'r') as f:
						energy += int(float(f.readline().split()[0]))
					try:
						with open(linux_status_fmt.format(device), 'r') as f:
							state &= (f.readline().strip() != 'Discharging')
					except IOError:
						state = None
				return (energy * 100.0 / energy_full), state
			return _get_battery_status
		pl.debug('Not using /sys/class/power_supply as no batteries were found')
	else:
		pl.debug('Not using /sys/class/power_supply: no directory')

	try:
		from shutil import which  # Python-3.3 and later
	except ImportError:
		pl.info('Using dumb “which” which only checks for file in /usr/bin')
		which = lambda f: (lambda fp: os.path.exists(fp) and fp)(os.path.join('/usr/bin', f))

	if which('pmset'):
		pl.debug('Using pmset')

		BATTERY_PERCENT_RE = re.compile(r'(\d+)%')

		def _get_battery_status(pl):
			battery_summary = run_cmd(pl, ['pmset', '-g', 'batt'])
			battery_percent = BATTERY_PERCENT_RE.search(battery_summary).group(1)
			ac_charging = 'AC' in battery_summary
			return int(battery_percent), ac_charging
		return _get_battery_status
	else:
		pl.debug('Not using pmset: executable not found')

	if sys.platform.startswith('win') or sys.platform == 'cygwin':
		# From http://stackoverflow.com/a/21083571/273566, reworked
		try:
			from win32com.client import GetObject
		except ImportError:
			pl.debug('Not using win32com.client as it is not available')
		else:
			try:
				wmi = GetObject('winmgmts:')
			except Exception as e:
				pl.exception('Failed to run GetObject from win32com.client: {0}', str(e))
			else:
				for battery in wmi.InstancesOf('Win32_Battery'):
					pl.debug('Using win32com.client with Win32_Battery')

					def _get_battery_status(pl):
						# http://msdn.microsoft.com/en-us/library/aa394074(v=vs.85).aspx
						return battery.EstimatedChargeRemaining, battery.BatteryStatus == 6

					return _get_battery_status
				pl.debug('Not using win32com.client as no batteries were found')
		from ctypes import Structure, c_byte, c_ulong, byref
		if sys.platform == 'cygwin':
			pl.debug('Using cdll to communicate with kernel32 (Cygwin)')
			from ctypes import cdll
			library_loader = cdll
		else:
			pl.debug('Using windll to communicate with kernel32 (Windows)')
			from ctypes import windll
			library_loader = windll

		class PowerClass(Structure):
			_fields_ = [
				('ACLineStatus', c_byte),
				('BatteryFlag', c_byte),
				('BatteryLifePercent', c_byte),
				('Reserved1', c_byte),
				('BatteryLifeTime', c_ulong),
				('BatteryFullLifeTime', c_ulong)
			]

		def _get_battery_status(pl):
			powerclass = PowerClass()
			result = library_loader.kernel32.GetSystemPowerStatus(byref(powerclass))
			# http://msdn.microsoft.com/en-us/library/windows/desktop/aa372693(v=vs.85).aspx
			if not result:
				return None
			return powerclass.BatteryLifePercent, powerclass.ACLineStatus == 1

		if _get_battery_status(pl) is None:
			pl.debug('Not using GetSystemPowerStatus because it failed')
		else:
			pl.debug('Using GetSystemPowerStatus')

		return _get_battery_status

	raise NotImplementedError
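
When the DBus/UPower and /sys/class/power_supply paths are unavailable, the segment above falls back to pmset (typically on OS X) and extracts the charge level with the pre-compiled BATTERY_PERCENT_RE. A standalone sketch against a hypothetical `pmset -g batt` line (real output varies between macOS releases):

import re

BATTERY_PERCENT_RE = re.compile(r'(\d+)%')

# Hypothetical `pmset -g batt` output.
battery_summary = ("Now drawing from 'Battery Power'\n"
                   " -InternalBattery-0 (id=1234)\t87%; discharging; 3:21 remaining")
battery_percent = BATTERY_PERCENT_RE.search(battery_summary).group(1)
ac_charging = 'AC' in battery_summary
print(int(battery_percent))  # 87
print(ac_charging)           # False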

Example 45

Project: airship Source File: airship.py
Function: sync
def sync():
    games = [gameobj({  # The Banner Saga
        'regexformats': {
            'base': (r'^[0-4]/(resume|sav_(chapter[1235]|(leaving)?(einartof' +
                     r't|frostvellr)|(dengl|dund|hridvaldy|radormy|skog)r|bj' +
                     r'orulf|boersgard|finale|grofheim|hadeborg|ingrid|marek' +
                     r'|ridgehorn|sigrholm|stravhs|wyrmtoe))\.(bmpzip|png|im' +
                     r'g|save\.json)$')
        },
        'folder': 'save/saga1',
        'modules': {
            'steamcloud': {
                'id': '237990'
            },
            'icloud': {
                'id': 'MQ92743Y4D~com~stoicstudio~BannerSaga'
            }
        },
        'read': bannersaga_read,
        'write': bannersaga_write
    }), gameobj({  # Transistor
        'regexformats': {
            'base': r'^[Pp]rofile[1-5]\.sav$'
        },
        'modules': {
            'steamcloud': {
                'id': '237930'
            },
            'icloud': {
                'id': 'GPYC69L4CR~iCloud~com~supergiantgames~transistor',
                'folder': 'Documents',
            }
        },
        'read': transistor_read,
        'write': transistor_write
    }), gameobj({  # Costume Quest
        'regexformats': {
            'base': r'^CQ(_DLC)?_save_[012]$',
            'timeplayed': b'^.+(;TimePlayed=([1-9]*[0-9](\.[0-9]+)?)).*$',
            'level': br'worlds\/([a-z_]+)\/\1'
        },
        'modules': {
            'steamcloud': {
                'id': '115100'
            },
            'icloud': {
                'id': '8VM2L59D89~com~doublefine~cqios',
                'folder': 'Documents'
            }
        },
        'read': costumequest_read,
        'write': costumequest_write
    }), gameobj({  # Race the Sun
        'regexformats': {
            'base': r'^(savegame|rts_save)\.xml$'
        },
        'modules': {
            'steamcloud': {
                'id': '253030'
            },
            'icloud': {
                'id': 'iCloud~com~flippfly~racethesun',
                'folder': 'Documents'
            }
        },
        'read': racethesun_read,
        'write': racethesun_write
    })]

    if len(modules) > 1:

        workingmodules = {}
        modulenum = 0

        for module in modules:
            if module.init():
                workingmodules[modulename(module.__name__)] = module
                modulenum += 1

        if modulenum > 1:

            for game in games:
                gamemodules = []
                metadata = {}
                cancontinue = True

                for module in modules:
                    name = modulename(module.__name__)
                    if name in game['modules']:
                        if name not in workingmodules:
                            cancontinue = False
                            break
                        else:
                            module = workingmodules[name]

                            if ('folder' in game['modules'][name] or
                                    'folder' in game):
                                module.set_folder(game['folder']
                                                  if 'folder' not in
                                                  game['modules'][name] else
                                                  game['modules'][name]
                                                                 ['folder'])

                            module.set_id(game['modules'][name]['id'])

                            if module.will_work():
                                gamemodules.append(module)
                            else:
                                module.shutdown()
                                cancontinue = False
                                break

                if cancontinue:
                    regexes = {}
                    filetimestamps = {}
                    filedata = {}
                    files = {}
                    for regex in game['regexformats']:
                        if regex == 'base':
                            fileregex = re.compile(game['regexformats']
                                                       ['base'])
                        else:
                            regexes[regex] = re.compile(game['regexformats']
                                                            [regex])
                    moduleindex = 0
                    while cancontinue and moduleindex < len(gamemodules):
                        cancontinue = False
                        for filename in (gamemodules[moduleindex]
                                         .get_file_names()):
                            if fileregex.match(filename):
                                readobject = (game['read']
                                              (filename,
                                               gamemodules[moduleindex]
                                               .get_file_timestamp(filename),
                                               gamemodules[moduleindex]
                                               .read_file(filename),
                                               modulename(
                                                 gamemodules[moduleindex]
                                                 .__name__), regexes))
                                metadata.update(readobject[1])
                                for (itemfilename, itemfiletimestamp,
                                        itemfiledata) in readobject[0]:
                                    if fileregex.match(itemfilename):
                                        if itemfilename not in filetimestamps:
                                            filetimestamps[itemfilename] = \
                                                [-1] * len(gamemodules)
                                        (filetimestamps[itemfilename]
                                            [moduleindex]) = itemfiletimestamp
                                        if itemfilename not in filedata:
                                            filedata[itemfilename] = \
                                                [b''] * len(gamemodules)
                                        filedata[itemfilename][moduleindex] = \
                                            itemfiledata
                                        cancontinue = True
                        moduleindex += 1
                    if cancontinue:
                        for filename in filetimestamps:
                            for timestamp in filetimestamps[filename]:
                                if timestamp == 0:
                                    cancontinue = False
                                    break
                            if cancontinue:
                                newerfilesmayexist = True
                                highestlowtimestamp = -1
                                if cancontinue:
                                    while newerfilesmayexist:
                                        newerfilesmayexist = False
                                        lowesttimestamp = 2000000000
                                        lowesttimestampindex = -1
                                        for moduleindex in \
                                                range(len(gamemodules)):
                                            if (highestlowtimestamp <
                                                filetimestamps[filename]
                                                              [moduleindex] <
                                                lowesttimestamp and
                                                filetimestamps[filename]
                                                              [moduleindex] >
                                                    0):
                                                lowesttimestamp = \
                                                    (filetimestamps[filename]
                                                        [moduleindex])
                                                lowesttimestampindex = \
                                                    moduleindex
                                        if lowesttimestampindex != -1:
                                            newerfilesmayexist = True
                                            highestlowtimestamp = \
                                                lowesttimestamp
                                            for moduleindex in \
                                                    range(len(gamemodules)):
                                                if (moduleindex !=
                                                    lowesttimestampindex and
                                                    filetimestamps[filename]
                                                        [moduleindex] > 0 and
                                                    filedata[filename]
                                                        [lowesttimestampindex]
                                                        ==
                                                    filedata[filename]
                                                        [moduleindex]):
                                                    (filetimestamps[filename]
                                                        [moduleindex]) = \
                                                        lowesttimestamp

                                    highesttimestamp = -1
                                    highesttimestampindex = -1
                                    for moduleindex in range(len(gamemodules)):
                                        if (filetimestamps[filename]
                                                [moduleindex] >
                                                highesttimestamp):
                                            highesttimestamp = \
                                                (filetimestamps
                                                    [filename][moduleindex])
                                            highesttimestampindex = moduleindex
                                    files[filename] = \
                                        (filedata[filename]
                                         [highesttimestampindex],
                                         highesttimestamp)
                                    for moduleindex in range(len(gamemodules)):
                                        if (moduleindex !=
                                            highesttimestampindex and
                                            filetimestamps[filename]
                                            [moduleindex] <
                                                highesttimestamp):
                                            writeobject = (game['write']
                                                           (filename,
                                                            files[filename][0],
                                                            modulename(
                                                                gamemodules
                                                                [moduleindex]
                                                                .__name__),
                                                            metadata, regexes))
                                            if writeobject[0]:
                                                (gamemodules[moduleindex]
                                                    .write_file(
                                                        *writeobject[1]))
                        game['after'](files, workingmodules, metadata)
                for module in gamemodules:
                    module.shutdown()
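
Each game entry above stores its patterns as plain strings under 'regexformats'; sync() compiles the 'base' entry into fileregex and uses it to filter the file names reported by each cloud-save module, while the remaining entries are compiled into the regexes dict handed to the game's read/write callbacks. A tiny sketch of the file-name filtering, reusing the Transistor 'base' pattern quoted above with a hypothetical listing:

import re

# 'base' pattern from the Transistor entry above.
fileregex = re.compile(r'^[Pp]rofile[1-5]\.sav$')

# Hypothetical file names reported by a cloud-save module.
names = ['Profile1.sav', 'profile3.sav', 'Profile6.sav', 'settings.xml']
print([name for name in names if fileregex.match(name)])
# ['Profile1.sav', 'profile3.sav']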

Example 46

Project: dirigible-spreadsheet Source File: test_2787_SignUp.py
    def test_can_sign_up_from_signup_page(self):
        # Harold goes to the Dirigible home page
        self.go_to_url('/')

        # He notes that there are two "sign up" links, both pointing to the same URL
        self.click_link("id_signup_link")
        signup_url = self.browser.current_url
        self.go_to_url('/')

        # He follows one of them
        self.click_link("id_signup_call_to_action")
        self.assertEquals(self.browser.current_url, signup_url)

        # He notices a "sign up" form that requires a username, an email address,
        # and two copies of the same password.
        self.assertTrue(self.is_element_present(
            'css=input#id_username'))
        self.assertTrue(self.is_element_present(
            'css=input#id_email'))
        self.assertTrue(self.is_element_present(
            'css=input#id_password1'))
        self.assertTrue(self.is_element_present(
            'css=input#id_password2'))
        self.assertTrue(self.is_element_present(
            'css=input#id_signup_button'))
        self.assertEquals(
            self.selenium.get_attribute(
                'css=#id_signup_button@value'),
            'Sign up')
        self.assertEquals(
            self.selenium.get_attribute(
                'css=input#id_signup_button@type'),
            'submit')

        # Being an awkward sod, he tries to sign up with no details.
        self.click_link('id_signup_button')

        # He is told off.
        self.assertEquals(
            self.get_text('id=id_username_error'),
            "Please enter a username."
        )
        self.assertEquals(
            self.get_text('id=id_email_error'),
            "Please enter your email address."
        )
        self.assertEquals(
            self.get_text('id=id_password1_error'),
            "Please enter a password."
        )
        self.assertEquals(
            self.get_text('id=id_password2_error'),
            "Please enter a password."
        )

        # He tries again, this time using his friend's username,
        # but entering sensible details for everything else.
        username = self.get_my_username() + "_x"
        duplicate_username = self.get_my_username()
        self.email_address = 'harold.testuser-%[email protected]' % (self.get_my_username(),)
        password = 'p4ssw0rd'
        self.selenium.type(
            'id=id_username',
            duplicate_username)
        self.selenium.type(
            'id=id_email',
            self.email_address)
        self.selenium.type(
            'id=id_password1',
            password)
        self.selenium.type(
            'id=id_password2',
            password)
        self.click_link('id_signup_button')

        # He is told off.
        self.assertEquals(
            self.get_text('id=id_username_error'),
            "This username is already taken. Please choose another."
        )

        # He tries again with a unique username but mistypes the email address
        self.selenium.type(
            'id=id_username',
            username)
        self.selenium.type(
            'id=id_email',
            '@@@@@')
        self.selenium.type(
            'id=id_password1',
            password)
        self.selenium.type(
            'id=id_password2',
            password)
        self.click_link('id_signup_button')

        # He is told off.
        self.assertEquals(
            self.get_text('id=id_email_error'),
            "Please enter a valid email address."
        )

        # He tries again with a unique username but mistypes the password
        self.selenium.type(
            'id=id_username',
            username)
        self.selenium.type(
            'id=id_email',
            self.email_address)
        self.selenium.type(
            'id=id_password1',
            password)
        self.selenium.type(
            'id=id_password2',
            "hello")
        ## Do the last character using native keypresses to make sure that
        ## all of our client-side validation JS really gets called
        self.selenium.focus('id=id_password2')
        self.human_key_press(key_codes.NUMBER_1)

        # Even before he submits the form, the page is grumbling at him
        self.wait_for(
            self.are_password_fields_showing_error,
            lambda : "Password error to appear"
        )

        # With misplaced self-confidence, he goes ahead and clicks the button
        self.click_link('id_signup_button')

        # He is told off.
        self.assertEquals(
            self.get_text('id=id_non_field_errors'),
            "You must type the same password each time"
        )

        # He finally does it correctly
        self.selenium.type(
            'id=id_username',
            username)
        self.selenium.type(
            'id=id_email',
            self.email_address)
        self.selenium.type(
            'id=id_password1',
            password)
        self.selenium.type(
            'id=id_password2',
            password)

        # Before he clicks the link, he confirms that there is no error in the password fields
        self.wait_for(
            lambda : not self.are_password_fields_showing_error(),
            lambda : "Password errors to not be there"
        )

        self.click_link('id_signup_button')

        # He gets a message saying "Thank you" that tells him that an email has been
        # sent to his address.
        self.assertTrue('Thank you' in self.selenium.get_body_text())
        self.assertTrue(self.email_address in self.selenium.get_body_text())

        # There is a link to the Dirigible home page, which he follows and discovers
        # that it works.
        self.click_link('id_link_home')
        self.assertEquals(self.browser.current_url, Url.ROOT)

        # He checks his email, and after a short wait finds a message
        # from the Dirigible server, that looks like the following string:
        email_from, email_to, subject, message = self.pop_email_for_client(self.email_address)
        self.assertEquals(email_to, self.email_address)
        self.assertEquals(email_from, '[email protected]')
        self.assertEquals(subject, 'Dirigible Beta Sign-up')
        self.assertTrue('Click on the following link' in message)
        confirm_url_re = re.compile(
            r'<(http://projectdirigible\.com/signup/activate/[^>]+)>')
        match = confirm_url_re.search(message)
        self.assertTrue(match)
        confirmation_url = match.group(1).replace('projectdirigible.com', SERVER_IP)

        # He decides to type the confirmation link manually into his browser and,
        # inevitably, gets it completely wrong
        self.go_to_url(urljoin(Url.ROOT, '/signup/activate/wibble'))

        # He's given a kindly warning.
        self.assertTrue('the activation link you used was not recognised' in self.selenium.get_body_text())

        # He clicks on the link in the email instead
        self.go_to_url(confirmation_url)

        body_text = self.selenium.get_body_text()
        # He's taken to a page that welcomes him aboard and allows him to log in.
        self.assertTrue(
            'Welcome to Dirigible' in body_text,
            'could not find "Welcome to Dirigible" on page.  URL:<%s>, body text:\n%s' % (confirmation_url, body_text[:-100])
        )

        # He logs in, using the fields on the page.
        self.login(username, password, already_on_login_page=True)

        # He is taken to his dashboard
        self.assertEquals(self.browser.title, "%s's Dashboard: Dirigible" % (username,))
        _, __, path, ___, ____, _____ = urlparse(self.browser.current_url)
        self.assertEquals(path, '/')

        # He's super keen to get in on the Dirigible action, so when he sees the
        # link saying "Create new sheet", he clicks it with gusto
        self.click_link('id_create_new_sheet')

        # He sees a dialog box promoting the tutorial
        self.wait_for_element_visibility('id_tutorial_promo_dialog', True)
        dialog_text = self.get_text('id=id_tutorial_promo_dialog')
        self.assertTrue('tutorial' in dialog_text.lower())

        # He notes that even when the spinner stops, the focus stays on the dialog's OK
        # button
        self.wait_for_spinner_to_stop()
        self.assertTrue(
                self.is_element_focused('css=#id_tutorial_promo_dialog_close')
        )

        # He notices a link to the tutorial inside the dialog
        tutorial_link_inside_dialog_locator = 'css=#id_tutorial_promo_dialog a#id_tutorial_link'
        self.wait_for_element_to_appear(tutorial_link_inside_dialog_locator)
        tutorial_link_url = self.selenium.get_attribute('%s@href' % (tutorial_link_inside_dialog_locator))

        # He clicks the OK button to dismiss the dialog
        self.selenium.click('id=id_tutorial_promo_dialog_close')

        # the dialog disappears
        self.wait_for_element_visibility('id=id_tutorial_promo_dialog', False)

        # he goes to the tutorial url he remembers from earlier
        self.go_to_url(tutorial_link_url)

        # He finds himself on a page which contains the first tutorial
        expected_title = 'Tutorial part 1: First steps, adding Python to a spreadsheet'
        self.assertTrue(expected_title in self.browser.title)

        # He goes back to the dashboard
        self.go_to_url(Url.ROOT)

        # He creates another sheet, ready to get annoyed if he sees that dialog again...
        self.click_link('id_create_new_sheet')

        # And is happy that it isn't there,
        self.wait_for_grid_to_appear()
        self.assertFalse(
                self.is_element_present('id=id_tutorial_promo_dialog')
        )

        # He logs out
        self.logout()

        # He decides that he enjoyed confirming his account so much, he'd like to
        # do it again.
        self.go_to_url(confirmation_url)

        # He's given a kindly warning.
        self.assertTrue('your account might already be activated' in self.selenium.get_body_text())

        # When he returns to his email app, he sees a second email from us,
        email_from, email_to, subject, message = self.pop_email_for_client(self.email_address)
        self.assertEquals(email_to, self.email_address)
        self.assertEquals(email_from, '[email protected]')
        self.assertEquals(subject, 'Welcome to Dirigible')

        # pointing him towards the tutorial.
        self.assertTrue('tutorial' in message.lower())
        self.assertTrue('/documentation/tutorial01.html' in message)

        # We also recommend that he subscribe to the Dirigible blog
        self.assertTrue('blog.projectdirigible.com' in message)

        # or follows us on Twitter
        self.assertTrue('twitter.com/dirigiblegrid' in message)

        # Satisfied, he goes back to sleep.
        self.assertTrue('sleep')
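
The single compiled pattern in this functional test, confirm_url_re, captures the activation URL that the sign-up email wraps in angle brackets. In isolation, and against a hypothetical message body, it behaves like this:

import re

confirm_url_re = re.compile(
    r'<(http://projectdirigible\.com/signup/activate/[^>]+)>')

# Hypothetical email body; the test reads the real one from its mailbox.
message = ('Click on the following link to activate your account:\n'
           '<http://projectdirigible.com/signup/activate/abc123def456>')
match = confirm_url_re.search(message)
print(match.group(1))
# http://projectdirigible.com/signup/activate/abc123def456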

Example 47

Project: tp-libvirt Source File: virsh_nodecpustats.py
Function: run
def run(test, params, env):
    """
    Test the command virsh nodecpustats

    (1) Call the virsh nodecpustats command for all cpu host cpus
        separately
    (2) Get the output
    (3) Check against the /proc/stat output (o) for the respective cpu
        user: o[0] + o[1]
        system: o[2] + o[5] + o[6]
        idle: o[3]
        iowait: o[4]
    (4) Call the virsh nodecpustats command with an unexpected option
    (5) Call the virsh nodecpustats command with libvirtd service stop
    """

    def virsh_check_nodecpustats_percpu(actual_stats):
        """
        Check the actual nodecpustats output value
        total time <= system uptime
        """

        # Normalise to seconds from nano seconds
        total = float((actual_stats['system'] + actual_stats['user'] +
                       actual_stats['idle'] + actual_stats['iowait']) / (10 ** 9))
        uptime = float(utils.get_uptime())
        if not total <= uptime:
            raise error.TestFail("Commands 'virsh nodecpustats' not succeeded"
                                 " as total time: %f is more"
                                 " than uptime: %f" % (total, uptime))
        return True

    def virsh_check_nodecpustats(actual_stats, cpu_count):
        """
        Check the actual nodecpustats output value
        total time <= system uptime
        """

        # Normalise to seconds from nano seconds and get for one cpu
        total = float(((actual_stats['system'] + actual_stats['user'] +
                        actual_stats['idle'] + actual_stats['iowait']) / (10 ** 9)) / (
                      cpu_count))
        uptime = float(utils.get_uptime())
        if not total <= uptime:
            raise error.TestFail("Commands 'virsh nodecpustats' not succeeded"
                                 " as total time: %f is more"
                                 " than uptime: %f" % (total, uptime))
        return True

    def virsh_check_nodecpustats_percentage(actual_per):
        """
        Check the actual nodecpustats percentage adds up to 100%
        """

        total = int(round(actual_per['user'] + actual_per['system'] +
                          actual_per['idle'] + actual_per['iowait']))

        if not total == 100:
            raise error.TestFail("Commands 'virsh nodecpustats' not succeeded"
                                 " as the total percentage value: %d"
                                 " is not equal 100" % total)

    def parse_output(output):
        """
        To get the output parsed into a dictionary
        :param output: virsh command output

        :return: dict of user,system,idle,iowait times
        """

        # From the beginning of a line, group 1 is one or more word-characters,
        # followed by zero or more whitespace characters and a ':',
        # then one or more whitespace characters,
        # followed by group 2, which is one or more digit characters,
        # e.g. as below
        # user:                  6163690000000
        #
        regex_obj = re.compile(r"^(\w+)\s*:\s+(\d+)")
        actual = {}

        for line in output.stdout.split('\n'):
            match_obj = regex_obj.search(line)
            # Due to the extra space in the list
            if match_obj is not None:
                name = match_obj.group(1)
                value = match_obj.group(2)
                actual[name] = int(value)
        return actual

    def parse_percentage_output(output):
        """
        To get the output parsed into a dictionary
        :param output: virsh command output

        :return: dict of user,system,idle,iowait times
        """

        # From the beginning of a line, group 1 is one or more word-characters,
        # followed by zero or more whitespace characters and a ':',
        # then one or more whitespace characters,
        # followed by group 2, which is a decimal number,
        # e.g. as below
        # user:             1.5%
        #
        regex_obj = re.compile(r"^(\w+)\s*:\s+(\d+.\d+)")
        actual_percentage = {}

        for line in output.stdout.split('\n'):
            match_obj = regex_obj.search(line)
            # Due to the extra space in the list
            if match_obj is not None:
                name = match_obj.group(1)
                value = match_obj.group(2)
                actual_percentage[name] = float(value)
        return actual_percentage

    # Initialize the variables
    itr = int(params.get("inner_test_iterations"))
    option = params.get("virsh_cpunodestats_options")
    invalid_cpunum = params.get("invalid_cpunum")
    status_error = params.get("status_error")
    libvirtd = params.get("libvirtd", "on")

    # Prepare libvirtd service
    if libvirtd == "off":
        utils_libvirtd.libvirtd_stop()

    # Get the host cpu list
    host_cpus_list = utils.cpu_online_map()

    # Run the test case for 5 iterations by default; this can be changed in
    # the subtests.cfg file
    for i in range(itr):

        if status_error == "yes":
            if invalid_cpunum == "yes":
                option = "--cpu %s" % (len(host_cpus_list) + 1)
            output = virsh.nodecpustats(ignore_status=True, option=option)
            status = output.exit_status

            if status == 0:
                if libvirtd == "off":
                    utils_libvirtd.libvirtd_start()
                    raise error.TestFail("Command 'virsh nodecpustats' "
                                         "succeeded with libvirtd service "
                                         "stopped, incorrect")
                else:
                    raise error.TestFail("Command 'virsh nodecpustats %s' "
                                         "succeeded (incorrect command)" % option)

        elif status_error == "no":
            # Run the testcase for each cpu to get the cpu stats
            for cpu in host_cpus_list:
                option = "--cpu %s" % cpu
                output = virsh.nodecpustats(ignore_status=True, option=option)
                status = output.exit_status

                if status == 0:
                    actual_value = parse_output(output)
                    virsh_check_nodecpustats_percpu(actual_value)
                else:
                    raise error.TestFail("Command 'virsh nodecpustats %s'"
                                         " not succeeded" % option)

            # Run the test case for each cpu to get the cpu stats in percentage
            for cpu in host_cpus_list:
                option = "--cpu %s --percent" % cpu
                output = virsh.nodecpustats(ignore_status=True, option=option)
                status = output.exit_status

                if status == 0:
                    actual_value = parse_percentage_output(output)
                    virsh_check_nodecpustats_percentage(actual_value)
                else:
                    raise error.TestFail("Command 'virsh nodecpustats %s'"
                                         " not succeeded" % option)

            option = ''
            # Run the test case for total cpus to get the cpus stats
            output = virsh.nodecpustats(ignore_status=True, option=option)
            status = output.exit_status

            if status == 0:
                actual_value = parse_output(output)
                virsh_check_nodecpustats(actual_value, len(host_cpus_list))
            else:
                raise error.TestFail("Command 'virsh nodecpustats %s'"
                                     " not succeeded" % option)

            # Run the test case for the total cpus to get the stats in
            # percentage
            option = "--percent"
            output = virsh.nodecpustats(ignore_status=True, option=option)
            status = output.exit_status

            if status == 0:
                actual_value = parse_percentage_output(output)
                virsh_check_nodecpustats_percentage(actual_value)
            else:
                raise error.TestFail("Command 'virsh nodecpustats %s'"
                                     " not succeeded" % option)

    # Recover libvirtd service start
    if libvirtd == "off":
        utils_libvirtd.libvirtd_start()
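
For reference, the parse_output() helper above can be exercised in isolation against canned nodecpustats output. A standalone sketch using the same pattern (the sample values are invented):

import re

# Canned output in the shape parse_output() above expects (values are invented)
sample = ("user:                  6163690000000\n"
          "system:                2205190000000\n"
          "idle:                700847550000000\n"
          "iowait:                 94420000000\n")

# Same pattern as parse_output(): word, optional spaces, ':', spaces, integer value
stat_re = re.compile(r"^(\w+)\s*:\s+(\d+)")
stats = {}
for line in sample.split('\n'):
    match = stat_re.search(line)
    if match is not None:
        stats[match.group(1)] = int(match.group(2))

# The checks above normalise nanoseconds to seconds before comparing with uptime
total_seconds = sum(stats.values()) / float(10 ** 9)
print(stats)
print(total_seconds)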

Example 48

Project: PokemonGo-Map Source File: runserver.py
def main():
    # Patch threading to make exceptions catchable
    install_thread_excepthook()

    # Make sure exceptions get logged
    sys.excepthook = handle_exception

    args = get_args()

    # Add file logging if enabled
    if args.verbose and args.verbose != 'nofile':
        filelog = logging.FileHandler(args.verbose)
        filelog.setFormatter(logging.Formatter('%(asctime)s [%(threadName)16s][%(module)14s][%(levelname)8s] %(message)s'))
        logging.getLogger('').addHandler(filelog)
    if args.very_verbose and args.very_verbose != 'nofile':
        filelog = logging.FileHandler(args.very_verbose)
        filelog.setFormatter(logging.Formatter('%(asctime)s [%(threadName)16s][%(module)14s][%(levelname)8s] %(message)s'))
        logging.getLogger('').addHandler(filelog)

    # Check if we have the proper encryption library file and get its path
    encryption_lib_path = get_encryption_lib_path(args)
    if encryption_lib_path == "":
        sys.exit(1)

    if args.verbose or args.very_verbose:
        log.setLevel(logging.DEBUG)
    else:
        log.setLevel(logging.INFO)

    # Let's not forget to run Grunt / Only needed when running with webserver
    if not args.no_server:
        if not os.path.exists(os.path.join(os.path.dirname(__file__), 'static/dist')):
            log.critical('Missing front-end assets (static/dist) -- please run "npm install && npm run build" before starting the server')
            sys.exit()

    # These are very noisy, let's shush them up a bit
    logging.getLogger('peewee').setLevel(logging.INFO)
    logging.getLogger('requests').setLevel(logging.WARNING)
    logging.getLogger('pgoapi.pgoapi').setLevel(logging.WARNING)
    logging.getLogger('pgoapi.rpc_api').setLevel(logging.INFO)
    logging.getLogger('werkzeug').setLevel(logging.ERROR)

    config['parse_pokemon'] = not args.no_pokemon
    config['parse_pokestops'] = not args.no_pokestops
    config['parse_gyms'] = not args.no_gyms

    # Turn these back up if debugging
    if args.verbose or args.very_verbose:
        logging.getLogger('pgoapi').setLevel(logging.DEBUG)
    if args.very_verbose:
        logging.getLogger('peewee').setLevel(logging.DEBUG)
        logging.getLogger('requests').setLevel(logging.DEBUG)
        logging.getLogger('pgoapi.pgoapi').setLevel(logging.DEBUG)
        logging.getLogger('pgoapi.rpc_api').setLevel(logging.DEBUG)
        logging.getLogger('rpc_api').setLevel(logging.DEBUG)
        logging.getLogger('werkzeug').setLevel(logging.DEBUG)

    # use lat/lng directly if matches such a pattern
    prog = re.compile("^(\-?\d+\.\d+),?\s?(\-?\d+\.\d+)$")
    res = prog.match(args.location)
    if res:
        log.debug('Using coordinates from CLI directly')
        position = (float(res.group(1)), float(res.group(2)), 0)
    else:
        log.debug('Looking up coordinates in API')
        position = util.get_pos_by_name(args.location)

    # Use the latitude and longitude to get the local altitude from Google
    try:
        url = 'https://maps.googleapis.com/maps/api/elevation/json?locations={},{}'.format(
            str(position[0]), str(position[1]))
        altitude = requests.get(url).json()[u'results'][0][u'elevation']
        log.debug('Local altitude is: %sm', altitude)
        position = (position[0], position[1], altitude)
    except (requests.exceptions.RequestException, IndexError, KeyError):
        log.error('Unable to retrieve altitude from Google APIs; setting to 0')

    if not any(position):
        log.error('Could not get a position by name, aborting')
        sys.exit()

    log.info('Parsed location is: %.4f/%.4f/%.4f (lat/lng/alt)',
             position[0], position[1], position[2])

    if args.no_pokemon:
        log.info('Parsing of Pokemon disabled')
    if args.no_pokestops:
        log.info('Parsing of Pokestops disabled')
    if args.no_gyms:
        log.info('Parsing of Gyms disabled')
    if args.encounter:
        log.info('Encountering pokemon enabled')

    config['LOCALE'] = args.locale
    config['CHINA'] = args.china

    app = Pogom(__name__)
    db = init_database(app)
    if args.clear_db:
        log.info('Clearing database')
        if args.db_type == 'mysql':
            drop_tables(db)
        elif os.path.isfile(args.db):
            os.remove(args.db)
    create_tables(db)

    app.set_current_location(position)

    # Control the search status (running or not) across threads
    pause_bit = Event()
    pause_bit.clear()
    if args.on_demand_timeout > 0:
        pause_bit.set()

    heartbeat = [now()]

    # Setup the location tracking queue and push the first location on
    new_location_queue = Queue()
    new_location_queue.put(position)

    # DB Updates
    db_updates_queue = Queue()

    # Thread(s) to process database updates
    for i in range(args.db_threads):
        log.debug('Starting db-updater worker thread %d', i)
        t = Thread(target=db_updater, name='db-updater-{}'.format(i), args=(args, db_updates_queue))
        t.daemon = True
        t.start()

    # db cleaner; really only need one ever
    if not args.disable_clean:
        t = Thread(target=clean_db_loop, name='db-cleaner', args=(args,))
        t.daemon = True
        t.start()

    # WH Updates
    wh_updates_queue = Queue()

    # Thread to process webhook updates
    for i in range(args.wh_threads):
        log.debug('Starting wh-updater worker thread %d', i)
        t = Thread(target=wh_updater, name='wh-updater-{}'.format(i), args=(args, wh_updates_queue))
        t.daemon = True
        t.start()

    if not args.only_server:

        # Check all proxies before continuing so we know they are good
        if args.proxy and not args.proxy_skip_check:

            # Overwrite old args.proxy with new working list
            args.proxy = check_proxies(args)

        # Gather the pokemons!

        # attempt to dump the spawn points (do this before starting threads or endure the woe)
        if args.spawnpoint_scanning and args.spawnpoint_scanning != 'nofile' and args.dump_spawnpoints:
            with open(args.spawnpoint_scanning, 'w+') as file:
                log.info('Saving spawn points to %s', args.spawnpoint_scanning)
                spawns = Pokemon.get_spawnpoints_in_hex(position, args.step_limit)
                file.write(json.dumps(spawns))
                log.info('Finished exporting spawn points')

        argset = (args, new_location_queue, pause_bit, heartbeat, encryption_lib_path, db_updates_queue, wh_updates_queue)

        log.debug('Starting a %s search thread', args.scheduler)
        search_thread = Thread(target=search_overseer_thread, name='search-overseer', args=argset)
        search_thread.daemon = True
        search_thread.start()

    if args.cors:
        CORS(app)

    # No more stale JS
    init_cache_busting(app)

    app.set_search_control(pause_bit)
    app.set_heartbeat_control(heartbeat)
    app.set_location_queue(new_location_queue)

    config['ROOT_PATH'] = app.root_path
    config['GMAPS_KEY'] = args.gmaps_key

    if args.no_server:
        # This loop allows ctrl-c interrupts to work since flask won't be holding the program open
        while search_thread.is_alive():
            time.sleep(60)
    else:
        ssl_context = None
        if args.ssl_certificate and args.ssl_privatekey \
                and os.path.exists(args.ssl_certificate) and os.path.exists(args.ssl_privatekey):
            ssl_context = ssl.SSLContext(ssl.PROTOCOL_TLSv1_2)
            ssl_context.load_cert_chain(args.ssl_certificate, args.ssl_privatekey)
            log.info('Web server in SSL mode.')
        if args.verbose or args.very_verbose:
            app.run(threaded=True, use_reloader=False, debug=True, host=args.host, port=args.port, ssl_context=ssl_context)
        else:
            app.run(threaded=True, use_reloader=False, debug=False, host=args.host, port=args.port, ssl_context=ssl_context)
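
The coordinate shortcut above only fires when --location already looks like a raw lat/lng pair; otherwise the code falls through to a geocoding lookup. A quick standalone sketch of the same pattern against a couple of made-up inputs:

import re

# Same idea as the coordinate pattern above: two signed decimals,
# optionally separated by a comma and a single space
coord_re = re.compile(r"^(-?\d+\.\d+),?\s?(-?\d+\.\d+)$")

for location in ("40.7128, -74.0060", "Union Square, San Francisco"):
    match = coord_re.match(location)
    if match:
        lat, lng = float(match.group(1)), float(match.group(2))
        print("using coordinates directly:", lat, lng)
    else:
        print("would fall back to a geocoding lookup for:", location)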

Example 49

Project: zeya Source File: zeya.py
def ZeyaHandler(backend, library_repr, resource_basedir, bitrate,
                auth_type=None, auth_data=None):
    """
    Wrapper around the actual HTTP request handler implementation class. We
    need to create a closure so that the inner class can receive the following
    data:

    Backend to use.
    Library data.
    Base directory for resources.
    Bitrate for encoding.
    Authentication data.
    """

    class ZeyaHandlerImpl(BaseHTTPRequestHandler, object):
        """
        Web server request handler.
        """
        def do_GET(self):
            """
            Handle a GET request.
            """
            # http://host/ yields the library main page.
            if self.path == '/':
                self.serve_static_content('/library.html')
            # http://host/getlibrary returns a representation of the music
            # collection.
            elif self.path == '/getlibrary':
                self.serve_library()
            # http://host/getcontent?key=N yields an Ogg stream of the file
            # associated with the specified key.
            elif self.path.startswith('/getcontent?'):
                self.serve_content(urllib.unquote(self.path[12:]))
            # All other paths are assumed to be static content.
            # http://host/foo is mapped to resources/foo.
            else:
                self.serve_static_content(self.path)
        def get_content_type(self, path):
            """
            Return the MIME type associated with the given path.
            """
            path = path.lower()
            if path.endswith('.html'):
                return 'text/html'
            elif path.endswith('.png'):
                return 'image/png'
            elif path.endswith('.gif'):
                return 'image/gif'
            elif path.endswith('.css'):
                return 'text/css'
            elif path.endswith('.js'):
                return 'text/javascript'
            elif path.endswith('.ogg'):
                return 'audio/ogg'
            else:
                print ("Warning: couldn't identify content-type for %r, "
                       + "serving as application/octet-stream") % (path,)
                return 'application/octet-stream'
        def serve_content(self, query):
            """
            Serve an audio stream (audio/ogg).
            """
            # The query is of the form key=N or key=N&buffered=true.
            args = parse_qs(query)
            key = args['key'][0] if args.has_key('key') else ''
            # If buffering is activated, encode the entire file and serve the
            # Content-Length header. This increases song load latency because
            # we can't serve any of the file until we've finished encoding the
            # whole thing. However, Chrome needs the Content-Length header to
            # accompany audio data.
            buffered = args['buffered'][0] if args.has_key('buffered') else ''

            # TODO: send error 500 when we encounter an error during the
            # decoding phase. This is needed for reliable client-side error
            # dialogs.
            self.send_response(200)
            self.send_header('Content-type', 'audio/ogg')
            if buffered:
                # Complete the transcode and write to a temporary file.
                # Determine its length and serve the Content-Length header.
                output_file = tempfile.TemporaryFile()
                backend.get_content(key, output_file, bitrate, buffered=True)
                output_file.seek(0)
                data = output_file.read()
                self.send_header('Content-Length', str(len(data)))
                self.end_headers()
                output_file.seek(0)
                try:
                    backends.copy_output_with_shaping(
                        output_file.fileno(), self.wfile, bitrate)
                except socket.error:
                    pass
                output_file.close()
            else:
                # Don't determine the Content-Length. Just stream to the client
                # on the fly.
                self.end_headers()
                backend.get_content(key, self.wfile, bitrate)
            self.wfile.close()

        def send_data(self, ctype, data):
            """
            Send data to the client.

            Use deflate compression if client headers indicate that the
            other end supports it and if it's appropriate for this
            content-type.
            """
            compress_data = \
                (ctype.startswith('text/')
                 and 'Accept-Encoding' in self.headers
                 and 'deflate' in self.headers['Accept-Encoding'].split(','))
            self.send_response(200)
            if compress_data:
                data = zlib.compress(data)
                self.send_header('Content-Encoding', 'deflate')
                self.send_header('Vary', 'Accept-Encoding')
            self.send_header('Content-Length', str(len(data)))
            self.send_header('Content-Type', ctype)
            self.end_headers()
            self.wfile.write(data)
            self.wfile.close()

        def serve_library(self):
            """
            Serve a representation of the library.
            """
            self.send_data('text/html', library_repr.encode('utf-8'))

        def serve_static_content(self, path):
            """
            Serve static content from the resources/ directory.
            """
            try:
                # path already has a leading '/' in front of it. Strip it.
                full_path = os.path.join(resource_basedir, path[1:])
                # Ensure that the basedir we use for security checks ends in '/'.
                effective_basedir = os.path.join(resource_basedir, '')
                # Prevent directory traversal attacks. Canonicalize the
                # filename we're going to open and verify that it's inside the
                # resource directory.
                if not os.path.abspath(full_path).startswith(effective_basedir):
                    self.send_error(404, 'File not found: %s' % (path,))
                    return
                with open(full_path) as f:
                    self.send_data(self.get_content_type(path), f.read())
            except IOError:
                traceback.print_exc()
                self.send_error(404, 'File not found: %s' % (path,))

    class ZeyaBasicAuthHandlerImpl(ZeyaHandlerImpl):
        def __init__(self, *args, **kwargs):
            self.auth_regexp = re.compile('Basic ([%s[]*)' % b64dict)
            super(ZeyaBasicAuthHandlerImpl, self).__init__(*args, **kwargs)

        def send_no_auth(self):
            """
            Send an 'authorization required' page.
            """
            self.send_response(401)
            self.send_header('Content-type', 'text/html')
            self.send_header('Content-Length', str(len(no_auth_rval)))
            self.send_header('WWW-Authenticate', 'Basic realm="Zeya Secure"')
            self.end_headers()
            self.wfile.write(no_auth_rval)

        def authorized(self):
            """
            Return true if self.headers has valid authentication information.
            """
            if auth in self.headers and self.auth_regexp.match(self.headers[auth]):
                encoded_auth = self.auth_regexp.sub('\\1', self.headers[auth])
                decoded_auth = base64.b64decode(encoded_auth)
                client_user, client_pass = split_user_pass(decoded_auth)
                if client_user in auth_data:
                    client_crypt_pass = crypt.crypt(\
                            client_pass, auth_data[client_user][:2])
                    return client_crypt_pass == auth_data[client_user]
            return False

        def do_GET(self):
            """
            Handle a GET request, sending an authentication required header if
            not authenticated.
            """
            if self.authorized():
                ZeyaHandlerImpl.do_GET(self)
            else:
                self.send_no_auth()

    if auth_type == BASIC_AUTH:
        print 'Using Basic Auth Handler...'
        return ZeyaBasicAuthHandlerImpl
    return ZeyaHandlerImpl
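
ZeyaBasicAuthHandlerImpl above builds its regex from a b64dict constant defined elsewhere in zeya.py. A self-contained sketch of the same Basic-auth check, spelling the base64 alphabet out explicitly and using Python 3 syntax (the credentials are invented):

import base64
import re

# zeya.py derives this character class from its own b64dict constant;
# here the base64 alphabet is written out directly
auth_re = re.compile(r"Basic ([A-Za-z0-9+/=]*)")

# Invented credentials, encoded the way an HTTP client would send them
header_value = "Basic " + base64.b64encode(b"alice:secret").decode("ascii")

match = auth_re.match(header_value)
if match:
    user, password = base64.b64decode(match.group(1)).decode("ascii").split(":", 1)
    print(user, password)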

Example 50

Project: tp-libvirt Source File: virsh_volume.py
def run(test, params, env):
    """
    1. Create a pool
    2. Create n number of volumes(vol-create-as)
    3. Check the volume details from the following commands
       vol-info
       vol-key
       vol-list
       vol-name
       vol-path
       vol-pool
       qemu-img info
    4. Delete the volume and check in vol-list
    5. Repeat the steps for number of volumes given
    6. Delete the pool and target
    TODO: Handle negative testcases
    """

    def delete_volume(expected_vol):
        """
        Deletes Volume
        """
        pool_name = expected_vol['pool_name']
        vol_name = expected_vol['name']
        pv = libvirt_storage.PoolVolume(pool_name)
        if not pv.delete_volume(vol_name):
            raise error.TestFail("Delete volume %s failed." % vol_name)
        else:
            logging.debug("Volume: %s successfully deleted on pool: %s",
                          vol_name, pool_name)

    def get_vol_list(pool_name, vol_name):
        """
        Parse the volume list
        """
        output = virsh.vol_list(pool_name, "--details")
        rg = re.compile(
            r'^(\S+)\s+(\S+)\s+(\S+)\s+(\d+.\d+\s\S+)\s+(\d+.\d+.*)')
        vol = {}
        vols = []
        volume_detail = None
        for line in output.stdout.splitlines():
            match = re.search(rg, line.lstrip())
            if match is not None:
                vol['name'] = match.group(1)
                vol['path'] = match.group(2)
                vol['type'] = match.group(3)
                vol['capacity'] = match.group(4)
                vol['allocation'] = match.group(5)
                vols.append(vol)
                vol = {}
        for volume in vols:
            if volume['name'] == vol_name:
                volume_detail = volume
        return volume_detail

    def norm_capacity(capacity):
        """
        Normalize the capacity values to bytes
        """
        # Normalize all values to bytes
        norm_capacity = {}
        des = {'B': 'B', 'bytes': 'B', 'b': 'B', 'kib': 'K',
               'KiB': 'K', 'K': 'K', 'k': 'K', 'KB': 'K',
               'mib': 'M', 'MiB': 'M', 'M': 'M', 'm': 'M',
               'MB': 'M', 'gib': 'G', 'GiB': 'G', 'G': 'G',
               'g': 'G', 'GB': 'G', 'Gb': 'G', 'tib': 'T',
               'TiB': 'T', 'TB': 'T', 'T': 'T', 't': 'T'
               }
        val = {'B': 1,
               'K': 1024,
               'M': 1048576,
               'G': 1073741824,
               'T': 1099511627776
               }

        reg_list = re.compile(r'(\S+)\s(\S+)')
        match_list = re.search(reg_list, capacity['list'])
        if match_list is not None:
            mem_value = float(match_list.group(1))
            norm = val[des[match_list.group(2)]]
            norm_capacity['list'] = int(mem_value * norm)
        else:
            raise error.TestFail("Error in parsing capacity value in"
                                 " virsh vol-list")

        match_info = re.search(reg_list, capacity['info'])
        if match_info is not None:
            mem_value = float(match_info.group(1))
            norm = val[des[match_info.group(2)]]
            norm_capacity['info'] = int(mem_value * norm)
        else:
            raise error.TestFail("Error in parsing capacity value "
                                 "in virsh vol-info")

        norm_capacity['qemu_img'] = capacity['qemu_img']
        norm_capacity['xml'] = int(capacity['xml'])

        return norm_capacity

    def check_vol(expected, avail=True):
        """
        Checks the expected volume details with actual volume details from
        vol-dumpxml
        vol-list
        vol-info
        vol-key
        vol-path
        qemu-img info
        """
        error_count = 0

        pv = libvirt_storage.PoolVolume(expected['pool_name'])
        vol_exists = pv.volume_exists(expected['name'])
        if vol_exists:
            if not avail:
                error_count += 1
                logging.error("Expect volume %s not exists but find it",
                              expected['name'])
                return error_count
        else:
            if avail:
                error_count += 1
                logging.error("Expect volume %s exists but not find it",
                              expected['name'])
                return error_count
            else:
                logging.info("Volume %s checked successfully for deletion",
                             expected['name'])
                return error_count

        actual_list = get_vol_list(expected['pool_name'], expected['name'])
        actual_info = pv.volume_info(expected['name'])
        # Get values from vol-dumpxml
        volume_xml = vol_xml.VolXML.new_from_vol_dumpxml(expected['name'],
                                                         expected['pool_name'])

        # Check against virsh vol-key
        vol_key = virsh.vol_key(expected['name'], expected['pool_name'])
        if vol_key.stdout.strip() != volume_xml.key:
            logging.error("Volume key is mismatch \n%s"
                          "Key from xml: %s\nKey from command: %s",
                          expected['name'], volume_xml.key, vol_key)
            error_count += 1
        else:
            logging.debug("virsh vol-key for volume: %s successfully"
                          " checked against vol-dumpxml", expected['name'])

        # Check against virsh vol-name
        get_vol_name = virsh.vol_name(expected['path'])
        if get_vol_name.stdout.strip() != expected['name']:
            logging.error("Volume name mismatch\n"
                          "Expected name: %s\nOutput of vol-name: %s",
                          expected['name'], get_vol_name)

        # Check against virsh vol-path
        vol_path = virsh.vol_path(expected['name'], expected['pool_name'])
        if expected['path'] != vol_path.stdout.strip():
            logging.error("Volume path mismatch for volume: %s\n"
                          "Expected path: %s\nOutput of vol-path: %s\n",
                          expected['name'],
                          expected['path'], vol_path)
            error_count += 1
        else:
            logging.debug("virsh vol-path for volume: %s successfully checked"
                          " against created volume path", expected['name'])

        # Check path against virsh vol-list
        if expected['path'] != actual_list['path']:
            logging.error("Volume path mismatch for volume:%s\n"
                          "Expected Path: %s\nPath from virsh vol-list: %s",
                          expected['name'], expected['path'],
                          actual_list['path'])
            error_count += 1
        else:
            logging.debug("Path of volume: %s from virsh vol-list "
                          "successfully checked against created "
                          "volume path", expected['name'])

        # Check path against virsh vol-dumpxml
        if expected['path'] != volume_xml.path:
            logging.error("Volume path mismatch for volume: %s\n"
                          "Expected Path: %s\nPath from virsh vol-dumpxml: %s",
                          expected['name'], expected['path'], volume_xml.path)
            error_count += 1

        else:
            logging.debug("Path of volume: %s from virsh vol-dumpxml "
                          "successfully checked against created volume path",
                          expected['name'])

        # Check type against virsh vol-list
        if expected['type'] != actual_list['type']:
            logging.error("Volume type mismatch for volume: %s\n"
                          "Expected Type: %s\n Type from vol-list: %s",
                          expected['name'], expected['type'],
                          actual_list['type'])
            error_count += 1
        else:
            logging.debug("Type of volume: %s from virsh vol-list "
                          "successfully checked against the created "
                          "volume type", expected['name'])

        # Check type against virsh vol-info
        if expected['type'] != actual_info['Type']:
            logging.error("Volume type mismatch for volume: %s\n"
                          "Expected Type: %s\n Type from vol-info: %s",
                          expected['name'], expected['type'],
                          actual_info['Type'])
            error_count += 1
        else:
            logging.debug("Type of volume: %s from virsh vol-info successfully"
                          " checked against the created volume type",
                          expected['name'])

        # Check name against virsh vol-info
        if expected['name'] != actual_info['Name']:
            logging.error("Volume name mismatch for volume: %s\n"
                          "Expected name: %s\n Name from vol-info: %s",
                          expected['name'],
                          expected['name'], actual_info['Name'])
            error_count += 1
        else:
            logging.debug("Name of volume: %s from virsh vol-info successfully"
                          " checked against the created volume name",
                          expected['name'])

        # Check format against qemu-img info
        img_info = utils_misc.get_image_info(expected['path'])
        if expected['format']:
            if expected['format'] != img_info['format']:
                logging.error("Volume format mismatch for volume: %s\n"
                              "Expected format: %s\n"
                              "Format from qemu-img info: %s",
                              expected['name'], expected['format'],
                              img_info['format'])
                error_count += 1
            else:
                logging.debug("Format of volume: %s from qemu-img info "
                              "checked successfully against the created "
                              "volume format", expected['name'])

        # Check format against vol-dumpxml
        if expected['format']:
            if expected['format'] != volume_xml.format:
                logging.error("Volume format mismatch for volume: %s\n"
                              "Expected format: %s\n"
                              "Format from vol-dumpxml: %s",
                              expected['name'], expected['format'],
                              volume_xml.format)
                error_count += 1
            else:
                logging.debug("Format of volume: %s from virsh vol-dumpxml "
                              "checked successfully against the created"
                              " volume format", expected['name'])

        logging.info(expected['encrypt_format'])
        # Check encrypt against vol-dumpxml
        if expected['encrypt_format']:
            # As the 'default' format will change to a specific value (qcow),
            # just output it here
            logging.debug("Encryption format of volume '%s' is: %s",
                          expected['name'], volume_xml.encryption.format)
            # And also output encryption secret uuid
            secret_uuid = volume_xml.encryption.secret['uuid']
            logging.debug("Encryption secret of volume '%s' is: %s",
                          expected['name'], secret_uuid)
            if expected['encrypt_secret']:
                if expected['encrypt_secret'] != secret_uuid:
                    logging.error("Encryption secret mismatch for volume: %s\n"
                                  "Expected secret uuid: %s\n"
                                  "Secret uuid from vol-dumpxml: %s",
                                  expected['name'], expected['encrypt_secret'],
                                  secret_uuid)
                    error_count += 1
                else:
                    # If no encryption secret value was set, one is
                    # automatically generated at volume creation time
                    logging.debug("Volume encryption secret is %s", secret_uuid)

        # Check pool name against vol-pool
        vol_pool = virsh.vol_pool(expected['path'])
        if expected['pool_name'] != vol_pool.stdout.strip():
            logging.error("Pool name mismatch for volume: %s against"
                          "virsh vol-pool", expected['name'])
            error_count += 1
        else:
            logging.debug("Pool name of volume: %s checked successfully"
                          " against the virsh vol-pool", expected['name'])

        norm_cap = {}
        capacity = {}
        capacity['list'] = actual_list['capacity']
        capacity['info'] = actual_info['Capacity']
        capacity['xml'] = volume_xml.capacity
        capacity['qemu_img'] = img_info['vsize']
        norm_cap = norm_capacity(capacity)
        delta_size = params.get('delta_size', "1024")
        if abs(expected['capacity'] - norm_cap['list']) > delta_size:
            logging.error("Capacity mismatch for volume: %s against virsh"
                          " vol-list\nExpected: %s\nActual: %s",
                          expected['name'], expected['capacity'],
                          norm_cap['list'])
            error_count += 1
        else:
            logging.debug("Capacity value checked successfully against"
                          " virsh vol-list for volume %s", expected['name'])

        if abs(expected['capacity'] - norm_cap['info']) > delta_size:
            logging.error("Capacity mismatch for volume: %s against virsh"
                          " vol-info\nExpected: %s\nActual: %s",
                          expected['name'], expected['capacity'],
                          norm_cap['info'])
            error_count += 1
        else:
            logging.debug("Capacity value checked successfully against"
                          " virsh vol-info for volume %s", expected['name'])

        if abs(expected['capacity'] - norm_cap['xml']) > delta_size:
            logging.error("Capacity mismatch for volume: %s against virsh"
                          " vol-dumpxml\nExpected: %s\nActual: %s",
                          expected['name'], expected['capacity'],
                          norm_cap['xml'])
            error_count += 1
        else:
            logging.debug("Capacity value checked successfully against"
                          " virsh vol-dumpxml for volume: %s",
                          expected['name'])

        if abs(expected['capacity'] - norm_cap['qemu_img']) > delta_size:
            logging.error("Capacity mismatch for volume: %s against "
                          "qemu-img info\nExpected: %s\nActual: %s",
                          expected['name'], expected['capacity'],
                          norm_cap['qemu_img'])
            error_count += 1
        else:
            logging.debug("Capacity value checked successfully against"
                          " qemu-img info for volume: %s",
                          expected['name'])
        return error_count

    def get_all_secrets():
        """
        Return all exist libvirt secrets uuid in a list
        """
        secret_list = []
        secrets = virsh.secret_list().stdout.strip()
        for secret in secrets.splitlines()[2:]:
            secret_list.append(secret.strip().split()[0])
        return secret_list

    # Initialize the variables
    pool_name = params.get("pool_name")
    pool_type = params.get("pool_type")
    pool_target = params.get("pool_target")
    if os.path.dirname(pool_target) == "":
        pool_target = os.path.join(test.tmpdir, pool_target)
    vol_name = params.get("volume_name")
    vol_number = int(params.get("number_of_volumes", "2"))
    capacity = params.get("volume_size", "1048576")
    allocation = params.get("volume_allocation", "1048576")
    vol_format = params.get("volume_format")
    source_name = params.get("gluster_source_name", "gluster-vol1")
    source_path = params.get("gluster_source_path", "/")
    encrypt_format = params.get("vol_encrypt_format")
    encrypt_secret = params.get("encrypt_secret")
    emulated_image = params.get("emulated_image")
    emulated_image_size = params.get("emulated_image_size")
    if not libvirt_version.version_compare(1, 0, 0):
        if pool_type == "gluster":
            raise error.TestNAError("Gluster pool is not supported in current"
                                    " libvirt version.")

    try:
        str_capa = utils_misc.normalize_data_size(capacity, "B")
        int_capa = int(str(str_capa).split('.')[0])
    except ValueError:
        raise error.TestError("Translate size %s to 'B' failed" % capacity)
    try:
        str_capa = utils_misc.normalize_data_size(allocation, "B")
        int_allo = int(str(str_capa).split('.')[0])
    except ValueError:
        raise error.TestError("Translate size %s to 'B' failed" % allocation)

    # Stop multipathd to avoid pool start failures (for fs-like pools, the
    # newly added disk may be in use by device-mapper, so starting the pool
    # would report a 'disk already mounted' error).
    multipathd = service.Factory.create_service("multipathd")
    multipathd_status = multipathd.status()
    if multipathd_status:
        multipathd.stop()

    # Get exists libvirt secrets before test
    ori_secrets = get_all_secrets()
    expected_vol = {}
    vol_type = 'file'
    if pool_type in ['disk', 'logical']:
        vol_type = 'block'
    if pool_type == 'gluster':
        vol_type = 'network'
    logging.debug("Debug:\npool_name:%s\npool_type:%s\npool_target:%s\n"
                  "vol_name:%s\nvol_number:%s\ncapacity:%s\nallocation:%s\n"
                  "vol_format:%s", pool_name, pool_type, pool_target,
                  vol_name, vol_number, capacity, allocation, vol_format)

    libv_pvt = utlv.PoolVolumeTest(test, params)
    # Run Testcase
    total_err_count = 0
    try:
        # Create a new pool
        libv_pvt.pre_pool(pool_name=pool_name,
                          pool_type=pool_type,
                          pool_target=pool_target,
                          emulated_image=emulated_image,
                          image_size=emulated_image_size,
                          source_name=source_name,
                          source_path=source_path)
        for i in range(vol_number):
            volume_name = "%s_%d" % (vol_name, i)
            expected_vol['pool_name'] = pool_name
            expected_vol['pool_type'] = pool_type
            expected_vol['pool_target'] = pool_target
            expected_vol['capacity'] = int_capa
            expected_vol['allocation'] = int_allo
            expected_vol['format'] = vol_format
            expected_vol['name'] = volume_name
            expected_vol['type'] = vol_type
            expected_vol['encrypt_format'] = encrypt_format
            expected_vol['encrypt_secret'] = encrypt_secret
            # Creates volume
            if pool_type != "gluster":
                expected_vol['path'] = pool_target + '/' + volume_name
                new_volxml = vol_xml.VolXML()
                new_volxml.name = volume_name
                new_volxml.capacity = int_capa
                new_volxml.allocation = int_allo
                if vol_format:
                    new_volxml.format = vol_format
                encrypt_dict = {}
                if encrypt_format:
                    encrypt_dict.update({"format": encrypt_format})
                if encrypt_secret:
                    encrypt_dict.update({"secret": {'uuid': encrypt_secret}})
                if encrypt_dict:
                    new_volxml.encryption = new_volxml.new_encryption(**encrypt_dict)
                logging.debug("Volume XML for creation:\n%s", str(new_volxml))
                virsh.vol_create(pool_name, new_volxml.xml, debug=True)
            else:
                ip_addr = utlv.get_host_ipv4_addr()
                expected_vol['path'] = "gluster://%s/%s/%s" % (ip_addr,
                                                               source_name,
                                                               volume_name)
                utils.run("qemu-img create -f %s %s %s" % (vol_format,
                                                           expected_vol['path'],
                                                           capacity))
            virsh.pool_refresh(pool_name)
            # Check volumes
            total_err_count += check_vol(expected_vol)
            # Delete volume and check for results
            delete_volume(expected_vol)
            total_err_count += check_vol(expected_vol, False)
        if total_err_count > 0:
            raise error.TestFail("Get %s errors when checking volume" % total_err_count)
    finally:
        # Clean up
        for sec in get_all_secrets():
            if sec not in ori_secrets:
                virsh.secret_undefine(sec)
        try:
            libv_pvt.cleanup_pool(pool_name, pool_type, pool_target,
                                  emulated_image, source_name=source_name)
        except error.TestFail, detail:
            logging.error(str(detail))
        if multipathd_status:
            multipathd.start()
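
get_vol_list() above scrapes the virsh vol-list --details table with a single compiled regex; it can be sanity-checked against a canned line. A small sketch using the same pattern (the sample line is invented):

import re

# Invented line in the shape that virsh vol-list --details prints
sample_line = "vol_test_0  /var/lib/libvirt/images/vol_test_0  file  1.00 MiB  1.00 MiB"

# Same shape as the vol-list pattern in get_vol_list() above
vol_re = re.compile(r'^(\S+)\s+(\S+)\s+(\S+)\s+(\d+\.\d+\s\S+)\s+(\d+\.\d+.*)')
match = vol_re.search(sample_line)
if match is not None:
    volume = {'name': match.group(1),
              'path': match.group(2),
              'type': match.group(3),
              'capacity': match.group(4),
              'allocation': match.group(5)}
    print(volume['capacity'])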