Here are examples of the Python API re.compile, taken from open-source projects. By voting up, you can indicate which examples are most useful and appropriate.
165 Examples
2
Example 1
Project: django-rosetta Source File: views.py
@never_cache
@user_passes_test(lambda user: can_translate(user), settings.LOGIN_URL)
def home(request):
    """
    Displays a list of messages to be translated.

    Dispatches on request state: stores a newly chosen filter, saves posted
    translations back to the .po file (optionally compiling a .mo and poking
    the WSGI/uwsgi daemon to reload), or renders a paginated, optionally
    searched/filtered message list.  Falls back to the language-list view
    when no catalog file has been selected in storage yet.
    """

    def fix_nls(in_, out_):
        """Fixes submitted translations by filtering carriage returns and pairing
        newlines at the beginning and end of the translated string with the original
        """
        if 0 == len(in_) or 0 == len(out_):
            return out_

        # Drop carriage returns the browser added but the msgid never had.
        if "\r" in out_ and "\r" not in in_:
            out_ = out_.replace("\r", '')

        # Mirror the msgid's leading newline (or lack of one).
        if "\n" == in_[0] and "\n" != out_[0]:
            out_ = "\n" + out_
        elif "\n" != in_[0] and "\n" == out_[0]:
            out_ = out_.lstrip()
        # Mirror the msgid's trailing newline (or lack of one).
        if 0 == len(out_):
            pass
        elif "\n" == in_[-1] and "\n" != out_[-1]:
            out_ = out_ + "\n"
        elif "\n" != in_[-1] and "\n" == out_[-1]:
            out_ = out_.rstrip()
        return out_

    def _request_request(key, default=None):
        # Look the key up in GET first, then POST; fall back to `default`.
        if key in request.GET:
            return request.GET.get(key)
        elif key in request.POST:
            return request.POST.get(key)
        return default

    storage = get_storage(request)
    query = ''
    if storage.has('rosetta_i18n_fn'):
        rosetta_i18n_fn = storage.get('rosetta_i18n_fn')
        rosetta_i18n_app = get_app_name(rosetta_i18n_fn)
        rosetta_i18n_lang_code = storage.get('rosetta_i18n_lang_code')
        rosetta_i18n_lang_bidi = rosetta_i18n_lang_code.split('-')[0] in settings.LANGUAGES_BIDI
        rosetta_i18n_write = storage.get('rosetta_i18n_write', True)
        if rosetta_i18n_write:
            rosetta_i18n_pofile = pofile(rosetta_i18n_fn, wrapwidth=rosetta_settings.POFILE_WRAP_WIDTH)
            # Tag every entry with an md5 hash so POSTed form fields
            # (named m_<hash>...) can be matched back to catalog entries.
            for entry in rosetta_i18n_pofile:
                entry.md5hash = hashlib.md5(
                    (six.text_type(entry.msgid) +
                     six.text_type(entry.msgstr) +
                     six.text_type(entry.msgctxt or "")).encode('utf8')
                ).hexdigest()
        else:
            # Read-only catalog: reuse the parsed file cached in storage.
            rosetta_i18n_pofile = storage.get('rosetta_i18n_pofile')

        # A filter choice is persisted in storage, then redirected away so
        # the URL stays clean.
        if 'filter' in request.GET:
            if request.GET.get('filter') in ('untranslated', 'translated', 'fuzzy', 'all'):
                filter_ = request.GET.get('filter')
                storage.set('rosetta_i18n_filter', filter_)
                return HttpResponseRedirect(reverse('rosetta-home'))
        rosetta_i18n_filter = storage.get('rosetta_i18n_filter', 'all')

        if '_next' in request.POST:
            # The translation form was submitted: persist the changes.
            rx = re.compile(r'^m_([0-9a-f]+)')
            rx_plural = re.compile(r'^m_([0-9a-f]+)_([0-9]+)')
            file_change = False
            for key, value in request.POST.items():
                md5hash = None
                plural_id = None

                if rx_plural.match(key):
                    md5hash = str(rx_plural.match(key).groups()[0])
                    # polib parses .po files into unicode strings, but
                    # doesn't bother to convert plural indexes to int,
                    # so we need unicode here.
                    plural_id = six.text_type(rx_plural.match(key).groups()[1])

                    # Above no longer true as of Polib 1.0.4
                    if plural_id and plural_id.isdigit():
                        plural_id = int(plural_id)

                elif rx.match(key):
                    md5hash = str(rx.match(key).groups()[0])

                if md5hash is not None:
                    entry = rosetta_i18n_pofile.find(md5hash, 'md5hash')
                    # If someone did a makemessage, some entries might
                    # have been removed, so we need to check.
                    if entry:
                        old_msgstr = entry.msgstr
                        if plural_id is not None:
                            plural_string = fix_nls(entry.msgid_plural, value)
                            entry.msgstr_plural[plural_id] = plural_string
                        else:
                            entry.msgstr = fix_nls(entry.msgid, value)

                        # Checkbox f_<hash> toggles the entry's fuzzy flag.
                        is_fuzzy = bool(request.POST.get('f_%s' % md5hash, False))
                        old_fuzzy = 'fuzzy' in entry.flags

                        if old_fuzzy and not is_fuzzy:
                            entry.flags.remove('fuzzy')
                        elif not old_fuzzy and is_fuzzy:
                            entry.flags.append('fuzzy')

                        file_change = True

                        if old_msgstr != value or old_fuzzy != is_fuzzy:
                            entry_changed.send(sender=entry,
                                               user=request.user,
                                               old_msgstr=old_msgstr,
                                               old_fuzzy=old_fuzzy,
                                               pofile=rosetta_i18n_fn,
                                               language_code=rosetta_i18n_lang_code,
                                               )
                    else:
                        storage.set('rosetta_last_save_error', True)

            if file_change and rosetta_i18n_write:
                # Record who translated and with what, in the catalog metadata.
                try:
                    rosetta_i18n_pofile.metadata['Last-Translator'] = unicodedata.normalize('NFKD', u"%s %s <%s>" % (
                        getattr(request.user, 'first_name', 'Anonymous'),
                        getattr(request.user, 'last_name', 'User'),
                        getattr(request.user, 'email', '[email protected]')
                    )).encode('ascii', 'ignore')
                    rosetta_i18n_pofile.metadata['X-Translated-Using'] = u"django-rosetta %s" % rosetta.get_version(False)
                    rosetta_i18n_pofile.metadata['PO-Revision-Date'] = timestamp_with_timezone()
                except UnicodeDecodeError:
                    pass

                try:
                    rosetta_i18n_pofile.save()
                    po_filepath, ext = os.path.splitext(rosetta_i18n_fn)

                    if rosetta_settings.AUTO_COMPILE:
                        save_as_mo_filepath = po_filepath + '.mo'
                        rosetta_i18n_pofile.save_as_mofile(save_as_mo_filepath)

                    post_save.send(sender=None, language_code=rosetta_i18n_lang_code, request=request)

                    # Try auto-reloading via the WSGI daemon mode reload mechanism:
                    # touching the script file triggers mod_wsgi's reloader.
                    if rosetta_settings.WSGI_AUTO_RELOAD and \
                            'mod_wsgi.process_group' in request.environ and \
                            request.environ.get('mod_wsgi.process_group', None) and \
                            'SCRIPT_FILENAME' in request.environ and \
                            int(request.environ.get('mod_wsgi.script_reloading', '0')):
                        try:
                            os.utime(request.environ.get('SCRIPT_FILENAME'), None)
                        except OSError:
                            pass
                    # Try auto-reloading via uwsgi daemon reload mechanism
                    if rosetta_settings.UWSGI_AUTO_RELOAD:
                        try:
                            import uwsgi
                            # pretty easy right?
                            uwsgi.reload()
                        except:
                            # we may not be running under uwsgi :P
                            pass
                except Exception as e:
                    # Saving failed (e.g. read-only filesystem): surface the
                    # error and drop into read-only mode.
                    messages.error(request, e)
                    storage.set('rosetta_i18n_write', False)
                storage.set('rosetta_i18n_pofile', rosetta_i18n_pofile)

            # Retain query arguments
            query_arg = '?_next=1'
            if _request_request('query', False):
                query_arg += '&query=%s' % _request_request('query')
            if 'page' in request.GET:
                query_arg += '&page=%d&_next=1' % int(request.GET.get('page'))
            return HttpResponseRedirect(reverse('rosetta-home') + iri_to_uri(query_arg))

        rosetta_i18n_lang_code = storage.get('rosetta_i18n_lang_code')

        # Select the entries to display: a free-text search takes precedence
        # over the stored untranslated/translated/fuzzy filter.
        if _request_request('query', False) and _request_request('query', '').strip():
            query = _request_request('query', '').strip()
            rx = re.compile(re.escape(query), re.IGNORECASE)
            paginator = Paginator([e_ for e_ in rosetta_i18n_pofile if not e_.obsolete and rx.search(six.text_type(e_.msgstr) + six.text_type(e_.msgid) + u''.join([o[0] for o in e_.occurrences]))], rosetta_settings.MESSAGES_PER_PAGE)
        else:
            if rosetta_i18n_filter == 'untranslated':
                paginator = Paginator(rosetta_i18n_pofile.untranslated_entries(), rosetta_settings.MESSAGES_PER_PAGE)
            elif rosetta_i18n_filter == 'translated':
                paginator = Paginator(rosetta_i18n_pofile.translated_entries(), rosetta_settings.MESSAGES_PER_PAGE)
            elif rosetta_i18n_filter == 'fuzzy':
                paginator = Paginator([e_ for e_ in rosetta_i18n_pofile.fuzzy_entries() if not e_.obsolete], rosetta_settings.MESSAGES_PER_PAGE)
            else:
                paginator = Paginator([e_ for e_ in rosetta_i18n_pofile if not e_.obsolete], rosetta_settings.MESSAGES_PER_PAGE)

        if rosetta_settings.ENABLE_REFLANG:
            # Reference language: show each msgid's translation in another
            # locale's catalog alongside the one being edited.
            ref_lang = storage.get('rosetta_i18n_ref_lang_code', 'msgid')
            ref_pofile = None
            if ref_lang != 'msgid':
                ref_fn = re.sub('/locale/[a-z]{2}/', '/locale/%s/' % ref_lang, rosetta_i18n_fn)
                try:
                    ref_pofile = pofile(ref_fn)
                except IOError:
                    # there's a syntax error in the PO file and polib can't open it. Let's just
                    # do nothing and thus display msgids.
                    pass

            for o in paginator.object_list:
                # default
                o.ref_txt = o.msgid
                if ref_pofile is not None:
                    ref_entry = ref_pofile.find(o.msgid)
                    if ref_entry is not None and ref_entry.msgstr:
                        o.ref_txt = ref_entry.msgstr
            LANGUAGES = list(settings.LANGUAGES) + [('msgid', 'MSGID')]
        else:
            ref_lang = None
            LANGUAGES = settings.LANGUAGES

        # Resolve the requested page, ignoring out-of-range or non-numeric
        # values.
        page = 1
        if 'page' in request.GET:
            try:
                get_page = int(request.GET.get('page'))
            except ValueError:
                page = 1  # fall back to page 1
            else:
                if 0 < get_page <= paginator.num_pages:
                    page = get_page

        if '_next' in request.GET or '_next' in request.POST:
            # "Save and translate next block": advance (wrapping to 1) and
            # redirect.
            page += 1
            if page > paginator.num_pages:
                page = 1
            query_arg = '?page=%d' % page
            return HttpResponseRedirect(reverse('rosetta-home') + iri_to_uri(query_arg))

        rosetta_messages = paginator.page(page).object_list

        # Optionally annotate each message with its translation in the
        # project's main language.
        main_language = None
        if rosetta_settings.MAIN_LANGUAGE and rosetta_settings.MAIN_LANGUAGE != rosetta_i18n_lang_code:
            for language in settings.LANGUAGES:
                if language[0] == rosetta_settings.MAIN_LANGUAGE:
                    main_language = _(language[1])
                    break

            # Swap the language code inside the catalog path to locate the
            # main language's .po file.
            fl = ("/%s/" % rosetta_settings.MAIN_LANGUAGE).join(rosetta_i18n_fn.split("/%s/" % rosetta_i18n_lang_code))
            po = pofile(fl)

            for message in rosetta_messages:
                message.main_lang = po.find(message.msgid).msgstr

        needs_pagination = paginator.num_pages > 1
        if needs_pagination:
            if paginator.num_pages >= 10:
                page_range = pagination_range(1, paginator.num_pages, page)
            else:
                page_range = range(1, 1 + paginator.num_pages)

        # Older Django exposed ADMIN_MEDIA_PREFIX; newer versions serve admin
        # assets under STATIC_URL.
        try:
            ADMIN_MEDIA_PREFIX = settings.ADMIN_MEDIA_PREFIX
            ADMIN_IMAGE_DIR = ADMIN_MEDIA_PREFIX + 'img/admin/'
        except AttributeError:
            ADMIN_MEDIA_PREFIX = settings.STATIC_URL + 'admin/'
            ADMIN_IMAGE_DIR = ADMIN_MEDIA_PREFIX + 'img/'

        # One-shot error flag: consume it so it only shows once.
        if storage.has('rosetta_last_save_error'):
            storage.delete('rosetta_last_save_error')
            rosetta_last_save_error = True
        else:
            rosetta_last_save_error = False

        try:
            rosetta_i18n_lang_name = force_text(_(storage.get('rosetta_i18n_lang_name')))
        except:
            rosetta_i18n_lang_name = force_text(storage.get('rosetta_i18n_lang_name'))

        return render(request, 'rosetta/pofile.html', dict(
            version=rosetta.get_version(True),
            ADMIN_MEDIA_PREFIX=ADMIN_MEDIA_PREFIX,
            ADMIN_IMAGE_DIR=ADMIN_IMAGE_DIR,
            ENABLE_REFLANG=rosetta_settings.ENABLE_REFLANG,
            LANGUAGES=LANGUAGES,
            rosetta_settings=rosetta_settings,
            rosetta_i18n_lang_name=rosetta_i18n_lang_name,
            rosetta_i18n_lang_code=rosetta_i18n_lang_code,
            rosetta_i18n_lang_bidi=rosetta_i18n_lang_bidi,
            rosetta_last_save_error=rosetta_last_save_error,
            rosetta_i18n_filter=rosetta_i18n_filter,
            rosetta_i18n_write=rosetta_i18n_write,
            rosetta_messages=rosetta_messages,
            page_range=needs_pagination and page_range,
            needs_pagination=needs_pagination,
            main_language=main_language,
            rosetta_i18n_app=rosetta_i18n_app,
            page=page,
            query=query,
            paginator=paginator,
            rosetta_i18n_pofile=rosetta_i18n_pofile,
            ref_lang=ref_lang,
        ))
    else:
        # No catalog selected yet: show the language/file picker instead.
        return list_languages(request, do_session_warn=True)
2
Example 2
def _is_valid_language(self):
"""
Return True if the value of component in attribute "language" is valid,
and otherwise False.
:returns: True if value is valid, False otherwise
:rtype: boolean
CASE 1: Language part with/without region part
CASE 2: Language part without region part
CASE 3: Region part with language part
CASE 4: Region part without language part
"""
def check_generic_language(self, value):
"""
Check possible values in language part
when region part exists or not in language value.
Possible values of language attribute: a=letter
| *a
| *aa
| aa
| aaa
| ?a
| ?aa
| ??
| ??a
| ???
"""
lang_pattern = []
lang_pattern.append("^(\\")
lang_pattern.append(self.WILDCARD_MULTI)
lang_pattern.append("[a-z]{1,2}")
lang_pattern.append("|\\")
lang_pattern.append(self.WILDCARD_ONE)
lang_pattern.append("(([a-z][a-z]?)|(\\")
lang_pattern.append(self.WILDCARD_ONE)
lang_pattern.append("(\\")
lang_pattern.append(self.WILDCARD_ONE)
lang_pattern.append("|[a-z])?))")
lang_pattern.append("|([a-z]{2,3}))$")
lang_rxc = re.compile("".join(lang_pattern))
return lang_rxc.match(value)
def check_language_without_region(self, value):
"""
Check possible values in language part
when region part not exist in language value.
Possible values of language attribute: a=letter
| a?
| aa?
| a??
| a*
| aa*
| aaa*
| *a*
| *a?
| ?a*
| ?a?
"""
lang_pattern = []
lang_pattern.append("^([a-z]")
lang_pattern.append("([a-z](\\")
lang_pattern.append(self.WILDCARD_MULTI)
lang_pattern.append("|\\")
lang_pattern.append(self.WILDCARD_ONE)
lang_pattern.append("|")
lang_pattern.append("([a-z]\\")
lang_pattern.append(self.WILDCARD_MULTI)
lang_pattern.append("))")
lang_pattern.append("|")
lang_pattern.append("\\")
lang_pattern.append(self.WILDCARD_ONE)
lang_pattern.append("(\\")
lang_pattern.append(self.WILDCARD_ONE)
lang_pattern.append(")?")
lang_pattern.append("|\\")
lang_pattern.append(self.WILDCARD_MULTI)
lang_pattern.append(")|\\")
lang_pattern.append(self.WILDCARD_ONE)
lang_pattern.append("[a-z](\\")
lang_pattern.append(self.WILDCARD_MULTI)
lang_pattern.append("|\\")
lang_pattern.append(self.WILDCARD_ONE)
lang_pattern.append(")")
lang_pattern.append("|\\")
lang_pattern.append(self.WILDCARD_MULTI)
lang_pattern.append("[a-z](\\")
lang_pattern.append(self.WILDCARD_MULTI)
lang_pattern.append("|\\")
lang_pattern.append(self.WILDCARD_ONE)
lang_pattern.append(")")
lang_pattern.append(")$")
lang_rxc = re.compile("".join(lang_pattern))
return lang_rxc.match(value)
def check_region_with_language(self, value):
"""
Check possible values in region part when language part exists.
Possible values of language attribute: a=letter, 1=digit
| *
| a*
| a?
| aa
| ??
| 1*
| 1??
| 11*
| 11?
| 111
| ???
"""
region_pattern = []
region_pattern.append("^(")
region_pattern.append("(\\")
region_pattern.append(self.WILDCARD_MULTI)
region_pattern.append(")|((\\")
region_pattern.append(self.WILDCARD_ONE)
region_pattern.append("){2,3})|([a-z]([a-z]|\\")
region_pattern.append(self.WILDCARD_MULTI)
region_pattern.append("|\\")
region_pattern.append(self.WILDCARD_ONE)
region_pattern.append("))|([0-9](\\")
region_pattern.append(self.WILDCARD_MULTI)
region_pattern.append("|\\")
region_pattern.append(self.WILDCARD_ONE)
region_pattern.append("(\\")
region_pattern.append(self.WILDCARD_ONE)
region_pattern.append(")?|[0-9][0-9\\")
region_pattern.append(self.WILDCARD_MULTI)
region_pattern.append("\\")
region_pattern.append(self.WILDCARD_ONE)
region_pattern.append("])))$")
region_rxc = re.compile("".join(region_pattern))
return region_rxc.match(region)
def check_region_without_language(self, value):
"""
Check possible values in region part when language part not exist.
Possible values of language attribute: 1=digit
| *111
| *11
| *1
"""
region_pattern = []
region_pattern.append("^(")
region_pattern.append("(\\")
region_pattern.append(self.WILDCARD_MULTI)
region_pattern.append("[0-9])")
region_pattern.append("([0-9]([0-9])?)?")
region_pattern.append(")$")
region_rxc = re.compile("".join(region_pattern))
return region_rxc.match(region)
comp_str = self._encoded_value.lower()
# Value with wildcards; separate language and region of value
parts = comp_str.split(self.SEPARATOR_LANG)
language = parts[0]
region_exists = len(parts) == 2
# Check the language part
if check_generic_language(self, language) is not None:
# Valid language, check region part
if region_exists:
# Region part exists; check it
region = parts[1]
return (check_region_with_language(self, region) is not None)
else:
# Not region part
return True
elif check_language_without_region(self, language) is not None:
# Language without region; region part should not exist
return not region_exists
else:
# Language part not exist; check region part
region = parts[0]
return check_region_without_language(self, region) is not None
2
Example 3
Project: geoinference Source File: geocoder.py
def geocode_noisy(self, location_name):
    """
    Returns the latitude and longitude (tuple) of a noisy location name
    (e.g., the location field of a social media user's profile), or None
    when nothing matches.  If your input isn't cleaned, you probably want
    this method instead of geocode().
    """
    usa_regex = re.compile(r"\bUSA\b")
    us_regex = re.compile(r"\bUS\b")
    uk_regex = re.compile(r"\bUK\b")
    # BUG FIX: the dot in "st." must be escaped, otherwise the pattern also
    # rewrites "sta", "ste", ... (e.g. "stadium" -> "saintdium").
    st_regex = re.compile(r"st\.")

    name = location_name.strip()

    # Correct for a few common noisy prefixes
    if name.startswith("the city of "):
        name = name[len("the city of "):]
    if name.startswith("downtown "):
        name = name[len("downtown "):]

    # Swap out the three common country abbreviations
    name = usa_regex.sub("United States", name)
    name = us_regex.sub("United States", name)
    name = uk_regex.sub("United Kingdom", name)

    # Substitute out state names from the US (first abbreviation only).
    matches = re.search(self.state_abbv_regex, name)
    if matches is not None:
        abbv = matches.group(0)
        name = name[:matches.start(0)] + self.abbv_to_state[abbv] + name[matches.end(0):]

    # Once we've matched abbreviations, lower case for all further
    # comparisons
    name = name.lower()
    if name == "nyc":
        name = "new york, new york"

    # Strip off all the cruft on either side.
    # NOTE(review): the class [\W+] strips non-word chars AND literal '+'
    # -- kept as-is from the original.
    name = re.sub(r'^[\W+]+', " ", name)
    name = re.sub(r'[\W+]+$', " ", name)
    name = name.strip()

    # Rename the dict for brevity since we're going to be referencing it a
    # lot in the next section
    locs = self.lc_name_to_location

    def _lookup2(a, b):
        # Try "a<TAB>b", then "b<TAB>a", then "a" alone.
        if a + '\t' + b in locs:
            return locs[a + '\t' + b]
        if b + '\t' + a in locs:
            return locs[b + '\t' + a]
        return locs.get(a)

    def _lookup3(a, b, c):
        # Try "a<TAB>b", then "a<TAB>c", then "a" alone.
        if a + '\t' + b in locs:
            return locs[a + '\t' + b]
        if a + '\t' + c in locs:
            return locs[a + '\t' + c]
        return locs.get(a)

    lat_lon = None

    # Look for some name delimiters in the name to try matching on
    # city/state, etc.
    if name.find(',') >= 0 or name.find('-') >= 0 or name.find('|') >= 0:
        parts = re.split(r'[,\-|]+', name)
        if len(parts) == 2:
            p1 = parts[0].strip()
            p2 = parts[1].strip()
            lat_lon = _lookup2(p1, p2)
            # Retry with the alternate "st." <-> "saint" spelling.
            if lat_lon is None and "st." in p1:
                lat_lon = _lookup2(st_regex.sub("saint", p1), p2)
            elif lat_lon is None and "saint" in p1:
                lat_lon = _lookup2(p1.replace("saint", "st."), p2)
        elif len(parts) == 3:
            p1 = parts[0].strip()
            p2 = parts[1].strip()
            p3 = parts[2].strip()
            lat_lon = _lookup3(p1, p2, p3)
            # NOTE: two independent ifs (not elif) to mirror the original
            # retry behavior for the three-part case.
            if lat_lon is None and "st." in p1:
                p1 = st_regex.sub("saint", p1)
                lat_lon = _lookup3(p1, p2, p3)
            if lat_lon is None and "saint" in p1:
                p1 = p1.replace("saint", "st.")
                lat_lon = _lookup3(p1, p2, p3)
    # Otherwise no delimiters so we're left to guess at where the name
    # breaks
    else:
        parts = re.split(r'[ \t\n\r]+', name)
        if len(parts) == 2:
            p1 = parts[0]
            p2 = parts[1]
            lat_lon = _lookup2(p1, p2)
            if lat_lon is None and "st." in p1:
                lat_lon = _lookup2(st_regex.sub("saint", p1), p2)
            elif lat_lon is None and "saint" in p1:
                lat_lon = _lookup2(p1.replace("saint", "st."), p2)
        # len(parts) > 2: no heuristic implemented (the original computed an
        # unused last/city split); fall through to the whole-name match.

    # Last ditch effort: just try matching the whole name and hope it's
    # a single unambiguous city match
    if lat_lon is None and name in locs:
        lat_lon = locs[name]
    return lat_lon
2
Example 4
Project: lair-drones-version1-deprecated Source File: nexpose.py
def parse(project, nexpose_file, include_informational=False):
    """Parses a Nexpose XMLv2 file and updates the Lair database

    :param project: The project id
    :param nexpose_file: The Nexpose xml file to be parsed
    :param include_informational: Whether to include info findings in data. Default False
    """
    # Strips the "CVE-"/"CAN-" prefix from reference ids.
    cve_pattern = re.compile(r'(CVE-|CAN-)')
    html_tag_pattern = re.compile(r'<.*?>')
    white_space_pattern = re.compile(r'\s+', re.MULTILINE)

    # Used to create unique notes in DB
    note_id = 1

    tree = et.parse(nexpose_file)
    root = tree.getroot()
    # Only Nexpose XML export version 2.0 is supported.
    if root is None or \
            root.tag != "NexposeReport" or \
            root.attrib['version'] != "2.0":
        raise IncompatibleDataVersionError("Nexpose XML 2.0")

    # Create the project dictionary which acts as foundation of document
    project_dict = dict(models.project_model)
    project_dict['commands'] = list()
    project_dict['vulnerabilities'] = list()
    project_dict['project_id'] = project
    project_dict['commands'].append({'tool': TOOL, 'command': 'scan'})

    # Used to maintain a running list of host:port vulnerabilities by plugin
    vuln_host_map = dict()

    # Pass 1: build one vulnerability model per <vulnerability> element.
    for vuln in root.iter('vulnerability'):
        v = copy.deepcopy(models.vulnerability_model)
        v['cves'] = list()
        v['plugin_ids'] = list()
        v['identified_by'] = list()
        v['hosts'] = list()
        v['cvss'] = float(vuln.attrib['cvssScore'])
        v['title'] = vuln.attrib['title']
        plugin_id = vuln.attrib['id'].lower()

        # Set plugin id
        plugin_dict = dict(models.plugin_id_model)
        plugin_dict['tool'] = TOOL
        plugin_dict['id'] = plugin_id
        v['plugin_ids'].append(plugin_dict)

        # Set identified by information
        identified_dict = dict(models.identified_by_model)
        identified_dict['tool'] = TOOL
        identified_dict['id'] = plugin_id
        v['identified_by'].append(identified_dict)

        # Search for exploits; any exploit present flags the vulnerability.
        for exploit in vuln.iter('exploit'):
            v['flag'] = True
            note_dict = copy.deepcopy(models.note_model)
            note_dict['title'] = "{0} ({1})".format(
                exploit.attrib['type'],
                exploit.attrib['id']
            )
            # NOTE(review): .encode(...) yields bytes on Python 3; this
            # module appears to target Python 2 -- confirm before porting.
            note_dict['content'] = "{0}\n{1}".format(
                exploit.attrib['title'].encode('ascii', 'replace'),
                exploit.attrib['link'].encode('ascii', 'replace')
            )
            note_dict['last_modified_by'] = TOOL
            v['notes'].append(note_dict)

        # Search for CVE references
        for reference in vuln.iter('reference'):
            if reference.attrib['source'] == 'CVE':
                cve = cve_pattern.sub('', reference.text)
                v['cves'].append(cve)

        # Search for solution (collapse all whitespace runs to one space)
        solution = vuln.find('solution')
        if solution is not None:
            for text in solution.itertext():
                s = text.encode('ascii', 'replace').strip()
                v['solution'] += white_space_pattern.sub(" ", s)

        # Search for description
        description = vuln.find('description')
        if description is not None:
            for text in description.itertext():
                s = text.encode('ascii', 'replace').strip()
                v['description'] += white_space_pattern.sub(" ", s)

        # Build mapping of plugin-id to host to vuln dictionary
        vuln_host_map[plugin_id] = dict()
        vuln_host_map[plugin_id]['vuln'] = v
        vuln_host_map[plugin_id]['hosts'] = set()

    # Pass 2: build host models and record which host:port each plugin hit.
    for node in root.iter('node'):
        host_dict = dict(models.host_model)
        host_dict['os'] = list()
        host_dict['ports'] = list()
        host_dict['hostnames'] = list()

        # Set host status
        if node.attrib['status'] != 'alive':
            host_dict['alive'] = False

        # Set IP address
        host_dict['string_addr'] = node.attrib['address']
        host_dict['long_addr'] = helper.ip2long(node.attrib['address'])

        # Set the OS fingerprint, keeping only the highest-certainty guess.
        certainty = 0
        for os in node.iter('os'):
            if float(os.attrib['certainty']) > certainty:
                certainty = float(os.attrib['certainty'])
                os_dict = dict(models.os_model)
                os_dict['tool'] = TOOL
                os_dict['weight'] = OS_WEIGHT
                fingerprint = ''
                if 'vendor' in os.attrib:
                    fingerprint += os.attrib['vendor'] + " "

                # Make an extra check to limit duplication of data in the
                # event that the product name was already in the vendor name
                if 'product' in os.attrib and \
                        os.attrib['product'] not in fingerprint:
                    fingerprint += os.attrib['product'] + " "

                fingerprint = fingerprint.strip()
                os_dict['fingerprint'] = fingerprint
                host_dict['os'] = list()
                host_dict['os'].append(os_dict)

        # Test for general, non-port related vulnerabilities
        # Add them as tcp, port 0
        tests = node.find('tests')
        if tests is not None:
            port_dict = dict(models.port_model)
            port_dict['service'] = "general"
            for test in tests.findall('test'):
                # vulnerable-since attribute is used to flag
                # confirmed vulns
                if 'vulnerable-since' in test.attrib:
                    plugin_id = test.attrib['id'].lower()
                    # This is used to track evidence for the host/port
                    # and plugin
                    h = "{0}:{1}:{2}".format(
                        host_dict['string_addr'],
                        "0",
                        models.PROTOCOL_TCP
                    )
                    vuln_host_map[plugin_id]['hosts'].add(h)

            host_dict['ports'].append(port_dict)

        # Use the endpoint elements to populate port data
        for endpoint in node.iter('endpoint'):
            port_dict = copy.deepcopy(models.port_model)
            port_dict['port'] = int(endpoint.attrib['port'])
            port_dict['protocol'] = endpoint.attrib['protocol']
            if endpoint.attrib['status'] != 'open':
                port_dict['alive'] = False

            # Use the service elements to identify service
            for service in endpoint.iter('service'):
                # Ignore unknown services; first recognized name wins.
                if 'unknown' not in service.attrib['name'].lower():
                    if not port_dict['service']:
                        port_dict['service'] = service.attrib['name'].lower()

                # Use the test elements to identify vulnerabilities for
                # the host
                for test in service.iter('test'):
                    # vulnerable-since attribute is used to flag
                    # confirmed vulns
                    if 'vulnerable-since' in test.attrib:
                        plugin_id = test.attrib['id'].lower()

                        # Add service notes for evidence
                        note_dict = copy.deepcopy(models.note_model)
                        note_dict['title'] = "{0} (ID{1})".format(plugin_id,
                                                                  str(note_id))
                        for evidence in test.iter():
                            if evidence.text:
                                for line in evidence.text.split("\n"):
                                    line = line.strip()
                                    if line:
                                        note_dict['content'] += " " + \
                                            line + "\n"
                            elif evidence.tag == "URLLink":
                                note_dict['content'] += " "
                                note_dict['content'] += evidence.attrib[
                                    'LinkURL'
                                ] + "\n"
                        note_dict['last_modified_by'] = TOOL
                        port_dict['notes'].append(note_dict)
                        note_id += 1

                        # This is used to track evidence for the host/port
                        # and plugin
                        h = "{0}:{1}:{2}".format(
                            host_dict['string_addr'],
                            str(port_dict['port']),
                            port_dict['protocol']
                        )
                        vuln_host_map[plugin_id]['hosts'].add(h)

            # Use the fingerprint elements to identify product, keeping only
            # the highest-certainty guess.
            certainty = 0
            for fingerprint in endpoint.iter('fingerprint'):
                if float(fingerprint.attrib['certainty']) > certainty:
                    certainty = float(fingerprint.attrib['certainty'])
                    prod = ''
                    if 'vendor' in fingerprint.attrib:
                        prod += fingerprint.attrib['vendor'] + " "
                    if 'product' in fingerprint.attrib:
                        prod += fingerprint.attrib['product'] + " "
                    if 'version' in fingerprint.attrib:
                        prod += fingerprint.attrib['version'] + " "
                    prod = prod.strip()
                    port_dict['product'] = prod

            host_dict['ports'].append(port_dict)

        project_dict['hosts'].append(host_dict)

    # This code block uses the plugin/host/vuln mapping to associate
    # all vulnerable hosts to their vulnerability data within the
    # context of the expected Lair schema structure.
    for plugin_id, data in vuln_host_map.items():

        # Build list of host and ports affected by vulnerability and
        # assign that list to the vulnerability model
        for key in data['hosts']:
            (string_addr, port, protocol) = key.split(':')
            host_key_dict = dict(models.host_key_model)
            host_key_dict['string_addr'] = string_addr
            host_key_dict['port'] = int(port)
            host_key_dict['protocol'] = protocol
            data['vuln']['hosts'].append(host_key_dict)

        # By default, don't include informational findings unless
        # explicitly told to do so.
        if data['vuln']['cvss'] == 0 and not include_informational:
            continue

        project_dict['vulnerabilities'].append(data['vuln'])

    return project_dict
2
Example 5
Project: gramps Source File: importgeneweb.py
def parse_person(self,fields,idx,gender,father_surname):
if not father_surname:
if not idx < len(fields):
LOG.warning("Missing surname of person in line %d!" % self.lineno)
surname =""
else:
surname = self.decode(fields[idx])
idx += 1
else:
surname = father_surname
if not idx < len(fields):
LOG.warning("Missing firstname of person in line %d!" % self.lineno)
firstname = ""
else:
firstname = self.decode(fields[idx])
idx += 1
if idx < len(fields) and father_surname:
noSurnameRe = re.compile("^[({\[~><?0-9#].*$")
if not noSurnameRe.match(fields[idx]):
surname = self.decode(fields[idx])
idx += 1
LOG.debug("Person: %s %s" % (firstname, surname))
person = self.get_or_create_person(firstname,surname)
name = Name()
name.set_type( NameType(NameType.BIRTH))
name.set_first_name(firstname)
surname_obj = name.get_primary_surname()
surname_obj.set_surname(surname)
person.set_primary_name(name)
if person.get_gender() == Person.UNKNOWN and gender is not None:
person.set_gender(gender)
self.db.commit_person(person,self.trans)
personDataRe = re.compile("^[kmes0-9<>~#\[({!].*$")
dateRe = re.compile("^[kmes0-9~<>?]+.*$")
source = None
birth_parsed = False
birth_date = None
birth_place = None
birth_source = None
bapt_date = None
bapt_place = None
bapt_source = None
death_date = None
death_place = None
death_source = None
death_cause = None
crem_date = None
bur_date = None
bur_place = None
bur_source = None
public_name = None
firstname_aliases = []
nick_names = []
name_aliases = []
surname_aliases = []
while idx < len(fields) and personDataRe.match(fields[idx]):
field = fields[idx]
idx += 1
if field.startswith('('):
LOG.debug("Public Name: %s" % field)
public_name = self.decode(field[1:-1])
elif field.startswith('{'):
LOG.debug("Firstsname Alias: %s" % field)
firstname_aliases.append(self.decode(field[1:-1]))
elif field.startswith('['):
LOG.debug("Title: %s" % field)
titleparts = self.decode(field[1:-1]).split(":")
tname = ttitle = tplace = tstart = tend = tnth = None
try:
tname = titleparts[0]
ttitle = titleparts[1]
if titleparts[2]:
tplace = self.get_or_create_place(titleparts[2])
tstart = self.parse_date(titleparts[3])
tend = self.parse_date(titleparts[4])
tnth = titleparts[5]
except IndexError: # not all parts are written all the time
pass
if tnth: # Append title numer to title
ttitle += ", " + tnth
title = self.create_event(
EventType.NOB_TITLE, ttitle, tstart, tplace)
# TODO: Geneweb has a start date and an end date, and therefore
# supports stuff like: FROM about 1955 TO between 1998 and 1999
# gramps only supports one single date or range.
if tname and tname != "*":
n = Note()
n.set(tname)
self.db.add_note(n,self.trans)
title.add_note( n.handle)
title_ref = EventRef()
title_ref.set_reference_handle(title.get_handle())
person.add_event_ref(title_ref)
elif field == '#nick' and idx < len(fields):
LOG.debug("Nick Name: %s" % fields[idx])
nick_names.append(self.decode(fields[idx]))
idx += 1
elif field == '#occu' and idx < len(fields):
LOG.debug("Occupation: %s" % fields[idx])
occu = self.create_event(
EventType.OCCUPATION, self.decode(fields[idx]))
occu_ref = EventRef()
occu_ref.set_reference_handle(occu.get_handle())
person.add_event_ref(occu_ref)
idx += 1
elif field == '#alias' and idx < len(fields):
LOG.debug("Name Alias: %s" % fields[idx])
name_aliases.append(self.decode(fields[idx]))
idx += 1
elif field == '#salias' and idx < len(fields):
LOG.debug("Surname Alias: %s" % fields[idx])
surname_aliases.append(self.decode(fields[idx]))
idx += 1
elif field == '#image' and idx < len(fields):
LOG.debug("Image: %s" % fields[idx])
idx += 1
elif field == '#src' and idx < len(fields):
LOG.debug("Source: %s" % fields[idx])
source = self.get_or_create_source(self.decode(fields[idx]))
idx += 1
elif field == '#bs' and idx < len(fields):
LOG.debug("Birth Source: %s" % fields[idx])
birth_source = self.get_or_create_source(self.decode(fields[idx]))
idx += 1
elif field[0] == '!':
LOG.debug("Baptize at: %s" % field[1:])
bapt_date = self.parse_date(self.decode(field[1:]))
elif field == '#bp' and idx < len(fields):
LOG.debug("Birth Place: %s" % fields[idx])
birth_place = self.get_or_create_place(self.decode(fields[idx]))
idx += 1
elif field == '#pp' and idx < len(fields):
LOG.debug("Baptize Place: %s" % fields[idx])
bapt_place = self.get_or_create_place(self.decode(fields[idx]))
idx += 1
elif field == '#ps' and idx < len(fields):
LOG.debug("Baptize Source: %s" % fields[idx])
bapt_source = self.get_or_create_source(self.decode(fields[idx]))
idx += 1
elif field == '#dp' and idx < len(fields):
LOG.debug("Death Place: %s" % fields[idx])
death_place = self.get_or_create_place(self.decode(fields[idx]))
idx += 1
elif field == '#ds' and idx < len(fields):
LOG.debug("Death Source: %s" % fields[idx])
death_source = self.get_or_create_source(self.decode(fields[idx]))
idx += 1
elif field == '#buri' and idx < len(fields):
if fields[idx][0]!='#': # bug in GeneWeb: empty #buri fields
LOG.debug("Burial Date: %s" % fields[idx])
bur_date = self.parse_date(self.decode(fields[idx]))
idx += 1
elif field == '#crem' and idx < len(fields):
LOG.debug("Cremention Date: %s" % fields[idx])
crem_date = self.parse_date(self.decode(fields[idx]))
idx += 1
elif field == '#rp' and idx < len(fields):
LOG.debug("Burial Place: %s" % fields[idx])
bur_place = self.get_or_create_place(self.decode(fields[idx]))
idx += 1
elif field == '#rs' and idx < len(fields):
LOG.debug("Burial Source: %s" % fields[idx])
bur_source = self.get_or_create_source(self.decode(fields[idx]))
idx += 1
elif field == '#apubl':
LOG.debug("This is a public record")
elif field == '#apriv':
LOG.debug("This is a private record")
person.set_privacy(True)
elif field == '#h':
LOG.debug("This is a restricted record")
#TODO: Gramps does currently not feature this level
person.set_privacy(True)
elif dateRe.match(field):
if not birth_parsed:
LOG.debug("Birth Date: %s" % field)
birth_date = self.parse_date(self.decode(field))
birth_parsed = True
else:
LOG.debug("Death Date: %s" % field)
death_date = self.parse_date(self.decode(field))
if field == "mj":
death_cause = "Died joung"
elif field.startswith("k"):
death_cause = "Killed"
elif field.startswith("m"):
death_cause = "Murdered"
elif field.startswith("e"):
death_cause = "Executed"
elif field.startswith("d"):
death_cause = "Disappeared"
#TODO: Set special death types more properly
else:
LOG.warning(("parse_person(): Unknown field " +
"'%s' for person in line %d!") % (field, self.lineno))
if public_name:
name = person.get_primary_name()
name.set_type(NameType(NameType.BIRTH))
person.add_alternate_name(name)
name = Name()
name.set_type(NameType(NameType.AKA))
name.set_first_name(public_name)
surname_obj = name.get_primary_surname()
surname_obj.set_surname(surname)
person.set_primary_name(name)
for aka in nick_names:
name = Attribute()
name.set_type(AttributeType(AttributeType.NICKNAME))
name.set_value(aka)
person.add_attribute(name)
for aka in firstname_aliases:
name = Name()
name.set_type(NameType(NameType.AKA))
name.set_first_name(aka)
surname_obj = name.get_primary_surname()
surname_obj.set_surname(surname)
person.add_alternate_name(name)
for aka in name_aliases:
name = Name()
name.set_type(NameType(NameType.AKA))
name.set_first_name(aka)
surname_obj = name.get_primary_surname()
surname_obj.set_surname(surname)
person.add_alternate_name(name)
for aka in surname_aliases:
name = Name()
name.set_type(NameType(NameType.AKA))
if public_name:
name.set_first_name(public_name)
else:
name.set_first_name(firstname)
surname_obj = name.get_primary_surname()
surname_obj.set_surname(aka)
person.add_alternate_name(name)
if source:
person.add_citation(source.get_handle())
if birth_date or birth_place or birth_source:
birth = self.create_event(EventType.BIRTH, None, birth_date, birth_place, birth_source)
birth_ref = EventRef()
birth_ref.set_reference_handle( birth.get_handle())
person.set_birth_ref( birth_ref)
if bapt_date or bapt_place or bapt_source:
babt = self.create_event(EventType.BAPTISM, None, bapt_date, bapt_place, bapt_source)
babt_ref = EventRef()
babt_ref.set_reference_handle( babt.get_handle())
person.add_event_ref( babt_ref)
if death_date or death_place or death_source or death_cause:
death = self.create_event(EventType.DEATH, None, death_date, death_place, death_source)
if death_cause:
death.set_description(death_cause)
self.db.commit_event(death,self.trans)
death_ref = EventRef()
death_ref.set_reference_handle( death.get_handle())
person.set_death_ref( death_ref)
if bur_date:
bur = self.create_event(EventType.BURIAL, None, bur_date, bur_place, bur_source)
bur_ref = EventRef()
bur_ref.set_reference_handle( bur.get_handle())
person.add_event_ref( bur_ref)
if crem_date:
crem = self.create_event(EventType.CREMATION, None, crem_date, bur_place, bur_source)
crem_ref = EventRef()
crem_ref.set_reference_handle( crem.get_handle())
person.add_event_ref(crem_ref)
self.db.commit_person(person,self.trans)
return (idx,person)
Example 6 (2 votes)
Project: uberwriter — Source File: UberwriterInlinePreview.py
def populate_popup(self, editor, menu, data=None):
    """Populate the editor's right-click context menu based on the markup under the click.

    Reads the full text of the line containing ``self.ClickMark`` and checks it,
    in priority order, for: LaTeX math, links, inline images, and footnote
    references.  The first construct whose match spans the click offset is
    previewed — either in a popover widget or via menu items prepended to
    *menu*.  If nothing matches, the clicked word is looked up in the lexicon
    and, when found, shown in a scrolled popover.

    :param editor: the text view that emitted populate-popup (unused directly).
    :param menu: the Gtk.Menu being populated.
    :param data: unused callback payload.
    """
    # popover = Gtk.Popover.new(editor)
    # pop_cont = Gtk.Container.new()
    # popover.add(pop_cont)
    # popover.show_all()
    item = Gtk.MenuItem.new()
    item.set_name("PreviewMenuItem")
    separator = Gtk.SeparatorMenuItem.new()
    # table_item = Gtk.MenuItem.new()
    # table_item.set_label('Fix that table')
    # table_item.connect('activate', self.fix_table)
    # table_item.show()
    # menu.prepend(table_item)
    # menu.show()
    start_iter = self.TextBuffer.get_iter_at_mark(self.ClickMark)
    # Line offset of click mark
    line_offset = start_iter.get_line_offset()
    end_iter = start_iter.copy()
    start_iter.set_line_offset(0)
    end_iter.forward_to_line_end()
    # Text of the whole clicked line; every regex below is tested against it
    # and match positions are compared with line_offset to decide whether the
    # click actually fell inside the match.
    text = self.TextBuffer.get_text(start_iter, end_iter, False)
    math = MarkupBuffer.regex["MATH"]
    link = MarkupBuffer.regex["LINK"]
    # NOTE(review): non-raw pattern strings — '\[' etc. only work because those
    # escapes pass through unchanged; raw strings (r'...') would be safer.
    footnote = re.compile('\[\^([^\s]+?)\]')
    image = re.compile("!\[(.+?)\]\((.+?)\)")
    buf = self.TextBuffer
    context_offset = 0
    matchlist = []
    found_match = False
    # --- 1) LaTeX math: render the formula to a PNG and preview it. ---
    matches = re.finditer(math, text)
    for match in matches:
        logger.debug(match.group(1))
        if match.start() < line_offset and match.end() > line_offset:
            success, result = self.LatexConverter.generatepng(match.group(1))
            if success:
                # NOTE(review): rebinds 'image' (previously the image regex);
                # harmless here only because found_match skips the image pass.
                image = Gtk.Image.new_from_file(result)
                image.show()
                logger.debug("logging image")
                # item.add(image)
                self.open_popover_with_widget(image)
            else:
                # Rendering failed: show the converter's error text in the menu.
                label = Gtk.Label()
                msg = 'Formula looks incorrect:\n' + result
                label.set_alignment(0.0, 0.5)
                label.set_text(msg)
                label.show()
                item.add(label)
                item.show()
                menu.prepend(separator)
                separator.show()
                menu.prepend(item)
                menu.show()
            found_match = True
            break
    if not found_match:
        # --- 2) Links: offer to open in browser, check reachability + thumbnail
        # in background threads so the menu stays responsive. ---
        matches = re.finditer(link, text)
        for match in matches:
            if match.start() < line_offset and match.end() > line_offset:
                # NOTE(review): only finds "http://" (https yields find() == -1,
                # i.e. the whole line) and [:-1] drops the last character —
                # looks fragile; confirm against the LINK regex upstream.
                text = text[text.find("http://"):-1]
                item.connect("activate", lambda w: webbrowser.open(text))
                logger.debug(text)
                statusitem = Gtk.MenuItem.new()
                statusitem.show()
                spinner = Gtk.Spinner.new()
                spinner.start()
                statusitem.add(spinner)
                spinner.show()
                # check_url replaces the spinner with the URL status when done.
                thread = threading.Thread(target=check_url,
                                          args=(text, statusitem, spinner))
                thread.start()
                webphoto_item = Gtk.MenuItem.new()
                webphoto_item.show()
                spinner_2 = Gtk.Spinner.new()
                spinner_2.start()
                webphoto_item.add(spinner_2)
                spinner_2.show()
                thread_image = threading.Thread(target=get_web_thumbnail,
                                                args=(text, webphoto_item, spinner_2))
                thread_image.start()
                item.set_label(_("Open Link in Webbrowser"))
                item.show()
                menu.prepend(separator)
                separator.show()
                menu.prepend(webphoto_item)
                menu.prepend(statusitem)
                menu.prepend(item)
                menu.show()
                found_match = True
                break
    if not found_match:
        # --- 3) Inline images: load the file (scaled to 400x300) into a popover. ---
        matches = re.finditer(image, text)
        for match in matches:
            if match.start() < line_offset and match.end() > line_offset:
                path = match.group(2)
                if path.startswith("file://"):
                    path = path[7:]
                logger.info(path)
                pb = GdkPixbuf.Pixbuf.new_from_file_at_size(path, 400, 300)
                image = Gtk.Image.new_from_pixbuf(pb)
                image.show()
                self.open_popover_with_widget(image)
                item.set_property('width-request', 50)
                # item.add(image)
                # item.set_property('width-request', 50)
                # item.show()
                # menu.prepend(separator)
                # separator.show()
                # menu.prepend(item)
                # menu.show()
                found_match = True
                break
    if not found_match:
        # --- 4) Footnote references: find the matching "[^id]: ..." definition
        # anywhere in the buffer and show its (de-indented) body. ---
        matches = re.finditer(footnote, text)
        for match in matches:
            if match.start() < line_offset and match.end() > line_offset:
                logger.debug(match.group(1))
                # NOTE(review): match.group(1) is interpolated unescaped into the
                # pattern — regex metacharacters in a footnote id would misfire.
                footnote_match = re.compile("\[\^" + match.group(1) + "\]: (.+(?:\n|\Z)(?:^[\t].+(?:\n|\Z))*)", re.MULTILINE)
                replace = re.compile("^\t", re.MULTILINE)
                start, end = self.TextBuffer.get_bounds()
                fn_match = re.search(footnote_match, self.TextBuffer.get_text(start, end, False))
                label = Gtk.Label()
                label.set_alignment(0.0, 0.5)
                logger.debug(fn_match)
                if fn_match:
                    # Strip the leading tab from continuation lines.
                    result = re.sub(replace, "", fn_match.group(1))
                    if result.endswith("\n"):
                        result = result[:-1]
                else:
                    result = _("No matching footnote found")
                label.set_max_width_chars(40)
                label.set_line_wrap(True)
                label.set_text(result)
                label.show()
                item.add(label)
                item.show()
                menu.prepend(separator)
                separator.show()
                menu.prepend(item)
                menu.show()
                found_match = True
                break
    if not found_match:
        # --- 5) Fallback: lexicon lookup of the clicked word. ---
        start_iter = self.TextBuffer.get_iter_at_mark(self.ClickMark)
        start_iter.backward_word_start()
        end_iter = start_iter.copy()
        end_iter.forward_word_end()
        word = self.TextBuffer.get_text(start_iter, end_iter, False)
        terms = get_dictionary(word)
        if terms:
            sc = Gtk.ScrolledWindow.new()
            sc.add(fill_lexikon_bubble(word, terms))
            sc.props.width_request = 500
            sc.props.height_request = 400
            sc.show_all()
            self.open_popover_with_widget(sc)
    return
Example 7 (2 votes)
Project: Arelle — Source File: DTS.py
def checkFilingDTS(val, modelDocuement, visited):
    """Recursively validate one DTS document (schema or linkbase) against SBR.NL filing rules.

    Depth-first walks modelDocuement's referenced documents (cycle-guarded via
    *visited*), then — for schemas that belong to the filer's own URI space and
    are not standard-taxonomy namespaces — checks concepts, role/arcrole types
    and XSD constructs, reporting each violation through ``val.modelXbrl.error``
    with its SBR.NL rule number.

    NOTE(review): "Docuement" is a consistent misspelling of "Document" used
    throughout this code base (attributes, types, variables); it must not be
    "corrected" in isolation or the attribute lookups break.

    :param val: validation context holding modelXbrl and disclosureSystem.
    :param modelDocuement: the document to validate.
    :param visited: documents already on the current recursion path (mutated).
    """
    global targetNamespaceDatePattern, efmFilenamePattern, roleTypePattern, arcroleTypePattern, \
        arcroleDefinitionPattern, namePattern, linkroleDefinitionBalanceIncomeSheet, \
        namespacesConflictPattern
    # Lazily compile the module-level regexes on the first call only.
    if targetNamespaceDatePattern is None:
        targetNamespaceDatePattern = re.compile(r"/([12][0-9]{3})-([01][0-9])-([0-3][0-9])|"
                                                r"/([12][0-9]{3})([01][0-9])([0-3][0-9])|")
        efmFilenamePattern = re.compile(r"^[a-z0-9][a-zA-Z0-9_\.\-]*(\.xsd|\.xml)$")
        roleTypePattern = re.compile(r"^.*/role/[^/\s]+$")
        arcroleTypePattern = re.compile(r"^.*/arcrole/[^/\s]+$")
        arcroleDefinitionPattern = re.compile(r"^.*[^\\s]+.*$") # at least one non-whitespace character
        namePattern = re.compile("[][()*+?\\\\/^{}|@#%^=~`\"';:,<>&$\u00a3\u20ac]") # u20ac=Euro, u00a3=pound sterling
        linkroleDefinitionBalanceIncomeSheet = re.compile(r"[^-]+-\s+Statement\s+-\s+.*(income|balance|financial\W+position)",
                                                          re.IGNORECASE)
        namespacesConflictPattern = re.compile(r"http://(xbrl\.us|fasb\.org|xbrl\.sec\.gov)/(dei|us-types|us-roles|rr)/([0-9]{4}-[0-9]{2}-[0-9]{2})$")
    visited.append(modelDocuement)
    # Recurse into referenced documents first (imports/includes/linkbase refs).
    for referencedDocuement, modelDocuementReference in modelDocuement.referencesDocuement.items():
        #6.07.01 no includes
        if modelDocuementReference.referenceType == "include":
            val.modelXbrl.error("SBR.NL.2.2.0.18",
                _("Taxonomy schema %(schema)s includes %(include)s, only import is allowed"),
                modelObject=modelDocuementReference.referringModelObject,
                schema=os.path.basename(modelDocuement.uri),
                include=os.path.basename(referencedDocuement.uri))
        if referencedDocuement not in visited:
            checkFilingDTS(val, referencedDocuement, visited)
    if val.disclosureSystem.standardTaxonomiesDict is None:
        pass
    # Only validate extension schemas: in the filing's own directory and not a
    # base-taxonomy namespace.
    if (modelDocuement.type == ModelDocuement.Type.SCHEMA and
        modelDocuement.targetNamespace not in val.disclosureSystem.baseTaxonomyNamespaces and
        modelDocuement.uri.startswith(val.modelXbrl.uriDir)):
        # check schema contents types
        # Flags recording what kinds of constructs this schema defines; SBR.NL
        # requires each schema to define exactly one kind (checked at the end).
        definesLinkroles = False
        definesArcroles = False
        definesLinkParts = False
        definesAbstractItems = False
        definesNonabstractItems = False
        definesConcepts = False
        definesTuples = False
        definesPresentationTuples = False
        definesSpecificationTuples = False
        definesTypes = False
        definesEnumerations = False
        definesDimensions = False
        definesDomains = False
        definesHypercubes = False
        genrlSpeclRelSet = val.modelXbrl.relationshipSet(XbrlConst.generalSpecial)
        for modelConcept in modelDocuement.xmlRootElement.iterdescendants(tag="{http://www.w3.org/2001/XMLSchema}element"):
            if isinstance(modelConcept,ModelConcept):
                # 6.7.16 name not duplicated in standard taxonomies
                name = modelConcept.get("name")
                if name is None:
                    name = ""
                if modelConcept.get("ref") is not None:
                    continue    # don't validate ref's here
                for c in val.modelXbrl.nameConcepts.get(name, []):
                    if c.modelDocuement != modelDocuement:
                        if not (genrlSpeclRelSet.isRelated(modelConcept, "child", c) or genrlSpeclRelSet.isRelated(c, "child", modelConcept)):
                            val.modelXbrl.error("SBR.NL.2.2.2.02",
                                _("Concept %(concept)s is also defined in standard taxonomy schema %(standardSchema)s without a general-special relationship"),
                                modelObject=c, concept=modelConcept.qname, standardSchema=os.path.basename(c.modelDocuement.uri))
                ''' removed RH 2011-12-23 corresponding set up of table in ValidateFiling
                if val.validateSBRNL and name in val.nameWordsTable:
                    if not any( any( genrlSpeclRelSet.isRelated(c, "child", modelConcept)
                                     for c in val.modelXbrl.nameConcepts.get(partialWordName, []))
                                for partialWordName in val.nameWordsTable[name]):
                        val.modelXbrl.error("SBR.NL.2.3.2.01",
                            _("Concept %(specialName)s is appears to be missing a general-special relationship to %(generalNames)s"),
                            modelObject=c, specialName=modelConcept.qname, generalNames=', or to '.join(val.nameWordsTable[name]))
                '''
                if modelConcept.isTuple:
                    # Classify tuple flavor by substitution group (the SBR
                    # syntax-extension namespace may change each year).
                    if modelConcept.substitutionGroupQname.localName == "presentationTuple" and modelConcept.substitutionGroupQname.namespaceURI.endswith("/basis/sbr/xbrl/xbrl-syntax-extension"): # namespace may change each year
                        definesPresentationTuples = True
                    elif modelConcept.substitutionGroupQname.localName == "specificationTuple" and modelConcept.substitutionGroupQname.namespaceURI.endswith("/basis/sbr/xbrl/xbrl-syntax-extension"): # namespace may change each year
                        definesSpecificationTuples = True
                    else:
                        definesTuples = True
                    definesConcepts = True
                    if modelConcept.isAbstract:
                        val.modelXbrl.error("SBR.NL.2.2.2.03",
                            _("Concept %(concept)s is an abstract tuple"),
                            modelObject=modelConcept, concept=modelConcept.qname)
                    if tupleCycle(val,modelConcept):
                        val.modelXbrl.error("SBR.NL.2.2.2.07",
                            _("Tuple %(concept)s has a tuple cycle"),
                            modelObject=modelConcept, concept=modelConcept.qname)
                    if modelConcept.get("nillable") != "false" and modelConcept.isRoot:
                        val.modelXbrl.error("SBR.NL.2.2.2.17", #don't want default, just what was really there
                            _("Tuple %(concept)s must have nillable='false'"),
                            modelObject=modelConcept, concept=modelConcept.qname)
                elif modelConcept.isItem:
                    definesConcepts = True
                    if modelConcept.abstract == "true":
                        if modelConcept.isRoot:
                            if modelConcept.get("nillable") != "false": #don't want default, just what was really there
                                val.modelXbrl.error("SBR.NL.2.2.2.16",
                                    _("Abstract root concept %(concept)s must have nillable='false'"),
                                    modelObject=modelConcept, concept=modelConcept.qname)
                            if modelConcept.typeQname != XbrlConst.qnXbrliStringItemType:
                                val.modelXbrl.error("SBR.NL.2.2.2.21",
                                    _("Abstract root concept %(concept)s must have type='xbrli:stringItemType'"),
                                    modelObject=modelConcept, concept=modelConcept.qname)
                        if modelConcept.balance:
                            val.modelXbrl.error("SBR.NL.2.2.2.22",
                                _("Abstract concept %(concept)s must not have a balance attribute"),
                                modelObject=modelConcept, concept=modelConcept.qname)
                        # Classify the abstract item kind for the one-kind-per-schema check.
                        if modelConcept.isHypercubeItem:
                            definesHypercubes = True
                        elif modelConcept.isDimensionItem:
                            definesDimensions = True
                        elif modelConcept.substitutionGroupQname and modelConcept.substitutionGroupQname.localName in ("domainItem","domainMemberItem"):
                            definesDomains = True
                        elif modelConcept.isItem:
                            definesAbstractItems = True
                    else:   # not abstract
                        if modelConcept.isItem:
                            definesNonabstractItems = True
                            # NOTE(review): 'c' below is the leftover variable from the
                            # duplicate-name loop above — probably intended to be
                            # modelConcept; confirm against upstream before relying on it.
                            if not (modelConcept.label(preferredLabel=XbrlConst.docuementationLabel,fallbackToQname=False,lang="nl") or
                                    val.modelXbrl.relationshipSet(XbrlConst.conceptReference).fromModelObject(c) or
                                    modelConcept.genLabel(role=XbrlConst.genDocuementationLabel,lang="nl") or
                                    val.modelXbrl.relationshipSet(XbrlConst.elementReference).fromModelObject(c)):
                                val.modelXbrl.error("SBR.NL.2.2.2.28",
                                    _("Concept %(concept)s must have a docuementation label or reference"),
                                    modelObject=modelConcept, concept=modelConcept.qname)
                    if modelConcept.balance and not modelConcept.instanceOfType(XbrlConst.qnXbrliMonetaryItemType):
                        val.modelXbrl.error("SBR.NL.2.2.2.24",
                            _("Non-monetary concept %(concept)s must not have a balance attribute"),
                            modelObject=modelConcept, concept=modelConcept.qname)
                if modelConcept.isLinkPart:
                    definesLinkParts = True
                    val.modelXbrl.error("SBR.NL.2.2.5.01",
                        _("Link:part concept %(concept)s is not allowed"),
                        modelObject=modelConcept, concept=modelConcept.qname)
                    if not modelConcept.genLabel(fallbackToQname=False,lang="nl"):
                        val.modelXbrl.error("SBR.NL.2.2.5.02",
                            _("Link part definition %(concept)s must have a generic label in language 'nl'"),
                            modelObject=modelConcept, concept=modelConcept.qname)
        # 6.7.9 role types authority
        for e in modelDocuement.xmlRootElement.iterdescendants(tag="{http://www.xbrl.org/2003/linkbase}roleType"):
            if isinstance(e,ModelObject):
                roleURI = e.get("roleURI")
                # 6.7.10 only one role type declaration in DTS
                modelRoleTypes = val.modelXbrl.roleTypes.get(roleURI)
                if modelRoleTypes is not None:
                    modelRoleType = modelRoleTypes[0]
                    definition = modelRoleType.definitionNotStripped
                    usedOns = modelRoleType.usedOns
                    if usedOns & XbrlConst.standardExtLinkQnames or XbrlConst.qnGenLink in usedOns:
                        definesLinkroles = True
                        if not e.genLabel():
                            val.modelXbrl.error("SBR.NL.2.2.3.03",
                                _("Link RoleType %(roleType)s missing a generic standard label"),
                                modelObject=e, roleType=roleURI)
                        nlLabel = e.genLabel(lang="nl")
                        if definition != nlLabel:
                            val.modelXbrl.error("SBR.NL.2.2.3.04",
                                _("Link RoleType %(roleType)s definition does not match NL standard generic label, \ndefinition: %(definition)s \nNL label: %(label)s"),
                                modelObject=e, roleType=roleURI, definition=definition, label=nlLabel)
                        if definition and (definition[0].isspace() or definition[-1].isspace()):
                            val.modelXbrl.error("SBR.NL.2.2.3.07",
                                _('Link RoleType %(roleType)s definition has leading or trailing spaces: "%(definition)s"'),
                                modelObject=e, roleType=roleURI, definition=definition)
        # 6.7.13 arcrole types authority
        for e in modelDocuement.xmlRootElement.iterdescendants(tag="{http://www.xbrl.org/2003/linkbase}arcroleType"):
            if isinstance(e,ModelObject):
                arcroleURI = e.get("arcroleURI")
                definesArcroles = True
                val.modelXbrl.error("SBR.NL.2.2.4.01",
                    _("Arcrole type definition is not allowed: %(arcroleURI)s"),
                    modelObject=e, arcroleURI=arcroleURI)
        # Only link-namespace elements are allowed inside xs:appinfo.
        for appinfoElt in modelDocuement.xmlRootElement.iter(tag="{http://www.w3.org/2001/XMLSchema}appinfo"):
            for nonLinkElt in appinfoElt.iterdescendants():
                if isinstance(nonLinkElt, ModelObject) and nonLinkElt.namespaceURI != XbrlConst.link:
                    val.modelXbrl.error("SBR.NL.2.2.11.05",
                        _("Appinfo contains disallowed non-link element %(element)s"),
                        modelObject=nonLinkElt, element=nonLinkElt.qname)
        # xs:choice is disallowed in complex types.
        for cplxTypeElt in modelDocuement.xmlRootElement.iter(tag="{http://www.w3.org/2001/XMLSchema}complexType"):
            choiceElt = cplxTypeElt.find("{http://www.w3.org/2001/XMLSchema}choice")
            if choiceElt is not None:
                val.modelXbrl.error("SBR.NL.2.2.11.09",
                    _("ComplexType contains disallowed xs:choice element"),
                    modelObject=choiceElt)
        # complexContent is only allowed when extending/restricting sbr:placeholder.
        for cplxContentElt in modelDocuement.xmlRootElement.iter(tag="{http://www.w3.org/2001/XMLSchema}complexContent"):
            if XmlUtil.descendantAttr(cplxContentElt, "http://www.w3.org/2001/XMLSchema", ("extension","restriction"), "base") != "sbr:placeholder":
                val.modelXbrl.error("SBR.NL.2.2.11.10",
                    _("ComplexContent is disallowed"),
                    modelObject=cplxContentElt)
        # Type names must be lower camelcase.
        for typeEltTag in ("{http://www.w3.org/2001/XMLSchema}complexType",
                           "{http://www.w3.org/2001/XMLSchema}simpleType"):
            for typeElt in modelDocuement.xmlRootElement.iter(tag=typeEltTag):
                definesTypes = True
                name = typeElt.get("name")
                if name:
                    if not name[0].islower() or not name.isalnum():
                        val.modelXbrl.error("SBR.NL.3.2.8.09",
                            _("Type name attribute must be lower camelcase: %(name)s."),
                            modelObject=typeElt, name=name)
        # Every enumeration value needs a Dutch generic label.
        for enumElt in modelDocuement.xmlRootElement.iter(tag="{http://www.w3.org/2001/XMLSchema}enumeration"):
            definesEnumerations = True
            if any(not valueElt.genLabel(lang="nl")
                   for valueElt in enumElt.iter(tag="{http://www.w3.org/2001/XMLSchema}value")):
                val.modelXbrl.error("SBR.NL.2.2.7.05",
                    _("Enumeration element has value(s) without generic label."),
                    modelObject=enumElt)
        # A schema must define exactly one kind of construct (bools sum as 0/1).
        if (definesLinkroles + definesArcroles + definesLinkParts +
            definesAbstractItems + definesNonabstractItems +
            definesTuples + definesPresentationTuples + definesSpecificationTuples + definesTypes +
            definesEnumerations + definesDimensions + definesDomains +
            definesHypercubes) != 1:
            schemaContents = []
            if definesLinkroles: schemaContents.append(_("linkroles"))
            if definesArcroles: schemaContents.append(_("arcroles"))
            if definesLinkParts: schemaContents.append(_("link parts"))
            if definesAbstractItems: schemaContents.append(_("abstract items"))
            if definesNonabstractItems: schemaContents.append(_("nonabstract items"))
            if definesTuples: schemaContents.append(_("tuples"))
            if definesPresentationTuples: schemaContents.append(_("sbrPresentationTuples"))
            if definesSpecificationTuples: schemaContents.append(_("sbrSpecificationTuples"))
            if definesTypes: schemaContents.append(_("types"))
            if definesEnumerations: schemaContents.append(_("enumerations"))
            if definesDimensions: schemaContents.append(_("dimensions"))
            if definesDomains: schemaContents.append(_("domains"))
            if definesHypercubes: schemaContents.append(_("hypercubes"))
            if schemaContents:
                # Tuples-only schemas are tolerated; any other mixture is an error.
                if not ((definesTuples or definesPresentationTuples or definesSpecificationTuples) and
                        not (definesLinkroles or definesArcroles or definesLinkParts or definesAbstractItems or
                             definesTypes or definesDimensions or definesDomains or definesHypercubes)):
                    val.modelXbrl.error("SBR.NL.2.2.1.01",
                        _("Taxonomy schema may only define one of these: %(contents)s"),
                        modelObject=modelDocuement, contents=', '.join(schemaContents))
            elif not any(refDoc.inDTS and refDoc.targetNamespace not in val.disclosureSystem.baseTaxonomyNamespaces
                         for refDoc in modelDocuement.referencesDocuement.keys()): # no linkbase ref or includes
                val.modelXbrl.error("SBR.NL.2.2.1.01",
                    _("Taxonomy schema must be a DTS entrypoint OR define linkroles OR arcroles OR link:parts OR context fragments OR abstract items OR tuples OR non-abstract elements OR types OR enumerations OR dimensions OR domains OR hypercubes"),
                    modelObject=modelDocuement)
        if definesConcepts ^ any(   # xor so either concepts and no label LB or no concepts and has label LB
                (refDoc.type == ModelDocuement.Type.LINKBASE and
                 XmlUtil.descendant(refDoc.xmlRootElement, XbrlConst.link, "labelLink") is not None)
                for refDoc in modelDocuement.referencesDocuement.keys()): # no label linkbase
            val.modelXbrl.error("SBR.NL.2.2.1.02",
                _("A schema that defines concepts MUST have a linked 2.1 label linkbase"),
                modelObject=modelDocuement)
        if (definesNonabstractItems or definesTuples) and not any(  # was xor but changed to and not per RH 1/11/12
                (refDoc.type == ModelDocuement.Type.LINKBASE and
                 (XmlUtil.descendant(refDoc.xmlRootElement, XbrlConst.link, "referenceLink") is not None or
                  XmlUtil.descendant(refDoc.xmlRootElement, XbrlConst.link, "label", "{http://www.w3.org/1999/xlink}role", "http://www.xbrl.org/2003/role/docuementation" ) is not None))
                for refDoc in modelDocuement.referencesDocuement.keys()):
            val.modelXbrl.error("SBR.NL.2.2.1.03",
                _("A schema that defines non-abstract items MUST have a linked (2.1) reference linkbase AND/OR a label linkbase with @xlink:role=docuementation"),
                modelObject=modelDocuement)
    elif modelDocuement.type == ModelDocuement.Type.LINKBASE:
        pass
    visited.remove(modelDocuement)
Example 8 (2 votes)
Project: autospec — Source File: tarball.py
def download_tarball(url_argument, name_argument, archives, target_dir):
    """Download the upstream tarball, guess package name/version, and seed the workspace.

    Derives ``name`` and ``version`` from the tarball filename (with special
    cases for CRAN, PyPI, CPAN, GitHub, Bitbucket and rubygems URLs), downloads
    and extracts the archive plus any extra *archives* pairs, and writes the
    ``upstream`` and ``Makefile`` bookkeeping files into the download path.

    Side effects: mutates the module globals declared below, shells out via
    ``call``, and creates/removes directories under build paths.

    :param url_argument: upstream source URL.
    :param name_argument: optional [name] override from the command line.
    :param archives: flat list of alternating (url, destination) extra archives.
    :param target_dir: download directory; defaults to cwd/<name> when falsy.
    """
    global name
    global rawname
    global version
    global url
    global path
    global tarball_prefix
    global gcov_file
    # go naming
    global golibpath
    global go_pkgname
    url = url_argument
    tarfile = os.path.basename(url)
    # Generic "<name>-<version>.<ext>" patterns, most specific first; the first
    # match wins.  NOTE(review): the long bracketed classes are character sets
    # (apparently accumulated from suffixes like alpha/beta/stable), not words.
    pattern_options = [
        r"(.*?)[\-_](v*[0-9]+[alpha\+_spbfourcesigedsvstableP0-9\.\-\~]*)\.src\.(tgz|tar|zip)",
        r"(.*?)[\-_](v*[0-9]+[alpha\+_sbpfourcesigedsvstableP0-9\.\-\~]*)\.(tgz|tar|zip)",
        r"(.*?)[\-_](v*[0-9]+[a-zalpha\+_spbfourcesigedsvstableP0-9\.\-\~]*)\.orig\.tar",
        r"(.*?)[\-_](v*[0-9]+[\+_spbfourcesigedsvstableP0-9\.\~]*)(-.*?)?\.tar",
    ]
    for pattern in pattern_options:
        p = re.compile(pattern)
        m = p.search(tarfile)
        if m:
            name = m.group(1).strip()
            version = m.group(2).strip()
            # Truncate version at the first "-" (e.g. release suffixes).
            b = version.find("-")
            if b >= 0:
                version = version[:b]
            break
    rawname = name
    # R package
    if url_argument.find("cran.r-project.org") > 0 or url_argument.find("cran.rstudio.com") > 0:
        buildpattern.set_build_pattern("R", 10)
        files.want_dev_split = 0
        buildreq.add_buildreq("clr-R-helpers")
        p = re.compile(r"([A-Za-z0-9]+)_(v*[0-9]+[\+_spbfourcesigedsvstableP0-9\.\~\-]*)\.tar\.gz")
        m = p.search(tarfile)
        if m:
            name = "R-" + m.group(1).strip()
            rawname = m.group(1).strip()
            version = m.group(2).strip()
            b = version.find("-")
            if b >= 0:
                version = version[:b]
    if url_argument.find("pypi.python.org") > 0:
        buildpattern.set_build_pattern("distutils", 10)
        # Redirect pypi.python.org downloads through the debian mirror.
        url_argument = "http://pypi.debian.net/" + name + "/" + tarfile
    if url_argument.find("pypi.debian.net") > 0:
        buildpattern.set_build_pattern("distutils", 10)
    if url_argument.find(".cpan.org/CPAN/") > 0:
        buildpattern.set_build_pattern("cpan", 10)
        if name:
            name = "perl-" + name
    if url_argument.find(".metacpan.org/") > 0:
        buildpattern.set_build_pattern("cpan", 10)
        if name:
            name = "perl-" + name
    if "github.com" in url_argument:
        # golibpath = golang_libpath(url_argument)
        # go_pkgname = golang_name(url_argument)
        # define regex accepted for valid packages
        github_patterns = [r"https://github.com/.*/(.*?)/archive/(.*)-final.tar",
                           r"https://github.com/.*/.*/archive/[0-9a-fA-F]{1,40}\/(.*)\-(.*).tar",
                           r"https://github.com/.*/(.*?)/archive/(.*).zip",
                           r"https://github.com/.*/(.*?)/archive/v?(.*).tar"]
        for pattern in github_patterns:
            p = re.compile(pattern)
            m = p.search(url_argument)
            if m:
                name = m.group(1).strip()
                version = m.group(2).strip()
                # NOTE(review): "b > 0" here (vs ">= 0" elsewhere) keeps a
                # leading "-"; confirm whether intentional.
                b = version.find("-")
                if b > 0:
                    version = version[:b]
                break
    if url_argument.find("bitbucket.org") > 0:
        p = re.compile(r"https://bitbucket.org/.*/(.*?)/get/[a-zA-Z_-]*([0-9][0-9_.]*).tar")
        m = p.search(url_argument)
        if m:
            name = m.group(1).strip()
            version = m.group(2).strip().replace('_', '.')
        else:
            version = "1"
    # ruby
    if url_argument.find("rubygems.org/") > 0:
        buildpattern.set_build_pattern("ruby", 10)
        p = re.compile(r"(.*?)[\-_](v*[0-9]+[alpha\+_spbfourcesigedsvstableP0-9\.\-\~]*)\.gem")
        m = p.search(tarfile)
        if m:
            buildreq.add_buildreq("ruby")
            buildreq.add_buildreq("rubygem-rdoc")
            name = "rubygem-" + m.group(1).strip()
            rawname = m.group(1).strip()
            version = m.group(2).strip()
            b = version.find("-")
            if b >= 0:
                version = version[:b]
    # override from commandline
    if name_argument and name_argument[0] != name:
        # Re-derive the version with the user-supplied name as the prefix.
        pattern = name_argument[0] + r"[\-]*(.*)\.(tgz|tar|zip)"
        p = re.compile(pattern)
        m = p.search(tarfile)
        if m:
            name = name_argument[0]
            rawname = name
            version = m.group(1).strip()
            b = version.find("-")
            if b >= 0 and version.find("-beta") < 0:
                version = version[:b]
            if version.startswith('.'):
                version = version[1:]
        else:
            name = name_argument[0]
    if not name:
        # Fall back to GitHub-style ".../archive/<tag>" or ".../tarball/<tag>" URLs.
        split = url_argument.split('/')
        if len(split) > 3 and split[-2] in ('archive', 'tarball'):
            name = split[-3]
            version = split[-1]
            if version.startswith('v'):
                version = version[1:]
            # remove extension
            version = '.'.join(version.split('.')[:-1])
            if version.endswith('.tar'):
                version = '.'.join(version.split('.')[:-1])
    # Final version cleanup: drop everything before the first "-" (unless a
    # "-beta" suffix is present) and any leading v/r tag prefix.
    b = version.find("-")
    if b >= 0 and version.find("-beta") < 0:
        b = b + 1
        version = version[b:]
    if len(version) > 0 and version[0] in ['v', 'r']:
        version = version[1:]
    assert name != ""
    if not target_dir:
        build.download_path = os.getcwd() + "/" + name
    else:
        build.download_path = target_dir
    call("mkdir -p %s" % build.download_path)
    # Remember a pre-existing <name>.gcov file so later stages can reuse it.
    gcov_path = build.download_path + "/" + name + ".gcov"
    if os.path.isfile(gcov_path):
        gcov_file = name + ".gcov"
    tarball_path = check_or_get_file(url, tarfile)
    sha1 = get_sha1sum(tarball_path)
    with open(build.download_path + "/upstream", "w") as file:
        file.write(sha1 + "/" + tarfile + "\n")
    tarball_prefix = name + "-" + version
    # Choose the extraction command and real top-level prefix by archive type.
    if tarfile.lower().endswith('.zip'):
        tarball_contents = subprocess.check_output(
            ["unzip", "-l", tarball_path], universal_newlines=True)
        # Line 4 of "unzip -l" output holds the first entry; its leading path
        # component is the archive prefix.
        if tarball_contents and len(tarball_contents.splitlines()) > 3:
            tarball_prefix = tarball_contents.splitlines()[3].rsplit("/")[0].split()[-1]
        extract_cmd = "unzip -d {0} {1}".format(build.base_path, tarball_path)
    elif tarfile.lower().endswith('.gem'):
        tarball_contents = subprocess.check_output(
            ["gem", "unpack", "--verbose", tarball_path], universal_newlines=True)
        extract_cmd = "gem unpack --target={0} {1}".format(build.base_path, tarball_path)
        if tarball_contents:
            tarball_prefix = tarball_contents.splitlines()[-1].rsplit("/")[-1]
            if tarball_prefix.endswith("'"):
                tarball_prefix = tarball_prefix[:-1]
    else:
        extract_cmd, tarball_prefix = build_untar(tarball_path)
    if version == "":
        version = "1"
    print("\n")
    print("Processing", url_argument)
    print(
        "=============================================================================================")
    print("Name :", name)
    print("Version :", version)
    print("Prefix :", tarball_prefix)
    # Seed the per-package Makefile consumed by the common build machinery.
    with open(build.download_path + "/Makefile", "w") as file:
        file.write("PKG_NAME := " + name + "\n")
        file.write("URL := " + url_argument + "\n")
        file.write("ARCHIVES :=")
        for archive in archives:
            file.write(" {}".format(archive))
        file.write("\n")
        file.write("\n")
        file.write("include ../common/Makefile.common\n")
    # Start from a clean extraction area, then extract the main tarball.
    shutil.rmtree("{}".format(build.base_path), ignore_errors=True)
    os.makedirs("{}".format(build.output_path))
    call("mkdir -p %s" % build.download_path)
    call(extract_cmd)
    path = build.base_path + tarball_prefix
    # Extra archives come as a flat [url, dest, url, dest, ...] list; extract
    # each and move its contents into <path>/<dest>.
    for archive, destination in zip(archives[::2], archives[1::2]):
        source_tarball_path = check_or_get_file(archive, os.path.basename(archive))
        if source_tarball_path.lower().endswith('.zip'):
            tarball_contents = subprocess.check_output(
                ["unzip", "-l", source_tarball_path], universal_newlines=True)
            if tarball_contents and len(tarball_contents.splitlines()) > 3:
                source_tarball_prefix = tarball_contents.splitlines()[3].rsplit("/")[0].split()[-1]
            extract_cmd = "unzip -d {0} {1}".format(build.base_path, source_tarball_path)
        else:
            extract_cmd, source_tarball_prefix = build_untar(source_tarball_path)
        buildpattern.archive_details[archive + "prefix"] = source_tarball_prefix
        call(extract_cmd)
        tar_files = glob.glob("{0}{1}/*".format(build.base_path, source_tarball_prefix))
        move_cmd = "mv "
        for tar_file in tar_files:
            move_cmd += tar_file + " "
        move_cmd += '{0}/{1}'.format(path, destination)
        mkdir_cmd = "mkdir -p "
        mkdir_cmd += '{0}/{1}'.format(path, destination)
        print("mkdir " + mkdir_cmd)
        call(mkdir_cmd)
        call(move_cmd)
        # Append (not overwrite) each extra archive's checksum to "upstream".
        sha1 = get_sha1sum(source_tarball_path)
        with open(build.download_path + "/upstream", "a") as file:
            file.write(sha1 + "/" + os.path.basename(archive) + "\n")
Example 9 (2 votes)
Project: trelby — Source File: myimport.py
def importFountain(fileName, frame):
# regular expressions for fountain markdown.
# https://github.com/vilcans/screenplain/blob/master/screenplain/richstring.py
ire = re.compile(
# one star
r'\*'
# anything but a space, then text
r'([^\s].*?)'
# finishing with one star
r'\*'
# must not be followed by star
r'(?!\*)'
)
bre = re.compile(
# two stars
r'\*\*'
# must not be followed by space
r'(?=\S)'
# inside text
r'(.+?[*_]*)'
# finishing with two stars
r'(?<=\S)\*\*'
)
ure = re.compile(
# underline
r'_'
# must not be followed by space
r'(?=\S)'
# inside text
r'([^_]+)'
# finishing with underline
r'(?<=\S)_'
)
boneyard_re = re.compile('/\\*.*?\\*/', flags=re.DOTALL)
# random magicstring used to escape literal star '\*'
literalstar = "Aq7RR"
# returns s with markdown formatting removed.
def unmarkdown(s):
s = s.replace("\\*", literalstar)
for style in (bre, ire, ure):
s = style.sub(r'\1', s)
return s.replace(literalstar, "*")
data = util.loadFile(fileName, frame, 1000000)
if data == None:
return None
if len(data) == 0:
wx.MessageBox("File is empty.", "Error", wx.OK, frame)
return None
inf = []
inf.append(misc.CheckBoxItem("Import titles as action lines."))
inf.append(misc.CheckBoxItem("Remove unsupported formatting markup."))
inf.append(misc.CheckBoxItem("Import section/synopsis as notes."))
dlg = misc.CheckBoxDlg(frame, "Fountain import options", inf,
"Import options:", False)
if dlg.ShowModal() != wx.ID_OK:
dlg.Destroy()
return None
importTitles = inf[0].selected
removeMarkdown = inf[1].selected
importSectSyn = inf[2].selected
# pre-process data - fix newlines, remove boneyard.
data = util.fixNL(data)
data = boneyard_re.sub('', data)
prelines = data.split("\n")
for i in xrange(len(prelines)):
try:
util.toLatin1(prelines[i])
except:
prelines[i] = util.cleanInput(u"" + prelines[i].decode('UTF-8', "ignore"))
lines = []
tabWidth = 4
lns = []
sceneStartsList = ("INT", "EXT", "EST", "INT./EXT", "INT/EXT", "I/E", "I./E")
TWOSPACE = " "
skipone = False
# First check if title lines are present:
c = 0
while c < len(prelines):
if prelines[c] != "":
c = c+1
else:
break
# prelines[0:i] are the first bunch of lines, that could be titles.
# Our check for title is simple:
# - the line does not start with 'fade'
# - the first line has a single ':'
if c > 0:
l = util.toInputStr(prelines[0].expandtabs(tabWidth).lstrip().lower())
if not l.startswith("fade") and l.count(":") == 1:
# these are title lines. Now do what the user requested.
if importTitles:
# add TWOSPACE to all the title lines.
for i in xrange(c):
prelines[i] += TWOSPACE
else:
#remove these lines
prelines = prelines[c+1:]
for l in prelines:
if l != TWOSPACE:
lines.append(util.toInputStr(l.expandtabs(tabWidth)))
else:
lines.append(TWOSPACE)
linesLen = len(lines)
def isPrevEmpty():
    """True if the most recently collected line has empty text."""
    return bool(lns) and (lns[-1].text == "")
def isPrevType(ltype):
    """True if the previously collected line has element type ltype."""
    if not lns:
        return False
    return lns[-1].lt == ltype
# look ahead: is the line following index i blank?
def isNextEmpty(i):
    """True if a line exists after index i and it is empty."""
    nxt = i + 1
    return nxt < len(lines) and lines[nxt] == ""
def getPrevType():
    """Element type of the previous line, or ACTION when none exist yet."""
    return lns[-1].lt if lns else screenplay.ACTION
def isParen(s):
    """True if s is wrapped in parentheses (a parenthetical line)."""
    if not s.startswith('('):
        return False
    return s.endswith(')')
def isScene(s):
    """True if s looks like a fountain scene heading."""
    # a trailing TWOSPACE sentinel always forces non-scene.
    if s.endswith(TWOSPACE):
        return False
    # a single leading period forces a scene heading ("..." does not).
    if s.startswith(".") and not s.startswith(".."):
        return True
    tmp = s.upper()
    for pat in (r'^(INT|EXT|EST)[ .]',
                r'^(INT\.?/EXT\.?)[ .]',
                r'^I/E[ .]'):
        if re.match(pat, tmp):
            return True
    return False
def isTransition(s):
    """True for transitions: ALL CAPS ending in "TO:", or a ">" line
    that is not centered text ("&gt;...&lt;")."""
    if s.isupper() and s.endswith("TO:"):
        return True
    return s.startswith(">") and not s.endswith("<")
def isCentered(s):
    """True if s uses fountain centered-text markup (">text<")."""
    if not s.startswith(">"):
        return False
    return s.endswith("<")
def isPageBreak(s):
    """True if s is a page break: three or more '=' and nothing else."""
    return s[:3] == '===' and not s.lstrip('=')
def isNote(s):
    """True if s is a fountain note ("[[...]]")."""
    if s.startswith("[["):
        return s.endswith("]]")
    return False
def isSection(s):
    """True if s is a section heading (starts with '#')."""
    return s[:1] == "#"
def isSynopsis(s):
    """True if s is a synopsis line: one '=' but not '==' (page break)."""
    if not s.startswith("="):
        return False
    return not s.startswith("==")
# NOTE(review): indentation reconstructed from a whitespace-mangled source.
# first pass - identify linetypes
for i in range(linesLen):
    # a previous section/synopsis line asked to swallow its blank follower.
    if skipone:
        skipone = False
        continue
    s = lines[i]
    sl = s.lstrip()
    # mark as ACTION by default.
    line = screenplay.Line(screenplay.LB_FORCED, screenplay.ACTION, s)
    # Start testing lines for element type. Go in order:
    # Scene Character, Paren, Dialog, Transition, Note.
    if s == "" or isCentered(s) or isPageBreak(s):
        # do nothing - import as action.
        pass
    elif s == TWOSPACE:
        # blank continuation: inherit the previous element type.
        line.lt = getPrevType()
    elif isScene(s):
        line.lt = screenplay.SCENE
        if sl.startswith('.'):
            # drop the forcing period.
            line.text = sl[1:]
        else:
            line.text = sl
    elif isTransition(sl) and isPrevEmpty() and isNextEmpty(i):
        line.lt = screenplay.TRANSITION
        if line.text.startswith('>'):
            line.text = sl[1:].lstrip()
    elif s.isupper() and isPrevEmpty() and not isNextEmpty(i):
        line.lt = screenplay.CHARACTER
        if s.endswith(TWOSPACE):
            # trailing TWOSPACE sentinel overrides: treat as action.
            line.lt = screenplay.ACTION
    elif isParen(sl) and (isPrevType(screenplay.CHARACTER) or
                          isPrevType(screenplay.DIALOGUE)):
        line.lt = screenplay.PAREN
    elif (isPrevType(screenplay.CHARACTER) or
          isPrevType(screenplay.DIALOGUE) or
          isPrevType(screenplay.PAREN)):
        line.lt = screenplay.DIALOGUE
    elif isNote(sl):
        line.lt = screenplay.NOTE
        line.text = sl.strip('[]')
    elif isSection(s) or isSynopsis(s):
        if not importSectSyn:
            # drop the line (and its trailing blank, via skipone).
            if isNextEmpty(i):
                skipone = True
            continue
        line.lt = screenplay.NOTE
        line.text = sl.lstrip('=#')
    if line.text == TWOSPACE:
        pass
    elif line.lt != screenplay.ACTION:
        line.text = line.text.lstrip()
    else:
        tmp = line.text.rstrip()
        # we don't support center align, so simply add required indent.
        if isCentered(tmp):
            tmp = tmp[1:-1].strip()
            width = frame.panel.ctrl.sp.cfg.getType(screenplay.ACTION).width
            if len(tmp) < width:
                tmp = ' ' * ((width - len(tmp)) // 2) + tmp
        line.text = tmp
    if removeMarkdown:
        line.text = unmarkdown(line.text)
        # drop the dual-dialogue caret from character names.
        if line.lt == screenplay.CHARACTER and line.text.endswith('^'):
            line.text = line.text[:-1]
    lns.append(line)
ret = []
# second pass helper functions.
def isLastLBForced():
    """True if the last output line ends with a forced linebreak."""
    if not ret:
        return False
    return ret[-1].lb == screenplay.LB_FORCED
def makeLastLBLast():
    """Mark the last output line as ending its element (LB_LAST)."""
    if not ret:
        return
    ret[-1].lb = screenplay.LB_LAST
def isRetPrevType(t):
    """True if the last output line has element type t."""
    if ret:
        return ret[-1].lt == t
    return False
# NOTE(review): indentation reconstructed from a whitespace-mangled source.
# second pass - remove unneeded empty lines, and fix the linebreaks.
for ln in lns:
    if ln.text == '':
        # an empty line after a forced break just terminates the element;
        # otherwise keep it as an (empty) line.
        if isLastLBForced():
            makeLastLBLast()
        else:
            ret.append(ln)
    elif not isRetPrevType(ln.lt):
        # element type changed: close the previous element first.
        makeLastLBLast()
        ret.append(ln)
    else:
        ret.append(ln)
# close the final element.
makeLastLBLast()
return ret
2
Example 10
Project: ru Source File: default.py
def ListSeries(params):
    """List the video links found on a cinema-hd.ru item page.

    Fetches params['url'], scrapes title/plot/metadata/fanart with
    BeautifulSoup, and adds one Kodi directory item per embedded video
    iframe. Several page "layouts" are probed in order for each iframe;
    the matched layout is recorded in the module-global layout_marks
    (shown when debug_mode is on). Returns True on failure paths after
    showing a message.

    NOTE(review): indentation reconstructed from a whitespace-mangled
    source; verify the nesting of the layout branches against the
    original addon. Python 2 code (print statements, "except E, e").
    """
    prtitle = ''; infoSet = {}; vhost_marks = []
    global layout_marks
    layout_marks = []
    # marker words stripped from scraped titles.
    common_list = ['ФИЛЬМ', 'СМОТРЕТЬ', 'ТЕЛЕШОУ', 'МУЛЬТСЕРИАЛ', '\n', 'СЕРИАЛ', 'Полный Фильм', 'Фильм']
    # generic titles that trigger the fall-back title lookup (Layout 5).
    common_titles_list = ['фильм', 'Фильм', 'документальный фильм', 'мультфильм', 'Телешоу', 'Концерт']
    http = GET(params['url'])
    http = clean_html(http, 'ext')
    #diagnose(http.decode('utf-8').encode('ascii','replace'))
    try: soup = bs(http, 'html5lib', from_encoding = "utf-8")
    except Exception, e:
        print "BS load error: " + str(e)
        ShowMessage(addon_name, "BS error")
        return True
    #print soup.prettify('utf-8')
    content = soup.find('div', class_ = 'full-item')
    #content = soup.find('div', id = 'allEntries')
    #print content.prettify('utf-8')
    if not content:
        print "Content container is not found, used uncut html"
        content = soup
    try:
        videos = content.find_all(video_conditions)
    except Exception, e:
        print "BS exception: " + str(e)
        ShowMessage(addon_name, "Exception in BS module")
        return True
    if len(videos) == 0:
        # no players found: either the item was removed (red message on
        # the page) or the page uses an unknown layout.
        removedmes = content.find(removed_message_conditions, attrs = {"style": "color:red"})
        if removedmes:
            ShowMessage("Cinema-hd.ru", removedmes.string.encode('utf-8'), times = 55000)
            return True
        else:
            print "Failed to parse"
            ShowMessage(addon_name, "неизвестный тип верстки")
            return True
    #print videos
    #plot = content.find('span', itemprop = "description")
    try: plot = content.find('div', class_ = "item-info inline")
    except Exception, e: print str(e)
    if plot:
        try:
            imdata = plot.find_parent('div', class_ = 'full-item-content')
            plot = ' '.join(plot.stripped_strings).encode('utf-8')
            infoSet['plot'] = plot
            #imdata = imdata.find('a', target = "_blank", class_ = "ulightbox")
            imdata = imdata.find('img', itemprop = "image")
            img = imdata['src']
            #print img
        except Exception, e:
            print str(e)
            img = params['image']
    else:
        img = params['image']
    #Metadata
    try:
        metadata = content.find('ul', class_ = 'film-tech-info')
        director = metadata.find('strong', itemprop = "director").next_sibling.strip().encode('utf-8')
        genre = content.find('span', itemprop = "genre").string.strip().encode('utf-8')
        actors = content.find('strong', itemprop = "actor").next_sibling.strip().encode('utf-8').split(', ')
        year = content.find('strong', itemprop = "dateCreated").next_sibling.encode('utf-8')
        infoSet.update({
            'genre': genre,
            'year': int(year),
            'director': director,
            'cast': actors
        })
    except Exception, e: print str(e)
    #Fanart
    fanartcontlist = content.find_all('a', attrs = {"class": "ulightbox", "data-fancybox-group": "screenshots"})
    if fanartcontlist: fanartlist = [i['href'] for i in fanartcontlist]
    else: fanartlist = None
    #print fanartlist
    for iframe in videos:
        #Layout 1
        title = iframe.find_previous_sibling('span', style = re.compile("color\:.?(#ff9900|orange|yellow)|font-size\:.?(14|13)pt"))
        if title:
            #print "Layout 1"
            layout_marks.append('1')
        #Layout 2
        if not title:
            title = iframe.find_parent('span', style = re.compile("color\:.?(#ff9900|orange|yellow)|font-size\:.?(14|13)pt"))
            if title:
                #print "Layout 2"
                layout_marks.append('2')
        #Layout 3
        if not title:
            title = iframe.find_previous('font', color = "ff9900")
            if title:
                titlecont = list(title.stripped_strings)
                if len(titlecont) == 0:
                    title = title.find_previous('font', color = "ff9900")
                    if title:
                        #print "Layout 3b"
                        layout_marks.append('3b')
                elif not title.font:
                    #print "Layout 3"
                    layout_marks.append('3')
            #Layout 3a
            if title and title.font:
                titlecontalt = list(title.stripped_strings)
                title.font.decompose()
                titlecont = list(title.stripped_strings)
                if len(titlecont) == 0:
                    if len(titlecontalt) > 0:
                        title = titlecontalt[0].encode('utf-8')
                        #print "Layout 3a1"
                        layout_marks.append('3a1')
                    else: title = None
                else:
                    #print "Layout 3a"
                    layout_marks.append('3a')
        #Layout 4
        if not title:
            title = iframe.find_previous('span', style = re.compile("color\:.?(#ff9900|orange|yellow)|font-size\:.?(14|13)pt"))
            #print title
            #print str(type(title.contents[0]))
            if title and str(type(title.contents[0])) == "<class 'bs4.element.Tag'>":
                #if title.contents[0].has_attr('style') and title.contents[0]['style']=='font-size:13pt':
                title = None
            else:
                if title:
                    #print "Layout 4"
                    layout_marks.append('4')
        #print type(title)
        if str(type(title)) == "<class 'bs4.element.Tag'>":
            titlecont = list(title.stripped_strings)
            title = titlecont[0].encode('utf-8')
        #Layout 5
        if not title or title in common_titles_list:
            title = content.find('meta', itemprop = "name")
            if title:
                title = title['content'].encode('utf-8')
                #print "Layout 5"
                layout_marks.append('5')
        #Layout N
        if not title:
            title = params['title']
            #print "Layout N"
            layout_marks.append('N')
        for common in common_list:
            if title and common in title:
                title = title.replace(common, '', 1).strip()
        #don't add trailer with the same name
        #if len(videos) == 2 and title == prtitle: break
        prtitle = title
        #print title, url
        #if title == 'трейлер' or title == 'Трейлер': continue
        url = iframe['src']
        #print url
        # extract the second-level domain of the hosting service.
        vhost = re.findall(r'(?:www\.)?(?:[\w\-]+\.)*([\w\-]+)\.\w+/', url)
        if vhost:
            vhost = vhost[0]
            vhost_marks.append(vhost)
        else:
            # unrecognizable host: drop the item and its layout mark.
            layout_marks.pop()
            continue
        '''if 'moonwalk.cc/serial' in url:
        ListMWSeasons(url, params['url'])
        continue'''
        li = xbmcgui.ListItem(title, iconImage = addon_icon, thumbnailImage = img)
        li.setInfo(type = "video", infoLabels = infoSet)
        if fanartlist:
            import random
            fanart = random.choice(fanartlist)
            if xbmcver >= 13: li.setArt({'fanart': fanart})
            else: li.setProperty('fanart_image', fanart)
        IF = False; IP = True
        uri = {'url': url};
        if re.search('moonwalk\.cc\/serial|serpens\.nl\/serial', url):
            # serial pages get a folder item that lists seasons.
            uri['func'] = 'ListMWSeasons'
            uri['ref'] = params['url']
            uri['tvshowtitle'] = title
            uri['img'] = img
            IF = True; IP = False
        else:
            uri['func'] = 'Play'
            uri['title'] = title
            if 'moonwalk.cc/video' in url and use_ahds:
                #IP = False
                IP = True
        uri = construct_request(uri)
        if IP: li.setProperty('IsPlayable', 'true')
        xbmcplugin.addDirectoryItem(hos, uri, li, IF)
    if debug_mode:
        ShowMessage(addon_name, "[COLOR bisque]" + "-".join(layout_marks) + "[/COLOR] " + ", ".join(vhost_marks), times = 8000)
    xbmcplugin.setContent(hos, 'movies')
    #skin = xbmc.getSkinDir()
    #if skin == 'skin.aeonmq5':
    # print xbmc.getInfoLabel('Container.Viewmode')
    # xbmc.executebuiltin('Container.SetViewMode(55)')
    xbmcplugin.endOfDirectory(hos)
2
Example 11
Project: disco-dop Source File: runexp.py
def getgrammars(trees, sents, stages, testmaxwords, resultdir,
        numproc, lexmodel, simplelexsmooth, top):
    """Read off the requested grammars.

    For each stage, induce a grammar from trees/sents (treebank
    PCFG/PLCFRS, a DOP variant, or an mc-rerank model), write the rules,
    lexicon and auxiliary models under resultdir, optionally compute
    outside estimates, and store the results on the stage object via
    stage.update().

    NOTE(review): indentation reconstructed from a whitespace-mangled
    source; verify nesting against the original before relying on it.
    """
    tbfanout, n = treetransforms.treebankfanout(trees)
    logging.info('binarized treebank fan-out: %d #%d', tbfanout, n)
    # per-stage label mappings for multilevel coarse-to-fine parsing.
    mappings = [None for _ in stages]
    for n, stage in enumerate(stages):
        traintrees = trees
        stage.mapping = None
        prevn = 0
        if n and stage.prune:
            # index of the stage whose chart this stage prunes against.
            prevn = [a.name for a in stages].index(stage.prune)
        if stage.split:
            # split discontinuous nodes, then binarize, to obtain a
            # context-free approximation of the treebank.
            traintrees = [treetransforms.binarize(
                    treetransforms.splitdiscnodes(
                        tree.copy(True),
                        stage.markorigin),
                    childchar=':', dot=True, ids=grammar.UniqueIDs())
                    for tree in traintrees]
            logging.info('splitted discontinuous nodes')
        if stage.collapse:
            traintrees, mappings[n] = treebanktransforms.collapselabels(
                    [tree.copy(True) for tree in traintrees],
                    tbmapping=treebanktransforms.MAPPINGS[
                        stage.collapse[0]][stage.collapse[1]])
            logging.info('collapsed phrase labels for multilevel '
                    'coarse-to-fine parsing to %s level %d',
                    *stage.collapse)
        if n and mappings[prevn] is not None:
            # Given original labels A, convert CTF mapping1 A => C,
            # and mapping2 A => B to a mapping B => C.
            mapping1, mapping2 = mappings[prevn], mappings[n]
            if mappings[n] is None:
                stage.mapping = {a: mapping1[a] for a in mapping1}
            else:
                stage.mapping = {mapping2[a]: mapping1[a] for a in mapping2}
        if stage.mode.startswith('pcfg'):
            if tbfanout != 1 and not stage.split:
                raise ValueError('Cannot extract PCFG from treebank '
                        'with discontinuities.')
        backtransform = extrarules = None
        if lexmodel and simplelexsmooth:
            extrarules = lexicon.simplesmoothlexicon(lexmodel)
        if stage.mode == 'mc-rerank':
            # rerank model: pickle the fragment trees instead of writing
            # a rule/lexicon pair.
            from . import _fragments
            gram = parser.DictObj(_fragments.getctrees(zip(trees, sents)))
            tree = gram.trees1.extract(0, gram.vocab)
            gram.start = tree[:tree.index(' ')].lstrip('(')
            with gzip.open('%s/%s.train.pickle.gz' % (resultdir, stage.name),
                    'wb') as out:
                out.write(pickle.dumps(gram, protocol=-1))
        elif stage.dop:
            if stage.dop in ('doubledop', 'dop1'):
                if stage.dop == 'doubledop':
                    (xgrammar, backtransform,
                            altweights, fragments) = grammar.doubledop(
                            traintrees, sents, binarized=stage.binarized,
                            iterate=stage.iterate, complement=stage.complement,
                            numproc=numproc, maxdepth=stage.maxdepth,
                            maxfrontier=stage.maxfrontier,
                            extrarules=extrarules)
                elif stage.dop == 'dop1':
                    (xgrammar, backtransform,
                            altweights, fragments) = grammar.dop1(
                            traintrees, sents, binarized=stage.binarized,
                            maxdepth=stage.maxdepth,
                            maxfrontier=stage.maxfrontier,
                            extrarules=extrarules)
                # dump fragments
                with codecs.getwriter('utf8')(gzip.open('%s/%s.fragments.gz' %
                        (resultdir, stage.name), 'w')) as out:
                    out.writelines('%s\t%d\n' % (a, len(b))
                            for a, b in fragments)
            elif stage.dop == 'reduction':
                xgrammar, altweights = grammar.dopreduction(
                        traintrees, sents, packedgraph=stage.packedgraph,
                        extrarules=extrarules)
            else:
                raise ValueError('unrecognized DOP model: %r' % stage.dop)
            nodes = sum(len(list(a.subtrees())) for a in traintrees)
            if lexmodel and not simplelexsmooth: # FIXME: altweights?
                xgrammar = lexicon.smoothlexicon(xgrammar, lexmodel)
            msg = grammar.grammarinfo(xgrammar)
            rules, lex = grammar.writegrammar(
                    xgrammar, bitpar=stage.mode.startswith('pcfg-bitpar'))
            with codecs.getwriter('utf8')(gzip.open('%s/%s.rules.gz' % (
                    resultdir, stage.name), 'wb')) as rulesfile:
                rulesfile.write(rules)
            with codecs.getwriter('utf8')(gzip.open('%s/%s.lex.gz' % (
                    resultdir, stage.name), 'wb')) as lexiconfile:
                lexiconfile.write(lex)
            gram = Grammar(rules, lex, start=top,
                    binarized=stage.binarized)
            for name in altweights:
                gram.register('%s' % name, altweights[name])
            logging.info('DOP model based on %d sentences, %d nodes, '
                    '%d nonterminals', len(traintrees), nodes, len(gram.toid))
            logging.info(msg)
            if stage.estimator != 'rfe':
                gram.switch('%s' % stage.estimator)
            logging.info(gram.testgrammar()[1])
            if stage.dop in ('doubledop', 'dop1'):
                # backtransform keys are line numbers to rules file;
                # to see them together do:
                # $ paste <(zcat dop.rules.gz) <(zcat dop.backtransform.gz)
                with codecs.getwriter('utf8')(gzip.open(
                        '%s/%s.backtransform.gz' % (resultdir, stage.name),
                        'wb')) as out:
                    out.writelines('%s\n' % a for a in backtransform)
                if n and stage.prune:
                    msg = gram.getmapping(stages[prevn].grammar,
                            striplabelre=None if stages[prevn].dop
                                else re.compile('@.+$'),
                            neverblockre=re.compile('.+}<'),
                            splitprune=stage.splitprune and stages[prevn].split,
                            markorigin=stages[prevn].markorigin,
                            mapping=stage.mapping)
                else:
                    # recoverfragments() relies on this mapping to identify
                    # binarization nodes
                    msg = gram.getmapping(None,
                            striplabelre=None,
                            neverblockre=re.compile('.+}<'),
                            splitprune=False, markorigin=False,
                            mapping=stage.mapping)
                logging.info(msg)
            elif n and stage.prune: # dop reduction
                msg = gram.getmapping(stages[prevn].grammar,
                        striplabelre=None if stages[prevn].dop
                            and stages[prevn].dop not in ('doubledop', 'dop1')
                            else re.compile('@[-0-9]+$'),
                        neverblockre=re.compile(stage.neverblockre)
                            if stage.neverblockre else None,
                        splitprune=stage.splitprune and stages[prevn].split,
                        markorigin=stages[prevn].markorigin,
                        mapping=stage.mapping)
                if stage.mode == 'dop-rerank':
                    gram.getrulemapping(
                            stages[prevn].grammar, re.compile(r'@[-0-9]+\b'))
                logging.info(msg)
            # write prob models
            np.savez_compressed('%s/%s.probs.npz' % (resultdir, stage.name),
                    **{name: mod for name, mod
                        in zip(gram.modelnames, gram.models)})
        else: # not stage.dop
            xgrammar = grammar.treebankgrammar(traintrees, sents,
                    extrarules=extrarules)
            logging.info('induced %s based on %d sentences',
                    ('PCFG' if tbfanout == 1 or stage.split else 'PLCFRS'),
                    len(traintrees))
            if stage.split or os.path.exists('%s/pcdist.txt' % resultdir):
                logging.info(grammar.grammarinfo(xgrammar))
            else:
                logging.info(grammar.grammarinfo(xgrammar,
                        dump='%s/pcdist.txt' % resultdir))
            if lexmodel and not simplelexsmooth:
                xgrammar = lexicon.smoothlexicon(xgrammar, lexmodel)
            rules, lex = grammar.writegrammar(
                    xgrammar, bitpar=stage.mode.startswith('pcfg-bitpar'))
            with codecs.getwriter('utf8')(gzip.open('%s/%s.rules.gz' % (
                    resultdir, stage.name), 'wb')) as rulesfile:
                rulesfile.write(rules)
            with codecs.getwriter('utf8')(gzip.open('%s/%s.lex.gz' % (
                    resultdir, stage.name), 'wb')) as lexiconfile:
                lexiconfile.write(lex)
            gram = Grammar(rules, lex, start=top)
            logging.info(gram.testgrammar()[1])
            if n and stage.prune:
                msg = gram.getmapping(stages[prevn].grammar,
                        striplabelre=None,
                        neverblockre=re.compile(stage.neverblockre)
                            if stage.neverblockre else None,
                        splitprune=stage.splitprune and stages[prevn].split,
                        markorigin=stages[prevn].markorigin,
                        mapping=stage.mapping)
                logging.info(msg)
        logging.info('wrote grammar to %s/%s.{rules,lex%s}.gz',
                resultdir, stage.name,
                ',backtransform' if stage.dop in ('doubledop', 'dop1') else '')
        outside = None
        if stage.estimates in ('SX', 'SXlrgaps'):
            if stage.estimates == 'SX' and tbfanout != 1 and not stage.split:
                raise ValueError('SX estimate requires PCFG.')
            elif stage.mode != 'plcfrs':
                raise ValueError('estimates require parser w/agenda.')
            begin = time.clock()
            logging.info('computing %s estimates', stage.estimates)
            if stage.estimates == 'SX':
                outside = estimates.getpcfgestimates(gram, testmaxwords,
                        gram.toid[trees[0].label])
            elif stage.estimates == 'SXlrgaps':
                outside = estimates.getestimates(gram, testmaxwords,
                        gram.toid[trees[0].label])
            logging.info('estimates done. cpu time elapsed: %gs',
                    time.clock() - begin)
            np.savez_compressed('%s/%s.outside.npz' % (
                    resultdir, stage.name), outside=outside)
            logging.info('saved %s estimates', stage.estimates)
        elif stage.estimates:
            raise ValueError('unrecognized value; specify SX or SXlrgaps.')
        stage.update(grammar=gram, backtransform=backtransform,
                outside=outside)
    if any(stage.mapping is not None for stage in stages):
        with codecs.getwriter('utf8')(gzip.open('%s/mapping.json.gz' % (
                resultdir), 'wb')) as mappingfile:
            mappingfile.write(json.dumps([stage.mapping for stage in stages]))
2
Example 12
Project: FanFicFare Source File: adapter_fanfictionnet.py
def doExtractChapterUrlsAndMetadata(self,get_cover=True):
    """Scrape fanfiction.net story page: metadata and chapter URL list.

    Fetches self.origurl, extracts author/category/rating/summary/status
    and the per-chapter URLs into self.chapterUrls via self.story
    setters. Raises StoryDoesNotExist / FailedToDownload on known error
    pages.

    NOTE(review): indentation reconstructed from a whitespace-mangled
    source. Python 2 code (urllib2, unicode literals).
    """
    # fetch the chapter. From that we will get almost all the
    # metadata and chapter list
    url = self.origurl
    logger.debug("URL: "+url)
    # use BeautifulSoup HTML parser to make everything easier to find.
    try:
        data = self._fetchUrl(url)
        #logger.debug("\n===================\n%s\n===================\n"%data)
        soup = self.make_soup(data)
    except urllib2.HTTPError as e:
        if e.code == 404:
            raise exceptions.StoryDoesNotExist(url)
        else:
            raise e
    if "Unable to locate story" in data:
        raise exceptions.StoryDoesNotExist(url)
    # some times "Chapter not found...", sometimes "Chapter text not found..."
    if "not found. Please check to see you are not using an outdated url." in data:
        raise exceptions.FailedToDownload("Error downloading Chapter: %s! 'Chapter not found. Please check to see you are not using an outdated url.'" % url)
    if self.getConfig('check_next_chapter'):
        try:
            ## ffnet used to have a tendency to send out update
            ## notices in email before all their servers were
            ## showing the update on the first chapter. It
            ## generates another server request and doesn't seem
            ## to be needed lately, so now default it to off.
            try:
                chapcount = len(soup.find('select', { 'name' : 'chapter' } ).findAll('option'))
            # get chapter part of url.
            except:
                chapcount = 1
            chapter = url.split('/',)[5]
            tryurl = "https://%s/s/%s/%d/"%(self.getSiteDomain(),
                    self.story.getMetadata('storyId'),
                    chapcount+1)
            logger.debug('=Trying newer chapter: %s' % tryurl)
            newdata = self._fetchUrl(tryurl)
            if "not found. Please check to see you are not using an outdated url." not in newdata \
                    and "This request takes too long to process, it is timed out by the server." not in newdata:
                logger.debug('=======Found newer chapter: %s' % tryurl)
                soup = self.make_soup(newdata)
        except urllib2.HTTPError as e:
            if e.code == 503:
                raise e
        # NOTE(review): "except e:" catches instances of class e, not
        # "any exception" -- likely a latent bug in the original.
        except e:
            logger.warn("Caught an exception reading URL: %s sleeptime(%s) Exception %s."%(unicode(url),sleeptime,unicode(e)))
            pass
    # Find authorid and URL from... author url.
    a = soup.find('a', href=re.compile(r"^/u/\d+"))
    self.story.setMetadata('authorId',a['href'].split('/')[2])
    self.story.setMetadata('authorUrl','https://'+self.host+a['href'])
    self.story.setMetadata('author',a.string)
    ## Pull some additional data from html.
    ## ffnet shows category two ways
    ## 1) class(Book, TV, Game,etc) >> category(Harry Potter, Sailor Moon, etc)
    ## 2) cat1_cat2_Crossover
    ## For 1, use the second link.
    ## For 2, fetch the crossover page and pull the two categories from there.
    categories = soup.find('div',{'id':'pre_story_links'}).findAll('a',{'class':'xcontrast_txt'})
    #print("xcontrast_txt a:%s"%categories)
    if len(categories) > 1:
        # Strangely, the ones with *two* links are the
        # non-crossover categories. Each is in a category itself
        # of Book, Movie, etc.
        self.story.addToList('category',stripHTML(categories[1]))
    elif 'Crossover' in categories[0]['href']:
        caturl = "https://%s%s"%(self.getSiteDomain(),categories[0]['href'])
        catsoup = self.make_soup(self._fetchUrl(caturl))
        found = False
        for a in catsoup.findAll('a',href=re.compile(r"^/crossovers/.+?/\d+/")):
            self.story.addToList('category',stripHTML(a))
            found = True
        if not found:
            # Fall back. I ran across a story with a Crossver
            # category link to a broken page once.
            # http://www.fanfiction.net/s/2622060/1/
            # Naruto + Harry Potter Crossover
            logger.info("Fall back category collection")
            for c in stripHTML(categories[0]).replace(" Crossover","").split(' + '):
                self.story.addToList('category',c)
    a = soup.find('a', href=re.compile(r'https?://www\.fictionratings\.com/'))
    rating = a.string
    if 'Fiction' in rating: # if rating has 'Fiction ', strip that out for consistency with past.
        rating = rating[8:]
    self.story.setMetadata('rating',rating)
    # after Rating, the same bit of text containing id:123456 contains
    # Complete--if completed.
    gui_table1i = soup.find('div',{'id':'content_wrapper_inner'})
    self.story.setMetadata('title', stripHTML(gui_table1i.find('b'))) # title appears to be only(or at least first) bold tag in gui_table1i
    summarydiv = gui_table1i.find('div',{'style':'margin-top:2px'})
    if summarydiv:
        self.setDescription(url,stripHTML(summarydiv))
    grayspan = gui_table1i.find('span', {'class':'xgray xcontrast_txt'})
    # for b in grayspan.findAll('button'):
    # b.extract()
    metatext = stripHTML(grayspan).replace('Hurt/Comfort','Hurt-Comfort')
    #logger.debug("metatext:(%s)"%metatext)
    if 'Status: Complete' in metatext:
        self.story.setMetadata('status', 'Completed')
    else:
        self.story.setMetadata('status', 'In-Progress')
    metalist = metatext.split(" - ")
    #logger.debug("metalist:(%s)"%metalist)
    # Rated: Fiction K - English - Words: 158,078 - Published: 02-04-11
    # Rated: Fiction T - English - Adventure/Sci-Fi - Naruto U. - Chapters: 22 - Words: 114,414 - Reviews: 395 - Favs: 779 - Follows: 835 - Updated: 03-21-13 - Published: 04-28-12 - id: 8067258
    # rating is obtained above more robustly.
    if metalist[0].startswith('Rated:'):
        metalist=metalist[1:]
    # next is assumed to be language.
    self.story.setMetadata('language',metalist[0])
    metalist=metalist[1:]
    # next might be genre.
    genrelist = metalist[0].split('/') # Hurt/Comfort already changed above.
    goodgenres=True
    for g in genrelist:
        #logger.debug("g:(%s)"%g)
        if g.strip() not in ffnetgenres:
            #logger.info("g not in ffnetgenres")
            goodgenres=False
    if goodgenres:
        self.story.extendList('genre',genrelist)
        metalist=metalist[1:]
    # Updated: <span data-xutime='1368059198'>5/8</span> - Published: <span data-xutime='1278984264'>7/12/2010</span>
    # Published: <span data-xutime='1384358726'>8m ago</span>
    dates = soup.findAll('span',{'data-xutime':re.compile(r'^\d+$')})
    if len(dates) > 1 :
        # updated get set to the same as published upstream if not found.
        self.story.setMetadata('dateUpdated',datetime.fromtimestamp(float(dates[0]['data-xutime'])))
    self.story.setMetadata('datePublished',datetime.fromtimestamp(float(dates[-1]['data-xutime'])))
    # Meta key titles and the metadata they go into, if any.
    metakeys = {
        # These are already handled separately.
        'Chapters':False,
        'Status':False,
        'id':False,
        'Updated':False,
        'Published':False,
        'Reviews':'reviews',
        'Favs':'favs',
        'Follows':'follows',
        'Words':'numWords',
        }
    chars_ships_list=[]
    while len(metalist) > 0:
        m = metalist.pop(0)
        if ':' in m:
            key = m.split(':')[0].strip()
            if key in metakeys:
                if metakeys[key]:
                    self.story.setMetadata(metakeys[key],m.split(':')[1].strip())
                continue
        # no ':' or not found in metakeys
        chars_ships_list.append(m)
    # all because sometimes chars can have ' - ' in them.
    chars_ships_text = (' - ').join(chars_ships_list)
    # print("chars_ships_text:%s"%chars_ships_text)
    # with 'pairing' support, pairings are bracketed w/o comma after
    # [Caspian X, Lucy Pevensie] Edmund Pevensie, Peter Pevensie
    self.story.extendList('characters',chars_ships_text.replace('[','').replace(']',',').split(','))
    l = chars_ships_text
    while '[' in l:
        self.story.addToList('ships',l[l.index('[')+1:l.index(']')].replace(', ','/'))
        l = l[l.index(']')+1:]
    if get_cover:
        # Try the larger image first.
        cover_url = ""
        try:
            img = soup.select('img.lazy.cimage')
            cover_url=img[0]['data-original']
        except:
            img = soup.select('img.cimage')
            if img:
                cover_url=img[0]['src']
        logger.debug("cover_url:%s"%cover_url)
        authimg_url = ""
        if cover_url and self.getConfig('skip_author_cover'):
            authsoup = self.make_soup(self._fetchUrl(self.story.getMetadata('authorUrl')))
            try:
                img = authsoup.select('img.lazy.cimage')
                authimg_url=img[0]['data-original']
            except:
                img = authsoup.select('img.cimage')
                if img:
                    authimg_url=img[0]['src']
            logger.debug("authimg_url:%s"%authimg_url)
            ## ffnet uses different sizes on auth & story pages, but same id.
            ## //ffcdn2012t-fictionpressllc.netdna-ssl.com/image/1936929/150/
            ## //ffcdn2012t-fictionpressllc.netdna-ssl.com/image/1936929/180/
            try:
                cover_id = cover_url.split('/')[4]
            except:
                cover_id = None
            try:
                authimg_id = authimg_url.split('/')[4]
            except:
                authimg_id = None
            ## don't use cover if it matches the auth image.
            if cover_id and authimg_id and cover_id == authimg_id:
                cover_url = None
        if cover_url:
            self.setCoverImage(url,cover_url)
    # Find the chapter selector
    select = soup.find('select', { 'name' : 'chapter' } )
    if select is None:
        # no selector found, so it's a one-chapter story.
        self.chapterUrls.append((self.story.getMetadata('title'),url))
    else:
        allOptions = select.findAll('option')
        for o in allOptions:
            url = u'https://%s/s/%s/%s/' % ( self.getSiteDomain(),
                    self.story.getMetadata('storyId'),
                    o['value'])
            # just in case there's tags, like <i> in chapter titles.
            title = u"%s" % o
            title = re.sub(r'<[^>]+>','',title)
            self.chapterUrls.append((title,url))
    self.story.setMetadata('numChapters',len(self.chapterUrls))
    return
2
Example 13
def isleap(year):
    """Return True if year is a leap year in the Gregorian calendar."""
    # "!=" replaces the Python-2-only "<>" operator; identical semantics,
    # works on both Python 2 and 3.
    return year % 4 == 0 and (year % 100 != 0 or year % 400 == 0)
# Return number of leap years in range [y1, y2)
# Assume y1 <= y2 and no funny (non-leap century) years
def leapdays(y1, y2):
    """Return the number of leap years in the half-open range [y1, y2)."""
    # explicit floor division: "/" on ints changed meaning in Python 3,
    # which would silently produce floats here.
    return (y2 + 3) // 4 - (y1 + 3) // 4
EPOCH = 1970
def timegm(tuple):
    """Unrelated but handy function to calculate Unix timestamp from GMT."""
    year, month, day, hour, minute, second = tuple[:6]
    assert year >= EPOCH
    assert 1 <= month <= 12
    days = 365*(year-EPOCH) + leapdays(EPOCH, year)
    # add the days of the months preceding `month`.
    # NOTE(review): mdays is a module-level table defined elsewhere in
    # this file (presumably [0, 31, 28, ...]) -- verify.
    for i in range(1, month):
        days = days + mdays[i]
    if month > 2 and isleap(year):
        days = days + 1
    days = days + day - 1
    hours = days*24 + hour
    minutes = hours*60 + minute
    # plain int literal: the original "60L" long literal is a syntax
    # error on Python 3; py2 ints auto-promote to long anyway.
    seconds = minutes*60 + second
    return seconds
# Date/time conversion routines for formats used by the HTTP protocol.
EPOCH = 1970
def my_timegm(tt):
    """timegm() that returns None instead of raising on out-of-range input."""
    year, month, mday, hour, min, sec = tt[:6]
    if year < EPOCH:
        return None
    if not (1 <= month <= 12 and 1 <= mday <= 31):
        return None
    if not (0 <= hour <= 24 and 0 <= min <= 59 and 0 <= sec <= 61):
        return None
    return timegm(tt)
days = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
months = ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
          "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
# lower-cased month names for case-insensitive lookup.
# str.lower() replaces string.lower(), which was removed in Python 3;
# identical result on Python 2.
months_lower = [month.lower() for month in months]
def time2isoz(t=None):
    """Return time t (seconds since epoch) as "YYYY-MM-DD hh:mm:ssZ".

    The result represents Universal Time (UTC, aka GMT), for example:
    1994-11-24 08:49:37Z

    Defaults to the current time when t is omitted.
    """
    if t is None:
        t = time.time()
    stamp = time.gmtime(t)
    return "%04d-%02d-%02d %02d:%02d:%02dZ" % stamp[:6]
def time2netscape(t=None):
    """Return time t as "Wdy, DD-Mon-YYYY HH:MM:SS GMT" (cookie format).

    Defaults to the current time when t is omitted.
    """
    if t is None:
        t = time.time()
    stamp = time.gmtime(t)
    return "%s %02d-%s-%04d %02d:%02d:%02d GMT" % (
        days[stamp[6]], stamp[2], months[stamp[1] - 1], stamp[0],
        stamp[3], stamp[4], stamp[5])
UTC_ZONES = {"GMT": None, "UTC": None, "UT": None, "Z": None}
# optional sign, 1-2 digit hours, optional colon, optional minutes.
timezone_re = re.compile(r"^([-+])?(\d\d?):?(\d\d)?$")
def offset_from_tz_string(tz):
    """Convert a timezone string to its offset from UTC in seconds.

    Accepts UTC aliases ("GMT", "UTC", "UT", "Z") and numeric offsets
    like "-0800" or "+01:00". Returns None for unrecognized strings.
    """
    offset = None
    # "tz in UTC_ZONES" replaces dict.has_key(), removed in Python 3.
    if tz in UTC_ZONES:
        offset = 0
    else:
        m = timezone_re.search(tz)
        if m:
            offset = 3600 * int(m.group(2))
            if m.group(3):
                offset = offset + 60 * int(m.group(3))
            if m.group(1) == '-':
                offset = -offset
    return offset
def _str2time(day, mon, yr, hr, min, sec, tz):
    """Build an epoch timestamp from parsed date/time field strings.

    mon may be a month name or number; missing clock fields default to
    0; two-digit years are resolved to the nearest century. Returns
    None when the fields cannot be interpreted.
    """
    # translate month name to number
    # month numbers start with 1 (January)
    try:
        # mon.lower()/tz.upper() replace the string-module function
        # forms (string.lower/upper), removed in Python 3.
        mon = months_lower.index(mon.lower())+1
    except ValueError:
        # maybe it's already a number
        try:
            imon = int(mon)
        except ValueError:
            return None
        if 1 <= imon <= 12:
            mon = imon
        else:
            return None
    # make sure clock elements are defined
    if hr is None: hr = 0
    if min is None: min = 0
    if sec is None: sec = 0
    yr = int(yr)
    day = int(day)
    hr = int(hr)
    min = int(min)
    sec = int(sec)
    if yr < 1000:
        # find "obvious" year
        cur_yr = time.localtime(time.time())[0]
        m = cur_yr % 100
        tmp = yr
        yr = yr + cur_yr - m
        m = m - tmp
        if abs(m) > 50:
            if m > 0: yr = yr + 100
            else: yr = yr - 100
    # convert UTC time tuple to seconds since epoch (not timezone-adjusted)
    t = my_timegm((yr, mon, day, hr, min, sec, tz))
    if t is not None:
        # adjust time using timezone string, to get absolute time since epoch
        if tz is None:
            tz = "UTC"
        tz = tz.upper()
        offset = offset_from_tz_string(tz)
        if offset is None:
            return None
        t = t - offset
    return t
strict_re = re.compile(r"^[SMTWF][a-z][a-z], (\d\d) ([JFMASOND][a-z][a-z]) (\d\d\d\d) (\d\d):(\d\d):(\d\d) GMT$")
wkday_re = re.compile(
r"^(?:Sun|Mon|Tue|Wed|Thu|Fri|Sat)[a-z]*,?\s*", re.I)
loose_http_re = re.compile(
r"""^
(\d\d?) # day
(?:\s+|[-\/])
(\w+) # month
(?:\s+|[-\/])
(\d+) # year
(?:
(?:\s+|:) # separator before clock
(\d\d?):(\d\d) # hour:min
(?::(\d\d))? # optional seconds
)? # optional clock
\s*
([-+]?\d{2,4}|(?![APap][Mm]\b)[A-Za-z]+)? # timezone
\s*
(?:\(\w+\))? # ASCII representation of timezone in parens.
\s*$""", re.X)
def http2time(text):
    """Returns time in seconds since epoch of time represented by a string.
    Return value is an integer.
    None is returned if the format of str is unrecognized, the time is outside
    the representable range, or the timezone string is not recognized. If the
    string contains no timezone, UTC is assumed.
    The timezone in the string may be numerical (like "-0800" or "+0100") or a
    string timezone (like "UTC", "GMT", "BST" or "EST"). Currently, only the
    timezone strings equivalent to UTC (zero offset) are known to the function.
    The function loosely parses the following formats:
    Wed, 09 Feb 1994 22:23:32 GMT -- HTTP format
    Tuesday, 08-Feb-94 14:15:29 GMT -- old rfc850 HTTP format
    Tuesday, 08-Feb-1994 14:15:29 GMT -- broken rfc850 HTTP format
    09 Feb 1994 22:23:32 GMT -- HTTP format (no weekday)
    08-Feb-94 14:15:29 GMT -- rfc850 format (no weekday)
    08-Feb-1994 14:15:29 GMT -- broken rfc850 format (no weekday)
    The parser ignores leading and trailing whitespace. The time may be
    absent.
    If the year is given with only 2 digits, the function will select the
    century that makes the year closest to the current date.
    """
    # fast exit for strictly conforming string
    m = strict_re.search(text)
    if m:
        g = m.groups()
        # str methods replace the removed-in-py3 string module functions.
        mon = months_lower.index(g[1].lower()) + 1
        tt = (int(g[2]), mon, int(g[0]),
              int(g[3]), int(g[4]), float(g[5]))
        return my_timegm(tt)
    # No, we need some messy parsing...
    # clean up
    text = text.lstrip()
    text = wkday_re.sub("", text, 1) # Useless weekday
    # tz is time zone specifier string
    day, mon, yr, hr, min, sec, tz = [None]*7
    # loose regexp parse
    m = loose_http_re.search(text)
    if m is not None:
        day, mon, yr, hr, min, sec, tz = m.groups()
    else:
        return None # bad format
    return _str2time(day, mon, yr, hr, min, sec, tz)
# ISO 8601 date/time, compact or separated, with optional clock and zone.
# Raw string: the pattern contains backslash sequences (\d, \/, \s) that
# are invalid string escapes and raise SyntaxWarning/errors on modern
# Python when left un-raw; the compiled pattern is unchanged.
iso_re = re.compile(
    r"""^
    (\d{4}) # year
    [-\/]?
    (\d\d?) # numerical month
    [-\/]?
    (\d\d?) # day
    (?:
    (?:\s+|[-:Tt]) # separator before clock
    (\d\d?):?(\d\d) # hour:min
    (?::?(\d\d(?:\.\d*)?))? # optional seconds (and fractional)
    )? # optional clock
    \s*
    ([-+]?\d\d?:?(:?\d\d)?
    |Z|z)? # timezone (Z is "zero meridian", i.e. GMT)
    \s*$""", re.X)
def iso2time(text):
    """
    As for http2time, but parses the ISO 8601 formats:
    1994-02-03 14:15:29 -0100    -- ISO 8601 format
    1994-02-03 14:15:29          -- zone is optional
    1994-02-03                   -- only date
    1994-02-03T14:15:29          -- Use T as separator
    19940203T141529Z             -- ISO 8601 compact format
    19940203                     -- only date

    Returns None if the string is not in a recognized format.
    """
    # clean up -- str.lstrip replaces the Python-2-only string.lstrip()
    # helper (identical behavior, portable).
    text = text.lstrip()
    # day, mon, yr, hr, min, sec, tz: time zone specifier string and the
    # individual date/time fields, all as strings (or None).
    day, mon, yr, hr, min, sec, tz = [None]*7
    # loose regexp parse
    m = iso_re.search(text)
    if m is None:
        return None            # bad format
    # XXX there's an extra bit of the timezone ignored here (the trailing
    # ":mm" sub-group): is that the right thing to do?
    yr, mon, day, hr, min, sec, tz, _ = m.groups()
    return _str2time(day, mon, yr, hr, min, sec, tz)
# XXX Andrew Dalke kindly sent me a similar class in response to my request on
# comp.lang.python, which I then proceeded to lose. I wrote this class
# instead, but I think he's released his code publicly since, could pinch the
# tests from it, at least...
class seek_wrapper:
    """Adds a seek method to a file object.

    This is only designed for seeking on readonly file-like objects.

    Wrapped file-like object must have a read method.  The readline method is
    only supported if that method is present on the wrapped object.  The
    readlines method is always supported.  xreadlines and iteration are
    supported only for Python 2.2 and above.

    Public attribute: wrapped (the wrapped file object).

    WARNING: All other attributes of the wrapped object (ie. those that are not
    one of wrapped, read, readline, readlines, xreadlines, __iter__ and next)
    are passed through unaltered, which may or may not make sense for your
    particular file object.
    """
    # General strategy is to check that cache is full enough, then delegate
    # everything to the cache (self.__cache, a StringIO instance).
    # Invariant: the end of the cache is always at the same place as the
    # end of the wrapped file:
    # self.wrapped.tell() == self.__cache.tell()

    def __init__(self, wrapped):
        self.wrapped = wrapped
        self.__have_readline = hasattr(self.wrapped, "readline")
        self.__cache = StringIO()

    def __getattr__(self, name):
        # Delegate any attribute we don't define to the wrapped object.
        wrapped = self.__dict__.get("wrapped")
        if wrapped:
            return getattr(wrapped, name)
        return getattr(self.__class__, name)

    def seek(self, offset, whence=0):
        """Seek, pulling data from the wrapped file into the cache as needed."""
        # make sure we have read all data up to the point we are seeking to
        pos = self.__cache.tell()
        if whence == 0:    # absolute
            to_read = offset - pos
        elif whence == 1:  # relative to current position
            to_read = offset
        elif whence == 2:  # relative to end of *wrapped* file
            # since we don't know yet where the end of that file is, we must
            # read everything
            to_read = None
        if to_read is None or to_read >= 0:
            if to_read is None:
                self.__cache.write(self.wrapped.read())
            else:
                self.__cache.write(self.wrapped.read(to_read))
            self.__cache.seek(pos)
        return self.__cache.seek(offset, whence)

    def tell(self):
        return self.__cache.tell()

    def read(self, size=-1):
        """Read up to size bytes (all remaining if size == -1)."""
        pos = self.__cache.tell()
        self.__cache.seek(pos)
        end = len(self.__cache.getvalue())
        available = end - pos
        # enough data already cached?
        if size <= available and size != -1:
            return self.__cache.read(size)
        # no, so read sufficient data from wrapped file and cache it
        to_read = size - available
        assert to_read > 0 or size == -1
        self.__cache.seek(0, 2)
        if size == -1:
            self.__cache.write(self.wrapped.read())
        else:
            self.__cache.write(self.wrapped.read(to_read))
        self.__cache.seek(pos)
        return self.__cache.read(size)

    def readline(self, size=-1):
        if not self.__have_readline:
            raise NotImplementedError("no readline method on wrapped object")
        # line we're about to read might not be complete in the cache, so
        # read another line first
        pos = self.__cache.tell()
        self.__cache.seek(0, 2)
        self.__cache.write(self.wrapped.readline())
        self.__cache.seek(pos)
        data = self.__cache.readline()
        if size != -1:
            r = data[:size]
            self.__cache.seek(pos+size)
        else:
            r = data
        return r

    def readlines(self, sizehint=-1):
        # Pull everything remaining into the cache, then split into lines.
        pos = self.__cache.tell()
        self.__cache.seek(0, 2)
        self.__cache.write(self.wrapped.read())
        self.__cache.seek(pos)
        try:
            return self.__cache.readlines(sizehint)
        except TypeError:  # 1.5.2 hack
            return self.__cache.readlines()

    def __iter__(self): return self

    def next(self):
        line = self.readline()
        if line == "": raise StopIteration
        return line

    # Python 3 iterator protocol; backward-compatible alias for next().
    __next__ = next

    xreadlines = __iter__

    def __repr__(self):
        # repr() calls replace the Python-2-only backquote syntax (`x`).
        return ("<%s at %s whose wrapped object = %s>" %
                (self.__class__.__name__, repr(id(self)), repr(self.wrapped)))

    def close(self):
        # BUG FIX: the original assigned self._cache = None, but the cache
        # attribute is the name-mangled self.__cache (_seek_wrapper__cache),
        # so the buffered data was never released on close.
        self.__cache = None
        self.read = None
        self.readline = None
        self.readlines = None
        self.seek = None
        if self.wrapped: self.wrapped.close()
        self.wrapped = None
class eoffile:
    """File-like stub that is permanently at end-of-file.

    Every read operation reports exhaustion by returning the empty string,
    regardless of the size requested.
    """

    def read(self, size=-1):
        return ""

    def readline(self, size=-1):
        return ""
class response_seek_wrapper(seek_wrapper):
    """Avoids unnecessarily clobbering methods on .close().

    Also supports pickling.

    Class name is for historical reasons.
    """

    def close(self):
        # Close the underlying stream, then swap in a permanently-EOF stub so
        # subsequent reads return "" instead of hitting cleared methods.
        self.wrapped.close()
        self.wrapped = eoffile()

    def __getstate__(self):
        # Three obvious pickling strategies exist: (1) truncate, (2) read to
        # end, (3) close the socket and reopen with a Range header on
        # unpickle.  (2) breaks the pickle protocol by mutating the original
        # object; (3) is too complicated and surprising.  So: (1) -- pickle a
        # snapshot whose wrapped object is an EOF stub.
        snapshot = dict(self.__dict__)
        snapshot["wrapped"] = eoffile()
        return snapshot
2 votes
Example 14
Project: pymatgen — Source File: nwchem.py
def _parse_job(self, output):
    """Parse the text output of one NWChem job (task) section.

    Scans the output line by line, toggling state flags when section
    headers are seen (geometry, basis summary, frequencies, Hessians,
    gradients) and accumulating the parsed quantities.

    :param output: raw text of a single job's output.
    :return: dict with keys "job_type", "energies", "corrections",
        "molecules", "structures", "basis_set", "errors", "has_error",
        "frequencies", "normal_frequencies", "hessian",
        "projected_hessian", "forces" and "task_time".
    """
    # Regexes for the quantities of interest.
    energy_patt = re.compile("Total \w+ energy\s+=\s+([\.\-\d]+)")
    energy_gas_patt = re.compile("gas phase energy\s+=\s+([\.\-\d]+)")
    energy_sol_patt = re.compile("sol phase energy\s+=\s+([\.\-\d]+)")
    coord_patt = re.compile("\d+\s+(\w+)\s+[\.\-\d]+\s+([\.\-\d]+)\s+"
                            "([\.\-\d]+)\s+([\.\-\d]+)")
    lat_vector_patt = re.compile("a[123]=<\s+([\.\-\d]+)\s+"
                                 "([\.\-\d]+)\s+([\.\-\d]+)\s+>")
    corrections_patt = re.compile("([\w\-]+ correction to \w+)\s+="
                                  "\s+([\.\-\d]+)")
    preamble_patt = re.compile("(No. of atoms|No. of electrons"
                               "|SCF calculation type|Charge|Spin "
                               "multiplicity)\s*:\s*(\S+)")
    force_patt = re.compile("\s+(\d+)\s+(\w+)" + 6 * "\s+([0-9\.\-]+)")
    time_patt = re.compile("\s+ Task \s+ times \s+ cpu: \s+ ([\.\d]+)s .+ ", re.VERBOSE)
    # Known error messages mapped to short error labels.
    error_defs = {
        "calculations not reaching convergence": "Bad convergence",
        "Calculation failed to converge": "Bad convergence",
        "geom_binvr: #indep variables incorrect": "autoz error",
        "dft optimize failed": "Geometry optimization failed"}
    # Fortran "D" exponents -> Python "e"; crude float-string detector.
    fort2py = lambda x : x.replace("D", "e")
    isfloatstring = lambda s : s.find(".") == -1
    # Line-scan state flags: which section (if any) we are currently inside.
    parse_hess = False
    parse_proj_hess = False
    hessian = None
    projected_hessian = None
    parse_force = False
    all_forces = []
    forces = []
    # Accumulators for the parsed results.
    data = {}
    energies = []
    frequencies = None
    normal_frequencies = None
    corrections = {}
    molecules = []
    structures = []
    species = []
    coords = []
    lattice = []
    errors = []
    basis_set = {}
    bset_header = []
    parse_geom = False
    parse_freq = False
    parse_bset = False
    parse_projected_freq = False
    job_type = ""
    parse_time = False
    time = 0
    for l in output.split("\n"):
        # Collect any known error messages on every line.
        for e, v in error_defs.items():
            if l.find(e) != -1:
                errors.append(v)
        if parse_time:
            m = time_patt.search(l)
            if m:
                time = m.group(1)
                parse_time = False
        if parse_geom:
            # "Atomic Mass" marks the end of a geometry block.
            if l.strip() == "Atomic Mass":
                if lattice:
                    structures.append(Structure(lattice, species, coords,
                                                coords_are_cartesian=True))
                else:
                    molecules.append(Molecule(species, coords))
                species = []
                coords = []
                lattice = []
                parse_geom = False
            else:
                m = coord_patt.search(l)
                if m:
                    species.append(m.group(1).capitalize())
                    coords.append([float(m.group(2)), float(m.group(3)),
                                   float(m.group(4))])
                m = lat_vector_patt.search(l)
                if m:
                    lattice.append([float(m.group(1)), float(m.group(2)),
                                    float(m.group(3))])
        if parse_force:
            m = force_patt.search(l)
            if m:
                # Last three captured groups are the force components.
                forces.extend(map(float, m.groups()[5:]))
            elif len(forces) > 0:
                all_forces.append(forces)
                forces = []
                parse_force = False
        elif parse_freq:
            if len(l.strip()) == 0:
                if len(normal_frequencies[-1][1]) == 0:
                    continue
                else:
                    parse_freq = False
            else:
                # Append this line's displacement components to the modes
                # listed in the current column block.
                vibs = [float(vib) for vib in l.strip().split()[1:]]
                num_vibs = len(vibs)
                for mode, dis in zip(normal_frequencies[-num_vibs:], vibs):
                    mode[1].append(dis)
        elif parse_projected_freq:
            if len(l.strip()) == 0:
                if len(frequencies[-1][1]) == 0:
                    continue
                else:
                    parse_projected_freq = False
            else:
                vibs = [float(vib) for vib in l.strip().split()[1:]]
                num_vibs = len(vibs)
                for mode, dis in zip(
                        frequencies[-num_vibs:], vibs):
                    mode[1].append(dis)
        elif parse_bset:
            if l.strip() == "":
                parse_bset = False
            else:
                toks = l.split()
                # Skip the "Tag" header row and "---" separator rows.
                if toks[0] != "Tag" and not re.match("\-+", toks[0]):
                    basis_set[toks[0]] = dict(zip(bset_header[1:],
                                                  toks[1:]))
                elif toks[0] == "Tag":
                    bset_header = toks
                    bset_header.pop(4)
                    bset_header = [h.lower() for h in bset_header]
        elif parse_hess:
            if l.strip() == "":
                continue
            if len(hessian) > 0 and l.find("----------") != -1:
                parse_hess = False
                continue
            toks = l.strip().split()
            if len(toks) > 1:
                try:
                    row = int(toks[0])
                except Exception as e:
                    continue
                if isfloatstring(toks[1]):
                    continue
                vals = [float(fort2py(x)) for x in toks[1:]]
                # Rows arrive in column blocks; extend existing rows.
                if len(hessian) < row:
                    hessian.append(vals)
                else:
                    hessian[row - 1].extend(vals)
        elif parse_proj_hess:
            if l.strip() == "":
                continue
            nat3 = len(hessian)
            toks = l.strip().split()
            if len(toks) > 1:
                try:
                    row = int(toks[0])
                except Exception as e:
                    continue
                if isfloatstring(toks[1]):
                    continue
                vals = [float(fort2py(x)) for x in toks[1:]]
                if len(projected_hessian) < row:
                    projected_hessian.append(vals)
                else:
                    projected_hessian[row - 1].extend(vals)
                # A full row (3 * natoms entries) ends the section.
                if len(projected_hessian[-1]) == nat3:
                    parse_proj_hess = False
        else:
            # Not inside a multi-line section: look for single-line matches
            # and section headers.
            m = energy_patt.search(l)
            if m:
                energies.append(Energy(m.group(1), "Ha").to("eV"))
                # Task timing follows an energy line.
                parse_time = True
                continue
            m = energy_gas_patt.search(l)
            if m:
                # COSMO run: replace last plain energy with a dict of parts.
                cosmo_scf_energy = energies[-1]
                energies[-1] = dict()
                energies[-1].update({"cosmo scf": cosmo_scf_energy})
                energies[-1].update({"gas phase":
                                     Energy(m.group(1), "Ha").to("eV")})
            m = energy_sol_patt.search(l)
            if m:
                energies[-1].update(
                    {"sol phase": Energy(m.group(1), "Ha").to("eV")})
            m = preamble_patt.search(l)
            if m:
                try:
                    val = int(m.group(2))
                except ValueError:
                    val = m.group(2)
                k = m.group(1).replace("No. of ", "n").replace(" ", "_")
                data[k.lower()] = val
            elif l.find("Geometry \"geometry\"") != -1:
                parse_geom = True
            elif l.find("Summary of \"ao basis\"") != -1:
                parse_bset = True
            elif l.find("P.Frequency") != -1:
                parse_projected_freq = True
                if frequencies is None:
                    frequencies = []
                toks = l.strip().split()[1:]
                frequencies.extend([(float(freq), []) for freq in toks])
            elif l.find("Frequency") != -1:
                toks = l.strip().split()
                if len(toks) > 1 and toks[0] == "Frequency":
                    parse_freq = True
                    if normal_frequencies is None:
                        normal_frequencies = []
                    normal_frequencies.extend([(float(freq), []) for freq
                                               in l.strip().split()[1:]])
            elif l.find("MASS-WEIGHTED NUCLEAR HESSIAN") != -1:
                parse_hess = True
                if not hessian:
                    hessian = []
            elif l.find("MASS-WEIGHTED PROJECTED HESSIAN") != -1:
                parse_proj_hess = True
                if not projected_hessian:
                    projected_hessian = []
            elif l.find("atom coordinates gradient") != -1:
                parse_force = True
            elif job_type == "" and l.strip().startswith("NWChem"):
                job_type = l.strip()
                if job_type == "NWChem DFT Module" and \
                        "COSMO solvation results" in output:
                    job_type += " COSMO"
            else:
                m = corrections_patt.search(l)
                if m:
                    corrections[m.group(1)] = FloatWithUnit(
                        m.group(2), "kJ mol^-1").to("eV atom^-1")
    # Regroup flat displacement lists into (x, y, z) triples per atom.
    if frequencies:
        for freq, mode in frequencies:
            mode[:] = zip(*[iter(mode)]*3)
    if normal_frequencies:
        for freq, mode in normal_frequencies:
            mode[:] = zip(*[iter(mode)]*3)
    # Only the lower triangle was printed; mirror it to full square form.
    if hessian:
        n = len(hessian)
        for i in range(n):
            for j in range(i + 1, n):
                hessian[i].append(hessian[j][i])
    if projected_hessian:
        n = len(projected_hessian)
        for i in range(n):
            for j in range(i + 1, n):
                projected_hessian[i].append(projected_hessian[j][i])
    data.update({"job_type": job_type, "energies": energies,
                 "corrections": corrections,
                 "molecules": molecules,
                 "structures": structures,
                 "basis_set": basis_set,
                 "errors": errors,
                 "has_error": len(errors) > 0,
                 "frequencies": frequencies,
                 "normal_frequencies": normal_frequencies,
                 "hessian": hessian,
                 "projected_hessian": projected_hessian,
                 "forces": all_forces,
                 "task_time": time})
    return data
2 votes
Example 15
Project: tp-qemu — Source File: flag_check.py
@error.context_aware
def run(test, params, env):
    """
    flag_check test:
    steps:
    1. boot guest with -cpu model,+extra_flags (extra_flags is optional)
       a. no defined model_name in cfg file
          guest_model = host_model
       b. model_name defined in cfg file
          guest_model = params.get("cpu_model")
    2. get guest flags
    3. get expected model flags from dump file
       a. -cpu host: qemu_model = host_model
       b. guest_model > host_model --> expected_model = host_model
          e.g guest_model = Haswell, host_model = Sandybridge
          expected_model = Sandybridge
       c. guest_model < host_model --> expected_model = guest_model
    4. get extra flags
       a. add_flags = +flag
          1). flag is exposed to guest if it's supported in host
          2). flag is not supported to guest if it's unknown in host
          3). ignore "check", "enforce" which are params not flag
       b. del_flags = -flag
          flag is removed if it's supported in guest
       c. params check: check lack flag in host include unknown flag
    5. compare expected flag with flags in guest
       a. out_flags: not supported with some conf, this kind of flag
          will be displayed in dump file, but not in guest.
          e.g tsc-deadline is not supported with -M rhel6.3.0
       b. option_flags: some flag is generated by kernel which is not
          defined in dump file. it's acceptable when displayed in guest.
          e.g rep_good
       expected_flags = expected_model_flags + add_flags - del_flags
                        - out_flags
       miss_flag = expected_flags - guest_flags
       unexpect_flag = guest_flags - expected_flags - option_flags

    :param test: Kvm test object
    :param params: Dictionary with the test parameters
    :param env: Dictionary with test environment.
    """
    def qemu_model_info(models_list, cpumodel):
        """
        Get cpumodel info from models_list
        :param models_list: all models info
        :param cpumodel: model name
        :return: model info of cpumodel, or None if not found
        """
        for model in models_list:
            if cpumodel in model:
                return model
        return None

    def qemu_support_flag(model_info, reg):
        """
        Get register's supported flags from model_info
        :param model_info: model_info get from dump file
        :param reg: reg name, e.g feature_edx
        """
        reg_re = re.compile(r".*%s.*\((.*)\)\n" % reg)
        flag = reg_re.search(model_info)
        try:
            if flag:
                return flag.groups()[0]
        except Exception as e:
            logging.error("Failed to get support flag %s" % e)

    def get_all_support_flags():
        """
        Get all supported flags with qemu query cmd.
        """
        qemu_binary = utils_misc.get_qemu_binary(params)
        cmd = qemu_binary + params.get("query_cmd", " -cpu ?")
        output = utils.system_output(cmd)
        flags_re = re.compile(params.get("pattern", "flags:(.*)"))
        flag_list = flags_re.search(output)
        flags = []
        if flag_list:
            for flag in flag_list.groups():
                flags += flag
        return set(map(utils_misc.Flag, flags))

    def get_extra_flag(extra_flags, symbol, lack_check=False):
        """
        Get added/removed flags
        :param extra_flags: exposed/removed flags. e.g "+sse4.1,+sse4.2"
        :param symbol: "+","-"
        :return: return all extra_flags if lack_check is true
                 return host supported flags if lack_check is false
        """
        flags = []
        re_flags = [_[1:] for _ in extra_flags.split(",")
                    if _ and symbol == _[0]]
        for flag in re_flags:
            if lack_check:
                flags.append(flag)
            elif flag in host_flags:
                flags.append(flag)
        return set(map(utils_misc.Flag, flags))

    def get_guest_cpuflags(vm_session):
        """
        Get guest system cpuflags.
        :param vm_session: session to checked vm.
        :return: [corespond flags]
        """
        flags_re = re.compile(r'^flags\s*:(.*)$', re.MULTILINE)
        out = vm_session.cmd_output("cat /proc/cpuinfo")
        try:
            flags = flags_re.search(out).groups()[0].split()
            return set(map(utils_misc.Flag, flags))
        except Exception as e:
            logging.error("Failed to get guest cpu flags %s" % e)

    utils_misc.Flag.aliases = utils_misc.kvm_map_flags_aliases
    # Get all models' info from dump file
    dump_file = params.get("dump_file")
    default_dump_path = os.path.join(data_dir.get_deps_dir(), "cpuid")
    dump_path = params.get("dump_path", default_dump_path)
    cpuinfo_file = utils.unmap_url(dump_path, dump_file, dump_path)
    host_flags = utils_misc.get_cpu_flags()
    vm = env.get_vm(params["main_vm"])
    guest_cpumodel = vm.cpuinfo.model
    extra_flags = params.get("cpu_model_flags", " ")
    error.context("Boot guest with -cpu %s,%s" %
                  (guest_cpumodel, extra_flags), logging.info)
    # Negative case: booting with an unknown flag plus "check" is expected
    # to produce a warning in the qemu output and then stop the test.
    if params.get("start_vm") == "no" and "unknown,check" in extra_flags:
        params["start_vm"] = "yes"
        try:
            vm.create(params=params)
            vm.verify_alive()
            output = vm.process.get_output()
            vm.destroy()
        except virt_vm.VMCreateError as detail:
            output = str(detail)
        if params["qemu_output"] not in output:
            raise error.TestFail("no qemu output: %s" % params["qemu_output"])
    else:
        vm.verify_alive()
        timeout = float(params.get("login_timeout", 240))
        session = vm.wait_for_login(timeout=timeout)
        # Get qemu model
        host_cpumodel = utils_misc.get_host_cpu_models()
        if guest_cpumodel not in host_cpumodel:
            qemu_model = host_cpumodel[0]
        else:
            qemu_model = guest_cpumodel
        error.context("Get model %s support flags" % qemu_model, logging.info)
        # Get flags for every reg from model's info
        models_info = utils.system_output("cat %s" % cpuinfo_file).split("x86")
        model_info = qemu_model_info(models_info, qemu_model)
        reg_list = params.get("reg_list", "feature_edx ").split()
        model_support_flags = " "
        if model_info:
            for reg in reg_list:
                reg_flags = qemu_support_flag(model_info, reg)
                if reg_flags:
                    model_support_flags += " %s" % reg_flags
        model_support_flags = set(map(utils_misc.Flag,
                                      model_support_flags.split()))
        error.context("Get guest flags", logging.info)
        guest_flags = get_guest_cpuflags(session)
        error.context("Get expected flag list", logging.info)
        # out_flags is defined in dump file, but not in guest
        out_flags = params.get("out_flags", " ").split()
        out_flags = set(map(utils_misc.Flag, out_flags))
        # no_check_flags is defined in all_support_flags, but not in guest
        # and host
        no_check_flags = params.get("no_check_flags", " ").split()
        no_check_flags = set(map(utils_misc.Flag, no_check_flags))
        # option_flags are generated by kernel or kvm, which are not defined
        # in dump file, but can be displayed in guest
        option_flags = params.get("option_flags", " ").split()
        if params['smp'] == '1' and 'up' not in option_flags:
            option_flags.append('up')
        option_flags = set(map(utils_misc.Flag, option_flags))
        # add_flags are exposed by +flag
        add_flags = get_extra_flag(extra_flags, "+")
        # del_flags are disabled by -flag
        del_flags = get_extra_flag(extra_flags, "-", lack_check=True)
        expected_flags = ((model_support_flags | add_flags) -
                          del_flags - out_flags)
        # get all flags for host lack flag checking
        check_flags = get_extra_flag(extra_flags, "+", lack_check=True)
        check_flags = check_flags - no_check_flags
        host_flags = set(map(utils_misc.Flag, host_flags))
        lack_flags = set(expected_flags | check_flags) - host_flags
        if "check" in extra_flags and "unknown" not in extra_flags:
            error.context("Check lack flag in host", logging.info)
            process_output = vm.process.get_output()
            miss_warn = []
            if lack_flags:
                for flag in lack_flags:
                    if flag not in process_output:
                        miss_warn.extend(flag.split())
            if miss_warn:
                raise error.TestFail("no warning for lack flag %s" % miss_warn)
        error.context("Compare guest flags with expected flags", logging.info)
        all_support_flags = get_all_support_flags()
        missing_flags = expected_flags - guest_flags
        unexpect_flags = (guest_flags - expected_flags -
                          all_support_flags - option_flags)
        if missing_flags or unexpect_flags:
            raise error.TestFail("missing flags:\n %s\n"
                                 "more flags than expected:\n %s\n"
                                 "expected flags:\n %s\n"
                                 "guest flags:\n %s\n"
                                 % (missing_flags, unexpect_flags, expected_flags,
                                    guest_flags))
2 votes
Example 16
Project: strsync — Source File: strsync.py
def main():
parser = argparse.ArgumentParser(description='Automatically translate and synchronize .strings files from defined base language.')
parser.add_argument('-b','--base-lang-name', help='A base(or source) localizable resource name.(default=\'Base\'), (e.g. "Base" via \'Base.lproj\', "en" via \'en.lproj\')', default='Base', required=False)
parser.add_argument('-x','--excluding-lang-names', type=str, help='A localizable resource name that you want to exclude. (e.g. "Base" via \'Base.lproj\', "en" via \'en.lproj\')', default=[], required=False, nargs='+')
parser.add_argument('-c','--client-id', help='Client ID for MS Translation API', required=True)
parser.add_argument('-s','--client-secret', help='Client Secret key for MS Translation API', required=True)
parser.add_argument('-f','--force-translate-keys', type=str, help='Keys in the strings to update and translate by force. (input nothing for all keys.)', default=[], required=False, nargs='*')
parser.add_argument('-fb','--following-base-keys', type=str, help='Keys in the strings to follow from "Base".', default=[], required=False, nargs='+')
parser.add_argument('-fbl','--following-base-keys-if-length-longer', type=str, help='Keys in the strings to follow from "Base" if its length longer than length of "Base" value.', default=[], required=False, nargs='+')
parser.add_argument('-ic','--ignore-comments', help='Allows to ignore comment synchronization.', default=None, required=False, nargs='*')
parser.add_argument('target path', help='Target localizable resource path. (root path of Base.lproj, default=./)', default='./', nargs='?')
args = vars(parser.parse_args())
reload(sys)
sys.setdefaultencoding('utf-8')
# configure arguments
__LANG_SEP__ = '-'
__DIR_SUFFIX__ = ".lproj"
__FILE_SUFFIX__ = ".strings"
__RESOURCE_PATH__ = expanduser(args['target path'])
__BASE_LANG__ = args['base_lang_name']
__EXCLUDING_LANGS__ = args['excluding_lang_names']
__KEYS_FORCE_TRANSLATE__ = args['force_translate_keys']
__KEYS_FORCE_TRANSLATE_ALL__ = ('--force-translate-keys' in sys.argv or '-f' in sys.argv) and not __KEYS_FORCE_TRANSLATE__
__KEYS_FOLLOW_BASE__ = args['following_base_keys']
__KEYS_FOLLOW_BASE_IF_LENGTH_LONGER__ = args['following_base_keys_if_length_longer']
__IGNORE_COMMENTS__ = args['ignore_comments'] is not None
__BASE_RESOUCE_DIR__ = None
__LITERNAL_FORMAT__ = "%@"
__LITERNAL_FORMAT_RE__ = re.compile(r"(%\s{1,}@)|(@\s{0,}%)")
__LITERNAL_REPLACEMENT__ = "**"
__LITERNAL_REPLACEMENT_RE__ = re.compile(r"\*\s{0,}\*")
__QUOTES_RE__ = re.compile(r"\"")
__QUOTES_REPLACEMENT__ = "'"
if __BASE_LANG__.endswith(__DIR_SUFFIX__):
__BASE_RESOUCE_DIR__ = __BASE_LANG__
__BASE_LANG__ = __BASE_LANG__.split(__DIR_SUFFIX__)[0]
else:
__BASE_RESOUCE_DIR__ = __BASE_LANG__+__DIR_SUFFIX__
# setup Translator & langs
# read ios langs
print '(i) Fetching supported locale codes for ios9 ...'
__IOS9_CODES__ = [lang_row[0] for lang_row in csv.reader(open(resolve_file_path('lc_ios9.tsv'),'rb'), delimiter='\t')]
print '(i) Supported numbers of locale code :', len(__IOS9_CODES__)
__MS_CODE_ALIASES__ = {
# MS API Supported : ios9 supported ISO639 1-2 codes
'zh-CHS' : ['zh-Hans', 'zh-CN', 'zh-SG'],
'zh-CHT' : ['zh-Hant', 'zh-MO', 'zh-HK', 'zh-TW'],
'en' : ['en-AU', 'en-GB'],
'es' : ['es-MX'],
'fr' : ['fr-CA'],
'pt' : ['pt-BR','pt-PT']
}
# read mst langs
print '(i) Fetching supported locales from Microsoft Translation API...'
trans = Translator(args['client_id'], args['client_secret'])
__MS_LANG_FILE__ = resolve_file_path('lc_ms.cached.tsv')
__MS_SUPPORTED_CODES__ = None
if os.path.exists(__MS_LANG_FILE__):
__MS_SUPPORTED_CODES__ = [l.strip() for l in open(__MS_LANG_FILE__,'rb').readlines()]
else:
__MS_SUPPORTED_CODES__ = trans.get_languages()
cfile = open(__MS_LANG_FILE__,'w')
codes = ''
for code in __MS_SUPPORTED_CODES__:
codes += code+'\n'
cfile.write(codes)
cfile.close()
print '(i) Supported numbers of locale code :', len(__MS_SUPPORTED_CODES__)
#
global_result_logs = {}
# methods
def supported_lang(code):
alias = [ms for ms, ios in __MS_CODE_ALIASES__.items() if code in ios]
# check es-{Custom defined alias}
if len(alias)==1:
return alias[0]
# check es-MX
elif code in __MS_SUPPORTED_CODES__:
return code
# check es
elif code.split(__LANG_SEP__)[0] in __MS_SUPPORTED_CODES__:
return code.split(__LANG_SEP__)[0]
else:
return None
def preprocessing_translate_strs(strs):
return [__LITERNAL_FORMAT_RE__.sub(__LITERNAL_FORMAT__, s.strip()).replace(__LITERNAL_FORMAT__, __LITERNAL_REPLACEMENT__) for s in strs]
def postprocessing_translate_str(str):
str = str.strip()
# remove Quotes
str = __QUOTES_RE__.sub(__QUOTES_REPLACEMENT__, str)
# replace tp liternal replacement
str = validate_liternal_replacement(str)
# liternal replacement to liternal for format
str = str.replace(__LITERNAL_REPLACEMENT__, __LITERNAL_FORMAT__)
return str
def validate_liternal_format(str):
return __LITERNAL_FORMAT_RE__.sub(__LITERNAL_FORMAT__, str)
def validate_liternal_replacement(str):
return __LITERNAL_REPLACEMENT_RE__.sub(__LITERNAL_FORMAT__, str)
def translate_ms(strs, to):
lang = supported_lang(to)
strs = preprocessing_translate_strs(strs)
return [postprocessing_translate_str(r['TranslatedText']) for r in trans.translate_array(strs, lang)] if lang else strs
def strings_obj_from_file(file):
return strsparser.parse_strings(filename=file)
def merge_two_dicts(x, y):
'''Given two dicts, merge them into a new dict as a shallow copy.'''
z = x.copy()
z.update(y)
return z
# core function
def insert_or_translate(target_file, lc):
#parse target file
target_kv = {}
target_kc = {}
target_error_lines = []
if not notexist_or_empty_file(target_file):
parsed_strings = strsparser.parse_strings(filename=target_file)
for item in parsed_strings:
k, e = item['key'], item['error']
# line error
if e:
target_error_lines.append(e)
if not target_error_lines:
target_kv[k] = item['value']
target_kc[k] = item['comment']
#parsing complete or return.
if target_error_lines:
print '(!) Syntax error - Skip'
return False, None, None, target_error_lines
#base
base_content = base_dict[os.path.basename(target_file)]
base_kv = {}
base_kc = {}
for item in base_content:
k, e = item['key'], item['error']
# line error
if e:
print '(!) WARNING : Syntax error from Base -> ', k, ':' , e
base_kv[k] = item['value']
base_kc[k] = item['comment']
force_adding_keys = base_kv.keys() if __KEYS_FORCE_TRANSLATE_ALL__ else __KEYS_FORCE_TRANSLATE__
adding_keys = list(((set(base_kv.keys()) - set(target_kv.keys())) | (set(base_kv.keys()) & set(force_adding_keys))) - set(__KEYS_FOLLOW_BASE__))
removing_keys = list(set(target_kv.keys()) - set(base_kv.keys()))
existing_keys = list(set(base_kv.keys()) - (set(adding_keys) | set(removing_keys)))
updated_keys = []
"""
perform translate
"""
translated_kv = {}
if len(adding_keys):
print 'Translating...'
translated_kv = dict(zip(adding_keys, translate_ms([base_kv[k] for k in adding_keys], lc)))
updated_content = []
for item in base_content:
k = item['key']
newitem = dict.fromkeys(item.keys())
newitem['key'] = k
target_value, target_comment = target_kv.get(k), target_kc.get(k)
newitem['comment'] = target_comment if __IGNORE_COMMENTS__ else target_comment or base_kc[k]
needs_update_comment = False if __IGNORE_COMMENTS__ else not target_comment and base_kc[k]
#added
if k in adding_keys:
if k in translated_kv:
newitem['value'] = translated_kv[k]
if not newitem['comment']:
newitem['comment'] = 'Translated from: {0}'.format(base_kv[k])
print '[Add] "{0}" = "{1}" <- {2}'.format(k, newitem['value'], base_kv[k])
else:
newitem['value'] = target_kv[k]
if not newitem['comment']:
newitem['comment'] = 'Translate failed from: {0}'.format(base_kv[k])
print '[Error] "{0}" = "{1}" X <- {2}'.format(k, newitem['value'], base_kv[k])
#exists
elif k in existing_keys:
if k in __KEYS_FOLLOW_BASE_IF_LENGTH_LONGER__:
if target_value != base_kv[k] and len(target_value) > len(base_kv[k]) or needs_update_comment:
print '(!) Length of "', target_value, '" is longer than"', base_kv[k], '" as', len(target_value), '>', len(base_kv[k])
newitem['value'] = base_kv[k]
updated_keys.append(k)
if not lc in global_result_logs:
global_result_logs[lc] = {}
global_result_logs[lc][k] = (target_value, base_kv[k])
else:
newitem['value'] = target_value or base_kv[k]
elif k in __KEYS_FOLLOW_BASE__:
newitem['value'] = base_kv[k]
if target_value != base_kv[k] or needs_update_comment:
updated_keys.append(k)
else:
newitem['value'] = target_value or base_kv[k]
if not target_value or needs_update_comment:
updated_keys.append(k)
updated_content.append(newitem)
#removed or wrong
for k in removing_keys:
print '[Remove]', k
if len(adding_keys) or len(removing_keys):
print '(i) Changed Keys: Added {0}, Updated {1}, Removed {2}'.format(len(adding_keys), len(updated_keys), len(removing_keys))
return updated_content and (len(adding_keys)>0 or len(updated_keys)>0 or len(removing_keys)>0), updated_content, translated_kv, target_error_lines
def write_file(target_file, list_of_content):
suc = False
try:
f = codecs.open(target_file, "w", "utf-8")
contents = ''
for content in list_of_content:
if content['comment']:
contents += '/*{0}*/'.format(content['comment']) + '\n'
contents += '"{0}" = "{1}";'.format(content['key'], content['value']) + '\n'
f.write(contents)
suc = True
except IOError:
print 'IOError to open', target_file
finally:
f.close()
return suc
def remove_file(target_file):
try:
os.rename(target_file, target_file+'.deleted')
return True
except IOError:
print 'IOError to rename', target_file
return False
def create_file(target_file):
open(target_file, 'a').close()
def notexist_or_empty_file(target_file):
return not os.path.exists(target_file) or os.path.getsize(target_file)==0
def resolve_file_names(target_file_names):
return map(lambda f: f.decode('utf-8'), filter(lambda f: f.endswith(__FILE_SUFFIX__), target_file_names))
base_dict = {}
results_dict = {}
# Get Base Language Specs
walked = list(os.walk(__RESOURCE_PATH__, topdown=True))
for dir, subdirs, files in walked:
if os.path.basename(dir)==__BASE_RESOUCE_DIR__:
for _file in resolve_file_names(files):
f = os.path.join(dir, _file)
if notexist_or_empty_file(f):
continue
base_dict[_file] = strings_obj_from_file(f)
if not base_dict:
print '[!] Not found "{0}" in target path "{1}"'.format(__BASE_RESOUCE_DIR__, __RESOURCE_PATH__)
sys.exit(0)
print 'Start synchronizing...'
for file in base_dict:
print 'Target:', file
for dir, subdirs, files in walked:
files = resolve_file_names(files)
if dir.endswith((__DIR_SUFFIX__)):
lc = os.path.basename(dir).split(__DIR_SUFFIX__)[0]
if lc.find('_'): lc = lc.replace('_', __LANG_SEP__)
if lc == __BASE_LANG__:
continue
if lc in __EXCLUDING_LANGS__:
print 'Skip: ', lc
continue
# lc = supported_lang(lc)
results_dict[lc] = {
'deleted_files' : [],
'added_files' : [],
'updated_files' : [],
'skipped_files' : [],
'translated_files_lines' : {},
'error_lines_kv' : {}
}
if not supported_lang(lc):
print 'Does not supported: ', lc
results_dict[lc]['skipped_files'] = join_path_all(dir, files)
continue
print '\n', 'Analayzing localizables... {1} (at {0})'.format(dir, lc)
added_files = list(set(base_dict.keys()) - set(files))
removed_files = list(set(files) - set(base_dict.keys()))
existing_files = list(set(files) - (set(added_files) | set(removed_files)))
added_files = join_path_all(dir, added_files)
removed_files = join_path_all(dir, removed_files)
existing_files = join_path_all(dir, existing_files)
added_cnt, updated_cnt, removed_cnt = 0, 0, 0
translated_files_lines = results_dict[lc]['translated_files_lines']
error_files = results_dict[lc]['error_lines_kv']
#remove - file
for removed_file in removed_files:
print 'Removing File... {0}'.format(removed_file)
if remove_file(removed_file):
removed_cnt+=1
#add - file
for added_file in added_files:
print 'Adding File... {0}'.format(added_file)
create_file(added_file)
u, c, t, e = insert_or_translate(added_file, lc)
#error
if e:
error_files[added_file] = e
#normal
elif u and write_file(added_file, c):
added_cnt+=1
translated_files_lines[added_file] = t
#exist - lookup lines
for ext_file in existing_files:
u, c, t, e = insert_or_translate(ext_file, lc)
#error
if e:
error_files[ext_file] = e
#normal
elif u:
print 'Updating File... {0}'.format(ext_file)
if write_file(ext_file, c):
updated_cnt=+1
translated_files_lines[ext_file] = t
if added_cnt or updated_cnt or removed_cnt or error_files:
print '(i) Changed Files : Added {0}, Updated {1}, Removed {2}, Error {3}'.format(added_cnt, updated_cnt, removed_cnt, len(error_files.keys()))
else:
print 'Nothing to translate or add.'
"""
Results
"""
results_dict[lc]['deleted_files'] = removed_files
results_dict[lc]['added_files'] = list(set(added_files) & set(translated_files_lines.keys()))
results_dict[lc]['updated_files'] = list(set(existing_files) & set(translated_files_lines.keys()))
if error_files:
print error_files
results_dict[lc]['error_lines_kv'] = error_files
# print total Results
print ''
t_file_cnt, t_line_cnt = 0, 0
file_add_cnt, file_remove_cnt, file_update_cnt, file_skip_cnt = 0,0,0,0
for lc in results_dict.keys():
result_lc = results_dict[lc]
file_add_cnt += len(result_lc['added_files'])
file_remove_cnt += len(result_lc['deleted_files'])
file_update_cnt += len(result_lc['updated_files'])
file_skip_cnt += len(result_lc['skipped_files'])
for f in result_lc['added_files']: print 'Added',f
for f in result_lc['deleted_files']: print 'Removed',f
for f in result_lc['updated_files']: print 'Updated',f
for f in result_lc['skipped_files']: print 'Skiped',f
tfiles = result_lc['translated_files_lines']
if tfiles:
# print '============ Results for langcode : {0} ============='.format(lc)
for f in tfiles:
t_file_cnt += 1
if len(tfiles[f]):
# print '', f
for key in tfiles[f]:
t_line_cnt += 1
# print key, ' = ', tfiles[f][key]
for lc in global_result_logs.keys():
print lc
for t in global_result_logs[lc].keys():
o, b = global_result_logs[lc][t]
print o.decode('utf-8'), ' -> ', b
print ''
found_warining = filter(lambda i: i or None, rget(results_dict, 'error_lines_kv'))
if file_add_cnt or file_update_cnt or file_remove_cnt or file_skip_cnt or found_warining:
print 'Total New Translated Strings : {0}'.format(t_line_cnt)
print 'Changed Files Total : Added {0}, Updated {1}, Removed {2}, Skipped {3}'.format(file_add_cnt, file_update_cnt, file_remove_cnt, file_skip_cnt)
print "Synchronized."
if found_warining:
print '\n[!!] WARNING: Found strings that contains the syntax error. Please confirm.'
for a in found_warining:
for k in a:
print 'at', k
for i in a[k]:
print ' ', i
else:
print "All strings are already synchronized. Nothing to translate or add."
return
2
Example 17
def main(argv=None):
    """script main.
    parses command line options in sys.argv, unless *argv* is given.

    Compares a target GFF exon set against a reference set and prints
    nucleotide-, exon- and (optionally) gene-level specificity/sensitivity
    tables to stdout.  Expects exactly two positional arguments:
    target filename and reference filename.

    NOTE(review): the file mixes print() calls (print_function style) with
    Python-2-only ``list.sort(cmp_func)`` below — presumably targets
    Python 2 with ``from __future__ import print_function``; confirm.
    """
    if argv is None:
        argv = sys.argv
    # --- command line -----------------------------------------------------
    parser = E.OptionParser(
        version="%prog version: $Id: gff_compare.py 2781 2009-09-10 11:33:14Z andreas $", usage=globals()["__doc__"])
    parser.add_option("-f", "--output-full", dest="write_full",
                      help="write full gff entries.", action="store_true")
    parser.add_option("-e", "--output-matched-exons", dest="write_matched_exons",
                      help="write matched exons.", action="store_true")
    parser.add_option("-o", "--output-missed-exons", dest="write_missed_exons", action="store_true",
                      help="write missed exons.")
    parser.add_option("-g", "--output-missed-genes", dest="write_missed_genes", action="store_true",
                      help="write missed genes.")
    parser.add_option("-r", "--regex-reference", dest="regex_reference", type="string",
                      help="regular expression mapping exon to transcript in reference.")
    parser.add_option("-t", "--regex-target", dest="regex_target", type="string",
                      help="regular expression mapping exon to transcript in target.")
    parser.add_option("--no-nucleotides", dest="do_nucleotides", action="store_false",
                      help="skip nucleotide benchmark.")
    parser.add_option("--no-exons", dest="do_exons", action="store_false",
                      help="skip exon benchmark.")
    parser.add_option("--no-genes", dest="do_genes", action="store_false",
                      help="skip gene benchmark.")
    parser.add_option("--output-filename-pattern", dest="outfile_pattern", type="string",
                      help="output filename pattern for extra info (%s will be substituted with reference,target).")
    parser.set_defaults(
        remove_redundancy=False,
        max_exon_slippage=9,
        write_missed_exons=False,
        write_matched_exons=False,
        write_missed_genes=False,
        write_wrong_exons=False,
        write_wrong_genes=False,
        do_nucleotides=True,
        do_exons=True,
        do_genes=True,
        regex_reference=None,
        regex_target=None,
        outfile_pattern="%s.info",
    )
    (options, args) = E.Start(parser)
    if len(args) != 2:
        print(USAGE)
        print("two arguments required")
        sys.exit(1)
    input_filename_target, input_filename_reference = args
    # --- read input -------------------------------------------------------
    if options.loglevel >= 1:
        print("# target entries from %s" % input_filename_target)
        print("# reading target entries ...", end=' ')
        sys.stdout.flush()
    gff_targets = GTF.readFromFile(open(input_filename_target, "r"))
    if options.loglevel >= 1:
        print("finished: %i" % (len(gff_targets)))
        sys.stdout.flush()
    if options.loglevel >= 1:
        print("# reference entries from %s" % input_filename_reference)
        print("# reading reference entries ...", end=' ')
        sys.stdout.flush()
    gff_references = GTF.readFromFile(open(input_filename_reference, "r"))
    if options.loglevel >= 1:
        print("finished: %i" % (len(gff_references)))
        sys.stdout.flush()
    if options.remove_redundancy:
        gff_targets = GTF.CombineOverlaps(gff_targets)
        gff_references = GTF.CombineOverlaps(gff_references)
        if options.loglevel >= 1:
            print("# after filtering: targets=%i, references=%i" % (len(gff_targets), len(gff_references)))
    ##########################################################################
    # sort exons
    # Both lists are sorted by (contig, strand, start, end) so the two
    # streams can be walked in lockstep below.
    if options.loglevel >= 1:
        print("# sorting exons ...", end=' ')
        sys.stdout.flush()
    # NOTE(review): cmp-style sort arguments are Python 2 only.
    gff_targets.sort(lambda x, y: cmp((x.mName, x.strand, x.start, x.end),
                                      (y.mName, y.strand, y.start, y.end)))
    gff_references.sort(lambda x, y: cmp((x.mName, x.strand, x.start, x.end),
                                         (y.mName, y.strand, y.start, y.end)))
    ntargets = len(gff_targets)
    nreferences = len(gff_references)
    if options.loglevel >= 1:
        print("finished")
        sys.stdout.flush()
    ##########################################################################
    # get nucleotide level accuracy
    # process each fragment separately
    if options.do_nucleotides:
        print("""############################################################""")
        headers = (
            "contig", "strand", "tp", "fp", "tn", "fn", "sp", "sn", "cc")
        print("\t".join(headers))
        first_r, first_t = 0, 0
        r, t = 0, 0
        ttp, tfp, ttn, tfn = 0, 0, 0, 0
        # this only works, if all contigs in reference are present in target.
        while r < nreferences and t < ntargets:
            this_name = gff_references[r].mName
            this_strand = gff_references[r].strand
            # get all in references
            while r < nreferences and \
                    gff_references[r].mName == this_name and \
                    gff_references[r].strand == this_strand:
                r += 1
            # skip over extra contigs in target
            while t < ntargets and \
                    (gff_targets[t].mName != this_name or
                     gff_targets[t].strand != this_strand):
                t += 1
            first_t = t
            # get all in targets
            while t < ntargets and \
                    gff_targets[t].mName == this_name and \
                    gff_targets[t].strand == this_strand:
                t += 1
            tp, fp, tn, fn = AnalyseOverlaps(gff_references[first_r:r],
                                             gff_targets[first_t:t])
            spec, sens = CalculateSpecificitySensitivity(tp, fp, tn, fn)
            cc = CalculateCorrelationCoefficient(tp, fp, tn, fn)
            print("%s\t%s\t%i\t%i\t%i\t%i\t%5.2f\t%5.2f\t%5.2f" % (this_name, this_strand, tp, fp, tn, fn, spec, sens, cc))
            ttp += tp
            tfp += fp
            ttn += tn
            tfn += fn
            first_r, first_t = r, t
        # grand totals over all contig/strand groups
        spec, sens = CalculateSpecificitySensitivity(ttp, tfp, ttn, tfn)
        cc = CalculateCorrelationCoefficient(ttp, tfp, ttn, tfn)
        print("%s\t%s\t%i\t%i\t%i\t%i\t%5.2f\t%5.2f\t%5.2f" % ("all", "all", ttp, tfp, ttn, tfn, spec, sens, cc))
        sys.stdout.flush()
    ##########################################################################
    if options.do_exons or options.do_genes:
        print("""############################################################""")
        headers = ("category", "contig", "strand", "tp", "fp", "tn",
                   "fn", "sp", "sn", "cc", "me", "we", "me", "we")
        print("\t".join(headers))
        r, t = 0, 0
        next_r, next_t = r, t
        # strict false positves/negatives
        tp, fp, tn, fn = 0, 0, 0, 0
        ttp, tfp, ttn, tfn = 0, 0, 0, 0
        # partial false positives/negatives
        ptp, pfp, ptn, pfn = 0, 0, 0, 0
        tptp, tpfp, tptn, tpfn = 0, 0, 0, 0
        # missed and wrong exons
        missed_exons, wrong_exons = 0, 0
        tmissed_exons, twrong_exons = 0, 0
        # Flag set, if partial overlap in previous pair
        last_partial_overlap = False
        # Flag set, if partial overlap and reference was last increased
        last_increased_ref = False
        while r < nreferences and t < ntargets:
            this_name = gff_references[r].mName
            this_strand = gff_references[r].strand
            # get overlap segments (only recompute when the cursor moved)
            if next_r == r:
                ref_overlaps, next_r, ref_start, ref_end = GetFirstOverlaps(
                    gff_references, r)
            if next_t == t:
                target_overlaps, next_t, target_start, target_end = GetFirstOverlaps(
                    gff_targets, t)
            if options.loglevel >= 3:
                print("########################################################")
                for x in ref_overlaps:
                    print("#", str(x))
                for x in target_overlaps:
                    print("#", str(x))
            do_summary = False
            # check strand switch in reference
            if next_r < nreferences and \
                    (this_name != gff_references[next_r].mName or
                     this_strand != gff_references[next_r].strand):
                if options.loglevel >= 3:
                    print("# target advance")
                do_summary = True
                last_increased_ref = False
                last_partial_overlap = False
                # advance in target until next name is found
                next_name = gff_references[next_r].mName
                next_strand = gff_references[next_r].strand
                # NOTE(review): ``and`` binds tighter than ``or`` — as written
                # this is ``(next_t < ntargets and cond1) or cond2``, so the
                # loop can run with next_t == ntargets; parentheses around the
                # two != tests were probably intended.  Confirm before fixing.
                while next_t < ntargets and \
                        next_name != gff_targets[next_t].mName or \
                        next_strand != gff_targets[next_t].strand:
                    fp += 1
                    pfp += 1
                    target_overlaps, next_t, target_start, target_end = GetFirstOverlaps(
                        gff_targets, next_t)
                for x in gff_targets[t:next_t]:
                    x.mStatus = "extra"
                for x in gff_references[r:next_r]:
                    x.mStatus = "extra"
                r, t = next_r, next_t
            # check strand switch in target
            elif next_t < ntargets and \
                    (this_name != gff_targets[next_t].mName or
                     this_strand != gff_targets[next_t].strand):
                # advance in reference until next name is found
                if options.loglevel >= 3:
                    print("# reference advance")
                do_summary = True
                last_increased_ref = False
                last_partial_overlap = False
                next_name = gff_targets[next_t].mName
                next_strand = gff_targets[next_t].strand
                # NOTE(review): same suspected and/or precedence issue as the
                # target-advance loop above.
                while next_r < nreferences and \
                        next_name != gff_references[next_r].mName or \
                        next_strand != gff_references[next_r].strand:
                    fn += 1
                    pfn += 1
                    reference_overlaps, next_r, references_start, references_end = GetFirstOverlaps(
                        gff_references, next_r)
                for x in gff_targets[t:next_t]:
                    x.mStatus = "extra"
                for x in gff_references[r:next_r]:
                    x.mStatus = "extra"
                r, t = next_r, next_t
            # otherwise
            else:
                ref_status, target_status = None, None
                if options.loglevel >= 3:
                    print("# same chromosome")
                # overlap between segments
                if min(ref_end, target_end) - max(ref_start, target_start) > 0:
                    # clear flags
                    last_increased_ref = False
                    last_partial_overlap = False
                    # look for an exact (within slippage) exon match
                    found = False
                    for rr in ref_overlaps:
                        xfound = False
                        for tt in target_overlaps:
                            if GTF.Identity(rr, tt, max_slippage=options.max_exon_slippage):
                                xfound = True
                                break
                        if xfound:
                            found = True
                            break
                    if found:
                        ref_status = "match"
                        target_status = "match"
                        tp += 1
                        ptp += 1
                        if options.write_matched_exons:
                            print("############# matching exons ###########################")
                            for x in ref_overlaps:
                                print("#", str(x))
                            for x in target_overlaps:
                                print("#", str(x))
                    else:
                        fn += 1
                        # check for one-sided matches
                        for rr in ref_overlaps:
                            xfound = False
                            for tt in target_overlaps:
                                if GTF.HalfIdentity(rr, tt, max_slippage=options.max_exon_slippage):
                                    xfound = True
                                    break
                            if xfound:
                                found = True
                                break
                        if found:
                            ptp += 1
                            code = "partial"
                            ref_status = "partial"
                            target_status = "partial"
                        else:
                            pfn += 1
                            code = "complete"
                            ref_status = "mismatch"
                            target_status = "mismatch"
                        if options.write_missed_exons:
                            print("############# %s non-overlapping exons ###########################" % code)
                            for x in ref_overlaps:
                                print("#", str(x))
                            for x in target_overlaps:
                                print("#", str(x))
                    ###########################################################
                    # advance the cursor(s) that finished this overlap group
                    # r, t = next_r, next_t
                    if ref_end == target_end:
                        r, t = next_r, next_t
                    elif ref_end < target_end:
                        r = next_r
                        last_increased_ref = True
                        last_partial_overlap = True
                    else:
                        t = next_t
                        last_increased_ref = False
                        last_partial_overlap = True
                # non-overlap between segments
                else:
                    if ref_end < target_start:
                        # for non-overlap, check whether there was partial overlap before
                        # and reference was not increased.
                        # if there was, just increment reference, but do not
                        # count.
                        if not (last_partial_overlap and not last_increased_ref):
                            if options.write_missed_exons:
                                print("############# missed exon ###########################")
                                for x in ref_overlaps:
                                    print("#", str(x))
                            missed_exons += 1
                            fn += 1
                            pfn += 1
                        ref_status = "extra"
                        r = next_r
                    else:
                        # for non-overlap, check whether there was partial overlap before
                        # and target was not increased.
                        # if there was, just increment target, but do not
                        # count.
                        if not (last_partial_overlap and last_increased_ref):
                            if options.write_wrong_exons:
                                print("############# wrong exon ###########################")
                                for x in target_overlaps:
                                    print("#", str(x))
                            wrong_exons += 1
                            fp += 1
                            pfp += 1
                        target_status = "extra"
                        t = next_t
                    last_partial_overlap = False
                if options.loglevel >= 3:
                    print("# ref_status=%s, target_status=%s" % (ref_status, target_status))
                # record status (and matches, for the gene benchmark) on entries
                if ref_status:
                    for rr in ref_overlaps:
                        rr.mStatus = ref_status
                    if ref_status in ("match", "partial") and options.do_genes:
                        for rr in ref_overlaps:
                            rr.mMatches = target_overlaps
                if target_status:
                    for tt in target_overlaps:
                        tt.mStatus = target_status
                    if target_status in ("match", "partial") and options.do_genes:
                        for tt in target_overlaps:
                            tt.mMatches = ref_overlaps
            # emit per-contig summary when switching contig/strand or at the end
            if do_summary or r >= nreferences or t >= ntargets:
                ttp += tp
                tfp += fp
                ttn += tn
                tfn += fn
                tptp += ptp
                tpfp += pfp
                tptn += ptn
                tpfn += pfn
                tmissed_exons += missed_exons
                twrong_exons += wrong_exons
                if tp + fn != 0:
                    pmissed_exons = "%5.2f" % (float(missed_exons) / (tp + fn))
                else:
                    pmissed_exons = "0"
                if tp + fp != 0:
                    pwrong_exons = "%5.2f" % (float(wrong_exons) / (tp + fp))
                else:
                    pwrong_exons = "na"
                spec, sens = CalculateSpecificitySensitivity(tp, fp, tn, fn)
                cc = (spec + sens) / 2.0
                print("full\t%s\t%s\t%i\t%i\t%i\t%i\t%5.2f\t%5.2f\t%5.2f\t%i\t%i\t%s\t%s" %
                      (this_name, this_strand,
                       tp, fp, tn, fn,
                       spec, sens, cc,
                       missed_exons, wrong_exons,
                       pmissed_exons, pwrong_exons))
                spec, sens = CalculateSpecificitySensitivity(
                    ptp, pfp, ptn, pfn)
                cc = (spec + sens) / 2.0
                print("half\t%s\t%s\t%i\t%i\t%i\t%i\t%5.2f\t%5.2f\t%5.2f\t%i\t%i\t%s\t%s" %
                      (this_name, this_strand,
                       ptp, pfp, ptn, pfn,
                       spec, sens, cc,
                       missed_exons, wrong_exons,
                       pmissed_exons, pwrong_exons))
                # reset per-contig counters
                tp, fp, tn, fn = 0, 0, 0, 0
                ptp, pfp, ptn, pfn = 0, 0, 0, 0
                missed_exons, wrong_exons = 0, 0
        # mark anything left over in either stream as extra
        if t < ntargets:
            for x in gff_targets[t:ntargets]:
                x.mStatus = "extra"
        if r < nreferences:
            for x in gff_references[r:nreferences]:
                x.mStatus = "extra"
        spec, sens = CalculateSpecificitySensitivity(ttp, tfp, ttn, tfn)
        cc = (spec + sens) / 2.0
        print("full\t%s\t%s\t%i\t%i\t%i\t%i\t%5.2f\t%5.2f\t%5.2f\t%i\t%i\t%5.2f\t%5.2f" %
              ("all", "all", ttp, tfp, ttn, tfn,
               spec, sens, cc,
               tmissed_exons, twrong_exons,
               float(tmissed_exons) / (ttp + tfn),
               float(twrong_exons) / (ttp + tfp)))
        spec, sens = CalculateSpecificitySensitivity(tptp, tpfp, tptn, tpfn)
        cc = (spec + sens) / 2.0
        print("half\t%s\t%s\t%i\t%i\t%i\t%i\t%5.2f\t%5.2f\t%5.2f\t%i\t%i\t%5.2f\t%5.2f" %
              ("all", "all", tptp, tpfp, tptn, tpfn,
               spec, sens, cc,
               tmissed_exons, twrong_exons,
               float(tmissed_exons) / (ttp + tfn),
               float(twrong_exons) / (ttp + tfp)))
    # --- gene-level benchmark (needs both mapping regexes) -----------------
    if options.do_genes and \
            options.regex_reference and \
            options.regex_target:
        print("""###############################################################""")
        out_options = []
        if options.write_missed_genes:
            out_options.append("missed")
        if options.loglevel >= 2:
            print("# counting matches for reference.")
            sys.stdout.flush()
        (ref_total, ref_match, ref_partial, ref_extra) =\
            CountMatchesPerGene(gff_references,
                                re.compile(options.regex_reference),
                                re.compile(options.regex_target),
                                write=out_options,
                                outfile=open(options.outfile_pattern % "reference", "w"))
        if options.loglevel >= 2:
            print("# counting matches for target.")
            sys.stdout.flush()
        (target_total, target_match, target_partial, target_extra) =\
            CountMatchesPerGene(gff_targets,
                                re.compile(options.regex_target),
                                re.compile(
                                    options.regex_reference),
                                write=out_options,
                                outfile=open(options.outfile_pattern % "target", "w"))
        if options.loglevel >= 1:
            print("# reference: genes=%6i, matches=%6i, partial=%6i, extra=%6i" %
                  (ref_total, ref_match, ref_partial, ref_extra))
            print("# target : genes=%6i, matches=%6i, partial=%6i, extra=%6i" %
                  (target_total, target_match, target_partial, target_extra))
        headers = ("category", "tp", "fp", "tn", "fn",
                   "sp", "sn", "cc", "mg", "wg", "mg", "wg")
        print("\t".join(headers))
        # strict gene-level counts
        tp = ref_match
        fp = target_extra
        tn = 0
        fn = ref_total - ref_match
        wrong_genes = target_extra
        missed_genes = ref_extra
        spec, sens = CalculateSpecificitySensitivity(tp, fp, tn, fn)
        cc = (spec + sens) / 2.0
        # NOTE(review): fp is overwritten to nreferences only to avoid the
        # division by zero in the print below — confirm intent.
        if tp + fp == 0:
            fp = nreferences
        print("full\t%i\t%i\t%i\t%i\t%5.2f\t%5.2f\t%5.2f\t%i\t%i\t%5.2f\t%5.2f" %
              (tp, fp, tn, fn,
               spec, sens, cc,
               missed_genes, wrong_genes,
               float(missed_genes) / (tp + fn),
               float(wrong_genes) / (tp + fp)))
        # relaxed counts: partial matches also count as true positives
        tp = ref_match + ref_partial
        fp = target_extra
        tn = 0
        fn = ref_total - ref_match - ref_partial
        wrong_genes = target_extra
        missed_genes = ref_extra
        spec, sens = CalculateSpecificitySensitivity(tp, fp, tn, fn)
        cc = (spec + sens) / 2.0
        print("half\t%i\t%i\t%i\t%i\t%5.2f\t%5.2f\t%5.2f\t%i\t%i\t%5.2f\t%5.2f" %
              (tp, fp, tn, fn,
               spec, sens, cc,
               missed_genes, wrong_genes,
               float(missed_genes) / (tp + fn),
               float(wrong_genes) / (tp + fp)))
    E.Stop()
2
Example 18
Project: dashman Source File: tx.py
def main():
    """Command-line entry point for manipulating bitcoin (or alt coin)
    transactions.

    Parses heterogeneous positional arguments (tx ids, hex transactions,
    file paths, spendables, addresses), assembles a Tx, optionally signs it
    with keys from WIF/BIP0032 files, then writes, dumps or validates the
    result depending on the flags given.
    """
    parser = argparse.ArgumentParser(
        description="Manipulate bitcoin (or alt coin) transactions.",
        epilog=EPILOG)
    parser.add_argument('-t', "--transaction-version", type=int,
                        help='Transaction version, either 1 (default) or 3 (not yet supported).')
    parser.add_argument('-l', "--lock-time", type=parse_locktime, help='Lock time; either a block'
                        'index, or a date/time (example: "2014-01-01T15:00:00"')
    parser.add_argument('-n', "--network", default="BTC",
                        help='Define network code (M=Bitcoin mainnet, T=Bitcoin testnet).')
    parser.add_argument('-a', "--augment", action='store_true',
                        help='augment tx by adding any missing spendable metadata by fetching'
                        ' inputs from cache and/or web services')
    parser.add_argument('-s', "--verbose-signature", action='store_true',
                        help='Display technical signature details.')
    parser.add_argument("-i", "--fetch-spendables", metavar="address", action="append",
                        help='Add all unspent spendables for the given bitcoin address. This information'
                        ' is fetched from web services.')
    parser.add_argument('-f', "--private-key-file", metavar="path-to-private-keys", action="append",
                        help='file containing WIF or BIP0032 private keys. If file name ends with .gpg, '
                        '"gpg -d" will be invoked automatically. File is read one line at a time, and if '
                        'the file contains only one WIF per line, it will also be scanned for a bitcoin '
                        'address, and any addresses found will be assumed to be public keys for the given'
                        ' private key.',
                        type=argparse.FileType('r'))
    parser.add_argument('-g', "--gpg-argument", help='argument to pass to gpg (besides -d).', default='')
    parser.add_argument("--remove-tx-in", metavar="tx_in_index_to_delete", action="append", type=int,
                        help='remove a tx_in')
    parser.add_argument("--remove-tx-out", metavar="tx_out_index_to_delete", action="append", type=int,
                        help='remove a tx_out')
    parser.add_argument('-F', "--fee", help='fee, in satoshis, to pay on transaction, or '
                        '"standard" to auto-calculate. This is only useful if the "split pool" '
                        'is used; otherwise, the fee is automatically set to the unclaimed funds.',
                        default="standard", metavar="transaction-fee", type=parse_fee)
    # NOTE(review): trailing comma below makes this statement a tuple — it is
    # harmless but almost certainly unintended; left as-is.
    parser.add_argument('-C', "--cache", help='force the resultant transaction into the transaction cache.'
                        ' Mostly for testing.', action='store_true'),
    parser.add_argument('-u', "--show-unspents", action='store_true',
                        help='show TxOut items for this transaction in Spendable form.')
    parser.add_argument('-b', "--bitcoind-url",
                        help='URL to bitcoind instance to validate against (http://user:pass@host:port).')
    parser.add_argument('-o', "--output-file", metavar="path-to-output-file", type=argparse.FileType('wb'),
                        help='file to write transaction to. This supresses most other output.')
    parser.add_argument('-p', "--pay-to-script", metavar="pay-to-script", action="append",
                        help='a hex version of a script required for a pay-to-script input (a bitcoin address that starts with 3)')
    parser.add_argument('-P', "--pay-to-script-file", metavar="pay-to-script-file", nargs=1, type=argparse.FileType('r'),
                        help='a file containing hex scripts (one per line) corresponding to pay-to-script inputs')
    parser.add_argument("argument", nargs="+", help='generic argument: can be a hex transaction id '
                        '(exactly 64 characters) to be fetched from cache or a web service;'
                        ' a transaction as a hex string; a path name to a transaction to be loaded;'
                        ' a spendable 4-tuple of the form tx_id/tx_out_idx/script_hex/satoshi_count '
                        'to be added to TxIn list; an address/satoshi_count to be added to the TxOut '
                        'list; an address to be added to the TxOut list and placed in the "split'
                        ' pool".')
    args = parser.parse_args()
    # defaults
    txs = []
    spendables = []
    payables = []
    key_iters = []
    # exactly 64 hex chars == a transaction id
    TX_ID_RE = re.compile(r"^[0-9a-fA-F]{64}$")
    # there are a few warnings we might optionally print out, but only if
    # they are relevant. We don't want to print them out multiple times, so we
    # collect them here and print them at the end if they ever kick in.
    warning_tx_cache = None
    warning_get_tx = None
    warning_spendables = None
    # --- collect private keys from files (plain or gpg-encrypted) ----------
    if args.private_key_file:
        wif_re = re.compile(r"[1-9a-km-zA-LMNP-Z]{51,111}")
        # address_re = re.compile(r"[1-9a-kmnp-zA-KMNP-Z]{27-31}")
        for f in args.private_key_file:
            if f.name.endswith(".gpg"):
                # decrypt via gpg and read its stdout instead of the file
                gpg_args = ["gpg", "-d"]
                if args.gpg_argument:
                    gpg_args.extend(args.gpg_argument.split())
                gpg_args.append(f.name)
                popen = subprocess.Popen(gpg_args, stdout=subprocess.PIPE)
                f = popen.stdout
            for line in f.readlines():
                # decode
                if isinstance(line, bytes):
                    line = line.decode("utf8")
                # look for WIFs
                possible_keys = wif_re.findall(line)
                def make_key(x):
                    # returns None for text that merely looks like a key
                    try:
                        return Key.from_text(x)
                    except Exception:
                        return None
                keys = [make_key(x) for x in possible_keys]
                for key in keys:
                    if key:
                        # lazy generator over the key's subkey WIFs
                        key_iters.append((k.wif() for k in key.subkeys("")))
                # if len(keys) == 1 and key.hierarchical_wallet() is None:
                #    # we have exactly 1 WIF. Let's look for an address
                #    potential_addresses = address_re.findall(line)
    # update p2sh_lookup
    p2sh_lookup = {}
    if args.pay_to_script:
        for p2s in args.pay_to_script:
            try:
                script = h2b(p2s)
                p2sh_lookup[hash160(script)] = script
            except Exception:
                print("warning: error parsing pay-to-script value %s" % p2s)
    if args.pay_to_script_file:
        hex_re = re.compile(r"[0-9a-fA-F]+")
        for f in args.pay_to_script_file:
            count = 0
            for l in f:
                try:
                    m = hex_re.search(l)
                    if m:
                        p2s = m.group(0)
                        script = h2b(p2s)
                        p2sh_lookup[hash160(script)] = script
                        count += 1
                except Exception:
                    print("warning: error parsing pay-to-script file %s" % f.name)
            if count == 0:
                print("warning: no scripts found in %s" % f.name)
    # we create the tx_db lazily
    tx_db = None
    # --- classify each positional argument, first parse that succeeds wins --
    for arg in args.argument:
        # hex transaction id
        if TX_ID_RE.match(arg):
            if tx_db is None:
                warning_tx_cache = message_about_tx_cache_env()
                warning_get_tx = message_about_get_tx_env()
                tx_db = get_tx_db()
            tx = tx_db.get(h2b_rev(arg))
            if not tx:
                for m in [warning_tx_cache, warning_get_tx, warning_spendables]:
                    if m:
                        print("warning: %s" % m, file=sys.stderr)
                parser.error("can't find Tx with id %s" % arg)
            txs.append(tx)
            continue
        # hex transaction data
        try:
            tx = Tx.from_hex(arg)
            txs.append(tx)
            continue
        except Exception:
            pass
        # a bare address joins the "split pool" (coin value 0)
        is_valid = is_address_valid(arg, allowable_netcodes=[args.network])
        if is_valid:
            payables.append((arg, 0))
            continue
        try:
            key = Key.from_text(arg)
            # TODO: check network
            if key.wif() is None:
                payables.append((key.address(), 0))
                continue
            # TODO: support paths to subkeys
            key_iters.append((k.wif() for k in key.subkeys("")))
            continue
        except Exception:
            pass
        # a path to a serialized transaction (binary or hex)
        if os.path.exists(arg):
            try:
                with open(arg, "rb") as f:
                    if f.name.endswith("hex"):
                        f = io.BytesIO(codecs.getreader("hex_codec")(f).read())
                    tx = Tx.parse(f)
                    txs.append(tx)
                    try:
                        tx.parse_unspents(f)
                    except Exception as ex:
                        pass
                    continue
            except Exception:
                pass
        parts = arg.split("/")
        if len(parts) == 4:
            # spendable
            try:
                spendables.append(Spendable.from_text(arg))
                continue
            except Exception:
                pass
        # address/satoshi_count pair
        if len(parts) == 2 and is_address_valid(parts[0], allowable_netcodes=[args.network]):
            try:
                payables.append(parts)
                continue
            except ValueError:
                pass
        parser.error("can't parse %s" % arg)
    if args.fetch_spendables:
        warning_spendables = message_about_spendables_for_address_env()
        for address in args.fetch_spendables:
            spendables.extend(spendables_for_address(address))
    for tx in txs:
        if tx.missing_unspents() and args.augment:
            if tx_db is None:
                warning_tx_cache = message_about_tx_cache_env()
                warning_get_tx = message_about_get_tx_env()
                tx_db = get_tx_db()
            tx.unspents_from_db(tx_db, ignore_missing=True)
    txs_in = []
    txs_out = []
    unspents = []
    # we use a clever trick here to keep each tx_in corresponding with its tx_out
    for tx in txs:
        smaller = min(len(tx.txs_in), len(tx.txs_out))
        txs_in.extend(tx.txs_in[:smaller])
        txs_out.extend(tx.txs_out[:smaller])
        unspents.extend(tx.unspents[:smaller])
    for tx in txs:
        smaller = min(len(tx.txs_in), len(tx.txs_out))
        txs_in.extend(tx.txs_in[smaller:])
        txs_out.extend(tx.txs_out[smaller:])
        unspents.extend(tx.unspents[smaller:])
    for spendable in spendables:
        txs_in.append(spendable.tx_in())
        unspents.append(spendable)
    for address, coin_value in payables:
        script = standard_tx_out_script(address)
        txs_out.append(TxOut(coin_value, script))
    lock_time = args.lock_time
    version = args.transaction_version
    # if no lock_time is explicitly set, inherit from the first tx or use default
    if lock_time is None:
        if txs:
            lock_time = txs[0].lock_time
        else:
            lock_time = DEFAULT_LOCK_TIME
    # if no version is explicitly set, inherit from the first tx or use default
    if version is None:
        if txs:
            version = txs[0].version
        else:
            version = DEFAULT_VERSION
    if args.remove_tx_in:
        s = set(args.remove_tx_in)
        txs_in = [tx_in for idx, tx_in in enumerate(txs_in) if idx not in s]
    if args.remove_tx_out:
        s = set(args.remove_tx_out)
        txs_out = [tx_out for idx, tx_out in enumerate(txs_out) if idx not in s]
    tx = Tx(txs_in=txs_in, txs_out=txs_out, lock_time=lock_time, version=version, unspents=unspents)
    fee = args.fee
    try:
        distribute_from_split_pool(tx, fee)
    except ValueError as ex:
        print("warning: %s" % ex.args[0], file=sys.stderr)
    # --- signing -----------------------------------------------------------
    unsigned_before = tx.bad_signature_count()
    if unsigned_before > 0 and key_iters:
        def wif_iter(iters):
            # round-robin over the key generators, dropping exhausted ones
            while len(iters) > 0:
                for idx, iter in enumerate(iters):
                    try:
                        wif = next(iter)
                        yield wif
                    except StopIteration:
                        iters = iters[:idx] + iters[idx+1:]
                        break
        print("signing...", file=sys.stderr)
        sign_tx(tx, wif_iter(key_iters), p2sh_lookup=p2sh_lookup)
    unsigned_after = tx.bad_signature_count()
    if unsigned_after > 0 and key_iters:
        print("warning: %d TxIn items still unsigned" % unsigned_after, file=sys.stderr)
    if len(tx.txs_in) == 0:
        print("warning: transaction has no inputs", file=sys.stderr)
    if len(tx.txs_out) == 0:
        print("warning: transaction has no outputs", file=sys.stderr)
    # unspents are only serialized while the tx is not fully signed
    include_unspents = (unsigned_after > 0)
    tx_as_hex = tx.as_hex(include_unspents=include_unspents)
    # --- output ------------------------------------------------------------
    if args.output_file:
        f = args.output_file
        if f.name.endswith(".hex"):
            f.write(tx_as_hex.encode("utf8"))
        else:
            tx.stream(f)
            if include_unspents:
                tx.stream_unspents(f)
        f.close()
    elif args.show_unspents:
        for spendable in tx.tx_outs_as_spendable():
            print(spendable.as_text())
    else:
        if not tx.missing_unspents():
            check_fees(tx)
        dump_tx(tx, args.network, args.verbose_signature)
        if include_unspents:
            print("including unspents in hex dump since transaction not fully signed")
        print(tx_as_hex)
    if args.cache:
        if tx_db is None:
            warning_tx_cache = message_about_tx_cache_env()
            warning_get_tx = message_about_get_tx_env()
            tx_db = get_tx_db()
        tx_db.put(tx)
    # --- optional validation -----------------------------------------------
    if args.bitcoind_url:
        if tx_db is None:
            warning_tx_cache = message_about_tx_cache_env()
            warning_get_tx = message_about_get_tx_env()
            tx_db = get_tx_db()
        validate_bitcoind(tx, tx_db, args.bitcoind_url)
    if tx.missing_unspents():
        print("\n** can't validate transaction as source transactions missing", file=sys.stderr)
    else:
        try:
            if tx_db is None:
                warning_tx_cache = message_about_tx_cache_env()
                warning_get_tx = message_about_get_tx_env()
                tx_db = get_tx_db()
            tx.validate_unspents(tx_db)
            print('all incoming transaction values validated')
        except BadSpendableError as ex:
            print("\ncuem ERROR: FEES INCORRECTLY STATED: %s" % ex.args[0], file=sys.stderr)
        except Exception as ex:
            print("\n*** can't validate source transactions as untampered: %s" %
                  ex.args[0], file=sys.stderr)
    # print warnings
    for m in [warning_tx_cache, warning_get_tx, warning_spendables]:
        if m:
            print("warning: %s" % m, file=sys.stderr)
2
Example 19
Project: helloworld Source File: users.py
def get_remote_user_info(handler, user_url, profile):
    """Discover a remote user's federation endpoints and store them.

    Fetches the remote profile page at *user_url*, tries webfinger/LRDD
    discovery (falling back to <link rel="alternate"> feed autodiscovery),
    then scrapes the user's feed for identity, avatar and hub information
    and saves it into a ``users_remote`` model row keyed by *profile*
    (the local username).  Returns the saved ``user_remote`` object.

    Raises tornado.web.HTTPError(400) when no usable alias can be found.
    """
    # get host-meta first; best-effort — bare except keeps discovery optional
    lrdd_link = None
    try:
        lrdd_link = get_lrdd_link(user_url)
    except:
        pass
    salmon_url = ''
    webmention_url = ''
    magic_key = ''
    alias = ''
    webfinger_doc = None
    user_response = urllib2.urlopen(user_url)
    user_doc = BeautifulSoup(user_response.read())
    if not lrdd_link:
        # no LRDD: fall back to HTML feed autodiscovery (atom preferred)
        atom_url = user_doc.find('link', rel=re.compile(r"\balternate\b"),
                                 type='application/atom+xml')
        rss_url = user_doc.find('link', rel=re.compile(r"\balternate\b"),
                                type='application/rss+xml')
        feed_url = atom_url or rss_url
    else:
        # get webfinger
        try:
            webfinger_doc = get_webfinger(lrdd_link, user_url)
            feed_url = webfinger_doc.find('link',
                                          rel='http://schemas.google.com/g/2010#updates-from')
            salmon_url = webfinger_doc.find('link', rel='salmon')
            if salmon_url:
                salmon_url = salmon_url['href']
            webmention_url = webfinger_doc.find('link', rel='webmention')
            if webmention_url:
                webmention_url = webmention_url['href']
            magic_key = webfinger_doc.find('link', rel='magic-public-key')
            if magic_key:
                magic_key = magic_key['href'];
                magic_key = magic_key.replace('data:application/magic-public-key,', '')
            alias = webfinger_doc.find('alias')
            if alias:
                alias = alias.string
        except:
            feed_url = None
    if not webmention_url:
        webmention_url = user_doc.find('link', rel=re.compile(r"\bwebmention\b"))
        if webmention_url:
            webmention_url = webmention_url['href']
    if not feed_url:
        feed_url = user_url
    else:
        feed_url = feed_url['href']
    # resolve relative feed URLs against <base href> or the profile host
    base_url = None
    if (not feed_url.startswith('/') and not (feed_url.startswith('http://') or
        feed_url.startswith('https://'))):
        base_url = user_doc.find('base')
        if base_url:
            base_url = base_url['href']
        else:
            base_url = ''
        feed_url = base_url + feed_url
    parsed_url = urlparse.urlparse(user_url)
    if not (feed_url.startswith('http://') or feed_url.startswith('https://')):
        if (not feed_url.startswith('/')):
            feed_url = '/' + feed_url
        feed_url = parsed_url.scheme + '://' + parsed_url.hostname + feed_url
    feed_response = urllib2.urlopen(feed_url)
    feed_doc = BeautifulSoup(feed_response.read())
    author = feed_doc.find('author')
    # NOTE(review): this discards any alias obtained from webfinger above —
    # the feed-derived alias always wins; confirm that is intended.
    alias = None
    if author:
        uri = author.find('uri')
        if uri:
            alias = uri.string  # alias or user_url
    if not alias:
        alt_link = feed_doc.find('link', rel=re.compile(r"\balternate\b"))
        if alt_link:
            alias = alt_link['href']
        else:
            # XXX UGH, BeautifulSoup treats <link> as self-closing tag
            # LAMESAUCE for rss
            alias = feed_doc.find('link').nextSibling
    if not alias or not alias.strip():
        raise tornado.web.HTTPError(400)
    alias = alias.strip()
    # look up an existing row for (local user, remote alias), or start fresh
    user_remote = handler.models.users_remote.get(local_username=profile,
                                                  profile_url=alias)[0]
    hub_url = feed_doc.find(re.compile('.+:link$'), rel='hub')
    if not user_remote:
        user_remote = handler.models.users_remote()
    # favicon: explicit <link rel="shortcut icon"> or /favicon.ico fallback
    favicon = None
    favicon = user_doc.find('link', rel='shortcut icon')
    if favicon:
        if (favicon['href'].startswith('http://') or
            favicon['href'].startswith('https://')):
            favicon = favicon['href']
        else:
            if base_url:
                favicon = base_url + favicon['href']
            else:
                favicon = (parsed_url.scheme + '://' + parsed_url.hostname +
                           ('' if favicon['href'].startswith('/') else '/') + favicon['href'])
    else:
        favicon = parsed_url.scheme + '://' + parsed_url.hostname + '/favicon.ico'
    user_remote.favicon = favicon
    user_remote.local_username = profile
    # avatar preference: feed <logo> (unless it belongs to a <source>),
    # then <image><url>, then the favicon
    logo = feed_doc.find('logo')
    if logo and logo.parent.name == 'source':
        logo = None
    image = feed_doc.find('image')
    if logo:
        user_remote.avatar = logo.string
    elif image:
        image = feed_doc.find('image')
        url = image.find('url')
        user_remote.avatar = url.string
    else:
        user_remote.avatar = favicon
    if not favicon:
        user_remote.favicon = user_remote.avatar
    # display name / username: atom author extensions, then webfinger, then feed title
    preferred_username = None
    display_name = None
    if author:
        preferred_username = author.find(re.compile('.+:preferredusername$'))
        display_name = author.find(re.compile('.+:displayname$'))
    if author and preferred_username and display_name:
        #user_remote.avatar = author.find('link', rel='avatar')['href']
        user_remote.username = preferred_username.string
        user_remote.name = display_name.string
    elif webfinger_doc:
        user_remote.username = webfinger_doc.find('Property',
                                                  type="http://apinamespace.org/atom/username").string
    else:
        user_remote.username = feed_doc.find('title').string
    user_remote.profile_url = alias
    user_remote.magic_key = magic_key
    user_remote.salmon_url = salmon_url
    user_remote.webmention_url = webmention_url
    user_remote.feed_url = feed_url
    if hub_url:
        user_remote.hub_url = hub_url['href']
    user_remote.save()
    try:
        # TODO(mime): Add hub.secret
        if user_remote.hub_url:
            callback_url = handler.nav_url(host=True, username=profile,
                                           section='push')
            pubsubhubbub_subscribe.subscribe_topic(user_remote.hub_url,
                                                   user_remote.feed_url, callback_url, verify="sync")
    except:
        # best-effort: a failed hub subscription must not fail the whole save
        import logging
        logging.error("couldn't subscribe on the hub!")
    return user_remote
2
Example 20
def parse_cmd(self):
""" Command parser """
try:
while self.run_flag:
try:
line = self.robotSocket.recv(1024)
except socket.error as msg:
continue
self.cmdBuffer += line
# String contained within $ and * (with no $ or * symbols in it)
buf_pattern = r'\$[^\$\*]*?\*'
buf_regex = re.compile(buf_pattern)
buf_result = buf_regex.search(self.cmdBuffer)
if buf_result:
msg = buf_result.group()
print msg
self.cmdBuffer = ''
cmd_pattern = r'(?P<CMD>[A-Z]{3,})'
set_pattern = r'(?P<SET>=?)'
query_pattern = r'(?P<QUERY>\??)'
arg_pattern = r'(?(2)(?P<ARGS>.*))'
msg_pattern = r'\$' + \
cmd_pattern + \
set_pattern + \
query_pattern + \
arg_pattern + \
r'.*\*'
msg_regex = re.compile(msg_pattern)
msg_result = msg_regex.search(msg)
if msg_result.group('CMD') == 'CHECK':
self.robotSocket.sendto(
'Hello from QuickBot\n', (self.base_ip, self.port))
elif msg_result.group('CMD') == 'PWM':
if msg_result.group('QUERY'):
if VERBOSE:
print str(self.get_pwm())
self.robotSocket.sendto(str(self.get_pwm()) + '\n',
(self.base_ip, self.port))
elif msg_result.group('SET') and msg_result.group('ARGS'):
args = msg_result.group('ARGS')
pwm_pattern = r'(?P<LEFT>[-]?\d+),(?P<RIGHT>[-]?\d+)'
pwm_regex = re.compile(pwm_pattern)
pwm_result = pwm_regex.match(args)
if pwm_result:
pwm = [int(pwm_result.group('LEFT')), \
int(pwm_result.group('RIGHT'))]
self.set_pwm(pwm)
self.robotSocket.sendto(str(self.get_pwm()) + '\n',
(self.base_ip, self.port))
elif msg_result.group('CMD') == 'IRVAL':
if msg_result.group('QUERY'):
reply = '[' + ', '.join(map(str, self.get_ir())) + ']'
print 'Sending: ' + reply
self.robotSocket.sendto(
reply + '\n', (self.base_ip, self.port))
elif msg_result.group('CMD') == 'ULTRAVAL':
if msg_result.group('QUERY'):
reply = '[' + ', '.join(map(str, self.ultraVal)) + ']'
print 'Sending: ' + reply
self.robotSocket.sendto(
reply + '\n', (self.base_ip, self.port))
elif msg_result.group('CMD') == 'WHEELANG':
if msg_result.group('QUERY'):
print 'Sending: ' + str(self.get_wheel_ang())
self.robotSocket.sendto(
str(self.get_wheel_ang()) +
'\n', (self.base_ip, self.port))
elif msg_result.group('SET') and msg_result.group('ARGS'):
args = msg_result.group('ARGS')
arg_pattern = \
r'(?P<LEFT>[-]?\d+[\.]?\d*),(?P<RIGHT>[-]?\d+[\.]?\d*)'
regex = re.compile(arg_pattern)
result = regex.match(args)
if result:
pos = [float(regex.match(args).group('LEFT')), \
float(regex.match(args).group('RIGHT'))]
self.set_wheel_ang(pos)
elif msg_result.group('CMD') == 'ENVAL':
if msg_result.group('QUERY'):
reply = \
'[' + ', '.join(map(str, self.get_enc_val())) + ']'
print 'Sending: ' + reply
self.robotSocket.sendto(
reply + '\n', (self.base_ip, self.port))
elif msg_result.group('SET') and msg_result.group('ARGS'):
args = msg_result.group('ARGS')
arg_pattern = \
r'(?P<LEFT>[-]?\d+[\.]?\d*),(?P<RIGHT>[-]?\d+[\.]?\d*)'
regex = re.compile(arg_pattern)
result = regex.match(args)
if result:
enc_pos = [float(regex.match(args).group('LEFT')), \
float(regex.match(args).group('RIGHT'))]
self.set_enc_val(enc_pos)
elif msg_result.group('CMD') == 'ENRAW':
if msg_result.group('QUERY'):
reply = \
'[' + ', '.join(map(str, self.get_enc_raw())) + ']'
print 'Sending: ' + reply
self.robotSocket.sendto(
reply + '\n', (self.base_ip, self.port))
elif msg_result.group('CMD') == 'ENOFFSET':
if msg_result.group('QUERY'):
reply = '[' + \
', '.join(map(str, self.get_enc_offset())) + ']'
print 'Sending: ' + reply
self.robotSocket.sendto(
reply + '\n', (self.base_ip, self.port))
elif msg_result.group('SET') and msg_result.group('ARGS'):
args = msg_result.group('ARGS')
arg_pattern = \
r'(?P<LEFT>[-]?\d+[\.]?\d*),(?P<RIGHT>[-]?\d+[\.]?\d*)'
regex = re.compile(arg_pattern)
result = regex.match(args)
if result:
offset = [float(regex.match(args).group('LEFT')), \
float(regex.match(args).group('RIGHT'))]
self.set_enc_offset(offset)
elif msg_result.group('CMD') == 'ENVEL':
if msg_result.group('QUERY'):
reply = \
'[' + ', '.join(map(str, self.get_enc_vel())) + ']'
print 'Sending: ' + reply
self.robotSocket.sendto(
reply + '\n', (self.base_ip, self.port))
elif msg_result.group('SET') and msg_result.group('ARGS'):
args = msg_result.group('ARGS')
arg_pattern = \
r'(?P<LEFT>[-]?\d+[\.]?\d*),(?P<RIGHT>[-]?\d+[\.]?\d*)'
regex = re.compile(arg_pattern)
result = regex.match(args)
if result:
enc_vel = [float(regex.match(args).group('LEFT')), \
float(regex.match(args).group('RIGHT'))]
self.set_enc_vel(enc_vel)
elif msg_result.group('CMD') == 'WHEELANGVEL':
if msg_result.group('QUERY'):
reply = \
'[' + ', '.join(map(str, self.get_wheel_ang_vel())) + ']'
print 'Sending: ' + reply
self.robotSocket.sendto(
reply + '\n', (self.base_ip, self.port))
elif msg_result.group('SET') and msg_result.group('ARGS'):
args = msg_result.group('ARGS')
arg_pattern = \
r'(?P<LEFT>[-]?\d+[\.]?\d*),(?P<RIGHT>[-]?\d+[\.]?\d*)'
regex = re.compile(arg_pattern)
result = regex.match(args)
if result:
wheel_ang_vel = [float(regex.match(args).group('LEFT')), \
float(regex.match(args).group('RIGHT'))]
self.set_wheel_ang_vel(wheel_ang_vel)
elif msg_result.group('CMD') == 'ENRESET':
self.reset_enc_val()
reply = \
'[' + ', '.join(map(str, self.get_enc_val())) + ']'
print 'Encoder values reset to ' + reply
elif msg_result.group('CMD') == 'UPDATE':
if msg_result.group('SET') and msg_result.group('ARGS'):
args = msg_result.group('ARGS')
pwm_pattern = r'(?P<LEFT>[-]?\d+),(?P<RIGHT>[-]?\d+)'
pwm_regex = re.compile(pwm_pattern)
pwm_result = pwm_regex.match(args)
if pwm_result:
pwm = [int(pwm_regex.match(args).group('LEFT')), \
int(pwm_regex.match(args).group('RIGHT'))]
self.set_pwm(pwm)
reply = '[' + ', '.join(map(str, self.enc_pos)) + ', ' \
+ ', '.join(map(str, self.encVel)) + ']'
print 'Sending: ' + reply
self.robotSocket.sendto(
reply + '\n', (self.base_ip, self.port))
elif msg_result.group('CMD') == 'END':
self.end_run()
else:
print 'Invalid: ' + msg
except:
self.end_run()
raise
2
Example 21
Project: WikiDAT Source File: logitem.py
def process_logitem(log_iter):
    """
    Processor for LogItem objects extracted from the 'logging' DB table in
    Wikipedia.

    :param log_iter: iterable of logitem dicts, one per logging row
    :returns: generator yielding the same dicts, enriched in place with
        'flagged', 'block', 'newuser' or 'rights' information
    """
    ip_pat = re.compile(r"^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$")
    # NOTE: this pattern was previously written as a multi-line string
    # without re.VERBOSE, so the 'indefinite' alternative embedded a
    # literal newline + indentation and could never match.
    time_unit_ft = re.compile(r"sec|min|h|d|week|fortnight|month|year|"
                              r"indefinite|infinite")
    lead_zero_pat = re.compile(r"(0\d\d)")
    triple_zero_pat = re.compile(r"000")
    # Case 'month', rounded to 30 days per month
    # Case 'year', multiply by 365.25 days per year
    # Case 'fortnight' is equivalent to 2 weeks
    # Case 'infinite' will default to timedelta.max
    time_units = {'sec': 'seconds',
                  'min': 'minutes',
                  'h': 'hours',
                  'd': 'days',
                  'week': 'weeks',
                  'fortnight': 'weeks',
                  'month': 'days',
                  'year': 'days'
                  }
    time_fac = {'sec': 1,
                'min': 1,
                'h': 1,
                'd': 1,
                'week': 1,
                'fortnight': 2,
                'month': 30,
                'year': 365.25
                }
    for logitem in log_iter:
        # Clean timestamp string: '2014-01-22T10:14:10Z' -> '2014-01-22 10:14:10'
        logitem['timestamp'] = (logitem['timestamp'].
                                replace('Z', '').replace('T', ' '))
        # INFO FLAGGED REVISIONS
        # Content of log_old_flag and log_new_flag
        # for languages with flagged revisions
        if (logitem['type'] == 'review' and
                logitem['action'] in ('approve', 'approve-a', 'unapprove',
                                      'approve-ia', 'approve-i')):
            logitem['flagged'] = True
            # Check presence of params
            # TODO: Investigate review items without params
            if 'params' in logitem:
                flags = logitem['params'].split('\n')
                # Standard case before March 2010
                # Only new stable version if no previous stable version
                # is available
                if len(flags) == 1:
                    logitem['new_flag'] = flags[0]
                    logitem['old_flag'] = '0'
                # Standard case before March 2010
                # 2 params: new stable revision and old stable revision
                # ----
                # Case after March 2010
                # Timestamp of new stable version was introduced
                # as a third param. This is redundant with info from
                # table revision. Thus, we only get the first two params:
                # rev_id of new stable revision and rev_id of
                # previous stable revision
                elif len(flags) == 2 or len(flags) == 3:
                    logitem['new_flag'] = flags[0]
                    logitem['old_flag'] = flags[1]
        # TODO: Evaluate the possibility of extracting flagged-revs
        # related data to an independent DB table
        # INFO BLOCKED USERS
        if (logitem['type'] == 'block' and
                logitem['action'] in ('block', 'unblock', 'reblock')):
            logitem['block'] = {}  # Flag block action for later
            # Identify target user from log_title field
            title = logitem['logtitle'].split(':')
            if len(title) == 2:
                target = title[1]
                if re.search(ip_pat, target):
                    # Case of IP addresses
                    # Fix malformed records: del leading 0s if present
                    target = re.sub(triple_zero_pat, '0', target)
                    target = re.sub(lead_zero_pat,
                                    lambda x: x.group().lstrip('0'), target)
                    try:
                        logitem['block']['target_ip'] = int(ipaddress.ip_address(target))
                    except ValueError:
                        print("Invalid IP address to block: ", target)
                        logitem['block']['target_ip'] = 0
                else:
                    # Case of logged user
                    logitem['block']['target'] = target
            # Calculate duration of block action from log_params field
            # This field might be blank
            # Case 1: Figure + range (e.g. '1 week', '2 days', '6 months')
            # Case 2: Timestamp with expiration date for block
            #         e.g. Wed, 22 Jan 2014 10:14:10 GMT
            if 'params' in logitem and logitem['params']:
                # Identify formation of duration param
                par_dur = logitem['params'].split('\n')[0]
                par_dur = par_dur.replace('Z', '').replace('T', ' ')
                try:
                    exp = dateutil.parser.parse(par_dur)
                    if re.search('GMT', par_dur):
                        ts = dateutil.parser.parse(logitem['timestamp']+'GMT')
                    else:
                        ts = dateutil.parser.parse(logitem['timestamp'])
                    logitem['block']['duration'] = (exp-ts).total_seconds()
                # Try automated detection of block duration, expressed
                # in "natural language" units
                except Exception:
                    exp_par = re.split(r'(\D+)', par_dur)
                    # Defaults so that the checks below never read
                    # unbound names when the split lacks a units part
                    duration = ''
                    units = ''
                    try:
                        duration = exp_par[0]
                        units = exp_par[1].lower()
                    except IndexError:
                        print("No valid pair duration/units found!")
                        print("params:", logitem['params'])
                        logitem['block']['duration'] = 0.0
                    # (typo 'indefininte' fixed: indefinite blocks now
                    # map to timedelta.max instead of 0.0)
                    if units in ('infinite', 'indefinite'):
                        logitem['block']['duration'] = (
                            datetime.timedelta.max.total_seconds())
                    elif duration:
                        try:
                            time_unit = re.search(time_unit_ft,
                                                  units).group()
                            delta_args = {time_units[time_unit]:
                                          int(duration) * time_fac[time_unit]}
                            logitem['block']['duration'] = datetime.timedelta(**delta_args).total_seconds()
                        except AttributeError:
                            print("params:", logitem['params'])
                            logitem['block']['duration'] = 0.0
                        except OverflowError:
                            logitem['block']['duration'] = (
                                datetime.timedelta.max.total_seconds())
                    else:
                        # TODO: Inspect this case later on
                        # Address case of empty duration
                        logitem['block']['duration'] = 0.0
            else:
                # TODO: Inspect this case later on
                # Address case of empty duration
                logitem['block']['duration'] = 0.0
        # INFO DELETIONS
        # TODO:
        # INFO PROTECTIONS
        # TODO:
        # INFO USER REGISTRATIONS
        if (logitem['type'] == 'newusers' and
                logitem['action'] in ('newusers', 'create', 'create2',
                                      'autocreate', 'byemail')):
            # TODO: Evaluate if we need additional info about newusers
            logitem['newuser'] = {}  # Flag new user for later
        # INFO RIGHTS GRANTING
        if logitem['type'] == 'rights' and logitem['action'] == 'rights':
            logitem['rights'] = {}  # Flag new rights granting for later
            try:
                logitem['rights']['username'] = logitem['logtitle'].split(':')[1]
            except IndexError:
                print("No user name info in change of user level.")
                if 'params' in logitem:
                    print("params:", logitem['params'])
                logitem['rights']['username'] = ""
            if 'params' in logitem and logitem['params']:
                pars = logitem['params'].split('\n')
                # Case of old format for parameters, with previous status
                # in first line, then new list of privileges in new line
                if len(pars) > 1:
                    logitem['rights']['right_old'] = pars[0]
                    logitem['rights']['right_new'] = pars[1]
                else:
                    # Case of new single-line format oldgroups --> new groups
                    if re.search('"4::oldgroups"', pars[0]):
                        priv_list = (pars[0].partition('"4::oldgroups"')[2].
                                     partition('"5::newgroups"'))
                        priv_old = re.findall(r'\"(.+?)\"', priv_list[0])
                        priv_new = re.findall(r'\"(.+?)\"', priv_list[2])
                        logitem['rights']['right_old'] = str(priv_old)
                        logitem['rights']['right_new'] = str(priv_new)
                    # Case of primitive free format
                    else:
                        logitem['rights']['right_old'] = ""
                        logitem['rights']['right_new'] = pars[0]
            elif logitem['comment']:
                logitem['rights']['right_old'] = ""
                logitem['rights']['right_new'] = logitem['comment']
            else:
                # No information recorded about new user levels
                logitem['rights']['right_old'] = ""
                logitem['rights']['right_new'] = ""
        yield logitem
        del logitem
2
Example 22
Project: alignak Source File: install_hooks.py
def fix_alignak_cfg(config):
    """
    Fix paths, user and group in alignak.cfg and daemons/*.ini
    Called once all files are copied.

    :param config: distutils/setuptools install command object; only
        `config.install_dir` is read here
    :return: None (rewrites configuration files in place and prints
        installation banners)
    """
    default_paths = {
        'workdir': '/var/run/alignak',
        'logdir': '/var/log/alignak',
        # TODO: confirm it is unuseful...
        'modules_dir': '/var/lib/alignak/modules',
        'plugins_dir': '/var/libexec/alignak',
        'lock_file': '/var/run/alignak/arbiterd.pid',
        'local_log': '/var/log/alignak/arbiterd.log',
        'pidfile': '/var/run/alignak/arbiterd.pid',
        'pack_distribution_file': '/var/lib/alignak/pack_distribution.dat'
    }
    default_macros = {
        'LOGSDIR': '/var/log/alignak',
        'PLUGINSDIR': '/var/libexec/alignak',
    }
    default_ssl = {
        'ca_cert': '/etc/alignak/certs/ca.pem',
        'server_cert': '/etc/alignak/certs/server.cert',
        'server_key': '/etc/alignak/certs/server.key',
    }
    # Changing default user/group if root
    default_users = {}
    if getpass.getuser() == 'root':
        default_users['alignak_user'] = 'alignak'
        default_users['alignak_group'] = 'alignak'
        default_users['user'] = 'alignak'
        default_users['group'] = 'alignak'
        default_users['ALIGNAKUSER'] = 'alignak'
        default_users['ALIGNAKGROUP'] = 'alignak'
        default_users['HOME'] = '`grep ^$ALIGNAKUSER: /etc/passwd | cut -d: -f 6`'
    # Prepare pattern for alignak.cfg
    pattern = "|".join(default_paths.keys())
    changing_path = re.compile("^(%s) *= *" % pattern)
    pattern = "|".join(default_users.keys())
    changing_user = re.compile("^#(%s) *= *" % pattern)
    pattern = "|".join(default_ssl.keys())
    changing_ssl = re.compile("^#(%s) *= *" % pattern)
    pattern = "|".join(default_macros.keys())
    # Raw string: '\$' in a plain string is an invalid escape sequence
    changing_mac = re.compile(r"^\$(%s)\$ *= *" % pattern)
    # Fix resource paths
    alignak_file = os.path.join(
        config.install_dir, "etc", "alignak", "arbiter", "resource.d", "paths.cfg"
    )
    if not os.path.exists(alignak_file):
        print(
            "\n"
            "================================================================================\n"
            "== The configuration file '%s' is missing. ==\n"
            "================================================================================\n"
            % alignak_file
        )
    # NOTE(review): if the file is really missing, fileinput below will
    # still raise -- the banner only warns, it does not skip.
    for line in fileinput.input(alignak_file, inplace=True):
        line = line.strip()
        mac_attr_name = changing_mac.match(line)
        if mac_attr_name:
            new_path = os.path.join(config.install_dir,
                                    default_macros[mac_attr_name.group(1)].strip("/"))
            print("$%s$=%s" % (mac_attr_name.group(1),
                               new_path))
        else:
            print(line)
    # Fix alignak.cfg
    alignak_file = os.path.join(config.install_dir, "etc", "alignak", "alignak.cfg")
    if not os.path.exists(alignak_file):
        print(
            "\n"
            "================================================================================\n"
            "== The configuration file '%s' is missing. ==\n"
            "================================================================================\n"
            % alignak_file
        )
    for line in fileinput.input(alignak_file, inplace=True):
        line = line.strip()
        path_attr_name = changing_path.match(line)
        user_attr_name = changing_user.match(line)
        ssl_attr_name = changing_ssl.match(line)
        if path_attr_name:
            new_path = os.path.join(config.install_dir,
                                    default_paths[path_attr_name.group(1)].strip("/"))
            print("%s=%s" % (path_attr_name.group(1),
                             new_path))
        elif user_attr_name:
            print("#%s=%s" % (user_attr_name.group(1),
                              default_users[user_attr_name.group(1)]))
        elif ssl_attr_name:
            new_path = os.path.join(config.install_dir,
                                    default_ssl[ssl_attr_name.group(1)].strip("/"))
            print("#%s=%s" % (ssl_attr_name.group(1),
                              new_path))
        else:
            print(line)
    # Handle daemons ini files
    for ini_file in ["arbiterd.ini", "brokerd.ini", "schedulerd.ini",
                     "pollerd.ini", "reactionnerd.ini", "receiverd.ini"]:
        # Prepare pattern for ini files.
        # str.strip(".ini") strips a *character set* and would mangle
        # names that start/end with 'i', 'n' or '.'; slice the suffix.
        daemon_name = ini_file[:-len(".ini")]
        default_paths['lock_file'] = '/var/run/alignak/%s.pid' % daemon_name
        default_paths['local_log'] = '/var/log/alignak/%s.log' % daemon_name
        default_paths['pidfile'] = '/var/run/alignak/%s.pid' % daemon_name
        pattern = "|".join(default_paths.keys())
        changing_path = re.compile("^(%s) *= *" % pattern)
        # Fix ini file
        alignak_file = os.path.join(config.install_dir, "etc", "alignak", "daemons", ini_file)
        if not os.path.exists(alignak_file):
            print(
                "\n"
                "================================================================================\n"
                "== The configuration file '%s' is missing. ==\n"
                "================================================================================\n"
                % alignak_file
            )
        for line in fileinput.input(alignak_file, inplace=True):
            line = line.strip()
            path_attr_name = changing_path.match(line)
            user_attr_name = changing_user.match(line)
            ssl_attr_name = changing_ssl.match(line)
            if path_attr_name:
                new_path = os.path.join(config.install_dir,
                                        default_paths[path_attr_name.group(1)].strip("/"))
                print("%s=%s" % (path_attr_name.group(1),
                                 new_path))
            elif user_attr_name:
                print("#%s=%s" % (user_attr_name.group(1),
                                  default_users[user_attr_name.group(1)]))
            elif ssl_attr_name:
                new_path = os.path.join(config.install_dir,
                                        default_ssl[ssl_attr_name.group(1)].strip("/"))
                print("#%s=%s" % (ssl_attr_name.group(1),
                                  new_path))
            else:
                print(line)
    # Handle default/alignak
    if 'linux' in sys.platform or 'sunos5' in sys.platform:
        old_name = os.path.join(config.install_dir, "etc", "default", "alignak.in")
        if not os.path.exists(old_name):
            print("\n"
                  "=======================================================================================================\n"
                  "== The configuration file '%s' is missing.\n"
                  "=======================================================================================================\n"
                  % alignak_file)
        new_name = os.path.join(config.install_dir, "etc", "default", "alignak")
        try:
            os.rename(old_name, new_name)
        except OSError:
            print("\n"
                  "=======================================================================================================\n"
                  "== The configuration file '%s' could not be renamed to '%s'.\n"
                  "== The newly installed configuration will not be up-to-date.\n"
                  "=======================================================================================================\n"
                  % (old_name, new_name))
        default_paths = {
            'ETC': '/etc/alignak',
            'VAR': '/var/lib/alignak',
            'BIN': '/bin',
            'RUN': '/var/run/alignak',
            'LOG': '/var/log/alignak',
            'LIB': '/var/libexec/alignak',
        }
        pattern = "|".join(default_paths.keys())
        changing_path = re.compile("^(%s) *= *" % pattern)
        for line in fileinput.input(new_name, inplace=True):
            line = line.strip()
            path_attr_name = changing_path.match(line)
            user_attr_name = changing_user.match(line)
            if path_attr_name:
                new_path = os.path.join(config.install_dir,
                                        default_paths[path_attr_name.group(1)].strip("/"))
                print("%s=%s" % (path_attr_name.group(1),
                                 new_path))
            elif user_attr_name:
                print("#%s=%s" % (user_attr_name.group(1),
                                  default_users[user_attr_name.group(1)]))
            else:
                print(line)
    # Alignak run script
    alignak_run = ''
    if 'win' in sys.platform:
        pass
    elif 'linux' in sys.platform or 'sunos5' in sys.platform:
        alignak_run = os.path.join(config.install_dir, "etc", "init.d", "alignak start")
    elif 'bsd' in sys.platform or 'dragonfly' in sys.platform:
        alignak_run = os.path.join(config.install_dir, "etc", "rc.d", "alignak start")
    # Alignak configuration root directory
    alignak_etc = os.path.join(config.install_dir, "etc", "alignak")
    # Add ENV vars only if we are in virtualenv
    # in order to get init scripts working
    if 'VIRTUAL_ENV' in os.environ:
        activate_file = os.path.join(os.environ.get("VIRTUAL_ENV"), 'bin', 'activate')
        try:
            afd = open(activate_file, 'r+')
        except Exception as exp:
            print(exp)
            raise Exception("Virtual environment error")
        env_config = ("""export PYTHON_EGG_CACHE=.\n"""
                      """export ALIGNAK_DEFAULT_FILE=%s/etc/default/alignak\n"""
                      % os.environ.get("VIRTUAL_ENV"))
        alignak_etc = "%s/etc/alignak" % os.environ.get("VIRTUAL_ENV")
        alignak_run = "%s/etc/init.d alignak start" % os.environ.get("VIRTUAL_ENV")
        if afd.read().find(env_config) == -1:
            afd.write(env_config)
            print(
                "\n"
                "================================================================================\n"
                "== ==\n"
                "== You need to REsource env/bin/activate in order to set appropriate ==\n"
                "== variables to use init scripts ==\n"
                "== ==\n"
                "================================================================================\n"
            )
    print("\n"
          "================================================================================\n"
          "== ==\n"
          "== The installation succeded. ==\n"
          "== ==\n"
          "== -------------------------------------------------------------------------- ==\n"
          "== ==\n"
          "== You can run Alignak with: ==\n"
          "== %s\n"
          "== ==\n"
          "== The default installed configuration is located here: ==\n"
          "== %s\n"
          "== ==\n"
          "== You will find more information about Alignak configuration here: ==\n"
          "== http://alignak-doc.readthedocs.io/en/latest/04_configuration/index.html ==\n"
          "== ==\n"
          "== -------------------------------------------------------------------------- ==\n"
          "== ==\n"
          "== You should grant the write permissions on the configuration directory to ==\n"
          "== the user alignak: ==\n"
          "== find %s -type f -exec chmod 664 {} +\n"
          "== find %s -type d -exec chmod 775 {} +\n"
          "== -------------------------------------------------------------------------- ==\n"
          "== ==\n"
          "== You should also grant ownership on those directories to the user alignak: ==\n"
          "== chown -R alignak:alignak /usr/local/var/run/alignak ==\n"
          "== chown -R alignak:alignak /usr/local/var/log/alignak ==\n"
          "== chown -R alignak:alignak /usr/local/var/libexec/alignak ==\n"
          "== ==\n"
          "== -------------------------------------------------------------------------- ==\n"
          "== ==\n"
          "== Please note that installing Alignak with the setup.py script is not the ==\n"
          "== recommended way. You'd rather use the packaging built for your OS ==\n"
          "== distribution that you can find here: ==\n"
          "== http://alignak-monitoring.github.io/download/ ==\n"
          "== ==\n"
          "================================================================================\n"
          % (alignak_run, alignak_etc, alignak_etc, alignak_etc)
          )
    # Check Alignak recommended user existence
    if not user_exists('alignak'):
        print(
            "\n"
            "================================================================================\n"
            "== ==\n"
            "== The user account 'alignak' does not exist on your system. ==\n"
            "== ==\n"
            "================================================================================\n"
        )
    if not group_exists('alignak'):
        print(
            "\n"
            "================================================================================\n"
            "== ==\n"
            "== The user group 'alignak' does not exist on your system. ==\n"
            "== ==\n"
            "================================================================================\n"
        )
2
Example 23
Project: TADbit Source File: fastq_utils.py
def quality_plot(fnam, r_enz=None, nreads=None, axe=None, savefig=None, paired=False):
    """
    Plots the sequencing quality of a given FASTQ file. If a restriction enzyme
    (RE) name is provided, can also represent the distribution of digested and
    undigested RE sites and estimate an expected proportion of dangling-ends.
    Proportion of dangling-ends is inferred by counting the number of times a
    dangling-end site, is found at the beginning of any of the reads (divided by
    the number of reads).
    :param fnam: path to FASTQ file
    :param None r_enz: name of a restriction enzyme to analyze site patterns
    :param None nreads: max number of reads to read, not necesary to read all
    :param None axe: matplotlib axes to draw into instead of a new figure
    :param None savefig: path to a file where to save the image generated;
       if None, the image will be shown using matplotlib GUI (the extension
       of the file name will determine the desired format).
    :param False paired: is input FASTQ contains both ends
    :returns: the percentage of dangling-ends (sensu stricto) and the percentage of
       reads with at least a ligation site.
    """
    # PHRED score lookup: character -> quality value
    phred = dict([(c, i) for i, c in enumerate(
        '!"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~')])
    quals = []
    henes = []
    sites = []
    fixes = []
    liges = []
    ligep = 0
    tkw = dict(size=4, width=1.5)
    if fnam.endswith('.gz'):
        fhandler = gopen(fnam)
    elif fnam.endswith('.dsrc'):
        proc = Popen(['dsrc', 'd', '-t8', '-s', fnam], stdout=PIPE)
        fhandler = proc.stdout
    else:
        fhandler = open(fnam)
    if not r_enz:
        if nreads:
            while True:
                try:
                    next(fhandler)
                # BUGFIX: next() signals end-of-file with StopIteration,
                # not EOFError (the r_enz branches already did this);
                # catching EOFError made the loop crash at EOF.
                except StopIteration:
                    break
                seq = next(fhandler)
                if 'N' in seq:
                    henes.extend([i for i, s in enumerate(seq) if s == 'N'])
                next(fhandler)
                line = next(fhandler)
                quals.append([phred[i] for i in line.strip()])
                if len(quals) > nreads:
                    break
        else:  # do this because it's faster
            while True:
                try:
                    next(fhandler)
                except StopIteration:  # BUGFIX: was EOFError
                    break
                seq = next(fhandler)
                if 'N' in seq:
                    henes.extend([i for i, s in enumerate(seq) if s == 'N'])
                next(fhandler)
                line = next(fhandler)
                quals.append([phred[i] for i in line.strip()])
    else:
        r_site = RESTRICTION_ENZYMES[r_enz].replace('|', '')
        l_site = religated(r_enz)
        d_site = repaired(r_enz)
        if r_site*2 == l_site:
            # in case the religated site equals 2 restriction sites (like DnpII)
            site = re.compile('(?<!%s)' % r_site + r_site + '(?!%s)' % r_site)
            fixe = re.compile('(?<!%s)' % d_site + d_site + '(?!%s)' % d_site)
        else:
            site = re.compile(r_site)
            fixe = re.compile(d_site)
        lige = re.compile(l_site)
        if nreads:
            while True:
                try:
                    next(fhandler)
                except StopIteration:
                    break
                seq = next(fhandler)
                sites.extend([m.start() for m in site.finditer(seq)])
                fixes.extend([m.start() for m in fixe.finditer(seq)])
                liges.extend([m.start() for m in lige.finditer(seq)])
                ligep += l_site in seq
                if 'N' in seq:
                    henes.extend([i for i, s in enumerate(seq) if s == 'N'])
                next(fhandler)
                line = next(fhandler)
                quals.append([phred[i] for i in line.strip()])
                if len(quals) > nreads:
                    break
        else:  # do this because it's faster
            while True:
                try:
                    next(fhandler)
                except StopIteration:
                    break
                seq = next(fhandler)
                sites.extend([m.start() for m in site.finditer(seq)])
                fixes.extend([m.start() for m in fixe.finditer(seq)])
                liges.extend([m.start() for m in lige.finditer(seq)])
                ligep += l_site in seq
                if 'N' in seq:
                    henes.extend([i for i, s in enumerate(seq) if s == 'N'])
                next(fhandler)
                line = next(fhandler)
                quals.append([phred[i] for i in line.strip()])
    fhandler.close()
    if not nreads:
        nreads = len(quals)
    # transpose: one tuple of qualities per read position
    quals = zip(*quals)
    meanquals = [np.mean(q) for q in quals]
    errorquals = [np.std(q) for q in quals]
    if axe:
        ax = axe
        fig = axe.get_figure()
        ax2 = fig.add_subplot(212)
    else:
        if r_enz:
            _, (ax, ax2) = plt.subplots(2,1, figsize=(15, 12))
        else:
            _, ax = plt.subplots(1,1, figsize=(15, 6))
    ax.patch.set_facecolor('lightgrey')
    ax.patch.set_alpha(0.4)
    ax.grid(ls='-', color='w', lw=1.5, alpha=0.6, which='major')
    ax.grid(ls='-', color='w', lw=1, alpha=0.3, which='minor')
    ax.set_axisbelow(True)
    # remove tick marks
    ax.tick_params(axis='both', direction='out', top=False, right=False,
                   left=False, bottom=False)
    ax.tick_params(axis='both', direction='out', top=False, right=False,
                   left=False, bottom=False, which='minor')
    ax.errorbar(range(len(line.strip())), meanquals,
                linewidth=1, elinewidth=1, color='darkblue',
                yerr=errorquals, ecolor='orange')
    ax.set_xlim((0, len(line)))
    ax.set_xlabel('Nucleotidic position')
    ax.set_ylabel('PHRED score')
    ax.set_title('Sequencing Quality (%d reads)' % (nreads))
    ax.yaxis.label.set_color('darkblue')
    ax.tick_params(axis='y', colors='darkblue', **tkw)
    axb = ax.twinx()
    axb.plot([henes.count(i) for i in xrange(len(line))], linewidth=1,
             color='black', linestyle='--')
    axb.yaxis.label.set_color('black')
    axb.tick_params(axis='y', colors='black', **tkw)
    axb.set_ylabel('Number of "N" per position')
    try:  # no Ns found (yes... it happens)
        axb.set_yscale('log')
        axb.set_ylim((0, axb.get_ylim()[1] * 1000))
    except ValueError:
        axb.set_yscale('linear')
    ax.set_ylim((0, ax.get_ylim()[1]))
    ax.set_xlim((0, len(line)))
    if r_enz:
        ax.set_title('Sequencing Quality and deconvolution (%s %d reads)' % (
            r_enz, nreads))
        ax.set_xlabel('')
        plt.setp(ax.get_xticklabels(), visible=False)
        ax2.patch.set_facecolor('lightgrey')
        ax2.patch.set_alpha(0.4)
        ax2.grid(ls='-', color='w', lw=1.5, alpha=0.6, which='major')
        ax2.grid(ls='-', color='w', lw=1, alpha=0.3, which='minor')
        ax2.set_axisbelow(True)
        ax2.set_xlabel('Nucleotidic position')
        # per-position counts of undigested / religated / dangling sites
        seq_len = len(line) - max((len(r_site), len(l_site), len(d_site)))
        sites = [sites.count(k) for k in xrange(seq_len)]  # Undigested
        liges = [liges.count(k) for k in xrange(seq_len)]  # OK
        fixes = [fixes.count(k) for k in xrange(seq_len)]  # DE
        if d_site in r_site:
            pos = r_site.find(d_site)
            fixes = (fixes[:pos] +
                     [fixes[k] - sites[k-pos] for k in xrange(pos, seq_len)])
        if d_site in l_site:
            pos = l_site.find(d_site)
            fixes = (fixes[:pos] +
                     [fixes[k] - liges[k-pos] for k in xrange(pos, seq_len)])
        site_len = max((len(r_site), len(l_site), len(d_site)))
        if paired:
            # mask the junction between the two ends
            sites[len(line) / 2 - site_len:
                  len(line) / 2] = [float('nan')] * site_len
            liges[len(line) / 2 - site_len:
                  len(line) / 2] = [float('nan')] * site_len
            fixes[len(line) / 2 - site_len:
                  len(line) / 2] = [float('nan')] * site_len
        ax2.plot(sites, linewidth=2, color='darkred')
        ax2.set_ylabel('Undigested RE site (%s)' % r_site)
        ax2.yaxis.label.set_color('darkred')
        ax2.tick_params(axis='y', colors='darkred', **tkw)
        ax3 = ax2.twinx()
        ax3.plot(liges, linewidth=2, color='darkblue')
        ax3.yaxis.label.set_color('darkblue')
        ax3.tick_params(axis='y', colors='darkblue', **tkw)
        ax3.set_ylabel('Religated (%s)' % l_site)
        if any([f > 0 for f in fixes]):
            ax4 = ax2.twinx()
            ax4.spines["right"].set_position(("axes", 1.07))
            make_patch_spines_invisible(ax4)
            ax4.spines["right"].set_visible(True)
            ax4.plot(fixes, linewidth=2, color='darkorange')
            ax4.yaxis.label.set_color('darkorange')
            ax4.tick_params(axis='y', colors='darkorange', **tkw)
            ax4.set_ylabel('Dangling-ends (%s)' % d_site)
        else:
            ax2.set_ylabel('RE site & Dangling-ends  (%s)' % r_site)
        ax2.set_xlim((0, len(line)))
        # stats exclude position 0 (true dangling-ends) and the junction
        lig_cnt = (np.nansum(liges) - liges[0] - liges[len(line) / 2])
        sit_cnt = (np.nansum(sites) - sites[0] - sites[len(line) / 2])
        des = ((100. * (fixes[0] + (fixes[(len(line) / 2)]
                                    if paired else 0)))
               / nreads) if any([f > 0 for f in fixes]) else (
            100. * (sites[0] + (sites[(len(line) / 2)] if paired else 0))) / nreads
        plt.title(('Percentage of digested sites: %.0f%%, of dangling-ends: %.0f%%\n' +
                   'Percentage of reads with ligation site: %.0f%%') %(
                      (100. * lig_cnt) / (lig_cnt + sit_cnt),
                      des,
                      (ligep * 100.) / nreads))
        plt.subplots_adjust(right=0.85)
    if savefig:
        tadbit_savefig(savefig)
        plt.close('all')
    elif not axe:
        plt.show()
    return des, (ligep * 100.) / nreads
2
Example 24
Project: corpkit Source File: interrogator.py
def interrogator(corpus,
search='w',
query='any',
show='w',
exclude=False,
excludemode='any',
searchmode='all',
case_sensitive=False,
save=False,
subcorpora=False,
just_metadata=False,
skip_metadata=False,
preserve_case=False,
lemmatag=False,
files_as_subcorpora=False,
only_unique=False,
only_format_match=True,
multiprocess=False,
spelling=False,
regex_nonword_filter=r'[A-Za-z0-9]',
gramsize=1,
conc=False,
maxconc=9999,
window=None,
no_closed=False,
no_punct=True,
discard=False,
**kwargs):
"""
Interrogate corpus, corpora, subcorpus and file objects.
See corpkit.interrogation.interrogate() for docstring
"""
conc = kwargs.get('do_concordancing', conc)
quiet = kwargs.get('quiet', False)
coref = kwargs.pop('coref', False)
show_conc_metadata = kwargs.pop('show_conc_metadata', False)
fsi_index = kwargs.pop('fsi_index', True)
dep_type = kwargs.pop('dep_type', 'collapsed-ccprocessed-dependencies')
nosubmode = subcorpora is None
#todo: temporary
#if getattr(corpus, '_dlist', False):
# subcorpora = 'file'
# store kwargs and locs
locs = locals().copy()
locs.update(kwargs)
locs.pop('kwargs', None)
# so you can do corpus.interrogate('features/postags/wordclasses/lexicon')
if search == 'features':
search = 'v'
query = 'any'
if search in ['postags', 'wordclasses']:
query = 'any'
preserve_case = True
show = 'p' if search == 'postags' else 'x'
# use tregex if simple because it's faster
# but use dependencies otherwise
search = 't' if subcorpora else {'w': 'any'}
if search == 'lexicon':
search = {'w': 'any'}
if not kwargs.get('cql') and isinstance(search, STRINGTYPE) and len(search) > 3:
raise ValueError('search argument not recognised.')
import codecs
import signal
import os
from time import localtime, strftime
from collections import Counter
import pandas as pd
from pandas import DataFrame, Series
from corpkit.interrogation import Interrogation, Interrodict
from corpkit.corpus import Datalist, Corpora, Corpus, File, Subcorpus
from corpkit.process import (tregex_engine, get_deps, unsplitter, sanitise_dict,
animator, filtermaker, fix_search,
pat_format, auto_usecols, format_tregex,
make_conc_lines_from_whole_mid)
from corpkit.other import as_regex
from corpkit.dictionaries.process_types import Wordlist
from corpkit.build import check_jdk
from corpkit.conll import pipeline
import re
if regex_nonword_filter:
is_a_word = re.compile(regex_nonword_filter)
else:
is_a_word = re.compile(r'.*')
from traitlets import TraitError
have_java = check_jdk()
# convert cql-style queries---pop for the sake of multiprocessing
cql = kwargs.pop('cql', None)
if cql:
from corpkit.cql import to_corpkit
search, exclude = to_corpkit(search)
def signal_handler(signal, _):
    """
    Allow pausing and restarting when not in GUI

    Temporarily restores the original SIGINT handler so a second
    ctrl+c during the pause quits, then re-installs this handler
    on resume. No-op when running under the GUI (``root`` set).
    """
    if root:
        return
    # the 'signal' parameter shadows the module, so re-import it here
    import signal
    import sys
    from time import localtime, strftime
    # hand SIGINT back to the original handler while paused
    signal.signal(signal.SIGINT, original_sigint)
    thetime = strftime("%H:%M:%S", localtime())
    INPUTFUNC('\n\n%s: Paused. Press any key to resume, or ctrl+c to quit.\n' % thetime)
    time = strftime("%H:%M:%S", localtime())
    print('%s: Interrogation resumed.\n' % time)
    # resume catching ctrl+c with this pause handler
    signal.signal(signal.SIGINT, signal_handler)
def add_adj_for_ngram(show, gramsize):
    """
    Expand *show* with positional '+N' variants when an n-gram
    window larger than one token is requested; otherwise return
    *show* unchanged.
    """
    if gramsize == 1:
        return show
    expanded = list(show)
    expanded.extend('+%d%s' % (offset, bit)
                    for offset in range(1, gramsize)
                    for bit in show)
    return expanded
def fix_show_bit(show_bit):
    """
    Normalise a single search/show token: strip leading 'n' then
    'b' markers, then pad with the default start code ('m') and
    end code ('w') where one is missing.
    """
    valid_ends = ['w', 'l', 'i', 'n', 'f', 'p', 'x', 's', 'a', 'e']
    valid_starts = ['d', 'g', 'm', 'b', 'h', '+', '-', 'r']
    token = show_bit.lstrip('n').lstrip('b')
    if token[-1] not in valid_ends:
        token = token + 'w'
    if token[0] not in valid_starts:
        token = 'm' + token
    return token
def fix_show(show, gramsize):
    """
    Normalise *show* to a lowercased list of fixed tokens, then
    expand it for the requested n-gram size.
    """
    if isinstance(show, STRINGTYPE):
        show = [show.lower()]
    elif isinstance(show, list):
        show = [item.lower() for item in show]
    normalised = [fix_show_bit(item) for item in show]
    return add_adj_for_ngram(normalised, gramsize)
def is_multiquery(corpus, search, query, outname):
    """
    Work out whether this interrogation needs multiprocessing, and
    retype the query if need be.

    Returns (mode_or_False, corpus, search, query), where the mode
    names which multiquery strategy applies.
    """
    from collections import OrderedDict
    from corpkit.dictionaries.process_types import Wordlist
    if isinstance(query, Wordlist):
        query = list(query)
    is_mul = False
    # later checks deliberately override earlier ones
    if subcorpora and multiprocess:
        is_mul = 'subcorpora'
    if isinstance(subcorpora, (list, tuple)):
        is_mul = 'subcorpora'
    if isinstance(query, (dict, OrderedDict)):
        is_mul = 'namedqueriessingle'
    if isinstance(search, dict) and \
            all(isinstance(i, dict) for i in list(search.values())):
        is_mul = 'namedqueriesmultiple'
    return is_mul, corpus, search, query
def ispunct(s):
    """Return True when every character of *s* is ASCII punctuation."""
    from string import punctuation
    return not any(ch not in punctuation for ch in s)
def uniquify(conc_lines):
    """
    Return concordance lines with duplicates removed.

    Two lines are duplicates when their speaker, left context,
    middle match and right context are all equal; the first
    occurrence wins and input order is preserved.

    Fixes: the original kept seen keys in a list (O(n) membership
    test per line, O(n^2) overall) and built the key by joining
    fields with spaces, which could merge distinct lines whose
    fields themselves contain spaces. A set of tuples is both
    O(1) per lookup and collision-free.
    """
    unique_lines = []
    seen = set()
    for line in conc_lines:
        _, speakr, start, middle, end = line
        key = (speakr, start, middle, end)
        if key not in seen:
            unique_lines.append(line)
            seen.add(key)
    return unique_lines
def compiler(pattern):
    """
    Compile regex or fail gracefully

    Pass-through for already-compiled patterns. Case sensitivity
    comes from the enclosing scope's ``case_sensitive`` flag. On
    failure, returns the string 'Bad query' in GUI mode (``root``
    set) or raises ValueError otherwise.
    """
    # anything exposing .pattern is treated as already compiled
    if hasattr(pattern, 'pattern'):
        return pattern
    import re
    try:
        if case_sensitive:
            comped = re.compile(pattern)
        else:
            comped = re.compile(pattern, re.IGNORECASE)
        return comped
    # NOTE(review): bare except also swallows non-regex errors
    except:
        import traceback
        import sys
        from time import localtime, strftime
        exc_type, exc_value, exc_traceback = sys.exc_info()
        lst = traceback.format_exception(exc_type, exc_value, exc_traceback)
        # the last formatted line carries the actual error message
        error_message = lst[-1]
        thetime = strftime("%H:%M:%S", localtime())
        print('%s: Query %s' % (thetime, error_message))
        if root:
            return 'Bad query'
        else:
            raise ValueError('%s: Query %s' % (thetime, error_message))
def determine_search_func(show):
    """Figure out what search function we're using

    Returns (optiontext, simple_tregex_mode, statsmode,
    tree_to_text, search_trees), derived from the enclosing
    scope's ``search``, ``datatype``, ``have_java`` and kwargs.
    """
    simple_tregex_mode = False
    statsmode = False
    tree_to_text = False
    search_trees = False
    # the fast Tregex path is only usable when no metadata or
    # subcorpora handling is requested and tgrep was not forced
    simp_crit = all(not i for i in [kwargs.get('tgrep'),
                                    files_as_subcorpora,
                                    subcorpora,
                                    just_metadata,
                                    skip_metadata])
    if search.get('t') and simp_crit:
        if have_java:
            simple_tregex_mode = True
        else:
            search_trees = 'tgrep'
        optiontext = 'Searching parse trees'
    elif datatype == 'conll':
        if any(i.endswith('t') for i in search.keys()):
            # tree search over CONLL data: prefer tregex when Java
            # is available and tgrep was not explicitly requested
            if have_java and not kwargs.get('tgrep'):
                search_trees = 'tregex'
            else:
                search_trees = 'tgrep'
            optiontext = 'Searching parse trees'
        elif any(i.endswith('v') for i in search.keys()):
            # either of these searchers now seems to work
            #seacher = get_stats_conll
            statsmode = True
            optiontext = 'General statistics'
        elif any(i.endswith('r') for i in search.keys()):
            optiontext = 'Distance from root'
        else:
            optiontext = 'Querying CONLL data'
    # NOTE(review): optiontext is unbound (NameError) if neither
    # branch matches, i.e. no 't' search and datatype != 'conll'
    return optiontext, simple_tregex_mode, statsmode, tree_to_text, search_trees
def get_tregex_values(show):
    """If using Tregex, set appropriate values

    - Check for valid query
    - Make 'any' query
    - Make list query

    Returns (query, show_options) or ('Bad query', None) when the
    dry-run validation fails. Mutates the enclosing ``search['t']``.
    """
    translated_option = 't'
    if isinstance(search['t'], Wordlist):
        search['t'] = list(search['t'])
    # dry-run the query through tregex purely to validate it
    q = tregex_engine(corpus=False,
                      query=search.get('t'),
                      options=['-t'],
                      check_query=True,
                      root=root,
                      preserve_case=preserve_case
                     )
    # so many of these bad fixing loops!
    # strip the default 'm' start marker from each show token
    nshow = []
    for i in show:
        if i == 'm':
            nshow.append('w')
        else:
            nshow.append(i.lstrip('m'))
    show = nshow
    if q is False:
        if root:
            return 'Bad query', None
        else:
            return 'Bad query', None
    if isinstance(search['t'], list):
        regex = as_regex(search['t'], boundaries='line', case_sensitive=case_sensitive)
    else:
        regex = ''
    # listquery, anyquery, translated_option
    treg_dict = {'p': [r'__ < (/%s/ !< __)' % regex, r'__ < (/.?[A-Za-z0-9].?/ !< __)', 'u'],
                 'pl': [r'__ < (/%s/ !< __)' % regex, r'__ < (/.?[A-Za-z0-9].?/ !< __)', 'u'],
                 'x': [r'__ < (/%s/ !< __)' % regex, r'__ < (/.?[A-Za-z0-9].?/ !< __)', 'u'],
                 't': [r'__ < (/%s/ !< __)' % regex, r'__ < (/.?[A-Za-z0-9].?/ !< __)', 'o'],
                 'w': [r'/%s/ !< __' % regex, r'/.?[A-Za-z0-9].?/ !< __', 't'],
                 'c': [r'/%s/ !< __' % regex, r'/.?[A-Za-z0-9].?/ !< __', 'C'],
                 'l': [r'/%s/ !< __' % regex, r'/.?[A-Za-z0-9].?/ !< __', 't'],
                 'u': [r'/%s/ !< __' % regex, r'/.?[A-Za-z0-9].?/ !< __', 'v']
                }
    newshow = []
    # first show token decides the primary tregex output option;
    # remaining tokens contribute extra output options
    listq, anyq, translated_option = treg_dict.get(show[0][-1].lower())
    newshow.append(translated_option)
    for item in show[1:]:
        _, _, noption = treg_dict.get(item.lower())
        newshow.append(noption)
    if isinstance(search['t'], list):
        search['t'] = listq
    elif search['t'] == 'any':
        search['t'] = anyq
    return search['t'], newshow
def correct_spelling(a_string):
    """
    Convert spellings within a slash-delimited string using the
    UK/US word-transform table, preserving the piece's original
    casing when *preserve_case* is on. No-op when *spelling* is
    falsy.
    """
    if not spelling:
        return a_string
    from corpkit.dictionaries.word_transforms import usa_convert
    if spelling.lower() == 'uk':
        # table maps UK->US by default; invert it for UK output
        usa_convert = {v: k for k, v in list(usa_convert.items())}
    converted_bits = []
    for piece in a_string.split('/'):
        converted = usa_convert.get(piece.lower(), piece)
        if piece.islower() or preserve_case is False:
            converted = converted.lower()
        elif piece.isupper() and preserve_case:
            converted = converted.upper()
        elif piece.istitle() and preserve_case:
            converted = converted.title()
        converted_bits.append(converted)
    return '/'.join(converted_bits)
def make_search_iterable(corpus):
    """determine how to structure the corpus for interrogation

    Returns a dict mapping (name, path) to a list of file objects,
    or to False in simple Tregex mode (tregex consumes whole
    directories itself).
    """
    # skip file definitions if they are not needed
    if getattr(corpus, '_dlist', False):
        return {(i.name, i.path): [i] for i in list(corpus.files)}
    #return {('Sample', 'Sample'): list(corpus.files)}
    if simple_tregex_mode:
        if corpus.level in ['s', 'f', 'd']:
            return {(corpus.name, corpus.path): False}
        else:
            # one entry per subdirectory of the corpus
            return {(os.path.basename(i), os.path.join(corpus.path, i)): False
                    for i in os.listdir(corpus.path)
                    if os.path.isdir(os.path.join(corpus.path, i))}
    if isinstance(corpus, Datalist):
        to_iterate_over = {}
        # it could be files or subcorpus objects
        if corpus[0].level in ['s', 'd']:
            if files_as_subcorpora:
                for subc in corpus:
                    for f in subc.files:
                        to_iterate_over[(f.name, f.path)] = [f]
            else:
                for subc in corpus:
                    to_iterate_over[(subc.name, subc.path)] = subc.files
        elif corpus[0].level == 'f':
            for f in corpus:
                to_iterate_over[(f.name, f.path)] = [f]
    elif corpus.singlefile:
        to_iterate_over = {(corpus.name, corpus.path): [corpus]}
    elif not hasattr(corpus, 'subcorpora') or not corpus.subcorpora:
        # just files in a directory
        if files_as_subcorpora:
            to_iterate_over = {}
            for f in corpus.files:
                to_iterate_over[(f.name, f.path)] = [f]
        else:
            to_iterate_over = {(corpus.name, corpus.path): corpus.files}
    else:
        to_iterate_over = {}
        if files_as_subcorpora:
            # don't know if possible: has subcorpora but also .files
            if hasattr(corpus, 'files') and corpus.files is not None:
                for f in corpus.files:
                    to_iterate_over[(f.name, f.path)] = [f]
            # has subcorpora with files in those
            elif hasattr(corpus, 'files') and corpus.files is None:
                for subc in corpus.subcorpora:
                    for f in subc.files:
                        to_iterate_over[(f.name, f.path)] = [f]
        else:
            if corpus[0].level == 's':
                for subcorpus in corpus:
                    to_iterate_over[(subcorpus.name, subcorpus.path)] = subcorpus.files
            elif corpus[0].level == 'f':
                for f in corpus:
                    to_iterate_over[(f.name, f.path)] = [f]
            else:
                for subcorpus in corpus.subcorpora:
                    to_iterate_over[(subcorpus.name, subcorpus.path)] = subcorpus.files
    return to_iterate_over
def welcome_printer(return_it=False):
    """Print welcome message

    With return_it=True (used in notebook mode) the message is
    returned instead of printed. Prints nothing when the caller
    passed printstatus=False.
    """
    if no_conc:
        message = 'Interrogating'
    else:
        message = 'Interrogating and concordancing'
    if only_conc:
        message = 'Concordancing'
    if kwargs.get('printstatus', True):
        thetime = strftime("%H:%M:%S", localtime())
        from corpkit.process import dictformat
        sformat = dictformat(search)
        welcome = ('\n%s: %s %s ...\n %s\n ' \
                   'Query: %s\n %s corpus ... \n' % \
                   (thetime, message, cname, optiontext, sformat, message))
        if return_it:
            return welcome
        else:
            print(welcome)
def goodbye_printer(return_it=False, only_conc=False):
    """Say goodbye before exiting

    Builds a summary from the enclosing scope's result counters
    (conc_df / tot / numentries / total_total); returns the string
    instead of printing when return_it=True.
    """
    if not kwargs.get('printstatus', True):
        return
    thetime = strftime("%H:%M:%S", localtime())
    if only_conc:
        show_me = (thetime, len(conc_df))
        finalstring = '\n\n%s: Concordancing finished! %d results.' % show_me
    else:
        finalstring = '\n\n%s: Interrogation finished!' % thetime
        if countmode:
            finalstring += ' %d matches.' % tot
        else:
            dat = (numentries, total_total)
            finalstring += ' %d unique results, %d total occurrences.' % dat
    if return_it:
        return finalstring
    else:
        print(finalstring)
def get_conc_colnames(corpus,
                      fsi_index=False,
                      simple_tregex_mode=False):
    """
    Build the column names for the concordance DataFrame.

    Base columns come from 'c f s l m r' (presumably corpus, file,
    speaker, left, match, right -- TODO confirm against callers),
    optionally prefixed with 'i' for fsi indexing, plus any
    metadata fields requested via show_conc_metadata.
    """
    fields = []
    base = 'c f s l m r'
    # tregex mode has no per-file column
    if simple_tregex_mode:
        base = base.replace('f ', '')
    if fsi_index and not simple_tregex_mode:
        base = 'i ' + base
    if PYTHON_VERSION == 2:
        base = base.encode('utf-8').split()
    else:
        base = base.split()
    if show_conc_metadata:
        from corpkit.build import get_all_metadata_fields
        meta = get_all_metadata_fields(corpus.path)
        if isinstance(show_conc_metadata, list):
            meta = [i for i in meta if i in show_conc_metadata]
        #elif show_conc_metadata is True:
        #    pass
        for i in sorted(meta):
            # these already have dedicated columns
            if i in ['speaker', 'sent_id', 'parse']:
                continue
            if PYTHON_VERSION == 2:
                base.append(i.encode('utf-8'))
            else:
                base.append(i)
    return base
def make_conc_obj_from_conclines(conc_results, fsi_index=False):
    """
    Turn conclines into DataFrame

    Flattens the per-subcorpus dict of concordance lines into a
    Concordance (DataFrame subclass), deduplicating when
    only_unique is set and truncating to maxconc rows.
    """
    from corpkit.interrogation import Concordance
    #fsi_place = 2 if fsi_index else 0
    all_conc_lines = []
    for sc_name, resu in sorted(conc_results.items()):
        if only_unique:
            unique_results = uniquify(resu)
        else:
            unique_results = resu
        #make into series
        for lin in unique_results:
            #spkr = str(spkr, errors = 'ignore')
            #if not subcorpora:
            #    lin[fsi_place] = lin[fsi_place]
            #lin.insert(fsi_place, sc_name)
            # pad short lines so every Series matches the column set
            if len(lin) < len(conc_col_names):
                diff = len(conc_col_names) - len(lin)
                lin.extend(['none'] * diff)
            all_conc_lines.append(Series(lin, index=conc_col_names))
    conc_df = pd.concat(all_conc_lines, axis=1).T
    # drop the speaker column when it carries no information
    if all(x == '' for x in list(conc_df['s'].values)) or \
       all(x == 'none' for x in list(conc_df['s'].values)):
        conc_df.drop('s', axis=1, inplace=True)
    # count each thing that occurs in the middle col
    # remove things that only appear once?!
    # i have no idea what this was doing in here.
    #if not language_model:
    #    counted = Counter(conc_df['m'])
    #    indices = [l for l in list(conc_df.index) if counted[conc_df.ix[l]['m']] > 1]
    #    conc_df = conc_df.ix[indices]
    #    conc_df = conc_df.reset_index(drop=True)
    locs['corpus'] = corpus.name
    if maxconc:
        conc_df = Concordance(conc_df[:maxconc])
    else:
        conc_df = Concordance(conc_df)
    try:
        conc_df.query = locs
    except AttributeError:
        pass
    return conc_df
def lowercase_result(res):
    """
    Apply optional spelling correction to a result list.

    Name is historical -- lowercasing happens elsewhere.
    todo: remove lowercase and change name
    """
    if statsmode or not res:
        return res
    if not spelling:
        return res
    # this is likely broken, but spelling in interrogate is deprecated anyway
    return [correct_spelling(r) for r in res]
def postprocess_concline(line, fsi_index=False, conc=False):
    """
    Lowercase and spell-correct the context/match columns of one
    concordance line, in place. No-op unless concordancing is on.
    (todo: are these column offsets right?)
    """
    if not conc:
        return line
    # slice covering left context, match and right context columns
    lo, hi = (4, 7) if fsi_index else (2, 5)
    if not preserve_case:
        line[lo:hi] = [str(x).lower() for x in line[lo:hi]]
    if spelling:
        line[lo:hi] = [correct_spelling(str(b)) for b in line[lo:hi]]
    return line
def make_progress_bar():
    """generate a progress bar

    Returns (bar_object, outname_prefix, total_files, animator_kwargs).
    """
    # total is directory count in tregex mode, file count otherwise
    if simple_tregex_mode:
        total_files = len(list(to_iterate_over.keys()))
    else:
        total_files = sum(len(x) for x in list(to_iterate_over.values()))
    par_args = {'printstatus': kwargs.get('printstatus', True),
                'root': root,
                'note': note,
                'quiet': quiet,
                'length': total_files,
                'startnum': kwargs.get('startnum'),
                'denom': kwargs.get('denominator', 1)}
    term = None
    if kwargs.get('paralleling', None) is not None:
        # each parallel job draws on its own terminal line
        from blessings import Terminal
        term = Terminal()
        par_args['terminal'] = term
        par_args['linenum'] = kwargs.get('paralleling')
    if in_notebook:
        par_args['welcome_message'] = welcome_message
    outn = kwargs.get('outname', '')
    if outn:
        outn = getattr(outn, 'name', outn)
        outn = outn + ': '
    tstr = '%s%d/%d' % (outn, current_iter, total_files)
    p = animator(None, None, init=True, tot_string=tstr, **par_args)
    tstr = '%s%d/%d' % (outn, current_iter + 1, total_files)
    animator(p, current_iter, tstr, **par_args)
    return p, outn, total_files, par_args
# find out if using gui
root = kwargs.get('root')
note = kwargs.get('note')
language_model = kwargs.get('language_model')
# set up pause method
original_sigint = signal.getsignal(signal.SIGINT)
if kwargs.get('paralleling', None) is None:
if not root:
original_sigint = signal.getsignal(signal.SIGINT)
signal.signal(signal.SIGINT, signal_handler)
# find out about concordancing
only_conc = False
no_conc = False
if conc is False:
no_conc = True
if isinstance(conc, str) and conc.lower() == 'only':
only_conc = True
no_conc = False
numconc = 0
# wipe non essential class attributes to not bloat query attrib
if isinstance(corpus, Corpus):
import copy
corpus = copy.copy(corpus)
for k, v in corpus.__dict__.items():
if isinstance(v, (Interrogation, Interrodict)):
corpus.__dict__.pop(k, None)
# convert path to corpus object
if not isinstance(corpus, (Corpus, Corpora, Subcorpus, File, Datalist)):
if not multiprocess and not kwargs.get('outname'):
corpus = Corpus(corpus, print_info=False)
# figure out how the user has entered the query and show, and normalise
from corpkit.process import searchfixer
search = searchfixer(search, query)
show = fix_show(show, gramsize)
# instantiate lemmatiser if need be
lem_instance = False
if any(i.endswith('l') for i in show) and isinstance(search, dict) and search.get('t'):
from nltk.stem.wordnet import WordNetLemmatizer
lem_instance = WordNetLemmatizer()
# do multiprocessing if need be
im, corpus, search, query, = is_multiquery(corpus, search, query,
kwargs.get('outname', False))
# figure out if we can multiprocess the corpus
if hasattr(corpus, '__iter__') and im:
corpus = Corpus(corpus, print_info=False)
if hasattr(corpus, '__iter__') and not im:
im = 'datalist'
if isinstance(corpus, Corpora):
im = 'multiplecorpora'
# split corpus if the user wants multiprocessing but no other iterable
if not im and multiprocess:
im = 'datalist'
if hasattr(corpus, 'subcorpora') and corpus.subcorpora:
corpus = corpus[:]
else:
corpus = corpus.files
search = fix_search(search, case_sensitive=case_sensitive, root=root)
exclude = fix_search(exclude, case_sensitive=case_sensitive, root=root)
# if it's already been through pmultiquery, don't do it again
locs['search'] = search
locs['exclude'] = exclude
locs['query'] = query
locs['corpus'] = corpus
locs['multiprocess'] = multiprocess
locs['print_info'] = kwargs.get('printstatus', True)
locs['multiple'] = im
locs['subcorpora'] = subcorpora
# send to multiprocess function
if im:
signal.signal(signal.SIGINT, original_sigint)
from corpkit.multiprocess import pmultiquery
return pmultiquery(**locs)
# get corpus metadata
cname = corpus.name
if isinstance(save, STRINGTYPE):
savename = corpus.name + '-' + save
if save is True:
raise ValueError('save must be str, not bool.')
datatype = getattr(corpus, 'datatype', 'conll')
singlefile = getattr(corpus, 'singlefile', False)
level = getattr(corpus, 'level', 'c')
# store all results in here
from collections import defaultdict
results = defaultdict(Counter)
count_results = defaultdict(list)
conc_results = defaultdict(list)
# check if just counting, turn off conc if so
countmode = 'c' in show or 'mc' in show
if countmode:
no_conc = True
only_conc = False
# where we are at in interrogation
current_iter = 0
# multiprocessing progress bar
denom = kwargs.get('denominator', 1)
startnum = kwargs.get('startnum', 0)
# Determine the search function to be used #
optiontext, simple_tregex_mode, statsmode, tree_to_text, search_trees = determine_search_func(show)
# no conc for statsmode
if statsmode:
no_conc = True
only_conc = False
conc = False
# Set some Tregex-related values
translated_option = False
if search.get('t'):
query, translated_option = get_tregex_values(show)
if query == 'Bad query' and translated_option is None:
if root:
return 'Bad query'
else:
return
# more tregex options
if tree_to_text:
treg_q = r'ROOT << __'
op = ['-o', '-t', '-w', '-f']
elif simple_tregex_mode:
treg_q = search['t']
op = ['-%s' % i for i in translated_option] + ['-o', '-f']
# make iterable object for corpus interrogation
to_iterate_over = make_search_iterable(corpus)
try:
from ipywidgets import IntProgress
_ = IntProgress(min=0, max=10, value=1)
in_notebook = True
except TraitError:
in_notebook = False
except ImportError:
in_notebook = False
# caused in newest ipython
except AttributeError:
in_notebook = False
lemtag = False
if search.get('t'):
from corpkit.process import gettag
lemtag = gettag(search.get('t'), lemmatag)
usecols = auto_usecols(search, exclude, show, kwargs.pop('usecols', None), coref=coref)
# print welcome message
welcome_message = welcome_printer(return_it=in_notebook)
# create a progress bar
p, outn, total_files, par_args = make_progress_bar()
if conc:
conc_col_names = get_conc_colnames(corpus,
fsi_index=fsi_index,
simple_tregex_mode=False)
# Iterate over data, doing interrogations
for (subcorpus_name, subcorpus_path), files in sorted(to_iterate_over.items()):
if nosubmode:
subcorpus_name = '_nosubmode'
# results for subcorpus go here
#conc_results[subcorpus_name] = []
#count_results[subcorpus_name] = []
#results[subcorpus_name] = Counter()
# get either everything (tree_to_text) or the search['t'] query
if tree_to_text or simple_tregex_mode:
result = tregex_engine(query=treg_q,
options=op,
corpus=subcorpus_path,
root=root,
preserve_case=preserve_case)
# format search results with slashes etc
if not countmode and not tree_to_text:
result = format_tregex(result, show, translated_option=translated_option,
exclude=exclude, excludemode=excludemode, lemtag=lemtag,
lem_instance=lem_instance, countmode=countmode, speaker_data=False)
# if concordancing, do the query again with 'whole' sent and fname
if not no_conc:
ops = ['-w'] + op
#ops = [i for i in ops if i != '-n']
whole_result = tregex_engine(query=search['t'],
options=ops,
corpus=subcorpus_path,
root=root,
preserve_case=preserve_case
)
# format match too depending on option
if not only_format_match:
wholeresult = format_tregex(whole_result, show, translated_option=translated_option,
exclude=exclude, excludemode=excludemode, lemtag=lemtag,
lem_instance=lem_instance, countmode=countmode, speaker_data=False, whole=True)
# make conc lines from conc results
conc_result = make_conc_lines_from_whole_mid(whole_result, result, show=show)
for lin in conc_result:
if maxconc is False or numconc < maxconc:
conc_results[subcorpus_name].append(lin)
numconc += 1
# add matches to ongoing counts
if countmode:
count_results[subcorpus_name] += [result]
else:
if result:
results[subcorpus_name] += Counter([i[-1] for i in result])
else:
results[subcorpus_name] += Counter()
# update progress bar
current_iter += 1
tstr = '%s%d/%d' % (outn, current_iter + 1, total_files)
animator(p, current_iter, tstr, **par_args)
continue
# todo: move this
kwargs.pop('by_metadata', None)
# conll querying goes by file, not subcorpus
for f in files:
slow_treg_speaker_guess = kwargs.get('outname', '') if kwargs.get('multispeaker') else ''
filepath, corefs = f.path, coref
res, conc_res = pipeline(filepath, search=search, show=show,
dep_type=dep_type,
exclude=exclude,
excludemode=excludemode,
searchmode=searchmode,
case_sensitive=case_sensitive,
conc=conc,
only_format_match=only_format_match,
speaker=slow_treg_speaker_guess,
gramsize=gramsize,
no_punct=no_punct,
no_closed=no_closed,
window=window,
filename=f.path,
coref=corefs,
countmode=countmode,
maxconc=(maxconc, numconc),
is_a_word=is_a_word,
by_metadata=subcorpora,
show_conc_metadata=show_conc_metadata,
just_metadata=just_metadata,
skip_metadata=skip_metadata,
fsi_index=fsi_index,
category=subcorpus_name,
translated_option=translated_option,
statsmode=statsmode,
preserve_case=preserve_case,
usecols=usecols,
search_trees=search_trees,
lem_instance=lem_instance,
lemtag=lemtag,
**kwargs)
if res is None and conc_res is None:
current_iter += 1
tstr = '%s%d/%d' % (outn, current_iter + 1, total_files)
animator(p, current_iter, tstr, **par_args)
continue
# deal with symbolic structures---that is, rather than adding
# results by subcorpora, add them by metadata value
# todo: sorting?
if subcorpora:
for (k, v), concl in zip(res.items(), conc_res.values()):
v = lowercase_result(v)
results[k] += Counter(v)
for line in concl:
if maxconc is False or numconc < maxconc:
line = postprocess_concline(line,
fsi_index=fsi_index, conc=conc)
conc_results[k].append(line)
numconc += 1
current_iter += 1
tstr = '%s%d/%d' % (outn, current_iter + 1, total_files)
animator(p, current_iter, tstr, **par_args)
continue
# garbage collection needed?
sents = None
corefs = None
if res == 'Bad query':
return 'Bad query'
if countmode:
count_results[subcorpus_name] += [res]
else:
# add filename and do lowercasing for conc
if not no_conc:
for line in conc_res:
line = postprocess_concline(line,
fsi_index=fsi_index, conc=conc)
if maxconc is False or numconc < maxconc:
conc_results[subcorpus_name].append(line)
numconc += 1
# do lowercasing and spelling
if not only_conc:
    res = lowercase_result(res)
    # discard removes low-frequency results, helping with the
    # curse of dimensionality
    countres = Counter(res)
    if isinstance(discard, float):
        # keep roughly the top (1 - discard) fraction of results
        # (bug fix: previously read an undefined name, 'counter',
        # raising NameError whenever discard was a float; also
        # removed a dead no-op call to countres.most_common())
        nkeep = len(countres) - len(countres) * discard
        countres = Counter({k: v for i, (k, v) in enumerate(countres.most_common()) if i <= nkeep})
    elif isinstance(discard, int):
        # drop results occurring fewer than `discard` times
        countres = Counter({k: v for k, v in countres.most_common() if v >= discard})
    results[subcorpus_name] += countres
#else:
#results[subcorpus_name] += res
# update progress bar
current_iter += 1
tstr = '%s%d/%d' % (outn, current_iter + 1, total_files)
animator(p, current_iter, tstr, **par_args)
# Get concordances into DataFrame, return if just conc
if not no_conc:
# fail on this line with typeerror if no results?
conc_df = make_conc_obj_from_conclines(conc_results, fsi_index=fsi_index)
if only_conc and conc_df is None:
return
elif only_conc:
locs = sanitise_dict(locs)
try:
conc_df.query = locs
except AttributeError:
return conc_df
if save and not kwargs.get('outname'):
print('\n')
conc_df.save(savename)
goodbye_printer(only_conc=True)
if not root:
signal.signal(signal.SIGINT, original_sigint)
return conc_df
else:
conc_df = None
# Get interrogation into DataFrame
if countmode:
df = Series({k: sum(v) for k, v in sorted(count_results.items())})
tot = df.sum()
else:
the_big_dict = {}
unique_results = set(item for sublist in list(results.values()) for item in sublist)
sortres = sorted(results.items(), key=lambda x: x[0])
for word in unique_results:
the_big_dict[word] = [subcorp_result[word] for _, subcorp_result in sortres]
# turn master dict into dataframe, sorted
df = DataFrame(the_big_dict, index=sorted(results.keys()))
# for ngrams, remove hapaxes
#if show_ngram or show_collocates:
# if not language_model:
# df = df[[i for i in list(df.columns) if df[i].sum() > 1]]
numentries = len(df.columns)
tot = df.sum(axis=1)
total_total = df.sum().sum()
# turn df into series if all conditions met
conds = [countmode,
files_as_subcorpora,
subcorpora,
kwargs.get('df1_always_df')]
anyxs = [level == 's',
singlefile,
nosubmode]
if all(not x for x in conds) and any(x for x in anyxs):
df = Series(df.ix[0])
df.sort_values(ascending=False, inplace=True)
tot = df.sum()
numentries = len(df.index)
total_total = tot
# turn data into DF for GUI if need be
if isinstance(df, Series) and kwargs.get('df1_always_df'):
total_total = df.sum()
df = DataFrame(df)
tot = Series(total_total, index=['Total'])
# if we're doing files as subcorpora, we can remove the extension etc
if isinstance(df, DataFrame) and files_as_subcorpora:
cname = corpus.name.replace('-stripped', '').replace('-parsed', '')
edits = [(r'(-[0-9][0-9][0-9])?\.txt\.conll', ''),
(r'-%s(-stripped)?(-parsed)?' % cname, '')]
from corpkit.editor import editor
df = editor(df, replace_subcorpus_names=edits).results
tot = df.sum(axis=1)
total_total = df.sum().sum()
if conc_df is not None and conc_df is not False:
# removed 'f' from here for now
for col in ['c']:
for pat in ['.txt', '.conll']:
conc_df[col] = conc_df[col].str.replace(pat, '')
conc_df[col] = conc_df[col].str.replace(r'-[0-9][0-9][0-9]$', '')
#df.index = df.index.str.replace('w', 'this')
# make interrogation object
locs['corpus'] = corpus.path
locs = sanitise_dict(locs)
interro = Interrogation(results=df, totals=tot, query=locs, concordance=conc_df)
# save it
if save and not kwargs.get('outname'):
print('\n')
interro.save(savename)
goodbye = goodbye_printer(return_it=in_notebook)
if in_notebook:
try:
p.children[2].value = goodbye.replace('\n', '')
except AttributeError:
pass
if not root:
signal.signal(signal.SIGINT, original_sigint)
return interro
2
Example 25
def gen(sc, asset, expire):
    '''
    Database population function.
    What we are doing here is trying to interpret the output of plugin ID 20811
    and use that information to help populate the database with individualized
    entries of the software that is installed on the host. This information will
    later be used to build the report.

    sc     -- SecurityCenter API client (provides .analysis())
    asset  -- asset list id whose hosts should be scanned
    expire -- age in days after which old entries are purged

    NOTE: this is Python 2 code (print statement below).
    '''
    # The following regex patterns are used to pull out the needed fields from
    # Plugin ID 20811
    redate = re.compile(r'\[installed on (\d{4})/(\d{1,2})/(\d{1,2})\]')
    reinvdate = re.compile(r'\[installed on (\d{1,2})/(\d{1,2})/(\d{4})\]')
    rever = re.compile(r'\[version (.*?)\]')
    resw = re.compile(r'^([\w\s\.\(\-\)\+]*)')
    s = Session()
    # single timestamp for this whole run; also used to expire old rows
    ts = datetime.datetime.now()
    for vuln in sc.analysis(('pluginID','=','20811,22869'),
                            ('asset', '=', {'id': str(asset)}),
                            tool='vulndetails'):
        # First we need to get the host information...
        nh = False
        host = s.query(Host).filter_by(ip=vuln['ip']).first()
        if not host:
            # unseen host: build a fresh row from a summary query
            host = Host()
            nh = True
            hdata = sc.analysis(('ip', '=', vuln['ip']),tool='sumip')[0]
            host.ip = vuln['ip']
            host.name = vuln['netbiosName']
            host.cpe = hdata['osCPE']
            host.dns = hdata['dnsName']
            host.asset_id = asset
        if nh:
            s.add(host)
        else:
            s.merge(host)
        s.commit()
        sys.stdout.write('%4d\t%-16s\t%-40s' % (host.id, host.ip, host.dns))
        sys.stdout.flush()
        if vuln['pluginID'] == '22869':
            # Unix-style package listings: one parser per platform
            if 'CentOS Linux system' in vuln['pluginText'] or 'Red Hat Linux system' in vuln['pluginText']:
                software = re.findall(' ([a-zA-Z0-9\.\-]*)\|',vuln['pluginText'])
                for item in software:
                    entry = Entry()
                    entry.name = item
                    entry.timestamp = ts
                    entry.host_id = host.id
                    s.add(entry)
                s.commit()
            elif 'SunOS 5.10' in vuln['pluginText']:
                software = re.findall('Patch: ([^ ]*)', vuln['pluginText'])
                for item in software:
                    entry = Entry()
                    entry.name = item
                    entry.timestamp = ts
                    entry.host_id = host.id
                    s.add(entry)
                s.commit()
            elif 'Solaris 11 system' in vuln['pluginText']:
                # these matches carry a version as well as a name
                software = re.findall('([\w\/]+)\W+([0-9\.\-]+).*\n',vuln['pluginText'])
                for item in software:
                    entry = Entry()
                    entry.name = item[0]
                    entry.version = item[1]
                    entry.timestamp = ts
                    entry.host_id = host.id
                    s.add(entry)
                s.commit()
            elif 'Mac OS X system' in vuln['pluginText']:
                software = re.findall(' ([a-zA-Z0-9\.\-\_]*\.pkg)\n',vuln['pluginText'])
                for item in software:
                    entry = Entry()
                    entry.name = item
                    entry.timestamp = ts
                    entry.host_id = host.id
                    s.add(entry)
                s.commit()
            else:
                sys.stdout.write('\t[NO FORMATTER]')
                sys.stdout.flush()
        if vuln['pluginID'] == '20811':
            # Windows-style listing: parse section by section
            software = False
            patches = False
            sw = None
            nh = False
            s.commit()
            for line in vuln['pluginText'].split('\n'):
                if '</plugin_output>' in line:
                    continue
                # section headers flip the parser state
                if line == u'The following software are installed on the remote host :':
                    software = True
                    patches = False
                    continue
                if line == u'The following updates are installed :':
                    patches = True
                    continue
                if software and line != '':
                    names = resw.findall(line)
                    vers = rever.findall(line)
                    dates = redate.findall(line)
                    new = Entry()
                    if len(names) > 0: new.name = names[0].strip()
                    if len(vers) > 0: new.version = vers[0]
                    try:
                        if len(dates) > 0:
                            date = datetime.date(year=int(dates[0][0]),
                                                 month=int(dates[0][1]),
                                                 day=int(dates[0][2]))
                            new.date = date
                        else:
                            # fall back to the inverted (m/d/y) date format
                            dates = reinvdate.findall(line)
                            if len(dates) > 0:
                                date = datetime.date(year=int(dates[0][2]),
                                                     month=int(dates[0][0]),
                                                     day=int(dates[0][1]))
                                new.date = date
                    except:
                        pass
                    if patches:
                        # unindented lines name the product a patch belongs to
                        if line[:2] != ' ':
                            sw = line.strip(':').strip()
                            continue
                        else:
                            new.name = '%s (%s)' % (new.name, sw)
                    new.timestamp = ts
                    new.host_id = host.id
                    s.add(new)
            s.commit()
            sys.stdout.write('\tdone\n')
            sys.stdout.flush()
    s.commit()
    # Now to expire the old data out...
    exp = datetime.datetime.now() - datetime.timedelta(days=expire)
    print exp
    # First to delete the aged out entries
    for entry in s.query(Entry).filter(Entry.timestamp < exp).all():
        s.delete(entry)
    s.commit()
    # Next to delete any hosts that we arent pulling info for anymore...
    for host in s.query(Host).all():
        if len(host.entries) == 0:
            s.delete(host)
    s.commit()
    s.close()
2
Example 26
Project: Flowgen Source File: makeflows.py
def process_find_functions(node, MAX_diagram_zoomlevel):
    """Emit PlantUML activity-diagram text for one function/method AST cursor.

    Scans the source lines covered by *node* for ``//$`` flow annotations
    (action comments with an optional zoom digit, ``//$ [condition]``
    contextual comments, and ``code //$`` highlight comments) and, for every
    diagram zoom level from 0 up to ``MAX_diagram_zoomlevel``, builds a
    ``@startuml ... @enduml`` string that is written out through
    ``write_htmlonline()`` and ``write_txt()``.

    node -- libclang cursor for the function/method (presumably a
        clang.cindex.Cursor -- confirm against the caller).
    MAX_diagram_zoomlevel -- highest zoom level to render; the annotation
        syntax here supports levels 0..2.

    NOTE(review): the leading indentation of this block was reconstructed
    from a whitespace-mangled paste -- verify block boundaries (especially
    the main dispatch loop and the multi-line-comment bookkeeping) against
    the upstream Flowgen makeflows.py.
    """
    # \s --> [ \t\r\f\v] : avoids newlines \n
    # (?! ): negative lookahead
    # ()?: optional group
    regextextActionComment = r'^\s*//\$(?!\s+\[)(\s+(?P<tag><\w+>))?\s+(?P<action>.+)$'
    regextextActionComment1 = r'^\s*//\$1(?!\s+\[)\s+(?P<action>.+)$'
    regextextAnyActionComment1 = r'^\s*//\$1?(?!\s+\[)\s+(?P<action>.+)$'
    regextextAnyActionComment = r'^\s*//\$(?P<zoomlevel>[0-9])?(?!\s+\[)\s+(?P<action>.+)$'
    regexActionComment = re.compile(regextextActionComment)
    regexActionComment1 = re.compile(regextextActionComment1)
    regexAnyActionCommentZoomArray = [regexActionComment, re.compile(regextextAnyActionComment1)]
    #anycomment_previousline = regexAnyActionCommentZoomArray[zoom].match(enum_file[i-1-1][1])

    # NOTE(review): this nested def shadows the compiled pattern bound to
    # regexActionComment just above; from here on the name is a factory that
    # returns the action-comment regex for a given zoom level.
    def regexActionComment(zoom):
        """Return the compiled '//$<zoom>' action-comment regex for *zoom*."""
        # Zoom level 0 annotations are written as a bare '//$' (no digit).
        if zoom == 0:
            zoom = ''
        regextextActionComment_zoom = r'^\s*//\$' + str(zoom) + r'(?!\s+\[)\s+(?P<action>.+)$'
        return re.compile(regextextActionComment_zoom)

    # '//$ [condition]' supplies a human-readable branch/loop condition;
    # 'code //$' marks a source line whose function calls should be resolved.
    regexContextualComment = re.compile(r'^\s*//\$\s+\[(?P<condition>.+)\]\s*$')
    regexHighlightComment = re.compile(r'^\s*(?P<commandline>.+?)\s+//\$\s*(?:$|//.+$)')
    #regexIf = re.compile(r'^\s*if\s*\((?P<condition>.*)\)\s*{?\s*(?:$|//.*$)')
    #regexElseIf = re.compile(r'^\s*}?\s*else if\s*\((?P<condition>.*)\)\s*{\s*(?:$|//.*$)')
    #this only works in a one line
    #regexIf1line = re.compile(r'^\s*if\s*\((?P<condition>.*)\)\s*{\s*(?:$|//.*$)')

    # Source extent of the function and the file it lives in.
    start_line = node.extent.start.line
    end_line = node.extent.end.line
    infile_clang = node.location.file
    global infile_str
    infile_str = node.location.file.name.decode("utf-8")
    infile = open(infile_str, 'r')
    #lines enumerated starting from 1
    enum_file = list(enumerate(infile, start=1))
    infile.close()
    ##look for comment inside function/method
    #comment_inside_method = False
    #if lookfor_lowestZoomactionAnnotation_inNode(node,0):
    #    comment_inside_method = True
    ##if ActionComment inside function/method:
    #if comment_inside_method == True :
    print('Processing %s of kind %s [start_line=%s, end_line=%s. At "%s"]' % (
        node.spelling.decode("utf-8"), node.kind.name, node.extent.start.line, node.extent.end.line,
        node.location.file))
    # TO DO: zoom loop generates all possible zoom levels. Instead, only relevant zoom for each diagram should be generated.
    zoom_str_Array = ['', '1', '2']
    for diagram_zoomlevel in range(0, MAX_diagram_zoomlevel + 1):
        class_name = ''
        if node.kind.name == 'CXX_METHOD':
            class_name = str(node.semantic_parent.spelling.decode("utf8")) + '_'
            #also see node.lexical_parent.spelling
        # Output name: the cursor's USR plus the zoom suffix, alphanumerics only.
        outfile_str = str(node.get_usr().decode("utf8")) + zoom_str_Array[diagram_zoomlevel]
        #remove special characters from outfile_str
        outfile_str = ''.join(e for e in outfile_str if e.isalnum())
        #outfile= open(outfile_str+'.txt', "w+")
        # find if statements inside the function
        ifbeginlineArray, ifendlineArray, ifnodeArray = find_ifstmt(node)
        # print (ifbeginlineArray, ifendlineArray, ifnodeArray)
        # find loop statements inside the function
        loopbeginlineArray, loopendlineArray, loopnodeArray, looptypeArray = find_loopstmt(node)
        # print (loopbeginlineArray, loopendlineArray, loopnodeArray)
        #variables for conditional statements ('Nested' means nested inside another conditional statement)
        elseifbeginlineArray = []
        elsebeginline = None
        ifstructurenodeArray = []
        ifbeginlineNestedArray = []
        ifendlineNestedArray = []
        ifnodeNestedArray = []
        ifstructureelseifnodeArray = []
        elseifbeginlineNestedArray = []
        elsebeginlineNested = None
        ifstructurenodeNestedArray = []
        ifstructureelseifnodeNestedArray = []
        endifWrite = False
        endifNestedWrite = False
        elseifNum = 0
        elseifNumNested = 0
        IdxIfbeginlineArray = None
        IdxIfbeginlineArrayNested = None
        #write_zoomlevel_beforeifstmt=None
        ifstmt_write_zoomlevel = None
        ifstmtNested_write_zoomlevel = None
        #variables for loop statements
        endloopWrite = False
        IdxLoopbeginlineArray = None
        loopstmt_write_zoomlevel = None
        loopdescription_flag=False
        #find return statements inside the function
        returnlineArray, returnTypeArray = find_returnstmt(node, diagram_zoomlevel)
        #other variables
        #TO DO: use depthlevel
        depthlevel = 0
        #flagparallelactions=(flag TRUE/FALSE,depthlevel)
        #TO DO: change array for another more transparent structure, like an object with attributes
        flagparallelactions = [False, 0]
        # The three-element lists below are indexed by zoom level (0..2):
        # pending comment text, pending notes, and per-zoom output buffers.
        lastcommentlinematched = [0, 0, 0]
        tab = ' '
        indentation_level = 0
        last_comment_str = ["", "", ""]
        string_notes = ["", "", ""]
        string = ''
        string_tmp = ["", "", ""]
        inside_comment_flag = [False, False, False]
        actioncallsdefArray = []
        write_zoomlevel = None

        def increase_depthlevel():
            """Enter a nested statement: bump depth and flush pending text."""
            nonlocal depthlevel
            depthlevel += 1
            write_strings(write_zoomlevel)
            return

        def decrease_depthlevel():
            """Leave a nested statement: drop depth and flush pending text."""
            nonlocal flagparallelactions, depthlevel, string, indentation_level
            depthlevel -= 1
            write_strings(write_zoomlevel)
            ##if activated parallelflag
            #if flagparallelactions[0]==True and depthlevel==flagparallelactions[1]:
            #    string+= indentation_level*tab+'end fork\n'
            #    flagparallelactions[0]=False
            #    flagparallelactions[1]=None
            return

        def add_note(stringIN):
            """Queue a PlantUML note line for the current zoom level."""
            nonlocal string_notes
            string_notes[write_zoomlevel] += stringIN + '\n'
            return

        #taken from http://stackoverflow.com/questions/2657693/insert-a-newline-character-every-64-characters-using-python
        #def insert_newlines(string, every=75):
        #    lines = []
        #    for i in range(0, len(string), every):
        #        lines.append(string[i:i+every])
        #    return '\n'.join(lines)

        def color(zoomlevel_IN):
            """Map a zoom level to its PlantUML background color."""
            if zoomlevel_IN == 0:
                return '#84add6'
            elif zoomlevel_IN == 1:
                return '#b2cce5'
            elif zoomlevel_IN == 2:
                return '#e0eaf4'

        def write_strings(write_zoomlevelMIN):
            """Flush buffered annotations at/above *write_zoomlevelMIN* into the output."""
            nonlocal string, string_tmp, diagram_zoomlevel
            write_zoomlevelMAX = -100 #initialize variable to absurd value
            #write_zoomlevelMIN: the MIN zoomlevel annotations that will be written. Specified as an entry to the function.
            #write_zoomlevelMAX: the MAX zoomlevel annotations that will be written. Found out inside this function.
            #diagram_zoomlevel: the diagram zoomlevel. write_zoomlevelMAX is lower or equal.

            def write_string_container(write_zoomlevelIN):
                """Wrap the next-higher zoom buffer in a PlantUML 'partition' block."""
                nonlocal string_tmp, last_comment_str, inside_comment_flag
                string_tmp[write_zoomlevelIN] += indentation_level * tab + 'partition ' + color(
                    write_zoomlevelIN) + ' "' + last_comment_str[write_zoomlevelIN] + '" {\n' + string_tmp[
                    write_zoomlevelIN + 1] + indentation_level * tab + '}\n'
                last_comment_str[write_zoomlevelIN] = ""
                inside_comment_flag[write_zoomlevelIN] = False
                string_tmp[write_zoomlevelIN + 1] = ""
                return

            def write_string_normal(write_zoomlevelIN):
                """Render the pending action comment (plus calls/notes) as an activity."""
                nonlocal string_notes
                nonlocal string_tmp
                nonlocal last_comment_str
                nonlocal inside_comment_flag
                nonlocal actioncallsdefArray
                if inside_comment_flag[write_zoomlevelIN]:
                    #write action comment
                    last_comment_str[write_zoomlevelIN] = indentation_level * tab + ':' + color(
                        write_zoomlevelIN) + ':' + last_comment_str[write_zoomlevelIN] + ';\n'
                    #write extra if there are calls
                    if actioncallsdefArray:
                        # Strip the trailing ';\n', append a separator, list the
                        # resolved calls, then close the activity again.
                        last_comment_str[write_zoomlevelIN] = last_comment_str[write_zoomlevelIN][:-2] + "\n----"
                        for it7 in actioncallsdefArray:
                            usr_id_str = str(it7.get_usr().decode("utf-8"))
                            usr_id_str = ''.join(e for e in usr_id_str if e.isalnum())
                            classname = ''
                            if it7.kind.name == 'CXX_METHOD':
                                classname = str(it7.semantic_parent.spelling.decode("utf-8")) + '::'
                            # NOTE(review): read_flowdbs.file is a function
                            # attribute set by read_flowdbs -- confirm upstream.
                            if read_flowdbs(it7.get_usr().decode("utf8")):
                                call_in_filename_str = read_flowdbs.file + '.html'
                                last_comment_str[write_zoomlevelIN] += '\n' + str(
                                    it7.result_type.kind.name) + ' ' + classname + str(it7.displayname.decode(
                                    "utf-8")) + ' -- [[' + call_in_filename_str + '#' + usr_id_str + ' link]]'
                            else:
                                last_comment_str[write_zoomlevelIN] += '\n' + str(
                                    it7.result_type.kind.name) + ' ' + classname + str(it7.displayname.decode("utf-8"))
                            #last_comment_str+=str(it7.result_type.kind.name)+' '+str()+str(it7.displayname.decode("utf-8"))+' -- [[http://www.google.es]]'+'\\n'
                        last_comment_str[write_zoomlevelIN] += ';\n'
                    #write extra if there are notes
                    if string_notes[write_zoomlevelIN] != "":
                        last_comment_str[write_zoomlevelIN] += "note right\n" + string_notes[
                            write_zoomlevelIN] + "end note\n"
                        string_notes[write_zoomlevelIN] = ""
                    #write in temporal string
                    string_tmp[write_zoomlevelIN] += last_comment_str[write_zoomlevelIN]
                    last_comment_str[write_zoomlevelIN] = ''
                    #reinitialize flags
                    inside_comment_flag[write_zoomlevelIN] = False
                    actioncallsdefArray = []
                return

            #reverse loop to find write_zoomlevelMAX and call write_string_normal(write_zoomlevelMAX) if necessary
            for zoom_it in range(diagram_zoomlevel, write_zoomlevelMIN - 1, -1):
                #annotation exists at this level and is not written in temporal string yet
                if inside_comment_flag[zoom_it]:
                    write_zoomlevelMAX = zoom_it
                    write_string_normal(write_zoomlevelMAX)
                    break
                #the temporal string exists at this level
                elif string_tmp[zoom_it] != "":
                    write_zoomlevelMAX = zoom_it
                    break
            #reverse loop from ( write_zoomlevelMAX - 1 ) to write_zoomlevelMIN, where write_string_container() is called
            for zoom_it2 in range(write_zoomlevelMAX - 1, write_zoomlevelMIN - 1, -1):
                write_string_container(zoom_it2)
            #if zoomlevelMIN=0 write temporal string to main string
            if write_zoomlevelMIN == 0:
                string += string_tmp[0]
                string_tmp[0] = ''
            return
            ##write last action annotations for current zoom level and all possible higher ones in their corresponding temporal string
            #for zoom_it in range(write_zoomlevelMIN, diagram_zoomlevel+1):
            #    write_string_normal(zoom_it)
            ##write temporal strings of higher level zooms in the current zoomlevel temporal string
            #for zoom_it2 in range(write_zoomlevelMIN+1,diagram_zoomlevel+1):
            #    string_tmp[write_zoomlevelMIN]+=string_tmp[zoom_it2]
            #    string_tmp[zoom_it2]=''

        # Functions for the if statements.
        # TO DO: reuse parent-if-statement functions as nested-if-statement functions
        def ifbeginlineArray_method():
            """Handle the first line of a top-level if statement."""
            nonlocal elseifbeginlineArray, elsebeginline, ifstructurenodeArray, ifstructureelseifnodeArray
            nonlocal ifbeginlineNestedArray, ifendlineNestedArray, ifnodeNestedArray
            nonlocal string_tmp, indentation_level, depthlevel
            nonlocal endifWrite, IdxIfbeginlineArray, write_zoomlevel, ifstmt_write_zoomlevel
            # look for comment inside if statement
            IdxIfbeginlineArray = ifbeginlineArray.index(i)
            node = ifnodeArray[IdxIfbeginlineArray]
            #if comment inside if statement:
            if lookfor_lowestZoomactionAnnotation_inNode(node, diagram_zoomlevel):
                #adjust zoomlevel
                ifstmt_write_zoomlevel = lookfor_lowestZoomactionAnnotation_inNode.write_zoomlevel
                #write_zoomlevel_beforeifstmt=write_zoomlevel
                write_zoomlevel = ifstmt_write_zoomlevel
                #increase depthlevel
                increase_depthlevel()
                #write 'if' in string
                # Prefer the human-written '//$ [condition]' on the previous
                # source line; otherwise reconstruct the condition from tokens.
                description = regexContextualComment.match(enum_file[i - 1 - 1][1])
                if description:
                    string_tmp[write_zoomlevel] += '\n' + indentation_level * tab + 'if (' + description.group(
                        'condition') + ') then(yes)''\n'
                else:
                    string_condition = ' '.join(
                        t.spelling.decode("utf-8") for t in list(node.get_children())[0].get_tokens())
                    string_condition = string_condition[:-1]
                    string_tmp[
                        write_zoomlevel] += '\n' + indentation_level * tab + 'if (' + string_condition + ' ?) then(yes)''\n'
                #mark } endif to be written in string
                endifWrite = True
                indentation_level += 1
                #explore substructure: then / else if/ else: elseifbeginlineArray, elsebeginline, ifstructurenodeArray, ifstructureelseifnodeArray
                elseifbeginlineArray, elsebeginline, ifstructurenodeArray, ifstructureelseifnodeArray = find_elsestmt(
                    ifnodeArray[IdxIfbeginlineArray])
                #explore then and update ifbeginlineNestedArray, ifendlineNestedArray, ifnodeNestedArray
                ifbeginlineNestedArray, ifendlineNestedArray, ifnodeNestedArray = find_ifstmt(ifstructurenodeArray[0])
            return

        def elseifbeginlineArray_method():
            """Handle an 'else if' branch of the current top-level if statement."""
            nonlocal ifbeginlineNestedArray, ifendlineNestedArray, ifnodeNestedArray
            nonlocal elseifNum, string_tmp, indentation_level, write_zoomlevel
            write_zoomlevel = ifstmt_write_zoomlevel
            decrease_depthlevel()
            increase_depthlevel()
            elseifNum += 1
            node = ifstructureelseifnodeArray[elseifNum - 1]
            #write 'else if' in string
            description = regexContextualComment.match(enum_file[i - 1 - 1][1])
            if description:
                string_tmp[write_zoomlevel] += (indentation_level - 1) * tab + 'elseif (' + description.group(
                    'condition') + ') then (yes)' + '\n'
            else:
                string_condition = ' '.join(
                    t.spelling.decode("utf-8") for t in list(node.get_children())[0].get_tokens())
                string_condition = string_condition[:-1]
                string_tmp[write_zoomlevel] += (
                    indentation_level - 1) * tab + 'elseif (' + string_condition + ' ?) then (yes)' + '\n'
            #explore elseif and update ifbeginlineNestedArray, ifendlineNestedArray, ifnodeNestedArray
            ifbeginlineNestedArray, ifendlineNestedArray, ifnodeNestedArray = find_ifstmt(
                ifstructurenodeArray[elseifNum])
            return

        def elsebeginline_method():
            """Handle the 'else' branch of the current top-level if statement."""
            nonlocal ifbeginlineNestedArray, ifendlineNestedArray, ifnodeNestedArray
            nonlocal string_tmp, indentation_level, write_zoomlevel
            write_zoomlevel = ifstmt_write_zoomlevel
            decrease_depthlevel()
            increase_depthlevel()
            #write 'else' in string
            string_tmp[write_zoomlevel] += (indentation_level - 1) * tab + 'else(no)' + '\n'
            #explore else and update ifbeginlineNestedArray, ifendlineNestedArray, ifnodeNestedArray
            ifbeginlineNestedArray, ifendlineNestedArray, ifnodeNestedArray = find_ifstmt(ifstructurenodeArray[-1])
            return

        def ifendlineArray_method():
            """Close the current top-level if statement and reset its state."""
            nonlocal string_tmp, indentation_level, depthlevel
            nonlocal endifWrite, elsebeginline, elseifNum, ifstmt_write_zoomlevel, write_zoomlevel
            write_zoomlevel = ifstmt_write_zoomlevel
            decrease_depthlevel()
            #is the else condition explicitly written? Otherwise write now
            if elsebeginline == None:
                string_tmp[write_zoomlevel] += (indentation_level - 1) * tab + 'else(no)' + '\n'
            #write endif's in string
            string_tmp[write_zoomlevel] += (indentation_level - 1) * tab + 'endif' + '\n' + '\n'
            indentation_level -= 1
            #reset all variables
            depthlevel -= 1
            endifWrite = False
            elseifNum = 0
            del elseifbeginlineArray[:]
            elsebeginline = None
            ifstmt_write_zoomlevel = None
            #write_zoomlevel=write_zoomlevel_beforeifstmt
            #write_zoomlevel_before_ifstmt=None
            return

        ##
        def ifbeginlineNestedArray_method():
            """Handle the first line of an if statement nested inside another if."""
            nonlocal IdxIfbeginlineArrayNested, string_tmp, indentation_level, depthlevel, endifNestedWrite
            nonlocal elseifbeginlineNestedArray, elsebeginlineNested, ifstructurenodeNestedArray, ifstructureelseifnodeNestedArray, ifstmtNested_write_zoomlevel, write_zoomlevel
            #look for comment inside Nested if statement
            IdxIfbeginlineArrayNested = ifbeginlineNestedArray.index(i)
            # NOTE(review): indexes ifnodeArray with a *nested* index here,
            # while the substructure exploration below uses ifnodeNestedArray
            # -- looks inconsistent; confirm against upstream before changing.
            node = ifnodeArray[IdxIfbeginlineArrayNested]
            #if comment inside if statement:
            if lookfor_lowestZoomactionAnnotation_inNode(node, diagram_zoomlevel):
                #adjust zoomlevel
                ifstmtNested_write_zoomlevel = lookfor_lowestZoomactionAnnotation_inNode.write_zoomlevel
                write_zoomlevel = ifstmtNested_write_zoomlevel
                #increase depthlevel
                increase_depthlevel()
                #write 'if' in string
                description = regexContextualComment.match(enum_file[i - 1 - 1][1])
                if description:
                    string_tmp[write_zoomlevel] += '\n' + indentation_level * tab + 'if (' + description.group(
                        'condition') + ') then(yes)''\n'
                else:
                    string_condition = ' '.join(
                        t.spelling.decode("utf-8") for t in list(node.get_children())[0].get_tokens())
                    string_condition = string_condition[:-1]
                    string_tmp[
                        write_zoomlevel] += '\n' + indentation_level * tab + 'if (' + string_condition + ' ?) then(yes)''\n'
                #mark } Nested endif to be written in string
                endifNestedWrite = True
                indentation_level += 1
                #explore substructure: then / else if/ else: elseifbeginlineNestedArray, elsebeginlineNested, ifstructurenodeNestedArray
                elseifbeginlineNestedArray, elsebeginlineNested, ifstructurenodeNestedArray, ifstructureelseifnodeNestedArray = find_elsestmt(
                    ifnodeNestedArray[IdxIfbeginlineArrayNested])
            return

        def elseifbeginlineNestedArray_method():
            """Handle an 'else if' branch of the current nested if statement."""
            nonlocal string, indentation_level, elseifNumNested, write_zoomlevel
            elseifNumNested += 1
            node = ifstructureelseifnodeNestedArray[elseifNumNested - 1]
            write_zoomlevel = ifstmtNested_write_zoomlevel
            decrease_depthlevel()
            increase_depthlevel()
            #write 'else if' in string
            # Nested else-if is rendered as else + a new if (extra indent), so
            # ifendlineNestedArray_method() later emits one endif per branch.
            description = regexContextualComment.match(enum_file[i - 1 - 1][1])
            if description:
                string_tmp[write_zoomlevel] += (
                    indentation_level - 1) * tab + 'else(no)' + '\n' + indentation_level * tab + 'if (' + description.group(
                    'condition') + ') then (yes)' + '\n'
            else:
                string_condition = ' '.join(
                    t.spelling.decode("utf-8") for t in list(node.get_children())[0].get_tokens())
                string_condition = string_condition[:-1]
                string_tmp[write_zoomlevel] += (
                    indentation_level - 1) * tab + 'else(no)' + '\n' + indentation_level * tab + 'if (' + string_condition + ' ?) then (yes)' + '\n'
            indentation_level += 1
            return

        def elsebeginlineNested_method():
            """Handle the 'else' branch of the current nested if statement."""
            nonlocal string_tmp, indentation_level, write_zoomlevel
            write_zoomlevel = ifstmtNested_write_zoomlevel
            decrease_depthlevel()
            increase_depthlevel()
            #write 'else' in string
            string_tmp[write_zoomlevel] += (indentation_level - 1) * tab + 'else(no)' + '\n'
            return

        def ifendlineNestedArray_method():
            """Close the current nested if statement and reset its state."""
            nonlocal string_tmp, indentation_level, depthlevel
            nonlocal endifNestedWrite, elsebeginlineNested, elseifNumNested, ifstmtNested_write_zoomlevel, write_zoomlevel
            write_zoomlevel = ifstmtNested_write_zoomlevel
            decrease_depthlevel()
            #is the else condition explicitly written? Otherwise write now
            if elsebeginlineNested == None:
                string_tmp[write_zoomlevel] += (indentation_level - 1) * tab + 'else(no)' + '\n'
            #write endif's in string
            for n in range(elseifNumNested):
                string_tmp[write_zoomlevel] += (indentation_level - 1) * tab + 'endif' + '\n'
                indentation_level -= 1
            string_tmp[write_zoomlevel] += (indentation_level - 1) * tab + 'endif' + '\n' + '\n'
            indentation_level -= 1
            #reset all variables
            depthlevel -= 1
            endifNestedWrite = False
            elseifNumNested = 0
            del elseifbeginlineNestedArray[:]
            elsebeginlineNested = None
            ifstmtNested_write_zoomlevel = None
            return

        # Functions for the loop statements.
        def loopbeginlineArray_method():
            """Handle the first line of a while/do/for loop statement."""
            nonlocal string_tmp, indentation_level, depthlevel
            nonlocal endloopWrite, IdxLoopbeginlineArray, write_zoomlevel, loopstmt_write_zoomlevel, loopdescription_flag
            IdxLoopbeginlineArray = loopbeginlineArray.index(i)
            node = loopnodeArray[IdxLoopbeginlineArray]
            # if comment inside loop statement with the adequate zoom level:
            if lookfor_lowestZoomactionAnnotation_inNode(node, diagram_zoomlevel):
                # adjust zoomlevels and depthlevel
                loopstmt_write_zoomlevel = lookfor_lowestZoomactionAnnotation_inNode.write_zoomlevel
                write_zoomlevel = loopstmt_write_zoomlevel
                increase_depthlevel()
                # write 'loop' in string
                description = regexContextualComment.match(enum_file[i - 1 - 1][1])
                if description:
                    string_tmp[write_zoomlevel] += '\n' + indentation_level * tab + 'while (' + description.group(
                        'condition') + ')''\n'
                    loopdescription_flag=True
                else:
                    # depends on the loop type
                    # 207: A while statement.
                    if looptypeArray[IdxLoopbeginlineArray] == 207:
                        string_condition = ' '.join(
                            t.spelling.decode("utf-8") for t in list(node.get_children())[0].get_tokens())[:-1]
                        string_tmp[
                            write_zoomlevel] += '\n' + indentation_level * tab + 'while (' + string_condition + '? )''\n'
                    # 208: A do statement.
                    elif looptypeArray[IdxLoopbeginlineArray] == 208:
                        string_tmp[write_zoomlevel] += '\n' + indentation_level * tab + 'repeat''\n'
                    # 209: A for statement.
                    elif looptypeArray[IdxLoopbeginlineArray] == 209:
                        #the '0','1','2' children of the node contain the spellings of the three elements of the FOR loop.
                        #We have to call the command get_tokens, which produces an iterator over all tokens and then join them into the same string.
                        #However, for the '0' and '2' children, we don't want the last token. We have first to convert the iterator into a list and then use [:-1]
                        string_condition = 'FOR ('+' '.join(
                            t.spelling.decode("utf-8") for t in list(list(node.get_children())[0].get_tokens())[:-1])+' '+' '.join(
                            t.spelling.decode("utf-8") for t in list(node.get_children())[1].get_tokens())+' '+' '.join(
                            t.spelling.decode("utf-8") for t in list(list(node.get_children())[2].get_tokens())[:-1])+' )'
                        string_tmp[
                            write_zoomlevel] += '\n' + indentation_level * tab + 'while (' + string_condition + ')''\n'
                # mark } endloop to be written in string
                endloopWrite = True
                indentation_level += 1
            return

        def loopendlineArray_method():
            """Close the current loop statement and reset its state."""
            nonlocal string_tmp, indentation_level, depthlevel, IdxLoopbeginlineArray
            nonlocal endloopWrite, loopstmt_write_zoomlevel, write_zoomlevel, loopdescription_flag
            write_zoomlevel = loopstmt_write_zoomlevel
            decrease_depthlevel()
            #write 'loop end' in string; it depends on the loop type
            # 207: A while statement.
            if loopdescription_flag:
                string_tmp[write_zoomlevel] += (indentation_level - 1) * tab + 'endwhile' + '\n' + '\n'
            else:
                if looptypeArray[IdxLoopbeginlineArray]==207:
                    string_tmp[write_zoomlevel] += (indentation_level - 1) * tab + 'endwhile' + '\n' + '\n'
                # 208: A do statement.
                elif looptypeArray[IdxLoopbeginlineArray]==208:
                    pass
                    node = loopnodeArray[IdxLoopbeginlineArray]
                    string_condition = ' '.join(
                        t.spelling.decode("utf-8") for t in list(node.get_children())[1].get_tokens())[:-1]
                    string_tmp[write_zoomlevel] += (indentation_level - 1) * tab + 'repeat while ('+ string_condition+ '? )''\n' + '\n'
                # 209: A for statement.
                elif looptypeArray[IdxLoopbeginlineArray]==209:
                    string_tmp[write_zoomlevel] += (indentation_level - 1) * tab + 'endwhile' + '\n' + '\n'
            indentation_level -= 1
            #reset all variables
            depthlevel -= 1
            endloopWrite = False
            loopstmt_write_zoomlevel = None
            loopdescription_flag=False
            return

        string += '@startuml\n\nstart\n skinparam activityBackgroundColor #white \n'
        #main loop over source code lines
        #TO DO: optimization
        for i, line in enum_file:
            if i in range(start_line, end_line):
                #look for an annotated action and set zoomlevel if found
                for zoom_it2 in range(0, diagram_zoomlevel + 1):
                    anyactionannotation = regexActionComment(zoom_it2).match(line)
                    if anyactionannotation:
                        write_zoomlevel = zoom_it2
                        break
                #look for highlight annotation
                comment_highlight = regexHighlightComment.match(line)
                #actions
                if anyactionannotation:
                    #this line continues a previous multi-line action annotation
                    if lastcommentlinematched[write_zoomlevel] == i - 1:
                        last_comment_str[write_zoomlevel] += '\\n' + anyactionannotation.group('action')
                    #first line of action annotation
                    else:
                        write_strings(write_zoomlevel)
                        #new comment at the given zoom level
                        inside_comment_flag[write_zoomlevel] = True
                        ##if <parallel>
                        ##TO DO combine parallel and if statements. paralell inside parallel
                        #if comment.group('tag'):
                        #    if comment.group('tag')=="<parallel>":
                        #        #if begin of parallel actions:
                        #        if flagparallelactions[0]==False:
                        #            string+= indentation_level*tab+'fork\n'
                        #            flagparallelactions[0]=True
                        #            flagparallelactions[1]=depthlevel
                        #        #else
                        #        else:
                        #            if depthlevel==flagparallelactions[1]:
                        #                string+= indentation_level*tab+'fork again\n'
                        ##if not <parallel> but activated parallelflag
                        #else:
                        #    if flagparallelactions[0]==True and depthlevel==flagparallelactions[1]:
                        #        string+= indentation_level*tab+'end fork\n'
                        #        flagparallelactions[0]=False
                        #        flagparallelactions[1]=None
                        #add line to current action annotation
                        last_comment_str[write_zoomlevel] += anyactionannotation.group('action')
                    # NOTE(review): placed at this level so multi-line
                    # annotations keep chaining -- confirm against upstream.
                    lastcommentlinematched[write_zoomlevel] = i
                else:
                    # calls,...
                    if comment_highlight:
                        scan_column_start = 1 + comment_highlight.start('commandline')
                        #the end character is -1. There is an offset of +1 with respect to the file
                        scan_column_end = 1 + comment_highlight.end('commandline') - 1
                        scan_file = infile_clang
                        scan_line = i
                        print('LOOKING FOR CALLS AT: ', scan_file, scan_line, scan_column_start, scan_column_end)
                        singlelinecallsdefArray = find_calls(scan_file, scan_line, scan_column_start, scan_column_end)
                        #for it4 in singlelinecallsdefArray:
                        #print ('singlelinecallsdefArray',it4.displayname.decode("utf-8"))
                        for it5 in singlelinecallsdefArray:
                            if it5 not in actioncallsdefArray:
                                actioncallsdefArray.append(it5)
                    #### ...,OR if statements,...
                    elif i in ifbeginlineArray:
                        ifbeginlineArray_method()
                    #if i in elseifbeginlineArray
                    elif i in elseifbeginlineArray:
                        elseifbeginlineArray_method()
                    #if i in elsebeginline
                    elif i == elsebeginline:
                        elsebeginline_method()
                    #if i is ifendlineArray[IdxIfbeginlineArray] and } is marked to be written in string:
                    elif endifWrite and (i == ifendlineArray[IdxIfbeginlineArray]):
                        ifendlineArray_method()
                    #### Nested if statements
                    elif i in ifbeginlineNestedArray:
                        ifbeginlineNestedArray_method()
                    #if i in elseifbeginlineNestedArray
                    elif i in elseifbeginlineNestedArray:
                        elseifbeginlineNestedArray_method()
                    #if i in elsebeginlineNested
                    elif i == elsebeginlineNested:
                        elsebeginlineNested_method()
                    #if i is ifendlineNestedArray[IdxIfbeginlineArrayNested] and } is marked to be written in string:
                    elif endifNestedWrite and (i == ifendlineNestedArray[IdxIfbeginlineArrayNested]):
                        ifendlineNestedArray_method()
                    #### ...,OR loops,...
                    elif i in loopbeginlineArray:
                        loopbeginlineArray_method()
                    # if i is loopendlineArray[IdxLoopbeginlineArray] and } is marked to be written in string:
                    elif endloopWrite and (i == loopendlineArray[IdxLoopbeginlineArray]):
                        loopendlineArray_method()
                    # ...,OR return statements):
                    elif i in returnlineArray:
                        #print('RETURN:',i,line)
                        if returnTypeArray[returnlineArray.index(i)] == True:
                            #if pending flags, finish them
                            #write_zoomlevel=0
                            #print('write_zoomlevel',write_zoomlevel)
                            #decrease_depthlevel()
                            #print('write_zoomlevel2',write_zoomlevel)
                            write_strings(write_zoomlevel)
                            string_tmp[write_zoomlevel] += "\nstop\n"
                        if returnTypeArray[returnlineArray.index(i)] == False:
                            #print('possible stop', i, line)
                            add_note("possible STOP")
        # Flush everything down to zoom 0 and close/emit this diagram.
        write_strings(0)
        string += '\n@enduml'
        #print (string)
        write_htmlonline(string, outfile_str)
        write_txt(string, outfile_str)
    return
2 votes
Example 27
def doExtractChapterUrlsAndMetadata(self,get_cover=True):
if self.is_adult or self.getConfig("is_adult"):
self.set_adult_cookie()
##---------------------------------------------------------------------------------------------------
## Get the story's title page. Check if it exists.
try:
# don't use cache if manual is_adult--should only happen
# if it's an adult story and they don't have is_adult in ini.
data = self.do_fix_blockquotes(self._fetchUrl(self.url,
usecache=(not self.is_adult)))
soup = self.make_soup(data)
except urllib2.HTTPError, e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(self.url)
else:
raise e
if "Warning: mysql_fetch_array(): supplied argument is not a valid MySQL result resource" in data:
raise exceptions.StoryDoesNotExist(self.url)
if "This story has been marked as having adult content. Please click below to confirm you are of legal age to view adult material in your country." in data:
raise exceptions.AdultCheckRequired(self.url)
if self.password:
params = {}
params['password'] = self.password
data = self._postUrl(self.url, params)
soup = self.make_soup(data)
if not (soup.find('form', {'id' : 'password_form'}) == None):
if self.getConfig('fail_on_password'):
raise exceptions.FailedToDownload("%s requires story password and fail_on_password is true."%self.url)
else:
raise exceptions.FailedToLogin(self.url,"Story requires individual password",passwdonly=True)
##----------------------------------------------------------------------------------------------------
## Extract metadata
storyContentBox = soup.find('div', {'class':'story_content_box'})
# Title
title = storyContentBox.find('a', {'class':re.compile(r'.*\bstory_name\b.*')})
self.story.setMetadata('title',stripHTML(title))
# Author
author = storyContentBox.find('div', {'class':'author'}).find('a')
self.story.setMetadata("author", stripHTML(author))
#No longer seems to be a way to access Fimfiction's internal author ID
self.story.setMetadata("authorId", self.story.getMetadata("author"))
self.story.setMetadata("authorUrl", "http://%s/user/%s" % (self.getSiteDomain(), stripHTML(author)))
#Rating text is replaced with full words for historical compatibility after the site changed
#on 2014-10-27
rating = stripHTML(storyContentBox.find('a', {'class':re.compile(r'.*\bcontent-rating-.*')}))
rating = rating.replace("E", "Everyone").replace("T", "Teen").replace("M", "Mature")
self.story.setMetadata("rating", rating)
# Chapters
for chapter in storyContentBox.find_all('a',{'class':'chapter_link'}):
self.chapterUrls.append((stripHTML(chapter), 'http://'+self.host+chapter['href']))
self.story.setMetadata('numChapters',len(self.chapterUrls))
# Status
# In the case of Fimfiction, possible statuses are 'Completed', 'Incomplete', 'On Hiatus' and 'Cancelled'
# For the sake of bringing it in line with the other adapters, 'Incomplete' becomes 'In-Progress'
# and 'Complete' becomes 'Completed'. 'Cancelled' and 'On Hiatus' are passed through, it's easy now for users
# to change/remove if they want with replace_metadata
status = stripHTML(storyContentBox.find('span', {'class':re.compile(r'.*\bcompleted-status-.*')}))
status = status.replace("Incomplete", "In-Progress").replace("Complete", "Completed")
self.story.setMetadata("status", status)
# Genres and Warnings
# warnings were folded into general categories in the 2014-10-27 site update
categories = storyContentBox.find_all('a', {'class':re.compile(r'.*\bstory_category\b.*')})
for category in categories:
category = stripHTML(category)
if category == "Gore" or category == "Sex":
self.story.addToList('warnings', category)
else:
self.story.addToList('genre', category)
# Word count
wordCountText = stripHTML(storyContentBox.find('li', {'class':'bottom'}).find('div', {'class':'word_count'}))
self.story.setMetadata("numWords", re.sub(r'[^0-9]', '', wordCountText))
# Cover image
storyImage = storyContentBox.find('div', {'class':'story_image'})
if storyImage:
coverurl = storyImage.find('a')['href']
if coverurl.startswith('//'): # fix for img urls missing 'http:'
coverurl = "http:"+coverurl
if get_cover:
# try setting from href, if fails, try using the img src
if self.setCoverImage(self.url,coverurl)[0] == "failedtoload":
img = storyImage.find('img')
# try src, then data-src, then leave None.
coverurl = img.get('src',img.get('data-src',None))
if coverurl:
self.setCoverImage(self.url,coverurl)
coverSource = storyImage.find('a', {'class':'source'})
if coverSource:
self.story.setMetadata('coverSourceUrl', coverSource['href'])
#There's no text associated with the cover source link, so just
#reuse the URL. Makes it clear it's an external link leading
#outside of the fanfic site, at least.
self.story.setMetadata('coverSource', coverSource['href'])
# fimf has started including extra stuff inside the description div.
descdivstr = u"%s"%storyContentBox.find("div", {"class":"description"})
hrstr=u"<hr/>"
descdivstr = u'<div class="description">'+descdivstr[descdivstr.index(hrstr)+len(hrstr):]
self.setDescription(self.url,descdivstr)
# Find the newest and oldest chapter dates
storyData = storyContentBox.find('div', {'class':'story_data'})
oldestChapter = None
newestChapter = None
self.newestChapterNum = None # save for comparing during update.
# Scan all chapters to find the oldest and newest, on
# FiMFiction it's possible for authors to insert new chapters
# out-of-order or change the dates of earlier ones by editing
# them--That WILL break epub update.
for index, chapterDate in enumerate(storyData.find_all('span', {'class':'date'})):
chapterDate = self.ordinal_date_string_to_date(chapterDate.contents[1])
if oldestChapter == None or chapterDate < oldestChapter:
oldestChapter = chapterDate
if newestChapter == None or chapterDate > newestChapter:
newestChapter = chapterDate
self.newestChapterNum = index
if newestChapter is None:
#this will only be true when updating metadata for stories that have 0 chapters
#there is a "last modified" date given on the page, extract it and use that.
moddatetag = storyContentBox.find('span', {'class':'last_modified'})
if not moddatetag is None:
newestChapter = self.ordinal_date_string_to_date(moddatetag('span')[1].text)
# Date updated
self.story.setMetadata("dateUpdated", newestChapter)
# Date published
# falls back to oldest chapter date for stories that haven't been officially published yet
pubdatetag = storyContentBox.find('span', {'class':'date_approved'})
if pubdatetag is None:
if oldestChapter is None:
#this will only be true when updating metadata for stories that have 0 chapters
#and that have never been officially published - a rare occurrence. Fall back to last
#modified date as the publication date, it's all that we've got.
self.story.setMetadata("datePublished", newestChapter)
else:
self.story.setMetadata("datePublished", oldestChapter)
else:
pubDate = self.ordinal_date_string_to_date(pubdatetag('span')[1].text)
self.story.setMetadata("datePublished", pubDate)
# Characters
chars = storyContentBox.find("div", {"class":"extra_story_data"})
for character in chars.find_all("a", {"class":"character_icon"}):
self.story.addToList("characters", character['title'])
# Likes and dislikes
storyToolbar = soup.find('div', {'class':'story-toolbar'})
likes = storyToolbar.find('span', {'class':'likes'})
if not likes is None:
self.story.setMetadata("likes", stripHTML(likes))
dislikes = storyToolbar.find('span', {'class':'dislikes'})
if not dislikes is None:
self.story.setMetadata("dislikes", stripHTML(dislikes))
# Highest view for a chapter and total views
viewSpan = storyToolbar.find('span', {'title':re.compile(r'.*\btotal views\b.*')})
self.story.setMetadata("views", re.sub(r'[^0-9]', '', stripHTML(viewSpan)))
self.story.setMetadata("total_views", re.sub(r'[^0-9]', '', viewSpan['title']))
# Comment count
commentSpan = storyToolbar.find('span', {'title':re.compile(r'.*\bcomments\b.*')})
self.story.setMetadata("comment_count", re.sub(r'[^0-9]', '', stripHTML(commentSpan)))
# Short description
descriptionMeta = soup.find('meta', {'property':'og:description'})
self.story.setMetadata("short_description", stripHTML(descriptionMeta['content']))
#groups
if soup.find('button', {'id':'button-view-all-groups'}):
groupResponse = self._fetchUrl("https://www.fimfiction.net/ajax/stories/%s/groups" % (self.story.getMetadata("storyId")))
groupData = json.loads(groupResponse)
groupList = self.make_soup(groupData["content"])
else:
groupList = soup.find('ul', {'id':'story-groups-list'})
if not (groupList == None):
for groupName in groupList.find_all('a'):
self.story.addToList("groupsUrl", 'http://'+self.host+groupName["href"])
self.story.addToList("groups",stripHTML(groupName).replace(',', ';'))
#sequels
for header in soup.find_all('h1', {'class':'header-stories'}):
# I don't know why using text=re.compile with find() wouldn't work, but it didn't.
if header.text.startswith('Sequels'):
sequelContainer = header.parent
for sequel in sequelContainer.find_all('a', {'class':'story_link'}):
self.story.addToList("sequelsUrl", 'http://'+self.host+sequel["href"])
self.story.addToList("sequels", stripHTML(sequel).replace(',', ';'))
#author last login
userPageHeader = soup.find('div', {'class':re.compile(r'\buser-page-header\b')})
if not userPageHeader == None:
infoContainer = userPageHeader.find('div', {'class':re.compile(r'\binfo-container\b')})
listItems = infoContainer.find_all('li')
lastLoginString = stripHTML(listItems[1])
lastLogin = None
if "online" in lastLoginString:
lastLogin = date.today()
elif "offline" in lastLoginString:
#this regex extracts the number of weeks and the number of days from the last login string.
#durations under a day are ignored.
#group 1 is weeks, group 2 is days
durationGroups = re.match(r"(?:[^0-9]*(\d+?)w)?[^0-9]*(?:(\d+?)d)?", lastLoginString)
lastLogin = date.today() - timedelta(days=int(durationGroups.group(2) or 0), weeks=int(durationGroups.group(1) or 0))
self.story.setMetadata("authorLastLogin", lastLogin)
#The link to the prequel is embedded in the description text, so erring
#on the side of caution and wrapping this whole thing in a try block.
#If anything goes wrong this probably wasn't a valid prequel link.
try:
description = soup.find('div', {'class':'description'})
firstHR = description.find("hr")
nextSib = firstHR.nextSibling
if "This story is a sequel to" in nextSib.string:
link = nextSib.nextSibling
if link.name == "a":
self.story.setMetadata("prequelUrl", 'http://'+self.host+link["href"])
self.story.setMetadata("prequel", stripHTML(link))
except:
pass
2
Example 28
Project: MakerDroid Source File: STLImporter.py
def ImportModel(filename, use_kdtree=True, callback=None, **kwargs):
    """Parse an STL file (binary or ascii flavor) into a triangle Model.

    :param filename: a path/URI string, or a file-like object exposing "read"
    :param use_kdtree: if True, vertices are deduplicated via a PointKdtree
    :param callback: optional callable polled during the facet loop; a truthy
        return value cancels the import
    :return: the populated Model, or None on read error, detection failure,
        cancellation, or an empty model

    Side effects: resets the module-level ``vertices``/``edges``/``kdtree``
    globals (shared with the UniqueVertex machinery) before and after parsing.
    NOTE(review): this is Python 2 code (``except IOError, err_msg`` and the
    ``f.len`` attribute of StringIO) — it will not run unmodified on Python 3.
    """
    global vertices, edges, kdtree
    vertices = 0
    edges = 0
    kdtree = None
    normal_conflict_warning_seen = False
    if hasattr(filename, "read"):
        f = filename
        # useful for later error messages
        filename = "input stream"
    else:
        try:
            url_file = pycam.Utils.URIHandler(filename).open()
            # urllib.urlopen objects do not support "seek" - so we need to read
            # the whole file at once. This is ugly - anyone with a better idea?
            f = StringIO.StringIO(url_file.read())
            url_file.close()
        except IOError, err_msg:
            log.error("STLImporter: Failed to read file (%s): %s" \
                    % (filename, err_msg))
            return None
    # Read the first two lines of (potentially non-binary) input - they should
    # contain "solid" and "facet".
    header_lines = []
    while len(header_lines) < 2:
        # cap the read at 200 bytes so a binary file can't produce a huge "line"
        line = f.readline(200)
        if len(line) == 0:
            # empty line (not even a line-feed) -> EOF
            log.error("STLImporter: No valid lines found in '%s'" % filename)
            return None
        # ignore comment lines
        # note: partial comments (starting within a line) are not handled
        if not line.startswith(";"):
            header_lines.append(line)
    header = "".join(header_lines)
    # read byte 80 to 83 - they contain the "numfacets" value in binary format
    f.seek(80)
    numfacets = unpack("<I", f.read(4))[0]
    binary = False
    # binary STL layout: 80-byte header + 4-byte facet count + 50 bytes/facet
    if f.len == (84 + 50*numfacets):
        binary = True
    elif header.find("solid") >= 0 and header.find("facet") >= 0:
        binary = False
        # rewind: the ascii parser below consumes the stream from the start
        f.seek(0)
    else:
        log.error("STLImporter: STL binary/ascii detection failed")
        return None
    if use_kdtree:
        kdtree = PointKdtree([], 3, 1, epsilon)
    model = Model(use_kdtree)
    t = None
    p1 = None
    p2 = None
    p3 = None
    if binary:
        for i in range(1, numfacets + 1):
            if callback and callback():
                log.warn("STLImporter: load model operation cancelled")
                return None
            # facet record: normal vector (3 floats) ...
            a1 = unpack("<f", f.read(4))[0]
            a2 = unpack("<f", f.read(4))[0]
            a3 = unpack("<f", f.read(4))[0]
            n = Vector(float(a1), float(a2), float(a3))
            # ... followed by three vertices (3 floats each) ...
            v11 = unpack("<f", f.read(4))[0]
            v12 = unpack("<f", f.read(4))[0]
            v13 = unpack("<f", f.read(4))[0]
            p1 = UniqueVertex(float(v11), float(v12), float(v13))
            v21 = unpack("<f", f.read(4))[0]
            v22 = unpack("<f", f.read(4))[0]
            v23 = unpack("<f", f.read(4))[0]
            p2 = UniqueVertex(float(v21), float(v22), float(v23))
            v31 = unpack("<f", f.read(4))[0]
            v32 = unpack("<f", f.read(4))[0]
            v33 = unpack("<f", f.read(4))[0]
            p3 = UniqueVertex(float(v31), float(v32), float(v33))
            # ... and a 2-byte attribute field
            # not used
            attribs = unpack("<H", f.read(2))
            # sign of (normal . (edge1 x edge2)) tells whether the stored
            # normal agrees with the vertex winding
            dotcross = n.dot(p2.sub(p1).cross(p3.sub(p1)))
            if a1 == a2 == a3 == 0:
                # zero normal stored: derive orientation from the vertices only
                dotcross = p2.sub(p1).cross(p3.sub(p1)).z
                n = None
            if dotcross > 0:
                # Triangle expects the vertices in clockwise order
                t = Triangle(p1, p3, p2)
            elif dotcross < 0:
                if not normal_conflict_warning_seen:
                    log.warn(("Inconsistent normal/vertices found in facet " + \
                            "definition %d of '%s'. Please validate the " + \
                            "STL file!") % (i, filename))
                    normal_conflict_warning_seen = True
                t = Triangle(p1, p2, p3)
            else:
                # the three points are in a line - or two points are identical
                # usually this is caused by points, that are too close together
                # check the tolerance value in pycam/Geometry/PointKdtree.py
                log.warn("Skipping invalid triangle: %s / %s / %s " \
                        % (p1, p2, p3) + "(maybe the resolution of the model " \
                        + "is too high?)")
                continue
            if n:
                t.normal = n
            model.append(t)
    else:
        # ascii STL: line-oriented keyword grammar, parsed with one regex per
        # keyword
        solid = re.compile(r"\s*solid\s+(\w+)\s+.*")
        endsolid = re.compile(r"\s*endsolid\s*")
        facet = re.compile(r"\s*facet\s*")
        normal = re.compile(r"\s*facet\s+normal" \
                + r"\s+(?P<x>[-+]?(\d+(\.\d*)?|\.\d+)([eE][-+]?\d+)?)" \
                + r"\s+(?P<y>[-+]?(\d+(\.\d*)?|\.\d+)([eE][-+]?\d+)?)" \
                + r"\s+(?P<z>[-+]?(\d+(\.\d*)?|\.\d+)([eE][-+]?\d+)?)\s+")
        endfacet = re.compile(r"\s*endfacet\s+")
        loop = re.compile(r"\s*outer\s+loop\s+")
        endloop = re.compile(r"\s*endloop\s+")
        vertex = re.compile(r"\s*vertex" \
                + r"\s+(?P<x>[-+]?(\d+(\.\d*)?|\.\d+)([eE][-+]?\d+)?)" \
                + r"\s+(?P<y>[-+]?(\d+(\.\d*)?|\.\d+)([eE][-+]?\d+)?)" \
                + r"\s+(?P<z>[-+]?(\d+(\.\d*)?|\.\d+)([eE][-+]?\d+)?)\s+")
        current_line = 0
        for line in f:
            if callback and callback():
                log.warn("STLImporter: load model operation cancelled")
                return None
            current_line += 1
            m = solid.match(line)
            if m:
                model.name = m.group(1)
                continue
            m = facet.match(line)
            if m:
                m = normal.match(line)
                if m:
                    n = Vector(float(m.group('x')), float(m.group('y')),
                            float(m.group('z')))
                else:
                    n = None
                continue
            m = loop.match(line)
            if m:
                continue
            m = vertex.match(line)
            if m:
                # collect up to three vertices for the current facet
                p = UniqueVertex(float(m.group('x')), float(m.group('y')),
                        float(m.group('z')))
                if p1 is None:
                    p1 = p
                elif p2 is None:
                    p2 = p
                elif p3 is None:
                    p3 = p
                else:
                    log.error("STLImporter: more then 3 points in facet " \
                            + "(line %d)" % current_line)
                continue
            m = endloop.match(line)
            if m:
                continue
            m = endfacet.match(line)
            if m:
                # a facet is complete: validate and emit the triangle
                if p1 is None or p2 is None or p3 is None:
                    log.warn(("Invalid facet definition in line " \
                            + "%d of '%s'. Please validate the STL file!") \
                            % (current_line, filename))
                    n, p1, p2, p3 = None, None, None, None
                    continue
                if not n:
                    n = p2.sub(p1).cross(p3.sub(p1)).normalized()
                # validate the normal
                # The three vertices of a triangle in an STL file are supposed
                # to be in counter-clockwise order. This should match the
                # direction of the normal.
                if n is None:
                    # invalid triangle (zero-length vector)
                    dotcross = 0
                else:
                    # make sure the points are in ClockWise order
                    dotcross = n.dot(p2.sub(p1).cross(p3.sub(p1)))
                if dotcross > 0:
                    # Triangle expects the vertices in clockwise order
                    t = Triangle(p1, p3, p2, n)
                elif dotcross < 0:
                    if not normal_conflict_warning_seen:
                        log.warn(("Inconsistent normal/vertices found in " + \
                                "line %d of '%s'. Please validate the STL " + \
                                "file!") % (current_line, filename))
                        normal_conflict_warning_seen = True
                    t = Triangle(p1, p2, p3, n)
                else:
                    # The three points are in a line - or two points are
                    # identical. Usually this is caused by points, that are too
                    # close together. Check the tolerance value in
                    # pycam/Geometry/PointKdtree.py.
                    log.warn("Skipping invalid triangle: %s / %s / %s " \
                            % (p1, p2, p3) + "(maybe the resolution of the " \
                            + "model is too high?)")
                    n, p1, p2, p3 = (None, None, None, None)
                    continue
                # reset facet state for the next "facet ... endfacet" group
                n, p1, p2, p3 = (None, None, None, None)
                model.append(t)
                continue
            m = endsolid.match(line)
            if m:
                continue
    log.info("Imported STL model: %d vertices, %d edges, %d triangles" \
            % (vertices, edges, len(model.triangles())))
    # reset the module-level counters/kdtree shared with UniqueVertex
    vertices = 0
    edges = 0
    kdtree = None
    if not model:
        # no valid items added to the model
        return None
    else:
        return model
2
Example 29
Project: calibre Source File: preprocess.py
def __call__(self, html, remove_special_chars=None,
        get_preprocess_html=False):
    """Run the HTML preprocessing pipeline on *html* and return the result.

    :param html: the raw HTML string to clean up
    :param remove_special_chars: optional compiled regex; its matches are
        deleted before any other processing
    :param get_preprocess_html: if True, return early after the PREPROCESS +
        start rules (before user rules, dehyphenation, heuristics, etc.)
    :return: the transformed HTML string

    NOTE(review): this is Python 2 code (``ur''`` literals). Several regexes
    below contain character classes that appear to hold an invisible
    soft-hyphen character (rendered here as ``[]``) — they must be preserved
    byte-for-byte.
    """
    if remove_special_chars is not None:
        html = remove_special_chars.sub('', html)
    # NUL bytes break downstream XML handling
    html = html.replace('\0', '')
    is_pdftohtml = self.is_pdftohtml(html)
    # choose the base rule set from the detected input flavor
    if self.is_baen(html):
        rules = []
    elif self.is_book_designer(html):
        rules = self.BOOK_DESIGNER
    elif is_pdftohtml:
        rules = self.PDFTOHTML
    else:
        rules = []
    start_rules = []
    if is_pdftohtml:
        # Remove non breaking spaces
        start_rules.append((re.compile(ur'\u00a0'), lambda match : ' '))
    if not getattr(self.extra_opts, 'keep_ligatures', False):
        html = _ligpat.sub(lambda m:LIGATURES[m.group()], html)
    # maps (compiled_re, replacement) -> original user pattern, for error
    # reporting when a user rule fails at substitution time
    user_sr_rules = {}
    # Function for processing search and replace
    def do_search_replace(search_pattern, replace_txt):
        try:
            search_re = re.compile(search_pattern)
            if not replace_txt:
                replace_txt = ''
            # user rules are prepended so they run before the built-in rules
            rules.insert(0, (search_re, replace_txt))
            user_sr_rules[(search_re, replace_txt)] = search_pattern
        except Exception as e:
            self.log.error('Failed to parse %r regexp because %s' %
                    (search, as_unicode(e)))
    # search / replace using the sr?_search / sr?_replace options
    for i in range(1, 4):
        search, replace = 'sr%d_search'%i, 'sr%d_replace'%i
        search_pattern = getattr(self.extra_opts, search, '')
        replace_txt = getattr(self.extra_opts, replace, '')
        if search_pattern:
            do_search_replace(search_pattern, replace_txt)
    # multi-search / replace using the search_replace option
    search_replace = getattr(self.extra_opts, 'search_replace', None)
    if search_replace:
        search_replace = json.loads(search_replace)
        # reversed + insert(0) preserves the user's original rule order
        for search_pattern, replace_txt in reversed(search_replace):
            do_search_replace(search_pattern, replace_txt)
    end_rules = []
    # delete soft hyphens - moved here so it's executed after header/footer removal
    if is_pdftohtml:
        # unwrap/delete soft hyphens
        end_rules.append((re.compile(u'[](</p>\s*<p>\s*)+\s*(?=[[a-z\d])'), lambda match: ''))
        # unwrap/delete soft hyphens with formatting
        end_rules.append((re.compile(u'[]\s*(</(i|u|b)>)+(</p>\s*<p>\s*)+\s*(<(i|u|b)>)+\s*(?=[[a-z\d])'), lambda match: ''))
    length = -1
    if getattr(self.extra_opts, 'unwrap_factor', 0.0) > 0.01:
        # estimate the typical line length so unwrap rules only join lines
        # that actually reach the margin
        docanalysis = DocAnalysis('pdf', html)
        length = docanalysis.line_length(getattr(self.extra_opts, 'unwrap_factor'))
        if length:
            # print "The pdf line length returned is " + str(length)
            # unwrap em/en dashes
            end_rules.append((re.compile(u'(?<=.{%i}[–—])\s*<p>\s*(?=[[a-z\d])' % length), lambda match: ''))
            end_rules.append(
                # Un wrap using punctuation
                (re.compile(u'(?<=.{%i}([a-zäëïöüàèìòùáćéíĺóŕńśúýâêîôûçąężıãõñæøþðßěľščťžňďřů,:)\IA\u00DF]|(?<!\&\w{4});))\s*(?P<ital></(i|b|u)>)?\s*(</p>\s*<p>\s*)+\s*(?=(<(i|b|u)>)?\s*[\w\d$(])' % length, re.UNICODE), wrap_lines),  # noqa
            )
    for rule in self.PREPROCESS + start_rules:
        html = rule[0].sub(rule[1], html)
    if self.regex_wizard_callback is not None:
        self.regex_wizard_callback(self.current_href, html)
    if get_preprocess_html:
        # caller only wanted the lightly-processed HTML
        return html
    # debugging helper: writes *raw* into the debug_pipeline/input/<where>
    # directory, if the debug_pipeline option is set
    def dump(raw, where):
        import os
        dp = getattr(self.extra_opts, 'debug_pipeline', None)
        if dp and os.path.exists(dp):
            odir = os.path.join(dp, 'input')
            if os.path.exists(odir):
                odir = os.path.join(odir, where)
                if not os.path.exists(odir):
                    os.makedirs(odir)
                name, i = None, 0
                # pick the first unused NNNN.html name
                while not name or os.path.exists(os.path.join(odir, name)):
                    i += 1
                    name = '%04d.html'%i
                with open(os.path.join(odir, name), 'wb') as f:
                    f.write(raw.encode('utf-8'))
    # dump(html, 'pre-preprocess')
    for rule in rules + end_rules:
        try:
            html = rule[0].sub(rule[1], html)
        except re.error as e:
            # a bad user-supplied rule is reported and skipped; a failure in a
            # built-in rule is a real bug and is re-raised
            if rule in user_sr_rules:
                self.log.error(
                    'User supplied search & replace rule: %s -> %s '
                    'failed with error: %s, ignoring.'%(
                        user_sr_rules[rule], rule[1], e))
            else:
                raise
    if is_pdftohtml and length > -1:
        # Dehyphenate
        dehyphenator = Dehyphenator(self.extra_opts.verbose, self.log)
        html = dehyphenator(html,'html', length)
    if is_pdftohtml:
        from calibre.ebooks.conversion.utils import HeuristicProcessor
        pdf_markup = HeuristicProcessor(self.extra_opts, None)
        totalwords = 0
        # only attempt chapter markup on reasonably sized documents
        if pdf_markup.get_word_count(html) > 7000:
            html = pdf_markup.markup_chapters(html, totalwords, True)
    # dump(html, 'post-preprocess')
    # Handle broken XHTML w/ SVG (ugh)
    if 'svg:' in html and SVG_NS not in html:
        html = html.replace(
            '<html', '<html xmlns:svg="%s"' % SVG_NS, 1)
    if 'xlink:' in html and XLINK_NS not in html:
        html = html.replace(
            '<html', '<html xmlns:xlink="%s"' % XLINK_NS, 1)
    html = XMLDECL_RE.sub('', html)
    if getattr(self.extra_opts, 'asciiize', False):
        from calibre.utils.localization import get_udc
        from calibre.utils.mreplace import MReplace
        unihandecoder = get_udc()
        # guillemets -> <<< / >>> before general transliteration
        mr = MReplace(data={u'«':u'<'*3, u'»':u'>'*3})
        html = mr.mreplace(html)
        html = unihandecoder.decode(html)
    if getattr(self.extra_opts, 'enable_heuristics', False):
        from calibre.ebooks.conversion.utils import HeuristicProcessor
        preprocessor = HeuristicProcessor(self.extra_opts, self.log)
        html = preprocessor(html)
    if getattr(self.extra_opts, 'smarten_punctuation', False):
        html = smarten_punctuation(html, self.log)
    try:
        unsupported_unicode_chars = self.extra_opts.output_profile.unsupported_unicode_chars
    except AttributeError:
        unsupported_unicode_chars = u''
    if unsupported_unicode_chars:
        # transliterate characters the output device cannot render
        from calibre.utils.localization import get_udc
        unihandecoder = get_udc()
        for char in unsupported_unicode_chars:
            asciichar = unihandecoder.decode(char)
            html = html.replace(char, asciichar)
    return html
2
Example 30
Project: AWS-Lambda-ML-Microservice-Skeleton Source File: f2py2e.py
def run_compile():
    """
    Do it all in one call!

    Implements ``f2py -c``: partitions ``sys.argv`` into flag groups
    (sysinfo, f2py, fortran-library, fortran-compiler, setup), derives the
    module name and source lists, then hands everything to
    ``numpy.distutils.core.setup`` to build the extension in one go.

    Side effects: rewrites ``sys.argv`` repeatedly and may create/remove a
    temporary build directory.
    """
    import tempfile
    # '-c' got us here; drop it so the remaining argv is pure flags+sources
    i = sys.argv.index('-c')
    del sys.argv[i]
    remove_build_dir = 0
    try:
        i = sys.argv.index('--build-dir')
    except ValueError:
        i = None
    if i is not None:
        build_dir = sys.argv[i + 1]
        del sys.argv[i + 1]
        del sys.argv[i]
    else:
        # no explicit build dir: use a temp dir and clean it up at the end
        remove_build_dir = 1
        build_dir = tempfile.mkdtemp()
    # --link-<resource> flags -> sysinfo_flags
    _reg1 = re.compile(r'[-][-]link[-]')
    sysinfo_flags = [_m for _m in sys.argv[1:] if _reg1.match(_m)]
    sys.argv = [_m for _m in sys.argv if _m not in sysinfo_flags]
    if sysinfo_flags:
        # strip the '--link-' prefix (7 chars), keeping just the resource name
        sysinfo_flags = [f[7:] for f in sysinfo_flags]
    # flags passed straight through to f2py itself
    _reg2 = re.compile(
        r'[-][-]((no[-]|)(wrap[-]functions|lower)|debug[-]capi|quiet)|[-]include')
    f2py_flags = [_m for _m in sys.argv[1:] if _reg2.match(_m)]
    sys.argv = [_m for _m in sys.argv if _m not in f2py_flags]
    # collect 'only:'/'skip:' ... ':' spans verbatim (fl tracks "inside span")
    f2py_flags2 = []
    fl = 0
    for a in sys.argv[1:]:
        if a in ['only:', 'skip:']:
            fl = 1
        elif a == ':':
            fl = 0
        if fl or a == ':':
            f2py_flags2.append(a)
    if f2py_flags2 and f2py_flags2[-1] != ':':
        # ensure the span is terminated
        f2py_flags2.append(':')
    f2py_flags.extend(f2py_flags2)
    sys.argv = [_m for _m in sys.argv if _m not in f2py_flags2]
    # fortran-library / compiler-selection flags for build_ext
    _reg3 = re.compile(
        r'[-][-]((f(90)?compiler([-]exec|)|compiler)=|help[-]compiler)')
    flib_flags = [_m for _m in sys.argv[1:] if _reg3.match(_m)]
    sys.argv = [_m for _m in sys.argv if _m not in flib_flags]
    # fortran compiler tuning flags for config_fc
    _reg4 = re.compile(
        r'[-][-]((f(77|90)(flags|exec)|opt|arch)=|(debug|noopt|noarch|help[-]fcompiler))')
    fc_flags = [_m for _m in sys.argv[1:] if _reg4.match(_m)]
    sys.argv = [_m for _m in sys.argv if _m not in fc_flags]
    if 1:
        # validate/normalize any --fcompiler=<vendor> flag against the
        # compiler classes numpy.distutils knows about
        del_list = []
        for s in flib_flags:
            v = '--fcompiler='
            if s[:len(v)] == v:
                from numpy.distutils import fcompiler
                fcompiler.load_all_fcompiler_classes()
                allowed_keys = list(fcompiler.fcompiler_class.keys())
                nv = ov = s[len(v):].lower()
                if ov not in allowed_keys:
                    vmap = {}  # XXX
                    try:
                        nv = vmap[ov]
                    except KeyError:
                        if ov not in vmap.values():
                            print('Unknown vendor: "%s"' % (s[len(v):]))
                        nv = ov
                i = flib_flags.index(s)
                flib_flags[i] = '--fcompiler=' + nv
                continue
        # NOTE(review): del_list is never populated above, so this loop is a
        # no-op as written
        for s in del_list:
            i = flib_flags.index(s)
            del flib_flags[i]
    assert len(flib_flags) <= 2, repr(flib_flags)
    _reg5 = re.compile(r'[-][-](verbose)')
    setup_flags = [_m for _m in sys.argv[1:] if _reg5.match(_m)]
    sys.argv = [_m for _m in sys.argv if _m not in setup_flags]
    if '--quiet' in f2py_flags:
        setup_flags.append('--quiet')
    modulename = 'untitled'
    sources = sys.argv[1:]
    # --include-paths takes a value argument: keep the pair together
    for optname in ['--include_paths', '--include-paths']:
        if optname in sys.argv:
            i = sys.argv.index(optname)
            f2py_flags.extend(sys.argv[i:i + 2])
            del sys.argv[i + 1], sys.argv[i]
            sources = sys.argv[1:]
    if '-m' in sys.argv:
        # explicit module name
        i = sys.argv.index('-m')
        modulename = sys.argv[i + 1]
        del sys.argv[i + 1], sys.argv[i]
        sources = sys.argv[1:]
    else:
        # no -m: derive the module name from the first .pyf file
        from numpy.distutils.command.build_src import get_f2py_modulename
        pyf_files, sources = filter_files('', '[.]pyf([.]src|)', sources)
        sources = pyf_files + sources
        for f in pyf_files:
            modulename = get_f2py_modulename(f)
            if modulename:
                break
    # peel remaining build inputs off the source list, by prefix/suffix
    extra_objects, sources = filter_files('', '[.](o|a|so)', sources)
    include_dirs, sources = filter_files('-I', '', sources, remove_prefix=1)
    library_dirs, sources = filter_files('-L', '', sources, remove_prefix=1)
    libraries, sources = filter_files('-l', '', sources, remove_prefix=1)
    undef_macros, sources = filter_files('-U', '', sources, remove_prefix=1)
    define_macros, sources = filter_files('-D', '', sources, remove_prefix=1)
    # distutils wants macros as (name, value-or-None) tuples
    for i in range(len(define_macros)):
        name_value = define_macros[i].split('=', 1)
        if len(name_value) == 1:
            name_value.append(None)
        if len(name_value) == 2:
            define_macros[i] = tuple(name_value)
        else:
            print('Invalid use of -D:', name_value)
    from numpy.distutils.system_info import get_info
    num_info = {}
    if num_info:
        include_dirs.extend(num_info.get('include_dirs', []))
    from numpy.distutils.core import setup, Extension
    ext_args = {'name': modulename, 'sources': sources,
                'include_dirs': include_dirs,
                'library_dirs': library_dirs,
                'libraries': libraries,
                'define_macros': define_macros,
                'undef_macros': undef_macros,
                'extra_objects': extra_objects,
                'f2py_options': f2py_flags,
                }
    if sysinfo_flags:
        # merge the requested system resources (blas, lapack, ...) into the
        # extension arguments
        from numpy.distutils.misc_util import dict_append
        for n in sysinfo_flags:
            i = get_info(n)
            if not i:
                outmess('No %s resources found in system'
                        ' (try `f2py --help-link`)\n' % (repr(n)))
            dict_append(ext_args, **i)
    ext = Extension(**ext_args)
    # rebuild argv as a distutils command line and run setup()
    sys.argv = [sys.argv[0]] + setup_flags
    sys.argv.extend(['build',
                     '--build-temp', build_dir,
                     '--build-base', build_dir,
                     '--build-platlib', '.'])
    if fc_flags:
        sys.argv.extend(['config_fc'] + fc_flags)
    if flib_flags:
        sys.argv.extend(['build_ext'] + flib_flags)
    setup(ext_modules=[ext])
    if remove_build_dir and os.path.exists(build_dir):
        import shutil
        outmess('Removing build directory %s\n' % (build_dir))
        shutil.rmtree(build_dir)
2
Example 31
Project: androwarn Source File: core.py
def match_current_instruction(current_instruction, registers_found) :
    """
    Decode one Dalvik instruction and update the recovered register map.

    @param current_instruction : the current instruction to be analyzed
    @param registers_found : a dictionary of registers recovered so far
    @rtype : the instruction name from the constants above, the local register
             number and its value, an updated version of the registers_found
    """
    # One pattern per Dalvik opcode family of interest. Each pattern is now
    # matched exactly once per call; the original code re-ran .match() up to
    # three times per pattern (once for the test, once per captured group).
    p_const = re.compile('^const(?:\/4|\/16|\/high16|-wide(?:\/16|\/32)|-wide\/high16|)? v([0-9]+), \#\+?(-?[0-9]+(?:\.[0-9]+)?)$')
    p_const_string = re.compile("^const-string(?:||-jumbo) v([0-9]+), '(.*)'$")
    p_move = re.compile('^move(?:|\/from16|-wide(?:\/from16|\/16)|-object(?:|\/from16|\/16))? v([0-9]+), (v[0-9]+)$')
    p_move_result = re.compile('^move(?:-result(?:|-wide|-object)|-exception)? v([0-9]+)$')
    p_aput = re.compile('^aput(?:-wide|-object|-boolean|-byte|-char|-short|) v([0-9]+), v([0-9]+), v([0-9]+)$')
    p_invoke = re.compile('^invoke-(?:static|virtual|direct|super|interface|interface-range|virtual-quick|super-quick) v([0-9]+), (L(?:.*);->.*)$')
    p_invoke_2_registers = re.compile('^invoke-(?:static|virtual|direct|super|interface|interface-range|virtual-quick|super-quick) v([0-9]+), v([0-9]+), (L(?:.*);->.*)$')
    p_invoke_no_register = re.compile('^invoke-(?:static|virtual|direct|super|interface|interface-range|virtual-quick|super-quick) (L(?:.*);->.*)$')
    p_new_instance = re.compile('^new-instance v([0-9]+), (L(?:.*);)$')
    # String concat: rebuild the textual form "<opcode> <operands>"
    current_instruction = "%s %s" % (current_instruction.get_name(), current_instruction.get_output())
    # Returned values init
    instruction_name = ''
    local_register_number = -1
    local_register_value = -1
    # The checks below are deliberately independent "if"s (not elif), matching
    # the original control flow: a later match overrides an earlier one.
    m = p_const_string.match(current_instruction)
    if m :
        instruction_name = CONST_STRING
        register_number, register_value = m.group(1), m.group(2)
        if not(register_number in registers_found) :
            registers_found[register_number] = register_value
        else :
            # successive const-string writes to the same register are
            # accumulated, newest value first
            old_string = registers_found[register_number]
            new_string = "%s %s" % (str(register_value), str(old_string))
            registers_found[register_number] = new_string
        local_register_number = register_number
        local_register_value = register_value
    m = p_const.match(current_instruction)
    if m :
        instruction_name = CONST
        register_number, register_value = m.group(1), m.group(2)
        if not(register_number in registers_found) :
            registers_found[register_number] = register_value
        local_register_number = register_number
        local_register_value = register_value
    m = p_move.match(current_instruction)
    if m :
        instruction_name = MOVE
        register_number, register_value = m.group(1), m.group(2)
        if not(register_number in registers_found) :
            registers_found[register_number] = register_value
        local_register_number = register_number
        local_register_value = register_value
    m = p_move_result.match(current_instruction)
    if m :
        instruction_name = MOVE_RESULT
        register_number = m.group(1)
        register_value = ''
        if not(register_number in registers_found) :
            registers_found[register_number] = register_value
        local_register_number = register_number
        local_register_value = register_value
    m = p_invoke.match(current_instruction)
    if m :
        instruction_name = INVOKE
        register_number, register_value = m.group(1), m.group(2)
        if not(register_number in registers_found) :
            registers_found[register_number] = register_value
        local_register_number = register_number
        local_register_value = register_value
    m = p_invoke_no_register.match(current_instruction)
    if m :
        instruction_name = INVOKE_NO_REGISTER
        register_number = ''
        register_value = m.group(1)
        local_register_number = register_number
        local_register_value = register_value
    m = p_invoke_2_registers.match(current_instruction)
    if m :
        # NOTE(review): the original code also tags the two-register form as
        # INVOKE_NO_REGISTER - kept as-is; confirm against the constants.
        instruction_name = INVOKE_NO_REGISTER
        register_number, register_value = m.group(1), m.group(2)
        local_register_number = register_number
        local_register_value = register_value
    m = p_new_instance.match(current_instruction)
    if m :
        instruction_name = NEW_INSTANCE
        register_number, register_value = m.group(1), m.group(2)
        if not(register_number in registers_found) :
            registers_found[register_number] = register_value
        local_register_number = register_number
        local_register_value = register_value
    m = p_aput.match(current_instruction)
    if m :
        # aput does not update registers_found; it only reports the
        # object/array register pair
        instruction_name = APUT
        register_object_reference = m.group(1)
        register_array_reference = m.group(2)
        register_element_index = m.group(3)
        local_register_number = register_object_reference
        local_register_value = register_array_reference
    return instruction_name, local_register_number, local_register_value, registers_found
2
Example 32
Project: feaTools Source File: parser.py
def _parseUnknown(writer, text):
    """Recursively parse a chunk of OpenType feature-file text.

    Peels recognized constructs (tables, features, lookups, classes,
    substitutions, positions, keywords) out of *text* one regex at a time,
    dispatching each to its ``_parse*`` helper or directly to *writer*.
    Any text preceding a matched construct is fed back into this function
    recursively. Whatever remains unmatched at the end raises
    FeaToolsParserSyntaxError.

    NOTE(review): the ordering of the sections below is load-bearing —
    each one slices consumed spans out of *text* before the next runs.
    """
    text = text.strip()
    ## extract all table names
    tableNames = table_findAll_RE.findall(text)
    for precedingMark, tableName in tableNames:
        # a regular expression specific to this lookup must
        # be created so that nested lookups are safely handled
        thisTableContentRE = list(tableContentRE)
        thisTableContentRE.insert(2, tableName)
        thisTableContentRE.insert(6, tableName)
        thisTableContentRE = re.compile("".join(thisTableContentRE))
        found = thisTableContentRE.search(text)
        tableText = found.group(2)
        start, end = found.span()
        # everything before the table is parsed recursively, then sliced off
        precedingText = text[:start]
        if precedingMark:
            precedingText += precedingMark
        _parseUnknown(writer, precedingText)
        _parseTable(writer, tableName, tableText)
        text = text[end:]
    ## extract all feature names
    featureTags = feature_findAll_RE.findall(text)
    for precedingMark, featureTag in featureTags:
        # a regular expression specific to this lookup must
        # be created so that nested lookups are safely handled
        thisFeatureContentRE = list(featureContentRE)
        thisFeatureContentRE.insert(2, featureTag)
        thisFeatureContentRE.insert(6, featureTag)
        thisFeatureContentRE = re.compile("".join(thisFeatureContentRE))
        found = thisFeatureContentRE.search(text)
        featureText = found.group(2)
        start, end = found.span()
        precedingText = text[:start]
        if precedingMark:
            precedingText += precedingMark
        _parseUnknown(writer, precedingText)
        _parseFeature(writer, featureTag, featureText)
        text = text[end:]
    ## extract all lookup names
    lookupNames = lookup_findAll_RE.findall(text)
    for precedingMark, lookupName in lookupNames:
        # a regular expression specific to this lookup must
        # be created so that nested lookups are safely handled
        thisLookupContentRE = list(lookupContentRE)
        thisLookupContentRE.insert(2, lookupName)
        thisLookupContentRE.insert(6, lookupName)
        thisLookupContentRE = re.compile("".join(thisLookupContentRE))
        found = thisLookupContentRE.search(text)
        lookupText = found.group(2)
        start, end = found.span()
        precedingText = text[:start]
        if precedingMark:
            precedingText += precedingMark
        _parseUnknown(writer, precedingText)
        _parseLookup(writer, lookupName, lookupText)
        text = text[end:]
    ## extract all class data
    classes = classDefinitionRE.findall(text)
    for precedingMark, className, classContent in classes:
        text = _executeSimpleSlice(precedingMark, text, classDefinitionRE, writer)
        className = "@" + className
        _parseClass(writer, className, classContent)
    ## extract substitutions
    # sub type 1 and 4
    subType1s = subType1And2And4RE.findall(text)
    for precedingMark, target, replacement in subType1s:
        text = _executeSimpleSlice(precedingMark, text, subType1And2And4RE, writer)
        _parseSubType1And2And4(writer, target, replacement)
    # sub type 3
    subType3s = subType3RE.findall(text)
    for precedingMark, target, replacement in subType3s:
        text = _executeSimpleSlice(precedingMark, text, subType3RE, writer)
        _parseSubType3(writer, target, replacement)
    # sub type 6
    subType6s = subType6RE.findall(text)
    for precedingMark, target, replacement in subType6s:
        text = _executeSimpleSlice(precedingMark, text, subType6RE, writer)
        _parseSubType6(writer, target, replacement)
    # ignore sub type 6
    ignoreSubType6s = ignoreSubType6RE.findall(text)
    for precedingMark, target in ignoreSubType6s:
        text = _executeSimpleSlice(precedingMark, text, ignoreSubType6RE, writer)
        _parseSubType6(writer, target, replacement=None, ignore=True)
    ## extract positions
    # pos type 1
    posType1s = posType1RE.findall(text)
    for precedingMark, target, value in posType1s:
        text = _executeSimpleSlice(precedingMark, text, posType1RE, writer)
        _parsePosType1(writer, target, value)
    # pos type 2
    posType2s = posType2RE.findall(text)
    for precedingMark, enumTag, posTag, targetAndValue in posType2s:
        text = _executeSimpleSlice(precedingMark, text, posType2RE, writer)
        _parsePosType2(writer, targetAndValue, needEnum=enumTag.strip())
    ## extract other data
    # XXX look at FDK spec. sometimes a language tag of dflt will be passed
    # it should be handled differently than the other tags.
    # languagesystem
    languagesystems = languagesystemRE.findall(text)
    for precedingMark, scriptTag, languageTag in languagesystems:
        text = _executeSimpleSlice(precedingMark, text, languagesystemRE, writer)
        writer.languageSystem(languageTag, scriptTag)
    # script
    scripts = scriptRE.findall(text)
    for precedingMark, scriptTag in scripts:
        text = _executeSimpleSlice(precedingMark, text, scriptRE, writer)
        writer.script(scriptTag)
    # language
    languages = languageRE.findall(text)
    for precedingMark, languageTag, otherKeyword in languages:
        text = _executeSimpleSlice(precedingMark, text, languageRE, writer)
        if not otherKeyword or otherKeyword == "include_dflt":
            writer.language(languageTag)
        elif otherKeyword == "exclude_dflt":
            writer.language(languageTag, includeDefault=False)
    # include
    inclusions = includeRE.findall(text)
    for precedingMark, path in inclusions:
        text = _executeSimpleSlice(precedingMark, text, includeRE, writer)
        writer.include(path)
    # feature reference
    featureReferences = featureReferenceRE.findall(text)
    for precedingMark, featureTag in featureReferences:
        text = _executeSimpleSlice(precedingMark, text, featureReferenceRE, writer)
        writer.featureReference(featureTag)
    # lookup reference
    lookupReferences = lookupReferenceRE.findall(text)
    for precedingMark, lookupName in lookupReferences:
        text = _executeSimpleSlice(precedingMark, text, lookupReferenceRE, writer)
        writer.lookupReference(lookupName)
    # lookupflag
    lookupflags = lookupflagRE.findall(text)
    for precedingMark, lookupflagValues in lookupflags:
        text = _executeSimpleSlice(precedingMark, text, lookupflagRE, writer)
        _parseLookupFlag(writer, lookupflagValues)
    # subtable break
    subtables = subtableRE.findall(text)
    for precedingMark in subtables:
        text = _executeSimpleSlice(precedingMark, text, subtableRE, writer)
        writer.subtableBreak()
    ## extract all featureNames
    featureNames = featureNamesRE.findall(text)
    for precedingMark in featureNames:
        text = _executeSimpleSlice(precedingMark, text, featureNamesRE, writer)
    # empty instructions
    terminators = terminatorRE.findall(text)
    for terminator in terminators:
        text = _executeSimpleSlice(None, text, terminatorRE, writer)
        writer.rawText(terminator)
    text = text.strip()
    if text:
        # anything left over is unrecognized syntax
        raise FeaToolsParserSyntaxError("Invalid Syntax: %s" % text)
Example 33
def analyze(self):
    """
    Core function which is called in the run
    Do the effective work

    Classifies each installed package as up-to-date, obsolete or unchecked
    by comparing it against the CentOS "updates" and "os" (release) package
    lists, downloaded either from a mirror (stable versions) or from
    vault.centos.org (archived versions).

    Returns True on success, False if the package-list download fails.
    NOTE(review): Python 2 code (print statements, urllib2).
    """
    start_time = time.time()
    # Working directory for the downloaded package lists.
    path = self.config.ressources_path
    if path is None:
        path = "/tmp/"
    # Create the directory containing data if it does not exist
    if not os.path.exists(path):
        os.makedirs(path)
    # Check packages algorithm
    # Step 1 : check packages in updates packages
    #   Ok -> uptodate   KO -> undetermined (uptodate or obsolete or unchecked)
    # Step 2 : check name with updates packages
    #   Ok -> obsolete   KO -> undetermined (uptodate or obsolete or unchecked)
    # Step 3 : check packages with centos release
    #   Ok -> uptodate   KO -> undetermined (obsolete or unchecked)
    # Step 4 : check name with centos release
    #   Ok -> obsolete   KO -> unchecked
    # List of package objects to analyze
    packages_list_to_analyze = self.packages.dict.values()
    # Temporary lists that accumulate the classification results
    packages_tmp_uptodate = []
    packages_tmp_obsolete = []
    packages_tmp_unchecked = []
    # Download packages list if necessary
    # e.g. "http://mirrors.atosworldline.com/public/centos/filelist.gz"
    url = self.config_server.centos["packages_url_stable_version"]
    print "url : " + url
    file_name = "centos_packages.gz"
    # Path prefixes inside the file list, e.g. "./6.5/os/x86_64/CentOS/"
    # (release packages) and "./6.5/updates/x86_64/RPMS/" (update packages).
    pkg_string_header = "./" + self.server.osversion + "/os/" + self.server.osarchitecture + "/CentOS/"
    pkg_string_header_update = "./" + self.server.osversion + "/updates/" + self.server.osarchitecture + "/RPMS/"
    pkg_release_expression = pkg_string_header + "(?P<pkg_release>[A-Za-z0-9:\-~.+_]+)"
    pkg_update_expression = pkg_string_header_update + "(?P<pkg_update>[A-Za-z0-9:\-~.+_]+)"
    #pkg_obsolete_expression = "(?P<pkg_name_start>[A-Za-z0-9:+~\-\._]+)(?P<pkg_version>(\-))"
    # Greedy prefix match used to compare package names ignoring version/arch.
    pkg_obsolete_expression = "(?P<pkg_name_start>[A-Za-z0-9:+~\-\._]+)"
    pkg_release_reg = re.compile(pkg_release_expression)
    pkg_update_reg = re.compile(pkg_update_expression)
    pkg_obsolete_reg = re.compile(pkg_obsolete_expression)
    # List of release packages
    release_list = []
    # List of updates packages
    updates_list = []
    if self.server.osversion in self.config_server.centos["stable_versions"]:
        # Stable version: download the compressed file list from the mirror.
        # The lock serializes the download between analyzer threads.
        self._lock.acquire()
        download_boolean = tools.downloadFile(url, file_name, self.config, path)
        self._lock.release()
        if not download_boolean:
            self._logger.error("Download file error")
            return False
        # Read the downloaded file containing the packages list
        file_path = path + file_name
        f_1 = gzip.open(file_path,'rb')
        rawtext = f_1.read()
        f_1.close()
        # Split text in lines
        releases = rawtext.split('\n')
        # Read the lines of packages and fill the release and update package lists
        for l_1 in releases:
            if pkg_release_reg.match(l_1) is not None:
                # Fill the release list
                result_re = pkg_release_reg.match(l_1)
                pkg_name = result_re.group('pkg_release')
                release_list.append(pkg_name)
            elif pkg_update_reg.match(l_1) is not None:
                # Fill the updates list
                result_re = pkg_update_reg.match(l_1)
                pkg_name = result_re.group('pkg_update')
                updates_list.append(pkg_name)
    else:
        # Archived (non-stable) version: scrape SRPM listings from vault.centos.org.
        # url_os = "http://vault.centos.org/" + str(self.server.osversion) + "/os/" + str(self.server.osarchitecture) + "/CentOS/"
        url_os = "http://vault.centos.org/" + str(self.server.osversion) + "/os/SRPMS/"
        url_updates = "http://vault.centos.org/" + str(self.server.osversion) + "/updates/SRPMS/" # + str(self.server.osarchitecture) + "/RPMS/"
        # Extract the package name from the <a href="...src.rpm"> anchors.
        pattern = r"(?P<var1>.*)(?P<var2><a href=\")(?P<pkg>.+)(?P<var3>\.src.rpm\">)"
        reg = re.compile(pattern)
        lines_os = urllib2.urlopen(url_os).read().split('\n')
        lines_updates = urllib2.urlopen(url_updates).read().split('\n')
        for line in lines_os:
            if reg.match(line):
                result_re = reg.match(line)
                # release_list.append(result_re.group('pkg') + ".rpm")
                release_list.append(result_re.group('pkg'))
        for line in lines_updates:
            if reg.match(line):
                result_re = reg.match(line)
                # updates_list.append(result_re.group('pkg') + ".rpm")
                updates_list.append(result_re.group('pkg'))
    # --------------------------------------------
    # ------------------ Step 1 ------------------
    # --------------------------------------------
    # Exact match (name.arch.rpm) against the updates list -> up to date.
    # tmp_list which will be the future packages_list_to_analyze after the loop
    tmp_list = []
    for pkg in packages_list_to_analyze:
        # Add the suffix .osarchitecture.rpm
        # ex : package.x86_64.rpm
        pkg_with_arch = pkg.name + "." + self.server.osarchitecture + ".rpm"
        # Also try "noarch" since architecture-independent packages use it.
        if pkg_with_arch in updates_list or pkg_with_arch.replace(self.server.osarchitecture + ".rpm", "noarch.rpm") in updates_list:
            packages_tmp_uptodate.append(sf.Package(pkg.name, pkg.version))
        else:
            tmp_list.append(pkg)
    packages_list_to_analyze = tmp_list
    # --------------------------------------------
    # ------------------ Step 2 ------------------
    # --------------------------------------------
    # Name-prefix match against the updates list -> obsolete (a newer
    # build of the same package exists in updates).
    # Dictionary which contains the start of the package name mapped to the full line
    updates_dict_start={}
    for pkg_update in updates_list:
        if pkg_obsolete_reg.match(pkg_update) is not None:
            result_re = pkg_obsolete_reg.match(pkg_update)
            updates_dict_start[result_re.group('pkg_name_start')] = pkg_update
        else:
            self._logger.error("Regular expression parsing error : step 2 - a")
    tmp_list = []
    for pkg in packages_list_to_analyze:
        pkg_found = False
        if pkg_obsolete_reg.match(pkg.name) is not None:
            result_re = pkg_obsolete_reg.match(pkg.name)
            pkg_name_start = result_re.group('pkg_name_start')
            for pkg_update_start in updates_dict_start.keys():
                if pkg_name_start == pkg_update_start:
                    pkg_found = True
                    packages_tmp_obsolete.append(sf.Package(pkg.name, pkg.version, updates_dict_start[pkg_update_start]))
        else:
            self._logger.error("Regular expression parsing error : step 2 - b")
        if not pkg_found:
            tmp_list.append(pkg)
    packages_list_to_analyze = tmp_list
    # --------------------------------------------
    # ------------------ Step 3 ------------------
    # --------------------------------------------
    # Exact match (name.arch.rpm) against the release list -> up to date.
    tmp_list = []
    for pkg in packages_list_to_analyze:
        # Add the suffix .osarchitecture.rpm
        # ex : package.x86_64.rpm
        pkg_with_arch = pkg.name + "." + self.server.osarchitecture + ".rpm"
        if pkg_with_arch in release_list or pkg_with_arch.replace(self.server.osarchitecture + ".rpm", "noarch.rpm") in release_list:
            packages_tmp_uptodate.append(sf.Package(pkg.name, pkg.version))
        else:
            tmp_list.append(pkg)
    packages_list_to_analyze = tmp_list
    # --------------------------------------------
    # ------------------ Step 4 ------------------
    # --------------------------------------------
    # Name-prefix match against the release list; everything left over
    # ends up unchecked. (The obsolete branch is currently commented out.)
    release_dict_start={}
    print len(release_list)
    for pkg_release in release_list:
        if pkg_obsolete_reg.match(pkg_release) is not None:
            result_re = pkg_obsolete_reg.match(pkg_release)
            release_dict_start[result_re.group('pkg_name_start')] = pkg_release
        else:
            self._logger.error("Regular expression parsing error : step 4 - a")
    for pkg in packages_list_to_analyze:
        # print "Package :: " + pkg.name
        pkg_found = False
        if pkg_obsolete_reg.match(pkg.name) is not None:
            result_re = pkg_obsolete_reg.match(pkg.name)
            pkg_name_start = result_re.group('pkg_name_start')
            # print "info :: " + pkg_name_start
            if pkg.name == pkg_name_start:
                pkg_found = True
                packages_tmp_uptodate.append(sf.Package(pkg.name, pkg.version, ""))
                # Debug output (French): "installed package // expected package"
                print " Package installé : " + pkg.name + "// Package attendu : " + pkg_name_start
            else:
                print "Package installé : " + pkg.name
                print "Package attendu :" + pkg_name_start
                #for pkg_release_start in release_dict_start.keys():
                # if pkg_name_start == pkg_release_start:
                # pkg_found = True
                # packages_tmp_obsolete.append(sf.Package(pkg.name, pkg.version, release_dict_start[pkg_release_start]))
        else:
            self._logger.error("Regular expression parsing error : step 4 - b")
            print "plop"
        if not pkg_found:
            packages_tmp_unchecked.append(sf.Package(pkg.name, pkg.version))
    #
    # Store results (the push sorts the list in name alphabetic order)
    self.server.packages_uptodate.push_package_list(packages_tmp_uptodate)
    self.server.packages_obsolete.push_package_list(packages_tmp_obsolete)
    self.server.packages_unchecked.push_package_list(packages_tmp_unchecked)
    #
    # Fill the reporting counters
    self.server.nb_packages = self.packages.get_number()
    self.server.nb_packages_uptodate = self.server.packages_uptodate.get_number()
    self.server.nb_packages_obsolete = self.server.packages_obsolete.get_number()
    self.server.nb_packages_unchecked = self.server.packages_unchecked.get_number()
    self._logger.debug("Nb Packages : "+str(self.server.nb_packages))
    self._logger.debug("Nb up to date : "+str(self.server.nb_packages_uptodate))
    self._logger.debug("Nb Packages obsolete : "+str(self.server.nb_packages_obsolete))
    self._logger.debug("Nb Packages unchecked : "+str(self.server.nb_packages_unchecked) )
    #
    end_time = time.time()
    self._logger.info("Elapsed time: "+str((end_time - start_time) * 1000)+" msecs")
    self._logger.info("CentOS packages successfully analyzed !")
    return True
2
Example 34
Project: dolo Source File: modfile.py
def parse_dynare_text(txt, add_model=True, full_output=False, debug=False):
    '''
    Imports the content of a modfile into the current interpreter scope

    Parses the text of a Dynare .mod file (declarations, model block,
    optional initval/endval/shocks blocks) and builds a symbolic model.

    :param txt: raw modfile text
    :param add_model: kept for interface compatibility (unused here)
    :param full_output: kept for interface compatibility (unused here)
    :param debug: if True, print the elementary instruction groups found
    :return: trash.dolo.symbolic.model.SModel instance

    SECURITY NOTE(review): expressions from the modfile are evaluated with
    eval(); only feed trusted input to this parser.
    '''
    # here we call "instruction group", a string finishing by a semicolon
    # an "instruction group" can have several lines
    # a line can be
    # - a comment //...
    # - an old-style tag //$...
    # - a new-style tag [key1='value1',..]
    # - macro-instruction @#...
    # A Modfile contains several blocks (in this order) :
    # - an initblock defining variables, exovariables, parameters, initialization
    #   inside the initblock the order of declaration doesn't matter
    # - a model block with two special lines (model; end;)
    # - optional blocks (like endval, shocks)
    #   separated by free matlab instructions in any order;
    # - all other instructions are ignored
    otxt = txt
    otxt = otxt.replace("\r\n", "\n")
    otxt = otxt.replace("^", "**")  # Dynare power operator -> Python/SymPy
    # first, we remove end-of-line comments : they are definitely lost
    # ("//#" is preserved since it introduces old-style tags)
    regex = re.compile(r"(.+)//[^#](.*)")

    def remove_end_comment(line):
        res = regex.search(line)
        if res:
            return res.groups(1)[0]
        return line

    txt = "\n".join(map(remove_end_comment, otxt.split("\n")))
    # optional "// fname = '...'" annotation (currently informational only)
    name_regex = re.compile(r"//\s*fname\s*=\s*'(.*)'")
    m = name_regex.search(txt)
    if m:
        fname = m.group(1)
    else:
        fname = None
    instruction_groups = [Instruction_group(s) for s in txt.split(";")]
    instructions = [ig.instruction for ig in instruction_groups]
    if debug:
        print('Elementary instructions')
        for i in instruction_groups:
            print(i)
    # Locate the model block: "model;" or "model(...);" up to the next "end;".
    try:
        imodel = [re.compile(r'model(\(.*\)|)').match(e) is not None
                  for e in instructions]
        imodel = imodel.index(True)
        # imodel = instructions.index("model") # this doesn't work for "MODEL"
        iend = instructions.index("end")
        model_block = instruction_groups[imodel:(iend + 1)]
        init_block = instruction_groups[0:imodel]
    except ValueError:  # BUGFIX: was a bare "except:" hiding unrelated errors
        raise Exception('Model block could not be found.')
    next_instructions = instructions[(iend + 1):]
    next_instruction_groups = instruction_groups[(iend + 1):]
    # Optional initval block (between the model block and its "end").
    if 'initval' in next_instructions:
        iinitval = next_instructions.index('initval')
        iend = next_instructions.index('end', iinitval)
        matlab_block_1 = next_instruction_groups[0:iinitval]
        initval_block = next_instruction_groups[iinitval:(iend + 1)]
        next_instruction_groups = next_instruction_groups[(iend + 1):]
        next_instructions = next_instructions[(iend + 1):]
    else:
        initval_block = None
        matlab_block_1 = None
    # Optional endval block.
    if 'endval' in next_instructions:
        iendval = next_instructions.index('endval')
        iend = next_instructions.index('end', iendval)
        matlab_block_2 = next_instruction_groups[0:iendval]
        endval_block = next_instruction_groups[iendval:(iend + 1)]
        next_instruction_groups = next_instruction_groups[(iend + 1):]
        next_instructions = next_instructions[(iend + 1):]
    else:
        endval_block = None
        matlab_block_2 = None
    # TODO : currently shocks block needs to follow initval, this restriction should be removed
    if 'shocks' in next_instructions:
        ishocks = next_instructions.index('shocks')
        iend = next_instructions.index('end', ishocks)
        matlab_block_3 = next_instruction_groups[0:ishocks]
        shocks_block = next_instruction_groups[ishocks:(iend + 1)]
        next_instruction_groups = next_instruction_groups[(iend + 1):]
        next_instructions = next_instructions[(iend + 1):]
    else:
        shocks_block = None
        matlab_block_3 = None
    # Parse the init block: declarations (var/varexo/parameters) and
    # calibration lines ("name = expression").
    try:
        init_regex = re.compile("(parameters |var |varexo |)(.*)")
        var_names = []
        varexo_names = []
        parameters_names = []
        declarations = {}
        for ig in init_block:
            if ig.instruction != '':
                m = init_regex.match(ig.instruction)
                if not m:
                    raise Exception("Unexpected instruction in init block : " + str(ig.instruction))
                if m.group(1) == '':
                    # No keyword: this is a calibration assignment.
                    [lhs, rhs] = m.group(2).split("=")
                    lhs = lhs.strip()
                    rhs = rhs.strip()
                    declarations[lhs] = rhs
                else:
                    arg = m.group(2).replace(",", " ")
                    names = [vn.strip() for vn in arg.split()]
                    if m.group(1).strip() == 'var':
                        dest = var_names
                    elif m.group(1).strip() == 'varexo':
                        dest = varexo_names
                    elif m.group(1).strip() == 'parameters':
                        dest = parameters_names
                    for n in names:
                        if not n in dest:
                            dest.append(n)
                        else:
                            # BUGFIX: message used a %s placeholder together
                            # with str.format, so the name was never inserted
                            raise Exception("symbol {0} has already been defined".format(n))
    except Exception as e:
        raise Exception('Init block could not be read : ' + str(e))
    # the following instructions set the symbols "variables","shocks","parameters"
    variables = []
    for vn in var_names:
        v = Variable(vn)
        variables.append(v)
    shocks = []
    for vn in varexo_names:
        s = Shock(vn)
        shocks.append(s)
    parameters = []
    for vn in parameters_names:
        p = Parameter(vn)
        parameters.append(p)
    # Evaluation namespace: declared symbols plus a few SymPy functions.
    parse_dict = dict()
    for v in variables + shocks + parameters:
        parse_dict[v.name] = v
    special_symbols = [sympy.exp, sympy.log, sympy.sin, sympy.cos, sympy.atan, sympy.tan]
    for s in special_symbols:
        parse_dict[str(s)] = s
    parse_dict['sqrt'] = sympy.sqrt
    # Read parameters values
    parameters_values = {}
    for p in declarations:
        try:
            rhs = eval(declarations[p], parse_dict)
        except Exception as e:
            # BUGFIX: the exception was constructed but never raised,
            # silently leaving rhs unset or stale on evaluation failure
            raise Exception("Impossible to evaluate parameter value : " + str(e))
        try:
            lhs = eval(p, parse_dict)
        except Exception as e:
            # here we could declare p
            raise e
        parameters_values[lhs] = rhs
    # Now we read the model block
    model_tags = model_block[0].tags
    equations = []
    for ig in model_block[1:-1]:
        if ig.instruction != '':
            teq = ig.instruction.replace('^', "**")
            if '=' in teq:
                teqlhs, teqrhs = teq.split("=")
            else:
                # An expression alone means "expression = 0".
                teqlhs = teq
                teqrhs = '0'
            eqlhs = eval(teqlhs, parse_dict)
            eqrhs = eval(teqrhs, parse_dict)
            eq = Equation(eqlhs, eqrhs)
            eq.tags.update(ig.tags)
            equations.append(eq)
    # Now we read the initval block
    init_values = {}
    if initval_block is not None:
        for ig in initval_block[1:-1]:
            if len(ig.instruction.strip()) > 0:
                try:
                    [lhs, rhs] = ig.instruction.split("=")
                except Exception as e:
                    print(ig.instruction)
                    raise e
                init_values[eval(lhs, parse_dict)] = eval(rhs, parse_dict)
    # Now we read the endval block (parsed but not used in the calibration)
    end_values = {}
    if endval_block is not None:
        for ig in endval_block[1:-1]:
            [lhs, rhs] = ig.instruction.split("=")
            # BUGFIX: evaluate in parse_dict like the initval block does,
            # otherwise any variable name raises NameError
            end_values[eval(lhs, parse_dict)] = eval(rhs, parse_dict)
    # Now we read the shocks block into a symmetric covariance matrix.
    covariances = None
    if shocks_block is not None:
        covariances = sympy.zeros(len(shocks))
        # "var x, y = expr;" sets a covariance, "var x = expr;" a variance.
        regex1 = re.compile(r"var (.*?),(.*?)=(.*)|var (.*?)=(.*)")
        for ig in shocks_block[1:-1]:
            m = regex1.match(ig.instruction)
            if not m:
                raise Exception("unrecognized instruction in block shocks : " + str(ig.instruction))
            if m.group(1) != None:
                varname1 = m.group(1).strip()
                varname2 = m.group(2).strip()
                value = m.group(3).strip().replace("^", "**")
            elif m.group(4) != None:
                varname1 = m.group(4).strip()
                varname2 = varname1
                value = m.group(5).strip().replace("^", "**")
            i = varexo_names.index(varname1)
            j = varexo_names.index(varname2)
            covariances[i, j] = eval(value, parse_dict)
            covariances[j, i] = eval(value, parse_dict)
    calibration = {}
    calibration.update(parameters_values)
    calibration.update(init_values)
    symbols = {'variables': variables, 'shocks': shocks, 'parameters': parameters}
    from trash.dolo.symbolic.model import SModel
    model = SModel({'dynare_block': equations}, symbols, calibration, covariances)
    return model
2
Example 35
Project: carml Source File: downloadbundle.py
@defer.inlineCallbacks
def run(self, options, mainoptions, connection):
    """
    Download, verify and (optionally) launch the Tor Browser Bundle.

    Fetches the recommended-versions list from torproject.org (over Tor
    via SOCKS5 by default, or clearnet with --use-clearnet), picks a
    version matching this platform, downloads the bundle and its
    signature, verifies the signature, and extracts/launches the browser
    depending on the --no-extract / --no-launch options.

    NOTE(review): Python 2 code — relies on filter() returning lists,
    StringIO, and bytes() being an alias of str.
    """
    # NOTE the middle cert changed on April 10 or thereabouts;
    # still need to confirm this is legitimate?
    # Pinned certificate chain used to verify www.torproject.org.
    chain = [ssl.Certificate.loadPEM(pkg_resources.resource_string('carml', 'keys/torproject.pem')),
             ssl.Certificate.loadPEM(pkg_resources.resource_string('carml', 'keys/digicert-sha2.pem')),
             ssl.Certificate.loadPEM(pkg_resources.resource_string('carml', 'keys/digicert-root-ca.pem')),
             ]
    cf = VerifyCertChainContextFactory(chain)
    error_wrapper = None
    if options['use-clearnet']:
        print(util.colors.red('WARNING') + ': downloading over plain Internet (not via Tor).')
        agent = Agent(reactor, contextFactory=cf)
    else:
        try:
            # Route HTTP through Tor's local SOCKS5 port.
            import txsocksx.http
            conn = "tcp:127.0.0.1:9050"
            tor_ep = endpoints.clientFromString(reactor, conn)
            agent = txsocksx.http.SOCKS5Agent(reactor,
                                              proxyEndpoint=tor_ep,
                                              contextFactory=cf)
            # Translate low-level connect failures into a readable message.
            def nicer_error(fail):
                if fail.trap(error.ConnectError):
                    m = fail.getErrorMessage()
                    raise RuntimeError("Couldn't contact Tor on SOCKS5 (via \"%s\"): %s" % (conn, m))
                return fail
            error_wrapper = nicer_error
        except ImportError:
            raise RuntimeError('You need "txsocksx" installed to download via Tor.')
    uri = 'https://www.torproject.org/projects/torbrowser/RecommendedTBBVersions'
    data = StringIO()
    print('Getting recommended versions from "%s".' % uri)
    d = download(agent, uri, data)
    # Flatten OpenSSL verification failures into a single RuntimeError.
    def ssl_errors(fail):
        if hasattr(fail.value, 'reasons'):
            msg = ''
            for r in fail.value.reasons:
                msg += str(r.value.args[-1])
            raise RuntimeError(msg)
        return fail
    d.addErrback(ssl_errors)
    if error_wrapper is not None:
        d.addErrback(error_wrapper)
    yield d
    # valid platforms from check.torproject.org can be one of:
    # 'Linux', 'MacOS' or 'Windows'
    plat = platform.system().lower()
    arch = platform.uname()[-2]
    plat_to_tor = dict(linux='Linux', darwin='MacOS', windows='Win')
    if plat not in plat_to_tor:
        print('Unknown platform "%s".' % plat)
        raise RuntimeError('Unknown platform "%s".' % plat)
    tor_plat = plat_to_tor[plat]
    try:
        versions = json.loads(data.getvalue())
    except:  # NOTE(review): bare except — any parse failure is reported as bad JSON
        print('Error getting versions; invalid JSON:')
        print(data.getvalue())
        raise RuntimeError('Invalid JSON:\n%s' % data.getvalue())
    # Version strings look like "6.0a5-Linux" / "6.0b2-MacOS" / "6.0-Win".
    alpha_re = re.compile(r'[0-9]*.[0-9]*a[0-9]-(Windows|MacOS|Linux)')
    beta_re = re.compile(r'[0-9]*.[0-9]*b[0-9]-(Windows|MacOS|Linux)')
    hardened_re = re.compile(r'(.*)-hardened-(.*)')
    print(util.wrap(', '.join(versions), 60, ' '))
    alphas = filter(lambda x: alpha_re.match(x), versions)
    betas = filter(lambda x: beta_re.match(x), versions)
    # the 'hardened' browser names don't follow the pattern of the
    # others; for now, just ignoring them... (XXX FIXME)
    hardened = filter(lambda x: hardened_re.match(x), versions)
    # Stable releases: whatever is neither alpha, beta nor hardened.
    others = set(versions).difference(alphas, betas, hardened)
    if options['alpha']:
        versions = alphas
    elif options['beta']:
        versions = betas
    else:
        versions = others
    if alphas:
        print(util.colors.yellow("Note: there are alpha versions available; use --alpha to download."))
    if betas:
        print(util.colors.yellow("Note: there are beta versions available; use --beta to download."))
    if hardened:
        print(util.colors.yellow("Note: there are hardened versions available but we don't support downloading them yet."))
    # Pick the version for our platform; strip the "-Platform" suffix.
    target_version = None
    for v in versions:
        if v.endswith(tor_plat):
            target_version = v[:v.rfind('-')]
    if target_version is None:
        print("Can't find a version to download")
        print("  My platform is: %s (%s)" % (plat, plat_to_tor[plat]))
        print("  Potential versions are: %s" % ', '.join(versions))
        if options['beta']:
            print("(Try without --beta)")
        elif options['alpha']:
            print("(Try without --alpha)")
        raise RuntimeError("Nothing to download found.")
    # download the signature, then browser-bundle (if they don't
    # already exist locally).
    sig_fname, dist_fname = get_download_urls(plat, arch, target_version)
    for to_download in [sig_fname, dist_fname]:
        uri = bytes('https://www.torproject.org/dist/torbrowser/%s/%s' % (target_version, to_download))
        if os.path.exists(to_download):
            print(util.colors.red(to_download) + ': already exists, so not downloading.')
        else:
            # Remove partial downloads on failure so a retry starts clean.
            def cleanup(failure, fname):
                print('removing "%s"...' % fname)
                os.unlink(fname)
                return failure
            f = open(to_download, 'w')
            print('Downloading "%s".' % to_download)
            d = download(agent, uri, f)
            d.addErrback(cleanup, to_download)
            yield d
            f.close()
    # ensure the signature matches
    if verify_signature(sig_fname, system_gpg=bool(options['system-keychain'])):
        print(util.colors.green("Signature is good."))
        if options['no-extract']:
            print("Download and signature check of the Tor Browser Bundle")
            print("has SUCCEEDED.\n")
            print("It is here: %s\n" % os.path.realpath(dist_fname))
            extraction_instructions(dist_fname)
            print("and then:")
        else:
            try:
                extract_7zip(dist_fname)
                print("Tor Browser Bundle downloaded and extracted.")
            except ImportError:
                msg = 'You need "backports.lzma" installed to do 7zip extraction.'
                print(util.colors.red('Error: ') + msg, isError=True)
                extraction_instructions(dist_fname)
                print("To run:")
        # running instructions
        # assumes dist_fname embeds a 5-char locale (e.g. "en-US") — TODO confirm
        lang = dist_fname[-12:-7]
        tbb_path = './tor-browser_%s/Browser/start-tor-browser' % lang
        if options['no-launch']:
            print("To run: %s" % tbb_path)
        else:
            print("running: %s" % tbb_path)
            # Replaces the current process with the browser launcher.
            os.execl(tbb_path, tbb_path)
    else:
        print(util.colors.bold('Deleting tarball; signature verification failed.'))
        os.unlink(dist_fname)
        print('...however signature file is being kept for reference (%s).' % sig_fname)
2
Example 36
Project: pyxform Source File: xls2json.py
def workbook_to_json(
workbook_dict, form_name=None,
default_language=u"default", warnings=None):
"""
workbook_dict -- nested dictionaries representing a spreadsheet.
should be similar to those returned by xls_to_dict
form_name -- The spreadsheet's filename
default_language -- default_language does two things:
1. In the xform the default language is the language reverted to when
there is no translation available for some itext element. Because
of this every itext element must have a default language translation.
2. In the workbook if media/labels/hints that do not have a
language suffix will be treated as though their suffix is the
default language.
If the default language is used as a suffix for media/labels/hints,
then the suffixless version will be overwritten.
warnings -- an optional list which warnings will be appended to
returns a nested dictionary equivalent to the format specified in the
json form spec.
"""
# ensure required headers are present
if warnings is None:
warnings = []
is_valid = False
for row in workbook_dict.get('survey', []):
is_valid = 'type' in row
if is_valid:
break
if not is_valid:
raise PyXFormError(
u"The survey sheet is either empty or missing important "
u"column headers.")
row_format_string = '[row : %s]'
# Make sure the passed in vars are unicode
form_name = unicode(form_name)
default_language = unicode(default_language)
# We check for double columns to determine whether to use them
# or single colons to delimit grouped headers.
# Single colons are bad because they conflict with with the xform namespace
# syntax (i.e. jr:constraintMsg),
# so we only use them if we have to for backwards compatibility.
use_double_colons = has_double_colon(workbook_dict)
# Break the spreadsheet dict into easier to access objects
# (settings, choices, survey_sheet):
# ########## Settings sheet ##########
settings_sheet = dealias_and_group_headers(
workbook_dict.get(constants.SETTINGS, []),
aliases.settings_header, use_double_colons)
settings = settings_sheet[0] if len(settings_sheet) > 0 else {}
default_language = settings.get(
constants.DEFAULT_LANGUAGE, default_language)
# add_none_option is a boolean that when true,
# indicates a none option should automatically be added to selects.
# It should probably be deprecated but I haven't checked yet.
if u"add_none_option" in settings:
settings[u"add_none_option"] = aliases.yes_no.get(
settings[u"add_none_option"], False)
# Here we create our json dict root with default settings:
id_string = settings.get(constants.ID_STRING, form_name)
sms_keyword = settings.get(constants.SMS_KEYWORD, id_string)
json_dict = {
constants.TYPE: constants.SURVEY,
constants.NAME: form_name,
constants.TITLE: id_string,
constants.ID_STRING: id_string,
constants.SMS_KEYWORD: sms_keyword,
constants.DEFAULT_LANGUAGE: default_language,
# By default the version is based on the date and time yyyymmddhh
# Leaving default version out for now since it might cause
# problems for formhub.
# constants.VERSION : datetime.datetime.now().strftime("%Y%m%d%H"),
constants.CHILDREN: []
}
# Here the default settings are overridden by those in the settings sheet
json_dict.update(settings)
# ########## Choices sheet ##########
# Columns and "choices and columns" sheets are deprecated,
# but we combine them with the choices sheet for backwards-compatibility.
choices_and_columns_sheet = workbook_dict.get(
constants.CHOICES_AND_COLUMNS, {})
choices_and_columns_sheet = dealias_and_group_headers(
choices_and_columns_sheet, aliases.list_header,
use_double_colons, default_language)
columns_sheet = workbook_dict.get(constants.COLUMNS, [])
columns_sheet = dealias_and_group_headers(
columns_sheet, aliases.list_header,
use_double_colons, default_language)
choices_sheet = workbook_dict.get(constants.CHOICES, [])
choices_sheet = dealias_and_group_headers(
choices_sheet, aliases.list_header, use_double_colons,
default_language)
# ########## Cascading Select sheet ###########
cascading_choices = workbook_dict.get(constants.CASCADING_CHOICES, [])
if len(cascading_choices):
if 'choices' in cascading_choices[0]:
choices_sheet = choices_sheet + cascading_choices[0]['choices']
combined_lists = group_dictionaries_by_key(
choices_and_columns_sheet + choices_sheet + columns_sheet,
constants.LIST_NAME)
choices = combined_lists
# Make sure all the options have the required properties:
warnedabout = set()
for list_name, options in choices.items():
for option in options:
if 'name' not in option:
info = "[list_name : " + list_name + ']'
raise PyXFormError("On the choices sheet there is "
"a option with no name. " + info)
if 'label' not in option:
info = "[list_name : " + list_name + ']'
warnings.append(
"On the choices sheet there is a option with no label. " +
info)
# chrislrobert's fix for a cryptic error message:
# see: https://code.google.com/p/opendatakit/issues/detail?id=832&start=200 # noqa
option_keys = list(option.keys())
for headername in option_keys:
# Using warnings and removing the bad columns
# instead of throwing errors because some forms
# use choices column headers for notes.
if ' ' in headername:
if headername not in warnedabout:
warnedabout.add(headername)
warnings.append("On the choices sheet there is " +
"a column (\"" +
headername +
"\") with an illegal header. " +
"Headers cannot include spaces.")
del option[headername]
elif headername == '':
warnings.append("On the choices sheet there is a value" +
" in a column with no header.")
del option[headername]
# ########## Survey sheet ###########
if constants.SURVEY not in workbook_dict:
raise PyXFormError(
"You must have a sheet named (case-sensitive): " +
constants.SURVEY)
survey_sheet = workbook_dict[constants.SURVEY]
# Process the headers:
clean_text_values_enabled = aliases.yes_no.get(
settings.get("clean_text_values", "true()"))
if clean_text_values_enabled:
survey_sheet = clean_text_values(survey_sheet)
survey_sheet = dealias_and_group_headers(
survey_sheet, aliases.survey_header,
use_double_colons, default_language)
survey_sheet = dealias_types(survey_sheet)
osm_sheet = workbook_dict.get(constants.OSM, [])
osm_tags = group_dictionaries_by_key(osm_sheet, constants.LIST_NAME)
# #################################
# Parse the survey sheet while generating a survey in our json format:
row_number = 1 # We start at 1 because the column header row is not
# included in the survey sheet (presumably).
# A stack is used to keep track of begin/end expressions
stack = [(None, json_dict.get(constants.CHILDREN))]
# If a group has a table-list appearance flag
# this will be set to the name of the list
table_list = None
# For efficiency we compile all the regular expressions
# that will be used to parse types:
end_control_regex = re.compile(r"^(?P<end>end)(\s|_)(?P<type>(" +
'|'.join(aliases.control.keys()) + r"))$")
begin_control_regex = re.compile(r"^(?P<begin>begin)(\s|_)(?P<type>(" +
'|'.join(aliases.control.keys()) +
r"))( (over )?(?P<list_name>\S+))?$")
select_regexp = re.compile(
r"^(?P<select_command>(" + '|'.join(aliases.select.keys()) +
r")) (?P<list_name>\S+)" +
"( (?P<specify_other>(or specify other|or_other|or other)))?$")
cascading_regexp = re.compile(
r"^(?P<cascading_command>(" +
'|'.join(aliases.cascading.keys()) +
r")) (?P<cascading_level>\S+)?$")
osm_regexp = re.compile(
r"(?P<osm_command>(" + '|'.join(aliases.osm.keys()) +
')) (?P<list_name>\S+)')
for row in survey_sheet:
row_number += 1
prev_control_type, parent_children_array = stack[-1]
# Disabled should probably be first
# so the attributes below can be disabled.
if u"disabled" in row:
warnings.append(
row_format_string % row_number +
" The 'disabled' column header is not part of the current" +
" spec. We recommend using relevant instead.")
disabled = row.pop(u"disabled")
if aliases.yes_no.get(disabled):
continue
# skip empty rows
if len(row) == 0:
continue
# Get question type
question_type = row.get(constants.TYPE)
if not question_type:
# if name and label are also missing,
# then its a comment row, and we skip it with warning
if not ((constants.NAME in row) or (constants.LABEL in row)):
warnings.append(
row_format_string % row_number +
" Row without name, text, or label is being skipped:\n" +
str(row))
continue
raise PyXFormError(
row_format_string % row_number +
" Question with no type.\n" + str(row))
if question_type == 'calculate':
calculation = row.get('bind', {}).get('calculate')
if not calculation:
raise PyXFormError(
row_format_string % row_number + " Missing calculation.")
# Check if the question is actually a setting specified
# on the survey sheet
settings_type = aliases.settings_header.get(question_type)
if settings_type:
json_dict[settings_type] = unicode(row.get(constants.NAME))
continue
# Try to parse question as a end control statement
# (i.e. end loop/repeat/group):
end_control_parse = end_control_regex.search(question_type)
if end_control_parse:
parse_dict = end_control_parse.groupdict()
if parse_dict.get("end") and "type" in parse_dict:
control_type = aliases.control[parse_dict["type"]]
if prev_control_type != control_type or len(stack) == 1:
raise PyXFormError(
row_format_string % row_number +
" Unmatched end statement. Previous control type: " +
str(prev_control_type) +
", Control type: " + str(control_type))
stack.pop()
table_list = None
continue
# Make sure the row has a valid name
if constants.NAME not in row:
if row['type'] == 'note':
# autogenerate names for notes without them
row['name'] = "generated_note_name_" + str(row_number)
# elif 'group' in row['type'].lower():
# # autogenerate names for groups without them
# row['name'] = "generated_group_name_" + str(row_number)
else:
raise PyXFormError(row_format_string % row_number +
" Question or group with no name.")
question_name = unicode(row[constants.NAME])
if not is_valid_xml_tag(question_name):
error_message = row_format_string % row_number
error_message += " Invalid question name [" + \
question_name.encode('utf-8') + "] "
error_message += "Names must begin with a letter, colon,"\
+ " or underscore."
error_message += "Subsequent characters can include numbers," \
+ " dashes, and periods."
raise PyXFormError(error_message)
if constants.LABEL not in row and \
row.get(constants.MEDIA) is None and \
question_type not in aliases.label_optional_types:
# TODO: Should there be a default label?
# Not sure if we should throw warnings for groups...
# Warnings can be ignored so I'm not too concerned
# about false positives.
warnings.append(
row_format_string % row_number +
" Question has no label: " + str(row))
# Try to parse question as begin control statement
# (i.e. begin loop/repeat/group):
begin_control_parse = begin_control_regex.search(question_type)
if begin_control_parse:
parse_dict = begin_control_parse.groupdict()
if parse_dict.get("begin") and "type" in parse_dict:
# Create a new json dict with children, and the proper type,
# and add it to parent_children_array in place of a question.
# parent_children_array will then be set to its children array
# (so following questions are nested under it)
# until an end command is encountered.
control_type = aliases.control[parse_dict["type"]]
new_json_dict = row.copy()
new_json_dict[constants.TYPE] = control_type
child_list = list()
new_json_dict[constants.CHILDREN] = child_list
if control_type is constants.LOOP:
if not parse_dict.get("list_name"):
# TODO: Perhaps warn and make repeat into a group?
raise PyXFormError(
row_format_string % row_number +
" Repeat loop without list name.")
list_name = parse_dict["list_name"]
if list_name not in choices:
raise PyXFormError(
row_format_string % row_number +
" List name not in columns sheet: " + list_name)
new_json_dict[constants.COLUMNS] = choices[list_name]
# Generate a new node for the jr:count column so
# xpath expressions can be used.
repeat_count_expression = new_json_dict.get(
'control', {}).get('jr:count')
if repeat_count_expression:
generated_node_name = new_json_dict['name'] + "_count"
parent_children_array.append({
"name": generated_node_name,
"bind": {
"readonly": "true()",
"calculate": repeat_count_expression,
},
"type": "calculate",
})
new_json_dict['control']['jr:count'] = \
"${" + generated_node_name + "}"
# Code to deal with table_list appearance flags
# (for groups of selects)
ctrl_ap = new_json_dict.get(u"control", {}).get(u"appearance")
if ctrl_ap == constants.TABLE_LIST:
table_list = True
new_json_dict[u"control"][u"appearance"] = u"field-list"
# Generate a note label element so hints and labels
# work as expected in table-lists.
# see https://github.com/modilabs/pyxform/issues/62
if 'label' in new_json_dict or 'hint' in new_json_dict:
generated_label_element = {
"type": "note",
"name":
"generated_table_list_label_" + str(row_number)
}
if 'label' in new_json_dict:
generated_label_element[constants.LABEL] = \
new_json_dict[constants.LABEL]
del new_json_dict[constants.LABEL]
if 'hint' in new_json_dict:
generated_label_element['hint'] = \
new_json_dict['hint']
del new_json_dict['hint']
child_list.append(generated_label_element)
if 'intent' in new_json_dict:
new_json_dict['control'] = \
new_json_dict.get(u"control", {})
new_json_dict['control']['intent'] = \
new_json_dict['intent']
parent_children_array.append(new_json_dict)
stack.append((control_type, child_list))
continue
# try to parse as a cascading select
cascading_parse = cascading_regexp.search(question_type)
if cascading_parse:
parse_dict = cascading_parse.groupdict()
if parse_dict.get("cascading_command"):
cascading_level = parse_dict["cascading_level"]
cascading_prefix = row.get(constants.NAME)
if not cascading_prefix:
raise PyXFormError(
row_format_string % row_number +
" Cascading select needs a name.")
# cascading_json = get_cascading_json(
# cascading_choices, cascading_prefix, cascading_level)
if len(cascading_choices) <= 0 or \
'questions' not in cascading_choices[0]:
raise PyXFormError(
"Found a cascading_select " + cascading_level +
", but could not find " + cascading_level +
"in cascades sheet.")
cascading_json = cascading_choices[0]['questions']
json_dict['choices'] = choices
include_bindings = False
if 'bind' in row:
include_bindings = True
for cq in cascading_json:
# include bindings
if include_bindings:
cq['bind'] = row['bind']
def replace_prefix(d, prefix):
for k, v in d.items():
if isinstance(v, basestring):
d[k] = v.replace('$PREFIX$', prefix)
elif isinstance(v, dict):
d[k] = replace_prefix(v, prefix)
elif isinstance(v, list):
d[k] = map(
lambda x: replace_prefix(x, prefix), v)
return d
parent_children_array.append(
replace_prefix(cq, cascading_prefix))
continue # so the row isn't put in as is
# Try to parse question as a select:
select_parse = select_regexp.search(question_type)
if select_parse:
parse_dict = select_parse.groupdict()
if parse_dict.get("select_command"):
select_type = aliases.select[parse_dict["select_command"]]
if select_type == 'select one external' \
and 'choice_filter' not in row:
warnings.append(
row_format_string % row_number +
u" select one external is only meant for"
u" filtered selects.")
select_type = aliases.select['select_one']
list_name = parse_dict["list_name"]
list_file_name, file_extension = os.path.splitext(list_name)
if list_name not in choices \
and select_type != 'select one external' \
and file_extension not in ['.csv', '.xml']:
if not choices:
raise PyXFormError(
u"There should be a choices sheet in this xlsform."
u" Please ensure that the choices sheet name is "
u"all in small caps and has columns 'list name', "
u"'name', and 'label' (or aliased column names).")
raise PyXFormError(
row_format_string % row_number +
" List name not in choices sheet: " + list_name)
# Validate select_multiple choice names by making sure
# they have no spaces (will cause errors in exports).
if select_type == constants.SELECT_ALL_THAT_APPLY \
and file_extension not in ['.csv', '.xml']:
for choice in choices[list_name]:
if ' ' in choice[constants.NAME]:
raise PyXFormError(
"Choice names with spaces cannot be added "
"to multiple choice selects. See [" +
choice[constants.NAME] + "] in [" +
list_name + "]")
specify_other_question = None
if parse_dict.get("specify_other") is not None:
select_type += u" or specify other"
# With this code we no longer need to handle or_other
# questions in survey builder.
# However, it depends on being able to use choice filters
# and xpath expressions that return empty sets.
# choices[list_name].append(
# {
# 'name': 'other',
# 'label': {default_language : 'Other'},
# 'orOther': 'true',
# })
# or_other_xpath = 'isNull(orOther)'
# if 'choice_filter' in row:
# row['choice_filter'] += ' or ' + or_other_xpath
# else:
# row['choice_filter'] = or_other_xpath
# specify_other_question = \
# {
# 'type':'text',
# 'name': row['name'] + '_specify_other',
# 'label':
# 'Specify Other for:\n"' + row['label'] + '"',
# 'bind' : {'relevant':
# "selected(../%s, 'other')" % row['name']},
# }
new_json_dict = row.copy()
new_json_dict[constants.TYPE] = select_type
if row.get('choice_filter'):
if select_type == 'select one external':
new_json_dict['query'] = list_name
else:
new_json_dict['itemset'] = list_name
json_dict['choices'] = choices
elif file_extension in ['.csv', '.xml']:
new_json_dict['itemset'] = list_name
else:
new_json_dict[constants.CHOICES] = choices[list_name]
# Code to deal with table_list appearance flags
# (for groups of selects)
if table_list is not None:
# Then this row is the first select in a table list
if not isinstance(table_list, basestring):
table_list = list_name
table_list_header = {
constants.TYPE: select_type,
constants.NAME:
"reserved_name_for_field_list_labels_" +
str(row_number),
# Adding row number for uniqueness # noqa
constants.CONTROL: {u"appearance": u"label"},
constants.CHOICES: choices[list_name],
# Do we care about filtered selects in table lists?
# 'itemset' : list_name,
}
parent_children_array.append(table_list_header)
if table_list != list_name:
error_message = row_format_string % row_number
error_message += " Badly formatted table list," \
" list names don't match: " + \
table_list + " vs. " + list_name
raise PyXFormError(error_message)
control = new_json_dict[u"control"] = \
new_json_dict.get(u"control", {})
control[u"appearance"] = "list-nolabel"
parent_children_array.append(new_json_dict)
if specify_other_question:
parent_children_array.append(specify_other_question)
continue
# Try to parse question as osm:
osm_parse = osm_regexp.search(question_type)
if osm_parse:
parse_dict = osm_parse.groupdict()
new_dict = row.copy()
new_dict['type'] = constants.OSM
if parse_dict.get('list_name') is not None:
tags = osm_tags.get(parse_dict.get('list_name'))
for tag in tags:
if osm_tags.get(tag.get('name')):
tag['choices'] = osm_tags.get(tag.get('name'))
new_dict['tags'] = tags
parent_children_array.append(new_dict)
continue
# TODO: Consider adding some question_type validation here.
# Put the row in the json dict as is:
parent_children_array.append(row)
if len(stack) != 1:
raise PyXFormError("Unmatched begin statement: " + str(stack[-1][0]))
if settings.get('flat', False):
# print "Generating flattened instance..."
add_flat_annotations(stack[0][1])
meta_children = []
if aliases.yes_no.get(settings.get("omit_instanceID")):
if settings.get("public_key"):
raise PyXFormError(
"Cannot omit instanceID, it is required for encryption.")
else:
# Automatically add an instanceID element:
meta_children.append({
"name": "instanceID",
"bind": {
"readonly": "true()",
"calculate": settings.get(
"instance_id", "concat('uuid:', uuid())"),
},
"type": "calculate",
})
if 'instance_name' in settings:
# Automatically add an instanceName element:
meta_children.append({
"name": "instanceName",
"bind": {
"calculate": settings['instance_name']
},
"type": "calculate",
})
if len(meta_children) > 0:
meta_element = \
{
"name": "meta",
"type": "group",
"control": {
"bodyless": True
},
"children": meta_children
}
noop, survey_children_array = stack[0]
survey_children_array.append(meta_element)
# print_pyobj_to_json(json_dict)
return json_dict
2
Example 37
Project: CVE-Scan Source File: TermDisplay.py
@classmethod
def start(self, scan=None):
    """Run the interactive terminal browser over a CVE-Scan result.

    `scan` is the enhanced scan document (a dict); each entry of its
    'systems' list is shown as one page, with drill-down into the CVEs of
    every CPE or service.  Blocks until the user quits (q / ESC).

    NOTE(review): relies on module-level helpers `specter`, `make_dict`,
    `fromEpoch` and `splitByLength` plus the `re` and `copy` imports --
    confirm against the module header.
    """
    systems = scan['systems'] if scan and 'systems' in scan else None
    colors = {'vulnerable': ('red', 'black', False)}
    screen = specter.Specter(markupSet=colors)

    # Functions
    def product(banner):
        # Best-effort product name extracted from a service banner.
        if banner:
            r = make_dict(banner)
            return r['product']
        else:
            return 'Unknown'

    def getSystemInfo(s):
        # Build the scrollable two-column content list describing one system.
        cpes = s['cpes'] if 'cpes' in s else ['Not Detected']
        mac = s['mac'] if s['mac'] else 'Unknown'
        marked = 'vulnerable' if 'cves' in cpes[0] else 'normal'
        hosts = s['hostnames'] if 'hostnames' in s else ['None']
        services = s['services'] if 'services' in s else [_NoServ]
        serv = services[0]
        cont = [{'tn': 'i',
                 'tc': [{'t': 'IP', 'm': 'title'},
                        {'t': s['ip']}]},
                {'tn': 'i',
                 'tc': [{'t': 'MAC', 'm': 'title'},
                        {'t': mac}]},
                {'tn': 'i',
                 'tc': [{'t': 'Status', 'm': 'title'},
                        {'t': s['status']}]},
                {'tn': 'i',
                 'tc': [{'t': 'CPEs', 'm': 'title'},
                        {'t': cpes[0]['cpe'], 'm': marked}]}]
        for cpe in cpes[1:]:
            marked = 'vulnerable' if 'cves' in cpe else 'normal'
            # BUGFIX: display each additional CPE (was cpes[0]['cpe'], which
            # repeated the first CPE while the marking followed `cpe`).
            cont.append({'tn': 'i',
                         'tc': [{'t': ' '},
                                {'t': cpe['cpe'], 'm': marked}]})
        cont.append({'tn': 'i',
                     'tc': [{'t': 'Vendor', 'm': 'title'},
                            {'t': s['vendor']}]})
        cont.append({'tn': 'i',
                     'tc': [{'t': 'Hostnames', 'm': 'title'},
                            {'t': hosts[0]}]})
        for host in hosts[1:]:
            cont.append({'tn': 'i',
                         'tc': [{'t': ' ', 'm': 'title'},
                                {'t': host}]})
        cont.append({'tn': 'i',
                     'tc': [{'t': 'Distance', 'm': 'title'},
                            {'t': s['distance']}]})
        ser = '%s (%s/%s) is %s' % (serv['name'], serv['port'],
                                    serv['protocol'], serv['state'])
        marked = 'vulnerable' if len(serv['cves']) > 0 else 'normal'
        cont.extend([{'tn': 'i',
                      'tc': [{'t': 'Services', 'm': 'title'},
                             {'t': ser, 'm': marked}]},
                     {'tn': 'i',
                      'tc': [{'t': ' ', 'm': 'title'},
                             {'t': ' > %s' % product(serv['banner']),
                              'm': marked}]},
                     {'tn': 'i',
                      'tc': [{'t': ' ', 'm': 'title'},
                             {'t': ' > %s' % serv['cpe'],
                              'm': marked}]}])
        for serv in services[1:]:
            marked = 'vulnerable' if len(serv['cves']) > 0 else 'normal'
            ser = '%s (%s/%s) is %s' % (serv['name'], serv['port'],
                                        serv['protocol'], serv['state'])
            cont.extend([{'tn': 'i',
                          'tc': [{'t': ' '},
                                 {'t': ser, 'm': marked}]},
                         {'tn': 'i',
                          'tc': [{'t': ' '},
                                 {'t': ' > %s' % product(serv['banner']),
                                  'm': marked}]},
                         {'tn': 'i',
                          'tc': [{'t': ' '},
                                 {'t': ' > %s' % serv['cpe'],
                                  'm': marked}]}])
        return cont

    def cvesForcpe(line, sys, args=None):
        # Resolve `line` (a content row, port number, CPE string or product
        # name) to a CVE list on system `sys`; open the CVE window if found.
        if type(line) == dict:
            if 'tc' in line: line = line['tc'][1]
            if 't' in line: line = line['t']
        if type(line) is not str: line = str(line)
        # Clean out collumns
        if line.startswith("CPEs"): line = line[4:].strip()
        if line.startswith("Services"): line = line[8:].strip()
        # reset variables
        service = None
        cves = None
        # make sure we're dealing with strings
        line = str(line)
        # handle args if present, else use current line
        if args:
            line = str(args[0])
        else:
            if line.strip().startswith('> '):  # We're either working by cpe or product
                line = line.strip('> ')
            elif line.startswith('cpe:'):  # We're working by cpe
                pass
            else:  # We're working by port
                line = rePortLine.search(line)
                if line: line = line.group()[1:-5]
                else: return
        # see if we match on port
        if rePort.match(line):
            for s in sys['services']:
                if str(s['port']) == line:
                    service = s
                    cves = s['cves']
                    break
        # see if we match on cpe
        elif reCPE.match(line):
            for s in sys['services']:
                if str(s['cpe']) == line:
                    service = s
                    cves = s['cves']
                    break
            if not service:
                for c in sys['cpes']:
                    if c['cpe'] == line:
                        cves = c['cves']
                        break
        # see if we match on product name
        else:
            for s in sys['services']:
                if product(s['banner']) == line:
                    service = s
                    cves = s['cves']
                    break
        if cves:
            cveList(cves, service)

    # Windows
    def splash():
        x, y = screen.getMaxXY()
        # BUGFIX: raising a plain string is a TypeError in modern Python;
        # raise a real exception so the message actually reaches the user.
        if y < 10:
            raise RuntimeError("Please make sure your terminal has at least 10 rows")
        screen.splash(tSplash)

    def help():
        screen.scroll(tHelp, footer=tDefFoot, nav=extendedNav)

    def info():
        # Summary page of the scan and enhancement timestamps.
        text = [{'t': "Scan", 'm': "title"},
                {'t': " Date: %s" % fromEpoch(scan['scan']['time'])},
                {'t': " Type: %s" % scan['scan']['type']},
                {'t': "Enhancement", 'm': "title"},
                {'t': " Date: %s" % fromEpoch(scan['enhanced']['time'])}]
        screen.scroll(text, footer=tDefFoot, nav=extendedNav)

    def cveList(cves, service=None):
        # One line per CVE: id, C/I/A impacts, access vector and complexity;
        # Enter/o opens the detail page.
        navSet = copy.deepcopy(extendedNav)
        navSet['enter'] = ['o']
        text = []
        for cve in cves:
            C = cve[_I][_IC][0] if _I in cve and _IC in cve[_I] else "?"
            I = cve[_I][_II][0] if _I in cve and _II in cve[_I] else "?"
            A = cve[_I][_IA][0] if _I in cve and _IA in cve[_I] else "?"
            V = cve[_A][_AV][0] if _A in cve and _AV in cve[_A] else "?"
            Co = cve[_A][_AC][0] if _A in cve and _AC in cve[_A] else "?"
            text.append({'t': "%s - %s%s%s - %s %s" % (cve['id'], C, I, A, V, Co),
                         'a': cveDetails, 'p': cve})
        screen.scroll(text, header=tServiceHead, footer=tServiceFoot,
                      cursor=True, nav=navSet)

    def cveDetails(cve):
        # Full-detail page for a single CVE.
        maxx, maxy = screen.getMaxXY()
        summary = splitByLength(cve['summary'], maxx - 18)
        text = ["CVE id %s" % cve['id']]
        text.append("Summary %s" % summary[0])
        for extra in summary[1:]:
            text.append(" %s" % extra)
        text.append(" ")
        text.append("CVSS Base: %s" % (cve['cvss']))
        text.append(" Exploitability: %s" % (cve['exploitCVSS'] if 'exploitCVSS' in cve else ' -'))
        text.append(" Impact: %s" % (cve['impactCVSS'] if 'impactCVSS' in cve else ' -'))
        text.append(" ")
        text.append("Access Vector: %s" % cve['access']['vector'])
        text.append(" Complexity: %s" % cve['access']['complexity'])
        text.append(" Authentication: %s" % cve['access']['authentication'])
        text.append(" ")
        text.append("Impact Confidentiality: %s" % cve['impact']['confidentiality'])
        text.append(" Integrity: %s" % cve['impact']['integrity'])
        text.append(" Availability: %s" % cve['impact']['availability'])
        screen.scroll(text, footer=tDefFoot, nav=extendedNav)

    def home():
        # Main loop: one page per system; n/p cycle systems, o opens the
        # CVEs of the current line, c enters command mode.
        index = 0
        lineNr = 0
        while True:
            system = systems[index]
            content = getSystemInfo(system)
            foot = copy.deepcopy(tNavFoot)
            foot[1] = foot[1] % (index + 1, len(systems))
            key, lineNr = screen.scroll(content, footer=foot, cursor=lineNr,
                                        blocking=False, functions=keyFuncts,
                                        nav=sysNav)
            if key in ['n']:
                index += 1
                if index >= len(systems): index = 0
            elif key in ['p']:
                index -= 1
                if index < 0: index = len(systems) - 1
            elif key in ['o']:
                cvesForcpe(content[lineNr], system)
            elif key in ['c']:
                parts = screen.userInput("Enter your command").lower().split()
                if parts:
                    command = parts[0]
                    args = parts[1:]
                    if command in ['h', 'help']: help()
                    elif command in ['c', 'cve']:
                        line = content[lineNr]
                        if type(line) == dict and 't' in line: line = line['t']
                        if type(line) == str: cvesForcpe(line, system, args)
                    elif command in ['i', 'info']: info()
                    else: screen.popup(tInvalidCommand)
            elif key in ['q', chr(specter.KEY_ESC)]:
                break

    # Static texts and navigation tables.
    tSplash = [{'t': " _____ _ _ _____ _____ ", 'm': 'header'},
               {'t': "/ __ \ | | | ___| / ___| ", 'm': 'header'},
               {'t': "| / \/ | | | |__ _____\ `--. ___ __ _ _ __ ", 'm': 'header'},
               {'t': "| | | | | | __|_____|`--. \/ __/ _` | '_ \ ", 'm': 'header'},
               {'t': "| \__/\ \_/ / |___ /\__/ / (_| (_| | | | |", 'm': 'header'},
               {'t': " \____/\___/\____/ \____/ \___\__,_|_| |_|", 'm': 'header'},
               {'t': " (c) NorthernSec ", 'm': 'header'},
               {'t': " [Press the any key] ", 'm': 'title'}]
    tNavFoot = ["(u)p | (n)ext | (p)revious | (q)uit |",
                "(d)own | (j)ump to | (c)ommand | (o)pen | [%s/%s]"]
    tServiceHead = ['CVE - CIA - Vector Complexity']
    tServiceFoot = ['Vector: N(etwork) - A(djecent network) - L(ocal)',
                    'CIA Impact: L(ow) - M(edium) - H(igh)',
                    'Press Enter or o for more info']
    tHelp = [{'t': '----------', 'm': 'title'},
             {'t': '| HELP |', 'm': 'title'},
             {'t': '----------', 'm': 'title'},
             {'t': ' '},
             {'t': 'Navigating', 'm': 'title'},
             {'t': ' * You can navigate through the scanned systems with p and n, or the left and right arrow key'},
             {'t': ' * You can scroll through the current system with u and d, or the up and down arrow key'},
             {'t': ' * You can jump directly to a scanned system by entering the page number with j, and entering the page number.'},
             {'t': ' '},
             {'t': 'Commands', 'm': 'title'},
             {'t': 'By pressing c, you can enter commands:'},
             {'t': ' h/help - Displays this menu'},
             {'t': ' c/cve [port/cpe/banner] - If found, displays CVEs of the current line, or service with the parameter'},
             {'t': ' i/info - Display info of the scan'}]
    tDefFoot = [" - press q or ESC to return to the previous page-"]
    tInvalidCommand = [{'t': 'Invalid command', 'm': 'title'}]
    extendedNav = {'esc': ["q"], 'up': ["u"], 'down': ["d"]}
    sysNav = {'esc': ["q"], 'up': ["u"], 'down': ["d"], 'left': ["p"], 'right': ["n"]}
    keyFuncts = {'i': info, 'h': help}
    # Port: any decimal 0-65535; CPE: URI (cpe:/) or formatted (cpe:2.3:) form.
    rePort = re.compile('^([0-9]{1,4}|[1-5][0-9]{4}|6[0-4][0-9]{3}|65[0-4][0-9]{2}|655[0-2][0-9]|6553[0-5])$')
    reCPE = re.compile('^((%s|%s)[aoh]:.*)$' % (re.escape('cpe:/'), re.escape('cpe:2.3:')))
    rePortLine = re.compile('\\([0-9]*/(tcp|udp)\\)')
    _I = "impact"
    _IC = "confidentiality"
    _II = "integrity"
    _IA = "availability"
    _A = "access"
    _AV = "vector"
    _AC = "complexity"
    _NoServ = "No services found"
    try:
        splash()
        home()
        screen.stop()
    except Exception as ex:
        # Always restore the terminal, then re-raise with the original
        # traceback (was `raise(ex)`, which loses it on Python 2).
        screen.stop()
        raise
2
Example 38
Project: pycoin Source File: tx.py
def parse_context(args, parser):
    """Interpret the mixed positional arguments of the pycoin ``tx`` tool.

    Each entry of ``args.argument`` is tried, in order, as: a 64-hex-digit
    transaction id, hex transaction data, a valid address, a key (WIF/text),
    a path to a transaction file, a ``Spendable`` in
    "tx_hash/index/script/value" form, or an "address/value" payable pair.
    The first interpretation that succeeds wins; anything unrecognised
    aborts via ``parser.error``.

    Returns the 9-tuple (txs, spendables, payables, key_iters, p2sh_lookup,
    tx_db, warning_tx_cache, warning_tx_for_tx_hash, warning_spendables).

    NOTE(review): depends on pycoin names (Key, Tx, Spendable, h2b, h2b_rev,
    hash160, is_address_valid, get_tx_db, message_about_* helpers, ...)
    imported elsewhere in this file -- confirm against the module header.
    """
    # defaults
    txs = []
    spendables = []
    payables = []
    key_iters = []
    # A transaction id is exactly 64 hex digits.
    TX_ID_RE = re.compile(r"^[0-9a-fA-F]{64}$")
    # there are a few warnings we might optionally print out, but only if
    # they are relevant. We don't want to print them out multiple times, so we
    # collect them here and print them at the end if they ever kick in.
    warning_tx_cache = None
    warning_tx_for_tx_hash = None
    warning_spendables = None
    if args.private_key_file:
        # Candidate WIF strings (base58 alphabet, 51-111 characters).
        wif_re = re.compile(r"[1-9a-km-zA-LMNP-Z]{51,111}")
        # address_re = re.compile(r"[1-9a-kmnp-zA-KMNP-Z]{27-31}")
        for f in args.private_key_file:
            if f.name.endswith(".gpg"):
                # Decrypt .gpg key files via an external gpg process.
                gpg_args = ["gpg", "-d"]
                if args.gpg_argument:
                    gpg_args.extend(args.gpg_argument.split())
                gpg_args.append(f.name)
                popen = subprocess.Popen(gpg_args, stdout=subprocess.PIPE)
                f = popen.stdout
            for line in f.readlines():
                # decode
                if isinstance(line, bytes):
                    line = line.decode("utf8")
                # look for WIFs
                possible_keys = wif_re.findall(line)

                def make_key(x):
                    # Parse one candidate; None when it is not a real key.
                    try:
                        return Key.from_text(x)
                    except Exception:
                        return None

                keys = [make_key(x) for x in possible_keys]
                for key in keys:
                    if key:
                        key_iters.append((k.wif() for k in key.subkeys("")))
                # if len(keys) == 1 and key.hierarchical_wallet() is None:
                #     # we have exactly 1 WIF. Let's look for an address
                #     potential_addresses = address_re.findall(line)
    # update p2sh_lookup
    p2sh_lookup = {}
    if args.pay_to_script:
        for p2s in args.pay_to_script:
            try:
                script = h2b(p2s)
                p2sh_lookup[hash160(script)] = script
            except Exception:
                print("warning: error parsing pay-to-script value %s" % p2s)
    if args.pay_to_script_file:
        hex_re = re.compile(r"[0-9a-fA-F]+")
        for f in args.pay_to_script_file:
            count = 0
            for l in f:
                try:
                    # First hex run on each line is taken as a script.
                    m = hex_re.search(l)
                    if m:
                        p2s = m.group(0)
                        script = h2b(p2s)
                        p2sh_lookup[hash160(script)] = script
                        count += 1
                except Exception:
                    print("warning: error parsing pay-to-script file %s" % f.name)
            if count == 0:
                print("warning: no scripts found in %s" % f.name)
    # we create the tx_db lazily
    tx_db = None
    for arg in args.argument:
        # hex transaction id
        if TX_ID_RE.match(arg):
            if tx_db is None:
                warning_tx_cache = message_about_tx_cache_env()
                warning_tx_for_tx_hash = message_about_tx_for_tx_hash_env(args.network)
                tx_db = get_tx_db(args.network)
            tx = tx_db.get(h2b_rev(arg))
            if not tx:
                # Surface any environment hints before aborting.
                for m in [warning_tx_cache, warning_tx_for_tx_hash, warning_spendables]:
                    if m:
                        print("warning: %s" % m, file=sys.stderr)
                parser.error("can't find Tx with id %s" % arg)
            txs.append(tx)
            continue
        # hex transaction data
        try:
            tx = Tx.from_hex(arg)
            txs.append(tx)
            continue
        except Exception:
            pass
        # A plain address becomes a zero-valued payable.
        is_valid = is_address_valid(arg, allowable_netcodes=[args.network])
        if is_valid:
            payables.append((arg, 0))
            continue
        try:
            key = Key.from_text(arg)
            # TODO: check network
            if key.wif() is None:
                # Public key only: treat its address as a payable.
                payables.append((key.address(), 0))
                continue
            # TODO: support paths to subkeys
            key_iters.append((k.wif() for k in key.subkeys("")))
            continue
        except Exception:
            pass
        if os.path.exists(arg):
            # A file containing a transaction (hex-encoded when named *hex).
            try:
                with open(arg, "rb") as f:
                    if f.name.endswith("hex"):
                        f = io.BytesIO(codecs.getreader("hex_codec")(f).read())
                    tx = Tx.parse(f)
                    txs.append(tx)
                    try:
                        # Trailing unspents are optional; best-effort parse.
                        tx.parse_unspents(f)
                    except Exception as ex:
                        pass
                    continue
            except Exception:
                pass
        parts = arg.split("/")
        if len(parts) == 4:
            # spendable
            try:
                spendables.append(Spendable.from_text(arg))
                continue
            except Exception:
                pass
        # "address/value" payable pair.
        if len(parts) == 2 and is_address_valid(parts[0], allowable_netcodes=[args.network]):
            try:
                payables.append(parts)
                continue
            except ValueError:
                pass
        parser.error("can't parse %s" % arg)
    if args.fetch_spendables:
        warning_spendables = message_about_spendables_for_address_env(args.network)
        for address in args.fetch_spendables:
            spendables.extend(spendables_for_address(address))
    for tx in txs:
        # Fill in missing unspents from the tx db when augmenting.
        if tx.missing_unspents() and args.augment:
            if tx_db is None:
                warning_tx_cache = message_about_tx_cache_env()
                warning_tx_for_tx_hash = message_about_tx_for_tx_hash_env(args.network)
                tx_db = get_tx_db(args.network)
            tx.unspents_from_db(tx_db, ignore_missing=True)
    return (txs, spendables, payables, key_iters, p2sh_lookup, tx_db, warning_tx_cache,
            warning_tx_for_tx_hash, warning_spendables)
2
Example 39
def select():
    """web2py appadmin controller: browse/update/delete rows via a query.

    Parses the user-supplied query string (optionally rewriting the
    shorthand ``table.field=value`` form into a full DAL expression), runs
    it with pagination, and handles the optional update / delete checkboxes
    plus CSV import into the detected table.

    NOTE(review): runs inside the web2py global environment -- ``request``,
    ``response``, ``session``, ``T``, the HTML helpers (FORM, DIV, INPUT,
    LABEL, SPAN, PRE, URL), ``get_database``, ``get_query``, ``import_csv``
    and ``eval_in_global_env`` are injected globals, not imports.
    Python 2 syntax (``except Exception, e``).
    """
    import re
    db = get_database(request)
    dbname = request.args[0]
    try:
        is_imap = db._uri.startswith("imap://")
    except (KeyError, AttributeError, TypeError):
        is_imap = False
    # Shorthand query: table.field=value (numeric value by default).
    regex = re.compile('(?P<table>\w+)\.(?P<field>\w+)=(?P<value>\d+)')
    if len(request.args) > 1 and hasattr(db[request.args[1]], '_primarykey'):
        # Keyed tables may have non-numeric primary keys.
        regex = re.compile('(?P<table>\w+)\.(?P<field>\w+)=(?P<value>.+)')
    if request.vars.query:
        match = regex.match(request.vars.query)
        if match:
            request.vars.query = '%s.%s.%s==%s' % (request.args[0],
                match.group('table'), match.group('field'),
                match.group('value'))
    else:
        # No query supplied: re-use the last one stored in the session.
        request.vars.query = session.last_query
    query = get_query(request)
    if request.vars.start:
        start = int(request.vars.start)
    else:
        start = 0
    nrows = 0
    step = 100
    fields = []
    if is_imap:
        # IMAP adapters are slow; fetch only a few messages per page.
        step = 3
    stop = start + step
    table = None
    rows = []
    orderby = request.vars.orderby
    if orderby:
        orderby = dbname + '.' + orderby
        if orderby == session.last_orderby:
            # Clicking the same column twice toggles the sort direction.
            if orderby[0] == '~':
                orderby = orderby[1:]
            else:
                orderby = '~' + orderby
    session.last_orderby = orderby
    session.last_query = request.vars.query
    # Query / update / delete form (Materialize CSS grid classes).
    form = FORM(DIV(DIV(INPUT(_style='width:400px',
                              _name='query',
                              _id='query_inp',
                              _value=request.vars.query or '',
                              requires=IS_NOT_EMPTY(
                                  error_message=T("Cannot be empty"))),
                        LABEL("Query:", _for='query_inp'),
                        _class="input-field col offset-s3 s6"),
                    _class="row"),
                DIV(DIV(INPUT(_name='update_check',
                              _id='update_inp',
                              _type='checkbox',
                              value=False),
                        LABEL("Update", _for='update_inp'),
                        _class="input-field col offset-s3 s6"),
                    _class="row"),
                DIV(DIV(INPUT(_style='width:400px',
                              _name='update_fields',
                              _id='update_field_inp',
                              _value=request.vars.update_fields or ''),
                        LABEL("Update Fields:", _for='update_fields_inp'),
                        _class="input-field col offset-s3 s6"),
                    _class="row"),
                DIV(DIV(INPUT(_name='delete_check',
                              _class='delete',
                              _id='delete_inp',
                              _type='checkbox',
                              value=False),
                        LABEL("Delete:", _for='delete_inp'),
                        _class="input-field col offset-s3 s6"),
                    _class="row"),
                DIV(DIV(INPUT(_type='submit',
                              _value=T('submit')),
                        _class="input-field col offset-s3 s6"),
                    _class="row"),
                _action=URL(r=request, args=request.args),
                _class="row col s12")
    tb = None
    if form.accepts(request.vars, formname=None):
        # Remember which table the query touches (used for CSV import below).
        regex = re.compile(request.args[0] + '\.(?P<table>\w+)\..+')
        match = regex.match(form.vars.query.strip())
        if match:
            table = match.group('table')
        try:
            nrows = db(query, ignore_common_filters=True).count()
            if form.vars.update_check and form.vars.update_fields:
                # NOTE(review): update_fields is evaluated as Python via
                # eval_in_global_env -- trusted-admin input only.
                db(query, ignore_common_filters=True).update(
                    **eval_in_global_env('dict(%s)' % form.vars.update_fields))
                response.flash = T('%s %%{row} updated', nrows)
            elif form.vars.delete_check:
                db(query, ignore_common_filters=True).delete()
                response.flash = T('%s %%{row} deleted', nrows)
                nrows = db(query, ignore_common_filters=True).count()
            if is_imap:
                # IMAP rows expose a fixed pseudo-schema.
                fields = [db[table][name] for name in
                          ("id", "uid", "created", "to",
                           "sender", "subject")]
            if orderby:
                rows = db(query, ignore_common_filters=True).select(
                    *fields, limitby=(start, stop),
                    orderby=eval_in_global_env(orderby))
            else:
                rows = db(query, ignore_common_filters=True).select(
                    *fields, limitby=(start, stop))
        except Exception, e:
            import traceback
            tb = traceback.format_exc()
            (rows, nrows) = ([], 0)
            response.flash = DIV(T('Invalid Query'), PRE(str(e)))
    # begin handle upload csv
    csv_table = table or request.vars.table
    if csv_table:
        formcsv = FORM(DIV(str(T('or import from csv file')) + " ", _class="row"),
                       DIV(DIV(DIV(SPAN("File"),
                                   INPUT(_type='file', _name='csvfile'),
                                   _class="btn"),
                               DIV(INPUT(_class="file-path", _type="text"),
                                   _class="file-path-wrapper"),
                               _class="col offset-s4 s4 file-field input-field"),
                           _class="row"),
                       DIV(DIV(INPUT(_type='hidden', _value=csv_table, _name='table'),
                               INPUT(_type='submit', _value=T('import')),
                               _class="col offset-s4 s4"),
                           _class="row"),
                       _class="row center")
    else:
        formcsv = None
    if formcsv and formcsv.process().accepted:
        try:
            import_csv(db[request.vars.table],
                       request.vars.csvfile.file)
            response.flash = T('data uploaded')
        except Exception, e:
            response.flash = DIV(T('unable to parse csv file'), PRE(str(e)))
    # end handle upload csv
    return dict(
        form=form,
        table=table,
        start=start,
        stop=stop,
        step=step,
        nrows=nrows,
        rows=rows,
        query=request.vars.query,
        formcsv=formcsv,
        tb=tb
    )
2
Example 40
Project: FanFicFare Source File: adapter_storiesonlinenet.py
Function: doExtractChapterUrlsAndMetadata
def doExtractChapterUrlsAndMetadata(self, get_cover=True):
    """Collect chapter URLs and story metadata from a storiesonline.net page.

    Fetches the story index (the ``:i`` URL suffix), logging in when the
    site demands it, then scrapes title, authors, chapters, and -- from the
    author's listing pages -- genre, size, score, description, series,
    universe, tags, dates, cover and completion status.

    NOTE(review): Python 2 code (``except ... , e``, ``unicode``); relies on
    adapter infrastructure (self._fetchUrl, self.make_soup, self.story,
    exceptions, stripHTML, makeDate, logger) defined elsewhere in the file.
    """
    # index=1 makes sure we see the story chapter index. Some
    # sites skip that for one-chapter stories.
    url = self.url
    logger.debug("URL: "+url)
    self.needToLogin = False
    try:
        data = self._fetchUrl(url+":i")
    except urllib2.HTTPError, e:
        if e.code in (404, 410):
            raise exceptions.StoryDoesNotExist("Code: %s: %s"%(e.code,self.url))
        elif e.code == 401:
            # 401 means the index exists but requires login; retry below.
            self.needToLogin = True
            data = ''
        else:
            raise e
    if self.needToLoginCheck(data):
        # need to log in for this one.
        self.performLogin(url)
        try:
            data = self._fetchUrl(url+":i",usecache=False)
        except urllib2.HTTPError, e:
            if e.code in (404, 410):
                raise exceptions.StoryDoesNotExist("Code: %s: %s"%(e.code,self.url))
            elif e.code == 401:
                self.needToLogin = True
                data = ''
            else:
                raise e
    # Site-level error pages come back as HTTP 200 with a message in the body.
    if "Access denied. This story has not been validated by the adminstrators of this site." in data:
        raise exceptions.AccessDenied(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")
    elif "Error! The story you're trying to access is being filtered by your choice of contents filtering." in data:
        raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Error! The story you're trying to access is being filtered by your choice of contents filtering.")
    elif "Error! Daily Limit Reached" in data:
        raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Error! Daily Limit Reached")
    # use BeautifulSoup HTML parser to make everything easier to find.
    soup = self.make_soup(data)
    #print data
    # Now go hunting for all the meta data and the chapter list.
    ## Title
    a = soup.find('h1')
    self.story.setMetadata('title',stripHTML(a))
    notice = soup.find('div', {'class' : 'notice'})
    if notice:
        self.story.setMetadata('notice',unicode(notice))
    # Find authorid and URL from... author url.
    for a in soup.findAll('a', href=re.compile(r"/a/\w+")):
        self.story.addToList('authorId',a['href'].split('/')[2])
        self.story.addToList('authorUrl','http://'+self.host+a['href'])
        self.story.addToList('author',stripHTML(a).replace("'s Page",""))
    # Find the chapters:
    chapters = soup.findAll('a', href=re.compile(r'^/s/'+self.story.getMetadata('storyId')+":\d+(/.*)?$"))
    if len(chapters) != 0:
        for chapter in chapters:
            # just in case there's tags, like <i> in chapter titles.
            self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+chapter['href']))
    else:
        # Single-chapter story: the story URL itself is the one chapter.
        self.chapterUrls.append((self.story.getMetadata('title'),'http://'+self.host+'/s/'+self.story.getMetadata('storyId')))
    self.story.setMetadata('numChapters',len(self.chapterUrls))
    # surprisingly, the detailed page does not give enough details, so go to author's page
    # Walk the author's listing pages until the row for this story is found.
    page=0
    i=0
    while i == 0:
        data = self._fetchUrl(self.story.getList('authorUrl')[0]+"/"+unicode(page))
        asoup = self.make_soup(data)
        a = asoup.findAll('td', {'class' : 'lc2'})
        for lc2 in a:
            if lc2.find('a', href=re.compile(r'^/s/'+self.story.getMetadata('storyId'))):
                i=1
                break
            if a[len(a)-1] == lc2:
                # Last row on this page and still not found: next page.
                page=page+1
    # NOTE(review): relies on `lc2` leaking out of the loop above -- it now
    # points at this story's row on the author page.
    for cat in lc2.findAll('div', {'class' : 'typediv'}):
        self.story.addToList('genre',cat.text)
    # in lieu of word count.
    self.story.setMetadata('size', lc2.findNext('td', {'class' : 'num'}).text)
    score = lc2.findNext('th', {'class' : 'ynum'}).text
    if score != '-':
        self.story.setMetadata('score', score)
    lc4 = lc2.findNext('td', {'class' : 'lc4'})
    desc = lc4.contents[0]
    try:
        a = lc4.find('a', href=re.compile(r"/series/\d+/.*"))
        # logger.debug("Looking for series - a='{0}'".format(a))
        if a:
            # if there's a number after the series name, series_contents is a two element list:
            # [<a href="...">Title</a>, u' (2)']
            series_contents = a.parent.contents
            i = 0 if len(series_contents) == 1 else series_contents[1].strip(' ()')
            seriesUrl = 'http://'+self.host+a['href']
            self.story.setMetadata('seriesUrl',seriesUrl)
            series_name = stripHTML(a)
            # logger.debug("Series name= %s" % series_name)
            series_soup = self.make_soup(self._fetchUrl(seriesUrl))
            if series_soup:
                # logger.debug("Retrieving Series - looking for name")
                series_name = stripHTML(series_soup.find('span', {'id' : 'ptitle'}))
                series_name = re.sub(r' . a series by.*$','',series_name)
                # logger.debug("Series name: '%s'" % series_name)
            self.setSeries(series_name, i)
            desc = lc4.contents[2]
            # Check if series is in a universe
            if "/universes" in data:
                universe_url = self.story.getList('authorUrl')[0] + "&type=uni"
                universes_soup = self.make_soup(self._fetchUrl(universe_url) )
                # logger.debug("Universe url='{0}'".format(universe_url))
                if universes_soup:
                    universes = universes_soup.findAll('div', {'class' : 'ser-box'})
                    # logger.debug("Number of Universes: %d" % len(universes))
                    for universe in universes:
                        # logger.debug("universe.find('a')={0}".format(universe.find('a')))
                        # The universe id is in an "a" tag that has an id but nothing else. It is the first tag.
                        # The id is prefixed with the letter "u".
                        universe_id = universe.find('a')['id'][1:]
                        # logger.debug("universe_id='%s'" % universe_id)
                        universe_name = stripHTML(universe.find('div', {'class' : 'ser-name'})).partition(' ')[2]
                        # logger.debug("universe_name='%s'" % universe_name)
                        # If there is link to the story, we have the right universe
                        story_a = universe.find('a', href=re.compile('/s/'+self.story.getMetadata('storyId')))
                        if story_a:
                            # logger.debug("Story is in a series that is in a universe! The universe is '%s'" % universe_name)
                            self.story.setMetadata("universe", universe_name)
                            self.story.setMetadata('universeUrl','http://'+self.host+ '/library/universe.php?id=' + universe_id)
                            break
                else:
                    logger.debug("No universe page")
    except:
        raise
        # NOTE(review): unreachable after `raise`; kept as in the original.
        pass
    try:
        a = lc4.find('a', href=re.compile(r"/universe/\d+/.*"))
        # logger.debug("Looking for universe - a='{0}'".format(a))
        if a:
            self.story.setMetadata("universe",stripHTML(a))
            desc = lc4.contents[2]
            # Assumed only one universe, but it does have a URL--use universeHTML
            universe_name = stripHTML(a)
            universeUrl = 'http://'+self.host+a['href']
            # logger.debug("Retrieving Universe - about to get page - universeUrl='{0}".format(universeUrl))
            universe_soup = self.make_soup(self._fetchUrl(universeUrl))
            logger.debug("Retrieving Universe - have page")
            if universe_soup:
                logger.debug("Retrieving Universe - looking for name")
                universe_name = stripHTML(universe_soup.find('h1', {'id' : 'ptitle'}))
                universe_name = re.sub(r' . A Universe from the Mind.*$','',universe_name)
                # logger.debug("Universes name: '{0}'".format(universe_name))
            self.story.setMetadata('universeUrl',universeUrl)
            # logger.debug("Setting universe name: '{0}'".format(universe_name))
            self.story.setMetadata('universe',universe_name)
            if self.getConfig("universe_as_series"):
                self.setSeries(universe_name, 0)
                self.story.setMetadata('seriesUrl',universeUrl)
        else:
            logger.debug("Do not have a universe")
    except:
        raise
        # NOTE(review): unreachable after `raise`; kept as in the original.
        pass
    self.setDescription('http://'+self.host+'/s/'+self.story.getMetadata('storyId'),desc)
    # Scan the bolded labels of the story row for the remaining metadata.
    for b in lc4.findAll('b'):
        #logger.debug('Getting metadata: "%s"' % b)
        label = b.text
        if label in ['Posted:', 'Concluded:', 'Updated:']:
            # Dates are rendered by JS; the plain value lives in <noscript>.
            value = b.findNext('noscript').text
            #logger.debug('Have a date field label: "%s", value: "%s"' % (label, value))
        else:
            value = b.nextSibling
        #logger.debug('label: "%s", value: "%s"' % (label, value))
        if 'Sex' in label:
            self.story.setMetadata('rating', value)
        if 'Tags' in label or 'Codes' in label:
            for code in re.split(r'\s*,\s*', value.strip()):
                self.story.addToList('sitetags',code)
        if 'Posted' in label:
            self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat))
            self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))
        if 'Concluded' in label:
            self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))
        if 'Updated' in label:
            self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))
    # Some books have a cover in the index page.
    # Samples are:
    # http://storiesonline.net/s/11999
    # http://storiesonline.net/s/10823
    if get_cover:
        # logger.debug("Looking for the cover image...")
        cover_url = ""
        img = soup.find('img')
        if img:
            cover_url=img['src']
        # logger.debug("cover_url: %s"%cover_url)
        if cover_url:
            self.setCoverImage(url,cover_url)
    status = lc4.find('span', {'class' : 'ab'})
    if status != None:
        if 'Incomplete and Inactive' in status.text:
            self.story.setMetadata('status', 'Incomplete')
        else:
            self.story.setMetadata('status', 'In-Progress')
        if "Last Activity" in status.text:
            # date is passed as a timestamp and converted in JS.
            value = status.findNext('noscript').text
            self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))
    else:
        self.story.setMetadata('status', 'Completed')
0
Example 41
Project: auto-sklearn Source File: ensemble_builder.py
def main(self):
watch = StopWatch()
watch.start_task('ensemble_builder')
used_time = 0
time_iter = 0
index_run = 0
num_iteration = 0
current_num_models = 0
last_hash = None
current_hash = None
dir_ensemble = os.path.join(self.backend.temporary_directory,
'.auto-sklearn',
'predictions_ensemble')
dir_valid = os.path.join(self.backend.temporary_directory,
'.auto-sklearn',
'predictions_valid')
dir_test = os.path.join(self.backend.temporary_directory,
'.auto-sklearn',
'predictions_test')
paths_ = [dir_ensemble, dir_valid, dir_test]
dir_ensemble_list_mtimes = []
self.logger.debug('Starting main loop with %f seconds and %d iterations '
'left.' % (self.limit - used_time, num_iteration))
while used_time < self.limit or (self.max_iterations > 0 and
self.max_iterations >= num_iteration):
num_iteration += 1
self.logger.debug('Time left: %f', self.limit - used_time)
self.logger.debug('Time last ensemble building: %f', time_iter)
# Reload the ensemble targets every iteration, important, because cv may
# update the ensemble targets in the cause of running auto-sklearn
# TODO update cv in order to not need this any more!
targets_ensemble = self.backend.load_targets_ensemble()
# Load the predictions from the models
exists = [os.path.isdir(dir_) for dir_ in paths_]
if not exists[0]: # all(exists):
self.logger.debug('Prediction directory %s does not exist!' %
dir_ensemble)
time.sleep(2)
used_time = watch.wall_elapsed('ensemble_builder')
continue
if self.shared_mode is False:
dir_ensemble_list = sorted(glob.glob(os.path.join(
dir_ensemble, 'predictions_ensemble_%s_*.npy' % self.seed)))
if exists[1]:
dir_valid_list = sorted(glob.glob(os.path.join(
dir_valid, 'predictions_valid_%s_*.npy' % self.seed)))
else:
dir_valid_list = []
if exists[2]:
dir_test_list = sorted(glob.glob(os.path.join(
dir_test, 'predictions_test_%s_*.npy' % self.seed)))
else:
dir_test_list = []
else:
dir_ensemble_list = sorted(os.listdir(dir_ensemble))
dir_valid_list = sorted(os.listdir(dir_valid)) if exists[1] else []
dir_test_list = sorted(os.listdir(dir_test)) if exists[2] else []
# Check the modification times because predictions can be updated
# over time!
old_dir_ensemble_list_mtimes = dir_ensemble_list_mtimes
dir_ensemble_list_mtimes = []
# The ensemble dir can contain non-model files. We filter them and
# use the following list instead
dir_ensemble_model_files = []
for dir_ensemble_file in dir_ensemble_list:
if dir_ensemble_file.endswith("/"):
dir_ensemble_file = dir_ensemble_file[:-1]
if not dir_ensemble_file.endswith(".npy"):
self.logger.warning('Error loading file (not .npy): %s', dir_ensemble_file)
continue
dir_ensemble_model_files.append(dir_ensemble_file)
basename = os.path.basename(dir_ensemble_file)
dir_ensemble_file = os.path.join(dir_ensemble, basename)
mtime = os.path.getmtime(dir_ensemble_file)
dir_ensemble_list_mtimes.append(mtime)
if len(dir_ensemble_model_files) == 0:
self.logger.debug('Directories are empty')
time.sleep(2)
used_time = watch.wall_elapsed('ensemble_builder')
continue
if len(dir_ensemble_model_files) <= current_num_models and \
old_dir_ensemble_list_mtimes == dir_ensemble_list_mtimes:
self.logger.debug('Nothing has changed since the last time')
time.sleep(2)
used_time = watch.wall_elapsed('ensemble_builder')
continue
with warnings.catch_warnings():
warnings.simplefilter('ignore')
# TODO restructure time management in the ensemble builder,
# what is the time of index_run actually needed for?
watch.start_task('index_run' + str(index_run))
watch.start_task('ensemble_iter_' + str(num_iteration))
# List of num_runs (which are in the filename) which will be included
# later
include_num_runs = []
backup_num_runs = []
model_and_automl_re = re.compile(r'_([0-9]*)_([0-9]*)\.npy$')
if self.ensemble_nbest is not None:
# Keeps track of the single scores of each model in our ensemble
scores_nbest = []
# The indices of the model that are currently in our ensemble
indices_nbest = []
# The names of the models
model_names = []
model_names_to_scores = dict()
model_idx = 0
for model_name in dir_ensemble_model_files:
if model_name.endswith("/"):
model_name = model_name[:-1]
basename = os.path.basename(model_name)
try:
if self.precision is "16":
predictions = np.load(os.path.join(dir_ensemble, basename)).astype(dtype=np.float16)
elif self.precision is "32":
predictions = np.load(os.path.join(dir_ensemble, basename)).astype(dtype=np.float32)
elif self.precision is "64":
predictions = np.load(os.path.join(dir_ensemble, basename)).astype(dtype=np.float64)
else:
predictions = np.load(os.path.join(dir_ensemble, basename))
score = calculate_score(targets_ensemble, predictions,
self.task_type, self.metric,
predictions.shape[1])
except Exception as e:
self.logger.warning('Error loading %s: %s - %s',
basename, type(e), e)
score = -1
model_names_to_scores[model_name] = score
match = model_and_automl_re.search(model_name)
automl_seed = int(match.group(1))
num_run = int(match.group(2))
if self.ensemble_nbest is not None:
if score <= 0.001:
self.logger.info('Model only predicts at random: ' +
model_name + ' has score: ' + str(score))
backup_num_runs.append((automl_seed, num_run))
# If we have less models in our ensemble than ensemble_nbest add
# the current model if it is better than random
elif len(scores_nbest) < self.ensemble_nbest:
scores_nbest.append(score)
indices_nbest.append(model_idx)
include_num_runs.append((automl_seed, num_run))
model_names.append(model_name)
else:
# Take the worst performing model in our ensemble so far
idx = np.argmin(np.array([scores_nbest]))
# If the current model is better than the worst model in
# our ensemble replace it by the current model
if scores_nbest[idx] < score:
self.logger.info(
'Worst model in our ensemble: %s with score %f '
'will be replaced by model %s with score %f',
model_names[idx], scores_nbest[idx], model_name,
score)
# Exclude the old model
del scores_nbest[idx]
scores_nbest.append(score)
del include_num_runs[idx]
del indices_nbest[idx]
indices_nbest.append(model_idx)
include_num_runs.append((automl_seed, num_run))
del model_names[idx]
model_names.append(model_name)
# Otherwise exclude the current model from the ensemble
else:
# include_num_runs.append(True)
pass
else:
# Load all predictions that are better than random
if score <= 0.001:
# include_num_runs.append(True)
self.logger.info('Model only predicts at random: ' +
model_name + ' has score: ' +
str(score))
backup_num_runs.append((automl_seed, num_run))
else:
include_num_runs.append((automl_seed, num_run))
model_idx += 1
# If there is no model better than random guessing, we have to use
# all models which do random guessing
if len(include_num_runs) == 0:
include_num_runs = backup_num_runs
indices_to_model_names = dict()
indices_to_run_num = dict()
for i, model_name in enumerate(dir_ensemble_model_files):
match = model_and_automl_re.search(model_name)
automl_seed = int(match.group(1))
num_run = int(match.group(2))
if (automl_seed, num_run) in include_num_runs:
num_indices = len(indices_to_model_names)
indices_to_model_names[num_indices] = model_name
indices_to_run_num[num_indices] = (automl_seed, num_run)
try:
all_predictions_train, all_predictions_valid, all_predictions_test =\
self.get_all_predictions(dir_ensemble,
dir_ensemble_model_files,
dir_valid, dir_valid_list,
dir_test, dir_test_list,
include_num_runs,
model_and_automl_re,
self.precision)
except IOError:
self.logger.error('Could not load the predictions.')
continue
if len(include_num_runs) == 0:
self.logger.error('All models do just random guessing')
time.sleep(2)
continue
else:
ensemble = EnsembleSelection(ensemble_size=self.ensemble_size,
task_type=self.task_type,
metric=self.metric)
try:
ensemble.fit(all_predictions_train, targets_ensemble,
include_num_runs)
self.logger.info(ensemble)
except ValueError as e:
self.logger.error('Caught ValueError: ' + str(e))
used_time = watch.wall_elapsed('ensemble_builder')
time.sleep(2)
continue
except IndexError as e:
self.logger.error('Caught IndexError: ' + str(e))
used_time = watch.wall_elapsed('ensemble_builder')
time.sleep(2)
continue
except Exception as e:
self.logger.error('Caught error! %s', str(e))
used_time = watch.wall_elapsed('ensemble_builder')
time.sleep(2)
continue
# Output the score
self.logger.info('Training performance: %f' % ensemble.train_score_)
self.logger.info('Building the ensemble took %f seconds' %
watch.wall_elapsed('ensemble_iter_' + str(num_iteration)))
# Set this variable here to avoid re-running the ensemble builder
# every two seconds in case the ensemble did not change
current_num_models = len(dir_ensemble_model_files)
ensemble_predictions = ensemble.predict(all_predictions_train)
if sys.version_info[0] == 2:
ensemble_predictions.flags.writeable = False
current_hash = hash(ensemble_predictions.data)
else:
current_hash = hash(ensemble_predictions.data.tobytes())
# Only output a new ensemble and new predictions if the output of the
# ensemble would actually change!
# TODO this is neither safe (collisions, tests only with the ensemble
# prediction, but not the ensemble), implement a hash function for
# each possible ensemble builder.
if last_hash is not None:
if current_hash == last_hash:
self.logger.info('Ensemble output did not change.')
time.sleep(2)
continue
else:
last_hash = current_hash
else:
last_hash = current_hash
# Save the ensemble for later use in the main auto-sklearn module!
self.backend.save_ensemble(ensemble, index_run, self.seed)
# Save predictions for valid and test data set
if len(dir_valid_list) == len(dir_ensemble_model_files):
all_predictions_valid = np.array(all_predictions_valid)
ensemble_predictions_valid = ensemble.predict(all_predictions_valid)
if self.task_type == BINARY_CLASSIFICATION:
ensemble_predictions_valid = ensemble_predictions_valid[:, 1]
if self.low_precision:
if self.task_type in [BINARY_CLASSIFICATION, MULTICLASS_CLASSIFICATION, MULTILABEL_CLASSIFICATION]:
ensemble_predictions_valid[ensemble_predictions_valid < 1e-4] = 0.
if self.metric in [BAC_METRIC, F1_METRIC]:
bin_array = np.zeros(ensemble_predictions_valid.shape, dtype=np.int32)
if (self.task_type != MULTICLASS_CLASSIFICATION) or (
ensemble_predictions_valid.shape[1] == 1):
bin_array[ensemble_predictions_valid >= 0.5] = 1
else:
sample_num = ensemble_predictions_valid.shape[0]
for i in range(sample_num):
j = np.argmax(ensemble_predictions_valid[i, :])
bin_array[i, j] = 1
ensemble_predictions_valid = bin_array
if self.task_type in CLASSIFICATION_TASKS:
if ensemble_predictions_valid.size < (20000 * 20):
precision = 3
else:
precision = 2
else:
if ensemble_predictions_valid.size > 1000000:
precision = 4
else:
# File size maximally 2.1MB
precision = 6
self.backend.save_predictions_as_txt(ensemble_predictions_valid,
'valid', index_run, prefix=self.dataset_name,
precision=precision)
else:
self.logger.info('Could not find as many validation set predictions (%d)'
'as ensemble predictions (%d)!.',
len(dir_valid_list), len(dir_ensemble_model_files))
del all_predictions_valid
if len(dir_test_list) == len(dir_ensemble_model_files):
all_predictions_test = np.array(all_predictions_test)
ensemble_predictions_test = ensemble.predict(all_predictions_test)
if self.task_type == BINARY_CLASSIFICATION:
ensemble_predictions_test = ensemble_predictions_test[:, 1]
if self.low_precision:
if self.task_type in [BINARY_CLASSIFICATION, MULTICLASS_CLASSIFICATION, MULTILABEL_CLASSIFICATION]:
ensemble_predictions_test[ensemble_predictions_test < 1e-4] = 0.
if self.metric in [BAC_METRIC, F1_METRIC]:
bin_array = np.zeros(ensemble_predictions_test.shape,
dtype=np.int32)
if (self.task_type != MULTICLASS_CLASSIFICATION) or (
ensemble_predictions_test.shape[1] == 1):
bin_array[ensemble_predictions_test >= 0.5] = 1
else:
sample_num = ensemble_predictions_test.shape[0]
for i in range(sample_num):
j = np.argmax(ensemble_predictions_test[i, :])
bin_array[i, j] = 1
ensemble_predictions_test = bin_array
if self.task_type in CLASSIFICATION_TASKS:
if ensemble_predictions_test.size < (20000 * 20):
precision = 3
else:
precision = 2
else:
if ensemble_predictions_test.size > 1000000:
precision = 4
else:
precision = 6
self.backend.save_predictions_as_txt(ensemble_predictions_test,
'test', index_run, prefix=self.dataset_name,
precision=precision)
else:
self.logger.info('Could not find as many test set predictions (%d) as '
'ensemble predictions (%d)!',
len(dir_test_list), len(dir_ensemble_model_files))
del all_predictions_test
current_num_models = len(dir_ensemble_model_files)
watch.stop_task('index_run' + str(index_run))
time_iter = watch.get_wall_dur('index_run' + str(index_run))
used_time = watch.wall_elapsed('ensemble_builder')
index_run += 1
return
Votes: 0
Example 42
Project: tp-qemu — Source File: cpuid.py
def run(test, params, env):
"""
Boot guest with different cpu_models and cpu flags and check if guest works correctly.
:param test: kvm test object.
:param params: Dictionary with the test parameters.
:param env: Dictionary with test environment.
"""
qemu_binary = utils_misc.get_qemu_binary(params)
cpu_model = params.get("cpu_model", "qemu64")
xfail = False
if (params.get("xfail") is not None) and (params.get("xfail") == "yes"):
xfail = True
def cpu_models_to_test():
"""Return the list of CPU models to be tested, based on the
cpu_models and cpu_model config options.
Config option "cpu_model" may be used to ask a single CPU model
to be tested. Config option "cpu_models" may be used to ask
multiple CPU models to be tested.
If cpu_models is "*", all CPU models reported by QEMU will be tested.
"""
models_opt = params.get("cpu_models")
model_opt = params.get("cpu_model")
if (models_opt is None and model_opt is None):
raise error.TestError("No cpu_models or cpu_model option is set")
cpu_models = set()
if models_opt == '*':
cpu_models.update(utils_misc.get_qemu_cpu_models(qemu_binary))
elif models_opt:
cpu_models.update(models_opt.split())
if model_opt:
cpu_models.add(model_opt)
return cpu_models
def test_qemu_cpu_models_list(self):
"""
check CPU models returned by <qemu> -cpu '?' are what is expected
"""
"""
test method
"""
cpu_models = cpu_models_to_test()
qemu_models = utils_misc.get_qemu_cpu_models(qemu_binary)
missing = set(cpu_models) - set(qemu_models)
if missing:
raise error.TestFail(
"Some CPU models not in QEMU CPU model list: %r" % (missing))
added = set(qemu_models) - set(cpu_models)
if added:
logging.info("Extra CPU models in QEMU CPU listing: %s", added)
def compare_cpuid_output(a, b):
"""
Generates a list of (bit, va, vb) tuples for
each bit that is different between a and b.
"""
for bit in range(32):
ba = (a & (1 << bit)) >> bit
if b is not None:
bb = (b & (1 << bit)) >> bit
else:
bb = None
if ba != bb:
yield (bit, ba, bb)
def parse_cpuid_dump(output):
dbg("parsing cpuid dump: %r", output)
cpuid_re = re.compile(
"^ *(0x[0-9a-f]+) +0x([0-9a-f]+): +eax=0x([0-9a-f]+) ebx=0x([0-9a-f]+) ecx=0x([0-9a-f]+) edx=0x([0-9a-f]+)$")
output_match = re.search('(==START TEST==.*==END TEST==)', output, re.M | re.DOTALL)
if output_match is None:
dbg("cpuid dump doesn't follow expected pattern")
return None
output = output_match.group(1)
out_lines = output.splitlines()
if out_lines[0] != '==START TEST==' or out_lines[-1] != '==END TEST==':
dbg("cpuid dump doesn't have expected delimiters")
return None
if out_lines[1] != 'CPU:':
dbg("cpuid dump doesn't start with 'CPU:' line")
return None
result = {}
for l in out_lines[2:-1]:
m = cpuid_re.match(l)
if m is None:
dbg("invalid cpuid dump line: %r", l)
return None
in_eax = int(m.group(1), 16)
in_ecx = int(m.group(2), 16)
result[in_eax, in_ecx, 'eax'] = int(m.group(3), 16)
result[in_eax, in_ecx, 'ebx'] = int(m.group(4), 16)
result[in_eax, in_ecx, 'ecx'] = int(m.group(5), 16)
result[in_eax, in_ecx, 'edx'] = int(m.group(6), 16)
return result
def get_test_kernel_cpuid(self, vm):
vm.resume()
timeout = float(params.get("login_timeout", 240))
logging.debug("Will wait for CPUID serial output at %r",
vm.serial_console)
if not utils_misc.wait_for(lambda:
re.search("==END TEST==",
vm.serial_console.get_output()),
timeout, 1):
raise error.TestFail("Could not get test complete message.")
test_output = parse_cpuid_dump(vm.serial_console.get_output())
logging.debug("Got CPUID serial output: %r", test_output)
if test_output is None:
raise error.TestFail("Test output signature not found in "
"output:\n %s", vm.serial_console.get_output())
vm.destroy(gracefully=False)
return test_output
def find_cpu_obj(vm):
"""Find path of a valid VCPU object"""
roots = ['/machine/icc-bridge/icc', '/machine/unattached/device']
for root in roots:
for child in vm.monitor.cmd('qom-list', dict(path=root)):
logging.debug('child: %r', child)
if child['type'].rstrip('>').endswith('-cpu'):
return root + '/' + child['name']
def get_qom_cpuid(self, vm):
assert vm.monitor.protocol == "qmp"
cpu_path = find_cpu_obj(vm)
logging.debug('cpu path: %r', cpu_path)
r = {}
for prop in 'feature-words', 'filtered-features':
words = vm.monitor.cmd('qom-get', dict(path=cpu_path, property=prop))
logging.debug('%s property: %r', prop, words)
for w in words:
reg = w['cpuid-register'].lower()
key = (w['cpuid-input-eax'], w.get('cpuid-input-ecx', 0), reg)
r.setdefault(key, 0)
r[key] |= w['features']
return r
def get_guest_cpuid(self, cpu_model, feature=None, extra_params=None, qom_mode=False):
if not qom_mode:
test_kernel_dir = os.path.join(data_dir.get_deps_dir(), "cpuid", "src")
os.chdir(test_kernel_dir)
utils.make("cpuid_dump_kernel.bin")
vm_name = params['main_vm']
params_b = params.copy()
if not qom_mode:
params_b["kernel"] = os.path.join(
test_kernel_dir, "cpuid_dump_kernel.bin")
params_b["cpu_model"] = cpu_model
params_b["cpu_model_flags"] = feature
del params_b["images"]
del params_b["nics"]
if extra_params:
params_b.update(extra_params)
env_process.preprocess_vm(self, params_b, env, vm_name)
vm = env.get_vm(vm_name)
dbg('is dead: %r', vm.is_dead())
vm.create()
self.vm = vm
if qom_mode:
return get_qom_cpuid(self, vm)
else:
return get_test_kernel_cpuid(self, vm)
def cpuid_to_vendor(cpuid_dump, idx):
dst = []
map(lambda i:
dst.append((chr(cpuid_dump[idx, 0, 'ebx'] >> (8 * i) & 0xff))),
range(0, 4))
map(lambda i:
dst.append((chr(cpuid_dump[idx, 0, 'edx'] >> (8 * i) & 0xff))),
range(0, 4))
map(lambda i:
dst.append((chr(cpuid_dump[idx, 0, 'ecx'] >> (8 * i) & 0xff))),
range(0, 4))
return ''.join(dst)
def default_vendor(self):
"""
Boot qemu with specified cpu models and
verify that CPU vendor matches requested
"""
cpu_models = cpu_models_to_test()
vendor = params.get("vendor")
if vendor is None or vendor == "host":
cmd = "grep 'vendor_id' /proc/cpuinfo | head -n1 | awk '{print $3}'"
cmd_result = utils.run(cmd, ignore_status=True)
vendor = cmd_result.stdout.strip()
ignore_cpus = set(params.get("ignore_cpu_models", "").split(' '))
cpu_models = cpu_models - ignore_cpus
for cpu_model in cpu_models:
out = get_guest_cpuid(self, cpu_model)
guest_vendor = cpuid_to_vendor(out, 0x00000000)
logging.debug("Guest's vendor: " + guest_vendor)
if guest_vendor != vendor:
raise error.TestFail("Guest vendor [%s], doesn't match "
"required vendor [%s] for CPU [%s]" %
(guest_vendor, vendor, cpu_model))
def custom_vendor(self):
"""
Boot qemu with specified vendor
"""
has_error = False
vendor = params["vendor"]
try:
out = get_guest_cpuid(self, cpu_model, "vendor=" + vendor)
guest_vendor0 = cpuid_to_vendor(out, 0x00000000)
guest_vendor80000000 = cpuid_to_vendor(out, 0x80000000)
logging.debug("Guest's vendor[0]: " + guest_vendor0)
logging.debug("Guest's vendor[0x80000000]: " +
guest_vendor80000000)
if guest_vendor0 != vendor:
raise error.TestFail("Guest vendor[0] [%s], doesn't match "
"required vendor [%s] for CPU [%s]" %
(guest_vendor0, vendor, cpu_model))
if guest_vendor80000000 != vendor:
raise error.TestFail("Guest vendor[0x80000000] [%s], "
"doesn't match required vendor "
"[%s] for CPU [%s]" %
(guest_vendor80000000, vendor,
cpu_model))
except:
has_error = True
if xfail is False:
raise
if (has_error is False) and (xfail is True):
raise error.TestFail("Test was expected to fail, but it didn't")
def cpuid_to_level(cpuid_dump):
r = cpuid_dump[0, 0]
return r['eax']
def custom_level(self):
"""
Boot qemu with specified level
"""
has_error = False
level = params["level"]
try:
out = get_guest_cpuid(self, cpu_model, "level=" + level)
guest_level = str(cpuid_to_level(out))
if guest_level != level:
raise error.TestFail("Guest's level [%s], doesn't match "
"required level [%s]" %
(guest_level, level))
except:
has_error = True
if xfail is False:
raise
if (has_error is False) and (xfail is True):
raise error.TestFail("Test was expected to fail, but it didn't")
def cpuid_to_family(cpuid_dump):
# Intel Processor Identification and the CPUID Instruction
# http://www.intel.com/Assets/PDF/appnote/241618.pdf
# 5.1.2 Feature Information (Function 01h)
eax = cpuid_dump[1, 0]['eax']
family = (eax >> 8) & 0xf
if family == 0xf:
# extract extendend family
return family + ((eax >> 20) & 0xff)
return family
def custom_family(self):
"""
Boot qemu with specified family
"""
has_error = False
family = params["family"]
try:
out = get_guest_cpuid(self, cpu_model, "family=" + family)
guest_family = str(cpuid_to_family(out))
if guest_family != family:
raise error.TestFail("Guest's family [%s], doesn't match "
"required family [%s]" %
(guest_family, family))
except:
has_error = True
if xfail is False:
raise
if (has_error is False) and (xfail is True):
raise error.TestFail("Test was expected to fail, but it didn't")
def cpuid_to_model(cpuid_dump):
# Intel Processor Identification and the CPUID Instruction
# http://www.intel.com/Assets/PDF/appnote/241618.pdf
# 5.1.2 Feature Information (Function 01h)
eax = cpuid_dump[1, 0]['eax']
model = (eax >> 4) & 0xf
# extended model
model |= (eax >> 12) & 0xf0
return model
def custom_model(self):
"""
Boot qemu with specified model
"""
has_error = False
model = params["model"]
try:
out = get_guest_cpuid(self, cpu_model, "model=" + model)
guest_model = str(cpuid_to_model(out))
if guest_model != model:
raise error.TestFail("Guest's model [%s], doesn't match "
"required model [%s]" %
(guest_model, model))
except:
has_error = True
if xfail is False:
raise
if (has_error is False) and (xfail is True):
raise error.TestFail("Test was expected to fail, but it didn't")
def cpuid_to_stepping(cpuid_dump):
# Intel Processor Identification and the CPUID Instruction
# http://www.intel.com/Assets/PDF/appnote/241618.pdf
# 5.1.2 Feature Information (Function 01h)
eax = cpuid_dump[1, 0]['eax']
stepping = eax & 0xf
return stepping
def custom_stepping(self):
"""
Boot qemu with specified stepping
"""
has_error = False
stepping = params["stepping"]
try:
out = get_guest_cpuid(self, cpu_model, "stepping=" + stepping)
guest_stepping = str(cpuid_to_stepping(out))
if guest_stepping != stepping:
raise error.TestFail("Guest's stepping [%s], doesn't match "
"required stepping [%s]" %
(guest_stepping, stepping))
except:
has_error = True
if xfail is False:
raise
if (has_error is False) and (xfail is True):
raise error.TestFail("Test was expected to fail, but it didn't")
def cpuid_to_xlevel(cpuid_dump):
# Intel Processor Identification and the CPUID Instruction
# http://www.intel.com/Assets/PDF/appnote/241618.pdf
# 5.2.1 Largest Extendend Function # (Function 80000000h)
return cpuid_dump[0x80000000, 0x00]['eax']
def custom_xlevel(self):
"""
Boot qemu with specified xlevel
"""
has_error = False
xlevel = params["xlevel"]
if params.get("expect_xlevel") is not None:
xlevel = params.get("expect_xlevel")
try:
out = get_guest_cpuid(self, cpu_model, "xlevel=" +
params.get("xlevel"))
guest_xlevel = str(cpuid_to_xlevel(out))
if guest_xlevel != xlevel:
raise error.TestFail("Guest's xlevel [%s], doesn't match "
"required xlevel [%s]" %
(guest_xlevel, xlevel))
except:
has_error = True
if xfail is False:
raise
if (has_error is False) and (xfail is True):
raise error.TestFail("Test was expected to fail, but it didn't")
def cpuid_to_model_id(cpuid_dump):
# Intel Processor Identification and the CPUID Instruction
# http://www.intel.com/Assets/PDF/appnote/241618.pdf
# 5.2.3 Processor Brand String (Functions 80000002h, 80000003h,
# 80000004h)
m_id = ""
for idx in (0x80000002, 0x80000003, 0x80000004):
regs = cpuid_dump[idx, 0]
for name in ('eax', 'ebx', 'ecx', 'edx'):
for shift in range(4):
c = ((regs[name] >> (shift * 8)) & 0xff)
if c == 0: # drop trailing \0-s
break
m_id += chr(c)
return m_id
def custom_model_id(self):
"""
Boot qemu with specified model_id
"""
has_error = False
model_id = params["model_id"]
try:
out = get_guest_cpuid(self, cpu_model, "model_id='%s'" %
model_id)
guest_model_id = cpuid_to_model_id(out)
if guest_model_id != model_id:
raise error.TestFail("Guest's model_id [%s], doesn't match "
"required model_id [%s]" %
(guest_model_id, model_id))
except:
has_error = True
if xfail is False:
raise
if (has_error is False) and (xfail is True):
raise error.TestFail("Test was expected to fail, but it didn't")
def cpuid_regs_to_string(cpuid_dump, leaf, idx, regs):
r = cpuid_dump[leaf, idx]
signature = ""
for i in regs:
for shift in range(0, 4):
c = chr((r[i] >> (shift * 8)) & 0xFF)
if c in string.printable:
signature = signature + c
else:
signature = "%s\\x%02x" % (signature, ord(c))
logging.debug("(%s.%s:%s: signature: %s" % (leaf, idx, str(regs),
signature))
return signature
def cpuid_signature(self):
"""
test signature in specified leaf:index:regs
"""
has_error = False
flags = params.get("flags", "")
leaf = int(params.get("leaf", "0x40000000"), 0)
idx = int(params.get("index", "0x00"), 0)
regs = params.get("regs", "ebx ecx edx").split()
signature = params["signature"]
try:
out = get_guest_cpuid(self, cpu_model, flags)
_signature = cpuid_regs_to_string(out, leaf, idx, regs)
if _signature != signature:
raise error.TestFail("Guest's signature [%s], doesn't"
"match required signature [%s]" %
(_signature, signature))
except:
has_error = True
if xfail is False:
raise
if (has_error is False) and (xfail is True):
raise error.TestFail("Test was expected to fail, but it didn't")
def cpuid_bit_test(self):
"""
test bits in specified leaf:func:reg
"""
has_error = False
flags = params.get("flags", "")
leaf = int(params.get("leaf", "0x40000000"), 0)
idx = int(params.get("index", "0x00"), 0)
reg = params.get("reg", "eax")
bits = params["bits"].split()
try:
out = get_guest_cpuid(self, cpu_model, flags)
r = out[leaf, idx][reg]
logging.debug("CPUID(%s.%s).%s=0x%08x" % (leaf, idx, reg, r))
for i in bits:
if (r & (1 << int(i))) == 0:
raise error.TestFail("CPUID(%s.%s).%s[%s] is not set" %
(leaf, idx, reg, i))
except:
has_error = True
if xfail is False:
raise
if (has_error is False) and (xfail is True):
raise error.TestFail("Test was expected to fail, but it didn't")
def cpuid_reg_test(self):
"""
test register value in specified leaf:index:reg
"""
has_error = False
flags = params.get("flags", "")
leaf = int(params.get("leaf", "0x00"), 0)
idx = int(params.get("index", "0x00"), 0)
reg = params.get("reg", "eax")
val = int(params["value"], 0)
try:
out = get_guest_cpuid(self, cpu_model, flags)
r = out[leaf, idx][reg]
logging.debug("CPUID(%s.%s).%s=0x%08x" % (leaf, idx, reg, r))
if r != val:
raise error.TestFail("CPUID(%s.%s).%s is not 0x%08x" %
(leaf, idx, reg, val))
except:
has_error = True
if xfail is False:
raise
if (has_error is False) and (xfail is True):
raise error.TestFail("Test was expected to fail, but it didn't")
def check_cpuid_dump(self):
"""
Compare full CPUID dump data
"""
machine_type = params.get("machine_type_to_check", "")
kvm_enabled = params.get("enable_kvm", "yes") == "yes"
ignore_cpuid_leaves = params.get("ignore_cpuid_leaves", "")
ignore_cpuid_leaves = ignore_cpuid_leaves.split()
whitelist = []
for l in ignore_cpuid_leaves:
l = l.split(',')
# syntax of ignore_cpuid_leaves:
# <in_eax>[,<in_ecx>[,<register>[ ,<bit>]]] ...
for i in 0, 1, 3: # integer fields:
if len(l) > i:
l[i] = int(l[i], 0)
whitelist.append(tuple(l))
if not machine_type:
raise error.TestNAError("No machine_type_to_check defined")
cpu_model_flags = params.get('cpu_model_flags', '')
full_cpu_model_name = cpu_model
if cpu_model_flags:
full_cpu_model_name += ','
full_cpu_model_name += cpu_model_flags.lstrip(',')
ref_file = os.path.join(data_dir.get_deps_dir(), 'cpuid',
"cpuid_dumps",
kvm_enabled and "kvm" or "nokvm",
machine_type, '%s-dump.txt' % (full_cpu_model_name))
if not os.path.exists(ref_file):
raise error.TestNAError("no cpuid dump file: %s" % (ref_file))
reference = open(ref_file, 'r').read()
if not reference:
raise error.TestNAError(
"no cpuid dump data on file: %s" % (ref_file))
reference = parse_cpuid_dump(reference)
if reference is None:
raise error.TestNAError(
"couldn't parse reference cpuid dump from file; %s" % (ref_file))
qom_mode = params.get('qom_mode', "no").lower() == 'yes'
if not qom_mode:
cpu_model_flags += ',enforce'
try:
out = get_guest_cpuid(
self, cpu_model, cpu_model_flags,
extra_params=dict(machine_type=machine_type, smp=1),
qom_mode=qom_mode)
except (virt_vm.VMStartError, virt_vm.VMCreateError) as e:
output = getattr(e, 'reason', getattr(e, 'output', ''))
if "host doesn't support requested feature:" in output \
or ("host cpuid" in output and
("lacks requested flag" in output or
"flag restricted to guest" in output)) \
or ("Unable to find CPU definition:" in output):
raise error.TestNAError(
"Can't run CPU model %s on this host" % (full_cpu_model_name))
else:
raise
dbg('ref_file: %r', ref_file)
dbg('ref: %r', reference)
dbg('out: %r', out)
ok = True
for k in reference.keys():
in_eax, in_ecx, reg = k
diffs = compare_cpuid_output(reference[k], out.get(k))
for d in diffs:
bit, vreference, vout = d
whitelisted = (in_eax,) in whitelist \
or (in_eax, in_ecx) in whitelist \
or (in_eax, in_ecx, reg) in whitelist \
or (in_eax, in_ecx, reg, bit) in whitelist
silent = False
if vout is None and params.get('ok_missing', 'no') == 'yes':
whitelisted = True
silent = True
if not silent:
info(
"Non-matching bit: CPUID[0x%x,0x%x].%s[%d]: found %s instead of %s%s",
in_eax, in_ecx, reg, bit, vout, vreference,
whitelisted and " (whitelisted)" or "")
if not whitelisted:
ok = False
if not ok:
raise error.TestFail("Unexpected CPUID data")
# subtests runner
test_type = params["test_type"]
if test_type not in locals():
raise error.TestError("Test function '%s' is not defined in"
" test" % test_type)
test_func = locals()[test_type]
return test_func(test)
Votes: 0
Example 43
Project: python-beaver — Source File: config.py
def _parse(self, args):
def _main_parser(config):
transpose = ['config', 'confd_path', 'debug', 'daemonize', 'files', 'format', 'fqdn', 'hostname', 'path', 'pid', 'transport']
namspace_dict = vars(args)
for key in transpose:
if key not in namspace_dict or namspace_dict[key] is None or namspace_dict[key] == '':
continue
config[key] = namspace_dict[key]
if args.mode:
config['zeromq_bind'] = args.mode
# HACK: Python 2.6 ConfigParser does not properly
# handle non-string values
for key in config:
if config[key] == '':
config[key] = None
require_bool = ['debug', 'daemonize', 'fqdn', 'rabbitmq_exchange_durable', 'rabbitmq_queue_durable',
'rabbitmq_ha_queue', 'rabbitmq_ssl', 'tcp_ssl_enabled', 'tcp_ssl_verify']
for key in require_bool:
config[key] = bool(int(config[key]))
require_int = [
'max_failure',
'max_queue_size',
'queue_timeout',
'rabbitmq_port',
'rabbitmq_timeout',
'rabbitmq_delivery_mode',
'respawn_delay',
'subprocess_poll_sleep',
'refresh_worker_process',
'tcp_port',
'udp_port',
'wait_timeout',
'zeromq_hwm',
'logstash_version',
'kafka_batch_n',
'kafka_batch_t',
'kafka_ack_timeout',
'number_of_consumer_processes',
'ignore_old_files'
]
for key in require_int:
if config[key] is not None:
config[key] = int(config[key])
require_float = [
'update_file_mapping_time',
'discover_interval',
]
for key in require_float:
if config[key] is not None:
config[key] = float(config[key])
if config.get('format') == 'null':
config['format'] = 'raw'
if config['files'] is not None and type(config['files']) == str:
config['files'] = config['files'].split(',')
if config['path'] is not None:
config['path'] = os.path.realpath(config['path'])
if not os.path.isdir(config['path']):
raise LookupError('{0} does not exist'.format(config['path']))
if config.get('hostname') is None:
if config.get('fqdn') is True:
config['hostname'] = socket.getfqdn()
else:
config['hostname'] = socket.gethostname()
if config.get('sincedb_path'):
config['sincedb_path'] = os.path.realpath(config.get('sincedb_path'))
if config['zeromq_address'] and type(config['zeromq_address']) == str:
config['zeromq_address'] = [x.strip() for x in config.get('zeromq_address').split(',')]
if config.get('ssh_options') is not None:
csv = config.get('ssh_options')
config['ssh_options'] = []
if type(csv) == str:
for opt in csv.split(','):
config['ssh_options'].append('-o %s' % opt.strip())
else:
config['ssh_options'] = []
config['globs'] = {}
return config
def _section_parser(config, raise_exceptions=True):
'''Parse a given INI-style config file using ConfigParser module.
Stanza's names match file names, and properties are defaulted as in
http://logstash.net/docs/1.1.1/inputs/file
Config file example:
[/var/log/syslog]
type: syslog
tags: sys,main
[/var/log/auth]
type: syslog
;tags: auth,main
'''
fields = config.get('add_field', '')
if type(fields) != dict:
try:
if type(fields) == str:
fields = filter(None, fields.split(','))
if len(fields) == 0:
config['fields'] = {}
elif (len(fields) % 2) == 1:
if raise_exceptions:
raise Exception('Wrong number of values for add_field')
else:
fieldkeys = fields[0::2]
fieldvalues = [[x] for x in fields[1::2]]
config['fields'] = dict(zip(fieldkeys, fieldvalues))
except TypeError:
config['fields'] = {}
if 'add_field' in config:
del config['add_field']
envFields = config.get('add_field_env', '')
if type(envFields) != dict:
try:
if type(envFields) == str:
envFields = envFields.replace(" ","")
envFields = filter(None, envFields.split(','))
if len(envFields) == 0:
config['envFields'] = {}
elif (len(envFields) % 2) == 1:
if raise_exceptions:
raise Exception('Wrong number of values for add_field_env')
else:
envFieldkeys = envFields[0::2]
envFieldvalues = []
for x in envFields[1::2]:
envFieldvalues.append(os.environ.get(x))
config['fields'].update(dict(zip(envFieldkeys, envFieldvalues)))
except TypeError:
config['envFields'] = {}
if 'add_field_env' in config:
del config['add_field_env']
try:
tags = config.get('tags', '')
if type(tags) == str:
tags = filter(None, tags.split(','))
if len(tags) == 0:
tags = []
config['tags'] = tags
except TypeError:
config['tags'] = []
if config.get('format') == 'null':
config['format'] = 'raw'
file_type = config.get('type', None)
if not file_type:
config['type'] = 'file'
require_bool = ['debug', 'ignore_empty', 'ignore_truncate']
for k in require_bool:
config[k] = bool(int(config[k]))
config['delimiter'] = config['delimiter'].decode('string-escape')
if config['multiline_regex_after']:
config['multiline_regex_after'] = re.compile(config['multiline_regex_after'])
if config['multiline_regex_before']:
config['multiline_regex_before'] = re.compile(config['multiline_regex_before'])
require_int = ['sincedb_write_interval', 'stat_interval', 'tail_lines']
for k in require_int:
config[k] = int(config[k])
return config
# Build the composite configuration: the main config file plus any
# per-file stanzas found under 'confd_path', run through the parsers
# defined above.
conf = Configuration(
    name='beaver',
    path=self._configfile,
    main_defaults=self._main_defaults,
    section_defaults=self._section_defaults,
    main_parser=_main_parser,
    section_parser=_section_parser,
    path_from_main='confd_path',
    config_parser=self._config_parser
)
config = conf.raw()
# Split the parsed result into daemon-wide settings ('beaver') and
# per-file settings ('sections').
self._beaver_config = config['beaver']
self._file_config = config['sections']
# Normalise the default dicts through the same parsers so they have the
# same shape as parsed config values.
# NOTE(review): this rebinds self._main_parser from the parser function
# to its *return value* — confirm nothing later calls it as a function.
self._main_parser = _main_parser(self._main_defaults)
self._section_defaults = _section_parser(self._section_defaults, raise_exceptions=False)
# Map every file matched by a section's glob (minus its 'exclude'
# pattern) to that section's settings.
self._files = {}
for section in config['sections']:
    globs = eglob(section, config['sections'][section].get('exclude', ''))
    if not globs:
        self._logger.debug('Skipping glob due to no files found: %s' % section)
        continue
    for globbed_file in globs:
        self._files[os.path.realpath(globbed_file)] = config['sections'][section]
Example 44 (score: 0)
Project: powerline — Source file: bat.py
def _fetch_battery_info(pl):
    # Probe battery backends in priority order — UPower via D-Bus, Linux
    # sysfs, OS X pmset, Windows WMI / GetSystemPowerStatus — and return
    # the first working status callable.  The returned callable takes the
    # powerline logger *pl* and returns a (percentage, state) pair.
    try:
        import dbus
    except ImportError:
        pl.debug('Not using DBUS+UPower as dbus is not available')
    else:
        try:
            bus = dbus.SystemBus()
        except Exception as e:
            pl.exception('Failed to connect to system bus: {0}', str(e))
        else:
            interface = 'org.freedesktop.UPower'
            try:
                up = bus.get_object(interface, '/org/freedesktop/UPower')
            except dbus.exceptions.DBusException as e:
                # ServiceUnknown simply means UPower is not installed/running.
                if getattr(e, '_dbus_error_name', '').endswith('ServiceUnknown'):
                    pl.debug('Not using DBUS+UPower as UPower is not available via dbus')
                else:
                    pl.exception('Failed to get UPower service with dbus: {0}', str(e))
            else:
                devinterface = 'org.freedesktop.DBus.Properties'
                devtype_name = interface + '.Device'
                devices = []
                # Collect device paths of present battery power supplies.
                for devpath in up.EnumerateDevices(dbus_interface=interface):
                    dev = bus.get_object(interface, devpath)
                    devget = lambda what: dev.Get(
                        devtype_name,
                        what,
                        dbus_interface=devinterface
                    )
                    # Type 2 is assumed to be UPower's "battery" device
                    # type — NOTE(review): confirm against the UPower enum.
                    if int(devget('Type')) != 2:
                        pl.debug('Not using DBUS+UPower with {0}: invalid type', devpath)
                        continue
                    if not bool(devget('IsPresent')):
                        pl.debug('Not using DBUS+UPower with {0}: not present', devpath)
                        continue
                    if not bool(devget('PowerSupply')):
                        pl.debug('Not using DBUS+UPower with {0}: not a power supply', devpath)
                        continue
                    devices.append(devpath)
                    pl.debug('Using DBUS+UPower with {0}', devpath)
                if devices:
                    def _flatten_battery(pl):
                        # Aggregate all batteries into a single percentage;
                        # `state` stays truthy while no battery reports
                        # state 2 (presumably "discharging" — confirm).
                        energy = 0.0
                        energy_full = 0.0
                        state = True
                        for devpath in devices:
                            dev = bus.get_object(interface, devpath)
                            energy_full += float(
                                dbus.Interface(dev, dbus_interface=devinterface).Get(
                                    devtype_name,
                                    'EnergyFull'
                                ),
                            )
                            energy += float(
                                dbus.Interface(dev, dbus_interface=devinterface).Get(
                                    devtype_name,
                                    'Energy'
                                ),
                            )
                            state &= dbus.Interface(dev, dbus_interface=devinterface).Get(
                                devtype_name,
                                'State'
                            ) != 2
                        return (energy * 100.0 / energy_full), state
                    return _flatten_battery
                pl.debug('Not using DBUS+UPower as no batteries were found')
    # Fallback 2: Linux sysfs power-supply class.
    if os.path.isdir('/sys/class/power_supply'):
        linux_energy_full_fmt = '/sys/class/power_supply/{0}/energy_full'
        linux_energy_fmt = '/sys/class/power_supply/{0}/energy_now'
        linux_status_fmt = '/sys/class/power_supply/{0}/status'
        devices = []
        for linux_supplier in os.listdir('/sys/class/power_supply'):
            energy_path = linux_energy_fmt.format(linux_supplier)
            # Skip entries (e.g. AC adapters) without an energy reading.
            if not os.path.exists(energy_path):
                continue
            pl.debug('Using /sys/class/power_supply with battery {0}', linux_supplier)
            devices.append(linux_supplier)
        if devices:
            def _get_battery_status(pl):
                # Sum energy over all batteries; `state` is True while no
                # battery reports 'Discharging', None if status unreadable.
                energy = 0.0
                energy_full = 0.0
                state = True
                for device in devices:
                    with open(linux_energy_full_fmt.format(device), 'r') as f:
                        energy_full += int(float(f.readline().split()[0]))
                    with open(linux_energy_fmt.format(device), 'r') as f:
                        energy += int(float(f.readline().split()[0]))
                    try:
                        with open(linux_status_fmt.format(device), 'r') as f:
                            state &= (f.readline().strip() != 'Discharging')
                    except IOError:
                        state = None
                return (energy * 100.0 / energy_full), state
            return _get_battery_status
        pl.debug('Not using /sys/class/power_supply as no batteries were found')
    else:
        pl.debug('Not using /sys/class/power_supply: no directory')
    # Fallback 3: OS X `pmset -g batt`.
    try:
        from shutil import which  # Python-3.3 and later
    except ImportError:
        pl.info('Using dumb “which” which only checks for file in /usr/bin')
        which = lambda f: (lambda fp: os.path.exists(fp) and fp)(os.path.join('/usr/bin', f))
    if which('pmset'):
        pl.debug('Using pmset')
        # Hoisted: compile the percent pattern once, not per status call.
        BATTERY_PERCENT_RE = re.compile(r'(\d+)%')
        def _get_battery_status(pl):
            battery_summary = run_cmd(pl, ['pmset', '-g', 'batt'])
            battery_percent = BATTERY_PERCENT_RE.search(battery_summary).group(1)
            ac_charging = 'AC' in battery_summary
            return int(battery_percent), ac_charging
        return _get_battery_status
    else:
        pl.debug('Not using pmset: executable not found')
    # Fallback 4: Windows / Cygwin.
    if sys.platform.startswith('win') or sys.platform == 'cygwin':
        # From http://stackoverflow.com/a/21083571/273566, reworked
        try:
            from win32com.client import GetObject
        except ImportError:
            pl.debug('Not using win32com.client as it is not available')
        else:
            try:
                wmi = GetObject('winmgmts:')
            except Exception as e:
                pl.exception('Failed to run GetObject from win32com.client: {0}', str(e))
            else:
                for battery in wmi.InstancesOf('Win32_Battery'):
                    pl.debug('Using win32com.client with Win32_Battery')
                    def _get_battery_status(pl):
                        # http://msdn.microsoft.com/en-us/library/aa394074(v=vs.85).aspx
                        return battery.EstimatedChargeRemaining, battery.BatteryStatus == 6
                    return _get_battery_status
                pl.debug('Not using win32com.client as no batteries were found')
        # Last resort on Windows: call GetSystemPowerStatus via ctypes.
        from ctypes import Structure, c_byte, c_ulong, byref
        if sys.platform == 'cygwin':
            pl.debug('Using cdll to communicate with kernel32 (Cygwin)')
            from ctypes import cdll
            library_loader = cdll
        else:
            pl.debug('Using windll to communicate with kernel32 (Windows)')
            from ctypes import windll
            library_loader = windll
        class PowerClass(Structure):
            # Mirrors the Win32 SYSTEM_POWER_STATUS structure layout.
            _fields_ = [
                ('ACLineStatus', c_byte),
                ('BatteryFlag', c_byte),
                ('BatteryLifePercent', c_byte),
                ('Reserved1', c_byte),
                ('BatteryLifeTime', c_ulong),
                ('BatteryFullLifeTime', c_ulong)
            ]
        def _get_battery_status(pl):
            powerclass = PowerClass()
            result = library_loader.kernel32.GetSystemPowerStatus(byref(powerclass))
            # http://msdn.microsoft.com/en-us/library/windows/desktop/aa372693(v=vs.85).aspx
            # NOTE(review): MSDN documents GetSystemPowerStatus as returning
            # nonzero on success, so `if result: return None` looks
            # inverted — confirm against upstream powerline.
            if result:
                return None
            return powerclass.BatteryLifePercent, powerclass.ACLineStatus == 1
        # NOTE(review): _get_battery_status is probed here without its `pl`
        # argument — verify this is not a latent TypeError.
        if _get_battery_status() is None:
            pl.debug('Not using GetSystemPowerStatus because it failed')
        else:
            pl.debug('Using GetSystemPowerStatus')
            return _get_battery_status
    # No backend worked on this platform.
    raise NotImplementedError
Example 45 (score: 0)
def sync():
    # Synchronise game save files between cloud backends (steamcloud,
    # icloud, ...).  `games` is a declarative table: which filenames to
    # sync ('regexformats'), where each backend stores them ('modules'),
    # and the codec hooks ('read'/'write') translating the per-platform
    # save formats.
    # NOTE(review): the source this was recovered from had its
    # indentation stripped; the nesting below is a reconstruction —
    # verify against the original project before relying on it.
    games = [gameobj({  # The Banner Saga
        'regexformats': {
            # Slot-numbered saves such as '0/sav_chapter1.save.json'.
            'base': (r'^[0-4]/(resume|sav_(chapter[1235]|(leaving)?(einartof' +
                     r't|frostvellr)|(dengl|dund|hridvaldy|radormy|skog)r|bj' +
                     r'orulf|boersgard|finale|grofheim|hadeborg|ingrid|marek' +
                     r'|ridgehorn|sigrholm|stravhs|wyrmtoe))\.(bmpzip|png|im' +
                     r'g|save\.json)$')
        },
        'folder': 'save/saga1',
        'modules': {
            'steamcloud': {
                'id': '237990'
            },
            'icloud': {
                'id': 'MQ92743Y4D~com~stoicstudio~BannerSaga'
            }
        },
        'read': bannersaga_read,
        'write': bannersaga_write
    }), gameobj({  # Transistor
        'regexformats': {
            'base': r'^[Pp]rofile[1-5]\.sav$'
        },
        'modules': {
            'steamcloud': {
                'id': '237930'
            },
            'icloud': {
                'id': 'GPYC69L4CR~iCloud~com~supergiantgames~transistor',
                # NOTE(review): 'Docuements' looks like a typo for
                # 'Documents', but it is runtime data matched against the
                # icloud container layout — confirm before changing.
                'folder': 'Docuements',
            }
        },
        'read': transistor_read,
        'write': transistor_write
    }), gameobj({  # Costume Quest
        'regexformats': {
            'base': r'^CQ(_DLC)?_save_[012]$',
            # Byte patterns applied to raw save contents by the codecs.
            'timeplayed': b'^.+(;TimePlayed=([1-9]*[0-9](\.[0-9]+)?)).*$',
            'level': b'worlds\/([a-z_]+)\/\1'
        },
        'modules': {
            'steamcloud': {
                'id': '115100'
            },
            'icloud': {
                'id': '8VM2L59D89~com~doublefine~cqios',
                'folder': 'Docuements'
            }
        },
        'read': costumequest_read,
        'write': costumequest_write
    }), gameobj({  # Race the Sun
        'regexformats': {
            'base': r'^(savegame|rts_save)\.xml$'
        },
        'modules': {
            'steamcloud': {
                'id': '253030'
            },
            'icloud': {
                'id': 'iCloud~com~flippfly~racethesun',
                'folder': 'Docuements'
            }
        },
        'read': racethesun_read,
        'write': racethesun_write
    })]
    # Syncing only makes sense with at least two backends available.
    if len(modules) > 1:
        # Initialise every backend; only the ones that come up count.
        workingmodules = {}
        modulenum = 0
        for module in modules:
            if module.init():
                workingmodules[modulename(module.__name__)] = module
                modulenum += 1
        if modulenum > 1:
            for game in games:
                gamemodules = []
                metadata = {}
                cancontinue = True
                # Configure each backend this game uses; bail out of the
                # game entirely if any required backend is unavailable.
                for module in modules:
                    name = modulename(module.__name__)
                    if name in game['modules']:
                        if name not in workingmodules:
                            cancontinue = False
                            break
                        else:
                            module = workingmodules[name]
                            # Per-backend folder overrides the game folder.
                            if ('folder' in game['modules'][name] or 'folder' in game):
                                module.set_folder(game['folder'] if 'folder' not in game['modules'][name] else game['modules'][name]['folder'])
                            module.set_id(game['modules'][name]['id'])
                            if module.will_work():
                                gamemodules.append(module)
                            else:
                                module.shutdown()
                                cancontinue = False
                                break
                if cancontinue:
                    # Compile the filename filter ('base') and any extra
                    # content regexes passed through to the codecs.
                    regexes = {}
                    filetimestamps = {}
                    filedata = {}
                    files = {}
                    for regex in game['regexformats']:
                        if regex == 'base':
                            fileregex = re.compile(game['regexformats']['base'])
                        else:
                            regexes[regex] = re.compile(game['regexformats'][regex])
                    # Gather (timestamp, data) for every matching file on
                    # every backend; -1 / b'' mark "absent on this backend".
                    moduleindex = 0
                    while cancontinue and moduleindex < len(gamemodules):
                        cancontinue = False
                        for filename in (gamemodules[moduleindex].get_file_names()):
                            if fileregex.match(filename):
                                readobject = (game['read'](filename, gamemodules[moduleindex].get_file_timestamp(filename), gamemodules[moduleindex].read_file(filename), modulename(gamemodules[moduleindex].__name__), regexes))
                                metadata.update(readobject[1])
                                for (itemfilename, itemfiletimestamp, itemfiledata) in readobject[0]:
                                    if fileregex.match(itemfilename):
                                        if itemfilename not in filetimestamps:
                                            filetimestamps[itemfilename] = [-1] * len(gamemodules)
                                        (filetimestamps[itemfilename][moduleindex]) = itemfiletimestamp
                                        if itemfilename not in filedata:
                                            filedata[itemfilename] = [b''] * len(gamemodules)
                                        filedata[itemfilename][moduleindex] = itemfiledata
                                cancontinue = True
                        moduleindex += 1
                    if cancontinue:
                        for filename in filetimestamps:
                            # A zero timestamp means the backend could not
                            # date the file; abort the whole game sync.
                            for timestamp in filetimestamps[filename]:
                                if timestamp == 0:
                                    cancontinue = False
                                    break
                            if cancontinue:
                                newerfilesmayexist = True
                                highestlowtimestamp = -1
                            # NOTE(review): reconstructed nesting — in the
                            # original these two guards may have been one.
                            if cancontinue:
                                # Propagate timestamps: whenever two
                                # backends hold byte-identical data, lift
                                # the older copy's timestamp to the newer
                                # one so identical files never get rewritten.
                                while newerfilesmayexist:
                                    newerfilesmayexist = False
                                    # Sentinel far in the future (2033).
                                    lowesttimestamp = 2000000000
                                    lowesttimestampindex = -1
                                    for moduleindex in range(len(gamemodules)):
                                        if (highestlowtimestamp < filetimestamps[filename][moduleindex] < lowesttimestamp and filetimestamps[filename][moduleindex] > 0):
                                            lowesttimestamp = (filetimestamps[filename][moduleindex])
                                            lowesttimestampindex = moduleindex
                                    if lowesttimestampindex != -1:
                                        newerfilesmayexist = True
                                        highestlowtimestamp = lowesttimestamp
                                        for moduleindex in range(len(gamemodules)):
                                            if (moduleindex != lowesttimestampindex and filetimestamps[filename][moduleindex] > 0 and filedata[filename][lowesttimestampindex] == filedata[filename][moduleindex]):
                                                (filetimestamps[filename][moduleindex]) = lowesttimestamp
                                # The newest copy wins and becomes the
                                # canonical content for this filename.
                                highesttimestamp = -1
                                highesttimestampindex = -1
                                for moduleindex in range(len(gamemodules)):
                                    if (filetimestamps[filename][moduleindex] > highesttimestamp):
                                        highesttimestamp = (filetimestamps[filename][moduleindex])
                                        highesttimestampindex = moduleindex
                                files[filename] = (filedata[filename][highesttimestampindex], highesttimestamp)
                                # Push the canonical copy to every backend
                                # holding an older version.
                                for moduleindex in range(len(gamemodules)):
                                    if (moduleindex != highesttimestampindex and filetimestamps[filename][moduleindex] < highesttimestamp):
                                        writeobject = (game['write'](filename, files[filename][0], modulename(gamemodules[moduleindex].__name__), metadata, regexes))
                                        if writeobject[0]:
                                            (gamemodules[moduleindex].write_file(*writeobject[1]))
                        # Per-game post-sync hook.
                        game['after'](files, workingmodules, metadata)
                    for module in gamemodules:
                        module.shutdown()
Example 46 (score: 0)
Project: dirigible-spreadsheet — Source file: test_2787_SignUp.py
def test_can_sign_up_from_signup_page(self):
    """Walk the complete sign-up journey as the persona 'Harold'.

    Covers: empty-form validation, duplicate username, invalid email,
    mismatched passwords (with live client-side validation), a
    successful registration, the confirmation email and activation
    link, first login, the tutorial-promo dialog, and re-activation
    of an already-active account.
    """
    # Harold goes to the Dirigible home page
    self.go_to_url('/')
    # He notes that there are two "sign up" links, both pointing to the same URL
    self.click_link("id_signup_link")
    signup_url = self.browser.current_url
    self.go_to_url('/')
    # He follows one of them
    self.click_link("id_signup_call_to_action")
    self.assertEquals(self.browser.current_url, signup_url)
    # He notices a "sign up" form that requires a username, an email address,
    # and two copies of the same password.
    self.assertTrue(self.is_element_present(
        'css=input#id_username'))
    self.assertTrue(self.is_element_present(
        'css=input#id_email'))
    self.assertTrue(self.is_element_present(
        'css=input#id_password1'))
    self.assertTrue(self.is_element_present(
        'css=input#id_password2'))
    self.assertTrue(self.is_element_present(
        'css=input#id_signup_button'))
    self.assertEquals(
        self.selenium.get_attribute(
            'css=#id_signup_button@value'),
        'Sign up')
    self.assertEquals(
        self.selenium.get_attribute(
            'css=input#id_signup_button@type'),
        'submit')
    # Being an awkward sod, he tries to sign up with no details.
    self.click_link('id_signup_button')
    # He is told off.
    self.assertEquals(
        self.get_text('id=id_username_error'),
        "Please enter a username."
    )
    self.assertEquals(
        self.get_text('id=id_email_error'),
        "Please enter your email address."
    )
    self.assertEquals(
        self.get_text('id=id_password1_error'),
        "Please enter a password."
    )
    self.assertEquals(
        self.get_text('id=id_password2_error'),
        "Please enter a password."
    )
    # He tries again, this time using his friend's username,
    # but entering sensible details for everything else.
    username = self.get_my_username() + "_x"
    duplicate_username = self.get_my_username()
    # NOTE(review): this literal appears mangled by the scraper's email
    # obfuscation ('%[email protected]' is not a valid %-format); the original was
    # presumably 'harold.testuser-%s@<domain>' — restore before running.
    self.email_address = 'harold.testuser-%[email protected]' % (self.get_my_username(),)
    password = 'p4ssw0rd'
    self.selenium.type(
        'id=id_username',
        duplicate_username)
    self.selenium.type(
        'id=id_email',
        self.email_address)
    self.selenium.type(
        'id=id_password1',
        password)
    self.selenium.type(
        'id=id_password2',
        password)
    self.click_link('id_signup_button')
    # He is told off.
    self.assertEquals(
        self.get_text('id=id_username_error'),
        "This username is already taken. Please choose another."
    )
    # He tries again with a unique username but mistypes the email address
    self.selenium.type(
        'id=id_username',
        username)
    self.selenium.type(
        'id=id_email',
        '@@@@@')
    self.selenium.type(
        'id=id_password1',
        password)
    self.selenium.type(
        'id=id_password2',
        password)
    self.click_link('id_signup_button')
    # He is told off.
    self.assertEquals(
        self.get_text('id=id_email_error'),
        "Please enter a valid email address."
    )
    # He tries again with a unique username but mistypes the password
    self.selenium.type(
        'id=id_username',
        username)
    self.selenium.type(
        'id=id_email',
        self.email_address)
    self.selenium.type(
        'id=id_password1',
        password)
    self.selenium.type(
        'id=id_password2',
        "hello")
    ## Do the last character using native keypresses to make sure that
    ## all of our client-side validation JS really gets called
    self.selenium.focus('id=id_password2')
    self.human_key_press(key_codes.NUMBER_1)
    # Even before he submits the form, the page is grumbling at him
    self.wait_for(
        self.are_password_fields_showing_error,
        lambda : "Password error to appear"
    )
    # With misplaced self-confidence, he goes ahead and clicks the button
    self.click_link('id_signup_button')
    # He is told off.
    self.assertEquals(
        self.get_text('id=id_non_field_errors'),
        "You must type the same password each time"
    )
    # He finally does it correctly
    self.selenium.type(
        'id=id_username',
        username)
    self.selenium.type(
        'id=id_email',
        self.email_address)
    self.selenium.type(
        'id=id_password1',
        password)
    self.selenium.type(
        'id=id_password2',
        password)
    # Before he clicks the link, he confirms that there is no error in the password fields
    self.wait_for(
        lambda : not self.are_password_fields_showing_error(),
        lambda : "Password errors to not be there"
    )
    self.click_link('id_signup_button')
    # He gets a message saying "Thank you" that tells him that an email has been
    # sent to his address.
    self.assertTrue('Thank you' in self.selenium.get_body_text())
    self.assertTrue(self.email_address in self.selenium.get_body_text())
    # There is a link to the Dirigible home page, which he follows and discovers
    # that it works.
    self.click_link('id_link_home')
    self.assertEquals(self.browser.current_url, Url.ROOT)
    # He checks his email, and after a short wait finds a message
    # from the Dirigible server, that looks like the following string:
    email_from, email_to, subject, message = self.pop_email_for_client(self.email_address)
    self.assertEquals(email_to, self.email_address)
    # NOTE(review): sender literal also mangled by email obfuscation.
    self.assertEquals(email_from, '[email protected]')
    self.assertEquals(subject, 'Dirigible Beta Sign-up')
    self.assertTrue('Click on the following link' in message)
    # Extract the activation link from the email body.
    confirm_url_re = re.compile(
        r'<(http://projectdirigible\.com/signup/activate/[^>]+)>')
    match = confirm_url_re.search(message)
    self.assertTrue(match)
    confirmation_url = match.group(1).replace('projectdirigible.com', SERVER_IP)
    # He decides to type the confirmation link manually into his browser and,
    # inevitably, gets it completely wrong
    self.go_to_url(urljoin(Url.ROOT, '/signup/activate/wibble'))
    # He's given a kindly warning.
    self.assertTrue('the activation link you used was not recognised' in self.selenium.get_body_text())
    # He clicks on the link in the email instead
    self.go_to_url(confirmation_url)
    body_text = self.selenium.get_body_text()
    # He's taken to a page that welcomes him aboard and allows him to log in.
    self.assertTrue(
        'Welcome to Dirigible' in body_text,
        'could not find "Welcome to Dirigible" on page. URL:<%s>, body text:\n%s' % (confirmation_url, body_text[:-100])
    )
    # He logs in, using the fields on the page.
    self.login(username, password, already_on_login_page=True)
    # He is taken to his dashboard
    self.assertEquals(self.browser.title, "%s's Dashboard: Dirigible" % (username,))
    _, __, path, ___, ____, _____ = urlparse(self.browser.current_url)
    self.assertEquals(path, '/')
    # He's super keen to get in on the Dirigible action, so when he sees the
    # link saying "Create new sheet", he clicks it with gusto
    self.click_link('id_create_new_sheet')
    # He sees a dialog box promoting the tutorial
    self.wait_for_element_visibility('id_tutorial_promo_dialog', True)
    dialog_text = self.get_text('id=id_tutorial_promo_dialog')
    self.assertTrue('tutorial' in dialog_text.lower())
    # He notes that even when the spinner stops, the focus stays on the dialog's OK
    # button
    self.wait_for_spinner_to_stop()
    self.assertTrue(
        self.is_element_focused('css=#id_tutorial_promo_dialog_close')
    )
    # He notices a link to the tutorial inside the dialog
    tutorial_link_inside_dialog_locator = 'css=#id_tutorial_promo_dialog a#id_tutorial_link'
    self.wait_for_element_to_appear(tutorial_link_inside_dialog_locator)
    tutorial_link_url = self.selenium.get_attribute('%s@href' % (tutorial_link_inside_dialog_locator))
    # He clicks the OK button to dismiss the dialog
    self.selenium.click('id=id_tutorial_promo_dialog_close')
    # the dialog disappears
    self.wait_for_element_visibility('id=id_tutorial_promo_dialog', False)
    # he goes to the tutorial url he remembers from earlier
    self.go_to_url(tutorial_link_url)
    # He finds himself on a page which contains the first tutorial
    expected_title = 'Tutorial part 1: First steps, adding Python to a spreadsheet'
    self.assertTrue(expected_title in self.browser.title)
    # He goes back to the dashboard
    self.go_to_url(Url.ROOT)
    # He creates another sheet, ready to get annoyed if he sees that dialog again...
    self.click_link('id_create_new_sheet')
    # And is happy that it isn't there,
    self.wait_for_grid_to_appear()
    self.assertFalse(
        self.is_element_present('id=id_tutorial_promo_dialog')
    )
    # He logs out
    self.logout()
    # He decides that he enjoyed confirming his account so much, he'd like to
    # do it again.
    self.go_to_url(confirmation_url)
    # He's given a kindly warning.
    self.assertTrue('your account might already be activated' in self.selenium.get_body_text())
    # When he returns to his email app, he sees a second email from us,
    email_from, email_to, subject, message = self.pop_email_for_client(self.email_address)
    self.assertEquals(email_to, self.email_address)
    self.assertEquals(email_from, '[email protected]')
    self.assertEquals(subject, 'Welcome to Dirigible')
    # pointing him towards the tutorial.
    self.assertTrue('tutorial' in message.lower())
    # NOTE(review): 'docuementation' is presumably a typo shared with the
    # application's real URL — confirm against the deployed routes.
    self.assertTrue('/docuementation/tutorial01.html' in message)
    # We also recommend that he subscribe to the Dirigible blog
    self.assertTrue('blog.projectdirigible.com' in message)
    # or follows us on Twitter
    self.assertTrue('twitter.com/dirigiblegrid' in message)
    # Satisfied, he goes back to sleep.
    # NOTE(review): asserting a non-empty string literal is always true;
    # likely a placeholder rather than a real check.
    self.assertTrue('sleep')
Example 47 (score: 0)
def run(test, params, env):
    """
    Test the command virsh nodecpustats

    (1) Call the virsh nodecpustats command for all host cpus
        separately
    (2) Get the output
    (3) Check against /proc/stat output(o) for respective cpu
        user: o[0] + o[1]
        system: o[2] + o[5] + o[6]
        idle: o[3]
        iowait: o[4]
    (4) Call the virsh nodecpustats command with an unexpected option
    (5) Call the virsh nodecpustats command with libvirtd service stop
    """
    def virsh_check_nodecpustats_percpu(actual_stats):
        """
        Check the actual nodecpustats output value:
        total time <= system uptime
        """
        # Normalise to seconds from nano seconds
        total = float((actual_stats['system'] + actual_stats['user'] +
                       actual_stats['idle'] + actual_stats['iowait']) / (10 ** 9))
        uptime = float(utils.get_uptime())
        if not total <= uptime:
            raise error.TestFail("Commands 'virsh nodecpustats' not succeeded"
                                 " as total time: %f is more"
                                 " than uptime: %f" % (total, uptime))
        return True

    def virsh_check_nodecpustats(actual_stats, cpu_count):
        """
        Check the actual aggregate nodecpustats output value:
        per-cpu average of total time <= system uptime
        """
        # Normalise to seconds from nano seconds and get for one cpu
        total = float(((actual_stats['system'] + actual_stats['user'] +
                        actual_stats['idle'] + actual_stats['iowait']) / (10 ** 9)) /
                      (cpu_count))
        uptime = float(utils.get_uptime())
        if not total <= uptime:
            raise error.TestFail("Commands 'virsh nodecpustats' not succeeded"
                                 " as total time: %f is more"
                                 " than uptime: %f" % (total, uptime))
        return True

    def virsh_check_nodecpustats_percentage(actual_per):
        """
        Check the actual nodecpustats percentages add up to 100%
        """
        total = int(round(actual_per['user'] + actual_per['system'] +
                          actual_per['idle'] + actual_per['iowait']))
        if not total == 100:
            raise error.TestFail("Commands 'virsh nodecpustats' not succeeded"
                                 " as the total percentage value: %d"
                                 " is not equal 100" % total)

    def parse_output(output):
        """
        Parse the absolute-times output into a dictionary.

        :param output: virsh command result object (has .stdout)
        :return: dict of user,system,idle,iowait times
        """
        # From the beginning of a line, group 1 is one or more word-characters,
        # followed by zero or more whitespace characters and a ':',
        # then one or more whitespace characters,
        # followed by group 2, which is one or more digit characters,
        # e.g as below
        # user: 6163690000000
        #
        regex_obj = re.compile(r"^(\w+)\s*:\s+(\d+)")
        actual = {}
        for line in output.stdout.split('\n'):
            match_obj = regex_obj.search(line)
            # Due to the extra space in the list
            if match_obj is not None:
                name = match_obj.group(1)
                value = match_obj.group(2)
                actual[name] = int(value)
        return actual

    def parse_percentage_output(output):
        """
        Parse the --percent output into a dictionary.

        :param output: virsh command result object (has .stdout)
        :return: dict of user,system,idle,iowait percentages
        """
        # From the beginning of a line, group 1 is one or more word-characters,
        # followed by zero or more whitespace characters and a ':',
        # then one or more whitespace characters,
        # followed by group 2, a decimal value, e.g as below
        # user: 1.5%
        #
        # BUG FIX: the decimal point is now escaped (r"\d+\.\d+"); the
        # previous pattern r"\d+.\d+" let '.' match any character, so
        # malformed values such as "12x5" were silently accepted.
        regex_obj = re.compile(r"^(\w+)\s*:\s+(\d+\.\d+)")
        actual_percentage = {}
        for line in output.stdout.split('\n'):
            match_obj = regex_obj.search(line)
            # Due to the extra space in the list
            if match_obj is not None:
                name = match_obj.group(1)
                value = match_obj.group(2)
                actual_percentage[name] = float(value)
        return actual_percentage

    # Initialize the variables
    itr = int(params.get("inner_test_iterations"))
    option = params.get("virsh_cpunodestats_options")
    invalid_cpunum = params.get("invalid_cpunum")
    status_error = params.get("status_error")
    libvirtd = params.get("libvirtd", "on")

    # Prepare libvirtd service
    if libvirtd == "off":
        utils_libvirtd.libvirtd_stop()

    # Get the host cpu list
    host_cpus_list = utils.cpu_online_map()

    # Run test case for 5 iterations default can be changed in subtests.cfg
    # file
    for i in range(itr):
        if status_error == "yes":
            if invalid_cpunum == "yes":
                # One past the last valid CPU index: must be rejected.
                option = "--cpu %s" % (len(host_cpus_list) + 1)
            output = virsh.nodecpustats(ignore_status=True, option=option)
            status = output.exit_status
            if status == 0:
                if libvirtd == "off":
                    # Restore the service before failing the test.
                    utils_libvirtd.libvirtd_start()
                    raise error.TestFail("Command 'virsh nodecpustats' "
                                         "succeeded with libvirtd service "
                                         "stopped, incorrect")
                else:
                    raise error.TestFail("Command 'virsh nodecpustats %s' "
                                         "succeeded (incorrect command)" % option)
        elif status_error == "no":
            # Run the testcase for each cpu to get the cpu stats
            for cpu in host_cpus_list:
                option = "--cpu %s" % cpu
                output = virsh.nodecpustats(ignore_status=True, option=option)
                status = output.exit_status
                if status == 0:
                    actual_value = parse_output(output)
                    virsh_check_nodecpustats_percpu(actual_value)
                else:
                    raise error.TestFail("Command 'virsh nodecpustats %s'"
                                         "not succeeded" % option)

            # Run the test case for each cpu to get the cpu stats in percentage
            for cpu in host_cpus_list:
                option = "--cpu %s --percent" % cpu
                output = virsh.nodecpustats(ignore_status=True, option=option)
                status = output.exit_status
                if status == 0:
                    actual_value = parse_percentage_output(output)
                    virsh_check_nodecpustats_percentage(actual_value)
                else:
                    raise error.TestFail("Command 'virsh nodecpustats %s'"
                                         " not succeeded" % option)

            option = ''
            # Run the test case for total cpus to get the cpus stats
            output = virsh.nodecpustats(ignore_status=True, option=option)
            status = output.exit_status
            if status == 0:
                actual_value = parse_output(output)
                virsh_check_nodecpustats(actual_value, len(host_cpus_list))
            else:
                raise error.TestFail("Command 'virsh nodecpustats %s'"
                                     " not succeeded" % option)

            # Run the test case for the total cpus to get the stats in
            # percentage
            option = "--percent"
            output = virsh.nodecpustats(ignore_status=True, option=option)
            status = output.exit_status
            if status == 0:
                actual_value = parse_percentage_output(output)
                virsh_check_nodecpustats_percentage(actual_value)
            else:
                raise error.TestFail("Command 'virsh nodecpustats %s'"
                                     " not succeeded" % option)

    # Recover libvirtd service start
    if libvirtd == "off":
        utils_libvirtd.libvirtd_start()
Example 48 (score: 0)
Project: PokemonGo-Map — Source file: runserver.py
def main():
    """Entry point: configure logging, resolve the scan location, start the
    worker threads and (unless disabled) the Flask web server."""
    # Patch threading to make exceptions catchable
    install_thread_excepthook()
    # Make sure exceptions get logged
    sys.excepthook = handle_exception

    args = get_args()

    # Add file logging if enabled (both verbosity flags share one format)
    log_format = '%(asctime)s [%(threadName)16s][%(module)14s][%(levelname)8s] %(message)s'
    if args.verbose and args.verbose != 'nofile':
        filelog = logging.FileHandler(args.verbose)
        filelog.setFormatter(logging.Formatter(log_format))
        logging.getLogger('').addHandler(filelog)
    if args.very_verbose and args.very_verbose != 'nofile':
        filelog = logging.FileHandler(args.very_verbose)
        filelog.setFormatter(logging.Formatter(log_format))
        logging.getLogger('').addHandler(filelog)

    # Check if we have the proper encryption library file and get its path
    encryption_lib_path = get_encryption_lib_path(args)
    # BUG FIX: the original used `is ""` (identity comparison), which is not
    # a reliable emptiness test; compare by value instead.
    if encryption_lib_path == "":
        sys.exit(1)

    if args.verbose or args.very_verbose:
        log.setLevel(logging.DEBUG)
    else:
        log.setLevel(logging.INFO)

    # Let's not forget to run Grunt / Only needed when running with webserver
    if not args.no_server:
        if not os.path.exists(os.path.join(os.path.dirname(__file__), 'static/dist')):
            log.critical('Missing front-end assets (static/dist) -- please run "npm install && npm run build" before starting the server')
            sys.exit()

    # These are very noisey, let's shush them up a bit
    logging.getLogger('peewee').setLevel(logging.INFO)
    logging.getLogger('requests').setLevel(logging.WARNING)
    logging.getLogger('pgoapi.pgoapi').setLevel(logging.WARNING)
    logging.getLogger('pgoapi.rpc_api').setLevel(logging.INFO)
    logging.getLogger('werkzeug').setLevel(logging.ERROR)

    config['parse_pokemon'] = not args.no_pokemon
    config['parse_pokestops'] = not args.no_pokestops
    config['parse_gyms'] = not args.no_gyms

    # Turn these back up if debugging
    if args.verbose or args.very_verbose:
        logging.getLogger('pgoapi').setLevel(logging.DEBUG)
    if args.very_verbose:
        logging.getLogger('peewee').setLevel(logging.DEBUG)
        logging.getLogger('requests').setLevel(logging.DEBUG)
        logging.getLogger('pgoapi.pgoapi').setLevel(logging.DEBUG)
        logging.getLogger('pgoapi.rpc_api').setLevel(logging.DEBUG)
        logging.getLogger('rpc_api').setLevel(logging.DEBUG)
        logging.getLogger('werkzeug').setLevel(logging.DEBUG)

    # use lat/lng directly if matches such a pattern
    # (raw string so the backslashes reach the regex engine untouched)
    prog = re.compile(r"^(\-?\d+\.\d+),?\s?(\-?\d+\.\d+)$")
    res = prog.match(args.location)
    if res:
        log.debug('Using coordinates from CLI directly')
        position = (float(res.group(1)), float(res.group(2)), 0)
    else:
        log.debug('Looking up coordinates in API')
        position = util.get_pos_by_name(args.location)

    # Use the latitude and longitude to get the local altitude from Google
    try:
        url = 'https://maps.googleapis.com/maps/api/elevation/json?locations={},{}'.format(
            str(position[0]), str(position[1]))
        altitude = requests.get(url).json()[u'results'][0][u'elevation']
        log.debug('Local altitude is: %sm', altitude)
        position = (position[0], position[1], altitude)
    except (requests.exceptions.RequestException, IndexError, KeyError):
        # Best-effort: keep altitude 0 rather than aborting the scan.
        log.error('Unable to retrieve altitude from Google APIs; setting to 0')

    if not any(position):
        log.error('Could not get a position by name, aborting')
        sys.exit()

    log.info('Parsed location is: %.4f/%.4f/%.4f (lat/lng/alt)',
             position[0], position[1], position[2])

    if args.no_pokemon:
        log.info('Parsing of Pokemon disabled')
    if args.no_pokestops:
        log.info('Parsing of Pokestops disabled')
    if args.no_gyms:
        log.info('Parsing of Gyms disabled')
    if args.encounter:
        log.info('Encountering pokemon enabled')

    config['LOCALE'] = args.locale
    config['CHINA'] = args.china

    app = Pogom(__name__)
    db = init_database(app)
    if args.clear_db:
        log.info('Clearing database')
        if args.db_type == 'mysql':
            drop_tables(db)
        elif os.path.isfile(args.db):
            os.remove(args.db)
    create_tables(db)

    app.set_current_location(position)

    # Control the search status (running or not) across threads
    pause_bit = Event()
    pause_bit.clear()
    if args.on_demand_timeout > 0:
        pause_bit.set()
    heartbeat = [now()]

    # Setup the location tracking queue and push the first location on
    new_location_queue = Queue()
    new_location_queue.put(position)

    # DB Updates
    db_updates_queue = Queue()

    # Thread(s) to process database updates
    for i in range(args.db_threads):
        log.debug('Starting db-updater worker thread %d', i)
        t = Thread(target=db_updater, name='db-updater-{}'.format(i), args=(args, db_updates_queue))
        t.daemon = True
        t.start()

    # db cleaner; really only need one ever
    if not args.disable_clean:
        t = Thread(target=clean_db_loop, name='db-cleaner', args=(args,))
        t.daemon = True
        t.start()

    # WH Updates
    wh_updates_queue = Queue()

    # Thread to process webhook updates
    for i in range(args.wh_threads):
        log.debug('Starting wh-updater worker thread %d', i)
        t = Thread(target=wh_updater, name='wh-updater-{}'.format(i), args=(args, wh_updates_queue))
        t.daemon = True
        t.start()

    if not args.only_server:
        # Check all proxies before continue so we know they are good
        if args.proxy and not args.proxy_skip_check:
            # Overwrite old args.proxy with new working list
            args.proxy = check_proxies(args)

        # Gather the pokemons!

        # attempt to dump the spawn points (do this before starting threads or endure the woe)
        if args.spawnpoint_scanning and args.spawnpoint_scanning != 'nofile' and args.dump_spawnpoints:
            with open(args.spawnpoint_scanning, 'w+') as file:
                log.info('Saving spawn points to %s', args.spawnpoint_scanning)
                spawns = Pokemon.get_spawnpoints_in_hex(position, args.step_limit)
                file.write(json.dumps(spawns))
                log.info('Finished exporting spawn points')

        argset = (args, new_location_queue, pause_bit, heartbeat, encryption_lib_path, db_updates_queue, wh_updates_queue)

        log.debug('Starting a %s search thread', args.scheduler)
        search_thread = Thread(target=search_overseer_thread, name='search-overseer', args=argset)
        search_thread.daemon = True
        search_thread.start()

    if args.cors:
        CORS(app)

    # No more stale JS
    init_cache_busting(app)

    app.set_search_control(pause_bit)
    app.set_heartbeat_control(heartbeat)
    app.set_location_queue(new_location_queue)

    config['ROOT_PATH'] = app.root_path
    config['GMAPS_KEY'] = args.gmaps_key

    if args.no_server:
        # This loop allows for ctrl-c interupts to work since flask won't be holding the program open
        while search_thread.is_alive():
            time.sleep(60)
    else:
        ssl_context = None
        if args.ssl_certificate and args.ssl_privatekey \
                and os.path.exists(args.ssl_certificate) and os.path.exists(args.ssl_privatekey):
            ssl_context = ssl.SSLContext(ssl.PROTOCOL_TLSv1_2)
            ssl_context.load_cert_chain(args.ssl_certificate, args.ssl_privatekey)
            log.info('Web server in SSL mode.')
        if args.verbose or args.very_verbose:
            app.run(threaded=True, use_reloader=False, debug=True, host=args.host, port=args.port, ssl_context=ssl_context)
        else:
            app.run(threaded=True, use_reloader=False, debug=False, host=args.host, port=args.port, ssl_context=ssl_context)
# Example 49 — Project: zeya, Source File: zeya.py
def ZeyaHandler(backend, library_repr, resource_basedir, bitrate,
                auth_type=None, auth_data=None):
    """
    Wrapper around the actual HTTP request handler implementation class. We
    need to create a closure so that the inner class can receive the following
    data:
    Backend to use.
    Library data.
    Base directory for resources.
    Bitrate for encoding.
    Authentication data.
    """
    # NOTE(review): Python 2 code (print statement, dict.has_key). Names such
    # as urllib, parse_qs, tempfile, zlib, base64, crypt, socket, traceback,
    # backends, b64dict, no_auth_rval, auth, split_user_pass and BASIC_AUTH
    # are presumably defined at module level — confirm against the full file.
    class ZeyaHandlerImpl(BaseHTTPRequestHandler, object):
        """
        Web server request handler.
        """
        def do_GET(self):
            """
            Handle a GET request.
            """
            # http://host/ yields the library main page.
            if self.path == '/':
                self.serve_static_content('/library.html')
            # http://host/getlibrary returns a representation of the music
            # collection.
            elif self.path == '/getlibrary':
                self.serve_library()
            # http://host/getcontent?key=N yields an Ogg stream of the file
            # associated with the specified key.
            elif self.path.startswith('/getcontent?'):
                self.serve_content(urllib.unquote(self.path[12:]))
            # All other paths are assumed to be static content.
            # http://host/foo is mapped to resources/foo.
            else:
                self.serve_static_content(self.path)
        def get_content_type(self, path):
            """
            Return the MIME type associated with the given path.
            """
            path = path.lower()
            if path.endswith('.html'):
                return 'text/html'
            elif path.endswith('.png'):
                return 'image/png'
            elif path.endswith('.gif'):
                return 'image/gif'
            elif path.endswith('.css'):
                return 'text/css'
            elif path.endswith('.js'):
                return 'text/javascript'
            elif path.endswith('.ogg'):
                return 'audio/ogg'
            else:
                # Unknown extension: warn and fall back to a generic type.
                print ("Warning: couldn't identify content-type for %r, "
                       + "serving as application/octet-stream") % (path,)
                return 'application/octet-stream'
        def serve_content(self, query):
            """
            Serve an audio stream (audio/ogg).
            """
            # The query is of the form key=N or key=N&buffered=true.
            args = parse_qs(query)
            key = args['key'][0] if args.has_key('key') else ''
            # If buffering is activated, encode the entire file and serve the
            # Content-Length header. This increases song load latency because
            # we can't serve any of the file until we've finished encoding the
            # whole thing. However, Chrome needs the Content-Length header to
            # accompany audio data.
            buffered = args['buffered'][0] if args.has_key('buffered') else ''
            # TODO: send error 500 when we encounter an error during the
            # decoding phase. This is needed for reliable client-side error
            # dialogs.
            self.send_response(200)
            self.send_header('Content-type', 'audio/ogg')
            if buffered:
                # Complete the transcode and write to a temporary file.
                # Determine its length and serve the Content-Length header.
                output_file = tempfile.TemporaryFile()
                backend.get_content(key, output_file, bitrate, buffered=True)
                output_file.seek(0)
                data = output_file.read()
                self.send_header('Content-Length', str(len(data)))
                self.end_headers()
                output_file.seek(0)
                try:
                    backends.copy_output_with_shaping(
                        output_file.fileno(), self.wfile, bitrate)
                except socket.error:
                    # Client hung up mid-stream; nothing useful to do.
                    pass
                output_file.close()
            else:
                # Don't determine the Content-Length. Just stream to the client
                # on the fly.
                self.end_headers()
                backend.get_content(key, self.wfile, bitrate)
            self.wfile.close()
        def send_data(self, ctype, data):
            """
            Send data to the client.
            Use deflate compression if client headers indicate that the
            other end supports it and if it's appropriate for this
            content-type.
            """
            compress_data = \
                (ctype.startswith('text/')
                 and 'Accept-Encoding' in self.headers
                 and 'deflate' in self.headers['Accept-Encoding'].split(','))
            self.send_response(200)
            if compress_data:
                data = zlib.compress(data)
                self.send_header('Content-Encoding', 'deflate')
                self.send_header('Vary', 'Accept-Encoding')
            self.send_header('Content-Length', str(len(data)))
            self.send_header('Content-Type', ctype)
            self.end_headers()
            self.wfile.write(data)
            self.wfile.close()
        def serve_library(self):
            """
            Serve a representation of the library.
            """
            self.send_data('text/html', library_repr.encode('utf-8'))
        def serve_static_content(self, path):
            """
            Serve static content from the resources/ directory.
            """
            try:
                # path already has a leading '/' in front of it. Strip it.
                full_path = os.path.join(resource_basedir, path[1:])
                # Ensure that the basedir we use for security checks ends in '/'.
                effective_basedir = os.path.join(resource_basedir, '')
                # Prevent directory traversal attacks. Canonicalize the
                # filename we're going to open and verify that it's inside the
                # resource directory.
                if not os.path.abspath(full_path).startswith(effective_basedir):
                    self.send_error(404, 'File not found: %s' % (path,))
                    return
                with open(full_path) as f:
                    self.send_data(self.get_content_type(path), f.read())
            except IOError:
                traceback.print_exc()
                self.send_error(404, 'File not found: %s' % (path,))
    class ZeyaBasicAuthHandlerImpl(ZeyaHandlerImpl):
        # Subclass that gates every GET behind HTTP Basic authentication.
        def __init__(self, *args, **kwargs):
            # Pattern for the Authorization header's base64 payload.
            self.auth_regexp = re.compile('Basic ([%s[]*)' % b64dict)
            super(ZeyaBasicAuthHandlerImpl, self).__init__(*args, **kwargs)
        def send_no_auth(self):
            """
            Send an unauthorized required page.
            """
            self.send_response(401)
            self.send_header('Content-type', 'text/html')
            self.send_header('Content-Length', str(len(no_auth_rval)))
            self.send_header('WWW-Authenticate', 'Basic realm="Zeya Secure"')
            self.end_headers()
            self.wfile.write(no_auth_rval)
        def authorized(self):
            """
            Return true if self.headers has valid authentication information.
            """
            if auth in self.headers and self.auth_regexp.match(self.headers[auth]):
                encoded_auth = self.auth_regexp.sub('\\1', self.headers[auth])
                decoded_auth = base64.b64decode(encoded_auth)
                client_user, client_pass = split_user_pass(decoded_auth)
                if client_user in auth_data:
                    # crypt() with the stored hash's 2-char salt reproduces
                    # the stored hash iff the password matches.
                    client_crypt_pass = crypt.crypt(\
                        client_pass, auth_data[client_user][:2])
                    return client_crypt_pass == auth_data[client_user]
            return False
        def do_GET(self):
            """
            Handle a GET request, sending an authentication required header if
            not authenticated.
            """
            if self.authorized():
                ZeyaHandlerImpl.do_GET(self)
            else:
                self.send_no_auth()
    if auth_type == BASIC_AUTH:
        print 'Using Basic Auth Handler...'
        return ZeyaBasicAuthHandlerImpl
    return ZeyaHandlerImpl
# Example 50 — Project: tp-libvirt, Source File: virsh_volume.py
def run(test, params, env):
    """
    Exercise the virsh volume commands end to end:

    1. Create a pool
    2. Create n number of volumes(vol-create-as)
    3. Check the volume details from the following commands
       vol-info
       vol-key
       vol-list
       vol-name
       vol-path
       vol-pool
       qemu-img info
    4. Delete the volume and check in vol-list
    5. Repeat the steps for number of volumes given
    6. Delete the pool and target

    :param test: autotest test object (provides tmpdir).
    :param params: test parameter dict (pool/volume configuration).
    :param env: test environment object (unused directly here).
    TODO: Handle negative testcases
    """
def delete_volume(expected_vol):
    """
    Delete the volume described by ``expected_vol`` from its pool.

    :param expected_vol: dict with at least 'pool_name' and 'name' keys.
    :raise error.TestFail: if the volume could not be deleted.
    """
    pool_name = expected_vol['pool_name']
    vol_name = expected_vol['name']
    pv = libvirt_storage.PoolVolume(pool_name)
    if not pv.delete_volume(vol_name):
        # BUG FIX: the original message had no %s placeholder, so
        # '"Delete volume failed." % vol_name' raised TypeError instead of
        # reporting which volume failed.
        raise error.TestFail("Delete volume %s failed." % vol_name)
    else:
        logging.debug("Volume: %s successfully deleted on pool: %s",
                      vol_name, pool_name)
def get_vol_list(pool_name, vol_name):
    """
    Parse 'virsh vol-list --details' output.

    :param pool_name: pool whose volumes are listed.
    :param vol_name: volume to look for.
    :return: dict with keys name/path/type/capacity/allocation for the
             matching row, or None when the volume is not listed.
    """
    output = virsh.vol_list(pool_name, "--details")
    # BUG FIX: the dots in the size fields were unescaped ('.' matches any
    # character); escape them so only real decimal numbers are accepted.
    rg = re.compile(
        r'^(\S+)\s+(\S+)\s+(\S+)\s+(\d+\.\d+\s\S+)\s+(\d+\.\d+.*)')
    volume_detail = None
    for line in output.stdout.splitlines():
        match = rg.search(line.lstrip())
        if match is not None:
            vol = {'name': match.group(1),
                   'path': match.group(2),
                   'type': match.group(3),
                   'capacity': match.group(4),
                   'allocation': match.group(5)}
            if vol['name'] == vol_name:
                volume_detail = vol
    return volume_detail
def norm_capacity(capacity):
    """
    Normalize capacity values from vol-list, vol-info, vol-dumpxml and
    qemu-img info to plain byte counts.

    :param capacity: dict with keys 'list' and 'info' (strings such as
                     "1.00 MiB"), 'xml' (byte count as str/int) and
                     'qemu_img' (byte count as int).
    :return: dict with the same keys, all values in bytes (int).
    :raise error.TestFail: when a size string cannot be parsed.
    """
    # Normaize all values to bytes
    norm_capacity = {}
    # Map every unit spelling virsh/qemu may emit onto a canonical letter.
    des = {'B': 'B', 'bytes': 'B', 'b': 'B', 'kib': 'K',
           'KiB': 'K', 'K': 'K', 'k': 'K', 'KB': 'K',
           'mib': 'M', 'MiB': 'M', 'M': 'M', 'm': 'M',
           'MB': 'M', 'gib': 'G', 'GiB': 'G', 'G': 'G',
           'g': 'G', 'GB': 'G', 'Gb': 'G', 'tib': 'T',
           'TiB': 'T', 'TB': 'T', 'T': 'T', 't': 'T'
           }
    val = {'B': 1,
           'K': 1024,
           'M': 1048576,
           'G': 1073741824,
           'T': 1099511627776
           }
    reg_list = re.compile(r'(\S+)\s(\S+)')
    match_list = re.search(reg_list, capacity['list'])
    if match_list is not None:
        mem_value = float(match_list.group(1))
        norm = val[des[match_list.group(2)]]
        norm_capacity['list'] = int(mem_value * norm)
    else:
        raise error.TestFail("Error in parsing capacity value in"
                             " virsh vol-list")
    match_info = re.search(reg_list, capacity['info'])
    if match_info is not None:
        mem_value = float(match_info.group(1))
        # BUG FIX: the original read the unit from match_list (the vol-list
        # match) here, so the vol-info unit was silently ignored whenever
        # the two outputs used different units.
        norm = val[des[match_info.group(2)]]
        norm_capacity['info'] = int(mem_value * norm)
    else:
        raise error.TestFail("Error in parsing capacity value "
                             "in virsh vol-info")
    norm_capacity['qemu_img'] = capacity['qemu_img']
    norm_capacity['xml'] = int(capacity['xml'])
    return norm_capacity
def check_vol(expected, avail=True):
    """
    Checks the expected volume details with actual volume details from
    vol-dumpxml
    vol-list
    vol-info
    vol-key
    vol-path
    qemu-img info

    :param expected: dict describing the created volume (name, path, type,
                     capacity, format, pool_name, encrypt_* ...).
    :param avail: True if the volume is expected to exist, False when it
                  should be gone (after deletion).
    :return: number of mismatches found (0 means all checks passed).
    """
    error_count = 0
    pv = libvirt_storage.PoolVolume(expected['pool_name'])
    vol_exists = pv.volume_exists(expected['name'])
    # Existence check first: short-circuit when presence/absence is wrong
    # (or when absence is the expected outcome).
    if vol_exists:
        if not avail:
            error_count += 1
            logging.error("Expect volume %s not exists but find it",
                          expected['name'])
            return error_count
    else:
        if avail:
            error_count += 1
            logging.error("Expect volume %s exists but not find it",
                          expected['name'])
            return error_count
        else:
            logging.info("Volume %s checked successfully for deletion",
                         expected['name'])
            return error_count
    actual_list = get_vol_list(expected['pool_name'], expected['name'])
    actual_info = pv.volume_info(expected['name'])
    # Get values from vol-dumpxml
    volume_xml = vol_xml.VolXML.new_from_vol_dumpxml(expected['name'],
                                                     expected['pool_name'])
    # Check against virsh vol-key
    vol_key = virsh.vol_key(expected['name'], expected['pool_name'])
    if vol_key.stdout.strip() != volume_xml.key:
        logging.error("Volume key is mismatch \n%s"
                      "Key from xml: %s\nKey from command: %s",
                      expected['name'], volume_xml.key, vol_key)
        error_count += 1
    else:
        logging.debug("virsh vol-key for volume: %s successfully"
                      " checked against vol-dumpxml", expected['name'])
    # Check against virsh vol-name
    get_vol_name = virsh.vol_name(expected['path'])
    if get_vol_name.stdout.strip() != expected['name']:
        logging.error("Volume name mismatch\n"
                      "Expected name: %s\nOutput of vol-name: %s",
                      expected['name'], get_vol_name)
        # NOTE(review): unlike every other mismatch branch in this function,
        # this one does not increment error_count -- confirm whether the
        # vol-name mismatch is intentionally non-fatal.
    # Check against virsh vol-path
    vol_path = virsh.vol_path(expected['name'], expected['pool_name'])
    if expected['path'] != vol_path.stdout.strip():
        logging.error("Volume path mismatch for volume: %s\n"
                      "Expected path: %s\nOutput of vol-path: %s\n",
                      expected['name'],
                      expected['path'], vol_path)
        error_count += 1
    else:
        logging.debug("virsh vol-path for volume: %s successfully checked"
                      " against created volume path", expected['name'])
    # Check path against virsh vol-list
    if expected['path'] != actual_list['path']:
        logging.error("Volume path mismatch for volume:%s\n"
                      "Expected Path: %s\nPath from virsh vol-list: %s",
                      expected['name'], expected['path'],
                      actual_list['path'])
        error_count += 1
    else:
        logging.debug("Path of volume: %s from virsh vol-list "
                      "successfully checked against created "
                      "volume path", expected['name'])
    # Check path against virsh vol-dumpxml
    if expected['path'] != volume_xml.path:
        logging.error("Volume path mismatch for volume: %s\n"
                      "Expected Path: %s\nPath from virsh vol-dumpxml: %s",
                      expected['name'], expected['path'], volume_xml.path)
        error_count += 1
    else:
        logging.debug("Path of volume: %s from virsh vol-dumpxml "
                      "successfully checked against created volume path",
                      expected['name'])
    # Check type against virsh vol-list
    if expected['type'] != actual_list['type']:
        logging.error("Volume type mismatch for volume: %s\n"
                      "Expected Type: %s\n Type from vol-list: %s",
                      expected['name'], expected['type'],
                      actual_list['type'])
        error_count += 1
    else:
        logging.debug("Type of volume: %s from virsh vol-list "
                      "successfully checked against the created "
                      "volume type", expected['name'])
    # Check type against virsh vol-info
    if expected['type'] != actual_info['Type']:
        logging.error("Volume type mismatch for volume: %s\n"
                      "Expected Type: %s\n Type from vol-info: %s",
                      expected['name'], expected['type'],
                      actual_info['Type'])
        error_count += 1
    else:
        logging.debug("Type of volume: %s from virsh vol-info successfully"
                      " checked against the created volume type",
                      expected['name'])
    # Check name against virsh vol-info
    if expected['name'] != actual_info['Name']:
        logging.error("Volume name mismatch for volume: %s\n"
                      "Expected name: %s\n Name from vol-info: %s",
                      expected['name'],
                      expected['name'], actual_info['Name'])
        error_count += 1
    else:
        logging.debug("Name of volume: %s from virsh vol-info successfully"
                      " checked against the created volume name",
                      expected['name'])
    # Check format from against qemu-img info
    img_info = utils_misc.get_image_info(expected['path'])
    if expected['format']:
        if expected['format'] != img_info['format']:
            logging.error("Volume format mismatch for volume: %s\n"
                          "Expected format: %s\n"
                          "Format from qemu-img info: %s",
                          expected['name'], expected['format'],
                          img_info['format'])
            error_count += 1
        else:
            logging.debug("Format of volume: %s from qemu-img info "
                          "checked successfully against the created "
                          "volume format", expected['name'])
    # Check format against vol-dumpxml
    if expected['format']:
        if expected['format'] != volume_xml.format:
            logging.error("Volume format mismatch for volume: %s\n"
                          "Expected format: %s\n"
                          "Format from vol-dumpxml: %s",
                          expected['name'], expected['format'],
                          volume_xml.format)
            error_count += 1
        else:
            logging.debug("Format of volume: %s from virsh vol-dumpxml "
                          "checked successfully against the created"
                          " volume format", expected['name'])
    logging.info(expected['encrypt_format'])
    # Check encrypt against vol-dumpxml
    if expected['encrypt_format']:
        # As the 'default' format will change to specific valut(qcow), so
        # just output it here
        logging.debug("Encryption format of volume '%s' is: %s",
                      expected['name'], volume_xml.encryption.format)
        # And also output encryption secret uuid
        secret_uuid = volume_xml.encryption.secret['uuid']
        logging.debug("Encryption secret of volume '%s' is: %s",
                      expected['name'], secret_uuid)
        if expected['encrypt_secret']:
            if expected['encrypt_secret'] != secret_uuid:
                logging.error("Encryption secret mismatch for volume: %s\n"
                              "Expected secret uuid: %s\n"
                              "Secret uuid from vol-dumpxml: %s",
                              expected['name'], expected['encrypt_secret'],
                              secret_uuid)
                error_count += 1
        else:
            # If no set encryption secret value, automatically
            # generate a secret value at the time of volume creation
            logging.debug("Volume encryption secret is %s", secret_uuid)
    # Check pool name against vol-pool
    vol_pool = virsh.vol_pool(expected['path'])
    if expected['pool_name'] != vol_pool.stdout.strip():
        logging.error("Pool name mismatch for volume: %s against"
                      "virsh vol-pool", expected['name'])
        error_count += 1
    else:
        logging.debug("Pool name of volume: %s checked successfully"
                      " against the virsh vol-pool", expected['name'])
    # Compare all four capacity reports after normalizing them to bytes.
    norm_cap = {}
    capacity = {}
    capacity['list'] = actual_list['capacity']
    capacity['info'] = actual_info['Capacity']
    capacity['xml'] = volume_xml.capacity
    capacity['qemu_img'] = img_info['vsize']
    norm_cap = norm_capacity(capacity)
    # NOTE(review): params.get default is the *string* "1024"; comparing
    # 'int > str' never triggers in Python 2 -- confirm delta_size should
    # not be int(params.get('delta_size', "1024")).
    delta_size = params.get('delta_size', "1024")
    if abs(expected['capacity'] - norm_cap['list']) > delta_size:
        logging.error("Capacity mismatch for volume: %s against virsh"
                      " vol-list\nExpected: %s\nActual: %s",
                      expected['name'], expected['capacity'],
                      norm_cap['list'])
        error_count += 1
    else:
        logging.debug("Capacity value checked successfully against"
                      " virsh vol-list for volume %s", expected['name'])
    if abs(expected['capacity'] - norm_cap['info']) > delta_size:
        logging.error("Capacity mismatch for volume: %s against virsh"
                      " vol-info\nExpected: %s\nActual: %s",
                      expected['name'], expected['capacity'],
                      norm_cap['info'])
        error_count += 1
    else:
        logging.debug("Capacity value checked successfully against"
                      " virsh vol-info for volume %s", expected['name'])
    if abs(expected['capacity'] - norm_cap['xml']) > delta_size:
        logging.error("Capacity mismatch for volume: %s against virsh"
                      " vol-dumpxml\nExpected: %s\nActual: %s",
                      expected['name'], expected['capacity'],
                      norm_cap['xml'])
        error_count += 1
    else:
        logging.debug("Capacity value checked successfully against"
                      " virsh vol-dumpxml for volume: %s",
                      expected['name'])
    if abs(expected['capacity'] - norm_cap['qemu_img']) > delta_size:
        logging.error("Capacity mismatch for volume: %s against "
                      "qemu-img info\nExpected: %s\nActual: %s",
                      expected['name'], expected['capacity'],
                      norm_cap['qemu_img'])
        error_count += 1
    else:
        logging.debug("Capacity value checked successfully against"
                      " qemu-img info for volume: %s",
                      expected['name'])
    return error_count
def get_all_secrets():
    """
    Collect the UUIDs of every libvirt secret currently defined.

    :return: list of secret UUID strings.
    """
    listing = virsh.secret_list().stdout.strip()
    # The first two lines of 'virsh secret-list' output are table headers;
    # the UUID is the first column of each remaining row.
    return [row.strip().split()[0] for row in listing.splitlines()[2:]]
# Initialize the variables
pool_name = params.get("pool_name")
pool_type = params.get("pool_type")
pool_target = params.get("pool_target")
if os.path.dirname(pool_target) is "":
pool_target = os.path.join(test.tmpdir, pool_target)
vol_name = params.get("volume_name")
vol_number = int(params.get("number_of_volumes", "2"))
capacity = params.get("volume_size", "1048576")
allocation = params.get("volume_allocation", "1048576")
vol_format = params.get("volume_format")
source_name = params.get("gluster_source_name", "gluster-vol1")
source_path = params.get("gluster_source_path", "/")
encrypt_format = params.get("vol_encrypt_format")
encrypt_secret = params.get("encrypt_secret")
emulated_image = params.get("emulated_image")
emulated_image_size = params.get("emulated_image_size")
if not libvirt_version.version_compare(1, 0, 0):
if pool_type == "gluster":
raise error.TestNAError("Gluster pool is not supported in current"
" libvirt version.")
try:
str_capa = utils_misc.normalize_data_size(capacity, "B")
int_capa = int(str(str_capa).split('.')[0])
except ValueError:
raise error.TestError("Translate size %s to 'B' failed" % capacity)
try:
str_capa = utils_misc.normalize_data_size(allocation, "B")
int_allo = int(str(str_capa).split('.')[0])
except ValueError:
raise error.TestError("Translate size %s to 'B' failed" % allocation)
# Stop multipathd to avoid start pool fail(For fs like pool, the new add
# disk may in use by device-mapper, so start pool will report disk already
# mounted error).
multipathd = service.Factory.create_service("multipathd")
multipathd_status = multipathd.status()
if multipathd_status:
multipathd.stop()
# Get exists libvirt secrets before test
ori_secrets = get_all_secrets()
expected_vol = {}
vol_type = 'file'
if pool_type in ['disk', 'logical']:
vol_type = 'block'
if pool_type == 'gluster':
vol_type = 'network'
logging.debug("Debug:\npool_name:%s\npool_type:%s\npool_target:%s\n"
"vol_name:%s\nvol_number:%s\ncapacity:%s\nallocation:%s\n"
"vol_format:%s", pool_name, pool_type, pool_target,
vol_name, vol_number, capacity, allocation, vol_format)
libv_pvt = utlv.PoolVolumeTest(test, params)
# Run Testcase
total_err_count = 0
try:
# Create a new pool
libv_pvt.pre_pool(pool_name=pool_name,
pool_type=pool_type,
pool_target=pool_target,
emulated_image=emulated_image,
image_size=emulated_image_size,
source_name=source_name,
source_path=source_path)
for i in range(vol_number):
volume_name = "%s_%d" % (vol_name, i)
expected_vol['pool_name'] = pool_name
expected_vol['pool_type'] = pool_type
expected_vol['pool_target'] = pool_target
expected_vol['capacity'] = int_capa
expected_vol['allocation'] = int_allo
expected_vol['format'] = vol_format
expected_vol['name'] = volume_name
expected_vol['type'] = vol_type
expected_vol['encrypt_format'] = encrypt_format
expected_vol['encrypt_secret'] = encrypt_secret
# Creates volume
if pool_type != "gluster":
expected_vol['path'] = pool_target + '/' + volume_name
new_volxml = vol_xml.VolXML()
new_volxml.name = volume_name
new_volxml.capacity = int_capa
new_volxml.allocation = int_allo
if vol_format:
new_volxml.format = vol_format
encrypt_dict = {}
if encrypt_format:
encrypt_dict.update({"format": encrypt_format})
if encrypt_secret:
encrypt_dict.update({"secret": {'uuid': encrypt_secret}})
if encrypt_dict:
new_volxml.encryption = new_volxml.new_encryption(**encrypt_dict)
logging.debug("Volume XML for creation:\n%s", str(new_volxml))
virsh.vol_create(pool_name, new_volxml.xml, debug=True)
else:
ip_addr = utlv.get_host_ipv4_addr()
expected_vol['path'] = "gluster://%s/%s/%s" % (ip_addr,
source_name,
volume_name)
utils.run("qemu-img create -f %s %s %s" % (vol_format,
expected_vol['path'],
capacity))
virsh.pool_refresh(pool_name)
# Check volumes
total_err_count += check_vol(expected_vol)
# Delete volume and check for results
delete_volume(expected_vol)
total_err_count += check_vol(expected_vol, False)
if total_err_count > 0:
raise error.TestFail("Get %s errors when checking volume" % total_err_count)
finally:
# Clean up
for sec in get_all_secrets():
if sec not in ori_secrets:
virsh.secret_undefine(sec)
try:
libv_pvt.cleanup_pool(pool_name, pool_type, pool_target,
emulated_image, source_name=source_name)
except error.TestFail, detail:
logging.error(str(detail))
if multipathd_status:
multipathd.start()