Here are examples of the Python API re.sub, taken from open source projects.
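For quick reference, here is a minimal, self-contained sketch of the two re.sub calling styles the examples below rely on (the sample strings are made up for illustration):

import re

# Plain pattern-to-string replacement.
print(re.sub(r'depot\.com/search=', '', 'depot.com/search=hammer'))  # hammer

# Replacement computed by a callable, invoked once per match.
print(re.sub(r'\w+', lambda m: m.group(0).upper(), 'needle nose pliers'))  # NEEDLE NOSE PLIERS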
162 Examples
Example 51
Project: Kaggle_HomeDepot Source File: homedepot_functions.py
def str_parser(s, automatic_spell_check_dict={}, remove_from_brackets=False,parse_material=False,add_space_stop_list=[]):
#the following three replacements are shared on the forum
s = s.replace("craftsm,an","craftsman")
s = re.sub(r'depot.com/search=', '', s)
s = re.sub(r'pilers,needlenose', 'pliers, needle nose', s)
s = re.sub(r'\bmr.', 'mr ', s)
s = re.sub(r'&amp;', '&', s)
s = re.sub('&nbsp;', '', s)
s = re.sub('&#39;', '', s)
s = re.sub(r'(?<=[0-9]),[\ ]*(?=[0-9])', '', s)
s = s.replace(";",".")
s = s.replace(",",".")
s = s.replace(":",". ")
s = s.replace("+"," ")
s = re.sub(r'\bU.S.', 'US ', s)
s = s.replace(" W x "," ")
s = s.replace(" H x "," ")
s = re.sub(' [\#]\d+[\-\d]*[\,]*', '', s)
s = re.sub('(?<=[0-9\%])(?=[A-Z][a-z])', '. ', s) # add dot between number and cap letter
s = re.sub(r'(?<=\))(?=[a-zA-Z0-9])', ' ', s) # add space between parentheses and letters
s = re.sub(r'(?<=[a-zA-Z0-9])(?=\()', ' ', s) # add space between parentheses and letters
if parse_material:
replace_dict={'Medium Density Fiberboard (MDF)':'mdf', 'High Density Fiberboard (HDF)':'hdf',\
'Fibre Reinforced Polymer (FRP)': 'frp', 'Acrylonitrile Butadiene Styrene (ABS)': 'abs',\
'Cross-Linked Polyethylene (PEX)':'pex', 'Chlorinated Poly Vinyl Chloride (CPVC)': 'cpvc',\
'PVC (vinyl)': 'pvc','Thermoplastic rubber (TPR)':'tpr','Poly Lactic Acid (PLA)': 'pla',\
'100% Polyester':'polyester','100% UV Olefin':'olefin', '100% BCF Polypropylene': 'polypropylene',\
'100% PVC':'pvc'}
if s in replace_dict.keys():
s=replace_dict[s]
s = re.sub('[^a-zA-Z0-9\n\ \%\$\-\#\@\&\/\.\'\*\(\)]', ' ', s)
s= " ".join(s.split())
s=s.replace("-"," ")
if len(add_space_stop_list)>0:
s = " ".join([re.sub('(?<=[a-z])(?=[A-Z][a-z\ ])', '. ', word) if word.lower() not in add_space_stop_list else word for word in s.split()])
s=s.lower()
s = re.sub('\.(?=[a-z])', '. ', s) #dots before words -> replace with spaces
# s = re.sub('(?<=[a-z])(?=[A-Z][a-z\ ])', ' ', s) # add space if uppercase after lowercase
s = re.sub('(?<=[a-z][a-z][a-z])(?=[0-9])', ' ', s) # add space if number after at least three letters
##s = re.sub('(?<=[a-zA-Z])\.(?=\ |$)', '', s) #remove dots at the end of string
#s = re.sub('(?<=[0-9])\.(?=\ |$)', '', s) # dot after digit before space
s = re.sub('^\.\ ', '', s) #dot at the beginning before space
if len(automatic_spell_check_dict.keys())>0:
s=spell_correction(s,automatic_spell_check_dict=automatic_spell_check_dict)
if remove_from_brackets==True:
s = re.sub('(?<=\()[a-zA-Z0-9\n\ \%\$\-\#\@\&\/\.\'\*\(\)]*(?=\))', '', s)
else:
s=s.replace(" (",". ")
s=re.sub('(?<=[a-zA-Z0-9\%\$])\(', '. ', s)
s=s.replace(" )",". ")
s=s.replace(")",". ")
s=s.replace(" "," ")
s = re.sub('\ \.', '\.', s)
#######s = re.sub('(?<=[0-9\%])(?=[a-wyz])', ' ', s) # add space between number and text (except letter x)
#s = re.sub('(?<=[a-zA-Z])-(?=[a-zA-Z])', ' ', s) # replace '-' in words with space
s=s.replace("at&t","att")
s=s.replace("&"," and ")
s=s.replace("*"," x ")
s = re.sub('(?<=[a-z\ ])\/(?=[a-z\ ])', ' ', s) # replace "/" between words with space
s = re.sub('(?<=[a-z])\\\\(?=[a-z])', ' ', s) # replace backslashes between words with space
s=s.replace(" "," ")
s=s.replace(" "," ")
#s=re.sub('(?<=\ [a-ux-z])\ (?=[0-9])', '', s) #remove spaces
#s=re.sub('(?<=^[a-z])\ (?=[0-9])', '', s) #remove spaces
#####################################
### thesaurus replacement in all vars
s=replace_in_parser(s)
s = re.sub('half(?=\ inch)', '1/2', s)
s = re.sub('\ba half\b', '1/2', s)
#s = re.sub('half\ ', 'half-', s)
s = re.sub(r'(?<=\')s\b', '', s)
s = re.sub('(?<=[0-9])\'\'', ' in ', s)
s = re.sub('(?<=[0-9])\'', ' in ', s)
s = re.sub(r'(?<=[0-9])[\ ]*inch[es]*\b', '-in ', s)
s = re.sub(r'(?<=[0-9])[\ ]*in\b', '-in ', s)
s = re.sub(r'(?<=[0-9])[\-|\ ]*feet[s]*\b', '-ft ', s)
s = re.sub(r'(?<=[0-9])[\ ]*foot[s]*\b', '-ft ', s)
s = re.sub(r'(?<=[0-9])[\ ]*ft[x]*\b', '-ft ', s)
s = re.sub('(?<=[0-9])[\ ]*volt[s]*(?=\ |$|\.)', '-V ', s)
s = re.sub('(?<=[0-9])[\ ]*v(?=\ |$|\.)', '-V ', s)
s = re.sub('(?<=[0-9])[\ ]*wat[t]*[s]*(?=\ |$|\.)', '-W ', s)
s = re.sub('(?<=[0-9])[\ ]*w(?=\ |$|\.)', '-W ', s)
s = re.sub('(?<=[0-9])[\ ]*kilo[\ ]*watt[s]*(?=\ |$|\.)', '-KW ', s)
s = re.sub('(?<=[0-9])[\ ]*kw(?=\ |$|\.)', '-KW ', s)
s = re.sub('(?<=[0-9])[\ ]*amp[s]*(?=\ |$|\.)', '-A ', s)
#s = re.sub('(?<=[0-9]) a(?=\ |$|\.)', '-A. ', s)
s = re.sub('(?<=[0-9])a(?=\ |$|\.)', '-A ', s)
s = re.sub('(?<=[0-9])[\ ]*gallon[s]*(?=\ |$|\.)', '-gal ', s)
s = re.sub('(?<=[0-9])[\ ]*gal(?=\ |$|\.)', '-gal ', s)
s = re.sub('(?<=[0-9])[\ ]*pound[s]*(?=\ |$|\.)', '-lb ', s)
s = re.sub('(?<=[0-9])[\ ]*lb[s]*(?=\ |$|\.)', '-lb ', s)
s = re.sub('(?<=[0-9])[\ ]*mi[l]+imet[er]*[s]*(?=\ |$|\.)', '-mm ', s)
s = re.sub('(?<=[0-9])[\ ]*mm(?=\ |$|\.)', '-mm ', s)
s = re.sub('(?<=[0-9])[\ ]*centimeter[s]*(?=\ |$|\.)', '-cm ', s)
s = re.sub('(?<=[0-9])[\ ]*cm(?=\ |$|\.)', '-cm ', s)
s = re.sub('(?<=[0-9])[\ ]*ounce[s]*(?=\ |$|\.)', '-oz ', s)
s = re.sub('(?<=[0-9])[\ ]*oz(?=\ |$|\.)', '-oz ', s)
s = re.sub('(?<=[0-9])[\ ]*liter[s]*(?=\ |$|\.)', '-L ', s)
s = re.sub('(?<=[0-9])[\ ]*litre[s]*(?=\ |$|\.)', '-L ', s)
s = re.sub('(?<=[0-9])[\ ]*l(?=\ |$|\.)', '-L. ', s)
s = re.sub('(?<=[0-9])[\ ]*square feet[s]*(?=\ |$|\.)', '-sqft ', s)
s = re.sub('(?<=[0-9])square feet[s]*(?=\ |$|\.)', '-sqft ', s)
s = re.sub('(?<=[0-9])[\ ]*sq[\ |\.|\.\ ]*ft(?=\ |$|\.)', '-sqft ', s)
s = re.sub('(?<=[0-9])[\ ]*sq. ft(?=\ |$|\.)', '-sqft', s)
s = re.sub('(?<=[0-9])[\ ]*sq.ft(?=\ |$|\.)', '-sqft', s)
s = re.sub('(?<=[0-9])[\ ]*cubic f[e]*t[s]*(?=\ |$|\.)', '-cuft ', s)
s = re.sub('(?<=[0-9])[\ ]*cu[\ |\.|\.\ ]*ft(?=\ |$|\.)', '-cuft ', s)
s = re.sub('(?<=[0-9])[\ ]*cu[\.]*[\ ]*ft(?=\ |$|\.)', '-cuft', s)
#remove 'x'
s = re.sub('(?<=[0-9]) x (?=[0-9])', '-X ', s)
s = re.sub('(?<=[0-9])x (?=[0-9])', '-X ', s)
s = re.sub('(?<=[0-9]) x(?=[0-9])', '-X ', s)
s = re.sub('(?<=[0-9])x(?=[0-9])', '-X ', s)
#s=s.replace("..",".")
s=s.replace("\n"," ")
s=s.replace(" "," ")
words=s.split()
if s.find("-X")>=0:
for cnt in range(0,len(words)-1):
if words[cnt].find("-X")>=0:
if words[cnt+1].find("-X") and cnt<len(words)-2:
cntAdd=2
else:
cntAdd=1
to_replace=re.search(r'(?<=[0-9]\-)\w+\b',words[cnt+cntAdd])
if not (to_replace==None):
words[cnt]=words[cnt].replace("-X","-"+to_replace.group(0)+"")
else:
words[cnt]=words[cnt].replace("-X","x")
s = " ".join([word for word in words])
s = re.sub('[^a-zA-Z0-9\ \%\$\-\@\&\/\.]', '', s) #remove "'" and "\n" and "#" and characters
##s = re.sub('(?<=[a-zA-Z])[\.|\/](?=\ |$)', '', s) #remove dots at the end of string
s = re.sub('(?<=[0-9])x(?=\ |$)', '', s) #remove
s = re.sub('(?<=[\ ])x(?=[0-9])', '', s) #remove
s = re.sub('(?<=^)x(?=[0-9])', '', s)
#s = re.sub('[\ ]\.(?=\ |$)', '', s) #remove dots
s=s.replace(" "," ")
s=s.replace("..",".")
s = re.sub('\ \.', '', s)
s=re.sub('(?<=\ [ch-hj-np-su-z][a-z])\ (?=[0-9])', '', s) #remove spaces
s=re.sub('(?<=^[ch-hj-np-su-z][a-z])\ (?=[0-9])', '', s) #remove spaces
s = re.sub('(?<=\ )\.(?=[0-9])', '0.', s)
s = re.sub('(?<=^)\.(?=[0-9])', '0.', s)
return " ".join([word for word in s.split()])
Example 52
def addMediaFile(self, package, contextType='video', encfs=False, dpath='', epath=''):
thumbnail = self.cache.getThumbnail(self, package.file.thumbnail,package.file.id)
listitem = xbmcgui.ListItem(package.file.displayTitle(), iconImage=package.file.thumbnail,
thumbnailImage=package.file.thumbnail)
# audio file, not in "pictures"
if package.file.type == package.file.AUDIO and contextType != 'image':
if package.file.hasMeta:
infolabels = decode_dict({ 'title' : package.file.displayTrackTitle(), 'tracknumber' : package.file.trackNumber, 'artist': package.file.artist, 'album': package.file.album,'genre': package.file.genre,'premiered': package.file.releaseDate, 'size' : package.file.size })
else:
infolabels = decode_dict({ 'title' : package.file.displayTitle(), 'size' : package.file.size })
listitem.setInfo('Music', infolabels)
playbackURL = '?mode=audio'
if self.integratedPlayer:
listitem.setProperty('IsPlayable', 'false')
else:
listitem.setProperty('IsPlayable', 'true')
# encrypted file, viewing in "pictures", assume image
elif package.file.type == package.file.UNKNOWN and contextType == 'image':
infolabels = decode_dict({ 'title' : package.file.displayTitle() , 'plot' : package.file.plot })
listitem.setInfo('Pictures', infolabels)
playbackURL = '?mode=photo'
listitem.setProperty('IsPlayable', 'false')
# encrypted file, viewing in "video", assume video
elif package.file.type == package.file.UNKNOWN and contextType == 'video':
infolabels = decode_dict({ 'title' : package.file.displayTitle() , 'plot' : package.file.plot, 'size' : package.file.size })
listitem.setInfo('Video', infolabels)
playbackURL = '?mode=video'
if self.integratedPlayer:
listitem.setProperty('IsPlayable', 'false')
else:
listitem.setProperty('IsPlayable', 'true')
if float(package.file.resume) > 0:
listitem.setProperty('isResumable', "1")
# encrypted file, viewing in "music", assume audio
elif package.file.type == package.file.UNKNOWN and contextType == 'audio':
if package.file.hasMeta:
infolabels = decode_dict({ 'title' : package.file.displayTrackTitle(), 'tracknumber' : package.file.trackNumber, 'artist': package.file.artist, 'album': package.file.album,'genre': package.file.genre,'premiered': package.file.releaseDate, 'size' : package.file.size })
else:
infolabels = decode_dict({ 'title' : package.file.displayTitle(), 'size' : package.file.size })
listitem.setInfo('Music', infolabels)
playbackURL = '?mode=audio'
if self.integratedPlayer:
listitem.setProperty('IsPlayable', 'false')
else:
listitem.setProperty('IsPlayable', 'true')
# audio file, viewing in "pictures"
elif package.file.type == package.file.AUDIO and contextType == 'image':
if package.file.hasMeta:
infolabels = decode_dict({ 'title' : package.file.displayTrackTitle(), 'tracknumber' : package.file.trackNumber, 'artist': package.file.artist, 'album': package.file.album,'genre': package.file.genre,'premiered': package.file.releaseDate, 'size' : package.file.size })
else:
infolabels = decode_dict({ 'title' : package.file.displayTitle(), 'size' : package.file.size })
listitem.setInfo('Music', infolabels)
playbackURL = '?mode=audio'
listitem.setProperty('IsPlayable', 'false')
# video file
elif package.file.type == package.file.VIDEO:
if package.file.hasMeta:
infolabels = decode_dict({ 'title' : package.file.displayShowTitle() , 'plot' : package.file.plot, 'TVShowTitle': package.file.show, 'EpisodeName': package.file.showtitle, 'season': package.file.season, 'episode': package.file.episode,'size' : package.file.size })
else:
infolabels = decode_dict({ 'title' : package.file.displayTitle() , 'plot' : package.file.plot, 'size' : package.file.size })
listitem.setInfo('Video', infolabels)
playbackURL = '?mode=video'
if self.integratedPlayer:
listitem.setProperty('IsPlayable', 'false')
else:
listitem.setProperty('IsPlayable', 'true')
if float(package.file.resume) > 0:
listitem.setProperty('isResumable', "1")
if int(package.file.playcount) > 0: #or (float(package.file.resume) > 0 and package.file.duration > 0 and package.file.resume/package.file.duration > (1-self.settskipResume)):
listitem.setInfo('video', {'playcount':int(package.file.playcount)})
if int(package.file.resolution[0]) > 0:
listitem.addStreamInfo('video', {'width': package.file.resolution[1], 'height': package.file.resolution[0], 'duration':package.file.duration})
# image file
elif package.file.type == package.file.PICTURE:
infolabels = decode_dict({ 'title' : package.file.displayTitle() , 'plot' : package.file.plot })
listitem.setInfo('Pictures', infolabels)
playbackURL = '?mode=photo'
listitem.setProperty('IsPlayable', 'false')
# otherwise, assume video
else:
infolabels = decode_dict({ 'title' : package.file.displayTitle() , 'plot' : package.file.plot, 'size' : package.file.size })
listitem.setInfo('Video', infolabels)
playbackURL = '?mode=video'
if self.integratedPlayer:
listitem.setProperty('IsPlayable', 'false')
else:
listitem.setProperty('IsPlayable', 'true')
if float(package.file.resume) > 0:
listitem.setProperty('isResumable', "1")
listitem.setProperty('fanart_image', package.file.fanart)
cm=[]
try:
url = package.getMediaURL()
cleanURL = re.sub('---', '', url)
cleanURL = re.sub('&', '---', cleanURL)
except:
cleanURL = ''
# url = PLUGIN_URL+playbackURL+'&title='+package.file.title+'&filename='+package.file.id+'&instance='+str(self.instanceName)+'&folder='+str(package.folder.id)
if encfs:
values = {'instance': self.instanceName, 'dpath': dpath, 'epath': epath, 'encfs': 'true', 'title': package.file.title, 'filename': package.file.id, 'folder': package.folder.id}
else:
values = {'instance': self.instanceName, 'title': package.file.title, 'filename': package.file.id, 'folder': package.folder.id}
url = self.PLUGIN_URL+ str(playbackURL)+ '&' + urllib.urlencode(values)
if (contextType != 'image' and package.file.type != package.file.PICTURE):
valuesBS = {'username': self.authorization.username, 'title': package.file.title, 'filename': package.file.id, 'content_type': 'video'}
cm.append(( self.addon.getLocalizedString(30042), 'XBMC.RunPlugin('+self.PLUGIN_URL+'?mode=buildstrm&type='+str(package.file.type)+'&'+urllib.urlencode(valuesBS)+')', ))
if (self.protocol == 2):
# play-original for video only
if (contextType == 'video'):
if self.settings.promptQuality:
cm.append(( self.addon.getLocalizedString(30123), 'XBMC.RunPlugin('+url + '&original=true'+')', ))
else:
cm.append(( self.addon.getLocalizedString(30151), 'XBMC.RunPlugin('+url + '&promptquality=true'+')', ))
# if the options are disabled in settings, display option to playback with feature
if not self.settings.srt:
cm.append(( self.addon.getLocalizedString(30138), 'XBMC.RunPlugin('+url + '&srt=true'+')', ))
if not self.settings.cc:
cm.append(( self.addon.getLocalizedString(30146), 'XBMC.RunPlugin('+url + '&cc=true'+')', ))
cm.append(( self.addon.getLocalizedString(30147), 'XBMC.RunPlugin('+url + '&seek=true'+')', ))
# cm.append(( self.addon.getLocalizedString(30148), 'XBMC.RunPlugin('+url + '&resume=true'+')', ))
# values = {'instance': self.instanceName, 'folder': package.folder.id}
# folderurl = self.PLUGIN_URL+ str(playbackURL)+ '&' + urllib.urlencode(values)
# cm.append(( 'folder', 'XBMC.RunPlugin('+folderurl+')', ))
if contextType != 'image':
# download
cm.append(( self.addon.getLocalizedString(30113), 'XBMC.RunPlugin('+url + '&download=true'+')', ))
# download + watch
cm.append(( self.addon.getLocalizedString(30124), 'XBMC.RunPlugin('+url + '&play=true&download=true'+')', ))
# # watch downloaded copy
# cm.append(( self.addon.getLocalizedString(30125), 'XBMC.RunPlugin('+url + '&cache=true'+')', ))
elif package.file.type == package.file.PICTURE: #contextType == 'image':
cm.append(( self.addon.getLocalizedString(30126), 'XBMC.RunPlugin('+self.PLUGIN_URL+ '?mode=slideshow&' + urllib.urlencode(values)+')', ))
#encfs
# if (self.protocol == 2):
# cm.append(( self.addon.getLocalizedString(30130), 'XBMC.RunPlugin('+self.PLUGIN_URL+ '?mode=downloadfolder&encfs=true&' + urllib.urlencode(values)+'&content_type='+contextType+')', ))
url = url + '&content_type='+contextType
# listitem.addContextMenuItems( commands )
# if cm:
if package.file.type == package.file.PICTURE: #contextType == 'image':
listitem.addContextMenuItems(cm, True)
else:
listitem.addContextMenuItems(cm, False)
xbmcplugin.addDirectoryItem(plugin_handle, url, listitem,
isFolder=False, totalItems=0)
return url
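The only re.sub usage in this example is the cleanURL pair, which swaps '&' for '---' so the media URL can travel as a single query-string value. A hedged, standalone sketch of that round trip (function names are hypothetical):

import re

def encode_ampersands(url):
    # Drop any pre-existing '---' runs, then use '---' as a stand-in for '&'.
    return re.sub('&', '---', re.sub('---', '', url))

def decode_ampersands(token):
    return re.sub('---', '&', token)

u = 'http://example.com/get?id=1&size=2'
assert decode_ampersands(encode_ampersands(u)) == u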
Example 53
Project: frescobaldi Source File: vocal.py
def build(self, data, builder):
# normalize voicing
staves = self.voicing.currentText().upper()
# remove unwanted characters
staves = re.sub(r'[^SATB-]+', '', staves)
# remove double hyphens, and from begin and end
staves = re.sub('-+', '-', staves).strip('-')
if not staves:
return
splitStaves = staves.split('-')
numStaves = len(splitStaves)
staffCIDs = collections.defaultdict(int) # number same-name staff Context-IDs
voiceCounter = collections.defaultdict(int) # dict to number same voice types
maxNumVoices = max(map(len, splitStaves)) # largest number of voices
numStanzas = self.stanzas.value()
lyrics = collections.defaultdict(list) # lyrics grouped by stanza number
pianoReduction = collections.defaultdict(list)
rehearsalMidis = []
p = ly.dom.ChoirStaff()
choir = ly.dom.Sim(p)
data.nodes.append(p)
# print main instrumentName if there are more choirs, and we
# have more than one staff.
if numStaves > 1 and data.num:
builder.setInstrumentNames(p,
builder.instrumentName(lambda _: _("Choir"), data.num),
builder.instrumentName(lambda _: _("abbreviation for Choir", "Ch."), data.num))
# get the preferred way of adding lyrics
lyrAllSame, lyrEachSame, lyrEachDiff, lyrSpread = (
self.lyrics.currentIndex() == i for i in range(4))
lyrEach = lyrEachSame or lyrEachDiff
# stanzas to print (0 = don't print stanza number):
if numStanzas == 1:
allStanzas = [0]
else:
allStanzas = list(range(1, numStanzas + 1))
# Which stanzas to print where:
if lyrSpread and numStanzas > 1 and numStaves > 2:
spaces = numStaves - 1
count, rest = divmod(max(numStanzas, spaces), spaces)
stanzaSource = itertools.cycle(allStanzas)
stanzaGroups = (itertools.islice(stanzaSource, num)
for num in itertools.chain(
itertools.repeat(count + 1, rest),
itertools.repeat(count, numStaves - rest)))
else:
stanzaGroups = itertools.repeat(allStanzas, numStaves)
# a function to set staff affinity (in LilyPond 2.13.4 and above):
if builder.lyVersion >= (2, 13, 4):
def setStaffAffinity(context, affinity):
ly.dom.Line("\\override VerticalAxisGroup "
"#'staff-affinity = #" + affinity, context.getWith())
else:
def setStaffAffinity(lyricsContext, affinity):
pass
# a function to make a column markup:
if builder.lyVersion >= (2, 11, 57):
columnCommand = 'center-column'
else:
columnCommand = 'center-align'
def makeColumnMarkup(names):
node = ly.dom.Markup()
column = ly.dom.MarkupEnclosed(columnCommand, node)
for name in names:
ly.dom.QuotedString(name, column)
return node
stavesLeft = numStaves
for staff, stanzas in zip(splitStaves, stanzaGroups):
# are we in the last staff?
stavesLeft -= 1
# the number of voices in this staff
numVoices = len(staff)
# sort the letters in order SATB
staff = ''.join(i * staff.count(i) for i in 'SATB')
# Create the staff for the voices
s = ly.dom.Staff(parent=choir)
builder.setMidiInstrument(s, self.midiInstrument)
# Build a list of the voices in this staff.
# Each entry is a tuple(name, num).
# name is one of 'S', 'A', 'T', or 'B'
# num is an integer: 0 when a voice occurs only once, or >= 1 when
# there are more voices of the same type (e.g. Soprano I and II)
voices = []
for voice in staff:
if staves.count(voice) > 1:
voiceCounter[voice] += 1
voices.append((voice, voiceCounter[voice]))
# Add the instrument names to the staff:
if numVoices == 1:
voice, num = voices[0]
longName = builder.instrumentName(voice2Voice[voice].title, num)
shortName = builder.instrumentName(voice2Voice[voice].short, num)
builder.setInstrumentNames(s, longName, shortName)
else:
# stack instrument names (long and short) in a markup column.
# long names
longNames = makeColumnMarkup(
builder.instrumentName(voice2Voice[voice].title, num) for voice, num in voices)
shortNames = makeColumnMarkup(
builder.instrumentName(voice2Voice[voice].short, num) for voice, num in voices)
builder.setInstrumentNames(s, longNames, shortNames)
# Make the { } or << >> holder for this staff's children.
# If *all* staves have only one voice, addlyrics is used.
# In that case, don't remove the braces.
staffMusic = (ly.dom.Seq if lyrEach and maxNumVoices == 1 else
ly.dom.Seqr if numVoices == 1 else ly.dom.Simr)(s)
# Set the clef for this staff:
if 'B' in staff:
ly.dom.Clef('bass', staffMusic)
elif 'T' in staff:
ly.dom.Clef('treble_8', staffMusic)
# Determine voice order (\voiceOne, \voiceTwo etc.)
if numVoices == 1:
order = (0,)
elif numVoices == 2:
order = 1, 2
elif staff in ('SSA', 'TTB'):
order = 1, 3, 2
elif staff in ('SAA', 'TBB'):
order = 1, 2, 4
elif staff in ('SSAA', 'TTBB'):
order = 1, 3, 2, 4
else:
order = range(1, numVoices + 1)
# What name would the staff get if we need to refer to it?
# If a name (like 's' or 'sa') is already in use in this part,
# just add a number ('ss2' or 'sa2', etc.)
staffCIDs[staff] += 1
cid = ly.dom.Reference(staff.lower() +
str(staffCIDs[staff] if staffCIDs[staff] > 1 else ""))
# Create voices and their lyrics:
for (voice, num), voiceNum in zip(voices, order):
name = voice2id[voice]
if num:
name += ly.util.int2text(num)
a = data.assignMusic(name, voice2Voice[voice].octave)
lyrName = name + 'Verse' if lyrEachDiff else 'verse'
# Use \addlyrics if all staves have exactly one voice.
if lyrEach and maxNumVoices == 1:
for verse in stanzas:
lyrics[verse].append((ly.dom.AddLyrics(s), lyrName))
ly.dom.Identifier(a.name, staffMusic)
else:
voiceName = voice2id[voice] + str(num or '')
v = ly.dom.Voice(voiceName, parent=staffMusic)
voiceMusic = ly.dom.Seqr(v)
if voiceNum:
ly.dom.Text('\\voice' + ly.util.int2text(voiceNum), voiceMusic)
ly.dom.Identifier(a.name, voiceMusic)
if stanzas and (lyrEach or (voiceNum <= 1 and
(stavesLeft or numStaves == 1))):
# Create the lyrics. If they should be above the staff,
# give the staff a suitable name, and use alignAbove-
# Context to align the Lyrics above the staff.
above = voiceNum & 1 if lyrEach else False
if above and s.cid is None:
s.cid = cid
for verse in stanzas:
l = ly.dom.Lyrics(parent=choir)
if above:
l.getWith()['alignAboveContext'] = cid
setStaffAffinity(l, "DOWN")
elif not lyrEach and stavesLeft:
setStaffAffinity(l, "CENTER")
lyrics[verse].append((ly.dom.LyricsTo(voiceName, l), lyrName))
# Add ambitus:
if self.ambitus.isChecked():
ambitusContext = (s if numVoices == 1 else v).getWith()
ly.dom.Line('\\consists "Ambitus_engraver"', ambitusContext)
if voiceNum > 1:
ly.dom.Line("\\override Ambitus #'X-offset = #{0}".format(
(voiceNum - 1) * 2.0), ambitusContext)
pianoReduction[voice].append(a.name)
rehearsalMidis.append((voice, num, a.name, lyrName))
# Assign the lyrics, so their definitions come after the note defs.
# (These refs are used again below in the midi rehearsal routine.)
refs = {}
for verse in allStanzas:
for node, name in lyrics[verse]:
if (name, verse) not in refs:
refs[(name, verse)] = self.assignLyrics(data, name, verse).name
ly.dom.Identifier(refs[(name, verse)], node)
# Create the piano reduction if desired
if self.pianoReduction.isChecked():
a = data.assign('pianoReduction')
data.nodes.append(ly.dom.Identifier(a.name))
piano = ly.dom.PianoStaff(parent=a)
sim = ly.dom.Sim(piano)
rightStaff = ly.dom.Staff(parent=sim)
leftStaff = ly.dom.Staff(parent=sim)
right = ly.dom.Seq(rightStaff)
left = ly.dom.Seq(leftStaff)
# Determine the ordering of voices in the staves
upper = pianoReduction['S'] + pianoReduction['A']
lower = pianoReduction['T'] + pianoReduction['B']
preferUpper = 1
if not upper:
# Male choir
upper = pianoReduction['T']
lower = pianoReduction['B']
ly.dom.Clef("treble_8", right)
ly.dom.Clef("bass", left)
preferUpper = 0
elif not lower:
# Female choir
upper = pianoReduction['S']
lower = pianoReduction['A']
else:
ly.dom.Clef("bass", left)
# Otherwise accidentals can be confusing
ly.dom.Line("#(set-accidental-style 'piano)", right)
ly.dom.Line("#(set-accidental-style 'piano)", left)
# Move voices if unevenly spread
if abs(len(upper) - len(lower)) > 1:
voices = upper + lower
half = (len(voices) + preferUpper) // 2
upper = voices[:half]
lower = voices[half:]
for staff, voices in (ly.dom.Simr(right), upper), (ly.dom.Simr(left), lower):
if voices:
for v in voices[:-1]:
ly.dom.Identifier(v, staff)
ly.dom.VoiceSeparator(staff).after = 1
ly.dom.Identifier(voices[-1], staff)
# Make the piano part somewhat smaller
ly.dom.Line("fontSize = #-1", piano.getWith())
ly.dom.Line("\\override StaffSymbol #'staff-space = #(magstep -1)",
piano.getWith())
# Nice to add Mark engravers
ly.dom.Line('\\consists "Mark_engraver"', rightStaff.getWith())
ly.dom.Line('\\consists "Metronome_mark_engraver"', rightStaff.getWith())
# Keep piano reduction out of the MIDI output
if builder.midi:
ly.dom.Line('\\remove "Staff_performer"', rightStaff.getWith())
ly.dom.Line('\\remove "Staff_performer"', leftStaff.getWith())
# Create MIDI files if desired
if self.rehearsalMidi.isChecked():
a = data.assign('rehearsalMidi')
rehearsalMidi = a.name
func = ly.dom.SchemeList(a)
func.pre = '#\n(' # hack
ly.dom.Text('define-music-function', func)
ly.dom.Line('(parser location name midiInstrument lyrics) '
'(string? string? ly:music?)', func)
choir = ly.dom.Sim(ly.dom.Command('unfoldRepeats', ly.dom.SchemeLily(func)))
data.afterblocks.append(ly.dom.Comment(_("Rehearsal MIDI files:")))
for voice, num, ref, lyrName in rehearsalMidis:
# Append voice to the rehearsalMidi function
name = voice2id[voice] + str(num or '')
seq = ly.dom.Seq(ly.dom.Voice(name, parent=ly.dom.Staff(name, parent=choir)))
if builder.lyVersion < (2, 18, 0):
ly.dom.Text('<>\\f', seq) # add one dynamic
ly.dom.Identifier(ref, seq) # add the reference to the voice
book = ly.dom.Book()
# Append score to the aftermath (stuff put below the main score)
suffix = "choir{0}-{1}".format(data.num, name) if data.num else name
if builder.lyVersion < (2, 12, 0):
data.afterblocks.append(
ly.dom.Line('#(define output-suffix "{0}")'.format(suffix)))
else:
ly.dom.Line('\\bookOutputSuffix "{0}"'.format(suffix), book)
data.afterblocks.append(book)
data.afterblocks.append(ly.dom.BlankLine())
score = ly.dom.Score(book)
# TODO: make configurable
midiInstrument = voice2Midi[voice]
cmd = ly.dom.Command(rehearsalMidi, score)
ly.dom.QuotedString(name, cmd)
ly.dom.QuotedString(midiInstrument, cmd)
ly.dom.Identifier(refs[(lyrName, allStanzas[0])], cmd)
ly.dom.Midi(score)
ly.dom.Text("\\context Staff = $name", choir)
seq = ly.dom.Seq(choir)
ly.dom.Line("\\set Score.midiMinimumVolume = #0.5", seq)
ly.dom.Line("\\set Score.midiMaximumVolume = #0.5", seq)
ly.dom.Line("\\set Score.tempoWholesPerMinute = #" + data.scoreProperties.schemeMidiTempo(), seq)
ly.dom.Line("\\set Staff.midiMinimumVolume = #0.8", seq)
ly.dom.Line("\\set Staff.midiMaximumVolume = #1.0", seq)
ly.dom.Line("\\set Staff.midiInstrument = $midiInstrument", seq)
lyr = ly.dom.Lyrics(parent=choir)
lyr.getWith()['alignBelowContext'] = ly.dom.Text('$name')
ly.dom.Text("\\lyricsto $name $lyrics", lyr)
Example 54
Project: cgat Source File: IndexedFasta.py
def createDatabase(db, iterator,
force=False,
synonyms=None,
compression=None,
random_access_points=None,
regex_identifier=None,
clean_sequence=False,
ignore_duplicates=False,
allow_duplicates=False,
translator=None):
"""index files in filenames to create database.
Two new files are created - db.fasta and db_name.idx
If compression is enabled, provide random access points
every # bytes.
Dictzip is treated as an uncompressed file.
regex_identifier: pattern to extract identifier from description line.
If None, the part until the first white-space character is used.
translator: specify a translator
"""
if db.endswith(".fasta"):
db = db[:-len(".fasta")]
if compression:
if compression == "lzo":
import lzo
def lzo_mangler(s):
return lzo.compress(s, 9)
mangler = lzo_mangler
db_name = db + ".lzo"
write_chunks = True
elif compression == "zlib":
def zlib_mangler(s):
return zlib.compress(s, 9)
mangler = zlib_mangler
db_name = db + ".zlib"
write_chunks = True
elif compression == "gzip":
mangler = gzip_mangler
db_name = db + ".gz"
write_chunks = True
elif compression == "dictzip":
from . import dictzip
def mangler(x):
return x
db_name = db + ".dz"
write_chunks = False
elif compression == "bzip2":
import bz2
def bzip_mangler(x):
return bz2.compress(x, 9)
mangler = bzip_mangler
db_name = db + ".bz2"
write_chunks = True
elif compression == "debug":
def mangler(x):
return x
db_name = db + ".debug"
write_chunks = True
elif compression == "rle":
from . import RLE
mangler = RLE.compress
db_name = db + ".rle"
write_chunks = True
else:
raise ValueError("unknown compression library: %s" % compression)
index_name = db + ".cdx"
if write_chunks and random_access_points is None \
or random_access_points <= 0:
raise ValueError("specify chunksize in --random-access-points")
else:
def mangler(x):
return x
db_name = db + ".fasta"
write_chunks = False
index_name = db + ".idx"
if os.path.exists(db_name) and not force:
raise ValueError("database %s already exists." % db_name)
if os.path.exists(index_name) and not force:
raise ValueError("database index %s already exists." % index_name)
outfile_index = open(index_name, "w")
if compression == "dictzip":
if random_access_points is None or random_access_points <= 0:
raise ValueError(
"specify dictzip chunksize in --random-access-points")
outfile_fasta = dictzip.open(
db_name, "wb", buffersize=1000000, chunksize=random_access_points)
compression = None
else:
outfile_fasta = open(db_name, "w")
identifiers = {}
lsequence = 0
identifier_pos, sequence_pos = 0, 0
if sys.version_info.major >= 3:
translation = str.maketrans("xX", "nN")
else:
translation = string.maketrans("xX", "nN")
fragments = []
lfragment = 0
last_identifier = None
while 1:
try:
result = next(iterator)
except StopIteration:
break
if not result:
break
is_new, identifier, fragment = result
if is_new:
# check for duplicate identifiers
if identifier in identifiers:
if ignore_duplicates:
raise ValueError("ignore duplicates not implemented")
elif allow_duplicates:
# the current implementation will fail if the same
# identifiers
# are directly succeeding each other
# better: add return to iterator that indicates a new
# identifier
out_identifier = identifier + \
"_%i" % (identifiers[identifier])
identifiers[identifier] += 1
identifiers[out_identifier] = 1
else:
raise ValueError("%s occurs more than once" %
(identifier,))
else:
identifiers[identifier] = 1
out_identifier = identifier
if last_identifier:
if write_chunks:
writeFragments(outfile_fasta, outfile_index,
fragments, mangler,
size=random_access_points,
write_all=True)
fragments = []
lfragment = 0
else:
outfile_fasta.write("\n")
outfile_index.write("\t%i\n" % lsequence)
identifier_pos = outfile_fasta.tell()
outfile_fasta.write(mangler(">%s\n" % out_identifier))
sequence_pos = outfile_fasta.tell()
outfile_index.write("%s\t%i" % (out_identifier,
identifier_pos))
if write_chunks:
outfile_index.write("\t%i" % random_access_points)
else:
outfile_index.write("\t%i" % sequence_pos)
fragments = []
lsequence = 0
last_identifier = identifier
if translator:
s = translator(fragment)
else:
s = re.sub("\s", "", fragment.strip())
if clean_sequence:
s = s.translate(translation)
lsequence += len(s)
if write_chunks:
fragments.append(s)
lfragment += len(s)
if lfragment > random_access_points:
rest = writeFragments(outfile_fasta,
outfile_index,
fragments,
mangler,
size=random_access_points,
write_all=False)
fragments = [rest]
lfragment = len(rest)
else:
outfile_fasta.write(mangler(s))
if write_chunks:
writeFragments(outfile_fasta, outfile_index, fragments, mangler,
size=random_access_points, write_all=True)
else:
outfile_fasta.write("\n")
outfile_index.write("\t%i\n" % lsequence)
# add synonyms for the table
if synonyms:
for key, vals in list(synonyms.items()):
for val in vals:
outfile_index.write("%s\t%s\n" % (key, val))
Example 55
Project: pyxform Source File: xls2json_backends.py
def xls_to_dict(path_or_file):
"""
Return a Python dictionary with a key for each worksheet
name. For each sheet there is a list of dictionaries, each
dictionary corresponds to a single row in the worksheet. A
dictionary has keys taken from the column headers and values
equal to the cell value for that row and column.
All the keys and leaf elements are unicode text.
"""
try:
if isinstance(path_or_file, basestring):
workbook = xlrd.open_workbook(filename=path_or_file)
else:
workbook = xlrd.open_workbook(file_contents=path_or_file.read())
except XLRDError as e:
raise PyXFormError("Error reading .xls file: %s" % e.message)
def xls_value_to_unicode(value, value_type):
"""
Take a xls formatted value and try to make a unicode string
representation.
"""
if value_type == xlrd.XL_CELL_BOOLEAN:
return u"TRUE" if value else u"FALSE"
elif value_type == xlrd.XL_CELL_NUMBER:
# Try to display as an int if possible.
int_value = int(value)
if int_value == value:
return unicode(int_value)
else:
return unicode(value)
elif value_type is xlrd.XL_CELL_DATE:
# Warn that it is better to single quote as a string.
# error_location = cellFormatString % (ss_row_idx, ss_col_idx)
# raise Exception(
# "Cannot handle excel formatted date at " + error_location)
datetime_or_time_only = xlrd.xldate_as_tuple(
value, workbook.datemode)
if datetime_or_time_only[:3] == (0, 0, 0):
# must be time only
return unicode(datetime.time(*datetime_or_time_only[3:]))
return unicode(datetime.datetime(*datetime_or_time_only))
else:
# ensure unicode and replace nbsp spaces with normal ones
# to avoid this issue:
# https://github.com/modilabs/pyxform/issues/83
return unicode(value).replace(unichr(160), ' ')
def xls_to_dict_normal_sheet(sheet):
def iswhitespace(string):
return (
isinstance(string, basestring) and len(string.strip()) == 0)
# Check for duplicate column headers
column_header_list = list()
for column in range(0, sheet.ncols):
column_header = sheet.cell_value(0, column)
if column_header in column_header_list:
raise PyXFormError(
u"Duplicate column header: %s" % column_header)
# xls file with 3 columns mostly have a 3 more columns that are
# blank by default or something, skip during check
if column_header is not None:
if not iswhitespace(column_header):
column_header_list.append(column_header)
result = []
for row in range(1, sheet.nrows):
row_dict = OrderedDict()
for column in range(0, sheet.ncols):
# Changing to cell_value function
# convert to string, in case it is not string
key = u"%s" % sheet.cell_value(0, column)
key = key.strip()
value = sheet.cell_value(row, column)
# remove whitespace at the beginning and end of value
if isinstance(value, basestring):
value = value.strip()
value_type = sheet.cell_type(row, column)
if value is not None:
if not iswhitespace(value):
row_dict[key] = xls_value_to_unicode(value, value_type)
# Taking this condition out so I can get accurate row numbers.
# TODO: Do the same for csvs
# if row_dict != {}:
result.append(row_dict)
return result, _list_to_dict_list(column_header_list)
def xls_value_from_sheet(sheet, row, column):
value = sheet.cell_value(row, column)
value_type = sheet.cell_type(row, column)
if value is not None and value != "":
return xls_value_to_unicode(value, value_type)
else:
raise PyXFormError("Empty Value")
def _xls_to_dict_cascade_sheet(sheet):
result = []
rs_dict = OrderedDict() # tmp dict to hold entire structure
def slugify(s):
return re.sub(r'\W+', '_', s.strip().lower())
prefix = "$PREFIX$"
# get col headers and position first, ignore first column
for column in range(1, sheet.ncols):
col_name = sheet.cell_value(0, column)
rs_dict[col_name] = {
'pos': column,
'data': [],
'itemset': col_name,
'type': constants.SELECT_ONE,
'name':
prefix if (column == sheet.ncols - 1) else u''.join(
[prefix, '_', col_name]),
'label': sheet.cell_value(1, column)}
if column > 1:
rs_dict[col_name]['parent'] = sheet.cell_value(0, column - 1)
else:
rs_dict[col_name]['choices'] = []
choice_filter = ''
for a in range(1, column):
prev_col_name = sheet.cell_value(0, a)
if choice_filter != '':
choice_filter += ' and %s=${%s_%s}' % \
(prev_col_name, prefix, prev_col_name)
else:
choice_filter += '%s=${%s_%s}' % \
(prev_col_name, prefix, prev_col_name)
rs_dict[col_name]['choice_filter'] = choice_filter
# get data, use new cascade dict structure, data starts on 3 row
for row in range(2, sheet.nrows):
# go through each header aka column
for col_name in rs_dict:
column = rs_dict[col_name]['pos']
cell_data = xls_value_from_sheet(sheet, row, column)
try:
rs_dict[col_name]['data'].index(slugify(cell_data))
except ValueError:
rs_dict[col_name]['data'].append(slugify(cell_data))
if 'choices' in rs_dict[col_name]:
l = {'name': slugify(cell_data), 'label': cell_data}
rs_dict[col_name]['choices'].append(l)
data = {
'name': slugify(cell_data),
'label': cell_data.strip(),
constants.LIST_NAME: col_name
}
for prev_column in range(1, column):
prev_col_name = sheet.cell_value(0, prev_column)
data[prev_col_name] = slugify(xls_value_from_sheet(
sheet, row, prev_column))
result.append(data)
# order
kl = []
for column in range(1, sheet.ncols):
col_name = sheet.cell_value(0, column)
if 'parent' in rs_dict[col_name]:
rs_dict[col_name].pop('parent')
if 'pos' in rs_dict[col_name]:
rs_dict[col_name].pop('pos')
if 'data' in rs_dict[col_name]:
rs_dict[col_name].pop('data')
kl.append(rs_dict[col_name])
# create list with no duplicates
choices = []
for rec in result:
c = 0
for check in result:
if rec == check:
c += 1
if c == 1:
choices.append(rec)
else:
try:
choices.index(rec)
except ValueError:
choices.append(rec)
return [{'choices': choices, 'questions': kl}]
result = OrderedDict()
for sheet in workbook.sheets():
if sheet.name == constants.CASCADING_CHOICES:
result[sheet.name] = _xls_to_dict_cascade_sheet(sheet)
else:
result[sheet.name], result[u"%s_header" % sheet.name] = \
xls_to_dict_normal_sheet(sheet)
return result
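The cascade-sheet handling above relies on a tiny slugify helper built from re.sub. Shown standalone for clarity (the sample input is made up):

import re

def slugify(s):
    # Collapse every run of non-word characters into a single underscore.
    return re.sub(r'\W+', '_', s.strip().lower())

print(slugify("  North / West Region "))  # north_west_region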
Example 56
def _make_jsmin(extended=True, python_only=True):
"""
Generate JS minifier based on `jsmin.c by Douglas Crockford`_
.. _jsmin.c by Douglas Crockford:
http://www.crockford.com/javascript/jsmin.c
:Parameters:
`extended` : ``bool``
Extended Regexps? (using lookahead and lookbehind). This is faster,
because it can be optimized way more. The regexps used with `extended`
being false are only left here to allow easier porting to platforms
without extended regex features (and for my own reference...)
`python_only` : ``bool``
Use only the python variant. If true, the c extension is not even
tried to be loaded.
:Return: Minifier
:Rtype: ``callable``
"""
# pylint: disable = R0912, R0914, W0612
if not python_only:
try:
import _rjsmin
except ImportError:
pass
else:
return _rjsmin.jsmin
try:
xrange
except NameError:
xrange = range # pylint: disable = W0622
space_chars = r'[\000-\011\013\014\016-\040]'
line_comment = r'(?://[^\r\n]*)'
space_comment = r'(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)'
string1 = \
r'(?:\047[^\047\\\r\n]*(?:\\(?:[^\r\n]|\r?\n|\r)[^\047\\\r\n]*)*\047)'
string2 = r'(?:"[^"\\\r\n]*(?:\\(?:[^\r\n]|\r?\n|\r)[^"\\\r\n]*)*")'
strings = r'(?:%s|%s)' % (string1, string2)
charclass = r'(?:\[[^\\\]\r\n]*(?:\\[^\r\n][^\\\]\r\n]*)*\])'
nospecial = r'[^/\\\[\r\n]'
if extended:
regex = r'(?:/(?![\r\n/*])%s*(?:(?:\\[^\r\n]|%s)%s*)*/)' % (
nospecial, charclass, nospecial
)
else:
regex = (
r'(?:/(?:[^*/\\\r\n\[]|%s|\\[^\r\n])%s*(?:(?:\\[^\r\n]|%s)%s*)*/)'
)
regex = regex % (charclass, nospecial, charclass, nospecial)
space = r'(?:%s|%s)' % (space_chars, space_comment)
newline = r'(?:%s?[\r\n])' % line_comment
def fix_charclass(result):
""" Fixup string of chars to fit into a regex char class """
pos = result.find('-')
if pos >= 0:
result = r'%s%s-' % (result[:pos], result[pos + 1:])
def sequentize(string):
"""
Notate consecutive characters as sequence
(1-4 instead of 1234)
"""
first, last, result = None, None, []
for char in map(ord, string):
if last is None:
first = last = char
elif last + 1 == char:
last = char
else:
result.append((first, last))
first = last = char
if last is not None:
result.append((first, last))
return ''.join(['%s%s%s' % (
chr(first),
last > first + 1 and '-' or '',
last != first and chr(last) or ''
) for first, last in result])
return _re.sub(r'([\000-\040\047])', # for better portability
lambda m: '\\%03o' % ord(m.group(1)), (sequentize(result)
.replace('\\', '\\\\')
.replace('[', '\\[')
.replace(']', '\\]')
)
)
def id_literal_(what):
""" Make id_literal like char class """
match = _re.compile(what).match
result = ''.join([
chr(c) for c in xrange(127) if not match(chr(c))
])
return '[^%s]' % fix_charclass(result)
def not_id_literal_(keep):
""" Make negated id_literal like char class """
match = _re.compile(id_literal_(keep)).match
result = ''.join([
chr(c) for c in xrange(127) if not match(chr(c))
])
return r'[%s]' % fix_charclass(result)
not_id_literal = not_id_literal_(r'[a-zA-Z0-9_$]')
preregex1 = r'[(,=:\[!&|?{};\r\n]'
preregex2 = r'%(not_id_literal)sreturn' % locals()
if extended:
id_literal = id_literal_(r'[a-zA-Z0-9_$]')
id_literal_open = id_literal_(r'[a-zA-Z0-9_${\[(+-]')
id_literal_close = id_literal_(r'[a-zA-Z0-9_$}\])"\047+-]')
space_sub = _re.compile((
r'([^\047"/\000-\040]+)'
r'|(%(strings)s[^\047"/\000-\040]*)'
r'|(?:(?<=%(preregex1)s)%(space)s*(%(regex)s[^\047"/\000-\040]*))'
r'|(?:(?<=%(preregex2)s)%(space)s*(%(regex)s[^\047"/\000-\040]*))'
r'|(?<=%(id_literal_close)s)'
r'%(space)s*(?:(%(newline)s)%(space)s*)+'
r'(?=%(id_literal_open)s)'
r'|(?<=%(id_literal)s)(%(space)s)+(?=%(id_literal)s)'
r'|%(space)s+'
r'|(?:%(newline)s%(space)s*)+'
) % locals()).sub
def space_subber(match):
""" Substitution callback """
# pylint: disable = C0321, R0911
groups = match.groups()
if groups[0]:
return groups[0]
elif groups[1]:
return groups[1]
elif groups[2]:
return groups[2]
elif groups[3]:
return groups[3]
elif groups[4]:
return '\n'
elif groups[5]:
return ' '
else:
return ''
def jsmin(script): # pylint: disable = W0621
r"""
Minify javascript based on `jsmin.c by Douglas Crockford`_\.
Instead of parsing the stream char by char, it uses a regular
expression approach which minifies the whole script with one big
substitution regex.
.. _jsmin.c by Douglas Crockford:
http://www.crockford.com/javascript/jsmin.c
:Parameters:
`script` : ``str``
Script to minify
:Return: Minified script
:Rtype: ``str``
"""
return space_sub(space_subber, '\n%s\n' % script).strip()
else:
pre_regex = r'(?:%(preregex1)s|%(preregex2)s)' % locals()
not_id_literal_open = not_id_literal_(r'[a-zA-Z0-9_${\[(+-]')
not_id_literal_close = not_id_literal_(r'[a-zA-Z0-9_$}\])"\047+-]')
space_norm_sub = _re.compile((
r'(%(strings)s)'
r'|(?:(%(pre_regex)s)%(space)s*(%(regex)s))'
r'|(%(space)s)+'
r'|(?:(%(newline)s)%(space)s*)+'
) % locals()).sub
def space_norm_subber(match):
""" Substitution callback """
# pylint: disable = C0321
groups = match.groups()
if groups[0]:
return groups[0]
elif groups[1]:
return groups[1].replace('\r', '\n') + groups[2]
elif groups[3]:
return ' '
elif groups[4]:
return '\n'
space_sub1 = _re.compile((
r'[\040\n]?(%(strings)s|%(pre_regex)s%(regex)s)'
r'|\040(%(not_id_literal)s)'
r'|\n(%(not_id_literal_open)s)'
) % locals()).sub
def space_subber1(match):
""" Substitution callback """
groups = match.groups()
return groups[0] or groups[1] or groups[2]
space_sub2 = _re.compile((
r'(%(strings)s)\040?'
r'|(%(pre_regex)s%(regex)s)[\040\n]?'
r'|(%(not_id_literal)s)\040'
r'|(%(not_id_literal_close)s)\n'
) % locals()).sub
def space_subber2(match):
""" Substitution callback """
groups = match.groups()
return groups[0] or groups[1] or groups[2] or groups[3]
def jsmin(script):
r"""
Minify javascript based on `jsmin.c by Douglas Crockford`_\.
Instead of parsing the stream char by char, it uses a regular
expression approach. The script is minified with three passes:
normalization
Control character are mapped to spaces, spaces and newlines
are squeezed and comments are stripped.
space removal 1
Spaces before certain tokens are removed
space removal 2
Spaces after certain tokens are remove
.. _jsmin.c by Douglas Crockford:
http://www.crockford.com/javascript/jsmin.c
:Parameters:
`script` : ``str``
Script to minify
:Return: Minified script
:Rtype: ``str``
"""
return space_sub2(space_subber2,
space_sub1(space_subber1,
space_norm_sub(space_norm_subber,
'\n%s\n' % script)
)
).strip()
return jsmin
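fix_charclass above uses re.sub with a callable replacement to octal-escape control characters before embedding them in a regex character class. A reduced sketch of just that escaping step (bracket and backslash handling omitted; the helper name is illustrative):

import re

def escape_for_charclass(chars):
    # Replace control characters and the single quote with \ooo octal escapes.
    return re.sub(r'([\000-\040\047])',
                  lambda m: '\\%03o' % ord(m.group(1)),
                  chars)

print(escape_for_charclass("a b'c"))  # a\040b\047c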
Example 57
Project: cmdbac Source File: basedeployer.py
def save_attempt(self, attempt_result, driver_result = {}):
LOG.info("Saving attempt ...")
# flush log
self.flush_log()
# get info
register_result = driver_result.get('register', USER_STATUS_UNKNOWN)
login_result = driver_result.get('login', USER_STATUS_UNKNOWN)
forms = driver_result.get('forms', None)
urls = driver_result.get('urls', None)
screenshot_path = driver_result.get('screenshot', None)
statistics = driver_result.get('statistics', None)
informations = driver_result.get('informations', None)
# get runtime
if self.runtime == None:
self.runtime = self.get_runtime()
Runtime.objects.get_or_create(executable = self.runtime['executable'], version = self.runtime['version'])
runtime = Runtime.objects.get(executable = self.runtime['executable'], version = self.runtime['version'])
# save attempt
self.attempt.result = attempt_result
self.attempt.login = login_result
self.attempt.register = register_result
self.attempt.stop_time = datetime.now()
self.attempt.size = utils.get_size(self.base_path)
self.attempt.runtime = runtime
self.attempt.actions_count = 0
self.attempt.queries_count = 0
if forms == None and urls == None and self.attempt.result == ATTEMPT_STATUS_SUCCESS:
self.attempt.result = ATTEMPT_STATUS_NO_QUERIES
self.attempt.save()
# save forms
if forms != None:
url_patterns = set()
for f in forms:
try:
if '/admin' in f['url']:
continue
url_pattern = re.sub('\d', '', f['url'])
if url_pattern in url_patterns:
continue
url_patterns.add(url_pattern)
action = Action()
action.url = f['url']
if f['method'] == '':
f['method'] = 'get'
action.method = f['method'].upper()
action.attempt = self.attempt
action.save()
self.attempt.actions_count += 1
for q in f['queries']:
try:
query = Query()
query.content = q['content']
query.matched = q['matched']
query.action = action
query.save()
self.attempt.queries_count += 1
if 'explain' in q:
explain = Explain()
explain.output = q['explain']
explain.query = query
explain.save()
if 'stats' in q:
metric = QueryMetric()
metric.name = 'stats'
metric.value = str(q['stats'])
metric.query = query
metric.save()
except:
pass
for input in f['inputs']:
field = Field()
field.name = input['name']
field.type = input['type']
field.action = action
field.save()
for description, count in f['counter'].iteritems():
counter = Counter()
counter.description = description
counter.count = count
counter.action = action
counter.save()
except Exception, e:
LOG.exception(e)
# save urls
if urls != None:
url_patterns = set()
for u in urls:
try:
if '/admin' in u['url']:
continue
url_pattern = re.sub('\d', '', u['url'])
if url_pattern in url_patterns:
continue
url_patterns.add(url_pattern)
action = Action()
action.url = u['url']
action.method = 'GET'
action.attempt = self.attempt
action.save()
self.attempt.actions_count += 1
for q in u['queries']:
try:
query = Query()
query.content = q['content']
query.action = action
query.save()
self.attempt.queries_count += 1
if 'explain' in q:
explain = Explain()
explain.output = q['explain']
explain.query = query
explain.save()
if 'stats' in q:
metric = QueryMetric()
metric.name = 'stats'
metric.value = str(q['stats'])
metric.query = query
metric.save()
except:
pass
for description, count in u['counter'].iteritems():
counter = Counter()
counter.description = description
counter.count = count
counter.action = action
counter.save()
except Exception, e:
LOG.exception(e)
# save screenshot
if screenshot_path != None:
screenshot = open(screenshot_path, 'rb')
image = Image()
image.data = screenshot.read()
image.attempt = self.attempt
image.save()
# save statistics
if statistics != None:
for description, count in statistics.iteritems():
statistic = Statistic()
statistic.description = description
statistic.count = count
statistic.attempt = self.attempt
statistic.save()
# save informations
if informations != None:
for name, description in informations.iteritems():
information = Information()
information.name = name
information.description = description
information.attempt = self.attempt
information.save()
LOG.info("Saved Attempt #%s for %s" % (self.attempt, self.attempt.repo))
# populate packages
for pkg in self.packages_from_file:
try:
Dependency.objects.get_or_create(attempt=self.attempt, package=pkg, source=PACKAGE_SOURCE_FILE)
pkg.count = pkg.count + 1
pkg.save()
except Exception, e:
LOG.exception(e)
## FOR
for pkg in self.packages_from_database:
try:
Dependency.objects.get_or_create(attempt=self.attempt, package=pkg, source=PACKAGE_SOURCE_DATABASE)
if pkg.version != '':
pkg.count = pkg.count + 1
pkg.save()
except Exception, e:
LOG.exception(e)
## FOR
# make sure we update the repo to point to this latest attempt
if attempt_result in [ATTEMPT_STATUS_MISSING_REQUIRED_FILES, ATTEMPT_STATUS_RUNNING_ERROR, ATTEMPT_STATUS_DOWNLOAD_ERROR]:
self.repo.valid_project = False
else:
self.repo.valid_project = True
self.repo.latest_attempt = self.attempt
if self.attempt.result == ATTEMPT_STATUS_SUCCESS and self.attempt.queries_count == 0:
self.attempt.result = ATTEMPT_STATUS_NO_QUERIES
if self.attempt.result == ATTEMPT_STATUS_SUCCESS:
self.repo.latest_successful_attempt = self.attempt
self.repo.attempts_count = self.repo.attempts_count + 1
self.repo.save()
self.attempt.save()
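save_attempt deduplicates crawled URLs by stripping digits with re.sub('\d', '', url), so '/post/12/edit' and '/post/98/edit' count as one action. A standalone sketch of that pattern-keying idea (names are illustrative):

import re

def url_pattern(url):
    # Remove digits so URLs differing only by numeric ids collapse together.
    return re.sub(r'\d', '', url)

seen = set()
for u in ["/post/12/edit", "/post/98/edit", "/about"]:
    seen.add(url_pattern(u))
print(sorted(seen))  # ['/about', '/post//edit']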
Example 58
Project: Glyphs-Scripts Source File: Batch Metric Keys.py
def BatchMetricKeyMain( self, sender ):
try:
thisFont = Glyphs.font
thisFontMaster = thisFont.selectedFontMaster
listOfSelectedLayers = thisFont.selectedLayers
fieldKey = self.w.keyTextField.get()
flatFieldKey = re.sub("@Base", "@base", fieldKey)
if "@base" in fieldKey or "@Base" in fieldKey:
# Checks if a given layer has a metrics key of a glyph that has another key. Checks the glyph once and returns its name.
def nestHuntL( targetGlyphName ):
try:
# Sees if the glyphName exists in the font
if thisFont.glyphs[ targetGlyphName ]:
# If exists, gets the left key of targetGlyph of the same layer
targetGlyphL = thisFont.glyphs[ targetGlyphName ]
targetLayerL = targetGlyphL.layers[ thisFontMaster.id ]
targetLayerKeyL = targetLayerL.leftMetricsKeyUI()
# If it's a plain number or calculation, returns the original glyph name
a = ["=|", "+", "*", "/", "-1", "-2", "-3", "-4", "-5", "-6", "-7", "-8", "-9"]
if targetLayerKeyL[0].isdigit() or "-" in targetLayerKeyL[0] or any([ x in targetLayerKeyL for x in a]):
return targetGlyphName
# Finds the first component and returns its name
elif "auto" in targetLayerKeyL:
firstComponent = targetLayerL.components[0]
return firstComponent.componentName
# This is a single-letter key, so clean it up
else:
cleanGlyphName = re.sub( "=", "", targetLayerKeyL )
cleanGlyphName = re.sub( " .*", "", cleanGlyphName )
return cleanGlyphName
# If the glyph doesn't exist:
else:
print "Found invalid LSB key while checking the key of %s" % thisGlyph.name
except Exception, e:
Glyphs.showMacroWindow()
print "nestHuntL Error: %s" % e
def nestHuntR( targetGlyphName ):
try:
# Sees if the glyphName exists in the font
if thisFont.glyphs[ targetGlyphName ]:
# If exists, gets the left key of targetGlyph of the same layer
targetGlyphR = thisFont.glyphs[ targetGlyphName ]
targetLayerR = targetGlyphR.layers[ thisFontMaster.id ]
targetLayerKeyR = targetLayerR.rightMetricsKeyUI()
# If it's a plain number or calculation, returns the original glyph name
a = ["=|", "+", "*", "/", "-1", "-2", "-3", "-4", "-5", "-6", "-7", "-8", "-9"]
if targetLayerKeyR[0].isdigit() or "-" in targetLayerKeyR[0] or any([ x in targetLayerKeyR for x in a]):
return targetGlyphName
# Finds the last "Letter" component and returns its name
elif "auto" in targetLayerKeyR:
allCompernents = thisLayer.components
numOfCompernents = len(allCompernents)
lastCompernent = allCompernents[numOfCompernents-1]
lastCompernentName = lastCompernent.componentName
lastCompernentGlyph = thisFont.glyphs[lastCompernentName]
while lastCompernentGlyph.category != "Letter":
numOfCompernents = numOfCompernents-1
lastCompernent = allCompernents[numOfCompernents]
lastCompernentName = lastCompernent.componentName
lastCompernentGlyph = thisFont.glyphs[lastCompernentName]
return lastCompernentName
# This is a single-letter key, so clean it up
else:
cleanGlyphName = re.sub( "=", "", targetLayerKeyR )
cleanGlyphName = re.sub( " .*", "", cleanGlyphName )
return cleanGlyphName
# If the glyph doesn't exist:
else:
print "Found invalid RSB key while checking the key of %s" % thisGlyph.name
except Exception, e:
Glyphs.showMacroWindow()
print "nestHuntR Error: %s" % e
# Set baseGlyphName for further nest hunting.
for thisLayer in thisFont.selectedLayers:
# Checks case of base glyph.
thisGlyph = thisLayer.parent
baseGlyphName = re.sub("\..*", "", thisGlyph.name)
baseGlyphName = re.sub("superior", "", baseGlyphName)
if "@Base" in fieldKey:
baseGlyphName = baseGlyphName.capitalize()
if thisGlyph.script == "latin" and re.match("Ij|Ae|Oe", baseGlyphName):
baseGlyphName = baseGlyphName[0:2].upper() + baseGlyphName[2:]
baseGlyphNameL = baseGlyphName
baseGlyphNameR = baseGlyphName
# Detects ligatures and sets baseGlyphNameL and R
if "_" in baseGlyphName:
baseGlyphNameL = re.sub("_.*", "", baseGlyphName)
baseGlyphNameR = re.sub(".*_", "", baseGlyphName)
elif "ordfeminine" in thisGlyph.name:
baseGlyphNameL = "a"
baseGlyphNameR = "a"
elif "ordmasculine" in thisGlyph.name:
baseGlyphNameL = "o"
baseGlyphNameR = "o"
else:
baseGlyphNameL = baseGlyphName
baseGlyphNameR = baseGlyphName
thisFont.disableUpdateInterface()
thisGlyph.beginUndo()
# Runs nestHuntL multiple times until it finds the final glyph,
# and then set the final left metrics key.
if self.w.applyL.get():
if self.w.avoidNest:
dummyOldL = nestHuntL(baseGlyphNameL)
dummyNewL = nestHuntL(dummyOldL)
while dummyOldL != dummyNewL:
dummyOldL = nestHuntL(dummyNewL)
dummyNewL = nestHuntL(dummyOldL)
finalKeyL = re.sub("@base", dummyNewL, flatFieldKey)
thisGlyph.setLeftMetricsKey_(finalKeyL)
# Runs nestHuntR multiple times until it finds the final glyph,
# and then set the final right metrics key.
if self.w.applyR.get():
if self.w.avoidNest:
dummyOldR = nestHuntR(baseGlyphNameR)
dummyNewR = nestHuntR(dummyOldR)
while dummyOldR != dummyNewR:
dummyOldR = nestHuntR(dummyNewR)
dummyNewR = nestHuntR(dummyOldR)
finalKeyR = re.sub("@base", dummyNewR, flatFieldKey)
# Processes as normal
if baseGlyphName != "Q":
thisGlyph.setRightMetricsKey_(finalKeyR)
# Uses width of the width of O of the same group
elif baseGlyphName == "Q" and self.w.radioQ.get() == 0:
Qbefore = thisGlyph.name
Qname = re.sub("Q", "O", Qbefore)
Qname = re.sub("q", "o", Qbefore)
glyphO = thisFont.glyphs[Qname]
numOfMasters = len(thisFont.masters)
thisGlyph.setWidth_(thisOWidth)
# Uses RSB as normal
elif baseGlyphName == "Q" and self.w.radioQ.get() == 1:
thisGlyph.setRightMetricsKey_(finalKeyR)
thisGlyph.endUndo()
thisFont.enableUpdateInterface()
self.w.close()
else:
pass
for thisLayer in listOfSelectedLayers:
thisGlyph = thisLayer.parent
thisFont.disableUpdateInterface()
thisGlyph.beginUndo()
for i in thisGlyph.layers:
if self.w.applyL.get():
i.setLeftMetricsKey_(fieldKey)
if self.w.applyR.get():
i.setRightMetricsKey_(fieldKey)
thisGlyph.endUndo()
thisFont.enableUpdateInterface()
self.w.close()
except Exception, e:
Glyphs.showMacroWindow()
print "BatchMetricKeyMain Error: %s" % e
Example 59
Project: edx-platform Source File: formula.py
def preprocess_pmathml(self, xml):
r"""
Pre-process presentation MathML from ASCIIMathML to make it more
acceptable for SnuggleTeX, and also to accomodate some sympy
conventions (eg hat(i) for \hat{i}).
This method would be a good spot to look for an integral and convert
it, if possible...
"""
if isinstance(xml, (str, unicode)):
xml = etree.fromstring(xml) # TODO: wrap in try
xml = self.fix_greek_in_mathml(xml) # convert greek utf letters to greek spelled out in ascii
def gettag(expr):
return re.sub('{http://[^}]+}', '', expr.tag)
def fix_pmathml(xml):
"""
f and g are processed as functions by asciimathml, eg "f-2" turns
into "<mrow><mi>f</mi><mo>-</mo></mrow><mn>2</mn>" this is
really terrible for turning into cmathml. undo this here.
"""
for k in xml:
tag = gettag(k)
if tag == 'mrow':
if len(k) == 2:
if gettag(k[0]) == 'mi' and k[0].text in ['f', 'g'] and gettag(k[1]) == 'mo':
idx = xml.index(k)
xml.insert(idx, deepcopy(k[0])) # drop the <mrow> container
xml.insert(idx + 1, deepcopy(k[1]))
xml.remove(k)
fix_pmathml(k)
fix_pmathml(xml)
def fix_hat(xml):
"""
hat i is turned into <mover><mi>i</mi><mo>^</mo></mover> ; mangle
this into <mi>hat(f)</mi> hat i also somtimes turned into
<mover><mrow> <mi>j</mi> </mrow><mo>^</mo></mover>
"""
for k in xml:
tag = gettag(k)
if tag == 'mover':
if len(k) == 2:
if gettag(k[0]) == 'mi' and gettag(k[1]) == 'mo' and str(k[1].text) == '^':
newk = etree.Element('mi')
newk.text = 'hat(%s)' % k[0].text
xml.replace(k, newk)
if gettag(k[0]) == 'mrow' and gettag(k[0][0]) == 'mi' and \
gettag(k[1]) == 'mo' and str(k[1].text) == '^':
newk = etree.Element('mi')
newk.text = 'hat(%s)' % k[0][0].text
xml.replace(k, newk)
fix_hat(k)
fix_hat(xml)
def flatten_pmathml(xml):
"""
Give the text version of certain PMathML elements
Sometimes MathML will be given with each letter separated (it
doesn't know if its implicit multiplication or what). From an xml
node, find the (text only) variable name it represents. So it takes
<mrow>
<mi>m</mi>
<mi>a</mi>
<mi>x</mi>
</mrow>
and returns 'max', for easier use later on.
"""
tag = gettag(xml)
if tag == 'mn':
return xml.text
elif tag == 'mi':
return xml.text
elif tag == 'mrow':
return ''.join([flatten_pmathml(y) for y in xml])
raise Exception('[flatten_pmathml] unknown tag %s' % tag)
def fix_mathvariant(parent):
"""
Fix certain kinds of math variants
Literally replace <mstyle mathvariant="script"><mi>N</mi></mstyle>
with 'scriptN'. There have been problems using script_N or script(N)
"""
for child in parent:
if gettag(child) == 'mstyle' and child.get('mathvariant') == 'script':
newchild = etree.Element('mi')
newchild.text = 'script%s' % flatten_pmathml(child[0])
parent.replace(child, newchild)
fix_mathvariant(child)
fix_mathvariant(xml)
# find "tagged" superscripts
# they have the character \u200b in the superscript
# replace them with a__b so snuggle doesn't get confused
def fix_superscripts(xml):
""" Look for and replace sup elements with 'X__Y' or 'X_Y__Z'
In the javascript, variables with '__X' in them had an invisible
character inserted into the sup (to distinguish from powers)
E.g. normal:
<msubsup>
<mi>a</mi>
<mi>b</mi>
<mi>c</mi>
</msubsup>
to be interpreted '(a_b)^c' (nothing done by this method)
And modified:
<msubsup>
<mi>b</mi>
<mi>x</mi>
<mrow>
<mo>​</mo>
<mi>d</mi>
</mrow>
</msubsup>
to be interpreted 'b_x__d'
also:
<msup>
<mi>x</mi>
<mrow>
<mo>​</mo>
<mi>B</mi>
</mrow>
</msup>
to be 'x__B'
"""
for k in xml:
tag = gettag(k)
# match things like the last example--
# the second item in msub is an mrow with the first
# character equal to \u200b
if (
tag == 'msup' and
len(k) == 2 and gettag(k[1]) == 'mrow' and
gettag(k[1][0]) == 'mo' and k[1][0].text == u'\u200b' # whew
):
# replace the msup with 'X__Y'
k[1].remove(k[1][0])
newk = etree.Element('mi')
newk.text = '%s__%s' % (flatten_pmathml(k[0]), flatten_pmathml(k[1]))
xml.replace(k, newk)
# match things like the middle example-
# the third item in msubsup is an mrow with the first
# character equal to \u200b
if (
tag == 'msubsup' and
len(k) == 3 and gettag(k[2]) == 'mrow' and
gettag(k[2][0]) == 'mo' and k[2][0].text == u'\u200b' # whew
):
# replace the msubsup with 'X_Y__Z'
k[2].remove(k[2][0])
newk = etree.Element('mi')
newk.text = '%s_%s__%s' % (flatten_pmathml(k[0]), flatten_pmathml(k[1]), flatten_pmathml(k[2]))
xml.replace(k, newk)
fix_superscripts(k)
fix_superscripts(xml)
def fix_msubsup(parent):
"""
Snuggle returns an error when it sees an <msubsup>; replace such
elements with an <msup> whose first element is of
the form a_b. I.e. map a_b^c => (a_b)^c
"""
for child in parent:
# fix msubsup
if gettag(child) == 'msubsup' and len(child) == 3:
newchild = etree.Element('msup')
newbase = etree.Element('mi')
newbase.text = '%s_%s' % (flatten_pmathml(child[0]), flatten_pmathml(child[1]))
newexp = child[2]
newchild.append(newbase)
newchild.append(newexp)
parent.replace(child, newchild)
fix_msubsup(child)
fix_msubsup(xml)
self.xml = xml # pylint: disable=attribute-defined-outside-init
return self.xml
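A tiny standalone sketch of the namespace-stripping substitution used by gettag() above, with a hypothetical lxml-style tag string:
import re

tag = "{http://www.w3.org/1998/Math/MathML}mrow"   # hypothetical namespaced element tag
print(re.sub('{http://[^}]+}', '', tag))            # "mrow"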
0
Example 60
Project: bakthat Source File: __init__.py
@app.cmd(help="Backup a file or a directory, backup the current directory if no arg is provided.")
@app.cmd_arg('filename', type=str, default=os.getcwd(), nargs="?")
@app.cmd_arg('-d', '--destination', type=str, help="s3|glacier|swift", default=None)
@app.cmd_arg('--prompt', type=str, help="yes|no", default="yes")
@app.cmd_arg('-t', '--tags', type=str, help="space separated tags", default="")
@app.cmd_arg('-p', '--profile', type=str, default="default", help="profile name (default by default)")
@app.cmd_arg('-c', '--config', type=str, default=CONFIG_FILE, help="path to config file")
@app.cmd_arg('-k', '--key', type=str, default=None, help="Custom key for periodic backups (works only with BakManager.io hook.)")
@app.cmd_arg('--exclude-file', type=str, default=None)
@app.cmd_arg('--s3-reduced-redundancy', action="store_true")
def backup(filename=os.getcwd(), destination=None, profile="default", config=CONFIG_FILE, prompt="yes", tags=[], key=None, exclude_file=None, s3_reduced_redundancy=False, **kwargs):
"""Perform backup.
:type filename: str
:param filename: File/directory to backup.
:type destination: str
:param destination: s3|glacier|swift
:type prompt: str
:param prompt: Disable password prompt, disable encryption,
only useful when using bakthat in command line mode.
:type tags: str or list
:param tags: Tags, either as a space-separated string
or directly as a list of str (if calling from Python).
:type password: str
:keyword password: Password, empty string to disable encryption.
:type conf: dict
:keyword conf: Override/set AWS configuration.
:type custom_filename: str
:keyword custom_filename: Override the original filename (only in metadata)
:rtype: dict
:return: A dict containing the following keys: stored_filename, size, metadata, backend and filename.
"""
storage_backend, destination, conf = _get_store_backend(config, destination, profile)
backup_file_fmt = "{0}.{1}.tgz"
session_id = str(uuid.uuid4())
events.before_backup(session_id)
# Check if compression is disabled on the configuration.
if conf:
compress = conf.get("compress", True)
else:
compress = config.get(profile).get("compress", True)
if not compress:
backup_file_fmt = "{0}.{1}"
log.info("Backing up " + filename)
if exclude_file and os.path.isfile(exclude_file):
EXCLUDE_FILES.insert(0, exclude_file)
_exclude = lambda filename: False
if os.path.isdir(filename):
join = functools.partial(os.path.join, filename)
for efile in EXCLUDE_FILES:
efile = join(efile)
if os.path.isfile(efile):
_exclude = _get_exclude(efile)
log.info("Using {0} to exclude files.".format(efile))
arcname = filename.strip('/').split('/')[-1]
now = datetime.utcnow()
date_component = now.strftime("%Y%m%d%H%M%S")
stored_filename = backup_file_fmt.format(arcname, date_component)
backup_date = int(now.strftime("%s"))
backup_data = dict(filename=kwargs.get("custom_filename", arcname),
backup_date=backup_date,
last_updated=backup_date,
backend=destination,
is_deleted=False)
# Useful only when using bakmanager.io hook
backup_key = key
password = kwargs.get("password", os.environ.get("BAKTHAT_PASSWORD"))
if password is None and prompt.lower() != "no":
password = getpass("Password (blank to disable encryption): ")
if password:
password2 = getpass("Password confirmation: ")
if password != password2:
log.error("Password confirmation doesn't match")
return
if not compress:
log.info("Compression disabled")
outname = filename
with open(outname) as outfile:
backup_data["size"] = os.fstat(outfile.fileno()).st_size
bakthat_compression = False
# Check if the file is not already compressed
elif mimetypes.guess_type(arcname) == ('application/x-tar', 'gzip'):
log.info("File already compressed")
outname = filename
# removing extension to reformat filename
new_arcname = re.sub(r'(\.t(ar\.)?gz)', '', arcname)
stored_filename = backup_file_fmt.format(new_arcname, date_component)
with open(outname) as outfile:
backup_data["size"] = os.fstat(outfile.fileno()).st_size
bakthat_compression = False
else:
# If not we compress it
log.info("Compressing...")
with tempfile.NamedTemporaryFile(delete=False) as out:
with closing(tarfile.open(fileobj=out, mode="w:gz")) as tar:
tar.add(filename, arcname=arcname, exclude=_exclude)
outname = out.name
out.seek(0)
backup_data["size"] = os.fstat(out.fileno()).st_size
bakthat_compression = True
bakthat_encryption = False
if password:
bakthat_encryption = True
log.info("Encrypting...")
encrypted_out = tempfile.NamedTemporaryFile(delete=False)
encrypt_file(outname, encrypted_out.name, password)
stored_filename += ".enc"
# We only remove the file if the archive is created by bakthat
if bakthat_compression:
os.remove(outname) # remove non-encrypted tmp file
outname = encrypted_out.name
encrypted_out.seek(0)
backup_data["size"] = os.fstat(encrypted_out.fileno()).st_size
# Handling tags metadata
if isinstance(tags, list):
tags = " ".join(tags)
backup_data["tags"] = tags
backup_data["metadata"] = dict(is_enc=bakthat_encryption,
client=socket.gethostname())
backup_data["stored_filename"] = stored_filename
access_key = storage_backend.conf.get("access_key")
container_key = storage_backend.conf.get(storage_backend.container_key)
backup_data["backend_hash"] = hashlib.sha512(access_key + container_key).hexdigest()
log.info("Uploading...")
storage_backend.upload(stored_filename, outname, s3_reduced_redundancy=s3_reduced_redundancy)
# We only remove the file if the archive is created by bakthat
if bakthat_compression or bakthat_encryption:
os.remove(outname)
log.debug(backup_data)
# Insert backup metadata in SQLite
backup = Backups.create(**backup_data)
BakSyncer(conf).sync_auto()
# bakmanager.io hook, enable with -k/--key parameter
if backup_key:
bakmanager_hook(conf, backup_data, backup_key)
events.on_backup(session_id, backup)
return backup
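A small sketch of the extension-stripping substitution used above when the input file is already compressed, with hypothetical archive names:
import re

for arcname in ("site-backup.tar.gz", "site-backup.tgz"):
    new_arcname = re.sub(r'(\.t(ar\.)?gz)', '', arcname)
    print(new_arcname)   # "site-backup" in both cases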
0
Example 61
Project: KaraKara Source File: __init__.py
def main(global_config, **settings):
"""
This function returns a Pyramid WSGI application.
"""
# Setup --------------------------------------------------------------------
# Db
init_DBSession(settings)
# Pyramid Global Settings
config = Configurator(settings=settings) # , autocommit=True
# Register Additional Includes ---------------------------------------------
config.include('pyramid_mako') # The mako.directories value is updated in the scan for addons. We trigger the import here to include the correct folders.
# Reload on template change
template_filenames = map(operator.attrgetter('absolute'), file_scan(config.registry.settings['mako.directories']))
add_file_callback(lambda: template_filenames)
# Parse/Convert setting keys that have specified datatypes
for key in config.registry.settings.keys():
config.registry.settings[key] = convert_str_with_type(config.registry.settings[key])
# i18n
config.add_translation_dirs(config.registry.settings['i18n.translation_dirs'])
# Session Manager
session_settings = extract_subkeys(config.registry.settings, 'session.')
session_factory = SignedCookieSessionFactory(serializer=json_serializer, **session_settings)
config.set_session_factory(session_factory)
# Cachebust etags ----------------------------------------------------------
# crude implementation; count the number of tags in db, if that's changed, the etags will invalidate
if not config.registry.settings['server.etag.cache_buster']:
from .model.actions import last_update
config.registry.settings['server.etag.cache_buster'] = 'last_update:{0}'.format(str(last_update()))
# Search Config ------------------------------------------------------------
import karakara.views.search
karakara.views.search.search_config = read_json(config.registry.settings['karakara.search.view.config'])
# WebSocket ----------------------------------------------------------------
class NullAuthEchoServerManager(object):
def recv(self, *args, **kwargs):
pass
socket_manager = NullAuthEchoServerManager()
# Do not activate websocket if in community mode
if config.registry.settings.get('karakara.server.mode') == 'comunity':
config.registry.settings['karakara.websocket.port'] = None
if config.registry.settings.get('karakara.websocket.port'):
def authenicator(key):
"""Only admin authenticated keys can connect to the websocket"""
request = Request({'HTTP_COOKIE':'{0}={1}'.format(config.registry.settings['session.cookie_name'],key)})
session_data = session_factory(request)
return session_data and session_data.get('admin')
try:
_socket_manager = AuthEchoServerManager(
authenticator=authenicator,
websocket_port=config.registry.settings['karakara.websocket.port'],
tcp_port=config.registry.settings.get('karakara.tcp.port'),
)
_socket_manager.start()
socket_manager = _socket_manager
except OSError:
log.warn('Unable to setup websocket')
config.registry['socket_manager'] = socket_manager
# Login Providers ----------------------------------------------------------
from .views.comunity_login import social_login
social_login.user_store = ComunityUserStore()
login_providers = config.registry.settings.get('login.provider.enabled')
# Facebook
if 'facebook' in login_providers:
for settings_key in ('facebook.appid', 'facebook.secret'):
assert config.registry.settings.get(settings_key), 'To use facebook as a login provider appid and secret must be provided'
social_login.add_login_provider(FacebookLogin(
appid=config.registry.settings.get('facebook.appid'),
secret=config.registry.settings.get('facebook.secret'),
permissions=config.registry.settings.get('facebook.permissions'),
))
# Firefox Persona (Deprecated technology but a useful reference)
#if 'persona' in login_providers:
# social_login.add_login_provider(PersonaLogin(
# site_url=config.registry.settings.get('server.url')
# ))
# No login provider
if not login_providers and config.registry.settings.get('karakara.server.mode') == 'development':
# Auto login if no service keys are provided
social_login.add_login_provider(NullLoginProvider())
social_login.user_store = NullComunityUserStore()
template_helpers.javascript_inline['comunity'] = social_login.html_includes
# Renderers ----------------------------------------------------------------
# AllanC - currently the auto_format decorator does all the formatting work
# it would be far preferable to use the pyramid renderer framework
# issue is, we want to set the renderer to be dynamic based on the url given
# I don't want to define EVERY method with loads of renderer tags
# and I don't want to define 5+ routes for every view callable with different formats
# We need a nice way of doing this in pyramid, and right now, after HOURS of trawling
# the doc and experimenting, I can't find one.
#from .renderers.auto_render_factory import AutoRendererFactory, handle_before_render
#config.add_renderer(None , AutoRendererFactory) #'renderers.auto_render_factory.auto_renderer_factory'
#config.add_renderer('.html', 'pyramid.mako_templating.renderer_factory')
#config.add_subscriber(handle_before_render , pyramid.events.BeforeRender) # maybe use this to set renderer?
# closest I've seen
# http://zhuoqiang.me/a/restful-pyramid
# http://stackoverflow.com/questions/4633320/is-there-a-better-way-to-switch-between-html-and-json-output-in-pyramid
# Routes -------------------------------------------------------------------
# Static Routes
config.add_static_view(name='ext' , path='../externals/static') #cache_max_age=3600 # settings["static.assets"]
config.add_static_view(name='static', path='karakara:{0}'.format(settings["static.assets"])) #cache_max_age=3600 # settings["static.assets"]
config.add_static_view(name='player', path=settings["static.player"])
# AllanC - it's official ... static route setup and generation is a mess in pyramid
#config.add_static_view(name=settings["static.media" ], path="karakara:media" )
config.add_static_view(name='files' , path=settings["static.processed"])
# Routes
def append_format_pattern(route):
return re.sub(r'{(.*)}', r'{\1:[^/\.]+}', route) + r'{spacer:[.]?}{format:(%s)?}' % '|'.join(registered_formats())
config.add_route('home' , append_format_pattern('/') )
config.add_route('track' , append_format_pattern('/track/{id}') )
config.add_route('track_list' , append_format_pattern('/track_list') )
config.add_route('queue' , append_format_pattern('/queue') )
config.add_route('priority_tokens', append_format_pattern('/priority_tokens'))
config.add_route('fave' , append_format_pattern('/fave') )
config.add_route('message' , append_format_pattern('/message') )
config.add_route('admin_toggle' , append_format_pattern('/admin') )
config.add_route('admin_lock' , append_format_pattern('/admin_lock') )
config.add_route('remote' , append_format_pattern('/remote') )
config.add_route('feedback' , append_format_pattern('/feedback') )
config.add_route('settings' , append_format_pattern('/settings') )
config.add_route('random_images' , append_format_pattern('/random_images') )
config.add_route('inject_testdata' , append_format_pattern('/inject_testdata') )
config.add_route('stats' , append_format_pattern('/stats') )
config.add_route('comunity' , append_format_pattern('/comunity') )
config.add_route('comunity_login', append_format_pattern('/comunity/login'))
config.add_route('comunity_logout', append_format_pattern('/comunity/logout'))
config.add_route('comunity_list' , append_format_pattern('/comunity/list') )
config.add_route('comunity_track', append_format_pattern('/comunity/track/{id}'))
config.add_route('comunity_upload', append_format_pattern('/comunity/upload'))
config.add_route('comunity_settings', append_format_pattern('/comunity/settings'))
config.add_route('comunity_processmedia_log', append_format_pattern('/comunity/processmedia_log'))
config.add_route('search_tags' , '/search_tags/{tags:.*}')
config.add_route('search_list' , '/search_list/{tags:.*}')
# Upload extras -----
#config.add_static_view(name=settings['upload.route.uploaded'], path=settings['upload.path']) # the 'upload' route above always matchs first
config.add_route('upload', '/upload{sep:/?}{name:.*}')
# Events -------------------------------------------------------------------
config.add_subscriber(add_localizer_to_request, pyramid.events.NewRequest)
config.add_subscriber(add_render_globals_to_template, pyramid.events.BeforeRender)
# Return -------------------------------------------------------------------
config.scan(ignore='.tests')
config.scan('externals.lib.pyramid_helpers.views')
return config.make_wsgi_app()
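A minimal sketch of how append_format_pattern() rewrites a route above; the formats tuple is a hypothetical stand-in for registered_formats():
import re

def append_format_pattern(route, formats=('html', 'json')):
    # rewrite "{name}" placeholders so they stop at "/" or ".", then append format suffix
    return re.sub(r'{(.*)}', r'{\1:[^/\.]+}', route) + r'{spacer:[.]?}{format:(%s)?}' % '|'.join(formats)

print(append_format_pattern('/track/{id}'))
# /track/{id:[^/\.]+}{spacer:[.]?}{format:(html|json)?}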
0
Example 62
Project: system-config-printer Source File: ppds.py
def getPPDNamesFromDeviceID (self, mfg, mdl, description="",
commandsets=None, uri=None,
make_and_model=None):
"""
Obtain a best-effort PPD match for an IEEE 1284 Device ID.
@param mfg: MFG or MANUFACTURER field
@type mfg: string
@param mdl: MDL or MODEL field
@type mdl: string
@param description: DES or DESCRIPTION field, optional
@type description: string
@param commandsets: CMD or COMMANDSET field, optional
@type commandsets: string
@param uri: device URI, optional (only needed for debugging)
@type uri: string
@param make_and_model: device-make-and-model string
@type make_and_model: string
@returns: a dict of fit (string) indexed by PPD name
"""
_debugprint ("\n%s %s" % (mfg, mdl))
orig_mfg = mfg
orig_mdl = mdl
self._init_ids ()
if commandsets is None:
commandsets = []
# Start with an empty result list and build it up using
# several search methods, in increasing order of fuzziness.
fit = {}
# First, try looking up the device using the manufacturer and
# model fields from the Device ID exactly as they appear (but
# case-insensitively).
mfgl = mfg.lower ()
mdll = mdl.lower ()
id_matched = False
try:
for each in self.ids[mfgl][mdll]:
fit[each] = self.FIT_EXACT
id_matched = True
except KeyError:
pass
# The HP PPDs say "HP" not "Hewlett-Packard", so try that.
if mfgl == "hewlett-packard":
try:
for each in self.ids["hp"][mdll]:
fit[each] = self.FIT_EXACT
print ("cuem Incorrect IEEE 1284 Device ID: %s" %
self.ids["hp"][mdll])
print ("**** Actual ID is MFG:%s;MDL:%s;" % (mfg, mdl))
print ("**** Please report a bug against the HPLIP component")
id_matched = True
except KeyError:
pass
# Now try looking up the device by ppd-make-and-model.
_debugprint ("Trying make/model names")
mdls = None
self._init_makes ()
make = None
if mfgl == "":
(mfg, mdl) = ppdMakeModelSplit (mdl)
mfgl = normalize (mfg)
mdll = normalize (mdl)
_debugprint ("mfgl: %s" % mfgl)
_debugprint ("mdll: %s" % mdll)
mfgrepl = {"hewlett-packard": "hp",
"lexmark international": "lexmark",
"kyocera": "kyocera mita"}
if mfgl in self.lmakes:
# Found manufacturer.
make = self.lmakes[mfgl]
elif mfgl in mfgrepl:
rmfg = mfgrepl[mfgl]
if rmfg in self.lmakes:
mfg = rmfg
mfgl = mfg
# Found manufacturer (after mapping to canonical name)
_debugprint ("remapped mfgl: %s" % mfgl)
make = self.lmakes[mfgl]
_debugprint ("make: %s" % make)
if make is not None:
mdls = self.makes[make]
mdlsl = self.lmodels[normalize(make)]
# Remove manufacturer name from model field
for prefix in [mfgl, 'hewlett-packard', 'hp']:
if mdll.startswith (prefix + ' '):
mdl = mdl[len (prefix) + 1:]
mdll = normalize (mdl)
_debugprint ("unprefixed mdll: %s" % mdll)
if mdll in self.lmodels[mfgl]:
model = mdlsl[mdll]
for each in mdls[model].keys ():
fit[each] = self.FIT_EXACT
_debugprint ("%s: %s" % (fit[each], each))
else:
# Make use of the model name clean-up in the
# ppdMakeModelSplit () function
(mfg2, mdl2) = ppdMakeModelSplit (mfg + " " + mdl)
mdl2l = normalize (mdl2)
_debugprint ("re-split mdll: %s" % mdl2l)
if mdl2l in self.lmodels[mfgl]:
model = mdlsl[mdl2l]
for each in list(mdls[model].keys ()):
fit[each] = self.FIT_EXACT
_debugprint ("%s: %s" % (fit[each], each))
if not fit and mdls:
(s, ppds) = self._findBestMatchPPDs (mdls, mdl)
if s != self.FIT_NONE:
for each in ppds:
fit[each] = s
_debugprint ("%s: %s" % (fit[each], each))
if commandsets:
if type (commandsets) != list:
commandsets = commandsets.split (',')
_debugprint ("Checking CMD field")
generic = self._getPPDNameFromCommandSet (commandsets)
if generic:
for driver in generic:
fit[driver] = self.FIT_GENERIC
_debugprint ("%s: %s" % (fit[driver], driver))
# What about the CMD field of the Device ID? Some devices
# have optional units for page description languages, such as
# PostScript, and they will report different CMD strings
# accordingly.
#
# By convention, if a PPD contains a Device ID with a CMD
# field, that PPD can only be used whenever any of the
# comma-separated words in the CMD field appear in the
# device's ID.
# (See Red Hat bug #630058).
#
# We'll do that check now, and any PPDs that fail
# (e.g. PostScript PPD for non-PostScript printer) can be
# eliminated from the list.
#
# The reason we don't do this check any earlier is that we
# don't want to eliminate PPDs only to have the fuzzy matcher
# add them back in afterwards.
#
# While doing this, any drivers that we can positively confirm
# as using a command set understood by the printer will be
# converted from FIT_EXACT to FIT_EXACT_CMD.
if id_matched and len (commandsets) > 0:
failed = set()
exact_cmd = set()
for ppdname in fit.keys ():
ppd_cmd_field = None
ppd = self.ppds[ppdname]
ppd_device_id = _singleton (ppd.get ('ppd-device-id'))
if ppd_device_id:
ppd_device_id_dict = parseDeviceID (ppd_device_id)
ppd_cmd_field = ppd_device_id_dict["CMD"]
if (not ppd_cmd_field and
# ppd-type is not reliable for driver-generated
# PPDs (see CUPS STR #3720). Neither gutenprint
# nor foomatic specify ppd-type in their CUPS
# drivers.
ppdname.find (":") == -1):
# If this is a PostScript PPD we know which
# command set it will use.
ppd_type = _singleton (ppd.get ('ppd-type'))
if ppd_type == "postscript":
ppd_cmd_field = ["POSTSCRIPT"]
if not ppd_cmd_field:
# We can't be sure which command set this driver
# uses.
continue
usable = False
for pdl in ppd_cmd_field:
if pdl in commandsets:
usable = True
break
if usable:
exact_cmd.add (ppdname)
else:
failed.add (ppdname)
# Assign the more specific fit "exact-cmd" to those that
# positively matched the CMD field.
for each in exact_cmd:
if fit[each] == self.FIT_EXACT:
fit[each] = self.FIT_EXACT_CMD
_debugprint (self.FIT_EXACT_CMD + ": %s" % each)
if len (failed) < len ([d for (d, m) in fit.items ()
if m != 'generic']):
_debugprint ("Removed %s due to CMD mis-match" % failed)
for each in failed:
del fit[each]
else:
_debugprint ("Not removing %s " % failed +
"due to CMD mis-match as it would "
"leave nothing good")
if not fit:
fallbacks = ["textonly.ppd", "postscript.ppd"]
found = False
for fallback in fallbacks:
_debugprint ("'%s' fallback" % fallback)
fallbackgz = fallback + ".gz"
for ppdpath in self.ppds.keys ():
if (ppdpath.endswith (fallback) or
ppdpath.endswith (fallbackgz)):
fit[ppdpath] = self.FIT_NONE
found = True
break
if found:
break
_debugprint ("Fallback '%s' not available" % fallback)
if not found:
_debugprint ("No fallback available; choosing any")
fit[list(self.ppds.keys ())[0]] = self.FIT_NONE
if not id_matched:
sanitised_uri = re.sub (pattern="//[^@]*@/?", repl="//",
string=str (uri))
try:
cmd = reduce (lambda x, y: x + ","+ y, commandsets)
except TypeError:
cmd = ""
id = "MFG:%s;MDL:%s;" % (orig_mfg, orig_mdl)
if cmd:
id += "CMD:%s;" % cmd
if description:
id += "DES:%s;" % description
print ("No ID match for device %s:" % sanitised_uri)
print (id)
return fit
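A standalone sketch of the URI-sanitising substitution used before logging above, with a hypothetical device URI containing credentials:
import re

uri = "ipp://user:secret@printer.example.com/ipp/print"   # hypothetical URI
print(re.sub(pattern="//[^@]*@/?", repl="//", string=uri))
# -> "ipp://printer.example.com/ipp/print"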
0
Example 63
Project: inspectors-general Source File: doj.py
def extract_info(content, directory, year_range):
# goes through each agency or content bucket
if directory in not_agency:
agency = "doj"
agency_name = "Department of Justice"
elif directory[:11] == "Immigration":
agency = "ins"
agency_name = "Immigration and Naturalization Service"
else:
agency = agency_decoder[directory][1]
agency_name = agency_decoder[directory][0]
# there can be multiple reports per blurb
blurbs = content[-1].find_all("p")
report_count = 0
for b in blurbs:
# date
# finding new dates that are just above the old ones
# this is the format of the newest entries and the easiest to get
x = b.previous_sibling
y = b.previous_sibling.previous_sibling
if isinstance(y, Tag) and y.get('class') == ['date']:
date_string = y.string
elif isinstance(x, Tag) and x.get('class') == ['date']:
date_string = x.string
else:
date_string = None
# finding older dates that are at the end of the text
if date_string == None:
try:
date_string = b.get_text()
except:
date_string = None
if date_string is not None:
# get rid of extra stuff that is not the date
date_text = re.sub(r'\([^)]*\)', '', date_string)
date_text = re.sub(r'\[(.*?)\]', '', date_text)
# chop up the string, the last part should be the date
date_chopped = date_text.rsplit(',')
day = date_chopped[-1]
# ATF added dashes
if "-" in day:
date_chopped = date_text.rsplit('-')
day = date_chopped[0]
# cleaning
date_string = day.strip()
date_string = date_string.replace(" ", " ")
day = day.strip()
# this is a date written out with a comma
if day.isdigit():
date_string = date_chopped[-2] + "," + date_chopped[-1]
# check for missing commas
try:
date_string = datetime.strptime(date_string, "%B %d %Y")
date_string = datetime.strftime(date_string, "%B %d, %Y")
except ValueError:
pass
# for dates without a day
if date_string is not None:
date_string = date_string.strip()
if "," not in date_string:
date_test = date_string.replace(" ", " 1, ")
try:
datetime.strptime(date_test, "%B %d, %Y")
date_string = date_test
except ValueError:
pass
# going through each link in a paragraph
for l in b.find_all("a"):
date = None
real_title = None
# most cases pass this test
try:
date = datetime.strptime(date_string, "%B %d, %Y")
# these ones go to a coding purgatory called odd_link
except ValueError:
info = odd_link(b, date_string, l, directory, )
# this should give better titles than "pdf" or "Press Release"
real_title = info["real_title"]
date_string = info["date_string"]
# these are links to things that are not reports
if real_title == False and date_string == False:
break
elif "," not in date_string:
date_string = date_string.strip()
date_string = date_string.replace(" ", " 1, ")
date = datetime.strptime(date_string, "%B %d, %Y")
if date is None:
date = datetime.strptime(date_string, "%B %d, %Y")
report_year = datetime.strftime(date, "%Y")
published_on = datetime.strftime(date, "%Y-%m-%d")
# trying to get the most descriptive title
# I go from the best methods to fall back and override exceptions
try:
string_title = b.text
except:
string_title = b.string
if string_title == None:
string_title = b.contents
if "<a href=" in str(string_title):
string_title = b.contents[0]
link = l.get("href")
link = strip_url_fragment(link)
if link != None:
# title
try:
title = l.text
except:
title = l.string
if title in ("HTML", "PDF", "Executive Summary", "Full Report"):
title = string_title
# in some cases the title is a heading a few elements up; this gets passed in odd_link
if real_title is not None:
title = real_title
if title == 'id="content" name="content">':
title = b.string
if title == None:
title = b.text
try:
title = title.strip()
title = title.replace('\n', "")
title = title.replace('\r', "")
except:
pass
file_type = find_file_type(link)
if file_type == None or title == False:
break
if title == None:
title = b.string
# formatting links consistently
link = urljoin(base_url, link)
# id
doc_id = os.path.splitext(urlparse(link).path)[0]
#these docs are one report where the page has a table of contents with links to content
if "/index" in link:
indexed = True
else:
indexed = False
# creating ids
# there may be a better way to do this but I am just taking out all the things that are not the id
url_extras = ( "/final", "/fullpdf", "/ins_response", "oig/special/", "USMS/", "plus/", "oig/grants/", "oig/reports/", "EOUSA/", "BOP/", "ATF/", "COPS/", "FBI/", "OJP/", "INS/", "DEA/", "OBD", "/analysis", "/report", "/PDF_list", "/full_report", "/full", "_redacted", "oig", "r-", "/response", "/listpdf", "/memo", "/fullreport", "/Final", "/extradition", "/oig", "/grants", "/index")
for n in url_extras:
if n in doc_id:
doc_id = doc_id.replace(n, "")
while doc_id[:1] == "/":
doc_id = doc_id[1:]
year_match = YEAR_RE.match(doc_id)
if year_match:
doc_id = year_match.group(1)
ag_match = AG_RE.match(link)
if ag_match:
doc_id = ag_match.group(1)
# if it's still got slashes, just turn them into dashes
# the ol' slash and dash
doc_id = doc_id.replace("/", "-")
# some weird issues I hard coded
special_cases = {"a0118/au0118":"a0118", "a0207/0207":"a0207"}
if doc_id in special_cases:
doc_id = special_cases[doc_id]
if "spanish" in link:
language = "Spanish"
else:
language = "English"
report_count += 1
# if we're filtering on a year, and this isn't in it, skip it
if int(report_year) not in year_range:
# print("Skipping report for %s..." % report_year)
continue
if doc_id in report:
if file_type == "pdf":
# current and previous file pdf
if report[doc_id]["file_type"] == "pdf":
report[doc_id]["categories"].append(directory)
# current file a pdf, old file html
else:
report[doc_id]["file_type"] = "pdf"
report[doc_id]["url"] = link
report[doc_id]["categories"].append(directory)
else:
# current file html old file pdf OR both files html
report[doc_id]["categories"].append(directory)
# add url if new
old_url = False
for n in report[doc_id]["urls"]:
if link in n:
old_url = True
if not old_url:
report[doc_id]["urls"].append({
"url": link,
"file_type": file_type,
"indexed": indexed,
})
# finding the most descriptive name for cross-listed docs
if report[doc_id]["agency"] == "doj" and agency != "doj":
report[doc_id]["agency"] = agency
report[doc_id]["agency_name"] = agency_name
# Adding new document
else:
report[doc_id] = {
"report_id": doc_id,
"inspector": "doj",
"inspector_url": "https://oig.justice.gov/reports/",
"agency": agency,
"agency_name": agency_name,
"url": link,
"title": title,
"file_type": file_type,
"categories": [directory,],
"urls": [{
"url": link,
"file_type": file_type,
"indexed": indexed,
}],
"published_on": published_on,
# perhaps elaborate on this later
"type": type_for(title),
"language": language,
}
if report_count == 0:
raise inspector.NoReportsFoundError("DOJ (%s)" % directory)
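A small sketch of the two clean-up substitutions applied before date parsing above, using a hypothetical report blurb:
import re

date_string = "Audit of Grant Funds [redacted] (Summary Only), September 30, 2010"   # hypothetical blurb
date_text = re.sub(r'\([^)]*\)', '', date_string)   # drop parenthesised notes
date_text = re.sub(r'\[(.*?)\]', '', date_text)     # drop bracketed notes
print(date_text)   # the bracketed and parenthesised notes are gone; the date stays at the end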
0
Example 64
def main(argv=None):
"""script main.
parses command line options in sys.argv, unless *argv* is given.
"""
if not argv:
argv = sys.argv
# setup command line parser
parser = E.OptionParser(
version="%prog version: $Id: run_nubiscan.py 2861 2010-02-23 17:36:32Z andreas $", usage=globals()["__doc__"])
parser.add_option("-i", "--iterations", dest="iterations", type="int",
help="number of iterations for sampling [default=%default].")
parser.add_option("-q", "--qvalue", dest="qvalue_threshold", type="float",
help="qvalue threshold [default=%default].")
parser.add_option("--without-combine", dest="combine", action="store_false",
help="combine overlapping motifs [default=%default].")
parser.add_option("-f", "--fdr-control", dest="fdr_control", type="choice",
choices=("per-sequence", "all", "xall"),
help="qvalue threshold [default=%default].")
parser.add_option("-m", "--motif", dest="motif", type="choice",
choices=("rxrvdr", "rxrvdr1", "rxrvdr2", "nr"),
help="qvalue threshold [default=%default].")
parser.add_option("-a", "--arrangements", dest="arrangements", type="string",
help="',' separated list of repeat arrangements [default=%default]")
parser.add_option("-x", "--mask", dest="mask", type="choice",
choices=("dust", "repeatmasker"),
help ="mask sequences before scanning [default=%default]")
parser.add_option("--output-stats", dest="output_stats", action="store_true",
help="output stats [default=%default].")
parser.add_option("--add-sequence", dest="add_sequence", action="store_true",
help="add sequence information [default=%default].")
parser.set_defaults(
iterations=100,
qvalue_threshold=0.05,
motif="rxrvdr",
fdr_control="all",
combine=True,
arrangements=None,
mask=None,
output_stats=False,
add_sequence=False,
)
# add common options (-h/--help, ...) and parse command line
(options, args) = E.Start(parser, argv=argv, add_output_options=True)
# do sth
ninput, nskipped, noutput = 0, 0, 0
if options.arrangements is None:
options.arrangements = [
"DR%s" % x for x in range(0, 15)] + ["ER%s" % x for x in range(0, 15)]
else:
options.arrangements = options.arrangements.split(",")
options.stdout.write("%s" % "\t".join(Nubiscan.NubiscanMatch._fields))
if options.add_sequence:
options.stdout.write("\tsequence")
options.stdout.write("\n")
if options.motif == 'nr':
sense_matrix = NR
elif options.motif == "rxrvdr":
sense_matrix = RXRVDR
elif options.motif == "rxrvdr1":
sense_matrix = RXRVDR1
elif options.motif == "rxrvdr2":
sense_matrix = RXRVDR2
else:
raise ValueError("unknown matrix %s" % options.motif)
if options.fdr_control == "all":
seqs = list(FastaIterator.iterate(options.stdin))
if options.mask:
masked_seqs = maskSequences(
[x.sequence for x in seqs], options.mask)
else:
masked_seqs = [x.sequence for x in seqs]
ninput = len(seqs)
map_id2title = dict(
enumerate([re.sub("\s.*", "", x.title) for x in seqs]))
matcher = Nubiscan.MatcherRandomisationSequences(sense_matrix,
samples=options.iterations)
results = matcher.run(masked_seqs,
options.arrangements,
qvalue_threshold=options.qvalue_threshold)
if options.combine:
results = Nubiscan.combineMotifs(results)
for r in results:
if r.alternatives:
alternatives = ",".join(
[x.arrangement for x in r.alternatives])
else:
alternatives = ""
options.stdout.write("\t".join((
map_id2title[r.id],
"%i" % r.start,
"%i" % r.end,
r.strand,
r.arrangement,
"%6.4f" % r.score,
"%6.4f" % r.zscore,
"%6.4e" % r.pvalue,
"%6.4e" % r.qvalue,
alternatives)))
if options.add_sequence:
s = masked_seqs[int(r.id)][r.start:r.end]
if r.strand == "-":
s = Genomics.complement(s)
s = s[:6].upper() + s[6:-6].lower() + s[-6:].upper()
options.stdout.write("\t%s" % s)
options.stdout.write("\n")
noutput += 1
# output stats
if options.output_stats:
outfile = E.openOutputFile("fdr")
outfile.write("bin\thist\tnobserved\n")
for bin, hist, nobs in zip(matcher.bin_edges, matcher.hist, matcher.nobservations):
outfile.write("%f\t%f\t%f\n" % (bin, hist, nobs))
outfile.close()
elif options.fdr_control == "xall":
matcher = Nubiscan.MatcherRandomisationSequence(sense_matrix,
samples=options.iterations)
# collect all results
matches = []
for seq in FastaIterator.iterate(options.stdin):
ninput += 1
mm = matcher.run(seq.sequence,
options.arrangements,
qvalue_threshold=None)
for m in mm:
matches.append(m._replace(sequence=seq.title))
# estimate qvalues for all matches across all sequences
pvalues = [x.pvalue for x in matches]
fdr = Stats.doFDR(pvalues)
qvalues = fdr.mQValues
results = []
for m, qvalue in zip(matches, qvalues):
if qvalue > options.qvalue_threshold:
continue
results.append(m._replace(qvalue=qvalue))
if options.combine:
results = Nubiscan.combineMotifs(results)
# output
for r in results:
options.stdout.write("\t".join((
r.id,
"%i" % r.start,
"%i" % r.end,
r.strand,
r.arrangement,
"%6.4f" % r.score,
"%6.4f" % r.zscore,
"%6.4e" % r.pvalue,
"%6.4e" % r.qvalue)) + "\n")
noutput += 1
elif options.fdr_control == "per-sequence":
matcher = Nubiscan.MatcherRandomisationSequence(sense_matrix,
samples=options.iterations)
for seq in FastaIterator.iterate(options.stdin):
ninput += 1
result = matcher.run(seq.sequence,
options.arrangements,
qvalue_threshold=options.qvalue_threshold)
if options.combine:
result = Nubiscan.combineMotifs(result)
t = re.sub(" .*", "", seq.title)
for r in result:
options.stdout.write("\t".join((
t,
"%i" % r.start,
"%i" % r.end,
r.strand,
r.arrangement,
"%6.4f" % r.score,
"%6.4f" % r.zscore,
"%f" % r.pvalue,
"%f" % r.qvalue)) + "\n")
noutput += 1
E.info("ninput=%i, noutput=%i, nskipped=%i" % (ninput, noutput, nskipped))
# write footer and output benchmark information.
E.Stop()
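A tiny sketch of how the sequence identifiers are derived from FASTA titles above (everything from the first whitespace onward is dropped), with a hypothetical title line:
import re

title = "chr1 assembled from contig_001"   # hypothetical FASTA title
print(re.sub("\s.*", "", title))            # "chr1"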
0
Example 65
Project: WIPSTER Source File: crits.py
def submit_to_crits(post_data, last_sample, crits_ta, savename=""):
crits_result = {}
crits_str_result = ""
crits_upload_dict = {}
data = {}
final_data = {}
search_res = {}
for k, v in post_data.iteritems():
if "chk" in k and v=="on":
chk_input_key = re.sub("_chk", "", k)
if chk_input_key in post_data : #Make sure there's an input that matches with the checkbox
#Create or clear the dict if it already exists
data, final_data = clear_upload_dicts(data, final_data)
search_res.clear()
if "_domain_" in chk_input_key:
data['type'] = "domain"
elif "_ip_" in chk_input_key:
data['type'] = "ip"
elif "_vt_" in chk_input_key:
data['type'] = "vt"
elif "_command_" in chk_input_key:
data['type'] = "command"
elif "_ua_" in chk_input_key:
data['type'] = "ua"
else:
data['type'] = "event"
data['val'] = post_data[chk_input_key]
if not data['val']: #If the input is empty, check the next form box
continue
if "ta_" in chk_input_key:
data['ta'] = True
else:
data['ta'] = False
# Search if the object already exists. If it does, pull in the JSON, otherwise, add it to CRITs
data['search'] = data['val']
search_res = search_crits(data)
# Set types for relationships later on
if data['type'] == "domain":
crits_type = "Domain"
elif data['type'] == "ip":
crits_type = "IP"
else:
crits_type = "Event"
if search_res['objects']: # If result found
if data['type'] not in crits_upload_dict: # If a list for that type does not yet exist, create it
crits_upload_dict[data['type']] = []
crits_upload_dict[data['type']].append({"id": search_res['objects'][0]['_id'],
"type": crits_type})
else: # If no result found in search, add it to CRITs
final_data = build_data(data, last_sample)
crits_upload_res = upload_object(final_data)
crits_str_result += "uploaded " + data['type'] + "\r\n\r\n" + str(crits_upload_res) + "\r\n\r\ncuem*********\r\n\r\n"
if data['type'] not in crits_upload_dict: # If a list of that type does not yet exist, create it
crits_upload_dict[data['type']] = []
crits_upload_dict[data['type']].append({"id": crits_upload_res['id'],
"type": crits_type})
#################################################
#### Handle Uploading the Ticket as an Event ####
#################################################
#Create or clear the dict if it already exists
data, final_data = clear_upload_dicts(data, final_data)
search_res.clear()
data['type'] = "ticket"
# Search if the ticket already exists. If it does, pull in the JSON, otherwise, add it to CRITs
data['search'] = last_sample.ticket
search_res = search_crits(data)
if 'objects' in search_res: # If an event with this Ticket # is found to exist, use its existing ID
if search_res['objects']:
crits_upload_dict['ticket'] = [{'id': search_res['objects'][0]['_id'],
'type': 'Event'}]
# crits_upload_dict['ticket'][0]['id'] = search_res['objects'][0]['_id']
# crits_upload_dict['ticket'][0]['type'] = 'Event'
else: # Otherwise, upload it
final_data = build_data(data, last_sample)
crits_upload_res = upload_object(final_data)
crits_upload_dict['ticket'] = [{'id': crits_upload_res['id'],
'type': 'Event'}]
crits_str_result += "\r\nUploaded Ticket: " + str(crits_upload_dict['ticket'][0]) + "\r\n\r\n***********************\r\n"
############################################
#### Handle uploading the sample itself ####
############################################
#Create or clear the dict if it already exists
data, final_data = clear_upload_dicts(data, final_data)
search_res.clear()
data['type'] = "sample"
# Search if the sample already exists. If it does, pull the JSON, otherwise, add it to CRITs
data['search'] = last_sample.md5
search_res = search_crits(data)
if 'objects' in search_res:
if search_res['objects']:
crits_upload_dict['sample'] = [{'id': search_res['objects'][0]['_id'],
'type': 'Sample'}]
else:
# Need to handle renaming the sample to remove the .MAL when adding to CRITs
# Before calling build_data()
savename = "sanalysis/static/"+savename
newname = threatanalyzer.remove_mal(savename) # Copy the file without .MAL - Removed later in main method
final_data = build_data(data, last_sample, newname=newname)
crits_upload_res = upload_object(final_data)
rem_tmp = threatanalyzer.remove_tmp_file(newname) # Remove the copy of the file that doesn't have .MAL
crits_upload_dict['sample'] = [{'id': crits_upload_res['id'],
'type': 'Sample'}]
crits_str_result += "\r\nUploaded Sample: \r\n" + str(crits_upload_dict['sample'][0]) + "\r\n\r\n****************\r\n\r\n"
########################################################
#### Handle uploading metadata of any dropped files ####
########################################################
if 'crits_dropped' in crits_ta:
crits_upload_dict['sample_metadata'] = []
for dropped in crits_ta['crits_dropped']:
data, final_data = clear_upload_dicts(data, final_data)
search_res.clear()
data['type'] = "sample_metadata"
data['val'] = dropped
data['search'] = dropped['md5']
search_res = search_crits(data)
if 'objects' in search_res:
if search_res['objects']:
crits_upload_dict['sample_metadata'].append({'id': search_res['objects'][0]['_id'],
'type': 'Sample'})
else:
final_data = build_data(data, last_sample)
crits_upload_res = upload_object(final_data)
crits_upload_dict['sample_metadata'].append({'id': crits_upload_res['id'],
'type': 'Sample'})
crits_str_result += "\r\nUploaded Sample MetaData: \r\n" + str(crits_upload_dict['sample_metadata'][-1]) + "\r\n\r\n****************\r\n\r\n"
##################################
#### Handle all relationships ####
##################################
data, final_data = clear_upload_dicts(data, final_data) #Clear dicts
crits_str_result += "\r\n\r\n****************crits_upload_dict*****************\r\n\r\n" + str(crits_upload_dict)
relation_res = relate_objects(crits_upload_dict, last_sample)
crits_str_result += "\r\n\r\n****************relation_res*****************\r\n\r\n" + str(relation_res)
return crits_str_result
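A minimal sketch of the checkbox-to-input key mapping used above, with hypothetical form field names:
import re

post_data = {"ta_domain_1_chk": "on", "ta_domain_1": "example.com"}   # hypothetical form data
for k, v in post_data.items():
    if "chk" in k and v == "on":
        chk_input_key = re.sub("_chk", "", k)
        if chk_input_key in post_data:
            print(chk_input_key + " -> " + post_data[chk_input_key])   # ta_domain_1 -> example.com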
0
Example 66
Project: poio-api Source File: toolbox.py
def _build_annotations(self):
"""
Helper method to parse the input file and store intermediate information
in attributes.
"""
elements = dict()
ids = dict()
current_record_id = 0
current_utterance_id = 0
current_id = 0
first_marker_found = False
tier_marker = None
current_utterance = None
# Go through lines in the input file
for line_number, line in enumerate(self.input_stream):
# remove BOM
if line_number == 0:
if line.startswith(codecs.BOM_UTF8):
line = line[BOMLEN:]
line = line.decode("utf-8", 'ignore')
line = line.strip()
if "\name" in line:
self.meta_information = line.split(None,2)[2]
if line == "":
if len(elements) > 0:
self._process_record(elements, ids, current_utterance_id)
elements = dict()
ids = dict()
continue
# parse line
last_tier_marker = tier_marker
tier_marker = None
line_content = None
match_tier_marker = re_tier_marker.search(line)
if match_tier_marker:
tier_marker = match_tier_marker.group(1)
line_content = re_tier_marker.sub("", line)
line_content = line_content.lstrip()
elif first_marker_found:
# append to last annotation's content
id_to_add = current_record_id
if last_tier_marker in self.utterance_level_markers:
id_to_add = current_utterance_id
if self._tier_labels.tier_label_exists(last_tier_marker):
self._annotations_for_parent[
("a{0}".format(id_to_add),
last_tier_marker)][-1].value += " " + \
line
tier_marker = last_tier_marker
continue
# skip all lines before first record marker
if not first_marker_found:
if tier_marker != self.record_marker:
continue
else:
first_marker_found = True
if tier_marker in self.word_level_markers:
# Is it a new utterance? Then create a new ID.
if current_utterance is None:
current_utterance = ""
if current_utterance == "":
current_utterance_id = current_id
current_id += 1
current_utterance += re.sub("\s+", " ", line_content) + " "
if tier_marker in self.word_level_markers or \
tier_marker in self.morpheme_level_markers or \
tier_marker in self.tag_level_markers:
if tier_marker not in elements:
elements[tier_marker] = dict()
if tier_marker not in ids:
ids[tier_marker] = dict()
for j, match in enumerate(re_word.finditer(line)):
pos = char_len(line[:match.start(1)])
elements[tier_marker][pos] = match.group(1)
ids[tier_marker][pos] = "a{0}".format(current_id)
current_id += 1
# utterance level markers
elif tier_marker in self.utterance_level_markers:
# we left the utterance tiers, so create an utterance
# annotation based on the content and make it the current
# utterance
if current_utterance is not None and current_utterance != "":
current_utterance = current_utterance.rstrip()
self._annotate_utterance(current_record_id,
current_utterance_id,
current_utterance)
current_utterance = ""
elif current_utterance is None:
current_utterance_id = current_id
current_id += 1
self._annotate_utterance(current_record_id,
current_utterance_id, "")
current_utterance = ""
# add the annotation to the current utterance
self._annotations_for_parent[
("a{0}".format(current_utterance_id), tier_marker)].append(
poioapi.io.graf.Annotation(
"a{0}".format(current_id), line_content))
current_id += 1
# record level markers
elif tier_marker in self.record_level_markers:
if tier_marker == self.record_marker:
# this is to ensure that the utterance gets annotated even
# if there were no other utterance_level_markers to cause
# it to be
if current_utterance is not None and current_utterance != '':
self._annotate_utterance(current_record_id,
current_utterance_id,
current_utterance)
self._annotations_for_parent[
(None, tier_marker)].append(
poioapi.io.graf.Annotation(
"a{0}".format(current_id), line_content))
current_record_id = current_id
current_id += 1
current_utterance = None
else:
# this is to ensure that the utterance gets annotated even
# if there were no other utterance_level_markers to cause
# it to be
if current_utterance is not None and current_utterance != '':
self._annotate_utterance(current_record_id,
current_utterance_id,
current_utterance)
self._annotations_for_parent[
("a{0}".format(current_record_id), tier_marker)].append(
poioapi.io.graf.Annotation(
"a{0}".format(current_id), line_content))
current_id += 1
self.input_stream.seek(0)
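A minimal sketch of the whitespace normalisation applied to utterance content above, with a hypothetical Toolbox line whose tier marker has already been stripped:
import re

line_content = "a   small\texample"          # hypothetical line content
print(re.sub("\s+", " ", line_content))      # "a small example"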
0
Example 67
Project: LTLMoP Source File: specCompiler.py
def _writeLTLFile(self):
self.LTL2SpecLineNumber = None
#regionList = [r.name for r in self.parser.proj.rfi.regions]
regionList = [r.name for r in self.proj.rfi.regions]
sensorList = deepcopy(self.proj.enabled_sensors)
robotPropList = self.proj.enabled_actuators + self.proj.all_customs
text = self.proj.specText
response = None
# Create LTL using selected parser
# TODO: rename decomposition object to something other than 'parser'
if self.proj.compile_options["parser"] == "slurp":
# default to no region tags if no simconfig is defined, so we can compile without
if self.proj.current_config == "":
region_tags = {}
else:
self.hsub = handlerSubsystem.HandlerSubsystem(None, self.proj.project_root)
config, success = self.hsub.loadConfigFile(self.proj.current_config)
if success: self.hsub.configs.append(config)
self.hsub.setExecutingConfig(self.proj.current_config)
region_tags = self.hsub.executing_config.region_tags
# Hack: We need to make sure there's only one of these
global _SLURP_SPEC_GENERATOR
# Make a new specgenerator and have it process the text
if not _SLURP_SPEC_GENERATOR:
# Add SLURP to path for import
p = os.path.dirname(os.path.abspath(__file__))
sys.path.append(os.path.join(p, "..", "etc", "SLURP"))
from ltlbroom.specgeneration import SpecGenerator
_SLURP_SPEC_GENERATOR = SpecGenerator()
# Filter out regions it shouldn't know about
filtered_regions = [region.name for region in self.proj.rfi.regions
if not (region.isObstacle or region.name.lower() == "boundary")]
LTLspec_env, LTLspec_sys, self.proj.internal_props, internal_sensors, results, responses, traceback = \
_SLURP_SPEC_GENERATOR.generate(text, sensorList, filtered_regions, robotPropList, region_tags)
oldspec_env = LTLspec_env
oldspec_sys = LTLspec_sys
for ln, result in enumerate(results):
if not result:
logging.warning("Could not parse the sentence in line {0}".format(ln))
# Abort compilation if there were any errors
if not all(results):
return None, None, responses
# Add in the sensors so they go into the SMV and spec files
for s in internal_sensors:
if s not in sensorList:
sensorList.append(s)
self.proj.all_sensors.append(s)
self.proj.enabled_sensors.append(s)
# Conjoin all the spec chunks
LTLspec_env = '\t\t' + ' & \n\t\t'.join(LTLspec_env)
LTLspec_sys = '\t\t' + ' & \n\t\t'.join(LTLspec_sys)
if self.proj.compile_options["decompose"]:
# substitute decomposed region names
for r in self.proj.rfi.regions:
if not (r.isObstacle or r.name.lower() == "boundary"):
LTLspec_env = re.sub('\\bs\.' + r.name + '\\b', "("+' | '.join(["s."+x for x in self.parser.proj.regionMapping[r.name]])+")", LTLspec_env)
LTLspec_env = re.sub('\\be\.' + r.name + '\\b', "("+' | '.join(["e."+x for x in self.parser.proj.regionMapping[r.name]])+")", LTLspec_env)
LTLspec_sys = re.sub('\\bs\.' + r.name + '\\b', "("+' | '.join(["s."+x for x in self.parser.proj.regionMapping[r.name]])+")", LTLspec_sys)
LTLspec_sys = re.sub('\\be\.' + r.name + '\\b', "("+' | '.join(["e."+x for x in self.parser.proj.regionMapping[r.name]])+")", LTLspec_sys)
response = responses
elif self.proj.compile_options["parser"] == "ltl":
# delete comments
text = re.sub(r"#.*$", "", text, flags=re.MULTILINE)
# split into env and sys parts (by looking for a line of just dashes in between)
LTLspec_env, LTLspec_sys = re.split(r"^\s*-+\s*$", text, maxsplit=1, flags=re.MULTILINE)
# split into subformulas
LTLspec_env = re.split(r"(?:[ \t]*[\n\r][ \t]*)+", LTLspec_env)
LTLspec_sys = re.split(r"(?:[ \t]*[\n\r][ \t]*)+", LTLspec_sys)
# remove any empty initial entries (HACK?)
while '' in LTLspec_env:
LTLspec_env.remove('')
while '' in LTLspec_sys:
LTLspec_sys.remove('')
# automatically conjoin all the subformulas
LTLspec_env = '\t\t' + ' & \n\t\t'.join(LTLspec_env)
LTLspec_sys = '\t\t' + ' & \n\t\t'.join(LTLspec_sys)
if self.proj.compile_options["decompose"]:
# substitute decomposed region
for r in self.proj.rfi.regions:
if not (r.isObstacle or r.name.lower() == "boundary"):
LTLspec_env = re.sub('\\b(?:s\.)?' + r.name + '\\b', "("+' | '.join(["s."+x for x in self.parser.proj.regionMapping[r.name]])+")", LTLspec_env)
LTLspec_sys = re.sub('\\b(?:s\.)?' + r.name + '\\b', "("+' | '.join(["s."+x for x in self.parser.proj.regionMapping[r.name]])+")", LTLspec_sys)
else:
for r in self.proj.rfi.regions:
if not (r.isObstacle or r.name.lower() == "boundary"):
LTLspec_env = re.sub('\\b(?:s\.)?' + r.name + '\\b', "s."+r.name, LTLspec_env)
LTLspec_sys = re.sub('\\b(?:s\.)?' + r.name + '\\b', "s."+r.name, LTLspec_sys)
traceback = [] # HACK: needs to be something other than None
elif self.proj.compile_options["parser"] == "structured":
import parseEnglishToLTL
if self.proj.compile_options["decompose"]:
# substitute the region names in specs
for m in re.finditer(r'near (?P<rA>\w+)', text):
text=re.sub(r'near (?P<rA>\w+)', "("+' or '.join(["s."+r for r in self.parser.proj.regionMapping['near$'+m.group('rA')+'$'+str(50)]])+")", text)
for m in re.finditer(r'within (?P<dist>\d+) (from|of) (?P<rA>\w+)', text):
text=re.sub(r'within ' + m.group('dist')+' (from|of) '+ m.group('rA'), "("+' or '.join(["s."+r for r in self.parser.proj.regionMapping['near$'+m.group('rA')+'$'+m.group('dist')]])+")", text)
for m in re.finditer(r'between (?P<rA>\w+) and (?P<rB>\w+)', text):
text=re.sub(r'between ' + m.group('rA')+' and '+ m.group('rB'),"("+' or '.join(["s."+r for r in self.parser.proj.regionMapping['between$'+m.group('rA')+'$and$'+m.group('rB')+"$"]])+")", text)
# substitute decomposed region
for r in self.proj.rfi.regions:
if not (r.isObstacle or r.name.lower() == "boundary"):
text = re.sub('\\b' + r.name + '\\b', "("+' | '.join(["s."+x for x in self.parser.proj.regionMapping[r.name]])+")", text)
regionList = ["s."+x.name for x in self.parser.proj.rfi.regions]
else:
for r in self.proj.rfi.regions:
if not (r.isObstacle or r.name.lower() == "boundary"):
text = re.sub('\\b' + r.name + '\\b', "s."+r.name, text)
regionList = ["s."+x.name for x in self.proj.rfi.regions]
spec, traceback, failed, self.LTL2SpecLineNumber, self.proj.internal_props = parseEnglishToLTL.writeSpec(text, sensorList, regionList, robotPropList)
# Abort compilation if there were any errors
if failed:
return None, None, None
LTLspec_env = spec["EnvInit"] + spec["EnvTrans"] + spec["EnvGoals"]
LTLspec_sys = spec["SysInit"] + spec["SysTrans"] + spec["SysGoals"]
else:
logging.error("Parser type '{0}' not currently supported".format(self.proj.compile_options["parser"]))
return None, None, None
if self.proj.compile_options["decompose"]:
regionList = [x.name for x in self.parser.proj.rfi.regions]
else:
regionList = [x.name for x in self.proj.rfi.regions]
if self.proj.compile_options["use_region_bit_encoding"]:
# Define the number of bits needed to encode the regions
numBits = int(math.ceil(math.log(len(regionList),2)))
# creating the region bit encoding
bitEncode = bitEncoding(len(regionList),numBits)
currBitEnc = bitEncode['current']
nextBitEnc = bitEncode['next']
# switch to bit encodings for regions
LTLspec_env = replaceRegionName(LTLspec_env, bitEncode, regionList)
LTLspec_sys = replaceRegionName(LTLspec_sys, bitEncode, regionList)
if self.LTL2SpecLineNumber is not None:
for k in self.LTL2SpecLineNumber.keys():
new_k = replaceRegionName(k, bitEncode, regionList)
if new_k != k:
self.LTL2SpecLineNumber[new_k] = self.LTL2SpecLineNumber[k]
del self.LTL2SpecLineNumber[k]
if self.proj.compile_options["decompose"]:
adjData = self.parser.proj.rfi.transitions
else:
adjData = self.proj.rfi.transitions
# Store some data needed for later analysis
self.spec = {}
if self.proj.compile_options["decompose"]:
self.spec['Topo'] = createTopologyFragment(adjData, self.parser.proj.rfi.regions, use_bits=self.proj.compile_options["use_region_bit_encoding"])
else:
self.spec['Topo'] = createTopologyFragment(adjData, self.proj.rfi.regions, use_bits=self.proj.compile_options["use_region_bit_encoding"])
# Substitute any macros that the parsers passed us
LTLspec_env = self.substituteMacros(LTLspec_env)
LTLspec_sys = self.substituteMacros(LTLspec_sys)
# If we are not using bit-encoding, we need to
# explicitly encode a mutex for regions
if not self.proj.compile_options["use_region_bit_encoding"]:
# DNF version (extremely slow for core-finding)
#mutex = "\n\t&\n\t []({})".format(" | ".join(["({})".format(" & ".join(["s."+r2.name if r is r2 else "!s."+r2.name for r2 in self.parser.proj.rfi.regions])) for r in self.parser.proj.rfi.regions]))
if self.proj.compile_options["decompose"]:
region_list = self.parser.proj.rfi.regions
else:
region_list = self.proj.rfi.regions
# Almost-CNF version
exclusions = []
for i, r1 in enumerate(region_list):
for r2 in region_list[i+1:]:
exclusions.append("!(s.{} & s.{})".format(r1.name, r2.name))
mutex = "\n&\n\t []({})".format(" & ".join(exclusions))
LTLspec_sys += mutex
self.spec.update(self.splitSpecIntoComponents(LTLspec_env, LTLspec_sys))
# Add in a fragment to make sure that we start in a valid region
if self.proj.compile_options["decompose"]:
self.spec['InitRegionSanityCheck'] = createInitialRegionFragment(self.parser.proj.rfi.regions, use_bits=self.proj.compile_options["use_region_bit_encoding"])
else:
self.spec['InitRegionSanityCheck'] = createInitialRegionFragment(self.proj.rfi.regions, use_bits=self.proj.compile_options["use_region_bit_encoding"])
LTLspec_sys += "\n&\n" + self.spec['InitRegionSanityCheck']
LTLspec_sys += "\n&\n" + self.spec['Topo']
createLTLfile(self.proj.getFilenamePrefix(), LTLspec_env, LTLspec_sys)
if self.proj.compile_options["parser"] == "slurp":
self.reversemapping = {self.postprocessLTL(line,sensorList,robotPropList).strip():line.strip() for line in oldspec_env + oldspec_sys}
self.reversemapping[self.spec['Topo'].replace("\n","").replace("\t","").lstrip().rstrip("\n\t &")] = "TOPOLOGY"
#for k,v in self.reversemapping.iteritems():
# print "{!r}:{!r}".format(k,v)
return self.spec, traceback, response
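A standalone sketch of the word-boundary region-name rewriting used above for decomposed regions; the region name, decomposed list, and spec string are hypothetical stand-ins for regionMapping entries:
import re

region = "kitchen"
decomposed = ["kitchen_p1", "kitchen_p2"]        # stand-in for regionMapping[r.name]
spec = "[]<> s.kitchen & []!e.kitchen"
spec = re.sub('\\bs\.' + region + '\\b',
              "(" + ' | '.join(["s." + x for x in decomposed]) + ")", spec)
print(spec)   # "[]<> (s.kitchen_p1 | s.kitchen_p2) & []!e.kitchen"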
0
Example 68
Project: PipelineConstructionSet Source File: expressions.py
def expressionAsCSharpFunction(expressionNode, conflictDict, refTypeDict, jointOrientVars):
"""Return the expression node's contents as C# for Unity
@param expressionNode the expression node to translate into C#
@param conflictDict a dictionary mapping Maya reference names to conflict-resolved alternatives
@param refTypeDict a dictionary storing the Unity type for each reference in the list
@param jointOrientVars a dictionary mapping joint names to their prerotation Quaternion variables"""
# get the expression's contents
expression = cmds.expression(expressionNode, q=True, s=True)
# remove comments
expression = re.sub('//.*?[\n\r]', '', expression)
for comment in re.findall('/\*.*?\*/', expression, re.DOTALL):
expression = expression.replace(comment, '')
# find types of reference fields and variables
referenceFieldDict = getReferenceFieldDict(expressionNode)
variableTypeDict = getVariableTypeDict(expression, referenceFieldDict)
# append variable name conflicts to conflictDict
for v in variableTypeDict:
if not v[1:] in kReservedKeywords and not v[1:] in conflictDict: conflictDict[v] = v
else: conflictDict[v] = '$__varNameConflict__%s'%v[1:]
# fix naming conflicts
for conflict in conflictDict:
if re.match('%s(?=\W)'%re.escape(conflict), expression):
expression = re.sub('\A%s(?=\W)'%re.escape(conflict), conflictDict[conflict], expression, 1)
expression = re.sub('(?<=\W)%s(?=\W)'%re.escape(conflict), conflictDict[conflict], expression)
# attempt to make on-demand math commands Unity-friendly
expression = convertMathCommands(expression)
# make print statements Unity-friendly
skip = 0
while len(re.findall('print\s+.*?;', expression)) > skip:
printStatement = list(re.finditer('print\s+.*?;', expression))[skip]
if printStatement.start(0) == 0 or not re.match('[\w\d.$]', expression[printStatement.start(0)-1]):
expression = (expression[0:printStatement.start(0)] +
'Debug.Log(%s);'%re.match('.*?(?=;)', printStatement.group(0)[len('print'):].lstrip()).group(0) +
expression[printStatement.end(0):])
else: skip += 1
# reformat lines so there is one line of code per line of text
lines = expression.split(';')
expression = ''
for line in lines:
if line == '': continue
if line[-1] == '}': line = '\n%s'%re.sub('\n', ' ', line.lstrip().rstrip())
else: line = '\n%s;'%re.sub('\n', ' ', line.lstrip().rstrip())
expression += re.sub(' ', ' ', line)
expression = re.sub(';\n}', '; }\n', expression.lstrip())
# remove lines that have only a declaration
lines = expression.split(';')
expression = ''
for line in lines:
if line == '': continue
if re.search('\w+\s+\$\w+', line):
if not '=' in line: continue
else: line = re.sub('\w+\s+(?=\$\w)', '', line)
if line[-1] == '}' or line[-1] == '\n': expression += line
else: expression += '%s;'%line
# split multi-assignments into separate lines
lines = expression.split(';')
for line in lines:
newLines = ['%s;'%line]
assignments = re.split('(?<![=<>])=(?!=)', line)
finalValue = assignments[len(assignments)-1]
if len(assignments) > 2:
newLines = []
for i in range(len(assignments)-1):
if i == 0: newLines.append('%s = %s;'%(assignments[i].rstrip(), finalValue.lstrip()))
else: newLines.append('%s = %s;'%(assignments[i].rstrip(), re.match('[\s\w.]+', assignments[0][::-1]).group(0)[::-1].rstrip().lstrip()))
replacementLine = newLines[0]
for i in range(len(newLines)):
if i == 0: continue
replacementLine = '%s\n%s'%(replacementLine, newLines[i])
expression = expression.replace('%s;'%line, replacementLine)
# reformat operators
for op in ['=', '==', '<', '<=', '>', '>=', '!=', '+', '+=', '-=', '*', '*=', '/', '/=', '?', ':']:
expression = re.sub('(?<=[\w\d\s()])%s(?=[\w\d\s()$])'%re.escape(op), ' %s '%op, expression)
expression = re.sub('(?<=[\w\d()])\s+%s\s+(?=[\w\d()$])'%re.escape(op), ' %s '%op, expression)
for op in ['-']:
expression = re.sub('\s+%s\s+'%re.escape(op), op, expression)
expression = re.sub('(?<=[\w\d()])%s(?=[\w\d()$])'%re.escape(op), ' %s '%op, expression)
expression = re.sub('(?<=[\w\d()])%s%s(?=[\w\d()$])'%(re.escape(op),re.escape(op)), '%s %s'%(op,op), expression)
expression = re.sub('(?<=[\w\d)])%s\s'%(re.escape(op)), ' %s '%op, expression)
for op in ['++', '--']: # TODO: this hasn't been thoroughly examined
expression = re.sub('\s+%s'%re.escape(op), '%s'%op, expression)
# reformat one-line if-statements
lines = expression.split('\n')
expression = ''
for line in lines:
line = line.lstrip().rstrip()
if (re.match('if\W', line) or re.match('else if\W', line)) and not re.search('{', line):
openCt = 0
closeCt = 0
endBlock = 0
for i in xrange(len(line)):
if endBlock > 0: continue
if line[i] == '(': openCt += 1
elif line[i] == ')':
closeCt += 1
if openCt > 0 and openCt == closeCt: endBlock = i
if endBlock > 0: line = '%s\n{%s;}'%(line[:endBlock+1], line[endBlock+1:-1])
elif re.match('else\W', line) and not re.search('{', line):
line = 'else {%s}'%line[5:]
expression += '%s\n'%line
# reformat tabbing
expression = re.sub('{', '\n{\n', expression)
expression = re.sub('}', '\n}\n', expression)
lines = expression.split('\n')
expression = ''
indent = 0
for line in lines:
line = line.lstrip().rstrip()
line = re.sub('\s+', ' ', line)
if line == '': continue
bracketOpen = len(re.findall('{', line))
bracketClose = len(re.findall('}', line))
if bracketOpen<bracketClose: indent -= 1
expression += '%s%s\n'%('\t'*indent, line)
if bracketOpen>bracketClose: indent += 1
expression = expression.rstrip()
# consolidate literal blocks
expression = consolidateLiteralBlocks(expression, conflictDict, referenceFieldDict, variableTypeDict)
# correct assignments
expression = correctAssignments(expression, conflictDict, refTypeDict, referenceFieldDict, variableTypeDict, jointOrientVars)
# replace attributeNames with names of Unity fields as needed
expression = correctReferenceFieldSyntax(expression, conflictDict, refTypeDict)
# clean up spacing
for op in ['= =', '< =', '> =', '! =', '+ =', '- =', '* =', '/ =']:
expression = re.sub(re.escape(op), op.replace(' ', ''), expression)
matches = list(re.finditer('(?<=\()\-\s(?=\d)', expression))
for match in matches: expression = re.sub(re.escape(match.group(0)), '-', expression)
# convert addition and subtraction of negatives
expression = re.sub('\+\s\-(?=\S)', '- ', expression)
expression = re.sub('\+\s\(\-\(', '- ((', expression)
expression = re.sub('\-\s\-(?=\S)', '+ ', expression)
expression = re.sub('\-\s\(\-\(', '+ ((', expression)
# correct float literal syntax
expression = correctFloatLiteralSyntax(expression)
# comment out lines with broken references e.g., .I[1] or .O[3]
lines = expression.split('\n')
expression = ''
for line in lines:
if line == '': continue
if re.search('\s\.\w', line) or re.match('\.\w', line): line = '//%s // ERROR: Missing object reference. Check your Maya file.'%line
expression += '%s\n'%line
expression = expression.rstrip()
# put all variable declarations up front
declarations = ''
for var in variableTypeDict:
declarations += '\n%s %s;'%(variableTypeDict[var], conflictDict[var])
expression = (declarations.lstrip()+'\n\n'+expression.lstrip()).lstrip()
# remove variable symbols
expression = expression.replace('$', '')
#print expression
return expression
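The conflict-renaming step above combines lookbehind/lookahead anchors with re.escape so that only standalone identifiers are rewritten. A stripped-down sketch of that idea (the variable names are invented):
import re

expression = "$scale = $scale * 2; $scaleFactor = $scale;"
old, new = "$scale", "$__varNameConflict__scale"

# Rename the identifier only where it is delimited by non-word characters,
# leaving longer names such as $scaleFactor untouched.
expression = re.sub(r'\A%s(?=\W)' % re.escape(old), new, expression, count=1)
expression = re.sub(r'(?<=\W)%s(?=\W)' % re.escape(old), new, expression)
print(expression)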
0
Example 69
Project: nrvr-commander Source File: ssh.py
def __init__(self,
fromPath, toPath,
fromSshParameters=None, toSshParameters=None,
recurseDirectories=False,
preserveTimes=True):
"""Create new ScpCommand instance.
Will wait until completed.
Captures returncode, and output.
Either fromPath or toPath is expected to be local, i.e. without user and without IP address.
Correspondingly either fromSshParameters or toSshParameters must NOT be assigned an SshParameters
instance and remain default None.
fromPath
one path or a list of paths.
Absolute paths strongly recommended.
toPath
one path.
Absolute path strongly recommended.
Must be directory if more than one fromPath.
fromSshParameters
an SshParameters instance.
toSshParameters
an SshParameters instance.
recurseDirectories
a hint for when fromSshParameters is used, i.e. when copying from remote."""
if not _gotPty:
# cannot use scp if no pty
raise Exception("must have module pty available to use scp command"
", which is known to be available in Python 2.6 on Linux, but not on Windows")
#
if fromSshParameters and toSshParameters:
raise Exception("cannot copy if both fromSshParameters and toSshParameters, only one or other")
if not fromSshParameters and not toSshParameters:
raise Exception("cannot copy if neither fromSshParameters nor toSshParameters, requires one or other")
#
if not isinstance(fromPath, (list, tuple)): # should be one string for one path to copy from
fromPaths = [fromPath]
else: # should be a list of strings for multiple paths to copy from
fromPaths = fromPath
if len(fromPaths) == 0:
raise Exception("cannot copy zero files, requires at least one")
if fromSshParameters: # get files from remote
if len(fromPaths) > 1 or recurseDirectories:
if not os.path.isdir(toPath):
raise Exception("cannot copy multiple files into a file, must copy into a directory, not into %s" % toPath)
self._fromSpecification = \
[fromSshParameters.user + "@" + IPAddress.asString(fromSshParameters.ipaddress) + ":" + " ".join(fromPaths)]
self._toSpecification = toPath
self._ipaddress = fromSshParameters.ipaddress
self._pwd = fromSshParameters.pwd
else: # put files to remote
anyFromDirectory = False
for path in fromPaths:
if os.path.isdir(path):
anyFromDirectory = True
break
if anyFromDirectory:
recurseDirectories = True # mandatory in this case
self._fromSpecification = fromPaths
self._toSpecification = \
toSshParameters.user + "@" + IPAddress.asString(toSshParameters.ipaddress) + ":" + toPath
self._ipaddress = toSshParameters.ipaddress
self._pwd = toSshParameters.pwd
self._args = ["scp"]
if preserveTimes:
self._args.append("-p")
if recurseDirectories:
self._args.append("-r")
self._args.extend(self._fromSpecification) # a list because possibly more than one
self._args.append(self._toSpecification)
#
self._output = ""
self._returncode = None
#
# fork and connect child to a pseudo-terminal
self._pid, self._fd = pty.fork()
if self._pid == 0:
# in child process
os.execvp("scp", self._args)
else:
# in parent process
if self._pwd:
# if given a password then apply
promptedForPassword = False
outputTillPrompt = ""
# look for password prompt
while not promptedForPassword:
try:
newOutput = os.read(self._fd, 1024)
if not len(newOutput):
# end has been reached
# was raise Exception("unexpected end of output from scp")
raise Exception("failing to connect for scp\n" +
outputTillPrompt)
# ssh has been observed returning "\r\n" for newline, but we want "\n"
newOutput = SshCommand._crLfRegex.sub("\n", newOutput)
outputTillPrompt += newOutput
if SshCommand._acceptPromptRegex.search(outputTillPrompt):
# e.g. "Are you sure you want to continue connecting (yes/no)? "
raise Exception("cannot proceed unless having accepted host key\n" +
outputTillPrompt +
'\nE.g. invoke SshCommand.acceptKnownHostKey(SshParameters("{0}",user,pwd)).'.format(self._ipaddress))
if SshCommand._pwdPromptRegex.search(outputTillPrompt):
# e.g. "10.123.45.67's password: "
promptedForPassword = True
except EnvironmentError:
# e.g. "@ WARNING: REMOTE HOST IDENTIFICATION HAS CHANGED! @" and closing
raise Exception("failing to connect for scp\n" +
outputTillPrompt)
os.write(self._fd, self._pwd + "\n")
# look for output
endOfOutput = False
outputSincePrompt = ""
try:
while not endOfOutput:
try:
newOutput = os.read(self._fd, 1024)
if len(newOutput):
outputSincePrompt += newOutput
else:
# end has been reached
endOfOutput = True
except EnvironmentError as e:
# some ideas maybe at http://bugs.python.org/issue5380
if e.errno == 5: # errno.EIO:
# seen when pty closes OSError: [Errno 5] Input/output error
endOfOutput = True
else:
# we accept what we got so far, for now
endOfOutput = True
finally:
# remove any leading space (maybe there after "password:" prompt) and
# remove first newline (is there after entering password and "\n")
self._output = re.sub(r"^\s*?\n(.*)$", r"\1", outputSincePrompt)
#
# get returncode
try:
ignorePidAgain, waitEncodedStatusIndication = os.waitpid(self._pid, 0)
if os.WIFEXITED(waitEncodedStatusIndication):
# normal exit(status) call
self._returncode = os.WEXITSTATUS(waitEncodedStatusIndication)
# raise an exception if there is a reason
exceptionMessage = ""
if self._returncode:
exceptionMessage += "returncode: " + str(self._returncode)
if exceptionMessage:
commandDescription = "scp from:\n\t" + str(self._fromSpecification)
commandDescription += "\nto:\n\t" + self._toSpecification
commandDescription += "\nargs:\n\t" + str(self._args)
exceptionMessage = commandDescription + "\n" + exceptionMessage
exceptionMessage += "\noutput:\n" + self._output
raise ScpCommandException(exceptionMessage)
else:
# e.g. os.WIFSIGNALED or os.WIFSTOPPED
self._returncode = -1
raise ScpCommandException("scp did not exit normally")
except OSError:
# supposedly can occur
self._returncode = -1
raise ScpCommandException("scp did not exit normally")
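The single re.sub here trims the leading whitespace and first newline that scp echoes after the password prompt. A self-contained sketch with a made-up output line:
import re

output_since_prompt = " \nfile.txt    100% 1024  1.0KB/s  00:00\n"
# Drop any leading whitespace plus the first newline, keep the rest.
cleaned = re.sub(r"^\s*?\n(.*)$", r"\1", output_since_prompt)
print(repr(cleaned))   # 'file.txt    100% 1024  1.0KB/s  00:00\n'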
0
Example 70
Project: gitlab-to-atlassian Source File: dump_gitlab_json.py
def main(argv=None):
'''
Process the command line arguments and create the JSON dump.
:param argv: List of arguments, as if specified on the command-line.
If None, ``sys.argv[1:]`` is used instead.
:type argv: list of str
'''
# Get command line arguments
parser = argparse.ArgumentParser(
description="Export all users/issues from GitLab to JIRA JSON format.",
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
conflict_handler='resolve')
parser.add_argument('gitlab_url',
help='The full URL to your GitLab instance.')
parser.add_argument('-d', '--date_filter',
help='Only include issues, notes, etc. created after\
the specified date. Expected format is \
YYYY-MM-DD',
type=get_datetime, default='1970-01-01')
parser.add_argument('-e', '--include_empty',
help='Include projects in output that do not have any\
issues.',
action='store_true')
parser.add_argument('-i', '--ignore_list',
help='List of project names to exclude from dump.',
type=argparse.FileType('r'))
parser.add_argument('-p', '--password',
help='The password to use to authenticate if token is \
not specified. If password and token are both \
unspecified, you will be prompted to enter a \
password.')
parser.add_argument('-P', '--page_size',
help='When retrieving results from GitLab, how many \
results should be included in a given page?',
type=int, default=20)
parser.add_argument('-s', '--verify_ssl',
help='Enable SSL certificate verification',
action='store_true')
parser.add_argument('-t', '--token',
help='The private GitLab API token to use for \
authentication. Either this or username and \
password must be set.')
parser.add_argument('-u', '--username',
help='The username to use for authentication, if token\
is unspecified.')
parser.add_argument('-v', '--verbose',
help='Print more status information. For every ' +
'additional time this flag is specified, ' +
'output gets more verbose.',
default=0, action='count')
parser.add_argument('--version', action='version',
version='%(prog)s {0}'.format(__version__))
args = parser.parse_args(argv)
args.page_size = max(100, args.page_size)
# Convert verbose flag to actually logging level
log_levels = [logging.WARNING, logging.INFO, logging.DEBUG]
log_level = log_levels[min(args.verbose, 2)]
# Make warnings from built-in warnings module get formatted more nicely
logging.captureWarnings(True)
logging.basicConfig(format=('%(asctime)s - %(name)s - %(levelname)s - ' +
'%(message)s'), level=log_level)
# Setup authenticated GitLab instance
if args.token:
git = GitLab(args.gitlab_url, token=args.token,
verify_ssl=args.verify_ssl)
else:
if not args.username:
print('Username: ', end="", file=sys.stderr)
args.username = input('').strip()
if not args.password:
args.password = getpass.getpass('Password: ')
git = GitLab(args.gitlab_url, verify_ssl=args.verify_ssl)
git.login(args.username, args.password)
# Initialize output dictionary
output_dict = defaultdict(list)
print('Creating project entries...', end="", file=sys.stderr)
sys.stderr.flush()
key_set = set()
mentioned_users = set()
if args.ignore_list is not None:
ignore_list = {line.strip().lower() for line in args.ignore_list}
else:
ignore_list = {}
for project in gen_all_results(git.getprojectsall,
per_page=args.page_size):
proj_name_lower = project['name'].lower()
if proj_name_lower not in ignore_list and project['issues_enabled']:
project_issues = []
for issue in gen_all_results(git.getprojectissues, project['id'],
per_page=args.page_size):
if args.date_filter < parsedate(issue['updated_at']).replace(tzinfo=None):
project_issues.append(issue)
else:
for note in git.getissuewallnotes(project['id'],
issue['id']):
if args.date_filter < parsedate(issue['updated_at']).replace(tzinfo=None):
project_issues.append(issue)
break
if project_issues or args.include_empty:
jira_project = {}
jira_project['name'] = project['name_with_namespace']
key = project['name']
if key.islower():
key = key.title()
key = re.sub(r'[^A-Z]', '', key)
if len(key) < 2:
key = re.sub(r'[^A-Za-z]', '',
project['name'])[0:2].upper()
added = False
suffix = 65
while key in key_set:
if not added:
key += 'A'
else:
suffix += 1
key = key[:-1] + chr(suffix)
key_set.add(key)
jira_project['key'] = key
jira_project['description'] = md_to_wiki(project['description'])
# jira_project['created'] = project['created_at']
jira_project['issues'] = []
for issue in project_issues:
jira_issue = {}
jira_issue['externalId'] = issue['iid']
if issue['state'] == 'closed':
jira_issue['status'] = 'Closed'
jira_issue['resolution'] = 'Resolved'
else:
jira_issue['status'] = 'Open'
jira_issue['description'] = md_to_wiki(issue['description'])
jira_issue['reporter'] = issue['author']['username']
mentioned_users.add(jira_issue['reporter'])
jira_issue['labels'] = issue['labels']
jira_issue['summary'] = issue['title']
if issue['assignee']:
jira_issue['assignee'] = issue['assignee']['username']
mentioned_users.add(jira_issue['assignee'])
jira_issue['issueType'] = 'Bug'
jira_issue['comments'] = []
# Get all comments/notes
for note in git.getissuewallnotes(project['id'],
issue['id']):
jira_note = {}
jira_note['body'] = md_to_wiki(note['body'])
jira_note['author'] = note['author']['username']
mentioned_users.add(jira_note['author'])
jira_note['created'] = note['created_at']
jira_issue['comments'].append(jira_note)
jira_project['issues'].append(jira_issue)
output_dict['projects'].append(jira_project)
print('.', end="", file=sys.stderr)
sys.stderr.flush()
print('\nCreating user entries...', end="", file=sys.stderr)
sys.stderr.flush()
for user in gen_all_results(git.getusers, per_page=args.page_size):
# Only add users who are actually referenced in issues
if user['username'] in mentioned_users:
jira_user = {}
jira_user['name'] = user['username']
jira_user['fullname'] = user['name']
jira_user['email'] = user['email']
jira_user['groups'] = ['gitlab-users']
jira_user['active'] = (user['state'] == 'active')
output_dict['users'].append(jira_user)
print('.', end="", file=sys.stderr)
sys.stderr.flush()
print('\nPrinting JSON output...', file=sys.stderr)
sys.stderr.flush()
print(json.dumps(output_dict, indent=4))
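The re.sub calls in this dump script derive a short JIRA project key from the GitLab project name: keep the capital letters, and fall back to the first two letters when that leaves fewer than two characters. A tiny sketch of the same logic (the project name is made up):
import re

def make_key(name):
    if name.islower():
        name = name.title()
    key = re.sub(r'[^A-Z]', '', name)               # keep capitals only
    if len(key) < 2:
        key = re.sub(r'[^A-Za-z]', '', name)[0:2].upper()
    return key

print(make_key("gitlab-to-atlassian"))   # GTA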
0
Example 71
Project: webrecorder Source File: usercontroller.py
def init_routes(self):
@self.app.get(['/api/v1/dashboard', '/api/v1/dashboard/'])
@self.manager.admin_view()
def api_dashboard():
cache_key = self.cache_template.format('dashboard')
expiry = 5 * 60 # 5 min
cache = self.manager.redis.get(cache_key)
if cache:
return json.loads(cache.decode('utf-8'))
users = self.manager.get_users().items()
results = []
# add username and get collections
for user, data in users:
data['username'] = user
results.append(data)
temp = self.manager.redis.hgetall(self.temp_usage_key)
user = self.manager.redis.hgetall(self.user_usage_key)
temp = [(k.decode('utf-8'), int(v)) for k, v in temp.items()]
user = [(k.decode('utf-8'), int(v)) for k, v in user.items()]
data = {
'users': UserSchema().load(results, many=True).data,
'collections': self.manager.get_collections(user='*', api=True),
'temp_usage': sorted(temp, key=itemgetter(0)),
'user_usage': sorted(user, key=itemgetter(0)),
}
self.manager.redis.setex(cache_key,
expiry,
json.dumps(data, cls=CustomJSONEncoder))
return data
@self.app.get(['/api/v1/users', '/api/v1/users/'])
@self.manager.admin_view()
def api_users():
"""Full admin API resource of all users.
Containing user info and public collections
- Provides basic (1 dimension) RESTful sorting
- TODO: Pagination
"""
sorting = request.query.getunicode('sort', None)
sort_key = sub(r'^-{1}?', '', sorting) if sorting is not None else None
reverse = sorting.startswith('-') if sorting is not None else False
def dt(d):
return datetime.strptime(d, '%Y-%m-%d %H:%M:%S.%f')
# sortable fields, with optional key unpacking functions
filters = {
'created': {'key': lambda obj: dt(obj[1]['creation_date'])},
'email': {'key': lambda obj: obj[1]['email_addr']},
'last_login': {'key': lambda obj: dt(obj[1]['last_login'])},
'name': {'key': lambda obj: json.loads(obj[1]['desc'] or '{}')['name']},
'username': {},
}
if sorting is not None and sort_key not in filters:
raise HTTPError(400, 'Bad Request')
sort_by = filters[sort_key] if sorting is not None else {}
users = sorted(self.manager.get_users().items(),
**sort_by,
reverse=reverse)
results = []
# add username and get collections
for user, data in users:
data['username'] = user
# add space usage
total = self.manager.get_size_allotment(user)
used = self.manager.get_size_usage(user)
data['space_utilization'] = {
'total': total,
'used': used,
'available': total - used,
}
results.append(data)
return {
# `results` is a list so will always read as `many`
'users': UserSchema().load(results, many=True).data
}
@self.app.get('/api/v1/anon_user')
def get_anon_user():
return {'anon_user': self.manager.get_anon_user(True)}
@self.app.get('/api/v1/temp-users')
@self.manager.admin_view()
def temp_users():
""" Resource returning active temp users
"""
temp_users_keys = self.manager.redis.keys('u:{0}*'.format(self.temp_user_key))
temp_users = []
if len(temp_users_keys):
with self.manager.redis.pipeline() as pi:
for user in temp_users_keys:
pi.hgetall(user)
temp_users = pi.execute()
for idx, user in enumerate(temp_users_keys):
temp_users[idx][b'username'] = user
# convert bytestrings, skip over incomplete
temp_users = [{k.decode('utf-8'): v.decode('utf-8') for k, v in d.items()}
for d in temp_users
if b'max_size' in d and b'created_at' in d]
for user in temp_users:
total = int(user['max_size'])
used = int(user.get('size', 0))
creation = datetime.fromtimestamp(int(user['created_at']))
removal = creation + timedelta(seconds=self.config['session.durations']['short']['total'])
u = re.search(r'{0}\w+'.format(self.temp_user_key),
user['username']).group()
user['username'] = u
user['removal'] = removal.isoformat()
user['space_utilization'] = {
'total': total,
'used': used,
'available': total - used,
}
temp_users, err = TempUserSchema().load(temp_users, many=True)
if err:
return {'errors': err}
return {'users': temp_users}
@self.app.post('/api/v1/users/<user>/desc')
def update_desc(user):
"""legacy, eventually move to the patch endpoint"""
desc = request.body.read().decode('utf-8')
self.manager.set_user_desc(user, desc)
return {}
@self.app.post(['/api/v1/users', '/api/v1/users/'])
@self.manager.admin_view()
def api_create_user():
"""API enpoint to create a user with schema validation"""
users = self.manager.get_users()
emails = [u[1]['email_addr'] for u in users.items()]
data = request.json
err = NewUserSchema().validate(data)
if 'username' in data and data['username'] in users:
if not err:
return {'errors': 'Username already exists'}
else:
err.update({'username': 'Username already exists'})
if 'email' in data and data['email'] in emails:
if not err:
return {'errors': 'Email already exists'}
else:
err.update({'email': 'Email already exists'})
# validate
if len(err):
return {'errors': err}
# create user
self.manager.cork._store.users[data['username']] = {
'role': data['role'],
'hash': self.manager.cork._hash(data['username'],
data['password']).decode('ascii'),
'email_addr': data['email'],
'desc': '{{"name":"{name}"}}'.format(name=data.get('name', '')),
'creation_date': str(datetime.utcnow()),
'last_login': str(datetime.utcnow()),
}
self.manager.cork._store.save_users()
# add user account defaults
key = self.manager.user_key.format(user=data['username'])
now = int(time.time())
max_size, max_coll = self.manager.redis.hmget('h:defaults',
['max_size', 'max_coll'])
if not max_size:
max_size = self.manager.default_max_size
if not max_coll:
max_coll = self.manager.default_max_coll
with redis.utils.pipeline(self.manager.redis) as pi:
pi.hset(key, 'max_size', max_size)
pi.hset(key, 'max_coll', max_coll)
pi.hset(key, 'created_at', now)
pi.hset(key, 'name', data.get('name', ''))
pi.hsetnx(key, 'size', '0')
# create initial collection
self.manager.create_collection(
data['username'],
coll=self.manager.default_coll['id'],
coll_title=self.manager.default_coll['title'],
desc=self.manager.default_coll['desc'].format(data['username']),
public=False,
synthetic=True
)
# Check for mailing list management
if self.manager.mailing_list:
self.manager.add_to_mailing_list(
data['username'],
data['email'],
data.get('name', ''),
)
@self.app.get(['/api/v1/users/<username>', '/api/v1/users/<username>/'])
@self.manager.admin_view()
def api_get_user(username):
"""API enpoint to return user info"""
users = self.manager.get_users()
if username not in users:
self._raise_error(404, 'No such user')
user = users[username]
# assemble space usage
total = self.manager.get_size_allotment(username)
used = self.manager.get_size_usage(username)
user['space_utilization'] = {
'total': total,
'used': used,
'available': total - used,
}
user_data, err = UserSchema(exclude=('username',)).load(user)
colls = self.manager.get_collections(username,
include_recs=True,
api=True)
for coll in colls:
for rec in coll['recordings']:
rec['pages'] = self.manager.list_pages(username,
coll['id'],
rec['id'])
# colls is a list so will always be `many` even if one collection
collections, err = CollectionSchema().load(colls, many=True)
user_data['collections'] = collections
return {'user': user_data}
@self.app.put(['/api/v1/users/<username>', '/api/v1/users/<username>/'])
@self.manager.auth_view()
def api_update_user(username):
"""API enpoint to update user info
See `UserUpdateSchema` for available fields.
** bottle 0.12.9 doesn't support `PATCH` methods; update to
patch once available.
"""
users = self.manager.get_users()
if username not in users:
self._raise_error(404, 'No such user')
# if not admin, check ownership
if not self.manager.is_anon(username) and not self.manager.is_superuser():
self.manager.assert_user_is_owner(username)
user = users[username]
try:
json_data = json.loads(request.forms.json)
except Exception as e:
print(e)
return {'errors': 'bad json data'}
if len(json_data.keys()) == 0:
return {'errors': 'empty payload'}
data, err = UserUpdateSchema(only=json_data.keys()).load(json_data)
if len(err):
return {'errors': err}
if 'name' in data:
user['desc'] = '{{"name":"{name}"}}'.format(name=data.get('name', ''))
#
# restricted resources
#
if 'max_size' in data and self.manager.is_superuser():
key = self.manager.user_key.format(user=username)
max_size = data.get('max_size', self.manager.default_max_size)
max_size = int(max_size) if type(max_size) is not int else max_size
with redis.utils.pipeline(self.manager.redis) as pi:
pi.hset(key, 'max_size', max_size)
if 'role' in data and self.manager.is_superuser():
# set new role or default to base role
user['role'] = data.get('role', 'archivist')
#
# return updated user data
#
total = self.manager.get_size_allotment(username)
used = self.manager.get_size_usage(username)
user['space_utilization'] = {
'total': total,
'used': used,
'available': total - used,
}
user_data, err = UserSchema(exclude=('username',)).load(user)
colls = self.manager.get_collections(username,
include_recs=True,
api=True)
for coll in colls:
for rec in coll['recordings']:
rec['pages'] = self.manager.list_pages(username,
coll['id'],
rec['id'])
# colls is a list so will always be `many` even if one collection
collections, err = CollectionSchema().load(colls, many=True)
user_data['collections'] = collections
return {'user': user_data}
@self.app.delete(['/api/v1/users/<user>', '/api/v1/users/<user>/'])
@self.manager.admin_view()
def api_delete_user(user):
"""API enpoint to delete a user"""
if user not in self.manager.get_users():
self._raise_error(404, 'No such user')
self.manager.delete_user(user)
@self.app.get(['/<user>', '/<user>/'])
@self.jinja2_view('user.html')
def user_info(user):
self.redir_host()
if self.manager.is_anon(user):
self.redirect('/' + user + '/temp')
self.manager.assert_user_exists(user)
result = {
'user': user,
'user_info': self.manager.get_user_info(user),
'collections': self.manager.get_collections(user),
}
if not result['user_info'].get('desc'):
result['user_info']['desc'] = self.default_user_desc.format(user)
return result
# User Account Settings
@self.app.get('/<user>/_settings')
@self.jinja2_view('account.html')
def account_settings(user):
self.manager.assert_user_is_owner(user)
return {'user': user,
'user_info': self.manager.get_user_info(user),
'num_coll': self.manager.num_collections(user),
}
# Delete User Account
@self.app.post('/<user>/$delete')
def delete_user(user):
if self.manager.delete_user(user):
self.flash_message('The user {0} has been permanently deleted!'.format(user), 'success')
redir_to = '/'
request.environ['webrec.delete_all_cookies'] = 'all'
self.manager.cork.logout(success_redirect=redir_to, fail_redirect=redir_to)
else:
self.flash_message('There was an error deleting {0}'.format(user))
self.redirect(self.get_path(user))
# Expiry Message
@self.app.route('/_expire')
def expire():
self.flash_message('Sorry, the anonymous collection has expired due to inactivity')
self.redirect('/')
@self.app.post('/_reportissues')
def report_issues():
useragent = request.headers.get('User-Agent')
@self.jinja2_view('email_error.html')
def error_email(params):
ua = UserAgent(params.get('ua'))
if ua.browser:
browser = '{0} {1} {2} {3}'
lang = ua.language or ''
browser = browser.format(ua.platform, ua.browser,
ua.version, lang)
params['browser'] = browser
else:
params['browser'] = ua.string
params['time'] = params['time'][:19]
return params
self.manager.report_issues(request.POST, useragent, error_email)
return {}
# Skip POST request recording
@self.app.get('/_skipreq')
def skip_req():
url = request.query.getunicode('url')
user = self.manager.get_curr_user()
if not user:
user = self.manager.get_anon_user()
self.manager.skip_post_req(user, url)
return {}
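The one substitution in this controller (sub is re.sub imported by name) strips a leading '-' from the ?sort= query parameter, separating the field name from the sort direction. Minimal sketch:
from re import sub

sorting = '-created'
sort_key = sub(r'^-{1}?', '', sorting)   # field name without the direction flag
reverse = sorting.startswith('-')        # a leading '-' means descending
print(sort_key, reverse)                 # created True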
0
Example 72
Project: wikiteam Source File: uploader.py
def upload(wikis, config={}):
headers = {'User-Agent': dumpgenerator.getUserAgent()}
for wiki in wikis:
print "#"*73
print "# Uploading", wiki
print "#"*73
wiki = wiki.lower()
prefix = dumpgenerator.domain2prefix(config={'api': wiki})
wikiname = prefix.split('-')[0]
dumps = []
for dirname, dirnames, filenames in os.walk('.'):
if dirname == '.':
for f in filenames:
if f.startswith('%s-' % (wikiname)) and (f.endswith('-wikidump.7z') or f.endswith('-history.xml.7z')):
dumps.append(f)
break
c = 0
for dump in dumps:
wikidate = dump.split('-')[1]
item = get_item('wiki-' + wikiname)
if dump in uploadeddumps:
if config['prune-directories']:
rmline='rm -rf %s-%s-wikidump/' % (wikiname, wikidate)
# With -f the deletion might have happened before and we won't know
if not os.system(rmline):
print 'DELETED %s-%s-wikidump/' % (wikiname, wikidate)
if config['prune-wikidump'] and dump.endswith('wikidump.7z'):
# Simplistic quick&dirty check for the presence of this file in the item
stdout, stderr = subprocess.Popen(["md5sum", dump], stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
dumphash = re.sub(' +.+\n?', '', stdout)
if dumphash in map(lambda x: x['md5'], item.files):
log(wiki, dump, 'verified')
rmline='rm -rf %s' % dump
if not os.system(rmline):
print 'DELETED ' + dump
print '%s was uploaded before, skipping...' % (dump)
continue
else:
print 'ERROR: The online item misses ' + dump
log(wiki, dump, 'missing')
# We'll exit this if and go upload the dump
else:
print '%s was uploaded before, skipping...' % (dump)
continue
time.sleep(0.1)
wikidate_text = wikidate[0:4]+'-'+wikidate[4:6]+'-'+wikidate[6:8]
print wiki, wikiname, wikidate, dump
# Does the item exist already?
ismissingitem = not item.exists
# Logo path
logourl = ''
if ismissingitem or config['update']:
#get metadata from api.php
#first sitename and base url
params = {'action': 'query', 'meta': 'siteinfo', 'format': 'xml'}
data = urllib.urlencode(params)
req = urllib2.Request(url=wiki, data=data, headers=headers)
xml = ''
try:
f = urllib2.urlopen(req)
xml = f.read()
f.close()
except:
pass
sitename = ''
baseurl = ''
lang = ''
try:
sitename = re.findall(ur"sitename=\"([^\"]+)\"", xml)[0]
except:
pass
try:
baseurl = re.findall(ur"base=\"([^\"]+)\"", xml)[0]
except:
pass
try:
lang = re.findall(ur"lang=\"([^\"]+)\"", xml)[0]
except:
pass
if not sitename:
sitename = wikiname
if not baseurl:
baseurl = re.sub(ur"(?im)/api\.php", ur"", wiki)
if lang:
lang = convertlang.has_key(lang.lower()) and convertlang[lang.lower()] or lang.lower()
#now copyright info from API
params = {'action': 'query', 'siprop': 'general|rightsinfo', 'format': 'xml'}
data = urllib.urlencode(params)
req = urllib2.Request(url=wiki, data=data, headers=headers)
xml = ''
try:
f = urllib2.urlopen(req)
xml = f.read()
f.close()
except:
pass
rightsinfourl = ''
rightsinfotext = ''
try:
rightsinfourl = re.findall(ur"rightsinfo url=\"([^\"]+)\"", xml)[0]
rightsinfotext = re.findall(ur"text=\"([^\"]+)\"", xml)[0]
except:
pass
raw = ''
try:
f = urllib.urlopen(baseurl)
raw = f.read()
f.close()
except:
pass
#or copyright info from #footer in mainpage
if baseurl and not rightsinfourl and not rightsinfotext:
rightsinfotext = ''
rightsinfourl = ''
try:
rightsinfourl = re.findall(ur"<link rel=\"copyright\" href=\"([^\"]+)\" />", raw)[0]
except:
pass
try:
rightsinfotext = re.findall(ur"<li id=\"copyright\">([^\n\r]*?)</li>", raw)[0]
except:
pass
if rightsinfotext and not rightsinfourl:
rightsinfourl = baseurl + '#footer'
try:
logourl = re.findall(ur'p-logo["\'][^>]*>\s*<a [^>]*background-image:\s*(?:url\()?([^;)"]+)', raw)[0]
except:
pass
print logourl
#retrieve some info from the wiki
wikititle = "Wiki - %s" % (sitename) # Wiki - ECGpedia
wikidesc = "<a href=\"%s\">%s</a> dumped with <a href=\"https://github.com/WikiTeam/wikiteam\" rel=\"nofollow\">WikiTeam</a> tools." % (baseurl, sitename)# "<a href=\"http://en.ecgpedia.org/\" rel=\"nofollow\">ECGpedia,</a>: a free electrocardiography (ECG) tutorial and textbook to which anyone can contribute, designed for medical professionals such as cardiac care nurses and physicians. Dumped with <a href=\"https://github.com/WikiTeam/wikiteam\" rel=\"nofollow\">WikiTeam</a> tools."
wikikeys = ['wiki', 'wikiteam', 'MediaWiki', sitename, wikiname] # ecg; ECGpedia; wiki; wikiteam; MediaWiki
if not rightsinfourl and not rightsinfotext:
wikikeys.append('unknowncopyright')
wikilicenseurl = rightsinfourl # http://creativecommons.org/licenses/by-nc-sa/3.0/
wikirights = rightsinfotext # e.g. http://en.ecgpedia.org/wiki/Frequently_Asked_Questions : hard to fetch automatically, could be the output of API's rightsinfo if it's not a usable licenseurl or "Unknown copyright status" if nothing is found.
wikiurl = wiki # we use api here http://en.ecgpedia.org/api.php
else:
print 'Item already exists.'
lang = 'foo'
wikititle = 'foo'
wikidesc = 'foo'
wikikeys = 'foo'
wikilicenseurl = 'foo'
wikirights = 'foo'
wikiurl = 'foo'
if c == 0:
# Item metadata
md = {
'mediatype': 'web',
'collection': config['collection'],
'title': wikititle,
'description': wikidesc,
'language': lang,
'last-updated-date': wikidate_text,
'subject': '; '.join(wikikeys), # Keywords should be separated by ; but it doesn't matter much; the alternative is to set one per field with subject[0], subject[1], ...
'licenseurl': wikilicenseurl and urlparse.urljoin(wiki, wikilicenseurl),
'rights': wikirights,
'originalurl': wikiurl,
}
#Upload files and update metadata
try:
item.upload(dump, metadata=md, access_key=accesskey, secret_key=secretkey, verbose=True)
item.modify_metadata(md) # update
print 'You can find it in https://archive.org/details/wiki-%s' % (wikiname)
if logourl:
logo = StringIO.StringIO(urllib.urlopen(urlparse.urljoin(wiki, logourl)).read())
logoextension = logourl.split('.')[-1] if logourl.split('.') else 'unknown'
logo.name = 'wiki-' + wikiname + '_logo.' + logoextension
item.upload(logo, access_key=accesskey, secret_key=secretkey, verbose=True)
uploadeddumps.append(dump)
log(wiki, dump, 'ok')
except:
print wiki, dump, 'error when uploading?'
c += 1
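Alongside the re.findall scraping, the re.sub here isolates the checksum from md5sum's output by deleting everything from the first run of spaces onward. A standalone sketch with an invented filename:
import re

stdout = "d41d8cd98f00b204e9800998ecf8427e  mywiki-20140101-wikidump.7z\n"
# Remove the spaces, the filename and the trailing newline; keep only the hash.
dumphash = re.sub(' +.+\n?', '', stdout)
print(dumphash)   # d41d8cd98f00b204e9800998ecf8427e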
0
Example 73
def linkify(text, callbacks=DEFAULT_CALLBACKS, skip_pre=False,
parse_email=False, tokenizer=HTMLSanitizer):
"""Convert URL-like strings in an HTML fragment to links.
linkify() converts strings that look like URLs or domain names in a
blob of text that may be an HTML fragment to links, while preserving
(a) links already in the string, (b) urls found in attributes, and
(c) email addresses.
"""
text = force_unicode(text)
if not text:
return u''
parser = html5lib.HTMLParser(tokenizer=tokenizer)
forest = parser.parseFragment(text)
def replace_nodes(tree, new_frag, node):
new_tree = parser.parseFragment(new_frag)
for n in new_tree.childNodes:
# Prevent us from re-parsing new links as existing links.
if n.name == 'a':
n._seen = True
tree.insertBefore(n, node)
tree.removeChild(node)
# Return the number of new nodes.
return len(new_tree.childNodes) - 1
def strip_wrapping_parentheses(fragment):
"""Strips wrapping parentheses.
Returns a tuple of the following format::
(string stripped from wrapping parentheses,
count of stripped opening parentheses,
count of stripped closing parentheses)
"""
opening_parentheses = closing_parentheses = 0
# Count consecutive opening parentheses
# at the beginning of the fragment (string).
for char in fragment:
if char == '(':
opening_parentheses += 1
else:
break
if opening_parentheses:
newer_frag = ''
# Cut the consecutive opening brackets from the fragment.
fragment = fragment[opening_parentheses:]
# Reverse the fragment for easier detection of parentheses
# inside the URL.
reverse_fragment = fragment[::-1]
skip = False
for char in reverse_fragment:
# Remove the closing parentheses if it has a matching
# opening parentheses (they are balanced).
if (char == ')' and
closing_parentheses < opening_parentheses and
not skip):
closing_parentheses += 1
continue
# Do not remove ')' from the URL itself.
elif char != ')':
skip = True
newer_frag += char
fragment = newer_frag[::-1]
return fragment, opening_parentheses, closing_parentheses
def apply_callbacks(attrs, new):
for cb in callbacks:
attrs = cb(attrs, new)
if attrs is None:
return None
return attrs
def linkify_nodes(tree, parse_text=True):
# I know this isn't Pythonic, but we're sometimes mutating
# tree.childNodes, which ends up breaking the loop and causing us to
# reparse code.
children = len(tree.childNodes)
current = 0 # A pointer to the "current" node.
while current < children:
node = tree.childNodes[current]
if node.type == NODE_TEXT and parse_text:
new_frag = _render(node)
# Look for email addresses?
if parse_email:
new_frag = re.sub(email_re, email_repl, new_frag)
if new_frag != _render(node):
adj = replace_nodes(tree, new_frag, node)
children += adj
current += adj
linkify_nodes(tree)
continue
new_frag = re.sub(url_re, link_repl, new_frag)
if new_frag != _render(node):
adj = replace_nodes(tree, new_frag, node)
children += adj
current += adj
elif node.name == 'a' and not getattr(node, '_seen', False):
if 'href' in node.attributes:
attrs = node.attributes
_text = attrs['_text'] = ''.join(_render(c) for
c in node.childNodes)
attrs = apply_callbacks(attrs, False)
if attrs is not None:
text = force_unicode(attrs.pop('_text'))
node.attributes = attrs
for n in node.childNodes:
node.removeChild(n)
node.insertText(text)
node._seen = True
else:
replace_nodes(tree, _text, node)
elif skip_pre and node.name == 'pre':
linkify_nodes(node, False)
elif not getattr(node, '_seen', False):
linkify_nodes(node)
current += 1
def email_repl(match):
addr = match.group(0).replace('"', '"')
link = {
'_text': addr,
'href': 'mailto:%s' % addr,
}
link = apply_callbacks(link, True)
if link is None:
return addr
_href = link.pop('href')
_text = link.pop('_text')
repl = '<a href="%s" %s>%s</a>'
attribs = ' '.join('%s="%s"' % (k, v) for k, v in link.items())
return repl % (_href, attribs, _text)
def link_repl(match):
url = match.group(0)
open_brackets = close_brackets = 0
if url.startswith('('):
url, open_brackets, close_brackets = (
strip_wrapping_parentheses(url)
)
end = u''
m = re.search(punct_re, url)
if m:
end = m.group(0)
url = url[0:m.start()]
if re.search(proto_re, url):
href = url
else:
href = u''.join([u'http://', url])
link = {
'_text': url,
'href': href,
}
link = apply_callbacks(link, True)
if link is None:
return url
_text = link.pop('_text')
_href = link.pop('href')
repl = u'%s<a href="%s" %s>%s</a>%s%s'
attribs = ' '.join('%s="%s"' % (k, v) for k, v in link.items())
return repl % ('(' * open_brackets,
_href, attribs, _text, end,
')' * close_brackets)
try:
linkify_nodes(forest)
except (RECURSION_EXCEPTION), e:
# If we hit the max recursion depth, just return what we've got.
log.error('Probable recursion error: %r' % e, exc_info=sys.exc_info())
return _render(forest)
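This linkify example passes a function as the replacement argument to re.sub; url_re, email_re and the *_repl callbacks come from bleach. A reduced sketch with a deliberately simplified URL pattern (not bleach's real url_re):
import re

# Simplified URL pattern, for illustration only.
url_re = re.compile(r'\bhttps?://[^\s<>"]+')

def link_repl(match):
    url = match.group(0)
    return '<a href="%s">%s</a>' % (url, url)

text = 'see http://example.com for details'
print(re.sub(url_re, link_repl, text))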
0
Example 74
def _parse_xml(self, data):
# For some reason MAL returns an XML file with HTML exclusive
# entities like &aacute;, so we have to create a custom XMLParser
# to convert these entities correctly.
ENTITIES = {
"nbsp": u'\u00A0',
"iexcl": u'\u00A1',
"cent": u'\u00A2',
"pound": u'\u00A3',
"curren": u'\u00A4',
"yen": u'\u00A5',
"brvbar": u'\u00A6',
"sect": u'\u00A7',
"uml": u'\u00A8',
"copy": u'\u00A9',
"ordf": u'\u00AA',
"laquo": u'\u00AB',
"not": u'\u00AC',
"shy": u'\u00AD',
"reg": u'\u00AE',
"macr": u'\u00AF',
"deg": u'\u00B0',
"plusmn": u'\u00B1',
"sup2": u'\u00B2',
"sup3": u'\u00B3',
"acute": u'\u00B4',
"micro": u'\u00B5',
"para": u'\u00B6',
"middot": u'\u00B7',
"cedil": u'\u00B8',
"sup1": u'\u00B9',
"ordm": u'\u00BA',
"raquo": u'\u00BB',
"frac14": u'\u00BC',
"frac12": u'\u00BD',
"frac34": u'\u00BE',
"iquest": u'\u00BF',
"Agrave": u'\u00C0',
"Aacute": u'\u00C1',
"Acirc": u'\u00C2',
"Atilde": u'\u00C3',
"Auml": u'\u00C4',
"Aring": u'\u00C5',
"AElig": u'\u00C6',
"Ccedil": u'\u00C7',
"Egrave": u'\u00C8',
"Eacute": u'\u00C9',
"Ecirc": u'\u00CA',
"Euml": u'\u00CB',
"Igrave": u'\u00CC',
"Iacute": u'\u00CD',
"Icirc": u'\u00CE',
"Iuml": u'\u00CF',
"ETH": u'\u00D0',
"Ntilde": u'\u00D1',
"Ograve": u'\u00D2',
"Oacute": u'\u00D3',
"Ocirc": u'\u00D4',
"Otilde": u'\u00D5',
"Ouml": u'\u00D6',
"times": u'\u00D7',
"Oslash": u'\u00D8',
"Ugrave": u'\u00D9',
"Uacute": u'\u00DA',
"Ucirc": u'\u00DB',
"Uuml": u'\u00DC',
"Yacute": u'\u00DD',
"THORN": u'\u00DE',
"szlig": u'\u00DF',
"agrave": u'\u00E0',
"aacute": u'\u00E1',
"acirc": u'\u00E2',
"atilde": u'\u00E3',
"auml": u'\u00E4',
"aring": u'\u00E5',
"aelig": u'\u00E6',
"ccedil": u'\u00E7',
"egrave": u'\u00E8',
"eacute": u'\u00E9',
"ecirc": u'\u00EA',
"euml": u'\u00EB',
"igrave": u'\u00EC',
"iacute": u'\u00ED',
"icirc": u'\u00EE',
"iuml": u'\u00EF',
"eth": u'\u00F0',
"ntilde": u'\u00F1',
"ograve": u'\u00F2',
"oacute": u'\u00F3',
"ocirc": u'\u00F4',
"otilde": u'\u00F5',
"ouml": u'\u00F6',
"divide": u'\u00F7',
"oslash": u'\u00F8',
"ugrave": u'\u00F9',
"uacute": u'\u00FA',
"ucirc": u'\u00FB',
"uuml": u'\u00FC',
"yacute": u'\u00FD',
"thorn": u'\u00FE',
"yuml": u'\u00FF',
"fnof": u'\u0192',
"Alpha": u'\u0391',
"Beta": u'\u0392',
"Gamma": u'\u0393',
"Delta": u'\u0394',
"Epsilon": u'\u0395',
"Zeta": u'\u0396',
"Eta": u'\u0397',
"Theta": u'\u0398',
"Iota": u'\u0399',
"Kappa": u'\u039A',
"Lambda": u'\u039B',
"Mu": u'\u039C',
"Nu": u'\u039D',
"Xi": u'\u039E',
"Omicron": u'\u039F',
"Pi": u'\u03A0',
"Rho": u'\u03A1',
"Sigma": u'\u03A3',
"Tau": u'\u03A4',
"Upsilon": u'\u03A5',
"Phi": u'\u03A6',
"Chi": u'\u03A7',
"Psi": u'\u03A8',
"Omega": u'\u03A9',
"alpha": u'\u03B1',
"beta": u'\u03B2',
"gamma": u'\u03B3',
"delta": u'\u03B4',
"epsilon": u'\u03B5',
"zeta": u'\u03B6',
"eta": u'\u03B7',
"theta": u'\u03B8',
"iota": u'\u03B9',
"kappa": u'\u03BA',
"lambda": u'\u03BB',
"mu": u'\u03BC',
"nu": u'\u03BD',
"xi": u'\u03BE',
"omicron": u'\u03BF',
"pi": u'\u03C0',
"rho": u'\u03C1',
"sigmaf": u'\u03C2',
"sigma": u'\u03C3',
"tau": u'\u03C4',
"upsilon": u'\u03C5',
"phi": u'\u03C6',
"chi": u'\u03C7',
"psi": u'\u03C8',
"omega": u'\u03C9',
"thetasym": u'\u03D1',
"upsih": u'\u03D2',
"piv": u'\u03D6',
"bull": u'\u2022',
"hellip": u'\u2026',
"prime": u'\u2032',
"Prime": u'\u2033',
"oline": u'\u203E',
"frasl": u'\u2044',
"weierp": u'\u2118',
"image": u'\u2111',
"real": u'\u211C',
"trade": u'\u2122',
"alefsym": u'\u2135',
"larr": u'\u2190',
"uarr": u'\u2191',
"rarr": u'\u2192',
"darr": u'\u2193',
"harr": u'\u2194',
"crarr": u'\u21B5',
"lArr": u'\u21D0',
"uArr": u'\u21D1',
"rArr": u'\u21D2',
"dArr": u'\u21D3',
"hArr": u'\u21D4',
"forall": u'\u2200',
"part": u'\u2202',
"exist": u'\u2203',
"empty": u'\u2205',
"nabla": u'\u2207',
"isin": u'\u2208',
"notin": u'\u2209',
"ni": u'\u220B',
"prod": u'\u220F',
"sum": u'\u2211',
"minus": u'\u2212',
"lowast": u'\u2217',
"radic": u'\u221A',
"prop": u'\u221D',
"infin": u'\u221E',
"ang": u'\u2220',
"and": u'\u2227',
"or": u'\u2228',
"cap": u'\u2229',
"cup": u'\u222A',
"int": u'\u222B',
"there4": u'\u2234',
"sim": u'\u223C',
"cong": u'\u2245',
"asymp": u'\u2248',
"ne": u'\u2260',
"equiv": u'\u2261',
"le": u'\u2264',
"ge": u'\u2265',
"sub": u'\u2282',
"sup": u'\u2283',
"nsub": u'\u2284',
"sube": u'\u2286',
"supe": u'\u2287',
"oplus": u'\u2295',
"otimes": u'\u2297',
"perp": u'\u22A5',
"sdot": u'\u22C5',
"lceil": u'\u2308',
"rceil": u'\u2309',
"lfloor": u'\u230A',
"rfloor": u'\u230B',
"lang": u'\u2329',
"rang": u'\u232A',
"loz": u'\u25CA',
"spades": u'\u2660',
"clubs": u'\u2663',
"hearts": u'\u2665',
"diams": u'\u2666',
"quot": u'\"' ,
"amp": u'&' ,
"lt": u'<' ,
"gt": u'>' ,
"OElig": u'\u0152',
"oelig": u'\u0153',
"Scaron": u'\u0160',
"scaron": u'\u0161',
"Yuml": u'\u0178',
"circ": u'\u02C6',
"tilde": u'\u02DC',
"ensp": u'\u2002',
"emsp": u'\u2003',
"thinsp": u'\u2009',
"zwnj": u'\u200C',
"zwj": u'\u200D',
"lrm": u'\u200E',
"rlm": u'\u200F',
"ndash": u'\u2013',
"mdash": u'\u2014',
"lsquo": u'\u2018',
"rsquo": u'\u2019',
"sbquo": u'\u201A',
"ldquo": u'\u201C',
"rdquo": u'\u201D',
"bdquo": u'\u201E',
"dagger": u'\u2020',
"Dagger": u'\u2021',
"permil": u'\u2030',
"lsaquo": u'\u2039',
"rsaquo": u'\u203A',
"euro": u'\u20AC',
}
# http://stackoverflow.com/a/35591479/2016221
magic = '''<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd" [\n'''
magic += ''.join("<!ENTITY %s '&#%d;'>\n" % (key, ord(value)) for key, value in ENTITIES.items())
magic += '\n]>'
# strip xml declaration since we're concatenating something before it
data = re.sub('<\?.*?\?>', '', data)
return ET.fromstring(magic + data)
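The closing re.sub strips the XML declaration so the DOCTYPE carrying the custom entity definitions can be concatenated in front of the document. Sketch (the root tag is sample data):
import re

data = '<?xml version="1.0" encoding="UTF-8"?>\n<myanimelist></myanimelist>'
# Drop the <?xml ... ?> declaration so another prolog can be prepended.
data = re.sub(r'<\?.*?\?>', '', data)
print(data)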
0
Example 75
def handle_line(self, line):
"""
Render a single logical line from the module, and write the
generated HTML to C{self.out}.
@param line: A single logical line, encoded as a list of
C{(toktype,toktext)} pairs corresponding to the tokens in
the line.
"""
# def_name is the name of the function or class defined by
# this line; or None if no function or class is defined.
def_name = None
# def_type is the type of the function or class defined by
# this line; or None if no function or class is defined.
def_type = None
# does this line start a class/func def?
starting_def_block = False
in_base_list = False
in_param_list = False
in_param_default = 0
at_module_top = (self.lineno == 1)
ended_def_blocks = 0
# The html output.
if self.ADD_LINE_NUMBERS:
s = self.lineno_to_html()
self.lineno += 1
else:
s = ''
s += ' <tt class="py-line">'
# Loop through each token, and colorize it appropriately.
for i, (toktype, toktext) in enumerate(line):
if type(s) is not str:
if type(s) is unicode:
log.error('While colorizing %s -- got unexpected '
'unicode string' % self.module_name)
s = s.encode('ascii', 'xmlcharrefreplace')
else:
raise ValueError('Unexpected value for s -- %s' %
type(s).__name__)
# For each token, determine its css class and whether it
# should link to a url.
css_class = None
url = None
tooltip = None
onclick = uid = targets = None # these 3 are used together.
# Is this token the class name in a class definition? If
# so, then make it a link back into the API docs.
if i>=2 and line[i-2][1] == 'class':
in_base_list = True
css_class = self.CSS_CLASSES['DEFNAME']
def_name = toktext
def_type = 'class'
if 'func' not in self.context_types:
cls_name = self.context_name(def_name)
url = self.name2url(cls_name)
s = self.mark_def(s, cls_name)
starting_def_block = True
# Is this token the function name in a function def? If
# so, then make it a link back into the API docs.
elif i>=2 and line[i-2][1] == 'def':
in_param_list = True
css_class = self.CSS_CLASSES['DEFNAME']
def_name = toktext
def_type = 'func'
if 'func' not in self.context_types:
cls_name = self.context_name()
func_name = self.context_name(def_name)
url = self.name2url(cls_name, def_name)
s = self.mark_def(s, func_name)
starting_def_block = True
# For each indent, update the indents list (which we use
# to keep track of indentation strings) and the context
# list. If this indent is the start of a class or
# function def block, then self.def_name will be its name;
# otherwise, it will be None.
elif toktype == token.INDENT:
self.indents.append(toktext)
self.context.append(self.def_name)
self.context_types.append(self.def_type)
# When we dedent, pop the last elements off the indents
# list and the context list. If the last context element
# is a name, then we're ending a class or function def
# block; so write an end-div tag.
elif toktype == token.DEDENT:
self.indents.pop()
self.context_types.pop()
if self.context.pop():
ended_def_blocks += 1
# If this token contains whitespace, then don't bother to
# give it a css tag.
elif toktype in (None, tokenize.NL, token.NEWLINE,
token.ENDMARKER):
css_class = None
# Check if the token is a keyword.
elif toktype == token.NAME and keyword.iskeyword(toktext):
css_class = self.CSS_CLASSES['KEYWORD']
elif in_base_list and toktype == token.NAME:
css_class = self.CSS_CLASSES['BASECLASS']
elif (in_param_list and toktype == token.NAME and
not in_param_default):
css_class = self.CSS_CLASSES['PARAM']
# Class/function docstring.
elif (self.def_name and line[i-1][0] == token.INDENT and
self.is_docstring(line, i)):
css_class = self.CSS_CLASSES['DOCSTRING']
# Module docstring.
elif at_module_top and self.is_docstring(line, i):
css_class = self.CSS_CLASSES['DOCSTRING']
# check for decorators??
elif (toktype == token.NAME and
((i>0 and line[i-1][1]=='@') or
(i>1 and line[i-1][0]==None and line[i-2][1] == '@'))):
css_class = self.CSS_CLASSES['DECORATOR']
self.has_decorators = True
# If it's a name, try to link it.
elif toktype == token.NAME:
css_class = self.CSS_CLASSES['NAME']
# If we have a variable named `toktext` in the current
# context, then link to that. Note that if we're inside
# a function, then that function is our context, not
# the namespace that contains it. [xx] this isn't always
# the right thing to do.
if (self.GUESS_LINK_TARGETS and self.docindex is not None
and self.url_func is not None):
context = [n for n in self.context if n is not None]
container = self.docindex.get_vardoc(
DottedName(self.module_name, *context))
if isinstance(container, NamespaceDoc):
doc = container.variables.get(toktext)
if doc is not None:
url = self.url_func(doc)
tooltip = str(doc.canonical_name)
# Otherwise, check the name_to_docs index to see what
# else this name might refer to.
if (url is None and self.name_to_docs is not None
and self.url_func is not None):
docs = self.name_to_docs.get(toktext)
if docs:
tooltip='\n'.join([str(d.canonical_name)
for d in docs])
if len(docs) == 1 and self.GUESS_LINK_TARGETS:
url = self.url_func(docs[0])
else:
uid, onclick, targets = self.doclink(toktext, docs)
# For all other tokens, look up the CSS class to use
# based on the token's type.
else:
if toktype == token.OP and toktext in self.CSS_CLASSES:
css_class = self.CSS_CLASSES[toktext]
elif token.tok_name[toktype] in self.CSS_CLASSES:
css_class = self.CSS_CLASSES[token.tok_name[toktype]]
else:
css_class = None
# update our status..
if toktext == ':':
in_base_list = False
in_param_list = False
if toktext == '=' and in_param_list:
in_param_default = True
if in_param_default:
if toktext in ('(','[','{'): in_param_default += 1
if toktext in (')',']','}'): in_param_default -= 1
if toktext == ',' and in_param_default == 1:
in_param_default = 0
# Write this token, with appropriate colorization.
if tooltip and self.ADD_TOOLTIPS:
tooltip_html = ' title="%s"' % tooltip
else: tooltip_html = ''
if css_class: css_class_html = ' class="%s"' % css_class
else: css_class_html = ''
if onclick:
if targets: targets_html = ' targets="%s"' % targets
else: targets_html = ''
s += ('<tt id="%s"%s%s><a%s%s href="#" onclick="%s">' %
(uid, css_class_html, targets_html, tooltip_html,
css_class_html, onclick))
elif url:
if isinstance(url, unicode):
url = url.encode('ascii', 'xmlcharrefreplace')
s += ('<a%s%s href="%s">' %
(tooltip_html, css_class_html, url))
elif css_class_html or tooltip_html:
s += '<tt%s%s>' % (tooltip_html, css_class_html)
if i == len(line)-1:
s += ' </tt>' # Closes <tt class="py-line">
s += cgi.escape(toktext)
else:
try:
s += self.add_line_numbers(cgi.escape(toktext), css_class)
except Exception, e:
print (toktext, css_class, toktext.encode('ascii'))
raise
if onclick: s += "</a></tt>"
elif url: s += '</a>'
elif css_class_html or tooltip_html: s += '</tt>'
if self.ADD_DEF_BLOCKS:
for i in range(ended_def_blocks):
self.out(self.END_DEF_BLOCK)
# Strip any empty <tt>s.
s = re.sub(r'<tt class="[\w+]"></tt>', '', s)
# Write the line.
self.out(s)
if def_name and starting_def_block:
self.out('</div>')
# Add div's if we're starting a def block.
if (self.ADD_DEF_BLOCKS and def_name and starting_def_block and
(line[-2][1] == ':')):
indentation = (''.join(self.indents)+' ').replace(' ', '+')
linenum_padding = '+'*self.linenum_size
name=self.context_name(def_name)
self.out(self.START_DEF_BLOCK % (name, linenum_padding,
indentation, name))
self.def_name = def_name
self.def_type = def_type
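The final re.sub drops empty <tt> spans from the colorized HTML line. Note that the character class in the original pattern, [\w+], matches a single character only; the sketch below uses the presumably intended quantifier and an invented sample line:
import re

s = '<tt class="py-line"><tt class="py-name"></tt>x = 1</tt>'
# Remove <tt> elements that ended up with no content.
s = re.sub(r'<tt class="[\w-]+"></tt>', '', s)
print(s)   # <tt class="py-line">x = 1</tt>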
0
Example 76
def mark(src):
global refs
t0 = time.time()
refs = {}
# split source in sections
# sections can be :
# - a block-level HTML element (markdown syntax will not be processed)
# - a script
# - a span-level HTML tag (markdown syntax will be processed)
# - a code block
# normalise line feeds
src = src.replace('\r\n','\n')
# lines followed by dashes
src = re.sub(r'(.*?)\n=+\n', '\n# \\1\n', src)
src = re.sub(r'(.*?)\n-+\n', '\n## \\1\n', src)
lines = src.split('\n')+['']
i = bq = 0
ul = ol = 0
while i<len(lines):
# enclose lines starting by > in a blockquote
if lines[i].startswith('>'):
nb = 1
while nb<len(lines[i]) and lines[i][nb]=='>':
nb += 1
lines[i] = lines[i][nb:]
if nb>bq:
lines.insert(i,'<blockquote>'*(nb-bq))
i += 1
bq = nb
elif nb<bq:
lines.insert(i,'</blockquote>'*(bq-nb))
i += 1
bq = nb
elif bq>0:
lines.insert(i,'</blockquote>'*bq)
i += 1
bq = 0
# unordered lists
if lines[i].strip() and lines[i].lstrip()[0] in '-+*' \
and len(lines[i].lstrip())>1 \
and lines[i].lstrip()[1]==' ' \
and (i==0 or ul or not lines[i-1].strip()):
# line indentation indicates nesting level
nb = 1+len(lines[i])-len(lines[i].lstrip())
lines[i] = '<li>'+lines[i][nb:]
if nb>ul:
lines.insert(i,'<ul>'*(nb-ul))
i += 1
elif nb<ul:
lines.insert(i,'</ul>'*(ul-nb))
i += 1
ul = nb
elif ul and not lines[i].strip():
if i<len(lines)-1 and lines[i+1].strip() \
and not lines[i+1].startswith(' '):
nline = lines[i+1].lstrip()
if nline[0] in '-+*' and len(nline)>1 and nline[1]==' ':
pass
else:
lines.insert(i,'</ul>'*ul)
i += 1
ul = 0
# ordered lists
mo = re.search(r'^(\d+\.)',lines[i])
if mo:
if not ol:
lines.insert(i,'<ol>')
i += 1
lines[i] = '<li>'+lines[i][len(mo.groups()[0]):]
ol = 1
elif ol and not lines[i].strip() and i<len(lines)-1 \
and not lines[i+1].startswith(' ') \
and not re.search(r'^(\d+\.)',lines[i+1]):
lines.insert(i,'</ol>')
i += 1
ol = 0
i += 1
if ul:
lines.append('</ul>'*ul)
if ol:
lines.append('</ol>'*ol)
if bq:
lines.append('</blockquote>'*bq)
t1 = time.time()
#print('part 1', t1-t0)
sections = []
scripts = []
section = Marked()
i = 0
while i<len(lines):
line = lines[i]
if line.strip() and line.startswith(' '):
if isinstance(section,Marked) and section.line:
sections.append(section)
section = CodeBlock(line[4:])
j = i+1
while j<len(lines) and lines[j].startswith(' '):
section.lines.append(lines[j][4:])
j += 1
sections.append(section)
section = Marked()
i = j
continue
elif line.strip() and line.startswith("```"):
# fenced code blocks à la Github Flavoured Markdown
if isinstance(section,Marked) and section.line:
sections.append(section)
section = CodeBlock(line)
j = i+1
while j<len(lines) and not lines[j].startswith("```"):
section.lines.append(lines[j])
j += 1
sections.append(section)
section = Marked()
i = j+1
continue
elif line.lower().startswith('<script'):
if isinstance(section,Marked) and section.line:
sections.append(section)
section = Marked()
j = i+1
while j<len(lines):
if lines[j].lower().startswith('</script>'):
scripts.append('\n'.join(lines[i+1:j]))
for k in range(i,j+1):
lines[k] = ''
break
j += 1
i = j
continue
# ATX header
elif line.startswith('#'):
level = 1
line = lines[i]
while level<len(line) and line[level]=='#' and level<=6:
level += 1
if not line[level+1:].strip():
if level==1:
i += 1
continue
else:
lines[i] = '<H%s>%s</H%s>\n' %(level-1,'#',level-1)
else:
lines[i] = '<H%s>%s</H%s>\n' %(level,line[level+1:],level)
else:
mo = re.search(ref_pattern,line)
if mo is not None:
if isinstance(section,Marked) and section.line:
sections.append(section)
section = Marked()
key = mo.groups()[0]
value = URL(mo.groups()[1])
refs[key.lower()] = value
else:
if not line.strip():
line = '<p></p>'
if section.line:
section.line += '\n'
section.line += line
i += 1
t2 = time.time()
#print('section 2', t2-t1)
if isinstance(section,Marked) and section.line:
sections.append(section)
res = ''
for section in sections:
mk,_scripts = section.to_html()
res += mk
scripts += _scripts
#print('end mark', time.time()-t2)
return res,scripts
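The two re.sub calls near the top of mark() rewrite setext-style headers (a line underlined with = or -) into ATX-style # headers, reusing the captured title through the \1 backreference. For illustration, a tiny self-contained snippet with an invented input:
import re

src = "Title\n=====\nSubtitle\n-----\nbody text"
src = re.sub(r'(.*?)\n=+\n', '\n# \\1\n', src)   # "Title" underlined with = becomes "# Title"
src = re.sub(r'(.*?)\n-+\n', '\n## \\1\n', src)  # "Subtitle" underlined with - becomes "## Subtitle"
print(src)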
0
Example 77
Project: ZenPacks.zenoss.OpenStackInfrastructure Source File: utils.py
def create_model_data(dmd):
'''
Return an Endpoint suitable for Impact functional testing.
'''
# DeviceClass
dc = dmd.Devices.createOrganizer('/OpenStack/Infrastructure')
dc.setZenProperty('zPythonClass', 'ZenPacks.zenoss.OpenStackInfrastructure.Endpoint')
# OSProcessClasses
osc = dmd.Processes.createOrganizer('/OpenStack')
for binary in ['nova-cert', 'nova-conductor', 'nova-consoleauth', 'nova-scheduler', 'nova-compute', 'nova-api']:
osc.manage_addOSProcessClass(binary)
# Endpoint
endpoint = dc.createInstance('endpoint')
# Org Structure
from ZenPacks.zenoss.OpenStackInfrastructure.Region import Region
from ZenPacks.zenoss.OpenStackInfrastructure.AvailabilityZone import AvailabilityZone
region = addContained(endpoint, "components", Region("region"))
zone1 = addContained(endpoint, "components", AvailabilityZone("zone1"))
zone2 = addContained(endpoint, "components", AvailabilityZone("zone2"))
addNonContained(region, "childOrgs", zone1)
addNonContained(region, "childOrgs", zone2)
# Tenants
from ZenPacks.zenoss.OpenStackInfrastructure.Tenant import Tenant
tenant1 = addContained(endpoint, "components", Tenant("tenant-tenant1"))
tenant2 = addContained(endpoint, "components", Tenant("tenant-tenant2"))
# Flavor
from ZenPacks.zenoss.OpenStackInfrastructure.Flavor import Flavor
flavor1 = addContained(endpoint, "components", Flavor("flavor1"))
# Image
from ZenPacks.zenoss.OpenStackInfrastructure.Image import Image
image1 = addContained(endpoint, "components", Image("image1"))
# Host
from ZenPacks.zenoss.OpenStackInfrastructure.Host import Host
computehost1 = addContained(endpoint, "components", Host("computehost1"))
addNonContained(computehost1, "orgComponent", zone1)
computehost2 = addContained(endpoint, "components", Host("computehost2"))
addNonContained(computehost2, "orgComponent", zone2)
controllerhost = addContained(endpoint, "components", Host("controllerhost"))
addNonContained(controllerhost, "orgComponent", zone1)
# SoftwareComponents
from ZenPacks.zenoss.OpenStackInfrastructure.NovaService import NovaService
from ZenPacks.zenoss.OpenStackInfrastructure.NovaApi import NovaApi
nova_consoleauth = addContained(endpoint, "components", NovaService("nova-consoleauth"))
nova_consoleauth.binary = 'nova-consoleauth'
addNonContained(nova_consoleauth, "hostedOn", controllerhost)
addNonContained(nova_consoleauth, "orgComponent", zone1)
nova_scheduler = addContained(endpoint, "components", NovaService("nova-scheduler"))
nova_scheduler.binary = 'nova-scheduler'
addNonContained(nova_scheduler, "hostedOn", controllerhost)
addNonContained(nova_scheduler, "orgComponent", zone1)
nova_conductor1 = addContained(endpoint, "components", NovaService("nova-conductor1"))
nova_conductor1.binary = 'nova-conductor'
nova_conductor2 = addContained(endpoint, "components", NovaService("nova-conductor2"))
nova_conductor2.binary = 'nova-conductor'
addNonContained(nova_conductor1, "hostedOn", computehost1)
addNonContained(nova_conductor1, "orgComponent", zone1)
addNonContained(nova_conductor2, "hostedOn", computehost2)
addNonContained(nova_conductor2, "orgComponent", zone2)
nova_compute1 = addContained(endpoint, "components", NovaService("nova-compute1"))
nova_compute1.binary = 'nova-compute'
nova_compute2 = addContained(endpoint, "components", NovaService("nova-compute2"))
nova_compute2.binary = 'nova-compute'
addNonContained(nova_compute1, "hostedOn", computehost1)
addNonContained(nova_compute1, "orgComponent", zone1)
addNonContained(nova_compute2, "hostedOn", computehost2)
addNonContained(nova_compute2, "orgComponent", zone2)
nova_cert = addContained(endpoint, "components", NovaService("nova-cert"))
nova_cert.binary = 'nova-cert'
addNonContained(nova_cert, "hostedOn", controllerhost)
addNonContained(nova_cert, "orgComponent", zone1)
nova_api = addContained(endpoint, "components", NovaApi("nova-api"))
nova_api.binary = 'nova-api'
addNonContained(nova_api, "hostedOn", controllerhost)
addNonContained(nova_api, "orgComponent", region)
# Hypervisor
from ZenPacks.zenoss.OpenStackInfrastructure.Hypervisor import Hypervisor
hypervisor1 = addContained(endpoint, "components", Hypervisor("hypervisor1"))
hypervisor2 = addContained(endpoint, "components", Hypervisor("hypervisor2"))
addNonContained(hypervisor1, "host", computehost1)
addNonContained(hypervisor2, "host", computehost2)
# Instance
from ZenPacks.zenoss.OpenStackInfrastructure.Instance import Instance
instance1 = addContained(endpoint, "components", Instance("instance1"))
instance2 = addContained(endpoint, "components", Instance("instance2"))
instance3 = addContained(endpoint, "components", Instance("instance3"))
instance4 = addContained(endpoint, "components", Instance("instance4"))
addNonContained(instance1, "flavor", flavor1)
addNonContained(instance2, "flavor", flavor1)
addNonContained(instance3, "flavor", flavor1)
addNonContained(instance4, "flavor", flavor1)
addNonContained(instance1, "image", image1)
addNonContained(instance2, "image", image1)
addNonContained(instance3, "image", image1)
addNonContained(instance4, "image", image1)
addNonContained(instance1, "hypervisor", hypervisor1)
addNonContained(instance2, "hypervisor", hypervisor1)
addNonContained(instance3, "hypervisor", hypervisor2)
addNonContained(instance4, "hypervisor", hypervisor2)
addNonContained(instance1, "tenant", tenant1)
addNonContained(instance2, "tenant", tenant2)
addNonContained(instance3, "tenant", tenant1)
addNonContained(instance4, "tenant", tenant2)
# Vnic
from ZenPacks.zenoss.OpenStackInfrastructure.Vnic import Vnic
instance1vnic1 = addContained(instance1, "vnics", Vnic("instance1_vnic1"))
instance1vnic1.macaddress = 'de:ad:be:ef:01:01'
instance1vnic1.index_object()
instance1vnic2 = addContained(instance1, "vnics", Vnic("instance1_vnic2"))
instance1vnic2.macaddress = 'de:ad:be:ef:01:02'
instance1vnic2.index_object()
instance2vnic1 = addContained(instance2, "vnics", Vnic("instance2_vnic1"))
instance2vnic1.macaddress = 'de:ad:be:ef:02:01'
instance2vnic1.index_object()
instance2vnic2 = addContained(instance2, "vnics", Vnic("instance2_vnic2"))
instance2vnic2.macaddress = 'de:ad:be:ef:02:02'
instance2vnic2.index_object()
instance3vnic1 = addContained(instance3, "vnics", Vnic("instance3_vnic1"))
instance3vnic1.macaddress = 'de:ad:be:ef:03:01'
instance3vnic1.index_object()
instance3vnic2 = addContained(instance3, "vnics", Vnic("instance3_vnic2"))
instance3vnic2.macaddress = 'de:ad:be:ef:03:02'
instance3vnic2.index_object()
instance4vnic1 = addContained(instance4, "vnics", Vnic("instance4_vnic1"))
instance4vnic1.macaddress = 'de:ad:be:ef:04:01'
instance4vnic1.index_object()
instance4vnic2 = addContained(instance4, "vnics", Vnic("instance4_vnic2"))
instance4vnic2.macaddress = 'de:ad:be:ef:04:02'
instance4vnic2.index_object()
# Linux guest devices (Virtual)
# make sure that the interfaces line up.
guest_dc = dmd.Devices.createOrganizer('/Server/SSH/Linux')
guest_dc.setZenProperty('zPythonClass', 'Products.ZenModel.Device')
guest_instance1 = guest_dc.createInstance("g-instance1")
guest_instance2 = guest_dc.createInstance("g-instance2")
guest_instance3 = guest_dc.createInstance("g-instance3")
# instance4 is not monitored by zenoss.
from Products.ZenModel.IpInterface import IpInterface
def add_linux_interface_mac(device, interface_name, macaddress):
eth_if = IpInterface(interface_name)
device.os.interfaces._setObject(eth_if.id, eth_if)
eth_if = device.os.interfaces._getOb(eth_if.id)
eth_if.macaddress = macaddress
eth_if.index_object()
device.index_object()
add_linux_interface_mac(guest_instance1, 'eth0', 'de:ad:be:ef:01:01')
add_linux_interface_mac(guest_instance1, 'eth1', 'de:ad:be:ef:01:02')
add_linux_interface_mac(guest_instance2, 'eth0', 'de:ad:be:ef:02:01')
add_linux_interface_mac(guest_instance2, 'eth1', 'de:ad:be:ef:02:02')
add_linux_interface_mac(guest_instance3, 'eth0', 'de:ad:be:ef:03:01')
add_linux_interface_mac(guest_instance3, 'eth1', 'de:ad:be:ef:03:02')
# Linux devices (Physical)
# (link to host1 and host2)
phys_dc = dmd.Devices.createOrganizer('/Server/SSH/Linux/NovaHost')
phys_dc.setZenProperty('zPythonClass', 'Products.ZenModel.Device')
phys_computehost1 = phys_dc.createInstance("p-computehost1")
phys_computehost2 = phys_dc.createInstance("p-computehost2")
phys_controllerhost = phys_dc.createInstance("p-controllerhost")
# Link the host components to the physical hosts.
computehost1.claim_proxy_device(phys_computehost1)
computehost2.claim_proxy_device(phys_computehost2)
controllerhost.claim_proxy_device(phys_controllerhost)
# Add OSprocesses for each of the software components.
from ZenPacks.zenoss.OpenStackInfrastructure.SoftwareComponent import SoftwareComponent
from Products.ZenModel.OSProcess import OSProcess
for component in endpoint.components():
if isinstance(component, SoftwareComponent):
binary = component.binary
linux_device = component.hostedOn().proxy_device()
process_id = '%s_%s' % (linux_device.id, binary)
process = OSProcess(process_id)
linux_device.os.processes._setObject(process_id, process)
process = linux_device.os.processes._getOb(process_id)
process_class = re.sub(r'\d+$', '', binary)
process.setOSProcessClass("Processes/OpenStack/osProcessClasses/%s" % process_class)
# Cinder
from ZenPacks.zenoss.OpenStackInfrastructure.Volume import Volume
from ZenPacks.zenoss.OpenStackInfrastructure.VolSnapshot import VolSnapshot
volume1 = addContained(endpoint, "components", Volume("volume1"))
volsnap1 = addContained(endpoint, "components", VolSnapshot("volsnap1"))
addNonContained(instance1, "volumes", volume1)
addNonContained(volume1, "volSnapshots", volsnap1)
return {
'endpoint': endpoint,
'phys_dc': phys_dc,
'guest_dc': guest_dc
}
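The single re.sub here anchors the pattern with $ so only a trailing numeric suffix is removed, letting per-host component ids such as nova-conductor1 map back to the nova-conductor process class. A minimal standalone sketch with invented names:
import re

for binary in ("nova-conductor1", "nova-compute2", "nova-api"):
    # strip only a trailing run of digits; digits elsewhere in the name are kept
    print(re.sub(r'\d+$', '', binary))
# nova-conductor, nova-compute, nova-api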
0
Example 78
Project: pelisalacarta Source File: zentorrents.py
def fanart(item):
logger.info("pelisalacarta.peliculasdk fanart")
itemlist = []
url = item.url
data = scrapertools.cachePage(url)
data = re.sub(r"\n|\r|\t|\s{2}| ","",data)
if "peliculas" in item.url:
if "microhd" in url or "web" in url or "1080" in url or "bluray" in url or "HDRip" in item.title:
title= scrapertools.get_match(data,'<title>([^"]+) \[')
title= re.sub(r"3D|[0-9]|SBS|\(.*?\)|\[.*?\]|","",title)
title=title.replace('Perdón','perdon')
title= title.replace(' ','%20')
else:
title= scrapertools.get_match(data,'<title>([^"]+) -')
title= re.sub(r"3D|[0-9]|SBS|\(.*?\)|\[.*?\]|","",title)
title= title.replace('á','a')
title= title.replace('Á','A')
title= title.replace('é','e')
title= title.replace('í','i')
title= title.replace('ó','o')
title= title.replace('ú','u')
title= title.replace('ñ','n')
title= title.replace(' ','%20')
url="http://api.themoviedb.org/3/search/movie?api_key=2e2160006592024ba87ccdf78c28f49f&query=" + title + "&language=es&include_adult=false"
data = scrapertools.cachePage(url)
data = re.sub(r"\n|\r|\t|\s{2}| ","",data)
patron = '"page":1.*?,"id":(.*?),.*?"backdrop_path":"\\\(.*?)"'
matches = re.compile(patron,re.DOTALL).findall(data)
if len(matches)==0:
extra=item.thumbnail
show= item.thumbnail
posterdb = item.thumbnail
fanart_info = item.thumbnail
fanart_trailer = item.thumbnail
category= item.thumbnail
itemlist.append( Item(channel=item.channel, title=item.title, url=item.url, action="findvideos", thumbnail=item.thumbnail, fanart=item.thumbnail,extra = extra, show= show, category= category,folder=True) )
for id, fan in matches:
try:
posterdb = scrapertools.get_match(data,'"page":1,.*?"poster_path":"\\\(.*?)"')
posterdb = "https://image.tmdb.org/t/p/original" + posterdb
except:
posterdb = item.thumbnail
fanart="https://image.tmdb.org/t/p/original" + fan
item.extra= fanart
url ="http://api.themoviedb.org/3/movie/"+id+"/images?api_key=2e2160006592024ba87ccdf78c28f49f"
data = scrapertools.cachePage(url)
data = re.sub(r"\n|\r|\t|\s{2}| ","",data)
patron = '"backdrops".*?"file_path":".*?",.*?"file_path":"(.*?)",.*?"file_path":"(.*?)",.*?"file_path":"(.*?)"'
matches = re.compile(patron,re.DOTALL).findall(data)
if len(matches) == 0:
patron = '"backdrops".*?"file_path":"(.*?)",.*?"file_path":"(.*?)",.*?"file_path":"(.*?)"'
matches = re.compile(patron,re.DOTALL).findall(data)
if len(matches) == 0:
fanart_info = item.extra
fanart_trailer = item.extra
fanart_2 = item.extra
for fanart_info, fanart_trailer, fanart_2 in matches:
fanart_info = "https://image.tmdb.org/t/p/original" + fanart_info
fanart_trailer = "https://image.tmdb.org/t/p/original" + fanart_trailer
fanart_2 = "https://image.tmdb.org/t/p/original" + fanart_2
#clearart, fanart_2 y logo
url ="http://webservice.fanart.tv/v3/movies/"+id+"?api_key=dffe90fba4d02c199ae7a9e71330c987"
data = scrapertools.cachePage(url)
data = re.sub(r"\n|\r|\t|\s{2}| ","",data)
patron = '"hdmovielogo":.*?"url": "([^"]+)"'
matches = re.compile(patron,re.DOTALL).findall(data)
if '"moviedisc"' in data:
disc = scrapertools.get_match(data,'"moviedisc":.*?"url": "([^"]+)"')
if '"movieposter"' in data:
poster = scrapertools.get_match(data,'"movieposter":.*?"url": "([^"]+)"')
if '"moviethumb"' in data:
thumb = scrapertools.get_match(data,'"moviethumb":.*?"url": "([^"]+)"')
if '"moviebanner"' in data:
banner= scrapertools.get_match(data,'"moviebanner":.*?"url": "([^"]+)"')
if len(matches)==0:
extra= posterdb
show = fanart_2
category = item.extra
itemlist.append( Item(channel=item.channel, title = item.title , action="findvideos", url=item.url, server="torrent", thumbnail=item.thumbnail, fanart=item.extra, extra=extra, show=show, category= category, folder=True) )
for logo in matches:
if '"hdmovieclearart"' in data:
clear=scrapertools.get_match(data,'"hdmovieclearart":.*?"url": "([^"]+)"')
if '"moviebackground"' in data:
extra=clear
show= fanart_2
if '"moviedisc"' in data:
category= disc
else:
category= clear
itemlist.append( Item(channel=item.channel, title = item.title , action="findvideos", url=item.url, server="torrent", thumbnail=logo, fanart=item.extra, extra=extra,show=show, category= category,folder=True) )
else:
extra= clear
show=fanart_2
if '"moviedisc"' in data:
category= disc
else:
category= clear
itemlist.append( Item(channel=item.channel, title = item.title , action="findvideos", url=item.url, server="torrent", thumbnail=logo, fanart=item.extra, extra=extra,show=show, category= category, folder=True) )
if '"moviebackground"' in data:
if '"hdmovieclearart"' in data:
clear=scrapertools.get_match(data,'"hdmovieclearart":.*?"url": "([^"]+)"')
extra=clear
show= fanart_2
if '"moviedisc"' in data:
category= disc
else:
category= clear
else:
extra=logo
show= fanart_2
if '"moviedisc"' in data:
category= disc
else:
category= logo
itemlist.append( Item(channel=item.channel, title = item.title , action="findvideos", url=item.url, server="torrent", thumbnail=logo, fanart=item.extra, extra=extra,show=show, category= category, folder=True) )
if not '"hdmovieclearart"' in data and not '"moviebackground"' in data:
extra= logo
show= fanart_2
if '"moviedisc"' in data:
category= disc
else:
category= item.extra
itemlist.append( Item(channel=item.channel, title = item.title , action="findvideos", url=item.url, server="torrent", thumbnail=logo, fanart=item.extra, extra=extra,show=show , category= category, folder=True) )
else:
if "series" in item.url:
if "hdtv" in item.url or "720" in item.title or "1080p" in item.title:
title= scrapertools.get_match(data,'<title>([^"]+) \[')
title= re.sub(r"3D|'|,|[0-9]|#|;|\[.*?\]|SBS|-|","",title)
title= title.replace('Temporada','')
title= title.replace('Fin','')
title= title.replace('x','')
title= title.replace('Heli','Helix')
title= title.replace('Anatomía','Anatomia')
title= title.replace('á','a')
title= title.replace('Á','A')
title= title.replace('é','e')
title= title.replace('í','i')
title= title.replace('ó','o')
title= title.replace('ú','u')
title= title.replace('ñ','n')
title= title.replace(' ','%20')
else:
title= scrapertools.get_match(data,'<title>([^"]+) -')
title= re.sub(r"3D|'|,|[0-9]|#|;|´|VOSE|\[.*?\]|-|","",title)
title= title.replace('Temporada','')
title= title.replace('Fin','')
title= title.replace('x','')
title= title.replace('á','a')
title= title.replace('Á','A')
title= title.replace('é','e')
title= title.replace('í','i')
title= title.replace('ó','o')
title= title.replace('ú','u')
title= title.replace('ñ','n')
title= title.replace('Anatomía','Anatomia')
title= title.replace(' ','%20')
url="http://thetvdb.com/api/GetSeries.php?seriesname=" + title + "&language=es"
if "Erase%20una%20vez%20%20" in title:
url ="http://thetvdb.com/api/GetSeries.php?seriesname=Erase%20una%20vez%20(2011)&language=es"
data = scrapertools.cachePage(url)
data = re.sub(r"\n|\r|\t|\s{2}| ","",data)
patron = '<Data><Series><seriesid>([^<]+)</seriesid>'
matches = re.compile(patron,re.DOTALL).findall(data)
if len(matches)==0:
extra= item.thumbnail
show= item.thumbnail
fanart_info = item.thumbnail
fanart_trailer = item.thumbnail
category= ""
itemlist.append( Item(channel=item.channel, title=item.title, url=item.url, action="findvideos", thumbnail=item.thumbnail, fanart=item.thumbnail ,extra=extra, category= category, show=show , folder=True) )
else:
for id in matches:
category = id
id_serie = id
url ="http://thetvdb.com/api/1D62F2F90030C444/series/"+id_serie+"/banners.xml"
data = scrapertools.cachePage(url)
data = re.sub(r"\n|\r|\t|\s{2}| ","",data)
patron = '<Banners><Banner>.*?<VignettePath>(.*?)</VignettePath>'
matches = re.compile(patron,re.DOTALL).findall(data)
if len(matches)==0:
extra=item.thumbnail
show= item.thumbnail
fanart_info = item.thumbnail
fanart_trailer = item.thumbnail
itemlist.append( Item(channel=item.channel, title=item.title, url=item.url, action="findvideos", thumbnail=item.thumbnail, fanart=item.thumbnail ,category = category, extra=extra, show=show, folder=True) )
for fan in matches:
fanart="http://thetvdb.com/banners/" + fan
item.extra= fanart
patron= '<Banners><Banner>.*?<BannerPath>.*?</BannerPath>.*?</Banner><Banner>.*?<BannerPath>(.*?)</BannerPath>.*?</Banner><Banner>.*?<BannerPath>(.*?)</BannerPath>.*?</Banner><Banner>.*?<BannerPath>(.*?)</BannerPath>'
matches = re.compile(patron,re.DOTALL).findall(data)
if len(matches)==0:
fanart_info= item.extra
fanart_trailer = item.extra
fanart_2 = item.extra
for fanart_info, fanart_trailer, fanart_2 in matches:
fanart_info = "http://thetvdb.com/banners/" + fanart_info
fanart_trailer = "http://thetvdb.com/banners/" + fanart_trailer
fanart_2 = "http://thetvdb.com/banners/" + fanart_2
#clearart, fanart_2 y logo
for id in matches:
url ="http://webservice.fanart.tv/v3/tv/"+id_serie+"?api_key=dffe90fba4d02c199ae7a9e71330c987"
if "Castle" in title:
url ="http://webservice.fanart.tv/v3/tv/83462?api_key=dffe90fba4d02c199ae7a9e71330c987"
data = scrapertools.cachePage(url)
data = re.sub(r"\n|\r|\t|\s{2}| ","",data)
patron = '"clearlogo":.*?"url": "([^"]+)"'
matches = re.compile(patron,re.DOTALL).findall(data)
if '"tvposter"' in data:
tvposter = scrapertools.get_match(data,'"tvposter":.*?"url": "([^"]+)"')
if '"tvbanner"' in data:
tvbanner = scrapertools.get_match(data,'"tvbanner":.*?"url": "([^"]+)"')
if '"tvthumb"' in data:
tvthumb = scrapertools.get_match(data,'"tvthumb":.*?"url": "([^"]+)"')
if '"hdtvlogo"' in data:
hdtvlogo = scrapertools.get_match(data,'"hdtvlogo":.*?"url": "([^"]+)"')
if '"hdclearart"' in data:
hdtvclear = scrapertools.get_match(data,'"hdclearart":.*?"url": "([^"]+)"')
if len(matches)==0:
if '"hdtvlogo"' in data:
if "showbackground" in data:
if '"hdclearart"' in data:
thumbnail = hdtvlogo
extra= hdtvclear
show = fanart_2
else:
thumbnail = hdtvlogo
extra= thumbnail
show = fanart_2
itemlist.append( Item(channel=item.channel, title = item.title , action="findvideos", url=item.url, server="torrent", thumbnail=thumbnail, fanart=item.extra, category=category, extra=extra, show=show, folder=True) )
else:
if '"hdclearart"' in data:
thumbnail= hdtvlogo
extra= hdtvclear
show= fanart_2
else:
thumbnail= hdtvlogo
extra= thumbnail
show= fanart_2
itemlist.append( Item(channel=item.channel, title = item.title , action="findvideos", url=item.url, server="torrent", thumbnail=thumbnail, fanart=item.extra, extra=extra, show=show, category= category, folder=True) )
else:
extra= item.thumbnail
show = fanart_2
itemlist.append( Item(channel=item.channel, title = item.title , action="findvideos", url=item.url, server="torrent", thumbnail=item.thumbnail, fanart=item.extra, extra=extra, show=show, category = category, folder=True) )
for logo in matches:
if '"hdtvlogo"' in data:
thumbnail = hdtvlogo
elif not '"hdtvlogo"' in data :
if '"clearlogo"' in data:
thumbnail= logo
else:
thumbnail= item.thumbnail
if '"clearart"' in data:
clear=scrapertools.get_match(data,'"clearart":.*?"url": "([^"]+)"')
if "showbackground" in data:
extra=clear
show= fanart_2
itemlist.append( Item(channel=item.channel, title = item.title , action="findvideos", url=item.url, server="torrent", thumbnail=thumbnail, fanart=item.extra, extra=extra,show=show, category= category, folder=True) )
else:
extra= clear
show=fanart_2
itemlist.append( Item(channel=item.channel, title = item.title , action="findvideos", url=item.url, server="torrent", thumbnail=thumbnail, fanart=item.extra, extra=extra,show=show, category= category, folder=True) )
if "showbackground" in data:
if '"clearart"' in data:
clear=scrapertools.get_match(data,'"clearart":.*?"url": "([^"]+)"')
extra=clear
show= fanart_2
else:
extra=logo
show= fanart_2
itemlist.append( Item(channel=item.channel, title = item.title , action="findvideos", url=item.url, server="torrent", thumbnail=thumbnail, fanart=item.extra, extra=extra,show=show, category = category, folder=True) )
if not '"clearart"' in data and not '"showbackground"' in data:
if '"hdclearart"' in data:
extra= hdtvclear
show= fanart_2
else:
extra= thumbnail
show= fanart_2
itemlist.append( Item(channel=item.channel, title = item.title , action="findvideos", url=item.url, server="torrent", thumbnail=thumbnail, fanart=item.extra, extra=extra,show=show , category = category, folder=True) )
title ="Info"
title = title.replace(title,bbcode_kodi2html("[COLOR skyblue]"+title+"[/COLOR]"))
if not "series" in item.url:
thumbnail = posterdb
if "series" in item.url:
if '"tvposter"' in data:
thumbnail= tvposter
else:
thumbnail = item.thumbnail
if "tvbanner" in data:
category = tvbanner
else:
category = show
itemlist.append( Item(channel=item.channel, action="info" , title=title , url=item.url, thumbnail=thumbnail, fanart=fanart_info, extra= extra, category = category, show= show, folder=False ))
title= "[COLOR cadetblue]Trailer[/COLOR]"
if len(item.extra)==0:
fanart=item.thumbnail
else:
fanart = item.extra
if "series" in item.url:
if '"tvthumb"' in data:
thumbnail = tvthumb
else:
thumbnail = item.thumbnail
if '"tvbanner"' in data:
extra= tvbanner
elif '"tvthumb"' in data:
extra = tvthumb
else:
extra = item.thumbnail
else:
if '"moviethumb"' in data:
thumbnail = thumb
else:
thumbnail = posterdb
if '"moviebanner"' in data:
extra= banner
else:
if '"hdmovieclearart"' in data:
extra = clear
else:
extra = posterdb
itemlist.append( Item(channel=item.channel, action="trailer", title=title , url=item.url , thumbnail=thumbnail , plot=item.plot , fanart=fanart_trailer, extra=extra, folder=True) )
return itemlist
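This scraper leans on re.sub twice: once to flatten whitespace (and the &nbsp; entity) out of fetched pages, and once to strip resolution tags, years and bracketed noise from titles before querying the TMDb/TheTVDB APIs. Below is a small sketch of that idiom with an invented title; the real code also URL-encodes spaces and de-accents characters afterwards:
import re

raw = "Una Pelicula 3D [BluRay 1080p]\t(2015)"
clean = re.sub(r"\n|\r|\t|\s{2}|&nbsp;", "", raw)           # flatten whitespace and HTML entities
title = re.sub(r"3D|[0-9]|SBS|\(.*?\)|\[.*?\]", "", clean)  # drop tags, years and bracketed noise
print(" ".join(title.split()))   # -> Una Pelicula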
0
Example 79
def _make_jsmin(extended=True, python_only=False):
"""
Generate JS minifier based on `jsmin.c by Douglas Crockford`_
.. _jsmin.c by Douglas Crockford:
http://www.crockford.com/javascript/jsmin.c
:Parameters:
`extended` : ``bool``
Extended Regexps? (using lookahead and lookbehind). This is faster,
because it can be optimized way more. The regexps used with `extended`
being false are only left here to allow easier porting to platforms
without extended regex features (and for my own reference...)
`python_only` : ``bool``
Use only the python variant. If true, the c extension is not even
tried to be loaded.
:Return: Minifier
:Rtype: ``callable``
"""
# pylint: disable = R0912, R0914, W0612
if not python_only:
try:
import _rjsmin
except ImportError:
pass
else:
return _rjsmin.jsmin
try:
xrange
except NameError:
xrange = range # pylint: disable = W0622
space_chars = r'[\000-\011\013\014\016-\040]'
line_comment = r'(?://[^\r\n]*)'
space_comment = r'(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)'
string1 = \
r'(?:\047[^\047\\\r\n]*(?:\\(?:[^\r\n]|\r?\n|\r)[^\047\\\r\n]*)*\047)'
string2 = r'(?:"[^"\\\r\n]*(?:\\(?:[^\r\n]|\r?\n|\r)[^"\\\r\n]*)*")'
strings = r'(?:%s|%s)' % (string1, string2)
charclass = r'(?:\[[^\\\]\r\n]*(?:\\[^\r\n][^\\\]\r\n]*)*\])'
nospecial = r'[^/\\\[\r\n]'
if extended:
regex = r'(?:/(?![\r\n/*])%s*(?:(?:\\[^\r\n]|%s)%s*)*/)' % (
nospecial, charclass, nospecial
)
else:
regex = (
r'(?:/(?:[^*/\\\r\n\[]|%s|\\[^\r\n])%s*(?:(?:\\[^\r\n]|%s)%s*)*/)'
)
regex = regex % (charclass, nospecial, charclass, nospecial)
pre_regex = r'[(,=:\[!&|?{};\r\n]'
space = r'(?:%s|%s)' % (space_chars, space_comment)
newline = r'(?:%s?[\r\n])' % line_comment
def fix_charclass(result):
""" Fixup string of chars to fit into a regex char class """
pos = result.find('-')
if pos >= 0:
result = r'%s%s-' % (result[:pos], result[pos + 1:])
def sequentize(string):
"""
Notate consecutive characters as sequence
(1-4 instead of 1234)
"""
first, last, result = None, None, []
for char in map(ord, string):
if last is None:
first = last = char
elif last + 1 == char:
last = char
else:
result.append((first, last))
first = last = char
if last is not None:
result.append((first, last))
return ''.join(['%s%s%s' % (
chr(first),
last > first + 1 and '-' or '',
last != first and chr(last) or ''
) for first, last in result])
return _re.sub(r'([\000-\040\047])', # for better portability
lambda m: '\\%03o' % ord(m.group(1)), (sequentize(result)
.replace('\\', '\\\\')
.replace('[', '\\[')
.replace(']', '\\]')
)
)
def id_literal_(what):
""" Make id_literal like char class """
match = _re.compile(what).match
result = ''.join([
chr(c) for c in xrange(127) if not match(chr(c))
])
return '[^%s]' % fix_charclass(result)
def not_id_literal_(keep):
""" Make negated id_literal like char class """
match = _re.compile(id_literal_(keep)).match
result = ''.join([
chr(c) for c in xrange(127) if not match(chr(c))
])
return r'[%s]' % fix_charclass(result)
if extended:
id_literal = id_literal_(r'[a-zA-Z0-9_$]')
id_literal_open = id_literal_(r'[a-zA-Z0-9_${\[(+-]')
id_literal_close = id_literal_(r'[a-zA-Z0-9_$}\])"\047+-]')
space_sub = _re.compile((
r'([^\047"/\000-\040]+)'
r'|(%(strings)s[^\047"/\000-\040]*)'
r'|(?:(?<=%(pre_regex)s)%(space)s*(%(regex)s[^\047"/\000-\040]*))'
r'|(?<=%(id_literal_close)s)'
r'%(space)s*(?:(%(newline)s)%(space)s*)+'
r'(?=%(id_literal_open)s)'
r'|(?<=%(id_literal)s)(%(space)s)+(?=%(id_literal)s)'
r'|%(space)s+'
r'|(?:%(newline)s%(space)s*)+'
) % locals()).sub
def space_subber(match):
""" Substitution callback """
# pylint: disable = C0321
groups = match.groups()
if groups[0]: return groups[0]
elif groups[1]: return groups[1]
elif groups[2]: return groups[2]
elif groups[3]: return '\n'
elif groups[4]: return ' '
return ''
def jsmin(script): # pylint: disable = W0621
r"""
Minify javascript based on `jsmin.c by Douglas Crockford`_\.
Instead of parsing the stream char by char, it uses a regular
expression approach which minifies the whole script with one big
substitution regex.
.. _jsmin.c by Douglas Crockford:
http://www.crockford.com/javascript/jsmin.c
:Parameters:
`script` : ``str``
Script to minify
:Return: Minified script
:Rtype: ``str``
"""
return space_sub(space_subber, '\n%s\n' % script).strip()
else:
not_id_literal = not_id_literal_(r'[a-zA-Z0-9_$]')
not_id_literal_open = not_id_literal_(r'[a-zA-Z0-9_${\[(+-]')
not_id_literal_close = not_id_literal_(r'[a-zA-Z0-9_$}\])"\047+-]')
space_norm_sub = _re.compile((
r'(%(strings)s)'
r'|(?:(%(pre_regex)s)%(space)s*(%(regex)s))'
r'|(%(space)s)+'
r'|(?:(%(newline)s)%(space)s*)+'
) % locals()).sub
def space_norm_subber(match):
""" Substitution callback """
# pylint: disable = C0321
groups = match.groups()
if groups[0]: return groups[0]
elif groups[1]: return groups[1].replace('\r', '\n') + groups[2]
elif groups[3]: return ' '
elif groups[4]: return '\n'
space_sub1 = _re.compile((
r'[\040\n]?(%(strings)s|%(pre_regex)s%(regex)s)'
r'|\040(%(not_id_literal)s)'
r'|\n(%(not_id_literal_open)s)'
) % locals()).sub
def space_subber1(match):
""" Substitution callback """
groups = match.groups()
return groups[0] or groups[1] or groups[2]
space_sub2 = _re.compile((
r'(%(strings)s)\040?'
r'|(%(pre_regex)s%(regex)s)[\040\n]?'
r'|(%(not_id_literal)s)\040'
r'|(%(not_id_literal_close)s)\n'
) % locals()).sub
def space_subber2(match):
""" Substitution callback """
groups = match.groups()
return groups[0] or groups[1] or groups[2] or groups[3]
def jsmin(script):
r"""
Minify javascript based on `jsmin.c by Douglas Crockford`_\.
Instead of parsing the stream char by char, it uses a regular
expression approach. The script is minified with three passes:
normalization
Control characters are mapped to spaces, spaces and newlines
are squeezed and comments are stripped.
space removal 1
Spaces before certain tokens are removed
space removal 2
Spaces after certain tokens are removed
.. _jsmin.c by Douglas Crockford:
http://www.crockford.com/javascript/jsmin.c
:Parameters:
`script` : ``str``
Script to minify
:Return: Minified script
:Rtype: ``str``
"""
return space_sub2(space_subber2,
space_sub1(space_subber1,
space_norm_sub(space_norm_subber, '\n%s\n' % script)
)
).strip()
return jsmin
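Besides the large substitution regexes, this example shows re.sub accepting a callable as the replacement: fix_charclass() passes a lambda that octal-escapes control characters and apostrophes so they survive inside a generated character class. A compact standalone sketch of that idiom:
import re

def escape_low(text):
    # each control character (or apostrophe) is replaced by its octal escape,
    # computed per match by the lambda
    return re.sub(r"([\000-\040\047])", lambda m: "\\%03o" % ord(m.group(1)), text)

print(escape_low("a'b\tc"))   # -> a\047b\011c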
0
Example 80
def fix_text(s, ignore_sym_pat=False):
if not ignore_sym_pat:
# Fix descriptions like D7TDB1 (
s = re.sub("([A-Z0-9]){6} \(", "", s)
s = s.split(";")[0]
# Fix parentheses containing names
s = s.translate(None, "[]")
s = s.replace("(-)", "[-]")
s = s.replace("(+)", "[+]")
s = s.replace("(Uncharacterized protein)", "")
if not ignore_sym_pat:
s = s.translate(None, "()")
# fix minor typos, seen in `autonaming` output
# change 'protei ' to 'protein '
# change 'hypthetical' to 'hypothetical'
# fix string starting with 'ytochrome'
if 'protei ' in s: s = s.replace('protei ', 'protein ')
if 'hypthetical' in s: s = s.replace('hypthetical', 'hypothetical')
if s.startswith('ytochrome'): s = s.replace('ytochrome', 'cytochrome')
# before trimming off at the first ";", check if name has glycosidic
# linkage information (e.g 1,3 or 1,4). If so, also check if multiple
# linkages are separated by ";". If so, replace ";" by "-"
m = re.findall(glycosidic_link_pat, s)
if m and ";" in s:
s = re.sub(";\s*", "-", s)
# remove underscore from description
s = re.sub("_", " ", s)
# Cellular locations
# Any word that matches e.g. AT5G54690
# Any word that matches e.g. Os02g0234800
# (fragment)
# UPF
# Remove 'DDB_G\d+' ID
# '_At[0-9]+g[0-9]+' to ''
for pat in (loc_pat, osg_pat, frag_pat, upf_pat, ddb_pat):
# below is a hack since word boundaries don't work on /
s = s.strip() + " "
s = re.sub(pat, "", s)
# '? => '
s = re.sub(apos_pat, "'", s)
# > => none
s = re.sub(gt_pat, "", s)
# reduce runs such as -- '''
s = re.sub(r"[-]+", "-", s)
s = re.sub(r"[']+", "'", s)
s = s.strip()
# -like to -like protein
s = re.sub(like_pat, "-like protein", s)
# 'repeat$' to 'repeat protein'
if re.search(repeat_pat, s):
s += "-containing protein"
# 'binding$' to 'binding protein'
if re.search(binding_pat, s):
s += " protein"
if re.match(Protein_pat, s):
s = re.sub(Protein_pat, "", s)
# 'domain$' to 'domain-containing protein'
if re.search(domain_pat, s):
s += "-containing protein"
if re.search(r"-domain", s):
s = re.sub(r"-domain", " domain", s)
if re.match(Protein_pat, s):
s = re.sub(Protein_pat, "", s)
# 'related$' to '-like protein'
if re.search(related_pat, s):
s = re.sub(related_pat, "-like protein", s)
if re.match(Protein_pat, s) and not re.match(r"Protein kinase", s):
s = re.sub(Protein_pat, "", s)
# '[0-9]+ humolog' to '-like protein'
if re.search(humolog_pat1, s):
s = re.sub(humolog_pat1, "-like protein", s)
if re.match(Protein_pat, s):
s = re.sub(Protein_pat, "", s)
# 'Protein\s+(.*)\s+humolog' to '$1-like protein'
match = re.search(humolog_pat2, s)
if match and not re.match(r"Protein kinase", s):
ret = match.group(1)
s = re.sub(humolog_pat2, ret + "-like protein", s)
s = re.sub(r"^\s+", "", s)
s = s.capitalize()
# 'humolog protein' to '-like protein'
# 'humologue$' to '-like protein'
# 'humolog$' to '-like protein'
for pat in (humolog_pat3, humolog_pat5, humolog_pat6):
if re.search(pat, s):
s = re.sub(pat, "-like protein", s)
# 'Agenet domain-containing protein / bromo-adjacent humology (BAH) domain-containing protein'
# to 'Agenet and bromo-adjacent humology (BAH) domain-containing protein'
if re.search(agenet_pat, s):
s = re.sub(agenet_pat, "Agenet and ", s)
# plural to singular
if re.search(plural_pat, s):
if (s.find('biogenesis') == -1 and s.find('Topors') == -1) or (not re.search(with_and_pat, s)):
s = re.sub(r"s$", "", s)
# 'like_TBP' or 'likeTBP' to 'like TBP'
if re.search(tbp_pat, s):
s = re.sub(tbp_pat, "like TBP", s)
# 'protein protein' to 'protein'
if re.search(prot_pat, s):
s = re.sub(prot_pat, "protein", s)
# 'dimerisation' to 'dimerization'
if re.search(dimer_pat, s):
s = re.sub(dimer_pat, "dimerization", s)
# Any AHRD that matches e.g. "AT5G54690-like protein"
# Any AHRD that contains the words '^Belongs|^Encoded|^Expression|^highly'
for pat in (atg_pat, athila_pat1):
if re.search(pat, s):
s = Unknown
# remove 'arabidopsis[ thaliana]' and/or embedded Atg IDs
for pat in (atg_id_pat, athila_pat2, athila_pat3, athila_pat4):
# below is a hack since word boundaries don't work on /
s = s.strip() + " "
s = re.sub(pat, "", s)
# remove "\s+LENGTH=\d+" from TAIR deflines
if re.search(length_pat, s):
s = re.sub(length_pat, "", s)
# if name has a dot followed by a space (". ") in it and contains multiple
# parts separated by a comma, strip name starting from first occurrence of ","
if re.search(r"\. ", s):
if re.search(r",", s):
s = s.split(",")[0]
# if name contains any of the disallowed words,
# remove word occurrence from name
# if name contains references to any other organism, trim name upto
# that occurrence
for pat in (disallow_pat, organism_pat):
if re.search(pat, s):
s = re.sub(pat, "", s)
s = s.strip()
if not ignore_sym_pat:
# 'humolog \d+' to '-like protein'
if re.search(humolog_pat4, s):
s = re.sub(humolog_pat4, "", s)
# Trailing protein numeric copy (e.g. Myb 1)
if re.search(trail_pat, s):
s = re.sub(trail_pat, "", s)
# if name is entirely a gene symbol-like (all capital letters, maybe followed by numbers)
# add a "-like protein" at the end
if (re.search(sym_pat, s) or re.search(lc_sym_pat, s)) \
and not re.search(spada_pat, s):
s = s + "-like protein"
# if gene symbol in parentheses at EOL, remove symbol
if re.search(eol_sym_pat, s):
s = re.sub(eol_sym_pat, "", s)
# if name terminates at a symbol([^A-Za-z0-9_]), trim it off
if re.search(r"\W{1,}$", s) and not re.search(r"\)$", s):
s = re.sub("\W{1,}$", "", s)
if "uncharacterized" in s:
s = "uncharacterized protein"
# change sulfer to sulfur
if re.search(sulfer_pat, s):
s = re.sub(sulfer_pat, "sulfur", s)
# change sulph to sulf
if re.search(sulph_pat, s):
s = re.sub(sulph_pat, "sulf", s)
# change monoxy to monooxy
if re.search(monoxy_pat, s):
s = re.sub(monoxy_pat, "monooxy", s)
# change proteine to protein
if re.search(proteine_pat, s):
s = re.sub(proteine_pat, "protein", s)
# change signalling to signaling
if re.search(signalling_pat, s):
s = re.sub(signalling_pat, "signaling", s)
# change aluminium to aluminum
if re.search(aluminium_pat, s):
s = re.sub(aluminium_pat, "aluminum", s)
# change haem to heme
if re.search(haem_pat, s):
s = re.sub(haem_pat, "heme", s)
# change haemo to hemo
if re.search(haemo_pat, s):
s = re.sub(haemo_pat, "hemo", s)
# change assessory to accessory
if re.search(assessory_pat, s):
s = re.sub(assessory_pat, "accessory", s)
# change -ise/-ised/-isation to -ize/-ized/-ization
match = re.search(ise_pat, s)
if match:
ret = match.group(1)
if match.group(2):
suff = match.group(2)
s = re.sub(ise_pat, "{0}ize{1}".format(ret, suff), s)
else:
s = re.sub(ise_pat, "{0}ize".format(ret), s)
match = re.search(isation_pat, s)
if match:
ret = match.group(1)
s = re.sub(isation_pat, "{0}ization".format(ret), s)
# change -bre to -ber
match = re.search(bre_pat, s)
if match:
ret = match.group(1)
s = re.sub(bre_pat, "{0}ber".format(ret), s)
if not s.startswith(Hypothetical):
# 'Candidate|Hypothetical|Novel|Predicted|Possible|Probable|Uncharacterized' to 'Putative'
if s.startswith('Uncharacterized') and any(pat in s for pat in ('UCP', 'UPF', 'protein')):
pass
else:
if re.search(put_pat, s):
s = re.sub(put_pat, "Putative", s)
"""
case (qr/^Histone-lysine/) { $ahrd =~ s/,\s+H\d{1}\s+lysine\-\d+//gs; }
"""
sl = s.lower()
# Any mention of `clone` or `contig` is not informative
if "clone" in sl or "contig" in sl:
s = Unknown
# All that's left is `protein` is not informative
if sl in ("protein", "protein, putative", ""):
s = Unknown
if Unknown.lower() in sl:
s = Unknown
if "FUNCTIONS IN".lower() in sl and "unknown" in sl:
s = Unknown
if "LOCATED IN".lower() in sl:
s = Unknown
s = re.sub(r"[,]*\s+putative$", "", s)
if s == Unknown or s.strip() == "protein":
s = Hypothetical
# Compact all spaces
s = ' '.join(s.split())
assert s.strip()
return s
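Several of the substitutions above rebuild the replacement from a captured group, e.g. the -ise/-isation spelling normalisation, which runs re.search first and formats match.group(1) into the new text. The same effect can be had with a backreference; a sketch follows, where ise_pat is a hypothetical stand-in for the project's precompiled module-level pattern:
import re

ise_pat = re.compile(r"(\w+)ise\b")   # hypothetical stand-in for the module-level pattern
# \1 re-inserts the stem captured before the "-ise" suffix
print(re.sub(ise_pat, r"\1ize", "dimerise and stabilise"))   # -> dimerize and stabilize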
0
Example 81
Project: cgat Source File: cgat2rdf.py
def processScript(script_name, outfile, options):
'''process one script.'''
# call other script
dirname = os.path.dirname(script_name)
basename = os.path.basename(script_name)[:-3]
if options.src_dir:
dirname = options.src_dir
script_name = os.path.join(dirname, basename) + ".py"
sys.path.insert(0, dirname)
module = __import__(basename)
E.Start = LocalStart
E.info("loaded modules %s" % module)
try:
module.main(argv=["--help"])
except DummyError:
pass
# get script's docstring
docstring = module.__doc__
# for k in dir(PARSER):
# print k, getattr(PARSER, k)
# for option in PARSER.option_list:
# print option, option.type, option.help, option._short_opts,
# option._long_opts, option.default
# @prefix clp: <http://www.humgen.nl/climate/ontologies/clp#> .
# @prefix co: <http://www.isi.edu/ikcap/Wingse/componentOntology.owl#> .
# @prefix dcterms: <http://purl.org/dc/terms/> .
# n = Namespace("http://example.org/people/")
g = Generator()
data = collections.defaultdict(str)
data['meta_title'] = 'Interface generator for CGAT scripts'
data['meta_author'] = 'Andreas Heger'
data['meta_version'] = 0.1
data['name'] = basename
data['interpreter'] = 'python'
data['property_bag'] = {}
data['description'] = getDescription(basename, docstring)
data['help'] = docstring
data['version'] = "1.0"
data['owner'] = "CGAT"
data['email'] = "[email protected]"
data['binary'] = script_name
# does not output multiple files
data['multiple_output_files'] = False
input_format, output_format = guessFormats(basename, docstring)
stdin = {}
stdin['name'] = 'input_file'
stdin['ns_name'] = 'input_file'
stdin['type'] = 'stdin'
stdin['label'] = 'input file'
stdin['description'] = 'input file'
stdin['choices'] = None
stdin['format'] = MAP_TYPE2FORMAT.get(input_format, input_format)
stdin['rank'] = 1
stdin['display'] = 'show'
stdin['min_occurrence'] = 1
stdin['max_occurrence'] = 1
stdin['value'] = ""
stdin['arg'] = "<"
stdin['arg_long'] = ""
stdin['property_bag'] = {}
stdin['dependencies'] = {}
stdout = {}
stdout['name'] = 'tsvfile'
stdout['ns_name'] = 'tsvfile'
stdout['type'] = 'stdout'
stdout['label'] = 'table'
stdout['description'] = 'bam file'
stdout['choices'] = None
stdout['format'] = MAP_TYPE2FORMAT.get(output_format, output_format)
stdout['rank'] = 1
stdout['display'] = 'show'
stdout['min_occurrence'] = 1
stdout['max_occurrence'] = 1
stdout['value'] = ""
stdout['arg'] = ">"
stdout['arg_long'] = ""
stdout['property_bag'] = {}
stdout['dependencies'] = {}
outputs = [stdout]
data['parameters'] = [stdin, stdout]
defaults = PARSER.get_default_values()
# flag to indicate whether script needs to go through cgat_wrapper.py
use_wrapper = False
for option in PARSER.option_list:
# ignore options added by optparse
if option.dest is None:
continue
# ignore benchmarking options
if option.dest.startswith("timeit"):
continue
# ignore options related to forcing output
if "force" in option.dest:
continue
# ignore some special options:
# if option.dest in ("output_filename_pattern", ):
# continue
# ignore output options
if option.dest in ("stdin", "stdout", "stdlog", "stderr", "loglevel"):
continue
# remove default from help string
option.help = re.sub("\[[^\]]*%default[^\]]*\]", "", option.help)
param = buildParam()
# get command line option call (long/short option)
try:
param['arg'] = option._short_opts[0]
except IndexError:
pass
try:
param['arg_long'] = option._long_opts[0]
except IndexError:
pass
assert 'arg' in param or 'arg_long' in param
# print "----------------------------------"
# print [(x,getattr(option,x)) for x in dir( option )]
param['name'] = option.dest
param['ns_name'] = option.dest
if option.type == "int":
param['type'] = "integer"
elif option.type == "float":
param['type'] = "float"
elif option.type == "string":
param['type'] = "text"
if option.metavar:
mvar = option.metavar.lower()
if mvar in MAP_TYPE2FORMAT:
param['format'] = MAP_TYPE2FORMAT[mvar]
param['type'] = "data"
if mvar == "bam":
use_wrapper = True
data['parameters'].append(buildParam(
name='wrapper_bam_file',
ns_name='wrapper_bam_file',
arg_long='--wrapper-bam-file',
label=option.dest,
type='data',
format='bam',
help=option.help,
value=getattr(defaults, option.dest)))
data['parameters'].append(buildParam(
name='wrapper_bam_index',
ns_name='wrapper_bam_index',
arg_long='--wrapper-bai-file',
type='data',
value='${wrapper_bam_file.metadata.bam_index}',
display='hidden'))
# use long argument
data['parameters'].append(buildParam(
name='wrapper_bam_option',
ns_name='wrapper_bam_option',
arg_long='--wrapper-bam-option',
value=param[
'arg_long'],
display='hidden'))
continue
elif option.type == "choice":
param['type'] = "select"
param['choices'] = option.choices
if option.action == "append":
param['multiple'] = True
elif option.action.startswith("store"):
param['type'] = "boolean"
else:
raise ValueError("unknown type for %s" % str(option))
param['label'] = option.dest
param['description'] = option.help
param['rank'] = 1
param['display'] = 'show'
param['min_occurrence'] = 0
param['max_occurrence'] = 1
# get default value
param['value'] = getattr(defaults, option.dest)
param['dependencies'] = {}
param['property_bag'] = {}
if option.dest == "genome_file":
param['property_bag'] = {'from_loc': 'path',
'loc_id': 'sam_fa',
'loc_id_filter': '1'}
# deal with multiple output files:
if option.dest == "output_filename_pattern":
use_wrapper = True
data['parameters'].append(buildParam(
name='wrapper_html_file',
ns_name='wrapper_html_file',
arg_long='--wrapper-html-file',
value='$html_file',
display='hidden'))
data['parameters'].append(buildParam(
name='wrapper_html_dir',
ns_name='wrapper_html_dir',
arg_long='--wrapper-html-dir',
value='$html_file.files_path',
display='hidden'))
outputs.append(buildParam(name='html_file',
ns_name='html_file',
format='html',
label='html'),
)
continue
data['parameters'].append(param)
if options.output_format == "rdf":
outfile.write(g.serialize(data, format='turtle') + "\n")
elif options.output_format == "galaxy":
if use_wrapper:
# add hidden option for wrapper
param = buildParam(
name='wrapper-command',
ns_name='wrapper-command',
display='hidden',
type='text',
value=data['binary'],
label='wrapper',
description='wrapper',
arg_long="--wrapper-command")
data['parameters'].append(param)
# point to wrapper
data['binary'] = os.path.join(dirname, "cgat_galaxy_wrapper.py")
displayMap = collections.defaultdict(list)
for param in data['parameters']:
displayMap[param['display']].append(param)
displayMap['normal'] = displayMap['show']
target = Template(
IOTools.openFile('/ifs/devel/andreas/cgat/scripts/cgat2rdf/galaxy.xml').read())
outfile.write(target.render(data=data,
displayMap=displayMap,
outputs=outputs) + "\n")
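The re.sub in this example removes optparse's bracketed "[... %default ...]" hint from help strings before they are exposed as parameter descriptions in the generated interface. A short standalone sketch with an invented help string:
import re

help_text = "output filename pattern [default: %default]"
# drop the bracketed %default hint; the surrounding text is left untouched
print(re.sub(r"\[[^\]]*%default[^\]]*\]", "", help_text).strip())
# -> output filename pattern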
0
Example 82
Project: termite-data-server Source File: ttfonts.py
def extractInfo(self):
#################/
# name - Naming table
#################/
self.sFamilyClass = 0
self.sFamilySubClass = 0
name_offset = self.seek_table("name")
format = self.read_ushort()
if (format != 0):
die("Unknown name table format " + format)
numRecords = self.read_ushort()
string_data_offset = name_offset + self.read_ushort()
names = {1:'',2:'',3:'',4:'',6:''}
K = names.keys()
nameCount = len(names)
for i in range(numRecords):
platformId = self.read_ushort()
encodingId = self.read_ushort()
languageId = self.read_ushort()
nameId = self.read_ushort()
length = self.read_ushort()
offset = self.read_ushort()
if (nameId not in K): continue
N = ''
if (platformId == 3 and encodingId == 1 and languageId == 0x409): # Microsoft, Unicode, US English, PS Name
opos = self._pos
self.seek(string_data_offset + offset)
if (length % 2 != 0):
die("PostScript name is UTF-16BE string of odd length")
length /= 2
N = ''
while (length > 0):
char = self.read_ushort()
N += (chr(char))
length -= 1
self._pos = opos
self.seek(opos)
elif (platformId == 1 and encodingId == 0 and languageId == 0): # Macintosh, Roman, English, PS Name
opos = self._pos
N = self.get_chunk(string_data_offset + offset, length)
self._pos = opos
self.seek(opos)
if (N and names[nameId]==''):
names[nameId] = N
nameCount -= 1
if (nameCount==0): break
if (names[6]):
psName = names[6]
elif (names[4]):
psName = re.sub(' ','-',names[4])
elif (names[1]):
psName = re.sub(' ','-',names[1])
else:
psName = ''
if (not psName):
die("Could not find PostScript font name")
self.name = psName
if (names[1]):
self.familyName = names[1]
else:
self.familyName = psName
if (names[2]):
self.styleName = names[2]
else:
self.styleName = 'Regular'
if (names[4]):
self.fullName = names[4]
else:
self.fullName = psName
if (names[3]):
self.uniqueFontID = names[3]
else:
self.uniqueFontID = psName
if (names[6]):
self.fullName = names[6]
#################/
# head - Font header table
#################/
self.seek_table("head")
self.skip(18)
self.unitsPerEm = unitsPerEm = self.read_ushort()
scale = 1000 / float(unitsPerEm)
self.skip(16)
xMin = self.read_short()
yMin = self.read_short()
xMax = self.read_short()
yMax = self.read_short()
self.bbox = [(xMin*scale), (yMin*scale), (xMax*scale), (yMax*scale)]
self.skip(3*2)
indexToLocFormat = self.read_ushort()
glyphDataFormat = self.read_ushort()
if (glyphDataFormat != 0):
die('Unknown glyph data format ' + str(glyphDataFormat))
#################/
# hhea metrics table
#################/
# ttf2t1 seems to use this value rather than the one in OS/2 - so put in for compatibility
if ("hhea" in self.tables):
self.seek_table("hhea")
self.skip(4)
hheaAscender = self.read_short()
hheaDescender = self.read_short()
self.ascent = (hheaAscender *scale)
self.descent = (hheaDescender *scale)
#################/
# OS/2 - OS/2 and Windows metrics table
#################/
if ("OS/2" in self.tables):
self.seek_table("OS/2")
version = self.read_ushort()
self.skip(2)
usWeightClass = self.read_ushort()
self.skip(2)
fsType = self.read_ushort()
if (fsType == 0x0002 or (fsType & 0x0300) != 0):
die('ERROR - Font file ' + self.filename + ' cannot be embedded due to copyright restrictions.')
self.restrictedUse = True
self.skip(20)
sF = self.read_short()
self.sFamilyClass = (sF >> 8)
self.sFamilySubClass = (sF & 0xFF)
self._pos += 10 #PANOSE = 10 byte length
panose = self.fh.read(10)
self.skip(26)
sTypoAscender = self.read_short()
sTypoDescender = self.read_short()
if (not self.ascent):
self.ascent = (sTypoAscender*scale)
if (not self.descent):
self.descent = (sTypoDescender*scale)
if (version > 1):
self.skip(16)
sCapHeight = self.read_short()
self.capHeight = (sCapHeight*scale)
else:
self.capHeight = self.ascent
else:
usWeightClass = 500
if (not self.ascent): self.ascent = (yMax*scale)
if (not self.descent): self.descent = (yMin*scale)
self.capHeight = self.ascent
self.stemV = 50 + int(pow((usWeightClass / 65.0),2))
#################/
# post - PostScript table
#################/
self.seek_table("post")
self.skip(4)
self.italicAngle = self.read_short() + self.read_ushort() / 65536.0
self.underlinePosition = self.read_short() * scale
self.underlineThickness = self.read_short() * scale
isFixedPitch = self.read_ulong()
self.flags = 4
if (self.italicAngle!= 0):
self.flags = self.flags | 64
if (usWeightClass >= 600):
self.flags = self.flags | 262144
if (isFixedPitch):
self.flags = self.flags | 1
#################/
# hhea - Horizontal header table
#################/
self.seek_table("hhea")
self.skip(32)
metricDataFormat = self.read_ushort()
if (metricDataFormat != 0):
die('Unknown horizontal metric data format ' + str(metricDataFormat))
numberOfHMetrics = self.read_ushort()
if (numberOfHMetrics == 0):
die('Number of horizontal metrics is 0')
#################/
# maxp - Maximum profile table
#################/
self.seek_table("maxp")
self.skip(4)
numGlyphs = self.read_ushort()
#################/
# cmap - Character to glyph index mapping table
#################/
cmap_offset = self.seek_table("cmap")
self.skip(2)
cmapTableCount = self.read_ushort()
unicode_cmap_offset = 0
unicode_cmap_offset12 = 0
for i in range(cmapTableCount):
platformID = self.read_ushort()
encodingID = self.read_ushort()
offset = self.read_ulong()
save_pos = self._pos
if platformID == 3 and encodingID == 10: # Microsoft, UCS-4
format = self.get_ushort(cmap_offset + offset)
if (format == 12):
if not unicode_cmap_offset12:
unicode_cmap_offset12 = cmap_offset + offset
break
if ((platformID == 3 and encodingID == 1) or platformID == 0): # Microsoft, Unicode
format = self.get_ushort(cmap_offset + offset)
if (format == 4):
if (not unicode_cmap_offset):
unicode_cmap_offset = cmap_offset + offset
break
self.seek(save_pos)
if not unicode_cmap_offset and not unicode_cmap_offset12:
die('Font (' + self.filename + ') does not have cmap for Unicode (platform 3, encoding 1, format 4, or platform 3, encoding 10, format 12, or platform 0, any encoding, format 4)')
glyphToChar = {}
charToGlyph = {}
if unicode_cmap_offset12:
self.getCMAP12(unicode_cmap_offset12, glyphToChar, charToGlyph)
else:
self.getCMAP4(unicode_cmap_offset, glyphToChar, charToGlyph)
#################/
# hmtx - Horizontal metrics table
#################/
self.getHMTX(numberOfHMetrics, numGlyphs, glyphToChar, scale)
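The two re.sub calls in extractInfo() simply hyphenate font names, since PostScript font names cannot contain spaces; a plain str.replace would do the same here. A one-line illustration with an invented font name:
import re

full_name = "DejaVu Sans Bold"
# replace every space with a hyphen to build a PostScript-safe name
print(re.sub(' ', '-', full_name))   # -> DejaVu-Sans-Bold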
0
Example 83
Project: cgstudiomap Source File: escpos.py
def receipt(self,xml):
"""
Prints an xml based receipt definition
"""
def strclean(string):
if not string:
string = ''
string = string.strip()
string = re.sub('\s+',' ',string)
return string
def format_value(value, decimals=3, width=0, decimals_separator='.', thousands_separator=',', autoint=False, symbol='', position='after'):
decimals = max(0,int(decimals))
width = max(0,int(width))
value = float(value)
if autoint and math.floor(value) == value:
decimals = 0
if width == 0:
width = ''
if thousands_separator:
formatstr = "{:"+str(width)+",."+str(decimals)+"f}"
else:
formatstr = "{:"+str(width)+"."+str(decimals)+"f}"
ret = formatstr.format(value)
ret = ret.replace(',','COMMA')
ret = ret.replace('.','DOT')
ret = ret.replace('COMMA',thousands_separator)
ret = ret.replace('DOT',decimals_separator)
if symbol:
if position == 'after':
ret = ret + symbol
else:
ret = symbol + ret
return ret
def print_elem(stylestack, serializer, elem, indent=0):
elem_styles = {
'h1': {'bold': 'on', 'size':'double'},
'h2': {'size':'double'},
'h3': {'bold': 'on', 'size':'double-height'},
'h4': {'size': 'double-height'},
'h5': {'bold': 'on'},
'em': {'font': 'b'},
'b': {'bold': 'on'},
}
stylestack.push()
if elem.tag in elem_styles:
stylestack.set(elem_styles[elem.tag])
stylestack.set(elem.attrib)
if elem.tag in ('p','div','section','article','receipt','header','footer','li','h1','h2','h3','h4','h5'):
serializer.start_block(stylestack)
serializer.text(elem.text)
for child in elem:
print_elem(stylestack,serializer,child)
serializer.start_inline(stylestack)
serializer.text(child.tail)
serializer.end_entity()
serializer.end_entity()
elif elem.tag in ('span','em','b','left','right'):
serializer.start_inline(stylestack)
serializer.text(elem.text)
for child in elem:
print_elem(stylestack,serializer,child)
serializer.start_inline(stylestack)
serializer.text(child.tail)
serializer.end_entity()
serializer.end_entity()
elif elem.tag == 'value':
serializer.start_inline(stylestack)
serializer.pre(format_value(
elem.text,
decimals=stylestack.get('value-decimals'),
width=stylestack.get('value-width'),
decimals_separator=stylestack.get('value-decimals-separator'),
thousands_separator=stylestack.get('value-thousands-separator'),
autoint=(stylestack.get('value-autoint') == 'on'),
symbol=stylestack.get('value-symbol'),
position=stylestack.get('value-symbol-position')
))
serializer.end_entity()
elif elem.tag == 'line':
width = stylestack.get('width')
if stylestack.get('size') in ('double', 'double-width'):
width = width / 2
lineserializer = XmlLineSerializer(stylestack.get('indent')+indent,stylestack.get('tabwidth'),width,stylestack.get('line-ratio'))
serializer.start_block(stylestack)
for child in elem:
if child.tag == 'left':
print_elem(stylestack,lineserializer,child,indent=indent)
elif child.tag == 'right':
lineserializer.start_right()
print_elem(stylestack,lineserializer,child,indent=indent)
serializer.pre(lineserializer.get_line())
serializer.end_entity()
elif elem.tag == 'ul':
serializer.start_block(stylestack)
bullet = stylestack.get('bullet')
for child in elem:
if child.tag == 'li':
serializer.style(stylestack)
serializer.raw(' ' * indent * stylestack.get('tabwidth') + bullet)
print_elem(stylestack,serializer,child,indent=indent+1)
serializer.end_entity()
elif elem.tag == 'ol':
cwidth = len(str(len(elem))) + 2
i = 1
serializer.start_block(stylestack)
for child in elem:
if child.tag == 'li':
serializer.style(stylestack)
serializer.raw(' ' * indent * stylestack.get('tabwidth') + ' ' + (str(i)+')').ljust(cwidth))
i = i + 1
print_elem(stylestack,serializer,child,indent=indent+1)
serializer.end_entity()
elif elem.tag == 'pre':
serializer.start_block(stylestack)
serializer.pre(elem.text)
serializer.end_entity()
elif elem.tag == 'hr':
width = stylestack.get('width')
if stylestack.get('size') in ('double', 'double-width'):
width = width / 2
serializer.start_block(stylestack)
serializer.text('-'*width)
serializer.end_entity()
elif elem.tag == 'br':
serializer.linebreak()
elif elem.tag == 'img':
if 'src' in elem.attrib and 'data:' in elem.attrib['src']:
self.print_base64_image(elem.attrib['src'])
elif elem.tag == 'barcode' and 'encoding' in elem.attrib:
serializer.start_block(stylestack)
self.barcode(strclean(elem.text),elem.attrib['encoding'])
serializer.end_entity()
elif elem.tag == 'cut':
self.cut()
elif elem.tag == 'partialcut':
self.cut(mode='part')
elif elem.tag == 'cashdraw':
self.cashdraw(2)
self.cashdraw(5)
stylestack.pop()
try:
stylestack = StyleStack()
serializer = XmlSerializer(self)
root = ET.fromstring(xml.encode('utf-8'))
self._raw(stylestack.to_escpos())
print_elem(stylestack,serializer,root)
if 'open-cashdrawer' in root.attrib and root.attrib['open-cashdrawer'] == 'true':
self.cashdraw(2)
self.cashdraw(5)
if not 'cut' in root.attrib or root.attrib['cut'] == 'true' :
self.cut()
except Exception as e:
errmsg = str(e)+'\n'+'-'*48+'\n'+traceback.format_exc() + '-'*48+'\n'
self.text(errmsg)
self.cut()
raise e
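The strclean() helper defined at the top of receipt() is a compact whitespace normaliser: re.sub collapses any run of whitespace to a single space after the ends are stripped. The same helper as a self-contained snippet:
import re

def strclean(string):
    # treat None as empty, trim the ends, then collapse internal whitespace runs
    return re.sub(r'\s+', ' ', (string or '').strip())

print(repr(strclean("  1234   5678\n")))   # -> '1234 5678'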
0
Example 84
Project: SickRage Source File: parser.py
def _parse_string(self, name): # pylint: disable=too-many-locals, too-many-branches, too-many-statements
if not name:
return
matches = []
bestResult = None
for (cur_regex_num, cur_regex_name, cur_regex) in self.compiled_regexes:
match = cur_regex.match(name)
if not match:
continue
result = ParseResult(name)
result.which_regex = [cur_regex_name]
result.score = 0 - cur_regex_num
named_groups = match.groupdict().keys()
if 'series_name' in named_groups:
result.series_name = match.group('series_name')
if result.series_name:
result.series_name = self.clean_series_name(result.series_name)
result.score += 1
if 'series_num' in named_groups and match.group('series_num'):
result.score += 1
if 'season_num' in named_groups:
tmp_season = int(match.group('season_num'))
if cur_regex_name == 'bare' and tmp_season in (19, 20):
continue
result.season_number = tmp_season
result.score += 1
if 'ep_num' in named_groups:
ep_num = self._convert_number(match.group('ep_num'))
if 'extra_ep_num' in named_groups and match.group('extra_ep_num'):
result.episode_numbers = range(ep_num, self._convert_number(match.group('extra_ep_num')) + 1)
result.score += 1
else:
result.episode_numbers = [ep_num]
result.score += 3
if 'ep_ab_num' in named_groups:
ep_ab_num = self._convert_number(match.group('ep_ab_num'))
if 'extra_ab_ep_num' in named_groups and match.group('extra_ab_ep_num'):
result.ab_episode_numbers = range(ep_ab_num,
self._convert_number(match.group('extra_ab_ep_num')) + 1)
result.score += 1
else:
result.ab_episode_numbers = [ep_ab_num]
result.score += 1
if 'air_date' in named_groups:
air_date = match.group('air_date')
try:
assert re.sub(r'[^\d]*', '', air_date) != '112263'
result.air_date = dateutil.parser.parse(air_date, fuzzy_with_tokens=True)[0].date()
result.score += 1
except Exception:
continue
if 'extra_info' in named_groups:
tmp_extra_info = match.group('extra_info')
# Show.S04.Special or Show.S05.Part.2.Extras is almost certainly not every episode in the season
if tmp_extra_info and cur_regex_name == 'season_only' and re.search(
r'([. _-]|^)(special|extra)s?\w*([. _-]|$)', tmp_extra_info, re.I):
continue
result.extra_info = tmp_extra_info
result.score += 1
if 'release_group' in named_groups:
result.release_group = match.group('release_group')
result.score += 1
if 'version' in named_groups:
# assigns version to anime file if detected using anime regex. Non-anime regex receives -1
version = match.group('version')
if version:
result.version = version
else:
result.version = 1
else:
result.version = -1
matches.append(result)
if matches:
# pick best match with highest score based on placement
bestResult = max(sorted(matches, reverse=True, key=lambda x: x.which_regex), key=lambda x: x.score)
show = None
if not self.naming_pattern:
# try and create a show object for this result
show = helpers.get_show(bestResult.series_name, self.tryIndexers)
# confirm passed in show object indexer id matches result show object indexer id
if show:
if self.showObj and show.indexerid != self.showObj.indexerid:
show = None
bestResult.show = show
elif not show and self.showObj:
bestResult.show = self.showObj
# if this is a naming pattern test or result doesn't have a show object then return best result
if not bestResult.show or self.naming_pattern:
return bestResult
# get quality
bestResult.quality = common.Quality.nameQuality(name, bestResult.show.is_anime)
new_episode_numbers = []
new_season_numbers = []
new_absolute_numbers = []
# if we have an air-by-date show then get the real season/episode numbers
if bestResult.is_air_by_date:
airdate = bestResult.air_date.toordinal()
main_db_con = db.DBConnection()
sql_result = main_db_con.select(
"SELECT season, episode FROM tv_episodes WHERE showid = ? and indexer = ? and airdate = ?",
[bestResult.show.indexerid, bestResult.show.indexer, airdate])
season_number = None
episode_numbers = []
if sql_result:
season_number = int(sql_result[0][0])
episode_numbers = [int(sql_result[0][1])]
if season_number is None or not episode_numbers:
try:
lINDEXER_API_PARMS = sickbeard.indexerApi(bestResult.show.indexer).api_params.copy()
lINDEXER_API_PARMS['language'] = bestResult.show.lang or sickbeard.INDEXER_DEFAULT_LANGUAGE
t = sickbeard.indexerApi(bestResult.show.indexer).indexer(**lINDEXER_API_PARMS)
epObj = t[bestResult.show.indexerid].airedOn(bestResult.air_date)[0]
season_number = int(epObj["seasonnumber"])
episode_numbers = [int(epObj["episodenumber"])]
except sickbeard.indexer_episodenotfound:
logger.log(u"Unable to find episode with date " + str(bestResult.air_date) + " for show " + bestResult.show.name + ", skipping", logger.WARNING)
episode_numbers = []
except sickbeard.indexer_error as e:
logger.log(u"Unable to contact " + sickbeard.indexerApi(bestResult.show.indexer).name + ": " + ex(e), logger.WARNING)
episode_numbers = []
for epNo in episode_numbers:
s = season_number
e = epNo
if bestResult.show.is_scene:
(s, e) = scene_numbering.get_indexer_numbering(bestResult.show.indexerid,
bestResult.show.indexer,
season_number,
epNo)
new_episode_numbers.append(e)
new_season_numbers.append(s)
elif bestResult.show.is_anime and bestResult.ab_episode_numbers:
scene_season = scene_exceptions.get_scene_exception_by_name(bestResult.series_name)[1]
for epAbsNo in bestResult.ab_episode_numbers:
a = epAbsNo
if bestResult.show.is_scene:
a = scene_numbering.get_indexer_absolute_numbering(bestResult.show.indexerid,
bestResult.show.indexer, epAbsNo,
True, scene_season)
(s, e) = helpers.get_all_episodes_from_absolute_number(bestResult.show, [a])
new_absolute_numbers.append(a)
new_episode_numbers.extend(e)
new_season_numbers.append(s)
elif bestResult.season_number and bestResult.episode_numbers:
for epNo in bestResult.episode_numbers:
s = bestResult.season_number
e = epNo
if bestResult.show.is_scene:
(s, e) = scene_numbering.get_indexer_numbering(bestResult.show.indexerid,
bestResult.show.indexer,
bestResult.season_number,
epNo)
if bestResult.show.is_anime:
a = helpers.get_absolute_number_from_season_and_episode(bestResult.show, s, e)
if a:
new_absolute_numbers.append(a)
new_episode_numbers.append(e)
new_season_numbers.append(s)
# need to do a quick sanity check here. It's possible that we now have episodes
# from more than one season (by tvdb numbering), and this is just too much
# for sickbeard, so we'd need to flag it.
new_season_numbers = list(set(new_season_numbers)) # remove duplicates
if len(new_season_numbers) > 1:
raise InvalidNameException("Scene numbering results episodes from "
"seasons %s, (i.e. more than one) and "
"sickrage does not support this. "
"Sorry." % (str(new_season_numbers)))
# I guess it's possible that we'd have duplicate episodes too, so let's
# eliminate them
new_episode_numbers = list(set(new_episode_numbers))
new_episode_numbers.sort()
# maybe even duplicate absolute numbers so why not do them as well
new_absolute_numbers = list(set(new_absolute_numbers))
new_absolute_numbers.sort()
if new_absolute_numbers:
bestResult.ab_episode_numbers = new_absolute_numbers
if new_season_numbers and new_episode_numbers:
bestResult.episode_numbers = new_episode_numbers
bestResult.season_number = new_season_numbers[0]
if bestResult.show.is_scene:
logger.log(
u"Converted parsed result " + bestResult.original_name + " into " + str(bestResult).decode('utf-8',
'xmlcharrefreplace'),
logger.DEBUG)
# CPU sleep
time.sleep(0.02)
return bestResult
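The single re.sub call in this parser strips everything but digits from the matched air date; it exists to guard against dateutil parsing the show title "11.22.63" as a real date. A minimal sketch of that check, with the surrounding ParseResult machinery omitted (requires python-dateutil, as in the original):
import re
import dateutil.parser

air_date = '2014.03.28'
digits_only = re.sub(r'[^\d]*', '', air_date)   # '20140328'
assert digits_only != '112263'                  # skip the "11.22.63" false positive
print(dateutil.parser.parse(air_date, fuzzy_with_tokens=True)[0].date())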
0
Example 85
def removeStopWords (texts):
# Split up the words...
texts_split = texts.split(" ")
preps = [
u'\u0641\u064a', #fy
u'\u0641\u064a\u0647', #fyh
u'\u0641\u064a\u0647\u0627', #fyha
u'\u0641\u064a\u0647\u0645', #fyhm
u'\u0639\u0644\u0649', #3lA
u'\u0639\u0644\u064a\u0643', #3lyk
u'\u0639\u0644\u064a\u0647', #3lyh
u'\u0639\u0644\u064a\u0647\u0627', #3lyha
u'\u0639\u0644\u064a\u0647\u0645', #3lyhm
u'\u0639\u0644\u064a', #3ly
u'\u0628\u0647', #bh
u'\u0628\u0647\u0627', #bha
u'\u0628\u0647\u0645', #bhm
u'\u0644\u0647', #lh
u'\u0644\u0647\u0627', #lha
u'\u0644\u0647\u0645', #lhm
u'\u0645\u0639', #m3
u'\u0645\u0639\u0647', #m3h
u'\u0645\u0639\u0647\u0627', #m3ha
u'\u0645\u0639\u0647\u0645', #m3hm
u'\u0639\u0646', #3n
u'\u0639\u0646\u0647', #3nh
u'\u0639\u0646\u0647\u0627', #3nha
u'\u0639\u0646\u0647\u0645', #3nhm
u'\u062a\u062d\u062a', #t7t
u'\u062d\u062a\u0649', #7tA
u'\u0641\u0648\u0642', #fwQ
u'\u0641\u0648\u0642\u064e', #fwQ?
u'\u0628\u062c\u0627\u0646\u0628', #bjanb
u'\u0623\u0645\u0627\u0645', #amam
u'\u0623\u0645\u0627\u0645\u064e', #amam?
u'\u0627\u0645\u0627\u0645', #amam
u'\u062e\u0627\u0631\u062c', #Karj
u'\u0628\u0627\u0644\u062e\u0627\u0631\u062c', #balKarj
u'\u062d\u0648\u0644\u064e', #7wl?
u'\u062d\u0648\u0644', #7wl
u'\u0631\u063a\u0645', #rGm
u'\u0628\u0627\u0644\u0631\u063a\u0645', #balrGm
u'\u0631\u063a\u0645\u064e', #rGm?
u'\u0645\u0646\u0630', #mni
u'\u0645\u0646\u0630\u064f', #mni?
u'\u0645\u0646', #mn
u'\u062e\u0644\u0627\u0644', #Klal
u'\u062e\u0644\u0627\u0644\u064e', #Klal?
u'\u062d\u0648\u0644', #7wl
u'\u062d\u0648\u0644\u064e', #7wl?
u'\u0642\u0628\u0644', #Qbl
u'\u0642\u0628\u0644\u064e', #Qbl?
u'\u0648\u0641\u0642\u0627', #wfQa
u'\u0625\u0644\u0649', #alA
u'\u0627\u0644\u0649\u0648\u0631\u0627\u0621\u064e', #alAwraq?
u'\u0648\u0631\u0627\u0621', #wraq
u'\u0628\u064a\u0646\u064e', #byn?
u'\u0628\u064a\u0646', #byn
u'\u0628\u062f\u0648\u0646', #bdwn
u'\u0644\u0643\u0646', #lkn
u'\u0628\u0627\u062a\u062c\u0627\u0647', #batjah
u'\u0623\u0642\u0644', #aQl
u'\u0627\u0642\u0644', #aQl
u'\u0627\u0643\u062b\u0631' #akUr
]
pronouns = [
u'\u0647\u0630\u0627', #hia
u'\u0647\u0630\u0647', #hih
u'\u0630\u0644\u0643', #ilk
u'\u062a\u0644\u0643', #tlk
u'\u0647\u0624\u0644\u064e\u0627\u0621', #hol?aq
u'\u0647\u0624\u0644\u0627\u0621', #holaq
u'\u0627\u0648\u0644\u0627\u0626\u0643', #awla5k
u'\u0647\u0630\u0627\u0646', #hian
u'\u0647\u0630\u064a\u0646\u0647\u062a\u0627\u0646', #hiynhtan
u'\u0647\u062a\u064a\u0646\u0623\u0646\u0627', #htynana
u'\u0627\u0646\u0627', #ana
u'\u0623\u0646\u062a', #ant
u'\u0647\u0645\u0627', #hma
u'\u0623\u0646\u062a\u064e', #ant?
u'\u0627\u0646\u062a', #ant
u'\u0623\u0646\u062a', #ant
u'\u0623\u0646\u062a\u0650', #ant?
u'\u0627\u0646\u062a\u0647\u0648', #anthw
u'\u0647\u0648\u064e', #hw?
u'\u0647\u0648', #hw
u'\u0647\u064a', #hy
u'\u0647\u064a\u064e', #hy?
u'\u0646\u062d\u0646', #n7n
u'\u0623\u0646\u062a\u0645', #antm
u'\u0627\u0646\u062a\u0645', #antm
u'\u0623\u0646\u062a\u0645', #antm
u'\u0627\u0646\u062a\u0645', #antm
u'\u0647\u064f\u0645', #h?m
u'\u0647\u0645', #hm
u'\u0644\u0647\u0645', #lhm
u'\u0645\u0646\u0647\u0645', #mnhm
u'\u0648\u0647\u0645', #whm
u'\u0627\u0644\u062a\u064a', #alty
u'\u0627\u0644\u0630\u064a', #aliy
u'\u0627\u0644\u0644\u0630\u0627\u0646', #allian
u'\u0627\u0644\u0644\u0630\u064a\u0646', #alliyn
u'\u0627\u0644\u0644\u062a\u0627\u0646', #alltan
u'\u0627\u0644\u0644\u062a\u064a\u0646' #alltyn
]
particles = [
u'\u0627\u0646', #an
u'\u0648\u0627\u0646', #wan
u'\u0625\u0646', #an
u'\u0625\u0646\u0647', #anh
u'\u0625\u0646\u0647\u0627', #anha
u'\u0625\u0646\u0647\u0645', #anhm
u'\u0625\u0646\u0647\u0645\u0627', #anhma
u'\u0625\u0646\u064a', #any
u'\u0648\u0625\u0646', #wan
u'\u0648\u0623\u0646', #wan
u'\u0627\u0646', #an
u'\u0627\u0646\u0647', #anh
u'\u0627\u0646\u0647\u0627', #anha
u'\u0627\u0646\u0647\u0645', #anhm
u'\u0627\u0646\u0647\u0645\u0627', #anhma
u'\u0627\u0646\u064a', #any
u'\u0648\u0627\u0646', #wan
u'\u0648\u0627\u0646', #wan
u'\u0623\u0646', #an
u'\u0627\u0646', #an
u'\u0623\u0644\u0627', #ala
u'\u0628\u0623\u0646', #ban
u'\u0627\u0646', #an
u'\u0627\u0644\u0627', #ala
u'\u0628\u0627\u0646', #ban
u'\u0623\u0646\u0647', #anh
u'\u0623\u0646\u0647\u0627', #anha
u'\u0623\u0646\u0647\u0645', #anhm
u'\u0623\u0646\u0647\u0645\u0627', #anhma
u'\u0627\u0646\u0647', #anh
u'\u0627\u0646\u0647\u0627', #anha
u'\u0627\u0646\u0647\u0645', #anhm
u'\u0627\u0646\u0647\u0645\u0627', #anhma
u'\u0623\u0630', #ai
u'\u0627\u0630', #ai
u'\u0627\u0630\u0627', #aia
u'\u0625\u0630', #ai
u'\u0625\u0630\u0627', #aia
u'\u0648\u0625\u0630', #wai
u'\u0648\u0625\u0630\u0627', #waia
u'\u0627\u0630', #ai
u'\u0627\u0630', #ai
u'\u0627\u0630\u0627', #aia
u'\u0627\u0630', #ai
u'\u0627\u0630\u0627', #aia
u'\u0641\u0627\u0630\u0627', #faia
u'\u0645\u0627\u0630\u0627', #maia
u'\u0648\u0627\u0630', #wai
u'\u0648\u0627\u0630\u0627', #waia
u'\u0644\u0648\u0644\u0627', #lwla
u'\u0644\u0648', #lw
u'\u0648\u0644\u0648\u0633\u0648\u0641', #wlwswf
u'\u0644\u0646', #ln
u'\u0645\u0627', #ma
u'\u0644\u0645', #lm
u'\u0648\u0644\u0645', #wlm
u'\u0623\u0645\u0627', #ama
u'\u0627\u0645\u0627', #ama
u'\u0644\u0627', #la
u'\u0625\u0644\u0627', #ala
u'\u0627\u0644\u0627', #ala
u'\u0623\u0645', #am
u'\u0623\u0648', #aw
u'\u0627\u0645', #am
u'\u0627\u0648', #aw
u'\u0628\u0644', #bl
u'\u0623\u0646\u0645\u0627', #anma
u'\u0625\u0646\u0645\u0627', #anma
u'\u0628\u0644', #bl
u'\u0627\u0646\u0645\u0627', #anma
u'\u0627\u0646\u0645\u0627', #anma
u'\u0648' #w
]
# Connectors
connectors = [u'\u0628\u0645\u0627', #bma
u'\u0643\u0645\u0627', #kma
u'\u0644\u0645\u0627', #lma
u'\u0644\u0623\u0646', #lan
u'\u0644\u064a', #ly
u'\u0644\u0649', #ly
u'\u0644\u0623\u0646\u0647', #lanh
u'\u0644\u0623\u0646\u0647\u0627', #lanha
u'\u0644\u0623\u0646\u0647\u0645', #lanhm
u'\u0644\u0627\u0646', #lan
u'\u0644\u0627\u0646\u0647', #lanh
u'\u0644\u0627\u0646\u0647\u0627', #lanha
u'\u0644\u0627\u0646\u0647\u0645', #lanhm
u'\u062b\u0645', #Um
u'\u0623\u064a\u0636\u0627', #ayDa
u'\u0627\u064a\u0636\u0627', #ayDa
u'\u0643\u0630\u0644\u0643', #kilk
u'\u0642\u0628\u0644', #Qbl
u'\u0628\u0639\u062f', #b3d
u'\u0644\u0643\u0646', #lkn
u'\u0648\u0644\u0643\u0646', #wlkn
u'\u0644\u0643\u0646\u0647', #lknh
u'\u0644\u0643\u0646\u0647\u0627', #lknha
u'\u0644\u0643\u0646\u0647\u0645', #lknhm
u'\u0641\u0642\u0637', #fQT
u'\u0631\u063a\u0645', #rGm
u'\u0628\u0627\u0644\u0631\u063a\u0645', #balrGm
u'\u0628\u0641\u0636\u0644', #bfDl
u'\u062d\u064a\u062b', #7yU
u'\u0628\u062d\u064a\u062b', #b7yU
u'\u0644\u0643\u064a', #lky
u'\u0647\u0646\u0627', #hna
u'\u0647\u0646\u0627\u0643', #hnak
u'\u0628\u0633\u0628\u0628', #bsbb
u'\u0630\u0627\u062a', #iat
u'\u0630\u0648', #iw
u'\u0630\u064a', #iy
u'\u0630\u0649', #iy
u'\u0648\u0647', #wh
u'\u064a\u0627', #ya
u'\u0627\u0646\u0645\u0627', #anma
u'\u0641\u0647\u0630\u0627', #fhia
u'\u0641\u0647\u0648', #fhw
u'\u0641\u0645\u0627', #fma
u'\u0641\u0645\u0646', #fmn
u'\u0641\u064a\u0645\u0627', #fyma
u'\u0641\u0647\u0644', #fhl
u'\u0648\u0647\u0644', #fhl
u'\u0641\u0647\u0624\u0644\u0627\u0621', #fholaq
u'\u0643\u0630\u0627', #kia
u'\u0644\u0630\u0644\u0643', #lilk
u'\u0644\u0645\u0627\u0630\u0627', #lmaia
u'\u0644\u0645\u0646', #lmn
u'\u0644\u0646\u0627', #lna
u'\u0645\u0646\u0627', #mna
u'\u0645\u0646\u0643', #mnk
u'\u0645\u0646\u0643\u0645', #mnkm
u'\u0645\u0646\u0647\u0645\u0627', #mnhma
u'\u0644\u0643', #lk
u'\u0648\u0644\u0648', #wlw
u'\u0645\u0645\u0627', #mma
u'\u0639\u0646\u062f', #3nd
u'\u0639\u0646\u062f\u0647\u0645', #3ndhm
u'\u0639\u0646\u062f\u0645\u0627', #3ndma
u'\u0639\u0646\u062f\u0646\u0627', #3ndna
u'\u0639\u0646\u0647\u0645\u0627', #3nhma
u'\u0639\u0646\u0643', #3nk
u'\u0627\u0630\u0646', #ain
u'\u0627\u0644\u0630\u064a', #aliy
u'\u0641\u0627\u0646\u0627', #fana
u'\u0641\u0627\u0646\u0647\u0645', #fanhm
u'\u0641\u0647\u0645', #fhm
u'\u0641\u0647', #fh
u'\u0641\u0643\u0644', #fkl
u'\u0644\u0643\u0644', #lkl
u'\u0644\u0643\u0645', #lkm
u'\u0641\u0644\u0645', #flm
u'\u0641\u0644\u0645\u0627', #flma
u'\u0641\u064a\u0643', #fyk
u'\u0641\u064a\u0643\u0645', #fykm
u'\u0644\u0647\u0630\u0627' # lhia
]
all = preps+pronouns+particles+connectors
all = ' '.join(all)
all = all+ ' ' + fixAlifs(all)
all = list(set(all.split(' ')))
for i in range(len(texts_split)):
word = texts_split[i]
if word in all :
texts_split[i] = ''
# Rejoining the texts again...
texts = ''.join([word + " " for word in texts_split])
# split to get rid of white space
#texts_split = new_texts.split()
# then re-rejoin them.
#out_texts = ''.join([word + " " for word in texts_split])
## clean up spaces
return(re.sub(' {2,}', ' ', texts))
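After stop words are blanked out, the rejoined string is left with runs of spaces; the final re.sub collapses any run of two or more spaces down to one. A self-contained sketch of that blank-then-collapse pattern, using English placeholder words instead of the Arabic lists above:
import re

stop_words = {'the', 'of'}
words = 'the history of regular expressions'.split(' ')
words = ['' if w in stop_words else w for w in words]
rejoined = ''.join(w + ' ' for w in words)       # ' history  regular expressions '
print(re.sub(' {2,}', ' ', rejoined).strip())    # 'history regular expressions'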
0
Example 86
def parse(self, text):
# Basically, r_tag.split will split the text into
# an array containing, 'non-tag', 'tag', 'non-tag', 'tag'
# so if we alternate this variable, we know
# what to look for. This is an alternative to
# line.startswith("{{")
in_tag = False
extend = None
pre_extend = True
# Use a list to store everything in
# This is because later the code will "look ahead"
# for missing strings or brackets.
ij = self.r_tag.split(text)
# j = current index
# i = current item
stack = self.stack
for j in range(len(ij)):
i = ij[j]
if i:
if not stack:
self._raise_error('The "end" tag is unmatched, please check if you have a starting "block" tag')
# Our current element in the stack.
top = stack[-1]
if in_tag:
line = i
# Get rid of delimiters
line = line[len(self.delimiters[0]):-len(self.delimiters[1])].strip()
# This is bad juju, but let's do it anyway
if not line:
continue
# We do not want to replace the newlines in code,
# only in block comments.
def remove_newline(re_val):
# Take the entire match and replace newlines with
# escaped newlines.
return re_val.group(0).replace('\n', '\\n')
# Perform block comment escaping.
# This performs escaping ON anything
# in between """ and """
line = sub(TemplateParser.r_multiline,
remove_newline,
line)
if line.startswith('='):
# IE: {{=response.title}}
name, value = '=', line[1:].strip()
else:
v = line.split(' ', 1)
if len(v) == 1:
# Example
# {{ include }}
# {{ end }}
name = v[0]
value = ''
else:
# Example
# {{ block pie }}
# {{ include "layout.html" }}
# {{ for i in range(10): }}
name = v[0]
value = v[1]
# This will replace newlines in block comments
# with the newline character. This is so that they
# retain their formatting, but squish down to one
# line in the rendered template.
# First check if we have any custom lexers
if name in self.lexers:
# Pass the information to the lexer
# and allow it to inject in the environment
# You can define custom names such as
# '{{<<variable}}' which could potentially
# write unescaped version of the variable.
self.lexers[name](parser=self,
value=value,
top=top,
stack=stack)
elif name == '=':
# So we have a variable to insert into
# the template
buf = "\n%s(%s)" % (self.writer, value)
top.append(Node(buf, pre_extend=pre_extend))
elif name == 'block' and not value.startswith('='):
# Make a new node with name.
node = BlockNode(name=value.strip(),
pre_extend=pre_extend,
delimiters=self.delimiters)
# Append this node to our active node
top.append(node)
# Make sure to add the node to the stack.
# so anything after this gets added
# to this node. This allows us to
# "nest" nodes.
stack.append(node)
elif name == 'end' and not value.startswith('='):
# We are done with this node.
# Save an instance of it
self.blocks[top.name] = top
# Pop it.
stack.pop()
elif name == 'super' and not value.startswith('='):
# Get our correct target name
# If they just called {{super}} without a name
# attempt to assume the top blocks name.
if value:
target_node = value
else:
target_node = top.name
# Create a SuperNode instance
node = SuperNode(name=target_node,
pre_extend=pre_extend)
# Add this to our list to be taken care of
self.super_nodes.append(node)
# And put it in the tree
top.append(node)
elif name == 'include' and not value.startswith('='):
# If we know the target file to include
if value:
self.include(top, value)
# Otherwise, make a temporary include node
# That the child node will know to hook into.
else:
include_node = BlockNode(
name='__include__' + self.name,
pre_extend=pre_extend,
delimiters=self.delimiters)
top.append(include_node)
elif name == 'extend' and not value.startswith('='):
# We need to extend the following
# template.
extend = value
pre_extend = False
else:
# If we don't know where it belongs
# we just add it anyways without formatting.
if line and in_tag:
# Split on the newlines >.<
tokens = line.split('\n')
# We need to look for any instances of
# for i in range(10):
# = i
# pass
# So we can properly put a response.write() in place.
continuation = False
len_parsed = 0
for k, token in enumerate(tokens):
token = tokens[k] = token.strip()
len_parsed += len(token)
if token.startswith('='):
if token.endswith('\\'):
continuation = True
tokens[k] = "\n%s(%s" % (
self.writer, token[1:].strip())
else:
tokens[k] = "\n%s(%s)" % (
self.writer, token[1:].strip())
elif continuation:
tokens[k] += ')'
continuation = False
buf = "\n%s" % '\n'.join(tokens)
top.append(Node(buf, pre_extend=pre_extend))
else:
# It is HTML so just include it.
buf = "\n%s(%r, escape=False)" % (self.writer, i)
top.append(Node(buf, pre_extend=pre_extend))
# Remember: tag, not tag, tag, not tag
in_tag = not in_tag
# Make a list of items to remove from child
to_rm = []
# Go through each of the children nodes
for node in self.child_super_nodes:
# If we declared a block that this node wants to include
if node.name in self.blocks:
# Go ahead and include it!
node.value = self.blocks[node.name]
# Since we processed this child, we don't need to
# pass it along to the parent
to_rm.append(node)
# Remove some of the processed nodes
for node in to_rm:
# Since this is a pointer, it works beautifully.
# Sometimes I miss C-Style pointers... I want my asterisk...
self.child_super_nodes.remove(node)
# If we need to extend a template.
if extend:
self.extend(extend)
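The sub() call in this template parser is the function-replacement form of re.sub: a callable receives the match for a triple-quoted block and returns it with its newlines escaped, so block comments collapse to a single line in the generated code. A minimal sketch follows; the r_multiline pattern below is an assumption standing in for the parser's real one:
import re

# assumed stand-in for TemplateParser.r_multiline: triple-quoted strings, newlines included
r_multiline = re.compile(r'("""[\s\S]*?""")|(\'\'\'[\s\S]*?\'\'\')')

def remove_newline(re_val):
    # take the entire match and replace newlines with escaped newlines
    return re_val.group(0).replace('\n', '\\n')

line = '= """first line\nsecond line"""'
print(re.sub(r_multiline, remove_newline, line))   # = """first line\nsecond line"""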
0
Example 87
Project: hue Source File: create_table.py
def import_wizard(request, database='default'):
"""
Help users define a table based on a file they want to import to Hive.
Limitations:
- Rows are delimited (no serde).
- No detection for map and array types.
- No detection for the presence of column header in the first row.
- No partition table.
- Does not work with binary data.
"""
encoding = i18n.get_site_encoding()
app_name = get_app_name(request)
db = dbms.get(request.user)
dbs = db.get_databases()
databases = [{'name':db, 'url':reverse('beeswax:import_wizard', kwargs={'database': db})} for db in dbs]
if request.method == 'POST':
#
# General processing logic:
# - We have 3 steps. Each requires the previous.
# * Step 1 : Table name and file location
# * Step 2a : Display sample with auto chosen delim
# * Step 2b : Display sample with user chosen delim (if user chooses one)
# * Step 3 : Display sample, and define columns
# - Each step is represented by a different form. The form of an earlier step
# should be present when submitting to a later step.
# - To preserve the data from the earlier steps, we send the forms back as
# hidden fields. This way, when users revisit a previous step, the data would
# be there as well.
#
delim_is_auto = False
fields_list, n_cols = [[]], 0
s3_col_formset = None
s1_file_form = CreateByImportFileForm(request.POST, db=db)
if s1_file_form.is_valid():
do_s2_auto_delim = request.POST.get('submit_file') # Step 1 -> 2
do_s2_user_delim = request.POST.get('submit_preview') # Step 2 -> 2
do_s3_column_def = request.POST.get('submit_delim') # Step 2 -> 3
do_hive_create = request.POST.get('submit_create') # Step 3 -> execute
cancel_s2_user_delim = request.POST.get('cancel_delim') # Step 2 -> 1
cancel_s3_column_def = request.POST.get('cancel_create') # Step 3 -> 2
# Exactly one of these should be True
if len(filter(None, (do_s2_auto_delim, do_s2_user_delim, do_s3_column_def, do_hive_create, cancel_s2_user_delim, cancel_s3_column_def))) != 1:
raise PopupException(_('Invalid form submission'))
if not do_s2_auto_delim:
# We should have a valid delim form
s2_delim_form = CreateByImportDelimForm(request.POST)
if not s2_delim_form.is_valid():
# Go back to picking delimiter
do_s2_user_delim, do_s3_column_def, do_hive_create = True, False, False
if do_hive_create:
# We should have a valid columns formset
s3_col_formset = ColumnTypeFormSet(prefix='cols', data=request.POST)
if not s3_col_formset.is_valid():
# Go back to define columns
do_s3_column_def, do_hive_create = True, False
load_data = s1_file_form.cleaned_data.get('load_data', 'IMPORT').upper()
path = s1_file_form.cleaned_data['path']
#
# Go to step 2: We've just picked the file. Preview it.
#
if do_s2_auto_delim:
try:
if load_data == 'IMPORT':
if not request.fs.isfile(path):
raise PopupException(_('Path location must refer to a file if "Import Data" is selected.'))
elif load_data == 'EXTERNAL':
if not request.fs.isdir(path):
raise PopupException(_('Path location must refer to a directory if "Create External Table" is selected.'))
except (IOError, S3FileSystemException), e:
raise PopupException(_('Path location "%s" is invalid: %s') % (path, e))
delim_is_auto = True
fields_list, n_cols, s2_delim_form = _delim_preview(request.fs, s1_file_form, encoding, [reader.TYPE for reader in FILE_READERS], DELIMITERS)
if (do_s2_user_delim or do_s3_column_def or cancel_s3_column_def) and s2_delim_form.is_valid():
# Delimit based on input
fields_list, n_cols, s2_delim_form = _delim_preview(request.fs, s1_file_form, encoding, (s2_delim_form.cleaned_data['file_type'],),
(s2_delim_form.cleaned_data['delimiter'],))
if do_s2_auto_delim or do_s2_user_delim or cancel_s3_column_def:
return render('import_wizard_choose_delimiter.mako', request, {
'action': reverse(app_name + ':import_wizard', kwargs={'database': database}),
'delim_readable': DELIMITER_READABLE.get(s2_delim_form['delimiter'].data[0], s2_delim_form['delimiter'].data[1]),
'initial': delim_is_auto,
'file_form': s1_file_form,
'delim_form': s2_delim_form,
'fields_list': fields_list,
'delimiter_choices': TERMINATOR_CHOICES,
'n_cols': n_cols,
'database': database,
'databases': databases
})
#
# Go to step 3: Define column.
#
if do_s3_column_def:
if s3_col_formset is None:
columns = []
for i in range(n_cols):
columns.append({
'column_name': 'col_%s' % (i,),
'column_type': 'string',
})
s3_col_formset = ColumnTypeFormSet(prefix='cols', initial=columns)
try:
fields_list_for_json = list(fields_list)
if fields_list_for_json:
fields_list_for_json[0] = map(lambda a: re.sub('[^\w]', '', a), fields_list_for_json[0]) # Cleaning headers
return render('import_wizard_define_columns.mako', request, {
'action': reverse(app_name + ':import_wizard', kwargs={'database': database}),
'file_form': s1_file_form,
'delim_form': s2_delim_form,
'column_formset': s3_col_formset,
'fields_list': fields_list,
'fields_list_json': json.dumps(fields_list_for_json),
'n_cols': n_cols,
'database': database,
'databases': databases
})
except Exception, e:
raise PopupException(_("The selected delimiter is creating an un-even number of columns. Please make sure you don't have empty columns."), detail=e)
#
# Final: Execute
#
if do_hive_create:
delim = s2_delim_form.cleaned_data['delimiter']
table_name = s1_file_form.cleaned_data['name']
proposed_query = django_mako.render_to_string("create_table_statement.mako", {
'table': {
'name': table_name,
'comment': s1_file_form.cleaned_data['comment'],
'row_format': 'Delimited',
'field_terminator': delim,
'file_format': 'TextFile',
'load_data': load_data,
'path': path,
'skip_header': request.REQUEST.get('removeHeader', 'off').lower() == 'on'
},
'columns': [ f.cleaned_data for f in s3_col_formset.forms ],
'partition_columns': [],
'database': database,
'databases': databases
}
)
try:
return _submit_create_and_load(request, proposed_query, table_name, path, load_data, database=database)
except QueryServerException, e:
raise PopupException(_('The table could not be created.'), detail=e.message)
else:
s1_file_form = CreateByImportFileForm()
return render('import_wizard_choose_file.mako', request, {
'action': reverse(app_name + ':import_wizard', kwargs={'database': database}),
'file_form': s1_file_form,
'database': database,
'databases': databases
})
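The one re.sub in this wizard cleans column headers before they are serialized to JSON for the column-definition step, dropping every character that is not a word character. A tiny sketch of that header cleanup with made-up header names:
import re

headers = ['First Name', 'e-mail (primary)', 'amount $']
cleaned = [re.sub(r'[^\w]', '', h) for h in headers]
print(cleaned)   # ['FirstName', 'emailprimary', 'amount']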
0
Example 88
Project: deep_recommend_system Source File: parsing_ops.py
def _parse_single_sequence_example_raw(serialized,
context_sparse_keys=None,
context_sparse_types=None,
context_dense_keys=None,
context_dense_types=None,
context_dense_defaults=None,
context_dense_shapes=None,
feature_list_sparse_keys=None,
feature_list_sparse_types=None,
feature_list_dense_keys=None,
feature_list_dense_types=None,
feature_list_dense_shapes=None,
feature_list_dense_defaults=None,
debug_name=None,
name=None):
"""Parses a single `SequenceExample` proto.
Args:
serialized: A scalar (0-D Tensor) of type string, a single binary
serialized `SequenceExample` proto.
context_sparse_keys: A list of string keys in the `SequenceExample`'s
features. The results for these keys will be returned as
`SparseTensor` objects.
context_sparse_types: A list of `DTypes`, the same length as `sparse_keys`.
Only `tf.float32` (`FloatList`), `tf.int64` (`Int64List`),
and `tf.string` (`BytesList`) are supported.
context_dense_keys: A list of string keys in the examples' features.
The results for these keys will be returned as `Tensor`s
context_dense_types: A list of DTypes, same length as `context_dense_keys`.
Only `tf.float32` (`FloatList`), `tf.int64` (`Int64List`),
and `tf.string` (`BytesList`) are supported.
context_dense_defaults: A dict mapping string keys to `Tensor`s.
The keys of the dict must match the context_dense_keys of the feature.
context_dense_shapes: A list of tuples, same length as `context_dense_keys`.
The shape of the data for each context_dense feature referenced by
`context_dense_keys`. Required for any input tensors identified by
`context_dense_keys` whose shapes are anything other than `[]` or `[1]`.
feature_list_sparse_keys: A list of string keys in the `SequenceExample`'s
feature_lists. The results for these keys will be returned as
`SparseTensor` objects.
feature_list_sparse_types: A list of `DTypes`, same length as `sparse_keys`.
Only `tf.float32` (`FloatList`), `tf.int64` (`Int64List`),
and `tf.string` (`BytesList`) are supported.
feature_list_dense_keys: A list of string keys in the `SequenceExample`'s
features_lists. The results for these keys will be returned as `Tensor`s.
feature_list_dense_types: A list of `DTypes`, same length as
`feature_list_dense_keys`. Only `tf.float32` (`FloatList`),
`tf.int64` (`Int64List`), and `tf.string` (`BytesList`) are supported.
feature_list_dense_shapes: A list of tuples, same length as
`feature_list_dense_keys`. The shape of the data for each
`FeatureList` feature referenced by `feature_list_dense_keys`.
feature_list_dense_defaults: A dict mapping key strings to values.
The only currently allowed value is `None`. Any key appearing
in this dict with value `None` is allowed to be missing from the
`SequenceExample`. If missing, the key is treated as zero-length.
debug_name: A scalar (0-D Tensor) of strings (optional), the name of
the serialized proto.
name: A name for this operation (optional).
Returns:
A tuple of two `dict`s, each mapping keys to `Tensor`s and `SparseTensor`s.
The first dict contains the context key/values.
The second dict contains the feature_list key/values.
Raises:
ValueError: If context_sparse and context_dense key sets intersect,
if input lengths do not match up, or if a value in
feature_list_dense_defaults is not None.
TypeError: if feature_list_dense_defaults is not either None or a dict.
"""
with ops.name_scope(name, "ParseSingleSequenceExample", [serialized]):
context_dense_defaults = (
{} if context_dense_defaults is None else context_dense_defaults)
context_sparse_keys = (
[] if context_sparse_keys is None else context_sparse_keys)
context_sparse_types = (
[] if context_sparse_types is None else context_sparse_types)
context_dense_keys = (
[] if context_dense_keys is None else context_dense_keys)
context_dense_types = (
[] if context_dense_types is None else context_dense_types)
context_dense_shapes = (
[[]] * len(context_dense_keys)
if context_dense_shapes is None else context_dense_shapes)
feature_list_sparse_keys = (
[] if feature_list_sparse_keys is None else feature_list_sparse_keys)
feature_list_sparse_types = (
[] if feature_list_sparse_types is None else feature_list_sparse_types)
feature_list_dense_keys = (
[] if feature_list_dense_keys is None else feature_list_dense_keys)
feature_list_dense_types = (
[] if feature_list_dense_types is None else feature_list_dense_types)
feature_list_dense_shapes = (
[[]] * len(feature_list_dense_keys)
if feature_list_dense_shapes is None else feature_list_dense_shapes)
feature_list_dense_defaults = (
dict() if feature_list_dense_defaults is None
else feature_list_dense_defaults)
debug_name = "" if debug_name is None else debug_name
# Internal
feature_list_dense_missing_assumed_empty = []
num_context_dense = len(context_dense_keys)
num_feature_list_dense = len(feature_list_dense_keys)
num_context_sparse = len(context_sparse_keys)
num_feature_list_sparse = len(feature_list_sparse_keys)
if len(context_dense_shapes) != num_context_dense:
raise ValueError(
"len(context_dense_shapes) != len(context_dense_keys): %d vs. %d"
% (len(context_dense_shapes), num_context_dense))
if len(context_dense_types) != num_context_dense:
raise ValueError(
"len(context_dense_types) != len(num_context_dense): %d vs. %d"
% (len(context_dense_types), num_context_dense))
if len(feature_list_dense_shapes) != num_feature_list_dense:
raise ValueError(
"len(feature_list_dense_shapes) != len(feature_list_dense_keys): "
"%d vs. %d" % (len(feature_list_dense_shapes),
num_feature_list_dense))
if len(feature_list_dense_types) != num_feature_list_dense:
raise ValueError(
"len(feature_list_dense_types) != len(num_feature_list_dense):"
"%d vs. %d" % (len(feature_list_dense_types), num_feature_list_dense))
if len(context_sparse_types) != num_context_sparse:
raise ValueError(
"len(context_sparse_types) != len(context_sparse_keys): %d vs. %d"
% (len(context_sparse_types), num_context_sparse))
if len(feature_list_sparse_types) != num_feature_list_sparse:
raise ValueError(
"len(feature_list_sparse_types) != len(feature_list_sparse_keys): "
"%d vs. %d"
% (len(feature_list_sparse_types), num_feature_list_sparse))
if (num_context_dense + num_context_sparse
+ num_feature_list_dense + num_feature_list_sparse) == 0:
raise ValueError(
"Must provide at least one context_sparse key, context_dense key, "
", feature_list_sparse key, or feature_list_dense key")
if not set(context_dense_keys).isdisjoint(set(context_sparse_keys)):
raise ValueError(
"context_dense and context_sparse keys must not intersect; "
"intersection: %s" %
set(context_dense_keys).intersection(set(context_sparse_keys)))
if not set(feature_list_dense_keys).isdisjoint(
set(feature_list_sparse_keys)):
raise ValueError(
"feature_list_dense and feature_list_sparse keys must not intersect; "
"intersection: %s" %
set(feature_list_dense_keys).intersection(
set(feature_list_sparse_keys)))
if not isinstance(feature_list_dense_defaults, dict):
raise TypeError("feature_list_dense_defaults must be a dict")
for k, v in feature_list_dense_defaults.items():
if v is not None:
raise ValueError("Value feature_list_dense_defaults[%s] must be None"
% k)
feature_list_dense_missing_assumed_empty.append(k)
context_dense_defaults_vec = []
for i, key in enumerate(context_dense_keys):
default_value = context_dense_defaults.get(key)
if default_value is None:
default_value = constant_op.constant([], dtype=context_dense_types[i])
elif not isinstance(default_value, ops.Tensor):
key_name = "key_" + re.sub("[^A-Za-z0-9_.\\-/]", "_", key)
default_value = ops.convert_to_tensor(
default_value, dtype=context_dense_types[i], name=key_name)
default_value = array_ops.reshape(
default_value, context_dense_shapes[i])
context_dense_defaults_vec.append(default_value)
context_dense_shapes = [tensor_shape.as_shape(shape).as_proto()
for shape in context_dense_shapes]
feature_list_dense_shapes = [tensor_shape.as_shape(shape).as_proto()
for shape in feature_list_dense_shapes]
# pylint: disable=protected-access
outputs = gen_parsing_ops._parse_single_sequence_example(
serialized=serialized,
debug_name=debug_name,
context_dense_defaults=context_dense_defaults_vec,
context_sparse_keys=context_sparse_keys,
context_sparse_types=context_sparse_types,
context_dense_keys=context_dense_keys,
context_dense_shapes=context_dense_shapes,
feature_list_sparse_keys=feature_list_sparse_keys,
feature_list_sparse_types=feature_list_sparse_types,
feature_list_dense_keys=feature_list_dense_keys,
feature_list_dense_types=feature_list_dense_types,
feature_list_dense_shapes=feature_list_dense_shapes,
feature_list_dense_missing_assumed_empty=(
feature_list_dense_missing_assumed_empty),
name=name)
# pylint: enable=protected-access
(context_sparse_indices, context_sparse_values,
context_sparse_shapes, context_dense_values,
feature_list_sparse_indices, feature_list_sparse_values,
feature_list_sparse_shapes, feature_list_dense_values) = outputs
context_sparse_tensors = [
sparse_tensor.SparseTensor(ix, val, shape) for (ix, val, shape)
in zip(context_sparse_indices,
context_sparse_values,
context_sparse_shapes)]
feature_list_sparse_tensors = [
sparse_tensor.SparseTensor(ix, val, shape) for (ix, val, shape)
in zip(feature_list_sparse_indices,
feature_list_sparse_values,
feature_list_sparse_shapes)]
context_output = dict(
zip(context_sparse_keys + context_dense_keys,
context_sparse_tensors + context_dense_values))
feature_list_output = dict(
zip(feature_list_sparse_keys + feature_list_dense_keys,
feature_list_sparse_tensors + feature_list_dense_values))
return (context_output, feature_list_output)
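The re.sub here sanitizes a feature key into something safe to use as an op name: any character outside A-Za-z0-9_.-/ becomes an underscore before the key is prefixed with "key_". A standalone sketch of that sanitization, with the tensor plumbing omitted:
import re

key = 'user:age (years)'
key_name = 'key_' + re.sub(r'[^A-Za-z0-9_.\-/]', '_', key)
print(key_name)   # key_user_age__years_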
0
Example 89
Project: libtrack Source File: parsing.py
def yield_paths(filename, timing_info=None, filter_calls=[]):
"""Retrieves full paths from a trace file
Given a trace file, returns all full paths ending
in a POSIX call, until the end of the file. Full paths are
returned as lists of function-call signatures along with the
respective library name.
For example, "__libc_init:libc.so" is the signature for
the call "__libc_init" belonging to lib "libc.so"
Args:
filename
Returns:
fullpath as a list
Raises:
Exception: if log file is corrupted
"""
try:
f = open(filename)
except IOError, error:
print >> sys.stderr, "I/O error while opening file: %s" % error
return
cache = Cache()
for line in f:
try:
labels = line.split(':')[:3]
except Exception, error:
print >> sys.stderr, "Unhandled Exception:", error, filename
continue
#return
if labels[1:] in [['BT', 'REPEAT']]:
try:
cache.repeat_next = int(line.split(':')[3])
except Exception, error:
#print >> sys.stderr, "Unhandled Exception:", error, filename
continue
#return
elif labels[1:] in [['BT', 'START']]:
# flush the bloody cache
if cache.dirty_native_path:
if cache.native_path[-1] in filter_calls:
cache.dirty_native_path = False
cache.dirty_dalvik_path = False
cache.repeat_current = cache.repeat_next
cache.repeat_next = 1
continue
# separately retrieve fd type for open
if cache.native_path[-1] == 'open:libc.so':
cache.native_path[-1] = 'open_' + cache.fd_buff + ':libc.so'
for _ in range(cache.repeat_current):
#yield cache.native_path
if cache.dirty_dalvik_path:
yield cache.dalvik_path + cache.native_path
else:
yield cache.native_path
cache.dirty_native_path = False
cache.dirty_dalvik_path = False
cache.repeat_current = cache.repeat_next
cache.repeat_next = 1
try:
frames = int(line.split(':')[3])
cache.set_native(f, frames)
except (ValueError, IndexError), error:
#print >> sys.stderr, "Error:", error, "in file:", filename
continue
#return
except StopIteration, error:
#print >> sys.stderr, "Error:", error, "unexpected end in File:", filename
continue
#return
except Exception as error:
#print >> sys.stderr, "Unhandled Exception:", error, "in file:", filename
continue
#return
elif labels[1:] in [['DVM', 'BT_START']]:
try:
frames = int(line.split(':')[3])
cache.set_dalvik(f, frames)
except (ValueError, IndexError), error:
#print >> sys.stderr, "Error:", error, "in file:", filename
continue
#return
except StopIteration, error:
#print >> sys.stderr, "Error:", error, "unexpected end in File:", filename
continue
#return
except Exception, error:
#print >> sys.stderr, "Unhandled Exception:", error, "in file:", filename
continue
#return
elif labels[1:] in [['LOG', 'I']]:
try:
cache.set_fd_buff(line)
except (ValueError, IndexError), error:
#print >> sys.stderr, "Error:", error, "in file:", filename
continue
#return
except StopIteration, error:
#print >> sys.stderr, "Error:", error, "unexpected end in File:", filename
continue
#return
except Exception, error:
#print >> sys.stderr, "Unhandled Exception:", error, "in file:", filename
continue
#return
elif labels[1:] in [['BT_REPEAT', '1']]:
cache.dirty_dalvik_path = True
elif labels[1:2] in [['CALL']]:
if len(line.split('.')) > 2:
continue
try:
yield ["NO_BACKTRACE:NO_BACKTRACE", labels[2].split('\n')[0] + ':libc.so']
except Exception, error:
#print >> sys.stderr, "Unhandled Exception:", error, "in file:", filename
continue
#return
elif labels[1:] in [['LOG', 'T']]:
if timing_info == None:
continue
# nothing else I can do now for this kind of corrupted logs
#if len(line.split(':')) != 5:
# continue
try:
call = line.split(':')[3] + ":libc.so"
if call in filter_calls:
continue
time = line.split(':')[4].split('\n')[0]
nsec = 1000000000*int(time.split('.')[0]) + int(time.split('.')[1])
except Exception, error:
print >> sys.stderr, "Unhandled Exception:", error, "in file:", filename
continue
#return
# special handling of epoll - it should go to cache later
if call[:11] == "epoll_wait_":
call = "epoll_wait_:libc.so"
else:
# this regexec makes things sooooo slow
call = re.sub(r'_B:|_D:|_E:|_F:|_f:|_K:|_k:|_P:|_p:|_S:|_U:', ':', call)
if call not in timing_info:
timing_info[call] = [nsec]
else:
timing_info[call].append(nsec)
else:
continue
# flush cache
if cache.dirty_native_path:
# separately retrieve fd type for open
if cache.native_path[-1] == 'open:libc.so':
cache.native_path[-1] = 'open_' + cache.fd_buff + ':libc.so'
for _ in range(cache.repeat_current):
#yield cache.native_path
if cache.dirty_dalvik_path:
yield cache.dalvik_path + cache.native_path
else:
yield cache.native_path
f.close()
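The re.sub in the timing branch strips a one-letter file-descriptor-type suffix (for example _S for sockets) out of a call signature before it is used as a key in timing_info, so all variants of a call aggregate under one name. A small sketch with assumed call strings:
import re

timing_info = {}
for call in ('read_S:libc.so', 'read_F:libc.so', 'write:libc.so'):
    normalized = re.sub(r'_B:|_D:|_E:|_F:|_f:|_K:|_k:|_P:|_p:|_S:|_U:', ':', call)
    timing_info.setdefault(normalized, []).append(1)
print(timing_info)   # {'read:libc.so': [1, 1], 'write:libc.so': [1]}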
0
Example 90
def run(self, *args):
def string_clean(value):
if value:
return re.sub('[\n\t\r]', '', value)
return ""
def parse_ole_msg(ole):
stream_dirs = ole.listdir()
for stream in stream_dirs:
# get stream that contains the email header
if stream[0].startswith('__substg1.0_007D'):
email_header = ole.openstream(stream).read()
if stream[0].endswith('001F'): # Unicode probably needs something better than just stripping \x00
email_header = email_header.replace('\x00', '')
# If it came from outlook we may need to trim some lines
try:
email_header = email_header.split('Version 2.0\x0d\x0a', 1)[1]
except:
pass
# Leaving us an RFC compliant email to parse
msg = email.message_from_string(email_header)
return msg
def parse_ole_attachments(ole):
# Hard part now: each part of the attachment is in a separate stream,
# so we need to get a unique stream id for each attachment;
# it's in the stream name as an 8-digit number.
for i in range(20): # arbitrary count of emails; I don't expect this many
stream_number = str(i).zfill(8)
stream_name = '__attach_version1.0_#' + stream_number
# Unicode
try:
att_filename = ole.openstream(stream_name + '/__substg1.0_3704001F').read()
att_mime = ole.openstream(stream_name + '/__substg1.0_370E001F').read()
att_data = ole.openstream(stream_name + '/__substg1.0_37010102').read()
att_size = len(att_data)
att_md5 = hashlib.md5(att_data).hexdigest()
print i, att_size, att_md5, att_filename, att_mime
except:
pass
# ASCII
try:
att_filename = ole.openstream(stream_name + '/__substg1.0_3704001E').read()
att_mime = ole.openstream(stream_name + '/__substg1.0_370E001E').read()
att_data = ole.openstream(stream_name + '/__substg1.0_37010102').read()
att_size = len(att_data)
att_md5 = hashlib.md5(att_data).hexdigest()
print i, att_size, att_md5, att_filename, att_mime
except:
pass
def att_session(att_id, msg, ole_flag):
att_count = 0
if ole_flag:
ole = msg
# Hard part now: each part of the attachment is in a separate stream,
# so we need to get a unique stream id for each attachment;
# it's in the stream name as an 8-digit number.
for i in range(20): # arbitrary count of emails; I don't expect this many
stream_number = str(i).zfill(8)
stream_name = '__attach_version1.0_#' + stream_number
# Unicode
try:
att_filename = ole.openstream(stream_name + '/__substg1.0_3704001F').read()
att_filename = att_filename.replace('\x00', '')
att_data = ole.openstream(stream_name + '/__substg1.0_37010102').read()
except:
pass
# ASCII
try:
att_filename = ole.openstream(stream_name + '/__substg1.0_3704001E').read()
att_data = ole.openstream(stream_name + '/__substg1.0_37010102').read()
except:
pass
if i == att_id:
self.log('info', "Switching session to {0}".format(att_filename))
tmp_path = os.path.join(tempfile.gettempdir(), att_filename)
with open(tmp_path, 'w') as tmp:
tmp.write(att_data)
__sessions__.new(tmp_path)
return
else:
for part in msg.walk():
if part.get_content_type() == 'message/rfc822':
rfc822 = True
else:
rfc822 = False
if part.get_content_maintype() == 'multipart' \
or not part.get('Content-Disposition') \
and not rfc822:
continue
att_count += 1
if att_count == att_id:
if rfc822:
data = part.as_string()
m = re.match("Content-Type: message/rfc822\r?\n\r?\n(.*)", data, flags=re.S)
if not m:
self.log('error', "Could not extract RFC822 formatted message")
return
data = m.group(1)
att_size = len(data)
filename = "rfc822msg_{0}.eml".format(att_size)
else:
data = part.get_payload(decode=True)
filename = part.get_filename()
self.log('info', "Switching session to {0}".format(filename))
if data:
tmp_path = os.path.join(tempfile.gettempdir(), filename)
with open(tmp_path, 'w') as tmp:
tmp.write(data)
__sessions__.new(tmp_path)
return
def email_envelope(msg):
# Envelope
self.log('info', "Email envelope:")
rows = [
['Subject', msg.get("Subject")],
['To', msg.get("To")],
['From', msg.get("From")],
['Cc', msg.get("Cc")],
['Bcc', msg.get("Bcc")],
['Date', msg.get("Date")]
]
self.log('table', dict(header=['Key', 'Value'], rows=rows))
return
def email_header(msg):
# Headers
rows = []
for x in msg.keys():
# Adding Received to the ignore list. This has to be handled separately if there is more than one line
if x not in ['Subject', 'From', 'To', 'Date', 'Cc', 'Bcc', 'DKIM-Signature', 'Received']:
rows.append([x, string_clean(msg.get(x))])
for x in msg.get_all('Received'):
rows.append(['Received', string_clean(x)])
self.log('info', "Email headers:")
rows = sorted(rows, key=lambda entry: entry[0])
self.log('table', dict(header=['Key', 'Value'], rows=rows))
return
def email_trace(msg, verbose):
rows = []
if verbose:
fields = ['from', 'by', 'with', 'id', 'for', 'timestamp']
else:
fields = ['from', 'by', 'timestamp']
for x in msg.get_all('Received'):
x = string_clean(x)
cre = re.compile("""
(?: from \s+ (?P<from>.*?) (?=by|with|id|ID|for|;|$) )?
(?: by \s+ (?P<by>.*?) (?=with|id|ID|for|;|$) )?
(?: with \s+ (?P<with>.*?) (?=id|ID|for|;|$) )?
(?: (id|ID) \s+ (?P<id>.*?) (?=for|;|$) )?
(?: for \s+ (?P<for>.*?) (?=;|$) )?
(?: \s* ; \s* (?P<timestamp>.*) )?
""", flags=re.X | re.I)
m = cre.search(x)
if not m:
self.log('error', "Received header regex didn't match")
return
t = []
for groupname in fields:
t.append(string_clean(m.group(groupname)))
rows.insert(0, t)
self.log('info', "Email path trace:")
self.log('table', dict(header=fields, rows=rows))
return
def email_spoofcheck(msg, dnsenabled):
self.log('info', "Email spoof check:")
# test 1: check if From address is the same as Sender, Reply-To, and Return-Path
rows = [
['Sender', string_clean(msg.get("Sender"))],
['From', string_clean(msg.get("From"))],
['Reply-To', string_clean(msg.get("Reply-To"))],
['Return-Path', string_clean(msg.get("Return-Path"))]
]
self.log('table', dict(header=['Key', 'Value'], rows=rows))
addr = {
'Sender': email.utils.parseaddr(string_clean(msg.get("Sender")))[1],
'From': email.utils.parseaddr(string_clean(msg.get("From")))[1],
'Reply-To': email.utils.parseaddr(string_clean(msg.get("Reply-To")))[1],
'Return-Path': email.utils.parseaddr(string_clean(msg.get("Return-Path")))[1]
}
if (addr['From'] == ''):
self.log('error', "No From address!")
return
elif addr['Sender'] and (addr['From'] != addr['Sender']):
self.log('warning', "Email FAILED: From address different than Sender")
elif addr['Reply-To'] and (addr['From'] != addr['Reply-To']):
self.log('warning', "Email FAILED: From address different than Reply-To")
elif addr['Return-Path'] and (addr['From'] != addr['Return-Path']):
self.log('warning', "Email FAILED: From address different than Return-Path")
else:
self.log('success', "Email PASSED: From address the same as Sender, Reply-To, and Return-Path")
# test 2: check to see if first Received: by domain matches sender MX domain
if not dnsenabled:
self.log('info', "Unable to run Received by / sender check without dnspython available")
else:
r = msg.get_all('Received')[-1]
m = re.search("by\s+(\S*?)(?:\s+\(.*?\))?\s+with", r)
if not m:
self.log('error', "Received header regex didn't match")
return
byname = m.group(1)
# this can be either a name or an IP
m = re.search("(\w+\.\w+|\d+\.\d+\.\d+\.\d+)$", byname)
if not m:
self.log('error', "Could not find domain or IP in Received by field")
return
bydomain = m.group(1)
domains = [['Received by', bydomain]]
# if it's an IP, do the reverse lookup
m = re.search("\.\d+$", bydomain)
if m:
bydomain = str(dns.reversename.from_address(bydomain)).strip('.')
domains.append(['Received by reverse lookup', bydomain])
# if the email has a Sender header, use that
if (addr['Sender'] != ""):
m = re.search("(\w+\.\w+)$", addr['Sender'])
if not m:
self.log('error', "Sender header regex didn't match")
return
fromdomain = m.group(1)
domains.append(['Sender', fromdomain])
# otherwise, use the From header
else:
m = re.search("(\w+\.\w+)$", addr['From'])
if not m:
self.log('error', "From header regex didn't match")
return
fromdomain = m.group(1)
domains.append(['From', fromdomain])
bymatch = False
try:
mx = dns.resolver.query(fromdomain, 'MX')
if mx :
for rdata in mx:
m = re.search("(\w+\.\w+).$", str(rdata.exchange))
if not m:
self.log('error', "MX domain regex didn't match")
continue
domains.append(['MX for ' + fromdomain, m.group(1)])
if bydomain == m.group(1):
bymatch = True
self.log('table', dict(header=['Key', 'Value'], rows=domains))
except:
domains.append(['MX for ' + fromdomain, "not registered in DNS"])
self.log('table', dict(header=['Key', 'Value'], rows=domains))
if bymatch:
self.log('success', "Email PASSED: Received by domain found in Sender/From MX domains")
else:
self.log('warning', "Email FAILED: Could not match Received by domain to Sender/From MX")
# test 3: look at SPF records
rspf = []
results = set()
allspf = msg.get_all('Received-SPF')
if not allspf:
return
for spf in allspf:
# self.log('info', string_clean(spf))
m = re.search("\s*(\w+)\s+\((.*?):\s*(.*?)\)\s+(.*);", string_clean(spf))
if not m:
self.log('error', "Received-SPF regex didn't match")
return
rspf.append([m.group(2), m.group(1), m.group(3), m.group(4)])
results = results | {m.group(1)}
self.log('table', dict(header=['Domain', 'Action', 'Info', 'Additional'], rows=rspf))
if results & {'fail', 'softfail'}:
self.log('warning', "Email FAILED: Found fail or softfail SPF results")
elif results & {'none', 'neutral'}:
self.log('warning', "Email NEUTRAL: Found none or neutral SPF results")
elif results & {'permerror', 'temperror'}:
self.log('warning', "Email NEUTRAL: Found error condition")
elif results & {'pass'}:
self.log('success', "Email PASSED: Found SPF pass result")
return
def email_attachments(msg, ole_flag):
# Attachments
att_count = 0
rows = []
links = []
if ole_flag:
ole = msg
# Hard part now: each part of the attachment is in a separate stream,
# so we need to get a unique stream id for each attachment;
# it's in the stream name as an 8-digit number.
for i in range(20): # arbitrary count of emails; I don't expect this many
stream_number = str(i).zfill(8)
stream_name = '__attach_version1.0_#' + stream_number
# Unicode
try:
att_filename = ole.openstream(stream_name + '/__substg1.0_3704001F').read()
att_mime = ole.openstream(stream_name + '/__substg1.0_370E001F').read()
att_data = ole.openstream(stream_name + '/__substg1.0_37010102').read()
att_size = len(att_data)
att_md5 = hashlib.md5(att_data).hexdigest()
rows.append([i, att_filename, att_mime, att_size, att_md5])
att_count += 1
except:
pass
# ASCII
try:
att_filename = ole.openstream(stream_name + '/__substg1.0_3704001E').read()
att_mime = ole.openstream(stream_name + '/__substg1.0_370E001E').read()
att_data = ole.openstream(stream_name + '/__substg1.0_37010102').read()
att_size = len(att_data)
att_md5 = hashlib.md5(att_data).hexdigest()
rows.append([i, att_filename, att_mime, att_size, att_md5])
att_count += 1
except:
pass
else:
# Walk through email string.
for part in msg.walk():
content_type = part.get_content_type()
if content_type == 'multipart':
continue
if content_type in ('text/plain', 'text/html'):
part_content = part.get_payload(decode=True)
for link in re.findall(r'(https?://[^"<>\s]+)', part_content):
if link not in links:
links.append(link)
if content_type == 'message/rfc822':
part_content = part.as_string()
m = re.match("Content-Type: message/rfc822\r?\n\r?\n(.*)", part_content, flags=re.S)
if not m:
self.log('error', "Could not extract RFC822 formatted message")
return
part_content = m.group(1)
att_size = len(part_content)
att_file_name = "rfc822msg_{0}.eml".format(att_size)
att_md5 = hashlib.md5(part_content).hexdigest()
att_count += 1
rows.append([att_count, att_file_name, content_type, att_size, att_md5])
continue
if not part.get('Content-Disposition'):
# These are not attachments.
continue
att_file_name = part.get_filename()
att_size = len(part_content)
if not att_file_name:
continue
att_data = part.get_payload(decode=True)
att_md5 = hashlib.md5(att_data).hexdigest()
att_count += 1
rows.append([att_count, att_file_name, part.get_content_type(), att_size, att_md5])
self.log('info', "Email attachments (total: {0}):".format(att_count))
if att_count > 0:
self.log('table', dict(header=['ID', 'FileName', 'Content Type', 'File Size', 'MD5'], rows=rows))
self.log('info', "Email links:")
for link in links:
self.log('item', link)
return
# Start Here
if not __sessions__.is_set():
self.log('error', "No open session")
return
super(EmailParse, self).run(*args)
if self.args is None:
return
# see if we can load the dns library for MX lookup spoof detection
try:
import dns.resolver
import dns.reversename
dnsenabled = True
except ImportError:
dnsenabled = False
# Try to open as an ole msg, if not treat as email string
try:
ole = olefile.OleFileIO(__sessions__.current.file.path)
ole_flag = True
except:
ole_flag = False
email_handle = open(__sessions__.current.file.path)
msg = email.message_from_file(email_handle)
email_handle.close()
if self.args.open is not None:
if ole_flag:
msg = ole
att_session(self.args.open, msg, ole_flag)
elif self.args.envelope:
if ole_flag:
msg = parse_ole_msg(ole)
email_envelope(msg)
elif self.args.attach:
if ole_flag:
msg = ole
email_attachments(msg, ole_flag)
elif self.args.header:
if ole_flag:
msg = parse_ole_msg(ole)
email_header(msg)
elif self.args.trace:
if ole_flag:
msg = parse_ole_msg(ole)
email_trace(msg, False)
elif self.args.traceall:
if ole_flag:
msg = parse_ole_msg(ole)
email_trace(msg, True)
elif self.args.spoofcheck:
if ole_flag:
msg = parse_ole_msg(ole)
email_spoofcheck(msg, dnsenabled)
elif self.args.all:
if ole_flag:
msg = parse_ole_msg(ole)
email_envelope(msg)
email_header(msg)
email_trace(msg, True)
email_spoofcheck(msg, dnsenabled)
if ole_flag:
msg = ole
email_attachments(msg, ole_flag)
else:
self.log('error', 'At least one of the parameters is required')
self.usage()
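string_clean at the top of this module is the workhorse for header display: it flattens multi-line header values by deleting newlines, tabs and carriage returns so each value fits on a single table row. A self-contained sketch of it applied to a folded Received header (the header value is made up):
import re

def string_clean(value):
    # delete newlines, tabs and carriage returns; return '' for missing headers
    if value:
        return re.sub(r'[\n\t\r]', '', value)
    return ""

received = 'from mail.example.com\r\n\t by mx.example.org;\r\n\t Mon, 1 Jan 2024 00:00:00 +0000'
print(string_clean(received))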
0
Example 91
def _install(self):
logger = self.get_logger('install')
sudo = self.instance.sudo
logger.info(
'START TO INSTALL: branch = %r, commit = %r, instance = %r',
self.branch, self.commit, self.instance
)
def setup_instance(service_manifests, service_manifests_available):
logger = self.get_logger('install.setup_instance')
with self.instance:
def aptitude(*commands):
sudo(['aptitude', '-y'] + list(commands),
environ={'DEBIAN_FRONTEND': 'noninteractive'})
# create user for app
sudo(['useradd', '-U', '-G', 'users,www-data', '-Mr',
self.app.name])
# assume instance uses Ubuntu >= 12.04
apt_sources = re.sub(
r'\n#\s*(deb(?:-src)?\s+'
r'http://[^.]\.ec2\.archive\.ubuntu\.com/'
r'ubuntu/\s+[^-]+multiverse\n)',
lambda m: '\n' + m.group(1),
self.instance.read_file('/etc/apt/sources.list', sudo=True)
)
self.instance.write_file('/etc/apt/sources.list', apt_sources,
sudo=True)
apt_repos = set()
apt_packages = set([
'build-essential', 'python-dev', 'python-setuptools',
'python-pip'
])
with service_manifests_available:
while not service_manifests[0]:
service_manifests_available.wait()
for service in service_manifests[1:]:
apt_repos.update(service.required_apt_repositories)
apt_packages.update(service.required_apt_packages)
if apt_repos:
for repo in apt_repos:
sudo(['apt-add-repository', '-y', repo])
aptitude('update')
with self.instance.sftp():
self.instance.write_file(
'/usr/bin/apt-fast',
resource_string(__name__, 'apt-fast'),
sudo=True
)
self.instance.write_file('/etc/apt-fast.conf', '''
_APTMGR=aptitude
DOWNLOADBEFORE=true
_MAXNUM=20
DLLIST='/tmp/apt-fast.list'
_DOWNLOADER='aria2c -c -j ${_MAXNUM} -i ${DLLIST} --connect-timeout=10 \
--timeout=600 -m0'
DLDIR='/var/cache/apt/archives/apt-fast'
APTCACHE='/var/cache/apt/archives/'
''', sudo=True)
sudo(['chmod', '+x', '/usr/bin/apt-fast'])
aptitude('install', 'aria2')
sudo(['apt-fast', '-q', '-y', 'install'] + list(apt_packages),
environ={'DEBIAN_FRONTEND': 'noninteractive'})
service_manifests_available = threading.Condition()
service_manifests = [False]
instance_setup_worker = threading.Thread(
target=setup_instance,
kwargs={
'service_manifests_available': service_manifests_available,
'service_manifests': service_manifests
}
)
instance_setup_worker.start()
# setup metadata of the instance
self.update_instance_metadata()
self.instance.status = 'started'
# making package (pybundle)
fd, package_path = tempfile.mkstemp()
os.close(fd)
with self.fetch() as download_path:
service_manifests.extend(self.services)
service_manifests[0] = True
with service_manifests_available:
service_manifests_available.notify()
config_temp_path = tempfile.mkdtemp()
shutil.copytree(
os.path.join(download_path, self.app.config_dir),
os.path.join(config_temp_path, self.app.name)
)
with self.dist.bundle_package() as (package, filename, temp_path):
shutil.copyfile(temp_path, package_path)
remote_path = os.path.join('/tmp', filename)
with self.instance.sftp():
# upload config files
self.instance.put_directory(
os.path.join(config_temp_path, self.app.name),
'/etc/' + self.app.name,
sudo=True
)
shutil.rmtree(config_temp_path)
python_packages = set()
for service in service_manifests[1:]:
python_packages.update(service.required_python_packages)
# uploads package
self.instance.put_file(package_path, remote_path)
# join instance_setup_worker
instance_setup_worker.join()
self.instance.status = 'apt-installed'
pip_cmd = ['pip', 'install', '-i', PYPI_INDEX_URLS[0]]
for idx in PYPI_INDEX_URLS[1:]:
pip_cmd.append('--extra-index-url=' + idx)
sudo(pip_cmd + [remote_path], environ={'CI': '1'})
sudo(pip_cmd + ['-I'] + list(python_packages),
environ={'CI': '1'})
self.instance.status = 'installed'
for service in service_manifests[1:]:
for cmd in service.pre_install:
sudo(cmd, environ={'DEBIAN_FRONTEND': 'noninteractive'})
values_path = '/etc/{0}/values.json'.format(self.app.name)
service_values = {
'.build': dict(
commit=self.commit.ref,
branch=self.branch.label
)
}
refresh_values = lambda: self.instance.write_file(
values_path,
json.dumps(service_values),
sudo=True
)
refresh_values()
for service in service_manifests[1:]:
service_value = service.install(self.instance)
service_values[service.name] = service_value
refresh_values()
for service in service_manifests[1:]:
for cmd in service.post_install:
sudo(cmd, environ={'DEBIAN_FRONTEND': 'noninteractive'})
service_map = dict((service.name, service)
for service in service_manifests[1:])
deployed_domains = {}
if self.route53_hosted_zone_id and self.route53_records:
self.instance.status = 'run'
changeset = ResourceRecordSets(
self.app.route53_connection,
self.route53_hosted_zone_id,
'Changed by Asuka: {0}, {1} [{2}]'.format(self.app.name,
self.branch.label,
self.commit.ref)
)
from .service import DomainService
for service_name, domain_format in self.route53_records.items():
service = service_map[service_name]
if not isinstance(service, DomainService):
raise TypeError(repr(service) + ' is not an instance of '
'crosspop.service.DomainService')
domain = domain_format.format(branch=self.branch)
deployed_domains[service_name] = domain
service.route_domain(domain, changeset)
self.instance.tags['Domain-' + service_name] = domain
if changeset.changes:
logger.info('Route 53 changeset:\n%s', changeset.to_xml())
changeset.commit()
self.instance.status = 'done'
self.terminate_instances()
return deployed_domains
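The re.sub worth noting in this example is the one that rewrites /etc/apt/sources.list: it matches commented-out multiverse "deb"/"deb-src" lines and re-emits them without the leading "#", using a lambda so the captured line can be reused verbatim. A minimal standalone sketch of the same pattern follows; the sources.list fragment is invented and the regex is simplified compared to the one above.

import re

# Invented sources.list fragment; the real file is read from the EC2 instance.
apt_sources = (
    "deb http://us-east-1.ec2.archive.ubuntu.com/ubuntu/ precise main\n"
    "# deb http://us-east-1.ec2.archive.ubuntu.com/ubuntu/ precise multiverse\n"
)

# Match a commented-out multiverse line, capture it, and re-emit the capture
# without the leading "#" so apt will use that repository.
uncommented = re.sub(
    r'\n#\s*(deb(?:-src)?\s+http://\S+\s+\S+\s+multiverse\n)',
    lambda m: '\n' + m.group(1),
    apt_sources,
)
print(uncommented)

A plain backreference replacement ('\n\1') would work just as well here; the lambda mirrors the original code.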
Example 92
Project: rpmlint Source File: BinariesCheck.py
def __init__(self, pkg, path, file, is_ar, is_shlib):
self.readelf_error = False
self.needed = []
self.rpath = []
self.undef = []
self.unused = []
self.comment = False
self.soname = False
self.non_pic = True
self.stack = False
self.exec_stack = False
self.exit_calls = []
self.forbidden_calls = []
fork_called = False
self.tail = ''
self.setgid = False
self.setuid = False
self.setgroups = False
self.chroot = False
self.chdir = False
self.chroot_near_chdir = False
self.mktemp = False
is_debug = path.endswith('.debug')
cmd = ['env', 'LC_ALL=C', 'readelf', '-W', '-S', '-l', '-d', '-s']
cmd.append(path)
res = Pkg.getstatusoutput(cmd)
if not res[0]:
lines = res[1].splitlines()
for l in lines:
r = BinaryInfo.needed_regex.search(l)
if r:
self.needed.append(r.group(1))
continue
r = BinaryInfo.rpath_regex.search(l)
if r:
for p in r.group(1).split(':'):
self.rpath.append(p)
continue
if BinaryInfo.comment_regex.search(l):
self.comment = True
continue
if BinaryInfo.pic_regex.search(l):
self.non_pic = False
continue
r = BinaryInfo.soname_regex.search(l)
if r:
self.soname = r.group(1)
continue
r = BinaryInfo.stack_regex.search(l)
if r:
self.stack = True
flags = r.group(1)
if flags and BinaryInfo.stack_exec_regex.search(flags):
self.exec_stack = True
continue
if l.startswith("Symbol table"):
break
for l in lines:
r = BinaryInfo.call_regex.search(l)
if not r:
continue
l = r.group(1)
if BinaryInfo.mktemp_call_regex.search(l):
self.mktemp = True
if BinaryInfo.setgid_call_regex.search(l):
self.setgid = True
if BinaryInfo.setuid_call_regex.search(l):
self.setuid = True
if BinaryInfo.setgroups_call_regex.search(l):
self.setgroups = True
if BinaryInfo.chdir_call_regex.search(l):
self.chdir = True
if BinaryInfo.chroot_call_regex.search(l):
self.chroot = True
if BinaryInfo.forbidden_functions:
for r_name, func in BinaryInfo.forbidden_functions.items():
ret = func['f_regex'].search(l)
if ret:
self.forbidden_calls.append(r_name)
if is_shlib:
r = BinaryInfo.exit_call_regex.search(l)
if r:
self.exit_calls.append(r.group(1))
continue
r = BinaryInfo.fork_call_regex.search(l)
if r:
fork_called = True
continue
# check if we don't have a string that will automatically
# waive the presence of a forbidden call
if self.forbidden_calls:
cmd = ['env', 'LC_ALL=C', 'strings']
cmd.append(path)
res = Pkg.getstatusoutput(cmd)
if not res[0]:
for l in res[1].splitlines():
# as we need to remove elements, iterate backwards
for i in range(len(self.forbidden_calls) - 1, -1, -1):
func = self.forbidden_calls[i]
f = BinaryInfo.forbidden_functions[func]
if 'waiver_regex' not in f:
continue
r = f['waiver_regex'].search(l)
if r:
del self.forbidden_calls[i]
if self.non_pic:
self.non_pic = 'TEXTREL' in res[1]
# Ignore all exit() calls if fork() is being called.
# Does not have any context at all but without this kludge, the
# number of false positives would probably be intolerable.
if fork_called:
self.exit_calls = []
# check if chroot is near chdir (since otherwise, chroot is called
# without chdir)
# Currently this implementation works only on x86_64 due to reliance
# on x86_64 specific assembly. Skip it on other architectures
if pkg.arch == 'x86_64' and self.chroot and self.chdir:
p = subprocess.Popen(
['env', 'LC_ALL=C', 'objdump', '-d', path],
stdout=subprocess.PIPE, bufsize=-1)
with p.stdout:
index = 0
chroot_index = -99
chdir_index = -99
for line in p.stdout:
res = BinaryInfo.objdump_call_regex.search(line)
if not res:
continue
if b'@plt' not in res.group(1):
pass
elif b'chroot@plt' in res.group(1):
chroot_index = index
if abs(chroot_index - chdir_index) <= 2:
self.chroot_near_chdir = True
break
elif b'chdir@plt' in res.group(1):
chdir_index = index
if abs(chroot_index - chdir_index) <= 2:
self.chroot_near_chdir = True
break
index += 1
if p.wait() and not self.chroot_near_chdir:
printWarning(pkg, 'binaryinfo-objdump-failed', file)
self.chroot_near_chdir = True # avoid false positive
else:
self.readelf_error = True
printWarning(pkg, 'binaryinfo-readelf-failed',
file, re.sub('\n.*', '', res[1]))
try:
with open(path, 'rb') as fobj:
fobj.seek(-12, os.SEEK_END)
self.tail = Pkg.b2s(fobj.read())
except Exception as e:
printWarning(pkg, 'binaryinfo-tail-failed %s: %s' % (file, e))
# Undefined symbol and unused direct dependency checks make sense only
# for installed packages.
# skip debuginfo: https://bugzilla.redhat.com/190599
if not is_ar and not is_debug and isinstance(pkg, Pkg.InstalledPkg):
# We could do this with objdump, but it's _much_ simpler with ldd.
res = Pkg.getstatusoutput(
('env', 'LC_ALL=C', 'ldd', '-d', '-r', path))
if not res[0]:
for l in res[1].splitlines():
undef = BinaryInfo.undef_regex.search(l)
if undef:
self.undef.append(undef.group(1))
if self.undef:
cmd = self.undef[:]
cmd.insert(0, 'c++filt')
try:
res = Pkg.getstatusoutput(cmd)
if not res[0]:
self.undef = res[1].splitlines()
except:
pass
else:
printWarning(pkg, 'ldd-failed', file)
res = Pkg.getstatusoutput(
('env', 'LC_ALL=C', 'ldd', '-r', '-u', path))
if res[0]:
# Either ldd doesn't grok -u (added in glibc 2.3.4) or we have
# unused direct dependencies
in_unused = False
for l in res[1].splitlines():
if not l.rstrip():
pass
elif l.startswith('Unused direct dependencies'):
in_unused = True
elif in_unused:
unused = BinaryInfo.unused_regex.search(l)
if unused:
self.unused.append(unused.group(1))
else:
in_unused = False
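In the readelf error branch above, re.sub('\n.*', '', res[1]) trims a multi-line error message down to its first line before passing it to printWarning: without re.DOTALL, "." stops at newlines, so every "\n..." chunk is removed. A small sketch with a made-up error string:

import re

# Hypothetical multi-line error output; readelf would produce something similar.
output = "readelf: Error: Not an ELF file\nIt has the wrong magic bytes\nat the start"

# Each "\n" plus the rest of that line is matched and dropped,
# leaving only the first line.
first_line = re.sub('\n.*', '', output)
print(first_line)  # readelf: Error: Not an ELF file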
Example 93
Project: rivescript-python Source File: brain.py
def process_tags(self, user, msg, reply, st=[], bst=[], depth=0, ignore_object_errors=True):
"""Post process tags in a message.
:param str user: The user ID.
:param str msg: The user's formatted message.
:param str reply: The raw RiveScript reply for the message.
:param []str st: The array of ``<star>`` matches from the trigger.
:param []str bst: The array of ``<botstar>`` matches from a
``%Previous`` command.
:param int depth: The recursion depth counter.
:param bool ignore_object_errors: Whether to ignore errors in Python
object macros instead of raising an ``ObjectError`` exception.
:return str: The final reply after tags have been processed.
"""
stars = ['']
stars.extend(st)
botstars = ['']
botstars.extend(bst)
if len(stars) == 1:
stars.append("undefined")
if len(botstars) == 1:
botstars.append("undefined")
# Tag shortcuts.
reply = reply.replace('<person>', '{person}<star>{/person}')
reply = reply.replace('<@>', '{@<star>}')
reply = reply.replace('<formal>', '{formal}<star>{/formal}')
reply = reply.replace('<sentence>', '{sentence}<star>{/sentence}')
reply = reply.replace('<uppercase>', '{uppercase}<star>{/uppercase}')
reply = reply.replace('<lowercase>', '{lowercase}<star>{/lowercase}')
# Weight and <star> tags.
reply = re.sub(RE.weight, '', reply) # Leftover {weight}s
if len(stars) > 0:
reply = reply.replace('<star>', stars[1])
reStars = re.findall(RE.star_tags, reply)
for match in reStars:
if int(match) < len(stars):
reply = reply.replace('<star{match}>'.format(match=match), stars[int(match)])
if len(botstars) > 0:
reply = reply.replace('<botstar>', botstars[1])
reStars = re.findall(RE.botstars, reply)
for match in reStars:
if int(match) < len(botstars):
reply = reply.replace('<botstar{match}>'.format(match=match), botstars[int(match)])
# <input> and <reply>
history = self.master.get_uservar(user, "__history__")
if type(history) is not dict:
history = self.default_history()
reply = reply.replace('<input>', history['input'][0])
reply = reply.replace('<reply>', history['reply'][0])
reInput = re.findall(RE.input_tags, reply)
for match in reInput:
reply = reply.replace('<input{match}>'.format(match=match),
history['input'][int(match) - 1])
reReply = re.findall(RE.reply_tags, reply)
for match in reReply:
reply = reply.replace('<reply{match}>'.format(match=match),
history['reply'][int(match) - 1])
# <id> and escape codes.
reply = reply.replace('<id>', user)
reply = reply.replace('\\s', ' ')
reply = reply.replace('\\n', "\n")
reply = reply.replace('\\#', '#')
# Random bits.
reRandom = re.findall(RE.random_tags, reply)
for match in reRandom:
output = ''
if '|' in match:
output = random.choice(match.split('|'))
else:
output = random.choice(match.split(' '))
reply = reply.replace('{{random}}{match}{{/random}}'.format(match=match), output)
# Person Substitutions and String Formatting.
for item in ['person', 'formal', 'sentence', 'uppercase', 'lowercase']:
matcher = re.findall(r'\{' + item + r'\}(.+?)\{/' + item + r'\}', reply)
for match in matcher:
output = None
if item == 'person':
# Person substitutions.
output = self.substitute(match, "person")
else:
output = utils.string_format(match, item)
reply = reply.replace('{{{item}}}{match}{{/{item}}}'.format(item=item, match=match), output)
# Handle all variable-related tags with an iterative regex approach,
# to allow for nesting of tags in arbitrary ways (think <set a=<get b>>)
# Dummy out the <call> tags first, because we don't handle them right
# here.
reply = reply.replace("<call>", "{__call__}")
reply = reply.replace("</call>", "{/__call__}")
while True:
# This regex will match a <tag> which contains no other tag inside
# it, i.e. in the case of <set a=<get b>> it will match <get b> but
# not the <set> tag, on the first pass. The second pass will get the
# <set> tag, and so on.
match = re.search(RE.tag_search, reply)
if not match: break # No remaining tags!
match = match.group(1)
parts = match.split(" ", 1)
tag = parts[0].lower()
data = parts[1] if len(parts) > 1 else ""
insert = "" # Result of the tag evaluation
# Handle the tags.
if tag == "bot" or tag == "env":
# <bot> and <env> tags are similar.
target = self.master._var if tag == "bot" else self.master._global
if "=" in data:
# Setting a bot/env variable.
parts = data.split("=")
self.say("Set " + tag + " variable " + text_type(parts[0]) + "=" + text_type(parts[1]))
target[parts[0]] = parts[1]
else:
# Getting a bot/env variable.
insert = target.get(data, "undefined")
elif tag == "set":
# <set> user vars.
parts = data.split("=")
self.say("Set uservar " + text_type(parts[0]) + "=" + text_type(parts[1]))
self.master.set_uservar(user, parts[0], parts[1])
elif tag in ["add", "sub", "mult", "div"]:
# Math operator tags.
parts = data.split("=")
var = parts[0]
value = parts[1]
curv = self.master.get_uservar(user, var)
# Sanity check the value.
try:
value = int(value)
if curv in [None, "undefined"]:
# Initialize it.
curv = 0
except:
insert = "[ERR: Math can't '{}' non-numeric value '{}']".format(tag, value)
# Attempt the operation.
try:
orig = int(curv)
new = 0
if tag == "add":
new = orig + value
elif tag == "sub":
new = orig - value
elif tag == "mult":
new = orig * value
elif tag == "div":
new = orig / value
self.master.set_uservar(user, var, new)
except:
insert = "[ERR: Math couldn't '{}' to value '{}']".format(tag, curv)
elif tag == "get":
insert = self.master.get_uservar(user, data)
else:
# Unrecognized tag.
insert = "\x00{}\x01".format(match)
reply = reply.replace("<{}>".format(match), text_type(insert))
# Restore unrecognized tags.
reply = reply.replace("\x00", "<").replace("\x01", ">")
# Streaming code. DEPRECATED!
if '{!' in reply:
self._warn("Use of the {!...} tag is deprecated and not supported here.")
# Topic setter.
reTopic = re.findall(RE.topic_tag, reply)
for match in reTopic:
self.say("Setting user's topic to " + match)
self.master.set_uservar(user, "topic", match)
reply = reply.replace('{{topic={match}}}'.format(match=match), '')
# Inline redirecter.
reRedir = re.findall(RE.redir_tag, reply)
for match in reRedir:
self.say("Redirect to " + match)
at = match.strip()
subreply = self._getreply(user, at, step=(depth + 1))
reply = reply.replace('{{@{match}}}'.format(match=match), subreply)
# Object caller.
reply = reply.replace("{__call__}", "<call>")
reply = reply.replace("{/__call__}", "</call>")
reCall = re.findall(r'<call>(.+?)</call>', reply)
for match in reCall:
parts = re.split(RE.ws, match)
output = ''
obj = parts[0]
args = []
if len(parts) > 1:
args = parts[1:]
# Do we know this object?
if obj in self.master._objlangs:
# We do, but do we have a handler for that language?
lang = self.master._objlangs[obj]
if lang in self.master._handlers:
# We do.
try:
output = self.master._handlers[lang].call(self.master, obj, user, args)
except python.PythonObjectError as e:
self.warn(str(e))
if not ignore_object_errors:
raise ObjectError(str(e))
output = RS_ERR_OBJECT
else:
if not ignore_object_errors:
raise ObjectError(RS_ERR_OBJECT_HANDLER)
output = RS_ERR_OBJECT_HANDLER
else:
if not ignore_object_errors:
raise ObjectError(RS_ERR_OBJECT_MISSING)
output = RS_ERR_OBJECT_MISSING
reply = reply.replace('<call>{match}</call>'.format(match=match), output)
return reply
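The call reply = re.sub(RE.weight, '', reply) strips any leftover {weight} tags from the reply before the <star> substitutions run. RE.weight is defined elsewhere in rivescript-python; the pattern below is a plausible stand-in for illustration, not necessarily the library's actual regex.

import re

# Assumed stand-in for RE.weight; the real pattern lives in rivescript's regexp module.
weight_pattern = re.compile(r'\{weight=\d+\}\s*')

reply = "{weight=100}Hello there, <star>!"
cleaned = re.sub(weight_pattern, '', reply)
print(cleaned)  # Hello there, <star>!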
Example 94
def metaphone (term):
"returns metaphone code for a given string"
# implementation of the original algorithm from Lawrence Philips
# extended/rewritten by M. Kuhn
# improvements with thanks to John Machin <[email protected]>
# define return value
code = ""
i = 0
term_length = len(term)
if (term_length == 0):
# empty string ?
return code
# end if
# extension #1 (added 2005-01-28)
# convert to lowercase
term = term.lower()
# extension #2 (added 2005-01-28)
# remove all non-English characters first
term = re.sub(r'[^a-z]', '', term)
if len(term) == 0:
# nothing left
return code
# end if
# extension #3 (added 2005-01-24)
# conflate repeated letters
firstChar = term[0]
str2 = firstChar
for x in term:
if x != str2[-1]:
str2 = str2 + x
# end if
# end for
# extension #4 (added 2005-01-24)
# remove any vowels unless a vowel is the first letter
firstChar = str2[0]
str3 = firstChar
for x in str2[1:]:
if (re.search(r'[^aeiou]', x)):
str3 = str3 + x
# end if
# end for
term = str3
term_length = len(term)
if term_length == 0:
# nothing left
return code
# end if
# check for exceptions
if (term_length > 1):
# get first two characters
first_chars = term[0:2]
# build translation table
table = {
"ae":"e",
"gn":"n",
"kn":"n",
"pn":"n",
"wr":"n",
"wh":"w"
}
if first_chars in table.keys():
term = term[2:]
code = table[first_chars]
term_length = len(term)
# end if
elif (term[0] == "x"):
term = ""
code = "s"
term_length = 0
# end if
# define standard translation table
st_trans = {
"b":"b",
"c":"k",
"d":"t",
"g":"k",
"h":"h",
"k":"k",
"p":"p",
"q":"k",
"s":"s",
"t":"t",
"v":"f",
"w":"w",
"x":"ks",
"y":"y",
"z":"s"
}
i = 0
while (i<term_length):
# init character to add, init basic patterns
add_char = ""
part_n_2 = ""
part_n_3 = ""
part_n_4 = ""
part_c_2 = ""
part_c_3 = ""
# extract a number of patterns, if possible
if (i < (term_length - 1)):
part_n_2 = term[i:i+2]
if (i>0):
part_c_2 = term[i-1:i+1]
part_c_3 = term[i-1:i+2]
# end if
# end if
if (i < (term_length - 2)):
part_n_3 = term[i:i+3]
# end if
if (i < (term_length - 3)):
part_n_4 = term[i:i+4]
# end if
# use table with conditions for translations
if (term[i] == "b"):
add_char = st_trans["b"]
if (i == (term_length - 1)):
if (i>0):
if (term[i-1] == "m"):
add_char = ""
# end if
# end if
# end if
elif (term[i] == "c"):
add_char = st_trans["c"]
if (part_n_2 == "ch"):
add_char = "x"
elif (re.search(r'c[iey]', part_n_2)):
add_char = "s"
# end if
if (part_n_3 == "cia"):
add_char = "x"
# end if
if (re.search(r'sc[iey]', part_c_3)):
add_char = ""
# end if
elif (term[i] == "d"):
add_char = st_trans["d"]
if (re.search(r'dg[eyi]', part_n_3)):
add_char = "j"
# end if
elif (term[i] == "g"):
add_char = st_trans["g"]
if (part_n_2 == "gh"):
if (i == (term_length - 2)):
add_char = ""
# end if
elif (re.search(r'gh[aeiouy]', part_n_3)):
add_char = ""
elif (part_n_2 == "gn"):
add_char = ""
elif (part_n_4 == "gned"):
add_char = ""
elif (re.search(r'dg[eyi]',part_c_3)):
add_char = ""
elif (part_n_2 == "gi"):
if (part_c_3 != "ggi"):
add_char = "j"
# end if
elif (part_n_2 == "ge"):
if (part_c_3 != "gge"):
add_char = "j"
# end if
elif (part_n_2 == "gy"):
if (part_c_3 != "ggy"):
add_char = "j"
# end if
elif (part_n_2 == "gg"):
add_char = ""
# end if
elif (term[i] == "h"):
add_char = st_trans["h"]
if (re.search(r'[aeiouy]h[^aeiouy]', part_c_3)):
add_char = ""
elif (re.search(r'[csptg]h', part_c_2)):
add_char = ""
# end if
elif (term[i] == "k"):
add_char = st_trans["k"]
if (part_c_2 == "ck"):
add_char = ""
# end if
elif (term[i] == "p"):
add_char = st_trans["p"]
if (part_n_2 == "ph"):
add_char = "f"
# end if
elif (term[i] == "q"):
add_char = st_trans["q"]
elif (term[i] == "s"):
add_char = st_trans["s"]
if (part_n_2 == "sh"):
add_char = "x"
# end if
if (re.search(r'si[ao]', part_n_3)):
add_char = "x"
# end if
elif (term[i] == "t"):
add_char = st_trans["t"]
if (part_n_2 == "th"):
add_char = "0"
# end if
if (re.search(r'ti[ao]', part_n_3)):
add_char = "x"
# end if
elif (term[i] == "v"):
add_char = st_trans["v"]
elif (term[i] == "w"):
add_char = st_trans["w"]
if (re.search(r'w[^aeiouy]', part_n_2)):
add_char = ""
# end if
elif (term[i] == "x"):
add_char = st_trans["x"]
elif (term[i] == "y"):
add_char = st_trans["y"]
elif (term[i] == "z"):
add_char = st_trans["z"]
else:
# alternative
add_char = term[i]
# end if
code = code + add_char
i += 1
# end while
# return metaphone code
return code
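The two re calls doing the heavy lifting here are re.sub(r'[^a-z]', '', term), which deletes everything that is not a lowercase letter once the input has been lowercased, and re.search(r'[^aeiou]', x), which keeps only consonants (plus a leading vowel). A compact sketch of that cleanup, independent of the rest of the algorithm:

import re

term = "O'Brien-2nd"
term = term.lower()
# Drop every character that is not a-z (digits, punctuation, whitespace).
term = re.sub(r'[^a-z]', '', term)
print(term)  # obriennd

# Keep the first character, then only consonants, mirroring extension #4 above.
reduced = term[0] + ''.join(c for c in term[1:] if re.search(r'[^aeiou]', c))
print(reduced)  # obrnnd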
Example 95
Project: TARDIS Source File: TARDIS.py
def main(vulnerability,vulnObject,sourceIP,sourceHost):
#Create results and working directories
if not os.path.exists('Results'):
os.makedirs('Results')
if not os.path.exists('Working'):
os.makedirs('Working')
#Make sure the vulnerability is valid
if vulnerability != "":
vulnCheck=0
resultCount=0
logsource=''
print("Searching for evidence of \"" + vulnerability + "\"")
print(" Host: " + sourceIP)
try:
configFile = 'config.xml'
tree = ET.parse(configFile)
root = tree.getroot()
except:
sys.exit("Not a valid config XML file")
for settings in root.findall("./log_source"):
logsource=settings.text
cnx = getDBConnector()
#check if vulnerability/asset combo exists in assetVulnerability Table
cursor = cnx.cursor()
query = ("SELECT count(*) as count from assetVulnerabilities where victim_ip = '" + str(ip2long(sourceIP)) + "' and threat_id = '" + vulnerability + "'")
cursor.execute(query)
for row in cursor:
vulnCheck=row[0]
cursor.close()
if vulnCheck==0:
#No combination exists, write data to DB
cursor = cnx.cursor()
add_vulnInstance = ("INSERT INTO assetVulnerabilities "
"(victim_ip, threat_id, active) "
"VALUES (%s, %s, %s)")
vulnData = (ip2long(sourceIP), vulnerability, '1')
# Insert new entry
cursor.execute(add_vulnInstance , vulnData )
cnx.commit()
cursor.close()
cnx.close()
searchStringResults= findStixObservables.run(vulnerability)
isExploitFound=False
searchStringCount=0
operator=searchStringResults[0]
numResults=0
if(searchStringResults[1]=="No search file found"):
searchResults="0"
print(" No search file found\n")
elif(searchStringResults[1]=="No supported observables found"):
searchResults="0"
print(" No supported observables found\n")
else:
#run search...
#search should return number of results
#Insert source host from arguments
for entry in searchStringResults:
if logsource=="splunk":
if (searchStringCount == 1):
searchString=entry + " AND (host=\"" + sourceHost + "\" OR s_ip=\"" + sourceIP + "\" OR d_host=\"" + sourceHost + "\") | fields host, c_ip | fields - _bkt, _cd, _indextime, _kv, _serial, _si, _sourcetype | rename _raw as \"Raw Log\" | rename c_ip as clientip"
numResults=splunk.searchVulnerability(searchString,vulnerability,sourceIP,sourceHost)
if (numResults != "0"):
data = json.load(numResults)
if (operator=="AND"):
if (searchStringCount > 1):
resultCount=0
for result in data["results"]:
startTime = dateutil.parser.parse(data["results"][resultCount]["_time"]) + datetime.timedelta(days =- 300)
endTime = dateutil.parser.parse(data["results"][resultCount]["_time"]) + datetime.timedelta(days = 300)
searchString=entry + " AND (host=\"" + sourceHost + "\" OR s_ip=\"" + sourceIP + "\" OR d_host=\"" + sourceHost + "\") | fields host, clientip | fields - _bkt, _cd, _indextime, _kv, _serial, _si, _sourcetype | rename _raw as \"Raw Log\""
newResults=splunk.searchVulnerabilityTimeRange(searchString,vulnerability,sourceIP,sourceHost,startTime.isoformat(),endTime.isoformat())
if (newResults != "0"):
#This is the result from search 1
newData = json.load(newResults)
newResultCount=0
for result in newData["results"]:
try:
clientip=newData["results"][newResultCount]["clientip"]
except:
clientip="0"
isExploitFound=True
#These are the results from any further results proving the AND condition
cnx = getDBConnector()
cursor = cnx.cursor()
query = ("SELECT count(*) as count from attackInventory where victim_ip = '" + str(ip2long(sourceIP)) + "' and threat_id = '" + vulnerability + "' and attack_time = '" + data["results"][resultCount]["_time"] + "'")
cursor.execute(query)
for row in cursor:
logCheck=row[0]
cursor.close()
if logCheck==0:
#Write data to DB
cursor = cnx.cursor()
add_logInstance = ("INSERT INTO attackInventory "
"(victim_ip, attacker_ip, attack_time, attack_log, threat_id) "
"VALUES (%s, %s, %s, %s, %s)")
logData = (ip2long(sourceIP), ip2long(clientip), newData["results"][newResultCount]["_time"], newData["results"][newResultCount]["Raw Log"], vulnerability)
# Insert new entry
cursor.execute(add_logInstance , logData )
cnx.commit()
cursor.close()
cnx.close()
newResultCount=newResultCount+1
else:
newResultCount=0
if (isExploitFound==True):
try:
clientip=data["results"][resultCount]["clientip"]
except:
clientip="0"
cnx = getDBConnector()
cursor = cnx.cursor()
query = ("SELECT count(*) as count from attackInventory where victim_ip = '" + str(ip2long(sourceIP)) + "' and threat_id = '" + vulnerability + "' and attack_time = '" + data["results"][resultCount]["_time"] + "'")
cursor.execute(query)
for row in cursor:
logCheck=row[0]
cursor.close()
if logCheck==0:
#Write data to DB
cursor = cnx.cursor()
add_logInstance = ("INSERT INTO attackInventory "
"(victim_ip, attacker_ip, attack_time, attack_log, threat_id) "
"VALUES (%s, %s, %s, %s, %s)")
logData = (ip2long(sourceIP), ip2long(clientip), data["results"][resultCount]["_time"], data["results"][resultCount]["Raw Log"], vulnerability)
# Insert new entry
cursor.execute(add_logInstance , logData )
cnx.commit()
cursor.close()
cnx.close()
resultCount=newResultCount+1
else:
resultCount=newResultCount
elif (operator=="OR"):
if (searchStringCount > 0):
#only keep searching if there are more IOCS to look at...
if len(searchStringResults)>2:
searchString=entry + " AND (host=\"" + sourceHost + "\" OR s_ip=\"" + sourceIP + "\" OR d_host=\"" + sourceHost + "\") | fields host, clientip | fields - _bkt, _cd, _indextime, _kv, _serial, _si, _sourcetype | rename _raw as \"Raw Log\""
numResults=splunk.searchVulnerability(searchString,vulnerability,sourceIP,sourceHost)
if (numResults != "0"):
data = json.load(numResults)
resultCount=0
for result in data["results"]:
isExploitFound=True
cnx = getDBConnector()
cursor = cnx.cursor()
query = ("SELECT count(*) as count from attackInventory where victim_ip = '" + str(ip2long(sourceIP)) + "' and threat_id = '" + vulnerability + "' and attack_time = '" + data["results"][resultCount]["_time"] + "'")
cursor.execute(query)
for row in cursor:
logCheck=row[0]
cursor.close()
if logCheck==0:
#Write data to DB
cursor = cnx.cursor()
add_logInstance = ("INSERT INTO attackInventory "
"(victim_ip, attacker_ip, attack_time, attack_log, threat_id) "
"VALUES (%s, %s, %s, %s, %s)")
logData = (ip2long(sourceIP), ip2long(data["results"][resultCount]["clientip"]), data["results"][resultCount]["_time"], data["results"][resultCount]["Raw Log"], vulnerability)
# Insert new entry
cursor.execute(add_logInstance , logData )
cnx.commit()
cursor.close()
cnx.close()
resultCount=resultCount+1
elif len(searchStringResults)==2:
searchString=entry + " AND (host=\"" + sourceHost + "\" OR host=\"" + sourceIP + "\" OR s_ip=\"" + sourceIP + "\" OR d_host=\"" + sourceHost + "\") | fields host, clientip | fields - _bkt, _cd, _indextime, _kv, _serial, _si, _sourcetype | rename _raw as \"Raw Log\""
numResults=splunk.searchVulnerability(searchString,vulnerability,sourceIP,sourceHost)
if (numResults != "0"):
data = json.load(numResults)
resultCount=0
for result in data["results"]:
isExploitFound=True
cnx = getDBConnector()
cursor = cnx.cursor()
query = ("SELECT count(*) as count from attackInventory where victim_ip = '" + str(ip2long(sourceIP)) + "' and threat_id = '" + vulnerability + "' and attack_time = '" + data["results"][resultCount]["_time"] + "'")
cursor.execute(query)
for row in cursor:
logCheck=row[0]
cursor.close()
if logCheck==0:
#Write data to DB
cursor = cnx.cursor()
add_logInstance = ("INSERT INTO attackInventory "
"(victim_ip, attacker_ip, attack_time, attack_log, threat_id) "
"VALUES (%s, %s, %s, %s, %s)")
logData = (ip2long(sourceIP), ip2long(data["results"][resultCount]["clientip"]), data["results"][resultCount]["_time"], data["results"][resultCount]["Raw Log"], vulnerability)
# Insert new entry
cursor.execute(add_logInstance , logData )
cnx.commit()
cursor.close()
cnx.close()
resultCount=resultCount+1
searchStringCount=searchStringCount+1
elif logsource=="elastic_search":
numResults=0
startTime="-90d"
endTime="now"
#Insert source host from arguments
entry = re.sub('\<source_host\>', sourceHost, entry)
#Insert source IP from arguments
entry = re.sub('\<source_ip\>', sourceIP, entry)
if (searchStringCount == 1):
#Insert startTime
entry = re.sub('\<startTime\>', startTime, entry)
#Insert endTime
entry = re.sub('\<endTime\>', endTime, entry)
if sourceIP == '*':
entry = re.sub('\<min_count\>', '1', entry)
else:
entry = re.sub('\<min_count\>', '2', entry)
#print entry
searchResults = ElasticSearchQuery.searchVulnerability(entry,vulnerability,sourceIP,sourceHost)
#print searchResults
numResults = getElasticSearchResults(searchResults)
#print numResults
if (operator=="AND"):
if (searchStringCount > 1):
resultCount=0
for hit in searchResults['hits']['hits']:
startTime = dateutil.parser.parse(hit["_source"]["@timestamp"]) + datetime.timedelta(days =- 1)
endTime = dateutil.parser.parse(hit["_source"]["@timestamp"]) + datetime.timedelta(days = 1)
#Insert start time
entry = re.sub('\<startTime\>', str(startTime.isoformat()), entry)
#Insert end time
entry = re.sub('\<endTime\>', str(endTime.isoformat()), entry)
newSearchResults = ElasticSearchQuery.searchVulnerability(entry,vulnerability,sourceIP,sourceHost)
newResults = getElasticSearchResults(newSearchResults)
if (newResults != "0"):
#This is the result from search 1
newResultCount=0
isExploitFound=True
for newhit in newSearchResults['hits']['hits']:
try:
attackerIP=newhit["_source"]["evt_srcip"]
except:
attackerIP="0.0.0.0"
#These are the results from any further results proving the AND condition
cnx = getDBConnector()
cursor = cnx.cursor()
#Check original log hit
query = ("SELECT count(*) as count from attackInventory where victim_ip = '" + str(ip2long(sourceIP)) + "' and threat_id = '" + vulnerability + "' and attack_log = '" + newhit["_source"]["message"] + "'")
cursor.execute(query)
for row in cursor:
logCheck=row[0]
cursor.close()
if logCheck==0:
#Write data to DB
cursor = cnx.cursor()
add_logInstance = ("INSERT INTO attackInventory "
"(victim_ip, attacker_ip, attack_time, attack_log, threat_id) "
"VALUES (%s, %s, %s, %s, %s)")
logData = (ip2long(sourceIP), ip2long(attackerIP),hit["_source"]["@timestamp"], hit["_source"]["message"], vulnerability)
# Insert new entry
cursor.execute(add_logInstance , logData )
cursor = cnx.cursor()
#check new log hit
query = ("SELECT count(*) as count from attackInventory where victim_ip = '" + str(ip2long(sourceIP)) + "' and threat_id = '" + vulnerability + "' and attack_log = '" + newhit["_source"]["message"] + "'")
cursor.execute(query)
for row in cursor:
logCheck=row[0]
cursor.close()
if logCheck==0:
#Write data to DB
cursor = cnx.cursor()
add_logInstance = ("INSERT INTO attackInventory "
"(victim_ip, attacker_ip, attack_time, attack_log, threat_id) "
"VALUES (%s, %s, %s, %s, %s)")
logData = (ip2long(sourceIP), ip2long(attackerIP),newhit["_source"]["@timestamp"], newhit["_source"]["message"], vulnerability)
# Insert new entry
cursor.execute(add_logInstance , logData )
cnx.commit()
cursor.close()
cnx.close()
newResultCount=newResultCount+1
else:
newResultCount=0
resultCount=newResultCount+1
elif (operator=="OR"):
if (searchStringCount == 1):
if (int(numResults) > 0):
resultCount = int(numResults)
writeElasticSearchResults(searchResults,vulnObject,sourceIP,vulnerability)
isExploitFound=True
if (searchStringCount > 1):
#Insert startTime
entry = re.sub('\<startTime\>', startTime, entry)
#Insert endTime
entry = re.sub('\<endTime\>', endTime, entry)
if sourceIP == '*':
entry = re.sub('\<min_count\>', '1', entry)
else:
entry = re.sub('\<min_count\>', '2', entry)
#only keep searching if there are more IOCS to look at...
if len(searchStringResults)>1:
searchResults = ElasticSearchQuery.searchVulnerability(entry,vulnerability,sourceIP,sourceHost)
numResults = getElasticSearchResults(searchResults)
if int(numResults) > 0:
writeElasticSearchResults(searchResults,vulnObject,sourceIP,vulnerability)
resultCount = resultCount + int(numResults)
searchStringCount=searchStringCount+1
if (isExploitFound==True):
print(" Found " + str(resultCount) + " instances of exploitation!")
print(" Generating attack logs")
#Parse through data list to get elastic timestamp for audit log times...
else:
print(" No instances of exploitation found.\n")
else:
resultCount=0
print("Invalid vulnerability ID")
return(resultCount)
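This example uses re.sub purely as a template engine: placeholders such as <source_host>, <source_ip>, <startTime>, <endTime> and <min_count> inside the IOC search strings are replaced with runtime values before the query goes to Splunk or Elasticsearch. A minimal sketch of that substitution step, with an invented query template:

import re

# Invented template in the spirit of the search strings above.
entry = ('evt_dsthost:"<source_host>" AND evt_srcip:"<source_ip>" '
         'AND @timestamp:[<startTime> TO <endTime>]')

entry = re.sub(r'<source_host>', 'web01.example.org', entry)
entry = re.sub(r'<source_ip>', '10.0.0.5', entry)
entry = re.sub(r'<startTime>', '-90d', entry)
entry = re.sub(r'<endTime>', 'now', entry)
print(entry)

The backslashes in the original patterns (e.g. '\<source_host\>') are harmless but unnecessary, since "<" and ">" are not regex metacharacters; because the placeholders are fixed strings, str.replace would do the same job.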
Example 96
Project: pyfrc Source File: cli_deploy.py
def run(self, options, robot_class, **static_options):
from .. import config
config.mode = 'upload'
# run the test suite before uploading
if not options.skip_tests:
from .cli_test import PyFrcTest
tester = PyFrcTest()
retval = tester.run_test([], robot_class, options.builtin, ignore_missing_test=True)
if retval != 0:
print_err("ERROR: Your robot tests failed, aborting upload.")
if not sys.stdin.isatty():
print_err("- Use --skip-tests if you want to upload anyways")
return retval
print()
if not yesno('- Upload anyways?'):
return retval
if not yesno('- Are you sure? Your robot code may crash!'):
return retval
print()
print("WARNING: Uploading code against my better judgement...")
# upload all files in the robot.py source directory
robot_file = abspath(inspect.getfile(robot_class))
robot_path = dirname(robot_file)
robot_filename = basename(robot_file)
cfg_filename = join(robot_path, '.deploy_cfg')
if not options.nonstandard and robot_filename != 'robot.py':
print_err("ERROR: Your robot code must be in a file called robot.py (launched from %s)!" % robot_filename)
print_err()
print_err("If you really want to do this, then specify the --nonstandard argument")
return 1
# This probably should be configurable... oh well
deploy_dir = '/home/lvuser'
py_deploy_dir = '%s/py' % deploy_dir
# note below: deployed_cmd apparently can only be a single line
# In 2015, there were stdout/stderr issues. In 2016, they seem to
# have been fixed, but need to use -u for it to really work properly
if options.debug:
deployed_cmd = 'env LD_LIBRARY_PATH=/usr/local/frc/rpath-lib/ /usr/local/frc/bin/netconsole-host /usr/local/bin/python3 -u %s/%s -v run' % (py_deploy_dir, robot_filename)
deployed_cmd_fname = 'robotDebugCommand'
extra_cmd = 'touch /tmp/frcdebug; chown lvuser:ni /tmp/frcdebug'
else:
deployed_cmd = 'env LD_LIBRARY_PATH=/usr/local/frc/rpath-lib/ /usr/local/frc/bin/netconsole-host /usr/local/bin/python3 -u -O %s/%s run' % (py_deploy_dir, robot_filename)
deployed_cmd_fname = 'robotCommand'
extra_cmd = ''
if options.in_place:
del_cmd = ''
else:
del_cmd = "[ -d %(py_deploy_dir)s ] && rm -rf %(py_deploy_dir)s"
del_cmd %= {"py_deploy_dir": py_deploy_dir}
check_version = '/usr/local/bin/python3 -c "exec(open(\\"$SITEPACKAGES/wpilib/version.py\\", \\"r\\").read(), globals()); print(\\"WPILib version on robot is \\" + __version__);exit(0) if __version__ == \\"%s\\" else exit(89)"' % wpilib.__version__
if options.no_version_check:
check_version = ''
# This is a nasty bit of code now...
sshcmd = inspect.cleandoc("""
/bin/bash -ce '[ -x /usr/local/bin/python3 ] || exit 87
SITEPACKAGES=$(/usr/local/bin/python3 -c "import site; print(site.getsitepackages()[0])")
[ -f $SITEPACKAGES/wpilib/version.py ] || exit 88
%(check_version)s
%(del_cmd)s
echo "%(cmd)s" > %(deploy_dir)s/%(cmd_fname)s
%(extra_cmd)s'
""")
sshcmd %= {
'del_cmd': del_cmd,
'deploy_dir': deploy_dir,
'cmd': deployed_cmd,
'cmd_fname': deployed_cmd_fname,
'extra_cmd': extra_cmd,
'check_version': check_version
}
sshcmd = re.sub("\n+", ";", sshcmd)
nc_thread = None
try:
controller = installer.ssh_from_cfg(cfg_filename,
username='lvuser',
password='',
hostname=options.robot,
allow_mitm=True,
no_resolve=options.no_resolve)
# Housekeeping first
logger.debug('SSH: %s', sshcmd)
controller.ssh(sshcmd)
# Copy the files over, copy to a temporary directory first
# -> this is inefficient, but it's easier in sftp
tmp_dir = tempfile.mkdtemp()
py_tmp_dir = join(tmp_dir, 'py')
try:
self._copy_to_tmpdir(py_tmp_dir, robot_path)
controller.sftp(py_tmp_dir, deploy_dir, mkdir=not options.in_place)
finally:
shutil.rmtree(tmp_dir)
# start the netconsole listener now if requested, *before* we
# actually start the robot code, so we can see all messages
if options.nc:
from netconsole import run
nc_event = threading.Event()
nc_thread = threading.Thread(target=run,
kwargs={'init_event': nc_event},
daemon=True)
nc_thread.start()
nc_event.wait(5)
logger.info("Netconsole is listening...")
if not options.in_place:
# Restart the robot code and we're done!
sshcmd = "/bin/bash -ce '" + \
'. /etc/profile.d/natinst-path.sh; ' + \
'chown -R lvuser:ni %s; ' + \
'/usr/local/frc/bin/frcKillRobot.sh -t -r' + \
"'"
sshcmd %= (py_deploy_dir)
logger.debug('SSH: %s', sshcmd)
controller.ssh(sshcmd)
except installer.SshExecError as e:
if e.retval == 87:
print_err("ERROR: python3 was not found on the roboRIO: have you installed robotpy?")
elif e.retval == 88:
print_err("ERROR: WPILib was not found on the roboRIO: have you installed robotpy?")
elif e.retval == 89:
print_err("ERROR: expected WPILib version %s" % wpilib.__version__)
print_err()
print_err("You should either:")
print_err("- If the robot version is older, upgrade the RobotPy on your robot")
print_err("- Otherwise, upgrade pyfrc on your computer")
print_err()
print_err("Alternatively, you can specify --no-version-check to skip this check")
else:
print_err("ERROR: %s" % e)
return 1
except installer.Error as e:
print_err("ERROR: %s" % e)
return 1
else:
print("\nSUCCESS: Deploy was successful!")
if nc_thread is not None:
nc_thread.join()
return 0
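After the cleandoc/format step, re.sub("\n+", ";", sshcmd) collapses the multi-line /bin/bash -ce script into one ";"-separated command line, matching the comment above that the deployed command apparently has to be a single line. A tiny standalone sketch:

import inspect
import re

sshcmd = inspect.cleandoc("""
    /bin/bash -ce 'echo hello

    echo world'
""")
# Runs of newlines (including blank lines) become single ";" separators.
print(re.sub("\n+", ";", sshcmd))
# /bin/bash -ce 'echo hello;echo world'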
Example 97
def __init__(self):
self.config = None
self.address_book_list = []
self.original_uid_dict = {}
self.uid_dict = {}
# set locale
locale.setlocale(locale.LC_ALL, '')
# load config file
xdg_config_home = os.environ.get("XDG_CONFIG_HOME") or \
os.path.expanduser("~/.config")
config_file = os.environ.get("KHARD_CONFIG") or \
os.path.join(xdg_config_home, "khard", "khard.conf")
if not os.path.exists(config_file):
print("Config file %s not available" % config_file)
sys.exit(2)
# parse config file contents
try:
self.config = configobj.ConfigObj(
config_file, interpolation=False)
except configobj.ParseError as err:
print("Error in config file\n%s" % err)
sys.exit(2)
# general settings
if "general" not in self.config:
print('Error in config file\n'
'Missing main section "[general]".')
sys.exit(2)
# debug
if 'debug' not in self.config['general']:
self.config['general']['debug'] = False
elif self.config['general']['debug'] == "yes":
self.config['general']['debug'] = True
elif self.config['general']['debug'] == "no":
self.config['general']['debug'] = False
else:
print("Error in config file\n"
"Invalid value for debug parameter\n"
"Possible values: yes, no")
sys.exit(2)
# editor
self.config['general']['editor'] = \
self.config['general'].get("editor") \
or os.environ.get("EDITOR")
if self.config['general']['editor'] is None:
print("Error in config file\n"
"Set path to your preferred text editor in khard's "
"config file or the $EDITOR shell variable\n"
"Example for khard.conf: editor = vim")
sys.exit(2)
self.config['general']['editor'] = find_executable(
os.path.expanduser(self.config['general']['editor']))
if self.config['general']['editor'] is None:
print("Error in config file\n"
"Invalid editor path or executable not found.")
sys.exit(2)
# merge editor
self.config['general']['merge_editor'] = \
self.config['general'].get("merge_editor") \
or os.environ.get("MERGE_EDITOR")
if self.config['general']['merge_editor'] is None:
print("Error in config file\nSet path to your preferred text "
"merge editor in khard's config file or the "
"$MERGE_EDITOR shell variable\n"
"Example for khard.conf: merge_editor = vimdiff")
sys.exit(2)
self.config['general']['merge_editor'] = find_executable(
os.path.expanduser(self.config['general']['merge_editor']))
if self.config['general']['merge_editor'] is None:
print("Error in config file\n"
"Invalid merge editor path or executable not found.")
sys.exit(2)
# default action
if "default_action" not in self.config['general']:
print("Error in config file\n"
"Missing default action parameter.")
sys.exit(2)
elif self.config['general']['default_action'] not in \
Actions.get_list_of_all_actions():
print("Error in config file\nInvalid value for default_action "
"parameter\nPossible values: %s" % ', '.join(
sorted(Actions.get_list_of_all_actions())))
sys.exit(2)
# contact table settings
if "contact table" not in self.config:
self.config['contact table'] = {}
# sort contact table by first or last name
if "sort" not in self.config['contact table']:
self.config['contact table']['sort'] = "first_name"
elif self.config['contact table']['sort'] not in \
["first_name", "last_name"]:
print("Error in config file\n"
"Invalid value for sort parameter\n"
"Possible values: first_name, last_name")
sys.exit(2)
# display names in contact table by first or last name
if "display" not in self.config['contact table']:
# if display by name attribute is not present in the config
# file use the sort attribute value for backwards compatibility
self.config['contact table']['display'] = \
self.config['contact table']['sort']
elif self.config['contact table']['display'] not in \
["first_name", "last_name"]:
print("Error in config file\n"
"Invalid value for display parameter\n"
"Possible values: first_name, last_name")
sys.exit(2)
# reverse contact table
if 'reverse' not in self.config['contact table']:
self.config['contact table']['reverse'] = False
elif self.config['contact table']['reverse'] == "yes":
self.config['contact table']['reverse'] = True
elif self.config['contact table']['reverse'] == "no":
self.config['contact table']['reverse'] = False
else:
print("Error in config file\n"
"Invalid value for reverse parameter\n"
"Possible values: yes, no")
sys.exit(2)
# group contact table by address book
if "group_by_addressbook" not in self.config['contact table']:
self.config['contact table']['group_by_addressbook'] = False
elif self.config['contact table']['group_by_addressbook'] == "yes":
self.config['contact table']['group_by_addressbook'] = True
elif self.config['contact table']['group_by_addressbook'] == "no":
self.config['contact table']['group_by_addressbook'] = False
else:
print("Error in config file\n"
"Invalid value for group_by_addressbook parameter\n"
"Possible values: yes, no")
sys.exit(2)
# nickname
if "show_nicknames" not in self.config['contact table']:
self.config['contact table']['show_nicknames'] = False
elif self.config['contact table']['show_nicknames'] == "yes":
self.config['contact table']['show_nicknames'] = True
elif self.config['contact table']['show_nicknames'] == "no":
self.config['contact table']['show_nicknames'] = False
else:
print("Error in config file\n"
"Invalid value for show_nicknames parameter\n"
"Possible values: yes, no")
sys.exit(2)
# show uids
if "show_uids" not in self.config['contact table']:
self.config['contact table']['show_uids'] = True
elif self.config['contact table']['show_uids'] == "yes":
self.config['contact table']['show_uids'] = True
elif self.config['contact table']['show_uids'] == "no":
self.config['contact table']['show_uids'] = False
else:
print("Error in config file\n"
"Invalid value for show_uids parameter\n"
"Possible values: yes, no")
sys.exit(2)
# vcard settings
if "vcard" not in self.config:
self.config['vcard'] = {}
# get supported private objects
if "private_objects" not in self.config['vcard']:
self.config['vcard']['private_objects'] = []
else:
# check if object only contains letters, digits or -
for object in self.config['vcard']['private_objects']:
if object != re.sub("[^a-zA-Z0-9-]", "", object):
print("Error in config file\n"
"private object %s may only contain letters, "
"digits and the \"-\" character." % object)
sys.exit(2)
if object == re.sub("[^-]", "", object) \
or object.startswith("-") \
or object.endswith("-"):
print("Error in config file\n"
"A \"-\" in a private object label must be "
"at least surrounded by one letter or digit.")
sys.exit(2)
# preferred vcard version
if "preferred_version" not in self.config['vcard']:
self.config['vcard']['preferred_version'] = "3.0"
elif self.config['vcard']['preferred_version'] not in \
self.get_supported_vcard_versions():
print("Error in config file\n"
"Invalid value for preferred_version parameter\n"
"Possible values: %s"
% self.get_supported_vcard_versions())
sys.exit(2)
# speed up program by pre-searching in the vcard source files
if 'search_in_source_files' not in self.config['vcard']:
self.config['vcard']['search_in_source_files'] = False
elif self.config['vcard']['search_in_source_files'] == "yes":
self.config['vcard']['search_in_source_files'] = True
elif self.config['vcard']['search_in_source_files'] == "no":
self.config['vcard']['search_in_source_files'] = False
else:
print("Error in config file\n"
"Invalid value for search_in_source_files parameter\n"
"Possible values: yes, no")
sys.exit(2)
# skip unparsable vcards
if 'skip_unparsable' not in self.config['vcard']:
self.config['vcard']['skip_unparsable'] = False
elif self.config['vcard']['skip_unparsable'] == "yes":
self.config['vcard']['skip_unparsable'] = True
elif self.config['vcard']['skip_unparsable'] == "no":
self.config['vcard']['skip_unparsable'] = False
else:
print("Error in config file\n"
"Invalid value for skip_unparsable parameter\n"
"Possible values: yes, no")
sys.exit(2)
# load address books
if "addressbooks" not in self.config:
print('Error in config file\n'
'Missing main section "[addressbooks]".')
sys.exit(2)
if len(self.config['addressbooks'].keys()) == 0:
print("Error in config file\n"
"No address book entries available.")
sys.exit(2)
for name in self.config['addressbooks'].keys():
# create address book object
try:
address_book = AddressBook(
name, self.config['addressbooks'][name]['path'])
except KeyError as e:
print("Error in config file\n"
"Missing path to the \"%s\" address book." % name)
sys.exit(2)
except IOError as e:
print("Error in config file\n%s" % e)
sys.exit(2)
else:
# add address book to list
self.address_book_list.append(address_book)
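Both re.sub calls in the private_objects check above are used for validation rather than transformation: a label is accepted only if stripping every character outside [a-zA-Z0-9-] leaves it unchanged, and re.sub("[^-]", "", object) detects labels made up entirely of dashes. A small sketch of that validation logic with a few sample labels:

import re

def is_valid_private_object(label):
    # Valid only if removing every disallowed character changes nothing.
    if label != re.sub("[^a-zA-Z0-9-]", "", label):
        return False
    # Reject labels that are all dashes, or that start or end with a dash.
    if label == re.sub("[^-]", "", label) or label.startswith("-") or label.endswith("-"):
        return False
    return True

print(is_valid_private_object("Jabber"))    # True
print(is_valid_private_object("twitter!"))  # False
print(is_valid_private_object("-"))         # False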
Example 98
Project: tahoe-lafs Source File: fixups.py
def initialize():
global done
import sys
if sys.platform != "win32" or done:
return True
done = True
import codecs, re
from ctypes import WINFUNCTYPE, WinError, windll, POINTER, byref, c_int, get_last_error
from ctypes.wintypes import BOOL, HANDLE, DWORD, UINT, LPWSTR, LPCWSTR, LPVOID
from allmydata.util import log
from allmydata.util.encodingutil import canonical_encoding
# <https://msdn.microsoft.com/en-us/library/ms680621%28VS.85%29.aspx>
SetErrorMode = WINFUNCTYPE(
UINT, UINT,
use_last_error=True
)(("SetErrorMode", windll.kernel32))
SEM_FAILCRITICALERRORS = 0x0001
SEM_NOOPENFILEERRORBOX = 0x8000
SetErrorMode(SEM_FAILCRITICALERRORS | SEM_NOOPENFILEERRORBOX)
original_stderr = sys.stderr
# If any exception occurs in this code, we'll probably try to print it on stderr,
# which makes for frustrating debugging if stderr is directed to our wrapper.
# So be paranoid about catching errors and reporting them to original_stderr,
# so that we can at least see them.
def _complain(message):
print >>original_stderr, isinstance(message, str) and message or repr(message)
log.msg(message, level=log.WEIRD)
# Work around <http://bugs.python.org/issue6058>.
codecs.register(lambda name: name == 'cp65001' and codecs.lookup('utf-8') or None)
# Make Unicode console output work independently of the current code page.
# This also fixes <http://bugs.python.org/issue1602>.
# Credit to Michael Kaplan <https://blogs.msdn.com/b/michkap/archive/2010/04/07/9989346.aspx>
# and TZOmegaTZIOY
# <http://stackoverflow.com/questions/878972/windows-cmd-encoding-change-causes-python-crash/1432462#1432462>.
try:
# <https://msdn.microsoft.com/en-us/library/ms683231(VS.85).aspx>
# HANDLE WINAPI GetStdHandle(DWORD nStdHandle);
# returns INVALID_HANDLE_VALUE, NULL, or a valid handle
#
# <https://msdn.microsoft.com/en-us/library/aa364960(VS.85).aspx>
# DWORD WINAPI GetFileType(DWORD hFile);
#
# <https://msdn.microsoft.com/en-us/library/ms683167(VS.85).aspx>
# BOOL WINAPI GetConsoleMode(HANDLE hConsole, LPDWORD lpMode);
GetStdHandle = WINFUNCTYPE(
HANDLE, DWORD,
use_last_error=True
)(("GetStdHandle", windll.kernel32))
STD_OUTPUT_HANDLE = DWORD(-11)
STD_ERROR_HANDLE = DWORD(-12)
GetFileType = WINFUNCTYPE(
DWORD, DWORD,
use_last_error=True
)(("GetFileType", windll.kernel32))
FILE_TYPE_CHAR = 0x0002
FILE_TYPE_REMOTE = 0x8000
GetConsoleMode = WINFUNCTYPE(
BOOL, HANDLE, POINTER(DWORD),
use_last_error=True
)(("GetConsoleMode", windll.kernel32))
INVALID_HANDLE_VALUE = DWORD(-1).value
def not_a_console(handle):
if handle == INVALID_HANDLE_VALUE or handle is None:
return True
return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
or GetConsoleMode(handle, byref(DWORD())) == 0)
old_stdout_fileno = None
old_stderr_fileno = None
if hasattr(sys.stdout, 'fileno'):
old_stdout_fileno = sys.stdout.fileno()
if hasattr(sys.stderr, 'fileno'):
old_stderr_fileno = sys.stderr.fileno()
STDOUT_FILENO = 1
STDERR_FILENO = 2
real_stdout = (old_stdout_fileno == STDOUT_FILENO)
real_stderr = (old_stderr_fileno == STDERR_FILENO)
if real_stdout:
hStdout = GetStdHandle(STD_OUTPUT_HANDLE)
if not_a_console(hStdout):
real_stdout = False
if real_stderr:
hStderr = GetStdHandle(STD_ERROR_HANDLE)
if not_a_console(hStderr):
real_stderr = False
if real_stdout or real_stderr:
# <https://msdn.microsoft.com/en-us/library/windows/desktop/ms687401%28v=vs.85%29.aspx>
# BOOL WINAPI WriteConsoleW(HANDLE hOutput, LPWSTR lpBuffer, DWORD nChars,
# LPDWORD lpCharsWritten, LPVOID lpReserved);
WriteConsoleW = WINFUNCTYPE(
BOOL, HANDLE, LPWSTR, DWORD, POINTER(DWORD), LPVOID,
use_last_error=True
)(("WriteConsoleW", windll.kernel32))
class UnicodeOutput:
def __init__(self, hConsole, stream, fileno, name):
self._hConsole = hConsole
self._stream = stream
self._fileno = fileno
self.closed = False
self.softspace = False
self.mode = 'w'
self.encoding = 'utf-8'
self.name = name
if hasattr(stream, 'encoding') and canonical_encoding(stream.encoding) != 'utf-8':
log.msg("%s: %r had encoding %r, but we're going to write UTF-8 to it" %
(name, stream, stream.encoding), level=log.CURIOUS)
self.flush()
def isatty(self):
return False
def close(self):
# don't really close the handle, that would only cause problems
self.closed = True
def fileno(self):
return self._fileno
def flush(self):
if self._hConsole is None:
try:
self._stream.flush()
except Exception, e:
_complain("%s.flush: %r from %r" % (self.name, e, self._stream))
raise
def write(self, text):
try:
if self._hConsole is None:
if isinstance(text, unicode):
text = text.encode('utf-8')
self._stream.write(text)
else:
if not isinstance(text, unicode):
text = str(text).decode('utf-8')
remaining = len(text)
while remaining > 0:
n = DWORD(0)
# There is a shorter-than-documented limitation on the length of the string
# passed to WriteConsoleW (see #1232).
retval = WriteConsoleW(self._hConsole, text, min(remaining, 10000), byref(n), None)
if retval == 0:
raise IOError("WriteConsoleW failed with WinError: %s" % (WinError(get_last_error()),))
if n.value == 0:
raise IOError("WriteConsoleW returned %r, n.value = 0" % (retval,))
remaining -= n.value
if remaining == 0: break
text = text[n.value:]
except Exception, e:
_complain("%s.write: %r" % (self.name, e))
raise
def writelines(self, lines):
try:
for line in lines:
self.write(line)
except Exception, e:
_complain("%s.writelines: %r" % (self.name, e))
raise
if real_stdout:
sys.stdout = UnicodeOutput(hStdout, None, STDOUT_FILENO, '<Unicode console stdout>')
else:
sys.stdout = UnicodeOutput(None, sys.stdout, old_stdout_fileno, '<Unicode redirected stdout>')
if real_stderr:
sys.stderr = UnicodeOutput(hStderr, None, STDERR_FILENO, '<Unicode console stderr>')
else:
sys.stderr = UnicodeOutput(None, sys.stderr, old_stderr_fileno, '<Unicode redirected stderr>')
except Exception, e:
_complain("exception %r while fixing up sys.stdout and sys.stderr" % (e,))
# This works around <http://bugs.python.org/issue2128>.
# <https://msdn.microsoft.com/en-us/library/windows/desktop/ms683156%28v=vs.85%29.aspx>
GetCommandLineW = WINFUNCTYPE(
LPWSTR,
use_last_error=True
)(("GetCommandLineW", windll.kernel32))
# <https://msdn.microsoft.com/en-us/library/windows/desktop/bb776391%28v=vs.85%29.aspx>
CommandLineToArgvW = WINFUNCTYPE(
POINTER(LPWSTR), LPCWSTR, POINTER(c_int),
use_last_error=True
)(("CommandLineToArgvW", windll.shell32))
argc = c_int(0)
argv_unicode = CommandLineToArgvW(GetCommandLineW(), byref(argc))
if argv_unicode is None:
raise WinError(get_last_error())
# Because of <http://bugs.python.org/issue8775> (and similar limitations in
# twisted), the 'bin/tahoe' script cannot invoke us with the actual Unicode arguments.
# Instead it "mangles" or escapes them using \x7F as an escape character, which we
# unescape here.
def unmangle(s):
return re.sub(ur'\x7F[0-9a-fA-F]*\;', lambda m: unichr(int(m.group(0)[1:-1], 16)), s)
try:
argv = [unmangle(argv_unicode[i]).encode('utf-8') for i in xrange(0, argc.value)]
except Exception, e:
_complain("%s: could not unmangle Unicode arguments.\n%r"
% (sys.argv[0], [argv_unicode[i] for i in xrange(0, argc.value)]))
raise
# Take only the suffix with the same number of arguments as sys.argv.
# This accounts for anything that can cause initial arguments to be stripped,
# for example, the Python interpreter or any options passed to it, or runner
# scripts such as 'coverage run'. It works even if there are no such arguments,
# as in the case of a frozen executable created by bb-freeze or similar.
sys.argv = argv[-len(sys.argv):]
if sys.argv[0].endswith('.pyscript'):
sys.argv[0] = sys.argv[0][:-9]
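The unmangle helper above turns each "\x7F<hex digits>;" escape back into the character it encodes, using a lambda replacement that decodes the hex between the escape character and the trailing ";". The original is Python 2 (ur'' literal, unichr); the sketch below is a Python 3 rendering of the same pattern, with an invented input string.

import re

def unmangle(s):
    # Each escape looks like "\x7F<hex digits>;"; decode the hex back into a character.
    return re.sub('\x7F[0-9a-fA-F]*;',
                  lambda m: chr(int(m.group(0)[1:-1], 16)),
                  s)

mangled = "caf\x7Fe9;"    # "caf" plus U+00E9 escaped as \x7F e9 ;
print(unmangle(mangled))  # café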
Example 99
Project: pelisalacarta Source File: aquitorrent.py
def fanart(item):
logger.info("pelisalacarta.aquitorrent fanart")
itemlist = []
url = item.url
data = scrapertools.cache_page(url)
data = re.sub(r"\n|\r|\t|\s{2}|\(.*?\)| ","",data)
title = item.extra
year=""
item.title = re.sub(r"-|\(.*?\)|\d+x\d+","",item.title)
if not "Series" in item.url:
urlyear = item.url
data = scrapertools.cache_page(urlyear)
try:
year =scrapertools.get_match(data,'<span style="text-align: justify;">.*?Año.*?(\d\d\d\d)')
except:
year = ""
try:
if "CLASICOS-DISNEY" in item.url:
title = title + " "+"Disney"
try:
###Search TMDb for the movie by title and year
title_tmdb = title.replace(" ","%20")
url_tmdb="http://api.themoviedb.org/3/search/movie?api_key=2e2160006592024ba87ccdf78c28f49f&query=" + title_tmdb +"&year="+year+"&language=es&include_adult=false"
data = scrapertools.cachePage(url_tmdb)
data = re.sub(r"\n|\r|\t|\s{2}| ","",data)
id = scrapertools.get_match(data,'"page":1.*?,"id":(.*?),')
except:
if ":" in title or "(" in title:
title_tmdb = title.replace(" ","%20")
url_tmdb="http://api.themoviedb.org/3/search/movie?api_key=2e2160006592024ba87ccdf78c28f49f&query=" + title_tmdb +"&year="+year+"&language=es&include_adult=false"
data = scrapertools.cachePage(url_tmdb)
data = re.sub(r"\n|\r|\t|\s{2}| ","",data)
id = scrapertools.get_match(data,'"page":1.*?,"id":(.*?),')
else:
title_tmdb = title.replace(" ","%20")
title_tmdb= re.sub(r"(:.*)|\(.*?\)","",title_tmdb)
url_tmdb="http://api.themoviedb.org/3/search/movie?api_key=2e2160006592024ba87ccdf78c28f49f&query=" + title_tmdb +"&year="+year+"&language=es&include_adult=false"
data = scrapertools.cachePage(url_tmdb)
data = re.sub(r"\n|\r|\t|\s{2}| ","",data)
id = scrapertools.get_match(data,'"page":1.*?,"id":(.*?),')
except:
###If there is no match, run a Bing search for the IMDb id
urlbing_imdb = "http://www.bing.com/search?q=%s+%s+site:imdb.com" % (title.replace(' ', '+'), year)
data = browser (urlbing_imdb)
try:
subdata_imdb = scrapertools.get_match(data,'<li class="b_algo">(.*?)h="ID')
except:
pass
try:
url_imdb = scrapertools.get_match(subdata_imdb,'<a href="([^"]+)"')
except:
pass
try:
id_imdb = scrapertools.get_match(url_imdb,'.*?www.imdb.com/.*?/(.*?)/')
except:
pass
try:
###Look up the TMDb id via the IMDb id
urltmdb_remote ="https://api.themoviedb.org/3/find/"+id_imdb+"?external_source=imdb_id&api_key=2e2160006592024ba87ccdf78c28f49f&language=es&include_adult=false"
data = scrapertools.cachePage(urltmdb_remote)
data = re.sub(r"\n|\r|\t|\s{2}| ","",data)
id = scrapertools.get_match(data,'"movie_results".*?,"id":(\d+)')
except:
id = ""
###At this point we have the TMDb id (or not); look up fanart_1
urltmdb_fan1 ="http://api.themoviedb.org/3/movie/"+id+"?api_key=2e2160006592024ba87ccdf78c28f49f"
data = scrapertools.cachePage( urltmdb_fan1 )
data = re.sub(r"\n|\r|\t|\s{2}| ","",data)
patron = '"adult".*?"backdrop_path":"(.*?)"'
matches = re.compile(patron,re.DOTALL).findall(data)
try:
###Try the TMDb poster
posterdb = scrapertools.get_match(data,'"adult".*?"poster_path":"(.*?)"')
posterdb = "https://image.tmdb.org/t/p/original" + posterdb
except:
posterdb = item.thumbnail
if len(matches)==0:
fanart_info = item.fanart
fanart= item.fanart
fanart_2 = item.fanart
itemlist.append( Item(channel=item.channel, title =item.title, url=item.url, action="findvideos", thumbnail=posterdb, fanart=fanart ,extra= fanart_2, folder=True) )
for fan in matches:
fanart="https://image.tmdb.org/t/p/original" + fan
fanart_1= fanart
###Look up fanart for info, fanart for the trailer and fanart_2 (findvideos) in TMDb
urltmdb_images ="http://api.themoviedb.org/3/movie/"+id+"/images?api_key=2e2160006592024ba87ccdf78c28f49f"
data = scrapertools.cachePage(urltmdb_images)
data = re.sub(r"\n|\r|\t|\s{2}| ","",data)
patron = '"backdrops".*?"file_path":".*?",.*?"file_path":"(.*?)",.*?"file_path":"(.*?)",.*?"file_path":"(.*?)"'
matches = re.compile(patron,re.DOTALL).findall(data)
if len(matches) == 0:
patron = '"backdrops".*?"file_path":"(.*?)",.*?"file_path":"(.*?)",.*?"file_path":"(.*?)"'
matches = re.compile(patron,re.DOTALL).findall(data)
if len(matches) == 0:
fanart_info = fanart_1
fanart_trailer = fanart_1
fanart_2 = fanart_1
category =""
for fanart_info, fanart_trailer, fanart_2 in matches:
fanart_info = "https://image.tmdb.org/t/p/original" + fanart_info
fanart_trailer = "https://image.tmdb.org/t/p/original" + fanart_trailer
fanart_2 = "https://image.tmdb.org/t/p/original" + fanart_2
category = ""
if fanart_info == fanart:
###Look up fanart_info on IMDb if it matches fanart
try:
url_imdbphoto = "http://www.imdb.com/title/"+id_imdb+"/mediaindex"
photo_imdb= scrapertools.get_match(url_imdbphoto,'<div class="media_index_thumb_list".*?src="([^"]+)"')
photo_imdb = photo_imdb.replace("@._V1_UY100_CR25,0,100,100_AL_.jpg","@._V1_SX1280_SY720_.jpg")
fanart_info = photo_imdb
except:
fanart_info = fanart_2
itemlist.append( Item(channel=item.channel, title =item.title, url=item.url, action="findvideos", thumbnail=posterdb, fanart=fanart_1 ,extra= fanart_2, folder=True) )
else:
urlyear = item.url
data = scrapertools.cache_page(urlyear)
try:
year =scrapertools.get_match(data,'<span style="text-align: justify;">.*?Año.*?(\d\d\d\d)')
except:
try:
year =scrapertools.get_match(data,'SINOPSIS.*? \((\d\d\d\d)')
except:
year = ""
#Bing search for the IMDb series id
url_imdb = "http://www.bing.com/search?q=%s+%s+tv+series+site:imdb.com" % (title.replace(' ', '+'), year)
data = browser (url_imdb)
data = re.sub(r"\n|\r|\t|\s{2}| ","",data)
try:
subdata_imdb = scrapertools.get_match(data,'<li class="b_algo">(.*?)h="ID')
except:
pass
try:
imdb_id = scrapertools.get_match(subdata_imdb,'<a href=.*?http.*?imdb.com/title/(.*?)/.*?"')
except:
imdb_id = ""
### Look up the TVDB id via the IMDb id
urltvdb_remote="http://thetvdb.com/api/GetSeriesByRemoteID.php?imdbid="+imdb_id+"&language=es"
data = scrapertools.cachePage(urltvdb_remote)
data = re.sub(r"\n|\r|\t|\s{2}| ","",data)
patron = '<Data><Series><seriesid>([^<]+)</seriesid>.*?<Overview>(.*?)</Overview>'
matches = re.compile(patron,re.DOTALL).findall(data)
if len(matches)== 0:
print "gooooooo"
###If there is no match, query TheTVDB directly
if ":" in title or "(" in title:
title= title.replace(" ","%20")
url_tvdb="http://thetvdb.com/api/GetSeries.php?seriesname=" + title + "&language=es"
data = scrapertools.cachePage(url_tvdb)
data = re.sub(r"\n|\r|\t|\s{2}| ","",data)
patron = '<Data><Series><seriesid>([^<]+)</seriesid>.*?<Overview>(.*?)</Overview>'
matches = re.compile(patron,re.DOTALL).findall(data)
if len(matches)== 0:
title= re.sub(r"(:.*)|\(.*?\)","",title)
title= title.replace(" ","%20")
url_tvdb="http://thetvdb.com/api/GetSeries.php?seriesname=" + title + "&language=es"
data = scrapertools.cachePage(url_tvdb)
data = re.sub(r"\n|\r|\t|\s{2}| ","",data)
patron = '<Data><Series><seriesid>([^<]+)</seriesid>.*?<Overview>(.*?)</Overview>'
matches = re.compile(patron,re.DOTALL).findall(data)
if len(matches) == 0:
plot = ""
postertvdb = item.thumbnail
extra= "http://s6.postimg.org/rv2mu3pap/bityouthsinopsis2.png"
fanart_info = "http://s6.postimg.org/6ucl96lsh/bityouthnofan.jpg"
fanart_trailer = "http://s6.postimg.org/6ucl96lsh/bityouthnofan.jpg"
category= ""
show = title+"|"+year+"|"+"http://s6.postimg.org/mh3umjzkh/bityouthnofanventanuco.jpg"
itemlist.append( Item(channel=item.channel, title=item.title, url=item.url, action="findvideos", thumbnail=item.thumbnail, fanart="http://s6.postimg.org/6ucl96lsh/bityouthnofan.jpg" ,extra=extra, category= category, show=show ,plot=plot, folder=True) )
else:
title= title.replace(" ","%20")
url_tvdb="http://thetvdb.com/api/GetSeries.php?seriesname=" + title + "&language=es"
data = scrapertools.cachePage(url_tvdb)
data = re.sub(r"\n|\r|\t|\s{2}| ","",data)
patron = '<Data><Series><seriesid>([^<]+)</seriesid>.*?<Overview>(.*?)</Overview>'
print "perroooo"
print patron
matches = re.compile(patron,re.DOTALL).findall(data)
print matches
if len(matches) == 0:
plot = ""
postertvdb = item.thumbnail
extra= "http://s6.postimg.org/rv2mu3pap/bityouthsinopsis2.png"
show = title+"|"+year+"|"+"http://s6.postimg.org/mh3umjzkh/bityouthnofanventanuco.jpg"
fanart_info = "http://s6.postimg.org/6ucl96lsh/bityouthnofan.jpg"
fanart_trailer = "http://s6.postimg.org/6ucl96lsh/bityouthnofan.jpg"
category= ""
itemlist.append( Item(channel=item.channel, title=item.title, url=item.url, action="findvideos", thumbnail=item.thumbnail, fanart="http://s6.postimg.org/6ucl96lsh/bityouthnofan.jpg" ,extra=extra, category= category, show=show ,plot= plot, folder=True) )
#fanart
for id, info in matches:
category = id
plot = info
id_serie = id
url ="http://thetvdb.com/api/1D62F2F90030C444/series/"+id_serie+"/banners.xml"
data = scrapertools.cachePage(url)
data = re.sub(r"\n|\r|\t|\s{2}| ","",data)
patron = '<Banners><Banner>.*?<VignettePath>(.*?)</VignettePath>'
matches = re.compile(patron,re.DOTALL).findall(data)
try:
postertvdb = scrapertools.get_match(data,'<Banners><Banner>.*?<BannerPath>posters/(.*?)</BannerPath>')
postertvdb = "http://thetvdb.com/banners/_cache/posters/" + postertvdb
except:
postertvdb = item.thumbnail
if len(matches)==0:
extra="http://s6.postimg.org/rv2mu3pap/bityouthsinopsis2.png"
show = title+"|"+year+"|"+"http://s6.postimg.org/mh3umjzkh/bityouthnofanventanuco.jpg"
fanart_info = "http://s6.postimg.org/6ucl96lsh/bityouthnofan.jpg"
fanart_trailer = "http://s6.postimg.org/6ucl96lsh/bityouthnofan.jpg"
itemlist.append( Item(channel=item.channel, title=item.title, url=item.url, action="findvideos", thumbnail=postertvdb, fanart="http://s6.postimg.org/6ucl96lsh/bityouthnofan.jpg" ,category = category, extra=extra, show=show,folder=True) )
for fan in matches:
fanart="http://thetvdb.com/banners/" + fan
fanart_1= fanart
patron= '<Banners><Banner>.*?<BannerPath>.*?</BannerPath>.*?</Banner><Banner>.*?<BannerPath>(.*?)</BannerPath>.*?</Banner><Banner>.*?<BannerPath>(.*?)</BannerPath>.*?</Banner><Banner>.*?<BannerPath>(.*?)</BannerPath>'
matches = re.compile(patron,re.DOTALL).findall(data)
if len(matches)==0:
fanart_info= fanart_1
fanart_trailer = fanart_1
fanart_2 = fanart_1
show = title+"|"+year+"|"+fanart_1
extra=postertvdb
itemlist.append( Item(channel=item.channel, title=item.title, url=item.url, action="findvideos", thumbnail=postertvdb, fanart=fanart_1 ,category = category, extra=extra, show=show,folder=True) )
for fanart_info, fanart_trailer, fanart_2 in matches:
fanart_info = "http://thetvdb.com/banners/" + fanart_info
fanart_trailer = "http://thetvdb.com/banners/" + fanart_trailer
fanart_2 = "http://thetvdb.com/banners/" + fanart_2
itemlist.append( Item(channel=item.channel, title =item.title, url=item.url, action="findvideos", thumbnail=postertvdb, fanart=fanart_1 , extra= fanart_2,folder=True) )
title ="Info"
title = title.replace(title,"[COLOR skyblue][B]"+title+"[/B][/COLOR]")
if "Series" in item.url:
thumbnail = postertvdb
else:
thumbnail = posterdb
itemlist.append( Item(channel=item.channel, action="info" , title=title , url=item.url, thumbnail=thumbnail, fanart=fanart_info , folder=False ))
return itemlist
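The step repeated throughout this example is flattening each fetched page with re.sub before scraping it: newlines, carriage returns, tabs, two-character whitespace runs and &nbsp; entities are removed so the non-greedy '.*?' patterns can match across what were originally several lines of HTML or JSON. A standalone sketch of that cleanup, using illustrative markup rather than real channel data:

import re

def flatten_page(data):
    # Strip line breaks, tabs, double whitespace and &nbsp; entities so that
    # simple regexes can match across former line boundaries.
    return re.sub(r"\n|\r|\t|\s{2}|&nbsp;", "", data)

html = '<span style="text-align: justify;">\n\t\tYear:&nbsp;2015\n</span>'
flat = flatten_page(html)
# flat == '<span style="text-align: justify;">Year:2015</span>'
print(re.search(r'Year.*?(\d\d\d\d)', flat).group(1))  # -> 2015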
0
Example 100
def extractInfo(self):
#################/
# name - Naming table
#################/
self.sFamilyClass = 0
self.sFamilySubClass = 0
name_offset = self.seek_table("name")
format = self.read_ushort()
if format != 0:
die("Unknown name table format " + format)
numRecords = self.read_ushort()
string_data_offset = name_offset + self.read_ushort()
names = {1: '', 2: '', 3: '', 4: '', 6: ''}
K = list(names.keys())
nameCount = len(names)
for i in range(numRecords):
platformId = self.read_ushort()
encodingId = self.read_ushort()
languageId = self.read_ushort()
nameId = self.read_ushort()
length = self.read_ushort()
offset = self.read_ushort()
if (nameId not in K):
continue
N = ''
if (platformId == 3 and encodingId == 1 and languageId == 0x409): # Microsoft, Unicode, US English, PS Name
opos = self._pos
self.seek(string_data_offset + offset)
if (length % 2 != 0):
die("PostScript name is UTF-16BE string of odd length")
length /= 2
N = ''
while (length > 0):
char = self.read_ushort()
N += (chr(char))
length -= 1
self._pos = opos
self.seek(opos)
elif (platformId == 1 and encodingId == 0 and languageId == 0): # Macintosh, Roman, English, PS Name
opos = self._pos
N = self.get_chunk(string_data_offset + offset, length)
self._pos = opos
self.seek(opos)
if (N and names[nameId] == ''):
names[nameId] = N
nameCount -= 1
if (nameCount == 0):
break
if names[6]:
psName = names[6]
elif names[4]:
psName = re.sub(' ', '-', names[4])
elif names[1]:
psName = re.sub(' ', '-', names[1])
else:
psName = ''
if not psName:
die("Could not find PostScript font name")
self.name = psName
if names[1]:
self.familyName = names[1]
else:
self.familyName = psName
if names[2]:
self.styleName = names[2]
else:
self.styleName = 'Regular'
if names[4]:
self.fullName = names[4]
else:
self.fullName = psName
if names[3]:
self.uniqueFontID = names[3]
else:
self.uniqueFontID = psName
if names[6]:
self.fullName = names[6]
#################/
# head - Font header table
#################/
self.seek_table("head")
self.skip(18)
self.unitsPerEm = unitsPerEm = self.read_ushort()
scale = 1000 / float(unitsPerEm)
self.skip(16)
xMin = self.read_short()
yMin = self.read_short()
xMax = self.read_short()
yMax = self.read_short()
self.bbox = [(xMin * scale), (yMin * scale),
(xMax * scale), (yMax * scale)]
self.skip(3 * 2)
indexToLocFormat = self.read_ushort()
glyphDataFormat = self.read_ushort()
if glyphDataFormat != 0:
die('Unknown glyph data format ' + str(glyphDataFormat))
#################/
# hhea metrics table
#################/
# ttf2t1 seems to use this value rather than the one in OS/2 - so put in for compatibility
if "hhea" in self.tables:
self.seek_table("hhea")
self.skip(4)
hheaAscender = self.read_short()
hheaDescender = self.read_short()
self.ascent = hheaAscender * scale
self.descent = hheaDescender * scale
#################/
# OS/2 - OS/2 and Windows metrics table
#################/
if "OS/2" in self.tables:
self.seek_table("OS/2")
version = self.read_ushort()
self.skip(2)
usWeightClass = self.read_ushort()
self.skip(2)
fsType = self.read_ushort()
if fsType == 0x0002 or (fsType & 0x0300) != 0:
die('ERROR - Font file ' + self.filename + ' cannot be embedded due to copyright restrictions.')
self.restrictedUse = True
self.skip(20)
sF = self.read_short()
self.sFamilyClass = (sF >> 8)
self.sFamilySubClass = (sF & 0xFF)
self._pos += 10 # PANOSE = 10 byte length
panose = self.fh.read(10)
self.skip(26)
sTypoAscender = self.read_short()
sTypoDescender = self.read_short()
if not self.ascent:
self.ascent = (sTypoAscender * scale)
if not self.descent:
self.descent = (sTypoDescender * scale)
if version > 1:
self.skip(16)
sCapHeight = self.read_short()
self.capHeight = (sCapHeight * scale)
else:
self.capHeight = self.ascent
else:
usWeightClass = 500
if not self.ascent:
self.ascent = (yMax * scale)
if not self.descent:
self.descent = (yMin * scale)
self.capHeight = self.ascent
self.stemV = 50 + int(pow((usWeightClass / 65.0), 2))
#################/
# post - PostScript table
#################/
self.seek_table("post")
self.skip(4)
self.italicAngle = self.read_short() + self.read_ushort() / 65536.0
self.underlinePosition = self.read_short() * scale
self.underlineThickness = self.read_short() * scale
isFixedPitch = self.read_ulong()
self.flags = 4
if self.italicAngle != 0:
self.flags = self.flags | 64
if usWeightClass >= 600:
self.flags = self.flags | 262144
if isFixedPitch:
self.flags = self.flags | 1
#################/
# hhea - Horizontal header table
#################/
self.seek_table("hhea")
self.skip(32)
metricDataFormat = self.read_ushort()
if (metricDataFormat != 0):
die('Unknown horizontal metric data format ' + str(metricDataFormat))
numberOfHMetrics = self.read_ushort()
if (numberOfHMetrics == 0):
die('Number of horizontal metrics is 0')
#################/
# maxp - Maximum profile table
#################/
self.seek_table("maxp")
self.skip(4)
numGlyphs = self.read_ushort()
#################/
# cmap - Character to glyph index mapping table
#################/
cmap_offset = self.seek_table("cmap")
self.skip(2)
cmapTableCount = self.read_ushort()
unicode_cmap_offset = 0
for i in range(cmapTableCount):
platformID = self.read_ushort()
encodingID = self.read_ushort()
offset = self.read_ulong()
save_pos = self._pos
if (platformID == 3 and encodingID == 1) or platformID == 0: # Microsoft, Unicode
format_ = self.get_ushort(cmap_offset + offset)
print('Format is %s' % format_)
if format_ == 4:
if not unicode_cmap_offset:
unicode_cmap_offset = cmap_offset + offset
break
self.seek(save_pos)
if not unicode_cmap_offset:
die('Font (' + self.filename + ') does not have cmap for Unicode (platform 3, encoding 1, format 4, or platform 0, any encoding, format 4)')
glyphToChar = {}
charToGlyph = {}
self.getCMAP4(unicode_cmap_offset, glyphToChar, charToGlyph)
#################/
# hmtx - Horizontal metrics table
#################/
self.getHMTX(numberOfHMetrics, numGlyphs, glyphToChar, scale)
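In this example re.sub appears only in the name-table section, to synthesise a PostScript name when the font does not declare one: spaces in the full name (name ID 4) or family name (name ID 1) are replaced with hyphens so the result is a single PS name token. A compact sketch of that fallback; the helper function and its arguments are illustrative, not part of the class above:

import re

def postscript_name(ps_name, full_name, family_name):
    # Prefer the explicit PostScript name (name ID 6); otherwise derive one
    # from the full name (ID 4) or the family name (ID 1) by replacing spaces.
    if ps_name:
        return ps_name
    if full_name:
        return re.sub(' ', '-', full_name)
    if family_name:
        return re.sub(' ', '-', family_name)
    return ''

print(postscript_name('', 'DejaVu Sans Bold', 'DejaVu Sans'))  # -> 'DejaVu-Sans-Bold'
print(postscript_name('', '', 'DejaVu Sans'))                  # -> 'DejaVu-Sans'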