re.sub

Here are examples of the Python API re.sub taken from open source projects. By voting up you can indicate which examples are most useful and appropriate.

200 Examples
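
For quick reference, re.sub(pattern, repl, string, count=0, flags=0) returns a copy of string in which every non-overlapping match of pattern is replaced by repl; repl may also be a callable that receives the match object. A minimal sketch:

import re

# Collapse runs of whitespace into a single space.
print(re.sub(r'\s+', ' ', 'too   many    spaces'))                   # 'too many spaces'

# repl can be a callable; here every word-initial character is upper-cased.
print(re.sub(r'\b\w', lambda m: m.group(0).upper(), 'hello world'))  # 'Hello World'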

Example 101

def str_parser(s, automatic_spell_check_dict={}, remove_from_brackets=False,parse_material=False,add_space_stop_list=[]):
    #the following three replacements are shared on the forum    
    s = s.replace("craftsm,an","craftsman")        
    s = re.sub(r'depot.com/search=', '', s)
    s = re.sub(r'pilers,needlenose', 'pliers, needle nose', s)
    
    s = re.sub(r'\bmr.', 'mr ', s)
    # unescape/strip leftover HTML entities
    s = re.sub(r'&amp;', '&', s)
    s = re.sub('&nbsp;', '', s)
    s = re.sub('&#39;', '', s)
    s = re.sub(r'(?<=[0-9]),[\ ]*(?=[0-9])', '', s)
    s = s.replace(";",".")
    s = s.replace(",",".")
    s = s.replace(":",". ")
    s = s.replace("+"," ")
    s = re.sub(r'\bU.S.', 'US ', s)
    s = s.replace(" W x "," ")
    s = s.replace(" H x "," ")
    s = re.sub(' [\#]\d+[\-\d]*[\,]*', '', s)    
    s = re.sub('(?<=[0-9\%])(?=[A-Z][a-z])', '. ', s) # add dot between number and cap letter
    s = re.sub(r'(?<=\))(?=[a-zA-Z0-9])', ' ', s) # add space after a closing parenthesis
    s = re.sub(r'(?<=[a-zA-Z0-9])(?=\()', ' ', s) # add space before an opening parenthesis

    if parse_material:
        replace_dict={'Medium Density Fiberboard (MDF)':'mdf', 'High Density Fiberboard (HDF)':'hdf',\
        'Fibre Reinforced Polymer (FRP)': 'frp', 'Acrylonitrile Butadiene Styrene (ABS)': 'abs',\
        'Cross-Linked Polyethylene (PEX)':'pex', 'Chlorinated Poly Vinyl Chloride (CPVC)': 'cpvc',\
        'PVC (vinyl)': 'pvc','Thermoplastic rubber (TPR)':'tpr','Poly Lactic Acid (PLA)': 'pla',\
        '100% Polyester':'polyester','100% UV Olefin':'olefin', '100% BCF Polypropylene': 'polypropylene',\
        '100% PVC':'pvc'}
        
        if s in replace_dict.keys():
            s=replace_dict[s]


    s = re.sub('[^a-zA-Z0-9\n\ \%\$\-\#\@\&\/\.\'\*\(\)]', ' ', s)
    s= " ".join(s.split())

    s=s.replace("-"," ")
    
    if len(add_space_stop_list)>0:
        s = " ".join([re.sub('(?<=[a-z])(?=[A-Z][a-z\ ])', '. ', word)  if word.lower() not in add_space_stop_list else word for word in s.split()])

    s=s.lower() 
    s = re.sub('\.(?=[a-z])', '. ', s) # dot directly before a word -> add a space after the dot
   # s = re.sub('(?<=[a-z])(?=[A-Z][a-z\ ])', ' ', s) # add space if uppercase after lowercase
    s = re.sub('(?<=[a-z][a-z][a-z])(?=[0-9])', ' ', s) # add space if number after at least three letters
    ##s = re.sub('(?<=[a-zA-Z])\.(?=\ |$)', '', s) #remove dots at the end of string
    #s = re.sub('(?<=[0-9])\.(?=\ |$)', '', s) # dot after digit before space
    s = re.sub('^\.\ ', '', s) #dot at the beginning before space
    

    if len(automatic_spell_check_dict.keys())>0:
        s=spell_correction(s,automatic_spell_check_dict=automatic_spell_check_dict)
    
    if remove_from_brackets==True:
        s = re.sub('(?<=\()[a-zA-Z0-9\n\ \%\$\-\#\@\&\/\.\'\*\(\)]*(?=\))', '', s)
    else:
        s=s.replace(" (",". ")
        s=re.sub('(?<=[a-zA-Z0-9\%\$])\(', '. ', s)
        s=s.replace(" )",". ")
        s=s.replace(")",". ")
        s=s.replace("  "," ")
        s = re.sub('\ \.', '\.', s)
        

    #######s = re.sub('(?<=[0-9\%])(?=[a-wyz])', ' ', s) # add space between number and text (except letter x) 
    #s = re.sub('(?<=[a-zA-Z])-(?=[a-zA-Z])', ' ', s) # replace '-' in words with space
    s=s.replace("at&t","att")
    s=s.replace("&"," and ")    
    s=s.replace("*"," x ")
    s = re.sub('(?<=[a-z\ ])\/(?=[a-z\ ])', ' ', s) # replace "/" between words with space
    s = re.sub('(?<=[a-z])\\\\(?=[a-z])', ' ', s) # replace "\" between words with space
    s=s.replace("  "," ")
    s=s.replace("  "," ")
    
    #s=re.sub('(?<=\ [a-ux-z])\ (?=[0-9])', '', s)   #remove spaces
    #s=re.sub('(?<=^[a-z])\ (?=[0-9])', '', s)   #remove spaces




    #####################################
    ### thesaurus replacement in all vars
    s=replace_in_parser(s)
    
    s = re.sub('half(?=\ inch)', '1/2', s)
    s = re.sub('\ba half\b', '1/2', s)
    #s = re.sub('half\ ', 'half-', s)

    s = re.sub(r'(?<=\')s\b', '', s)
    s = re.sub('(?<=[0-9])\'\'', ' in ', s)
    s = re.sub('(?<=[0-9])\'', ' in ', s)

    s = re.sub(r'(?<=[0-9])[\ ]*inch[es]*\b', '-in ', s)
    s = re.sub(r'(?<=[0-9])[\ ]*in\b', '-in ', s)
    
    s = re.sub(r'(?<=[0-9])[\-|\ ]*feet[s]*\b', '-ft ', s)
    s = re.sub(r'(?<=[0-9])[\ ]*foot[s]*\b', '-ft ', s)
    s = re.sub(r'(?<=[0-9])[\ ]*ft[x]*\b', '-ft ', s)
    
    s = re.sub('(?<=[0-9])[\ ]*volt[s]*(?=\ |$|\.)', '-V ', s)
    s = re.sub('(?<=[0-9])[\ ]*v(?=\ |$|\.)', '-V ', s)
    
    s = re.sub('(?<=[0-9])[\ ]*wat[t]*[s]*(?=\ |$|\.)', '-W ', s)
    s = re.sub('(?<=[0-9])[\ ]*w(?=\ |$|\.)', '-W ', s)
    
    s = re.sub('(?<=[0-9])[\ ]*kilo[\ ]*watt[s]*(?=\ |$|\.)', '-KW ', s)
    s = re.sub('(?<=[0-9])[\ ]*kw(?=\ |$|\.)', '-KW ', s)
    
    s = re.sub('(?<=[0-9])[\ ]*amp[s]*(?=\ |$|\.)', '-A ', s)
    #s = re.sub('(?<=[0-9]) a(?=\ |$|\.)', '-A. ', s)
    s = re.sub('(?<=[0-9])a(?=\ |$|\.)', '-A ', s)

    s = re.sub('(?<=[0-9])[\ ]*gallon[s]*(?=\ |$|\.)', '-gal ', s)
    s = re.sub('(?<=[0-9])[\ ]*gal(?=\ |$|\.)', '-gal ', s)
        
    s = re.sub('(?<=[0-9])[\ ]*pound[s]*(?=\ |$|\.)', '-lb ', s)
    s = re.sub('(?<=[0-9])[\ ]*lb[s]*(?=\ |$|\.)', '-lb ', s)
        
    s = re.sub('(?<=[0-9])[\ ]*mi[l]+imet[er]*[s]*(?=\ |$|\.)', '-mm ', s)
    s = re.sub('(?<=[0-9])[\ ]*mm(?=\ |$|\.)', '-mm ', s)
        
    s = re.sub('(?<=[0-9])[\ ]*centimeter[s]*(?=\ |$|\.)', '-cm ', s)
    s = re.sub('(?<=[0-9])[\ ]*cm(?=\ |$|\.)', '-cm ', s)
        
    s = re.sub('(?<=[0-9])[\ ]*ounce[s]*(?=\ |$|\.)', '-oz ', s)
    s = re.sub('(?<=[0-9])[\ ]*oz(?=\ |$|\.)', '-oz ', s)
    
    s = re.sub('(?<=[0-9])[\ ]*liter[s]*(?=\ |$|\.)', '-L ', s)
    s = re.sub('(?<=[0-9])[\ ]*litre[s]*(?=\ |$|\.)', '-L ', s)
    s = re.sub('(?<=[0-9])[\ ]*l(?=\ |$|\.)', '-L. ', s)
    
    s = re.sub('(?<=[0-9])[\ ]*square feet[s]*(?=\ |$|\.)', '-sqft ', s)
    s = re.sub('(?<=[0-9])square feet[s]*(?=\ |$|\.)', '-sqft ', s)
    s = re.sub('(?<=[0-9])[\ ]*sq[\ |\.|\.\ ]*ft(?=\ |$|\.)', '-sqft ', s)
    s = re.sub('(?<=[0-9])[\ ]*sq. ft(?=\ |$|\.)', '-sqft', s)
    s = re.sub('(?<=[0-9])[\ ]*sq.ft(?=\ |$|\.)', '-sqft', s)
    
    s = re.sub('(?<=[0-9])[\ ]*cubic f[e]*t[s]*(?=\ |$|\.)', '-cuft ', s)
    s = re.sub('(?<=[0-9])[\ ]*cu[\ |\.|\.\ ]*ft(?=\ |$|\.)', '-cuft ', s)
    s = re.sub('(?<=[0-9])[\ ]*cu[\.]*[\ ]*ft(?=\ |$|\.)', '-cuft', s)
    
     
    #remove 'x'
    s = re.sub('(?<=[0-9]) x (?=[0-9])', '-X ', s)
    s = re.sub('(?<=[0-9])x (?=[0-9])', '-X ', s)
    s = re.sub('(?<=[0-9]) x(?=[0-9])', '-X ', s)
    s = re.sub('(?<=[0-9])x(?=[0-9])', '-X ', s)
    
    #s=s.replace("..",".")
    s=s.replace("\n"," ")
    s=s.replace("  "," ")

    words=s.split()

    if s.find("-X")>=0:
        for cnt in range(0,len(words)-1):
            if words[cnt].find("-X")>=0:
                if words[cnt+1].find("-X") and cnt<len(words)-2:
                    cntAdd=2
                else:
                    cntAdd=1
                to_replace=re.search(r'(?<=[0-9]\-)\w+\b',words[cnt+cntAdd])
                if not (to_replace==None):
                    words[cnt]=words[cnt].replace("-X","-"+to_replace.group(0)+"")
                else:
                    words[cnt]=words[cnt].replace("-X","x")
    s = " ".join([word for word in words])
    
    s = re.sub('[^a-zA-Z0-9\ \%\$\-\@\&\/\.]', '', s) #remove "'", "\n", "#" and other special characters
    ##s = re.sub('(?<=[a-zA-Z])[\.|\/](?=\ |$)', '', s) #remove dots at the end of string
    s = re.sub('(?<=[0-9])x(?=\ |$)', '', s) #remove 
    s = re.sub('(?<=[\ ])x(?=[0-9])', '', s) #remove
    s = re.sub('(?<=^)x(?=[0-9])', '', s)
    #s = re.sub('[\ ]\.(?=\ |$)', '', s) #remove dots 
    s=s.replace("  "," ")
    s=s.replace("..",".")
    s = re.sub('\ \.', '', s)
    
    s=re.sub('(?<=\ [ch-hj-np-su-z][a-z])\ (?=[0-9])', '', s) #remove spaces
    s=re.sub('(?<=^[ch-hj-np-su-z][a-z])\ (?=[0-9])', '', s) #remove spaces
    
    s = re.sub('(?<=\ )\.(?=[0-9])', '0.', s)
    s = re.sub('(?<=^)\.(?=[0-9])', '0.', s)
    return " ".join([word for word in s.split()])
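
The parser above leans heavily on zero-width lookbehind/lookahead assertions so that the digit anchoring a unit is matched but never consumed. A minimal sketch of that idiom in isolation (the unit suffix is written here as an optional group rather than the character class used above):

import re

def normalize_inches(s):
    # '(?<=[0-9])' requires a preceding digit without consuming it, so the
    # digit survives and only the unit spelling is rewritten.
    return re.sub(r'(?<=[0-9])[\ ]*inch(?:es)?\b', '-in ', s)

s = normalize_inches('a 3 inch screw next to a 10inches pipe')
print(' '.join(s.split()))   # -> 'a 3-in screw next to a 10-in pipe'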

Example 102

Project: txtorg
Source File: nielsenstemmer.py
def removeStopWords (texts):

    # Split up the words...
    texts_split = texts.split(" ")

    preps = [
        u'\u0641\u064a',  #fy
        u'\u0641\u064a\u0647',  #fyh
        u'\u0641\u064a\u0647\u0627',  #fyha
        u'\u0641\u064a\u0647\u0645',  #fyhm
        u'\u0639\u0644\u0649',  #3lA
        u'\u0639\u0644\u064a\u0643',  #3lyk
        u'\u0639\u0644\u064a\u0647',  #3lyh
        u'\u0639\u0644\u064a\u0647\u0627',  #3lyha
        u'\u0639\u0644\u064a\u0647\u0645',  #3lyhm
        u'\u0639\u0644\u064a',  #3ly
        u'\u0628\u0647',  #bh
        u'\u0628\u0647\u0627',  #bha
        u'\u0628\u0647\u0645',  #bhm
        u'\u0644\u0647',  #lh
        u'\u0644\u0647\u0627',  #lha
        u'\u0644\u0647\u0645',  #lhm
        u'\u0645\u0639',  #m3
        u'\u0645\u0639\u0647',  #m3h
        u'\u0645\u0639\u0647\u0627',  #m3ha
        u'\u0645\u0639\u0647\u0645',  #m3hm
        u'\u0639\u0646',  #3n
        u'\u0639\u0646\u0647',  #3nh
        u'\u0639\u0646\u0647\u0627',  #3nha
        u'\u0639\u0646\u0647\u0645',  #3nhm
        u'\u062a\u062d\u062a',  #t7t
        u'\u062d\u062a\u0649',  #7tA
        u'\u0641\u0648\u0642',  #fwQ
        u'\u0641\u0648\u0642\u064e',  #fwQ?
        u'\u0628\u062c\u0627\u0646\u0628',  #bjanb
        u'\u0623\u0645\u0627\u0645',  #amam
        u'\u0623\u0645\u0627\u0645\u064e',  #amam?
        u'\u0627\u0645\u0627\u0645',  #amam
        u'\u062e\u0627\u0631\u062c',  #Karj
        u'\u0628\u0627\u0644\u062e\u0627\u0631\u062c',  #balKarj
        u'\u062d\u0648\u0644\u064e',  #7wl?
        u'\u062d\u0648\u0644',  #7wl
        u'\u0631\u063a\u0645',  #rGm
        u'\u0628\u0627\u0644\u0631\u063a\u0645',  #balrGm
        u'\u0631\u063a\u0645\u064e',  #rGm?
        u'\u0645\u0646\u0630',  #mni
        u'\u0645\u0646\u0630\u064f',  #mni?
        u'\u0645\u0646',  #mn
        u'\u062e\u0644\u0627\u0644',  #Klal
        u'\u062e\u0644\u0627\u0644\u064e',  #Klal?
        u'\u062d\u0648\u0644',  #7wl
        u'\u062d\u0648\u0644\u064e',  #7wl?
        u'\u0642\u0628\u0644',  #Qbl
        u'\u0642\u0628\u0644\u064e',  #Qbl?
        u'\u0648\u0641\u0642\u0627',  #wfQa
        u'\u0625\u0644\u0649',  #alA
        u'\u0627\u0644\u0649\u0648\u0631\u0627\u0621\u064e',  #alAwraq?
        u'\u0648\u0631\u0627\u0621',  #wraq
        u'\u0628\u064a\u0646\u064e',  #byn?
        u'\u0628\u064a\u0646',  #byn
        u'\u0628\u062f\u0648\u0646',  #bdwn
        u'\u0644\u0643\u0646',  #lkn
        u'\u0628\u0627\u062a\u062c\u0627\u0647',  #batjah
        u'\u0623\u0642\u0644',  #aQl
        u'\u0627\u0642\u0644',  #aQl
        u'\u0627\u0643\u062b\u0631'  #akUr
        ]  

    pronouns = [
        u'\u0647\u0630\u0627',  #hia
        u'\u0647\u0630\u0647',  #hih
        u'\u0630\u0644\u0643',  #ilk
        u'\u062a\u0644\u0643',  #tlk
        u'\u0647\u0624\u0644\u064e\u0627\u0621',  #hol?aq
        u'\u0647\u0624\u0644\u0627\u0621',  #holaq
        u'\u0627\u0648\u0644\u0627\u0626\u0643',  #awla5k
        u'\u0647\u0630\u0627\u0646',  #hian
        u'\u0647\u0630\u064a\u0646\u0647\u062a\u0627\u0646',  #hiynhtan
        u'\u0647\u062a\u064a\u0646\u0623\u0646\u0627',  #htynana
        u'\u0627\u0646\u0627',  #ana
        u'\u0623\u0646\u062a',  #ant
        u'\u0647\u0645\u0627',  #hma
        u'\u0623\u0646\u062a\u064e',  #ant?
        u'\u0627\u0646\u062a',  #ant
        u'\u0623\u0646\u062a',  #ant
        u'\u0623\u0646\u062a\u0650',  #ant?
        u'\u0627\u0646\u062a\u0647\u0648',  #anthw
        u'\u0647\u0648\u064e',  #hw?
        u'\u0647\u0648',  #hw
        u'\u0647\u064a',  #hy
        u'\u0647\u064a\u064e',  #hy?
        u'\u0646\u062d\u0646',  #n7n
        u'\u0623\u0646\u062a\u0645',  #antm
        u'\u0627\u0646\u062a\u0645',  #antm
        u'\u0623\u0646\u062a\u0645',  #antm
        u'\u0627\u0646\u062a\u0645',  #antm
        u'\u0647\u064f\u0645',  #h?m
        u'\u0647\u0645',  #hm
        u'\u0644\u0647\u0645',  #lhm
        u'\u0645\u0646\u0647\u0645',  #mnhm
        u'\u0648\u0647\u0645',  #whm
        u'\u0627\u0644\u062a\u064a',  #alty
        u'\u0627\u0644\u0630\u064a',  #aliy
        u'\u0627\u0644\u0644\u0630\u0627\u0646',  #allian
        u'\u0627\u0644\u0644\u0630\u064a\u0646',  #alliyn
        u'\u0627\u0644\u0644\u062a\u0627\u0646',  #alltan
        u'\u0627\u0644\u0644\u062a\u064a\u0646'  #alltyn
        ]

    particles = [
        u'\u0627\u0646',  #an
        u'\u0648\u0627\u0646',  #wan
        u'\u0625\u0646',  #an
        u'\u0625\u0646\u0647',  #anh
        u'\u0625\u0646\u0647\u0627',  #anha
        u'\u0625\u0646\u0647\u0645',  #anhm
        u'\u0625\u0646\u0647\u0645\u0627',  #anhma
        u'\u0625\u0646\u064a',  #any
        u'\u0648\u0625\u0646',  #wan
        u'\u0648\u0623\u0646',  #wan
        u'\u0627\u0646',  #an
        u'\u0627\u0646\u0647',  #anh
        u'\u0627\u0646\u0647\u0627',  #anha
        u'\u0627\u0646\u0647\u0645',  #anhm
        u'\u0627\u0646\u0647\u0645\u0627',  #anhma
        u'\u0627\u0646\u064a',  #any
        u'\u0648\u0627\u0646',  #wan
        u'\u0648\u0627\u0646',  #wan
        u'\u0623\u0646',  #an
        u'\u0627\u0646',  #an
        u'\u0623\u0644\u0627',  #ala
        u'\u0628\u0623\u0646',  #ban
        u'\u0627\u0646',  #an
        u'\u0627\u0644\u0627',  #ala
        u'\u0628\u0627\u0646',  #ban
        u'\u0623\u0646\u0647',  #anh
        u'\u0623\u0646\u0647\u0627',  #anha
        u'\u0623\u0646\u0647\u0645',  #anhm
        u'\u0623\u0646\u0647\u0645\u0627',  #anhma
        u'\u0627\u0646\u0647',  #anh
        u'\u0627\u0646\u0647\u0627',  #anha
        u'\u0627\u0646\u0647\u0645',  #anhm
        u'\u0627\u0646\u0647\u0645\u0627',  #anhma
        u'\u0623\u0630',  #ai
        u'\u0627\u0630',  #ai
        u'\u0627\u0630\u0627',  #aia
        u'\u0625\u0630',  #ai
        u'\u0625\u0630\u0627',  #aia
        u'\u0648\u0625\u0630',  #wai
        u'\u0648\u0625\u0630\u0627',  #waia
        u'\u0627\u0630',  #ai
        u'\u0627\u0630',  #ai
        u'\u0627\u0630\u0627',  #aia
        u'\u0627\u0630',  #ai
        u'\u0627\u0630\u0627',  #aia
        u'\u0641\u0627\u0630\u0627',  #faia
        u'\u0645\u0627\u0630\u0627',  #maia
        u'\u0648\u0627\u0630',  #wai
        u'\u0648\u0627\u0630\u0627',  #waia
        u'\u0644\u0648\u0644\u0627',  #lwla
        u'\u0644\u0648',  #lw
        u'\u0648\u0644\u0648\u0633\u0648\u0641',  #wlwswf
        u'\u0644\u0646',  #ln
        u'\u0645\u0627',  #ma
        u'\u0644\u0645',  #lm
        u'\u0648\u0644\u0645',  #wlm
        u'\u0623\u0645\u0627',  #ama
        u'\u0627\u0645\u0627',  #ama
        u'\u0644\u0627',  #la
        u'\u0625\u0644\u0627',  #ala
        u'\u0627\u0644\u0627',  #ala
        u'\u0623\u0645',  #am
        u'\u0623\u0648',  #aw
        u'\u0627\u0645',  #am
        u'\u0627\u0648',  #aw
        u'\u0628\u0644',  #bl
        u'\u0623\u0646\u0645\u0627',  #anma
        u'\u0625\u0646\u0645\u0627',  #anma
        u'\u0628\u0644',  #bl
        u'\u0627\u0646\u0645\u0627',  #anma
        u'\u0627\u0646\u0645\u0627',  #anma
        u'\u0648'  #w
        ]

    # Connectors
    connectors = [u'\u0628\u0645\u0627',  #bma
        u'\u0643\u0645\u0627',  #kma
        u'\u0644\u0645\u0627',  #lma
        u'\u0644\u0623\u0646',  #lan
        u'\u0644\u064a', #ly
        u'\u0644\u0649', #ly
        u'\u0644\u0623\u0646\u0647',  #lanh
        u'\u0644\u0623\u0646\u0647\u0627',  #lanha
        u'\u0644\u0623\u0646\u0647\u0645',  #lanhm
        u'\u0644\u0627\u0646',  #lan
        u'\u0644\u0627\u0646\u0647',  #lanh
        u'\u0644\u0627\u0646\u0647\u0627',  #lanha
        u'\u0644\u0627\u0646\u0647\u0645',  #lanhm
        u'\u062b\u0645',  #Um
        u'\u0623\u064a\u0636\u0627',  #ayDa
        u'\u0627\u064a\u0636\u0627',  #ayDa
        u'\u0643\u0630\u0644\u0643',  #kilk
        u'\u0642\u0628\u0644',  #Qbl
        u'\u0628\u0639\u062f',  #b3d
        u'\u0644\u0643\u0646',  #lkn
        u'\u0648\u0644\u0643\u0646',  #wlkn
        u'\u0644\u0643\u0646\u0647',  #lknh
        u'\u0644\u0643\u0646\u0647\u0627',  #lknha
        u'\u0644\u0643\u0646\u0647\u0645',  #lknhm
        u'\u0641\u0642\u0637',  #fQT
        u'\u0631\u063a\u0645',  #rGm
        u'\u0628\u0627\u0644\u0631\u063a\u0645',  #balrGm
        u'\u0628\u0641\u0636\u0644',  #bfDl
        u'\u062d\u064a\u062b',  #7yU
        u'\u0628\u062d\u064a\u062b',  #b7yU
        u'\u0644\u0643\u064a',  #lky
        u'\u0647\u0646\u0627',  #hna
        u'\u0647\u0646\u0627\u0643',  #hnak
        u'\u0628\u0633\u0628\u0628',  #bsbb
        u'\u0630\u0627\u062a',  #iat
        u'\u0630\u0648',  #iw
        u'\u0630\u064a',  #iy
        u'\u0630\u0649',  #iy
        u'\u0648\u0647', #wh
        u'\u064a\u0627',  #ya
        u'\u0627\u0646\u0645\u0627',  #anma
        u'\u0641\u0647\u0630\u0627',  #fhia
        u'\u0641\u0647\u0648',  #fhw
        u'\u0641\u0645\u0627',  #fma
        u'\u0641\u0645\u0646',  #fmn
        u'\u0641\u064a\u0645\u0627', #fyma
        u'\u0641\u0647\u0644',  #fhl
        u'\u0648\u0647\u0644',  #fhl
        u'\u0641\u0647\u0624\u0644\u0627\u0621',  #fholaq
        u'\u0643\u0630\u0627', #kia
        u'\u0644\u0630\u0644\u0643', #lilk
        u'\u0644\u0645\u0627\u0630\u0627', #lmaia
        u'\u0644\u0645\u0646', #lmn
        u'\u0644\u0646\u0627',  #lna
        u'\u0645\u0646\u0627',  #mna
        u'\u0645\u0646\u0643',  #mnk
        u'\u0645\u0646\u0643\u0645',  #mnkm
        u'\u0645\u0646\u0647\u0645\u0627',  #mnhma
        u'\u0644\u0643', #lk
        u'\u0648\u0644\u0648', #wlw
        u'\u0645\u0645\u0627', #mma
        u'\u0639\u0646\u062f',  #3nd
        u'\u0639\u0646\u062f\u0647\u0645',  #3ndhm
        u'\u0639\u0646\u062f\u0645\u0627',  #3ndma
        u'\u0639\u0646\u062f\u0646\u0627',  #3ndna
        u'\u0639\u0646\u0647\u0645\u0627',  #3nhma
        u'\u0639\u0646\u0643',  #3nk
        u'\u0627\u0630\u0646',  #ain
        u'\u0627\u0644\u0630\u064a',  #aliy
        u'\u0641\u0627\u0646\u0627',  #fana
        u'\u0641\u0627\u0646\u0647\u0645',  #fanhm
        u'\u0641\u0647\u0645',  #fhm
        u'\u0641\u0647',  #fh
        u'\u0641\u0643\u0644',  #fkl
        u'\u0644\u0643\u0644',  #lkl
        u'\u0644\u0643\u0645',  #lkm
        u'\u0641\u0644\u0645',  #flm
        u'\u0641\u0644\u0645\u0627',  #flma
        u'\u0641\u064a\u0643',  #fyk
        u'\u0641\u064a\u0643\u0645',  #fykm
        u'\u0644\u0647\u0630\u0627'    # lhia
        ]

    all = preps+pronouns+particles+connectors
    all = ' '.join(all)
    all = all+ ' ' + fixAlifs(all)
    all = list(set(all.split(' ')))
    
    for i in range(len(texts_split)):
        word = texts_split[i] 
        if word in all :
            texts_split[i] = ''

    # Rejoining the texts again...
    texts = ''.join([word + " "  for word in texts_split])
    # split to get rid of white space
    #texts_split = new_texts.split()
    # then re-rejoin them.
    #out_texts = ''.join([word + " "  for word in texts_split])
    ## clean up spaces
    return(re.sub(' {2,}', ' ', texts))
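
A hypothetical variant of the same stop-word removal: instead of splitting the text and blanking matches word by word, the escaped stop words can be joined into one alternation and removed with a single re.sub call, followed by the same whitespace cleanup. The Latin stand-ins below are placeholders, not the Arabic stop words used above.

import re

stop_words = ['fy', 'mn', '3lA']   # placeholder stand-ins
pattern = r'(?:(?<=\s)|^)(?:%s)(?=\s|$)' % '|'.join(map(re.escape, stop_words))

text = 'fy keep mn these words'
cleaned = re.sub(' {2,}', ' ', re.sub(pattern, '', text)).strip()
print(cleaned)                     # -> 'keep these words'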

Example 103

Project: poio-api
Source File: toolbox.py
    def _build_annotations(self):
        """
        Helper method to parse the input file and store intermediate information
        in attributes.

        """
 
        elements = dict()
        ids = dict()

        current_record_id = 0
        current_utterance_id = 0
        current_id = 0

        first_marker_found = False
        tier_marker = None

        current_utterance = None

        # Go through lines in the input file
        for line_number, line in enumerate(self.input_stream):
            # remove BOM
            if line_number == 0:

                if line.startswith(codecs.BOM_UTF8):
                    line = line[BOMLEN:]
                    
            line = line.decode("utf-8", 'ignore')
            line = line.strip()

            if "\name" in line:
                self.meta_information = line.split(None,2)[2]

            if line == "":
                if len(elements) > 0:
                    self._process_record(elements, ids, current_utterance_id)
                    elements = dict()
                    ids = dict()
                continue

            # parse line
            last_tier_marker = tier_marker
            tier_marker = None
            line_content = None
            match_tier_marker = re_tier_marker.search(line)
            if match_tier_marker:
                tier_marker = match_tier_marker.group(1)
                line_content = re_tier_marker.sub("", line)
                line_content = line_content.lstrip()
            elif first_marker_found:
                # append to last annotation's content
                id_to_add = current_record_id
                if last_tier_marker in self.utterance_level_markers:
                    id_to_add = current_utterance_id

                if self._tier_labels.tier_label_exists(last_tier_marker):
                    self._annotations_for_parent[
                        ("a{0}".format(id_to_add),
                            last_tier_marker)][-1].value += " " + \
                            line

                tier_marker = last_tier_marker
                continue

            # skip all lines before first record marker
            if not first_marker_found:
                if tier_marker != self.record_marker:
                    continue
                else:
                    first_marker_found = True

            if tier_marker in self.word_level_markers:
                # Is it a new utterance? Then create a new ID.
                if current_utterance is None:
                    current_utterance = ""

                if current_utterance == "":
                    current_utterance_id = current_id
                    current_id += 1

                current_utterance += re.sub("\s+", " ", line_content) + " "

            if tier_marker in self.word_level_markers or \
                    tier_marker in self.morpheme_level_markers or \
                    tier_marker in self.tag_level_markers:

                if tier_marker not in elements:
                    elements[tier_marker] = dict()
                if tier_marker not in ids:
                    ids[tier_marker] = dict()

                for j, match in enumerate(re_word.finditer(line)):
                    pos = char_len(line[:match.start(1)])
                    elements[tier_marker][pos] = match.group(1)
                    ids[tier_marker][pos] = "a{0}".format(current_id)
                    current_id += 1

            # utterance level markers
            elif tier_marker in self.utterance_level_markers:

                # we left the utterance tiers, so create an utterance
                # annotation based on the content and make it the current
                # utterance
                if current_utterance is not None and current_utterance != "":
                    current_utterance = current_utterance.rstrip()
                    self._annotate_utterance(current_record_id,
                                             current_utterance_id,
                                             current_utterance)

                    current_utterance = ""

                elif current_utterance is None:
                    current_utterance_id = current_id
                    current_id += 1
                    self._annotate_utterance(current_record_id,
                                             current_utterance_id, "")

                    current_utterance = ""

                # add the annotation to the current utterance
                self._annotations_for_parent[
                    ("a{0}".format(current_utterance_id), tier_marker)].append(
                        poioapi.io.graf.Annotation(
                            "a{0}".format(current_id), line_content))
                current_id += 1

            # record level markers
            elif tier_marker in self.record_level_markers:

                if tier_marker == self.record_marker:
                    # this is to ensure that the utterance get annotated even
                    # if there were no other utterance_level_markers to cause
                    # it to be
                    if current_utterance is not None and current_utterance != '':
                        self._annotate_utterance(current_record_id,
                                                 current_utterance_id,
                                                 current_utterance)

                    self._annotations_for_parent[
                        (None, tier_marker)].append(
                            poioapi.io.graf.Annotation(
                                "a{0}".format(current_id), line_content))
                    current_record_id = current_id
                    current_id += 1
                    current_utterance = None

                else:
                    # this is to ensure that the utterance get annotated even
                    # if there were no other utterance_level_markers to cause
                    # it to be
                    if current_utterance is not None and current_utterance != '':
                        self._annotate_utterance(current_record_id,
                                                 current_utterance_id,
                                                 current_utterance)

                    self._annotations_for_parent[
                        ("a{0}".format(current_record_id), tier_marker)].append(
                            poioapi.io.graf.Annotation(
                                "a{0}".format(current_id), line_content))

                    current_id += 1

        self.input_stream.seek(0)
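
A minimal sketch of the tier-marker handling above. re_tier_marker is defined elsewhere in the module; the pattern below is only an assumption based on the Toolbox convention that a tier line starts with a backslash marker such as \tx.

import re

# Assumed definition; the real re_tier_marker lives elsewhere in toolbox.py.
re_tier_marker = re.compile(r'^\\(\S+)')

line = r'\tx ninu nawa-ra'
match = re_tier_marker.search(line)
if match:
    tier_marker = match.group(1)                           # 'tx'
    line_content = re_tier_marker.sub('', line).lstrip()   # 'ninu nawa-ra'
    line_content = re.sub(r'\s+', ' ', line_content)       # collapse whitespace
    print(tier_marker, line_content)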

Example 104

Project: hue
Source File: create_table.py
def import_wizard(request, database='default'):
  """
  Help users define a table based on a file they want to import to Hive.
  Limitations:
    - Rows are delimited (no serde).
    - No detection for map and array types.
    - No detection for the presence of column header in the first row.
    - No partition table.
    - Does not work with binary data.
  """
  encoding = i18n.get_site_encoding()
  app_name = get_app_name(request)

  db = dbms.get(request.user)
  dbs = db.get_databases()
  databases = [{'name':db, 'url':reverse('beeswax:import_wizard', kwargs={'database': db})} for db in dbs]

  if request.method == 'POST':
    #
    # General processing logic:
    # - We have 3 steps. Each requires the previous.
    #   * Step 1      : Table name and file location
    #   * Step 2a     : Display sample with auto chosen delim
    #   * Step 2b     : Display sample with user chosen delim (if user chooses one)
    #   * Step 3      : Display sample, and define columns
    # - Each step is represented by a different form. The form of an earlier step
    #   should be present when submitting to a later step.
    # - To preserve the data from the earlier steps, we send the forms back as
    #   hidden fields. This way, when users revisit a previous step, the data would
    #   be there as well.
    #
    delim_is_auto = False
    fields_list, n_cols = [[]], 0
    s3_col_formset = None
    s1_file_form = CreateByImportFileForm(request.POST, db=db)

    if s1_file_form.is_valid():
      do_s2_auto_delim = request.POST.get('submit_file')        # Step 1 -> 2
      do_s2_user_delim = request.POST.get('submit_preview')     # Step 2 -> 2
      do_s3_column_def = request.POST.get('submit_delim')       # Step 2 -> 3
      do_hive_create = request.POST.get('submit_create')        # Step 3 -> execute

      cancel_s2_user_delim = request.POST.get('cancel_delim')   # Step 2 -> 1
      cancel_s3_column_def = request.POST.get('cancel_create')  # Step 3 -> 2

      # Exactly one of these should be True
      if len(filter(None, (do_s2_auto_delim, do_s2_user_delim, do_s3_column_def, do_hive_create, cancel_s2_user_delim, cancel_s3_column_def))) != 1:
        raise PopupException(_('Invalid form submission'))

      if not do_s2_auto_delim:
        # We should have a valid delim form
        s2_delim_form = CreateByImportDelimForm(request.POST)
        if not s2_delim_form.is_valid():
          # Go back to picking delimiter
          do_s2_user_delim, do_s3_column_def, do_hive_create = True, False, False
      if do_hive_create:
        # We should have a valid columns formset
        s3_col_formset = ColumnTypeFormSet(prefix='cols', data=request.POST)
        if not s3_col_formset.is_valid():
          # Go back to define columns
          do_s3_column_def, do_hive_create = True, False

      load_data = s1_file_form.cleaned_data.get('load_data', 'IMPORT').upper()
      path = s1_file_form.cleaned_data['path']

      #
      # Go to step 2: We've just picked the file. Preview it.
      #
      if do_s2_auto_delim:
        try:
          if load_data == 'IMPORT':
            if not request.fs.isfile(path):
              raise PopupException(_('Path location must refer to a file if "Import Data" is selected.'))
          elif load_data == 'EXTERNAL':
            if not request.fs.isdir(path):
              raise PopupException(_('Path location must refer to a directory if "Create External Table" is selected.'))
        except (IOError, S3FileSystemException), e:
          raise PopupException(_('Path location "%s" is invalid: %s') % (path, e))

        delim_is_auto = True
        fields_list, n_cols, s2_delim_form = _delim_preview(request.fs, s1_file_form, encoding, [reader.TYPE for reader in FILE_READERS], DELIMITERS)

      if (do_s2_user_delim or do_s3_column_def or cancel_s3_column_def) and s2_delim_form.is_valid():
        # Delimit based on input
        fields_list, n_cols, s2_delim_form = _delim_preview(request.fs, s1_file_form, encoding, (s2_delim_form.cleaned_data['file_type'],),
                                                            (s2_delim_form.cleaned_data['delimiter'],))

      if do_s2_auto_delim or do_s2_user_delim or cancel_s3_column_def:
        return render('import_wizard_choose_delimiter.mako', request, {
          'action': reverse(app_name + ':import_wizard', kwargs={'database': database}),
          'delim_readable': DELIMITER_READABLE.get(s2_delim_form['delimiter'].data[0], s2_delim_form['delimiter'].data[1]),
          'initial': delim_is_auto,
          'file_form': s1_file_form,
          'delim_form': s2_delim_form,
          'fields_list': fields_list,
          'delimiter_choices': TERMINATOR_CHOICES,
          'n_cols': n_cols,
          'database': database,
          'databases': databases
        })

      #
      # Go to step 3: Define column.
      #
      if do_s3_column_def:
        if s3_col_formset is None:
          columns = []
          for i in range(n_cols):
            columns.append({
                'column_name': 'col_%s' % (i,),
                'column_type': 'string',
            })
          s3_col_formset = ColumnTypeFormSet(prefix='cols', initial=columns)
        try:
          fields_list_for_json = list(fields_list)
          if fields_list_for_json:
            fields_list_for_json[0] = map(lambda a: re.sub('[^\w]', '', a), fields_list_for_json[0]) # Cleaning headers

          return render('import_wizard_define_columns.mako', request, {
            'action': reverse(app_name + ':import_wizard', kwargs={'database': database}),
            'file_form': s1_file_form,
            'delim_form': s2_delim_form,
            'column_formset': s3_col_formset,
            'fields_list': fields_list,
            'fields_list_json': json.dumps(fields_list_for_json),
            'n_cols': n_cols,
            'database': database,
            'databases': databases
          })
        except Exception, e:
          raise PopupException(_("The selected delimiter is creating an un-even number of columns. Please make sure you don't have empty columns."), detail=e)

      #
      # Final: Execute
      #
      if do_hive_create:
        delim = s2_delim_form.cleaned_data['delimiter']
        table_name = s1_file_form.cleaned_data['name']

        proposed_query = django_mako.render_to_string("create_table_statement.mako", {
            'table': {
                'name': table_name,
                'comment': s1_file_form.cleaned_data['comment'],
                'row_format': 'Delimited',
                'field_terminator': delim,
                'file_format': 'TextFile',
                'load_data': load_data,
                'path': path,
                'skip_header': request.REQUEST.get('removeHeader', 'off').lower() == 'on'
             },
            'columns': [ f.cleaned_data for f in s3_col_formset.forms ],
            'partition_columns': [],
            'database': database,
            'databases': databases
          }
        )
        try:
          return _submit_create_and_load(request, proposed_query, table_name, path, load_data, database=database)
        except QueryServerException, e:
          raise PopupException(_('The table could not be created.'), detail=e.message)
  else:
    s1_file_form = CreateByImportFileForm()

  return render('import_wizard_choose_file.mako', request, {
    'action': reverse(app_name + ':import_wizard', kwargs={'database': database}),
    'file_form': s1_file_form,
    'database': database,
    'databases': databases
  })
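
A minimal sketch of the header-cleaning call in step 3 above: every non-word character is stripped from the first sample row so that the proposed column names contain only letters, digits, and underscores.

import re

headers = ['First Name', 'e-mail (work)', 'zip/postal code']
print([re.sub(r'[^\w]', '', h) for h in headers])
# -> ['FirstName', 'emailwork', 'zippostalcode']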

Example 105

Project: cmdbac
Source File: basedeployer.py
    def save_attempt(self, attempt_result, driver_result = {}):
        LOG.info("Saving attempt ...")

        # flush log
        self.flush_log()

        # get info
        register_result = driver_result.get('register', USER_STATUS_UNKNOWN)
        login_result = driver_result.get('login', USER_STATUS_UNKNOWN)
        forms = driver_result.get('forms', None)
        urls = driver_result.get('urls', None)
        screenshot_path = driver_result.get('screenshot', None)
        statistics = driver_result.get('statistics', None)
        informations = driver_result.get('informations', None)

        # get runtime
        if self.runtime == None:
            self.runtime = self.get_runtime()
        Runtime.objects.get_or_create(executable = self.runtime['executable'], version = self.runtime['version'])
        runtime = Runtime.objects.get(executable = self.runtime['executable'], version = self.runtime['version'])

        # save attempt
        self.attempt.result = attempt_result
        self.attempt.login = login_result
        self.attempt.register = register_result
        self.attempt.stop_time = datetime.now()
        self.attempt.size = utils.get_size(self.base_path)
        self.attempt.runtime = runtime
        self.attempt.actions_count = 0
        self.attempt.queries_count = 0
        if forms == None and urls == None and self.attempt.result == ATTEMPT_STATUS_SUCCESS:
            self.attempt.result = ATTEMPT_STATUS_NO_QUERIES

        self.attempt.save()

        # save forms
        if forms != None:
            url_patterns = set()
            for f in forms:
                try:
                    if '/admin' in f['url']:
                        continue
                    url_pattern = re.sub('\d', '', f['url'])
                    if url_pattern in url_patterns:
                        continue
                    url_patterns.add(url_pattern)
                    action = Action()
                    action.url = f['url']
                    if f['method'] == '':
                        f['method'] = 'get'
                    action.method = f['method'].upper()
                    action.attempt = self.attempt
                    action.save()
                    self.attempt.actions_count += 1
                    for q in f['queries']:
                        try:
                            query = Query()
                            query.content = q['content']
                            query.matched = q['matched']
                            query.action = action
                            query.save()
                            self.attempt.queries_count += 1

                            if 'explain' in q:
                                explain = Explain()
                                explain.output = q['explain']
                                explain.query = query
                                explain.save()

                            if 'stats' in q:
                                metric = QueryMetric()
                                metric.name = 'stats'
                                metric.value = str(q['stats'])
                                metric.query = query
                                metric.save()
                        except:
                            pass
                    for input in f['inputs']:
                        field = Field()
                        field.name = input['name']
                        field.type = input['type']
                        field.action = action
                        field.save()
                    for description, count in f['counter'].iteritems():
                        counter = Counter()
                        counter.description = description
                        counter.count = count
                        counter.action = action
                        counter.save()
                except Exception, e:
                    LOG.exception(e)  

        # save urls
        if urls != None:
            url_patterns = set()
            for u in urls:
                try:
                    if '/admin' in u['url']:
                        continue
                    url_pattern = re.sub('\d', '', u['url'])
                    if url_pattern in url_patterns:
                        continue
                    url_patterns.add(url_pattern)
                    action = Action()
                    action.url = u['url']
                    action.method = 'GET'
                    action.attempt = self.attempt
                    action.save()
                    self.attempt.actions_count += 1
                    for q in u['queries']:
                        try:
                            query = Query()
                            query.content = q['content']
                            query.action = action
                            query.save()
                            self.attempt.queries_count += 1

                            if 'explain' in q:
                                explain = Explain()
                                explain.output = q['explain']
                                explain.query = query
                                explain.save()

                            if 'stats' in q:
                                metric = QueryMetric()
                                metric.name = 'stats'
                                metric.value = str(q['stats'])
                                metric.query = query
                                metric.save()
                        except:
                            pass
                    for description, count in u['counter'].iteritems():
                        counter = Counter()
                        counter.description = description
                        counter.count = count
                        counter.action = action
                        counter.save()
                except Exception, e:
                    LOG.exception(e)  

        # save screenshot
        if screenshot_path != None:
            screenshot = open(screenshot_path, 'rb')
            image = Image()
            image.data = screenshot.read()
            image.attempt = self.attempt
            image.save()

        # save statistics
        if statistics != None:
            for description, count in statistics.iteritems():
                statistic = Statistic()
                statistic.description = description
                statistic.count = count
                statistic.attempt = self.attempt
                statistic.save()

        # save informations
        if informations != None:
            for name, description in informations.iteritems():
                information = Information()
                information.name = name
                information.description = description
                information.attempt = self.attempt
                information.save()

        LOG.info("Saved Attempt #%s for %s" % (self.attempt, self.attempt.repo))
        
        # populate packages
        for pkg in self.packages_from_file:
            try:
                Dependency.objects.get_or_create(attempt=self.attempt, package=pkg, source=PACKAGE_SOURCE_FILE)
                pkg.count = pkg.count + 1
                pkg.save()
            except Exception, e:
                LOG.exception(e)  
        ## FOR
        for pkg in self.packages_from_database:
            try:
                Dependency.objects.get_or_create(attempt=self.attempt, package=pkg, source=PACKAGE_SOURCE_DATABASE)
                if pkg.version != '':
                    pkg.count = pkg.count + 1
                    pkg.save()
            except Exception, e:
                LOG.exception(e)
        ## FOR

        # make sure we update the repo to point to this latest attempt
        if attempt_result in [ATTEMPT_STATUS_MISSING_REQUIRED_FILES, ATTEMPT_STATUS_RUNNING_ERROR, ATTEMPT_STATUS_DOWNLOAD_ERROR]:
            self.repo.valid_project = False
        else:
            self.repo.valid_project = True
        self.repo.latest_attempt = self.attempt
        if self.attempt.result == ATTEMPT_STATUS_SUCCESS and self.attempt.queries_count == 0:
            self.attempt.result = ATTEMPT_STATUS_NO_QUERIES
        if self.attempt.result == ATTEMPT_STATUS_SUCCESS:
            self.repo.latest_successful_attempt = self.attempt
        self.repo.attempts_count = self.repo.attempts_count + 1
        self.repo.save()
        self.attempt.save()
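
A minimal sketch of the URL de-duplication idiom used twice above: stripping the digits from a URL yields its pattern, and only the first URL per pattern is processed, so '/item/42/' and '/item/7/' count as the same action.

import re

urls = ['/item/42/', '/item/7/', '/search?q=1']
seen, unique = set(), []
for u in urls:
    url_pattern = re.sub(r'\d', '', u)
    if url_pattern in seen:
        continue
    seen.add(url_pattern)
    unique.append(u)
print(unique)   # -> ['/item/42/', '/search?q=1']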

Example 106

Project: libtrack
Source File: parsing.py
def yield_paths(filename, timing_info=None, filter_calls=[]):
    """Retrieves full paths from a trace file

    Given a trace file, yields all full paths ending in a POSIX call,
    until the end of the file. Full paths are returned as lists of
    elements, each a function call together with the name of the library
    it belongs to.
    For example, "__libc_init:libc.so" is the signature for
    the call "__libc_init" belonging to lib "libc.so"

    Args:
        filename

    Returns:
        fullpath as a list

    Raises:
        Exception: if log file is corrupted
    """
    try:
        f = open(filename)
    except IOError, error:
        print >> sys.stderr, "I/O error while opening file: %s" % error
        return

    cache = Cache()
    for line in f:
        try:
            labels = line.split(':')[:3]
        except Exception, error:
            print >> sys.stderr, "Unhandled Exception:", error, filename
            continue
            #return
        if labels[1:] in [['BT', 'REPEAT']]:
            try:
                cache.repeat_next = int(line.split(':')[3])
            except Exception, error:
                #print >> sys.stderr, "Unhandled Exception:", error, filename
                continue
                #return

        elif labels[1:] in [['BT', 'START']]:
           # flush the bloody cache
            if cache.dirty_native_path:
                if cache.native_path[-1] in filter_calls:
                    cache.dirty_native_path = False
                    cache.dirty_dalvik_path = False
                    cache.repeat_current = cache.repeat_next
                    cache.repeat_next = 1
                    continue
                # separately retrieve fd type for open
                if cache.native_path[-1] == 'open:libc.so':
                    cache.native_path[-1] = 'open_' + cache.fd_buff + ':libc.so'
                for _ in range(cache.repeat_current):
                    #yield cache.native_path
                    if cache.dirty_dalvik_path:
                        yield cache.dalvik_path + cache.native_path 
                    else:
                        yield cache.native_path
                cache.dirty_native_path = False
                cache.dirty_dalvik_path = False
                cache.repeat_current = cache.repeat_next
                cache.repeat_next = 1
            try:
                frames = int(line.split(':')[3])
                cache.set_native(f, frames)
            except (ValueError, IndexError), error:
                #print >>  sys.stderr, "Error:", error, "in file:", filename
                continue
                #return
            except StopIteration, error:
                #print >> sys.stderr, "Error:", error, "unexpected end in File:", filename
                continue
                #return
            except Exception as error:
                #print >> sys.stderr, "Unhandled Exception:", error, "in file:", filename
                continue
                #return

        elif labels[1:] in [['DVM', 'BT_START']]:
            try:
                frames = int(line.split(':')[3])
                cache.set_dalvik(f, frames)
            except (ValueError, IndexError), error:
                #print >> sys.stderr, "Error:", error, "in file:", filename
                continue
                #return
            except StopIteration, error:
                #print >> sys.stderr, "Error:", error, "unexpected end in File:", filename
                continue
                #return
            except Exception, error:
                #print >> sys.stderr, "Unhandled Exception:", error, "in file:", filename
                continue
                #return

        elif labels[1:] in [['LOG', 'I']]:
            try:
                cache.set_fd_buff(line)
            except (ValueError, IndexError), error:
                #print >> sys.stderr, "Error:", error, "in file:", filename
                continue
                #return
            except StopIteration, error:
                #print >> sys.stderr, "Error:", error, "unexpected end in File:", filename
                continue
                #return
            except Exception, error:
                #print >> sys.stderr, "Unhandled Exception:", error, "in file:", filename
                continue
                #return

        elif labels[1:] in [['BT_REPEAT', '1']]:
            cache.dirty_dalvik_path = True

        elif labels[1:2] in [['CALL']]:
            if len(line.split('.')) > 2:
                continue
            try:
                yield ["NO_BACKTRACE:NO_BACKTRACE", labels[2].split('\n')[0] + ':libc.so']
            except Exception, error:
                #print >> sys.stderr, "Unhandled Exception:", error, "in file:", filename
                continue
                #return

        elif labels[1:] in [['LOG', 'T']]:
            if timing_info == None:
                continue
            # nothing else I can do now for this kind of corrupted logs     
            #if len(line.split(':')) != 5:
            #    continue
            try:
                call = line.split(':')[3] + ":libc.so"
                if call in filter_calls:
                    continue
                time = line.split(':')[4].split('\n')[0]
                nsec = 1000000000*int(time.split('.')[0]) + int(time.split('.')[1])
            except Exception, error:
                print >> sys.stderr, "Unhandled Exception:", error, "in file:", filename
                continue
                #return
            # special handling of epoll - it should go to cache later
            if call[:11] == "epoll_wait_":
                call = "epoll_wait_:libc.so"
            else:
                # this regexec makes things sooooo slow
                call = re.sub(r'_B:|_D:|_E:|_F:|_f:|_K:|_k:|_P:|_p:|_S:|_U:', ':', call)

            if call not in timing_info:
                timing_info[call] = [nsec]
            else:
                timing_info[call].append(nsec)

        else:
            continue

    # flush cache
    if cache.dirty_native_path:
        # separately retrieve fd type for open
        if cache.native_path[-1] == 'open:libc.so':
            cache.native_path[-1] = 'open_' + cache.fd_buff + ':libc.so'
        for _ in range(cache.repeat_current):
            #yield cache.native_path
            if cache.dirty_dalvik_path:
                yield cache.dalvik_path + cache.native_path
            else:
                yield cache.native_path
    f.close()
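
The alternation near the end of the loop (the one the comment blames for slowness) strips a one-letter fd-type suffix from the call name. A hedged sketch of a common mitigation: hoist the pattern into a precompiled object so it is parsed once instead of being looked up on every trace line; the character class below is assumed to cover the same suffix letters as the alternation above.

import re

# Assumed equivalent of the alternation '_B:|_D:|...|_U:' above.
FD_SUFFIX = re.compile(r'_[BDEFfKkPpSU]:')

print(FD_SUFFIX.sub(':', 'read_S:libc.so'))   # -> 'read:libc.so'
print(FD_SUFFIX.sub(':', 'write:libc.so'))    # unchanged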

Example 107

def expressionAsCSharpFunction(expressionNode, conflictDict, refTypeDict, jointOrientVars):
	"""Return the expression node's contents as C# for Unity
	@param expressionNode the expression node to translate into C#
	@param conflictDict a dictionary mapping Maya reference names to conflict-resolved alternatives
	@param refTypeDict a dictionary storing the Unity type for each reference in the list
	@param jointOrientVars a dictionary mapping joint names to their prerotation Quaternion variables"""
	# get the expression's contents
	expression = cmds.expression(expressionNode, q=True, s=True)
		
	# remove comments
	expression = re.sub('//.*?[\n\r]', '', expression)
	for comment in re.findall('/\*.*?\*/', expression, re.DOTALL):
		expression = expression.replace(comment, '')
	
	# find types of reference fields and variables
	referenceFieldDict = getReferenceFieldDict(expressionNode)
	variableTypeDict = getVariableTypeDict(expression, referenceFieldDict)
	
	# append variable name conflicts to conflictDict
	for v in variableTypeDict:
		if not v[1:] in kReservedKeywords and not v[1:] in conflictDict: conflictDict[v] = v
		else: conflictDict[v] = '$__varNameConflict__%s'%v[1:]
	
	# fix naming conflicts
	for conflict in conflictDict:
		if re.match('%s(?=\W)'%re.escape(conflict), expression):
			expression = re.sub('\A%s(?=\W)'%re.escape(conflict), conflictDict[conflict], expression, 1)
		expression = re.sub('(?<=\W)%s(?=\W)'%re.escape(conflict), conflictDict[conflict], expression)
	
	# attempt to make on-demand math commands Unity-friendly
	expression = convertMathCommands(expression)
	
	# make print statements Unity-friendly
	skip = 0
	while len(re.findall('print\s+.*?;', expression)) > skip:
		printStatement = list(re.finditer('print\s+.*?;', expression))[skip]
		if printStatement.start(0) == 0 or not re.match('[\w\d.$]', expression[printStatement.start(0)-1]):
			expression = (expression[0:printStatement.start(0)] + 
				'Debug.Log(%s);'%re.match('.*?(?=;)', printStatement.group(0)[len('print'):].lstrip()).group(0) + 
				expression[printStatement.end(0):])
		else: skip += 1
	
	# reformat lines so there is one line of code per line of text
	lines = expression.split(';')
	expression = ''
	for line in lines:
		if line == '': continue
		if line[-1] == '}': line = '\n%s'%re.sub('\n', ' ', line.lstrip().rstrip())
		else: line = '\n%s;'%re.sub('\n', ' ', line.lstrip().rstrip())
		expression += re.sub('  ', ' ', line)
	expression = re.sub(';\n}', '; }\n', expression.lstrip())
		
	# remove lines that have only a declaration
	lines = expression.split(';')
	expression = ''
	for line in lines:
		if line == '': continue
		if re.search('\w+\s+\$\w+', line):
			if not '=' in line: continue
			else: line = re.sub('\w+\s+(?=\$\w)', '', line)
		if line[-1] == '}' or line[-1] == '\n': expression += line
		else: expression += '%s;'%line
	
	# split multi-assignments into separate lines
	lines = expression.split(';')
	for line in lines:
		newLines = ['%s;'%line]
		assignments = re.split('(?<![=<>])=(?!=)', line)
		finalValue = assignments[len(assignments)-1]
		if len(assignments) > 2:
			newLines = []
			for i in range(len(assignments)-1):
				if i == 0: newLines.append('%s = %s;'%(assignments[i].rstrip(), finalValue.lstrip()))
				else: newLines.append('%s = %s;'%(assignments[i].rstrip(), re.match('[\s\w.]+', assignments[0][::-1]).group(0)[::-1].rstrip().lstrip()))
		replacementLine = newLines[0]
		for i in range(len(newLines)):
			if i == 0: continue
			replacementLine = '%s\n%s'%(replacementLine, newLines[i])
		expression = expression.replace('%s;'%line, replacementLine)
	
	# reformat operators
	for op in ['=', '==', '<', '<=', '>', '>=', '!=', '+', '+=', '-=', '*', '*=', '/', '/=', '?', ':']:
		expression = re.sub('(?<=[\w\d\s()])%s(?=[\w\d\s()$])'%re.escape(op), ' %s '%op, expression)
		expression = re.sub('(?<=[\w\d()])\s+%s\s+(?=[\w\d()$])'%re.escape(op), ' %s '%op, expression)
	for op in ['-']:
		expression = re.sub('\s+%s\s+'%re.escape(op), op, expression)
		expression = re.sub('(?<=[\w\d()])%s(?=[\w\d()$])'%re.escape(op), ' %s '%op, expression)
		expression = re.sub('(?<=[\w\d()])%s%s(?=[\w\d()$])'%(re.escape(op),re.escape(op)), '%s %s'%(op,op), expression)
		expression = re.sub('(?<=[\w\d)])%s\s'%(re.escape(op)), ' %s '%op, expression)
	for op in ['++', '--']: # TODO: this hasn't been thoroughly examined
		expression = re.sub('\s+%s'%re.escape(op), '%s'%op, expression)
	
	# reformat one-line if-statements
	lines = expression.split('\n')
	expression = ''
	for line in lines:
		line = line.lstrip().rstrip()
		if (re.match('if\W', line) or re.match('else if\W', line)) and not re.search('{', line):
			openCt = 0
			closeCt = 0
			endBlock = 0
			for i in xrange(len(line)):
				if endBlock > 0: continue
				if line[i] == '(': openCt += 1
				elif line[i] == ')':
					closeCt += 1
					if openCt > 0 and openCt == closeCt: endBlock = i
			if endBlock > 0: line = '%s\n{%s;}'%(line[:endBlock+1], line[endBlock+1:-1])
		elif re.match('else\W', line) and not re.search('{', line):
			line = 'else {%s}'%line[5:]
		expression += '%s\n'%line
	
	# reformat tabbing
	expression = re.sub('{', '\n{\n', expression)
	expression = re.sub('}', '\n}\n', expression)
	lines = expression.split('\n')
	expression = ''
	indent = 0
	for line in lines:
		line = line.lstrip().rstrip()
		line = re.sub('\s+', ' ', line)
		if line == '': continue
		bracketOpen = len(re.findall('{', line))
		bracketClose = len(re.findall('}', line))
		if bracketOpen<bracketClose: indent -= 1
		expression += '%s%s\n'%('\t'*indent, line)
		if bracketOpen>bracketClose: indent += 1
	expression = expression.rstrip()
	
	# consolidate literal blocks
	expression = consolidateLiteralBlocks(expression, conflictDict, referenceFieldDict, variableTypeDict)
	
	# correct assignments
	expression = correctAssignments(expression, conflictDict, refTypeDict, referenceFieldDict, variableTypeDict, jointOrientVars)
	
	# replace attributeNames with names of Unity fields as needed
	expression = correctReferenceFieldSyntax(expression, conflictDict, refTypeDict)
	
	# clean up spacing
	for op in ['= =', '< =', '> =', '! =', '+ =', '- =', '* =', '/ =']:
		expression = re.sub(re.escape(op), op.replace(' ', ''), expression)
	matches = list(re.finditer('(?<=\()\-\s(?=\d)', expression))
	for match in matches: expression = re.sub(re.escape(match.group(0)), '-', expression)
	
	# convert addition and subtraction of negatives
	expression = re.sub('\+\s\-(?=\S)', '- ', expression)
	expression = re.sub('\+\s\(\-\(', '- ((', expression)
	expression = re.sub('\-\s\-(?=\S)', '+ ', expression)
	expression = re.sub('\-\s\(\-\(', '+ ((', expression)
	
	# correct float literal syntax
	expression = correctFloatLiteralSyntax(expression)
	
	# comment out lines with broken references e.g., .I[1] or .O[3]
	lines = expression.split('\n')
	expression = ''
	for line in lines:
		if line == '': continue
		if re.search('\s\.\w', line) or re.match('\.\w', line): line = '//%s // ERROR: Missing object reference. Check your Maya file.'%line
		expression += '%s\n'%line
	expression = expression.rstrip()
	
	# put all variable declarations up front
	declarations = ''
	for var in variableTypeDict:
		declarations += '\n%s %s;'%(variableTypeDict[var], conflictDict[var])
	expression = (declarations.lstrip()+'\n\n'+expression.lstrip()).lstrip()
	
	# remove variable symbols
	expression = expression.replace('$', '')
	
	#print expression
	return expression
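
A minimal sketch of the conflict-renaming idiom from the "fix naming conflicts" block above: re.escape makes an arbitrary variable name (including its leading $) safe to embed in a pattern, and the \W lookarounds ensure that only whole tokens are rewritten. The mapping below is hypothetical.

import re

conflictDict = {'$float': '$__varNameConflict__float'}   # hypothetical entry
expression = 'float $float = $float + 1;'
for conflict, replacement in conflictDict.items():
    expression = re.sub(r'(?<=\W)%s(?=\W)' % re.escape(conflict),
                        replacement, expression)
print(expression)
# -> 'float $__varNameConflict__float = $__varNameConflict__float + 1;'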

Example 108

Project: asuka
Source File: build.py
    def _install(self):
        logger = self.get_logger('install')
        sudo = self.instance.sudo
        logger.info(
            'START TO INSTALL: branch = %r, commit = %r, instance = %r',
            self.branch, self.commit, self.instance
        )
        def setup_instance(service_manifests, service_manifests_available):
            logger = self.get_logger('install.setup_instance')
            with self.instance:
                def aptitude(*commands):
                    sudo(['aptitude', '-y'] + list(commands),
                         environ={'DEBIAN_FRONTEND': 'noninteractive'})
                # create user for app
                sudo(['useradd', '-U', '-G', 'users,www-data', '-Mr',
                      self.app.name])
                # assume instance uses Ubuntu >= 12.04
                apt_sources = re.sub(
                    r'\n#\s*(deb(?:-src)?\s+'
                    r'http://[^.]\.ec2\.archive\.ubuntu\.com/'
                    r'ubuntu/\s+[^-]+multiverse\n)',
                    lambda m: '\n' + m.group(1),
                    self.instance.read_file('/etc/apt/sources.list', sudo=True)
                )
                self.instance.write_file('/etc/apt/sources.list', apt_sources,
                                         sudo=True)
                apt_repos = set()
                apt_packages = set([
                    'build-essential', 'python-dev', 'python-setuptools',
                    'python-pip'
                ])
                with service_manifests_available:
                    while not service_manifests[0]:
                        service_manifests_available.wait()
                for service in service_manifests[1:]:
                    apt_repos.update(service.required_apt_repositories)
                    apt_packages.update(service.required_apt_packages)
                if apt_repos:
                    for repo in apt_repos:
                        sudo(['apt-add-repository', '-y', repo])
                    aptitude('update')
                with self.instance.sftp():
                    self.instance.write_file(
                        '/usr/bin/apt-fast',
                        resource_string(__name__, 'apt-fast'),
                        sudo=True
                    )
                    self.instance.write_file('/etc/apt-fast.conf', '''
_APTMGR=aptitude
DOWNLOADBEFORE=true
_MAXNUM=20
DLLIST='/tmp/apt-fast.list'
_DOWNLOADER='aria2c -c -j ${_MAXNUM} -i ${DLLIST} --connect-timeout=10 \
             --timeout=600 -m0'
DLDIR='/var/cache/apt/archives/apt-fast'
APTCACHE='/var/cache/apt/archives/'
                    ''', sudo=True)
                sudo(['chmod', '+x', '/usr/bin/apt-fast'])
                aptitude('install', 'aria2')
                sudo(['apt-fast', '-q', '-y', 'install'] + list(apt_packages),
                     environ={'DEBIAN_FRONTEND': 'noninteractive'})
        service_manifests_available = threading.Condition()
        service_manifests = [False]
        instance_setup_worker = threading.Thread(
            target=setup_instance,
            kwargs={
                'service_manifests_available': service_manifests_available,
                'service_manifests': service_manifests
            }
        )
        instance_setup_worker.start()
        # setup metadata of the instance
        self.update_instance_metadata()
        self.instance.status = 'started'
        # making package (pybundle)
        fd, package_path = tempfile.mkstemp()
        os.close(fd)
        with self.fetch() as download_path:
            service_manifests.extend(self.services)
            service_manifests[0] = True
            with service_manifests_available:
                service_manifests_available.notify()
            config_temp_path = tempfile.mkdtemp()
            shutil.copytree(
                os.path.join(download_path, self.app.config_dir),
                os.path.join(config_temp_path, self.app.name)
            )
            with self.dist.bundle_package() as (package, filename, temp_path):
                shutil.copyfile(temp_path, package_path)
                remote_path = os.path.join('/tmp', filename)
            with self.instance.sftp():
                # upload config files
                self.instance.put_directory(
                    os.path.join(config_temp_path, self.app.name),
                    '/etc/' + self.app.name,
                    sudo=True
                )
                shutil.rmtree(config_temp_path)
                python_packages = set()
                for service in service_manifests[1:]:
                    python_packages.update(service.required_python_packages)
                # uploads package
                self.instance.put_file(package_path, remote_path)
                # join instance_setup_worker
                instance_setup_worker.join()
                self.instance.status = 'apt-installed'
                pip_cmd = ['pip', 'install', '-i', PYPI_INDEX_URLS[0]]
                for idx in PYPI_INDEX_URLS[1:]:
                    pip_cmd.append('--extra-index-url=' + idx)
                sudo(pip_cmd + [remote_path], environ={'CI': '1'})
                sudo(pip_cmd + ['-I'] + list(python_packages),
                     environ={'CI': '1'})
                self.instance.status = 'installed'
                for service in service_manifests[1:]:
                    for cmd in service.pre_install:
                        sudo(cmd, environ={'DEBIAN_FRONTEND': 'noninteractive'})
                values_path = '/etc/{0}/values.json'.format(self.app.name)
                service_values = {
                    '.build': dict(
                        commit=self.commit.ref,
                        branch=self.branch.label
                    )
                }
                refresh_values = lambda: self.instance.write_file(
                    values_path,
                    json.dumps(service_values),
                    sudo=True
                )
                refresh_values()
                for service in service_manifests[1:]:
                    service_value = service.install(self.instance)
                    service_values[service.name] = service_value
                    refresh_values()
                for service in service_manifests[1:]:
                    for cmd in service.post_install:
                        sudo(cmd, environ={'DEBIAN_FRONTEND': 'noninteractive'})
        service_map = dict((service.name, service)
                           for service in service_manifests[1:])
        deployed_domains = {}
        if self.route53_hosted_zone_id and self.route53_records:
            self.instance.status = 'run'
            changeset = ResourceRecordSets(
                self.app.route53_connection,
                self.route53_hosted_zone_id,
                'Changed by Asuka: {0}, {1} [{2}]'.format(self.app.name,
                                                          self.branch.label,
                                                          self.commit.ref)
            )
            from .service import DomainService
            for service_name, domain_format in self.route53_records.items():
                service = service_map[service_name]
                if not isinstance(service, DomainService):
                    raise TypeError(repr(service) + ' is not an instance of '
                                    'crosspop.service.DomainService')
                domain = domain_format.format(branch=self.branch)
                deployed_domains[service_name] = domain
                service.route_domain(domain, changeset)
                self.instance.tags['Domain-' + service_name] = domain
            if changeset.changes:
                logger.info('Route 53 changeset:\n%s', changeset.to_xml())
                changeset.commit()
        self.instance.status = 'done'
        self.terminate_instances()
        return deployed_domains
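
The notable re.sub here passes a callable as the replacement: the lambda receives the match object and re-emits the captured sources.list line without its comment marker. A minimal sketch with a simplified pattern and a made-up sources.list fragment (the real pattern above also matches deb-src lines and the EC2 mirror host):

import re

# a made-up sources.list fragment with a commented-out multiverse line
sources = (
    "deb http://archive.ubuntu.com/ubuntu/ precise main\n"
    "# deb http://archive.ubuntu.com/ubuntu/ precise multiverse\n"
)
# when the replacement is a function, re.sub calls it with the match object
# and splices in whatever string it returns -- here we drop the leading '# '
uncommented = re.sub(
    r'\n#\s*(deb\s+\S+\s+\S+\s+multiverse\n)',
    lambda m: '\n' + m.group(1),
    sources,
)
print(uncommented)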

Example 109

Project: odoo
Source File: escpos.py
View license
    def receipt(self,xml):
        """
        Prints an xml based receipt definition
        """

        def strclean(string):
            if not string:
                string = ''
            string = string.strip()
            string = re.sub('\s+',' ',string)
            return string

        def format_value(value, decimals=3, width=0, decimals_separator='.', thousands_separator=',', autoint=False, symbol='', position='after'):
            decimals = max(0,int(decimals))
            width    = max(0,int(width))
            value    = float(value)

            if autoint and math.floor(value) == value:
                decimals = 0
            if width == 0:
                width = ''

            if thousands_separator:
                formatstr = "{:"+str(width)+",."+str(decimals)+"f}"
            else:
                formatstr = "{:"+str(width)+"."+str(decimals)+"f}"


            ret = formatstr.format(value)
            ret = ret.replace(',','COMMA')
            ret = ret.replace('.','DOT')
            ret = ret.replace('COMMA',thousands_separator)
            ret = ret.replace('DOT',decimals_separator)

            if symbol:
                if position == 'after':
                    ret = ret + symbol
                else:
                    ret = symbol + ret
            return ret

        def print_elem(stylestack, serializer, elem, indent=0):

            elem_styles = {
                'h1': {'bold': 'on', 'size':'double'},
                'h2': {'size':'double'},
                'h3': {'bold': 'on', 'size':'double-height'},
                'h4': {'size': 'double-height'},
                'h5': {'bold': 'on'},
                'em': {'font': 'b'},
                'b':  {'bold': 'on'},
            }

            stylestack.push()
            if elem.tag in elem_styles:
                stylestack.set(elem_styles[elem.tag])
            stylestack.set(elem.attrib)

            if elem.tag in ('p','div','section','article','receipt','header','footer','li','h1','h2','h3','h4','h5'):
                serializer.start_block(stylestack)
                serializer.text(elem.text)
                for child in elem:
                    print_elem(stylestack,serializer,child)
                    serializer.start_inline(stylestack)
                    serializer.text(child.tail)
                    serializer.end_entity()
                serializer.end_entity()

            elif elem.tag in ('span','em','b','left','right'):
                serializer.start_inline(stylestack)
                serializer.text(elem.text)
                for child in elem:
                    print_elem(stylestack,serializer,child)
                    serializer.start_inline(stylestack)
                    serializer.text(child.tail)
                    serializer.end_entity()
                serializer.end_entity()

            elif elem.tag == 'value':
                serializer.start_inline(stylestack)
                serializer.pre(format_value( 
                                              elem.text,
                                              decimals=stylestack.get('value-decimals'),
                                              width=stylestack.get('value-width'),
                                              decimals_separator=stylestack.get('value-decimals-separator'),
                                              thousands_separator=stylestack.get('value-thousands-separator'),
                                              autoint=(stylestack.get('value-autoint') == 'on'),
                                              symbol=stylestack.get('value-symbol'),
                                              position=stylestack.get('value-symbol-position') 
                                            ))
                serializer.end_entity()

            elif elem.tag == 'line':
                width = stylestack.get('width')
                if stylestack.get('size') in ('double', 'double-width'):
                    width = width / 2

                lineserializer = XmlLineSerializer(stylestack.get('indent')+indent,stylestack.get('tabwidth'),width,stylestack.get('line-ratio'))
                serializer.start_block(stylestack)
                for child in elem:
                    if child.tag == 'left':
                        print_elem(stylestack,lineserializer,child,indent=indent)
                    elif child.tag == 'right':
                        lineserializer.start_right()
                        print_elem(stylestack,lineserializer,child,indent=indent)
                serializer.pre(lineserializer.get_line())
                serializer.end_entity()

            elif elem.tag == 'ul':
                serializer.start_block(stylestack)
                bullet = stylestack.get('bullet')
                for child in elem:
                    if child.tag == 'li':
                        serializer.style(stylestack)
                        serializer.raw(' ' * indent * stylestack.get('tabwidth') + bullet)
                    print_elem(stylestack,serializer,child,indent=indent+1)
                serializer.end_entity()

            elif elem.tag == 'ol':
                cwidth = len(str(len(elem))) + 2
                i = 1
                serializer.start_block(stylestack)
                for child in elem:
                    if child.tag == 'li':
                        serializer.style(stylestack)
                        serializer.raw(' ' * indent * stylestack.get('tabwidth') + ' ' + (str(i)+')').ljust(cwidth))
                        i = i + 1
                    print_elem(stylestack,serializer,child,indent=indent+1)
                serializer.end_entity()

            elif elem.tag == 'pre':
                serializer.start_block(stylestack)
                serializer.pre(elem.text)
                serializer.end_entity()

            elif elem.tag == 'hr':
                width = stylestack.get('width')
                if stylestack.get('size') in ('double', 'double-width'):
                    width = width / 2
                serializer.start_block(stylestack)
                serializer.text('-'*width)
                serializer.end_entity()

            elif elem.tag == 'br':
                serializer.linebreak()

            elif elem.tag == 'img':
                if 'src' in elem.attrib and 'data:' in elem.attrib['src']:
                    self.print_base64_image(elem.attrib['src'])

            elif elem.tag == 'barcode' and 'encoding' in elem.attrib:
                serializer.start_block(stylestack)
                self.barcode(strclean(elem.text),elem.attrib['encoding'])
                serializer.end_entity()

            elif elem.tag == 'cut':
                self.cut()
            elif elem.tag == 'partialcut':
                self.cut(mode='part')
            elif elem.tag == 'cashdraw':
                self.cashdraw(2)
                self.cashdraw(5)

            stylestack.pop()

        try:
            stylestack      = StyleStack() 
            serializer      = XmlSerializer(self)
            root            = ET.fromstring(xml.encode('utf-8'))

            self._raw(stylestack.to_escpos())

            print_elem(stylestack,serializer,root)

            if 'open-cashdrawer' in root.attrib and root.attrib['open-cashdrawer'] == 'true':
                self.cashdraw(2)
                self.cashdraw(5)
            if not 'cut' in root.attrib or root.attrib['cut'] == 'true' :
                self.cut()

        except Exception as e:
            errmsg = str(e)+'\n'+'-'*48+'\n'+traceback.format_exc() + '-'*48+'\n'
            self.text(errmsg)
            self.cut()

            raise e
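
The strclean helper at the top of this example is a self-contained whitespace normalizer; pulled out on its own it behaves like this (the sample input is made up):

import re

def strclean(string):
    # collapse runs of whitespace (spaces, tabs, newlines) to single spaces
    if not string:
        string = ''
    string = string.strip()
    string = re.sub(r'\s+', ' ', string)
    return string

print(strclean('  Total:\n\t 42.00  '))   # -> 'Total: 42.00'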

Example 110

View license
    def addMediaFile(self, package, contextType='video', encfs=False, dpath='', epath=''):
        thumbnail = self.cache.getThumbnail(self, package.file.thumbnail,package.file.id)
        listitem = xbmcgui.ListItem(package.file.displayTitle(), iconImage=package.file.thumbnail,
                                thumbnailImage=package.file.thumbnail)

        # audio file, not in "pictures"
        if package.file.type == package.file.AUDIO and contextType != 'image':
            if package.file.hasMeta:
                infolabels = decode_dict({ 'title' : package.file.displayTrackTitle(), 'tracknumber' : package.file.trackNumber, 'artist': package.file.artist, 'album': package.file.album,'genre': package.file.genre,'premiered': package.file.releaseDate, 'size' : package.file.size })
            else:
                infolabels = decode_dict({ 'title' : package.file.displayTitle(), 'size' : package.file.size })
            listitem.setInfo('Music', infolabels)
            playbackURL = '?mode=audio'
            if self.integratedPlayer:
                listitem.setProperty('IsPlayable', 'false')
            else:
                listitem.setProperty('IsPlayable', 'true')

        # encrypted file, viewing in "pictures", assume image
        elif package.file.type == package.file.UNKNOWN and contextType == 'image':
            infolabels = decode_dict({ 'title' : package.file.displayTitle() , 'plot' : package.file.plot })
            listitem.setInfo('Pictures', infolabels)
            playbackURL = '?mode=photo'
            listitem.setProperty('IsPlayable', 'false')

        # encrypted file, viewing in "video", assume video
        elif package.file.type == package.file.UNKNOWN and contextType == 'video':
            infolabels = decode_dict({ 'title' : package.file.displayTitle() ,  'plot' : package.file.plot, 'size' : package.file.size })
            listitem.setInfo('Video', infolabels)
            playbackURL = '?mode=video'
            if self.integratedPlayer:
                listitem.setProperty('IsPlayable', 'false')
            else:
                listitem.setProperty('IsPlayable', 'true')
            if float(package.file.resume) > 0:
                listitem.setProperty('isResumable', "1")



        # encrypted file, viewing in "music", assume audio
        elif package.file.type == package.file.UNKNOWN and contextType == 'audio':
            if package.file.hasMeta:
                infolabels = decode_dict({ 'title' : package.file.displayTrackTitle(), 'tracknumber' : package.file.trackNumber, 'artist': package.file.artist, 'album': package.file.album,'genre': package.file.genre,'premiered': package.file.releaseDate, 'size' : package.file.size })
            else:
                infolabels = decode_dict({ 'title' : package.file.displayTitle(), 'size' : package.file.size })
            listitem.setInfo('Music', infolabels)
            playbackURL = '?mode=audio'
            if self.integratedPlayer:
                listitem.setProperty('IsPlayable', 'false')
            else:
                listitem.setProperty('IsPlayable', 'true')

        # audio file, viewing in "pictures"
        elif package.file.type == package.file.AUDIO and contextType == 'image':
            if package.file.hasMeta:
                infolabels = decode_dict({ 'title' : package.file.displayTrackTitle(), 'tracknumber' : package.file.trackNumber, 'artist': package.file.artist, 'album': package.file.album,'genre': package.file.genre,'premiered': package.file.releaseDate, 'size' : package.file.size })
            else:
                infolabels = decode_dict({ 'title' : package.file.displayTitle(), 'size' : package.file.size })
            listitem.setInfo('Music', infolabels)
            playbackURL = '?mode=audio'
            listitem.setProperty('IsPlayable', 'false')

        # video file
        elif package.file.type == package.file.VIDEO:
            if package.file.hasMeta:
                infolabels = decode_dict({ 'title' : package.file.displayShowTitle() ,  'plot' : package.file.plot, 'TVShowTitle': package.file.show, 'EpisodeName': package.file.showtitle, 'season': package.file.season, 'episode': package.file.episode,'size' : package.file.size })
            else:
                infolabels = decode_dict({ 'title' : package.file.displayTitle() ,  'plot' : package.file.plot, 'size' : package.file.size })
            listitem.setInfo('Video', infolabels)
            playbackURL = '?mode=video'
            if self.integratedPlayer:
                listitem.setProperty('IsPlayable', 'false')
            else:
                listitem.setProperty('IsPlayable', 'true')
            if float(package.file.resume) > 0:
                listitem.setProperty('isResumable', "1")
            if int(package.file.playcount) > 0: #or (float(package.file.resume) > 0 and package.file.duration > 0 and package.file.resume/package.file.duration > (1-self.settskipResume)):
                listitem.setInfo('video', {'playcount':int(package.file.playcount)})

            if int(package.file.resolution[0]) > 0:
                listitem.addStreamInfo('video', {'width': package.file.resolution[1], 'height': package.file.resolution[0], 'duration':package.file.duration})

        # image file
        elif package.file.type == package.file.PICTURE:
            infolabels = decode_dict({ 'title' : package.file.displayTitle() , 'plot' : package.file.plot })
            listitem.setInfo('Pictures', infolabels)
            playbackURL = '?mode=photo'
            listitem.setProperty('IsPlayable', 'false')

        # otherwise, assume video
        else:
            infolabels = decode_dict({ 'title' : package.file.displayTitle() , 'plot' : package.file.plot, 'size' : package.file.size })
            listitem.setInfo('Video', infolabels)
            playbackURL = '?mode=video'
            if self.integratedPlayer:
                listitem.setProperty('IsPlayable', 'false')
            else:
                listitem.setProperty('IsPlayable', 'true')
            if float(package.file.resume) > 0:
                listitem.setProperty('isResumable', "1")

        listitem.setProperty('fanart_image', package.file.fanart)


        cm=[]

        try:
            url = package.getMediaURL()
            cleanURL = re.sub('---', '', url)
            cleanURL = re.sub('&', '---', cleanURL)
        except:
            cleanURL = ''

    #    url = PLUGIN_URL+playbackURL+'&title='+package.file.title+'&filename='+package.file.id+'&instance='+str(self.instanceName)+'&folder='+str(package.folder.id)
        if encfs:
            values = {'instance': self.instanceName, 'dpath': dpath, 'epath': epath, 'encfs': 'true', 'title': package.file.title, 'filename': package.file.id, 'folder': package.folder.id}
        else:
            values = {'instance': self.instanceName, 'title': package.file.title, 'filename': package.file.id, 'folder': package.folder.id}
        url = self.PLUGIN_URL+ str(playbackURL)+ '&' + urllib.urlencode(values)

        if (contextType != 'image' and package.file.type != package.file.PICTURE):
            valuesBS = {'username': self.authorization.username, 'title': package.file.title, 'filename': package.file.id, 'content_type': 'video'}
            cm.append(( self.addon.getLocalizedString(30042), 'XBMC.RunPlugin('+self.PLUGIN_URL+'?mode=buildstrm&type='+str(package.file.type)+'&'+urllib.urlencode(valuesBS)+')', ))

            if (self.protocol == 2):
                # play-original for video only
                if (contextType == 'video'):
                    if self.settings.promptQuality:
                        cm.append(( self.addon.getLocalizedString(30123), 'XBMC.RunPlugin('+url + '&original=true'+')', ))
                    else:
                        cm.append(( self.addon.getLocalizedString(30151), 'XBMC.RunPlugin('+url + '&promptquality=true'+')', ))

                    # if these options are disabled in settings, offer a context item to play back with the feature enabled
                    if not self.settings.srt:
                        cm.append(( self.addon.getLocalizedString(30138), 'XBMC.RunPlugin('+url + '&srt=true'+')', ))
                    if not self.settings.cc:
                        cm.append(( self.addon.getLocalizedString(30146), 'XBMC.RunPlugin('+url + '&cc=true'+')', ))

                    cm.append(( self.addon.getLocalizedString(30147), 'XBMC.RunPlugin('+url + '&seek=true'+')', ))
#                    cm.append(( self.addon.getLocalizedString(30148), 'XBMC.RunPlugin('+url + '&resume=true'+')', ))
#                    values = {'instance': self.instanceName, 'folder': package.folder.id}
#                    folderurl = self.PLUGIN_URL+ str(playbackURL)+ '&' + urllib.urlencode(values)
#                    cm.append(( 'folder', 'XBMC.RunPlugin('+folderurl+')', ))

                if contextType != 'image':
                    # download
                    cm.append(( self.addon.getLocalizedString(30113), 'XBMC.RunPlugin('+url + '&download=true'+')', ))

                    # download + watch
                    cm.append(( self.addon.getLocalizedString(30124), 'XBMC.RunPlugin('+url + '&play=true&download=true'+')', ))

#                    # watch downloaded copy
#                    cm.append(( self.addon.getLocalizedString(30125), 'XBMC.RunPlugin('+url + '&cache=true'+')', ))


        elif package.file.type ==  package.file.PICTURE: #contextType == 'image':

                cm.append(( self.addon.getLocalizedString(30126), 'XBMC.RunPlugin('+self.PLUGIN_URL+ '?mode=slideshow&' + urllib.urlencode(values)+')', ))

        #encfs
#        if (self.protocol == 2):
#            cm.append(( self.addon.getLocalizedString(30130), 'XBMC.RunPlugin('+self.PLUGIN_URL+ '?mode=downloadfolder&encfs=true&' + urllib.urlencode(values)+'&content_type='+contextType+')', ))


        url = url + '&content_type='+contextType

        #    listitem.addContextMenuItems( commands )
        #    if cm:
        if  package.file.type ==  package.file.PICTURE: #contextType == 'image':
            listitem.addContextMenuItems(cm, True)
        else:
            listitem.addContextMenuItems(cm, False)

        xbmcplugin.addDirectoryItem(plugin_handle, url, listitem,
                                isFolder=False, totalItems=0)
        return url
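
The cleanURL logic swaps '&' for '---' so the media URL can ride inside another query string; a minimal sketch, where the decode step is an assumption about how the value would be unpacked later:

import re

def encode_url(url):
    # strip any literal '---' first so the decode step is unambiguous,
    # then stand in for '&' with '---'
    cleaned = re.sub('---', '', url)
    return re.sub('&', '---', cleaned)

def decode_url(encoded):
    # assumed inverse: restore the ampersands
    return re.sub('---', '&', encoded)

original = 'http://example.com/media?id=42&session=abc'
packed = encode_url(original)
print(packed)              # http://example.com/media?id=42---session=abc
print(decode_url(packed))  # round-trips back to the original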

Example 111

Project: rivescript-python
Source File: brain.py
View license
    def process_tags(self, user, msg, reply, st=[], bst=[], depth=0, ignore_object_errors=True):
        """Post process tags in a message.

        :param str user: The user ID.
        :param str msg: The user's formatted message.
        :param str reply: The raw RiveScript reply for the message.
        :param []str st: The array of ``<star>`` matches from the trigger.
        :param []str bst: The array of ``<botstar>`` matches from a
            ``%Previous`` command.
        :param int depth: The recursion depth counter.
        :param bool ignore_object_errors: Whether to ignore errors in Python
            object macros instead of raising an ``ObjectError`` exception.

        :return str: The final reply after tags have been processed.
        """
        stars = ['']
        stars.extend(st)
        botstars = ['']
        botstars.extend(bst)
        if len(stars) == 1:
            stars.append("undefined")
        if len(botstars) == 1:
            botstars.append("undefined")

        # Tag shortcuts.
        reply = reply.replace('<person>', '{person}<star>{/person}')
        reply = reply.replace('<@>', '{@<star>}')
        reply = reply.replace('<formal>', '{formal}<star>{/formal}')
        reply = reply.replace('<sentence>', '{sentence}<star>{/sentence}')
        reply = reply.replace('<uppercase>', '{uppercase}<star>{/uppercase}')
        reply = reply.replace('<lowercase>', '{lowercase}<star>{/lowercase}')

        # Weight and <star> tags.
        reply = re.sub(RE.weight, '', reply)  # Leftover {weight}s
        if len(stars) > 0:
            reply = reply.replace('<star>', stars[1])
            reStars = re.findall(RE.star_tags, reply)
            for match in reStars:
                if int(match) < len(stars):
                    reply = reply.replace('<star{match}>'.format(match=match), stars[int(match)])
        if len(botstars) > 0:
            reply = reply.replace('<botstar>', botstars[1])
            reStars = re.findall(RE.botstars, reply)
            for match in reStars:
                if int(match) < len(botstars):
                    reply = reply.replace('<botstar{match}>'.format(match=match), botstars[int(match)])

        # <input> and <reply>
        history = self.master.get_uservar(user, "__history__")
        if type(history) is not dict:
            history = self.default_history()
        reply = reply.replace('<input>', history['input'][0])
        reply = reply.replace('<reply>', history['reply'][0])
        reInput = re.findall(RE.input_tags, reply)
        for match in reInput:
            reply = reply.replace('<input{match}>'.format(match=match),
                                  history['input'][int(match) - 1])
        reReply = re.findall(RE.reply_tags, reply)
        for match in reReply:
            reply = reply.replace('<reply{match}>'.format(match=match),
                                  history['reply'][int(match) - 1])

        # <id> and escape codes.
        reply = reply.replace('<id>', user)
        reply = reply.replace('\\s', ' ')
        reply = reply.replace('\\n', "\n")
        reply = reply.replace('\\#', '#')

        # Random bits.
        reRandom = re.findall(RE.random_tags, reply)
        for match in reRandom:
            output = ''
            if '|' in match:
                output = random.choice(match.split('|'))
            else:
                output = random.choice(match.split(' '))
            reply = reply.replace('{{random}}{match}{{/random}}'.format(match=match), output)

        # Person Substitutions and String Formatting.
        for item in ['person', 'formal', 'sentence', 'uppercase',  'lowercase']:
            matcher = re.findall(r'\{' + item + r'\}(.+?)\{/' + item + r'\}', reply)
            for match in matcher:
                output = None
                if item == 'person':
                    # Person substitutions.
                    output = self.substitute(match, "person")
                else:
                    output = utils.string_format(match, item)
                reply = reply.replace('{{{item}}}{match}{{/{item}}}'.format(item=item, match=match), output)

        # Handle all variable-related tags with an iterative regex approach,
        # to allow for nesting of tags in arbitrary ways (think <set a=<get b>>)
        # Dummy out the <call> tags first, because we don't handle them right
        # here.
        reply = reply.replace("<call>", "{__call__}")
        reply = reply.replace("</call>", "{/__call__}")
        while True:
            # This regex will match a <tag> which contains no other tag inside
            # it, i.e. in the case of <set a=<get b>> it will match <get b> but
            # not the <set> tag, on the first pass. The second pass will get the
            # <set> tag, and so on.
            match = re.search(RE.tag_search, reply)
            if not match: break  # No remaining tags!

            match = match.group(1)
            parts  = match.split(" ", 1)
            tag    = parts[0].lower()
            data   = parts[1] if len(parts) > 1 else ""
            insert = ""  # Result of the tag evaluation

            # Handle the tags.
            if tag == "bot" or tag == "env":
                # <bot> and <env> tags are similar.
                target = self.master._var if tag == "bot" else self.master._global
                if "=" in data:
                    # Setting a bot/env variable.
                    parts = data.split("=")
                    self.say("Set " + tag + " variable " + text_type(parts[0]) + "=" + text_type(parts[1]))
                    target[parts[0]] = parts[1]
                else:
                    # Getting a bot/env variable.
                    insert = target.get(data, "undefined")
            elif tag == "set":
                # <set> user vars.
                parts = data.split("=")
                self.say("Set uservar " + text_type(parts[0]) + "=" + text_type(parts[1]))
                self.master.set_uservar(user, parts[0], parts[1])
            elif tag in ["add", "sub", "mult", "div"]:
                # Math operator tags.
                parts = data.split("=")
                var   = parts[0]
                value = parts[1]
                curv  = self.master.get_uservar(user, var)

                # Sanity check the value.
                try:
                    value = int(value)
                    if curv in [None, "undefined"]:
                        # Initialize it.
                        curv = 0
                except:
                    insert = "[ERR: Math can't '{}' non-numeric value '{}']".format(tag, value)

                # Attempt the operation.
                try:
                    orig = int(curv)
                    new  = 0
                    if tag == "add":
                        new = orig + value
                    elif tag == "sub":
                        new = orig - value
                    elif tag == "mult":
                        new = orig * value
                    elif tag == "div":
                        new = orig / value
                    self.master.set_uservar(user, var, new)
                except:
                    insert = "[ERR: Math couldn't '{}' to value '{}']".format(tag, curv)
            elif tag == "get":
                insert = self.master.get_uservar(user, data)
            else:
                # Unrecognized tag.
                insert = "\x00{}\x01".format(match)

            reply = reply.replace("<{}>".format(match), text_type(insert))

        # Restore unrecognized tags.
        reply = reply.replace("\x00", "<").replace("\x01", ">")

        # Streaming code. DEPRECATED!
        if '{!' in reply:
            self._warn("Use of the {!...} tag is deprecated and not supported here.")

        # Topic setter.
        reTopic = re.findall(RE.topic_tag, reply)
        for match in reTopic:
            self.say("Setting user's topic to " + match)
            self.master.set_uservar(user, "topic", match)
            reply = reply.replace('{{topic={match}}}'.format(match=match), '')

        # Inline redirecter.
        reRedir = re.findall(RE.redir_tag, reply)
        for match in reRedir:
            self.say("Redirect to " + match)
            at = match.strip()
            subreply = self._getreply(user, at, step=(depth + 1))
            reply = reply.replace('{{@{match}}}'.format(match=match), subreply)

        # Object caller.
        reply = reply.replace("{__call__}", "<call>")
        reply = reply.replace("{/__call__}", "</call>")
        reCall = re.findall(r'<call>(.+?)</call>', reply)
        for match in reCall:
            parts  = re.split(RE.ws, match)
            output = ''
            obj    = parts[0]
            args   = []
            if len(parts) > 1:
                args = parts[1:]

            # Do we know this object?
            if obj in self.master._objlangs:
                # We do, but do we have a handler for that language?
                lang = self.master._objlangs[obj]
                if lang in self.master._handlers:
                    # We do.
                    try:
                        output = self.master._handlers[lang].call(self.master, obj, user, args)
                    except python.PythonObjectError as e:
                        self.warn(str(e))
                        if not ignore_object_errors:
                            raise ObjectError(str(e))
                        output = RS_ERR_OBJECT
                else:
                    if not ignore_object_errors:
                        raise ObjectError(RS_ERR_OBJECT_HANDLER)
                    output = RS_ERR_OBJECT_HANDLER
            else:
                if not ignore_object_errors:
                    raise ObjectError(RS_ERR_OBJECT_MISSING)
                output = RS_ERR_OBJECT_MISSING

            reply = reply.replace('<call>{match}</call>'.format(match=match), output)

        return reply
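
The core trick in this example is the while loop that repeatedly searches for the innermost tag and substitutes its result until no tags remain. A simplified sketch of that loop with a made-up tag grammar and variable store (not RiveScript's actual tag set):

import re

# matches a <tag ...> whose body contains no nested '<' or '>',
# so the innermost tag is always found first
TAG_SEARCH = re.compile(r'<([^<>]+)>')

variables = {'name': 'Alice', 'greeting': 'hello'}   # made-up store

def expand(reply):
    while True:
        match = TAG_SEARCH.search(reply)
        if not match:
            break
        inner = match.group(1)            # e.g. "get name"
        tag, _, data = inner.partition(' ')
        if tag == 'get':
            insert = variables.get(data, 'undefined')
        else:
            insert = ''                   # unrecognized tag: drop it
        reply = reply.replace('<{}>'.format(inner), insert)
    return reply

print(expand('<get greeting>, <get name>!'))   # hello, Alice!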

Example 112

Project: drawquest-web
Source File: rjsmin.py
View license
def _make_jsmin(extended=True, python_only=False):
    """
    Generate JS minifier based on `jsmin.c by Douglas Crockford`_

    .. _jsmin.c by Douglas Crockford:
       http://www.crockford.com/javascript/jsmin.c

    :Parameters:
      `extended` : ``bool``
        Extended Regexps? (using lookahead and lookbehind). This is faster,
        because it can be optimized way more. The regexps used with `extended`
        being false are only left here to allow easier porting to platforms
        without extended regex features (and for my own reference...)

      `python_only` : ``bool``
        Use only the python variant. If true, the c extension is not even
        tried to be loaded.

    :Return: Minifier
    :Rtype: ``callable``
    """
    # pylint: disable = R0912, R0914, W0612
    if not python_only:
        try:
            import _rjsmin
        except ImportError:
            pass
        else:
            return _rjsmin.jsmin
    try:
        xrange
    except NameError:
        xrange = range # pylint: disable = W0622

    space_chars = r'[\000-\011\013\014\016-\040]'

    line_comment = r'(?://[^\r\n]*)'
    space_comment = r'(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)'
    string1 = \
        r'(?:\047[^\047\\\r\n]*(?:\\(?:[^\r\n]|\r?\n|\r)[^\047\\\r\n]*)*\047)'
    string2 = r'(?:"[^"\\\r\n]*(?:\\(?:[^\r\n]|\r?\n|\r)[^"\\\r\n]*)*")'
    strings = r'(?:%s|%s)' % (string1, string2)

    charclass = r'(?:\[[^\\\]\r\n]*(?:\\[^\r\n][^\\\]\r\n]*)*\])'
    nospecial = r'[^/\\\[\r\n]'
    if extended:
        regex = r'(?:/(?![\r\n/*])%s*(?:(?:\\[^\r\n]|%s)%s*)*/)' % (
            nospecial, charclass, nospecial
        )
    else:
        regex = (
            r'(?:/(?:[^*/\\\r\n\[]|%s|\\[^\r\n])%s*(?:(?:\\[^\r\n]|%s)%s*)*/)'
        )
        regex = regex % (charclass, nospecial, charclass, nospecial)
    pre_regex = r'[(,=:\[!&|?{};\r\n]'

    space = r'(?:%s|%s)' % (space_chars, space_comment)
    newline = r'(?:%s?[\r\n])' % line_comment

    def fix_charclass(result):
        """ Fixup string of chars to fit into a regex char class """
        pos = result.find('-')
        if pos >= 0:
            result = r'%s%s-' % (result[:pos], result[pos + 1:])

        def sequentize(string):
            """
            Notate consecutive characters as sequence

            (1-4 instead of 1234)
            """
            first, last, result = None, None, []
            for char in map(ord, string):
                if last is None:
                    first = last = char
                elif last + 1 == char:
                    last = char
                else:
                    result.append((first, last))
                    first = last = char
            if last is not None:
                result.append((first, last))
            return ''.join(['%s%s%s' % (
                chr(first),
                last > first + 1 and '-' or '',
                last != first and chr(last) or ''
            ) for first, last in result])

        return _re.sub(r'([\000-\040\047])', # for better portability
            lambda m: '\\%03o' % ord(m.group(1)), (sequentize(result)
                .replace('\\', '\\\\')
                .replace('[', '\\[')
                .replace(']', '\\]')
            )
        )

    def id_literal_(what):
        """ Make id_literal like char class """
        match = _re.compile(what).match
        result = ''.join([
            chr(c) for c in xrange(127) if not match(chr(c))
        ])
        return '[^%s]' % fix_charclass(result)

    def not_id_literal_(keep):
        """ Make negated id_literal like char class """
        match = _re.compile(id_literal_(keep)).match
        result = ''.join([
            chr(c) for c in xrange(127) if not match(chr(c))
        ])
        return r'[%s]' % fix_charclass(result)

    if extended:
        id_literal = id_literal_(r'[a-zA-Z0-9_$]')
        id_literal_open = id_literal_(r'[a-zA-Z0-9_${\[(+-]')
        id_literal_close = id_literal_(r'[a-zA-Z0-9_$}\])"\047+-]')

        space_sub = _re.compile((
            r'([^\047"/\000-\040]+)'
            r'|(%(strings)s[^\047"/\000-\040]*)'
            r'|(?:(?<=%(pre_regex)s)%(space)s*(%(regex)s[^\047"/\000-\040]*))'
            r'|(?<=%(id_literal_close)s)'
                r'%(space)s*(?:(%(newline)s)%(space)s*)+'
                r'(?=%(id_literal_open)s)'
            r'|(?<=%(id_literal)s)(%(space)s)+(?=%(id_literal)s)'
            r'|%(space)s+'
            r'|(?:%(newline)s%(space)s*)+'
        ) % locals()).sub
        def space_subber(match):
            """ Substitution callback """
            # pylint: disable = C0321
            groups = match.groups()
            if groups[0]: return groups[0]
            elif groups[1]: return groups[1]
            elif groups[2]: return groups[2]
            elif groups[3]: return '\n'
            elif groups[4]: return ' '
            return ''

        def jsmin(script): # pylint: disable = W0621
            r"""
            Minify javascript based on `jsmin.c by Douglas Crockford`_\.

            Instead of parsing the stream char by char, it uses a regular
            expression approach which minifies the whole script with one big
            substitution regex.

            .. _jsmin.c by Douglas Crockford:
               http://www.crockford.com/javascript/jsmin.c

            :Parameters:
              `script` : ``str``
                Script to minify

            :Return: Minified script
            :Rtype: ``str``
            """
            return space_sub(space_subber, '\n%s\n' % script).strip()

    else:
        not_id_literal = not_id_literal_(r'[a-zA-Z0-9_$]')
        not_id_literal_open = not_id_literal_(r'[a-zA-Z0-9_${\[(+-]')
        not_id_literal_close = not_id_literal_(r'[a-zA-Z0-9_$}\])"\047+-]')

        space_norm_sub = _re.compile((
            r'(%(strings)s)'
            r'|(?:(%(pre_regex)s)%(space)s*(%(regex)s))'
            r'|(%(space)s)+'
            r'|(?:(%(newline)s)%(space)s*)+'
        ) % locals()).sub
        def space_norm_subber(match):
            """ Substitution callback """
            # pylint: disable = C0321
            groups = match.groups()
            if groups[0]: return groups[0]
            elif groups[1]: return groups[1].replace('\r', '\n') + groups[2]
            elif groups[3]: return ' '
            elif groups[4]: return '\n'

        space_sub1 = _re.compile((
            r'[\040\n]?(%(strings)s|%(pre_regex)s%(regex)s)'
            r'|\040(%(not_id_literal)s)'
            r'|\n(%(not_id_literal_open)s)'
        ) % locals()).sub
        def space_subber1(match):
            """ Substitution callback """
            groups = match.groups()
            return groups[0] or groups[1] or groups[2]

        space_sub2 = _re.compile((
            r'(%(strings)s)\040?'
            r'|(%(pre_regex)s%(regex)s)[\040\n]?'
            r'|(%(not_id_literal)s)\040'
            r'|(%(not_id_literal_close)s)\n'
        ) % locals()).sub
        def space_subber2(match):
            """ Substitution callback """
            groups = match.groups()
            return groups[0] or groups[1] or groups[2] or groups[3]

        def jsmin(script):
            r"""
            Minify javascript based on `jsmin.c by Douglas Crockford`_\.

            Instead of parsing the stream char by char, it uses a regular
            expression approach. The script is minified with three passes:

            normalization
                Control characters are mapped to spaces, spaces and newlines
                are squeezed and comments are stripped.
            space removal 1
                Spaces before certain tokens are removed
            space removal 2
                Spaces after certain tokens are removed

            .. _jsmin.c by Douglas Crockford:
               http://www.crockford.com/javascript/jsmin.c

            :Parameters:
              `script` : ``str``
                Script to minify

            :Return: Minified script
            :Rtype: ``str``
            """
            return space_sub2(space_subber2,
                space_sub1(space_subber1,
                    space_norm_sub(space_norm_subber, '\n%s\n' % script)
                )
            ).strip()
    return jsmin
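
fix_charclass above leans on re.sub with a replacement callback to rewrite each matched character as an octal escape; a stripped-down sketch of that idiom (the pattern and helper name are illustrative, not rjsmin's):

import re

def escape_controls(text):
    # replace each control character (and the single quote) with a \NNN
    # octal escape, mirroring the callback style used by fix_charclass
    return re.sub(r"([\000-\037'])",
                  lambda m: '\\%03o' % ord(m.group(1)),
                  text)

print(escape_controls("a\tb'c"))   # a\011b\047c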

Example 113

View license
def main(argv=None):
    '''
    Process the command line arguments and create the JSON dump.

    :param argv: List of arguments, as if specified on the command-line.
                 If None, ``sys.argv[1:]`` is used instead.
    :type argv: list of str
    '''
    # Get command line arguments
    parser = argparse.ArgumentParser(
        description="Export all users/issues from GitLab to JIRA JSON format.",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        conflict_handler='resolve')
    parser.add_argument('gitlab_url',
                        help='The full URL to your GitLab instance.')
    parser.add_argument('-d', '--date_filter',
                        help='Only include issues, notes, etc. created after\
                              the specified date. Expected format is \
                              YYYY-MM-DD',
                        type=get_datetime, default='1970-01-01')
    parser.add_argument('-e', '--include_empty',
                        help='Include projects in output that do not have any\
                              issues.',
                        action='store_true')
    parser.add_argument('-i', '--ignore_list',
                        help='List of project names to exclude from dump.',
                        type=argparse.FileType('r'))
    parser.add_argument('-p', '--password',
                        help='The password to use to authenticate if token is \
                              not specified. If password and token are both \
                              unspecified, you will be prompted to enter a \
                              password.')
    parser.add_argument('-P', '--page_size',
                        help='When retrieving results from GitLab, how many \
                              results should be included in a given page?',
                        type=int, default=20)
    parser.add_argument('-s', '--verify_ssl',
                        help='Enable SSL certificate verification',
                        action='store_true')
    parser.add_argument('-t', '--token',
                        help='The private GitLab API token to use for \
                              authentication. Either this or username and \
                              password must be set.')
    parser.add_argument('-u', '--username',
                        help='The username to use for authentication, if token\
                              is unspecified.')
    parser.add_argument('-v', '--verbose',
                        help='Print more status information. For every ' +
                             'additional time this flag is specified, ' +
                             'output gets more verbose.',
                        default=0, action='count')
    parser.add_argument('--version', action='version',
                        version='%(prog)s {0}'.format(__version__))
    args = parser.parse_args(argv)

    args.page_size = max(100, args.page_size)

    # Convert verbose flag to actually logging level
    log_levels = [logging.WARNING, logging.INFO, logging.DEBUG]
    log_level = log_levels[min(args.verbose, 2)]
    # Make warnings from built-in warnings module get formatted more nicely
    logging.captureWarnings(True)
    logging.basicConfig(format=('%(asctime)s - %(name)s - %(levelname)s - ' +
                                '%(message)s'), level=log_level)

    # Setup authenticated GitLab instance
    if args.token:
        git = GitLab(args.gitlab_url, token=args.token,
                     verify_ssl=args.verify_ssl)
    else:
        if not args.username:
            print('Username: ', end="", file=sys.stderr)
            args.username = input('').strip()
        if not args.password:
            args.password = getpass.getpass('Password: ')
        git = GitLab(args.gitlab_url, verify_ssl=args.verify_ssl)
        git.login(args.username, args.password)

    # Initialize output dictionary
    output_dict = defaultdict(list)

    print('Creating project entries...', end="", file=sys.stderr)
    sys.stderr.flush()
    key_set = set()
    mentioned_users = set()
    if args.ignore_list is not None:
        ignore_list = {line.strip().lower() for line in args.ignore_list}
    else:
        ignore_list = set()
    for project in gen_all_results(git.getprojectsall,
                                   per_page=args.page_size):
        proj_name_lower = project['name'].lower()
        if proj_name_lower not in ignore_list and project['issues_enabled']:
            project_issues = []
            for issue in gen_all_results(git.getprojectissues, project['id'],
                                         per_page=args.page_size):
                if args.date_filter < parsedate(issue['updated_at']).replace(tzinfo=None):
                    project_issues.append(issue)
                else:
                    for note in git.getissuewallnotes(project['id'],
                                                      issue['id']):
                        if args.date_filter < parsedate(note['created_at']).replace(tzinfo=None):
                            project_issues.append(issue)
                            break

            if project_issues or args.include_empty:
                jira_project = {}
                jira_project['name'] = project['name_with_namespace']
                key = project['name']
                if key.islower():
                    key = key.title()
                key = re.sub(r'[^A-Z]', '', key)
                if len(key) < 2:
                    key = re.sub(r'[^A-Za-z]', '',
                                 project['name'])[0:2].upper()
                added = False
                suffix = 65
                while key in key_set:
                    if not added:
                        key += 'A'
                        added = True
                    else:
                        suffix += 1
                        key = key[:-1] + chr(suffix)
                key_set.add(key)
                jira_project['key'] = key
                jira_project['description'] = md_to_wiki(project['description'])
                # jira_project['created'] = project['created_at']
                jira_project['issues'] = []
                for issue in project_issues:
                    jira_issue = {}
                    jira_issue['externalId'] = issue['iid']
                    if issue['state'] == 'closed':
                        jira_issue['status'] = 'Closed'
                        jira_issue['resolution'] = 'Resolved'
                    else:
                        jira_issue['status'] = 'Open'

                    jira_issue['description'] = md_to_wiki(issue['description'])
                    jira_issue['reporter'] = issue['author']['username']
                    mentioned_users.add(jira_issue['reporter'])
                    jira_issue['labels'] = issue['labels']
                    jira_issue['summary'] = issue['title']
                    if issue['assignee']:
                        jira_issue['assignee'] = issue['assignee']['username']
                        mentioned_users.add(jira_issue['assignee'])
                    jira_issue['issueType'] = 'Bug'
                    jira_issue['comments'] = []
                    # Get all comments/notes
                    for note in git.getissuewallnotes(project['id'],
                                                      issue['id']):
                        jira_note = {}
                        jira_note['body'] = md_to_wiki(note['body'])
                        jira_note['author'] = note['author']['username']
                        mentioned_users.add(jira_note['author'])
                        jira_note['created'] = note['created_at']
                        jira_issue['comments'].append(jira_note)
                    jira_project['issues'].append(jira_issue)

                output_dict['projects'].append(jira_project)
        print('.', end="", file=sys.stderr)
        sys.stderr.flush()

    print('\nCreating user entries...', end="", file=sys.stderr)
    sys.stderr.flush()
    for user in gen_all_results(git.getusers, per_page=args.page_size):
        # Only add users who are actually referenced in issues
        if user['username'] in mentioned_users:
            jira_user = {}
            jira_user['name'] = user['username']
            jira_user['fullname'] = user['name']
            jira_user['email'] = user['email']
            jira_user['groups'] = ['gitlab-users']
            jira_user['active'] = (user['state'] == 'active')
            output_dict['users'].append(jira_user)
        print('.', end="", file=sys.stderr)
        sys.stderr.flush()

    print('\nPrinting JSON output...', file=sys.stderr)
    sys.stderr.flush()
    print(json.dumps(output_dict, indent=4))
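
The project-key derivation boils down to two re.sub calls: strip everything but capitals, and fall back to the first two letters when too few survive. A sketch of just that step (the project_key helper name is mine, and the key-collision handling from the loop above is omitted):

import re

def project_key(name):
    # derive a short JIRA-style key from a project name:
    # title-case all-lowercase names so there are capitals to harvest,
    # keep the capitals, and fall back to the first two letters otherwise
    key = name.title() if name.islower() else name
    key = re.sub(r'[^A-Z]', '', key)
    if len(key) < 2:
        key = re.sub(r'[^A-Za-z]', '', name)[0:2].upper()
    return key

print(project_key('My Cool Project'))   # MCP
print(project_key('gitlab'))            # GI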

Example 114

Project: edx-platform
Source File: formula.py
View license
    def preprocess_pmathml(self, xml):
        r"""
        Pre-process presentation MathML from ASCIIMathML to make it more
        acceptable for SnuggleTeX, and also to accommodate some sympy
        conventions (eg hat(i) for \hat{i}).

        This method would be a good spot to look for an integral and convert
        it, if possible...
        """

        if isinstance(xml, (str, unicode)):
            xml = etree.fromstring(xml)		# TODO: wrap in try

        xml = self.fix_greek_in_mathml(xml)	 # convert greek utf letters to greek spelled out in ascii

        def gettag(expr):
            return re.sub('{http://[^}]+}', '', expr.tag)

        def fix_pmathml(xml):
            """
            f and g are processed as functions by asciimathml, eg "f-2" turns
            into "<mrow><mi>f</mi><mo>-</mo></mrow><mn>2</mn>" this is
            really terrible for turning into cmathml.  undo this here.
            """
            for k in xml:
                tag = gettag(k)
                if tag == 'mrow':
                    if len(k) == 2:
                        if gettag(k[0]) == 'mi' and k[0].text in ['f', 'g'] and gettag(k[1]) == 'mo':
                            idx = xml.index(k)
                            xml.insert(idx, deepcopy(k[0]))	 # drop the <mrow> container
                            xml.insert(idx + 1, deepcopy(k[1]))
                            xml.remove(k)
                fix_pmathml(k)

        fix_pmathml(xml)

        def fix_hat(xml):
            """
            hat i is turned into <mover><mi>i</mi><mo>^</mo></mover> ; mangle
            this into <mi>hat(f)</mi>; hat i is also sometimes turned into
            <mover><mrow> <mi>j</mi> </mrow><mo>^</mo></mover>
            """
            for k in xml:
                tag = gettag(k)
                if tag == 'mover':
                    if len(k) == 2:
                        if gettag(k[0]) == 'mi' and gettag(k[1]) == 'mo' and str(k[1].text) == '^':
                            newk = etree.Element('mi')
                            newk.text = 'hat(%s)' % k[0].text
                            xml.replace(k, newk)
                        if gettag(k[0]) == 'mrow' and gettag(k[0][0]) == 'mi' and \
                           gettag(k[1]) == 'mo' and str(k[1].text) == '^':
                            newk = etree.Element('mi')
                            newk.text = 'hat(%s)' % k[0][0].text
                            xml.replace(k, newk)
                fix_hat(k)
        fix_hat(xml)

        def flatten_pmathml(xml):
            """
            Give the text version of certain PMathML elements

            Sometimes MathML will be given with each letter separated (it
            doesn't know if it's implicit multiplication or what). From an xml
            node, find the (text only) variable name it represents. So it takes
            <mrow>
              <mi>m</mi>
              <mi>a</mi>
              <mi>x</mi>
            </mrow>
            and returns 'max', for easier use later on.
            """
            tag = gettag(xml)
            if tag == 'mn':
                return xml.text
            elif tag == 'mi':
                return xml.text
            elif tag == 'mrow':
                return ''.join([flatten_pmathml(y) for y in xml])
            raise Exception('[flatten_pmathml] unknown tag %s' % tag)

        def fix_mathvariant(parent):
            """
            Fix certain kinds of math variants

            Literally replace <mstyle mathvariant="script"><mi>N</mi></mstyle>
            with 'scriptN'. There have been problems using script_N or script(N)
            """
            for child in parent:
                if gettag(child) == 'mstyle' and child.get('mathvariant') == 'script':
                    newchild = etree.Element('mi')
                    newchild.text = 'script%s' % flatten_pmathml(child[0])
                    parent.replace(child, newchild)
                fix_mathvariant(child)
        fix_mathvariant(xml)

        # find "tagged" superscripts
        # they have the character \u200b in the superscript
        # replace them with a__b so snuggle doesn't get confused
        def fix_superscripts(xml):
            """ Look for and replace sup elements with 'X__Y' or 'X_Y__Z'

            In the javascript, variables with '__X' in them had an invisible
            character inserted into the sup (to distinguish from powers)
            E.g. normal:
            <msubsup>
              <mi>a</mi>
              <mi>b</mi>
              <mi>c</mi>
            </msubsup>
            to be interpreted '(a_b)^c' (nothing done by this method)

            And modified:
            <msubsup>
              <mi>b</mi>
              <mi>x</mi>
              <mrow>
                <mo>&#x200B;</mo>
                <mi>d</mi>
              </mrow>
            </msubsup>
            to be interpreted 'b_x__d'

            also:
            <msup>
              <mi>x</mi>
              <mrow>
                <mo>&#x200B;</mo>
                <mi>B</mi>
              </mrow>
            </msup>
            to be 'x__B'
            """
            for k in xml:
                tag = gettag(k)

                # match things like the last example--
                # the second item in msup is an mrow with the first
                # character equal to \u200b
                if (
                        tag == 'msup' and
                        len(k) == 2 and gettag(k[1]) == 'mrow' and
                        gettag(k[1][0]) == 'mo' and k[1][0].text == u'\u200b'  # whew
                ):

                    # replace the msup with 'X__Y'
                    k[1].remove(k[1][0])
                    newk = etree.Element('mi')
                    newk.text = '%s__%s' % (flatten_pmathml(k[0]), flatten_pmathml(k[1]))
                    xml.replace(k, newk)

                # match things like the middle example-
                # the third item in msubsup is an mrow with the first
                # character equal to \u200b
                if (
                        tag == 'msubsup' and
                        len(k) == 3 and gettag(k[2]) == 'mrow' and
                        gettag(k[2][0]) == 'mo' and k[2][0].text == u'\u200b'    # whew
                ):

                    # replace the msubsup with 'X_Y__Z'
                    k[2].remove(k[2][0])
                    newk = etree.Element('mi')
                    newk.text = '%s_%s__%s' % (flatten_pmathml(k[0]), flatten_pmathml(k[1]), flatten_pmathml(k[2]))
                    xml.replace(k, newk)

                fix_superscripts(k)
        fix_superscripts(xml)

        def fix_msubsup(parent):
            """
            Snuggle returns an error when it sees an <msubsup>; replace such
            elements with an <msup> whose first element is of the form a_b,
            i.e. map a_b^c => (a_b)^c
            """
            for child in parent:
                # fix msubsup
                if gettag(child) == 'msubsup' and len(child) == 3:
                    newchild = etree.Element('msup')
                    newbase = etree.Element('mi')
                    newbase.text = '%s_%s' % (flatten_pmathml(child[0]), flatten_pmathml(child[1]))
                    newexp = child[2]
                    newchild.append(newbase)
                    newchild.append(newexp)
                    parent.replace(child, newchild)

                fix_msubsup(child)
        fix_msubsup(xml)

        self.xml = xml  # pylint: disable=attribute-defined-outside-init
        return self.xml
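To make the tagged-superscript transform above easier to follow, here is a minimal standalone sketch (not part of the project, sample markup made up) of the same idea: an <msup> whose exponent <mrow> begins with a zero-width space is collapsed into a single <mi> named 'X__Y'.

from lxml import etree

# minimal sketch only -- the sample MathML below is illustrative, not from the project
xml = etree.fromstring(
    '<math><msup><mi>x</mi><mrow><mo>\u200b</mo><mi>B</mi></mrow></msup></math>'
)
msup = xml[0]
base, exp = msup[0], msup[1]
if exp.tag == 'mrow' and exp[0].tag == 'mo' and exp[0].text == '\u200b':
    exp.remove(exp[0])                      # drop the zero-width-space marker
    flat = etree.Element('mi')
    flat.text = '%s__%s' % (base.text, ''.join(e.text for e in exp))
    xml.replace(msup, flat)                 # <msup>...</msup> -> <mi>x__B</mi>

print(etree.tostring(xml))                  # b'<math><mi>x__B</mi></math>'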

Example 115

Project: open-context-py
Source File: models.py
View license
    def compose_query(self, request_dict):
        """ composes the search query based on the request_dict """
        qm = QueryMaker()
        qm.mem_cache_obj = self.mem_cache_obj
        child_context_join = False # do a JOIN to include children in results
        query = {}
        query['facet'] = 'true'
        query['facet.mincount'] = 1
        query['rows'] = self.rows
        query['start'] = self.start
        query['debugQuery'] = 'false'
        query['fq'] = []
        query['facet.field'] = []
        query['facet.range'] = []
        query['stats'] = 'true'
        query['stats.field'] = self.stats_fields
        query['sort'] = SortingOptions.DEFAULT_SOLR_SORT
        s_param = self.get_request_param(request_dict,
                                         'sort',
                                         False,
                                         False)
        if s_param is not False:
            # add custom sorting
            sort_opts = SortingOptions()
            query['sort'] = sort_opts.make_solr_sort_param(s_param)
        # If the user does not provide a search term, search for everything
        query['q'] = '*:*'  # default: search for all
        q_param = self.get_request_param(request_dict,
                                         'q',
                                         False,
                                         False)
        if q_param is not False:
            escaped_terms = qm.prep_string_search_term(q_param)
            query['q'] = ' '.join(escaped_terms)
            query['q.op'] = 'AND'
            query['hl'] = 'true'
            query['hl.fl'] = 'text'
            query['hl.q'] = ' '.join(escaped_terms)
        start = self.get_request_param(request_dict,
                                       'start',
                                       False,
                                       False)
        if start is not False:
            query['start'] = re.sub(r'[^\d]', r'', str(start))
        rows = self.get_request_param(request_dict,
                                      'rows',
                                      False,
                                      False)
        if rows is not False:
            rows = re.sub(r'[^\d]', r'', str(rows))
            rows = int(float(rows))
            if rows > self.max_rows:
                rows = self.max_rows
            elif rows < 0:
                rows = 0
            query['rows'] = rows
        # Spatial Context
        if 'path' in request_dict and self.do_context_paths:
            self.remove_from_default_facet_fields(SolrDocument.ROOT_CONTEXT_SOLR)
            context = qm._process_spatial_context(request_dict['path'])
            query['fq'].append(context['fq'])
            query['facet.field'] += context['facet.field']  # context facet fields, always a list
        # Properties and Linked Data
        props = self.get_request_param(request_dict,
                                       'prop',
                                       False,
                                       True)
        if props is not False:
            for act_prop in props:
                # process each prop independently.
                prop_query = qm.process_prop(act_prop)
                query['fq'] += prop_query['fq']
                query['facet.field'] += prop_query['facet.field']
                query['stats.field'] += prop_query['stats.field']
                query['facet.range'] += prop_query['facet.range']
                if 'ranges' in prop_query:
                    for key, value in prop_query['ranges'].items():
                        query[key] = value
                if 'hl-queries' in prop_query:
                    query['hl'] = 'true'
                    query['hl.fl'] = 'text'
                    # query['hl.snippets'] = 2
                    for q_term in prop_query['hl-queries']:
                        if 'hl.q' in query:
                            query['hl.q'] += ' OR (' + q_term + ')'
                        else:
                            query['hl.q'] = q_term
                if 'prequery-stats' in prop_query:
                    # we have fields that need a stats prequery
                    self.prequery_stats += prop_query['prequery-stats']
        # Project
        proj = self.get_request_param(request_dict,
                                      'proj',
                                      False)
        if proj is not False:
            # remove the facet field, since we're already filtering with it
            self.remove_from_default_facet_fields(SolrDocument.ROOT_PROJECT_SOLR)
            proj_query = qm.process_proj(proj)
            query['fq'] += proj_query['fq']
            query['facet.field'] += proj_query['facet.field']    
        # Dublin-Core terms
        dc_terms_obj = DCterms()
        dc_params = dc_terms_obj.get_dc_params_list()
        for dc_param in dc_params:
            dc_terms = self.get_request_param(request_dict,
                                              dc_param,
                                              False,
                                              True)
            if dc_terms is not False:
                dc_query = qm.process_dc_term(dc_param,
                                              dc_terms)
                query['fq'] += dc_query['fq']
                query['facet.field'] += dc_query['facet.field']
                if dc_param == 'dc-temporal':
                    child_context_join = False  # turn this off
        # item-types
        item_type = self.get_request_param(request_dict,
                                           'type',
                                           False,
                                           False)
        if item_type is not False:
            # remove the facet field, since we're already filtering with it
            self.remove_from_default_facet_fields('item_type')
            it_query = qm.process_item_type(item_type)
            query['fq'] += it_query['fq']
            query['facet.field'] += it_query['facet.field']
        """
        If an item_type_limit is set, then we're doing a specialized search
        that looks only for a certain item_type.
        """
        if self.item_type_limit is not False:
            query['fq'].append('item_type:' + self.item_type_limit)
            if self.item_type_limit in self.ITEM_TYPE_ROWS:
                query['rows'] = self.ITEM_TYPE_ROWS[self.item_type_limit]
            if self.item_type_limit in self.ITEM_TYPE_FACET_MIN:
                query['facet.mincount'] = self.ITEM_TYPE_FACET_MIN[self.item_type_limit]
            if self.item_type_limit in self.ITEM_TYPE_FACETFIELDS:
                for add_facet_field in self.ITEM_TYPE_FACETFIELDS[self.item_type_limit]:
                    if add_facet_field not in query['facet.field']:
                        # add facet field for this type of item
                        query['facet.field'].append(add_facet_field)
        else:
            cat_field_found = False
            for item_cat_field in self.ITEM_CAT_FIELDS:
                for facet_field in query['facet.field']:
                    if item_cat_field in facet_field:
                        cat_field_found = True
            if cat_field_found is False:
                query['facet.field'].append('item_type')
        """ CHRONOLOGY Form Use Life (form)
            queries
        """
        # now add form-use-life chronology
        form_chrono = self.get_request_param(request_dict,
                                             'form-chronotile',
                                             False,
                                             False)
        if form_chrono is not False:
            # query for form-use-life chronological tile
            form_chrono_query = qm.process_form_use_life_chrono(form_chrono)
            query['fq'] += form_chrono_query['fq']
            query['facet.field'] += form_chrono_query['facet.field']
            query['f.form_use_life_chrono_tile.facet.limit'] = -1
        else:
            # Add default form-use-life chronology
            query = self.add_root_form_use_life_chrono(query,
                                                       request_dict)
        form_start = self.get_request_param(request_dict,
                                            'form-start',
                                            False,
                                            False)
        if form_start is not False:
            # query for form-use-life start date
            form_start_query = qm.process_form_date_chrono(form_start,
                                                           'start')
            query['fq'] += form_start_query['fq']
        form_stop = self.get_request_param(request_dict,
                                           'form-stop',
                                           False,
                                           False)
        if form_stop is not False:
            # query for form-use-life stop date
            form_stop_query = qm.process_form_date_chrono(form_stop,
                                                          'stop')
            query['fq'] += form_stop_query['fq']
        """ Updated and Published Times
        """
        updated = self.get_request_param(request_dict,
                                         'updated',
                                         False,
                                         False)
        if updated is not False:
            # query for when the resource was updated
            query['fq'].append('updated:' + updated)
        published = self.get_request_param(request_dict,
                                           'published',
                                           False,
                                           False)
        if published is not False:
            # query for when the resource was published
            query['fq'].append('published:' + published)
        """
            query by uuid, uri, or other identifier
        """
        uuid = self.get_request_param(request_dict,
                                      'uuid',
                                      False,
                                      False)
        if uuid is not False:
            query['fq'].append('uuid:' + uuid)
        identifier = self.get_request_param(request_dict,
                                            'id',
                                            False,
                                            False)
        if identifier is not False:
            id_query = qm.process_id(identifier)
            query['fq'] += id_query['fq']
        """ Linked media (images, documents, other)
            queries
        """
        # images
        images = self.get_request_param(request_dict,
                                        'images',
                                        False,
                                        False)
        if images is not False:
            query['fq'] += ['image_media_count:[1 TO *]']
        # other media (not images)
        other_media = self.get_request_param(request_dict,
                                             'other-media',
                                             False,
                                             False)
        if other_media is not False:
            query['fq'] += ['other_binary_media_count:[1 TO *]']
        # documents
        documents = self.get_request_param(request_dict,
                                           'documents',
                                           False,
                                           False)
        if documents is not False:
            query['fq'] += ['document_count:[1 TO *]']
        """ Geospatial (discovery location)
            queries
        """
        # now add discovery geo location
        disc_geo = self.get_request_param(request_dict,
                                          'disc-geotile',
                                          False,
                                          False)
        if disc_geo is not False:
            disc_geo_query = qm.process_discovery_geo(disc_geo)
            query['fq'] += disc_geo_query['fq']
            query['facet.field'] += disc_geo_query['facet.field']
            query['f.discovery_geotile.facet.limit'] = -1
        else:
            # Add default geofacet
            query = self.add_root_discovery_geo(query,
                                                request_dict)
        # geospatial bounding box query
        disc_bbox = self.get_request_param(request_dict,
                                           'disc-bbox',
                                           False,
                                           False)
        if disc_bbox is not False:
            disc_bbox_query = qm.process_discovery_bbox(disc_bbox)
            query['fq'] += disc_bbox_query['fq']
        # get items with a URI (or slug) identified object
        obj = self.get_request_param(request_dict,
                                     'obj',
                                     False)
        if obj is not False:
            obj_query = qm.process_ld_object(obj)
            query['fq'] += obj_query['fq']
        """ -----------------------------------------
            Add default facet fields, used for most
            searches
            -----------------------------------------
        """
        query = self.add_default_facet_fields(query,
                                              request_dict)
        """ -----------------------------------------
            Additional, dataset specific specialized
            queries
            -----------------------------------------
        """
        # special queries (to simplify access to specific datasets)
        spsearch = SpecialSearches()
        response = self.get_request_param(request_dict,
                                          'response',
                                          False,
                                          False)
        if response is not False:
            if 'geo-project' in response:
                # request for special handling of project facets with
                # added geospatial and chronological metadata
                query = spsearch.process_geo_projects(query)
        linked = self.get_request_param(request_dict,
                                        'linked',
                                        False,
                                        False)
        if linked == 'dinaa-cross-ref':
            query = spsearch.process_linked_dinaa(query)
        trinomial = self.get_request_param(request_dict,
                                           'trinomial',
                                           False,
                                           False)
        if trinomial is not False:
            query = spsearch.process_trinonial_reconcile(trinomial,
                                                         query)
        reconcile = self.get_request_param(request_dict,
                                           'reconcile',
                                           False,
                                           True)
        if reconcile is not False:
            query = spsearch.process_reconcile(reconcile,
                                               query)
        if len(self.prequery_stats) > 0:
            #  we have fields that need a stats prequery
            statsq = StatsQuery()
            statsq.q = query['q']
            if 'q.op' in query:
                statsq.q_op = query['q.op']
            statsq.fq = query['fq']
            statsq.stats_fields = self.prequery_stats
            query = statsq.add_stats_ranges_from_solr(query)
        # Now set aside entities used as search filters
        self.mem_cache_obj = qm.mem_cache_obj
        if child_context_join:
            all_fq = False
            for fq in query['fq']:
                if all_fq is False:
                    all_fq = '(' + fq + ')'
                else:
                    all_fq += ' AND (' + fq + ')'
            all_fq = '(' + all_fq + ')'
            joined_fq = '{!join from=slug_type_uri_label to=obj_all___context_id}' + all_fq 
            query['fq'] = all_fq + ' OR _query_:"' + joined_fq + '"' 
        return query
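The 'start' and 'rows' handling above relies on a digits-only sanitization with re.sub followed by clamping. A minimal standalone sketch; the helper name clean_rows and the max_rows default below are illustrative, not taken from the project:

import re

# minimal sketch only -- clean_rows and max_rows=200 are made-up, not project values
def clean_rows(raw, max_rows=200):
    digits = re.sub(r'[^\d]', '', str(raw))   # keep digits only
    rows = int(digits) if digits else 0
    return min(max(rows, 0), max_rows)        # clamp to [0, max_rows]

print(clean_rows('50; DROP TABLE'))  # 50
print(clean_rows('9999'))            # 200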

Example 116

Project: anima
Source File: repr_tools.py
View license
    def generate_ass(self):
        """generates the ASS representation of the current scene

        For Model Tasks the ASS is generated over the LookDev Task because it
        is not possible to assign a material to an object inside an ASS file.
        """
        # before doing anything, check if this is a look dev task
        # and export the objects from the referenced files with their current
        # shadings, then replace all of the references to ASS repr and then
        # add Stand-in nodes and parent them under the referenced models

        # load necessary plugins
        pm.loadPlugin('mtoa')

        # disable "show plugin shapes"
        active_panel = auxiliary.Playblaster.get_active_panel()
        show_plugin_shapes = pm.modelEditor(active_panel, q=1, pluginShapes=1)
        pm.modelEditor(active_panel, e=1, pluginShapes=False)

        # validate the version first
        self.version = self._validate_version(self.version)

        self.open_version(self.version)

        task = self.version.task

        # export_command = 'arnoldExportAss -f "%(path)s" -s -mask 24 ' \
        #                  '-lightLinks 0 -compressed -boundingBox ' \
        #                  '-shadowLinks 0 -cam perspShape;'

        export_command = 'arnoldExportAss -f "%(path)s" -s -mask 60 ' \
                         '-lightLinks 1 -compressed -boundingBox ' \
                         '-shadowLinks 1 -cam perspShape;'

        # calculate output path
        output_path = \
            os.path.join(self.version.absolute_path, 'Outputs/ass/')\
            .replace('\\', '/')

        # check if all references have an ASS repr first
        refs_with_no_ass_repr = []
        for ref in pm.listReferences():
            if ref.version and not ref.has_repr('ASS'):
                refs_with_no_ass_repr.append(ref)

        if len(refs_with_no_ass_repr):
            raise RuntimeError(
                'Please generate the ASS Representation of the references '
                'first!!!\n%s' %
                '\n'.join(map(lambda x: str(x.path), refs_with_no_ass_repr))
            )

        if self.is_look_dev_task(task):
            # in look dev files, we export the ASS files directly from the Base
            # version and parent the resulting Stand-In node to the parent of
            # the child node

            # load only Model references
            for ref in pm.listReferences():
                v = ref.version
                load_ref = False
                if v:
                    ref_task = v.task
                    if self.is_model_task(ref_task):
                        load_ref = True

                if load_ref:
                    ref.load()
                    ref.importContents()

            # Make all texture paths relative
            # replace all "$REPO#" from all texture paths first
            #
            # This is needed to properly render textures with any OS
            types_and_attrs = {
                'aiImage': 'filename',
                'file': 'fileTextureName',
                'imagePlane': 'imageName'
            }

            for node_type in types_and_attrs.keys():
                attr_name = types_and_attrs[node_type]
                for node in pm.ls(type=node_type):
                    orig_path = node.getAttr(attr_name).replace("\\", "/")
                    path = re.sub(
                        r'(\$REPO[0-9/]+)',
                        '',
                        orig_path
                    )
                    tx_path = self.make_tx(path)
                    inputs = node.attr(attr_name).inputs(p=1)
                    if len(inputs):
                        # set the input attribute
                        for input_node_attr in inputs:
                            input_node_attr.set(tx_path)
                    else:
                        node.setAttr(attr_name, tx_path)

            # randomize all render node names
            # This is needed to prevent clashing of materials in a bigger scene
            for node in pm.ls(type=RENDER_RELATED_NODE_TYPES):
                if node.referenceFile() is None and \
                   node.name() not in READ_ONLY_NODE_NAMES:
                    node.rename('%s_%s' % (node.name(), uuid.uuid4().hex))

            nodes_to_ass_files = {}

            # export all root ass files as they are
            for root_node in auxiliary.get_root_nodes():
                for child_node in root_node.getChildren():
                    # check if it is a transform node
                    if not isinstance(child_node, pm.nt.Transform):
                        continue

                    if not auxiliary.has_shape(child_node):
                        continue

                    # randomize child node name
                    # TODO: This is not working as intended, node names are like |NS:node1|NS:node2
                    #       resulting in a child_node_name of "node2"
                    child_node_name = child_node\
                        .fullPath()\
                        .replace('|', '_')\
                        .split(':')[-1]

                    child_node_full_path = child_node.fullPath()

                    pm.select(child_node)
                    child_node.rename('%s_%s' % (child_node.name(), uuid.uuid4().hex))

                    output_filename =\
                        '%s_%s.ass' % (
                            self.version.nice_name,
                            child_node_name
                        )

                    output_full_path = \
                        os.path.join(output_path, output_filename)

                    # run the mel command
                    pm.mel.eval(
                        export_command % {
                            'path': output_full_path.replace('\\', '/')
                        }
                    )
                    nodes_to_ass_files[child_node_full_path] = \
                        '%s.gz' % output_full_path
                    # print('%s -> %s' % (
                    #     child_node_full_path,
                    #     output_full_path)
                    # )

            # reload the scene
            pm.newFile(force=True)
            self.open_version(self.version)

            # convert all references to ASS
            # we are doing it a little bit early here, but we need to
            for ref in pm.listReferences():
                ref.to_repr('ASS')

            all_stand_ins = pm.ls(type='aiStandIn')
            for ass_node in all_stand_ins:
                ass_tra = ass_node.getParent()
                full_path = ass_tra.fullPath()
                if full_path in nodes_to_ass_files:
                    ass_file_path = \
                        Repository.to_os_independent_path(
                            nodes_to_ass_files[full_path]
                        )
                    ass_node.setAttr('dso', ass_file_path)

        elif self.is_vegetation_task(task):
            # in vegetation files, we export the ASS files directly from the
            # Base version, also we use the geometry under "pfxPolygons"
            # and parent the resulting Stand-In nodes to the
            # pfxPolygons
            # load all references
            for ref in pm.listReferences():
                ref.load()

            # Make all texture paths relative
            # replace all "$REPO#" from all texture paths first
            #
            # This is needed to properly render textures with any OS
            types_and_attrs = {
                'aiImage': 'filename',
                'file': 'fileTextureName',
                'imagePlane': 'imageName'
            }

            for node_type in types_and_attrs.keys():
                attr_name = types_and_attrs[node_type]
                for node in pm.ls(type=node_type):
                    orig_path = node.getAttr(attr_name).replace("\\", "/")
                    path = re.sub(
                        r'(\$REPO[0-9/]+)',
                        '',
                        orig_path
                    )
                    tx_path = self.make_tx(path)
                    inputs = node.attr(attr_name).inputs(p=1)
                    if len(inputs):
                        # set the input attribute
                        for input_node_attr in inputs:
                            input_node_attr.set(tx_path)
                    else:
                        node.setAttr(attr_name, tx_path)

            # import shaders that are referenced to this scene
            # there is only one reference in the vegetation task and this is
            # the shader scene
            for ref in pm.listReferences():
                ref.importContents()

            # randomize all render node names
            # This is needed to prevent clashing of materials in a bigger scene
            for node in pm.ls(type=RENDER_RELATED_NODE_TYPES):
                if node.referenceFile() is None and \
                   node.name() not in READ_ONLY_NODE_NAMES:
                    node.rename('%s_%s' % (node.name(), uuid.uuid4().hex))

            # find the _pfxPolygons node
            pfx_polygons_node = pm.PyNode('kks___vegetation_pfxPolygons')

            for node in pfx_polygons_node.getChildren():
                for child_node in node.getChildren():
                    #print('processing %s' % child_node.name())
                    child_node_name = child_node.name().split('___')[-1]

                    pm.select(child_node)
                    output_filename =\
                        '%s_%s.ass' % (
                            self.version.nice_name,
                            child_node_name.replace(':', '_').replace('|', '_')
                        )

                    output_full_path = \
                        os.path.join(output_path, output_filename)

                    # run the mel command
                    pm.mel.eval(
                        export_command % {
                            'path': output_full_path.replace('\\', '/')
                        }
                    )

                    # generate an aiStandIn node and set the path
                    ass_node = auxiliary.create_arnold_stand_in(
                        path='%s.gz' % output_full_path
                    )
                    ass_tra = ass_node.getParent()

                    # parent the ass node under the current node
                    # under pfx_polygons_node
                    pm.parent(ass_tra, node)

                    # set pivots
                    rp = pm.xform(child_node, q=1, ws=1, rp=1)
                    sp = pm.xform(child_node, q=1, ws=1, sp=1)
                    # rpt = child_node.getRotatePivotTranslation()

                    pm.xform(ass_node, ws=1, rp=rp)
                    pm.xform(ass_node, ws=1, sp=sp)
                    # ass_node.setRotatePivotTranslation(rpt)

                    # delete the child_node
                    pm.delete(child_node)

                    # give it the same name with the original
                    ass_tra.rename('%s' % child_node_name)

            # clean up other nodes
            pm.delete('kks___vegetation_pfxStrokes')
            pm.delete('kks___vegetation_paintableGeos')

        elif self.is_model_task(task):
            # convert all children of the root node
            # to an empty aiStandIn node
            # and save it as it is
            root_nodes = self.get_local_root_nodes()

            for root_node in root_nodes:
                for child_node in root_node.getChildren():
                    child_node_name = child_node.name()

                    rp = pm.xform(child_node, q=1, ws=1, rp=1)
                    sp = pm.xform(child_node, q=1, ws=1, sp=1)

                    pm.delete(child_node)

                    ass_node = auxiliary.create_arnold_stand_in(path='')
                    ass_tra = ass_node.getParent()
                    pm.parent(ass_tra, root_node)
                    ass_tra.rename(child_node_name)

                    # set pivots
                    pm.xform(ass_tra, ws=1, rp=rp)
                    pm.xform(ass_tra, ws=1, sp=sp)

                    # because there will be possible material assignments
                    # in look dev disable overrideShaders
                    ass_node.setAttr('overrideShaders', False)

                    # we definitely do not use light linking in our studio,
                    # which seems to create more problems than it solves.
                    ass_node.setAttr('overrideLightLinking', False)

        # convert all references to ASS
        for ref in pm.listReferences():
            ref.to_repr('ASS')
            ref.load()

        # fix an arnold bug
        for node_name in ['initialShadingGroup', 'initialParticleSE']:
            node = pm.PyNode(node_name)
            node.setAttr("ai_surface_shader", (0, 0, 0), type="float3")
            node.setAttr("ai_volume_shader", (0, 0, 0), type="float3")

        # if this is an Exterior/Interior -> Layout -> Hires task flatten it
        is_exterior_or_interior_task = self.is_exterior_or_interior_task(task)
        if is_exterior_or_interior_task:
            # and import all of the references
            all_refs = pm.listReferences()
            while len(all_refs) != 0:
                for ref in all_refs:
                    if not ref.isLoaded():
                        ref.load()
                    ref.importContents()
                all_refs = pm.listReferences()

            # assign lambert1 to all GPU nodes
            pm.sets('initialShadingGroup', e=1, fe=auxiliary.get_root_nodes())

            # now remove them from the group
            pm.sets('initialShadingGroup', e=1, rm=pm.ls())

            # and to make sure that no override is enabled
            [node.setAttr('overrideLightLinking', False)
             for node in pm.ls(type='aiStandIn')]

            # make sure motion blur is disabled
            [node.setAttr('motionBlur', False)
             for node in pm.ls(type='aiStandIn')]

            # clean up
            self.clean_up()

        # check if all aiStandIn nodes are included in
        # ArnoldStandInDefaultLightSet set
        try:
            arnold_stand_in_default_light_set = \
                pm.PyNode('ArnoldStandInDefaultLightSet')
        except pm.MayaNodeError:
            # just create it
            arnold_stand_in_default_light_set = \
                pm.createNode(
                    'objectSet',
                    name='ArnoldStandInDefaultLightSet'
                )

        pm.select(None)
        pm.sets(
            arnold_stand_in_default_light_set,
            fe=pm.ls(type='aiStandIn')
        )

        # save the scene as {{original_take}}___ASS
        # use maya
        take_name = '%s%s%s' % (
            self.base_take_name, Representation.repr_separator, 'ASS'
        )
        v = self.get_latest_repr_version(take_name)
        self.maya_env.save_as(v)

        # export the root nodes under the same file
        if is_exterior_or_interior_task:
            pm.select(auxiliary.get_root_nodes())
            pm.exportSelected(
                v.absolute_full_path,
                type='mayaAscii',
                force=True
            )

        # new scene
        pm.newFile(force=True)

        # reset show plugin shapes option
        active_panel = auxiliary.Playblaster.get_active_panel()
        pm.modelEditor(active_panel, e=1, pluginShapes=show_plugin_shapes)
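The texture-path cleanup in generate_ass uses re.sub to strip a '$REPO<number>/' prefix so the remaining path is repository-relative. A minimal standalone sketch with a hypothetical path (not taken from the project):

import re

# minimal sketch only -- the path below is hypothetical
orig_path = '$REPO12/Assets/Textures/wood_diffuse.tif'
rel_path = re.sub(r'(\$REPO[0-9/]+)', '', orig_path)
print(rel_path)  # Assets/Textures/wood_diffuse.tif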

Example 117

Project: Ludolph
Source File: main.py
View license
def start():
    """
    Start the daemon.
    """
    ret = 0
    cfg = 'ludolph.cfg'
    cfg_fp = None
    cfg_lo = ((os.path.expanduser('~'), '.' + cfg), (sys.prefix, 'etc', cfg), ('/etc', cfg))
    config_base_sections = ('global', 'xmpp', 'webserver', 'cron', 'ludolph.bot')

    # Try to read config file from ~/.ludolph.cfg or /etc/ludolph.cfg
    for i in cfg_lo:
        try:
            cfg_fp = open(os.path.join(*i))
        except IOError:
            continue
        else:
            break

    if not cfg_fp:
        sys.stderr.write("""\nLudolph can't start!\n
You need to create a config file in one of these locations: \n%s\n
You can rename ludolph.cfg.example and update the required options.
The example file is located in: %s\n\n""" % (
            '\n'.join([os.path.join(*i) for i in cfg_lo]),
            os.path.dirname(os.path.abspath(__file__))))
        sys.exit(1)

    # Read and parse configuration
    # noinspection PyShadowingNames
    def load_config(fp, reopen=False):
        config = RawConfigParser()
        if reopen:
            fp = open(fp.name)
        config.readfp(fp)  # TODO: Deprecated since python 3.2
        fp.close()
        return config
    config = load_config(cfg_fp)

    # Prepare logging configuration
    logconfig = {
        'level': parse_loglevel(config.get('global', 'loglevel')),
        'format': LOGFORMAT,
    }

    if config.has_option('global', 'logfile'):
        logfile = config.get('global', 'logfile').strip()
        if logfile:
            logconfig['filename'] = logfile

    # Daemonize
    if config.has_option('global', 'daemon'):
        if config.getboolean('global', 'daemon'):
            ret = daemonize()

    # Save pid file
    if config.has_option('global', 'pidfile'):
        try:
            with open(config.get('global', 'pidfile'), 'w') as fp:
                fp.write('%s' % os.getpid())
        except Exception as ex:
            # Setup logging just to show this error
            logging.basicConfig(**logconfig)
            logger.critical('Could not write to pidfile (%s)\n', ex)
            sys.exit(1)

    # Setup logging
    logging.basicConfig(**logconfig)

    # All exceptions will be logged without exit
    def log_except_hook(*exc_info):
        logger.critical('Unhandled exception!', exc_info=exc_info)
    sys.excepthook = log_except_hook

    # Default configuration
    use_tls = True
    use_ssl = False
    address = []

    # Starting
    logger.info('Starting Ludolph %s (%s %s)', __version__, sys.executable, sys.version.split()[0])
    logger.info('Loaded configuration from %s', cfg_fp.name)

    # Load plugins
    # noinspection PyShadowingNames
    def load_plugins(config, reinit=False):
        plugins = []

        for config_section in config.sections():
            config_section = config_section.strip()

            if config_section in config_base_sections:
                continue

            # Parse other possible imports
            parsed_plugin = config_section.split('.')

            if len(parsed_plugin) == 1:
                modname = 'ludolph.plugins.' + config_section
                plugin = config_section
            else:
                modname = config_section
                plugin = parsed_plugin[-1]

            logger.info('Loading plugin: %s', modname)

            try:
                # Translate super_ludolph_plugin into SuperLudolphPlugin
                clsname = plugin[0].upper() + re.sub(r'_+([a-zA-Z0-9])', lambda m: m.group(1).upper(), plugin[1:])
                module = __import__(modname, fromlist=[clsname])

                if reinit and getattr(module, '_loaded_', False):
                    reload(module)

                module._loaded_ = True
                imported_class = getattr(module, clsname)

                if not issubclass(imported_class, LudolphPlugin):
                    raise TypeError('Plugin: %s is not a LudolphPlugin subclass' % modname)

                plugins.append(Plugin(config_section, modname, imported_class))
            except Exception as ex:
                logger.exception(ex)
                logger.critical('Could not load plugin: %s', modname)

        return plugins
    plugins = load_plugins(config)

    # XMPP connection settings
    if config.has_option('xmpp', 'host'):
        address = [config.get('xmpp', 'host'), '5222']
        if config.has_option('xmpp', 'port'):
            address[1] = config.get('xmpp', 'port')
        logger.info('Connecting to jabber server %s', ':'.join(address))
    else:
        logger.info('Using DNS SRV lookup to find jabber server')

    if config.has_option('xmpp', 'tls'):
        use_tls = config.getboolean('xmpp', 'tls')

    if config.has_option('xmpp', 'ssl'):
        use_ssl = config.getboolean('xmpp', 'ssl')

    # Here we go
    xmpp = LudolphBot(config, plugins=plugins)

    signal.signal(signal.SIGINT, xmpp.shutdown)
    signal.signal(signal.SIGTERM, xmpp.shutdown)

    if hasattr(signal, 'SIGHUP'):  # Windows does not support SIGHUP - bug #41
        # noinspection PyUnusedLocal,PyShadowingNames
        def sighup(signalnum, handler):
            if xmpp.reloading:
                logger.warn('Reload already in progress')
            else:
                xmpp.reloading = True

                try:
                    config = load_config(cfg_fp, reopen=True)
                    logger.info('Reloaded configuration from %s', cfg_fp.name)
                    xmpp.prereload()
                    plugins = load_plugins(config, reinit=True)
                    xmpp.reload(config, plugins=plugins)
                finally:
                    xmpp.reloading = False

        signal.signal(signal.SIGHUP, sighup)
        # signal.siginterrupt(signal.SIGHUP, false)  # http://stackoverflow.com/a/4302037

    if xmpp.client.connect(tuple(address), use_tls=use_tls, use_ssl=use_ssl):
        xmpp.client.process(block=True)
        sys.exit(ret)
    else:
        logger.error('Ludolph is unable to connect to jabber server')
        sys.exit(2)
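The plugin loader above derives class names from snake_case section names with a single re.sub call using a callable replacement. A minimal standalone sketch; the helper name to_class_name is illustrative, not from the project:

import re

# minimal sketch only -- to_class_name is a made-up helper name
def to_class_name(plugin):
    return plugin[0].upper() + re.sub(r'_+([a-zA-Z0-9])',
                                      lambda m: m.group(1).upper(),
                                      plugin[1:])

print(to_class_name('super_ludolph_plugin'))  # SuperLudolphPlugin
print(to_class_name('zabbix'))                # Zabbix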

Example 118

Project: ete
Source File: ete_view.py
View license
def run(args):    
    if args.text_mode:
        for tindex, tfile in enumerate(src_tree_iterator(args)):
            #print tfile
            if args.raxml:
                nw = re.sub(r":(\d+\.\d+)\[(\d+)\]", r":\1[&&NHX:support=\2]", open(tfile).read())
                t = Tree(nw, format=args.src_newick_format)
            else:
                t = Tree(tfile, format=args.src_newick_format)

            print(t.get_ascii(show_internal=args.show_internal_names,
                              attributes=args.show_attributes))
        return

    global FACES

    if args.face:
        FACES = parse_faces(args.face)
    else:
        FACES = []

    # VISUALIZATION
    ts = TreeStyle()
    ts.mode = args.mode
    ts.show_leaf_name = True
    ts.tree_width = args.tree_width


    for f in FACES:
        if f["value"] == "@name":
            ts.show_leaf_name = False
            break

    if args.as_ncbi:
        ts.show_leaf_name = False
        FACES.extend(parse_faces(
            ['value:@sci_name, size:10, fstyle:italic',
             'value:@taxid, color:grey, size:6, format:" - %s"',
             'value:@sci_name, color:steelblue, size:7, pos:b-top, nodetype:internal',
             'value:@rank, color:indianred, size:6, pos:b-bottom, nodetype:internal',
         ]))


    if args.alg:
        FACES.extend(parse_faces(
            ['value:@sequence, size:10, pos:aligned, ftype:%s' %args.alg_type]
         ))

    if args.heatmap:
        FACES.extend(parse_faces(
            ['value:@name, size:10, pos:aligned, ftype:heatmap']
         ))

    if args.bubbles:
        for bubble in args.bubbles:
            FACES.extend(parse_faces(
                ['value:@%s, pos:float, ftype:bubble, opacity:0.4' %bubble,
             ]))

    ts.branch_vertical_margin = args.branch_separation
    if args.show_support:
        ts.show_branch_support = True
    if args.show_branch_length:
        ts.show_branch_length = True
    if args.force_topology:
        ts.force_topology = True
    ts.layout_fn = lambda x: None

    for tindex, tfile in enumerate(src_tree_iterator(args)):
        #print tfile
        if args.raxml:
            nw = re.sub(r":(\d+\.\d+)\[(\d+)\]", r":\1[&&NHX:support=\2]", open(tfile).read())
            t = PhyloTree(nw, format=args.src_newick_format)
        else:
            t = PhyloTree(tfile, format=args.src_newick_format)


        if args.alg:
            t.link_to_alignment(args.alg, alg_format=args.alg_format)

        if args.heatmap:
            DEFAULT_COLOR_SATURATION = 0.3
            BASE_LIGHTNESS = 0.7
            def gradient_color(value, max_value, saturation=0.5, hue=0.1):
                def rgb2hex(rgb):
                    return '#%02x%02x%02x' % rgb
                def hls2hex(h, l, s):
                    return rgb2hex( tuple([int(x*255) for x in colorsys.hls_to_rgb(h, l, s)]))

                lightness = 1 - (value * BASE_LIGHTNESS) / max_value
                return hls2hex(hue, lightness, DEFAULT_COLOR_SATURATION)


            heatmap_data = {}
            max_value, min_value = None, None
            for line in open(args.heatmap):
                if line.startswith('#COLNAMES'):
                    pass
                elif line.startswith('#') or not line.strip():
                    pass
                else:
                    fields = line.split('\t')
                    name = fields[0].strip()

                    values = [float(x) if x else None for x in fields[1:]]

                    maxv = max(values)
                    minv = min(values)
                    if max_value is None or maxv > max_value:
                        max_value = maxv
                    if min_value is None or minv < min_value:
                        min_value = minv
                    heatmap_data[name] = values

            heatmap_center_value = 0
            heatmap_color_center = "white"
            heatmap_color_up = 0.3
            heatmap_color_down = 0.7
            heatmap_color_missing = "black"

            heatmap_max_value = abs(heatmap_center_value - max_value)
            heatmap_min_value = abs(heatmap_center_value - min_value)

            if heatmap_center_value <= min_value:
                heatmap_max_value = heatmap_min_value + heatmap_max_value
            else:
                heatmap_max_value = max(heatmap_min_value, heatmap_max_value)



        # scale the tree
        if not args.height:
            args.height = None
        if not args.width:
            args.width = None

        f2color = {}
        f2last_seed = {}
        for node in t.traverse():
            node.img_style['size'] = 0
            if len(node.children) == 1:
                node.img_style['size'] = 2
                node.img_style['shape'] = "square"
                node.img_style['fgcolor'] = "steelblue"

            ftype_pos = defaultdict(int)

            for findex, f in enumerate(FACES):
                if (f['nodetype'] == 'any' or
                    (f['nodetype'] == 'leaf' and node.is_leaf()) or
                    (f['nodetype'] == 'internal' and not node.is_leaf())):


                    # if node passes face filters
                    if node_matcher(node, f["filters"]):
                        if f["value"].startswith("@"):
                            fvalue = getattr(node, f["value"][1:], None)
                        else:
                            fvalue = f["value"]

                        # if node's attribute has content, generate face
                        if fvalue is not None:
                            fsize = f["size"]
                            fbgcolor = f["bgcolor"]
                            fcolor = f['color']

                            if fcolor:
                                # Parse color options
                                auto_m = re.search("auto\(([^)]*)\)", fcolor)
                                if auto_m:
                                    target_attr = auto_m.groups()[0].strip()
                                    if not target_attr :
                                        color_keyattr = f["value"]
                                    else:
                                        color_keyattr = target_attr

                                    color_keyattr = color_keyattr.lstrip('@')
                                    color_bin = getattr(node, color_keyattr, None)

                                    last_seed = f2last_seed.setdefault(color_keyattr, random.random())

                                    seed = last_seed + 0.10 + random.uniform(0.1, 0.2)
                                    f2last_seed[color_keyattr] = seed

                                    fcolor = f2color.setdefault(color_bin, random_color(h=seed))

                            if fbgcolor:
                                # Parse color options
                                auto_m = re.search("auto\(([^)]*)\)", fbgcolor)
                                if auto_m:
                                    target_attr = auto_m.groups()[0].strip()
                                    if not target_attr :
                                        color_keyattr = f["value"]
                                    else:
                                        color_keyattr = target_attr

                                    color_keyattr = color_keyattr.lstrip('@')
                                    color_bin = getattr(node, color_keyattr, None)

                                    last_seed = f2last_seed.setdefault(color_keyattr, random.random())

                                    seed = last_seed + 0.10 + random.uniform(0.1, 0.2)
                                    f2last_seed[color_keyattr] = seed

                                    fbgcolor = f2color.setdefault(color_bin, random_color(h=seed))

                            if f["ftype"] == "text":
                                if f.get("format", None):
                                    fvalue = f["format"] % fvalue

                                F = TextFace(fvalue,
                                             fsize = fsize,
                                             fgcolor = fcolor or "black",
                                             fstyle = f.get('fstyle', None))

                            elif f["ftype"] == "fullseq":
                                F = faces.SeqMotifFace(seq=fvalue, seq_format="seq",
                                                       gap_format="line",
                                                       height=fsize)
                            elif f["ftype"] == "compactseq":
                                F = faces.SeqMotifFace(seq=fvalue, seq_format="compactseq",
                                                       gap_format="compactseq",
                                                       height=fsize)
                            elif f["ftype"] == "blockseq":
                                F = faces.SeqMotifFace(seq=fvalue, 
                                                       height=fsize,
                                                       fgcolor=fcolor or "slategrey",
                                                       bgcolor=fbgcolor or "slategrey",
                                                       scale_factor = 1.0)
                                fbgcolor = None
                            elif f["ftype"] == "bubble":
                                try:
                                    v = float(fvalue)
                                except ValueError:
                                    rad = fsize
                                else:
                                    rad = fsize * v
                                F = faces.CircleFace(radius=rad, style="sphere",
                                                     color=fcolor or "steelblue")

                            elif f["ftype"] == "heatmap":
                                if not f['column']:
                                    col = ftype_pos[f["pos"]]
                                else:
                                    col = f["column"]

                                for i, value in enumerate(heatmap_data.get(node.name, [])):
                                    ftype_pos[f["pos"]] += 1

                                    if value is None:
                                        color = heatmap_color_missing
                                    elif value > heatmap_center_value:
                                        color = gradient_color(abs(heatmap_center_value - value), heatmap_max_value, hue=heatmap_color_up)
                                    elif value < heatmap_center_value:
                                        color = gradient_color(abs(heatmap_center_value - value), heatmap_max_value, hue=heatmap_color_down)
                                    else:
                                        color = heatmap_color_center
                                    node.add_face(RectFace(20, 20, color, color), position="aligned", column=col + i)
                                    # Add header
                                    # for i, name in enumerate(header):
                                    #    nameF = TextFace(name, fsize=7)
                                    #    nameF.rotation = -90
                                    #    tree_style.aligned_header.add_face(nameF, column=i)
                                F = None

                            elif f["ftype"] == "profile":
                                # internal profiles?
                                F = None
                            elif f["ftype"] == "barchart":
                                F = None
                            elif f["ftype"] == "piechart":
                                F = None



                            # Add the Face
                            if F:
                                F.opacity = f['opacity'] or 1.0

                                # Set face general attributes
                                if fbgcolor:
                                    F.background.color = fbgcolor

                                if not f['column']:
                                    col = ftype_pos[f["pos"]]
                                    ftype_pos[f["pos"]] += 1
                                else:
                                    col = f["column"]
                                node.add_face(F, column=col, position=f["pos"])

        if args.image:
            if tindex > 0: 
                t.render("t%d.%s" %(tindex, args.image),
                         tree_style=ts, w=args.width, h=args.height, units=args.size_units)
            else:
                t.render("%s" %(args.image),
                         tree_style=ts, w=args.width, h=args.height, units=args.size_units)                
        else:
            t.show(None, tree_style=ts)
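The --raxml branch above rewrites RAxML-style bracketed bootstrap values into NHX support tags with one re.sub, so the newick parser can read them as node support. A minimal standalone sketch on a made-up newick string:

import re

# minimal sketch only -- the newick string below is made up
nw = '((A:0.12[95],B:0.30[80]):0.05[99],C:0.40);'
nhx = re.sub(r':(\d+\.\d+)\[(\d+)\]', r':\1[&&NHX:support=\2]', nw)
print(nhx)
# ((A:0.12[&&NHX:support=95],B:0.30[&&NHX:support=80]):0.05[&&NHX:support=99],C:0.40);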

Example 119

Project: ISYlib-python
Source File: IsyDiscover.py
View license
def isy_discover(**kwargs):
    """ discover a device's IP

        named args:
            node : node name or id
            timeout : how long to wait for replies
            count : number of devices to wait for
            passive : passively wait for broadcast
            debug : print debug info

        return: a list of dict obj containing:
                - friendlyName: the device name
                - URLBase: base URL for device
                - UDN: uuid
            ( optional : eventSubURL controlURL SCPDURL  )


    """
    class _IsyDiscoveryData:
        debug = 0
        timeout = 20
        passive = 0
        count = 2
        upnp_urls = []

    ddata = _IsyDiscoveryData()

    ddata.debug = kwargs.get("debug", 0)
    ddata.timeout = kwargs.get("timeout", 30)
    ddata.passive = kwargs.get("passive", 0)
    ddata.count = kwargs.get("count", 2)

    if ddata.debug:
        print("isy_discover :debug=%s\ttimeout=%s\tpassive=%s\tcount=%s\n" % \
            (ddata.debug, ddata.timeout, ddata.passive, ddata.count))

    def isy_discover_timeout(signum, frame):
        print("isy_discover_timeout CALL")
        raise UpnpLimitExpired("Timed Out")

#    def isy_timeout(signum, frame):
#        print("isy_timeout CALL")
#        print('Signal handler called with signal', signum)

    def isy_upnp(ddata):

        if ddata.debug:
            print("isy_upnp CalL")

            print("isy_upnp debug=%s\ttimeout=%s\tpassive=%s\tcount=%s\n" % \
                    (ddata.debug, ddata.timeout, ddata.passive, ddata.count))

        multicast_group = '239.255.255.250'
        multicast_port = 1900
        server_address = ('', multicast_port)


        # Create the socket
        sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        sock.bind(server_address)
        group = socket.inet_aton(multicast_group)
        mreq = struct.pack('4sL', group, socket.INADDR_ANY)
        sock.setsockopt(socket.IPPROTO_IP, socket.IP_ADD_MEMBERSHIP, mreq)

        if not ddata.passive:
            probe = "M-SEARCH * HTTP/1.1\r\nHOST:239.255.255.250:1900\r\n" \
                "MAN:\"ssdp.discover\"\r\nMX:1\r\n"  \
                "ST:urn:udi-com:device:X_Insteon_Lighting_Device:1\r\n\r\n"

            #print "sending : ", probe
            sock.sendto(probe.encode('utf-8'), (multicast_group, multicast_port))


        while  len(ddata.upnp_urls) < ddata.count:

            if ddata.debug:
                print("while IN")

            data, address = sock.recvfrom(1024)

            #.decode('UTF-8')
            if sys.hexversion >= 0x3000000:
                data = str( data, encoding='utf8')

            if ddata.debug:
                print('received %s bytes from %s' % (len(data), address))
                print(data)
                print("ddata.upnp_urls = ", ddata.upnp_urls)

            # only ISY devices
            # should I also look for
            # SERVER:UCoS, UPnP/1.0, UDI/1.0
            if not "X_Insteon_Lighting_" in data:
                continue

            upnp_packet = data.splitlines()

            if "M-SEARCH " in upnp_packet[0]:
                continue

            # extract LOCATION
            for l in upnp_packet:
                a = l.split(':', 1)
                if len(a) == 2:
                    if str(a[0]).upper() == "LOCATION":
                        ddata.upnp_urls.append(str(a[1]).strip())
                        # uniq the list
                        ddata.upnp_urls = list(set(ddata.upnp_urls))

        #print "returning ", ddata.upnp_urls


    old_handler = signal.signal(signal.SIGALRM, isy_discover_timeout)

    #isy_upnp(ddata)
    try:
        signal.alarm(ddata.timeout)
        isy_upnp(ddata)
        signal.alarm(0)
        signal.signal(signal.SIGALRM, old_handler)
    except UpnpLimitExpired:
        pass
    # except Exception:
        # print("Unexpected error:", sys.exc_info()[0])
    finally:
        signal.alarm(0)
        signal.signal(signal.SIGALRM, old_handler)
        if ddata.debug:
            print("return data.upnp_urls = ", ddata.upnp_urls)

    result = {}
#    result_tags = ["UDN", "URLBase", "SCPDURL",
#                "controlURL", "eventSubURL"]


    for s in ddata.upnp_urls:
        req = URL.Request(s)
        resp = URL.urlopen(req)

        pagedata = resp.read().decode('utf-8')
        resp.close()

        # does this even work ??
        # ET.register_namespace("isy", 'urn:schemas-upnp-org:device-1-0')
        #print "_namespace_map = {0}".format(ET._namespace_map)

        # this is a hack to deal with namespace:
        pa = re.sub(r" xmlns=\"urn:schemas-upnp-org:device-1-0\"", "", pagedata)
        # grok the XML from the Upnp discovered server
        xmlres = ET.fromstring(pa)

        # print "_namespace_map : ",
        # pprint(ET._namespace_map)

        #if hasattr(xmlres, 'tag'):
        #    xmlns = re.search('\{(.*)\}', xmlres.tag).group(1)
        #else:
        #    continue

        #print "xmlns ", xmlns

        isy_res = dict()

        xelm = xmlres.find("URLBase")
        if hasattr(xelm, 'text'):
            isy_res["URLBase"] = xelm.text

        xelm = xmlres.find("device/friendlyName")
        if hasattr(xelm, 'text'):
            isy_res["friendlyName"] = xelm.text

        xelm = xmlres.find("device/UDN")
        if hasattr(xelm, 'text'):
            isy_res["UDN"] = xelm.text

        for elm in xmlres.iter("service"):
            serv = elm.find('serviceType')
            if hasattr(serv, 'text') and serv.text == "urn:udi-com:service:X_Insteon_Lighting_Service:1":

                serv = elm.find('SCPDURL')
                if hasattr(serv, 'text'):
                    isy_res["SCPDURL"] = serv.text

                serv = elm.find('controlURL')
                if hasattr(serv, 'text'):
                    isy_res["controlURL"] = serv.text

                serv = elm.find('eventSubURL')
                if hasattr(serv, 'text'):
                    isy_res["eventSubURL"] = serv.text

        result[isy_res["UDN"]] = isy_res

    return result
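
A minimal, self-contained sketch of the namespace trick used above: re.sub strips the default xmlns declaration so plain ElementTree paths such as "device/friendlyName" resolve without namespace prefixes. The device description XML below is invented for illustration.

import re
import xml.etree.ElementTree as ET

# Hypothetical UPnP description document, standing in for the page fetched above.
pagedata = """<?xml version="1.0"?>
<root xmlns="urn:schemas-upnp-org:device-1-0">
  <URLBase>http://192.168.0.10:80/</URLBase>
  <device>
    <friendlyName>ISY</friendlyName>
    <UDN>uuid:00-11-22</UDN>
  </device>
</root>"""

# Same hack as isy_discover(): drop the default namespace so that find() works
# without "{urn:schemas-upnp-org:device-1-0}" prefixes on every tag.
pa = re.sub(r' xmlns="urn:schemas-upnp-org:device-1-0"', '', pagedata)
xmlres = ET.fromstring(pa)
print(xmlres.find("URLBase").text)              # http://192.168.0.10:80/
print(xmlres.find("device/friendlyName").text)  # ISY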

Example 120

Project: open-tamil
Source File: ipaconvert.py
View license
def ipa(text): # Generates narrow transcription of Tamil texts 

    text=" " + text + " "
#    text = """ %s """ % text

    # Move Punctuations  
    repl = lambda m: " " + m.group(1) + " "        
    text = re.sub("([\\,\\\,\.\!\?\"\'\"\(\)])", repl, text)
    # text = re.sub("/(?<=[\w])([\\,\.\!\?\"\'\"\(\)])/",repl,text)
    # text = re.sub("/([\\,\.\!\?\"\'\"\(\)])(?=[\w])/",repl,text)

    # The comments below refer to the implementation of transcription rules as described in the
    # book - Panniru Thirumurai Olipeyarppu by Punal K Murugaiyan

    # Diphthongs

    text = text.replace("ai","ay") # 5.3.3.1 ii Palatal Approximant ... # ai

    text = text.replace("au","av") # au - dipthong replacement

    # Grantha 

    text = text.replace("j","ʤ")
    text = text.replace("h","ɦ")
    text = text.replace("S","ʂ")
    text = text.replace("srI","ʂrI")

    # Mey 

    # Vallinam 

    # pa 

    text = re.sub(r"(?<=[aAiIuUeEoO])p(?=[aAiIuUeEoO])","β",text) # 5.3.1.6 iii Voiced Bilabial Fricative
    text = re.sub(r"(?<=[yrlvZL])p","β",text) # 5.3.1.6 iii

    text = re.sub(r"(?<=[GJnVmN])p","b",text) #5.3.1.6 ii voiced bilabial plosive

    # 5.3.1.6 i no replacement 

    # ta 

    text = re.sub(r"(?<=[aAiIuUeEoOyrlvZL])t(?=[aAiIuUeEoO])","ð",text) # 5.3.1.5 iii Voiced dental Fricative

    text = re.sub(r"(?<=[nV])t","d̪",text) # 5.3.1.5 ii Voiced dental plosive

    text = re.sub(r"t","t̪",text) # 5.3.1.5 i Voiceless dental plosive

    # Ra 

    text = text.replace("XX","t̺t̺ʳ") # 5.3.1.4 ii & ii find correct name

    text = re.sub(r"(?<=V)X","d̺ʳ",text) # 5.3.1.4 iii

    # 5.3.1.4 iv & v implemented in idaiyinam section


    # Ta  

    text = re.sub(r"(?<=[aAiIuUeEoO])T(?=[aAiIuUeEoO])","ɽ",text) # 5.3.1.3 iii Retroflex Flap

    text = re.sub(r"(?<=[N])T","ɖ",text) # 5.3.1.3 ii Voiced Retroflex Plosive | VT ?

    text = text.replace("T","ʈ") # 5.3.1.3 i Voiceless Retroflex Plosive

    # ca 

    text = re.sub(r"(?<=[aAiIuUeEoOl])c(?=[aAiIuUeEoO])","s",text) # 5.3.1.2 iii voiceless alveolar fricatives 
    
    repl = lambda m: m.group(1) + "s"   
    text = re.sub(r"(\s)c",repl,text) # 5.3.1.2 iii

    text = re.sub(r"(V)c",repl,text)

    text = re.sub(r"(?<=[J])c","ʤ",text) # 5.3.1.2 ii Voiced Post Alveolar affricate - Symbol Changed : d͡ʒ

    text = re.sub(r"c","ʧ",text) # 5.3.1.2 i Voicless Post Alveolar Affrivate - Symbol Changed : t͡ʃ



    # ka 

    text = re.sub(r"Gk(?=[iI])","ŋʲgʲ",text) # 5.3.2.1 ii Palatized Velar Nasal

    text = text.replace("Gk","ŋg") # 5.3.2.1 Velar Nasal

    text = re.sub(r"(?<=[aAiIuUeEoO])k(?=[iI])","ç",text) # 5.3.1.1 viii voiceless palatal fricative

    #yrZlLv assumed above. Missing in definition : eykiya -> eyçiya  aarkiya -> aarçiya....

    text = re.sub(r"(?<=r)k(?=[aAuUeEoO])","ɣ",text) # 5.3.1.1 Vii Voiced Velar Fricative

    text = re.sub(r"(?<=[aAiIuUeEoO])k(?=[aAuUeEoO])","x",text) # 5.3.1.1 vi Voicless Velar Fricative

    text = re.sub(r"(?<=[ylvZL])k(?=[aAuUeEoO])","x",text) # above

    text = re.sub(r"ykk","jcc",text) # 5.3.1.1 v voiceless palatal plosive

    text = re.sub(r"jkk","jcc",text) # above

    text = re.sub(r"(?<=[rylvZLGVNaAiIuUeEoO])k(?=[iI])","gʲ",text) # 5.3.1.1 iv Voiced Palatized Velar Plosive

    text = re.sub(r"(?<=[NVmn])k(?=[aAuUeEoO])","g",text) # 5.3.1.1 iii voiced velar plosive

    text = re.sub(r"(?<=k)k(?=[iI])","kʲ",text) # 5.3.1.1 ii Voiceless velar plosive

    # 5.3.1.1 i no replacement #


    # Idaiyinam 

    text = text.replace("Z","ɻ") #5.3.3.6  Retroflex Approximant

    text = re.sub(r"(?<=[aAiIuUeEoO])L(?=[aAiIuUeEoO])","ɭʼ",text)  #5.3.3.5 i Lateral Approximant - Ejective

    text = text.replace("L","ɭ") # 5.3.3.5 ii Lateral Approximant

    # 5.3.3.4 no change 

    text = re.sub(r"(?<=[aAiIuUeEoO])[rX](?=[aAiIuUeEoO])","ɾ",text) # 5.3.3.3 i Alveolar Tap

    # 5.3.3.3 ii - pure consonant r - no replacement 

    text = re.sub(r"X(?!=[aAiIuUeEoO])","r",text) # 5.3.3.3 ii Trill

    text = re.sub(r"(?<=[aAiIuUeEoO])v(?=[aAiIuUeEoO])","ʋ",text) # 5.3.3.2 ii labio-dental approximant
    text = re.sub(r"(\s)v(?=[aAiIuUeEoO])",lambda m: m.group(1)+"ʋ",text) # 5.3.3.2 ii
    text = text.replace("vv","ʊ̯ʋ") # 5.3.3.2 i near-close near-back rounded vowel - part of a dipthong
    text = text.replace("v","ʋ")

    text = re.sub(r"yy","jɪ̯",text) # 5.3.3.1 i near-close near-front unrounded vowel - part of a dipthong
    text = re.sub(r"y","ɪ̯",text) # 5.3.3.1 i near-close near-front unrounded vowel - part of a dipthong

    # Mellinam 

    # 5.3.2.6 no replacement 

    text = re.sub(r"[Vn]","n̺",text) # 5.3.2.4 Alveolar Nasal (Check Actual name in Wikipedia)

    text = text.replace("n̺d̪","n̪d̪") # 5.3.2.5 Dental Nasal

    text = re.sub(r"(?<=[aAiIuUeEoO])N(?=[aAiIuUeEoO])","ɳʼ",text) # 5.3.2.3 ii Retroflex Nasal Ejective

    text = text.replace("N","ɳ") # 5.3.2.3 Retroflex Nasal

    text = text.replace("J","ɲ") # 5.3.2.3 Palatal Nasal

    text = re.sub(r"GG(?=[iI])","ŋʲŋʲ",text) # Assumed based on above

    text = text.replace("GG","ŋŋ") # Assumed based on above

    text = text.replace("G","ŋ") # Assumed based on above

    # Uyir 

    # Separate Pure Vowel Combinations

    text = re.sub(r"([aAiIuUeEoO])([aAiIuUeEoO])",
            lambda m: m.group(1)+"_"+m.group(2),text)

    #return text

    # Long O 

    text=re.sub(r"o(\s)",lambda m: "o·"+m.group(1),text) # 5.2.5.2 v 

    text=re.sub(r"(\s)o(?!·)",lambda m: m.group(1)+"ʷoː",text) # 5.2.5.2 iii 
    text=re.sub(r"_o(?!·)","ʷoː",text) # 5.2.5.2 iii - puththi_Ottum

    text=re.sub(r"(?<![aAiIuUeEoOː·])o(?![ː·])","oː",text) # 5.2.5.2 i

    # Short o 

    text=re.sub(r"(\s)O(?!·)",lambda m: m.group(1)+"ʷo̞",text) # 5.2.5.1 iii 
    text=re.sub(r"_O(?!·)","ʷo̞",text) # 5.2.5.1 iii - puththi_Ottum

    text=re.sub(r"(?<![aAiIuUeEoOː·̞])O(?![ː·])","o̞",text) # 5.2.5.1 i

    # Adding extra symbol for Retroflex Consonants 

    retroflex = ["ɽ","ɖ","ʈ","ɳ","ɭ","ɻ"]

    for rf in retroflex:
          text = re.sub("/̞(?=" + rf + ")","̞˞",text)
	      
    # Long e 

    text=re.sub(r"e(\s)",lambda m: "e·"+m.group(1),text) # 5.2.4.2 v 

    text=re.sub(r"(\s)e(?!·)",lambda m: m.group(1)+"ʲeː",text) # 5.2.4.2 iii 
    text=re.sub(r"_e(?!·)","ʲeː",text) # 5.2.4.2 iii - puththi_Ottum

    text=re.sub(r"(?<![aAiIuUeEoOː·̞])e(?![ː·])","eː",text) # 5.2.5.2 i


    # short e 

    text=re.sub(r"(\s)E(?!·)",lambda m: m.group(1)+"ʲɛ̝",text) # 5.2.4.1 iii 
    text=re.sub(r"_E(?!·)","ʲɛ̝",text) # 5.2.4.1 iii - puththi_Ottum

    text=re.sub(r"(?<![aAiIuUeEoOː·̞̝])E(?![ː·])","ɛ̝",text) # 5.2.5.4 i

    # Adding extra symbol for Retroflex Consonants 

    for rf in retroflex:
          text = re.sub("/̝(?=" + rf + ")","̝˞",text)

    # short u 

    text = re.sub(r"(?<!u)(\S)(?<![bʋpmβ])u",lambda m: m.group(1)+"ɨ",text) # 5.2.3.1 v

    text=re.sub(r"(\s)u(?!·)",lambda m: m.group(1)+"ʷʊ",text) # 5.2.3.1 iii 

    text=re.sub(r"_u(?!·)","ʷʊ",text) # 5.2.3.1 iii - puththi_Ottum

    text = re.sub(r"(?<!u\S)([bʋpmβ])u",lambda m: m.group(1)+"ʉ̩",text) # 5.2.3.1  Vii

    text = re.sub(r"([bʋpβm])u",lambda m: m.group(1)+"ʉ̩",text) # 5.2.3.1  Vii

    text = re.sub(r"(?<![bʋpβm])u(\s)",lambda m: "ɨ"+m.group(1),text) # 5.2.3.1 v
    
    repl = lambda m: m.group(1) + m.group(2) + "ʊ"
    
    text= re.sub(r"(\s)(\S)(ɨ|ʉ̩)",repl,text) # 5.2.5.2 i

    text= re.sub(r"(U)(\S{1,2})ɨ",repl,text) # 5.2.5.2 i 

    text= re.sub(r"(ʊ)(\S{1,2})ɨ",repl,text)

    text= re.sub(r"(ʊ)(\S{1,2})ʉ̩",repl,text)

    text = re.sub(r"(?<![bʋβpm])ʊ(\s)",lambda m: "ɨ"+m.group(1),text) # 5.2.3.1 v

    text = re.sub(r"(?<=[bʋβpm])ʊ(\s)",lambda m: "ʉ̩"+m.group(1),text)

    for rf in retroflex:
          text = re.sub(r"ʊ(?=" + rf + ")","ʊ˞",text)
	      
    for rf in retroflex:
          text = re.sub(r"ʉ̩(?=" + rf + ")","ʉ̩˞",text)
	      
    for rf in retroflex:
          text = re.sub(r"ɨ(?=" + rf + ")","ɨ˞",text) 

    # 
     
    text = re.sub(r"\S(<=!u)\Su","ɨ",text) # 5.2.3.1 v
	      
    # Long u 

    text=re.sub(r"U(\s)",lambda m: "u·"+m.group(1),text) # 5.2.3.2 v 

    text=re.sub(r"(\s)U(?!·)",lambda m: m.group(1)+"ʷuː",text) # 5.2.3.2 iii 
    text=re.sub(r"_U(?!·)","ʷuː",text) # 5.2.3.2 iii - puththi_Ottum

    text=re.sub(r"(?<![aAiIuUeEoOː·̞̝])U(?![ː·])","uː",text) # 5.2.3.2 i

    # short i 

    text=re.sub(r"i(\s)",lambda m: "ɪ·"+m.group(1),text) # 5.2.2.1 iii 

    text=re.sub(r"(\s)i(?!·)",lambda m: m.group(1)+"ʲɪ",text) # 5.2.4.2 iii 
    text=re.sub(r"_i(?!·)","ʲɪ",text) # 5.2.4.2 iii - puththi_Ottum

    text=re.sub(r"(?<![aAiIuUeEoOː·̞̝])i(?![ː·])","ɪ",text) # 5.2.5.2 i

    for rf in retroflex:
          text = re.sub(r"ɪ(?=" + rf + ")","ɪ˞",text)

    # Long i 

    text=re.sub(r"I(\s)",lambda m: "i·"+m.group(1),text) # 5.2.2.2 v 

    text=re.sub(r"(\s)I(?!·)",lambda m: m.group(1)+"ʲiː",text) # 5.2.2.2 iii 
    text=re.sub(r"_I(?!·)","ʲiː",text) # 5.2.2.2 iii - puththi_Ottum

    text=re.sub(r"(?<![aAiIuUeEoOː·̞̝])I(?![ː·])","iː",text) # 5.2.2.2 i

    # Long A 

    text=re.sub(r"(\s)A(?!·)",lambda m: m.group(1)+"ˀɑː",text) # 5.2.1.2 iii 
    text=re.sub(r"_A(?!·)","ˀɑː",text) # 5.2.1.2 iii - puththi_Ottum

    text=re.sub(r"(?<![aAiIuUeEoOː·̞̝])A(?![ː·])","ɑː",text) # 5.2.1.2 i

    # short a 

    # Transcription of Abbreviation Ignored 

    text=re.sub(r"(\s)a(?!·)",lambda m: m.group(1)+"ˀʌ",text) # 5.2.1.1 vi 
    text=re.sub(r"_a(?!·)","ˀʌ",text) # 5.2.1.1 vi - puththi_Ottum

    text=re.sub(r"(?<![aAiIuUeEoOː·̞̝])a(?![ː·])","ʌ",text) # 5.2.1.1 i

    text=re.sub(r"ʌ(\s)",lambda m: "ə"+m.group(1),text) # 5.2.1.1 iii

    for rf in retroflex:
          text = re.sub(r"ʌ(?=" + rf + ")","ʌ˞",text)
          
    # Aytham 

    text = text.replace("K","x")
    text = text.replace("xt̪","xð")
    text = text.replace("xk","xx")
          
        
    # Adding extra symbol for Retroflex Consonants Common of U & O 

    # Regex won't accept (?=[Multiple Unicode Chars]) so separating each character

    for rf in retroflex:
          text = re.sub(r"ː(?=" + rf + ")","˞ː",text)

    # Fixing kaaZBbu kaLaip kaLaippu bugs 

    text = text.replace("βp","pp")
    text = text.replace("β ","p ")
    text = re.sub(r"ʧ([ \n])s",lambda m: "ʧ"+m.group(1)+"ʧ",text) # ac samayam -> ac camayam \\ Check with Newlines ac \n samayam

    # New IPA Conventions

    text = text.replace("ː","ː")
    text = text.replace("·","ˑ") #
    text = text.replace("ʧ","tʃ")
    text = text.replace("ʤ","dʒ")

    #ɨɭ (i- + Refelex -> <u> <Retroflex>

    text = text.lstrip()

    return text
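
Most of the rules above share one shape: a zero-width lookbehind and lookahead pin down the phonetic context, and only the consonant in the middle is rewritten. A stripped-down sketch of that shape with a toy rule (not one of the book's actual rules):

import re

VOWELS = "aAiIuUeEoO"

def intervocalic(text, target, replacement):
    # Replace `target` only when it is flanked by vowels; the lookarounds are
    # zero-width, so the surrounding vowels themselves are left untouched.
    pattern = r"(?<=[{v}]){t}(?=[{v}])".format(v=VOWELS, t=re.escape(target))
    return re.sub(pattern, replacement, text)

print(intervocalic("akam kal", "k", "x"))  # "axam kal" - only the vowel-flanked k changes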

Example 121

Project: Flexget
Source File: deluge.py
View license
    def on_connect_success(self, result, task, config):
        """Gets called when successfully connected to a daemon."""
        from deluge.ui.client import client
        from twisted.internet import reactor, defer

        if not result:
            log.debug('on_connect_success returned a failed result. BUG?')

        if task.options.test:
            log.debug('Test connection to deluge daemon successful.')
            client.disconnect()
            return

        def format_label(label):
            """Makes a string compliant with deluge label naming rules"""
            return re.sub('[^\w-]+', '_', label.lower())

        def set_torrent_options(torrent_id, entry, opts):
            """Gets called when a torrent was added to the daemon."""
            dlist = []
            if not torrent_id:
                log.error('There was an error adding %s to deluge.' % entry['title'])
                # TODO: Fail entry? How can this happen still now?
                return
            log.info('%s successfully added to deluge.' % entry['title'])
            entry['deluge_id'] = torrent_id

            def create_path(result, path):
                """Creates the specified path if deluge is older than 1.3"""
                from deluge.common import VersionSplit
                # Before 1.3, deluge would not create a non-existent move directory, so we need to.
                if VersionSplit('1.3.0') > VersionSplit(self.deluge_version):
                    if client.is_localhost():
                        if not os.path.isdir(path):
                            log.debug('path %s doesn\'t exist, creating' % path)
                            os.makedirs(path)
                    else:
                        log.warning('If path does not exist on the machine running the daemon, move will fail.')

            if opts.get('movedone'):
                dlist.append(version_deferred.addCallback(create_path, opts['movedone']))
                dlist.append(client.core.set_torrent_move_completed(torrent_id, True))
                dlist.append(client.core.set_torrent_move_completed_path(torrent_id, opts['movedone']))
                log.debug('%s move on complete set to %s' % (entry['title'], opts['movedone']))
            if opts.get('label'):
                def apply_label(result, torrent_id, label):
                    """Gets called after labels and torrent were added to deluge."""
                    return client.label.set_torrent(torrent_id, label)

                dlist.append(label_deferred.addCallback(apply_label, torrent_id, opts['label']))
            if opts.get('queuetotop') is not None:
                if opts['queuetotop']:
                    dlist.append(client.core.queue_top([torrent_id]))
                    log.debug('%s moved to top of queue' % entry['title'])
                else:
                    dlist.append(client.core.queue_bottom([torrent_id]))
                    log.debug('%s moved to bottom of queue' % entry['title'])

            def on_get_torrent_status(status):
                """Gets called with torrent status, including file info.
                Sets the torrent options which require knowledge of the current status of the torrent."""

                main_file_dlist = []

                # Determine where the file should be
                move_now_path = None
                if opts.get('movedone'):
                    if status['progress'] == 100:
                        move_now_path = opts['movedone']
                    else:
                        # Deluge will unset the move completed option if we move the storage, forgo setting proper
                        # path, in favor of leaving proper final location.
                        log.debug('Not moving storage for %s, as this will prevent movedone.' % entry['title'])
                elif opts.get('path'):
                    move_now_path = opts['path']

                if move_now_path and os.path.normpath(move_now_path) != os.path.normpath(status['save_path']):
                    main_file_dlist.append(version_deferred.addCallback(create_path, move_now_path))
                    log.debug('Moving storage for %s to %s' % (entry['title'], move_now_path))
                    main_file_dlist.append(client.core.move_storage([torrent_id], move_now_path))

                if opts.get('content_filename') or opts.get('main_file_only'):

                    def file_exists(filename):
                        # Checks the download path as well as the move completed path for existence of the file
                        if os.path.exists(os.path.join(status['save_path'], filename)):
                            return True
                        elif status.get('move_on_completed') and status.get('move_on_completed_path'):
                            if os.path.exists(os.path.join(status['move_on_completed_path'], filename)):
                                return True
                        else:
                            return False

                    def unused_name(name):
                        # If on local computer, tries appending a (#) suffix until a unique filename is found
                        if client.is_localhost():
                            counter = 2
                            while file_exists(name):
                                name = ''.join([os.path.splitext(name)[0],
                                                " (", str(counter), ')',
                                                os.path.splitext(name)[1]])
                                counter += 1
                        else:
                            log.debug('Cannot ensure content_filename is unique '
                                      'when adding to a remote deluge daemon.')
                        return name

                    def rename(file, new_name):
                        # Renames a file in torrent
                        main_file_dlist.append(
                            client.core.rename_files(torrent_id,
                                                     [(file['index'], new_name)]))
                        log.debug('File %s in %s renamed to %s' % (file['path'], entry['title'], new_name))

                    # find a file that makes up more than main_file_ratio (default: 90%) of the total size
                    main_file = None
                    for file in status['files']:
                        if file['size'] > (status['total_size'] * opts.get('main_file_ratio')):
                            main_file = file
                            break

                    if main_file is not None:
                        # proceed with renaming only if such a big file is found

                        # find the subtitle file
                        keep_subs = opts.get('keep_subs')
                        sub_file = None
                        if keep_subs:
                            sub_exts = [".srt", ".sub"]
                            for file in status['files']:
                                ext = os.path.splitext(file['path'])[1]
                                if ext in sub_exts:
                                    sub_file = file
                                    break

                        # check for single file torrents so we don't add unnecessary folders
                        if os.path.dirname(main_file['path']) not in ("", "/"):
                            # check for top folder in user config
                            if opts.get('content_filename') and os.path.dirname(opts['content_filename']) != "":
                                top_files_dir = os.path.dirname(opts['content_filename']) + "/"
                            else:
                                top_files_dir = os.path.dirname(main_file['path']) + "/"
                        else:
                            top_files_dir = "/"

                        if opts.get('content_filename'):
                            # rename the main file
                            big_file_name = (top_files_dir +
                                             os.path.basename(opts['content_filename']) +
                                             os.path.splitext(main_file['path'])[1])
                            big_file_name = unused_name(big_file_name)
                            rename(main_file, big_file_name)

                            # rename subs along with the main file
                            if sub_file is not None and keep_subs:
                                sub_file_name = (os.path.splitext(big_file_name)[0] +
                                                 os.path.splitext(sub_file['path'])[1])
                                rename(sub_file, sub_file_name)

                        if opts.get('main_file_only'):
                            # download only the main file (and subs)
                            file_priorities = [1 if f == main_file or (f == sub_file and keep_subs) else 0
                                               for f in status['files']]
                            main_file_dlist.append(
                                client.core.set_torrent_file_priorities(torrent_id, file_priorities))

                            if opts.get('hide_sparse_files'):
                                # hide the other sparse files that are not supposed to download but are created anyway
                                # http://dev.deluge-torrent.org/ticket/1827
                                # Made sparse files behave better with deluge http://flexget.com/ticket/2881
                                sparse_files = [f for f in status['files']
                                                if f != main_file and (f != sub_file or (not keep_subs))]
                                rename_pairs = [(f['index'],
                                                 top_files_dir + ".sparse_files/" + os.path.basename(f['path']))
                                                for f in sparse_files]
                                main_file_dlist.append(client.core.rename_files(torrent_id, rename_pairs))
                    else:
                        log.warning('No files in "%s" are > %d%% of content size, no files renamed.' % (
                            entry['title'],
                            opts.get('main_file_ratio') * 100))

                return defer.DeferredList(main_file_dlist)

            status_keys = ['files', 'total_size', 'save_path', 'move_on_completed_path',
                           'move_on_completed', 'progress']
            dlist.append(client.core.get_torrent_status(torrent_id, status_keys).addCallback(on_get_torrent_status))

            return defer.DeferredList(dlist)

        def on_fail(result, task, entry):
            """Gets called when daemon reports a failure adding the torrent."""
            log.info('%s was not added to deluge! %s' % (entry['title'], result))
            entry.fail('Could not be added to deluge')

        # dlist is a list of deferreds that must complete before we exit
        dlist = []
        # loop through entries to get a list of labels to add
        labels = set([format_label(entry['label']) for entry in task.accepted if entry.get('label')])
        if config.get('label'):
            labels.add(format_label(config['label']))
        label_deferred = defer.succeed(True)
        if labels:
            # Make sure the label plugin is available and enabled, then add appropriate labels

            def on_get_enabled_plugins(plugins):
                """Gets called with the list of enabled deluge plugins."""

                def on_label_enabled(result):
                    """ This runs when we verify the label plugin is enabled. """

                    def on_get_labels(d_labels):
                        """Gets available labels from deluge, and adds any new labels we need."""
                        dlist = []
                        for label in labels:
                            if label not in d_labels:
                                log.debug('Adding the label %s to deluge' % label)
                                dlist.append(client.label.add(label))
                        return defer.DeferredList(dlist)

                    return client.label.get_labels().addCallback(on_get_labels)

                if 'Label' in plugins:
                    return on_label_enabled(True)
                else:
                    # Label plugin isn't enabled, so we check if it's available and enable it.

                    def on_get_available_plugins(plugins):
                        """Gets plugins available to deluge, enables Label plugin if available."""
                        if 'Label' in plugins:
                            log.debug('Enabling label plugin in deluge')
                            return client.core.enable_plugin('Label').addCallback(on_label_enabled)
                        else:
                            log.error('Label plugin is not installed in deluge')

                    return client.core.get_available_plugins().addCallback(on_get_available_plugins)

            label_deferred = client.core.get_enabled_plugins().addCallback(on_get_enabled_plugins)
            dlist.append(label_deferred)

        def on_get_daemon_info(ver):
            """Gets called with the daemon version info, stores it in self."""
            log.debug('deluge version %s' % ver)
            self.deluge_version = ver

        version_deferred = client.daemon.info().addCallback(on_get_daemon_info)
        dlist.append(version_deferred)

        def on_get_session_state(torrent_ids):
            """Gets called with a list of torrent_ids loaded in the deluge session.
            Adds new torrents and modifies the settings for ones already in the session."""
            dlist = []
            # add the torrents
            for entry in task.accepted:

                @defer.inlineCallbacks
                def _wait_for_metadata(torrent_id, timeout):
                    log.verbose('Waiting %d seconds for "%s" to magnetize' % (timeout, entry['title']))
                    for _ in range(timeout):
                        time.sleep(1)
                        try:
                            status = yield client.core.get_torrent_status(torrent_id, ['files'])
                        except Exception as err:
                            log.error('wait_for_metadata Error: %s' % err)
                            break
                        if len(status['files']) > 0:
                            log.info('"%s" magnetization successful' % (entry['title']))
                            break
                    else:
                        log.warning('"%s" did not magnetize before the timeout elapsed, '
                                    'file list unavailable for processing.' % entry['title'])

                    defer.returnValue(torrent_id)

                def add_entry(entry, opts):
                    """Adds an entry to the deluge session"""
                    magnet, filedump = None, None
                    if entry.get('url', '').startswith('magnet:'):
                        magnet = entry['url']
                    else:
                        if not os.path.exists(entry['file']):
                            entry.fail('Downloaded temp file \'%s\' doesn\'t exist!' % entry['file'])
                            del (entry['file'])
                            return
                        with open(entry['file'], 'rb') as f:
                            filedump = base64.encodestring(f.read())

                    log.verbose('Adding %s to deluge.' % entry['title'])
                    if magnet:
                        d = client.core.add_torrent_magnet(magnet, opts)
                        if config.get('magnetization_timeout'):
                            d.addCallback(_wait_for_metadata, config['magnetization_timeout'])
                        return d
                    else:
                        return client.core.add_torrent_file(entry['title'], filedump, opts)

                # Generate deluge options dict for torrent add
                add_opts = {}
                try:
                    path = entry.render(entry.get('path', config['path']))
                    if path:
                        add_opts['download_location'] = pathscrub(os.path.expanduser(path))
                except RenderError as e:
                    log.error('Could not set path for %s: %s' % (entry['title'], e))
                for fopt, dopt in self.options.items():
                    value = entry.get(fopt, config.get(fopt))
                    if value is not None:
                        add_opts[dopt] = value
                        if fopt == 'ratio':
                            add_opts['stop_at_ratio'] = True
                # Make another set of options, that get set after the torrent has been added
                modify_opts = {
                    'label': format_label(entry.get('label', config['label'])),
                    'queuetotop': entry.get('queuetotop', config.get('queuetotop')),
                    'main_file_only': entry.get('main_file_only', config.get('main_file_only', False)),
                    'main_file_ratio': entry.get('main_file_ratio', config.get('main_file_ratio')),
                    'hide_sparse_files': entry.get('hide_sparse_files', config.get('hide_sparse_files', True)),
                    'keep_subs': entry.get('keep_subs', config.get('keep_subs', True))
                }
                try:
                    movedone = entry.render(entry.get('movedone', config['movedone']))
                    modify_opts['movedone'] = pathscrub(os.path.expanduser(movedone))
                except RenderError as e:
                    log.error('Error setting movedone for %s: %s' % (entry['title'], e))
                try:
                    content_filename = entry.get('content_filename', config.get('content_filename', ''))
                    modify_opts['content_filename'] = pathscrub(entry.render(content_filename))
                except RenderError as e:
                    log.error('Error setting content_filename for %s: %s' % (entry['title'], e))

                torrent_id = entry.get('deluge_id') or entry.get('torrent_info_hash')
                torrent_id = torrent_id and torrent_id.lower()
                if torrent_id in torrent_ids:
                    log.info('%s is already loaded in deluge, setting options' % entry['title'])
                    # Entry has a deluge id, verify the torrent is still in the deluge session and apply options
                    # Since this is already loaded in deluge, we may also need to change the path
                    modify_opts['path'] = add_opts.pop('download_location', None)
                    dlist.extend([set_torrent_options(torrent_id, entry, modify_opts),
                                  client.core.set_torrent_options([torrent_id], add_opts)])
                else:
                    dlist.append(add_entry(entry, add_opts).addCallbacks(
                        set_torrent_options, on_fail, callbackArgs=(entry, modify_opts), errbackArgs=(task, entry)))
            return defer.DeferredList(dlist)

        dlist.append(client.core.get_session_state().addCallback(on_get_session_state))

        def on_complete(result):
            """Gets called when all of our tasks for deluge daemon are complete."""
            client.disconnect()

        tasks = defer.DeferredList(dlist).addBoth(on_complete)

        def on_timeout(result):
            """Gets called if tasks have not completed in 30 seconds.
            Should only happen when something goes wrong."""
            log.error('Timed out while adding torrents to deluge.')
            log.debug('dlist: %s' % result.resultList)
            client.disconnect()

        # Schedule a disconnect to happen if FlexGet hangs while connected to Deluge
        # Leave the timeout long, to give time for possible lookups to occur
        reactor.callLater(600, lambda: tasks.called or on_timeout(tasks))
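
format_label() above is a one-line use of a negated character class: every run of characters that is not a word character or a hyphen collapses into a single underscore, which is how the plugin keeps labels compliant with deluge's naming rules. A standalone sketch (the sample labels are invented):

import re

def format_label(label):
    # Lowercase, then collapse each run of characters outside [a-zA-Z0-9_-]
    # into one underscore.
    return re.sub(r'[^\w-]+', '_', label.lower())

print(format_label("My TV Shows (HD)"))  # my_tv_shows_hd_
print(format_label("anime/airing"))      # anime_airing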

Example 122

Project: py-xml-escpos
Source File: escpos.py
View license
    def receipt(self,xml):
        """
        Prints an xml based receipt definition
        """

        def strclean(string):
            if not string:
                string = ''
            string = string.strip()
            string = re.sub('\s+',' ',string)
            return string

        def format_value(value, decimals=3, width=0, decimals_separator='.', thousands_separator=',', autoint=False, symbol='', position='after'):
            decimals = max(0,int(decimals))
            width    = max(0,int(width))
            value    = float(value)

            if autoint and math.floor(value) == value:
                decimals = 0
            if width == 0:
                width = ''

            if thousands_separator:
                formatstr = "{:"+str(width)+",."+str(decimals)+"f}"
            else:
                formatstr = "{:"+str(width)+"."+str(decimals)+"f}"


            ret = formatstr.format(value)
            ret = ret.replace(',','COMMA')
            ret = ret.replace('.','DOT')
            ret = ret.replace('COMMA',thousands_separator)
            ret = ret.replace('DOT',decimals_separator)

            if symbol:
                if position == 'after':
                    ret = ret + symbol
                else:
                    ret = symbol + ret
            return ret

        def print_elem(stylestack, serializer, elem, indent=0):

            elem_styles = {
                'h1': {'bold': 'on', 'size':'double'},
                'h2': {'size':'double'},
                'h3': {'bold': 'on', 'size':'double-height'},
                'h4': {'size': 'double-height'},
                'h5': {'bold': 'on'},
                'em': {'font': 'b'},
                'b':  {'bold': 'on'},
            }

            stylestack.push()
            if elem.tag in elem_styles:
                stylestack.set(elem_styles[elem.tag])
            stylestack.set(elem.attrib)

            if elem.tag in ('p','div','section','article','receipt','header','footer','li','h1','h2','h3','h4','h5'):
                serializer.start_block(stylestack)
                serializer.text(elem.text)
                for child in elem:
                    print_elem(stylestack,serializer,child)
                    serializer.start_inline(stylestack)
                    serializer.text(child.tail)
                    serializer.end_entity()
                serializer.end_entity()

            elif elem.tag in ('span','em','b','left','right'):
                serializer.start_inline(stylestack)
                serializer.text(elem.text)
                for child in elem:
                    print_elem(stylestack,serializer,child)
                    serializer.start_inline(stylestack)
                    serializer.text(child.tail)
                    serializer.end_entity()
                serializer.end_entity()

            elif elem.tag == 'value':
                serializer.start_inline(stylestack)
                serializer.pre(format_value( 
                                              elem.text,
                                              decimals=stylestack.get('value-decimals'),
                                              width=stylestack.get('value-width'),
                                              decimals_separator=stylestack.get('value-decimals-separator'),
                                              thousands_separator=stylestack.get('value-thousands-separator'),
                                              autoint=(stylestack.get('value-autoint') == 'on'),
                                              symbol=stylestack.get('value-symbol'),
                                              position=stylestack.get('value-symbol-position') 
                                            ))
                serializer.end_entity()

            elif elem.tag == 'line':
                width = stylestack.get('width')
                if stylestack.get('size') in ('double', 'double-width'):
                    width = width / 2

                lineserializer = XmlLineSerializer(stylestack.get('indent')+indent,stylestack.get('tabwidth'),width,stylestack.get('line-ratio'))
                serializer.start_block(stylestack)
                for child in elem:
                    if child.tag == 'left':
                        print_elem(stylestack,lineserializer,child,indent=indent)
                    elif child.tag == 'right':
                        lineserializer.start_right()
                        print_elem(stylestack,lineserializer,child,indent=indent)
                serializer.pre(lineserializer.get_line())
                serializer.end_entity()

            elif elem.tag == 'ul':
                serializer.start_block(stylestack)
                bullet = stylestack.get('bullet')
                for child in elem:
                    if child.tag == 'li':
                        serializer.style(stylestack)
                        serializer.raw(' ' * indent * stylestack.get('tabwidth') + bullet)
                    print_elem(stylestack,serializer,child,indent=indent+1)
                serializer.end_entity()

            elif elem.tag == 'ol':
                cwidth = len(str(len(elem))) + 2
                i = 1
                serializer.start_block(stylestack)
                for child in elem:
                    if child.tag == 'li':
                        serializer.style(stylestack)
                        serializer.raw(' ' * indent * stylestack.get('tabwidth') + ' ' + (str(i)+')').ljust(cwidth))
                        i = i + 1
                    print_elem(stylestack,serializer,child,indent=indent+1)
                serializer.end_entity()

            elif elem.tag == 'pre':
                serializer.start_block(stylestack)
                serializer.pre(elem.text)
                serializer.end_entity()

            elif elem.tag == 'hr':
                width = stylestack.get('width')
                if stylestack.get('size') in ('double', 'double-width'):
                    width = width / 2
                serializer.start_block(stylestack)
                serializer.text('-'*width)
                serializer.end_entity()

            elif elem.tag == 'br':
                serializer.linebreak()

            elif elem.tag == 'img':
                if 'src' in elem.attrib and 'data:' in elem.attrib['src']:
                    self.print_base64_image(elem.attrib['src'])

            elif elem.tag == 'barcode' and 'encoding' in elem.attrib:
                serializer.start_block(stylestack)
                self.barcode(strclean(elem.text),elem.attrib['encoding'])
                serializer.end_entity()

            elif elem.tag == 'cut':
                self.cut()
            elif elem.tag == 'partialcut':
                self.cut(mode='part')
            elif elem.tag == 'cashdraw':
                self.cashdraw(2)
                self.cashdraw(5)

            stylestack.pop()

        try:
            stylestack      = StyleStack() 
            serializer      = XmlSerializer(self)
            root            = ET.fromstring(xml.encode('utf-8'))
            if 'sheet' in root.attrib and root.attrib['sheet'] == 'slip':
                self._raw(SHEET_SLIP_MODE)
                self.slip_sheet_mode = True
            else:
                self._raw(SHEET_ROLL_MODE)

            self._raw(stylestack.to_escpos())

            print_elem(stylestack,serializer,root)

            if 'open-cashdrawer' in root.attrib and root.attrib['open-cashdrawer'] == 'true':
                self.cashdraw(2)
                self.cashdraw(5)
            if not 'cut' in root.attrib or root.attrib['cut'] == 'true' :
                if self.slip_sheet_mode:
                    self._raw(CTL_FF)
                else:
                    self.cut()

        except Exception as e:
            errmsg = str(e)+'\n'+'-'*48+'\n'+traceback.format_exc() + '-'*48+'\n'
            self.text(errmsg)
            self.cut()

            raise e
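
strclean() above normalizes text pulled from the receipt XML (for example barcode contents): None becomes an empty string, the ends are trimmed, and any run of whitespace collapses to a single space. The same helper on its own, with an invented input:

import re

def strclean(string):
    # Guard against None, trim the ends, then collapse runs of whitespace
    # (spaces, tabs, newlines) into a single space.
    if not string:
        string = ''
    string = string.strip()
    return re.sub(r'\s+', ' ', string)

print(repr(strclean("  Total:\t\n 12.50  ")))  # 'Total: 12.50'
print(repr(strclean(None)))                    # ''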

Example 123

Project: tools-iuc
Source File: rgToolFactory2.py
View license
    def __init__(self, opts=None):
        """
        cleanup inputs, setup some outputs
        """
        self.toolhtmldepinterpskel = """<?xml version="1.0"?>
        <tool_dependency>
            <package name="ghostscript" version="9.10">
                <repository name="package_ghostscript_9_10" owner="devteam" prior_installation_required="True" />
            </package>
            <package name="graphicsmagick" version="1.3.18">
                <repository name="package_graphicsmagick_1_3" owner="iuc" prior_installation_required="True" />
            </package>
            <package name="%(interpreter_name)s" version="%(interpreter_version)s">
                <repository name="%(interpreter_pack)s" owner="%(interpreter_owner)s" prior_installation_required="True" />
            </package>
            <readme>
                %(readme)s
                This file was autogenerated by the Galaxy Tool Factory 2
            </readme>
        </tool_dependency>
        """
        self.toolhtmldepskel = """<?xml version="1.0"?>
        <tool_dependency>
            <package name="ghostscript" version="9.10">
                <repository name="package_ghostscript_9_10" owner="devteam" prior_installation_required="True" />
            </package>
            <package name="graphicsmagick" version="1.3.18">
                <repository name="package_graphicsmagick_1_3" owner="iuc" prior_installation_required="True" />
            </package>
                <readme>
                   %(readme)s
                   This file was autogenerated by the Galaxy Tool Factory 2
               </readme>
        </tool_dependency>
        """

        self.emptytoolhtmldepskel = """<?xml version="1.0"?>
        <tool_dependency>
                <readme>
                   %(readme)s
                This file was autogenerated by the Galaxy Tool Factory 2
               </readme>
        </tool_dependency>
        """

        self.protorequirements = """<requirements>
              <requirement type="package" version="9.10">ghostscript</requirement>
              <requirement type="package" version="1.3.18">graphicsmagick</requirement>
          </requirements>"""

        self.protorequirements_interpreter = """<requirements>
              <requirement type="package" version="9.10">ghostscript</requirement>
              <requirement type="package" version="1.3.18">graphicsmagick</requirement>
              <requirement type="package" version="%(interpreter_version)s">%(interpreter_name)s</requirement>
          </requirements>"""

        self.newCommand = """
            %(toolname)s.py --script_path "$runMe" --interpreter "%(interpreter)s"
                --tool_name "%(toolname)s"
                %(command_inputs)s
                %(command_outputs)s
            """

        self.tooltestsTabOnly = """
            <test>
            %(test1Inputs)s
            <param name="job_name" value="test1"/>
            <param name="runMe" value="$runMe"/>
            <output name="output1="%(test1Output)s" ftype="tabular"/>
            %(additionalParams)s
            </test>
            """

        self.tooltestsHTMLOnly = """
            <test>
            %(test1Inputs)s
            <param name="job_name" value="test1"/>
            <param name="runMe" value="$runMe"/>
            %(additionalParams)s
            <output name="html_file" file="%(test1HTML)s" ftype="html" lines_diff="5"/>
            </test>
            """

        self.tooltestsBoth = """
            <test>
            %(test1Inputs)s
            <param name="job_name" value="test1"/>
            <param name="runMe" value="$runMe"/>
            %(additionalParams)s
            <output name="output1" file="%(test1Output)s" ftype="tabular" />
            <output name="html_file" file="%(test1HTML)s" ftype="html" lines_diff="10"/>
            </test>
            """

        self.newXML = """<tool id="%(toolid)s" name="%(toolname)s" version="%(tool_version)s">
%(tooldesc)s
%(requirements)s
<command interpreter="python">
%(command)s
</command>
<inputs>
%(inputs)s
%(additionalInputs)s
</inputs>
<outputs>
%(outputs)s
</outputs>
<configfiles>
<configfile name="runMe">
%(script)s
</configfile>
</configfiles>
<tests>
%(tooltests)s
</tests>
<help>

%(help)s

This tool was autogenerated from a user provided script using the Galaxy Tool Factory 2
https://toolshed.g2.bx.psu.edu/view/fubar/tool_factory_2
</help>
<citations>
    %(citations)s
    <citation type="doi">10.1093/bioinformatics/bts573</citation>
</citations>
</tool>"""

        self.useGM = cmd_exists('gm')
        self.useIM = cmd_exists('convert')
        self.useGS = cmd_exists('gs')
        self.temp_warned = False  # we want only one warning if $TMP not set
        if opts.output_dir:  # simplify for the tool tarball
            os.chdir(opts.output_dir)
        self.thumbformat = 'png'
        self.opts = opts
        self.toolname = re.sub('[^a-zA-Z0-9_]+', '', opts.tool_name)  # a sanitizer now does this but..
        self.toolid = self.toolname
        self.myname = sys.argv[0]  # get our name because we write ourselves out as a tool later
        self.pyfile = self.myname  # crude but efficient - the cruft won't hurt much
        self.xmlfile = '%s.xml' % self.toolname
        rx = open(self.opts.script_path, 'r').readlines()
        rx = [x.rstrip() for x in rx]  # remove pesky dos line endings if needed
        self.script = '\n'.join(rx)
        fhandle, self.sfile = tempfile.mkstemp(prefix=self.toolname, suffix=".%s" % (opts.interpreter))
        tscript = open(self.sfile, 'w')  # use self.sfile as script source for Popen
        tscript.write(self.script)
        tscript.close()
        self.indentedScript = "  %s" % '\n'.join([' %s' % html_escape(x) for x in rx])  # for restructured text in help
        self.escapedScript = "%s" % '\n'.join([' %s' % html_escape(x) for x in rx])
        self.elog = os.path.join(self.opts.output_dir, "%s_error.log" % self.toolname)
        if opts.output_dir:  # may not want these complexities
            self.tlog = os.path.join(self.opts.output_dir, "%s_runner.log" % self.toolname)
            art = '%s.%s' % (self.toolname, opts.interpreter)
            artpath = os.path.join(self.opts.output_dir, art)  # need full path
            artifact = open(artpath, 'w')  # use self.sfile as script source for Popen
            artifact.write(self.script)
            artifact.close()
        self.cl = []
        self.html = []
        self.test1Inputs = []  # now a list
        a = self.cl.append
        a(opts.interpreter)
        a(self.sfile)
        # if multiple inputs - positional or need to distinguish them with cl params
        if opts.input_tab:
            tests = []
            for i, intab in enumerate(opts.input_tab):  # if multiple, make tests
                if intab.find(',') != -1:
                    (gpath, uname) = intab.split(',')
                else:
                    gpath = intab
                tests.append(os.path.basename(gpath))
            self.test1Inputs = '<param name="input_tab" value="%s" />' % (','.join(tests))
        else:
            self.test1Inputs = ''
        # we always pass path,name pairs in using python optparse append
        # but the command line has to be different
        self.infile_paths = ''
        self.infile_names = ''
        if self.opts.input_tab:
            self.infile_paths = ','.join([x.split(',')[0].strip() for x in self.opts.input_tab])
            self.infile_names = ','.join([x.split(',')[1].strip() for x in self.opts.input_tab])
        if self.opts.interpreter == 'python':
            # yes, this is how additional parameters are always passed in python - to the TF itself and to
            # scripts to avoid having unknown parameter names (yes, they can be parsed but...) on the command line
            if self.opts.input_tab:
                a('--inpaths=%s' % (self.infile_paths))
                a('--innames=%s' % (self.infile_names))
            if self.opts.output_tab:
                a('--outpath=%s' % self.opts.output_tab)
            for p in opts.additional_parameters:
                p = p.replace('"', '')
                psplit = p.split(',')
                param = html_unescape(psplit[0])
                value = html_unescape(psplit[1])
                a('%s="%s"' % (param, value))
        if (self.opts.interpreter == 'Rscript'):
            # pass params on command line as expressions which the script evaluates - see sample
            if self.opts.input_tab:
                a('INPATHS="%s"' % self.infile_paths)
                a('INNAMES="%s"' % self.infile_names)
            if self.opts.output_tab:
                a('OUTPATH="%s"' % self.opts.output_tab)
            for p in opts.additional_parameters:
                p = p.replace('"', '')
                psplit = p.split(',')
                param = html_unescape(psplit[0])
                value = html_unescape(psplit[1])
                a('%s=%s' % (param, quote_non_numeric(value)))
        if (self.opts.interpreter == 'perl'):
            # pass positional params on command line - perl script needs to discombobulate the path/name lists
            if self.opts.input_tab:
                a('%s' % self.infile_paths)
                a('%s' % self.infile_names)
            if self.opts.output_tab:
                a('%s' % self.opts.output_tab)
            for p in opts.additional_parameters:
                # followed by any additional name=value parameter pairs
                p = p.replace('"', '')
                psplit = p.split(',')
                param = html_unescape(psplit[0])
                value = html_unescape(psplit[1])
                a('%s=%s' % (param, quote_non_numeric(value)))
        if self.opts.interpreter == 'sh' or self.opts.interpreter == 'bash':
            # more is better - now move all params into environment AND drop on to command line.
            self.cl.insert(0, 'env')
            if self.opts.input_tab:
                self.cl.insert(1, 'INPATHS=%s' % (self.infile_paths))
                self.cl.insert(2, 'INNAMES=%s' % (self.infile_names))
            if self.opts.output_tab:
                self.cl.insert(3, 'OUTPATH=%s' % (self.opts.output_tab))
                a('OUTPATH=%s' % (self.opts.output_tab))
            # sets those environment variables for the script
            # additional params appear in CL - yes, it's confusing
            for i, p in enumerate(opts.additional_parameters):
                psplit = p.split(',')
                param = html_unescape(psplit[0])
                value = html_unescape(psplit[1])
                a('%s=%s' % (param, quote_non_numeric(value)))
                self.cl.insert(4 + i, '%s=%s' % (param, quote_non_numeric(value)))
        self.interpreter_owner = 'SYSTEM'
        self.interpreter_pack = 'SYSTEM'
        self.interpreter_name = 'SYSTEM'
        self.interpreter_version = 'SYSTEM'
        self.interpreter_revision = 'SYSTEM'
        if opts.envshpath != 'system':  # need to parse out details for our tool_dependency
            try:  # fragile - depends on common naming convention as at jan 2015 = package_[interp]_v0_v1_v2... = version v0.v1.v2.. is in play
                # this ONLY happens at tool generation by an admin - the generated tool always uses the default of system so path is from local env.sh
                packdetails = opts.envshpath.split(os.path.sep)[-4:-1]  # eg ['fubar', 'package_r_3_1_1', '63cdb9b2234c']
                self.interpreter_owner = packdetails[0]
                self.interpreter_pack = packdetails[1]
                self.interpreter_name = packdetails[1].split('_')[1].upper()
                self.interpreter_revision = packdetails[2]
                self.interpreter_version = '.'.join(packdetails[1].split('_')[2:])
            except:
                pass
        self.outFormats = opts.output_format
        self.inputFormats = opts.input_formats
        self.test1Output = '%s_test1_output.xls' % self.toolname
        self.test1HTML = '%s_test1_output.html' % self.toolname
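
The constructor above derives a safe tool id from a user-supplied name by deleting everything outside [a-zA-Z0-9_], so the name can be reused in file names and XML ids. A small sketch of that sanitizer in isolation (the input names are made up):

import re

def sanitize_toolname(name):
    # Remove every character that is not a letter, digit or underscore,
    # mirroring the toolname cleanup in rgToolFactory2.
    return re.sub(r'[^a-zA-Z0-9_]+', '', name)

print(sanitize_toolname("my cool tool (v2)!"))  # mycooltoolv2
print(sanitize_toolname("plot_rna-seq"))        # plot_rnaseq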

Example 124

Project: gengo-python
Source File: gengo.py
View license
    def __getattr__(self, api_call):
        """
        The most magically awesome block of code you'll ever see.

        Rather than list out 9 million methods for this API, we just
        keep a table (see above) of every API endpoint and their
        corresponding function id for this library. This gives flexibility
        in API support.

        For those who don't get what's going on here, Python classes
        have a feature known as __getattr__().
        It's called when an attribute that was called on an object
        doesn't seem to exist. Since it doesn't exist,
        we can take over and find the API method in our table. We then
        return a function that downloads and parses
        what we're looking for, based on the key/values passed in.
        """
        def get(self, **kwargs):
            # Grab the (hopefully) existing method 'definition' to fire off
            # from our api hash table.
            fn = apihash[api_call]

            # don't make any lasting changes to the kwargs dictionary
            kwargs = copy.deepcopy(kwargs)

            # Do a check here for specific job sets - we need to support
            # posting multiple jobs
            # at once, so see if there's a dictionary of jobs passed in,
            # pop it out, let things go on as normal,
            # then pick this chain back up below...
            post_data = {}
            if 'job' in kwargs:
                post_data['job'] = {'job': kwargs.pop('job')}
            if 'jobs' in kwargs:
                # there are two cases to handle; one where there is
                # a dictionary passed in with a `jobs` key, the other
                # where that key is not present.
                post_data['jobs'] = {'jobs': kwargs.pop('jobs')}
                jobs_dict = post_data['jobs']['jobs']
                if 'jobs' in jobs_dict:
                    post_data['jobs']['jobs'] = jobs_dict['jobs']
                if 'as_group' in jobs_dict:
                    post_data['jobs']['as_group'] = jobs_dict.pop('as_group')
                if 'comment' in jobs_dict:
                    post_data['jobs']['comment'] = jobs_dict.pop('comment')
                if 'url_attachments' in jobs_dict:
                    post_data['jobs']['url_attachments'] =\
                     jobs_dict.pop('url_attachments')
            if 'comment' in kwargs:
                post_data['comment'] = kwargs.pop('comment')
            if 'action' in kwargs:
                post_data['action'] = kwargs.pop('action')
            if 'job_ids' in kwargs:
                post_data['job_ids'] = kwargs.pop('job_ids')
            if 'file_attachments' in kwargs:
                post_data['file_attachments'] = kwargs.pop('file_attachments')

            # Set up a true base URL, abstracting away the need to care
            # about the sandbox mode or API versioning at this stage.
            base_url = self.api_url.format(
                version='v{0}'.format(self.api_version)
            )

            # Go through and replace any mustaches that are in our API url
            # with their appropriate key/value pairs...
            # NOTE: We pop() here because we don't want the extra data
            # included and messing up our hash down the road.
            base = re.sub(
                '\{\{(?P<m>[a-zA-Z_]+)\}\}',
                lambda m: '{0}'.format(kwargs.pop(m.group(1),
                                                  # In case of debugging needs
                                                  'no_argument_specified')),
                base_url + fn['url']
            )

            # Build up a proper 'authenticated' url...
            #
            # Note: for further information on what's going on here, it's
            # best to familiarize yourself with the Gengo authentication
            # API. (http://developers.gengo.com/)
            query_params = dict([k, quote(str(v).encode('utf-8'))] for k, v
                                in kwargs.items())
            if self.public_key is not None:
                query_params['api_key'] = self.public_key
            query_params['ts'] = str(int(time()))

            # check whether the endpoint supports file uploads and check the
            # params for file_path and modify the query_params accordingly
            # needs to be refactored to more general handling once we
            # also want to support e.g. glossary upload. For now it's tied to
            # jobs payloads
            file_data = False
            if 'upload' in fn:
                file_data = {}
                jobs = post_data.get('jobs', {}).get('jobs', {})
                for k, j in jobs.items():
                    if isinstance(j, dict):
                        if j.get('type') == 'file' and 'file_path' in j:
                            file_path = j.get('file_path')
                            mimetype = j.get('mimetype')

                            mimetype = mimetype if mimetype else \
                                mimetypes.guess_type(file_path)[0]
                            mimetype = mimetype if mimetype else \
                                'application/octet-stream'

                            file_data['file_' + k] = (
                                file_path, open(file_path, 'rb'), mimetype
                            )
                            j['file_key'] = 'file_' + k
                            del j['file_path']

            # handle order url attachments
            order = post_data.get('jobs', {})
            self.replaceURLAttachmentsWithAttachments(order)

            # handle post jobs url attachments
            jobs = post_data.get('jobs', {}).get('jobs', {})
            for k, j in jobs.items():
                if isinstance(j, dict):
                    self.replaceURLAttachmentsWithAttachments(j)

            # handle post comment url attachments
            comments = post_data.get('comment', {})
            self.replaceURLAttachmentsWithAttachments(comments)

            try:
                # If any file_attachments then modify base url to include
                # private_key and file_data to include file_attachments as
                # multipart.
                tmp_files = []
                if 'file_attachments' in post_data:
                    file_data = [
                        ('body', post_data['comment']['body']),
                    ]

                    file_attachments = post_data['file_attachments']
                    for a in file_attachments:
                        f = open(a, 'rb')
                        tmp_files.append(f)
                        file_data.append(('file_attachments', f))

                # If any further APIs require their own special signing needs,
                # fork here...
                response = self.signAndRequestAPILatest(fn, base, query_params,
                                                        post_data, file_data)
                response.connection.close()
            finally:
                for f in tmp_files:
                    f.close()

            try:
                results = response.json()
            except TypeError:
                # requests<1.0
                results = response.json
            except ValueError:
                msg = "Internal Server Error"
                if self.debug is True:
                    msg = "Invalid JSON response: '{0}'".format(response.text)
                raise GengoError(msg, 1)

            # See if we got any errors back that we can cleanly raise on
            if 'opstat' in results and results['opstat'] != 'ok':
                # In cases of multiple errors, the keys for results['err']
                # will be the job IDs.
                if 'msg' not in results['err'] and\
                        'code' not in results['err']:
                    concatted_msg = ''
                    for job_key, msg_code_list in results['err'].iteritems():
                        concatted_msg += '<{0}: {1}> '.format(
                            job_key, msg_code_list[0]['msg']
                        )
                    raise GengoError(concatted_msg,
                                     results['err'].itervalues().
                                     next()[0]['code'])
                raise GengoError(results['err'].get('msg'),
                                 results['err'].get('code'))

            # If not, return the results
            return results

        if api_call in apihash:
            return get.__get__(self)
        else:
            raise AttributeError
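
The re.sub call in this example fills the "mustache" placeholders in the endpoint URL from the keyword arguments, popping each one so it is not re-sent later as a query parameter. A minimal sketch of the same idea, using a hypothetical endpoint template and kwargs:

import re

url_template = '/translate/job/{{id}}/comment'  # hypothetical endpoint
kwargs = {'id': 42, 'body': 'hello'}

resolved = re.sub(
    r'\{\{(?P<m>[a-zA-Z_]+)\}\}',
    lambda m: str(kwargs.pop(m.group(1), 'no_argument_specified')),
    url_template
)
# resolved == '/translate/job/42/comment'; kwargs is left with {'body': 'hello'}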

Example 125

Project: agdc
Source File: scene_kml_generator.py
View license
    def generate(self, kml_filename=None, wrs_shapefile='WRS-2_bound_world.kml'):
        '''
        Generate a KML file
        '''
        def write_xml_file(filename, dom_tree, save_backup=False):
            """Function write the metadata contained in self._metadata_dict to an XML file
            Argument:
                filename: Metadata file to be written
                uses_attributes: Boolean flag indicating whether to write values to tag attributes
            """
            logger.debug('write_file(%s) called', filename)
    
            if save_backup and os.path.exists(filename + '.bck'):
                os.remove(filename + '.bck')
    
            if os.path.exists(filename):
                if save_backup:
                    os.rename(filename, filename + '.bck')
                else:
                    os.remove(filename)
    
            # Open XML document
            try:
                outfile = open(filename, 'w')
                assert outfile is not None, 'Unable to open XML file ' + filename + ' for writing'
    
                logger.debug('Writing XML file %s', filename)
    
                # Strip all tabs and EOLs from around values, remove all empty lines
                outfile.write(re.sub('\>(\s+)(\n\t*)\<',
                                     '>\\2<',
                                     re.sub('(\<\w*[^/]\>)\n(\t*\n)*(\t*)([^<>\n]*)\n\t*\n*(\t+)(\</\w+\>)',
                                            '\\1\\4\\6',
                                            dom_tree.toprettyxml(encoding='utf-8')
                                            )
                                     )
                              )
    
            finally:
                outfile.close()
    
        def get_wrs_placemark_node(wrs_document_node, placemark_name):
            """
            Return a clone of the WRS placemark node with the specified name
            """ 
            try:
                return [placemark_node for placemark_node in self.getChildNodesByName(wrs_document_node, 'Placemark') 
                    if self.getChildNodesByName(placemark_node, 'name')[0].childNodes[0].nodeValue == placemark_name][0].cloneNode(True)
            except:
                return None
                

        
        def create_placemark_node(wrs_document_node, acquisition_info):
            """
            Create a new placemark node for the specified acquisition
            """
            logger.info('Processing %s', acquisition_info['dataset_name'])
            
            wrs_placemark_name = '%d_%d' % (acquisition_info['path'], acquisition_info['row'])
            
            kml_placemark_name = acquisition_info['dataset_name']
            
            placemark_node = get_wrs_placemark_node(wrs_document_node, wrs_placemark_name)
            
            self.getChildNodesByName(placemark_node, 'name')[0].childNodes[0].nodeValue = kml_placemark_name
            
            kml_time_span_node = kml_dom_tree.createElement('TimeSpan')
            placemark_node.appendChild(kml_time_span_node)
            
            kml_time_begin_node = kml_dom_tree.createElement('begin')
            kml_time_begin_text_node = kml_dom_tree.createTextNode(acquisition_info['start_datetime'].isoformat())
            kml_time_begin_node.appendChild(kml_time_begin_text_node)
            kml_time_span_node.appendChild(kml_time_begin_node)
            
            kml_time_end_node = kml_dom_tree.createElement('end')
            kml_time_end_text_node = kml_dom_tree.createTextNode(acquisition_info['end_datetime'].isoformat())
            kml_time_end_node.appendChild(kml_time_end_text_node)
            kml_time_span_node.appendChild(kml_time_end_node)
            
            description_node = self.getChildNodesByName(placemark_node, 'description')[0]
            description_node.childNodes[0].data = '''<strong>Geoscience Australia ARG25 Dataset</strong> 
<table cellspacing="1" cellpadding="1">
    <tr>
        <td>Satellite:</td>
        <td>%(satellite)s</td>
    </tr>
    <tr>
        <td>Sensor:</td>
        <td>%(sensor)s</td>
    </tr>
    <tr>
        <td>Start date/time (UTC):</td>
        <td>%(start_datetime)s</td>
    </tr>
    <tr>
        <td>End date/time (UTC):</td>
        <td>%(end_datetime)s</td>
    </tr>
    <tr>
        <td>WRS Path-Row:</td>
        <td>%(path)03d-%(row)03d</td>
    </tr>
    <tr>
        <td>Bounding Box (LL,UR):</td>
        <td>(%(ll_lon)f,%(lr_lat)f),(%(ur_lon)f,%(ul_lat)f)</td>
    </tr>
    <tr>
        <td>Est. Cloud Cover (USGS):</td>
        <td>%(cloud_cover)s%%</td>
    </tr>
    <tr>
        <td>GCP Count:</td>
        <td>%(gcp_count)s</td>
    </tr>
    <tr>
        <td>
            <a href="http://eos.ga.gov.au/thredds/wms/LANDSAT/%(year)04d/%(month)02d/%(dataset_name)s_BX.nc?REQUEST=GetMap&SERVICE=WMS&VERSION=1.3.0&LAYERS=FalseColour741&STYLES=&FORMAT=image/png&TRANSPARENT=TRUE&CRS=CRS:84&BBOX=%(ll_lon)f,%(lr_lat)f,%(ur_lon)f,%(ul_lat)f&WIDTH=%(thumbnail_size)d&HEIGHT=%(thumbnail_size)d">View thumbnail</a>
        </td>
        <td>
            <a href="http://eos.ga.gov.au/thredds/fileServer/LANDSAT/%(year)04d/%(month)02d/%(dataset_name)s_BX.nc">Download full NetCDF file</a>
        </td>
    </tr>
</table>''' % acquisition_info
            
            return placemark_node
            
            
        kml_filename = kml_filename or self.output_file
        assert kml_filename, 'Output filename must be specified'
        
        wrs_dom_tree = xml.dom.minidom.parse(wrs_shapefile)
        wrs_document_element = wrs_dom_tree.documentElement
        wrs_document_node = self.getChildNodesByName(wrs_document_element, 'Document')[0]
        
        kml_dom_tree = xml.dom.minidom.getDOMImplementation().createDocument(wrs_document_element.namespaceURI, 
                                                                             'kml', 
                                                                             wrs_dom_tree.doctype)
        kml_document_element = kml_dom_tree.documentElement
        
        # Copy document attributes
        for attribute_value in wrs_document_element.attributes.items():
            kml_document_element.setAttribute(attribute_value[0], attribute_value[1])
            
        kml_document_node = kml_dom_tree.createElement('Document')
        kml_document_element.appendChild(kml_document_node)
        
        
        # Copy all child nodes of the "Document" node except placemarks
        for wrs_child_node in [child_node for child_node in wrs_document_node.childNodes 
                               if child_node.nodeName != 'Placemark']:
                                   
            kml_child_node = kml_dom_tree.importNode(wrs_child_node, True)                                   
            kml_document_node.appendChild(kml_child_node)
            
        # Update document name 
        doc_name = 'Geoscience Australia ARG-25 Landsat Scenes'
        if self.satellite or self.sensor:
            doc_name += ' for'
            if self.satellite:
                doc_name += ' %s' % self.satellite
            if self.sensor:
                doc_name += ' %s' % self.sensor
        if self.start_date:
            doc_name += ' from %s' % self.start_date
        if self.end_date:
            doc_name += ' to %s' % self.end_date
            
        logger.debug('Setting document name to "%s"', doc_name)
        self.getChildNodesByName(kml_document_node, 'name')[0].childNodes[0].data = doc_name
         
        # Update style nodes as specified in self.style_dict
        for style_node in self.getChildNodesByName(kml_document_node, 'Style'):
            logger.debug('Style node found')
            for tag_name in self.style_dict.keys():
                tag_nodes = self.getChildNodesByName(style_node, tag_name)
                if tag_nodes:
                    logger.debug('\tExisting tag node found for %s', tag_name)
                    tag_node = tag_nodes[0]
                else:
                    logger.debug('\tCreating new tag node for %s', tag_name)
                    tag_node = kml_dom_tree.createElement(tag_name)
                    style_node.appendChild(tag_node)
                    
                for attribute_name in self.style_dict[tag_name].keys():
                    attribute_nodes = self.getChildNodesByName(tag_node, attribute_name)
                    if attribute_nodes:
                        logger.debug('\t\tExisting attribute node found for %s', attribute_name)
                        attribute_node = attribute_nodes[0]
                        text_node = attribute_node.childNodes[0]
                        text_node.data = str(self.style_dict[tag_name][attribute_name])
                    else:
                        logger.debug('\t\tCreating new attribute node for %s', attribute_name)
                        attribute_node = kml_dom_tree.createElement(attribute_name)
                        tag_node.appendChild(attribute_node)
                        text_node = kml_dom_tree.createTextNode(str(self.style_dict[tag_name][attribute_name]))
                        attribute_node.appendChild(text_node)
    
           
        self.db_cursor = self.db_connection.cursor()
        
        sql = """-- Find all NBAR acquisitions
select satellite_name as satellite, sensor_name as sensor, 
x_ref as path, y_ref as row, 
start_datetime, end_datetime,
dataset_path,
ll_lon, ll_lat,
lr_lon, lr_lat,
ul_lon, ul_lat,
ur_lon, ur_lat,
cloud_cover::integer, gcp_count::integer
from 
    (
    select *
    from dataset
    where level_id = 2 -- NBAR
    ) dataset
inner join acquisition a using(acquisition_id)
inner join satellite using(satellite_id)
inner join sensor using(satellite_id, sensor_id)

where (%(start_date)s is null or end_datetime::date >= %(start_date)s)
  and (%(end_date)s is null or end_datetime::date <= %(end_date)s)
  and (%(satellite)s is null or satellite_tag = %(satellite)s)
  and (%(sensor)s is null or sensor_name = %(sensor)s)

order by end_datetime
;
"""
        params = {
                  'start_date': self.start_date,
                  'end_date': self.end_date,
                  'satellite': self.satellite,
                  'sensor': self.sensor
                  }
        
        log_multiline(logger.debug, self.db_cursor.mogrify(sql, params), 'SQL', '\t')
        self.db_cursor.execute(sql, params)
        
        field_list = ['satellite',
                      'sensor', 
                      'path',
                      'row', 
                      'start_datetime', 
                      'end_datetime',
                      'dataset_path',
                      'll_lon',
                      'll_lat',
                      'lr_lon',
                      'lr_lat',
                      'ul_lon',
                      'ul_lat',
                      'ur_lon',
                      'ur_lat',
                      'cloud_cover', 
                      'gcp_count'
                      ]
        
        for record in self.db_cursor:
            
            acquisition_info = {}
            for field_index in range(len(field_list)):
                acquisition_info[field_list[field_index]] = record[field_index]
                
            acquisition_info['year'] = acquisition_info['end_datetime'].year    
            acquisition_info['month'] = acquisition_info['end_datetime'].month    
            acquisition_info['thumbnail_size'] = self.thumbnail_size   
            acquisition_info['dataset_name'] = re.search('[^/]+$', acquisition_info['dataset_path']).group(0)
            
            log_multiline(logger.debug, acquisition_info, 'acquisition_info', '\t')
                
            placemark_node = create_placemark_node(wrs_document_node, acquisition_info)
            kml_document_node.appendChild(placemark_node)
            
        logger.info('Writing KML to %s', kml_filename)
        write_xml_file(kml_filename, kml_dom_tree)
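
The nested re.sub calls in write_xml_file() tidy the output of toprettyxml(), which pushes text values onto their own indented lines and leaves blank lines between tags. A reduced sketch with a hypothetical fragment in that shape:

import re

# Hypothetical toprettyxml(indent='\t')-style fragment: a blank indented line
# after <Document> and the text value on its own line inside <name>
pretty = '<Document>\n\t\n\t<name>\n\t\tLS7\n\t</name>\n</Document>\n'

# Inner sub: pull a lone text value back between its opening and closing tags
step1 = re.sub(r'(\<\w*[^/]\>)\n(\t*\n)*(\t*)([^<>\n]*)\n\t*\n*(\t+)(\</\w+\>)',
               r'\1\4\6', pretty)
# Outer sub: collapse remaining blank lines between tags to newline + indentation
step2 = re.sub(r'\>(\s+)(\n\t*)\<', r'>\2<', step1)
# step2 == '<Document>\n\t<name>LS7</name>\n</Document>\n'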

Example 126

Project: agdc
Source File: rgb_stacker.py
View license
    def derive_datasets(self, input_dataset_dict, stack_output_info, tile_type_info):
        """ Overrides abstract function in stacker class. Called in Stacker.stack_derived() function. 
        Creates PQA-masked RGB tiles
        
        Arguments:
            input_dataset_dict: Dict keyed by processing level (e.g. ORTHO, NBAR, PQA, DEM)
                containing all tile info which can be used within the function
                A sample is shown below (including superfluous band-specific information):
                
{
'NBAR': {'band_name': 'Visible Blue',
    'band_tag': 'B10',
    'end_datetime': datetime.datetime(2000, 2, 9, 23, 46, 36, 722217),
    'end_row': 77,
    'level_name': 'NBAR',
    'nodata_value': -999L,
    'path': 91,
    'satellite_tag': 'LS7',
    'sensor_name': 'ETM+',
    'start_datetime': datetime.datetime(2000, 2, 9, 23, 46, 12, 722217),
    'start_row': 77,
    'tile_layer': 1,
    'tile_pathname': '/g/data/v10/datacube/EPSG4326_1deg_0.00025pixel/LS7_ETM/150_-025/2000/LS7_ETM_NBAR_150_-025_2000-02-09T23-46-12.722217.tif',
    'x_index': 150,
    'y_index': -25},
'ORTHO': {'band_name': 'Thermal Infrared (Low Gain)',
     'band_tag': 'B61',
     'end_datetime': datetime.datetime(2000, 2, 9, 23, 46, 36, 722217),
     'end_row': 77,
     'level_name': 'ORTHO',
     'nodata_value': 0L,
     'path': 91,
     'satellite_tag': 'LS7',
     'sensor_name': 'ETM+',
     'start_datetime': datetime.datetime(2000, 2, 9, 23, 46, 12, 722217),
     'start_row': 77,
     'tile_layer': 1,
     'tile_pathname': '/g/data/v10/datacube/EPSG4326_1deg_0.00025pixel/LS7_ETM/150_-025/2000/LS7_ETM_ORTHO_150_-025_2000-02-09T23-46-12.722217.tif',
     'x_index': 150,
     'y_index': -25},
'PQA': {'band_name': 'Pixel Quality Assurance',
    'band_tag': 'PQA',
    'end_datetime': datetime.datetime(2000, 2, 9, 23, 46, 36, 722217),
    'end_row': 77,
    'level_name': 'PQA',
    'nodata_value': None,
    'path': 91,
    'satellite_tag': 'LS7',
    'sensor_name': 'ETM+',
    'start_datetime': datetime.datetime(2000, 2, 9, 23, 46, 12, 722217),
    'start_row': 77,
    'tile_layer': 1,
    'tile_pathname': '/g/data/v10/datacube/EPSG4326_1deg_0.00025pixel/LS7_ETM/150_-025/2000/LS7_ETM_PQA_150_-025_2000-02-09T23-46-12.722217.tif',
    'x_index': 150,
    'y_index': -25}
}                
                
        Arguments (Cont'd):
            stack_output_info: dict containing stack output information. 
                Obtained from stacker object. 
                A sample is shown below
                
stack_output_info = {'x_index': 144, 
                      'y_index': -36,
                      'stack_output_dir': '/g/data/v10/tmp/ndvi',
                      'start_datetime': None, # Datetime object or None
                      'end_datetime': None, # Datetime object or None 
                      'satellite': None, # String or None 
                      'sensor': None} # String or None 
                      
        Arguments (Cont'd):
            tile_type_info: dict containing tile type information. 
                Obtained from stacker object (e.g: stacker.tile_type_dict[tile_type_id]). 
                A sample is shown below
                
{'crs': 'EPSG:4326',
    'file_extension': '.tif',
    'file_format': 'GTiff',
    'format_options': 'COMPRESS=LZW,BIGTIFF=YES',
    'tile_directory': 'EPSG4326_1deg_0.00025pixel',
    'tile_type_id': 1L,
    'tile_type_name': 'Unprojected WGS84 1-degree at 4000 pixels/degree',
    'unit': 'degree',
    'x_origin': 0.0,
    'x_pixel_size': Decimal('0.00025000000000000000'),
    'x_pixels': 4000L,
    'x_size': 1.0,
    'y_origin': 0.0,
    'y_pixel_size': Decimal('0.00025000000000000000'),
    'y_pixels': 4000L,
    'y_size': 1.0}
                            
        Function must create one or more GDAL-supported output datasets. Useful functions in the
        Stacker class include Stacker.get_pqa_mask(), but it is left to the coder to produce exactly
        what is required for a single slice of the temporal stack of derived quantities.
            
        Returns:
            output_dataset_info: Dict keyed by stack filename
                containing metadata info for GDAL-supported output datasets created by this function.
                Note that the key(s) will be used as the output filename for the VRT temporal stack
                and each dataset created must contain only a single band. An example is as follows:
{'/g/data/v10/tmp/ndvi/NDVI_stack_150_-025.vrt': 
    {'band_name': 'Normalised Differential Vegetation Index with PQA applied',
    'band_tag': 'NDVI',
    'end_datetime': datetime.datetime(2000, 2, 9, 23, 46, 36, 722217),
    'end_row': 77,
    'level_name': 'NDVI',
    'nodata_value': None,
    'path': 91,
    'satellite_tag': 'LS7',
    'sensor_name': 'ETM+',
    'start_datetime': datetime.datetime(2000, 2, 9, 23, 46, 12, 722217),
    'start_row': 77,
    'tile_layer': 1,
    'tile_pathname': '/g/data/v10/tmp/ndvi/LS7_ETM_NDVI_150_-025_2000-02-09T23-46-12.722217.tif',
    'x_index': 150,
    'y_index': -25}
}
        """
        
        log_multiline(logger.debug, input_dataset_dict, 'input_dataset_dict', '\t')

        # Definitions for mapping NBAR values to RGB
        rgb_bands=('SWIR1','NIR','G')
        rgb_minmax=((780,5100),(200,4500),(100,2300)) # Min/Max scaled values to map to 1-255
        
        nbar_dataset_info = input_dataset_dict.get('NBAR') # Only need NBAR data for RGB
        #thermal_dataset_info = input_dataset_dict['ORTHO'] # Could have one or two thermal bands
        
        if not nbar_dataset_info:
            log_multiline(logger.warning, input_dataset_dict, 'NBAR dict does not exist', '\t')
            return None

        # Instantiate band lookup object with all required lookup parameters
        lookup = BandLookup(data_cube=self,
                            lookup_scheme_name='LANDSAT-LS5/7',
                            tile_type_id=tile_type_info['tile_type_id'],
                            satellite_tag=nbar_dataset_info['satellite_tag'],
                            sensor_name=nbar_dataset_info['sensor_name'],
                            level_name=nbar_dataset_info['level_name']
                            )

        nbar_dataset_path = nbar_dataset_info['tile_pathname']
        output_tile_path = os.path.join(self.output_dir, re.sub('\.\w+$', 
                                                           '_RGB.tif',
                                                           os.path.basename(nbar_dataset_path)
                                                           )
                                   )
        
        if os.path.exists(output_tile_path):
            logger.info('Skipping existing file %s', output_tile_path)
            return None
        
        if not self.lock_object(output_tile_path):
            logger.info('Skipping locked file %s', output_tile_path)
            return None
        
        input_dataset = gdal.Open(nbar_dataset_path)
        assert input_dataset, 'Unable to open dataset %s' % nbar_dataset_path

        # Nasty work-around for bad PQA due to missing thermal bands for LS8-OLI
        if nbar_dataset_info['satellite_tag'] == 'LS8' and nbar_dataset_info['sensor_name'] == 'OLI':
            pqa_mask = numpy.ones(shape=(input_dataset.RasterYSize, input_dataset.RasterXSize), dtype=numpy.bool)
            logger.debug('Work-around for LS8-OLI PQA issue applied: EVERYTHING PASSED')
        else:
            if input_dataset_dict.get('PQA') is None: # No PQA tile available
                return

            # Get a boolean mask from the PQA dataset (use default parameters for mask and dilation)
            pqa_mask = self.get_pqa_mask(pqa_dataset_path=input_dataset_dict['PQA']['tile_pathname']) 

        log_multiline(logger.debug, pqa_mask, 'pqa_mask', '\t')

        gdal_driver = gdal.GetDriverByName('GTiff')
        output_dataset = gdal_driver.Create(output_tile_path, 
                                            input_dataset.RasterXSize, input_dataset.RasterYSize,
                                            3, gdal.GDT_Byte,
                                            ['INTERLEAVE=PIXEL','COMPRESS=LZW'] #,'BIGTIFF=YES']
                                            )
        
        assert output_dataset, 'Unable to open output dataset %s' % output_tile_path
                                        
        output_dataset.SetGeoTransform(input_dataset.GetGeoTransform())
        output_dataset.SetProjection(input_dataset.GetProjection()) 

        for band_index in range(3):
            logger.debug('Processing %s band in layer %s as band %s', rgb_bands[band_index], lookup.band_no[rgb_bands[band_index]], band_index + 1)

            # Offset byte values by 1 to avoid transparency bug
            scale = (rgb_minmax[band_index][1] - rgb_minmax[band_index][0]) / 254.0
            offset = 1.0 - rgb_minmax[band_index][0] / scale
            
            input_array = input_dataset.GetRasterBand(lookup.band_no[rgb_bands[band_index]]).ReadAsArray()
            log_multiline(logger.debug, input_array, 'input_array', '\t')

            output_array = (input_array / scale + offset).astype(numpy.byte)
            
            # Set out-of-range values to minimum or maximum as required
            output_array[input_array < rgb_minmax[band_index][0]] = 1
            output_array[input_array > rgb_minmax[band_index][1]] = 255
            
            output_array[~pqa_mask] = 0 # Apply PQA Mask
            log_multiline(logger.debug, output_array, 'output_array', '\t')
            
            output_band = output_dataset.GetRasterBand(band_index + 1)
            output_band.WriteArray(output_array)
            output_band.SetNoDataValue(0)
            output_band.FlushCache()
        output_dataset.FlushCache()
        self.unlock_object(output_tile_path)
        logger.info('Finished writing RGB file %s', output_tile_path)
        
        return None # Don't build a stack file
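
The re.sub here simply swaps the trailing file extension of the NBAR tile for an '_RGB.tif' suffix. A minimal sketch, reusing the sample tile path from the docstring above:

import os
import re

nbar_dataset_path = '/g/data/v10/datacube/EPSG4326_1deg_0.00025pixel/LS7_ETM/150_-025/2000/LS7_ETM_NBAR_150_-025_2000-02-09T23-46-12.722217.tif'

# '\.\w+$' matches only the final '.tif' (the dot before '722217' is not
# followed by word characters running to the end of the string)
rgb_name = re.sub(r'\.\w+$', '_RGB.tif', os.path.basename(nbar_dataset_path))
# rgb_name == 'LS7_ETM_NBAR_150_-025_2000-02-09T23-46-12.722217_RGB.tif'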

Example 127

Project: tagger
Source File: model.py
View license
    def build(self,
              dropout,
              char_dim,
              char_lstm_dim,
              char_bidirect,
              word_dim,
              word_lstm_dim,
              word_bidirect,
              lr_method,
              pre_emb,
              crf,
              cap_dim,
              training=True,
              **kwargs
              ):
        """
        Build the network.
        """
        # Training parameters
        n_words = len(self.id_to_word)
        n_chars = len(self.id_to_char)
        n_tags = len(self.id_to_tag)

        # Number of capitalization features
        if cap_dim:
            n_cap = 4

        # Network variables
        is_train = T.iscalar('is_train')
        word_ids = T.ivector(name='word_ids')
        char_for_ids = T.imatrix(name='char_for_ids')
        char_rev_ids = T.imatrix(name='char_rev_ids')
        char_pos_ids = T.ivector(name='char_pos_ids')
        tag_ids = T.ivector(name='tag_ids')
        if cap_dim:
            cap_ids = T.ivector(name='cap_ids')

        # Sentence length
        s_len = (word_ids if word_dim else char_pos_ids).shape[0]

        # Final input (all word features)
        input_dim = 0
        inputs = []

        #
        # Word inputs
        #
        if word_dim:
            input_dim += word_dim
            word_layer = EmbeddingLayer(n_words, word_dim, name='word_layer')
            word_input = word_layer.link(word_ids)
            inputs.append(word_input)
            # Initialize with pretrained embeddings
            if pre_emb and training:
                new_weights = word_layer.embeddings.get_value()
                print 'Loading pretrained embeddings from %s...' % pre_emb
                pretrained = {}
                emb_invalid = 0
                for i, line in enumerate(codecs.open(pre_emb, 'r', 'utf-8')):
                    line = line.rstrip().split()
                    if len(line) == word_dim + 1:
                        pretrained[line[0]] = np.array(
                            [float(x) for x in line[1:]]
                        ).astype(np.float32)
                    else:
                        emb_invalid += 1
                if emb_invalid > 0:
                    print 'WARNING: %i invalid lines' % emb_invalid
                c_found = 0
                c_lower = 0
                c_zeros = 0
                # Lookup table initialization
                for i in xrange(n_words):
                    word = self.id_to_word[i]
                    if word in pretrained:
                        new_weights[i] = pretrained[word]
                        c_found += 1
                    elif word.lower() in pretrained:
                        new_weights[i] = pretrained[word.lower()]
                        c_lower += 1
                    elif re.sub('\d', '0', word.lower()) in pretrained:
                        new_weights[i] = pretrained[
                            re.sub('\d', '0', word.lower())
                        ]
                        c_zeros += 1
                word_layer.embeddings.set_value(new_weights)
                print 'Loaded %i pretrained embeddings.' % len(pretrained)
                print ('%i / %i (%.4f%%) words have been initialized with '
                       'pretrained embeddings.') % (
                            c_found + c_lower + c_zeros, n_words,
                            100. * (c_found + c_lower + c_zeros) / n_words
                      )
                print ('%i found directly, %i after lowercasing, '
                       '%i after lowercasing + zero.') % (
                          c_found, c_lower, c_zeros
                      )

        #
        # Chars inputs
        #
        if char_dim:
            input_dim += char_lstm_dim
            char_layer = EmbeddingLayer(n_chars, char_dim, name='char_layer')

            char_lstm_for = LSTM(char_dim, char_lstm_dim, with_batch=True,
                                 name='char_lstm_for')
            char_lstm_rev = LSTM(char_dim, char_lstm_dim, with_batch=True,
                                 name='char_lstm_rev')

            char_lstm_for.link(char_layer.link(char_for_ids))
            char_lstm_rev.link(char_layer.link(char_rev_ids))

            char_for_output = char_lstm_for.h.dimshuffle((1, 0, 2))[
                T.arange(s_len), char_pos_ids
            ]
            char_rev_output = char_lstm_rev.h.dimshuffle((1, 0, 2))[
                T.arange(s_len), char_pos_ids
            ]

            inputs.append(char_for_output)
            if char_bidirect:
                inputs.append(char_rev_output)
                input_dim += char_lstm_dim

        #
        # Capitalization feature
        #
        if cap_dim:
            input_dim += cap_dim
            cap_layer = EmbeddingLayer(n_cap, cap_dim, name='cap_layer')
            inputs.append(cap_layer.link(cap_ids))

        # Prepare final input
        if len(inputs) != 1:
            inputs = T.concatenate(inputs, axis=1)

        #
        # Dropout on final input
        #
        if dropout:
            dropout_layer = DropoutLayer(p=dropout)
            input_train = dropout_layer.link(inputs)
            input_test = (1 - dropout) * inputs
            inputs = T.switch(T.neq(is_train, 0), input_train, input_test)

        # LSTM for words
        word_lstm_for = LSTM(input_dim, word_lstm_dim, with_batch=False,
                             name='word_lstm_for')
        word_lstm_rev = LSTM(input_dim, word_lstm_dim, with_batch=False,
                             name='word_lstm_rev')
        word_lstm_for.link(inputs)
        word_lstm_rev.link(inputs[::-1, :])
        word_for_output = word_lstm_for.h
        word_rev_output = word_lstm_rev.h[::-1, :]
        if word_bidirect:
            final_output = T.concatenate(
                [word_for_output, word_rev_output],
                axis=1
            )
            tanh_layer = HiddenLayer(2 * word_lstm_dim, word_lstm_dim,
                                     name='tanh_layer', activation='tanh')
            final_output = tanh_layer.link(final_output)
        else:
            final_output = word_for_output

        # Sentence to Named Entity tags - Score
        final_layer = HiddenLayer(word_lstm_dim, n_tags, name='final_layer',
                                  activation=(None if crf else 'softmax'))
        tags_scores = final_layer.link(final_output)

        # No CRF
        if not crf:
            cost = T.nnet.categorical_crossentropy(tags_scores, tag_ids).mean()
        # CRF
        else:
            transitions = shared((n_tags + 2, n_tags + 2), 'transitions')

            small = -1000
            b_s = np.array([[small] * n_tags + [0, small]]).astype(np.float32)
            e_s = np.array([[small] * n_tags + [small, 0]]).astype(np.float32)
            observations = T.concatenate(
                [tags_scores, small * T.ones((s_len, 2))],
                axis=1
            )
            observations = T.concatenate(
                [b_s, observations, e_s],
                axis=0
            )

            # Score from tags
            real_path_score = tags_scores[T.arange(s_len), tag_ids].sum()

            # Score from transitions
            b_id = theano.shared(value=np.array([n_tags], dtype=np.int32))
            e_id = theano.shared(value=np.array([n_tags + 1], dtype=np.int32))
            padded_tags_ids = T.concatenate([b_id, tag_ids, e_id], axis=0)
            real_path_score += transitions[
                padded_tags_ids[T.arange(s_len + 1)],
                padded_tags_ids[T.arange(s_len + 1) + 1]
            ].sum()

            all_paths_scores = forward(observations, transitions)
            cost = - (real_path_score - all_paths_scores)

        # Network parameters
        params = []
        if word_dim:
            self.add_component(word_layer)
            params.extend(word_layer.params)
        if char_dim:
            self.add_component(char_layer)
            self.add_component(char_lstm_for)
            params.extend(char_layer.params)
            params.extend(char_lstm_for.params)
            if char_bidirect:
                self.add_component(char_lstm_rev)
                params.extend(char_lstm_rev.params)
        self.add_component(word_lstm_for)
        params.extend(word_lstm_for.params)
        if word_bidirect:
            self.add_component(word_lstm_rev)
            params.extend(word_lstm_rev.params)
        if cap_dim:
            self.add_component(cap_layer)
            params.extend(cap_layer.params)
        self.add_component(final_layer)
        params.extend(final_layer.params)
        if crf:
            self.add_component(transitions)
            params.append(transitions)
        if word_bidirect:
            self.add_component(tanh_layer)
            params.extend(tanh_layer.params)

        # Prepare train and eval inputs
        eval_inputs = []
        if word_dim:
            eval_inputs.append(word_ids)
        if char_dim:
            eval_inputs.append(char_for_ids)
            if char_bidirect:
                eval_inputs.append(char_rev_ids)
            eval_inputs.append(char_pos_ids)
        if cap_dim:
            eval_inputs.append(cap_ids)
        train_inputs = eval_inputs + [tag_ids]

        # Parse optimization method parameters
        if "-" in lr_method:
            lr_method_name = lr_method[:lr_method.find('-')]
            lr_method_parameters = {}
            for x in lr_method[lr_method.find('-') + 1:].split('-'):
                split = x.split('_')
                assert len(split) == 2
                lr_method_parameters[split[0]] = float(split[1])
        else:
            lr_method_name = lr_method
            lr_method_parameters = {}

        # Compile training function
        print 'Compiling...'
        if training:
            updates = Optimization(clip=5.0).get_updates(lr_method_name, cost, params, **lr_method_parameters)
            f_train = theano.function(
                inputs=train_inputs,
                outputs=cost,
                updates=updates,
                givens=({is_train: np.cast['int32'](1)} if dropout else {})
            )
        else:
            f_train = None

        # Compile evaluation function
        if not crf:
            f_eval = theano.function(
                inputs=eval_inputs,
                outputs=tags_scores,
                givens=({is_train: np.cast['int32'](0)} if dropout else {})
            )
        else:
            f_eval = theano.function(
                inputs=eval_inputs,
                outputs=forward(observations, transitions, viterbi=True,
                                return_alpha=False, return_best_sequence=True),
                givens=({is_train: np.cast['int32'](0)} if dropout else {})
            )

        return f_train, f_eval
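
The re.sub('\d', '0', word.lower()) lookup above gives words that differ only in their digits a common key into the pretrained embedding table. A minimal sketch with a hypothetical two-entry vocabulary:

import re

pretrained = {'room-000': [0.1, 0.2], 'the': [0.3, 0.4]}  # hypothetical embeddings

word = 'Room-101'
key = re.sub(r'\d', '0', word.lower())  # 'room-000'
vector = pretrained.get(key)            # found via the digit-normalised form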

Example 128

Project: golismero
Source File: dump.py
View license
    def dbTableValues(self, tableValues):
        replication = None
        rtable = None
        dumpFP = None
        appendToFile = False

        if tableValues is None:
            return

        db = tableValues["__infos__"]["db"]
        if not db:
            db = "All"
        table = tableValues["__infos__"]["table"]

        if hasattr(conf, "api"):
            self._write(tableValues, content_type=CONTENT_TYPE.DUMP_TABLE)
            return

        dumpDbPath = os.path.join(conf.dumpPath, re.sub(r"[^\w]", "_", unsafeSQLIdentificatorNaming(db)))

        if conf.dumpFormat == DUMP_FORMAT.SQLITE:
            replication = Replication(os.path.join(conf.dumpPath, "%s.sqlite3" % unsafeSQLIdentificatorNaming(db)))
        elif conf.dumpFormat in (DUMP_FORMAT.CSV, DUMP_FORMAT.HTML):
            if not os.path.isdir(dumpDbPath):
                os.makedirs(dumpDbPath, 0755)

            dumpFileName = os.path.join(dumpDbPath, "%s.%s" % (unsafeSQLIdentificatorNaming(table), conf.dumpFormat.lower()))
            appendToFile = os.path.isfile(dumpFileName) and any((conf.limitStart, conf.limitStop))
            dumpFP = openFile(dumpFileName, "wb" if not appendToFile else "ab")

        count = int(tableValues["__infos__"]["count"])
        separator = str()
        field = 1
        fields = len(tableValues) - 1

        columns = prioritySortColumns(tableValues.keys())

        for column in columns:
            if column != "__infos__":
                info = tableValues[column]
                lines = "-" * (int(info["length"]) + 2)
                separator += "+%s" % lines

        separator += "+"
        self._write("Database: %s\nTable: %s" % (unsafeSQLIdentificatorNaming(db) if db else "Current database", unsafeSQLIdentificatorNaming(table)))

        if conf.dumpFormat == DUMP_FORMAT.SQLITE:
            cols = []

            for column in columns:
                if column != "__infos__":
                    colType = Replication.INTEGER

                    for value in tableValues[column]['values']:
                        try:
                            if not value or value == " ":  # NULL
                                continue

                            int(value)
                        except ValueError:
                            colType = None
                            break

                    if colType is None:
                        colType = Replication.REAL

                        for value in tableValues[column]['values']:
                            try:
                                if not value or value == " ":  # NULL
                                    continue

                                float(value)
                            except ValueError:
                                colType = None
                                break

                    cols.append((unsafeSQLIdentificatorNaming(column), colType if colType else Replication.TEXT))

            rtable = replication.createTable(table, cols)
        elif conf.dumpFormat == DUMP_FORMAT.HTML:
            dataToDumpFile(dumpFP, "<!DOCTYPE html>\n<html>\n<head>\n")
            dataToDumpFile(dumpFP, "<meta http-equiv=\"Content-type\" content=\"text/html;charset=%s\">\n" % UNICODE_ENCODING)
            dataToDumpFile(dumpFP, "<title>%s</title>\n" % ("%s%s" % ("%s." % db if METADB_SUFFIX not in db else "", table)))
            dataToDumpFile(dumpFP, HTML_DUMP_CSS_STYLE)
            dataToDumpFile(dumpFP, "\n</head>\n<body>\n<table>\n<thead>\n<tr>\n")

        if count == 1:
            self._write("[1 entry]")
        else:
            self._write("[%d entries]" % count)

        self._write(separator)

        for column in columns:
            if column != "__infos__":
                info = tableValues[column]

                column = unsafeSQLIdentificatorNaming(column)
                maxlength = int(info["length"])
                blank = " " * (maxlength - len(column))

                self._write("| %s%s" % (column, blank), newline=False)

                if not appendToFile:
                    if conf.dumpFormat == DUMP_FORMAT.CSV:
                        if field == fields:
                            dataToDumpFile(dumpFP, "%s" % safeCSValue(column))
                        else:
                            dataToDumpFile(dumpFP, "%s%s" % (safeCSValue(column), conf.csvDel))
                    elif conf.dumpFormat == DUMP_FORMAT.HTML:
                        dataToDumpFile(dumpFP, "<th>%s</th>" % cgi.escape(column).encode("ascii", "xmlcharrefreplace"))

                field += 1

        if conf.dumpFormat == DUMP_FORMAT.HTML:
            dataToDumpFile(dumpFP, "\n</tr>\n</thead>\n<tbody>\n")

        self._write("|\n%s" % separator)

        if conf.dumpFormat == DUMP_FORMAT.CSV:
            dataToDumpFile(dumpFP, "\n" if not appendToFile else "")

        elif conf.dumpFormat == DUMP_FORMAT.SQLITE:
            rtable.beginTransaction()

        if count > TRIM_STDOUT_DUMP_SIZE:
            warnMsg = "console output will be trimmed to "
            warnMsg += "last %d rows due to " % TRIM_STDOUT_DUMP_SIZE
            warnMsg += "large table size"
            logger.warning(warnMsg)

        for i in xrange(count):
            console = (i >= count - TRIM_STDOUT_DUMP_SIZE)
            field = 1
            values = []

            if conf.dumpFormat == DUMP_FORMAT.HTML:
                dataToDumpFile(dumpFP, "<tr>")

            for column in columns:
                if column != "__infos__":
                    info = tableValues[column]

                    if len(info["values"]) <= i:
                        continue

                    if info["values"][i] is None:
                        value = u''
                    else:
                        value = getUnicode(info["values"][i])
                        value = DUMP_REPLACEMENTS.get(value, value)

                    values.append(value)
                    maxlength = int(info["length"])
                    blank = " " * (maxlength - len(value))
                    self._write("| %s%s" % (value, blank), newline=False, console=console)

                    if len(value) > MIN_BINARY_DISK_DUMP_SIZE and r'\x' in value:
                        try:
                            mimetype = magic.from_buffer(value, mime=True)
                            if any(mimetype.startswith(_) for _ in ("application", "image")):
                                if not os.path.isdir(dumpDbPath):
                                    os.makedirs(dumpDbPath, 0755)

                                filepath = os.path.join(dumpDbPath, "%s-%d.bin" % (unsafeSQLIdentificatorNaming(column), randomInt(8)))
                                warnMsg = "writing binary ('%s') content to file '%s' " % (mimetype, filepath)
                                logger.warn(warnMsg)

                                with open(filepath, "wb") as f:
                                    _ = safechardecode(value, True)
                                    f.write(_)
                        except magic.MagicException, err:
                            logger.debug(str(err))

                    if conf.dumpFormat == DUMP_FORMAT.CSV:
                        if field == fields:
                            dataToDumpFile(dumpFP, "%s" % safeCSValue(value))
                        else:
                            dataToDumpFile(dumpFP, "%s%s" % (safeCSValue(value), conf.csvDel))
                    elif conf.dumpFormat == DUMP_FORMAT.HTML:
                        dataToDumpFile(dumpFP, "<td>%s</td>" % cgi.escape(value).encode("ascii", "xmlcharrefreplace"))

                    field += 1

            if conf.dumpFormat == DUMP_FORMAT.SQLITE:
                try:
                    rtable.insert(values)
                except SqlmapValueException:
                    pass
            elif conf.dumpFormat == DUMP_FORMAT.CSV:
                dataToDumpFile(dumpFP, "\n")
            elif conf.dumpFormat == DUMP_FORMAT.HTML:
                dataToDumpFile(dumpFP, "</tr>\n")

            self._write("|", console=console)

        self._write("%s\n" % separator)

        if conf.dumpFormat == DUMP_FORMAT.SQLITE:
            rtable.endTransaction()
            logger.info("table '%s.%s' dumped to sqlite3 database '%s'" % (db, table, replication.dbpath))

        elif conf.dumpFormat in (DUMP_FORMAT.CSV, DUMP_FORMAT.HTML):
            if conf.dumpFormat == DUMP_FORMAT.HTML:
                dataToDumpFile(dumpFP, "</tbody>\n</table>\n</body>\n</html>")
            else:
                dataToDumpFile(dumpFP, "\n")
            dumpFP.close()
            logger.info("table '%s.%s' dumped to %s file '%s'" % (db, table, conf.dumpFormat, dumpFileName))

Example 129

Project: golismero
Source File: entries.py
View license
    def dumpTable(self, foundData=None):
        self.forceDbmsEnum()

        if conf.db is None or conf.db == CURRENT_DB:
            if conf.db is None:
                warnMsg = "missing database parameter. sqlmap is going "
                warnMsg += "to use the current database to enumerate "
                warnMsg += "table(s) entries"
                logger.warn(warnMsg)

            conf.db = self.getCurrentDb()

        elif conf.db is not None:
            if Backend.getIdentifiedDbms() in (DBMS.ORACLE, DBMS.DB2, DBMS.HSQLDB):
                conf.db = conf.db.upper()

            if ',' in conf.db:
                errMsg = "only one database name is allowed when enumerating "
                errMsg += "the tables' columns"
                raise SqlmapMissingMandatoryOptionException(errMsg)

        conf.db = safeSQLIdentificatorNaming(conf.db)

        if conf.tbl:
            if Backend.getIdentifiedDbms() in (DBMS.ORACLE, DBMS.DB2, DBMS.HSQLDB):
                conf.tbl = conf.tbl.upper()

            tblList = conf.tbl.split(",")
        else:
            self.getTables()

            if len(kb.data.cachedTables) > 0:
                tblList = kb.data.cachedTables.values()

                if isinstance(tblList[0], (set, tuple, list)):
                    tblList = tblList[0]
            else:
                errMsg = "unable to retrieve the tables "
                errMsg += "in database '%s'" % unsafeSQLIdentificatorNaming(conf.db)
                raise SqlmapNoneDataException(errMsg)

        for tbl in tblList:
            tblList[tblList.index(tbl)] = safeSQLIdentificatorNaming(tbl, True)

        for tbl in tblList:
            conf.tbl = tbl
            kb.data.dumpedTable = {}

            if foundData is None:
                kb.data.cachedColumns = {}
                self.getColumns(onlyColNames=True)
            else:
                kb.data.cachedColumns = foundData

            try:
                kb.dumpTable = "%s.%s" % (conf.db, tbl)

                if not safeSQLIdentificatorNaming(conf.db) in kb.data.cachedColumns \
                   or safeSQLIdentificatorNaming(tbl, True) not in \
                   kb.data.cachedColumns[safeSQLIdentificatorNaming(conf.db)] \
                   or not kb.data.cachedColumns[safeSQLIdentificatorNaming(conf.db)][safeSQLIdentificatorNaming(tbl, True)]:
                    warnMsg = "unable to enumerate the columns for table "
                    warnMsg += "'%s' in database" % unsafeSQLIdentificatorNaming(tbl)
                    warnMsg += " '%s'" % unsafeSQLIdentificatorNaming(conf.db)
                    warnMsg += ", skipping" if len(tblList) > 1 else ""
                    logger.warn(warnMsg)

                    continue

                columns = kb.data.cachedColumns[safeSQLIdentificatorNaming(conf.db)][safeSQLIdentificatorNaming(tbl, True)]
                colList = sorted(filter(None, columns.keys()))
                colNames = colString = ", ".join(column for column in colList)
                rootQuery = queries[Backend.getIdentifiedDbms()].dump_table

                infoMsg = "fetching entries"
                if conf.col:
                    infoMsg += " of column(s) '%s'" % colNames
                infoMsg += " for table '%s'" % unsafeSQLIdentificatorNaming(tbl)
                infoMsg += " in database '%s'" % unsafeSQLIdentificatorNaming(conf.db)
                logger.info(infoMsg)

                for column in colList:
                    _ = agent.preprocessField(tbl, column)
                    if _ != column:
                        colString = re.sub(r"\b%s\b" % column, _, colString)

                entriesCount = 0

                if any(isTechniqueAvailable(_) for _ in (PAYLOAD.TECHNIQUE.UNION, PAYLOAD.TECHNIQUE.ERROR, PAYLOAD.TECHNIQUE.QUERY)) or conf.direct:
                    entries = []
                    query = None

                    if Backend.getIdentifiedDbms() in (DBMS.ORACLE, DBMS.DB2):
                        query = rootQuery.inband.query % (colString, tbl.upper() if not conf.db else ("%s.%s" % (conf.db.upper(), tbl.upper())))
                    elif Backend.getIdentifiedDbms() in (DBMS.SQLITE, DBMS.ACCESS, DBMS.FIREBIRD, DBMS.MAXDB):
                        query = rootQuery.inband.query % (colString, tbl)
                    elif Backend.getIdentifiedDbms() in (DBMS.SYBASE, DBMS.MSSQL):
                        # Partial inband and error
                        if not (isTechniqueAvailable(PAYLOAD.TECHNIQUE.UNION) and kb.injection.data[PAYLOAD.TECHNIQUE.UNION].where == PAYLOAD.WHERE.ORIGINAL):
                            table = "%s.%s" % (conf.db, tbl)

                            retVal = pivotDumpTable(table, colList, blind=False)

                            if retVal:
                                entries, _ = retVal
                                entries = zip(*[entries[colName] for colName in colList])
                        else:
                            query = rootQuery.inband.query % (colString, conf.db, tbl)
                    elif Backend.getIdentifiedDbms() in (DBMS.MYSQL, DBMS.PGSQL, DBMS.HSQLDB):
                        query = rootQuery.inband.query % (colString, conf.db, tbl, prioritySortColumns(colList)[0])
                    else:
                        query = rootQuery.inband.query % (colString, conf.db, tbl)

                    if not entries and query:
                        entries = inject.getValue(query, blind=False, time=False, dump=True)

                    if not isNoneValue(entries):
                        if isinstance(entries, basestring):
                            entries = [entries]
                        elif not isListLike(entries):
                            entries = []

                        entriesCount = len(entries)

                        for index, column in enumerate(colList):
                            if column not in kb.data.dumpedTable:
                                kb.data.dumpedTable[column] = {"length": len(column), "values": BigArray()}

                            for entry in entries:
                                if entry is None or len(entry) == 0:
                                    continue

                                if isinstance(entry, basestring):
                                    colEntry = entry
                                else:
                                    colEntry = unArrayizeValue(entry[index]) if index < len(entry) else u''

                                _ = len(DUMP_REPLACEMENTS.get(getUnicode(colEntry), getUnicode(colEntry)))
                                maxLen = max(len(column), _)

                                if maxLen > kb.data.dumpedTable[column]["length"]:
                                    kb.data.dumpedTable[column]["length"] = maxLen

                                kb.data.dumpedTable[column]["values"].append(colEntry)

                if not kb.data.dumpedTable and isInferenceAvailable() and not conf.direct:
                    infoMsg = "fetching number of "
                    if conf.col:
                        infoMsg += "column(s) '%s' " % colNames
                    infoMsg += "entries for table '%s' " % unsafeSQLIdentificatorNaming(tbl)
                    infoMsg += "in database '%s'" % unsafeSQLIdentificatorNaming(conf.db)
                    logger.info(infoMsg)

                    if Backend.getIdentifiedDbms() in (DBMS.ORACLE, DBMS.DB2):
                        query = rootQuery.blind.count % (tbl.upper() if not conf.db else ("%s.%s" % (conf.db.upper(), tbl.upper())))
                    elif Backend.getIdentifiedDbms() in (DBMS.SQLITE, DBMS.ACCESS, DBMS.FIREBIRD):
                        query = rootQuery.blind.count % tbl
                    elif Backend.getIdentifiedDbms() in (DBMS.SYBASE, DBMS.MSSQL):
                        query = rootQuery.blind.count % ("%s.%s" % (conf.db, tbl))
                    elif Backend.isDbms(DBMS.MAXDB):
                        query = rootQuery.blind.count % tbl
                    else:
                        query = rootQuery.blind.count % (conf.db, tbl)

                    count = inject.getValue(query, union=False, error=False, expected=EXPECTED.INT, charsetType=CHARSET_TYPE.DIGITS)

                    lengths = {}
                    entries = {}

                    if count == 0:
                        warnMsg = "table '%s' " % unsafeSQLIdentificatorNaming(tbl)
                        warnMsg += "in database '%s' " % unsafeSQLIdentificatorNaming(conf.db)
                        warnMsg += "appears to be empty"
                        logger.warn(warnMsg)

                        for column in colList:
                            lengths[column] = len(column)
                            entries[column] = []

                    elif not isNumPosStrValue(count):
                        warnMsg = "unable to retrieve the number of "
                        if conf.col:
                            warnMsg += "column(s) '%s' " % colNames
                        warnMsg += "entries for table '%s' " % unsafeSQLIdentificatorNaming(tbl)
                        warnMsg += "in database '%s'" % unsafeSQLIdentificatorNaming(conf.db)
                        logger.warn(warnMsg)

                        continue

                    elif Backend.getIdentifiedDbms() in (DBMS.ACCESS, DBMS.SYBASE, DBMS.MAXDB, DBMS.MSSQL):
                        if Backend.isDbms(DBMS.ACCESS):
                            table = tbl
                        elif Backend.getIdentifiedDbms() in (DBMS.SYBASE, DBMS.MSSQL):
                            table = "%s.%s" % (conf.db, tbl)
                        elif Backend.isDbms(DBMS.MAXDB):
                            table = "%s.%s" % (conf.db, tbl)

                        retVal = pivotDumpTable(table, colList, count, blind=True)

                        if retVal:
                            entries, lengths = retVal

                    else:
                        emptyColumns = []
                        plusOne = Backend.getIdentifiedDbms() in (DBMS.ORACLE, DBMS.DB2)
                        indexRange = getLimitRange(count, dump=True, plusOne=plusOne)

                        if len(colList) < len(indexRange) > CHECK_ZERO_COLUMNS_THRESHOLD:
                            for column in colList:
                                if inject.getValue("SELECT COUNT(%s) FROM %s" % (column, kb.dumpTable), union=False, error=False) == '0':
                                    emptyColumns.append(column)
                                    debugMsg = "column '%s' of table '%s' will not be " % (column, kb.dumpTable)
                                    debugMsg += "dumped as it appears to be empty"
                                    logger.debug(debugMsg)

                        try:
                            for index in indexRange:
                                for column in colList:
                                    value = ""

                                    if column not in lengths:
                                        lengths[column] = 0

                                    if column not in entries:
                                        entries[column] = BigArray()

                                    if Backend.getIdentifiedDbms() in (DBMS.MYSQL, DBMS.PGSQL):
                                        query = rootQuery.blind.query % (agent.preprocessField(tbl, column), conf.db, conf.tbl, sorted(colList, key=len)[0], index)
                                    elif Backend.getIdentifiedDbms() in (DBMS.ORACLE, DBMS.DB2):
                                        query = rootQuery.blind.query % (agent.preprocessField(tbl, column),
                                                                        tbl.upper() if not conf.db else ("%s.%s" % (conf.db.upper(), tbl.upper())),
                                                                        index)
                                    elif Backend.isDbms(DBMS.SQLITE):
                                        query = rootQuery.blind.query % (agent.preprocessField(tbl, column), tbl, index)

                                    elif Backend.isDbms(DBMS.FIREBIRD):
                                        query = rootQuery.blind.query % (index, agent.preprocessField(tbl, column), tbl)

                                    value = NULL if column in emptyColumns else inject.getValue(query, union=False, error=False, dump=True)
                                    value = '' if value is None else value

                                    _ = DUMP_REPLACEMENTS.get(getUnicode(value), getUnicode(value))
                                    lengths[column] = max(lengths[column], len(_))
                                    entries[column].append(value)

                        except KeyboardInterrupt:
                            clearConsoleLine()
                            warnMsg = "Ctrl+C detected in dumping phase"
                            logger.warn(warnMsg)

                    for column, columnEntries in entries.items():
                        length = max(lengths[column], len(column))

                        kb.data.dumpedTable[column] = {"length": length, "values": columnEntries}

                        entriesCount = len(columnEntries)

                if len(kb.data.dumpedTable) == 0 or (entriesCount == 0 and kb.permissionFlag):
                    warnMsg = "unable to retrieve the entries "
                    if conf.col:
                        warnMsg += "of columns '%s' " % colNames
                    warnMsg += "for table '%s' " % unsafeSQLIdentificatorNaming(tbl)
                    warnMsg += "in database '%s'%s" % (unsafeSQLIdentificatorNaming(conf.db), " (permission denied)" if kb.permissionFlag else "")
                    logger.warn(warnMsg)
                else:
                    kb.data.dumpedTable["__infos__"] = {"count": entriesCount,
                                                        "table": safeSQLIdentificatorNaming(tbl, True),
                                                        "db": safeSQLIdentificatorNaming(conf.db)}
                    attackDumpedTable()
                    conf.dumper.dbTableValues(kb.data.dumpedTable)

            except SqlmapConnectionException, e:
                errMsg = "connection exception detected in dumping phase: "
                errMsg += "'%s'" % e
                logger.critical(errMsg)

            finally:
                kb.dumpTable = None
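
The tail end of this dump routine keeps a running maximum of each column's display width so the results table can be rendered later; DUMP_REPLACEMENTS maps sentinel values (such as NULL) to their display form before measuring. A minimal, self-contained sketch of that bookkeeping pattern (the helper function and the replacement dict below are illustrative only, not sqlmap's actual API):

# Illustrative only: track per-column display widths while collecting values,
# mirroring the kb.data.dumpedTable bookkeeping in the example above.
DUMP_REPLACEMENTS = {None: "NULL", "": " "}

def dump_rows(columns, rows):
    table = {col: {"length": len(col), "values": []} for col in columns}
    for row in rows:
        for col, value in zip(columns, row):
            shown = DUMP_REPLACEMENTS.get(value, value)
            table[col]["length"] = max(table[col]["length"], len(str(shown)))
            table[col]["values"].append(value)
    return table

# dump_rows(["id", "name"], [(1, "admin"), (2, None)])["name"]["length"] == 5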

Example 130

View license
def main(client, campaign_id):
  # Initialize appropriate service.
  feed_service = client.GetService('FeedService', version='v201605')
  feed_item_service = client.GetService('FeedItemService', version='v201605')
  feed_mapping_service = client.GetService(
      'FeedMappingService', version='v201605')
  campaign_feed_service = client.GetService(
      'CampaignFeedService', version='v201605')

  sitelinks_data = {}

  # Create site links feed first.
  site_links_feed = {
      'name': 'Feed For Site Links #%s' % uuid.uuid4(),
      'attributes': [
          {'type': 'STRING', 'name': 'Link Text'},
          {'type': 'URL_LIST', 'name': 'Link Final URLs'},
          {'type': 'STRING', 'name': 'Line 1 Description'},
          {'type': 'STRING', 'name': 'Line 2 Description'}
      ]
  }

  response = feed_service.mutate([
      {'operator': 'ADD', 'operand': site_links_feed}
  ])

  if 'value' in response:
    feed = response['value'][0]
    link_text_feed_attribute_id = feed['attributes'][0]['id']
    final_url_feed_attribute_id = feed['attributes'][1]['id']
    line_1_feed_attribute_id = feed['attributes'][2]['id']
    line_2_feed_attribute_id = feed['attributes'][3]['id']
    print ('Feed with name \'%s\' and ID \'%s\' was added with' %
           (feed['name'], feed['id']))
    print ('\tText attribute ID \'%s\' and Final URL attribute ID \'%s\'.' %
           (link_text_feed_attribute_id, final_url_feed_attribute_id))
    print ('\tLine 1 attribute ID \'%s\' and Line 2 attribute ID \'%s\'.' %
           (line_1_feed_attribute_id, line_2_feed_attribute_id))
    sitelinks_data['feedId'] = feed['id']
    sitelinks_data['linkTextFeedId'] = link_text_feed_attribute_id
    sitelinks_data['finalUrlFeedId'] = final_url_feed_attribute_id
    sitelinks_data['line1FeedId'] = line_1_feed_attribute_id
    sitelinks_data['line2FeedId'] = line_2_feed_attribute_id
  else:
    raise errors.GoogleAdsError('No feeds were added.')

  # Create site links feed items.
  items_data = [
      {'text': 'Home', 'finalUrls': 'http://www.example.com',
       'line1': 'Home line 1', 'line2': 'Home line 2'},
      {'text': 'Stores', 'finalUrls': 'http://www.example.com/stores',
       'line1': 'Stores line 1', 'line2': 'Stores line 2'},
      {'text': 'On Sale', 'finalUrls': 'http://www.example.com/sale',
       'line1': 'On Sale line 1', 'line2': 'On Sale line 2'},
      {'text': 'Support', 'finalUrls': 'http://www.example.com/support',
       'line1': 'Support line 1', 'line2': 'Support line 2'},
      {'text': 'Products', 'finalUrls': 'http://www.example.com/products',
       'line1': 'Products line 1', 'line2': 'Products line 2'},
      {'text': 'About Us', 'finalUrls': 'http://www.example.com/about',
       'line1': 'About line 1', 'line2': 'About line 2', 'locationId': '21137'}
  ]

  feed_items = []
  for item in items_data:
    feed_item = {
        'feedId': sitelinks_data['feedId'],
        'attributeValues': [
            {
                'feedAttributeId': sitelinks_data['linkTextFeedId'],
                'stringValue': item['text']
            },
            {
                'feedAttributeId': sitelinks_data['finalUrlFeedId'],
                'stringValues': [item['finalUrls']]
            },
            {
                'feedAttributeId': sitelinks_data['line1FeedId'],
                'stringValue': item['line1']
            },
            {
                'feedAttributeId': sitelinks_data['line2FeedId'],
                'stringValue': item['line2']
            }
        ],
        # Optional: use the 'startTime' and 'endTime' keys to specify the time
        # period for the feed to deliver.  The example below will make the feed
        # start now and stop in one month.
        # Make sure you specify the datetime in the customer's time zone. You
        # can retrieve this from customer['dateTimeZone'].
        #
        # ['startTime']: datetime.datetime.now().strftime('%Y%m%d %H%M%S')
        # ['endTime']: (datetime.datetime.now() +
        #               relativedelta(months=1)).strftime('%Y%m%d %H%M%S')
    }

    # Use geographical targeting on a feed.
    # The IDs can be found in the documentation or retrieved with the
    # LocationCriterionService.
    if 'locationId' in item:
      feed_item['geoTargeting'] = {
          'id': item['locationId']
      }

    feed_items.append(feed_item)

  feed_items_operations = [{'operator': 'ADD', 'operand': item} for item
                           in feed_items]

  response = feed_item_service.mutate(feed_items_operations)
  if 'value' in response:
    sitelinks_data['feedItemIds'] = []
    for feed_item in response['value']:
      print 'Feed item with ID %s was added.' % feed_item['feedItemId']
      sitelinks_data['feedItemIds'].append(feed_item['feedItemId'])
  else:
    raise errors.GoogleAdsError('No feed items were added.')

  # Create site links feed mapping.

  feed_mapping = {
      'placeholderType': PLACEHOLDER_SITELINKS,
      'feedId': sitelinks_data['feedId'],
      'attributeFieldMappings': [
          {
              'feedAttributeId': sitelinks_data['linkTextFeedId'],
              'fieldId': PLACEHOLDER_FIELD_SITELINK_LINK_TEXT
          },
          {
              'feedAttributeId': sitelinks_data['finalUrlFeedId'],
              'fieldId': PLACEHOLDER_FIELD_SITELINK_FINAL_URLS
          },
          {
              'feedAttributeId': sitelinks_data['line1FeedId'],
              'fieldId': PLACEHOLDER_FIELD_LINE_1_TEXT
          },
          {
              'feedAttributeId': sitelinks_data['line2FeedId'],
              'fieldId': PLACEHOLDER_FIELD_LINE_2_TEXT
          }
      ]
  }

  response = feed_mapping_service.mutate([
      {'operator': 'ADD', 'operand': feed_mapping}
  ])
  if 'value' in response:
    feed_mapping = response['value'][0]
    print ('Feed mapping with ID %s and placeholder type %s was saved for feed'
           ' with ID %s.' %
           (feed_mapping['feedMappingId'], feed_mapping['placeholderType'],
            feed_mapping['feedId']))
  else:
    raise errors.GoogleAdsError('No feed mappings were added.')

  # Construct a matching function that associates the sitelink feeditems to the
  # campaign, and set the device preference to Mobile. For more details, see the
  # matching function guide:
  # https://developers.google.com/adwords/api/docs/guides/feed-matching-functions
  matching_function_string = (
      'AND(IN(FEED_ITEM_ID, {%s}), EQUALS(CONTEXT.DEVICE, \'Mobile\'))' %
      re.sub(r'\[|\]|L', '', str(sitelinks_data['feedItemIds'])))

  campaign_feed = {
      'feedId': sitelinks_data['feedId'],
      'campaignId': campaign_id,
      'matchingFunction': {'functionString': matching_function_string},
      # Specifying placeholder types on the CampaignFeed allows the same feed
      # to be used for different placeholders in different Campaigns.
      'placeholderTypes': [PLACEHOLDER_SITELINKS]
  }

  response = campaign_feed_service.mutate([
      {'operator': 'ADD', 'operand': campaign_feed}
  ])
  if 'value' in response:
    campaign_feed = response['value'][0]
    print ('Campaign with ID %s was associated with feed with ID %s.' %
           (campaign_feed['campaignId'], campaign_feed['feedId']))
  else:
    raise errors.GoogleAdsError('No campaign feeds were added.')
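
The single re.sub in this example turns the list of feed item IDs into the comma-separated form the AdWords matching function expects: in Python 2, str() on a list of longs yields something like "[123L, 456L]", so the brackets and trailing L suffixes are stripped out. The same step in isolation (the IDs below are made up):

# Illustrative only: strip "[", "]" and the Python 2 long suffix "L" from the
# repr of the feed item ID list before embedding it in the matching function.
import re

feed_item_ids = [12345678901234, 98765432109876]  # hypothetical IDs
id_list = re.sub(r'\[|\]|L', '', str(feed_item_ids))
matching_function_string = (
    "AND(IN(FEED_ITEM_ID, {%s}), EQUALS(CONTEXT.DEVICE, 'Mobile'))" % id_list)
# -> AND(IN(FEED_ITEM_ID, {12345678901234, 98765432109876}), EQUALS(CONTEXT.DEVICE, 'Mobile'))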

Example 131

View license
def main(client, campaign_id):
  # Initialize appropriate service.
  feed_service = client.GetService('FeedService', version='v201609')
  feed_item_service = client.GetService('FeedItemService', version='v201609')
  feed_mapping_service = client.GetService(
      'FeedMappingService', version='v201609')
  campaign_feed_service = client.GetService(
      'CampaignFeedService', version='v201609')

  sitelinks_data = {}

  # Create site links feed first.
  site_links_feed = {
      'name': 'Feed For Site Links #%s' % uuid.uuid4(),
      'attributes': [
          {'type': 'STRING', 'name': 'Link Text'},
          {'type': 'URL_LIST', 'name': 'Link Final URLs'},
          {'type': 'STRING', 'name': 'Line 1 Description'},
          {'type': 'STRING', 'name': 'Line 2 Description'}
      ]
  }

  response = feed_service.mutate([
      {'operator': 'ADD', 'operand': site_links_feed}
  ])

  if 'value' in response:
    feed = response['value'][0]
    link_text_feed_attribute_id = feed['attributes'][0]['id']
    final_url_feed_attribute_id = feed['attributes'][1]['id']
    line_1_feed_attribute_id = feed['attributes'][2]['id']
    line_2_feed_attribute_id = feed['attributes'][3]['id']
    print ('Feed with name \'%s\' and ID \'%s\' was added with' %
           (feed['name'], feed['id']))
    print ('\tText attribute ID \'%s\' and Final URL attribute ID \'%s\'.' %
           (link_text_feed_attribute_id, final_url_feed_attribute_id))
    print ('\tLine 1 attribute ID \'%s\' and Line 2 attribute ID \'%s\'.' %
           (line_1_feed_attribute_id, line_2_feed_attribute_id))
    sitelinks_data['feedId'] = feed['id']
    sitelinks_data['linkTextFeedId'] = link_text_feed_attribute_id
    sitelinks_data['finalUrlFeedId'] = final_url_feed_attribute_id
    sitelinks_data['line1FeedId'] = line_1_feed_attribute_id
    sitelinks_data['line2FeedId'] = line_2_feed_attribute_id
  else:
    raise errors.GoogleAdsError('No feeds were added.')

  # Create site links feed items.
  items_data = [
      {'text': 'Home', 'finalUrls': 'http://www.example.com',
       'line1': 'Home line 1', 'line2': 'Home line 2'},
      {'text': 'Stores', 'finalUrls': 'http://www.example.com/stores',
       'line1': 'Stores line 1', 'line2': 'Stores line 2'},
      {'text': 'On Sale', 'finalUrls': 'http://www.example.com/sale',
       'line1': 'On Sale line 1', 'line2': 'On Sale line 2'},
      {'text': 'Support', 'finalUrls': 'http://www.example.com/support',
       'line1': 'Support line 1', 'line2': 'Support line 2'},
      {'text': 'Products', 'finalUrls': 'http://www.example.com/products',
       'line1': 'Products line 1', 'line2': 'Products line 2'},
      {'text': 'About Us', 'finalUrls': 'http://www.example.com/about',
       'line1': 'About line 1', 'line2': 'About line 2', 'locationId': '21137'}
  ]

  feed_items = []
  for item in items_data:
    feed_item = {
        'feedId': sitelinks_data['feedId'],
        'attributeValues': [
            {
                'feedAttributeId': sitelinks_data['linkTextFeedId'],
                'stringValue': item['text']
            },
            {
                'feedAttributeId': sitelinks_data['finalUrlFeedId'],
                'stringValues': [item['finalUrls']]
            },
            {
                'feedAttributeId': sitelinks_data['line1FeedId'],
                'stringValue': item['line1']
            },
            {
                'feedAttributeId': sitelinks_data['line2FeedId'],
                'stringValue': item['line2']
            }
        ],
        # Optional: use the 'startTime' and 'endTime' keys to specify the time
        # period for the feed to deliver.  The example below will make the feed
        # start now and stop in one month.
        # Make sure you specify the datetime in the customer's time zone. You
        # can retrieve this from customer['dateTimeZone'].
        #
        # ['startTime']: datetime.datetime.now().strftime('%Y%m%d %H%M%S')
        # ['endTime']: (datetime.datetime.now() +
        #               relativedelta(months=1)).strftime('%Y%m%d %H%M%S')
    }

    # Use geographical targeting on a feed.
    # The IDs can be found in the documentation or retrieved with the
    # LocationCriterionService.
    if 'locationId' in item:
      feed_item['geoTargeting'] = {
          'id': item['locationId']
      }
      feed_item['geoTargetingRestriction'] = {
          'geoRestriction': 'LOCATION_OF_PRESENCE'
      }

    feed_items.append(feed_item)

  feed_items_operations = [{'operator': 'ADD', 'operand': item} for item
                           in feed_items]

  response = feed_item_service.mutate(feed_items_operations)
  if 'value' in response:
    sitelinks_data['feedItemIds'] = []
    for feed_item in response['value']:
      print 'Feed item with ID %s was added.' % feed_item['feedItemId']
      sitelinks_data['feedItemIds'].append(feed_item['feedItemId'])
  else:
    raise errors.GoogleAdsError('No feed items were added.')

  # Create site links feed mapping.

  feed_mapping = {
      'placeholderType': PLACEHOLDER_SITELINKS,
      'feedId': sitelinks_data['feedId'],
      'attributeFieldMappings': [
          {
              'feedAttributeId': sitelinks_data['linkTextFeedId'],
              'fieldId': PLACEHOLDER_FIELD_SITELINK_LINK_TEXT
          },
          {
              'feedAttributeId': sitelinks_data['finalUrlFeedId'],
              'fieldId': PLACEHOLDER_FIELD_SITELINK_FINAL_URLS
          },
          {
              'feedAttributeId': sitelinks_data['line1FeedId'],
              'fieldId': PLACEHOLDER_FIELD_LINE_1_TEXT
          },
          {
              'feedAttributeId': sitelinks_data['line2FeedId'],
              'fieldId': PLACEHOLDER_FIELD_LINE_2_TEXT
          }
      ]
  }

  response = feed_mapping_service.mutate([
      {'operator': 'ADD', 'operand': feed_mapping}
  ])
  if 'value' in response:
    feed_mapping = response['value'][0]
    print ('Feed mapping with ID %s and placeholder type %s was saved for feed'
           ' with ID %s.' %
           (feed_mapping['feedMappingId'], feed_mapping['placeholderType'],
            feed_mapping['feedId']))
  else:
    raise errors.GoogleAdsError('No feed mappings were added.')

  # Construct a matching function that associates the sitelink feeditems to the
  # campaign, and set the device preference to Mobile. For more details, see the
  # matching function guide:
  # https://developers.google.com/adwords/api/docs/guides/feed-matching-functions
  matching_function_string = (
      'AND(IN(FEED_ITEM_ID, {%s}), EQUALS(CONTEXT.DEVICE, \'Mobile\'))' %
      re.sub(r'\[|\]|L', '', str(sitelinks_data['feedItemIds'])))

  campaign_feed = {
      'feedId': sitelinks_data['feedId'],
      'campaignId': campaign_id,
      'matchingFunction': {'functionString': matching_function_string},
      # Specifying placeholder types on the CampaignFeed allows the same feed
      # to be used for different placeholders in different Campaigns.
      'placeholderTypes': [PLACEHOLDER_SITELINKS]
  }

  response = campaign_feed_service.mutate([
      {'operator': 'ADD', 'operand': campaign_feed}
  ])
  if 'value' in response:
    campaign_feed = response['value'][0]
    print ('Campaign with ID %s was associated with feed with ID %s.' %
           (campaign_feed['campaignId'], campaign_feed['feedId']))
  else:
    raise errors.GoogleAdsError('No campaign feeds were added.')
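
Apart from the newer API version and the geoTargetingRestriction added to location-targeted items, this is the same flow (and the same re.sub call) as the previous example. The commented-out startTime/endTime keys show the 'YYYYMMDD HHMMSS' format a delivery window would use; a small sketch of building those values, assuming dateutil is available as the comment implies:

# Illustrative only: format a one-month delivery window in the
# 'YYYYMMDD HHMMSS' form shown in the commented-out startTime/endTime keys.
import datetime
from dateutil.relativedelta import relativedelta

now = datetime.datetime.now()
feed_item_schedule = {
    'startTime': now.strftime('%Y%m%d %H%M%S'),
    'endTime': (now + relativedelta(months=1)).strftime('%Y%m%d %H%M%S'),
}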

Example 132

Project: python-xbrl
Source File: xbrl.py
View license
    @classmethod
    def parseGAAP(self,
                  xbrl,
                  doc_date="",
                  context="current",
                  ignore_errors=0):
        """
        Parse GAAP from our XBRL soup and return a GAAP object.
        """
        gaap_obj = GAAP()

        if ignore_errors == 2:
            logging.basicConfig(filename='/tmp/xbrl.log',
                level=logging.ERROR,
                format='%(asctime)s %(levelname)s %(name)s %(message)s')
            logger = logging.getLogger(__name__)
        else:
            logger = None

        # the default is today
        if doc_date == "":
            doc_date = str(datetime.date.today())
        doc_date = re.sub(r"[^0-9]+", "", doc_date)

        # current is the previous quarter
        if context == "current":
            context = 90

        if context == "year":
            context = 360

        context = int(context)

        if context % 90 == 0:
            context_extended = list(range(context, context + 9))
            expected_start_date = \
                datetime.datetime.strptime(doc_date, "%Y%m%d") \
                - datetime.timedelta(days=context)
        elif context == "instant":
            expected_start_date = None
        else:
            raise XBRLParserException('invalid context')

        # we need expected end date unless instant
        if context != "instant":
            expected_end_date = \
                datetime.datetime.strptime(doc_date, "%Y%m%d")

        doc_root = ""

        # we might need to attach the document root
        if len(self.xbrl_base) > 1:
            doc_root = self.xbrl_base

        # collect all contexts up that are relevant to us
        # TODO - Maybe move this to Preprocessing Ingestion
        context_ids = []
        context_tags = xbrl.find_all(name=re.compile(doc_root + "context",
                                     re.IGNORECASE | re.MULTILINE))

        try:
            for context_tag in context_tags:
                # we don't want any segments
                if context_tag.find(doc_root + "entity") is None:
                    continue
                if context_tag.find(doc_root + "entity").find(
                doc_root + "segment") is None:
                    context_id = context_tag.attrs['id']

                    found_start_date = None
                    found_end_date = None

                    if context_tag.find(doc_root + "instant"):
                        instant = \
                            datetime.datetime.strptime(re.compile('[^\d]+')
                                                       .sub('', context_tag
                                                       .find(doc_root +
                                                             "instant")
                                                        .text)[:8], "%Y%m%d")
                        if instant == expected_end_date:
                            context_ids.append(context_id)
                            continue

                    if context_tag.find(doc_root + "period").find(
                    doc_root + "startdate"):
                        found_start_date = \
                            datetime.datetime.strptime(re.compile('[^\d]+')
                                                       .sub('', context_tag
                                                       .find(doc_root +
                                                             "period")
                                                       .find(doc_root +
                                                             "startdate")
                                                        .text)[:8], "%Y%m%d")
                    if context_tag.find(doc_root + "period").find(doc_root +
                    "enddate"):
                        found_end_date = \
                            datetime.datetime.strptime(re.compile('[^\d]+')
                                                       .sub('', context_tag
                                                       .find(doc_root +
                                                             "period")
                                                       .find(doc_root +
                                                             "enddate")
                                                       .text)[:8], "%Y%m%d")
                    if found_end_date and found_start_date:
                        for ce in context_extended:
                            if found_end_date - found_start_date == \
                            datetime.timedelta(days=ce):
                                if found_end_date == expected_end_date:
                                    context_ids.append(context_id)
        except IndexError:
            raise XBRLParserException('problem getting contexts')

        assets = xbrl.find_all(name=re.compile("(us-gaap:)[^s]*(assets)",
                               re.IGNORECASE | re.MULTILINE))
        gaap_obj.assets = self.data_processing(assets, xbrl,
            ignore_errors, logger, context_ids)

        current_assets = \
            xbrl.find_all(name=re.compile("(us-gaap:)[^s]*(currentassets)",
                          re.IGNORECASE | re.MULTILINE))
        gaap_obj.current_assets = self.data_processing(current_assets,
            xbrl, ignore_errors, logger, context_ids)

        non_current_assets = \
            xbrl.find_all(name=re.compile("(us-gaap:)[^s]*(assetsnoncurrent)",
                          re.IGNORECASE | re.MULTILINE))
        if non_current_assets == 0 or not non_current_assets:
            gaap_obj.non_current_assets = gaap_obj.current_assets \
                - gaap_obj.assets
        else:
            gaap_obj.non_current_assets = \
                self.data_processing(non_current_assets, xbrl,
                    ignore_errors, logger, context_ids)

        liabilities_and_equity = \
            xbrl.find_all(name=re.compile("(us-gaap:)[^s]*(liabilitiesand)",
                          re.IGNORECASE | re.MULTILINE))
        gaap_obj.liabilities_and_equity = \
            self.data_processing(liabilities_and_equity, xbrl,
                ignore_errors, logger, context_ids)

        liabilities = \
            xbrl.find_all(name=re.compile("(us-gaap:)[^s]*(liabilities)",
                          re.IGNORECASE | re.MULTILINE))
        gaap_obj.liabilities = \
            self.data_processing(liabilities, xbrl, ignore_errors,
                logger, context_ids)

        current_liabilities = \
            xbrl.find_all(name=re.compile("(us-gaap:)[^s]\
                          *(currentliabilities)",
                          re.IGNORECASE | re.MULTILINE))
        gaap_obj.current_liabilities = \
            self.data_processing(current_liabilities, xbrl,
                ignore_errors, logger, context_ids)

        noncurrent_liabilities = \
            xbrl.find_all(name=re.compile("(us-gaap:)[^s]\
                          *(noncurrentliabilities)",
                          re.IGNORECASE | re.MULTILINE))
        gaap_obj.noncurrent_liabilities = \
            self.data_processing(noncurrent_liabilities, xbrl,
                ignore_errors, logger, context_ids)

        commitments_and_contingencies = \
            xbrl.find_all(name=re.compile("(us-gaap:commitments\
                          andcontingencies)",
                          re.IGNORECASE | re.MULTILINE))
        gaap_obj.commitments_and_contingencies = \
            self.data_processing(commitments_and_contingencies, xbrl,
                ignore_errors, logger, context_ids)

        redeemable_noncontrolling_interest = \
            xbrl.find_all(name=re.compile("(us-gaap:redeemablenoncontrolling\
                          interestequity)", re.IGNORECASE | re.MULTILINE))
        gaap_obj.redeemable_noncontrolling_interest = \
            self.data_processing(redeemable_noncontrolling_interest,
                xbrl, ignore_errors, logger, context_ids)

        temporary_equity = \
            xbrl.find_all(name=re.compile("(us-gaap:)[^s]*(temporaryequity)",
                          re.IGNORECASE | re.MULTILINE))
        gaap_obj.temporary_equity = \
            self.data_processing(temporary_equity, xbrl, ignore_errors,
                logger, context_ids)

        equity = xbrl.find_all(name=re.compile("(us-gaap:)[^s]*(equity)",
                               re.IGNORECASE | re.MULTILINE))
        gaap_obj.equity = self.data_processing(equity, xbrl, ignore_errors,
            logger, context_ids)

        equity_attributable_interest = \
            xbrl.find_all(name=re.compile("(us-gaap:minorityinterest)",
                          re.IGNORECASE | re.MULTILINE))
        equity_attributable_interest += \
            xbrl.find_all(name=re.compile("(us-gaap:partnerscapitalattributable\
                          tononcontrollinginterest)",
                          re.IGNORECASE | re.MULTILINE))
        gaap_obj.equity_attributable_interest = \
            self.data_processing(equity_attributable_interest, xbrl,
                ignore_errors, logger, context_ids)

        equity_attributable_parent = \
            xbrl.find_all(name=re.compile("(us-gaap:liabilitiesandpartners\
                          capital)",
                          re.IGNORECASE | re.MULTILINE))
        stockholders_equity = \
            xbrl.find_all(name=re.compile("(us-gaap:stockholdersequity)",
                          re.IGNORECASE | re.MULTILINE))
        gaap_obj.equity_attributable_parent = \
            self.data_processing(equity_attributable_parent, xbrl,
                ignore_errors, logger, context_ids)
        gaap_obj.stockholders_equity = \
            self.data_processing(stockholders_equity, xbrl, ignore_errors,
                logger, context_ids)

        # Incomes #
        revenues = xbrl.find_all(name=re.compile("(us-gaap:)[^s]*(revenue)",
                                 re.IGNORECASE | re.MULTILINE))
        gaap_obj.revenues = self.data_processing(revenues, xbrl,
            ignore_errors, logger, context_ids)

        cost_of_revenue = \
            xbrl.find_all(name=re.compile("(us-gaap:costofrevenue)",
                          re.IGNORECASE | re.MULTILINE))
        cost_of_revenue += \
            xbrl.find_all(name=re.compile("(us-gaap:costffservices)",
                          re.IGNORECASE | re.MULTILINE))
        cost_of_revenue += \
            xbrl.find_all(name=re.compile("(us-gaap:costofgoodssold)",
                          re.IGNORECASE | re.MULTILINE))
        cost_of_revenue += \
            xbrl.find_all(name=re.compile("(us-gaap:costofgoodsand\
                          servicessold)",
                          re.IGNORECASE | re.MULTILINE))

        gross_profit = \
            xbrl.find_all(name=re.compile("(us-gaap:)[^s]*(grossprofit)",
                          re.IGNORECASE | re.MULTILINE))
        gaap_obj.gross_profit = \
            self.data_processing(gross_profit, xbrl, ignore_errors,
                                 logger, context_ids)

        operating_expenses = \
            xbrl.find_all(name=re.compile("(us-gaap:operating)[^s]*(expenses)",
                          re.IGNORECASE | re.MULTILINE))
        gaap_obj.operating_expenses = \
            self.data_processing(operating_expenses, xbrl, ignore_errors,
                                 logger, context_ids)

        costs_and_expenses = \
            xbrl.find_all(name=re.compile("(us-gaap:)[^s]*(costsandexpenses)",
                          re.IGNORECASE | re.MULTILINE))
        gaap_obj.costs_and_expenses = \
            self.data_processing(costs_and_expenses, xbrl, ignore_errors,
                                 logger, context_ids)

        other_operating_income = \
            xbrl.find_all(name=re.compile("(us-gaap:otheroperatingincome)",
                          re.IGNORECASE | re.MULTILINE))
        gaap_obj.other_operating_income = \
            self.data_processing(other_operating_income, xbrl, ignore_errors,
                                 logger, context_ids)

        operating_income_loss = \
            xbrl.find_all(name=re.compile("(us-gaap:otheroperatingincome)",
                          re.IGNORECASE | re.MULTILINE))
        gaap_obj.operating_income_loss = \
            self.data_processing(operating_income_loss, xbrl, ignore_errors,
                                 logger, context_ids)

        nonoperating_income_loss = \
            xbrl.find_all(name=re.compile("(us-gaap:nonoperatingincomeloss)",
                          re.IGNORECASE | re.MULTILINE))
        gaap_obj.nonoperating_income_loss = \
            self.data_processing(nonoperating_income_loss, xbrl,
                                 ignore_errors, logger, context_ids)

        interest_and_debt_expense = \
            xbrl.find_all(name=re.compile("(us-gaap:interestanddebtexpense)",
                          re.IGNORECASE | re.MULTILINE))
        gaap_obj.interest_and_debt_expense = \
            self.data_processing(interest_and_debt_expense, xbrl,
                                 ignore_errors, logger, context_ids)

        income_before_equity_investments = \
            xbrl.find_all(name=re.compile("(us-gaap:incomelossfromcontinuing"
                                          "operationsbeforeincometaxes"
                                          "minorityinterest)",
                          re.IGNORECASE | re.MULTILINE))
        gaap_obj.income_before_equity_investments = \
            self.data_processing(income_before_equity_investments, xbrl,
                                 ignore_errors, logger, context_ids)

        income_from_equity_investments = \
            xbrl.find_all(name=re.compile("(us-gaap:incomelossfromequity"
                          "methodinvestments)", re.IGNORECASE | re.MULTILINE))
        gaap_obj.income_from_equity_investments = \
            self.data_processing(income_from_equity_investments, xbrl,
                                 ignore_errors, logger, context_ids)

        income_tax_expense_benefit = \
            xbrl.find_all(name=re.compile("(us-gaap:incometaxexpensebenefit)",
                          re.IGNORECASE | re.MULTILINE))
        gaap_obj.income_tax_expense_benefit = \
            self.data_processing(income_tax_expense_benefit, xbrl,
                                 ignore_errors, logger, context_ids)

        income_continuing_operations_tax = \
            xbrl.find_all(name=re.compile("(us-gaap:IncomeLossBeforeExtraordinaryItems\
                          AndCumulativeEffectOfChangeInAccountingPrinciple)",
                          re.IGNORECASE | re.MULTILINE))
        gaap_obj.income_continuing_operations_tax = \
            self.data_processing(income_continuing_operations_tax, xbrl,
                                 ignore_errors, logger, context_ids)

        income_discontinued_operations = \
            xbrl.find_all(name=re.compile("(us-gaap:)[^s]*(discontinued"
                          "operation)", re.IGNORECASE | re.MULTILINE))
        gaap_obj.income_discontinued_operations = \
            self.data_processing(income_discontinued_operations, xbrl,
                                 ignore_errors, logger, context_ids)

        extraordary_items_gain_loss = \
            xbrl.find_all(name=re.compile("(us-gaap:extraordinaryitem"
                          "netoftax)", re.IGNORECASE | re.MULTILINE))
        gaap_obj.extraordary_items_gain_loss = \
            self.data_processing(extraordary_items_gain_loss, xbrl,
                                 ignore_errors, logger, context_ids)

        income_loss = \
            xbrl.find_all(name=re.compile("(us-gaap:)[^s]*(incomeloss)",
                          re.IGNORECASE | re.MULTILINE))
        gaap_obj.income_loss = \
            self.data_processing(income_loss, xbrl, ignore_errors,
                logger, context_ids)
        income_loss += xbrl.find_all(name=re.compile("(us-gaap:profitloss)",
                                     re.IGNORECASE | re.MULTILINE))
        gaap_obj.income_loss = \
            self.data_processing(income_loss, xbrl, ignore_errors,
                                 logger, context_ids)

        net_income_shareholders = \
            xbrl.find_all(name=re.compile("(us-gaap:netincomeavailabletocommon\
                          stockholdersbasic)",
                          re.IGNORECASE | re.MULTILINE))
        gaap_obj.net_income_shareholders = \
            self.data_processing(net_income_shareholders, xbrl, ignore_errors,
                                 logger, context_ids)

        preferred_stock_dividends = \
            xbrl.find_all(name=re.compile("(us-gaap:preferredstockdividendsand\
                          otheradjustments)", re.IGNORECASE | re.MULTILINE))
        gaap_obj.preferred_stock_dividends = \
            self.data_processing(preferred_stock_dividends, xbrl,
                ignore_errors, logger, context_ids)

        net_income_loss_noncontrolling = \
            xbrl.find_all(name=re.compile("(us-gaap:netincomelossattributableto\
                          noncontrollinginterest)",
                          re.IGNORECASE | re.MULTILINE))
        gaap_obj.net_income_loss_noncontrolling = \
            self.data_processing(net_income_loss_noncontrolling, xbrl,
                                 ignore_errors, logger, context_ids)

        net_income_loss = \
            xbrl.find_all(name=re.compile("^us-gaap:netincomeloss$",
                          re.IGNORECASE | re.MULTILINE))
        gaap_obj.net_income_loss = \
            self.data_processing(net_income_loss, xbrl, ignore_errors,
                                 logger, context_ids)

        other_comprehensive_income = \
            xbrl.find_all(name=re.compile("(us-gaap:othercomprehensiveincomeloss\
                          netoftax)", re.IGNORECASE | re.MULTILINE))
        gaap_obj.other_comprehensive_income = \
            self.data_processing(other_comprehensive_income, xbrl,
                ignore_errors, logger, context_ids)

        comprehensive_income = \
            xbrl.find_all(name=re.compile("(us-gaap:comprehensiveincome)",
                          re.IGNORECASE | re.MULTILINE))
        gaap_obj.comprehensive_income = \
            self.data_processing(comprehensive_income, xbrl, ignore_errors,
                                 logger, context_ids)

        comprehensive_income_parent = \
            xbrl.find_all(name=re.compile("(us-gaap:comprehensiveincomenetof"
                          "tax)", re.IGNORECASE | re.MULTILINE))
        gaap_obj.comprehensive_income_parent = \
            self.data_processing(comprehensive_income_parent, xbrl,
                                 ignore_errors, logger, context_ids)

        comprehensive_income_interest = \
            xbrl.find_all(name=re.compile("(us-gaap:comprehensiveincomenetoftax\
                          attributabletononcontrollinginterest)",
                          re.IGNORECASE | re.MULTILINE))
        gaap_obj.comprehensive_income_interest = \
            self.data_processing(comprehensive_income_interest, xbrl,
                                 ignore_errors, logger, context_ids)

        # Cash flow statements #
        net_cash_flows_operating = \
            xbrl.find_all(name=re.compile("(us-gaap:netcashprovidedbyusedin\
                          operatingactivities)", re.IGNORECASE | re.MULTILINE))
        gaap_obj.net_cash_flows_operating = \
            self.data_processing(net_cash_flows_operating, xbrl, ignore_errors,
                                 logger, context_ids)

        net_cash_flows_investing = \
            xbrl.find_all(name=re.compile("(us-gaap:netcashprovidedbyusedin\
                          investingactivities)", re.IGNORECASE | re.MULTILINE))
        gaap_obj.net_cash_flows_investing = \
            self.data_processing(net_cash_flows_investing, xbrl, ignore_errors,
                                logger, context_ids)

        net_cash_flows_financing = \
            xbrl.find_all(name=re.compile("(us-gaap:netcashprovidedbyusedin\
                          financingactivities)", re.IGNORECASE | re.MULTILINE))
        gaap_obj.net_cash_flows_financing = \
            self.data_processing(net_cash_flows_financing, xbrl, ignore_errors,
                                logger, context_ids)

        net_cash_flows_operating_continuing = \
            xbrl.find_all(name=re.compile("(us-gaap:netcashprovidedbyusedin\
                          operatingactivitiescontinuingoperations)",
                          re.IGNORECASE | re.MULTILINE))
        gaap_obj.net_cash_operating_continuing = \
            self.data_processing(net_cash_flows_operating_continuing, xbrl,
                                 ignore_errors, logger, context_ids)

        net_cash_flows_investing_continuing = \
            xbrl.find_all(name=re.compile("(us-gaap:netcashprovidedbyusedin\
                          investingactivitiescontinuingoperations)",
                          re.IGNORECASE | re.MULTILINE))
        gaap_obj.net_cash_flows_investing_continuing = \
            self.data_processing(net_cash_flows_investing_continuing, xbrl,
                                 ignore_errors, logger, context_ids)

        net_cash_flows_financing_continuing = \
            xbrl.find_all(name=re.compile("(us-gaap:netcashprovidedbyusedin\
                          financingactivitiescontinuingoperations)",
                          re.IGNORECASE | re.MULTILINE))
        gaap_obj.net_cash_flows_financing_continuing = \
            self.data_processing(net_cash_flows_financing_continuing, xbrl,
                                 ignore_errors, logger, context_ids)

        net_cash_flows_operating_discontinued = \
            xbrl.find_all(name=re.compile("(us-gaap:cashprovidedbyusedin\
                          operatingactivitiesdiscontinuedoperations)",
                          re.IGNORECASE | re.MULTILINE))
        gaap_obj.net_cash_flows_operating_discontinued = \
            self.data_processing(net_cash_flows_operating_discontinued, xbrl,
                                 ignore_errors, logger, context_ids)

        net_cash_flows_investing_discontinued = \
            xbrl.find_all(name=re.compile("(us-gaap:cashprovidedbyusedin\
                          investingactivitiesdiscontinuedoperations)",
                          re.IGNORECASE | re.MULTILINE))
        gaap_obj.net_cash_flows_investing_discontinued = \
            self.data_processing(net_cash_flows_investing_discontinued, xbrl,
                                 ignore_errors, logger, context_ids)

        net_cash_flows_discontinued = \
            xbrl.find_all(name=re.compile("(us-gaap:netcashprovidedbyusedin\
                          discontinuedoperations)",
                          re.IGNORECASE | re.MULTILINE))
        gaap_obj.net_cash_flows_discontinued = \
            self.data_processing(net_cash_flows_discontinued, xbrl,
                                 ignore_errors, logger, context_ids)

        common_shares_outstanding = \
            xbrl.find_all(name=re.compile("(us-gaap:commonstockshares\
                          outstanding)",
                          re.IGNORECASE | re.MULTILINE))
        gaap_obj.common_shares_outstanding = \
            self.data_processing(common_shares_outstanding, xbrl,
                                 ignore_errors, logger, context_ids)

        common_shares_issued = \
            xbrl.find_all(name=re.compile("(us-gaap:commonstockshares\
                          issued)",
                          re.IGNORECASE | re.MULTILINE))
        gaap_obj.common_shares_issued = \
            self.data_processing(common_shares_issued, xbrl,
                                 ignore_errors, logger, context_ids)

        common_shares_authorized = \
            xbrl.find_all(name=re.compile("(us-gaap:commonstockshares\
                          authorized)",
                          re.IGNORECASE | re.MULTILINE))
        gaap_obj.common_shares_authorized = \
            self.data_processing(common_shares_authorized, xbrl,
                                 ignore_errors, logger, context_ids)

        return gaap_obj
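
Two substitution patterns carry the date handling in this parser: re.sub(r"[^0-9]+", "", doc_date) reduces the document date to bare digits before strptime, and the inline re.compile('[^\d]+').sub(...) calls do the same for the context start, end and instant dates. The normalization step on its own:

# Illustrative only: strip everything but digits so differently formatted
# dates all parse with the same "%Y%m%d" pattern, as parseGAAP does above.
import re
import datetime

def normalize_date(raw):
    digits = re.sub(r"[^0-9]+", "", raw)
    return datetime.datetime.strptime(digits[:8], "%Y%m%d")

# normalize_date("2016-09-30") == normalize_date("2016/09/30 00:00:00")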

Example 133

Project: launchpad
Source File: views.py
View license
def search(request):
    params = request.GET
    q = params.get('q', '')
    page = params.get('page', 1)
    fmt = params.get('format', 'html')
    raw = params.get('raw', False)
    online = params.get('online', False)
    page_size = params.get('page_size', 20)

    try:
        page = int(page)
        page_size = int(page_size)
    except ValueError:
        raise Http404

    # summon can't return results for pages > 50 with page size of 20
    max_pages = 50
    if page > max_pages:
        raise Http404

    api = summon.Summon(settings.SUMMON_ID, settings.SUMMON_SECRET_KEY)
    kwargs = {
        "hl": False,
        "pn": page,
        "ps": page_size,
        "fq": ['SourceType:("Library Catalog")'],
        "ff": [
            'ContentType,or',
            'Author,or',
            'Language,or',
            'Genre,or',
            'Institution,or',
            'Discipline,or',
            'SubjectTerms,or',
            'TemporalSubjectTerms,or',
            'GeographicLocations,or',
        ],
        "ho": "t",
        "light": "t",
        "raw": raw,
    }

    q, kwargs = _filter_by_pubdate(q, kwargs)

    # add to the query if they want online resources only
    if online:
        if q:
            q += " AND"
        q += " lccallnum:('gw electronic' OR 'shared+electronic'" + \
            " OR 'e-resources' OR 'e-govpub' OR 'streaming')"

    # add selected facets to the query
    for facet in params.getlist('facet'):
        if ':' not in facet:
            continue
        facet_field, facet_value = facet.split(':', 1)

        if 'fvf' not in kwargs:
            kwargs['fvf'] = []

        # TODO: maybe summoner should do these escapes somehow?
        def escape(m):
            return '\\' + m.group(1)
        facet_value = re.sub('([,:\()${}])', escape, facet_value)

        kwargs['fvf'].append('%s,%s,false' % (facet_field, facet_value))

        # add Library facet field if user is faceting by Institution
        if facet_field == 'Institution' and 'Library,or' not in kwargs['ff']:
            kwargs['ff'].append('Library,or')

    if fmt == "html":
        kwargs['for_template'] = True

    # catch and retry up to MAX_ATTEMPTS times if we get errors
    # from the Summon API?
    retries = 0
    while True:
        try:
            search_results = api.search(q, **kwargs)
            break
        except requests.HTTPError as error:
            retries += 1
            if retries <= settings.SER_SOL_API_MAX_ATTEMPTS:
                time.sleep(1)
                continue
            else:
                logger.exception('unable to search Summon (3 tries): %s' %
                                 error)
                return error500(request)

    if not raw:
        search_results = _remove_facets(search_results)
        search_results = _reorder_facets(search_results)
        search_results = _remove_active_facets(request, search_results)
        search_results = _format_facets(request, search_results)

    # json-ld
    if fmt == "json":
        return HttpResponse(
            json.dumps(search_results, indent=2),
            content_type='application/json'
        )

    # raw summon results
    elif raw:
        if settings.DEBUG:
            return HttpResponse(
                json.dumps(search_results, indent=2),
                content_type='application/json'
            )
        else:
            # don't serve as a Summon proxy in production!
            return HttpResponse('Unauthorized', status=401)

    # default to a regular html web page
    else:
        # TODO: pull out page link generation so it can be tested
        # how many pagination links to display
        page_links = 5

        # the first page number in the pagination links
        page_range_start = ((page - 1) / page_links) * page_links + 1

        # the last page number + 1 in the pagination links
        # this is used in a range() below so it is offset by 1
        page_range_end = page_range_start + page_links

        # don't display page links that we can't get
        actual_pages = search_results['totalResults'] / page_size + 1
        if actual_pages <= page_range_end:
            page_range_end = actual_pages + 1

        # build page links as a list of tuples (page number, url)
        page_range = []
        page_query = request.GET.copy()
        for n in range(page_range_start, page_range_end):
            page_query['page'] = n
            page_range.append((n, page_query.urlencode()))

        # create links for going to the next set of page results
        next_page_range = prev_page_range = None
        if page_range_end - 1 < actual_pages \
                and page_range_end - 1 < max_pages:
            page_query['page'] = page_range_end
            next_page_range = page_query.urlencode()
        if page_range_start > page_links:
            page_query['page'] = page_range_start - 1
            prev_page_range = page_query.urlencode()

        return render(request, 'search.html', {
            "q": params.get('q'),
            "original_facets": params.getlist('facet'),
            "active_facets": _get_active_facets(request),
            "page": page,
            "page_range": page_range,
            "next_page_range": next_page_range,
            "prev_page_range": prev_page_range,
            "search_results": search_results,
            "debug": settings.DEBUG,
            "online": online,
            "json_url": request.get_full_path() + "&format=json",
            "raw_url": request.get_full_path() + "&raw=true",
            "max_subjects": settings.MAX_SUBJECTS,
        })
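
The escape helper in this view is the one re.sub call that uses a callable replacement: each character matched by the class gets a backslash prefixed before the facet value is passed on to the Summon API. The same idea in isolation (the character class below escapes both parentheses explicitly for readability):

# Illustrative only: backslash-escape Summon facet metacharacters by passing
# a function, rather than a string, as the replacement argument to re.sub.
import re

def escape(m):
    return '\\' + m.group(1)

facet_value = "History (United States): Civil War, 1861-1865"
escaped = re.sub(r'([,:\(\)${}])', escape, facet_value)
# escaped == 'History \\(United States\\)\\: Civil War\\, 1861-1865'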

Example 134

Project: sonospy
Source File: music_items.py
View license
    def unwrap_metadata(self, events_avt):

        self.music_item_class = ''
        self.music_item_type = ''
        self.music_item_title = ''
        self.music_item_duration = ''
        self.music_item_album = ''
        self.music_item_artist = ''
        self.music_item_albumartURI = ''
        self.music_item_station = ''
        self.music_item_station_url = ''
        self.music_item_onnow = ''
        self.music_item_info = ''
        details = ''
        extras = ''
        audiotype = ''

#        print "events_avt: " + str(events_avt)
        
        if events_avt == {}:
            return ('', '', {}, {}, '')

        metadata = events_avt['CurrentTrackMetaData']

        if metadata != None and metadata != '' and metadata != 'NOT_IMPLEMENTED':
        
            if isinstance(metadata, didl_lite.SonosMusicTrackShow):
                audiotype = 'SonosMusicTrackShow'
            elif isinstance(metadata, didl_lite.SonosMusicTrack):
                audiotype = 'SonosMusicTrack'
            elif isinstance(metadata, didl_lite.MusicTrack):
                audiotype = 'MusicTrack'
#            elif isinstance(metadata, didl_lite.SonosItem):    # these don't contain all the info - fall through to EnqueuedTransportURIMetaData
#                audiotype = 'SonosItem'
# TODO: work out what all the combinations are to get all info
            else:
                metadata = events_avt['{' + ns['rcnw'] + '}EnqueuedTransportURIMetaData']
                if metadata != None and metadata != '' and metadata != 'NOT_IMPLEMENTED':
                    if isinstance(metadata, didl_lite.SonosAudioBroadcast):
                        audiotype = 'SonosAudioBroadcast'
                    elif isinstance(metadata, didl_lite.AudioBroadcast):
                        audiotype = 'AudioBroadcast'
                    elif isinstance(metadata, didl_lite.SonosItem):
                        audiotype = 'SonosMusicTrack'
                else:
                    return ('', '', {}, {}, '')
        else:
            return ('', '', {}, {}, '')

        detailsdict = {}
        extrasdict = {}

        if audiotype == 'SonosMusicTrack':

#            print events_avt['CurrentTrackMetaData']
#            print events_avt['CurrentTrackMetaData'].to_string()

            self.music_item_class = 'SonosMusicTrack'
            self.music_item_type = 'Track'

#            print '@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@'
#            print '@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@'
#            print 'class: ' + self.music_item_class
#            print 'title: ' + metadata.title
#            print 'album: ' + metadata.album
#            print 'creator: ' + metadata.creator
#            print 'items: ' + metadata.to_string()
#            print '@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@'
#            print '@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@'

            self.music_item_title = metadata.title
            self.music_item_duration = metadata.resources[0].duration    # TODO: check whether there can be more than one set of res
            self.music_item_artist = metadata.creator
            self.music_item_album = metadata.album
            self.music_item_albumartURI = metadata.albumArtURI

            '''
            if self.music_item_title != '':
                details += 'TRACK: ' + self.music_item_title + '\n'
                detailsdict['TRACK'] = self.music_item_title
            if self.music_item_duration != '':
                details += 'DURATION: ' + self.music_item_duration + '\n'
                detailsdict['DURATION'] = self.music_item_duration
            if self.music_item_artist != '':
                details += 'ARTIST: ' + self.music_item_artist + '\n'
                detailsdict['ARTIST'] = self.music_item_artist
            if self.music_item_album != '':
                details += 'ALBUM: ' + self.music_item_album + '\n'
                detailsdict['ALBUM'] = self.music_item_album
            if self.music_item_class != '':
                extras += 'CLASS: ' + self.music_item_class + '\n'
                extrasdict['CLASS'] = self.music_item_class
            if self.music_item_type != '':
                extras += 'TYPE: ' + self.music_item_type + '\n'
                extrasdict['TYPE'] = self.music_item_type
            '''
            details += 'TRACK: ' + self.music_item_title + '\n'
            detailsdict['TRACK'] = self.music_item_title
            details += 'DURATION: ' + self.music_item_duration + '\n'
            detailsdict['DURATION'] = self.music_item_duration
            details += 'ARTIST: ' + self.music_item_artist + '\n'
            detailsdict['ARTIST'] = self.music_item_artist
            details += 'ALBUM: ' + self.music_item_album + '\n'
            detailsdict['ALBUM'] = self.music_item_album
            extras += 'CLASS: ' + self.music_item_class + '\n'
            extrasdict['CLASS'] = self.music_item_class
            extras += 'TYPE: ' + self.music_item_type + '\n'
            extrasdict['TYPE'] = self.music_item_type

        elif audiotype == 'SonosMusicTrackShow':

#            print events_avt['CurrentTrackMetaData']
#            print events_avt['CurrentTrackMetaData'].to_string()

            self.music_item_class = 'SonosMusicTrackShow'
            self.music_item_type = 'Track'

#            print '@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@'
#            print '@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@'
#            print 'class: ' + self.music_item_class
#            print 'title: ' + metadata.title
#            print 'album: ' + metadata.album
#            print 'creator: ' + metadata.creator
#            print 'items: ' + metadata.to_string()
#            print '@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@'
#            print '@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@'

            self.music_item_title = metadata.title
            self.music_item_duration = metadata.resources[0].duration    # TODO: check whether there can be more than one set of res
            self.music_item_artist = metadata.creator
            self.music_item_album = metadata.album
            self.music_item_albumartURI = metadata.albumArtURI

            details += 'TRACK: ' + self.music_item_title + '\n'
            detailsdict['TRACK'] = self.music_item_title
            details += 'DURATION: ' + self.music_item_duration + '\n'
            detailsdict['DURATION'] = self.music_item_duration
            details += 'ARTIST: ' + self.music_item_artist + '\n'
            detailsdict['ARTIST'] = self.music_item_artist
            details += 'SHOW: ' + self.music_item_album + '\n'
            detailsdict['SHOW'] = self.music_item_album
            extras += 'CLASS: ' + self.music_item_class + '\n'
            extrasdict['CLASS'] = self.music_item_class
            extras += 'TYPE: ' + self.music_item_type + '\n'
            extrasdict['TYPE'] = self.music_item_type

        elif audiotype == 'MusicTrack':

            self.music_item_class = 'MusicTrack'
            self.music_item_type = 'Track'

#            print '@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@'
#            print '@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@'
#            print 'class: ' + self.music_item_class
#            print 'title: ' + metadata.title
#            print 'albums: ' + str(metadata.albums)
#            print 'creator: ' + metadata.creator
#            print 'artists: ' + str(metadata.artists)
#            print 'items: ' + metadata.to_string()
#            print '@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@'
#            print '@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@'

            self.music_item_title = metadata.title
            self.music_item_duration = metadata.resources[0].duration    # TODO: check whether there can be more than one set of res
            self.music_item_artist = metadata.creator
            # TODO: AlbumArt

            details += 'TRACK: ' + self.music_item_title + '\n'
            detailsdict['TRACK'] = self.music_item_title
            details += 'DURATION: ' + self.music_item_duration + '\n'
            detailsdict['DURATION'] = self.music_item_duration
            details += 'ARTIST: ' + self.music_item_artist + '\n'
            detailsdict['ARTIST'] = self.music_item_artist
            details += 'ALBUM: ' + self.music_item_album + '\n'
            detailsdict['ALBUM'] = self.music_item_album
            extras += 'CLASS: ' + self.music_item_class + '\n'
            extrasdict['CLASS'] = self.music_item_class
            extras += 'TYPE: ' + self.music_item_type + '\n'
            extrasdict['TYPE'] = self.music_item_type

        elif audiotype == 'SonosAudioBroadcast':

            '''
            Kerrang
		    <CurrentTrackMetaData 
		    <res protocolInfo="sonos.com-http-get:*:*:*">x-sonosapi-stream:s45579?sid=254</res>
		    <r:streamContent></r:streamContent>
		    <r:radioShowMd></r:radioShowMd>
		    <upnp:albumArtURI>/getaa?s=1&u=x-sonosapi-stream%3as45579%3fsid%3d254</upnp:albumArtURI>
		    <dc:title>x-sonosapi-stream:s45579?sid=254</dc:title>
		    <upnp:class>object.item</upnp:class>

		    <r:NextTrackMetaData 
		    <res protocolInfo="rtsp:*:*:*">rtsp://earth.radica.com/kerrang-70</res>
		    <dc:title>kerrang-70</dc:title>
		    <upnp:class>object.item</upnp:class>

		    <r:EnqueuedTransportURIMetaData 
		    <dc:title>Kerrang! 105.2 (Birmingham, United Kingdom)</dc:title>
		    <upnp:class>object.item.audioItem.audioBroadcast</upnp:class>
		    <desc id="cdudn" nameSpace="urn:schemas-rinconnetworks-com:metadata-1-0/">SA_RINCON65031_</desc>
		
		    <AVTransportURIMetaData 
		    <dc:title>Kerrang! 105.2 (Birmingham, United Kingdom)</dc:title>
		    <upnp:class>object.item.audioItem.audioBroadcast</upnp:class>
		    <desc id="cdudn" nameSpace="urn:schemas-rinconnetworks-com:metadata-1-0/">SA_RINCON65031_</desc>

            BBC R1
		    <CurrentTrackMetaData
		    <item id="-1" parentID="-1" restricted="true">
            <res protocolInfo="sonos.com-http-get:*:*:*">x-sonosapi-stream:s24939?sid=254</res>
            <r:streamContent></r:streamContent>
            <r:radioShowMd>Nihal (BBC R1),p180224</r:radioShowMd>
            <upnp:albumArtURI>/getaa?s=1&u=x-sonosapi-stream%3as24939%3fsid%3d254</upnp:albumArtURI>
            <dc:title>x-sonosapi-stream:s24939?sid=254</dc:title>
            <upnp:class>object.item</upnp:class>
            </item>

		    <r:NextTrackMetaData
		    <item id="-1" parentID="-1" restricted="true">
            <res protocolInfo="rtsp:*:*:*">rtsp://wmlive-acl.bbc.co.uk/wms/bbc_ami/radio1/radio1_bb_live_eq1_sl0?BBC-UID=24da270589f6dc6a6400ecc2e1d845620833f3aa10b01134c4ff7436034ac97a&SSO2-UID=</res>
            <dc:title>radio1_bb_live_eq1_sl0?BBC-UID=24da270589f6dc6a6400ecc2e1d845620833f3aa10b01134c4ff7436034ac97a&SSO2-UID=</dc:title>
            <upnp:class>object.item</upnp:class>
            </item>

		    <r:EnqueuedTransportURIMetaData
		    <item id="-1" parentID="-1" restricted="true">
            <dc:title>BBC Radio 1 (London, United Kingdom)</dc:title>
            <upnp:class>object.item.audioItem.audioBroadcast</upnp:class>
            <desc id="cdudn" nameSpace="urn:schemas-rinconnetworks-com:metadata-1-0/">SA_RINCON65031_</desc>
            </item>

		    <AVTransportURIMetaData
		    <item id="-1" parentID="-1" restricted="true">
            <dc:title>BBC Radio 1 (London, United Kingdom)</dc:title>
            <upnp:class>object.item.audioItem.audioBroadcast</upnp:class>
            <desc id="cdudn" nameSpace="urn:schemas-rinconnetworks-com:metadata-1-0/">SA_RINCON65031_</desc>
            </item>
            '''

            # for Radio from Sonos, some info is in EnqueuedTransportURIMetaData, some is in CurrentTrackMetaData

            self.music_item_class = 'SonosAudioBroadcast'
            self.music_item_type = 'Radio'
            self.music_item_station = metadata.title

            CTmetadata = events_avt['CurrentTrackMetaData']

            self.music_item_onnow = CTmetadata.radioShowMd
            if self.music_item_onnow == None:
                self.music_item_onnow = ''
            if self.music_item_onnow != '':
                # title is followed by a comma and more info - but title can contain \,
                # e.g. 'Mickey\, Amelia and Spiegel,DATA'
                onnow = CTmetadata.radioShowMd
                onnow = re.sub(r'\\,', '\&comma;', onnow)
                onnow = re.split(',', onnow)[0]
                onnow = re.sub(r'\\&comma;', ',', onnow)
                self.music_item_onnow = onnow
            self.music_item_albumartURI = CTmetadata.albumArtURI
            self.music_item_info = CTmetadata.streamContent
            if self.music_item_info == None:
                self.music_item_info = ''

            details += 'STATION: ' + self.music_item_station + '\n'
            detailsdict['STATION'] = self.music_item_station
            details += 'ONNOW: ' + self.music_item_onnow + '\n'
            detailsdict['ONNOW'] = self.music_item_onnow
            details += 'INFO: ' + self.music_item_info + '\n'
            detailsdict['INFO'] = self.music_item_info
            extras += 'CLASS: ' + self.music_item_class + '\n'
            extrasdict['CLASS'] = self.music_item_class
            extras += 'TYPE: ' + self.music_item_type + '\n'
            extrasdict['TYPE'] = self.music_item_type

        elif audiotype == 'AudioBroadcast':

            self.music_item_class = 'AudioBroadcast'
            self.music_item_type = 'Radio'
            self.music_item_station = metadata.title

            self.music_item_onnow = metadata.radio_call_sign
            if self.music_item_onnow == None:
                self.music_item_onnow = ''
            self.music_item_albumartURI = metadata.albumArtURI
            self.music_item_info = metadata.radio_station_id

            details += 'STATION: ' + self.music_item_station + '\n'
            detailsdict['STATION'] = self.music_item_station
            details += 'ONNOW: ' + self.music_item_onnow + '\n'
            detailsdict['ONNOW'] = self.music_item_onnow
            details += 'INFO: ' + self.music_item_info + '\n'
            detailsdict['INFO'] = self.music_item_info
            extras += 'CLASS: ' + self.music_item_class + '\n'
            extrasdict['CLASS'] = self.music_item_class
            extras += 'TYPE: ' + self.music_item_type + '\n'
            extrasdict['TYPE'] = self.music_item_type

        else:
            details = 'UNKNOWN MUSIC CLASS!'
            detailsdict['UNKNOWN'] = 'UNKNOWN MUSIC CLASS!'
            extrasdict['UNKNOWN'] = 'UNKNOWN MUSIC CLASS!'

        return (details, extras, detailsdict, extrasdict, self.music_item_albumartURI)
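
A note on the radioShowMd handling above: the station's "on now" string is "title,extra-data", but the title itself can contain an escaped comma (\,). The code protects those with a placeholder before splitting, then restores them. A minimal stand-alone sketch of the same re.sub round-trip (the placeholder token here is purely illustrative):

import re

def first_field(value):
    # Protect escaped commas so the split only breaks on real separators.
    protected = re.sub(r'\\,', '&comma;', value)
    first = protected.split(',')[0]
    # Restore the commas in the part we kept.
    return re.sub(r'&comma;', ',', first)

print(first_field(r'Mickey\, Amelia and Spiegel,DATA'))  # Mickey, Amelia and Spiegel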

Example 135

Project: sonospy
Source File: main.py
View license
def wsgibase(environ, responder):
    """
    this is the gluon wsgi application. the first function called when a page
    is requested (static or dynamic). it can be called by paste.httpserver
    or by apache mod_wsgi.

      - fills request with info
      - the environment variables, replacing '.' with '_'
      - adds web2py path and version info
      - compensates for fcgi missing path_info and query_string
      - validates the path in url

    The url path must be either:

    1. for static pages:

      - /<application>/static/<file>

    2. for dynamic pages:

      - /<application>[/<controller>[/<function>[/<sub>]]][.<extension>]
      - (sub may go several levels deep, currently 3 levels are supported:
         sub1/sub2/sub3)

    The naming conventions are:

      - application, controller, function and extension may only contain
        [a-zA-Z0-9_]
      - file and sub may also contain '-', '=', '.' and '/'
    """

    if rewrite.params.routes_in:
        environ = rewrite.filter_in(environ)

    request = Request()
    response = Response()
    session = Session()
    try:
        try:

            # ##################################################
            # parse the environment variables
            # ##################################################

            for (key, value) in environ.items():
                request.env[key.lower().replace('.', '_')] = value
            request.env.web2py_path = web2py_path
            request.env.web2py_version = web2py_version
            request.env.update(settings)

            # ##################################################
            # validate the path in url
            # ##################################################

            if not request.env.path_info and request.env.request_uri:
                # for fcgi, decode path_info and query_string
                items = request.env.request_uri.split('?')
                request.env.path_info = items[0]
                if len(items) > 1:
                    request.env.query_string = items[1]
                else:
                    request.env.query_string = ''
            path = request.env.path_info.replace('\\', '/')

            # ##################################################
            # serve if a static file
            # ##################################################

            match = regex_static.match(regex_space.sub('_', path))
            if match and match.group('x'):
                static_file = os.path.join(request.env.web2py_path,
                                           'applications', match.group('b'),
                                           'static', match.group('x'))
                if request.env.get('query_string', '')[:10] == 'attachment':
                    response.headers['Content-Disposition'] = 'attachment'
                response.stream(static_file, request=request)

            # ##################################################
            # parse application, controller and function
            # ##################################################

            path = re.sub('%20', ' ', path)
            match = regex_url.match(path)
            if not match or match.group('c') == 'static':
                raise HTTP(400,
                           rewrite.params.error_message,
                           web2py_error='invalid path')

            request.application = \
                regex_space.sub('_', match.group('a') or 'init')
            request.controller = \
                regex_space.sub('_', match.group('c') or 'default')
            request.function = \
                regex_space.sub('_', match.group('f') or 'index')
            group_e = match.group('e')
            raw_extension = group_e and regex_space.sub('_',group_e) or None
            request.extension = raw_extension or 'html'
            request.raw_args = match.group('r')
            request.args = List([])
            if request.application in rewrite.params.routes_apps_raw:
                # application is responsible for parsing args
                request.args = None  
            elif request.raw_args:
                match = regex_args.match(request.raw_args)
                if match:
                    group_s = match.group('s')
                    request.args = \
                        List((group_s and group_s.split('/')) or [])
                else:
                    raise HTTP(400,
                               rewrite.params.error_message,
                               web2py_error='invalid path')
            request.client = get_client(request.env)
            request.folder = os.path.join(request.env.web2py_path,
                    'applications', request.application) + '/'

            # ##################################################
            # access the requested application
            # ##################################################

            if not os.path.exists(request.folder):
                if request.application=='init':
                    request.application = 'welcome'
                    redirect(URL(r=request))
                elif rewrite.params.error_handler:
                    redirect(URL(rewrite.params.error_handler['application'],
                                 rewrite.params.error_handler['controller'],
                                 rewrite.params.error_handler['function'],
                                 args=request.application))
                else:
                    raise HTTP(400,
                               rewrite.params.error_message,
                               web2py_error='invalid application')
            request.url = URL(r=request,args=request.args,
                                   extension=raw_extension)

            # ##################################################
            # build missing folder
            # ##################################################

            if not request.env.web2py_runtime_gae:
                for subfolder in ['models','views','controllers', 'databases',
                                  'modules','cron','errors','sessions',
                                  'languages','static','private','uploads']:
                    path =  os.path.join(request.folder,subfolder)
                    if not os.path.exists(path):
                        os.mkdir(path)

            # ##################################################
            # get the GET and POST data
            # ##################################################

            parse_get_post_vars(request, environ)

            # ##################################################
            # expose wsgi hooks for convenience
            # ##################################################

            request.wsgi.environ = environ_aux(environ,request)
            request.wsgi.start_response = lambda status='200', headers=[], \
                exec_info=None, response=response: \
                start_response_aux(status, headers, exec_info, response)
            request.wsgi.middleware = lambda *a: middleware_aux(request,response,*a)

            # ##################################################
            # load cookies
            # ##################################################

            if request.env.http_cookie:
                try:
                    request.cookies.load(request.env.http_cookie)
                except Cookie.CookieError, e:
                    pass # invalid cookies

            # ##################################################
            # try load session or create new session file
            # ##################################################

            session.connect(request, response)

            # ##################################################
            # set no-cache headers
            # ##################################################

            response.headers['Content-Type'] = contenttype('.'+request.extension)
            response.headers['Cache-Control'] = \
                'no-store, no-cache, must-revalidate, post-check=0, pre-check=0'
            response.headers['Expires'] = \
                time.strftime('%a, %d %b %Y %H:%M:%S GMT', time.gmtime())
            response.headers['Pragma'] = 'no-cache'

            # ##################################################
            # run controller
            # ##################################################

            serve_controller(request, response, session)

        except HTTP, http_response:

            if request.body:
                request.body.close()

            # ##################################################
            # on success, try store session in database
            # ##################################################
            session._try_store_in_db(request, response)

            # ##################################################
            # on success, commit database
            # ##################################################

            if response._custom_commit:
                response._custom_commit()
            else:
                BaseAdapter.close_all_instances(BaseAdapter.commit)

            # ##################################################
            # if session not in db try store session on filesystem
            # this must be done after trying to commit database!
            # ##################################################

            session._try_store_on_disk(request, response)

            # ##################################################
            # store cookies in headers
            # ##################################################

            if session._forget:
                del response.cookies[response.session_id_name]
            elif session._secure:
                response.cookies[response.session_id_name]['secure'] = True
            if len(response.cookies)>0: 
                http_response.headers['Set-Cookie'] = \
                    [str(cookie)[11:] for cookie in response.cookies.values()]
            ticket=None

        except RestrictedError, e:

            if request.body:
                request.body.close()

            # ##################################################
            # on application error, rollback database
            # ##################################################

            ticket = e.log(request) or 'unknown'
            if response._custom_rollback:
                response._custom_rollback()
            else:
                BaseAdapter.close_all_instances(BaseAdapter.rollback)

            http_response = \
                HTTP(500,
                     rewrite.params.error_message_ticket % dict(ticket=ticket),
                     web2py_error='ticket %s' % ticket)

    except:

        if request.body:
            request.body.close()

        # ##################################################
        # on application error, rollback database
        # ##################################################

        try:
            if response._custom_rollback:
                response._custom_rollback()
            else:
                BaseAdapter.close_all_instances(BaseAdapter.rollback)
        except:
            pass
        e = RestrictedError('Framework', '', '', locals())
        ticket = e.log(request) or 'unrecoverable'
        http_response = \
            HTTP(500,
                 rewrite.params.error_message_ticket % dict(ticket=ticket),
                 web2py_error='ticket %s' % ticket)
    session._unlock(response)
    http_response = rewrite.try_redirect_on_error(http_response,request,ticket)
    return http_response.to(responder)
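
The path handling in wsgibase leans on two small substitutions: '%20' is rewritten to a space before regex_url runs, and whitespace in the application/controller/function names is collapsed to '_' with a precompiled regex_space. A stand-alone sketch of that normalization; the regex_space pattern shown here is an assumption for illustration, web2py defines its own elsewhere in the file:

import re

regex_space = re.compile(r'(\+|\s)')  # assumed pattern, for illustration only

def normalize_component(component):
    # Decode the most common URL escape, then replace '+'/whitespace with '_'.
    decoded = re.sub('%20', ' ', component)
    return regex_space.sub('_', decoded)

print(normalize_component('my%20controller'))  # my_controller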

Example 136

Project: easybuild-framework
Source File: run.py
View license
def run_cmd_qa(cmd, qa, no_qa=None, log_ok=True, log_all=False, simple=False, regexp=True, std_qa=None, path=None, maxhits=50):
    """
    Run specified interactive command (in a subshell)
    :param cmd: command to run
    :param qa: dictionary which maps questions to answers
    :param no_qa: list of patterns that are not questions
    :param log_ok: only log output/exit code for failing commands (exit code non-zero)
    :param log_all: always log command output and exit code
    :param simple: if True, just return True/False to indicate success, else return a tuple: (output, exit_code)
    :param regexp: regex used to check the output for errors; if True it will use the default (see parse_log_for_error)
    :param std_qa: dictionary which maps question regex patterns to answers
    :param path: path to execute the command in; current working directory is used if unspecified
    """
    cwd = os.getcwd()

    # early exit in 'dry run' mode, after printing the command that would be run
    if build_option('extended_dry_run'):
        if path is None:
            path = cwd
        dry_run_msg("  running interactive command \"%s\"" % cmd, silent=build_option('silent'))
        dry_run_msg("  (in %s)" % path, silent=build_option('silent'))
        if simple:
            return True
        else:
            # output, exit code
            return ('', 0)

    try:
        if path:
            os.chdir(path)

        _log.debug("run_cmd_qa: running cmd %s (in %s)" % (cmd, os.getcwd()))
    except OSError, err:
        _log.warning("Failed to change to %s: %s" % (path, err))
        _log.info("running cmd %s in non-existing directory, might fail!" % cmd)

    # Part 1: process the QandA dictionary
    # given initial set of Q and A (in dict), return dict of reg. exp. and A
    #
    # make regular expression that matches the string with
    # - replace whitespace
    # - replace newline

    def escape_special(string):
        return re.sub(r"([\+\?\(\)\[\]\*\.\\\$])", r"\\\1", string)

    split = '[\s\n]+'
    regSplit = re.compile(r"" + split)

    def process_QA(q, a_s):
        splitq = [escape_special(x) for x in regSplit.split(q)]
        regQtxt = split.join(splitq) + split.rstrip('+') + "*$"
        # add optional split at the end
        for i in [idx for idx, a in enumerate(a_s) if not a.endswith('\n')]:
            a_s[i] += '\n'
        regQ = re.compile(r"" + regQtxt)
        if regQ.search(q):
            return (a_s, regQ)
        else:
            raise EasyBuildError("runqanda: Question %s converted in %s does not match itself", q, regQtxt)

    def check_answers_list(answers):
        """Make sure we have a list of answers (as strings)."""
        if isinstance(answers, basestring):
            answers = [answers]
        elif not isinstance(answers, list):
            raise EasyBuildError("Invalid type for answer on %s, no string or list: %s (%s)",
                                 question, type(answers), answers)
        # list is manipulated when answering matching question, so return a copy
        return answers[:]

    newQA = {}
    _log.debug("newQA: ")
    for question, answers in qa.items():
        answers = check_answers_list(answers)
        (answers, regQ) = process_QA(question, answers)
        newQA[regQ] = answers
        _log.debug("newqa[%s]: %s" % (regQ.pattern, newQA[regQ]))

    newstdQA = {}
    if std_qa:
        for question, answers in std_qa.items():
            regQ = re.compile(r"" + question + r"[\s\n]*$")
            answers = check_answers_list(answers)
            for i in [idx for idx, a in enumerate(answers) if not a.endswith('\n')]:
                answers[i] += '\n'
            newstdQA[regQ] = answers
            _log.debug("newstdQA[%s]: %s" % (regQ.pattern, newstdQA[regQ]))

    new_no_qa = []
    if no_qa:
        # simple statements, can contain wildcards
        new_no_qa = [re.compile(r"" + x + r"[\s\n]*$") for x in no_qa]

    _log.debug("New noQandA list is: %s" % [x.pattern for x in new_no_qa])

    # Part 2: Run the command and answer questions
    # - this needs asynchronous stdout

    # # Log command output
    if log_all:
        try:
            runLog = tempfile.NamedTemporaryFile(suffix='.log', prefix='easybuild-cmdqa-')
            _log.debug('run_cmd_qa: Command output will be logged to %s' % runLog.name)
            runLog.write(cmd + "\n\n")
        except IOError, err:
            raise EasyBuildError("Opening log file for Q&A failed: %s", err)
    else:
        runLog = None

    try:
        p = Popen(cmd, shell=True, stdout=PIPE, stderr=STDOUT, stdin=PIPE, close_fds=True, executable="/bin/bash")
    except OSError, err:
        raise EasyBuildError("run_cmd_qa init cmd %s failed:%s", cmd, err)

    ec = p.poll()
    stdoutErr = ''
    oldLenOut = -1
    hitCount = 0

    while ec is None:
        # need to read from time to time.
        # - otherwise the stdout/stderr buffer gets filled and it all stops working
        try:
            tmpOut = recv_some(p)
            if runLog:
                runLog.write(tmpOut)
            stdoutErr += tmpOut
        # recv_some may throw Exception
        except (IOError, Exception), err:
            _log.debug("run_cmd_qa cmd %s: read failed: %s" % (cmd, err))
            tmpOut = None

        hit = False
        for question, answers in newQA.items():
            res = question.search(stdoutErr)
            if tmpOut and res:
                fa = answers[0] % res.groupdict()
                # cycle through list of answers
                last_answer = answers.pop(0)
                answers.append(last_answer)
                _log.debug("List of answers for question %s after cycling: %s" % (question.pattern, answers))

                _log.debug("run_cmd_qa answer %s question %s out %s" % (fa, question.pattern, stdoutErr[-50:]))
                send_all(p, fa)
                hit = True
                break
        if not hit:
            for question, answers in newstdQA.items():
                res = question.search(stdoutErr)
                if tmpOut and res:
                    fa = answers[0] % res.groupdict()
                    # cycle through list of answers
                    last_answer = answers.pop(0)
                    answers.append(last_answer)
                    _log.debug("List of answers for question %s after cycling: %s" % (question.pattern, answers))

                    _log.debug("run_cmd_qa answer %s std question %s out %s" % (fa, question.pattern, stdoutErr[-50:]))
                    send_all(p, fa)
                    hit = True
                    break
            if not hit:
                if len(stdoutErr) > oldLenOut:
                    oldLenOut = len(stdoutErr)
                else:
                    noqa = False
                    for r in new_no_qa:
                        if r.search(stdoutErr):
                            _log.debug("runqanda: noQandA found for out %s" % stdoutErr[-50:])
                            noqa = True
                    if not noqa:
                        hitCount += 1
            else:
                hitCount = 0
        else:
            hitCount = 0

        if hitCount > maxhits:
            # explicitly kill the child process before exiting
            try:
                os.killpg(p.pid, signal.SIGKILL)
                os.kill(p.pid, signal.SIGKILL)
            except OSError, err:
                _log.debug("run_cmd_qa exception caught when killing child process: %s" % err)
            _log.debug("run_cmd_qa: full stdouterr: %s" % stdoutErr)
            raise EasyBuildError("run_cmd_qa: cmd %s : Max nohits %s reached: end of output %s",
                                 cmd, maxhits, stdoutErr[-500:])

        # the sleep below is required to avoid exiting on unknown 'questions' too early (see above)
        time.sleep(1)
        ec = p.poll()

    # Process stopped. Read all remaining data
    try:
        if p.stdout:
            readTxt = p.stdout.read()
            stdoutErr += readTxt
            if runLog:
                runLog.write(readTxt)
    except IOError, err:
        _log.debug("runqanda cmd %s: remaining data read failed: %s" % (cmd, err))

    # Not needed anymore. Subprocess does this correct?
    # ec=os.WEXITSTATUS(ec)

    try:
        os.chdir(cwd)
    except OSError, err:
        raise EasyBuildError("Failed to return to %s after executing command: %s", cwd, err)

    return parse_cmd_output(cmd, stdoutErr, ec, simple, log_all, log_ok, regexp)
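
The escape_special helper above is a small but central re.sub trick in run_cmd_qa: every regex metacharacter in the literal question text gets a backslash prefix via the \1 backreference, so the question can be compiled into a pattern that matches itself. A minimal sketch of the same idea (the sample question is made up; for most cases the standard library's re.escape does the same job):

import re

def escape_special(string):
    # Prefix each listed metacharacter with a backslash.
    return re.sub(r"([\+\?\(\)\[\]\*\.\\\$])", r"\\\1", string)

question = "Install to /opt? [y/N]"
pattern = re.compile(escape_special(question))
print(bool(pattern.search(question)))  # True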

Example 137

Project: django-js-reverse
Source File: rjsmin.py
View license
def _make_jsmin(python_only=False):
    """
    Generate JS minifier based on `jsmin.c by Douglas Crockford`_

    .. _jsmin.c by Douglas Crockford:
       http://www.crockford.com/javascript/jsmin.c

    :Parameters:
      `python_only` : ``bool``
        Use only the python variant. If true, the c extension is not even
        tried to be loaded.

    :Return: Minifier
    :Rtype: ``callable``
    """
    # pylint: disable = R0912, R0914, W0612

    if not python_only:
        try:
            import _rjsmin  # pylint: disable = F0401
        except ImportError:
            pass
        else:
            return _rjsmin.jsmin
    try:
        xrange
    except NameError:
        xrange = range  # pylint: disable = W0622

    space_chars = r'[\000-\011\013\014\016-\040]'

    line_comment = r'(?://[^\r\n]*)'
    space_comment = r'(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)'
    space_comment_nobang = r'(?:/\*(?!!)[^*]*\*+(?:[^/*][^*]*\*+)*/)'
    bang_comment = r'(?:/\*![^*]*\*+(?:[^/*][^*]*\*+)*/)'

    string1 = \
        r'(?:\047[^\047\\\r\n]*(?:\\(?:[^\r\n]|\r?\n|\r)[^\047\\\r\n]*)*\047)'
    string2 = r'(?:"[^"\\\r\n]*(?:\\(?:[^\r\n]|\r?\n|\r)[^"\\\r\n]*)*")'
    strings = r'(?:%s|%s)' % (string1, string2)

    charclass = r'(?:\[[^\\\]\r\n]*(?:\\[^\r\n][^\\\]\r\n]*)*\])'
    nospecial = r'[^/\\\[\r\n]'
    regex = r'(?:/(?![\r\n/*])%s*(?:(?:\\[^\r\n]|%s)%s*)*/)' % (
        nospecial, charclass, nospecial
    )
    space = r'(?:%s|%s)' % (space_chars, space_comment)
    space_nobang = r'(?:%s|%s)' % (space_chars, space_comment_nobang)
    newline = r'(?:%s?[\r\n])' % line_comment

    def fix_charclass(result):
        """ Fixup string of chars to fit into a regex char class """
        pos = result.find('-')
        if pos >= 0:
            result = r'%s%s-' % (result[:pos], result[pos + 1:])

        def sequentize(string):
            """
            Notate consecutive characters as sequence

            (1-4 instead of 1234)
            """
            first, last, result = None, None, []
            for char in map(ord, string):
                if last is None:
                    first = last = char
                elif last + 1 == char:
                    last = char
                else:
                    result.append((first, last))
                    first = last = char
            if last is not None:
                result.append((first, last))
            return ''.join(['%s%s%s' % (
                chr(first),
                last > first + 1 and '-' or '',
                last != first and chr(last) or ''
            ) for first, last in result])

        return _re.sub(
            r'([\000-\040\047])',  # \047 for better portability
            lambda m: '\\%03o' % ord(m.group(1)), (
                sequentize(result)
                .replace('\\', '\\\\')
                .replace('[', '\\[')
                .replace(']', '\\]')
            )
        )

    def id_literal_(what):
        """ Make id_literal like char class """
        match = _re.compile(what).match
        result = ''.join([
            chr(c) for c in xrange(127) if not match(chr(c))
        ])
        return '[^%s]' % fix_charclass(result)

    def not_id_literal_(keep):
        """ Make negated id_literal like char class """
        match = _re.compile(id_literal_(keep)).match
        result = ''.join([
            chr(c) for c in xrange(127) if not match(chr(c))
        ])
        return r'[%s]' % fix_charclass(result)

    not_id_literal = not_id_literal_(r'[a-zA-Z0-9_$]')
    preregex1 = r'[(,=:\[!&|?{};\r\n]'
    preregex2 = r'%(not_id_literal)sreturn' % locals()

    id_literal = id_literal_(r'[a-zA-Z0-9_$]')
    id_literal_open = id_literal_(r'[a-zA-Z0-9_${\[(!+-]')
    id_literal_close = id_literal_(r'[a-zA-Z0-9_$}\])"\047+-]')

    dull = r'[^\047"/\000-\040]'

    space_sub_simple = _re.compile((
        # noqa pylint: disable = C0330

        r'(%(dull)s+)'
        r'|(%(strings)s%(dull)s*)'
        r'|(?<=%(preregex1)s)'
            r'%(space)s*(?:%(newline)s%(space)s*)*'
            r'(%(regex)s%(dull)s*)'
        r'|(?<=%(preregex2)s)'
            r'%(space)s*(?:%(newline)s%(space)s)*'
            r'(%(regex)s%(dull)s*)'
        r'|(?<=%(id_literal_close)s)'
            r'%(space)s*(?:(%(newline)s)%(space)s*)+'
            r'(?=%(id_literal_open)s)'
        r'|(?<=%(id_literal)s)(%(space)s)+(?=%(id_literal)s)'
        r'|(?<=\+)(%(space)s)+(?=\+)'
        r'|(?<=-)(%(space)s)+(?=-)'
        r'|%(space)s+'
        r'|(?:%(newline)s%(space)s*)+'
    ) % locals()).sub
    #print space_sub_simple.__self__.pattern

    def space_subber_simple(match):
        """ Substitution callback """
        # pylint: disable = R0911

        groups = match.groups()
        if groups[0]:
            return groups[0]
        elif groups[1]:
            return groups[1]
        elif groups[2]:
            return groups[2]
        elif groups[3]:
            return groups[3]
        elif groups[4]:
            return '\n'
        elif groups[5] or groups[6] or groups[7]:
            return ' '
        else:
            return ''

    space_sub_banged = _re.compile((
        # noqa pylint: disable = C0330

        r'(%(dull)s+)'
        r'|(%(strings)s%(dull)s*)'
        r'|(%(bang_comment)s%(dull)s*)'
        r'|(?<=%(preregex1)s)'
            r'%(space)s*(?:%(newline)s%(space)s*)*'
            r'(%(regex)s%(dull)s*)'
        r'|(?<=%(preregex2)s)'
            r'%(space)s*(?:%(newline)s%(space)s)*'
            r'(%(regex)s%(dull)s*)'
        r'|(?<=%(id_literal_close)s)'
            r'%(space)s*(?:(%(newline)s)%(space)s*)+'
            r'(?=%(id_literal_open)s)'
        r'|(?<=%(id_literal)s)(%(space)s)+(?=%(id_literal)s)'
        r'|(?<=\+)(%(space)s)+(?=\+)'
        r'|(?<=-)(%(space)s)+(?=-)'
        r'|%(space)s+'
        r'|(?:%(newline)s%(space)s*)+'
    ) % dict(locals(), space=space_nobang)).sub
    #print space_sub_banged.__self__.pattern

    def space_subber_banged(match):
        """ Substitution callback """
        # pylint: disable = R0911

        groups = match.groups()
        if groups[0]:
            return groups[0]
        elif groups[1]:
            return groups[1]
        elif groups[2]:
            return groups[2]
        elif groups[3]:
            return groups[3]
        elif groups[4]:
            return groups[4]
        elif groups[5]:
            return '\n'
        elif groups[6] or groups[7] or groups[8]:
            return ' '
        else:
            return ''

    def jsmin(script, keep_bang_comments=False):  # pylint: disable = W0621
        r"""
        Minify javascript based on `jsmin.c by Douglas Crockford`_\.

        Instead of parsing the stream char by char, it uses a regular
        expression approach which minifies the whole script with one big
        substitution regex.

        .. _jsmin.c by Douglas Crockford:
           http://www.crockford.com/javascript/jsmin.c

        :Parameters:
          `script` : ``str``
            Script to minify

          `keep_bang_comments` : ``bool``
            Keep comments starting with an exclamation mark? (``/*!...*/``)

        :Return: Minified script
        :Rtype: ``str``
        """
        if keep_bang_comments:
            return space_sub_banged(
                space_subber_banged, '\n%s\n' % script
            ).strip()
        else:
            return space_sub_simple(
                space_subber_simple, '\n%s\n' % script
            ).strip()

    return jsmin
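
One detail worth calling out in the minifier above: fix_charclass passes a lambda as the replacement argument to _re.sub, converting each matched control character (and the apostrophe, \047) into an octal escape. re.sub accepts any callable that takes a match object and returns the replacement string. A small self-contained sketch of that callable-replacement pattern:

import re

def octal_escape(text):
    # Replace control characters and the apostrophe with \NNN octal escapes.
    return re.sub(r'([\000-\040\047])',
                  lambda m: '\\%03o' % ord(m.group(1)),
                  text)

print(octal_escape("a b\tc'd"))  # a\040b\011c\047d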

Example 138

View license
    def get_packages(self, invalid_sources=None):
        """
        Provides access to the packages in this repository

        :param invalid_sources:
            A list of URLs that are permissible to fetch data from

        :raises:
            ProviderException: when an error occurs trying to open a file
            DownloaderException: when there is an issue download package info
            ClientException: when there is an issue parsing package info

        :return:
            A generator of
            (
                'Package Name',
                {
                    'name': name,
                    'description': description,
                    'author': author,
                    'homepage': homepage,
                    'last_modified': last modified date,
                    'download': {
                        'url': url,
                        'date': date,
                        'version': version
                    },
                    'previous_names': [old_name, ...],
                    'labels': [label, ...],
                    'sources': [url, ...],
                    'readme': url,
                    'issues': url,
                    'donate': url,
                    'buy': url
                }
            )
            tuples
        """

        if 'get_packages' in self.cache:
            for key, value in self.cache['get_packages'].items():
                yield (key, value)
            return

        if invalid_sources != None and self.repo in invalid_sources:
            raise StopIteration()

        try:
            self.fetch()
        except (DownloaderException, ProviderException) as e:
            self.failed_sources[self.repo] = e
            self.cache['get_packages'] = {}
            return

        def fail(message):
            exception = ProviderException(message)
            self.failed_sources[self.repo] = exception
            self.cache['get_packages'] = {}
            return
        schema_error = u'Repository %s does not appear to be a valid repository file because ' % self.repo

        if 'schema_version' not in self.repo_info:
            error_string = u'%s the "schema_version" JSON key is missing.' % schema_error
            fail(error_string)
            return

        try:
            self.schema_version = float(self.repo_info.get('schema_version'))
        except (ValueError):
            error_string = u'%s the "schema_version" is not a valid number.' % schema_error
            fail(error_string)
            return

        if self.schema_version not in [1.0, 1.1, 1.2, 2.0]:
            error_string = u'%s the "schema_version" is not recognized. Must be one of: 1.0, 1.1, 1.2 or 2.0.' % schema_error
            fail(error_string)
            return

        if 'packages' not in self.repo_info:
            error_string = u'%s the "packages" JSON key is missing.' % schema_error
            fail(error_string)
            return

        github_client = GitHubClient(self.settings)
        bitbucket_client = BitBucketClient(self.settings)

        # Backfill the "previous_names" keys for old schemas
        previous_names = {}
        if self.schema_version < 2.0:
            renamed = self.get_renamed_packages()
            for old_name in renamed:
                new_name = renamed[old_name]
                if new_name not in previous_names:
                    previous_names[new_name] = []
                previous_names[new_name].append(old_name)

        output = {}
        for package in self.repo_info['packages']:
            info = {
                'sources': [self.repo]
            }

            for field in ['name', 'description', 'author', 'last_modified', 'previous_names',
                    'labels', 'homepage', 'readme', 'issues', 'donate', 'buy']:
                if package.get(field):
                    info[field] = package.get(field)

            # Schema version 2.0 allows for grabbing details about a package, or its
            # download from "details" urls. See the GitHubClient and BitBucketClient
            # classes for valid URLs.
            if self.schema_version >= 2.0:
                details = package.get('details')
                releases = package.get('releases')

                # Try to grab package-level details from GitHub or BitBucket
                if details:
                    if invalid_sources != None and details in invalid_sources:
                        continue

                    info['sources'].append(details)

                    try:
                        github_repo_info = github_client.repo_info(details)
                        bitbucket_repo_info = bitbucket_client.repo_info(details)

                        # When grabbing details, prefer explicit field values over the values
                        # from the GitHub or BitBucket API
                        if github_repo_info:
                            info = dict(chain(github_repo_info.items(), info.items()))
                        elif bitbucket_repo_info:
                            info = dict(chain(bitbucket_repo_info.items(), info.items()))
                        else:
                            raise ProviderException(u'Invalid "details" value "%s" for one of the packages in the repository %s.' % (details, self.repo))

                    except (DownloaderException, ClientException, ProviderException) as e:
                        if 'name' in info:
                            self.broken_packages[info['name']] = e
                        self.failed_sources[details] = e
                        continue

                # If no releases info was specified, also grab the download info from GH or BB
                if not releases and details:
                    releases = [{'details': details}]

                if not releases:
                    e = ProviderException(u'No "releases" value for one of the packages in the repository %s.' % self.repo)
                    if 'name' in info:
                        self.broken_packages[info['name']] = e
                    else:
                        self.failed_sources[self.repo] = e
                    continue

                # This allows developers to specify a GH or BB location to get releases from,
                # especially tags URLs (https://github.com/user/repo/tags or
                # https://bitbucket.org/user/repo#tags)
                info['releases'] = []
                for release in releases:
                    download_details = None
                    download_info = {}

                    # Make sure that explicit fields are copied over
                    for field in ['platforms', 'sublime_text', 'version', 'url', 'date']:
                        if field in release:
                            download_info[field] = release[field]

                    if 'details' in release:
                        download_details = release['details']

                        try:
                            github_download = github_client.download_info(download_details)
                            bitbucket_download = bitbucket_client.download_info(download_details)

                            # Overlay the explicit field values over values fetched from the APIs
                            if github_download:
                                download_info = dict(chain(github_download.items(), download_info.items()))
                            # No matching tags
                            elif github_download == False:
                                download_info = {}
                            elif bitbucket_download:
                                download_info = dict(chain(bitbucket_download.items(), download_info.items()))
                            # No matching tags
                            elif bitbucket_download == False:
                                download_info = {}
                            else:
                                raise ProviderException(u'Invalid "details" value "%s" under the "releases" key for the package "%s" in the repository %s.' % (download_details, info['name'], self.repo))

                        except (DownloaderException, ClientException, ProviderException) as e:
                            if 'name' in info:
                                self.broken_packages[info['name']] = e
                            self.failed_sources[download_details] = e
                            continue

                    if download_info:
                        info['releases'].append(download_info)

                info = self.select_release(info)

            # Schema version 1.0, 1.1 and 1.2 just require that all values be
            # explicitly specified in the package JSON
            else:
                info['platforms'] = package.get('platforms')
                info = self.select_platform(info)

            if not info:
                self.unavailable_packages.append(package['name'])
                continue

            if 'download' not in info and 'releases' not in info:
                self.broken_packages[info['name']] = ProviderException(u'No "releases" key for the package "%s" in the repository %s.' % (info['name'], self.repo))
                continue

            for field in ['previous_names', 'labels']:
                if field not in info:
                    info[field] = []

            for field in ['readme', 'issues', 'donate', 'buy']:
                if field not in info:
                    info[field] = None

            if 'homepage' not in info:
                info['homepage'] = self.repo

            if 'download' in info:
                # Rewrites the legacy "zipball" URLs to the new "zip" format
                info['download']['url'] = re.sub(
                    '^(https://nodeload.github.com/[^/]+/[^/]+/)zipball(/.*)$',
                    '\\1zip\\2', info['download']['url'])

                # Rewrites the legacy "nodeload" URLs to the new "codeload" subdomain
                info['download']['url'] = info['download']['url'].replace(
                    'nodeload.github.com', 'codeload.github.com')

                # Extract the date from the download
                if 'last_modified' not in info:
                    info['last_modified'] = info['download']['date']

            elif 'releases' in info and 'last_modified' not in info:
                # Extract a date from the newest download
                date = '1970-01-01 00:00:00'
                for release in info['releases']:
                    if 'date' in release and release['date'] > date:
                        date = release['date']
                info['last_modified'] = date

            if info['name'] in previous_names:
                info['previous_names'].extend(previous_names[info['name']])

            output[info['name']] = info
            yield (info['name'], info)

        self.cache['get_packages'] = output
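
The download-URL fix-up near the end of get_packages is a compact example of capture groups in the replacement string: \1 and \2 keep the host prefix and path suffix while only the 'zipball' segment is swapped for 'zip'. Reproduced in isolation with an illustrative URL:

import re

url = 'https://nodeload.github.com/user/repo/zipball/master'
url = re.sub(r'^(https://nodeload.github.com/[^/]+/[^/]+/)zipball(/.*)$',
             r'\1zip\2', url)
url = url.replace('nodeload.github.com', 'codeload.github.com')
print(url)  # https://codeload.github.com/user/repo/zip/master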

Example 139

Project: nltk-trainer
Source File: phonetics.py
View license
def metaphone (term):
	"returns metaphone code for a given string"

	# implementation of the original algorithm from Lawrence Philips
	# extended/rewritten by M. Kuhn
	# improvements with thanks to John Machin <[email protected]>

	# define return value
	code = ""

	i = 0
	term_length = len(term)

	if (term_length == 0):
		# empty string ?
		return code
	# end if

	# extension #1 (added 2005-01-28)
	# convert to lowercase
	term = string.lower(term)
	
	# extension #2 (added 2005-01-28)
	# remove all non-english characters, first
	term = re.sub(r'[^a-z]', '', term)
	if len(term) == 0:
		# nothing left
		return code
	# end if
		
	# extension #3 (added 2005-01-24)
	# conflate repeated letters
	firstChar = term[0]
	str2 = firstChar
	for x in term:
		if x != str2[-1]:
			str2 = str2 + x
		# end if
	# end for
	
	# extension #4 (added 2005-01-24)
	# remove any vowels unless a vowel is the first letter
	firstChar = str2[0]
	str3 = firstChar
	for x in str2[1:]:
		if (re.search(r'[^aeiou]', x)):
			str3 = str3 + x
		# end if
	# end for
	
	term = str3
	term_length = len(term)
	if term_length == 0:
		# nothing left
		return code
	# end if
	
	# check for exceptions
	if (term_length > 1):
		# get first two characters
		first_chars = term[0:2]

		# build translation table
		table = {
			"ae":"e",
			"gn":"n",
			"kn":"n",
			"pn":"n",
			"wr":"n",
			"wh":"w"
		}
		
		if first_chars in table.keys():
			term = term[2:]
			code = table[first_chars]
			term_length = len(term)
		# end if
		
	elif (term[0] == "x"):
		term = ""
		code = "s"
		term_length = 0
	# end if

	# define standard translation table
	st_trans = {
		"b":"b",
		"c":"k",
		"d":"t",
		"g":"k",
		"h":"h",
		"k":"k",
		"p":"p",
		"q":"k",
		"s":"s",
		"t":"t",
		"v":"f",
		"w":"w",
		"x":"ks",
		"y":"y",
		"z":"s"
	}

	i = 0
	while (i<term_length):
		# init character to add, init basic patterns
		add_char = ""
		part_n_2 = ""
		part_n_3 = ""
		part_n_4 = ""
		part_c_2 = ""
		part_c_3 = ""

		# extract a number of patterns, if possible
		if (i < (term_length - 1)):
			part_n_2 = term[i:i+2]

			if (i>0):
				part_c_2 = term[i-1:i+1]
				part_c_3 = term[i-1:i+2]
			# end if
		# end if

		if (i < (term_length - 2)):
			part_n_3 = term[i:i+3]
		# end if

		if (i < (term_length - 3)):
			part_n_4 = term[i:i+4]
		# end if

		# use table with conditions for translations
		if (term[i] == "b"):
			add_char = st_trans["b"]
			if (i == (term_length - 1)):
				if (i>0):
					if (term[i-1] == "m"):
						add_char = ""
					# end if
				# end if
			# end if
		elif (term[i] == "c"):
			add_char = st_trans["c"]
			if (part_n_2 == "ch"):
				add_char = "x"
			elif (re.search(r'c[iey]', part_n_2)):
				add_char = "s"
			# end if

			if (part_n_3 == "cia"):
				add_char = "x"
			# end if

			if (re.search(r'sc[iey]', part_c_3)):
				add_char = ""
			# end if

		elif (term[i] == "d"):
			add_char = st_trans["d"]
			if (re.search(r'dg[eyi]', part_n_3)):
				add_char = "j"
			# end if

		elif (term[i] == "g"):
			add_char = st_trans["g"]

			if (part_n_2 == "gh"):
				if (i == (term_length - 2)):
					add_char = ""
				# end if
			elif (re.search(r'gh[aeiouy]', part_n_3)):
				add_char = ""
			elif (part_n_2 == "gn"):
				add_char = ""
			elif (part_n_4 == "gned"):
				add_char = ""
			elif (re.search(r'dg[eyi]',part_c_3)):
				add_char = ""
			elif (part_n_2 == "gi"):
				if (part_c_3 != "ggi"):
					add_char = "j"
				# end if
			elif (part_n_2 == "ge"):
				if (part_c_3 != "gge"):
					add_char = "j"
				# end if
			elif (part_n_2 == "gy"):
				if (part_c_3 != "ggy"):
					add_char = "j"
				# end if
			elif (part_n_2 == "gg"):
				add_char = ""
			# end if
		elif (term[i] == "h"):
			add_char = st_trans["h"]
			if (re.search(r'[aeiouy]h[^aeiouy]', part_c_3)):
				add_char = ""
			elif (re.search(r'[csptg]h', part_c_2)):
				add_char = ""
			# end if
		elif (term[i] == "k"):
			add_char = st_trans["k"]
			if (part_c_2 == "ck"):
				add_char = ""
			# end if
		elif (term[i] == "p"):
			add_char = st_trans["p"]
			if (part_n_2 == "ph"):
				add_char = "f"
			# end if
		elif (term[i] == "q"):
			add_char = st_trans["q"]
		elif (term[i] == "s"):
			add_char = st_trans["s"]
			if (part_n_2 == "sh"):
				add_char = "x"
			# end if

			if (re.search(r'si[ao]', part_n_3)):
				add_char = "x"
			# end if
		elif (term[i] == "t"):
			add_char = st_trans["t"]
			if (part_n_2 == "th"):
				add_char = "0"
			# end if

			if (re.search(r'ti[ao]', part_n_3)):
				add_char = "x"
			# end if
		elif (term[i] == "v"):
			add_char = st_trans["v"]
		elif (term[i] == "w"):
			add_char = st_trans["w"]
			if (re.search(r'w[^aeiouy]', part_n_2)):
				add_char = ""
			# end if
		elif (term[i] == "x"):
			add_char = st_trans["x"]
		elif (term[i] == "y"):
			add_char = st_trans["y"]
		elif (term[i] == "z"):
			add_char = st_trans["z"]
		else:
			# alternative
			add_char = term[i]
		# end if

		code = code + add_char
		i += 1
	# end while

	# return metaphone code
	return code
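
The preprocessing at the top of metaphone is a plain character-class deletion: after lower-casing, re.sub strips everything outside a-z before the phonetic rules run. A short sketch of that normalization step with an illustrative input:

import re

def normalize(term):
    # Lower-case, then drop every character that is not a-z.
    return re.sub(r'[^a-z]', '', term.lower())

print(normalize("O'Brien, 3rd"))  # obrienrd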

Example 140

Project: FanFicFare
Source File: htmlheuristics.py
View license
def replace_br_with_p(body):

    # Ascii character (and Unicode as well) xA0 is a non-breaking space, ascii code 160.
    # However, Python Regex does not recognize it as a whitespace, so we'll be changing it to a regular space.
    body = body.replace(u'\xa0', u' ')

    if body.find('>') == -1 or body.rfind('<') == -1:
        return body

    # logger.debug(u'---')
    # logger.debug(u'BODY start.: ' + body[:4000])
    # logger.debug(u'--')
    # logger.debug(u'BODY end...: ' + body[-250:])
    # logger.debug(u'BODY.......: ' + body)
    # logger.debug(u'---')

    # clean breaks (<br />), removing whitespaces between them.
    body = re.sub(r'\s*<br[^>]*>\s*', r'<br />', body)

    # change surrounding div to a p and remove attrs. Top surrounding
    # tag in all cases now should be div, so just strip the first and
    # last tags.
    if is_valid_block(body) and body.find('<div') == 0:
        body = body[body.index('>')+1:body.rindex('<')].strip()

    # BS is doing some BS on entities, meaning &lt; and &gt; are turned into < and >... a **very** bad idea in html.
    body = re.sub(r'&(.+?);', r'XAMP;\1;', body)
    
    body = soup_up_div(u'<div>' + body + u'</div>')

    body = body[body.index('>')+1:body.rindex('<')]

    # Find all existing blocks with p, pre and blockquote tags; we need to shield break tags inside those.
    # This is for "lenient" mode, however it is also used to clear break tags before and after the block elements.
    blocksRegex = re.compile(r'(\s*<br\ />\s*)*\s*<(pre|p|blockquote|table)([^>]*)>(.+?)</\2>\s*(\s*<br\ />\s*)*', re.DOTALL)
    body = blocksRegex.sub(r'\n<\2\3>\4</\2>\n', body)

    # if aggressive mode = true
        # blocksRegex = re.compile(r'(\s*<br\ */*>\s*)*\s*<(pre)([^>]*)>(.+?)</\2>\s*(\s*<br\ */*>\s*)*', re.DOTALL)
        # In aggressive mode, we also check breaks inside blockquotes, meaning we can get orphaned paragraph tags.
        # body = re.sub(r'<blockquote([^>]*)>(.+?)</blockquote>', r'<blockquote\1><p>\2</p></blockquote>', body, re.DOTALL)
    # end aggressive mode

    blocks = blocksRegex.finditer(body)
    # For our replacements to work, we need to work backwards, so we reverse the iterator.
    blocksList = []
    for match in blocks:
        blocksList.insert(0, match)

    for match in blocksList:
        group4 =  match.group(4).replace(u'<br />', u'{br /}')
        body = body[:match.start(4)] + group4 + body[match.end(4):]

    # change surrounding div to a p and remove attrs. Top surrounding
    # tag in all cases now should be div, so just strip the first and
    # last tags.
    # body = u'<p>' + body + u'</p>'

    # Nuke div tags surrounding a HR tag.
    body = re.sub(r'<div[^>]+>\s*<hr[^>]+>\s*</div>', r'\n<hr />\n', body)

    # So many people add formatting to their HR tags, but ePub does not allow those; we are supposed to use css.
    # This nukes the hr tag attributes.
    body = re.sub(r'\s*<hr[^>]+>\s*', r'\n<hr />\n', body)

    # Remove leading and trailing breaks from HR tags
    body = re.sub(r'\s*(<br\ \/>)*\s*<hr\ \/>\s*(<br\ \/>)*\s*', r'\n<hr />\n', body)
    # Nuking breaks leading paragraphs that may be in the body. They are eventually treated as <p><br /></p>
    body = re.sub(r'\s*(<br\ \/>)+\s*<p', r'\n<p></p>\n<p', body)
    # Nuking breaks trailing paragraphs that may be in the body. They are eventually treated as <p><br /></p>
    body = re.sub(r'</p>\s*(<br\ \/>)+\s*', r'</p>\n<p></p>\n', body)

    # logger.debug(u'--- 2 ---')
    # logger.debug(u'BODY start.: ' + body[:250])
    # logger.debug(u'--')
    # logger.debug(u'BODY end...: ' + body[-250:])
    # logger.debug(u'BODY.......: ' + body)
    # logger.debug(u'--- 2 ---')

    # Because a leading or trailing non break tag will break the following code, we have to mess around rather badly for a few lines.
    body = body.replace(u'[',u'&squareBracketStart;')
    body = body.replace(u']',u'&squareBracketEnd;')
    body = body.replace(u'<br />',u'[br /]')

    breaksRegexp = [
        re.compile(r'([^\]])(\[br\ \/\])([^\[])'),
        re.compile(r'([^\]])(\[br\ \/\]){2}([^\[])'),
        re.compile(r'([^\]])(\[br\ \/\]){3}([^\[])'),
        re.compile(r'([^\]])(\[br\ \/\]){4}([^\[])'),
        re.compile(r'([^\]])(\[br\ \/\]){5}([^\[])'),
        re.compile(r'([^\]])(\[br\ \/\]){6}([^\[])'),
        re.compile(r'([^\]])(\[br\ \/\]){7}([^\[])'),
        re.compile(r'([^\]])(\[br\ \/\]){8}([^\[])'),
        re.compile(r'(\[br\ \/\]){9,}')]

    breaksCount = [
        len(breaksRegexp[0].findall(body)),
        len(breaksRegexp[1].findall(body)),
        len(breaksRegexp[2].findall(body)),
        len(breaksRegexp[3].findall(body)),
        len(breaksRegexp[4].findall(body)),
        len(breaksRegexp[5].findall(body)),
        len(breaksRegexp[6].findall(body)),
        len(breaksRegexp[7].findall(body))]

    breaksMax = 0
    breaksMaxIndex = 0

    for i in range(1,len(breaksCount)):
        if breaksCount[i] >= breaksMax:
            breaksMax = breaksCount[i]
            breaksMaxIndex = i

    lines = body.split(u'[br /]')
    contentLines = 0
    contentLinesSum = 0
    longestLineLength = 0
    averageLineLength = 0

    for line in lines:
        lineLen = len(line.strip())
        if lineLen > 0:
            contentLines += 1
            contentLinesSum += lineLen
            if lineLen > longestLineLength:
                longestLineLength = lineLen

    if contentLines == 0:
        contentLines = 1

    averageLineLength = contentLinesSum/contentLines

    logger.debug(u'---')
    logger.debug(u'Lines.............: ' + unicode(len(lines)))
    logger.debug(u'contentLines......: ' + unicode(contentLines))
    logger.debug(u'contentLinesSum...: ' + unicode(contentLinesSum))
    logger.debug(u'longestLineLength.: ' + unicode(longestLineLength))
    logger.debug(u'averageLineLength.: ' + unicode(averageLineLength))
    logger.debug(u'---')
    logger.debug(u'breaksMaxIndex....: ' + unicode(breaksMaxIndex))
    logger.debug(u'len(breaksCount)-1: ' + unicode(len(breaksCount)-1))
    logger.debug(u'breaksMax.........: ' + unicode(breaksMax))

    if breaksMaxIndex == len(breaksCount)-1 and breaksMax < 2:
        breaksMaxIndex = 0
        breaksMax = breaksCount[0]

    logger.debug(u'---')
    logger.debug(u'breaks 1: ' + unicode(breaksCount[0]))
    logger.debug(u'breaks 2: ' + unicode(breaksCount[1]))
    logger.debug(u'breaks 3: ' + unicode(breaksCount[2]))
    logger.debug(u'breaks 4: ' + unicode(breaksCount[3]))
    logger.debug(u'breaks 5: ' + unicode(breaksCount[4]))
    logger.debug(u'breaks 6: ' + unicode(breaksCount[5]))
    logger.debug(u'breaks 7: ' + unicode(breaksCount[6]))
    logger.debug(u'breaks 8: ' + unicode(breaksCount[7]))
    logger.debug(u'----')
    logger.debug(u'max found: ' + unicode(breaksMax))
    logger.debug(u'max Index: ' + unicode(breaksMaxIndex))
    logger.debug(u'----')

    if breaksMaxIndex > 0 and breaksCount[0] > breaksMax and averageLineLength < 90:
        body = breaksRegexp[0].sub(r'\1 \n\3', body)

    # Find all instances of consecutive breaks less than or equal to the max count used most often
    #  and replace those runs with inverted p tag pairs; those with more consecutive breaks are replaced with a horizontal line
    for i in range(len(breaksCount)):
        # if i > 0 or breaksMaxIndex == 0:
        if i <= breaksMaxIndex:
            logger.debug(unicode(i) + u' <= breaksMaxIndex (' + unicode(breaksMaxIndex) + u')')
            body = breaksRegexp[i].sub(r'\1</p>\n<p>\3', body)
        elif i == breaksMaxIndex+1:
            logger.debug(unicode(i) + u' == breaksMaxIndex+1 (' + unicode(breaksMaxIndex+1) + u')')
            body = breaksRegexp[i].sub(r'\1</p>\n<p><br/></p>\n<p>\3', body)
        else:
            logger.debug(unicode(i) + u' > breaksMaxIndex+1 (' + unicode(breaksMaxIndex+1) + u')')
            body = breaksRegexp[i].sub(r'\1</p>\n<hr />\n<p>\3', body)

    body = breaksRegexp[8].sub(r'</p>\n<hr />\n<p>', body)

    # Reverting the square brackets
    body = body.replace(u'[', u'<')
    body = body.replace(u']', u'>')
    body = body.replace(u'&squareBracketStart;', u'[')
    body = body.replace(u'&squareBracketEnd;', u']')

    body = body.replace(u'{p}', u'<p>')
    body = body.replace(u'{/p}', u'</p>')

    # If for some reason a third break makes its way inside the paragraph, replace that with the empty paragraph for the additional line spacing.
    body = re.sub(r'<p>\s*(<br\ \/>)+', r'<p><br /></p>\n<p>', body)

    # change empty p tags to include a br to force spacing.
    body = re.sub(r'<p>\s*</p>', r'<p><br/></p>', body)

    # Clean up hr tags, and add inverted p tag pairs
    body = re.sub(r'(<div[^>]+>)*\s*<hr\ \/>\s*(</div>)*', r'\n<hr />\n', body)

    # Clean up hr tags, and add inverted p tag pairs
    body = re.sub(r'\s*<hr\ \/>\s*', r'</p>\n<hr />\n<p>', body)

    # Because the previous regexp may cause trouble if the hr tag already had a p tag pair around it, we need to repair that.
    # Repeated opening p tags are condensed to one. As we added the extra leading opening p tags, we can safely assume that
    #  the last in such a chain must be the original. Let's keep its attributes if they are there.
    body = re.sub(r'\s*(<p[^>]*>\s*)+<p([^>]*)>\s*', r'\n<p\2>', body)
    # Repeated closing p tags are condensed to one
    body = re.sub(r'\s*(<\/\s*p>\s*){2,}', r'</p>\n', body)

    # superfluous cleaning: remove whitespace trailing opening p tags. These do affect formatting.
    body = re.sub(r'\s*<p([^>]*)>\s*', r'\n<p\1>', body)
    # superfluous cleaning: remove whitespace leading closing p tags. These do not affect formatting.
    body = re.sub(r'\s*</p>\s*', r'</p>\n', body)

    # Remove empty tag pairs
    body = re.sub(r'\s*<(\S+)[^>]*>\s*</\1>', r'', body)

    body = body.replace(u'{br /}', u'<br />')
    body = re.sub(r'XAMP;(.+?);', r'&\1;', body)
    body = body.strip()

    # re-wrap in div tag.
    body = u'<div>\n' + body + u'</div>\n'

    # return body
    return tag_sanitizer(body)
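
The substitutions above hinge on one re.sub idiom: a compiled pattern that matches a bounded run of (shielded) break tags between two ordinary characters, and a replacement string that uses backreferences to keep those characters while rewriting the run as a paragraph boundary. Below is a minimal standalone sketch of that idiom; the sample string and variable names are illustrative, not taken from FanFicFare.

import re

# Match exactly two break tags between two non-tag characters, the same shape
# as the shielded [br /] patterns in breaksRegexp above.
two_breaks = re.compile(r'([^>])(<br\ \/>){2}([^<])')

html = u'First paragraph.<br /><br />Second paragraph.'
# \1 and \3 restore the surrounding characters; the run itself becomes a
# close/open paragraph pair.
print(two_breaks.sub(r'\1</p>\n<p>\3', html))
# First paragraph.</p>
# <p>Second paragraph.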

Example 141

Project: FanFicFare
Source File: htmlheuristics.py
View license
def replace_br_with_p(body):

    # Character xA0 (decimal 160) is the Latin-1/Unicode non-breaking space.
    # However, Python's regex does not treat it as whitespace by default, so we'll be changing it to a regular space.
    body = body.replace(u'\xa0', u' ')

    if body.find('>') == -1 or body.rfind('<') == -1:
        return body

    # logger.debug(u'---')
    # logger.debug(u'BODY start.: ' + body[:4000])
    # logger.debug(u'--')
    # logger.debug(u'BODY end...: ' + body[-250:])
    # logger.debug(u'BODY.......: ' + body)
    # logger.debug(u'---')

    # clean breaks (<br />), removing whitespaces between them.
    body = re.sub(r'\s*<br[^>]*>\s*', r'<br />', body)

    # change surrounding div to a p and remove attrs Top surrounding
    # tag in all cases now should be div, to just strip the first and
    # last tags.
    if is_valid_block(body) and body.find('<div') == 0:
        body = body[body.index('>')+1:body.rindex('<')].strip()

    # BS is doing some BS on entities, meaning &lt; and &gt; are turned into < and >... a **very** bad idea in html.
    body = re.sub(r'&(.+?);', r'XAMP;\1;', body)
    
    body = soup_up_div(u'<div>' + body + u'</div>')

    body = body[body.index('>')+1:body.rindex('<')]

    # Find all existing blocks with p, pre and blockquote tags; we need to shield break tags inside those.
    # This is for "lenient" mode, however it is also used to clear break tags before and after the block elements.
    blocksRegex = re.compile(r'(\s*<br\ />\s*)*\s*<(pre|p|blockquote|table)([^>]*)>(.+?)</\2>\s*(\s*<br\ />\s*)*', re.DOTALL)
    body = blocksRegex.sub(r'\n<\2\3>\4</\2>\n', body)

    # if aggressive mode = true
        # blocksRegex = re.compile(r'(\s*<br\ */*>\s*)*\s*<(pre)([^>]*)>(.+?)</\2>\s*(\s*<br\ */*>\s*)*', re.DOTALL)
        # In aggressive mode, we also check breaks inside blockquotes, meaning we can get orphaned paragraph tags.
        # body = re.sub(r'<blockquote([^>]*)>(.+?)</blockquote>', r'<blockquote\1><p>\2</p></blockquote>', body, re.DOTALL)
    # end aggressive mode

    blocks = blocksRegex.finditer(body)
    # For our replacements to work, we need to work backwards, so we reverse the iterator.
    blocksList = []
    for match in blocks:
        blocksList.insert(0, match)

    for match in blocksList:
        group4 =  match.group(4).replace(u'<br />', u'{br /}')
        body = body[:match.start(4)] + group4 + body[match.end(4):]

    # change surrounding div to a p and remove attrs Top surrounding
    # tag in all cases now should be div, to just strip the first and
    # last tags.
    # body = u'<p>' + body + u'</p>'

    # Nuke div tags surrounding a HR tag.
    body = re.sub(r'<div[^>]+>\s*<hr[^>]+>\s*</div>', r'\n<hr />\n', body)

    # So many people add formatting to their HR tags, and ePub does not allow those, we are supposed to use css.
    # This nukes the hr tag attributes.
    body = re.sub(r'\s*<hr[^>]+>\s*', r'\n<hr />\n', body)

    # Remove leading and trailing breaks from HR tags
    body = re.sub(r'\s*(<br\ \/>)*\s*<hr\ \/>\s*(<br\ \/>)*\s*', r'\n<hr />\n', body)
    # Nuking breaks leading paragraphs that may be in the body. They are eventually treated as <p><br /></p>
    body = re.sub(r'\s*(<br\ \/>)+\s*<p', r'\n<p></p>\n<p', body)
    # Nuking breaks trailing paragraphs that may be in the body. They are eventually treated as <p><br /></p>
    body = re.sub(r'</p>\s*(<br\ \/>)+\s*', r'</p>\n<p></p>\n', body)

    # logger.debug(u'--- 2 ---')
    # logger.debug(u'BODY start.: ' + body[:250])
    # logger.debug(u'--')
    # logger.debug(u'BODY end...: ' + body[-250:])
    # logger.debug(u'BODY.......: ' + body)
    # logger.debug(u'--- 2 ---')

    # Because a leading or trailing non break tag will break the following code, we have to mess around rather badly for a few lines.
    body = body.replace(u'[',u'&squareBracketStart;')
    body = body.replace(u']',u'&squareBracketEnd;')
    body = body.replace(u'<br />',u'[br /]')

    breaksRegexp = [
        re.compile(r'([^\]])(\[br\ \/\])([^\[])'),
        re.compile(r'([^\]])(\[br\ \/\]){2}([^\[])'),
        re.compile(r'([^\]])(\[br\ \/\]){3}([^\[])'),
        re.compile(r'([^\]])(\[br\ \/\]){4}([^\[])'),
        re.compile(r'([^\]])(\[br\ \/\]){5}([^\[])'),
        re.compile(r'([^\]])(\[br\ \/\]){6}([^\[])'),
        re.compile(r'([^\]])(\[br\ \/\]){7}([^\[])'),
        re.compile(r'([^\]])(\[br\ \/\]){8}([^\[])'),
        re.compile(r'(\[br\ \/\]){9,}')]

    breaksCount = [
        len(breaksRegexp[0].findall(body)),
        len(breaksRegexp[1].findall(body)),
        len(breaksRegexp[2].findall(body)),
        len(breaksRegexp[3].findall(body)),
        len(breaksRegexp[4].findall(body)),
        len(breaksRegexp[5].findall(body)),
        len(breaksRegexp[6].findall(body)),
        len(breaksRegexp[7].findall(body))]

    breaksMax = 0
    breaksMaxIndex = 0

    for i in range(1,len(breaksCount)):
        if breaksCount[i] >= breaksMax:
            breaksMax = breaksCount[i]
            breaksMaxIndex = i

    lines = body.split(u'[br /]')
    contentLines = 0
    contentLinesSum = 0
    longestLineLength = 0
    averageLineLength = 0

    for line in lines:
        lineLen = len(line.strip())
        if lineLen > 0:
            contentLines += 1
            contentLinesSum += lineLen
            if lineLen > longestLineLength:
                longestLineLength = lineLen

    if contentLines == 0:
        contentLines = 1

    averageLineLength = contentLinesSum/contentLines

    logger.debug(u'---')
    logger.debug(u'Lines.............: ' + unicode(len(lines)))
    logger.debug(u'contentLines......: ' + unicode(contentLines))
    logger.debug(u'contentLinesSum...: ' + unicode(contentLinesSum))
    logger.debug(u'longestLineLength.: ' + unicode(longestLineLength))
    logger.debug(u'averageLineLength.: ' + unicode(averageLineLength))
    logger.debug(u'---')
    logger.debug(u'breaksMaxIndex....: ' + unicode(breaksMaxIndex))
    logger.debug(u'len(breaksCount)-1: ' + unicode(len(breaksCount)-1))
    logger.debug(u'breaksMax.........: ' + unicode(breaksMax))

    if breaksMaxIndex == len(breaksCount)-1 and breaksMax < 2:
        breaksMaxIndex = 0
        breaksMax = breaksCount[0]

    logger.debug(u'---')
    logger.debug(u'breaks 1: ' + unicode(breaksCount[0]))
    logger.debug(u'breaks 2: ' + unicode(breaksCount[1]))
    logger.debug(u'breaks 3: ' + unicode(breaksCount[2]))
    logger.debug(u'breaks 4: ' + unicode(breaksCount[3]))
    logger.debug(u'breaks 5: ' + unicode(breaksCount[4]))
    logger.debug(u'breaks 6: ' + unicode(breaksCount[5]))
    logger.debug(u'breaks 7: ' + unicode(breaksCount[6]))
    logger.debug(u'breaks 8: ' + unicode(breaksCount[7]))
    logger.debug(u'----')
    logger.debug(u'max found: ' + unicode(breaksMax))
    logger.debug(u'max Index: ' + unicode(breaksMaxIndex))
    logger.debug(u'----')

    if breaksMaxIndex > 0 and breaksCount[0] > breaksMax and averageLineLength < 90:
        body = breaksRegexp[0].sub(r'\1 \n\3', body)

    # Find all instances of consecutive breaks less than or equal to the max count used most often
    #  and replace those runs with inverted p tag pairs; those with more consecutive breaks are replaced with a horizontal line
    for i in range(len(breaksCount)):
        # if i > 0 or breaksMaxIndex == 0:
        if i <= breaksMaxIndex:
            logger.debug(unicode(i) + u' <= breaksMaxIndex (' + unicode(breaksMaxIndex) + u')')
            body = breaksRegexp[i].sub(r'\1</p>\n<p>\3', body)
        elif i == breaksMaxIndex+1:
            logger.debug(unicode(i) + u' == breaksMaxIndex+1 (' + unicode(breaksMaxIndex+1) + u')')
            body = breaksRegexp[i].sub(r'\1</p>\n<p><br/></p>\n<p>\3', body)
        else:
            logger.debug(unicode(i) + u' > breaksMaxIndex+1 (' + unicode(breaksMaxIndex+1) + u')')
            body = breaksRegexp[i].sub(r'\1</p>\n<hr />\n<p>\3', body)

    body = breaksRegexp[8].sub(r'</p>\n<hr />\n<p>', body)

    # Reverting the square brackets
    body = body.replace(u'[', u'<')
    body = body.replace(u']', u'>')
    body = body.replace(u'&squareBracketStart;', u'[')
    body = body.replace(u'&squareBracketEnd;', u']')

    body = body.replace(u'{p}', u'<p>')
    body = body.replace(u'{/p}', u'</p>')

    # If for some reason a third break makes its way inside the paragraph, replace that with the empty paragraph for the additional line spacing.
    body = re.sub(r'<p>\s*(<br\ \/>)+', r'<p><br /></p>\n<p>', body)

    # change empty p tags to include a br to force spacing.
    body = re.sub(r'<p>\s*</p>', r'<p><br/></p>', body)

    # Clean up hr tags, and add inverted p tag pairs
    body = re.sub(r'(<div[^>]+>)*\s*<hr\ \/>\s*(</div>)*', r'\n<hr />\n', body)

    # Clean up hr tags, and add inverted p tag pairs
    body = re.sub(r'\s*<hr\ \/>\s*', r'</p>\n<hr />\n<p>', body)

    # Because the previous regexp may cause trouble if the hr tag already had a p tag pair around it, we need to repair that.
    # Repeated opening p tags are condensed to one. As we added the extra leading opening p tags, we can safely assume that
    #  the last in such a chain must be the original. Let's keep its attributes if they are there.
    body = re.sub(r'\s*(<p[^>]*>\s*)+<p([^>]*)>\s*', r'\n<p\2>', body)
    # Repeated closing p tags are condensed to one
    body = re.sub(r'\s*(<\/\s*p>\s*){2,}', r'</p>\n', body)

    # superfluous cleaning: remove whitespace trailing opening p tags. These do affect formatting.
    body = re.sub(r'\s*<p([^>]*)>\s*', r'\n<p\1>', body)
    # superfluous cleaning: remove whitespace leading closing p tags. These do not affect formatting.
    body = re.sub(r'\s*</p>\s*', r'</p>\n', body)

    # Remove empty tag pairs
    body = re.sub(r'\s*<(\S+)[^>]*>\s*</\1>', r'', body)

    body = body.replace(u'{br /}', u'<br />')
    body = re.sub(r'XAMP;(.+?);', r'&\1;', body)
    body = body.strip()

    # re-wrap in div tag.
    body = u'<div>\n' + body + u'</div>\n'

    # return body
    return tag_sanitizer(body)
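
Another re.sub pairing in replace_br_with_p is the entity shield: the pattern &(.+?); rewrites every HTML entity to an XAMP;...; placeholder before the soup and break handling run, and XAMP;(.+?); restores the entities at the end. A minimal standalone sketch of that round trip (the sample string is illustrative only):

import re

# Shield entities so the intermediate processing cannot mangle them,
# mirroring the XAMP;...; trick in replace_br_with_p above.
body = u'Fish &amp; Chips<br />Mushy peas'
body = re.sub(r'&(.+?);', r'XAMP;\1;', body)   # 'Fish XAMP;amp; Chips...'
# ... break-tag handling would run here without touching the entity ...
body = re.sub(r'XAMP;(.+?);', r'&\1;', body)   # entity restored
print(body)   # Fish &amp; Chips<br />Mushy peas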

Example 142

Project: FanFicFare
Source File: writer_epub.py
View license
    def writeStoryImpl(self, out):

        ## Python 2.5 ZipFile is rather more primitive than later
        ## versions.  It can operate on a file, or on a StringIO, but
        ## not on an open stream.  OTOH, I suspect we would have had
        ## problems with closing and opening again to change the
        ## compression type anyway.
        zipio = StringIO.StringIO()

        ## mimetype must be first file and uncompressed.  Python 2.5
        ## ZipFile can't change compression type file-by-file, so we
        ## have to close and re-open
        outputepub = ZipFile(zipio, 'w', compression=ZIP_STORED)
        outputepub.debug=3
        outputepub.writestr('mimetype','application/epub+zip')
        outputepub.close()

        ## Re-open file for content.
        outputepub = ZipFile(zipio, 'a', compression=ZIP_DEFLATED)
        outputepub.debug=3

        ## Create META-INF/container.xml file.  The only thing it does is
        ## point to content.opf
        containerdom = getDOMImplementation().createDocument(None, "container", None)
        containertop = containerdom.documentElement
        containertop.setAttribute("version","1.0")
        containertop.setAttribute("xmlns","urn:oasis:names:tc:opendocument:xmlns:container")
        rootfiles = containerdom.createElement("rootfiles")
        containertop.appendChild(rootfiles)
        rootfiles.appendChild(newTag(containerdom,"rootfile",{"full-path":"content.opf",
                                                              "media-type":"application/oebps-package+xml"}))
        outputepub.writestr("META-INF/container.xml",containerdom.toxml(encoding='utf-8'))
        containerdom.unlink()
        del containerdom

        ## Epub has two metadata files with real data.  We're putting
        ## them in content.opf (pointed to by META-INF/container.xml)
        ## and toc.ncx (pointed to by content.opf)

        ## content.opf contains metadata, a 'manifest' list of all
        ## other included files, and another 'spine' list of the items in the
        ## file

        uniqueid= 'fanficfare-uid:%s-u%s-s%s' % (
            self.getMetadata('site'),
            self.story.getList('authorId')[0],
            self.getMetadata('storyId'))

        contentdom = getDOMImplementation().createDocument(None, "package", None)
        package = contentdom.documentElement
        package.setAttribute("version","2.0")
        package.setAttribute("xmlns","http://www.idpf.org/2007/opf")
        package.setAttribute("unique-identifier","fanficfare-uid")
        metadata=newTag(contentdom,"metadata",
                        attrs={"xmlns:dc":"http://purl.org/dc/elements/1.1/",
                               "xmlns:opf":"http://www.idpf.org/2007/opf"})
        package.appendChild(metadata)

        metadata.appendChild(newTag(contentdom,"dc:identifier",
                                    text=uniqueid,
                                    attrs={"id":"fanficfare-uid"}))

        if self.getMetadata('title'):
            metadata.appendChild(newTag(contentdom,"dc:title",text=self.getMetadata('title')))

        if self.getMetadata('author'):
            if self.story.isList('author'):
                for auth in self.story.getList('author'):
                    metadata.appendChild(newTag(contentdom,"dc:creator",
                                                attrs={"opf:role":"aut"},
                                                text=auth))
            else:
                metadata.appendChild(newTag(contentdom,"dc:creator",
                                            attrs={"opf:role":"aut"},
                                            text=self.getMetadata('author')))

        metadata.appendChild(newTag(contentdom,"dc:contributor",text="FanFicFare [https://github.com/JimmXinu/FanFicFare]",attrs={"opf:role":"bkp"}))
        metadata.appendChild(newTag(contentdom,"dc:rights",text=""))
        if self.story.getMetadata('langcode'):
            metadata.appendChild(newTag(contentdom,"dc:language",text=self.story.getMetadata('langcode')))
        else:
            metadata.appendChild(newTag(contentdom,"dc:language",text='en'))

        #  published, created, updated, calibre
        #  Leave calling self.story.getMetadataRaw directly in case date format changes.
        if self.story.getMetadataRaw('datePublished'):
            metadata.appendChild(newTag(contentdom,"dc:date",
                                        attrs={"opf:event":"publication"},
                                        text=self.story.getMetadataRaw('datePublished').strftime("%Y-%m-%d")))

        if self.story.getMetadataRaw('dateCreated'):
            metadata.appendChild(newTag(contentdom,"dc:date",
                                        attrs={"opf:event":"creation"},
                                        text=self.story.getMetadataRaw('dateCreated').strftime("%Y-%m-%d")))

        if self.story.getMetadataRaw('dateUpdated'):
            metadata.appendChild(newTag(contentdom,"dc:date",
                                        attrs={"opf:event":"modification"},
                                        text=self.story.getMetadataRaw('dateUpdated').strftime("%Y-%m-%d")))
            metadata.appendChild(newTag(contentdom,"meta",
                                        attrs={"name":"calibre:timestamp",
                                               "content":self.story.getMetadataRaw('dateUpdated').strftime("%Y-%m-%dT%H:%M:%S")}))

        if self.getMetadata('description'):
            metadata.appendChild(newTag(contentdom,"dc:description",text=
                                        self.getMetadata('description')))

        for subject in self.story.getSubjectTags():
            metadata.appendChild(newTag(contentdom,"dc:subject",text=subject))


        if self.getMetadata('site'):
            metadata.appendChild(newTag(contentdom,"dc:publisher",
                                        text=self.getMetadata('site')))

        if self.getMetadata('storyUrl'):
            metadata.appendChild(newTag(contentdom,"dc:identifier",
                                        attrs={"opf:scheme":"URL"},
                                        text=self.getMetadata('storyUrl')))
            metadata.appendChild(newTag(contentdom,"dc:source",
                                        text=self.getMetadata('storyUrl')))

        ## end of metadata, create manifest.
        items = [] # list of (id, href, type, title) tuples(all strings)
        itemrefs = [] # list of strings -- idrefs from .opfs' spines
        items.append(("ncx","toc.ncx","application/x-dtbncx+xml",None)) ## we'll generate the toc.ncx file,
                                                                   ## but it needs to be in the items manifest.

        guide = None
        coverIO = None

        coverimgid = "image0000"
        if not self.story.cover and self.story.oldcover:
            logger.debug("writer_epub: no new cover, has old cover, write image.")
            (oldcoverhtmlhref,
             oldcoverhtmltype,
             oldcoverhtmldata,
             oldcoverimghref,
             oldcoverimgtype,
             oldcoverimgdata) = self.story.oldcover
            outputepub.writestr(oldcoverhtmlhref,oldcoverhtmldata)
            outputepub.writestr(oldcoverimghref,oldcoverimgdata)

            coverimgid = "image0"
            items.append((coverimgid,
                          oldcoverimghref,
                          oldcoverimgtype,
                          None))
            items.append(("cover",oldcoverhtmlhref,oldcoverhtmltype,None))
            itemrefs.append("cover")
            metadata.appendChild(newTag(contentdom,"meta",{"content":"image0",
                                                           "name":"cover"}))
            guide = newTag(contentdom,"guide")
            guide.appendChild(newTag(contentdom,"reference",attrs={"type":"cover",
                                                                   "title":"Cover",
                                                                   "href":oldcoverhtmlhref}))



        if self.getConfig('include_images'):
            imgcount=0
            for imgmap in self.story.getImgUrls():
                imgfile = "OEBPS/"+imgmap['newsrc']
                outputepub.writestr(imgfile,imgmap['data'])
                items.append(("image%04d"%imgcount,
                              imgfile,
                              imgmap['mime'],
                              None))
                imgcount+=1
                if 'cover' in imgfile:
                    # make sure coverimgid is set to the cover, not
                    # just the first image.
                    coverimgid = items[-1][0]


        items.append(("style","OEBPS/stylesheet.css","text/css",None))

        if self.story.cover:
            # Note that the id of the cover xhtml *must* be 'cover'
            # for it to work on Nook.
            items.append(("cover","OEBPS/cover.xhtml","application/xhtml+xml",None))
            itemrefs.append("cover")
            #
            # <meta name="cover" content="cover.jpg"/>
            metadata.appendChild(newTag(contentdom,"meta",{"content":coverimgid,
                                                           "name":"cover"}))
            # cover stuff for later:
            # at end of <package>:
            # <guide>
            # <reference type="cover" title="Cover" href="Text/cover.xhtml"/>
            # </guide>
            guide = newTag(contentdom,"guide")
            guide.appendChild(newTag(contentdom,"reference",attrs={"type":"cover",
                                                       "title":"Cover",
                                                       "href":"OEBPS/cover.xhtml"}))

            if self.hasConfig("cover_content"):
                COVER = string.Template(self.getConfig("cover_content"))
            else:
                COVER = self.EPUB_COVER
            coverIO = StringIO.StringIO()
            coverIO.write(COVER.substitute(dict(self.story.getAllMetadata().items()+{'coverimg':self.story.cover}.items())))

        if self.getConfig("include_titlepage"):
            items.append(("title_page","OEBPS/title_page.xhtml","application/xhtml+xml","Title Page"))
            itemrefs.append("title_page")
        if len(self.story.getChapters()) > 1 and self.getConfig("include_tocpage") and not self.metaonly :
            items.append(("toc_page","OEBPS/toc_page.xhtml","application/xhtml+xml","Table of Contents"))
            itemrefs.append("toc_page")

        ## save where to insert logpage.
        logpage_indices = (len(items),len(itemrefs))

        dologpage = ( self.getConfig("include_logpage") == "smart" and \
                          (self.story.logfile or self.story.getMetadataRaw("status") == "In-Progress") )  \
                     or self.getConfig("include_logpage") == "true"

        ## collect chapter urls and file names for internalize_text_links option.
        chapurlmap = {}
        for index, chap in enumerate(self.story.getChapters(fortoc=True)):
            if chap.html:
                i=index+1
                items.append(("file%04d"%i,
                              "OEBPS/file%04d.xhtml"%i,
                              "application/xhtml+xml",
                              chap.title))
                itemrefs.append("file%04d"%i)
                chapurlmap[chap.url]="file%04d.xhtml"%i # url -> relative epub file name.

        if dologpage:
            if self.getConfig("logpage_at_end") == "true":
                ## insert logpage after chapters.
                logpage_indices = (len(items),len(itemrefs))
            items.insert(logpage_indices[0],("log_page","OEBPS/log_page.xhtml","application/xhtml+xml","Update Log"))
            itemrefs.insert(logpage_indices[1],"log_page")

        manifest = contentdom.createElement("manifest")
        package.appendChild(manifest)
        for item in items:
            (id,href,type,title)=item
            manifest.appendChild(newTag(contentdom,"item",
                                        attrs={'id':id,
                                               'href':href,
                                               'media-type':type}))

        spine = newTag(contentdom,"spine",attrs={"toc":"ncx"})
        package.appendChild(spine)
        for itemref in itemrefs:
            spine.appendChild(newTag(contentdom,"itemref",
                                     attrs={"idref":itemref,
                                            "linear":"yes"}))
        # guide only exists if there's a cover.
        if guide:
            package.appendChild(guide)

        # write content.opf to zip.
        contentxml = contentdom.toxml(encoding='utf-8')

        # tweak for brain damaged Nook STR.  Nook insists on name before content.
        contentxml = contentxml.replace('<meta content="%s" name="cover"/>'%coverimgid,
                                        '<meta name="cover" content="%s"/>'%coverimgid)
        outputepub.writestr("content.opf",contentxml)

        contentdom.unlink()
        del contentdom

        ## create toc.ncx file
        tocncxdom = getDOMImplementation().createDocument(None, "ncx", None)
        ncx = tocncxdom.documentElement
        ncx.setAttribute("version","2005-1")
        ncx.setAttribute("xmlns","http://www.daisy.org/z3986/2005/ncx/")
        head = tocncxdom.createElement("head")
        ncx.appendChild(head)
        head.appendChild(newTag(tocncxdom,"meta",
                                attrs={"name":"dtb:uid", "content":uniqueid}))
        head.appendChild(newTag(tocncxdom,"meta",
                                attrs={"name":"dtb:depth", "content":"1"}))
        head.appendChild(newTag(tocncxdom,"meta",
                                attrs={"name":"dtb:totalPageCount", "content":"0"}))
        head.appendChild(newTag(tocncxdom,"meta",
                                attrs={"name":"dtb:maxPageNumber", "content":"0"}))

        docTitle = tocncxdom.createElement("docTitle")
        docTitle.appendChild(newTag(tocncxdom,"text",text=self.getMetadata('title')))
        ncx.appendChild(docTitle)

        tocnavMap = tocncxdom.createElement("navMap")
        ncx.appendChild(tocnavMap)

        # <navPoint id="<id>" playOrder="<risingnumberfrom0>">
        #   <navLabel>
        #     <text><chapter title></text>
        #   </navLabel>
        #   <content src="<chapterfile>"/>
        # </navPoint>
        index=0
        for item in items:
            (id,href,type,title)=item
            # Only items to be skipped (cover.xhtml, images, toc.ncx, stylesheet.css) should have no title.
            if title :
                navPoint = newTag(tocncxdom,"navPoint",
                                  attrs={'id':id,
                                         'playOrder':unicode(index)})
                tocnavMap.appendChild(navPoint)
                navLabel = newTag(tocncxdom,"navLabel")
                navPoint.appendChild(navLabel)
                ## the xml library will re-escape as needed.
                navLabel.appendChild(newTag(tocncxdom,"text",text=stripHTML(title)))
                navPoint.appendChild(newTag(tocncxdom,"content",attrs={"src":href}))
                index=index+1

        # write toc.ncx to zip file
        outputepub.writestr("toc.ncx",tocncxdom.toxml(encoding='utf-8'))
        tocncxdom.unlink()
        del tocncxdom

        # write stylesheet.css file.
        outputepub.writestr("OEBPS/stylesheet.css",self.EPUB_CSS.substitute(self.story.getAllMetadata()))

        # write title page.
        if self.getConfig("titlepage_use_table"):
            TITLE_PAGE_START  = self.EPUB_TABLE_TITLE_PAGE_START
            TITLE_ENTRY       = self.EPUB_TABLE_TITLE_ENTRY
            WIDE_TITLE_ENTRY  = self.EPUB_TABLE_TITLE_WIDE_ENTRY
            NO_TITLE_ENTRY    = self.EPUB_TABLE_NO_TITLE_ENTRY
            TITLE_PAGE_END    = self.EPUB_TABLE_TITLE_PAGE_END
        else:
            TITLE_PAGE_START  = self.EPUB_TITLE_PAGE_START
            TITLE_ENTRY       = self.EPUB_TITLE_ENTRY
            WIDE_TITLE_ENTRY  = self.EPUB_TITLE_ENTRY # same, only wide in tables.
            NO_TITLE_ENTRY    = self.EPUB_NO_TITLE_ENTRY
            TITLE_PAGE_END    = self.EPUB_TITLE_PAGE_END

        if coverIO:
            outputepub.writestr("OEBPS/cover.xhtml",coverIO.getvalue())
            coverIO.close()

        titlepageIO = StringIO.StringIO()
        self.writeTitlePage(out=titlepageIO,
                            START=TITLE_PAGE_START,
                            ENTRY=TITLE_ENTRY,
                            WIDE_ENTRY=WIDE_TITLE_ENTRY,
                            END=TITLE_PAGE_END,
                            NO_TITLE_ENTRY=NO_TITLE_ENTRY)
        if titlepageIO.getvalue(): # will be false if no title page.
            outputepub.writestr("OEBPS/title_page.xhtml",titlepageIO.getvalue())
        titlepageIO.close()

        # write toc page.
        tocpageIO = StringIO.StringIO()
        self.writeTOCPage(tocpageIO,
                          self.EPUB_TOC_PAGE_START,
                          self.EPUB_TOC_ENTRY,
                          self.EPUB_TOC_PAGE_END)
        if tocpageIO.getvalue(): # will be false if no toc page.
            outputepub.writestr("OEBPS/toc_page.xhtml",tocpageIO.getvalue())
        tocpageIO.close()

        if dologpage:
            # write log page.
            logpageIO = StringIO.StringIO()
            self.writeLogPage(logpageIO)
            outputepub.writestr("OEBPS/log_page.xhtml",logpageIO.getvalue())
            logpageIO.close()

        if self.hasConfig('chapter_start'):
            CHAPTER_START = string.Template(self.getConfig("chapter_start"))
        else:
            CHAPTER_START = self.EPUB_CHAPTER_START

        if self.hasConfig('chapter_end'):
            CHAPTER_END = string.Template(self.getConfig("chapter_end"))
        else:
            CHAPTER_END = self.EPUB_CHAPTER_END

        for index, chap in enumerate(self.story.getChapters()): # (url,title,html)
            if chap.html:
                chap_data = chap.html
                if self.getConfig('internalize_text_links'):
                    soup = bs4.BeautifulSoup(chap.html,'html5lib')
                    changed=False
                    for alink in soup.find_all('a'):
                        if alink.has_attr('href') and alink['href'] in chapurlmap:
                            alink['href']=chapurlmap[alink['href']]
                            changed=True
                    if changed:
                        chap_data = unicode(soup)
                        # Don't want html, head or body tags in
                        # chapter html--bs4 insists on adding them.
                        chap_data = re.sub(r"</?(html|head|body)[^>]*>\r?\n?","",chap_data)

                #logger.debug('Writing chapter text for: %s' % chap.title)
                vals={'url':removeEntities(chap.url),
                      'chapter':removeEntities(chap.title),
                      'origchapter':removeEntities(chap.origtitle),
                      'tocchapter':removeEntities(chap.toctitle),
                      'index':"%04d"%(index+1),
                      'number':index+1}
                # escape double quotes in all vals.
                for k,v in vals.items():
                    if isinstance(v,basestring): vals[k]=v.replace('"','&quot;')
                fullhtml = CHAPTER_START.substitute(vals) + \
                    chap_data.strip() + \
                    CHAPTER_END.substitute(vals)
                # strip to avoid ever growing numbers of newlines.
                # ffnet(& maybe others) gives the whole chapter text
                # as one line.  This causes problems for nook(at
                # least) when the chapter size starts getting big
                # (200k+)
                fullhtml = re.sub(r'(</p>|<br ?/>)\n*',r'\1\n',fullhtml)

                outputepub.writestr("OEBPS/file%04d.xhtml"%(index+1),fullhtml.encode('utf-8'))
                del fullhtml

        if self.story.calibrebookmark:
            outputepub.writestr("META-INF/calibre_bookmarks.txt",self.story.calibrebookmark)

        # declare all the files as created by Windows; otherwise, when
        # it runs in appengine, windows unzips the files as 000 perms.
        for zf in outputepub.filelist:
            zf.create_system = 0
        outputepub.close()
        out.write(zipio.getvalue())
        zipio.close()
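
The two re.sub calls in writeStoryImpl do most of the chapter clean-up: one strips the html/head/body wrapper tags that html5lib insists on adding, the other leaves exactly one newline after each closing p or br tag so a chapter is not written out as a single enormous line. A minimal standalone sketch with illustrative input:

import re

# Sample chapter markup as html5lib might hand it back (illustrative only).
chap_data = u'<html><head></head><body><p>Hello</p><br/><p>World</p></body></html>'

# Drop the wrapper tags bs4/html5lib adds around chapter fragments.
chap_data = re.sub(r"</?(html|head|body)[^>]*>\r?\n?", "", chap_data)

# Exactly one newline after each closing p or br tag, never more.
chap_data = re.sub(r'(</p>|<br ?/>)\n*', r'\1\n', chap_data)

print(chap_data)
# <p>Hello</p>
# <br/>
# <p>World</p>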


Example 144

Project: pycse
Source File: publish.py
View license
    def __init__(self, options):
        self.preamble = r"""
    \usepackage{{listings}}
    \usepackage{{color}}
    \usepackage{{graphicx}}
    \usepackage{{attachfile}}
    \usepackage{{amsmath}}
    \usepackage{{datetime}}
    % set some pdf metadata
    \pdfinfo{{
             /AndrewID ({andrewid})  
             /Assignment ({assignment})
             /Grade (ungraded)
             }}
    \definecolor{{darkgreen}}{{cmyk}}{{0.7, 0, 1, 0.5}}
    \definecolor{{darkblue}}{{cmyk}}{{1, 0.8, 0, 0}}
    \definecolor{{lightblue}}{{cmyk}}{{0.05,0,0,0.05}}
    \definecolor{{grey}}{{cmyk}}{{0.1,0.1,0.1,1}}
    \definecolor{{lightgrey}}{{cmyk}}{{0,0,0,0.5}}
    \definecolor{{purple}}{{cmyk}}{{0.8,1,0,0}}

\begin{{popupmenu}}{{myMenu}}
    \item{{title=A++}}
    \item{{title=A+}}
    \item{{title=A}}
    \item{{title=A-}}
    \item{{title=A/B}}
    \item{{title=B+}}
    \item{{title=B}}
    \item{{title=B-}}
    \item{{title=B/C}}
    \item{{title=C+}}
    \item{{title=C}}
    \item{{title=C-}}
    \item{{title=C/D}}
    \item{{title=D+}}
    \item{{title=D}}
    \item{{title=D-}}
    \item{{title=D/R}}
    \item{{title=R+}}
    \item{{title=R}}
    \item{{title=R-}}
    \item{{title=R--}}
\end{{popupmenu}}

\begin{{insDLJS}}[AeBMenu]{{md}}{{Menu Data}}
\myMenu
\end{{insDLJS}}

    \makeatletter
        \let\@oddfoot\@empty\let\@evenfoot\@empty
        \def\@evenhead{{\thepage\hfil\slshape\leftmark
                        {{\rule[-0.11cm]{{-\textwidth}}{{0.03cm}}
                        \rule[-0.11cm]{{\textwidth}}{{0.03cm}}}}}}
        \def\@oddhead{{{{\slshape\rightmark}}\hfil\thepage
                        {{\rule[-0.11cm]{{-\textwidth}}{{0.03cm}}
                        \rule[-0.11cm]{{\textwidth}}{{0.03cm}}}}}}
        \let\@mkboth\markboth
        \markright{{{{\bf {replacement} }}\hskip 3em  \today}}
        \def\maketitle{{
            \centerline{{\Large\bfseries\@title}}
            \bigskip
        }}
    \makeatother

    \lstset{{language=python,
            extendedchars=true,
            aboveskip = 0.5ex,
            belowskip = 0.6ex,
            basicstyle=\ttfamily,
            keywordstyle=\sffamily\bfseries,
            identifierstyle=\sffamily,
            commentstyle=\slshape\color{{darkgreen}},
            stringstyle=\rmfamily\color{{blue}},
            showstringspaces=false,
            tabsize=4,
            breaklines=true,
            numberstyle=\footnotesize\color{{grey}},
            classoffset=1,
            morekeywords={{eyes,zeros,zeros_like,ones,ones_like,array,rand,indentity,mat,vander}},keywordstyle=\color{{darkblue}},
            classoffset=2,
            otherkeywords={{[,],=,:}},keywordstyle=\color{{purple}}\bfseries,
            classoffset=0""".format(andrewid=data['ANDREWID'],
            assignment=data['ASSIGNMENT'],
            fullname=data['NAME'],
            replacement=( re.sub( "_", r'\\_', options.infilename) )) + options.latexescapes * r""",
            mathescape=true""" +"""
            }
    """

        if options.nocode:
            latex_column_sep = r"""
    \setlength\columnseprule{0.4pt}
    """
        else:
            latex_column_sep = ""


        latex_doublepage = r"""
    \usepackage[landscape,left=1.5cm,right=1.1cm,top=1.8cm,bottom=1.2cm]{geometry}
    \usepackage{multicol}
    \def\inputBlocksize{\small}
    \makeatletter
        \renewcommand\normalsize{%
        \@setfontsize\normalsize\@ixpt\@xipt%
        \abovedisplayskip 8\p@ \@plus4\p@ \@minus4\p@
        \abovedisplayshortskip \z@ \@plus3\p@
        \belowdisplayshortskip 5\p@ \@plus3\p@ \@minus3\p@
        \belowdisplayskip \abovedisplayskip
        \let\@listi\@listI}
        \normalsize
        \renewcommand\small{%
        \@setfontsize\small\@viiipt\@ixpt%
        \abovedisplayskip 5\p@ \@plus2\p@ \@minus2\p@
        \abovedisplayshortskip \z@ \@plus1\p@
        \belowdisplayshortskip 3\p@ \@plus\p@ \@minus2\p@
        \def\@listi{\leftmargin\leftmargini
                    \topsep 3\p@ \@plus\p@ \@minus\p@
                    \parsep 2\p@ \@plus\p@ \@minus\p@
                    \itemsep \parsep}%
        \belowdisplayskip \abovedisplayskip
        }
        \renewcommand\footnotesize{%
        \@setfontsize\footnotesize\@viipt\@viiipt
        \abovedisplayskip 4\p@ \@plus2\p@ \@minus2\p@
        \abovedisplayshortskip \z@ \@plus1\p@
        \belowdisplayshortskip 2.5\p@ \@plus\p@ \@minus\p@
        \def\@listi{\leftmargin\leftmargini
                    \topsep 3\p@ \@plus\p@ \@minus\p@
                    \parsep 2\p@ \@plus\p@ \@minus\p@
                    \itemsep \parsep}%
        \belowdisplayskip \abovedisplayskip
        }
        \renewcommand\scriptsize{\@setfontsize\scriptsize\@vipt\@viipt}
        \renewcommand\tiny{\@setfontsize\tiny\@vpt\@vipt}
        \renewcommand\large{\@setfontsize\large\@xpt\@xiipt}
        \renewcommand\Large{\@setfontsize\Large\@xipt{13}}
        \renewcommand\LARGE{\@setfontsize\LARGE\@xiipt{14}}
        \renewcommand\huge{\@setfontsize\huge\@xivpt{18}}
        \renewcommand\Huge{\@setfontsize\Huge\@xviipt{22}}
        \setlength\parindent{14pt}
        \setlength\smallskipamount{3\p@ \@plus 1\p@ \@minus 1\p@}
        \setlength\medskipamount{6\p@ \@plus 2\p@ \@minus 2\p@}
        \setlength\bigskipamount{12\p@ \@plus 4\p@ \@minus 4\p@}
        \setlength\headheight{12\p@}
        \setlength\headsep   {25\p@}
        \setlength\topskip   {9\p@}
        \setlength\footskip{30\p@}
        \setlength\maxdepth{.5\topskip}
    \makeatother

    \AtBeginDocument{
    \setlength\columnsep{1.1cm}
    """ + latex_column_sep + r"""
    \begin{multicols*}{2}
    \small}
    \AtEndDocument{\end{multicols*}}
    """

        if options.double:
            self.preamble += latex_doublepage
        else:
            self.preamble += r"""\usepackage[top=1in,bottom=1in,left=1in,right=1in]{geometry}
    \def\inputBlocksize{\normalsize}

        """    

        if options.outtype == "tex":
            self.compile = self.compile2tex
        else:
            self.compile = self.compile2pdf
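
The single re.sub in this preamble builder escapes underscores in the input file name before it is interpolated into LaTeX, where a bare _ is a subscript marker. The same escaping in isolation (the file name is invented):

import re

infilename = "my_homework_1.py"  # hypothetical input file name

# escape underscores so LaTeX does not treat them as subscript markers
replacement = re.sub("_", r'\\_', infilename)

print(replacement)  # my\_homework\_1.py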

Example 145

Project: madparts
Source File: eagle.py
View license
  def export_footprint(self, interim):
    # make a deep copy so we can make mods without harm
    interim = copy.deepcopy(interim)
    interim = self.add_ats_to_names(interim)
    interim = clean_floats(interim)
    meta = inter.get_meta(interim)
    name = eget(meta, 'name', 'Name not found')
    # make name eagle compatible
    name = re.sub(' ','_',name)
    # check if there is an existing package
    # and if so, replace it
    packages = self.soup.eagle.drawing.packages('package')
    package = None
    for some_package in packages:
      if some_package['name'].lower() == name.lower():
        package = some_package
        package.clear()
        break
    if package == None:
      package = self.soup.new_tag('package')
      self.soup.eagle.drawing.packages.append(package)
      package['name'] = name

    def pad(shape):
      pad = self.soup.new_tag('pad')
      pad['name'] = shape['name']
      # don't set layer in a pad, it is implicit
      pad['x'] = fget(shape, 'x')
      pad['y'] = fget(shape, 'y')
      drill = fget(shape, 'drill')
      pad['drill'] = drill
      pad['rot'] = "R%d" % (fget(shape, 'rot'))
      r = fget(shape, 'r')
      shape2 = 'disc' # disc is the default
      if 'shape' in shape:
        shape2 = shape['shape']
      if shape2 == 'disc':
        pad['shape'] = 'round'
        if f_neq(r, drill*1.5):
          pad['diameter'] = r*2
      elif shape2 == 'octagon':
        pad['shape'] = 'octagon'
        if f_neq(r, drill*1.5):
          pad['diameter'] = r*2
      elif shape2 == 'rect':
        ro = iget(shape, 'ro')
        if ro == 0: 
          pad['shape'] = 'square'
          if f_neq(shape['dx'], drill*1.5):
            pad['diameter'] = float(shape['dx'])
        elif 'drill_off_dx' in shape:
          pad['shape'] = 'offset'
          if f_neq(shape['dy'], drill*1.5):
            pad['diameter'] = float(shape['dy'])
        else:
          pad['shape'] = 'long'
          if f_neq(shape['dy'], drill*1.5):
            pad['diameter'] = float(shape['dy'])
      package.append(pad)

    def smd(shape):
      smd = self.soup.new_tag('smd')
      smd['name'] = shape['name']
      smd['x'] = fget(shape, 'x')
      smd['y'] = fget(shape, 'y')
      smd['dx'] = fget(shape, 'dx')
      smd['dy'] = fget(shape, 'dy')
      smd['roundness'] = iget(shape, 'ro')
      smd['rot'] = "R%d" % (fget(shape, 'rot'))
      smd['layer'] = type_to_layer_number('smd')
      package.append(smd)

    def hole(shape):
      hole = self.soup.new_tag('hole')
      hole['drill'] = fget(shape, 'drill')
      hole['x'] = fget(shape, 'x')
      hole['y'] = fget(shape, 'y')
      package.append(hole)

    def rect(shape, layer):
      rect = self.soup.new_tag('rectangle')
      x = fget(shape, 'x')
      y = fget(shape, 'y')
      dx = fget(shape, 'dx')
      dy = fget(shape, 'dy')
      rect['x1'] = x - dx/2
      rect['x2'] = x + dx/2
      rect['y1'] = y - dy/2
      rect['y2'] = y + dy/2
      rect['rot'] = "R%d" % (fget(shape, 'rot'))
      rect['layer'] = layer
      package.append(rect)

    def label(shape, layer):
      label = self.soup.new_tag('text')
      x = fget(shape,'x')
      y = fget(shape,'y')
      dy = fget(shape,'dy', 1)
      s = shape['value'].upper()
      if s == "NAME": 
        s = ">NAME"
        layer = type_to_layer_number('name')
      if s == "VALUE": 
        s = ">VALUE"
        layer = type_to_layer_number('value')
      label['x'] = x
      label['y'] = y
      label['size'] = dy
      label['layer'] = layer
      label['align'] = 'center'
      label.string = s
      package.append(label)
    
    # a disc is just a circle with a clever radius and width
    def disc(shape, layer):
      r = fget(shape, 'r')
      rx = fget(shape, 'rx', r)
      ry = fget(shape, 'ry', r)
      x = fget(shape,'x')
      y = fget(shape,'y')
      disc = self.soup.new_tag('circle')
      disc['x'] = x
      disc['y'] = y
      disc['radius'] = r/2
      disc['width'] = r/2
      disc['layer'] = layer
      package.append(disc)
  
    def circle(shape, layer):
      r = fget(shape, 'r')
      rx = fget(shape, 'rx', r)
      ry = fget(shape, 'ry', r)
      x = fget(shape,'x')
      y = fget(shape,'y')
      w = fget(shape,'w')
      if 'a1' in shape or 'a2' in shape:
        wire = self.soup.new_tag('wire')
        a1 = fget(shape, 'a1')
        a2 = fget(shape, 'a2')
        wire['width'] = w
        wire['curve'] = a2-a1
        a1 = a1 * math.pi / 180
        a2 = a2 * math.pi / 180
        wire['x1'] = x + r * math.cos(a1)
        wire['y1'] = y + r * math.sin(a1)
        wire['x2'] = x + r * math.cos(a2)
        wire['y2'] = y + r * math.sin(a2)
        wire['layer'] = layer
        package.append(wire)
      else:
        circle = self.soup.new_tag('circle')
        circle['x'] = x
        circle['y'] = y
        circle['radius'] = r
        circle['width'] = w
        circle['layer'] = layer
        package.append(circle)

    def line(shape, layer):
      x1 = fget(shape, 'x1')
      y1 = fget(shape, 'y1')
      x2 = fget(shape, 'x2')
      y2 = fget(shape, 'y2')
      w = fget(shape, 'w')
      line = self.soup.new_tag('wire')
      line['x1'] = x1
      line['y1'] = y1
      line['x2'] = x2
      line['y2'] = y2
      line['width'] = w
      line['layer'] = layer
      if 'curve' in shape:
        if shape['curve'] != 0.0:
          line['curve'] = fget(shape, 'curve')
      package.append(line)
  
    # eagle polygon format is somewhat weird
    # each vertex is actually a starting point towards the next
    # where the last one is a starting point around towards the first
    #  <polygon width="0.127" layer="21">
    #  <vertex x="-1" y="-1"/>
    #  <vertex x="-1" y="1"/>
    #  <vertex x="0" y="1" curve="-90"/>
    #  <vertex x="1" y="0" curve="-90"/>
    #  <vertex x="0" y="-1"/>
    #  </polygon>
    def polygon(shape, layer):
      p = self.soup.new_tag('polygon')
      p['width'] = fget(shape, 'w')
      p['layer'] = layer
      for v in shape['v']:
        vert = self.soup.new_tag('vertex')
        vert['x'] = fget(v, 'x1')
        vert['y'] = fget(v, 'y1')
        curve = fget(v, 'curve')
        if curve != 0.0:
          vert['curve'] = fget(v, 'curve')
        p.append(vert)
      package.append(p)

    def silk(shape):
      if not 'shape' in shape: return
      layer = type_to_layer_number(shape['type'])
      s = shape['shape']
      if s == 'line' or s == 'vertex': line(shape, layer)
      elif s == 'circle': circle(shape, layer)
      elif s == 'disc': disc(shape, layer)
      elif s == 'label': label(shape, layer)
      elif s == 'rect': rect(shape, layer)
      elif s == 'polygon': polygon(shape, layer)

    def unknown(shape):
      pass

    desc = oget(meta, 'desc', '')
    parent_idx = oget(meta, 'parent', None)
    description = self.soup.new_tag('description')
    package.append(description)
    description.string = desc + "\n<br/><br/>\nGenerated by 'madparts'.<br/>\n"

    # TODO rework to be shape+type based ?
    for shape in interim:
      if 'type' in shape:
        {
          'pad': pad,
          'silk': silk,
          'cu': silk,
          'docu': silk,
          'keepout': silk,
          'stop': silk,
          'restrict': silk,
          'vrestrict': silk,
          'smd': smd,
          'hole': hole,
          }.get(shape['type'], unknown)(shape)
    return name
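
re.sub appears only once here, at the top, to make the footprint name Eagle-compatible by turning spaces into underscores. In isolation (the name is made up); a plain str.replace would do the same job, re.sub is simply what the project uses:

import re

name = "SOT 23 package"  # hypothetical footprint name

# Eagle package names may not contain spaces; swap them for underscores
name = re.sub(' ', '_', name)

print(name)  # SOT_23_package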

Example 146

Project: madparts
Source File: eagle.py
View license
  def import_footprint(self, name):

    packages = self.soup.eagle.drawing.packages('package')
    package = None
    for p in packages:
       if p['name'] == name:
         package = p
         break
    if package == None:
      raise Exception("footprint not found %s " % (name))
    meta = {}
    meta['type'] = 'meta'
    meta['name'] = name
    meta['desc'] = None
    l = [meta]

    def clean_name(name):
      if len(name) > 2 and name[0:2] == 'P$':
        name = name[2:]
        # this might cause name clashes :(
        # get rid of @ suffixes
      return re.sub('@\d+$', '', name)

    def text(text):
      res = {}
      res['type'] = 'silk'
      res['shape'] = 'label'
      s = text.string
      layer = int(text['layer'])
      size = float(text['size'])
      align = 'bottom-left' # eagle default
      if text.has_attr('align'):
        align = text['align']
      if align == 'center':
        x = float(text['x'])
        y = float(text['y'])
      elif align == 'bottom-left':
        x = float(text['x']) + len(s)*size/2
        y = float(text['y']) + size/2
      else:
        # TODO deal with all other cases; assuming center
        # eagle supports: bottom-left | bottom-center | bottom-right | center-left | center | center-right | top-left | top-center | top-right
        x = float(text['x']) + len(s)*size/2
        y = float(text['y']) + size/2
      res['value'] = s
      if layer == 25 and s.upper() == '>NAME':
        res['value'] = 'NAME'
      elif layer == 27 and s.upper() == '>VALUE':
        res['value'] = 'VALUE'
      if x != 0: res['x'] = x
      if y != 0: res['y'] = y
      if text.has_attr('size'):
        res['dy'] = text['size']
      return res

    def smd(smd):
      res = {}
      res['type'] = 'smd'
      res['shape'] = 'rect'
      res['name'] = clean_name(smd['name'])
      res['dx'] = float(smd['dx'])
      res['dy'] = float(smd['dy'])
      res['x'] = float(smd['x'])
      res['y'] = float(smd['y'])
      if smd.has_attr('rot'):
        res['rot'] = int(smd['rot'][1:])
      if smd.has_attr('roundness'):
        if smd['roundness'] != '0':
          res['ro'] = smd['roundness']
      return res

    def rect(rect):
      res = {}
      res['type'] = layer_number_to_type(int(rect['layer']))
      res['shape'] = 'rect'
      x1 = float(rect['x1'])
      y1 = float(rect['y1'])
      x2 = float(rect['x2'])
      y2 = float(rect['y2'])
      res['x'] = (x1+x2)/2
      res['y'] = (y1+y2)/2
      res['dx'] = abs(x1-x2)
      res['dy'] = abs(y1-y2)
      if rect.has_attr('rot'):
        res['rot'] = int(rect['rot'][1:])
      return res

    def wire(wire):
      res = {}
      res['type'] = layer_number_to_type(int(wire['layer']))
      res['shape'] = 'vertex'
      res['x1'] = float(wire['x1'])
      res['y1'] = float(wire['y1'])
      res['x2'] = float(wire['x2'])
      res['y2'] = float(wire['y2'])
      res['w'] = float(wire['width'])
      # asymmetry here: an Arc exported will come back as
      # a curved vertex
      if wire.has_attr('curve'):
        res['curve'] = float(wire['curve'])
      return res

    def circle(circle):
      res = {}
      res['type'] = layer_number_to_type(int(circle['layer']))
      res['shape'] = 'circle'
      w = float(circle['width'])
      r = float(circle['radius'])
      res['x'] = float(circle['x'])
      res['y'] = float(circle['y'])
      if w == r:
        res['shape'] = 'disc'
        res['r'] = r * 2
      else:
        res['w'] = w
        res['r'] = r
      return res

    def description(desc):
      meta['desc'] = desc.string
      return None

    def pad(pad):
      res = {}
      res['type'] = 'pad'
      res['name'] = clean_name(pad['name'])
      res['x'] = float(pad['x'])
      res['y'] = float(pad['y'])
      drill = float(pad['drill'])
      res['drill'] = drill
      if pad.has_attr('diameter'):
        dia = float(pad['diameter'])
      else:
        dia = res['drill'] * 1.5
      if pad.has_attr('rot'):
        res['rot'] = int(pad['rot'][1:])
      shape = 'round'
      if pad.has_attr('shape'):
        shape = pad['shape']
      if shape == 'round':
        res['shape'] = 'disc'
        res['r'] = 0.0
        #if dia/2 > drill:
        res['r'] = dia/2
      elif shape == 'square':
        res['shape'] = 'rect'
        res['dx'] = dia
        res['dy'] = dia
      elif shape == 'long':
        res['shape'] = 'rect'
        res['ro'] = 100
        res['dx'] = 2*dia
        res['dy'] = dia
      elif shape == 'offset':
        res['shape'] = 'rect'
        res['ro'] = 100
        res['dx'] = 2*dia
        res['dy'] = dia
        res['drill_off_dx'] = -dia/2
      elif shape == 'octagon':
        res['shape'] = 'octagon'
        res['r'] = dia/2
      return res
     
    def hole(x):
      res = {'type':'hole', 'shape':'hole'}
      res['drill'] = fget(x,'drill')
      res['x'] = fget(x,'x')
      res['y'] = fget(x,'y')
      return res

    def polygon(x):
      res = { 'shape':'polygon' }
      res['w'] = float(x['width'])
      res['type'] = layer_number_to_type(int(x['layer']))
      res['v'] = []
      for e in x.find_all('vertex'):
        vert = { 'shape':'vertex' }
        vert['x1'] = float(e['x'])
        vert['y1'] = float(e['y'])
        if e.has_attr('curve'):
          vert['curve'] = float(e['curve'])
        res['v'].append(vert)
      l = len(res['v'])
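      # i-l+1 is a negative index (0 for the last vertex), so each vertex's
      # (x2, y2) becomes the next vertex's (x1, y1), closing the polygon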
      for i in range(0, l):
        res['v'][i]['x2'] = res['v'][i-l+1]['x1']
        res['v'][i]['y2'] = res['v'][i-l+1]['y1']
      return res

    def unknown(x):
      res = {}
      res['type'] = 'unknown'
      res['value'] = str(x)
      res['shape'] = 'unknown'
      return res

    for x in package.contents:
      if type(x) == Tag:
        result = {
          'circle': circle,
          'description': description,
          'pad': pad,
          'smd': smd,
          'text': text,
          'wire': wire,
          'rectangle': rect,
          'polygon': polygon,
          'hole': hole,
          }.get(x.name, unknown)(x)
        if result != None: l.append(result)
    return clean_floats(l)
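
The regex work in this importer is clean_name, which strips the P$ prefix Eagle puts on auto-generated pad names and then drops any @<digits> suffix. A standalone sketch (the pad names are invented):

import re

def clean_name(name):
    # strip Eagle's P$ prefix from auto-generated pad names
    if len(name) > 2 and name[0:2] == 'P$':
        name = name[2:]
    # drop @-suffixes such as '@2'; this may cause name clashes
    return re.sub(r'@\d+$', '', name)

print(clean_name('P$3'))    # 3
print(clean_name('GND@2'))  # GND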

Example 147

Project: madparts
Source File: eagle.py
View license
  def import_footprint(self, name):

    packages = self.soup.eagle.drawing.packages('package')
    package = None
    for p in packages:
       if p['name'] == name:
         package = p
         break
    if package == None:
      raise Exception("footprint not found %s " % (name))
    meta = {}
    meta['type'] = 'meta'
    meta['name'] = name
    meta['desc'] = None
    l = [meta]

    def clean_name(name):
      if len(name) > 2 and name[0:2] == 'P$':
        name = name[2:]
        # this might cause name clashes :(
        # get rid of @ suffixes
      return re.sub('@\d+$', '', name)

    def text(text):
      res = {}
      res['type'] = 'silk'
      res['shape'] = 'label'
      s = text.string
      layer = int(text['layer'])
      size = float(text['size'])
      align = 'bottom-left' # eagle default
      if text.has_attr('align'):
        align = text['align']
      if align == 'center':
        x = float(text['x'])
        y = float(text['y'])
      elif align == 'bottom-left':
        x = float(text['x']) + len(s)*size/2
        y = float(text['y']) + size/2
      else:
        # TODO deal with all other cases; assuming center
        # eagle supports: bottom-left | bottom-center | bottom-right | center-left | center | center-right | top-left | top-center | top-right
        x = float(text['x']) + len(s)*size/2
        y = float(text['y']) + size/2
      res['value'] = s
      if layer == 25 and s.upper() == '>NAME':
        res['value'] = 'NAME'
      elif layer == 27 and s.upper() == '>VALUE':
        res['value'] = 'VALUE'
      if x != 0: res['x'] = x
      if y != 0: res['y'] = y
      if text.has_attr('size'):
        res['dy'] = text['size']
      return res

    def smd(smd):
      res = {}
      res['type'] = 'smd'
      res['shape'] = 'rect'
      res['name'] = clean_name(smd['name'])
      res['dx'] = float(smd['dx'])
      res['dy'] = float(smd['dy'])
      res['x'] = float(smd['x'])
      res['y'] = float(smd['y'])
      if smd.has_attr('rot'):
        res['rot'] = int(smd['rot'][1:])
      if smd.has_attr('roundness'):
        if smd['roundness'] != '0':
          res['ro'] = smd['roundness']
      return res

    def rect(rect):
      res = {}
      res['type'] = layer_number_to_type(int(rect['layer']))
      res['shape'] = 'rect'
      x1 = float(rect['x1'])
      y1 = float(rect['y1'])
      x2 = float(rect['x2'])
      y2 = float(rect['y2'])
      res['x'] = (x1+x2)/2
      res['y'] = (y1+y2)/2
      res['dx'] = abs(x1-x2)
      res['dy'] = abs(y1-y2)
      if rect.has_attr('rot'):
        res['rot'] = int(rect['rot'][1:])
      return res

    def wire(wire):
      res = {}
      res['type'] = layer_number_to_type(int(wire['layer']))
      res['shape'] = 'vertex'
      res['x1'] = float(wire['x1'])
      res['y1'] = float(wire['y1'])
      res['x2'] = float(wire['x2'])
      res['y2'] = float(wire['y2'])
      res['w'] = float(wire['width'])
      # asymmetry here: an Arc exported will come back as
      # a curved vertex
      if wire.has_attr('curve'):
        res['curve'] = float(wire['curve'])
      return res

    def circle(circle):
      res = {}
      res['type'] = layer_number_to_type(int(circle['layer']))
      res['shape'] = 'circle'
      w = float(circle['width'])
      r = float(circle['radius'])
      res['x'] = float(circle['x'])
      res['y'] = float(circle['y'])
      if w == r:
        res['shape'] = 'disc'
        res['r'] = r * 2
      else:
        res['w'] = w
        res['r'] = r
      return res

    def description(desc):
      meta['desc'] = desc.string
      return None

    def pad(pad):
      res = {}
      res['type'] = 'pad'
      res['name'] = clean_name(pad['name'])
      res['x'] = float(pad['x'])
      res['y'] = float(pad['y'])
      drill = float(pad['drill'])
      res['drill'] = drill
      if pad.has_attr('diameter'):
        dia = float(pad['diameter'])
      else:
        dia = res['drill'] * 1.5
      if pad.has_attr('rot'):
        res['rot'] = int(pad['rot'][1:])
      shape = 'round'
      if pad.has_attr('shape'):
        shape = pad['shape']
      if shape == 'round':
        res['shape'] = 'disc'
        res['r'] = 0.0
        #if dia/2 > drill:
        res['r'] = dia/2
      elif shape == 'square':
        res['shape'] = 'rect'
        res['dx'] = dia
        res['dy'] = dia
      elif shape == 'long':
        res['shape'] = 'rect'
        res['ro'] = 100
        res['dx'] = 2*dia
        res['dy'] = dia
      elif shape == 'offset':
        res['shape'] = 'rect'
        res['ro'] = 100
        res['dx'] = 2*dia
        res['dy'] = dia
        res['drill_off_dx'] = -dia/2
      elif shape == 'octagon':
        res['shape'] = 'octagon'
        res['r'] = dia/2
      return res
     
    def hole(x):
      res = {'type':'hole', 'shape':'hole'}
      res['drill'] = fget(x,'drill')
      res['x'] = fget(x,'x')
      res['y'] = fget(x,'y')
      return res

    def polygon(x):
      res = { 'shape':'polygon' }
      res['w'] = float(x['width'])
      res['type'] = layer_number_to_type(int(x['layer']))
      res['v'] = []
      for e in x.find_all('vertex'):
        vert = { 'shape':'vertex' }
        vert['x1'] = float(e['x'])
        vert['y1'] = float(e['y'])
        if e.has_attr('curve'):
          vert['curve'] = float(e['curve'])
        res['v'].append(vert)
      l = len(res['v'])
      for i in range(0, l):
        res['v'][i]['x2'] = res['v'][i-l+1]['x1']
        res['v'][i]['y2'] = res['v'][i-l+1]['y1']
      return res

    def unknown(x):
      res = {}
      res['type'] = 'unknown'
      res['value'] = str(x)
      res['shape'] = 'unknown'
      return res

    for x in package.contents:
      if type(x) == Tag:
        result = {
          'circle': circle,
          'description': description,
          'pad': pad,
          'smd': smd,
          'text': text,
          'wire': wire,
          'rectangle': rect,
          'polygon': polygon,
          'hole': hole,
          }.get(x.name, unknown)(x)
        if result != None: l.append(result)
    return clean_floats(l)

Example 148

Project: madparts
Source File: kicad_old.py
View license
  def export_footprint(self, interim, timestamp = None):
    if timestamp is None:
      timestamp = time.time()
    meta = inter.get_meta(interim)
    name = eget(meta, 'name', 'Name not found')
    # make name kicad compatible
    name = re.sub(' ','_',name)
    self.name = name
    desc = oget(meta, 'desc', '')
    parent_idx = oget(meta, 'parent', None)
    d = []
    # generate kicad-old for individual footprint
    # use metric; convert to imperial in "save" if needed
    d.append("$MODULE %s" % (name))
    d.append("Po 0 0 0 15 %x 00000000 ~~" % timestamp)
    d.append("Li %s" % (name))
    d.append("Cd %s" % (desc.replace('\n',' '))) # ugly
    # assuming Kw is optional
    d.append("Sc 0")
    # d.append("AR ") assume AR is optional
    d.append("Op 0 0 0")
    # if no T0 or T1 are specified, kicad defaults to this:
    # right now I'm assuming this is optional
    # T0 0 0 1.524 1.524 0 0.15 N V 21 N ""
    # T1 0 0 1.524 1.524 0 0.15 N V 21 N ""

    def pad(shape, smd=False):
      l = ['$PAD']
      # Sh "<pad name>" shape Xsize Ysize Xdelta Ydelta Orientation
      # octagons are not supported by kicad
      if shape['shape'] == 'disc' or shape['shape'] == 'octagon':
        sh = 'C'
        dx = shape['r']*2
        dy = dx
      elif shape['shape'] == 'rect':
        dx = shape['dx']
        dy = shape['dy']
        sh = 'R'
        # any rect with roundness >= 50 is considered kicad 'oblong'
        if 'ro' in shape:
          if shape['ro'] >= 50:
            sh = 'O'
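      # kicad-old stores pad orientation in tenths of a degree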
      rot = fget(shape, 'rot')*10
      l.append("Sh \"%s\" %s %s %s %s %s %s" 
               % (shape['name'], sh, dx, dy, 0, 0, rot))
      # Dr <Pad drill> Xoffset Yoffset (round hole)
      if not smd:
        l.append("Dr %s %s %s" % (fget(shape, 'drill'), 
          fget(shape, 'drill_off_dx'), fc(-fget(shape, 'drill_off_dy'))))
      # Po <x> <y>
      l.append("Po %s %s" % (fget(shape,'x'), fc(-fget(shape,'y'))))
      # At <Pad type> N <layer mask>
      t = 'STD'
      layer_mask = '00E0FFFF'
      if smd:
        t = 'SMD'
        layer_mask = '00888000'
      l.append("At %s N %s" % (t, layer_mask))
      l.append('Ne 0 ""')
      l.append("$ENDPAD")
      return l

    # DS Xstart Ystart Xend Yend Width Layer
    # DS 6000 1500 6000 -1500 120 21
    # DA Xcentre Ycentre Xstart_point Ystart_point angle width layer
    def vertex(shape, layer):
      l = []
      x1 = fget(shape, 'x1')
      y1 = fget(shape, 'y1')
      x2 = fget(shape, 'x2')
      y2 = fget(shape, 'y2')
      w = fget(shape, 'w')
      if not 'curve' in shape or shape['curve'] == 0.0:
        line = "DS %s %s %s %s %s %s" % (x1, fc(-y1), x2, fc(-y2), w, layer)
        l.append(line)
      else:
        curve =  fget(shape, 'curve')
        angle = curve*math.pi/180.0
        ((x0, y0), r, a1, a2) = mutil.calc_center_r_a1_a2((x1,y1),(x2,y2),angle)
        arc = "DA %f %f %f %f %s %s %s" % (x0, fc(-y0), x1, fc(-y1), int(round((-10.0)*(a2-a1))), w, layer)
        l.append(arc)
      return l

    # DC Xcentre Ycentre Xpoint Ypoint Width Layer
    def circle(shape, layer):
      l = []
      x = fget(shape, 'x')
      y = fget(shape, 'y')
      r = fget(shape, 'r')
      w = fget(shape, 'w')
      if not 'a1' in shape and not 'a2' in shape:
        circle = "DC %s %s %s %s %s %s" % (x, fc(-y), fc(x+(r/math.sqrt(2))), fc(-y+(r/math.sqrt(2))), w, layer)
        l.append(circle)
      else:
        # start == center point
        # end == start point of arc
        # angle == angled part in that direction
        a1 = fget(shape, 'a1')
        a2 = fget(shape, 'a2')
        a1rad = a1 * math.pi/180.0
        ex = x + r*math.cos(a1rad)
        ey = y + r*math.sin(a1rad)
        arc = "DA %f %f %f %f %s %s %s" % (x, fc(-y), ex, fc(-ey), int(round((-10)*(a2-a1))), w, layer)
        l.append(arc)
      return l
 
    def disc(shape, layer):
      l = []
      x = fget(shape, 'x')
      y = -fget(shape, 'y')
      r = fget(shape, 'r')
      rad = r/2
      ex = x+(rad/math.sqrt(2))
      ey = y+(rad/math.sqrt(2))
      circle = "DC %s %s %s %s %s %s" % (x, fc(y), fc(ex), fc(ey), rad, layer)
      l.append(circle)
      return l

    def hole(shape):
      layer = type_to_num(shape['type'])
      shape['r'] = shape['drill'] / 2
      return circle(shape, layer)

    # T0 -79 -3307 600 600 0 120 N V 21 N "XT"
    def label(shape, layer):
      s = shape['value'].upper()
      t = 'T2'
      visible = 'V'
      if s == 'VALUE': 
        t = 'T1'
        visible = 'I'
      if s == 'NAME': 
        t = 'T0'
      dy = fget(shape, 'dy', 1/1.6)
      w = fget(shape, 'w', 0.1)
      x = fget(shape, 'x')
      y = fc(-fget(shape, 'y'))
      # T0 -7 -3 60 60 0 10 N V 21 N "XT"
      line = "%s %s %s %s %s 0 %s N %s %s N \"%s\"" % (t, x, y, dy, dy, w, visible, layer, shape['value'])
      return [line]

    def rect(shape, layer):
      print "rect", shape, layer
      l = []
      w = fget(shape,'w')
      rot = abs(fget(shape, 'rot'))
      l.append("DP 0 0 0 0 %s %s %s" % (5, w, layer))
      x = fget(shape, 'x')
      y = -fget(shape, 'y')
      dx = fget(shape, 'dx')
      dy = fget(shape, 'dy')
      if rot == 90 or rot == 270:
        (dx,dy) = (dy,dx)
      def add(x1, y1):
        l.append("Dl %f %f" % (fc(x1), fc(y1)))
      add(x - dx/2, y - dy/2)
      add(x - dx/2, y + dy/2)
      add(x + dx/2, y + dy/2)
      add(x + dx/2, y - dy/2)
      add(x - dx/2, y - dy/2)
      return l
 
    # DP 0 0 0 0 corners_count width layer
    # DP 0 0 0 0 5 0.1 24
    # Dl corner_posx corner_posy
    # Dl 0 -1
    def polygon(shape, layer):
      l = []
      n = len(shape['v'])
      w = shape['w']
      l.append("DP 0 0 0 0 %s %s %s" % (n + 1, w, layer))
      for v in shape['v']:
        l.append("Dl %g %g" % (fc(v['x1']), fc(-v['y1'])))
      l.append("Dl %g %g" % (fc(shape['v'][0]['x1']), fc(-shape['v'][0]['y1'])))
      return l

    def silk(shape):
      if not 'shape' in shape: return None
      layer = type_to_num(shape['type'])
      s = shape['shape']
      if s == 'line': return vertex(shape, layer)
      if s == 'vertex': return vertex(shape, layer)
      elif s == 'circle': return circle(shape, layer)
      elif s == 'disc': return disc(shape, layer)
      elif s == 'label': return label(shape, layer)
      elif s == 'rect': return rect(shape, layer)
      elif s == 'polygon': return polygon(shape, layer) 

    def unknown(shape):
      return None

    for shape in interim:
      if 'type' in shape:
        l = {
          'pad': pad,
          'cu': silk,
          'silk': silk,
          'docu': silk,
          'keepout': unknown,
          'stop': silk,
          'glue': silk,
          'restrict': unknown,
          'vrestrict': unknown,
          'smd': lambda s: pad(s, smd=True),
          'hole': hole,
          }.get(shape['type'], unknown)(shape)
        if l != None:
         d += l
      
    d.append("$EndMODULE %s" % (name))
    self.data = d
    return name

Example 149

Project: madparts
Source File: kicad_old.py
View license
  def export_footprint(self, interim, timestamp = None):
    if timestamp is None:
      timestamp = time.time()
    meta = inter.get_meta(interim)
    name = eget(meta, 'name', 'Name not found')
    # make name kicad compatible
    name = re.sub(' ','_',name)
    self.name = name
    desc = oget(meta, 'desc', '')
    parent_idx = oget(meta, 'parent', None)
    d = []
    # generate kicad-old for individual footprint
    # use metric; convert to imperial in "save" if needed
    d.append("$MODULE %s" % (name))
    d.append("Po 0 0 0 15 %x 00000000 ~~" % timestamp)
    d.append("Li %s" % (name))
    d.append("Cd %s" % (desc.replace('\n',' '))) # ugly
    # assuming Kw is optional
    d.append("Sc 0")
    # d.append("AR ") assume AR is optional
    d.append("Op 0 0 0")
    # if no T0 or T1 are specified, kicad defaults to this:
    # right now I'm assuming this is optional
    # T0 0 0 1.524 1.524 0 0.15 N V 21 N ""
    # T1 0 0 1.524 1.524 0 0.15 N V 21 N ""

    def pad(shape, smd=False):
      l = ['$PAD']
      # Sh "<pad name>" shape Xsize Ysize Xdelta Ydelta Orientation
      # octagons are not supported by kicad
      if shape['shape'] == 'disc' or shape['shape'] == 'octagon':
        sh = 'C'
        dx = shape['r']*2
        dy = dx
      elif shape['shape'] == 'rect':
        dx = shape['dx']
        dy = shape['dy']
        sh = 'R'
        # any rect with roundness >= 50 is considered kicad 'oblong'
        if 'ro' in shape:
          if shape['ro'] >= 50:
            sh = 'O'
      rot = fget(shape, 'rot')*10
      l.append("Sh \"%s\" %s %s %s %s %s %s" 
               % (shape['name'], sh, dx, dy, 0, 0, rot))
      # Dr <Pad drill> Xoffset Yoffset (round hole)
      if not smd:
        l.append("Dr %s %s %s" % (fget(shape, 'drill'), 
          fget(shape, 'drill_off_dx'), fc(-fget(shape, 'drill_off_dy'))))
      # Po <x> <y>
      l.append("Po %s %s" % (fget(shape,'x'), fc(-fget(shape,'y'))))
      # At <Pad type> N <layer mask>
      t = 'STD'
      layer_mask = '00E0FFFF'
      if smd:
        t = 'SMD'
        layer_mask = '00888000'
      l.append("At %s N %s" % (t, layer_mask))
      l.append('Ne 0 ""')
      l.append("$ENDPAD")
      return l

    # DS Xstart Ystart Xend Yend Width Layer
    # DS 6000 1500 6000 -1500 120 21
    # DA Xcentre Ycentre Xstart_point Ystart_point angle width layer
    def vertex(shape, layer):
      l = []
      x1 = fget(shape, 'x1')
      y1 = fget(shape, 'y1')
      x2 = fget(shape, 'x2')
      y2 = fget(shape, 'y2')
      w = fget(shape, 'w')
      if not 'curve' in shape or shape['curve'] == 0.0:
        line = "DS %s %s %s %s %s %s" % (x1, fc(-y1), x2, fc(-y2), w, layer)
        l.append(line)
      else:
        curve =  fget(shape, 'curve')
        angle = curve*math.pi/180.0
        ((x0, y0), r, a1, a2) = mutil.calc_center_r_a1_a2((x1,y1),(x2,y2),angle)
        arc = "DA %f %f %f %f %s %s %s" % (x0, fc(-y0), x1, fc(-y1), int(round((-10.0)*(a2-a1))), w, layer)
        l.append(arc)
      return l

    # DC Xcentre Ycentre Xpoint Ypoint Width Layer
    def circle(shape, layer):
      l = []
      x = fget(shape, 'x')
      y = fget(shape, 'y')
      r = fget(shape, 'r')
      w = fget(shape, 'w')
      if not 'a1' in shape and not 'a2' in shape:
        circle = "DC %s %s %s %s %s %s" % (x, fc(-y), fc(x+(r/math.sqrt(2))), fc(-y+(r/math.sqrt(2))), w, layer)
        l.append(circle)
      else:
        # start == center point
        # end == start point of arc
        # angle == angled part in that direction
        a1 = fget(shape, 'a1')
        a2 = fget(shape, 'a2')
        a1rad = a1 * math.pi/180.0
        ex = x + r*math.cos(a1rad)
        ey = y + r*math.sin(a1rad)
        arc = "DA %f %f %f %f %s %s %s" % (x, fc(-y), ex, fc(-ey), int(round((-10)*(a2-a1))), w, layer)
        l.append(arc)
      return l
 
    def disc(shape, layer):
      l = []
      x = fget(shape, 'x')
      y = -fget(shape, 'y')
      r = fget(shape, 'r')
      rad = r/2
      ex = x+(rad/math.sqrt(2))
      ey = y+(rad/math.sqrt(2))
      circle = "DC %s %s %s %s %s %s" % (x, fc(y), fc(ex), fc(ey), rad, layer)
      l.append(circle)
      return l

    def hole(shape):
      layer = type_to_num(shape['type'])
      shape['r'] = shape['drill'] / 2
      return circle(shape, layer)

    # T0 -79 -3307 600 600 0 120 N V 21 N "XT"
    def label(shape, layer):
      s = shape['value'].upper()
      t = 'T2'
      visible = 'V'
      if s == 'VALUE': 
        t = 'T1'
        visible = 'I'
      if s == 'NAME': 
        t = 'T0'
      dy = fget(shape, 'dy', 1/1.6)
      w = fget(shape, 'w', 0.1)
      x = fget(shape, 'x')
      y = fc(-fget(shape, 'y'))
      # T0 -7 -3 60 60 0 10 N V 21 N "XT"
      line = "%s %s %s %s %s 0 %s N %s %s N \"%s\"" % (t, x, y, dy, dy, w, visible, layer, shape['value'])
      return [line]

    def rect(shape, layer):
      print "rect", shape, layer
      l = []
      w = fget(shape,'w')
      rot = abs(fget(shape, 'rot'))
      l.append("DP 0 0 0 0 %s %s %s" % (5, w, layer))
      x = fget(shape, 'x')
      y = -fget(shape, 'y')
      dx = fget(shape, 'dx')
      dy = fget(shape, 'dy')
      if rot == 90 or rot == 270:
        (dx,dy) = (dy,dx)
      def add(x1, y1):
        l.append("Dl %f %f" % (fc(x1), fc(y1)))
      add(x - dx/2, y - dy/2)
      add(x - dx/2, y + dy/2)
      add(x + dx/2, y + dy/2)
      add(x + dx/2, y - dy/2)
      add(x - dx/2, y - dy/2)
      return l
 
    # DP 0 0 0 0 corners_count width layer
    # DP 0 0 0 0 5 0.1 24
    # Dl corner_posx corner_posy
    # Dl 0 -1
    def polygon(shape, layer):
      l = []
      n = len(shape['v'])
      w = shape['w']
      l.append("DP 0 0 0 0 %s %s %s" % (n + 1, w, layer))
      for v in shape['v']:
        l.append("Dl %g %g" % (fc(v['x1']), fc(-v['y1'])))
      l.append("Dl %g %g" % (fc(shape['v'][0]['x1']), fc(-shape['v'][0]['y1'])))
      return l

    def silk(shape):
      if not 'shape' in shape: return None
      layer = type_to_num(shape['type'])
      s = shape['shape']
      if s == 'line': return vertex(shape, layer)
      if s == 'vertex': return vertex(shape, layer)
      elif s == 'circle': return circle(shape, layer)
      elif s == 'disc': return disc(shape, layer)
      elif s == 'label': return label(shape, layer)
      elif s == 'rect': return rect(shape, layer)
      elif s == 'polygon': return polygon(shape, layer) 

    def unknown(shape):
      return None

    for shape in interim:
      if 'type' in shape:
        l = {
          'pad': pad,
          'cu': silk,
          'silk': silk,
          'docu': silk,
          'keepout': unknown,
          'stop': silk,
          'glue': silk,
          'restrict': unknown,
          'vrestrict': unknown,
          'smd': lambda s: pad(s, smd=True),
          'hole': hole,
          }.get(shape['type'], unknown)(shape)
        if l != None:
         d += l
      
    d.append("$EndMODULE %s" % (name))
    self.data = d
    return name

Example 150

Project: papercut
Source File: phpbb_mysql.py
View license
    def do_POST(self, group_name, lines, ip_address, username=''):
        forum_id = self.get_forum(group_name)
        prefix = settings.phpbb_table_prefix
        # patch by Andreas Wegmann <[email protected]> to fix the handling of unusual encodings of messages
        lines = mime_decode_header(re.sub(q_quote_multiline, "=?\\1?Q?\\2\\3?=", lines))
        body = self.get_message_body(lines)
        author, email = from_regexp.search(lines, 0).groups()
        subject = subject_regexp.search(lines, 0).groups()[0].strip()
        # get the authentication information now
        if username != '':
            stmt = """
                    SELECT
                        user_id
                    FROM
                        %susers
                    WHERE
                        username='%s'""" % (prefix, username)
            num_rows = self.cursor.execute(stmt)
            if num_rows == 0:
                poster_id = -1
            else:
                poster_id = self.cursor.fetchone()[0]
            post_username = ''
        else:
            poster_id = -1
            post_username = author
        if lines.find('References') != -1:
            # get the 'modifystamp' value from the parent (if any)
            references = references_regexp.search(lines, 0).groups()
            parent_id, void = references[-1].strip().split('@')
            stmt = """
                    SELECT
                        topic_id
                    FROM
                        %sposts
                    WHERE
                        post_id=%s
                    GROUP BY
                        post_id""" % (prefix, parent_id)
            num_rows = self.cursor.execute(stmt)
            if num_rows == 0:
                return None
            thread_id = self.cursor.fetchone()[0]
        else:
            # create a new topic
            stmt = """
                    INSERT INTO
                        %stopics
                    (
                        forum_id,
                        topic_title,
                        topic_poster,
                        topic_time,
                        topic_status,
                        topic_vote,
                        topic_type
                    ) VALUES (
                        %s,
                        '%s',
                        %s,
                        UNIX_TIMESTAMP(),
                        0,
                        0,
                        0
                    )""" % (prefix, forum_id, self.quote_string(subject), poster_id)
            self.cursor.execute(stmt)
            thread_id = self.cursor.insert_id()
        stmt = """
                INSERT INTO
                    %sposts
                (
                    topic_id,
                    forum_id,
                    poster_id,
                    post_time,
                    poster_ip,
                    post_username,
                    enable_bbcode,
                    enable_html,
                    enable_smilies,
                    enable_sig
                ) VALUES (
                    %s,
                    %s,
                    %s,
                    UNIX_TIMESTAMP(),
                    '%s',
                    '%s',
                    1,
                    0,
                    1,
                    0
                )""" % (prefix, thread_id, forum_id, poster_id, self.encode_ip(ip_address), post_username)
        self.cursor.execute(stmt)
        new_id = self.cursor.insert_id()
        if not new_id:
            return None
        else:
            # insert into the '*posts_text' table
            stmt = """
                    INSERT INTO
                        %sposts_text
                    (
                        post_id,
                        bbcode_uid,
                        post_subject,
                        post_text
                    ) VALUES (
                        %s,
                        '%s',
                        '%s',
                        '%s'
                    )""" % (prefix, new_id, self.make_bbcode_uid(), self.quote_string(subject), self.quote_string(body))
            if not self.cursor.execute(stmt):
                # delete from 'topics' and 'posts' tables before returning...
                stmt = """
                        DELETE FROM
                            %stopics
                        WHERE
                            topic_id=%s""" % (prefix, thread_id)
                self.cursor.execute(stmt)
                stmt = """
                        DELETE FROM
                            %sposts
                        WHERE
                            post_id=%s""" % (prefix, new_id)
                self.cursor.execute(stmt)
                return None
            else:
                if lines.find('References') != -1:
                    # update the total number of posts in the forum
                    stmt = """
                            UPDATE
                                %sforums
                            SET
                                forum_posts=forum_posts+1,
                                forum_last_post_id=%s
                            WHERE
                                forum_id=%s
                            """ % (settings.phpbb_table_prefix, new_id, forum_id)
                    self.cursor.execute(stmt)
                else:
                    # update the total number of topics and posts in the forum
                    stmt = """
                            UPDATE
                                %sforums
                            SET
                                forum_topics=forum_topics+1,
                                forum_posts=forum_posts+1,
                                forum_last_post_id=%s
                            WHERE
                                forum_id=%s
                            """ % (settings.phpbb_table_prefix, new_id, forum_id)
                    self.cursor.execute(stmt)
                # update the user's post count, if this is indeed a real user
                if poster_id != -1:
                    stmt = """
                            UPDATE
                                %susers
                            SET
                                user_posts=user_posts+1
                            WHERE
                                user_id=%s""" % (prefix, poster_id)
                    self.cursor.execute(stmt)
                # setup last post on the topic thread (Patricio Anguita <[email protected]>)
                stmt = """
                        UPDATE
                            %stopics
                        SET
                            topic_replies=topic_replies+1,
                            topic_last_post_id=%s
                        WHERE
                            topic_id=%s""" % (prefix, new_id, thread_id)
                self.cursor.execute(stmt)
                # if this is the first post on the thread.. (Patricio Anguita <[email protected]>)
                if lines.find('References') == -1:
                    stmt = """
                            UPDATE
                                %stopics
                            SET
                                topic_first_post_id=%s
                            WHERE
                                topic_id=%s AND
                                topic_first_post_id=0""" % (prefix, new_id, thread_id)
                    self.cursor.execute(stmt)
                return 1
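
The substitution at the top of do_POST re-joins RFC 2047 Q-encoded words that were folded across header lines before mime_decode_header decodes them. q_quote_multiline is defined elsewhere in the module, so the pattern below is a simplified, assumed stand-in that only illustrates the group-reference replacement style:

import re

# simplified stand-in for papercut's q_quote_multiline: two adjacent encoded
# words with the same charset (group 1), split by folding whitespace
q_quote_multiline = re.compile(
    r"=\?(.*?)\?[qQ]\?(.*?)\?=\s+=\?\1\?[qQ]\?(.*?)\?=")

lines = "Subject: =?iso-8859-1?Q?Hello?=\n =?iso-8859-1?Q?_World?="

# merge the two encoded words back into one before decoding
print(re.sub(q_quote_multiline, "=?\\1?Q?\\2\\3?=", lines))
# Subject: =?iso-8859-1?Q?Hello_World?=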