Here are examples of the Python API os.walk, taken from open source projects. By voting up, you can indicate which examples are most useful and appropriate.
165 Examples
0
Example 51
Project: pyinfra Source File: files.py
@operation(pipeline_facts={
    'find_files': 'destination'
})
def sync(
    state, host, source, destination,
    user=None, group=None, mode=None, delete=False, exclude=None
):
    '''
    Syncs a local directory with a remote one, with delete support. Note that delete will
    remove extra files on the remote side, but not extra directories.

    + source: local directory to sync
    + destination: remote directory to sync to
    + user: user to own the files and directories
    + group: group to own the files and directories
    + mode: permissions of the files
    + delete: delete remote files not present locally
    + exclude: string or list/tuple of strings to match & exclude files (eg *.pyc)
    '''

    # If we don't enforce the source ending with /, remote_dirname below might start with
    # a /, which makes the path.join cut off the destination bit.
    if not source.endswith(path.sep):
        source = '{0}{1}'.format(source, path.sep)

    # Source relative to deploy.py
    if state.deploy_dir:
        source = path.join(state.deploy_dir, source)

    # Normalise exclude to a (possibly empty) list/tuple of patterns
    if exclude is None:
        exclude = ()
    elif not isinstance(exclude, (list, tuple)):
        exclude = [exclude]

    put_files = []
    ensure_dirnames = []
    for dirname, _, filenames in walk(source):
        # Every path walk() yields starts with source, so slice the prefix off.
        # (str.replace would also strip later occurrences of the same text
        # further down the path and corrupt the remote directory name.)
        remote_dirname = dirname[len(source):]

        if remote_dirname:
            ensure_dirnames.append(remote_dirname)

        for filename in filenames:
            full_filename = path.join(dirname, filename)

            # Should we exclude this file?
            if any(fnmatch(full_filename, match) for match in exclude):
                continue

            put_files.append((
                # Join local as normal (unix, win)
                full_filename,
                # Join remote as unix like
                '/'.join(
                    item for item in
                    (destination, remote_dirname, filename)
                    if item
                )
            ))

    # Ensure the destination directory
    yield directory(
        state, host, destination,
        user=user, group=group,
    )

    # Ensure any remote dirnames
    for dirname in ensure_dirnames:
        yield directory(
            state, host,
            '/'.join((destination, dirname)),
            user=user, group=group,
        )

    # Put each file combination
    for local_filename, remote_filename in put_files:
        yield put(
            state, host,
            local_filename, remote_filename,
            user=user, group=group, mode=mode,
            add_deploy_dir=False,
        )

    # Delete any extra files
    if delete:
        remote_filenames = set(host.fact.find_files(destination) or [])
        wanted_filenames = set(remote_filename for _, remote_filename in put_files)
        for filename in remote_filenames - wanted_filenames:
            yield file(state, host, filename, present=False)
0
Example 52
Project: nikola Source File: status.py
def _execute(self, options, args):
    """Display site status.

    Prints how long ago the site was last deployed and which output files
    were modified since then, followed by counts (and, depending on the
    ``list_*`` entries of ``options``, listings) of scheduled, draft,
    private and published posts.
    """
    self.site.scan_posts()

    last_deploy = self.site.state.get('last_deploy')
    if last_deploy is not None:
        last_deploy = datetime.strptime(last_deploy, "%Y-%m-%dT%H:%M:%S.%f")
        last_deploy_offset = datetime.utcnow() - last_deploy
    else:
        print("It does not seem like you've ever deployed the site (or cache missing).")

    if last_deploy:
        # The stored timestamp is treated as UTC; convert it to local time once
        # instead of once per output file.
        deployed_at = last_deploy.replace(tzinfo=gettz("UTC")).astimezone(tz=tzlocal())

        fmod_since_deployment = []
        for root, _dirs, files in os.walk(self.site.config["OUTPUT_FOLDER"], followlinks=True):
            for fname in files:
                fpath = os.path.join(root, fname)
                fmodtime = datetime.fromtimestamp(os.stat(fpath).st_mtime)
                if fmodtime.replace(tzinfo=tzlocal()) > deployed_at:
                    fmod_since_deployment.append(fpath)

        if fmod_since_deployment:
            print("{0} output files modified since last deployment {1} ago.".format(len(fmod_since_deployment), self.human_time(last_deploy_offset)))
            if options['list_modified']:
                for fpath in fmod_since_deployment:
                    print("Modified: '{0}'".format(fpath))
        else:
            print("Last deployment {0} ago.".format(self.human_time(last_deploy_offset)))

    now = datetime.utcnow().replace(tzinfo=gettz("UTC"))
    posts_count = len(self.site.all_posts)

    # find all published posts
    posts_published = [post for post in self.site.all_posts if post.use_in_feeds]
    posts_published = sorted(posts_published, key=lambda post: post.source_path)

    # find all private posts
    posts_private = [post for post in self.site.all_posts if post.is_private]
    posts_private = sorted(posts_private, key=lambda post: post.source_path)

    # find all drafts
    posts_drafts = [post for post in self.site.all_posts if post.is_draft]
    posts_drafts = sorted(posts_drafts, key=lambda post: post.source_path)

    # find all scheduled posts with offset from now until publishing time
    posts_scheduled = [
        (post.date - now, post) for post in self.site.all_posts
        if post.publish_later and not (post.is_draft or post.is_private)
    ]
    posts_scheduled = sorted(posts_scheduled, key=lambda offset_post: (offset_post[0], offset_post[1].source_path))

    if posts_scheduled:
        if options['list_scheduled']:
            for offset, post in posts_scheduled:
                print("Scheduled: '{1}' ({2}; source: {3}) in {0}".format(self.human_time(offset), post.meta('title'), post.permalink(), post.source_path))
        else:
            # Only report the post that will be published soonest.
            offset, post = posts_scheduled[0]
            print("{0} to next scheduled post ('{1}'; {2}; source: {3}).".format(self.human_time(offset), post.meta('title'), post.permalink(), post.source_path))

    if options['list_drafts']:
        for post in posts_drafts:
            print("Draft: '{0}' ({1}; source: {2})".format(post.meta('title'), post.permalink(), post.source_path))
    if options['list_private']:
        for post in posts_private:
            print("Private: '{0}' ({1}; source: {2})".format(post.meta('title'), post.permalink(), post.source_path))
    if options['list_published']:
        for post in posts_published:
            print("Published: '{0}' ({1}; source: {2})".format(post.meta('title'), post.permalink(), post.source_path))

    print("{0} posts in total, {1} scheduled, {2} drafts, {3} private and {4} published.".format(posts_count, len(posts_scheduled), len(posts_drafts), len(posts_private), len(posts_published)))
0
Example 53
Project: esky Source File: util.py
def create_zipfile(source,target,get_zipinfo=None,members=None,compress=None):
    """Bundle the contents of a given directory into a zipfile.

    The argument 'source' names the directory to read, while 'target' names
    the zipfile to be written.

    If given, the optional argument 'get_zipinfo' must be a function mapping
    filenames to ZipInfo objects.  It may also return None to indicate that
    defaults should be used, or a string to indicate that defaults should be
    used with a new archive name.

    If given, the optional argument 'members' must be an iterable yielding
    names or ZipInfo objects.  Files will be added to the archive in the
    order specified by this function.

    If the optional argument 'compress' is given, it must be a bool indicating
    whether to compress the files by default.  The default is no compression.

    Symlinks are stored as symlink entries (their target path is the entry
    data) rather than being followed.  The archive is always closed, even if
    adding a member fails.
    """
    try:
        string_types = basestring  # Python 2
    except NameError:
        string_types = str  # Python 3

    compress_type = zipfile.ZIP_DEFLATED if compress else zipfile.ZIP_STORED

    if members is None:
        def gen_members():
            for (dirpath, _dirnames, filenames) in os.walk(source):
                for fn in filenames:
                    # relpath copes with 'source' given with or without a
                    # trailing separator; slicing by len(source)+1 did not.
                    yield os.path.relpath(os.path.join(dirpath, fn), source)
        members = gen_members()

    zf = zipfile.ZipFile(target, "w", compression=compress_type)
    try:
        for member in members:
            if isinstance(member, zipfile.ZipInfo):
                zinfo = member
                arcname = zinfo.filename
            else:
                arcname = member
                zinfo = get_zipinfo(arcname) if get_zipinfo else None
            fpath = os.path.join(source, arcname)

            if isinstance(zinfo, string_types):
                # A plain string only renames the entry; otherwise use defaults.
                arcname, zinfo = zinfo, None

            if os.path.islink(fpath):
                # For information about adding symlinks to a zip file, see
                # https://mail.python.org/pipermail/python-list/2005-June/322180.html
                dest = os.readlink(fpath)
                if zinfo is None:
                    zinfo = zipfile.ZipInfo()
                    zinfo.filename = arcname
                zinfo.create_system = 3
                zinfo.external_attr = 0xA1ED0000  # symlink (== 2716663808)
                zf.writestr(zinfo, dest)
            elif zinfo is None:
                zf.write(fpath, arcname)
            else:
                # Caller supplied a full ZipInfo: store the bytes under it.
                with open(fpath, "rb") as f:
                    zf.writestr(zinfo, f.read())
    finally:
        zf.close()
0
Example 54
Project: andoc Source File: import_maildir.py
def main():
    """Import the plain-text emails found under the directory in argv[1].

    Every file below the directory is parsed as an email message.  For each
    message with a text/plain part, the headers and body are written to
    data/<basename>.txt, the file is registered as a document, and a text
    selection is stored for every header line and for the body.  Each Date
    header is additionally stored as a 'date' triple with an HTML selection
    and a document relation.

    Exits with status 1 if the directory is missing/invalid or holds no files.
    """
    # Guard the index so that a missing argument is reported, not a traceback.
    if len(argv) > 1 and path.isdir(argv[1]):
        search_dir = argv[1]
    else:
        print("Error: invalid directory")
        exit(1)

    valid_emails = []
    for root, _dirs, files in walk(search_dir):
        for name in files:
            valid_emails.append(path.join(root, name))
    if not valid_emails:
        print("Error: no files found")
        exit(1)

    r = redis.Redis()
    for email in valid_emails:
        with open(email) as email_file:
            msg = message_from_file(email_file)

        has_plaintext = False
        if msg.is_multipart():
            # The last text/plain part wins when there are several.
            for part in msg.walk():
                if part.get_content_type() == 'text/plain':
                    has_plaintext = True
                    plaintext = part.get_payload(decode=True)
        elif msg.get_content_type() == 'text/plain':
            has_plaintext = True
            plaintext = msg.get_payload(decode=True)

        if not has_plaintext:
            continue

        out_name = 'data/%s.txt' % path.basename(email)
        selections = []
        dates = []
        with open(out_name, 'w') as destfile:
            for k, v in msg.items():
                selection_start = destfile.tell()
                # web browser counts one char for \r\n
                destfile.write('%s: %s\n' % (
                    k.replace('\r', '').strip(),
                    v.replace('\r', '').strip())
                )
                selection_end = destfile.tell()
                selections.append((selection_start, selection_end,
                                   'http://www.w3.org/1999/xhtml/#div'))
                if k == 'Date':
                    ts = mktime_tz(parsedate_tz(v))
                    # Offsets of the timestamp text within the header line.
                    ts_start = len('%s: ' % k)
                    ts_end = selection_end - selection_start
                    dates.append(
                        (selection_start, selection_end, ts, ts_start, ts_end))
            destfile.write('\n')
            bstart = destfile.tell()
            destfile.write(plaintext.replace('\r', '').strip())
            bend = destfile.tell()
        selections.append((bstart, bend + 1,
                           'http://www.w3.org/1999/xhtml/#div'))

        doc = Docuement(r)
        if doc.add(out_name):
            for start, end, ref in selections:
                text_selection = TextSelection(doc.id, start, end, ref)
                text_selection.save(r)
            for s_start, s_end, ts, ts_start, ts_end in dates:
                pre = 'date'
                # e.g. http://127.0.0.1:8080/doc/struc/1#div.s1086e1124
                sub = '%s%s#%s.s%se%s' % (
                    'http://127.0.0.1:8080/doc/struc/',
                    doc.id, 'div', s_start, s_end)
                trip = Triple(sub, pre, str(ts))
                tid = trip.save(r)
                h = HtmlSelection(doc.id, sub, ts_start, ts_end, tid)
                h.save(r)
                # save the object relation to this document
                doc.add_relation(pre, str(ts))
0
Example 55
Project: docket Source File: __init__.py
def strip_private_layer(client, tag, parent_id, private_layer_id):
    """Remove a private layer from a built image and reload the result.

    The image ``tag`` is exported with ``docker save``, removed from the
    daemon (best effort), and unpacked.  The layer ``private_layer_id`` and
    all of its ancestors are deleted from the unpacked tree, every remaining
    layer whose parent was the private layer is re-pointed at ``parent_id``,
    and the tree is re-packed and fed to ``docker load``.

    Exits the process with docker's exit status if save or load fails.
    Temporary files and directories are always cleaned up.
    """
    build_tar = tempfile.NamedTemporaryFile()
    logger.info('saving tar file from build %s', build_tar.name)

    # TODO save using client
    p_args = ['docker', 'save', '--output', build_tar.name, tag]
    p = Popen(p_args)
    res = p.wait()
    if res != 0:
        sys.exit(res)

    try:
        client.remove_image(tag)
    except Exception:
        # Best effort: the image is re-loaded below either way.
        pass

    extract_dir = tempfile.mkdtemp()
    logger.info('extract the build tar %s', extract_dir)

    try:
        with tarfile.open(mode='r', fileobj=build_tar) as tar:
            tar.extractall(path=extract_dir)

        # prune away image layers under private_id
        # we already have them, don't need them again
        def prune(basepath, start_id):
            json_path = basepath + '/' + start_id + '/json'
            with open(json_path, 'r') as f:
                content = json.load(f)
            # Remove ancestors first, then this layer.
            if 'parent' in content:
                prune(basepath, content['parent'])
            elif 'Parent' in content:
                prune(basepath, content['Parent'])
            logger.debug('pruning %s', start_id)
            shutil.rmtree(basepath + '/' + start_id)

        def reparent(json_path):
            # Rewrite the layer metadata in place if it hangs off the private layer.
            with open(json_path, 'r+') as f:
                content = json.load(f)
                parents = [content[key] for key in ('parent', 'Parent')
                           if key in content]
                if private_layer_id not in parents:
                    return
                content['parent'] = parent_id
                content['Parent'] = parent_id
                content['config']['Image'] = parent_id
                content['container_config']['Image'] = parent_id
                f.seek(0)
                json.dump(content, f)
                f.truncate()

        logger.info('Splice out private layer id %s', private_layer_id)
        prune(extract_dir, private_layer_id)

        for (_dirpath, dirnames, _filenames) in walk(extract_dir):
            for layer_dir in dirnames:
                reparent(extract_dir + '/' + layer_dir + '/json')
            # Layer directories sit directly under extract_dir and the paths
            # above are built from extract_dir, so do not descend further.
            break

        logger.info('make final tarball')

        tmp_file, tmp_path = tempfile.mkstemp()
        try:
            with tarfile.open(name=tmp_path, mode='w') as tar:
                tar.add(extract_dir, arcname='')
            os.fsync(tmp_file)

            logger.info('loading final image %s', tmp_path)
            p_args = ['docker', 'load', '--input', tmp_path]
            p = Popen(p_args)
            res = p.wait()
            if res != 0:
                sys.exit(res)
        finally:
            # mkstemp hands back an open descriptor; release it before unlinking.
            os.close(tmp_file)
            os.remove(tmp_path)
    finally:
        build_tar.close()
        shutil.rmtree(extract_dir)
0
Example 56
Project: tika-similarity Source File: value-similarity.py
def main(argv = None):
    """Score files by how many metadata "key: value" features they share.

    Command-line options (parsed from ``argv``, default ``sys.argv``):

      -h / --help        show usage
      -v / --verbose     set the module-level ``_verbose`` flag
      -f / --directory   walk a directory, skipping dot-files and dot-dirs
      -c / --file        treat everything after the flag as file names
      -a / --accept      treat everything after the flag as accepted MIME
                         subtypes

    Each file is parsed with ``parser.from_file``; the resemblance score of a
    file is the share of the union of all features that it contains.  Scores
    are written, highest first, to ``similarity-scores.txt``.

    Returns 2 after printing a usage message on bad input, otherwise None.
    """
    global _verbose

    if argv is None:
        argv = sys.argv

    def trailing_args(flags, fallback):
        # Everything after the first flag spelling present in argv.  Falls
        # back to getopt's own value for fused forms such as "--file=x".
        for flag in flags:
            if flag in argv:
                return argv[argv.index(flag) + 1:]
        return [fallback]

    try:
        try:
            opts, _args = getopt.getopt(argv[1:], 'hvf:c:a:', ['help', 'verbose', 'directory=', 'file=', 'accept=' ])
        except getopt.error as msg:
            raise _Usage(msg)

        if len(opts) == 0:
            raise _Usage(_helpMessage)

        dirFile = ""
        filenames = []
        filename_list = []
        allowed_mime_types = []
        directory_flag = 0

        for option, value in opts:
            if option in ('-h', '--help'):
                raise _Usage(_helpMessage)
            elif option in ('-c', '--file'):
                # extract file names from command line
                filenames = trailing_args(('-c', '--file'), value)
            elif option in ('-f', '--directory'):
                dirFile = value
                directory_flag = 1
                for root, dirnames, files in os.walk(dirFile):
                    # Prune hidden directories in place so walk() skips them.
                    dirnames[:] = [d for d in dirnames if not d.startswith('.')]
                    for filename in files:
                        if not filename.startswith('.'):
                            filename_list.append(os.path.join(root, filename))
            elif option in ('-a', '--accept'):
                # extract accepted mime types from command line
                allowed_mime_types = trailing_args(('--accept', '-a'), value)
            elif option in ('-v', '--verbose'):
                _verbose = True

        # format filename
        if directory_flag == 0:
            filenames = [name.strip().strip('\'\n') for name in filenames]
            for filename in filenames:
                candidate = os.path.join(dirFile, filename)
                if os.path.isfile(candidate):
                    filename_list.append(candidate)

        if len(filename_list) < 2:
            raise _Usage("you need to type in at least two valid files")

        # allow only files with specified mime types
        if len(allowed_mime_types) != 0:
            accepted = []
            for filename in filename_list:
                parsed = parser.from_file(filename)  # parse once per file
                if not parsed:
                    continue
                content_type = str(parsed['metadata']['Content-Type'].encode('utf-8'))
                if content_type.split('/')[-1] in allowed_mime_types:
                    accepted.append(filename)
            filename_list = accepted
        else:
            print("Accepting all MIME Types.....")

        union_feature_names = set()
        file_parsed_data = {}
        resemblance_scores = {}
        file_metadata = {}

        for filename in filename_list:
            file_parsed = []
            # first compute the union of all features
            parsedData = parser.from_file(filename)
            filename_stripped = filename.replace(",", "")
            try:
                file_metadata[filename_stripped] = parsedData["metadata"]
                # get key : value of metadata
                for key in parsedData["metadata"]:
                    value = parsedData["metadata"][key]
                    if isinstance(value, list):
                        value = ", ".join(parsedData["metadata"][key])
                    # NOTE(review): mixing .encode() results with a str literal
                    # relies on Python 2 string semantics - confirm before porting.
                    file_parsed.append(str(key.strip(' ').encode('utf-8') + ": " + value.strip(' ').encode('utf-8')))
                file_parsed_data[filename_stripped] = set(file_parsed)
                union_feature_names = union_feature_names | file_parsed_data[filename_stripped]
            except ConnectionError:
                sleep(1)
            except KeyError:
                continue

        total_num_features = len(union_feature_names)

        # now compute the specific resemblance and containment scores
        for filename in file_parsed_data:
            overlap = file_parsed_data[filename] & union_feature_names
            # Files with empty metadata give an empty union; avoid dividing by zero.
            if total_num_features:
                resemblance_scores[filename] = float(len(overlap)) / total_num_features
            else:
                resemblance_scores[filename] = 0.0

        sorted_resemblance_scores = sorted(resemblance_scores.items(), key=operator.itemgetter(1), reverse=True)

        with open("similarity-scores.txt", "w") as f:
            f.write("Resemblance : \n")
            for scored_path, score in sorted_resemblance_scores:
                f.write(os.path.basename(scored_path.rstrip(os.sep))+","+str(score) +"," + scored_path + ","+ convertUnicode(file_metadata[scored_path])+'\n')

    except _Usage as err:
        sys.stderr.write(sys.argv[0].split('/')[-1] + ': ' + str(err.msg) + '\n')
        return 2
0
Example 57
Project: courtlistener Source File: import_columbia.py
def do_many(dir_path, limit, random_order, status_interval, log_file,
            newcases, skipdupes, skip_newcases, avoid_nocites, courtdates,
            startfolder, startfile, debug):
    """Runs through a directory of the form /data/[state]/[sub]/.../[folders]/[.xml documents].
    Parses each .xml document, instantiates the associated model object, and
    saves the object. Prints/logs status updates and tracebacks instead of
    raising exceptions.

    :param dir_path: The directory.
    :param limit: A limit on how many files to run through. If None, will run
    through all (or if random order, forever).
    :param random_order: If true, will run through the directories and files in
    random order.
    :param status_interval: How often a status update will be given. A falsy
    value (0 or None) disables status updates.
    :param log_file: If not None, file paths that raise Exceptions will be
    logged to this file.
    :param newcases: If true, skip court-years that already have data.
    :param skipdupes: If true, skip duplicates.
    :param skip_newcases: If true, skip cases imported under newcases.
    :param avoid_nocites: If true, skip cases from dates after any case with no cite.
    :param courtdates: If true, skip cases with dates before court established.
    :param startfolder: If not None, start on startfolder
    :param startfile: If not None, start on this file (for resuming)
    :param debug: Passed through unchanged to make_and_save.
    :raises Exception: If both avoid_nocites and newcases are set.
    """
    if limit:
        total = limit
    elif not random_order:
        print("Getting an initial file count ...")
        print('')
        total = 0
        for _, _, file_names in os.walk(dir_path):
            total += len(fnmatch.filter(file_names, '*.xml'))
    else:
        total = None

    log = None
    if log_file:
        print("Logging problematic file paths to '%s' ..." % log_file)
        print('')
        log = logging.getLogger(__name__)
        log.setLevel(logging.INFO)
        log.addHandler(logging.FileHandler(log_file))

    # go through the files, yielding parsed files and printing status updates as
    # we go
    folders = glob(dir_path+'/*')
    folders.sort()
    count = 0

    # get earliest dates for each court
    if newcases:
        print('Only new cases: getting earliest dates by court.')
        min_dates = get_min_dates()
    else:
        min_dates = None

    if avoid_nocites:
        if newcases:
            raise Exception("Cannot use both avoid_nocites and newcases options.")
        print('Avoiding no cites: getting earliest dates by court with no citation.')
        min_dates = get_min_nocite()

    if courtdates:
        start_dates = get_courtdates()
    else:
        start_dates = None

    # check if skipping first columbias cases
    if skip_newcases:
        skiplist = get_path_list()
    else:
        skiplist = set()

    # start/resume functionality: skip everything until the named folder/file
    skipfolder = startfolder is not None
    skipfile = startfile is not None

    # exception messages that get a short summary instead of a full traceback
    known = (
        'mismatched tag', 'Failed to get a citation',
        'Failed to find a court ID',
        'null value in column "date_filed"', 'duplicate(s)'
    )

    for folder in folders:
        if skipfolder:
            if os.path.basename(folder) == startfolder:
                skipfolder = False
            else:
                continue
        print(folder)

        for path in file_generator(folder, random_order, limit):
            if skipfile:
                if os.path.basename(path) == startfile:
                    skipfile = False
                else:
                    continue

            if path in skiplist:
                continue

            # skip cases in 'misc*' folders -- they are relatively different
            # than the other cases, so we'll deal with them later
            if 'miscellaneous_court_opinions' in path:
                continue

            print(path)

            # try to parse/save the case and print any exceptions with full
            # tracebacks
            try:
                parsed = parse_file(path)
                make_and_save(parsed, skipdupes, min_dates, start_dates, debug)
            except Exception as e:
                # log the file name
                if log:
                    log.info(path)
                # print simple exception summaries for known problems
                if any(k in str(e) for k in known):
                    print('')
                    print("Known exception in file '%s':" % path)
                    print(str(e))
                    print('')
                else:
                    # otherwise, print generic traceback
                    print('')
                    print("Unknown exception in file '%s':" % path)
                    print(traceback.format_exc())
                    print('')

            # status update
            count += 1
            if status_interval and count % status_interval == 0:
                print('')
                if total:
                    print("Finished %s out of %s files." % (count, total))
                else:
                    print("Finished %s files." % count)
                print('')
0
Example 58
Project: bsdpy Source File: bsdpserver.py
def getNbiOptions(incoming):
    """
    The getNbiOptions() function walks through a given directory and
    finds and parses compatible NBIs by looking for NBImageInfo.plist
    files which are then processed with plistlib to extract an NBI's
    configuration items that are needed later on to send to BSDP clients.

    It is assumed that the NBI root directory is laid out as follows:
        /nbi/MyGreatImage.nbi
        /nbi/AnotherNetBootImage.nbi

    Returns a tuple (nbioptions, nbisources): a list with one settings dict
    per eligible NBI and the list of the matching .nbi directory paths.
    NBIs whose Index is 0 or whose IsEnabled is False are skipped.  Any
    error is logged at debug level and re-raised.
    """
    # Initialize lists to store NBIs and their options
    nbioptions = []
    nbisources = []
    try:
        for path, dirs, files in os.walk(incoming):
            if os.path.splitext(path)[1] != '.nbi':
                continue

            # Do not descend into the NBI bundle itself.
            del dirs[:]

            # Search the path for an NBImageInfo.plist and parse it.
            logging.debug('Considering NBI source at ' + str(path))
            nbimageinfoplist = find('NBImageInfo.plist', path)[0]
            nbimageinfo = plistlib.readPlist(nbimageinfoplist)

            if nbimageinfo['Index'] == 0:
                logging.debug('Image "%s" Index is NULL (0), skipping!'
                              % nbimageinfo['Name'])
                continue
            if nbimageinfo['IsEnabled'] is False:
                logging.debug('Image "%s" is disabled, skipping.'
                              % nbimageinfo['Name'])
                continue

            # Pull NBI settings out of the plist for use later on:
            #   booter = The kernel which is loaded with tftp
            #   disabledsysids = System IDs to blacklist, optional
            #   dmg = The actual OS image loaded after the booter
            #   enabledsysids = System IDs to whitelist, optional
            #   enabledmacaddrs = Enabled MAC addresses to whitelist, optional
            #                     (and for which a key may not exist in)
            #   id = The NBI Identifier, must be unique
            #   isdefault = Indicates the NBI is the default
            #   length = Length of the NBI name, needed for BSDP packet
            #   name = The name of the NBI
            thisnbi = {
                'id': nbimageinfo['Index'],
                'booter': find('booter', path)[0],
                'description': nbimageinfo['Description'],
                # Optional keys: treat a missing list as empty rather than
                # aborting the whole scan with a KeyError.
                'disabledsysids':
                    nbimageinfo.get('DisabledSystemIdentifiers', []),
                'dmg': '/'.join(find('*.dmg', path)[0].split('/')[2:]),
                # EnabledMACAddresses must be lower-case - Apple's tools create
                # them as such, but in case they aren't..
                'enabledmacaddrs': [
                    mac.lower()
                    for mac in nbimageinfo.get('EnabledMACAddresses', [])
                ],
                'enabledsysids':
                    nbimageinfo.get('EnabledSystemIdentifiers', []),
                'isdefault': nbimageinfo['IsDefault'],
                'length': len(nbimageinfo['Name']),
                'name': nbimageinfo['Name'],
                'proto': nbimageinfo['Type'],
            }

            # Add the parameters for the current NBI to nbioptions
            nbioptions.append(thisnbi)
            # Found an eligible NBI source, add it to our nbisources list
            nbisources.append(path)
    except Exception:
        logging.debug("Unexpected error getNbiOptions: %s" %
                      sys.exc_info()[1])
        raise

    return nbioptions, nbisources
0
Example 59
Project: import-mailbox-to-gmail Source File: import-mailbox-to-gmail.py
def main():
    """Import multiple users' mbox files to Gmail.

    Configures console and rotating-file logging, then treats every
    immediate sub-directory of args.dir as one user: obtains credentials,
    lists the user's labels and imports the user's mbox files.  Failures for
    one user are logged and counted without stopping the run; a summary of
    user, label and message counts is logged at the end.
    """
    httplib2.debuglevel = args.httplib2debuglevel

    # Use args.logging_level if defined.
    logging_level = getattr(args, 'logging_level', 'INFO')

    # Default logging to standard output
    logging.basicConfig(
        level=logging_level,
        format='%(asctime)s %(levelname)s %(funcName)s@%(filename)s %(message)s',
        datefmt='%H:%M:%S')

    # More detailed logging to file
    detailed_format = logging.Formatter(
        '%(asctime)s %(process)d %(levelname)s %(funcName)s '
        '(%(filename)s:%(lineno)d) %(message)s')
    detailed_format.datefmt = '%Y-%m-%dT%H:%M:%S (%z)'
    rotating_log = logging.handlers.RotatingFileHandler(args.log,
                                                        maxBytes=1024 * 1024 * 32,
                                                        backupCount=8)
    rotating_log.setFormatter(detailed_format)
    logging.getLogger().addHandler(rotating_log)

    logging.info('*** Starting %s %s on Python %s ***',
                 APPLICATION_NAME,
                 APPLICATION_VERSION,
                 sys.version)
    logging.info('Arguments:')
    for arg, value in sorted(vars(args).items()):
        logging.info('\t%s: %r', arg, value)

    # Running totals across all users.
    tally = {
        'users_ok': 0,
        'users_partial': 0,
        'users_failed': 0,
        'labels_ok': 0,
        'labels_partial': 0,
        'labels_failed': 0,
        'messages_ok': 0,
        'messages_failed': 0,
    }

    # Only the first level of args.dir is inspected: one directory per user.
    user_directories = next(os.walk(args.dir))[1]
    for username in user_directories:
        try:
            logging.info('Processing user %s', username)

            try:
                credentials = get_credentials(username)
                http = credentials.authorize(set_user_agent(
                    httplib2.Http(),
                    '%s-%s' % (APPLICATION_NAME, APPLICATION_VERSION)))
                service = discovery.build('gmail', 'v1', http=http)
            except Exception:
                logging.error("Can't get access token for user %s", username)
                raise

            try:
                results = service.users().labels().list(
                    userId=username,
                    fields='labels(id,name)').execute(num_retries=args.num_retries)
                labels = results.get('labels', [])
            except Exception:
                logging.error("Can't get labels for user %s", username)
                raise

            try:
                result = process_mbox_files(username, service, labels)
            except Exception:
                logging.error("Can't process mbox files for user %s", username)
                raise

            # result is indexed as: labels ok, labels with some errors,
            # labels failed, messages ok, messages failed.
            if result[2] == 0 and result[4] == 0:
                tally['users_ok'] += 1
            elif result[0] > 0 or result[3] > 0:
                tally['users_partial'] += 1
            else:
                tally['users_failed'] += 1
            tally['labels_ok'] += result[0]
            tally['labels_partial'] += result[1]
            tally['labels_failed'] += result[2]
            tally['messages_ok'] += result[3]
            tally['messages_failed'] += result[4]

            logging.info('Done importing user %s. Labels: %d succeeded, %d with some '
                         'errors, %d failed. Messages: %d succeeded, %d failed.',
                         username,
                         result[0],
                         result[1],
                         result[2],
                         result[3],
                         result[4])
        except Exception:
            tally['users_failed'] += 1
            logging.exception("Can't process user %s", username)

    logging.info("*** Done importing all users from directory '%s'", args.dir)
    logging.info('*** Import summary:')
    summary_lines = (
        (' %d users imported with no failures', 'users_ok'),
        (' %d users imported with some failures', 'users_partial'),
        (' %d users failed', 'users_failed'),
        (' %d labels (mbox files) imported with no failures', 'labels_ok'),
        (' %d labels (mbox files) imported with some failures', 'labels_partial'),
        (' %d labels (mbox files) failed', 'labels_failed'),
        (' %d messages imported successfully', 'messages_ok'),
        (' %d messages failed\n', 'messages_failed'),
    )
    for message, key in summary_lines:
        logging.info(message, tally[key])

    any_failures = (tally['messages_failed'] + tally['labels_failed'] +
                    tally['users_failed'] > 0)
    if any_failures:
        logging.info('*** Check log file %s for detailed errors.', args.log)
    logging.info('Finished.\n\n')
0
Example 60
Project: needy Source File: universal_binary.py
def build(self):
    """Merge the per-architecture builds of this library into one tree.

    Every path that exists in the build directory of *all* libraries is
    combined into this object's build directory:

    * with a single library, regular files are copied as they are;
    * C/C++ headers and sources become a dispatching file that includes a
      per-target copy, guarded by the platform's detection macro;
    * symlinks are recreated;
    * ``.a`` / ``.dylib`` / ``.so`` files are merged with ``lipo``;
    * pkg-config files are written with the build prefix replaced by
      ``${pcfiledir}/../..`` when they agree apart from that prefix.

    On any error the partially written output directory is removed and the
    error is re-raised.  Outside development mode a build status file with
    the configuration hash is written.
    """
    print('Building universal binary %s' % self.name())

    libraries = self.libraries()

    # Relative path -> [(library, absolute path in that library's build)].
    universal_paths = dict()
    for library in libraries:
        build_root = library.build_directory()
        for root, dirs, files in os.walk(build_root):
            for path in files + dirs:
                key = os.path.join(os.path.relpath(root, build_root), path)
                universal_paths.setdefault(key, []).append(
                    (library, os.path.join(root, path)))

    directory = self.build_directory()
    if os.path.exists(directory):
        shutil.rmtree(directory)
    os.makedirs(directory)

    try:
        for path, builds in universal_paths.items():
            # Only paths present in every library can be made universal.
            if len(builds) != len(libraries):
                continue

            extension = os.path.splitext(path)[1]
            output_path = os.path.join(directory, path)

            self.__make_output_dirs_for_builds(output_path, builds)

            first_is_link = os.path.islink(builds[0][1])

            if not first_is_link and any(os.path.isdir(source_path) for _, source_path in builds):
                continue
            elif not first_is_link and len(libraries) == 1:
                print('Copying %s' % path)
                shutil.copy(builds[0][1], output_path)
            elif extension in ['.h', '.hpp', '.hxx', '.ipp', '.c', '.cc', '.cpp']:
                header_contents = '#if __APPLE__\n#include "TargetConditionals.h"\n#endif\n'
                for library, header in builds:
                    macro = library.target().platform.detection_macro(library.target().architecture)
                    if not macro:
                        # Without a macro for every target no dispatcher can be written.
                        header_contents = ''
                        break

                    header_directory = os.path.join(os.path.dirname(output_path), 'needy_targets', library.target().platform.identifier(), library.target().architecture)
                    if not os.path.exists(header_directory):
                        os.makedirs(header_directory)
                    header_path = os.path.join(header_directory, os.path.basename(header))
                    shutil.copyfile(header, header_path)
                    header_contents += '#if {}\n#include "{}"\n#endif\n'.format(macro, os.path.relpath(header_path, os.path.dirname(output_path)))

                if header_contents:
                    print('Creating universal header %s' % path)
                    with open(output_path, 'w') as f:
                        f.write(header_contents)
            elif first_is_link:
                print('Copying symlink %s' % path)
                os.symlink(os.readlink(builds[0][1]), output_path)
            elif extension in ['.a', '.dylib', '.so']:
                print('Creating universal library %s' % path)
                thin_files = []
                try:
                    for library, lib in builds:
                        thin = tempfile.NamedTemporaryFile(delete=True)
                        thin_files.append(thin)
                        try:
                            with open(os.devnull, 'w') as devnull:
                                subprocess.check_call(['lipo', '-extract', library.target().architecture, lib, '-output', thin.name], stderr=devnull)
                        except subprocess.CalledProcessError:
                            # lipo could not extract the slice; use the file as it is.
                            subprocess.check_call(['cp', lib, thin.name])
                    subprocess.check_call(['lipo', '-create'] + [thin.name for thin in thin_files] + ['-output', output_path])
                finally:
                    # Closing deletes the temporary files, also when lipo fails.
                    for thin in thin_files:
                        thin.close()
            elif extension == '.pc' and 'pkgconfig' in path:
                universal_pc = None
                for library, pc in builds:
                    # Read bytes and decode explicitly so this works on both
                    # Python 2 and Python 3.
                    with open(pc, 'rb') as f:
                        contents = f.read().decode()
                    fixed = contents.replace(library.build_directory(), '${pcfiledir}/../..')
                    if universal_pc is not None and fixed != universal_pc:
                        print('Package config differs beyond prefix. Not creating %s' % path)
                        universal_pc = None
                        break
                    universal_pc = fixed

                if universal_pc:
                    print('Creating universal package config: %s' % path)
                    with open(output_path, 'wb') as f:
                        f.write(universal_pc.encode())
    except BaseException:
        # Never leave a half-built output directory behind.
        shutil.rmtree(directory)
        raise

    if not self.is_in_development_mode():
        with open(self.build_status_path(), 'w') as status_file:
            status = {
                'configuration': binascii.hexlify(self.configuration_hash()).decode()
            }
            json.dump(status, status_file)
0
Example 61
def run(self):
    """Parse one file, or scan all open files and folders.

    When ``self.file`` is set only that file is parsed (best effort).
    Otherwise, unless a scan is already running, the currently open files
    are parsed first and then every JavaScript file below the open folders
    (plus the folders containing the open files).  Files already present in
    ``MySign.files`` count as cache hits and are not parsed again.  The scan
    stops early whenever ``should_abort()`` returns true.

    ``Pref.scan_running`` and ``Pref.scan_aborted`` are reset when the scan
    ends, including when it ends with an exception, so a failed scan cannot
    block later scans.
    """
    if self.file:
        try:
            self.parse_functions(norm_path(self.file))
        except Exception:
            # Best effort: the file may be unreachable/unreadable.
            pass
    elif not Pref.scan_running:
        Pref.scan_running = True
        Pref.scan_started = time.time()
        try:
            # the list of opened files in all the windows
            files = list(Pref.updated_files)
            # the list of opened folders in all the windows
            folders = list(Pref.updated_folders)
            Pref.folders = list(folders)  # this is the "cache id" to know when to rescan the whole thing again

            # add also as folders, the dirname of the current opened files
            folders += [norm_path(dirname(file_path)) for file_path in files]
            # deduplicate
            folders = list(set(folders))
            _folders = []
            for folder in folders:
                _folders = deduplicate_crawl_folders(_folders, folder)
            folders = _folders

            if debug:
                print('Folders to scan:')
                print("\n".join(folders))

            # parsing statistics
            files_seen = 0
            files_js = 0
            files_cache_miss = 0
            files_cache_hit = 0
            files_failed_parsing = 0

            # parse files with priority
            for file_path in files:
                if should_abort():
                    break
                files_seen += 1
                files_js += 1
                if file_path not in MySign.files:
                    try:
                        self.parse_functions(file_path)
                        files_cache_miss += 1
                    except Exception:
                        files_failed_parsing += 1  # the file may be unreachable/unreadable
                else:
                    files_cache_hit += 1

            # now parse folders
            for folder in folders:
                if should_abort():
                    break
                for dirpath, _dirnames, filenames in os.walk(folder):
                    if should_abort():
                        break
                    for filename in filenames:
                        if should_abort():
                            break
                        files_seen += 1
                        file_path = os.path.join(dirpath, filename)
                        if not should_exclude(file_path) and is_javascript_file(file_path):
                            files_js += 1
                            file_path = norm_path(file_path)
                            if file_path not in MySign.files:
                                try:
                                    self.parse_functions(file_path)
                                    files_cache_miss += 1
                                except Exception:
                                    files_failed_parsing += 1  # the file may be unreachable/unreadable
                            else:
                                files_cache_hit += 1

            if debug:
                print('Scan done in '+str(time.time()-Pref.scan_started)+' seconds - Scan was aborted: '+str(Pref.scan_aborted))
                print('Files Seen:'+str(files_seen)+', Files JS:'+str(files_js)+', Cache Miss:'+str(files_cache_miss)+', Cache Hit:'+str(files_cache_hit)+', Failed Parsing:'+str(files_failed_parsing))
        finally:
            Pref.scan_running = False
            Pref.scan_aborted = False
0
Example 62
def removeDirectory( self, path, recursive = False ):
    """Remove a directory on the physical storage together with all its files and
    subdirectories.

    :param path: single path or list of paths
    :param recursive: if True, the whole tree is removed with shutil.rmtree;
                      otherwise only the regular files directly inside the
                      directory are unlinked
    :return: S_OK wrapping {'Failed': ..., 'Successful': ...}, both keyed by path.
             With recursive = True every value is the dictionary
             {'FilesRemoved': amount of files deleted, 'SizeRemoved': amount of data deleted}.
             With recursive = False a successful value is True and a failed
             value is the error message.

    Note: it is known that if recursive is False, the removal of a non existing
    directory is successful, while it is failed for recursive = True. This
    asymmetry is kept on purpose to preserve the interface.
    """
    def _treeUsage( top ):
        """Return (number of files, cumulated size in bytes) found below top."""
        nbFiles = 0
        size = 0
        for root, _dirs, files in os.walk( top ):
            nbFiles += len( files )
            for fn in files:
                try:
                    size += os.path.getsize( os.path.join( root, fn ) )
                except OSError:
                    # Dangling symlink or file that vanished in the meantime:
                    # it is still counted as a file but contributes no size,
                    # instead of aborting the whole removal with a traceback
                    pass
        return nbFiles, size

    res = checkArgumentFormat( path )
    if not res['OK']:
        return res
    urls = res['Value']

    self.log.debug( "FileStorage.removeDirectory: Attempting to remove %s directories." % len( urls ) )

    successful = {}
    failed = {}

    for url in urls:
        if recursive:
            # Calculate the original size before anything is deleted
            nbOfFiles, totalSize = _treeUsage( url )
            try:
                shutil.rmtree( url )
                successful[url] = {'FilesRemoved':nbOfFiles, 'SizeRemoved':totalSize}
            except OSError as ose:
                # if the directory does not exist, then the numbers are already
                # correct (both zero), no need to re do the walk
                if ose.errno != errno.ENOENT:
                    # If we only removed partially, check how much was removed
                    leftFiles, leftSize = _treeUsage( url )
                    nbOfFiles -= leftFiles
                    totalSize -= leftSize
                failed[url] = {'FilesRemoved':nbOfFiles, 'SizeRemoved':totalSize}
        # If no recursive
        else:
            try:
                # Delete all the files
                for child in os.listdir( url ):
                    fullpath = os.path.join( url, child )
                    if os.path.isfile( fullpath ):
                        os.unlink( fullpath )
                successful[url] = True
            except OSError as ose:
                # If we get as exception that the directory does not exist
                # (it can only be the directory), then success
                if ose.errno == errno.ENOENT:
                    successful[url] = True
                else:
                    failed[url] = str( ose )

    resDict = {'Failed':failed, 'Successful':successful}
    return S_OK( resDict )
0
Example 63
Project: wharf Source File: index.py
@app.route('/', methods=['GET', 'POST'])
def index():
"""Serve the landing page and ingest new services.

On POST the view takes either an uploaded archive (form file field 'file')
or a URL (form field 'wharf_url'), checks which of the files listed in
app.config['SERVICE_DICT'] are missing, and either renders "forms.html"
to ask for the missing ones, renders "failed.html", or moves the service
into place. Otherwise it lists the sub-directories of "services" and
renders them as table rows in "index.html".

NOTE(review): leading indentation was lost in this copy of the file, so
the nesting described in the comments below is inferred from the
statement order -- confirm against the original before relying on it.
"""
if request.method == 'POST':
url = ""
file = ""
desc = ""
services = []
# Both form inputs are optional; a missing one falls back to "".
# NOTE(review): the bare excepts below hide every error, not only a
# missing form key.
try:
url = request.form['wharf_url']
except:
url = ""
try:
file = request.files['file']
except:
file = ""
# ---- Branch 1: an archive was uploaded -------------------------------
if file != "":
try:
if file and allowed_file(file.filename):
filename = secure_filename(file.filename)
file_path = path.join(app.config['UPLOAD_FOLDER'], filename)
file.save(file_path)
# .zip upload: unpack into UPLOAD_FOLDER/<name without extension>
# NOTE(review): extractall() is called on a user-supplied archive
# without checking member paths -- confirm this is acceptable.
if filename.rsplit('.', 1)[1] == "zip":
with zipfile.ZipFile(file_path, 'r') as service_zip:
service_zip.extractall(path.join(app.config['UPLOAD_FOLDER'],
filename.rsplit('.', 1)[0]))
# !! TODO
# allow exception for dockerfile, check at root as well
# check for existence of necessary files
missing_files = {}
# The archive is expected to contain a top-level folder named like
# the archive itself, hence the name appears twice in the path.
for key,value in app.config['SERVICE_DICT'].items():
if not path.exists(path.join(app.config['UPLOAD_FOLDER'],
filename.rsplit('.', 1)[0],
filename.rsplit('.', 1)[0],
value)):
missing_files[key] = value
services.append(filename.rsplit('.', 1)[0])
if missing_files:
if "dockerfile" in missing_files:
return render_template("failed.html")
else:
return render_template("forms.html",
services=services,
missing_files=missing_files,
filename=filename,
indexDesc=desc,
url=url)
move_services(filename, 1)
# .gz upload: same flow, but the name is split on the last two dots
# (presumably to strip a ".tar.gz" suffix -- verify).
elif filename.rsplit('.', 1)[1] == "gz":
with tarfile.open(path.join(app.config['UPLOAD_FOLDER'], filename)) as service_gz:
service_gz.extractall(path.join(app.config['UPLOAD_FOLDER'],
filename.rsplit('.', 2)[0]))
# !! TODO
# allow exception for dockerfile, check at root as well
# check for existence of necessary files
missing_files = {}
for key,value in app.config['SERVICE_DICT'].items():
if not path.exists(path.join(app.config['UPLOAD_FOLDER'],
filename.rsplit('.', 2)[0],
filename.rsplit('.', 2)[0],
value)):
missing_files[key] = value
services.append(filename.rsplit('.', 2)[0])
if missing_files:
if "dockerfile" in missing_files:
return render_template("failed.html")
else:
return render_template("forms.html",
services=services,
missing_files=missing_files,
filename=filename,
indexDesc=desc,
url=url)
move_services(filename, 2)
else:
return render_template("failed.html")
# !! TODO
# some post-processing once the file is uploaded
else:
return render_template("failed.html")
except:
# NOTE(review): any failure above (not only "no file") ends up here.
print "No file selected"
# ---- Branch 2: a URL was submitted -----------------------------------
elif url != "":
try:
if url:
# Last path component of the URL without its extension.
url_path = (url.rsplit('/', 1)[1]).rsplit('.', 1)[0]
# !! TODO try/except
if url.rsplit('.', 1)[1] == "git":
# !! TODO try/except - if the folder already exists
git.clone(url, path.join(app.config['UPLOAD_FOLDER'],
url_path))
# check for dockerfile at root
# check for dockerfile assuming repo is the services folder
if path.exists(path.join(app.config['UPLOAD_FOLDER'],
url_path,
"Dockerfile")) or path.exists(path.join(app.config['UPLOAD_FOLDER'],
url_path,
app.config['SERVICE_DICT']['dockerfile'])):
# check for existence of necessary files
missing_files = {}
for key,value in app.config['SERVICE_DICT'].items():
if not path.exists(path.join(app.config['UPLOAD_FOLDER'],
url_path,
value)):
missing_files[key] = value
services.append(url_path)
if "dockerfile" in missing_files:
del missing_files['dockerfile']
if missing_files:
return render_template("forms.html",
services=services,
missing_files=missing_files,
filename=file,
indexDesc=desc,
url=url)
# move to services folder
# Retry loop: i counts attempts; a failed mv bumps i and the next
# attempt renames the folder with a numeric suffix first. i = -1
# marks success and ends the loop.
# NOTE(review): if mv keeps failing for an unrelated reason this
# loop has no visible upper bound.
i = 0
while i != -1:
try:
if i == 0:
mv(path.join(app.config['UPLOAD_FOLDER'],
url_path),
app.config['SERVICES_FOLDER'])
elif i == 1:
mv(path.join(app.config['UPLOAD_FOLDER'],
url_path),
path.join(app.config['UPLOAD_FOLDER'],
url_path+str(i)))
mv(path.join(app.config['UPLOAD_FOLDER'],
url_path+str(i)),
app.config['SERVICES_FOLDER'])
else:
mv(path.join(app.config['UPLOAD_FOLDER'],
url_path+str(i-1)),
path.join(app.config['UPLOAD_FOLDER'],
url_path+str(i)))
mv(path.join(app.config['UPLOAD_FOLDER'],
url_path+str(i)),
app.config['SERVICES_FOLDER'])
i = -1
except:
i += 1
try:
# remove leftover files in tmp
rmdir(path.join(app.config['UPLOAD_FOLDER'],
url_path))
except:
pass
else:
# No Dockerfile at the repository root: treat every first-level
# directory of the clone (except .git) as a candidate service.
i = 0
repo_dirs = []
for root, dirs, files in walk(path.join(app.config['UPLOAD_FOLDER'],
url_path)):
# Only the first tuple yielded by walk() (the top directory) is kept.
if i == 0:
repo_dirs = dirs
i += 1
if ".git" in repo_dirs:
repo_dirs.remove(".git")
services=repo_dirs
for service_dir in repo_dirs:
# check for dockerfile one folder deep
# check for dockerfile in regular services folder
# could be more than one
if path.exists(path.join(app.config['UPLOAD_FOLDER'],
url_path,
service_dir, "Dockerfile")) or path.exists(path.join(app.config['UPLOAD_FOLDER'],
url_path,
service_dir, app.config['SERVICE_DICT']['dockerfile'])):
# check for existence of necessary files
missing_files = {}
for key,value in app.config['SERVICE_DICT'].items():
if not path.exists(path.join(app.config['UPLOAD_FOLDER'],
url_path,
service_dir,
value)):
missing_files[key] = value
if "dockerfile" in missing_files:
del missing_files['dockerfile']
if missing_files:
# !! TODO TODO TODO
# this needs to be re-worked for times
# when there is more than one service_dir
return render_template("forms.html",
services=services,
missing_files=missing_files,
filename=file,
indexDesc=desc,
url=url)
# move to services folder
# Same suffix-and-retry scheme as for the single-service case above.
i = 0
while i != -1:
try:
if i == 0:
mv(path.join(app.config['UPLOAD_FOLDER'],
url_path,
service_dir),
app.config['SERVICES_FOLDER'])
elif i == 1:
mv(path.join(app.config['UPLOAD_FOLDER'],
url_path,
service_dir),
path.join(app.config['UPLOAD_FOLDER'],
url_path,
service_dir+str(i)))
mv(path.join(app.config['UPLOAD_FOLDER'],
url_path,
service_dir+str(i)),
app.config['SERVICES_FOLDER'])
else:
mv(path.join(app.config['UPLOAD_FOLDER'],
url_path,
service_dir+str(i-1)),
path.join(app.config['UPLOAD_FOLDER'],
url_path,
service_dir+str(i)))
mv(path.join(app.config['UPLOAD_FOLDER'],
url_path,
service_dir+str(i)),
app.config['SERVICES_FOLDER'])
i = -1
except:
i += 1
try:
rmdir(path.join(app.config['UPLOAD_FOLDER'],
url_path))
except:
pass
else:
# !! TODO
# should point to docker index url, expects a <meta name="description"
# won't have a dockerfile in the service folder
# note the naming scheme will mess with directory structure of service name
# needs to be handled as a special case
repo = ""
desc = ""
try:
# Scrape the page's <meta name="description"> content; it is split
# below into "repo: description" when a ": " separator is present.
index_repo = (requests.get(url).content).split('<meta name="description" content="')
index_repo = index_repo[1].split("\" />")
# !! TODO try, if fails, there is no description.
try:
repo, desc = index_repo[0].split(": ", 1)
desc = desc.replace("\n", " ")
print repo, desc
except:
repo = index_repo[0]
print repo
except:
return render_template("failed.html")
if repo == "":
return render_template("failed.html")
# Everything in SERVICE_DICT is reported missing except the
# dockerfile, and the description when one was scraped.
missing_files = {}
for key,value in app.config['SERVICE_DICT'].items():
missing_files[key] = value
del missing_files["dockerfile"]
if desc != "":
del missing_files["description"]
services.append(repo)
return render_template("forms.html",
services=services,
missing_files=missing_files,
filename=file,
indexDesc=desc,
url=url)
except:
print "Bad URL"
else:
return render_template("failed.html")
return redirect(url_for('index'))
# ---- Listing: build one HTML table row per directory in "services" ---
# NOTE(review): service names and description file contents are
# concatenated into HTML and wrapped in Markup() without escaping --
# confirm these values are trusted.
row = ""
services = [name for name in listdir("services") if path.isdir(path.join("services", name))]
for service in services:
last_modified = ""
last_modified = time.ctime(path.getmtime("services/"+service))
description = ""
row += '<tr><td class="rowlink-skip"><a href="saas/'+service+'">'+service+'</a></td><td>'
try:
description_path = "services/"+service+"/"+app.config['SERVICE_DICT']['description']
with open(description_path, 'r') as content_file:
description = content_file.read()
row += description
except:
# A missing or unreadable description file is shown as a placeholder.
row += "no description"
row += '</td><td><a href="saas/'+service+'">'+last_modified+'</a></td><td><a href="edit/'+service+'">Edit</a></td></tr>'
row = Markup(row)
return render_template("index.html",row=row)
0
Example 64
def generate_dont_trace_files():
    """Regenerate ``_pydevd_bundle/pydevd_dont_trace_files.py`` below ``root_dir``.

    Walks ``root_dir`` (a module-level name), skipping a fixed set of
    directories, and lists every remaining ``*.py`` file name -- except a
    few entry-point/setup scripts -- as a ``PYDEV_FILE`` entry of the
    ``DONT_TRACE`` dictionary in the generated module.

    Raises AssertionError when ``pydevd.py`` or ``pydevd_dont_trace.py`` did
    not end up in the generated contents, which signals that the walk did not
    see the expected tree.
    """
    # The body of ``if IS_PY3K:`` must stay indented, otherwise the generated
    # module is not importable.
    template = '''# Important: Autogenerated file.
# DO NOT edit manually!
# DO NOT edit manually!
from _pydevd_bundle.pydevd_constants import IS_PY3K
LIB_FILE = 1
PYDEV_FILE = 2
DONT_TRACE = {
# commonly used things from the stdlib that we don't want to trace
'Queue.py':LIB_FILE,
'queue.py':LIB_FILE,
'socket.py':LIB_FILE,
'weakref.py':LIB_FILE,
'_weakrefset.py':LIB_FILE,
'linecache.py':LIB_FILE,
'threading.py':LIB_FILE,
#things from pydev that we don't want to trace
'_pydev_execfile.py':PYDEV_FILE,
%(pydev_files)s
}
if IS_PY3K:
    # if we try to trace io.py it seems it can get halted (see http://bugs.python.org/issue4716)
    DONT_TRACE['io.py'] = LIB_FILE
    # Don't trace common encodings too
    DONT_TRACE['cp1252.py'] = LIB_FILE
    DONT_TRACE['utf_8.py'] = LIB_FILE
'''

    # Directories that are never descended into.
    skipped_dirs = frozenset([
        '.git',
        '.settings',
        'build',
        'build_tools',
        'dist',
        'pydevd.egg-info',
        'pydevd_attach_to_process',
        'pydev_sitecustomize',
        'stubs',
        'tests',
        'tests_mainloop',
        'tests_python',
        'tests_runfiles',
        'test_pydevd_reload',
        'third_party',
        '__pycache__',
        '_pydev_runfiles',
        'pydev_ipython',
    ])
    # Python files that must remain traceable.
    skipped_files = frozenset([
        '__init__.py',
        'runfiles.py',
        'pydev_coverage.py',
        'pydev_pysrc.py',
        'setup.py',
        'setup_cython.py',
        'interpreterInfo.py',
    ])

    pydev_files = []
    for root, dirs, files in os.walk(root_dir):
        # Prune in place: os.walk only honours modifications made to the
        # very list object it handed out.
        dirs[:] = [d for d in dirs if d not in skipped_dirs]
        for f in files:
            if f.endswith('.py') and f not in skipped_files:
                pydev_files.append(" '%s': PYDEV_FILE," % (f,))

    contents = template % (dict(pydev_files='\n'.join(sorted(pydev_files))))
    # Sanity check that the walk really covered the debugger sources.
    assert 'pydevd.py' in contents
    assert 'pydevd_dont_trace.py' in contents
    with open(os.path.join(root_dir, '_pydevd_bundle', 'pydevd_dont_trace_files.py'), 'w') as stream:
        stream.write(contents)
0
Example 65
Project: yaraQA Source File: yaraqa.py
def match_yara_rules(self):
'''
This method tries to match yara rules at malware and/or goodware repo.

For every directory in self.DIRECTORIES it walks all files, runs the
static rules and/or fetches the cuckoo report depending on self.method
('STATIC', 'MEMORY', anything else means both), keeps per-directory
counters and logs a summary. Returns the list
[PLOT_LABELS, PLOT_STATIC_RATIOS, PLOT_MEMORY_RATIOS, PLOT_TOTAL_MATCH].

NOTE(review): leading indentation was lost in this copy of the file; the
nesting described in the comments below is inferred -- confirm.
'''
rules = self.init_yara_rules()
self.PLOT_LABELS.append(format(str(self.family)))
for path in self.DIRECTORIES:
# Counters are reset for each scanned directory.
EXPECTED_MATCHES = 0
TOTAL_STATIC_MATCHES = 0
TOTAL_MEMORY_MATCHES = 0
STATIC_FAMILY_MATCHES = 0
MEMORY_FAMILY_MATCHES = 0
STATIC_FALSE_POSITIVES = 0
MEMORY_FALSE_POSITIVES = 0
STATIC_MISS = 0
MEMORY_MISS = 0
TOTAL_FILES = 0
TOTAL_MATCHES = 0
self.logger.debug('Matching against {0}'.format(str(path)))
self.logger.debug('========================================\n')
for root, dirs, files in os.walk(path):
for file in files:
current_file = os.path.join(root, file)
# file_matched makes sure a file matched by both methods is added to
# TOTAL_MATCHES only once.
file_matched = False
# In targeted mode only files whose path contains the family name are analyzed.
if self.targeted:
if self.family not in current_file:
continue
TOTAL_FILES = TOTAL_FILES + 1
# A file is an expected match when the family name is a substring of its path.
if self.family in current_file:
EXPECTED_MATCHES = EXPECTED_MATCHES + 1
self.logger.debug('\nTARGET: {0}'.format(str(current_file)))
# 'matches' is only bound for STATIC/ALL and 'task_id' only for MEMORY/ALL;
# the sections below are guarded by the same method checks.
if (self.method == 'STATIC'):
matches = rules.match(current_file)
elif (self.method == 'MEMORY'):
task_id = self.create_cuckoo_task(current_file)
else:
matches = rules.match(current_file)
task_id = self.create_cuckoo_task(current_file)
# MATCH STATIC
if (self.method == 'STATIC' or self.method == 'ALL'):
if matches:
TOTAL_STATIC_MATCHES = TOTAL_STATIC_MATCHES + 1
if self.family in current_file:
if not file_matched:
TOTAL_MATCHES = TOTAL_MATCHES + 1
file_matched = True
STATIC_FAMILY_MATCHES = STATIC_FAMILY_MATCHES + 1
self.logger.debug('-> STATIC YARA MATCH {0} \033[0;32m[OK]\033[0m'.format(str(matches)))
else:
STATIC_FALSE_POSITIVES = STATIC_FALSE_POSITIVES + 1
self.logger.debug('FALSE POSITIVE: ' + current_file)
self.logger.debug('-> STATIC YARA MATCH {0} \033[0;31m[FALSE POSITIVE]\033[0m'.format(str(matches)))
else:
if self.family in current_file:
STATIC_MISS = STATIC_MISS + 1
self.logger.debug('-> STATIC YARA \033[0;31m[MISS]\033[0m')
# MATCH MEMORY
if (self.method == 'MEMORY' or self.method == 'ALL'):
report = self.view_cuckoo_report(task_id)
matched = False
# NOTE(review): the family name is used as a regular expression as-is
# and the pattern is recompiled for every file.
rxp = re.compile(self.family, re.IGNORECASE)
# A memory match means some entry of report['memory']['yarascan']['data']
# has a 'rule' matching the family name, case-insensitively.
if 'memory' in report:
if 'yarascan' in report['memory']:
if 'data' in report['memory']['yarascan']:
matched = any(rxp.search(yar_n['rule']) for yar_n in report['memory']['yarascan']['data'])
else:
if self.family in current_file:
self.logger.debug("Warning: No 'data' key found in 'yarascan' section. file = {0}".format(str(current_file)))
else:
if self.family in current_file:
self.logger.debug("Warning: No 'yarascan' key found in 'memory' section. file = {0}".format(str(current_file)))
else:
if self.family in current_file:
self.logger.debug("Warning: No 'memory' key found in report data. file = {0}".format(str(current_file)))
if matched:
TOTAL_MEMORY_MATCHES = TOTAL_MEMORY_MATCHES + 1
if self.family in current_file:
if not file_matched:
TOTAL_MATCHES = TOTAL_MATCHES + 1
file_matched = True
MEMORY_FAMILY_MATCHES = MEMORY_FAMILY_MATCHES + 1
self.logger.debug('-> MEMORY YARA MATCH \033[0;32m[OK]\033[0m')
else:
MEMORY_FALSE_POSITIVES = MEMORY_FALSE_POSITIVES + 1
self.logger.debug('FALSE POSITIVE: {0}'.format(str(current_file)))
self.logger.debug('-> MEMORY YARA MATCH \033[0;31m[FALSE POSITIVE]\033[0m')
else:
if self.family in current_file:
MEMORY_MISS = MEMORY_MISS + 1
self.logger.debug('-> MEMORY YARA \033[0;31m[MISS]\033[0m')
# Summary for the directory that was just scanned (presumably once per
# path, after the walk -- verify the nesting).
if path == self.MALWARE_DIR:
self.logger.debug('\n\t_MALWARE REPO_')
elif path == self.GOODWARE_DIR:
self.logger.debug('\n\t_GOODWARE REPO_')
if (self.method == 'STATIC' or self.method == 'ALL'):
self.logger.debug('\n STATIC YARA Q&A OVERVIEW:')
self.logger.debug(' =========================')
self.print_results('STATIC', path, EXPECTED_MATCHES, STATIC_FAMILY_MATCHES, STATIC_MISS, STATIC_FALSE_POSITIVES, TOTAL_STATIC_MATCHES)
if (self.method == 'MEMORY' or self.method == 'ALL'):
self.logger.debug('\n MEMORY YARA Q&A OVERVIEW:')
self.logger.debug(' =========================')
self.print_results('MEMORY', path, EXPECTED_MATCHES, MEMORY_FAMILY_MATCHES, MEMORY_MISS, MEMORY_FALSE_POSITIVES, TOTAL_MEMORY_MATCHES)
# Overall accuracy is only computed for the malware repository.
if path == self.MALWARE_DIR:
if EXPECTED_MATCHES != 0:
# NOTE(review): both operands are ints; under Python 2 this division
# truncates before the multiplication by 100 -- confirm the target version.
TOTAL_MATCHES = (TOTAL_MATCHES/EXPECTED_MATCHES)*100
# From here on TOTAL_MATCHES is a formatted string, no longer a counter.
TOTAL_MATCHES = "{:.2f}".format(TOTAL_MATCHES)
self.PLOT_TOTAL_MATCH.append(float(TOTAL_MATCHES))
self.print_threshold(" Total Accuracy: ", TOTAL_MATCHES)
self.logger.debug(" Total files analyzed: {0}\n\n".format(str(TOTAL_FILES)))
# Plotting is optional and additionally depends on pygal_available.
if self.plot:
if pygal_available:
self.render_plot()
DATA_PLOT = [self.PLOT_LABELS, self.PLOT_STATIC_RATIOS, self.PLOT_MEMORY_RATIOS, self.PLOT_TOTAL_MATCH]
return DATA_PLOT
0
Example 66
def getFiles(configDict, configFile = None):
    """Collect the JavaScript sources of the configured directories in dependency order.

    configDict -- maps a key to a source directory. The first path component
                  of every relative file path is used as the key to look the
                  directory up again when the file is read.
    configFile -- optional path handed to Config(); its include, exclude,
                  forceFirst and forceLast lists filter and re-order the result.

    Returns the tuple (files, order): ``files`` maps each relative file path
    (with "/" separators) to its SourceFile, ``order`` lists the file paths
    so that every file comes after the files it requires.
    """
    cfg = None
    if configFile:
        cfg = Config(configFile)

    ## Build array of directories
    allDirs = []
    for v in configDict.values():
        if v not in allDirs:
            allDirs.append(v)

    allFiles = []

    ## Find all the Javascript source files
    for sourceDirectory in allDirs:
        for root, dirs, filenames in os.walk(sourceDirectory):
            for filename in filenames:
                if filename.endswith(SUFFIX_JAVASCRIPT) and not filename.startswith("."):
                    # Path relative to the source directory, "/" separated
                    filepath = os.path.join(root, filename)[len(sourceDirectory)+1:]
                    filepath = filepath.replace("\\", "/")
                    if cfg and cfg.include:
                        if filepath in cfg.include or filepath in cfg.forceFirst:
                            allFiles.append(filepath)
                    elif (not cfg) or (filepath not in cfg.exclude):
                        allFiles.append(filepath)

    files = {}
    order = [] # List of filepaths to output, in a dependency satisfying order

    def _importFile(filepath):
        """Read one source file and register it in ``files``."""
        print("Importing: %s" % filepath)
        filekey = filepath.replace("\\", "/").split("/")[0]
        fullpath = os.path.join(configDict[filekey], filepath)
        # The handle is closed deterministically instead of being left to
        # the garbage collector.
        with open(fullpath, "U") as handle:
            content = handle.read() # TODO: Ensure end of line @ EOF?
        files[filepath] = SourceFile(filepath, content) # TODO: Chop path?

    ## Import file source code
    ## TODO: Do import when we walk the directories above?
    for filepath in allFiles:
        _importFile(filepath)

    print("")

    from toposortmf import toposort

    complete = False
    resolution_pass = 1

    # Importing a required file can reveal further requirements, so the
    # resolution is repeated until one pass satisfies every dependency.
    while not complete:
        order = [] # List of filepaths to output, in a dependency satisfying order
        nodes = []
        routes = []
        ## Resolve the dependencies
        print("Resolution pass %s... " % resolution_pass)
        resolution_pass += 1

        for filepath, info in files.items():
            nodes.append(filepath)
            for neededFilePath in info.requires:
                routes.append((neededFilePath, filepath))

        for dependencyLevel in toposort(nodes, routes):
            for filepath in dependencyLevel:
                order.append(filepath)
                if filepath not in files:
                    _importFile(filepath)

        # Double check all dependencies have been met
        complete = True
        try:
            for fp in order:
                position = order.index(fp)
                if max([order.index(rfp) for rfp in files[fp].requires] +
                       [position]) != position:
                    complete = False
        except (KeyError, ValueError):
            # A required file is not part of the ordering yet
            complete = False

        print("")

    ## Move forced first and last files to the required position
    if cfg:
        print("Re-ordering files...")
        order = cfg.forceFirst + [item
                     for item in order
                     if ((item not in cfg.forceFirst) and
                         (item not in cfg.forceLast))] + cfg.forceLast

    return (files, order)
0
Example 67
Project: autospec Source File: buildreq.py
def scan_for_configure(package, dir, autospecdir):
"""Walk a source tree and derive the build pattern and build requirements.

For every directory below ``dir`` the file names are checked for well
known build system markers (go sources, CMakeLists.txt, configure,
setup.py, Makefile.PL/Build.PL, SConstruct, Makefile, autogen.sh, ...);
each hit registers a build pattern through buildpattern.set_build_pattern
and/or requirements through add_buildreq. Finally the sorted buildreqs
are printed, five per line.

``package`` is not referenced in this function body.
``autospecdir`` is where the patch 'series' file is looked up.

NOTE(review): leading indentation was lost in this copy of the file; the
nesting described in the comments below is inferred -- confirm.
"""
# NOTE(review): default_summary is declared global but not assigned here.
global default_summary
count = 0
for dirpath, dirnames, files in os.walk(dir):
# Markers found in the top directory get score 2, those in sub-directories 1.
default_score = 2
if dirpath != dir:
default_score = 1
if any(file.endswith(".go") for file in files) and tarball.go_pkgname:
add_buildreq("go")
tarball.name = tarball.go_pkgname
buildpattern.set_build_pattern("golang", default_score)
if "CMakeLists.txt" in files and "configure.ac" not in files:
add_buildreq("cmake")
buildpattern.set_build_pattern("cmake", default_score)
# A configure script only counts when it is executable.
if "configure" in files and os.access(dirpath + '/configure', os.X_OK):
buildpattern.set_build_pattern("configure", default_score)
if "requires.txt" in files:
grab_python_requirements(dirpath + '/requires.txt')
if "setup.py" in files:
add_buildreq("python-dev")
add_buildreq("setuptools")
add_buildreq("pbr")
add_buildreq("pip")
if setup_py_python3(dirpath + '/setup.py') or setup_py_python3(dirpath + '/PKG-INFO'):
add_buildreq("python3-dev")
buildpattern.set_build_pattern("distutils23", default_score)
# force override the pypi rule
if buildpattern.default_pattern == 'distutils' and buildpattern.pattern_strengh <= 10:
buildpattern.default_pattern = 'distutils23'
else:
# check for adding python3 support in patches
# A missing or unreadable 'series' file is ignored on purpose.
# NOTE(review): the bare except also hides errors raised by the calls inside.
try:
with open(autospecdir + '/series', 'r') as series:
for patchname in series:
if setup_py_python3(autospecdir + '/' + patchname.strip()):
add_buildreq("python3-dev")
buildpattern.set_build_pattern("distutils23", default_score)
# force override the pypi rule
if buildpattern.default_pattern == 'distutils' and buildpattern.pattern_strengh <= 10:
buildpattern.default_pattern = 'distutils23'
except:
pass
# NOTE(review): presumably the plain "distutils" fallback of the else
# branch above -- verify its nesting level.
buildpattern.set_build_pattern("distutils", default_score)
if "Makefile.PL" in files or "Build.PL" in files:
buildpattern.set_build_pattern("cpan", default_score)
if "SConstruct" in files:
add_buildreq("scons")
add_buildreq("python-dev")
buildpattern.set_build_pattern("scons", default_score)
if "requirements.txt" in files:
grab_python_requirements(dirpath + '/requirements.txt')
# Case-insensitive checks on every individual file name.
for name in files:
if name.lower().startswith("configure."):
parse_configure_ac(os.path.join(dirpath, name))
if name.lower().startswith("rakefile"):
Rakefile(os.path.join(dirpath, name))
if name.lower() == "makefile":
buildpattern.set_build_pattern("make", default_score)
if name.lower() == "autogen.sh":
buildpattern.set_build_pattern("autogen", default_score)
if name.lower() == "cmakelists.txt":
buildpattern.set_build_pattern("cmake", default_score)
# autoreconf is only kept when the top directory has a configure.ac or configure.in.
can_reconf = os.path.exists(os.path.join(dir, "configure.ac"))
if not can_reconf:
can_reconf = os.path.exists(os.path.join(dir, "configure.in"))
if can_reconf and patches.autoreconf:
print("Patches touch configure.*, adding autoreconf stage")
for breq in autoreconf_reqs:
add_buildreq(breq)
else:
patches.autoreconf = False
# Print the requirements, starting a new "Buildreqs :" line after five entries.
print("Buildreqs : ", end="")
for lic in sorted(buildreqs):
if count > 4:
count = 0
print("\nBuildreqs : ", end="")
count = count + 1
print(lic + " ", end="")
print("")
0
Example 68
def main():
"""Command line entry point: build 6-hour files from EDL data folders.

Positional arguments read from sys.argv: data path, sampling interval in
seconds, then optionally an output directory and a station name (or a
comma separated list of station names). An argument starting with '-'
whose second character is 'r'/'R' switches on recursive folder search.
Every station is handed to MTfh.EDL_make_Nhour_files; any problem ends
the program through sys.exit with a message.

NOTE(review): leading indentation was lost in this copy of the file; the
nesting described in the comments below is inferred -- confirm.
"""
#print '\n\tnot working yet - code under development !!\n'
#return
# NOTE(review): the check requires two arguments while the message asks for
# four -- confirm which one is intended.
if len(sys.argv) < 3:
sys.exit('\nNeed at least 4 arguments: \n\n '
'<path to files> \n <sampling in seconds> \n'
'<output dir> \n <stationname>\n'
'[optional: <recursive flag -R>]\n'
'(set this option for including all subfolders)\n\n')
print
outdir = None
stationname = None
recursive = False
multiple_stations = False
# Optional arguments: the first non-flag value is the output directory, the
# second one the station name.
# NOTE(review): o[1] raises IndexError when the argument is a lone '-'.
if len(sys.argv) > 3:
optionals = sys.argv[3:]
for o in optionals:
o = o.strip()
if o[0] == '-':
if o[1].lower() == 'r':
recursive = True
continue
elif outdir is None:
outdir = o
continue
elif stationname is None:
stationname = o
continue
if stationname is not None:
#check, if it's actually a comma-separated list:
try:
stationlist = stationname.split(',')
if len(stationlist) > 1:
multiple_stations = True
stationlist = [i.upper() for i in stationlist]
except:
stationlist = [stationname]
# Without a station name the list holds a single None entry.
else: stationlist = [None]
print stationlist
pathname_raw = sys.argv[1]
pathname = op.abspath(op.realpath(pathname_raw))
if not op.isdir(pathname):
sys.exit('Data file(s) path not existing: {0}\n'.format(pathname))
# The bare "raise" has no active exception; the resulting error is caught by
# the bare except below, so a non-positive sampling ends in the same exit.
try:
sampling = float(sys.argv[2])
if sampling <= 0 : raise
except:
sys.exit('Second argument must be sampling interval in seconds (int/float)')
# Recursive mode: replace pathname by the list of sub-folders containing at
# least one entry whose name includes a station name (case-insensitive).
# NOTE(review): folders are only collected when a station name was given, and
# the loop below rebinds 'stationname'.
if recursive is True:
lo_folders = []
for i,j,k in os.walk(pathname):
lof = [op.abspath(op.join(i,f)) for f in j]
if stationname is not None:
for stationname in stationlist:
for curr_folder in lof:
content_of_folder = os.listdir(curr_folder)
#print curr_folder
lof_station = [i for i in content_of_folder if stationname.lower() in i.lower()]
if len(lof_station) > 0 :
lo_folders.append(curr_folder)
pathname = list(set(lo_folders))
if len(pathname) == 0:
sys.exit('\n\tERROR - No (sub-) folders for stations {0} found\n'.format(stationlist))
# NOTE(review): stationlist may be [None] (see above), in which case
# stationname.upper() would raise AttributeError -- confirm.
for stationname in stationlist:
print '....\n'
print 'processing station ',stationname.upper()
# if pathname[0] is not None:
# station_pathname = [i for i in pathname if stationname.lower() in i.lower()]
# if len(station_pathname) == 0:
# station_pathname = None
# else:
station_pathname = pathname
# Every failure of the conversion is turned into a sys.exit message.
try:
MTfh.EDL_make_Nhour_files(6,station_pathname, sampling, stationname.upper(), outdir)
except MTex.MTpyError_inputarguments:
if stationname is None:
sys.exit('\n\tERROR - No data found in (sub-)folders\n')
else:
sys.exit('\n\tERROR - No data found in (sub-)folders for station {0}\n'.format(stationname.upper()))
except MemoryError:
sys.exit('\n\tERROR - Not enough memory to store temporary arrays!\n')
except IOError:
sys.exit('\n\tERROR - Not enough space on local disk to store output!\n')
except:
sys.exit('\n\tERROR - could not process (sub-)folders')
print '\n'
0
Example 69
Project: qiime Source File: all_tests.py
def main():
    """Run the unit tests and the script usage tests and print a summary.

    Unit tests are the ``test_*.py`` files found below the directory of this
    file, or the ones matching ``opts.unit_test_glob`` when it is given. Each
    one is run in its own interpreter; a run counts as passed when its stderr
    ends with "OK".

    Returns 0 when all unit tests passed, no test failed because of a missing
    external application and the script usage tests (unless suppressed)
    succeeded; 1 otherwise, as python's unittest module does to indicate one
    or more failures.
    """
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    if (opts.suppress_unit_tests and opts.suppress_script_usage_tests):
        option_parser.error(
            "You're suppressing both test types. Nothing to run.")

    test_dir = abspath(dirname(__file__))

    # Raw string: "\s" is not a valid string escape sequence.
    unittest_good_pattern = re.compile(r'OK\s*$')
    application_not_found_pattern = re.compile('ApplicationNotFoundError')
    python_name = 'python'
    bad_tests = []
    missing_application_tests = []

    # Run through all of QIIME's unit tests, and keep track of any files which
    # fail unit tests.
    if not opts.suppress_unit_tests:
        unittest_names = []
        if not opts.unit_test_glob:
            for root, dirs, files in walk(test_dir):
                for name in files:
                    if name.startswith('test_') and name.endswith('.py'):
                        unittest_names.append(join(root, name))
        else:
            for fp in glob(opts.unit_test_glob):
                fn = split(fp)[1]
                if fn.startswith('test_') and fn.endswith('.py'):
                    unittest_names.append(abspath(fp))

        unittest_names.sort()

        for unittest_name in unittest_names:
            print("Testing %s:\n" % unittest_name)
            command = '%s %s -v' % (python_name, unittest_name)
            stdout, stderr, return_value = qiime_system_call(command)
            print(stderr)
            if not unittest_good_pattern.search(stderr):
                # Failures caused by missing applications are reported apart
                if application_not_found_pattern.search(stderr):
                    missing_application_tests.append(unittest_name)
                else:
                    bad_tests.append(unittest_name)

    qiime_test_data_dir = join(get_qiime_project_dir(), 'qiime_test_data')
    qiime_test_data_dir_exists = exists(qiime_test_data_dir)
    if not opts.suppress_script_usage_tests and qiime_test_data_dir_exists:
        if opts.script_usage_tests is not None:
            script_usage_tests = opts.script_usage_tests.split(',')
        else:
            script_usage_tests = None

        # Run the script usage testing functionality
        script_usage_result_summary, has_script_usage_example_failures = \
            run_script_usage_tests(
                test_data_dir=qiime_test_data_dir,
                scripts_dir=get_qiime_scripts_dir(),
                working_dir=get_qiime_temp_dir(),
                verbose=True,
                tests=script_usage_tests,
                force_overwrite=True,
                timeout=400)

    print("==============\nResult summary\n==============")

    if not opts.suppress_unit_tests:
        print("\nUnit test result summary\n------------------------\n")
        if bad_tests:
            print("\nFailed the following unit tests.\n%s" % '\n'.join(bad_tests))

        if missing_application_tests:
            print(("\nFailed the following unit tests, in part or whole due "
                   "to missing external applications.\nDepending on the QIIME features "
                   "you plan to use, this may not be critical.\n%s")
                  % '\n'.join(missing_application_tests))

        if not (missing_application_tests or bad_tests):
            print("\nAll unit tests passed.\n\n")

    if not opts.suppress_script_usage_tests:
        if qiime_test_data_dir_exists:
            print("\nScript usage test result summary\n--------------------------------\n")
            print(script_usage_result_summary)
        else:
            print("\nCould not run script usage tests because the directory %s does not exist." % qiime_test_data_dir)
        print("")

    # If script usage tests weren't suppressed, the qiime_test_data dir must
    # exist and we can't have any failures. The short-circuit keeps
    # has_script_usage_example_failures from being read when the script
    # usage tests did not run.
    script_usage_tests_success = (opts.suppress_script_usage_tests or
                                  (qiime_test_data_dir_exists and
                                   not has_script_usage_example_failures))

    # If any of the unit tests or script usage tests fail, or if we have any
    # missing application errors, use return code 1 (as python's unittest
    # module does to indicate one or more failures).
    all_passed = (not bad_tests and not missing_application_tests and
                  script_usage_tests_success)
    return 0 if all_passed else 1
0
Example 70
Project: sublime-config Source File: api_docs.py
def run():
    """
    Looks through the docs/ dir and parses each markdown document, looking for
    sections to update from Python docstrings. Looks for section headers in
    the format:

     - ### `ClassName()` class
     - ##### `.method_name()` method
     - ##### `.attribute_name` attribute
     - ### `function_name()` function

    The markdown content following these section headers up until the next
    section header will be replaced by new markdown generated from the Python
    docstrings of the associated source files.

    By default maps docs/{name}.md to {modulename}/{name}.py. Allows for
    custom mapping via the MD_SOURCE_MAP variable.

    :raises:
        ValueError - when a section found in a markdown file has no matching
        documentation in the associated Python sources
    """

    print('Updating API docs...')

    def _replace_md(key, sections, md_chunk, md_lines, added_lines):
        """
        Replaces the lines of one section in md_lines with md_chunk.

        added_lines is the offset accumulated by earlier replacements in the
        same file; the updated offset is returned.
        """

        start, end = sections[key]
        # Section line numbers are 1-based, list indexes are 0-based
        start -= 1
        start += added_lines
        end += added_lines
        new_lines = md_chunk.split('\n')
        added_lines += len(new_lines) - (end - start)

        # Ensure a newline above each class header
        if start > 0 and md_lines[start][0:4] == '### ' and md_lines[start - 1][0:1] == '>':
            added_lines += 1
            new_lines.insert(0, '')

        md_lines[start:end] = new_lines
        return added_lines

    md_files = []
    for root, _, filenames in os.walk(docs_dir):
        for filename in filenames:
            if not filename.endswith('.md'):
                continue
            md_files.append(os.path.join(root, filename))

    parser = CommonMark.DocParser()

    for md_file in md_files:
        md_file_relative = md_file[len(project_dir) + 1:]
        if md_file_relative in MD_SOURCE_MAP:
            py_files = MD_SOURCE_MAP[md_file_relative]
            py_paths = [os.path.join(project_dir, py_file) for py_file in py_files]
        else:
            py_files = [os.path.basename(md_file).replace('.md', '.py')]
            py_paths = [os.path.join(project_dir, module_name, py_files[0])]

            # Markdown files without a same-named module are not API docs
            if not os.path.exists(py_paths[0]):
                continue

        with open(md_file, 'rb') as f:
            markdown = f.read().decode('utf-8')

        original_markdown = markdown
        md_lines = list(markdown.splitlines())
        md_ast = parser.parse(markdown)

        last_class = []
        last = {}
        sections = OrderedDict()
        find_sections(md_ast, sections, last, last_class, markdown.count("\n") + 1)

        md_chunks = {}

        for py_file, py_path in zip(py_files, py_paths):
            with open(py_path, 'rb') as f:
                code = f.read().decode('utf-8')
            module_ast = ast.parse(code, filename=py_file)
            code_lines = list(code.splitlines())

            for node in ast.iter_child_nodes(module_ast):
                walk_ast(node, code_lines, sections, md_chunks)

        added_lines = 0
        for key in sections:
            if key not in md_chunks:
                raise ValueError('No documentation found for %s' % key[1])
            added_lines = _replace_md(key, sections, md_chunks[key], md_lines, added_lines)

        markdown = '\n'.join(md_lines).strip() + '\n'

        # Only touch the file when something actually changed
        if original_markdown != markdown:
            with open(md_file, 'wb') as f:
                f.write(markdown.encode('utf-8'))
0
Example 71
Project: Panda3D-Shader-Generator Source File: shaderBuilder.py
def loadPath(self,paths):
    """
    called by init, but can be called again if you wish to reload the same paths, or a different one

    Every ".txt" file found below the given paths is parsed. Its "node"
    sections are turned into metaCodeNode entries of self.nodeTypeClassMap
    (keyed by the node name), the code of its "lib" sections is joined into
    self.libSource, and any other section is reported and dropped.
    """
    libs=[]
    walked=itertools.chain.from_iterable(os.walk(path) for path in paths)
    for root, dirs, files in walked:
        for fileName in files:
            if os.path.splitext(fileName)[1]!=".txt":
                continue
            currentFile=join(root, fileName)
            for key,xitems in _parseFile(currentFile).iteritems():
                if key=="lib":
                    libs.append(xitems)
                    continue
                if key!="node":
                    print("Warning: throwing away invalid majorSection with unrecognized name: "+key+" in file: "+currentFile)
                    continue

                for items in xitems:
                    if "info" not in items:
                        print("node missing info section in: "+currentFile)
                        continue
                    info=_parseInfoLines(items["info"],currentFile)
                    if "name" not in info:
                        print("invalid info entry missing name in: "+currentFile)
                        continue
                    nodeName=info["name"]

                    # every optional section defaults to an empty list
                    shaderInputs=[]
                    for s in items.get("shaderinputs",[]) if "shaderinputs" in items else []:
                        shaderInputs.append(param.shaderParamFromDefCode(s))

                    # an output node has to declare the stage it belongs to
                    isOutPut=False
                    stage=None
                    if "output" in info:
                        o=info["output"]
                        assert o in ["True","False"]
                        isOutPut=o=="True"
                        assert "stage" in info
                        stage=info["stage"]

                    inLinks=[]
                    if "inlinks" in items:
                        for s in items["inlinks"]:
                            inLinks.append(param.linkEndFromDefCode(s))

                    outLinks=[]
                    if "outlinks" in items:
                        for s in items["outlinks"]:
                            outLinks.append(param.linkEndFromDefCode(s))

                    code=""
                    if "code" in items:
                        code="\n".join(items["code"])

                    node=nodes.metaCodeNode(nodeName,code,shaderInputs,inLinks,outLinks,isOutPut=isOutPut,stage=stage)
                    # a later definition replaces an earlier one of the same name
                    if nodeName in self.nodeTypeClassMap:
                        print("Warning: overwriting node "+repr(self.nodeTypeClassMap[nodeName])+" with "+repr(node)+" from "+currentFile)
                    self.nodeTypeClassMap[nodeName]=node

    allLibs=itertools.chain.from_iterable(libs)
    self.libSource="\n".join(itertools.chain.from_iterable(lib["code"] for lib in allLibs if "code" in lib))
0
Example 72
Project: dagbldr Source File: datasets.py
def fetch_fruitspeech():
    """ Check for fruitspeech data

    Recorded by Hakon Sandsmark

    Returns
    -------
    summary : dict
        A dictionary containing data

    summary["data"] : list
        List of list of ints

    summary["specgrams"] : list
        List of arrays in (n_frames, n_features) format

    summary["target_names"] : list
        List of strings

    summary["target"] : list
        List of list of int

    summary["train_indices"] : array
        Indices for training samples

    summary["valid_indices"] : array
        Indices for validation samples

    summary["vocabulary_size"] : int
        Total vocabulary size

    summary["vocabulary"] : string
        The whole vocabulary as a string
    """
    data_path = check_fetch_fruitspeech()
    audio_matches = [os.path.join(root, wav_name)
                     for root, dirnames, filenames in os.walk(data_path)
                     for wav_name in fnmatch.filter(filenames, '*.wav')]

    all_chars, all_words, all_data, all_specgram_data = [], [], [], []
    for wav_path in audio_matches:
        # The word is the file name minus its last six characters;
        # it is converted to integer character classes.
        word = wav_path.split(os.sep)[-1][:-6]
        chars = string_to_character_index(word)
        fs, samples = wavfile.read(wav_path)
        samples = samples.astype("int32")
        # Preprocessing from A. Graves "Towards End-to-End Speech
        # Recognition"
        magnitude = np.abs(stft(samples, fftsize=128))
        Pxx = 10. * np.log10(magnitude).astype(theano.config.floatX)
        all_data.append(samples)
        all_specgram_data.append(Pxx)
        all_chars.append(chars)
        all_words.append(word)
    vocabulary_size = len(all_vocabulary_chars)

    # Shuffle all four lists together with a fixed seed
    all_lists = list(safe_zip(all_data, all_specgram_data, all_chars,
                              all_words))
    random_state = np.random.RandomState(1999)
    random_state.shuffle(all_lists)
    all_data, all_specgram_data, all_chars, all_words = zip(*all_lists)

    # Hold out the last four occurrences of every word for validation
    train_matches = []
    valid_matches = []
    for w in list(set(all_words)):
        positions = [n for n, other in enumerate(all_words) if other == w]
        train_matches.append(positions[:-4])
        valid_matches.append(positions[-4:])

    def flat_sorted(groups):
        return np.array(sorted(
            [r for group in groups for r in group])).astype("int32")

    train_indices = flat_sorted(train_matches)
    valid_indices = flat_sorted(valid_matches)

    # reorganize into contiguous blocks: training samples first
    def contiguous(seq):
        ordered = [seq[i] for i in train_indices]
        ordered += [seq[i] for i in valid_indices]
        return np.asarray(ordered)

    all_data = contiguous(all_data)
    all_specgram_data = contiguous(all_specgram_data)
    all_chars = contiguous(all_chars)
    all_words = contiguous(all_words)

    # after reorganizing finalize indices
    train_indices = np.arange(len(train_indices))
    valid_indices = np.arange(len(valid_indices)) + len(train_indices)

    summary = {}
    summary["data"] = all_data
    summary["specgrams"] = all_specgram_data
    summary["target"] = all_chars
    summary["target_names"] = all_words
    summary["train_indices"] = train_indices
    summary["valid_indices"] = valid_indices
    summary["vocabulary_size"] = vocabulary_size
    summary["vocabulary"] = all_vocabulary_chars
    return summary
0
Example 73
Project: OWASP-ZSC Source File: run.py
def getcommand(commands):
"""Run the interactive command prompt.

Reads commands with ``_input`` in an endless loop.  When the typed command
matches a key of the current ``commands`` mapping the prompt descends one
level into that mapping (``crawler`` counts the depth, ``command_path``
holds the prompt path).  Built-in commands (exit/quit, update, help,
restart, about, version, clear, back) are handled at the end of each pass.
The loop only ends through ``sys.exit``.

NOTE(review): the indentation of this listing was lost, so the comments
below describe individual statements rather than the exact nesting.
"""
# Reference to the top-level command tree; "commands" is reset to it
# whenever a command finishes or the prompt is restarted.
backup_commands = commands
# Number of levels the user has descended into the command tree.
crawler = 0
command_path = ['zsc']
command = ''
while True:
try:
command = _input('/'.join(command_path), 'any', False)
if command is None:
# NOTE(review): bare name used as a statement; presumably meant to raise
# NameError so the bare except below treats a None input like an
# interrupt -- confirm.
_lets_error
except:
warn('interrupted by user!\nExit\n')
sys.exit(0)
check = True
if command.startswith('#'): # allows for comments
continue
# Commands that are valid at every level and never descend into the tree.
inContext = ['clear', 'help', 'about', 'version', 'back']
for option in commands:
if command == option and command not in inContext:
crawler += 1
# NOTE(review): "is" tests identity, not equality; it works for small ints
# on CPython, but "==" is the portable comparison (same for the tests below).
if crawler is 1:
commands = commands[option][1]
command_path.append(option)
if crawler is 2:
if command == 'search':
_search_shellcode(False,0)
commands = backup_commands
completer = autocomplete(commands)
readline.set_completer(completer.complete)
readline.parse_and_bind('tab: complete')
crawler = 0
command_path = ['zsc']
elif command == 'download':
_download_shellcode(False,0,'')
commands = backup_commands
completer = autocomplete(commands)
readline.set_completer(completer.complete)
readline.parse_and_bind('tab: complete')
crawler = 0
command_path = ['zsc']
elif command == 'shell_storm_list':
_grab_all()
commands = backup_commands
completer = autocomplete(commands)
readline.set_completer(completer.complete)
readline.parse_and_bind('tab: complete')
crawler = 0
command_path = ['zsc']
elif command == 'generate':
commands = commands[option]
command_path.append(option)
else:
# Any other second-level command: ask for a file, then for an encoder,
# and obfuscate the file content with obf_code.
while True:
f = []
# "os" is rebound as a local further down (os = option), so the module
# is imported under a different name here.
import os as OS
# Collect file names for tab-completion; presumably only the first
# os.walk result (the current directory) is used because of the break.
for (dirpath, dirnames, filenames) in OS.walk('.'):
f.extend(filenames)
break
completer = autocomplete(f)
readline.set_completer(completer.complete)
filename = _input('filename', 'any', True)
completer = autocomplete(commands)
readline.set_completer(completer.complete)
try:
content = open(filename, 'rb').read()
break
# NOTE(review): bare except -- any failure to read is reported as
# "can't find file"; the file object is also never closed explicitly.
except:
warn('sorry, cann\'t find file\n')
commands = commands[option]
command_path.append(option)
completer = autocomplete(commands)
readline.set_completer(completer.complete)
readline.parse_and_bind('tab: complete')
# Keep prompting until the entered name is one of the keys of "commands".
t = True
while t:
encode = _input('encode', 'any', True)
for en in commands:
if encode == en:
t = False
if t is True:
warn('please enter a valid encode name\n')
obf_code(option, encode, filename, content,False)
commands = backup_commands
completer = autocomplete(commands)
readline.set_completer(completer.complete)
readline.parse_and_bind('tab: complete')
crawler = 0
command_path = ['zsc']
if crawler is 3:
# Shadows the "os" module name inside this function; from here on "os"
# is the option selected at depth three.
os = option
commands = commands[option]
command_path.append(option)
if crawler is 4:
func = option
commands = commands[option]
command_path.append(option)
if crawler is 5:
data = []
backup_option = option
if option != '':
# The option names the generator's arguments, separated by '&&';
# prompt for a value for each of them.
options = option.rsplit('&&')
for o in options:
data.append(_input(o,'any',True))
n = 0
write('\n')
for o in options:
info('%s set to "%s"\n' % (o, data[n]))
n += 1
# Dynamically import lib.generator.<os>.<func> and fetch its run() callable.
run = getattr(
__import__('lib.generator.%s.%s' % (os, func),
fromlist=['run']),
'run')
shellcode = run(data)
write('\n')
# List the encoders available for this os/function/option combination.
for encode in backup_commands['shellcode'][1]['generate'][
os][func][backup_option]:
info(encode + '\n')
write('\n\n')
info('enter encode type\n')
completer = autocomplete(backup_commands['shellcode'][1][
'generate'][os][func][backup_option])
readline.set_completer(completer.complete)
readline.parse_and_bind('tab: complete')
try:
encode = _input('/'.join(command_path) + "/encode_type", 'any', False)
if encode is None:
_lets_error
# Any failure while reading the encoder name falls back to 'none'.
except:
encode = 'none'
warn(
'\n"none" encode selected\n')
write('\n')
assembly_code_or_not = _input(
'Output assembly code?(y or n)', 'any', True)
if assembly_code_or_not == 'y':
assembly_code = True
else:
assembly_code = False
if assembly_code is True:
write('\n'+encode_process(encode, shellcode, os, func) + '\n\n')
output_shellcode = _input('Output shellcode to screen?(y or n)', 'any', True)
shellcode_op = op( encode_process(encode, shellcode, os, func), os)
if output_shellcode == 'y':
info('Generated shellcode is:\n' + shellcode_op +'\n\n')
file_or_not = _input('Shellcode output to a .c file?(y or n)', 'any', True)
if file_or_not == 'y':
target = _input('Target .c file?', 'any', True)
file_output(target, func, data, os, encode, shellcode, shellcode_op)
commands = backup_commands
completer = autocomplete(commands)
readline.set_completer(completer.complete)
readline.parse_and_bind('tab: complete')
crawler = 0
command_path = ['zsc']
completer = autocomplete(commands)
readline.set_completer(completer.complete)
readline.parse_and_bind('tab: complete')
# A tree option matched, so "Command not found!" must not be reported below.
check = False
# Built-in commands.
if command == 'exit' or command == 'quit':
write(color.color('reset'))
sys.exit('Exit')
elif command == 'update':
_update(__version__)
commands = backup_commands
completer = autocomplete(commands)
readline.set_completer(completer.complete)
readline.parse_and_bind('tab: complete')
crawler = 0
command_path = ['zsc']
elif command == 'help':
_help(help)
elif command == 'restart':
commands = backup_commands
completer = autocomplete(commands)
readline.set_completer(completer.complete)
readline.parse_and_bind('tab: complete')
crawler = 0
command_path = ['zsc']
elif command == 'about':
about()
elif command == 'version':
_version()
elif command == 'clear':
_clear()
elif command == 'back':
if len(command_path) > 1:
# Step one level up: drop the last path element and rebuild "commands"
# by walking the remaining path from the root of the tree.
command_path.pop()
commands = backup_commands
for option in command_path:
if option == 'zsc':
pass
elif option == command_path[1]:
commands = commands[option][1]
else:
commands = commands[option]
completer = autocomplete(commands)
readline.set_completer(completer.complete)
readline.parse_and_bind('tab: complete')
crawler -= 1
else:
info('Can\'t go back from here!\n')
else:
if command != '' and check is True:
info('Command not found!\n')
0
Example 74
Project: dexy Source File: process.py
def add_new_files(self):
    """
    Walk the working directory and add a new dexy document for every newly
    created file found.

    A file is skipped when its workspace-relative path is already in
    ``self._files_workspace_populated_with``, when it does not match the
    'add-new-files' setting, or when its path contains one of the
    'exclude-add-new-files' substrings.  Directories named in
    'exclude-new-files-from-dir' are not descended into.
    """
    workspace_dir = self.workspace()
    self.log_debug("adding new files found in %s for %s" % (workspace_dir, self.key))

    wanted = self.setting('add-new-files')
    if isinstance(wanted, basestring):
        # A single pattern is treated as a one-element list.
        wanted = [wanted]

    exclude = self.setting('exclude-add-new-files')
    skip_dirs = self.setting('exclude-new-files-from-dir')

    if isinstance(exclude, basestring):
        raise dexy.exceptions.UserFeedback("exclude-add-new-files should be a list, not a string")

    new_files_added = 0
    for dirpath, subdirs, filenames in os.walk(workspace_dir):
        # Prune in place so os.walk does not enter excluded directories.
        subdirs[:] = [d for d in subdirs if d not in skip_dirs]

        for filename in filenames:
            filepath = os.path.normpath(os.path.join(dirpath, filename))
            relpath = os.path.relpath(filepath, workspace_dir)
            self.log_debug("Processing %s" % filepath)

            if relpath in self._files_workspace_populated_with:
                # already have this file
                continue

            if isinstance(wanted, list):
                is_valid_file_extension = False
                for pattern in wanted:
                    # Patterns with a "*" are globs on the relative path,
                    # anything else is a plain file name suffix.
                    if "*" in pattern:
                        matched = fnmatch.fnmatch(relpath, pattern)
                    else:
                        matched = filename.endswith(pattern)
                    if matched:
                        is_valid_file_extension = True

                if not is_valid_file_extension:
                    msg = "Not adding filename %s, does not match patterns: %s"
                    self.log_debug(msg % (filepath, ", ".join(wanted)))
                    continue

            elif isinstance(wanted, bool):
                if not wanted:
                    msg = "add_new_files method should not be called if setting is False"
                    raise dexy.exceptions.InternalDexyProblem(msg)
                is_valid_file_extension = True

            else:
                msg = "add-new-files setting should be list or boolean. Type is %s value is %s"
                raise dexy.exceptions.InternalDexyProblem(msg % (wanted.__class__, wanted,))

            # Check if should be excluded; every matching pattern is logged.
            skip_because_excluded = False
            for skip_pattern in exclude:
                if skip_pattern not in filepath:
                    continue
                msg = "skipping adding new file %s because it matches exclude %s"
                self.log_debug(msg % (filepath, skip_pattern,))
                skip_because_excluded = True

            if skip_because_excluded:
                continue

            if not is_valid_file_extension:
                raise Exception("Should not get here unless is_valid_file_extension")

            self.log_debug("Adding %s" % filepath)
            with open(filepath, 'rb') as new_file:
                contents = new_file.read()
            self.add_doc(relpath, contents)
            new_files_added += 1

    if new_files_added > 10:
        self.log_warn("%s additional files added" % (new_files_added))
0
Example 75
Project: captchacker2 Source File: characters_train_test_SVM.py
Function: generate_simulation_based_model
Function: generate_simulation_based_model
def generate_simulation_based_model(KERNEL = SIGMOID,TRAINING_FOLDER = 'DBTraining-Simulation_based'):
    """Train an SVM on the bitmap images below TRAINING_FOLDER and save it.

    Every folder visited by os.walk whose path does not start with "." is
    used.  The label of an image is ``ord(<last character of the folder
    path>) - 65`` and its sample is the image data scaled by 1/255.  The
    model is written to MODEL_FOLDER and the path of the saved file is
    returned.
    """
    CRANGE = [1000]

    for C in CRANGE:
        MODEL_FILE = "simulation_based_NEW_C="+str(C)+"_KERNEL="+str(KERNEL)+".svm"
        print(MODEL_FILE)
        print("""
##############################################################################
############################ TRAINING ##################################
##############################################################################
""")
        labels = []
        samples = []
        print("LOADING IMAGES...")

        # An empty selection means "train everything"; otherwise only folders
        # whose last character is in this string would be used.
        train_elem = ''
        print(TRAINING_FOLDER)
        for folder, subfolders, files in os.walk(TRAINING_FOLDER):
            if folder[0] == ".":
                continue
            if not (folder[-1] in train_elem or train_elem == ''):
                continue
            bitmaps = [name for name in files if 'bmp' in name]
            if bitmaps:
                # Announce the folder once, before its first image is read.
                print("folder %s loaded" % folder)
            label = ord(folder[-1])-65
            for bitmap in bitmaps:
                im = Image.open(os.path.join(folder, bitmap))
                labels.append(label)
                samples.append(list(im.point(lambda i: (i/255.)).getdata()))
        print("Done.\n")

        print("GENERATING MODEL...")
        problem = svm_problem(labels, samples)
        #kernels : LINEAR, POLY, RBF, and SIGMOID
        #types : C_SVC, NU_SVC, ONE_CLASS, EPSILON_SVR, and NU_SVR
        param = svm_parameter('-t %s -c %s -b %s' % (KERNEL, C, 1))
        model = svm_train(problem, param)
        model_path = os.path.join(MODEL_FOLDER, MODEL_FILE)
        svm_save_model(model_path,model)
        print("Done.\n")

    return os.path.join(MODEL_FOLDER, MODEL_FILE)
0
Example 76
def import_static_content(
course_data_path, static_content_store,
target_id, subpath='static', verbose=False):
"""Import the files below ``course_data_path / subpath`` as static content.

For each file a ``StaticContent`` object is built (display name, lock flag
and content type come from ``policies/assets.json`` when that file has an
entry for the asset), a thumbnail is requested from ``static_content_store``
and the content is saved there.

Returns ``remap_dict``, which maps each imported file's path relative to the
static directory to its asset key.
"""
remap_dict = {}
# now import all static assets
static_dir = course_data_path / subpath
try:
with open(course_data_path / 'policies/assets.json') as f:
policy = json.load(f)
except (IOError, ValueError) as err:
# xml backed courses won't have this file, only exported courses;
# so, its absence is not really an exception.
policy = {}
# NOTE(review): this overrides the caller's ``verbose`` argument; whether it
# runs always or only on the except path cannot be told from this listing.
verbose = True
# Make sure transcript files (.sjson/.srt) get a content type when guessed.
mimetypes.add_type('application/octet-stream', '.sjson')
mimetypes.add_type('application/octet-stream', '.srt')
# All content types known to the mimetypes module; used to validate the
# contentType taken from the policy file.
mimetypes_list = mimetypes.types_map.values()
for dirname, _, filenames in os.walk(static_dir):
for filename in filenames:
content_path = os.path.join(dirname, filename)
if re.match(ASSET_IGNORE_REGEX, filename):
if verbose:
log.debug('skipping static content %s...', content_path)
continue
if verbose:
log.debug('importing static content %s...', content_path)
try:
with open(content_path, 'rb') as f:
data = f.read()
except IOError:
if filename.startswith('._'):
# OS X "companion files". See
# http://www.diigo.com/annotated/0c936fda5da4aa1159c189cea227e174
continue
# Not a 'hidden file', then re-raise exception
raise
# strip away leading path from the name
# NOTE(review): str.replace removes every occurrence of static_dir in the
# path, not only the leading one.
fullname_with_subpath = content_path.replace(static_dir, '')
if fullname_with_subpath.startswith('/'):
fullname_with_subpath = fullname_with_subpath[1:]
asset_key = StaticContent.compute_location(target_id, fullname_with_subpath)
# Policy entry for this asset, or an empty dict when there is none.
policy_ele = policy.get(asset_key.path, {})
# During export display name is used to create files, strip away slashes from name
displayname = escape_invalid_characters(
name=policy_ele.get('displayname', filename),
invalid_char_list=['/', '\\']
)
locked = policy_ele.get('locked', False)
mime_type = policy_ele.get('contentType')
# Check extracted contentType in list of all valid mimetypes
if not mime_type or mime_type not in mimetypes_list:
mime_type = mimetypes.guess_type(filename)[0] # Assign guessed mimetype
content = StaticContent(
asset_key, displayname, mime_type, data,
import_path=fullname_with_subpath, locked=locked
)
# first let's save a thumbnail so we can get back a thumbnail location
thumbnail_content, thumbnail_location = static_content_store.generate_thumbnail(content)
if thumbnail_content is not None:
content.thumbnail_location = thumbnail_location
# then commit the content
try:
static_content_store.save(content)
# A failed save is logged with its traceback; no re-raise is visible here.
except Exception as err:
log.exception(u'Error importing {0}, error={1}'.format(
fullname_with_subpath, err
))
# store the remapping information which will be needed
# to subsitute in the module data
remap_dict[fullname_with_subpath] = asset_key
return remap_dict
0
Example 77
Project: RITSAR Source File: phsRead.py
def _aperture_center(pos, npulses):
    """Return the platform position at the centre of the synthetic aperture.

    For an odd number of pulses this is the middle row of ``pos``; for an
    even number it is the mean of the two middle rows.
    """
    # Floor division keeps the index an int on Python 3 as well as Python 2.
    mid = npulses // 2
    if npulses % 2:
        return pos[mid]
    return np.mean(pos[mid-1:mid+1], axis=0)


def AFRL(directory, pol, start_az, n_az=3):
    """Read AFRL ``*.mat`` files and return data compatible with ritsar.

    Parameters
    ----------
    directory : str
        User supplied directory; the files are looked up in
        ``directory/pol``.
    pol : str
        Polarisation; names the sub-directory and is part of each file name.
    start_az : int
        First azimuth number to read.
    n_az : int, optional
        Number of consecutive azimuth files to read (default 3).

    Returns
    -------
    (phs, platform) : tuple
        ``phs`` is the phase history of all files stacked along the pulse
        axis.  ``platform`` is the dictionary built for the last file, with
        'npulses', 'pos' and 'R_c' replaced by the values for the stacked
        data and 'k_y' added.
    """
    # The first 19 characters of the first file name found are the prefix
    # shared by all files.  next() works on Python 2 and Python 3.
    w = next(os.walk(directory+'/'+pol))
    prefix = '/'+pol+'/'+w[2][0][0:19]

    az = np.arange(start_az, start_az+n_az)
    az_str = ['%03d_' % a for a in az]
    fnames = [directory+prefix+a+pol+'.mat' for a in az_str]

    #Grab n_az phase histories
    phs = []
    platform = []
    for fname in fnames:
        #Convert MATLAB structure to Python dictionary
        MATdata = loadmat(fname)['data'][0][0]
        data = {
            'fp'    : MATdata[0],
            'freq'  : MATdata[1][:,0],
            'x'     : MATdata[2].T,
            'y'     : MATdata[3].T,
            'z'     : MATdata[4].T,
            'r0'    : MATdata[5][0],
            'th'    : MATdata[6][0],
            'phi'   : MATdata[7][0],
        }

        #Define phase history
        phs_tmp = data['fp'].T
        phs.append(phs_tmp)

        #Transform data to be compatible with ritsar
        c = 299792458.0
        nsamples = int(phs_tmp.shape[1])
        npulses = int(phs_tmp.shape[0])
        freq = data['freq']
        pos = np.hstack((data['x'], data['y'], data['z']))
        k_r = 4*pi*freq/c
        B_IF = data['freq'].max()-data['freq'].min()
        delta_r = c/(2*B_IF)
        delta_t = 1.0/B_IF
        # NOTE(review): "/" is integer division on Python 2 but true division
        # on Python 3, so odd sample counts give slightly different axes on
        # the two versions; left unchanged.
        t = np.linspace(-nsamples/2, nsamples/2, nsamples)*delta_t

        chirprate, f_0, r, p, s = linregress(t, freq)

        #Vector to scene center at synthetic aperture center
        R_c = _aperture_center(pos, npulses)

        #Save values to dictionary for export
        platform_tmp = {
            'f_0'       : f_0,
            'freq'      : freq,
            'chirprate' : chirprate,
            'B_IF'      : B_IF,
            'nsamples'  : nsamples,
            'npulses'   : npulses,
            'pos'       : pos,
            'delta_r'   : delta_r,
            'R_c'       : R_c,
            't'         : t,
            'k_r'       : k_r,
        }
        platform.append(platform_tmp)

    #Stack data from different azimuth files
    phs = np.vstack(phs)
    npulses = int(phs.shape[0])
    pos = np.vstack([entry['pos'] for entry in platform])
    R_c = _aperture_center(pos, npulses)

    #Replace Dictionary values (the last file's dictionary is reused)
    platform = platform_tmp
    platform['npulses'] = npulses
    platform['pos'] = pos
    platform['R_c'] = R_c

    #Synthetic aperture length
    L = norm(pos[-1]-pos[0])

    #Add k_y
    platform['k_y'] = np.linspace(-npulses/2,npulses/2,npulses)*2*pi/L

    return(phs, platform)
0
Example 78
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.

    Walks the start directories (positional arguments, else the matches of
    the glob pattern, else the current directory) and removes every file
    whose name matches the file pattern and whose run is not complete.
    With --dry-run the files are only reported.  A summary with the number
    of directories visited, files examined and files removed (or, in a dry
    run, reported) is written to the log.
    """
    if argv is None:
        # NOTE(review): argv is not handed to E.Start below -- confirm
        # whether E.Start should receive it.
        argv = sys.argv

    parser = E.OptionParser(version="%prog version: $Id: clean.py 2782 2009-09-10 11:40:29Z andreas $",
                            usage=globals()["__doc__"])
    parser.add_option("-g", "--glob", dest="glob_pattern", type="string",
                      help="glob pattern to use for collecting files [%default].")
    parser.add_option("-n", "--dry-run", dest="dry_run", action="store_true",
                      help="only print out actions, do not execute them [%default].")
    parser.add_option("-f", "--file-pattern", dest="file_pattern", type="string",
                      help="only check files matching this pattern [%default].")
    parser.set_defaults(glob_pattern="data.dir",
                        file_pattern=".out",
                        check_completeness="python",
                        skip_dirs=[],
                        dry_run=False,
                        )
    (options, args) = E.Start(parser,
                              add_pipe_options=True)

    if args:
        starts = args
    elif options.glob_pattern:
        starts = glob.glob(options.glob_pattern)
    else:
        starts = ["."]

    ndirs, nfiles, ndeleted = 0, 0, 0

    if options.check_completeness == "python":
        isComplete = checkPythonRuns
    else:
        # Previously this fell through and failed later with a NameError.
        raise ValueError("unknown completeness check: %s" %
                         options.check_completeness)

    rx = re.compile(options.file_pattern)

    for start in starts:
        for root, dirs, files in os.walk(start):
            ndirs += 1
            # exclude directories (in place, so os.walk does not enter them)
            dirs[:] = [d for d in dirs if d not in options.skip_dirs]

            for filename in files:
                # Count every file looked at; this counter used to stay 0.
                nfiles += 1
                p = os.path.join(root, filename)
                if rx.search(filename) and not isComplete(p):
                    if options.dry_run:
                        options.stdlog.write("# removing file %s\n" % p)
                    else:
                        os.remove(p)
                    ndeleted += 1

    if options.loglevel >= 1:
        options.stdlog.write("# ndirs=%i, nfiles=%i, ndeleted=%i\n" %
                             (ndirs, nfiles, ndeleted))

    E.Stop()
0
Example 79
Project: sparknotebook Source File: spark_ec2.py
def deploy_files(conn, root_dir, opts, master_nodes, slave_nodes, modules):
    """Fill in the templates below root_dir and rsync them to the master.

    Every file below ``root_dir`` (skipping paths containing ".svn" and
    editor/backup/hidden files) is copied into a temporary directory with
    each ``{{key}}`` placeholder replaced by the matching cluster setting.
    The temporary tree is then rsync'ed to "/" on the first master node and
    removed again.
    """
    active_master = master_nodes[0].public_dns_name

    # One data directory per disk: /mnt for the first, /mnt<i> for the rest.
    num_disks = get_num_disks(opts.instance_type)
    extra_disks = range(2, num_disks + 1) if num_disks > 1 else []
    hdfs_data_dirs = "/mnt/ephemeral-hdfs/data" + "".join(
        ",/mnt%d/ephemeral-hdfs/data" % i for i in extra_disks)
    mapred_local_dirs = "/mnt/hadoop/mrlocal" + "".join(
        ",/mnt%d/hadoop/mrlocal" % i for i in extra_disks)
    spark_local_dirs = "/mnt/spark" + "".join(
        ",/mnt%d/spark" % i for i in extra_disks)

    cluster_url = "%s:7077" % active_master

    if "." not in opts.spark_version:
        # Spark-only custom deploy
        spark_v = "%s|%s" % (opts.spark_git_repo, opts.spark_version)
        shark_v = ""
        modules = filter(lambda x: x != "shark", modules)
    else:
        # Pre-built spark & shark deploy
        (spark_v, shark_v) = get_spark_shark_version(opts)

    metastore_passwd = ''.join(random.SystemRandom().choice(string.uppercase + string.digits) for _ in xrange(10))
    template_vars = {
        "master_list": '\n'.join([i.public_dns_name for i in master_nodes]),
        "active_master": active_master,
        "slave_list": '\n'.join([i.public_dns_name for i in slave_nodes]),
        "cluster_url": cluster_url,
        "hdfs_data_dirs": hdfs_data_dirs,
        "mapred_local_dirs": mapred_local_dirs,
        "spark_local_dirs": spark_local_dirs,
        "swap": str(opts.swap),
        "modules": '\n'.join(modules),
        "spark_version": spark_v,
        "shark_version": shark_v,
        "hadoop_major_version": opts.hadoop_major_version,
        "metastore_user": "hive",
        "metastore_passwd": metastore_passwd,
        "spark_worker_instances": "%d" % opts.worker_instances,
        "spark_master_opts": opts.master_opts
    }

    # Create a temp directory in which we will place all the files to be
    # deployed after we substitute template parameters in them
    print(root_dir)
    tmp_dir = tempfile.mkdtemp()
    for path, dirs, files in os.walk(root_dir):
        if path.find(".svn") != -1:
            continue
        dest_dir = os.path.join('/', path[len(root_dir):])
        local_dir = tmp_dir + dest_dir
        if not os.path.exists(local_dir):
            os.makedirs(local_dir)
        for filename in files:
            # Skip editor temp files, hidden files and backups.
            if filename[0] in '#.~' or filename[-1] == '~':
                continue
            local_file = tmp_dir + os.path.join(dest_dir, filename)
            with open(os.path.join(path, filename)) as src:
                with open(local_file, "w") as dest:
                    text = src.read()
                    for key in template_vars:
                        text = text.replace("{{" + key + "}}", template_vars[key])
                    dest.write(text)

    # rsync the whole directory over to the master machine
    command = [
        'rsync', '-rv',
        '-e', stringify_command(ssh_command(opts)),
        "%s/" % tmp_dir,
        "%s@%s:/" % (opts.user, active_master)
    ]
    subprocess.check_call(command)

    # Remove the temp directory we created above
    shutil.rmtree(tmp_dir)
    print(tmp_dir)
0
Example 80
def main():
    """Create and activate one ceph OSD per bcache device.

    Module parameters: ``disks`` (list; only its length is used),
    ``ssd_device`` (journal device name below /dev) and ``journal_guid``
    (partition type GUID set on each journal partition).

    The filesystem uuids under /dev/disk/by-uuid that resolve to
    /dev/bcacheX are ordered by X.  ``ceph osd create <uuid>`` is run for
    each; when the OSD directory does not exist yet the device is mounted,
    initialised, given a journal symlink on the ssd, activated and added to
    /etc/fstab.  Exits with ``changed=True`` if any OSD was set up.
    """
    import re  # function-local: only used to parse the bcache device number

    module = AnsibleModule(
        argument_spec=dict(
            disks=dict(type='list',required=True),
            ssd_device=dict(required=True),
            journal_guid=dict(required=True),
        ),
    )
    disks = module.params.get('disks')
    ssd_device = module.params.get('ssd_device')
    journal_guid = module.params.get('journal_guid')
    changed = False
    uuids_in_order = [None] * len(disks)

    # the disks have symlinks to /dev/bcacheX. we need the disks
    # in increasing order by X.
    # The whole trailing number is parsed; taking only the last character
    # (as before) mapped bcache10 to index 0.
    bcache_device = re.compile(r'bcache(\d+)$')
    for subdir, dirs, files in os.walk('/dev/disk/by-uuid/'):
        for uuid in files:
            path = os.path.realpath(os.path.join(subdir, uuid))
            match = bcache_device.search(path)
            if match is None:
                # not a whole bcache device
                continue
            bcache_index = int(match.group(1))
            if bcache_index >= len(uuids_in_order):
                module.fail_json(
                    msg='found %s but only %d disks were given'
                        % (path, len(uuids_in_order)))
            uuids_in_order[bcache_index] = uuid

    missing = [str(i) for i, uuid in enumerate(uuids_in_order) if uuid is None]
    if missing:
        module.fail_json(
            msg='no filesystem uuid found for bcache device(s): %s'
                % ', '.join(missing))

    for i in range(0, len(uuids_in_order)):
        # running this command with the uuid argument will return the same value each time
        cmd = ['ceph', 'osd', 'create', uuids_in_order[i]]
        rc, out, err = module.run_command(cmd, check_rc=True)
        osd_id = out.rstrip()
        osd_dir = '/var/lib/ceph/osd/ceph-' + osd_id

        # if first time running 'ceph osd create' against this uuid, create the osd dir
        # and handle rest of activation. if directory exists, the device has already
        # been activated.
        if os.path.exists(osd_dir):
            continue

        os.makedirs(osd_dir)
        changed = True
        bcache_index = int(osd_id) % len(disks)
        partition_index = bcache_index + 1
        journal_partition = '/dev/' + ssd_device + str(partition_index)

        cmd = ['mount', '/dev/bcache' + str(bcache_index), osd_dir]
        rc, out, err = module.run_command(cmd, check_rc=True)
        cmd = ['ceph-osd', '-i', osd_id, '--mkfs', '--mkkey', '--osd-uuid', uuids_in_order[i]]
        rc, out, err = module.run_command(cmd, check_rc=True)

        # replace the journal created by --mkfs with a link to the ssd partition
        os.remove(osd_dir + '/journal')
        cmd = ['chown', 'ceph:ceph', journal_partition]
        rc, out, err = module.run_command(cmd, check_rc=True)
        cmd = ['sgdisk', '-t', str(partition_index) + ':' + journal_guid, '/dev/' + ssd_device]
        rc, out, err = module.run_command(cmd, check_rc=True)
        cmd = ['ln', '-s', journal_partition, osd_dir + '/journal']
        rc, out, err = module.run_command(cmd, check_rc=True)
        cmd = ['ceph-osd', '-i', osd_id, '--mkjournal']
        rc, out, err = module.run_command(cmd, check_rc=True)

        cmd = ['umount', osd_dir]
        rc, out, err = module.run_command(cmd, check_rc=True)
        cmd = ['ceph-disk', 'activate', '/dev/bcache' + str(bcache_index)]
        rc, out, err = module.run_command(cmd, check_rc=True)
        cmd = ['chown', '-R', 'ceph:ceph', osd_dir]
        rc, out, err = module.run_command(cmd, check_rc=True)

        with open("/etc/fstab", "a") as fstab:
            fstab.write('UUID=' + uuids_in_order[i] + ' /var/lib/ceph/osd/ceph-' + osd_id + ' xfs defaults,noatime,largeio,inode64,swalloc 0 0\n')

    module.exit_json(changed=changed)
0
Example 81
Project: qibuild Source File: sh.py
def iter_directory(directory, filter_fun=None, all=False):
    """Generate the paths of all files below ``directory``, relative to it.

    Directories without files produce nothing.  Paths are always POSIX
    style, even on Windows.

    By default hidden files are not listed and hidden directories are not
    descended into; pass ``all=True`` to list everything.

    .. note:: Hidden in this context means "starting with a dot"
       (If you want to support MacOS or Windows hidden files
       you are on your own ...)

    If ``filter_fun`` is given it replaces the default filter.  It is called
    with either a ``filename`` or a ``dirname`` keyword argument and should
    return True if the file should be yielded or the directory should be
    descended into.

    For instance, with::

        foo
        |__ eggs
        |   |__ c
        |   |__ d
        |__ empty
        |__ spam
            |__ a
            |__ b

    ``iter_directory(foo)`` yields::

        ["eggs/c", "eggs/d", "spam/a", "spam/b"]
    """
    def non_hidden(filename=None, dirname=None):
        name = filename or dirname
        if name:
            return not name.startswith(".")

    def filter_none(filename=None, dirname=None):
        return True

    if not filter_fun:
        filter_fun = filter_none if all else non_hidden

    for root, dirs, files in os.walk(directory, topdown=True):
        new_root = qisys.sh.to_posix_path(os.path.relpath(root, directory))
        # Pruning in place keeps os.walk out of rejected directories.
        dirs[:] = [d for d in dirs if filter_fun(dirname=d)]
        kept = [f for f in files if filter_fun(filename=f)]
        at_top = new_root == "."
        for f in kept:
            # Top-level files are yielded bare, without a "./" prefix.
            yield f if at_top else posixpath.join(new_root, f)
0
Example 82
Project: tika-similarity Source File: similarity.py
def main(argv = None):
"""Score files by how many metadata keys they share with the whole set.

Files come either from a directory (-f/--directory, walked recursively,
skipping names starting with '.') or from the command line (-c/--file).
Each file is parsed with ``parser.from_file``; a file's score is the number
of its metadata keys divided by the number of distinct keys over all parsed
files.  The scores are written, highest first, to "similarity-scores.txt".

Returns 2 when a usage error is raised.

NOTE(review): the indentation of this listing was lost, so the comments
below describe individual statements rather than the exact nesting.
"""
if argv is None:
argv = sys.argv
try:
try:
opts, args = getopt.getopt(argv[1:], 'hvf:c:a:', ['help', 'verbose', 'directory=', 'file=', 'accept=' ])
# Python 2 only syntax ("except X, name").
except getopt.error, msg:
raise _Usage(msg)
if len(opts) ==0:
raise _Usage(_helpMessage)
dirFile = ""
filenames = []
filename_list = []
allowed_mime_types = []
directory_flag = 0
for option, value in opts:
if option in ('-h', '--help'):
raise _Usage(_helpMessage)
elif option in ('-c', '--file'):
#extract file names from command line
# Everything after the option in argv is taken as a file name, not only
# the single value getopt returned.
if '-c' in argv :
index_of_file_option = argv.index('-c')
else :
index_of_file_option = argv.index('--file')
filenames = argv[index_of_file_option+1 : ]
elif option in ('-f', '--directory'):
dirFile = value
directory_flag = 1
for root, dirnames, files in os.walk(dirFile):
# Prune hidden directories in place so os.walk does not enter them.
dirnames[:] = [d for d in dirnames if not d.startswith('.')]
for filename in files:
if not filename.startswith('.'):
filename_list.append(os.path.join(root, filename))
# NOTE(review): ('--accept') is a plain string, not a tuple, so "in" is a
# substring test.  '-a' also passes it, but argv.index('--accept') below
# then raises ValueError -- confirm the intended handling of '-a'.
elif option in ('--accept'):
#extract accepted mime types from command line
index_of_mime_type_option = argv.index('--accept')
allowed_mime_types = argv[index_of_mime_type_option+1 : ]
elif option in ('-v', '--verbose'):
global _verbose
_verbose = True
#format filename
if directory_flag == 0:
filenames = [x.strip() for x in filenames]
filenames = [filenames[k].strip('\'\n') for k in range(len(filenames))]
for filename in filenames:
# Names that are not existing files are dropped silently.
if not os.path.isfile(os.path.join(dirFile, filename)):
continue
filename = os.path.join(dirFile, filename) if dirFile else filename
filename_list.append(filename)
if len(filename_list) <2 :
raise _Usage("you need to type in at least two valid files")
#allow only files with specifed mime types
if len(allowed_mime_types) != 0:
# Keeps a file when the part of its Content-Type after the last '/' is in
# allowed_mime_types; each file is parsed twice in this expression.
filename_list = [filename for filename in filename_list if parser.from_file(filename) and str(parser.from_file(filename)['metadata']['Content-Type'].encode('utf-8')).split('/')[-1] in allowed_mime_types]
else:
print "Accepting all MIME Types....."
union_feature_names = set()
file_parsed_data = {}
resemblance_scores = {}
#count similarity for two given files
for filename in filename_list:
# first compute the union of all features
try:
parsedData = parser.from_file(filename)
# Commas are removed from the name used as key (the output is comma separated).
filename_stripped = filename.replace(",","")
if parsedData:
file_parsed_data[filename_stripped] = parsedData["metadata"]
union_feature_names = union_feature_names | set(parsedData["metadata"].keys())
# NOTE(review): after a connection error the code sleeps but no retry of
# this file is visible here.
except ConnectionError:
sleep(1)
except KeyError:
continue
total_num_features = len(union_feature_names)
# now compute the specific resemblance and containment scores
for filename in file_parsed_data:
overlap = {}
overlap = set(file_parsed_data[filename].keys()) & set(union_feature_names)
# NOTE(review): divides by zero if no metadata keys were collected at all.
resemblance_scores[filename] = float(len(overlap))/total_num_features
# (filename, score) pairs, highest score first.
sorted_resemblance_scores = sorted(resemblance_scores.items(), key=operator.itemgetter(1), reverse=True)
'''print "Resemblance:\n"
for tuple in sorted_resemblance_scores:
print tuple[0]+","+str(tuple[1]) + "," + convertUnicode(file_parsed_data[tuple[0]])+'\n'''
with open("similarity-scores.txt", "w") as f:
f.write("Resemblance : \n")
# One line per file: base name, score, full name, converted metadata.
for tuple in sorted_resemblance_scores:
f.write(os.path.basename(tuple[0].rstrip(os.sep)) + ","+str(tuple[1]) + "," + tuple[0] + "," + convertUnicode(file_parsed_data[tuple[0]]) + '\n')
except _Usage, err:
print >>sys.stderr, sys.argv[0].split('/')[-1] + ': ' + str(err.msg)
return 2
0
Example 83
Project: C-PAC Source File: build_sublist.py
def return_bids_template(base_dir, scan_type, creds_path=None):
    '''
    Function that returns the path template of the desired scan type
    from a BIDS dataset

    The first file found whose parent directory is named ``scan_type``
    (for local datasets it must also end in '.nii.gz') is used as the
    example; every directory between the base directory and the scan
    directory is replaced by '*'.

    Parameters
    ----------
    base_dir : string
        base directory of the BIDS dataset
    scan_type : string
        type of scan; e.g. 'anat', 'func', etc.
    creds_path : string (optional); default=None
        filepath to a set of AWS credentials to access a BIDS dataset
        stored on S3 that isn't public

    Returns
    -------
    file_template : string
        regular expression-compatible file template indicating data
        path organization

    Raises
    ------
    Exception
        if the S3 bucket cannot be retrieved or no matching file is found
    '''

    # Import packages
    import os

    # Init variables
    s3_str = 's3://'
    file_path = None

    # If base directory is in S3
    if base_dir.startswith(s3_str):
        # Imported here so that local datasets do not require indi_aws
        from indi_aws import fetch_creds

        bucket_name = base_dir.split('/')[2]
        s3_prefix = '/'.join(base_dir.split('/')[:3])

        # Extract base prefix to search through in S3
        prefix = base_dir.split('*')[0].replace(s3_prefix, '').lstrip('/')

        # Attempt to get bucket
        try:
            bucket = fetch_creds.return_bucket(creds_path, bucket_name)
        except Exception as exc:
            err_msg = 'There was an error in retrieving S3 bucket: %s.\nError: %s'\
                      %(bucket_name, exc)
            raise Exception(err_msg)

        # Get filepaths from S3 with prefix
        print('Gathering files from S3 to parse...')
        for s3_obj in bucket.objects.filter(Prefix=prefix):
            key_parts = s3_obj.key.split('/')
            # Only remember a key that matches; previously the last key
            # listed was used even when nothing matched.
            if len(key_parts) >= 2 and key_parts[-2] == scan_type:
                file_path = s3_obj.key
                break
    # Else, the base directory is locally stored
    else:
        for root, dirs, files in os.walk(base_dir):
            for fil in files:
                candidate = os.path.join(root, fil)
                scan_dir = candidate.split('/')[-2]
                if fil.endswith('.nii.gz') and scan_dir == scan_type:
                    file_path = candidate
                    break
            if file_path:
                break

    if not file_path:
        err_msg = 'Could not find any files in directory, check files!'
        raise Exception(err_msg)

    # Now replace file_path intermediate dirs with *
    # Work on the part below base_dir only, component by component, so that
    # a directory name occurring elsewhere in the path is left untouched.
    if file_path.startswith(base_dir):
        rel_path = file_path[len(base_dir):].lstrip('/')
    else:
        # S3 keys do not contain the 's3://bucket' part of base_dir
        rel_path = file_path.lstrip('/')
    head = file_path[:len(file_path) - len(rel_path)]
    rel_parts = rel_path.split('/')
    n_interm = max(len(rel_parts) - 2, 0)
    template_path = head + '/'.join(['*'] * n_interm + rel_parts[n_interm:])

    # Set template as any file *
    file_template = os.path.join(os.path.dirname(template_path), '*.nii.gz')

    # Return file pattern template
    return file_template
0
Example 84
Project: qiime Source File: multiple_split_libraries_fastq.py
def main():
    """Build split_libraries_fastq commands for a directory tree and run them.

    Options are read from the command line.  FASTQ files (and, for the
    'mapping_barcode_files' method, mapping files) are collected by walking
    ``opts.input_dir``; the resulting commands are either printed or executed
    serially, depending on ``opts.print_only``.

    Raises:
        ValueError: if no read files remain after matching/filtering.
    """
    option_parser, opts, args = parse_command_line_parameters(
        suppress_verbose=True, **script_info)

    input_dir = opts.input_dir
    demultiplexing_method = opts.demultiplexing_method
    read_indicator = opts.read_indicator
    barcode_indicator = opts.barcode_indicator
    mapping_indicator = opts.mapping_indicator
    mapping_extensions = opts.mapping_extensions.split(',')
    sampleid_indicator = opts.sampleid_indicator
    leading_text = opts.leading_text
    trailing_text = opts.trailing_text
    include_input_dir_path = opts.include_input_dir_path
    output_dir = abspath(opts.output_dir)
    remove_filepath_in_name = opts.remove_filepath_in_name
    print_only = opts.print_only

    if remove_filepath_in_name and not include_input_dir_path:
        option_parser.error("If --remove_filepath_in_name enabled, "
                            "--include_input_dir_path must be enabled.")

    if opts.parameter_fp:
        with open(opts.parameter_fp, 'U') as parameter_f:
            params_dict = parse_qiime_parameters(parameter_f)
        params_str = get_params_str(params_dict['split_libraries_fastq'])
    else:
        params_dict = {}
        params_str = ""

    create_dir(output_dir)

    # str.endswith accepts a tuple, so each file is tested once and can be
    # recorded at most once even if several suffixes match it.
    fastq_suffixes = ('.fastq.gz', '.fastq', '.fq.gz', '.fq')
    mapping_suffixes = tuple(mapping_extensions)
    use_mapping_files = demultiplexing_method == 'mapping_barcode_files'

    all_fastq = []
    all_mapping = []
    # A single traversal gathers both kinds of file.
    for root, _, fps in walk(input_dir):
        for fp in fps:
            full_fp = abspath(join(root, fp))
            if fp.endswith(fastq_suffixes):
                all_fastq.append(full_fp)
            if use_mapping_files and fp.endswith(mapping_suffixes):
                all_mapping.append(full_fp)

    if use_mapping_files:
        all_files = get_matching_files(all_fastq, all_mapping,
                                       read_indicator, barcode_indicator,
                                       mapping_indicator)
    else:
        # Filter down files to only the target files.
        # NOTE(review): `filter` is called as (names, pattern), so it is
        # presumably fnmatch.filter imported at file level - confirm.
        all_files = filter(all_fastq, read_indicator)

    if not all_files:
        raise ValueError(
            "No reads detected-please check the values indicated with "
            "the --read_indicator parameter. Set as '*' to include all files, or use "
            "a value such as '*fastqjoin.join*' to detect only the reads that are "
            "joined after join_paired_ends.py.")

    commands = create_commands_slf(all_files, demultiplexing_method, output_dir,
                                   params_str, leading_text, trailing_text,
                                   include_input_dir_path,
                                   remove_filepath_in_name, sampleid_indicator)

    qiime_config = load_qiime_config()
    if print_only:
        command_handler = print_commands
    else:
        command_handler = call_commands_serially
    logger = WorkflowLogger(generate_log_fp(output_dir),
                            params=params_dict,
                            qiime_config=qiime_config)

    # Call the command handler on the list of commands
    command_handler(commands,
                    status_update_callback=no_status_updates,
                    logger=logger,
                    close_logger_on_success=True)
0
Example 85
def run(sourceDirectory, outputFilename=None, configFile=None):
    """Merge the JavaScript files under ``sourceDirectory`` in dependency order.

    Parameters:
        sourceDirectory: root directory that is walked for source files.
        outputFilename: optional path; when given, the merged text is also
            written to this file.
        configFile: optional path handed to ``Config``; its ``include``,
            ``exclude``, ``forceFirst`` and ``forceLast`` attributes select
            and order the files.

    Returns:
        The merged source as a single string.

    Raises:
        MissingImport: when a file requires a path that does not exist
            under ``sourceDirectory``.
    """
    cfg = None
    if configFile:
        cfg = Config(configFile)

    allFiles = []

    ## Find all the Javascript source files
    for root, dirs, filenames in os.walk(sourceDirectory):
        for filename in filenames:
            if filename.endswith(SUFFIX_JAVASCRIPT) and not filename.startswith("."):
                # relpath is used instead of slicing off
                # len(sourceDirectory) + 1 characters: slicing drops the
                # first character of the name when sourceDirectory already
                # ends with a path separator.
                filepath = os.path.relpath(os.path.join(root, filename),
                                           sourceDirectory)
                filepath = filepath.replace("\\", "/")
                if cfg and cfg.include:
                    if filepath in cfg.include or filepath in cfg.forceFirst:
                        allFiles.append(filepath)
                elif (not cfg) or (not undesired(filepath, cfg.exclude)):
                    allFiles.append(filepath)

    ## Header inserted at the start of each file in the output
    HEADER = "/* " + "=" * 70 + "\n %s\n" + " " + "=" * 70 + " */\n\n"

    files = {}

    ## Import file source code
    ## TODO: Do import when we walk the directories above?
    for filepath in allFiles:
        print("Importing: %s" % filepath)
        fullpath = os.path.join(sourceDirectory, filepath).strip()
        with open(fullpath, "U") as handle:  # TODO: Ensure end of line @ EOF?
            content = handle.read()
        files[filepath] = SourceFile(filepath, content)  # TODO: Chop path?

    print("")

    from toposort import toposort

    complete = False
    resolution_pass = 1

    while not complete:
        complete = True

        ## Resolve the dependencies
        print("Resolution pass %s... " % resolution_pass)
        resolution_pass += 1

        # Iterate over a snapshot: `files` grows inside the loop whenever a
        # required file has not been imported yet.
        for filepath, info in list(files.items()):
            for path in info.requires:
                if path not in files:
                    complete = False
                    fullpath = os.path.join(sourceDirectory, path).strip()
                    if os.path.exists(fullpath):
                        print("Importing: %s" % path)
                        with open(fullpath, "U") as handle:  # TODO: Ensure end of line @ EOF?
                            content = handle.read()
                        files[path] = SourceFile(path, content)  # TODO: Chop path?
                    else:
                        raise MissingImport("File '%s' not found (required by '%s')." % (path, filepath))

    # create dictionary of dependencies
    dependencies = {}
    for filepath, info in files.items():
        dependencies[filepath] = info.requires

    print("Sorting...")
    order = toposort(dependencies)

    ## Move forced first and last files to the required position
    if cfg:
        print("Re-ordering files...")
        order = cfg.forceFirst + [item
                                  for item in order
                                  if ((item not in cfg.forceFirst) and
                                      (item not in cfg.forceLast))] + cfg.forceLast

    print("")

    ## Output the files in the determined order
    result = []
    for fp in order:
        f = files[fp]
        print("Exporting:  %s" % f.filepath)
        result.append(HEADER % f.filepath)
        source = f.source
        result.append(source)
        if not source.endswith("\n"):
            result.append("\n")

    print("\nTotal files merged: %d " % len(files))

    merged = "".join(result)
    if outputFilename:
        print("\nGenerating: %s" % (outputFilename))
        with open(outputFilename, "w") as out_handle:
            out_handle.write(merged)
    return merged
0
Example 86
Project: neural-fuzzer Source File: triage.py
def triage(cmd, seeds, depth=5, prune=False):
    """Run every seed file under gdb and group the files by backtrace.

    Parameters:
        cmd: command line of the target; the marker "@@" is replaced by the
            test case path.
        seeds: directory that is walked recursively for test cases.
        depth: maximum number of backtrace frames that form the grouping key.
        prune: accepted but unused (the pruning code is disabled).

    Returns:
        dict mapping each backtrace key to the ``(path, size)`` tuple of the
        smallest file that produced it.
    """
    # NOTE(review): the command is executed with shell=True; `cmd` and the
    # file names under `seeds` must come from a trusted source.
    gdb_cmd = "env -i ASAN_OPTIONS='abort_on_error=1' gdb -batch -ex run -ex 'bt 20' --args @@ 2> /dev/null"

    # Keep the real on-disk path. The original code appended the names of all
    # sub-directories to the root, which produced paths that do not exist as
    # soon as `seeds` contained a sub-directory.
    all_files = []
    for root, _, files in os.walk(seeds):
        for f in files:
            all_files.append(os.path.join(root, f))

    random.shuffle(all_files)

    dedup_files = dict()
    for testcase in all_files:
        # Escape shell metacharacters only for the command line; the
        # unescaped path is still needed for os.path.getsize below.
        shell_path = testcase
        for special in "()$,":
            shell_path = shell_path.replace(special, "\\" + special)

        prepared_cmd = cmd.split("@@")
        prepared_cmd = prepared_cmd[0].split(
            " ") + [shell_path] + prepared_cmd[1].split(" ")
        prepared_cmd = remove_nils(prepared_cmd)
        out = subprocess.check_output(gdb_cmd.replace(
            "@@", " ".join(prepared_cmd)), shell=True)

        backtrace = out.split("#")[1:]
        key = ""
        size = os.path.getsize(testcase)
        dkey = 0
        for frame in backtrace:
            if dkey == depth:
                break
            # Frames without symbols and internal ("__") frames do not
            # contribute to the key.
            if "??" in frame or "__" in frame:
                continue
            fields = remove_nils(frame.split(" "))
            # The function name is the fourth field when the frame contains
            # " in ", otherwise the second one.
            if " in " in frame:
                key = key + " " + fields[3]
            else:
                key = key + " " + fields[1]
            dkey = dkey + 1

        dedup_files.setdefault(key, []).append((testcase, size))

    # Keep the smallest test case of every group.
    smallest = dict()
    for (k, xs) in dedup_files.items():
        smallest[k] = min(xs, key=lambda entry: entry[1])
    return smallest
0
Example 87
Project: courtlistener Source File: import_law_box.py
def main():
    """Import lawbox cases from a directory tree or from an index file.

    Cases come from one of four sources, chosen by the command-line options:
    a recursive walk of ``--dir``, random lines of the index file
    (``--random``), the index file starting at the saved marker
    (``--resume``), or the index file starting at ``--line``.

    Any exception raised while handling a case is logged with its traceback
    and terminates the process with exit status 1.
    """
    parser = argparse.ArgumentParser(
        description='Import the corpus provided by lawbox')
    parser.add_argument('-s', '--simulate', default=False, required=False,
                        action='store_true',
                        help='Run the code in simulate mode, making no permanent changes.')
    parser.add_argument('-d', '--dir', type=readable_dir,
                        help='The directory where the lawbox bulk data can be found.')
    parser.add_argument('-f', '--file', type=str, default="index.txt",
                        required=False, dest="file_name",
                        help="The file that has all the URLs to import, one per line.")
    parser.add_argument('-l', '--line', type=int, default=1, required=False,
                        help='If provided, this will be the line number in the index file where we resume processing.')
    parser.add_argument('-r', '--resume', default=False, required=False,
                        action='store_true',
                        help='Use the saved marker to resume operation where it last failed.')
    parser.add_argument('-x', '--random', default=False, required=False,
                        action='store_true',
                        help='Pick cases randomly rather than serially.')
    parser.add_argument('-m', '--marker', type=str,
                        default='lawbox_progress_marker.txt', required=False,
                        help="The name of the file that tracks the progress (useful if multiple versions run at same time)")
    parser.add_argument('-e', '--end', type=int, required=False,
                        default=2000000,
                        help="An optional endpoint for an importer.")
    args = parser.parse_args()

    if args.dir:
        def case_generator(dir_root):
            """Yield cases, one by one to the importer by recursing and iterating the import directory"""
            for root, dirnames, filenames in os.walk(dir_root):
                for filename in fnmatch.filter(filenames, '*'):
                    yield os.path.join(root, filename)

        # The option is stored as `args.dir`; the parser defines no `root`
        # attribute, so reading `args.root` raised AttributeError.
        cases = case_generator(args.dir)
        i = 0
    else:
        def generate_random_line(file_name):
            """Yield the line that follows a random byte offset, forever."""
            while True:
                total_bytes = os.stat(file_name).st_size
                random_point = random.randint(0, total_bytes)
                with open(file_name) as f:
                    f.seek(random_point)
                    f.readline()  # skip this line to clear the partial line
                    line = f.readline().strip()
                yield line

        def case_generator(line_number):
            """Yield cases from the index file."""
            enumerated_line_number = line_number - 1  # The enumeration is zero-index, but files are one-index.
            with open(args.file_name) as index_file:
                for i, line in enumerate(index_file):
                    if i >= enumerated_line_number:
                        yield line.strip()

        if args.random:
            cases = generate_random_line(args.file_name)
            i = 0
        elif args.resume:
            with open(args.marker) as marker:
                resume_point = int(marker.read().strip())
            cases = case_generator(resume_point)
            i = resume_point
        else:
            cases = case_generator(args.line)
            i = args.line

    for case_path in cases:
        if i % 1000 == 0:
            db.reset_queries()  # Else we leak memory when DEBUG is True

        if 'counter' in DEBUG:  # and i % 1000 == 0:
            log_print("\n%s: Doing case (%s): file://%s" % (
                datetime.datetime.now(), i, case_path))
        try:
            doc = import_law_box_case(case_path)
            duplicates = find_duplicates(doc, case_path)
            if not args.simulate:
                if len(duplicates) == 0:
                    doc.html_lawbox, blocked = anonymize(doc.html)
                    doc.html = ''
                    if blocked:
                        doc.blocked = True
                        doc.date_blocked = now()
                    # Save nothing to the index for now (it'll get done
                    # when we find citations)
                    doc.save(index=False)
                if len(duplicates) == 1:
                    dup_helpers.merge_cases_simple(doc, duplicates[0])
                if len(duplicates) > 1:
                    # complex_merge
                    if 'log_multimerge' in DEBUG:
                        with open('index_multimerge.txt', 'a') as log:
                            log.write('%s\n' % case_path)
            if args.resume:
                # Don't change the progress marker unless you're in resume mode
                with open(args.marker, 'w') as marker:
                    marker.write(
                        str(i + 1))  # Files are one-index, not zero-index
            with open('lawbox_fix_file.pkl', 'wb') as fix_file:
                pickle.dump(fixes, fix_file)
            i += 1
            if i == args.end:
                log_print(
                    "Hit the endpoint after importing number %s. Breaking." % i)
                break
        except Exception:
            log_print(traceback.format_exc())
            exit(1)
0
Example 88
Project: bokeh Source File: test_code_quality.py
def collect_errors():
    """Scan the project's source trees for whitespace and line-ending problems.

    Returns:
        list of ``(message, filename, line_number)`` tuples, one per problem
        found (leading/trailing blank lines, trailing whitespace, carriage
        returns, tabs in the leading whitespace, missing final newline).
    """
    errors = []

    def test_this_file(fname, test_file):
        """Check every line of an open binary file and record problems."""
        line = None

        for idx, line in enumerate(test_file):
            line = line.decode('utf-8')
            line_no = idx + 1

            if idx == 0 and len(line.strip()) == 0:
                errors.append((message_multi_bof, fname, line_no))
            if line.endswith(" \n") or line.endswith("\t\n"):
                errors.append((message_space, fname, line_no))
            if line.endswith("\r\n") or line.endswith("\r"):
                errors.append((message_carriage, fname, line_no))
            if tab_in_leading(line):
                errors.append((message_tabs, fname, line_no))
            #if len(line) > MAX_LINE_LENGTH:
            #    errors.append((message_too_long, fname, line_no))

        # `line` stays None for an empty file; otherwise it is the last line.
        if line is not None:
            if idx > 0 and len(line.strip()) == 0:
                errors.append((message_multi_eof, fname, line_no))
            if not line.endswith('\n'):
                errors.append((message_eof, fname, line_no))

    def test(fname):
        """Open ``fname`` and run the line checks on it."""
        # Plain binary mode: the "U" flag translated "\r\n" to "\n" on
        # Python 2 (so carriage returns could never be reported) and is
        # rejected by open() on Python 3.11+.
        with open(fname, "rb") as test_file:
            test_this_file(fname, test_file)

    def canonicalize(path):
        """Convert a '/'-separated path to the platform separator."""
        return path.replace('/', sep)

    def check_tree(base_path, patterns, dir_exclusions=None, file_exclusions=None):
        """Check all files below ``base_path`` that match one of ``patterns``."""
        dir_exclusions = dir_exclusions or []
        file_exclusions = file_exclusions or []
        base_path = join(TOP_PATH, canonicalize(base_path))
        dir_exclusions = set(join(base_path, canonicalize(path)) for path in dir_exclusions)

        for root, dirs, _ in walk(base_path):
            if root in dir_exclusions:
                # Emptying `dirs` in place stops walk() from descending.
                del dirs[:]
                continue

            for pattern in patterns:
                files = glob(join(root, pattern))
                check_files(files, file_exclusions)

    def check_files(files, file_exclusions=None):
        """Check each existing regular file whose basename is not excluded."""
        file_exclusions = file_exclusions or []
        for fname in files:
            if not isabs(fname):
                fname = join(TOP_PATH, fname)

            if not exists(fname) or not isfile(fname):
                continue

            if basename(fname) in file_exclusions:
                continue

            test(fname)

    check_files(["setup.py"])
    check_tree('bin', ['*'])
    check_tree('bokeh', ['*.py', '*.html', '*.js'], ["server/static"], ["__conda_version__.py"])
    check_tree('bokehjs', ['*.coffee', '*.js', '*.ts', '*.less', '*.css', '*.json'], ['build', 'node_modules', 'src/vendor', 'typings'])
    check_tree('conda.recipe', ['*.py', '*.sh', '*.yaml'])
    check_tree('examples', ['*.py', '*.ipynb'])
    check_tree('scripts', ['*.py', '*.sh'])
    check_tree('sphinx', ['*.rst', '*.py'], ['_build', 'source/docs/gallery'])
    check_tree('tests', ['*.py', '*.js'])

    return errors
0
Example 89
def main():
    """Create day files from EDL data for one or more stations.

    Command line: <path to files> <sampling in seconds> [<output dir>]
    [<stationname>[,<stationname>...]] [-R].  With -R, the sub-folders whose
    path contains a station name are processed instead of the given path.

    The process exits via ``sys.exit`` with a message on every error.
    """
    if len(sys.argv) < 3:
        sys.exit('\nNeed at least 2 arguments: \n\n '
                 '<path to files> \n <sampling in seconds> \n\n'
                 '[optional: <output dir>] \n [optional: <stationname>]\n'
                 '[optional: <recursive flag -R>]\n'
                 '(set this option for including all subfolders)\n\n')

    outdir = None
    stationname = None
    recursive = False

    # Positional optionals are taken in order (output dir, station name);
    # anything starting with '-' is a flag and only -r/-R is recognised.
    for o in sys.argv[3:]:
        o = o.strip()
        if not o:
            # An empty argument would otherwise fail on o[0].
            continue
        if o[0] == '-':
            # Slicing keeps a lone '-' from raising IndexError.
            if o[1:2].lower() == 'r':
                recursive = True
            continue
        elif outdir is None:
            outdir = o
            continue
        elif stationname is None:
            stationname = o
            continue

    if stationname is not None:
        # The station argument may be a comma-separated list.
        stationlist = [name.upper() for name in stationname.split(',')]
    else:
        stationlist = [None]

    print(stationlist)

    pathname_raw = sys.argv[1]
    pathname = op.abspath(op.realpath(pathname_raw))

    if not op.isdir(pathname):
        sys.exit('Data file(s) path not existing: {0}'.format(pathname))

    try:
        sampling = float(sys.argv[2])
    except ValueError:
        sampling = None
    if sampling is None or sampling <= 0:
        sys.exit('Second argument must be sampling interval in seconds (int/float)')

    if recursive is True:
        lo_files = []
        for root, subdirs, _ in os.walk(pathname):
            lof = [op.abspath(op.join(root, sub)) for sub in subdirs]
            if stationname is not None:
                for station in stationlist:
                    lo_files.extend(
                        folder for folder in lof
                        if station.lower() in folder.lower())

        pathname = list(set(lo_files))
        if len(pathname) == 0:
            sys.exit('\n\tERROR - No (sub-) folders for stations {0} found\n'.format(stationlist))

    for stationname in stationlist:
        # With no station given the list is [None]; calling .upper() on it
        # raised AttributeError before any data was processed.
        # NOTE(review): None is passed through as the station argument -
        # confirm EDL_make_dayfiles accepts it.
        station_label = None if stationname is None else stationname.upper()
        print('processing station  {0}'.format(station_label))
        station_pathname = pathname

        try:
            MTfh.EDL_make_dayfiles(station_pathname, sampling, station_label, outdir)
        except MTex.MTpyError_inputarguments:
            if stationname is None:
                sys.exit('\n\tERROR - No data found in (sub-)folders\n')
            else:
                sys.exit('\n\tERROR - No data found in (sub-)folders for station {0}\n'.format(station_label))
        except Exception:
            sys.exit('\n\tERROR - could not process (sub-)folders')
0
Example 90
Project: spark-cluster-deployment Source File: spark_ec2.py
def deploy_files(conn, root_dir, opts, master_nodes, slave_nodes, modules):
    """Fill in the templates under ``root_dir`` and rsync them to the master.

    Every file below ``root_dir`` (except .svn content, hidden files and
    editor backups) is copied into a temporary directory with each
    ``{{name}}`` placeholder replaced by its cluster-specific value; the
    temporary tree is then rsynced to ``/`` on the first master node.

    Parameters:
        conn: unused by this function.
        root_dir: local directory holding the template tree.
        opts: parsed options (instance type, versions, user, ...).
        master_nodes, slave_nodes: node objects exposing ``public_dns_name``.
        modules: iterable of module names; "shark" is dropped for custom
            (git) Spark builds.

    Raises:
        subprocess.CalledProcessError: if rsync exits with a non-zero status.
    """
    active_master = master_nodes[0].public_dns_name

    num_disks = get_num_disks(opts.instance_type)
    hdfs_data_dirs = "/mnt/ephemeral-hdfs/data"
    mapred_local_dirs = "/mnt/hadoop/mrlocal"
    spark_local_dirs = "/mnt/spark"
    if num_disks > 1:
        for i in range(2, num_disks + 1):
            hdfs_data_dirs += ",/mnt%d/ephemeral-hdfs/data" % i
            mapred_local_dirs += ",/mnt%d/hadoop/mrlocal" % i
            spark_local_dirs += ",/mnt%d/spark" % i

    cluster_url = "%s:7077" % active_master

    if "." in opts.spark_version:
        # Pre-built spark & shark deploy
        (spark_v, shark_v) = get_spark_shark_version(opts)
    else:
        # Spark-only custom deploy
        spark_v = "%s|%s" % (opts.spark_git_repo, opts.spark_version)
        shark_v = ""
        modules = [module for module in modules if module != "shark"]

    template_vars = {
        "master_list": '\n'.join([i.public_dns_name for i in master_nodes]),
        "active_master": active_master,
        "slave_list": '\n'.join([i.public_dns_name for i in slave_nodes]),
        "cluster_url": cluster_url,
        "hdfs_data_dirs": hdfs_data_dirs,
        "mapred_local_dirs": mapred_local_dirs,
        "spark_local_dirs": spark_local_dirs,
        "swap": str(opts.swap),
        "modules": '\n'.join(modules),
        "spark_version": spark_v,
        "shark_version": shark_v,
        "hadoop_major_version": opts.hadoop_major_version,
        "spark_worker_instances": "%d" % opts.worker_instances,
        "spark_master_opts": opts.master_opts
    }

    # Create a temp directory in which we will place all the files to be
    # deployed after we substitue template parameters in them
    tmp_dir = tempfile.mkdtemp()
    try:
        for path, dirs, files in os.walk(root_dir):
            if ".svn" in path:
                continue
            dest_dir = os.path.join('/', path[len(root_dir):])
            local_dir = tmp_dir + dest_dir
            if not os.path.exists(local_dir):
                os.makedirs(local_dir)
            for filename in files:
                # Skip hidden files, lock files and editor backups.
                if filename[0] in '#.~' or filename[-1] == '~':
                    continue
                dest_file = os.path.join(dest_dir, filename)
                local_file = tmp_dir + dest_file
                with open(os.path.join(path, filename)) as src:
                    text = src.read()
                for key, value in template_vars.items():
                    text = text.replace("{{" + key + "}}", value)
                with open(local_file, "w") as dest:
                    dest.write(text)

        # rsync the whole directory over to the master machine
        command = [
            'rsync', '-rv',
            '-e', stringify_command(ssh_command(opts)),
            "%s/" % tmp_dir,
            "%s@%s:/" % (opts.user, active_master)
        ]
        subprocess.check_call(command)
    finally:
        # Remove the temp directory even when templating or rsync fails,
        # so failed deployments do not leave a copy behind.
        shutil.rmtree(tmp_dir)
0
Example 91
def main():
    """Create day files from EDL data, selecting folders by their content.

    Command line: <path to files> <sampling in seconds> [<output dir>]
    [<stationname>[,<stationname>...]] [-R].  With -R, every sub-folder that
    contains an entry whose name includes a station name is processed.

    The process exits via ``sys.exit`` with a message on every error.
    """
    # NOTE(review): the usage text asks for 4 arguments while the check only
    # requires 2 - confirm which one is intended.
    if len(sys.argv) < 3:
        sys.exit('\nNeed at least 4 arguments: \n\n '
                 '<path to files> \n <sampling in seconds> \n'
                 '<output dir> \n <stationname>\n'
                 '[optional: <recursive flag -R>]\n'
                 '(set this option for including all subfolders)\n\n')

    print("")

    outdir = None
    stationname = None
    recursive = False

    # Positional optionals are taken in order (output dir, station name);
    # anything starting with '-' is a flag and only -r/-R is recognised.
    for o in sys.argv[3:]:
        o = o.strip()
        if not o:
            # An empty argument would otherwise fail on o[0].
            continue
        if o[0] == '-':
            # Slicing keeps a lone '-' from raising IndexError.
            if o[1:2].lower() == 'r':
                recursive = True
            continue
        elif outdir is None:
            outdir = o
            continue
        elif stationname is None:
            stationname = o
            continue

    if stationname is not None:
        # The station argument may be a comma-separated list.
        stationlist = [name.upper() for name in stationname.split(',')]
    else:
        stationlist = [None]

    print(stationlist)

    pathname_raw = sys.argv[1]
    pathname = op.abspath(op.realpath(pathname_raw))

    if not op.isdir(pathname):
        sys.exit('Data file(s) path not existing: {0}\n'.format(pathname))

    try:
        sampling = float(sys.argv[2])
    except ValueError:
        sampling = None
    if sampling is None or sampling <= 0:
        sys.exit('Second argument must be sampling interval in seconds (int/float)')

    if recursive is True:
        lo_folders = []
        for root, subdirs, _ in os.walk(pathname):
            if stationname is None:
                continue
            for sub in subdirs:
                curr_folder = op.abspath(op.join(root, sub))
                # List each folder once instead of once per station.
                content_of_folder = [entry.lower() for entry in os.listdir(curr_folder)]
                for station in stationlist:
                    if any(station.lower() in entry for entry in content_of_folder):
                        lo_folders.append(curr_folder)

        pathname = list(set(lo_folders))
        if len(pathname) == 0:
            sys.exit('\n\tERROR - No (sub-) folders for stations {0} found\n'.format(stationlist))

    for stationname in stationlist:
        # With no station given the list is [None]; calling .upper() on it
        # raised AttributeError before any data was processed.
        # NOTE(review): None is passed through as the station argument -
        # confirm EDL_make_dayfiles accepts it.
        station_label = None if stationname is None else stationname.upper()
        print('....\n')
        print('processing station  {0}'.format(station_label))
        station_pathname = pathname

        try:
            MTfh.EDL_make_dayfiles(station_pathname, sampling, station_label, outdir)
        except MTex.MTpyError_inputarguments:
            if stationname is None:
                sys.exit('\n\tERROR - No data found in (sub-)folders\n')
            else:
                sys.exit('\n\tERROR - No data found in (sub-)folders for station {0}\n'.format(station_label))
        except MemoryError:
            sys.exit('\n\tERROR - Not enough memory to store temporary arrays!\n')
        except Exception:
            sys.exit('\n\tERROR - could not process (sub-)folders')

    print('\n')
0
Example 92
def run(self, dirs):
    """Combine the package.xml files found under ``dirs`` into one manifest.

    Every ``*.xml`` file (``*-meta.xml`` excluded) below the given
    directories is parsed with ``util.build_package_types``; the members of
    each metadata type are merged, the combined XML is stored in
    ``self.package_xml_content`` and an input panel asks for the target path.
    Nothing is stored when no usable file is found or the user cancels after
    a parse error.
    """
    self.settings = context.get_settings()

    all_types = {}
    for folder in dirs:
        for current_dir, _subdirs, names in os.walk(folder):
            for name in names:
                # Only plain *.xml files are candidates.
                if name.endswith("-meta.xml") or not name.endswith(".xml"):
                    continue

                # Package file name
                package_xml = os.path.join(current_dir, name)

                # Read package.xml content
                with open(package_xml, "rb") as fp:
                    content = fp.read()

                # Parsed types sample:
                #   {"ApexClass": ["test"]}, {"ApexTrigger": ["test"]}
                try:
                    _types = util.build_package_types(content)
                except xml.parsers.expat.ExpatError as ee:
                    message = "%s parse error: %s" % (package_xml, str(ee))
                    Printer.get("error").write(message)
                    if sublime.ok_cancel_dialog(message, "Skip?"):
                        continue
                    return
                except KeyError:
                    if self.settings["debug_mode"]:
                        print ("%s is not valid package.xml" % package_xml)
                    continue

                for _type in _types:
                    merged = _types[_type]
                    if _type in all_types:
                        # Join with what was collected so far and drop
                        # duplicate members.
                        merged.extend(all_types[_type])
                        merged = list(set(merged))
                    all_types[_type] = sorted(merged)

    if not all_types:
        Printer.get("error").write_start().write("No available package.xml to combine")
        return

    metadata_objects = [
        "<types>%s<name>%s</name></types>" % (
            "".join(["<members>%s</members>" % m for m in all_types[_type]]),
            _type
        )
        for _type in all_types
    ]

    self.package_xml_content = """<?xml version="1.0" encoding="UTF-8"?>
<Package xmlns="http://soap.sforce.com/2006/04/metadata">
{metadata_objects}
<version>{api_version}.0</version>
</Package>
""".format(
        metadata_objects="".join(metadata_objects),
        api_version=self.settings["api_version"]
    )

    package_path = os.path.join(dirs[0], "combined package.xml")
    sublime.active_window().show_input_panel("Input Package.xml Path",
        package_path, self.on_input_package_path, None, None)
0
Example 93
Project: girder Source File: lib_test.py
def testUploadCallbacks(self):
    """Check upload callbacks, reuseExisting and stream uploads of the client.

    The test uploads ``self.libTestDir`` into a fresh user's first child
    folder and verifies, in order: that the folder and item callbacks fire
    exactly once per folder/file, that a second upload with
    ``reuseExisting=True`` leaves the folder listing unchanged, that a wrong
    ``size`` raises ``IncorrectUploadLengthError``, and how the MIME type of
    a stream upload is determined.
    """
    callbackUser = self.model('user').createUser(
        firstName='Callback', lastName='Last', login='callback',
        password='password', email='[email protected]')
    callbackPublicFolder = six.next(self.model('folder').childFolders(
        parentType='user', parent=callbackUser, user=None, limit=1))
    callbackCounts = {'folder': 0, 'item': 0}
    # Map every local folder/file path to a "callback seen" flag.
    folders = {}
    items = {}
    folders[self.libTestDir] = False
    folderCount = 1 # 1 for self.libTestDir
    item_count = 0
    for root, dirs, files in os.walk(self.libTestDir):
        for name in files:
            items[os.path.join(root, name)] = False
            item_count += 1
        for name in dirs:
            folders[os.path.join(root, name)] = False
            folderCount += 1

    def folderCallback(folder, filepath):
        # The path must be one found by the walk above.
        self.assertIn(filepath, six.viewkeys(folders))
        folders[filepath] = True
        callbackCounts['folder'] += 1

    def itemCallback(item, filepath):
        # NOTE(review): this closure reads the enclosing `items`, which is
        # rebound to a list further down - confirm the callback is not
        # expected to fire after that point.
        self.assertIn(filepath, six.viewkeys(items))
        items[filepath] = True
        callbackCounts['item'] += 1

    self.client.addFolderUploadCallback(folderCallback)
    self.client.addItemUploadCallback(itemCallback)
    self.client.upload(self.libTestDir, callbackPublicFolder['_id'])

    # make sure counts are the same (callbacks not called more than once)
    # and that all folders and files have callbacks called on them
    self.assertEqual(folderCount, callbackCounts['folder'])
    self.assertEqual(item_count, callbackCounts['item'])
    self.assertTrue(all(six.viewvalues(items)))
    self.assertTrue(all(six.viewvalues(folders)))

    # Upload again with reuseExisting on
    existingList = list(self.model('folder').childFolders(
        parentType='folder', parent=callbackPublicFolder,
        user=callbackUser, limit=0))
    self.client.upload(self.libTestDir, callbackPublicFolder['_id'],
                       reuseExisting=True)
    newList = list(self.model('folder').childFolders(
        parentType='folder', parent=callbackPublicFolder,
        user=callbackUser, limit=0))
    # The second upload must not have created or changed any child folder.
    self.assertEqual(existingList, newList)
    self.assertEqual(len(newList), 1)
    self.assertEqual([f['name'] for f in self.model('folder').childFolders(
        parentType='folder', parent=newList[0],
        user=callbackUser, limit=0)], ['sub0', 'sub1', 'sub2'])

    # Test upload via a file-like object into a folder
    callbacks = []
    path = os.path.join(self.libTestDir, 'sub0', 'f')
    size = os.path.getsize(path)

    def progressCallback(info):
        # Record every progress notification for the assertions below.
        callbacks.append(info)

    with open(path) as f:
        with self.assertRaises(girder_client.IncorrectUploadLengthError):
            try:
                # Announce one byte more than the file really has.
                self.client.uploadFile(
                    callbackPublicFolder['_id'], stream=f, name='test',
                    size=size + 1, parentType='folder')
            except girder_client.IncorrectUploadLengthError as exc:
                self.assertEqual(
                    exc.upload['received'], exc.upload['size'] - 1)
                # The upload record is expected to be gone after the failure.
                upload = self.model('upload').load(exc.upload['_id'])
                self.assertEqual(upload, None)
                # Re-raise so the surrounding assertRaises sees the error.
                raise

    with open(path) as f:
        file = self.client.uploadFile(
            callbackPublicFolder['_id'], stream=f, name='test',
            size=size, parentType='folder',
            progressCallback=progressCallback)

    self.assertEqual(len(callbacks), 1)
    self.assertEqual(callbacks[0]['current'], size)
    self.assertEqual(callbacks[0]['total'], size)
    self.assertEqual(file['name'], 'test')
    self.assertEqual(file['size'], size)
    # Files with no extension should fallback to the default MIME type
    self.assertEqual(file['mimeType'], 'application/octet-stream')

    # From here on `items` and `files` are lists of server-side documents,
    # no longer the local bookkeeping built at the top of the test.
    items = list(
        self.model('folder').childItems(folder=callbackPublicFolder))
    self.assertEqual(len(items), 1)
    self.assertEqual(items[0]['name'], 'test')

    files = list(self.model('item').childFiles(items[0]))
    self.assertEqual(len(files), 1)

    # Make sure MIME type propagates correctly when explicitly passed
    with open(path) as f:
        file = self.client.uploadFile(
            callbackPublicFolder['_id'], stream=f, name='test',
            size=size, parentType='folder', mimeType='image/jpeg')
    self.assertEqual(file['mimeType'], 'image/jpeg')

    # Make sure MIME type is guessed based on file name if not passed
    with open(path) as f:
        file = self.client.uploadFile(
            callbackPublicFolder['_id'], stream=f, name='test.txt',
            size=size, parentType='folder')
    self.assertEqual(file['mimeType'], 'text/plain')
0
Example 94
Project: CouchPotatoV1 Source File: movie.py
def _checkMovieExists(self, movie):
    """Return True if ``movie`` is already present in XBMC.

    Two optional checks are made, depending on the 'XBMC' configuration
    section: a direct query of the MyVideos database file found below
    'dbpath', and a query through the XBMC web API when
    'useWebAPIExistingCheck' is set.  Both compare the first result column
    ("c09") with ``movie.imdb``.

    Returns:
        True as soon as one check finds the movie, otherwise False.
    """
    dbpath = cherrypy.config.get('config').get('XBMC', 'dbpath')
    if dbpath:
        dbfile = None
        # The last matching file found by the walk is used.
        for root, dirs, files in os.walk(dbpath):
            for filename in files:
                if filename.startswith('MyVideos'):
                    dbfile = os.path.join(root, filename)

        if dbfile:
            #------Opening connection to XBMC DB------
            connXbmc = MySqlite.connect(dbfile)
            if connXbmc:
                inXBMC = False
                try:
                    log.debug('Checking if movie exists in XBMC by IMDB id:' + movie.imdb)
                    connXbmc.row_factory = MySqlite.Row
                    cXbmc = connXbmc.cursor()
                    sqlQuery = self._generateSQLQuery(movie)
                    cXbmc.execute(sqlQuery)
                    #------End of Opening connection to XBMC DB------
                    for rowXbmc in cXbmc:  # do a final check just to be sure
                        log.debug('Found in XBMC:' + rowXbmc["c09"])
                        # One matching row is enough; later rows that do not
                        # match must not reset the flag.
                        if movie.imdb == rowXbmc["c09"]:
                            inXBMC = True
                    cXbmc.close()
                finally:
                    # NOTE(review): assumes MySqlite follows the DB-API, so
                    # the connection has close() - confirm.
                    connXbmc.close()

                if inXBMC:
                    log.info('Movie already exists in XBMC, skipping.')
                    return True
            else:
                log.info('Could not connect to the XBMC database at ' + dbpath)
        else:
            log.info('Could not find the XBMC MyVideos db at ' + dbpath)

    if cherrypy.config.get('config').get('XBMC', 'useWebAPIExistingCheck'):
        xbmc = XBMC()
        sqlQuery = self._generateSQLQuery(movie)
        xbmcResultsHosts = xbmc.queryVideoDatabase(sqlQuery)

        if xbmcResultsHosts:
            for xmbcResults in xbmcResultsHosts:
                records = xmbcResults.strip().split("<record>")
                for xmbcResult in records:
                    xmbcResult = xmbcResult.replace("</record>", "")
                    if xmbcResult == "":
                        continue

                    # Strip the <field> markup and drop empty fragments.
                    fields = [field.replace("</field>", "")
                              for field in xmbcResult.split("<field>")]
                    fields = [field for field in fields if field != ""]
                    log.debug("fields = %s" % fields)
                    if not fields:
                        # A record without any field has nothing to compare.
                        continue
                    c09 = fields[0]
                    if c09 == movie.imdb:
                        log.info('Movie already exists in XBMC (web API call), skipping.')
                        return True

    return False
0
Example 95
Project: django-comps Source File: views.py
def export_comps(request):
    """
    Returns a zipfile of the rendered HTML templates in the COMPS_DIR

    The archive holds every file of STATIC_ROOT under ``static/`` and every
    rendered template of COMPS_DIR at its relative path; ``/static``
    references in CSS files and rendered HTML are rewritten to relative
    links.
    """
    in_memory = BytesIO()
    archive = ZipFile(in_memory, "a")

    comps = settings.COMPS_DIR
    static = settings.STATIC_ROOT

    context = RequestContext(request, {})
    context['debug'] = False

    # dump static resources
    # TODO: inspect each template and only pull in resources that are used
    for dirname, dirs, filenames in os.walk(static):
        for filename in filenames:
            full_path = os.path.join(dirname, filename)
            rel_path = os.path.relpath(full_path, static)
            with open(full_path, 'rb') as static_file:
                content = static_file.read()
            ext = os.path.splitext(filename)[1]
            if ext == '.css':
                # convert static refs to relative links
                dotted_rel = os.path.relpath(static, full_path)
                new_rel_path = '{0}{1}'.format(dotted_rel, '/static')
                # str.encode works on Python 2 and 3; bytes(text, 'utf8')
                # raises TypeError on Python 2.
                content = content.replace(b'/static', new_rel_path.encode('utf8'))
            path = os.path.join('static', rel_path)
            archive.writestr(path, content)

    for dirname, dirs, filenames in os.walk(comps):
        for filename in filenames:
            full_path = os.path.join(dirname, filename)
            rel_path = os.path.relpath(full_path, comps)
            template_path = os.path.join(comps.split('/')[-1], rel_path)
            html = render_to_string(template_path, context)
            # convert static refs to relative links:
            # one '../' per directory level of the template
            depth = len(rel_path.split(os.sep)) - 1
            dotted_rel = '.' if depth == 0 else '../' * depth
            new_rel_path = '{0}{1}'.format(dotted_rel, '/static')
            html = html.replace('/static', new_rel_path)
            if PY2:
                html = unicode(html)
            archive.writestr(rel_path, html.encode('utf8'))

    for item in archive.filelist:
        item.create_system = 0
    archive.close()

    response = HttpResponse(content_type="application/zip")
    response["Content-Disposition"] = "attachment; filename=comps.zip"
    in_memory.seek(0)
    response.write(in_memory.read())

    return response
0
Example 96
Project: python-steemlib Source File: upload_posts.py
def main():
    """Publish the markdown posts found below the posts directory to STEEM.

    Each sub-folder of ``--dir`` is named after an author; every ``*.md``
    file in it is posted with the file name (without suffix) as permlink,
    its first line as subject and the text from the third line on as body.

    Raises:
        Exception: if the directory does not exist, the wallet connection
            cannot be opened, or the wallet is locked.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=("Post files into STEEM\n\n"
                     "This script goes into the posts directory that "
                     "contains subfolders named after the authors.\n"
                     "This subfolders contain markdown (*.md) files "
                     "from which \n"
                     " * the file name is used as permlink\n"
                     " * the first line of content is subject\n"
                     " * the rest of the content is body\n")
    )
    parser.add_argument('--author',
                        type=str,
                        help='Only publish/update posts of this author')
    parser.add_argument('--permlink',
                        type=str,
                        help='Only publish/update the permlink')
    parser.add_argument('--category',
                        type=str,
                        help='Post in category')
    parser.add_argument('--dir',
                        type=str,
                        help='Directory that holds all posts (default: "posts")')
    parser.add_argument('-d',
                        "--dryrun",
                        help="Not not actually post anything",
                        action="store_true")
    parser.set_defaults(dir="./posts", dryrun=False, category="")
    args = parser.parse_args()

    if not path.isdir(args.dir):
        raise Exception("Directory %s does not exist!" % args.dir)

    try:
        client = SteemClient(Config)
    except Exception:
        raise Exception("Coudn't open conenction to wallet!")

    if client.wallet.is_locked():
        raise Exception("Wallet is locked! Please unlock it!")

    suffix = ".md"
    for (dirpath, dirnames, filenames) in walk(args.dir):
        # The folder that holds the file is the author name.
        author = dirpath.split("/")[-1]
        for f in filenames:
            # Only markdown files are posts; other files (editor backups,
            # hidden files, ...) must not be published.
            if not f.endswith(suffix):
                continue
            # Remove the suffix only, not every ".md" inside the name.
            permlink = f[:-len(suffix)]

            if args.author and author != args.author:
                continue
            if args.permlink and permlink != args.permlink:
                continue

            with open(dirpath + "/" + f) as post_file:
                content = post_file.read().split("\n")
            subject = content[0].replace("# ", "")
            # Line 2 (the separator after the subject) is not part of the body.
            body = "\n".join(content[2:])

            pprint(client.wallet.post_comment(author,
                                              permlink,
                                              "", args.category,
                                              subject,
                                              body,
                                              "{}",
                                              not args.dryrun))
0
Example 97
Project: MongoApp Source File: inotify_c.py
def read_events(self, event_buffer_size=DEFAULT_EVENT_BUFFER_SIZE):
    """
    Reads one buffer of events from the inotify descriptor and returns them.

    The raw buffer is parsed into ``InotifyEvent`` objects while holding
    ``self._lock``. Along the way the watch book-keeping is updated for
    moved paths, ignored watches are cleaned up, and - for recursive
    watchers - newly created directories are watched and ``IN_CREATE``
    events are simulated for everything already inside them.

    :param event_buffer_size:
        Maximum number of bytes to read from the descriptor in one call.
    :returns:
        A list of ``InotifyEvent`` objects. The list is empty when the
        read fails with ``EBADF``.
    :raises OSError:
        If reading the descriptor fails with an error other than
        ``EINTR`` (retried) or ``EBADF`` (empty result).
    """
    # HACK: We need to traverse the directory path
    # recursively and simulate events for newly
    # created subdirectories/files. This will handle
    # mkdir -p foobar/blah/bar; touch foobar/afile
    def _recursive_simulate(src_path):
        events = []
        for root, dirnames, filenames in os.walk(src_path):
            for dirname in dirnames:
                try:
                    full_path = absolute_path(os.path.join(root, dirname))
                    wd_dir = self._add_watch(full_path, self._event_mask)
                    e = InotifyEvent(
                        wd_dir, InotifyConstants.IN_CREATE | InotifyConstants.IN_ISDIR, 0, dirname, full_path)
                    events.append(e)
                except OSError:
                    # Best effort: a directory that cannot be watched is
                    # skipped without aborting the traversal.
                    pass
            for filename in filenames:
                full_path = absolute_path(os.path.join(root, filename))
                wd_parent_dir = self._wd_for_path[absolute_path(os.path.dirname(full_path))]
                e = InotifyEvent(
                    wd_parent_dir, InotifyConstants.IN_CREATE, 0, filename, full_path)
                events.append(e)
        return events

    while True:
        try:
            event_buffer = os.read(self._inotify_fd, event_buffer_size)
        except OSError as e:
            if e.errno == errno.EINTR:
                # Interrupted by a signal: simply retry the read.
                continue
            if e.errno == errno.EBADF:
                # NOTE(review): presumably the descriptor was closed
                # elsewhere (e.g. during shutdown) - nothing left to read.
                return []
            # Previously execution fell through with `event_buffer` unbound
            # and failed later with an UnboundLocalError; surface the real
            # error instead.
            raise
        break

    with self._lock:
        event_list = []
        for wd, mask, cookie, name in Inotify._parse_event_buffer(event_buffer):
            if wd == -1:
                continue

            wd_path = unicode_paths.encode(self._path_for_wd[wd])
            src_path = absolute_path(os.path.join(wd_path, name))
            inotify_event = InotifyEvent(
                wd, mask, cookie, name, src_path)

            if inotify_event.is_moved_from:
                self.remember_move_from_event(inotify_event)
            elif inotify_event.is_moved_to:
                move_src_path = self.source_for_move(inotify_event)
                if move_src_path in self._wd_for_path:
                    # Re-key the existing watch descriptor under the
                    # destination path of the move.
                    moved_wd = self._wd_for_path[move_src_path]
                    del self._wd_for_path[move_src_path]
                    self._wd_for_path[inotify_event.src_path] = moved_wd
                    self._path_for_wd[moved_wd] = inotify_event.src_path
                src_path = absolute_path(os.path.join(wd_path, name))
                inotify_event = InotifyEvent(wd, mask, cookie, name, src_path)

            if inotify_event.is_ignored:
                # Clean up book-keeping for deleted watches.
                self._remove_watch_bookkeeping(src_path)
                continue

            event_list.append(inotify_event)

            if (self.is_recursive and
                    inotify_event.is_directory and
                    inotify_event.is_create):
                # TODO: When a directory from another part of the
                # filesystem is moved into a watched directory, this
                # will not generate events for the directory tree.
                # We need to coalesce IN_MOVED_TO events and those
                # IN_MOVED_TO events which don't pair up with
                # IN_MOVED_FROM events should be marked IN_CREATE
                # instead relative to this directory.
                try:
                    self._add_watch(src_path, self._event_mask)
                except OSError:
                    continue

                event_list.extend(_recursive_simulate(src_path))

    return event_list
0
Example 98
Project: jasy Source File: Create.py
def massFilePatcher(path, data):
    """
    Replaces placeholders in all text files below `path` with values from `data`.

    The directory tree is walked recursively, skipping directories whose
    name starts with a dot. Every file is read as UTF-8 line by line; files
    containing a NUL character are treated as binary and ignored. Matches
    of `fieldPattern` are substituted with the string value of the matching
    field in `data`, and a file is rewritten only when its content changed.

    - path: root directory of the tree to patch
    - data: object offering `get(field)` and `has(field)` for placeholder lookup

    Files which reference a field missing from `data` are reported with a
    warning and left untouched.
    """

    # Convert method with access to local data
    def convertPlaceholder(mo):
        field = mo.group(1)
        value = data.get(field)

        # Verify that None means missing
        if value is None and not data.has(field):
            raise ValueError('No value for placeholder "%s"' % field)

        # Requires value being a string
        return str(value)

    # Patching files recursively
    Console.info("Patching files...")
    Console.indent()

    for dirPath, dirNames, fileNames in os.walk(path):
        relpath = os.path.relpath(dirPath, path)

        # Filter dotted directories like .git, .bzr, .hg, .svn, etc.
        # The list is replaced in place (slice assignment) so that os.walk
        # does not descend into them. Calling remove() while iterating over
        # the same list would skip the entry following each removed one.
        dirNames[:] = [dirname for dirname in dirNames if not dirname.startswith(".")]

        for fileName in fileNames:
            filePath = os.path.join(dirPath, fileName)
            fileRel = os.path.normpath(os.path.join(relpath, fileName))

            Console.debug("Processing: %s..." % fileRel)

            fileContent = []
            isBinary = False

            # Parse file line by line to detect binary files early and omit
            # fully loading them into memory. The context manager closes
            # the handle on every exit path.
            try:
                with open(filePath, "r", encoding="utf-8", errors="surrogateescape") as fileHandle:
                    for line in fileHandle:
                        if '\0' in line:
                            isBinary = True
                            break

                        fileContent.append(line)

            except UnicodeDecodeError as ex:
                Console.warn("Can't process file: %s: %s", fileRel, ex)
                continue

            if isBinary:
                Console.debug("Ignoring binary file: %s", fileRel)
                continue

            fileContent = "".join(fileContent)

            # Update content with available data
            try:
                resultContent = fieldPattern.sub(convertPlaceholder, fileContent)
            except ValueError as ex:
                Console.warn("Unable to process file %s: %s!", fileRel, ex)
                continue

            # Only write file if there where any changes applied
            if resultContent != fileContent:
                Console.info("Updating: %s...", Console.colorize(fileRel, "bold"))

                with open(filePath, "w", encoding="utf-8", errors="surrogateescape") as fileHandle:
                    fileHandle.write(resultContent)

    Console.outdent()
0
Example 99
Project: unrpyc Source File: unrpyc.py
def main():
    """
    Command line entry point for decompiling (or dumping) .rpyc files.

    Parses the command line, expands wildcards and directories into a list
    of .rpyc files, runs `worker` on each of them - in a process pool when
    more than one process is requested - and finally either writes the
    collected translations to a file or reports how many files succeeded
    and failed.
    """
    # python27 unrpyc.py [-c] [-d] [--python-screens|--ast-screens|--no-screens] file [file ...]
    parser = argparse.ArgumentParser(description="Decompile .rpyc files")

    parser.add_argument(
        '-c', '--clobber', dest='clobber', action='store_true',
        help="overwrites existing output files")

    parser.add_argument(
        '-d', '--dump', dest='dump', action='store_true',
        help="instead of decompiling, pretty print the ast to a file")

    parser.add_argument(
        '-p', '--processes', dest='processes', action='store', default=cpu_count(),
        help="use the specified number of processes to decompile")

    parser.add_argument(
        '-t', '--translation-file', dest='translation_file', action='store', default=None,
        help="use the specified file to translate during decompilation")

    parser.add_argument(
        '-T', '--write-translation-file', dest='write_translation_file', action='store', default=None,
        help="store translations in the specified file instead of decompiling")

    parser.add_argument(
        '-l', '--language', dest='language', action='store', default='english',
        help="if writing a translation file, the language of the translations to write")

    parser.add_argument(
        '--sl1-as-python', dest='decompile_python', action='store_true',
        help="Only dumping and for decompiling screen language 1 screens. "
        "Convert SL1 Python AST to Python code instead of dumping it or converting it to screenlang.")

    parser.add_argument(
        '--comparable', dest='comparable', action='store_true',
        help="Only for dumping, remove several false differences when comparing dumps. "
        "This suppresses attributes that are different even when the code is identical, such as file modification times. ")

    parser.add_argument(
        '--no-pyexpr', dest='no_pyexpr', action='store_true',
        help="Only for dumping, disable special handling of PyExpr objects, instead printing them as strings. "
        "This is useful when comparing dumps from different versions of Ren'Py. "
        "It should only be used if necessary, since it will cause loss of information such as line numbers.")

    parser.add_argument(
        'file', type=str, nargs='+',
        help="The filenames to decompile. "
        "All .rpyc files in any directories passed or their subdirectories will also be decompiled.")

    args = parser.parse_args()

    # NOTE: every print below takes one parenthesised argument, which the
    # Python 2 print statement used by this file renders unchanged.

    # Fail early to avoid wasting time going through the files
    target = args.write_translation_file
    if target and not args.clobber and path.exists(target):
        print("Output translation file already exists. Pass --clobber to overwrite.")
        return

    if args.translation_file:
        with open(args.translation_file, 'rb') as in_file:
            args.translations = in_file.read()

    # Expand wildcards and concatenate the per-pattern result lists
    candidates = list(itertools.chain.from_iterable(
        glob.glob(pattern) for pattern in args.file))

    # Recursively add .rpyc files from any directories passed
    files = []
    for candidate in candidates:
        if not path.isdir(candidate):
            files.append(candidate)
            continue

        for dirpath, dirnames, filenames in walk(candidate):
            for filename in filenames:
                if filename.endswith('.rpyc'):
                    files.append(path.join(dirpath, filename))

    # Check if we actually have files
    if not files:
        parser.print_help()
        parser.error("No script files given.")

    # One job per file: (parsed arguments, file name, file size)
    jobs = [(args, filename, path.getsize(filename)) for filename in files]

    processes = int(args.processes)
    if processes > 1:
        # If a big file starts near the end, there could be a long time with
        # only one thread running, which is inefficient. Avoid this by starting
        # big files first.
        jobs.sort(key=itemgetter(2), reverse=True)
        results = Pool(processes, sharelock, [printlock]).map(worker, jobs, 1)
    else:
        # Decompile in the order Ren'Py loads in
        jobs.sort(key=itemgetter(1))
        results = [worker(job) for job in jobs]

    if args.write_translation_file:
        print("Writing translations to %s..." % args.write_translation_file)

        translated_dialogue = {}
        translated_strings = {}
        good = 0
        bad = 0

        for result in results:
            if result:
                good += 1
                translated_dialogue.update(magic.loads(result[0], class_factory))
                translated_strings.update(result[1])
            else:
                bad += 1

        with open(args.write_translation_file, 'wb') as out_file:
            magic.safe_dump((args.language, translated_dialogue, translated_strings), out_file)
    else:
        # Check per file if everything went well and report back
        good = results.count(True)
        bad = results.count(False)

    def plural(count):
        # Suffix turning "file" into "files" for counts above one
        return 's' if count > 1 else ''

    if bad == 0:
        print("Decompilation of %d script file%s successful" % (good, plural(good)))
    elif good == 0:
        print("Decompilation of %d file%s failed" % (bad, plural(bad)))
    else:
        print("Decompilation of %d file%s successful, but decompilation of %d file%s failed" % (good, plural(good), bad, plural(bad)))
0
Example 100
Project: butterflow Source File: cli.py
def main():
    """
    Command line entry point of butterflow.

    Parses the command line, handles the informational options (version,
    cache information, cache removal, device listing, probing) and
    otherwise builds a `Renderer` from the options and renders the video.

    Returns the process exit status: 0 on success or after an
    informational option, 1 when an input is missing or invalid or when
    rendering was interrupted.
    """
    par = argparse.ArgumentParser(usage='butterflow [options] [video]',
                                  add_help=False)
    req = par.add_argument_group('Required arguments')
    gen = par.add_argument_group('General options')
    dsp = par.add_argument_group('Display options')
    vid = par.add_argument_group('Video options')
    mux = par.add_argument_group('Muxing options')
    fgr = par.add_argument_group('Advanced options')

    req.add_argument('video', type=str, nargs='?', default=None,
                     help='Specify the input video')

    gen.add_argument('-h', '--help', action='help',
                     help='Show this help message and exit')
    gen.add_argument('--version', action='store_true',
                     help='Show program\'s version number and exit')
    gen.add_argument('-d', '--devices', action='store_true',
                     help='Show detected OpenCL devices and exit')
    gen.add_argument('-sw', action='store_true',
                     help='Set to force software rendering')
    gen.add_argument('-c', '--cache', action='store_true',
                     help='Show cache information and exit')
    gen.add_argument('--rm-cache', action='store_true',
                     help='Set to clear the cache and exit')
    gen.add_argument('-prb', '--probe', action='store_true',
                     help='Show media file information and exit')
    gen.add_argument('-v', '--verbosity', action='count',
                     help='Set to increase output verbosity')
    gen.add_argument('-q', '--quiet', action='store_true',
                     help='Set to suppress console output')

    dsp.add_argument('-p', '--show-preview', action='store_true',
                     help='Set to show video preview')
    dsp.add_argument('-a', '--add-info', action='store_true',
                     help='Set to embed debugging info into the output video')
    dsp.add_argument('-tt', '--text-type',
                     choices=['light', 'dark', 'stroke'],
                     default=settings['text_type'],
                     help='Specify text type for debugging info, '
                     '(default: %(default)s)')
    dsp.add_argument('-mrk', '--mark-frames', action='store_true',
                     help='Set to mark interpolated frames')

    vid.add_argument('-o', '--output-path', type=str,
                     default=settings['out_path'],
                     help='Specify path to the output video')
    vid.add_argument('-r', '--playback-rate', type=str,
                     help='Specify the playback rate as an integer or a float '
                     'Fractional forms are acceptable, e.g., 24/1.001 is the '
                     'same as 23.976. To use a multiple of the source '
                     'video\'s rate, follow a number with `x`, e.g., "2x" '
                     'will double the frame rate. The original rate will be '
                     'used by default if nothing is specified.')
    vid.add_argument('-s', '--subregions', type=str,
                     help='Specify rendering subregions in the form: '
                     '"a=TIME,b=TIME,TARGET=VALUE" where TARGET is either '
                     '`fps`, `dur`, `spd`. Valid TIME syntaxes are [hr:m:s], '
                     '[m:s], [s], [s.xxx], or `end`, which signifies to the '
                     'end the video. You can specify multiple subregions by '
                     'separating them with a colon `:`. A special subregion '
                     'format that conveniently describes the entire clip is '
                     'available in the form: "full,TARGET=VALUE".')
    vid.add_argument('-k', '--keep-subregions', action='store_true',
                     help='Set to render subregions that are not explicitly '
                     'specified')
    vid.add_argument('-vs', '--video-scale', type=str,
                     default=str(settings['video_scale']),
                     help='Specify output video size in the form: '
                     '"WIDTH:HEIGHT" or by using a factor. To keep the '
                     'aspect ratio only specify one component, either width '
                     'or height, and set the other component to -1, '
                     '(default: %(default)s)')
    vid.add_argument('-l', '--lossless', action='store_true',
                     help='Set to use lossless encoding settings')
    vid.add_argument('-sm', '--smooth-motion', action='store_true',
                     help='Set to tune for smooth motion. This mode yields '
                     'artifact-less frames by emphasizing blended frames over '
                     'warping pixels.')

    mux.add_argument('-mux', action='store_true',
                     help='Set to mux the source audio with the output video')

    fgr.add_argument('--fast-pyr', action='store_true',
                     help='Set to use fast pyramids')
    fgr.add_argument('--pyr-scale', type=float,
                     default=settings['pyr_scale'],
                     help='Specify pyramid scale factor, '
                     '(default: %(default)s)')
    fgr.add_argument('--levels', type=int,
                     default=settings['levels'],
                     help='Specify number of pyramid layers, '
                     '(default: %(default)s)')
    fgr.add_argument('--winsize', type=int,
                     default=settings['winsize'],
                     help='Specify averaging window size, '
                     '(default: %(default)s)')
    fgr.add_argument('--iters', type=int,
                     default=settings['iters'],
                     help='Specify number of iterations at each pyramid '
                     'level, (default: %(default)s)')
    fgr.add_argument('--poly-n', type=int,
                     choices=settings['poly_n_choices'],
                     default=settings['poly_n'],
                     help='Specify size of pixel neighborhood, '
                     '(default: %(default)s)')
    fgr.add_argument('--poly-s', type=float,
                     default=settings['poly_s'],
                     help='Specify standard deviation to smooth derivatives, '
                     '(default: %(default)s)')
    fgr.add_argument('-ff', '--flow-filter', choices=['box', 'gaussian'],
                     default=settings['flow_filter'],
                     help='Specify which filter to use for optical flow '
                     'estimation, (default: %(default)s)')

    # Prefix arguments such as "-1" with a space - presumably so argparse
    # treats them as values rather than option flags. The length guard
    # keeps a bare "-" or an empty argument from raising an IndexError.
    for i, arg in enumerate(sys.argv):
        if len(arg) > 1 and arg[0] == '-' and arg[1].isdigit():
            sys.argv[i] = ' '+arg

    args = par.parse_args()

    fmt = '[butterflow:%(filename)s:%(funcName)s.%(levelname)s]: %(message)s'
    logging.basicConfig(level=settings['loglevel_0'], format=fmt)
    log = logging.getLogger('butterflow')

    # action='count' leaves the value at None when -v is absent; normalize
    # it so that the comparison below never sees None.
    verbosity = args.verbosity or 0
    if verbosity == 1:
        log.setLevel(settings['loglevel_1'])
    if verbosity >= 2:
        log.setLevel(settings['loglevel_2'])
    if args.quiet:
        log.setLevel(settings['loglevel_quiet'])
        settings['quiet'] = True

    if args.version:
        print(__version__)
        return 0

    cachedir = settings['tempdir']
    if args.cache:
        nfiles = 0
        sz = 0
        for dirpath, dirnames, filenames in os.walk(cachedir):
            # Files directly inside the clbdir directory are not counted.
            if dirpath == settings['clbdir']:
                continue
            for filename in filenames:
                nfiles += 1
                fp = os.path.join(dirpath, filename)
                sz += os.path.getsize(fp)
        sz = sz / 1024.0**2
        print('{} files, {:.2f} MB'.format(nfiles, sz))
        print('cache @ '+cachedir)
        return 0
    if args.rm_cache:
        if os.path.exists(cachedir):
            import shutil
            shutil.rmtree(cachedir)
        print('cache deleted, done.')
        return 0

    if args.devices:
        ocl.print_ocl_devices()
        return 0

    if not args.video:
        print('no file specified, use: -h for help')
        return 1
    elif not os.path.exists(args.video):
        print('file does not exist')
        return 1

    if args.probe:
        avinfo.print_av_info(args.video)
        return 0

    extension = os.path.splitext(os.path.basename(args.output_path))[1].lower()
    if extension[1:] != 'mp4':
        print('bad out file extension')
        # An invalid output path is an error like the other input checks.
        return 1

    av_info = avinfo.get_av_info(args.video)

    use_sw_interpolate = args.sw or not ocl.compat_ocl_device_available()
    if use_sw_interpolate:
        log.warning('not using opencl, ctrl+c to quit')

    if args.flow_filter == 'gaussian':
        args.flow_filter = cv2.OPTFLOW_FARNEBACK_GAUSSIAN
    else:
        args.flow_filter = 0

    if args.smooth_motion:
        # Must override `poly_s`, the attribute read by optflow_fn below;
        # the former assignment to `args.polys` had no effect.
        args.poly_s = 0.01

    # The option values are bound as defaults when the function is
    # defined, i.e. after the adjustments made above.
    def optflow_fn(x, y,
                   pyr=args.pyr_scale, levels=args.levels,
                   winsize=args.winsize, iters=args.iters, polyn=args.poly_n,
                   polys=args.poly_s, fast=args.fast_pyr,
                   filt=args.flow_filter):
        if use_sw_interpolate:
            return cv2.calcOpticalFlowFarneback(
                x, y, pyr, levels, winsize, iters, polyn, polys, filt)
        else:
            return motion.ocl_farneback_optical_flow(
                x, y, pyr, levels, winsize, iters, polyn, polys, fast, filt)

    interpolate_fn = None
    if use_sw_interpolate:
        from butterflow.interpolate import sw_interpolate_flow
        interpolate_fn = sw_interpolate_flow
    else:
        interpolate_fn = motion.ocl_interpolate_flow

    try:
        w, h = w_h_from_input_str(args.video_scale, av_info['w'], av_info['h'])
        sequence = sequence_from_input_str(args.subregions,
                                           av_info['duration'],
                                           av_info['frames'])
        rate = rate_from_input_str(args.playback_rate, av_info['rate'])
    except (ValueError, AttributeError) as error:
        print('error: '+str(error))
        return 1

    def nearest_even_int(x):
        # Clears the lowest bit, i.e. rounds an odd value down to even.
        return x & ~1

    w1, h1 = av_info['w'], av_info['h']
    w2, h2 = nearest_even_int(w), nearest_even_int(h)

    # Pick the scaler according to whether the output has fewer or more
    # pixels than the source; none is needed when the areas are equal.
    if w1*h1 > w2*h2:
        scaling_method = settings['scaler_dn']
    elif w1*h1 < w2*h2:
        scaling_method = settings['scaler_up']
    else:
        scaling_method = None

    rnd = Renderer(args.video,
                   args.output_path,
                   sequence,
                   rate,
                   optflow_fn,
                   interpolate_fn,
                   w2,
                   h2,
                   scaling_method,
                   args.lossless,
                   args.keep_subregions,
                   args.show_preview,
                   args.add_info,
                   args.text_type,
                   args.mark_frames,
                   args.mux)

    motion.set_num_threads(settings['ocv_threads'])

    log.info('will render:\n' + str(rnd.sequence))

    success = True
    total_time = 0
    try:
        import timeit
        total_time = timeit.timeit(rnd.render,
                                   setup='import gc;gc.enable()',
                                   number=1)
    except (KeyboardInterrupt, SystemExit):
        success = False

    if success:
        log.info('made: '+args.output_path)
        out_sz = os.path.getsize(args.output_path) / 1024.0**2
        log.info('write ratio: {}/{}, ({:.2f}%) {:.2f} MB'.format(
            rnd.frs_written,
            rnd.frs_to_render,
            rnd.frs_written*100.0/rnd.frs_to_render,
            out_sz))
        txt = 'frames: {} real, +{} interpolated, +{} dupe, -{} drop'
        if not settings['quiet']:
            print(txt.format(rnd.source_frs,
                             rnd.frs_interpolated,
                             rnd.frs_duped,
                             rnd.frs_dropped))
        log.info('butterflow took {:.3g} mins, done.'.format(total_time / 60))
        return 0
    else:
        log.warning('quit unexpectedly')
        log.warning('files left in cache @ '+settings['tempdir'])
        return 1