os.walk

The following examples of the Python standard-library function os.walk are taken from open source projects. They show common patterns for recursively traversing a directory tree.

165 Examples
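
For reference, os.walk() yields a (dirpath, dirnames, filenames) tuple for every directory it visits. A minimal sketch of iterating a tree (the 'project' directory name is only a placeholder):

import os

for dirpath, dirnames, filenames in os.walk('project'):
    # dirpath is the directory being visited, dirnames its subdirectories,
    # filenames the non-directory entries it contains
    for name in filenames:
        print(os.path.join(dirpath, name))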

Example 51

Project: pyinfra Source File: files.py
@operation(pipeline_facts={
    'find_files': 'destination'
})
def sync(
    state, host, source, destination,
    user=None, group=None, mode=None, delete=False, exclude=None
):
    '''
    Syncs a local directory with a remote one, with delete support. Note that delete will
    remove extra files on the remote side, but not extra directories.

    + source: local directory to sync
    + destination: remote directory to sync to
    + user: user to own the files and directories
    + group: group to own the files and directories
    + mode: permissions of the files
    + delete: delete remote files not present locally
    + exclude: string or list/tuple of strings to match & exclude files (eg *.pyc)
    '''

    # If we don't enforce the source ending with /, remote_dirname below might start with
    # a /, which makes the path.join cut off the destination bit.
    if not source.endswith(path.sep):
        source = '{0}{1}'.format(source, path.sep)

    # Source relative to deploy.py
    if state.deploy_dir:
        source = path.join(state.deploy_dir, source)

    # Ensure exclude is a list/tuple
    if exclude is not None:
        if not isinstance(exclude, (list, tuple)):
            exclude = [exclude]

    put_files = []
    ensure_dirnames = []
    for dirname, _, filenames in walk(source):
        remote_dirname = dirname.replace(source, '')

        if remote_dirname:
            ensure_dirnames.append(remote_dirname)

        for filename in filenames:
            full_filename = path.join(dirname, filename)

            # Should we exclude this file?
            to_exclude = False
            if exclude:
                for match in exclude:
                    if fnmatch(full_filename, match):
                        to_exclude = True

            if to_exclude:
                continue

            put_files.append((
                # Join local as normal (unix, win)
                full_filename,
                # Join remote as unix like
                '/'.join(
                    item for item in
                    (destination, remote_dirname, filename)
                    if item
                )
            ))

    # Ensure the destination directory
    yield directory(
        state, host, destination,
        user=user, group=group,
    )

    # Ensure any remote dirnames
    for dirname in ensure_dirnames:
        yield directory(
            state, host,
            '/'.join((destination, dirname)),
            user=user, group=group,
        )

    # Put each file combination
    for local_filename, remote_filename in put_files:
        yield put(
            state, host,
            local_filename, remote_filename,
            user=user, group=group, mode=mode,
            add_deploy_dir=False,
        )

    # Delete any extra files
    if delete:
        remote_filenames = set(host.fact.find_files(destination) or [])
        wanted_filenames = set([remote_filename for _, remote_filename in put_files])
        files_to_delete = remote_filenames - wanted_filenames
        for filename in files_to_delete:
            yield file(state, host, filename, present=False)
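
The heart of sync() is mapping local paths found by os.walk to remote ones, with fnmatch-based exclusion. A minimal standalone sketch of that pattern (the './site/' and '/var/www' values are hypothetical):

import os
from fnmatch import fnmatch

source = './site/'        # must end with the path separator, as enforced above
destination = '/var/www'
exclude = ['*.pyc']

for dirname, _, filenames in os.walk(source):
    remote_dirname = dirname.replace(source, '')
    for filename in filenames:
        full_filename = os.path.join(dirname, filename)
        if any(fnmatch(full_filename, pattern) for pattern in exclude):
            continue
        remote_filename = '/'.join(
            part for part in (destination, remote_dirname, filename) if part)
        print(full_filename, '->', remote_filename)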

Example 52

Project: nikola Source File: status.py
    def _execute(self, options, args):
        """Display site status."""
        self.site.scan_posts()

        last_deploy = self.site.state.get('last_deploy')
        if last_deploy is not None:
            last_deploy = datetime.strptime(last_deploy, "%Y-%m-%dT%H:%M:%S.%f")
            last_deploy_offset = datetime.utcnow() - last_deploy
        else:
            print("It does not seem like you've ever deployed the site (or cache missing).")

        if last_deploy:

            fmod_since_deployment = []
            for root, dirs, files in os.walk(self.site.config["OUTPUT_FOLDER"], followlinks=True):
                if not dirs and not files:
                    continue
                for fname in files:
                    fpath = os.path.join(root, fname)
                    fmodtime = datetime.fromtimestamp(os.stat(fpath).st_mtime)
                    if fmodtime.replace(tzinfo=tzlocal()) > last_deploy.replace(tzinfo=gettz("UTC")).astimezone(tz=tzlocal()):
                        fmod_since_deployment.append(fpath)

            if len(fmod_since_deployment) > 0:
                print("{0} output files modified since last deployment {1} ago.".format(str(len(fmod_since_deployment)), self.human_time(last_deploy_offset)))
                if options['list_modified']:
                    for fpath in fmod_since_deployment:
                        print("Modified: '{0}'".format(fpath))
            else:
                print("Last deployment {0} ago.".format(self.human_time(last_deploy_offset)))

        now = datetime.utcnow().replace(tzinfo=gettz("UTC"))

        posts_count = len(self.site.all_posts)

        # find all published posts
        posts_published = [post for post in self.site.all_posts if post.use_in_feeds]
        posts_published = sorted(posts_published, key=lambda post: post.source_path)

        # find all private posts
        posts_private = [post for post in self.site.all_posts if post.is_private]
        posts_private = sorted(posts_private, key=lambda post: post.source_path)

        # find all drafts
        posts_drafts = [post for post in self.site.all_posts if post.is_draft]
        posts_drafts = sorted(posts_drafts, key=lambda post: post.source_path)

        # find all scheduled posts with offset from now until publishing time
        posts_scheduled = [
            (post.date - now, post) for post in self.site.all_posts
            if post.publish_later and not (post.is_draft or post.is_private)
        ]
        posts_scheduled = sorted(posts_scheduled, key=lambda offset_post: (offset_post[0], offset_post[1].source_path))

        if len(posts_scheduled) > 0:
            if options['list_scheduled']:
                for offset, post in posts_scheduled:
                    print("Scheduled: '{1}' ({2}; source: {3}) in {0}".format(self.human_time(offset), post.meta('title'), post.permalink(), post.source_path))
            else:
                offset, post = posts_scheduled[0]
                print("{0} to next scheduled post ('{1}'; {2}; source: {3}).".format(self.human_time(offset), post.meta('title'), post.permalink(), post.source_path))
        if options['list_drafts']:
            for post in posts_drafts:
                print("Draft: '{0}' ({1}; source: {2})".format(post.meta('title'), post.permalink(), post.source_path))
        if options['list_private']:
            for post in posts_private:
                print("Private: '{0}' ({1}; source: {2})".format(post.meta('title'), post.permalink(), post.source_path))
        if options['list_published']:
            for post in posts_published:
                print("Published: '{0}' ({1}; source: {2})".format(post.meta('title'), post.permalink(), post.source_path))
        print("{0} posts in total, {1} scheduled, {2} drafts, {3} private and {4} published.".format(posts_count, len(posts_scheduled), len(posts_drafts), len(posts_private), len(posts_published)))

Example 53

Project: esky Source File: util.py
def create_zipfile(source,target,get_zipinfo=None,members=None,compress=None):
    """Bundle the contents of a given directory into a zipfile.

    The argument 'source' names the directory to read, while 'target' names
    the zipfile to be written.

    If given, the optional argument 'get_zipinfo' must be a function mapping
    filenames to ZipInfo objects.  It may also return None to indicate that
    defaults should be used, or a string to indicate that defaults should be
    used with a new archive name.

    If given, the optional argument 'members' must be an iterable yielding
    names or ZipInfo objects.  Files will be added to the archive in the
    order specified by this function.

    If the optional argument 'compress' is given, it must be a bool indicating
    whether to compress the files by default.  The default is no compression.
    """
    if not compress:
        compress_type = zipfile.ZIP_STORED
    else:
        compress_type = zipfile.ZIP_DEFLATED
    zf = zipfile.ZipFile(target,"w",compression=compress_type)
    if members is None:
        def gen_members():
            for (dirpath,dirnames,filenames) in os.walk(source):
                for fn in filenames:
                    yield os.path.join(dirpath,fn)[len(source)+1:]
        members = gen_members()
    for fpath in members:
        if isinstance(fpath,zipfile.ZipInfo):
            zinfo = fpath
            fpath = os.path.join(source,zinfo.filename)
        else:
            if get_zipinfo:
                zinfo = get_zipinfo(fpath)
            else:
                zinfo = None
            fpath = os.path.join(source,fpath)
        if os.path.islink(fpath):
            # For information about adding symlinks to a zip file, see
            # https://mail.python.org/pipermail/python-list/2005-June/322180.html
            dest = os.readlink(fpath)
            if zinfo is None:
                zinfo = zipfile.ZipInfo()
                zinfo.filename = fpath[len(source)+1:]
            elif isinstance(zinfo,basestring):
                link = zinfo
                zinfo = zipfile.ZipInfo()
                zinfo.filename = link
            else: # isinstance(zinfo,zipfile.ZipInfo)
                pass
            zinfo.create_system = 3
            zinfo.external_attr = 2716663808L # symlink: 0xA1ED0000
            zf.writestr(zinfo,dest)
        else: # not a symlink
            if zinfo is None:
                zf.write(fpath,fpath[len(source)+1:])
            elif isinstance(zinfo,basestring):
                zf.write(fpath,zinfo)
            else:
                with open(fpath,"rb") as f:
                    zf.writestr(zinfo,f.read())
    zf.close()
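
When members is not supplied, gen_members() walks source and slices off len(source)+1 characters so that archive names are stored relative to the directory root. A hypothetical call (the 'build' and 'app.zip' names are placeholders):

create_zipfile('build', 'app.zip', compress=True)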

Example 54

Project: andoc Source File: import_maildir.py
def main():
    if path.exists(argv[1]) and path.isdir(argv[1]):
        search_dir = argv[1]
    else:
        print "Error: invalid directory"
        exit(1)

    valid_emails = []
    for root, dirs, files in walk(search_dir):
        for name in files:
            valid_emails.append(path.join(root, name))
            
    if len(valid_emails) == 0:
        print "Error: no files found"
        exit(1)

    r = redis.Redis()
    for email in valid_emails:
        msg = message_from_file(open(email))

        has_plaintext = False
        if msg.is_multipart():
            for part in msg.walk():
                if part.get_content_type() == 'text/plain':
                    has_plaintext = True
                    plaintext = part.get_payload(decode=True)
        else:
            if msg.get_content_type() == 'text/plain':
                has_plaintext = True
                plaintext = msg.get_payload(decode=True)


        if has_plaintext:
            destfile = open('data/%s.txt' % path.basename(email), 'w')
            selections = []
            dates = []
            for k,v in msg.items():
                selection_start = destfile.tell()
                # web browser counts one char for \r\n
                destfile.write('%s: %s\n' % (
                    k.replace('\r','').strip(),
                    v.replace('\r','').strip())
                    )
                selection_end = destfile.tell()
                selections.append((selection_start, selection_end,
                    'http://www.w3.org/1999/xhtml/#div'))

                if k == 'Date':
                    ts = mktime_tz(parsedate_tz(v))
                    ts_start = len('%s: ' %k )
                    ts_end = selection_end - selection_start
                    dates.append(
                        (selection_start, selection_end, ts, ts_start, ts_end))

            destfile.write('\n')
            bstart = destfile.tell()
            destfile.write(plaintext.replace('\r','').strip())
            bend = destfile.tell()
            destfile.close()
            selections.append((bstart, bend+1, 
                'http://www.w3.org/1999/xhtml/#div'))
            
            doc = Docuement(r)
            if doc.add('data/%s.txt' % path.basename(email)):
                for start,end,ref in selections:
                    text_selection = TextSelection(doc.id, start, end, ref)
                    text_selection.save(r)

                for s_start, s_end, ts, ts_start, ts_end in dates:
                    pre = 'date'
                    sub = '%s%s#%s.s%se%s' %  (
                        'http://127.0.0.1:8080/doc/struc/',
                         doc.id, 'div', s_start, s_end)
                    # http://127.0.0.1:8080/doc/struc/1#div.s1086e1124/t6e37
                    trsub = '%s/t%se%s' % (sub, ts_start, ts_end)
                    trip = Triple(sub, pre, str(ts))
                    tid = trip.save(r)

                    h = HtmlSelection(doc.id, sub, ts_start, ts_end, tid)
                    h.save(r)

                    # save the object relation to this document
                    doc.add_relation(pre, str(ts))

Example 55

Project: docket Source File: __init__.py
def strip_private_layer(client, tag, parent_id, private_layer_id):

    build_tar = tempfile.NamedTemporaryFile()
    logger.info('saving tar file from build %s', build_tar.name)

    # TODO save using client
    p_args = ['docker', 'save', '--output', build_tar.name, tag]
    p = Popen(p_args)

    res = p.wait()
    if res != 0:
        sys.exit(res)

    try:
        client.remove_image(tag)
    except Exception:
        pass

    extract_dir = tempfile.mkdtemp()
    logger.info('extract the build tar %s', extract_dir)

    try:
        with tarfile.open(mode='r', fileobj=build_tar) as tar:
            tar.extractall(path=extract_dir)

        # prune away image layers under private_id
        # we already have them, so we don't need them again
        def prune(basepath, start_id):
            json_path = basepath + '/' + start_id + '/json'
            f = open(json_path, 'r+')
            content = json.load(f)
            f.close()
            if content.has_key('parent'):
                prune(basepath, content['parent'])
            elif content.has_key('Parent'):
                prune(basepath, content['Parent'])
            logger.debug('pruning %s', start_id)
            shutil.rmtree(basepath + '/' + start_id)

        logger.info('Splice out private layer id %s', private_layer_id)
        prune(extract_dir, private_layer_id)

        for (dirpath, dirnames, filenames) in walk(extract_dir):
            for dir in dirnames:
                json_path = extract_dir + '/' + dir + '/json'

                f = open(json_path, 'r+')
                content = json.load(f)
                if content.has_key('parent') and content['parent'] == private_layer_id:
                    content['parent'] = parent_id
                    content['Parent'] = parent_id
                    content['config']['Image'] = parent_id
                    content['container_config']['Image'] = parent_id
                    f.seek(0)
                    json.dump(content, f)
                    f.truncate()
                elif content.has_key('Parent') and content['Parent'] == private_layer_id:
                    content['parent'] = parent_id
                    content['Parent'] = parent_id
                    content['config']['Image'] = parent_id
                    content['container_config']['Image'] = parent_id
                    f.seek(0)
                    json.dump(content, f)
                    f.truncate()
                f.close()

        logger.info('make final tarball')

        tmp_fpath = tempfile.mkstemp()
        try:
            tmp_file = tmp_fpath[0]
            tmp_path = tmp_fpath[1]

            with tarfile.open(name=tmp_path, mode='w') as tar:
                tar.add(extract_dir, arcname='')

            os.fsync(tmp_file)

            logger.info('loading final image %s', tmp_path)
            p_args = ['docker', 'load', '--input', tmp_path]
            p = Popen(p_args)

            res = p.wait()
            if res != 0:
                sys.exit(res)
        finally:
            os.remove(tmp_fpath[1])

    finally:
        shutil.rmtree(extract_dir)
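
Note that os.walk recurses into every level, so the dirnames seen in the loop above include nested directories as well as the top-level layer directories. If only the immediate subdirectories of extract_dir are wanted, one walk step is enough (a sketch):

import os

layer_dirs = next(os.walk(extract_dir))[1]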

Example 56

Project: tika-similarity Source File: value-similarity.py
def main(argv = None):
	if argv is None:
		argv = sys.argv

	try:
		try:
			opts, args = getopt.getopt(argv[1:], 'hvf:c:a:', ['help', 'verbose', 'directory=', 'file=', 'accept=' ])
		except getopt.error, msg:
			raise _Usage(msg)

		if len(opts) ==0:
			raise _Usage(_helpMessage)

		dirFile = ""
		filenames = []
		filename_list = []
		allowed_mime_types = []
		directory_flag = 0

		for option, value in opts:
			if option in ('-h', '--help'):
				raise _Usage(_helpMessage)

			elif option in ('-c', '--file'):
				#extract file names from command line
				if '-c' in argv :
					index_of_file_option = argv.index('-c')
				else :
					index_of_file_option = argv.index('--file')
				filenames = argv[index_of_file_option+1 : ]

			elif option in ('-f', '--directory'):
				dirFile = value
				directory_flag = 1
				for root, dirnames, files in os.walk(dirFile):
					dirnames[:] = [d for d in dirnames if not d.startswith('.')]
					for filename in files:
						if not filename.startswith('.'):							
							filename_list.append(os.path.join(root, filename))

			elif option in ('--accept'):
				#extract accepted mime types from command line
				index_of_mime_type_option = argv.index('--accept')
				allowed_mime_types = argv[index_of_mime_type_option+1 : ]

			elif option in ('-v', '--verbose'):
				global _verbose
				_verbose = True

		#format filename
		if directory_flag == 0:			
			filenames = [x.strip() for x in filenames]
			filenames = [filenames[k].strip('\'\n') for k in range(len(filenames))]
			for filename in filenames :
				if not os.path.isfile(os.path.join(dirFile, filename)):
					continue
				filename = os.path.join(dirFile, filename) if dirFile else filename
				filename_list.append(filename)

		if len(filename_list) <2 :
			raise _Usage("you need to type in at least two valid files")

		#allow only files with specified mime types
		if len(allowed_mime_types) != 0:
			filename_list = [filename for filename in filename_list if parser.from_file(filename) and str(parser.from_file(filename)['metadata']['Content-Type'].encode('utf-8')).split('/')[-1] in allowed_mime_types]
		else:
			print "Accepting all MIME Types....."

		union_feature_names = set()
		file_parsed_data = {}
		resemblance_scores = {}
		file_metadata={}

		for filename in filename_list:
			file_parsed = []
			# first compute the union of all features
			parsedData = parser.from_file(filename)
			filename_stripped = filename.replace(",", "")
			try:
				file_metadata[filename_stripped] = parsedData["metadata"]

				#get key : value of metadata
				for key in parsedData["metadata"]:
					value = parsedData["metadata"][key]
					if isinstance(value, list):
						value = ", ".join(parsedData["metadata"][key])

					file_parsed.append(str(key.strip(' ').encode('utf-8') + ": " + value.strip(' ').encode('utf-8')))

				file_parsed_data[filename_stripped] = set(file_parsed)
				union_feature_names = union_feature_names | set(file_parsed_data[filename_stripped])

			except ConnectionError:
				sleep(1)
			except KeyError:
				continue

		total_num_features = len(union_feature_names)
		
				

		# now compute the specific resemblance and containment scores
		for filename in file_parsed_data:
			overlap = {}
			overlap = file_parsed_data[filename] & set(union_feature_names)
			resemblance_scores[filename] = float(len(overlap))/total_num_features

		sorted_resemblance_scores = sorted(resemblance_scores.items(), key=operator.itemgetter(1), reverse=True)

		'''print "Resemblance:\n"
		for tuple in sorted_resemblance_scores:
			print os.path.basename(tuple[0].rstrip(os.sep))+","+str(tuple[1]) +"," + tuple[0] + ","+ convertUnicode(file_metadata[tuple[0]])+'\n'''
		with open("similarity-scores.txt", "w") as f:
			f.write("Resemblance : \n")
			for tuple in sorted_resemblance_scores:
				f.write(os.path.basename(tuple[0].rstrip(os.sep))+","+str(tuple[1]) +"," + tuple[0] + ","+ convertUnicode(file_metadata[tuple[0]])+'\n')

	except _Usage, err:
		print >>sys.stderr, sys.argv[0].split('/')[-1] + ': ' + str(err.msg)
		return 2
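
The line dirnames[:] = [d for d in dirnames if not d.startswith('.')] is the standard way to prune a traversal: because the list is modified in place during a top-down walk, os.walk never descends into the removed directories. A minimal sketch of the same skip-hidden-entries walk:

import os

for root, dirnames, files in os.walk('.'):
    # prune hidden directories in place so os.walk skips them entirely
    dirnames[:] = [d for d in dirnames if not d.startswith('.')]
    for filename in files:
        if not filename.startswith('.'):
            print(os.path.join(root, filename))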

Example 57

Project: courtlistener Source File: import_columbia.py
def do_many(dir_path, limit, random_order, status_interval, log_file, 
            newcases, skipdupes, skip_newcases, avoid_nocites, courtdates,
            startfolder, startfile, debug):
    """Runs through a directory of the form /data/[state]/[sub]/.../[folders]/[.xml docuements].
    Parses each .xml docuement, instantiates the associated model object, and
    saves the object. Prints/logs status updates and tracebacks instead of
    raising exceptions.

    :param dir_path: The directory.
    :param limit: A limit on how many files to run through. If None, will run
    through all (or if random order, forever).
    :param random_order: If true, will run through the directories and files in
    random order.
    :param status_interval: How often a status update will be given.
    :param log_file: If not None, file paths that raise Exceptions will be
    logged to this file.
    :param newcases: If true, skip court-years that already have data.
    :param skipdupes: If true, skip duplicates.    
    :param skip_newcases: If true, skip cases imported under newcases.
    :param avoid_nocites: If true, skip cases from dates after any case with no cite.
    :param courtdates: If true, skip cases with dates before court established.
    :param startfolder: If not None, start on startfolder
    :param startfile: If not None, start on this file (for resuming)
    """
    if limit:
        total = limit
    elif not random_order:
        print ("Getting an initial file count ...")
        print
        total = 0
        for _, _, file_names in os.walk(dir_path):
            total += len(fnmatch.filter(file_names, '*.xml'))
    else:
        total = None
    log = None
    if log_file:
        print ("Logging problematic file paths to '%s' ..." % log_file)
        print
        log = logging.getLogger(__name__)
        log.setLevel(logging.INFO)
        log.addHandler(logging.FileHandler(log_file))
    # go through the files, yielding parsed files and printing status updates as
    # we go
    folders = glob(dir_path+'/*')
    folders.sort()
    count = 0

    # get earliest dates for each court
    if newcases:
        print('Only new cases: getting earliest dates by court.')
        min_dates = get_min_dates()
    else:
        min_dates = None
        
    if avoid_nocites:
        if newcases:
            raise Exception("Cannot use both avoid_nocites and newcases options.")
        print('Avoiding no cites: getting earliest dates by court with no citation.')
        min_dates = get_min_nocite()
        
    if courtdates:
        start_dates = get_courtdates()
    else:
        start_dates = None

    # check if skipping first columbias cases

    if skip_newcases:
        skiplist = get_path_list()
    else:
        skiplist = set()

    # start/resume functionality
    if startfolder is not None:
        skipfolder = True
    else:
        skipfolder = False
    if startfile is not None:
        skipfile = True
    else:
        skipfile = False

    for folder in folders:
        if skipfolder:
            if startfolder is not None:
                checkfolder = folder.split('/')[-1]
                if checkfolder == startfolder:
                    skipfolder = False
                else:
                    continue
        print(folder)

        for path in file_generator(folder, random_order, limit):

            if skipfile:
                if startfile is not None:
                    checkfile = path.split('/')[-1]
                    if checkfile == startfile:
                        skipfile = False
                    else:
                        continue

            if path in skiplist:
                continue

            # skip cases in 'misc*' folders -- they are relatively different
            # than the other cases, so we'll deal with them later
            if 'miscellaneous_court_opinions' in path:
                continue

            print(path)

            # try to parse/save the case and print any exceptions with full
            # tracebacks
            try:
                parsed = parse_file(path)
                make_and_save(parsed, skipdupes, min_dates, start_dates, debug)
            except Exception as e:
                # log the file name
                if log:
                    log.info(path)
                # print simple exception summaries for known problems
                known = [
                    'mismatched tag', 'Failed to get a citation',
                    'Failed to find a court ID',
                    'null value in column "date_filed"', 'duplicate(s)'
                ]
                if any(k in str(e) for k in known):
                    print
                    print "Known exception in file '%s':" % path
                    print str(e)
                    print
                else:
                    # otherwise, print generic traceback
                    print
                    print "Unknown exception in file '%s':" % path
                    print traceback.format_exc()
                    print
        # status update
        count += 1
        if count % status_interval == 0:
            print
            if total:
                print "Finished %s out of %s files." % (count, total)
            else:
                print "Finished %s files." % count
            print
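
The initial file count combines os.walk with fnmatch.filter. The same count can be written as a single expression (a sketch, reusing dir_path from above):

import fnmatch
import os

total = sum(len(fnmatch.filter(file_names, '*.xml'))
            for _, _, file_names in os.walk(dir_path))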

Example 58

Project: bsdpy Source File: bsdpserver.py
def getNbiOptions(incoming):
    """
        The getNbiOptions() function walks through a given directory and
        finds and parses compatible NBIs by looking for NBImageInfo.plist
        files which are then processed with plistlib to extract an NBI's
        configuration items that are needed later on to send to BSDP clients.

        It is assumed that the NBI root directory is laid out as follows:
            /nbi/MyGreatImage.nbi
            /nbi/AnotherNetBootImage.nbi
    """
    # Initialize lists to store NBIs and their options
    nbioptions = []
    nbisources = []
    try:
        for path, dirs, files in os.walk(incoming):
            # Create an empty dict that will hold an NBI's settings
            thisnbi = {}
            if os.path.splitext(path)[1] == '.nbi':
                del dirs[:]

                # Search the path for an NBImageInfo.plist and parse it.
                logging.debug('Considering NBI source at ' + str(path))
                nbimageinfoplist = find('NBImageInfo.plist', path)[0]
                nbimageinfo = plistlib.readPlist(nbimageinfoplist)

                # Pull NBI settings out of the plist for use later on:
                #   booter = The kernel which is loaded with tftp
                #   disabledsysids = System IDs to blacklist, optional
                #   dmg = The actual OS image loaded after the booter
                #   enabledsysids = System IDs to whitelist, optional
                #   enabledmacaddrs = Enabled MAC addresses to whitelist, optional
                #                     (and for which a key may not exist in)
                #   id = The NBI Identifier, must be unique
                #   isdefault = Indicates the NBI is the default
                #   length = Length of the NBI name, needed for BSDP packet
                #   name = The name of the NBI

                if nbimageinfo['Index'] == 0:
                    logging.debug('Image "%s" Index is NULL (0), skipping!'
                                    % nbimageinfo['Name'])
                    continue
                elif nbimageinfo['IsEnabled'] is False:
                    logging.debug('Image "%s" is disabled, skipping.'
                                    % nbimageinfo['Name'])
                    continue
                else:
                    thisnbi['id'] = nbimageinfo['Index']

                thisnbi['booter'] = \
                    find('booter', path)[0]
                thisnbi['description'] = \
                    nbimageinfo['Description']
                thisnbi['disabledsysids'] = \
                    nbimageinfo['DisabledSystemIdentifiers']
                thisnbi['dmg'] = \
                    '/'.join(find('*.dmg', path)[0].split('/')[2:])

                thisnbi['enabledmacaddrs'] = \
                    nbimageinfo.get('EnabledMACAddresses', [])
                # EnabledMACAddresses must be lower-case - Apple's tools create them
                # as such, but in case they aren't..
                thisnbi['enabledmacaddrs'] = [mac.lower() for mac in
                                              thisnbi['enabledmacaddrs']]

                thisnbi['enabledsysids'] = \
                    nbimageinfo['EnabledSystemIdentifiers']
                thisnbi['isdefault'] = \
                    nbimageinfo['IsDefault']
                thisnbi['length'] = \
                    len(nbimageinfo['Name'])
                thisnbi['name'] = \
                    nbimageinfo['Name']
                thisnbi['proto'] = \
                    nbimageinfo['Type']


                # Add the parameters for the current NBI to nbioptions
                nbioptions.append(thisnbi)
                # Found an eligible NBI source, add it to our nbisources list
                nbisources.append(path)
    except:
        logging.debug("Unexpected error getNbiOptions: %s" %
                        sys.exc_info()[1])
        raise

    return nbioptions, nbisources
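
The del dirs[:] call is the other common pruning idiom: once a *.nbi bundle directory is found, emptying dirs in place stops os.walk from descending any further into it. A minimal sketch of stopping at directories with a given extension (the /nbi root and .nbi suffix are taken from the docstring above):

import os

for path, dirs, files in os.walk('/nbi'):
    if os.path.splitext(path)[1] == '.nbi':
        del dirs[:]  # do not walk inside the bundle
        print('found bundle:', path)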

Example 59

Project: import-mailbox-to-gmail Source File: import-mailbox-to-gmail.py
def main():
  """Import multiple users' mbox files to Gmail.

  """
  httplib2.debuglevel = args.httplib2debuglevel
  # Use args.logging_level if defined.
  try:
    logging_level = args.logging_level
  except AttributeError:
    logging_level = 'INFO'

  # Default logging to standard output
  logging.basicConfig(
      level=logging_level,
      format='%(asctime)s %(levelname)s %(funcName)s@%(filename)s %(message)s',
      datefmt='%H:%M:%S')

  # More detailed logging to file
  file_handler = logging.handlers.RotatingFileHandler(args.log,
                                                      maxBytes=1024 * 1024 * 32,
                                                      backupCount=8)
  file_formatter = logging.Formatter(
      '%(asctime)s %(process)d %(levelname)s %(funcName)s '
      '(%(filename)s:%(lineno)d) %(message)s')
  file_formatter.datefmt = '%Y-%m-%dT%H:%M:%S (%z)'
  file_handler.setFormatter(file_formatter)
  logging.getLogger().addHandler(file_handler)

  logging.info('*** Starting %s %s on Python %s ***',
               APPLICATION_NAME,
               APPLICATION_VERSION,
               sys.version)
  logging.info('Arguments:')
  for arg, value in sorted(vars(args).items()):
    logging.info('\t%s: %r', arg, value)

  number_of_labels_imported_without_error = 0
  number_of_labels_imported_with_some_errors = 0
  number_of_labels_failed = 0
  number_of_messages_imported_without_error = 0
  number_of_messages_failed = 0
  number_of_users_imported_without_error = 0
  number_of_users_imported_with_some_errors = 0
  number_of_users_failed = 0

  for username in next(os.walk(args.dir))[1]:
    try:
      logging.info('Processing user %s', username)
      try:
        credentials = get_credentials(username)
        http = credentials.authorize(set_user_agent(
            httplib2.Http(),
            '%s-%s' % (APPLICATION_NAME, APPLICATION_VERSION)))
        service = discovery.build('gmail', 'v1', http=http)
      except Exception:
        logging.error("Can't get access token for user %s", username)
        raise

      try:
        results = service.users().labels().list(
            userId=username,
            fields='labels(id,name)').execute(num_retries=args.num_retries)
        labels = results.get('labels', [])
      except Exception:
        logging.error("Can't get labels for user %s", username)
        raise

      try:
        result = process_mbox_files(username, service, labels)
      except Exception:
        logging.error("Can't process mbox files for user %s", username)
        raise
      if result[2] == 0 and result[4] == 0:
        number_of_users_imported_without_error += 1
      elif result[0] > 0 or result[3] > 0:
        number_of_users_imported_with_some_errors += 1
      else:
        number_of_users_failed += 1
      number_of_labels_imported_without_error += result[0]
      number_of_labels_imported_with_some_errors += result[1]
      number_of_labels_failed += result[2]
      number_of_messages_imported_without_error += result[3]
      number_of_messages_failed += result[4]
      logging.info('Done importing user %s. Labels: %d succeeded, %d with some '
                   'errors, %d failed. Messages: %d succeeded, %d failed.',
                   username,
                   result[0],
                   result[1],
                   result[2],
                   result[3],
                   result[4])
    except Exception:
      number_of_users_failed += 1
      logging.exception("Can't process user %s", username)
  logging.info("*** Done importing all users from directory '%s'", args.dir)
  logging.info('*** Import summary:')
  logging.info('    %d users imported with no failures',
               number_of_users_imported_without_error)
  logging.info('    %d users imported with some failures',
               number_of_users_imported_with_some_errors)
  logging.info('    %d users failed',
               number_of_users_failed)
  logging.info('    %d labels (mbox files) imported with no failures',
               number_of_labels_imported_without_error)
  logging.info('    %d labels (mbox files) imported with some failures',
               number_of_labels_imported_with_some_errors)
  logging.info('    %d labels (mbox files) failed',
               number_of_labels_failed)
  logging.info('    %d messages imported successfully',
               number_of_messages_imported_without_error)
  logging.info('    %d messages failed\n',
               number_of_messages_failed)
  if (number_of_messages_failed + number_of_labels_failed +
      number_of_users_failed > 0):
    logging.info('*** Check log file %s for detailed errors.', args.log)
  logging.info('Finished.\n\n')
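
next(os.walk(args.dir))[1] returns only the immediate subdirectories of args.dir (a single walk step), which is how the per-user directories are discovered. An equivalent without os.walk, for comparison (a sketch):

import os

usernames = [d for d in os.listdir(args.dir)
             if os.path.isdir(os.path.join(args.dir, d))]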

Example 60

Project: needy Source File: universal_binary.py
    def build(self):
        print('Building universal binary %s' % self.name())

        universal_paths = dict()

        for library in self.libraries():
            for root, dirs, files in os.walk(library.build_directory()):
                for path in files + dirs:
                    key = os.path.join(os.path.relpath(root, library.build_directory()), path)
                    if key not in universal_paths:
                        universal_paths[key] = []
                    universal_paths[key].append((library, os.path.join(root, path)))

        directory = self.build_directory()

        if os.path.exists(directory):
            shutil.rmtree(directory)

        os.makedirs(directory)

        try:
            for path, builds in universal_paths.items():
                if len(builds) != len(self.libraries()):
                    continue

                file_name, extension = os.path.splitext(path)
                output_path = os.path.join(directory, path)

                self.__make_output_dirs_for_builds(output_path, builds)

                if not os.path.islink(builds[0][1]) and any([os.path.isdir(source_path) for _, source_path in builds]):
                    continue
                elif not os.path.islink(builds[0][1]) and len(self.libraries()) == 1:
                    print('Copying %s' % path)
                    shutil.copy(builds[0][1], output_path)
                elif extension in ['.h', '.hpp', '.hxx', '.ipp', '.c', '.cc', '.cpp']:
                    header_contents = '#if __APPLE__\n#include "TargetConditionals.h"\n#endif\n'
                    for library, header in builds:
                        macro = library.target().platform.detection_macro(library.target().architecture)
                        if not macro:
                            header_contents = ''
                            break
                        header_directory = os.path.join(os.path.dirname(output_path), 'needy_targets', library.target().platform.identifier(), library.target().architecture)
                        if not os.path.exists(header_directory):
                            os.makedirs(header_directory)
                        header_path = os.path.join(header_directory, os.path.basename(header))
                        shutil.copyfile(header, header_path)
                        header_contents += '#if {}\n#include "{}"\n#endif\n'.format(macro, os.path.relpath(header_path, os.path.dirname(output_path)))
                    if header_contents:
                        print('Creating universal header %s' % path)
                        with open(output_path, 'w') as f:
                            f.write(header_contents)
                elif os.path.islink(builds[0][1]):
                    print('Copying symlink %s' % path)
                    os.symlink(os.readlink(builds[0][1]), output_path)
                elif extension in ['.a', '.dylib', '.so']:
                    print('Creating universal library %s' % path)
                    inputs = []
                    for library, lib in builds:
                        f = tempfile.NamedTemporaryFile(delete=True)
                        try:
                            with open(os.devnull, 'w') as devnull:
                                subprocess.check_call(['lipo', '-extract', library.target().architecture, lib, '-output', f.name], stderr=devnull)
                        except subprocess.CalledProcessError:
                            subprocess.check_call(['cp', lib, f.name])
                        inputs.append(f)
                    subprocess.check_call(['lipo', '-create'] + [input.name for input in inputs] + ['-output', output_path])
                    for input in inputs:
                        input.close()
                elif extension == '.pc' and 'pkgconfig' in path:
                    universal_pc = None
                    for library, pc in builds:
                        with open(pc, 'r') as f:
                            contents = f.read().decode()
                            fixed = contents.replace(library.build_directory(), '${pcfiledir}/../..')
                            if universal_pc is not None and fixed != universal_pc:
                                print('Package config differs beyond prefix. Not creating %s' % path)
                                universal_pc = None
                                break
                            universal_pc = fixed
                    if universal_pc:
                        print('Creating universal package config: %s' % path)
                        with open(output_path, 'w') as f:
                            f.write(universal_pc.encode())
        except:
            shutil.rmtree(directory)
            raise

        if not self.is_in_development_mode():
            with open(self.build_status_path(), 'w') as status_file:
                status = {
                    'configuration': binascii.hexlify(self.configuration_hash()).decode()
                }
                json.dump(status, status_file)
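
The walk above indexes both files and directories by their path relative to each library's build directory, so that matching entries from different builds can be lined up. A minimal sketch of that relative-path keying (build_dir stands in for library.build_directory()):

import os

relative_paths = []
for root, dirs, files in os.walk(build_dir):
    for name in files + dirs:
        relative_paths.append(
            os.path.join(os.path.relpath(root, build_dir), name))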

Example 61

Project: MySignaturePlugin Source File: mysign.py
Function: run
	def run(self):
		if self.file:
			try:
				self.parse_functions(norm_path(self.file))
			except:
				pass
		elif not Pref.scan_running:
			Pref.scan_running = True
			Pref.scan_started = time.time()

			# the list of opened files in all the windows
			files = list(Pref.updated_files)
			# the list of opened folders in all the windows
			folders = list(Pref.updated_folders)
			Pref.folders = list(folders) # this is the "cache id" to know when to rescan the whole thing again
			# add also as folders, the dirname of the current opened files
			folders += [norm_path(dirname(file)) for file in files]
			# deduplicate
			folders = list(set(folders))
			_folders = []
			for folder in folders:
				_folders = deduplicate_crawl_folders(_folders, folder)
			folders = _folders

			if debug:
				print('Folders to scan:')
				print("\n".join(folders))

			# parsing
			files_seen = 0
			files_js = 0
			files_cache_miss = 0
			files_cache_hit = 0
			files_failed_parsing = 0

			# parse files with priority
			for file in files:
				if should_abort():
					break
				files_seen += 1
				files_js += 1
				if file not in MySign.files:
					try:
						self.parse_functions(file)
						files_cache_miss += 1
					except:
						files_failed_parsing += 1# the file may be unreachable/unreadable
				else:
					files_cache_hit += 1

			# now parse folders
			for folder in folders:
				if should_abort():
					break
				for dir, dnames, files in os.walk(folder):
					if should_abort():
						break
					for f in files:
						if should_abort():
							break
						files_seen += 1
						file = os.path.join(dir, f)
						if not should_exclude(file) and is_javascript_file(file):
							files_js += 1
							file = norm_path(file)
							if file not in MySign.files:
								try:
									self.parse_functions(file)
									files_cache_miss += 1
								except:
									files_failed_parsing += 1# the file may be unreachable/unreadable
							else:
								files_cache_hit += 1

			if debug:
				print('Scan done in '+str(time.time()-Pref.scan_started)+' seconds - Scan was aborted: '+str(Pref.scan_aborted))
				print('Files Seen:'+str(files_seen)+', Files JS:'+str(files_js)+', Cache Miss:'+str(files_cache_miss)+', Cache Hit:'+str(files_cache_hit)+', Failed Parsing:'+str(files_failed_parsing))

			Pref.scan_running = False
			Pref.scan_aborted = False

Example 62

Project: DIRAC Source File: FileStorage.py
Function: remove_directory
  def removeDirectory( self, path, recursive = False ):
    """Remove a directory on the physical storage together with all its files and
       subdirectories.
       :param path : single path or list of paths
       :param recursive : if True, we recursively delete the subdirectories
       :return: successful and failed dictionaries. The keys are the paths,
             the values are dictionaries {'Files': number of files deleted, 'Size': amount of data deleted}

      Note: it is known that if recursive is False, the removal of a non existing directory is successful,
            while it is failed for recursive = True. That's stupid, but well... I guess I have to keep the interface
    """
    res = checkArgumentFormat( path )
    if not res['OK']:
      return res
    urls = res['Value']

    self.log.debug( "FileStorage.removeDirectory: Attempting to remove %s directories." % len( urls ) )

    successful = {}
    failed = {}

    for url in urls:
      if recursive:
        nbOfFiles = 0
        totalSize = 0
        # Calculate the original size
        for root, _dirs, files in os.walk( url ):
          nbOfFiles += len( files )
          totalSize += sum( os.path.getsize( os.path.join( root, fn ) ) for fn in files )
        try:
          shutil.rmtree(url)
          successful[url] = {'FilesRemoved':nbOfFiles, 'SizeRemoved':totalSize}
        except OSError as ose:
          # if the directory does not exist, then the numbers are already correct, no need to re do
          # the walk
          if ose.errno != errno.ENOENT:
            # If we only removed partially, check how much was removed
            leftFiles = 0
            leftSize = 0
            for root, _dirs, files in os.walk( url ):
              leftFiles += len( files )
              leftSize += sum( os.path.getsize( os.path.join( root, fn ) ) for fn in files )
            nbOfFiles -= leftFiles
            totalSize -= leftSize
          failed[url] = {'FilesRemoved':nbOfFiles, 'SizeRemoved':totalSize}
      # If no recursive
      else:
        try:
          # Delete all the files
          for child in os.listdir( url ):
            fullpath = os.path.join( url, child )
            if os.path.isfile( fullpath ):
              os.unlink( fullpath )
          successful[url] = True
        except OSError as ose:
          # If we get as exception that the directory does not exist
          # (it can only be the directory), then success
          if ose.errno == errno.ENOENT:
            successful[url] = True
          else:
            failed[url] = str( ose )
          


    resDict = {'Failed':failed, 'Successful':successful}
    return S_OK( resDict )

Example 63

Project: wharf Source File: index.py
@app.route('/', methods=['GET', 'POST'])
def index():
    if request.method == 'POST':
        url = ""
        file = ""
        desc = ""
        services = []
        try:
            url = request.form['wharf_url']
        except:
            url = ""
        try:
            file = request.files['file']
        except:
            file = ""
        if file != "":
            try:
                if file and allowed_file(file.filename):
                    filename = secure_filename(file.filename)
                    file_path = path.join(app.config['UPLOAD_FOLDER'], filename)
                    file.save(file_path)
                    if filename.rsplit('.', 1)[1] == "zip":
                        with zipfile.ZipFile(file_path, 'r') as service_zip:
                            service_zip.extractall(path.join(app.config['UPLOAD_FOLDER'],
                                                             filename.rsplit('.', 1)[0]))
                            # !! TODO
                            #    allow exception for dockerfile, check at root as well
                            # check for existence of necessary files
                            missing_files = {}
                            for key,value in app.config['SERVICE_DICT'].items():
                                if not path.exists(path.join(app.config['UPLOAD_FOLDER'],
                                                             filename.rsplit('.', 1)[0],
                                                             filename.rsplit('.', 1)[0],
                                                             value)):
                                    missing_files[key] = value
                            services.append(filename.rsplit('.', 1)[0])
                            if missing_files:
                                if "dockerfile" in missing_files:
                                    return render_template("failed.html")
                                else:
                                    return render_template("forms.html",
                                                           services=services,
                                                           missing_files=missing_files,
                                                           filename=filename,
                                                           indexDesc=desc,
                                                           url=url)
                            move_services(filename, 1)

                    elif filename.rsplit('.', 1)[1] == "gz":
                        with tarfile.open(path.join(app.config['UPLOAD_FOLDER'], filename)) as service_gz:
                            service_gz.extractall(path.join(app.config['UPLOAD_FOLDER'],
                                                            filename.rsplit('.', 2)[0]))
                            # !! TODO
                            #    allow exception for dockerfile, check at root as well
                            # check for existence of necessary files
                            missing_files = {}
                            for key,value in app.config['SERVICE_DICT'].items():
                                if not path.exists(path.join(app.config['UPLOAD_FOLDER'],
                                                             filename.rsplit('.', 2)[0],
                                                             filename.rsplit('.', 2)[0],
                                                             value)):
                                    missing_files[key] = value
                            services.append(filename.rsplit('.', 2)[0])
                            if missing_files:
                                if "dockerfile" in missing_files:
                                    return render_template("failed.html")
                                else:
                                    return render_template("forms.html",
                                                           services=services,
                                                           missing_files=missing_files,
                                                           filename=filename,
                                                           indexDesc=desc,
                                                           url=url)
                            move_services(filename, 2)
                    else:
                        return render_template("failed.html")
                    # !! TODO
                    #    some post-processing once the file is uploaded
                else:
                    return render_template("failed.html")
            except:
                print "No file selected"
        elif url != "":
            try:
                if url:
                    url_path = (url.rsplit('/', 1)[1]).rsplit('.', 1)[0]
                    # !! TODO try/except
                    if url.rsplit('.', 1)[1] == "git":
                        # !! TODO try/except - if the folder already exists
                        git.clone(url, path.join(app.config['UPLOAD_FOLDER'],
                                                 url_path))

                        # check for dockerfile at root
                        # check for dockerfile assuming repo is the services folder
                        if path.exists(path.join(app.config['UPLOAD_FOLDER'],
                                                 url_path,
                                                 "Dockerfile")) or path.exists(path.join(app.config['UPLOAD_FOLDER'],
                                                 url_path,
                                                 app.config['SERVICE_DICT']['dockerfile'])):
                            # check for existence of necessary files
                            missing_files = {}
                            for key,value in app.config['SERVICE_DICT'].items():
                                if not path.exists(path.join(app.config['UPLOAD_FOLDER'],
                                                             url_path,
                                                             value)):
                                    missing_files[key] = value
                            services.append(url_path)
                            if "dockerfile" in missing_files:
                                del missing_files['dockerfile']
                            if missing_files:
                                return render_template("forms.html",
                                                       services=services,
                                                       missing_files=missing_files,
                                                       filename=file,
                                                       indexDesc=desc,
                                                       url=url)
                            # move to services folder
                            i = 0
                            while i != -1:
                                try:
                                    if i == 0:
                                        mv(path.join(app.config['UPLOAD_FOLDER'],
                                                     url_path),
                                           app.config['SERVICES_FOLDER'])
                                    elif i == 1:
                                        mv(path.join(app.config['UPLOAD_FOLDER'],
                                                     url_path),
                                           path.join(app.config['UPLOAD_FOLDER'],
                                                     url_path+str(i)))
                                        mv(path.join(app.config['UPLOAD_FOLDER'],
                                                     url_path+str(i)),
                                           app.config['SERVICES_FOLDER'])
                                    else:
                                        mv(path.join(app.config['UPLOAD_FOLDER'],
                                                     url_path+str(i-1)),
                                           path.join(app.config['UPLOAD_FOLDER'],
                                                     url_path+str(i)))
                                        mv(path.join(app.config['UPLOAD_FOLDER'],
                                                     url_path+str(i)),
                                           app.config['SERVICES_FOLDER'])
                                    i = -1
                                except:
                                    i += 1
                            try:
                                # remove leftover files in tmp
                                rmdir(path.join(app.config['UPLOAD_FOLDER'],
                                                url_path))
                            except:
                                pass
                        else:
                            i = 0
                            repo_dirs = []
                            for root, dirs, files in walk(path.join(app.config['UPLOAD_FOLDER'],
                                                                    url_path)):
                                if i == 0:
                                    repo_dirs = dirs
                                i += 1
                            if ".git" in repo_dirs:
                                repo_dirs.remove(".git")
                            services=repo_dirs
                            for service_dir in repo_dirs:
                                # check for dockerfile one folder deep
                                # check for dockerfile in regular services folder
                                # could be more than one
                                if path.exists(path.join(app.config['UPLOAD_FOLDER'],
                                                         url_path,
                                                         service_dir, "Dockerfile")) or path.exists(path.join(app.config['UPLOAD_FOLDER'],
                                                           url_path,
                                                           service_dir, app.config['SERVICE_DICT']['dockerfile'])):
                                    # check for existence of necessary files
                                    missing_files = {}
                                    for key,value in app.config['SERVICE_DICT'].items():
                                        if not path.exists(path.join(app.config['UPLOAD_FOLDER'],
                                                                     url_path,
                                                                     service_dir,
                                                                     value)):
                                            missing_files[key] = value

                                    if "dockerfile" in missing_files:
                                        del missing_files['dockerfile']
                                    if missing_files:
                                        # !! TODO TODO TODO
                                        #    this needs to be re-worked for times 
                                        #    when there is more than one service_dir
                                        return render_template("forms.html",
                                                               services=services,
                                                               missing_files=missing_files,
                                                               filename=file,
                                                               indexDesc=desc,
                                                               url=url)
                                    # move to services folder
                                    i = 0
                                    while i != -1:
                                        try:
                                            if i == 0:
                                                mv(path.join(app.config['UPLOAD_FOLDER'],
                                                             url_path,
                                                             service_dir),
                                                   app.config['SERVICES_FOLDER'])
                                            elif i == 1:
                                                mv(path.join(app.config['UPLOAD_FOLDER'],
                                                             url_path,
                                                             service_dir),
                                                   path.join(app.config['UPLOAD_FOLDER'],
                                                             url_path,
                                                             service_dir+str(i)))
                                                mv(path.join(app.config['UPLOAD_FOLDER'],
                                                             url_path,
                                                             service_dir+str(i)),
                                                   app.config['SERVICES_FOLDER'])
                                            else:
                                                mv(path.join(app.config['UPLOAD_FOLDER'],
                                                             url_path,
                                                             service_dir+str(i-1)),
                                                   path.join(app.config['UPLOAD_FOLDER'],
                                                             url_path,
                                                             service_dir+str(i)))
                                                mv(path.join(app.config['UPLOAD_FOLDER'],
                                                             url_path,
                                                             service_dir+str(i)),
                                                   app.config['SERVICES_FOLDER'])
                                            i = -1
                                        except:
                                            i += 1
                            try:
                                rmdir(path.join(app.config['UPLOAD_FOLDER'],
                                                url_path))
                            except:
                                pass
                    else:
                        # !! TODO
                        #    should point to docker index url, expects a <meta name="description"
                        #    won't have a dockerfile in the service folder
                        #    note the naming scheme will mess with directory structure of service name
                        #    needs to be handled as a special case
                        repo = ""
                        desc = ""
                        try:
                            index_repo = (requests.get(url).content).split('<meta name="description" content="')
                            index_repo = index_repo[1].split("\" />")
                            # !! TODO try, if fails, there is no description.
                            try:
                                repo, desc = index_repo[0].split(": ", 1)
                                desc = desc.replace("\n", " ")
                                print repo, desc
                            except:
                                repo = index_repo[0]
                                print repo
                        except:
                            return render_template("failed.html")
                        if repo == "":
                            return render_template("failed.html")
                        missing_files = {}
                        for key,value in app.config['SERVICE_DICT'].items():
                            missing_files[key] = value
                        del missing_files["dockerfile"]
                        if desc != "":
                            del missing_files["description"]
                        services.append(repo)

                        return render_template("forms.html",
                                               services=services,
                                               missing_files=missing_files,
                                               filename=file,
                                               indexDesc=desc,
                                               url=url)
            except:
                print "Bad URL"
        else:
            return render_template("failed.html")
        return redirect(url_for('index'))

    row = ""
    services = [name for name in listdir("services") if path.isdir(path.join("services", name))]
    for service in services:
        last_modified = ""
        last_modified = time.ctime(path.getmtime("services/"+service))
        description = ""
        row += '<tr><td class="rowlink-skip"><a href="saas/'+service+'">'+service+'</a></td><td>'
        try:
            description_path = "services/"+service+"/"+app.config['SERVICE_DICT']['description']
            with open(description_path, 'r') as content_file:
                description = content_file.read()
            row += description
        except:
            row += "no description"
        row += '</td><td><a href="saas/'+service+'">'+last_modified+'</a></td><td><a href="edit/'+service+'">Edit</a></td></tr>'
    row = Markup(row)
    return render_template("index.html",row=row)

Example 64

Project: PyDev.Debugger Source File: generate_code.py
Function: generate_dont_trace_files
def generate_dont_trace_files():
    template = '''# Important: Autogenerated file.

# DO NOT edit manually!
# DO NOT edit manually!

from _pydevd_bundle.pydevd_constants import IS_PY3K

LIB_FILE = 1
PYDEV_FILE = 2

DONT_TRACE = {
    # commonly used things from the stdlib that we don't want to trace
    'Queue.py':LIB_FILE,
    'queue.py':LIB_FILE,
    'socket.py':LIB_FILE,
    'weakref.py':LIB_FILE,
    '_weakrefset.py':LIB_FILE,
    'linecache.py':LIB_FILE,
    'threading.py':LIB_FILE,

    #things from pydev that we don't want to trace
    '_pydev_execfile.py':PYDEV_FILE,
%(pydev_files)s
}

if IS_PY3K:
    # if we try to trace io.py it seems it can get halted (see http://bugs.python.org/issue4716)
    DONT_TRACE['io.py'] = LIB_FILE

    # Don't trace common encodings too
    DONT_TRACE['cp1252.py'] = LIB_FILE
    DONT_TRACE['utf_8.py'] = LIB_FILE
'''

    pydev_files = []

    for root, dirs, files in os.walk(root_dir):
        for d in [
            '.git',
            '.settings',
            'build',
            'build_tools',
            'dist',
            'pydevd.egg-info',
            'pydevd_attach_to_process',
            'pydev_sitecustomize',
            'stubs',
            'tests',
            'tests_mainloop',
            'tests_python',
            'tests_runfiles',
            'test_pydevd_reload',
            'third_party',
            '__pycache__',
            '_pydev_runfiles',
            'pydev_ipython',
            ]:
            try:
                dirs.remove(d)
            except:
                pass
            
        for f in files:
            if f.endswith('.py'):
                if f not in (
                    '__init__.py',
                    'runfiles.py',
                    'pydev_coverage.py',
                    'pydev_pysrc.py',
                    'setup.py',
                    'setup_cython.py',
                    'interpreterInfo.py',
                    ):
                    pydev_files.append("    '%s': PYDEV_FILE," % (f,))

    contents = template % (dict(pydev_files='\n'.join(sorted(pydev_files))))
    assert 'pydevd.py' in contents
    assert 'pydevd_dont_trace.py' in contents
    with open(os.path.join(root_dir, '_pydevd_bundle', 'pydevd_dont_trace_files.py'), 'w') as stream:
        stream.write(contents)
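
The dirs.remove(d) calls above rely on os.walk honouring in-place edits of the dirs list: removing a name before the next iteration prevents descent into that directory. A small standalone sketch of the same pruning pattern, with illustrative directory names:

import os

SKIP_DIRS = {'.git', 'build', 'dist', '__pycache__', 'tests'}  # illustrative

def iter_py_files(root_dir):
    for root, dirs, files in os.walk(root_dir):
        # In-place slice assignment (or dirs.remove) keeps os.walk from
        # descending into the skipped directories.
        dirs[:] = [d for d in dirs if d not in SKIP_DIRS]
        for name in files:
            if name.endswith('.py'):
                yield os.path.join(root, name)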

Example 65

Project: yaraQA Source File: yaraqa.py
    def match_yara_rules(self):
        '''
        This method tries to match YARA rules against the malware and/or goodware repositories.
        '''
        rules = self.init_yara_rules()

        self.PLOT_LABELS.append(format(str(self.family)))

        for path in self.DIRECTORIES:

            EXPECTED_MATCHES = 0
            TOTAL_STATIC_MATCHES = 0
            TOTAL_MEMORY_MATCHES = 0
            STATIC_FAMILY_MATCHES = 0
            MEMORY_FAMILY_MATCHES = 0
            STATIC_FALSE_POSITIVES = 0
            MEMORY_FALSE_POSITIVES = 0
            STATIC_MISS = 0
            MEMORY_MISS = 0
            TOTAL_FILES = 0
            TOTAL_MATCHES = 0

            self.logger.debug('Matching against {0}'.format(str(path)))
            self.logger.debug('========================================\n')

            for root, dirs, files in os.walk(path):
                for file in files:

                    current_file = os.path.join(root, file)
                    file_matched = False
                    if self.targeted:
                        if self.family not in current_file:
                            continue

                    TOTAL_FILES = TOTAL_FILES + 1

                    if self.family in current_file:
                        EXPECTED_MATCHES = EXPECTED_MATCHES + 1
                        self.logger.debug('\nTARGET: {0}'.format(str(current_file)))
                    if (self.method == 'STATIC'):
                        matches = rules.match(current_file)
                    elif (self.method == 'MEMORY'):
                        task_id = self.create_cuckoo_task(current_file)
                    else:
                        matches = rules.match(current_file)
                        task_id = self.create_cuckoo_task(current_file)

                    #  MATCH STATIC
                    if (self.method == 'STATIC' or self.method == 'ALL'):
                        if matches:
                            TOTAL_STATIC_MATCHES = TOTAL_STATIC_MATCHES + 1
                            if self.family in current_file:
                                if not file_matched:
                                    TOTAL_MATCHES = TOTAL_MATCHES + 1
                                    file_matched = True
                                STATIC_FAMILY_MATCHES = STATIC_FAMILY_MATCHES + 1
                                self.logger.debug('-> STATIC YARA MATCH {0} \033[0;32m[OK]\033[0m'.format(str(matches)))
                            else:
                                STATIC_FALSE_POSITIVES = STATIC_FALSE_POSITIVES + 1
                                self.logger.debug('FALSE POSITIVE: ' + current_file)
                                self.logger.debug('-> STATIC YARA MATCH {0} \033[0;31m[FALSE POSITIVE]\033[0m'.format(str(matches)))
                        else:
                            if self.family in current_file:
                                STATIC_MISS = STATIC_MISS + 1
                                self.logger.debug('-> STATIC YARA \033[0;31m[MISS]\033[0m')

                    #  MATCH MEMORY
                    if (self.method == 'MEMORY' or self.method == 'ALL'):
                        report = self.view_cuckoo_report(task_id)
                        matched = False
                        rxp = re.compile(self.family, re.IGNORECASE)

                        if 'memory' in report:
                            if 'yarascan' in report['memory']:
                                if 'data' in report['memory']['yarascan']:
                                    matched = any(rxp.search(yar_n['rule']) for yar_n in report['memory']['yarascan']['data'])
                                else:
                                    if self.family in current_file:
                                        self.logger.debug("Warning: No 'data' key found in 'yarascan' section. file = {0}".format(str(current_file)))
                            else:
                                if self.family in current_file:
                                    self.logger.debug("Warning: No 'yarascan' key found in 'memory' section. file = {0}".format(str(current_file)))
                        else:
                            if self.family in current_file:
                                self.logger.debug("Warning: No 'memory' key found in report data. file = {0}".format(str(current_file)))


                        if matched:
                            TOTAL_MEMORY_MATCHES = TOTAL_MEMORY_MATCHES + 1
                            if self.family in current_file:
                                if not file_matched:
                                    TOTAL_MATCHES = TOTAL_MATCHES + 1
                                    file_matched = True
                                MEMORY_FAMILY_MATCHES = MEMORY_FAMILY_MATCHES + 1
                                self.logger.debug('-> MEMORY YARA MATCH \033[0;32m[OK]\033[0m')
                            else:
                                MEMORY_FALSE_POSITIVES = MEMORY_FALSE_POSITIVES + 1
                                self.logger.debug('FALSE POSITIVE: {0}'.format(str(current_file)))
                                self.logger.debug('-> MEMORY YARA MATCH \033[0;31m[FALSE POSITIVE]\033[0m')
                        else:
                            if self.family in current_file:
                                MEMORY_MISS = MEMORY_MISS + 1
                                self.logger.debug('-> MEMORY YARA \033[0;31m[MISS]\033[0m')
 
            if path == self.MALWARE_DIR:
                self.logger.debug('\n\t_MALWARE REPO_')
            elif path == self.GOODWARE_DIR:
                self.logger.debug('\n\t_GOODWARE REPO_')

            if (self.method == 'STATIC' or self.method == 'ALL'):
                self.logger.debug('\n STATIC YARA Q&A OVERVIEW:')       
                self.logger.debug(' =========================')
                self.print_results('STATIC', path, EXPECTED_MATCHES, STATIC_FAMILY_MATCHES, STATIC_MISS, STATIC_FALSE_POSITIVES, TOTAL_STATIC_MATCHES)
            
            if (self.method == 'MEMORY' or self.method == 'ALL'):
                self.logger.debug('\n MEMORY YARA Q&A OVERVIEW:')
                self.logger.debug(' =========================')
                self.print_results('MEMORY', path, EXPECTED_MATCHES, MEMORY_FAMILY_MATCHES, MEMORY_MISS, MEMORY_FALSE_POSITIVES, TOTAL_MEMORY_MATCHES)

            if path == self.MALWARE_DIR:
                if EXPECTED_MATCHES != 0:
                    TOTAL_MATCHES = (TOTAL_MATCHES/EXPECTED_MATCHES)*100
                    TOTAL_MATCHES = "{:.2f}".format(TOTAL_MATCHES)
                    self.PLOT_TOTAL_MATCH.append(float(TOTAL_MATCHES))
            
                    self.print_threshold(" Total Accuracy: ", TOTAL_MATCHES)

            self.logger.debug(" Total files analyzed: {0}\n\n".format(str(TOTAL_FILES)))

        if self.plot:
            if pygal_available:
                self.render_plot()

        DATA_PLOT = [self.PLOT_LABELS, self.PLOT_STATIC_RATIOS, self.PLOT_MEMORY_RATIOS, self.PLOT_TOTAL_MATCH]

        return DATA_PLOT
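
Stripped of the scoring logic, the walk above visits every file once and classifies it. A compact sketch of that skeleton with the test left as a caller-supplied callable (function and path names are illustrative, not yaraQA's API):

import os

def classify_files(top, predicate):
    """Walk top, apply predicate(path) to every file, return (hits, total)."""
    hits = total = 0
    for root, _dirs, files in os.walk(top):
        for name in files:
            total += 1
            if predicate(os.path.join(root, name)):
                hits += 1
    return hits, total

# e.g. classify_files('/repo/malware', lambda p: 'family_x' in p.lower())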

Example 66

Project: ssf Source File: mergejsmf.py
Function: get_files
def getFiles(configDict, configFile = None):
    cfg = None
    if configFile:
        cfg = Config(configFile)

    ## Build array of directories
    allDirs = []
    for k, v in configDict.iteritems():
        if not v in allDirs:
            allDirs.append(v)

    allFiles = []

    ## Find all the Javascript source files
    for sourceDirectory in allDirs:
        for root, dirs, files in os.walk(sourceDirectory):
            for filename in files:
                if filename.endswith(SUFFIX_JAVASCRIPT) and not filename.startswith("."):
                    filepath = os.path.join(root, filename)[len(sourceDirectory)+1:]
                    filepath = filepath.replace("\\", "/")
                    if cfg and cfg.include:
                        if filepath in cfg.include or filepath in cfg.forceFirst:
                            allFiles.append(filepath)
                    elif (not cfg) or (filepath not in cfg.exclude):
                        allFiles.append(filepath)

    files = {}
    order = [] # List of filepaths to output, in a dependency satisfying order 

    ## Import file source code
    ## TODO: Do import when we walk the directories above?
    for filepath in allFiles:
        print "Importing: %s" % filepath
        filekey = filepath.replace("\\", "/").split("/")[0]
        fullpath = os.path.join(configDict[filekey], filepath)
        content = open(fullpath, "U").read() # TODO: Ensure end of line @ EOF?
        files[filepath] = SourceFile(filepath, content) # TODO: Chop path?

    print

    from toposortmf import toposort

    complete = False
    resolution_pass = 1

    while not complete:
        order = [] # List of filepaths to output, in a dependency satisfying order 
        nodes = []
        routes = []
        ## Resolve the dependencies
        print "Resolution pass %s... " % resolution_pass
        resolution_pass += 1 

        for filepath, info in files.items():
            nodes.append(filepath)
            for neededFilePath in info.requires:
                routes.append((neededFilePath, filepath))

        for dependencyLevel in toposort(nodes, routes):
            for filepath in dependencyLevel:
                order.append(filepath)
                if not files.has_key(filepath):
                    print "Importing: %s" % filepath
                    filekey = filepath.replace("\\", "/").split("/")[0]
                    fullpath = os.path.join(configDict[filekey], filepath)
                    content = open(fullpath, "U").read() # TODO: Ensure end of line @ EOF?
                    files[filepath] = SourceFile(filepath, content) # TODO: Chop path?

        # Double check all dependencies have been met
        complete = True
        try:
            for fp in order:
                if max([order.index(rfp) for rfp in files[fp].requires] +
                       [order.index(fp)]) != order.index(fp):
                    complete = False
        except:
            complete = False
        
        print    


    ## Move forced first and last files to the required position
    if cfg:
        print "Re-ordering files..."
        order = cfg.forceFirst + [item
                     for item in order
                     if ((item not in cfg.forceFirst) and
                         (item not in cfg.forceLast))] + cfg.forceLast

    return (files, order)
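
getFiles trims the source directory prefix by slicing [len(sourceDirectory)+1:] and then swaps backslashes for forward slashes. os.path.relpath expresses the same intent more robustly; a sketch under that assumption (not the project's code):

import os

SUFFIX_JAVASCRIPT = '.js'

def find_js_files(source_dir):
    found = []
    for root, _dirs, files in os.walk(source_dir):
        for name in files:
            if name.endswith(SUFFIX_JAVASCRIPT) and not name.startswith('.'):
                full = os.path.join(root, name)
                # Path relative to source_dir, normalised to forward slashes
                rel = os.path.relpath(full, source_dir).replace(os.sep, '/')
                found.append(rel)
    return found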

Example 67

Project: autospec Source File: buildreq.py
def scan_for_configure(package, dir, autospecdir):
    global default_summary
    count = 0
    for dirpath, dirnames, files in os.walk(dir):
        default_score = 2
        if dirpath != dir:
            default_score = 1

        if any(file.endswith(".go") for file in files) and tarball.go_pkgname:
            add_buildreq("go")
            tarball.name = tarball.go_pkgname
            buildpattern.set_build_pattern("golang", default_score)
        if "CMakeLists.txt" in files and "configure.ac" not in files:
            add_buildreq("cmake")
            buildpattern.set_build_pattern("cmake", default_score)

        if "configure" in files and os.access(dirpath + '/configure', os.X_OK):
            buildpattern.set_build_pattern("configure", default_score)

        if "requires.txt" in files:
                grab_python_requirements(dirpath + '/requires.txt')

        if "setup.py" in files:
            add_buildreq("python-dev")
            add_buildreq("setuptools")
            add_buildreq("pbr")
            add_buildreq("pip")
            if setup_py_python3(dirpath + '/setup.py') or setup_py_python3(dirpath + '/PKG-INFO'):
                add_buildreq("python3-dev")
                buildpattern.set_build_pattern("distutils23", default_score)
                # force override the pypi rule
                if buildpattern.default_pattern == 'distutils' and buildpattern.pattern_strengh <= 10:
                    buildpattern.default_pattern = 'distutils23'
            else:
                # check for adding python3 support in patches
                try:
                    with open(autospecdir + '/series', 'r') as series:
                        for patchname in series:
                            if setup_py_python3(autospecdir + '/' + patchname.strip()):
                                add_buildreq("python3-dev")
                                buildpattern.set_build_pattern("distutils23", default_score)
                                # force override the pypi rule
                                if buildpattern.default_pattern == 'distutils' and buildpattern.pattern_strengh <= 10:
                                    buildpattern.default_pattern = 'distutils23'
                except:
                    pass
                buildpattern.set_build_pattern("distutils", default_score)

        if "Makefile.PL" in files or "Build.PL" in files:
            buildpattern.set_build_pattern("cpan", default_score)
        if "SConstruct" in files:
            add_buildreq("scons")
            add_buildreq("python-dev")
            buildpattern.set_build_pattern("scons", default_score)

        if "requirements.txt" in files:
                grab_python_requirements(dirpath + '/requirements.txt')

        for name in files:
            if name.lower().startswith("configure."):
                parse_configure_ac(os.path.join(dirpath, name))
            if name.lower().startswith("rakefile"):
                Rakefile(os.path.join(dirpath, name))
            if name.lower() == "makefile":
                buildpattern.set_build_pattern("make", default_score)
            if name.lower() == "autogen.sh":
                buildpattern.set_build_pattern("autogen", default_score)
            if name.lower() == "cmakelists.txt":
                buildpattern.set_build_pattern("cmake", default_score)

    can_reconf = os.path.exists(os.path.join(dir, "configure.ac"))
    if not can_reconf:
        can_reconf = os.path.exists(os.path.join(dir, "configure.in"))

    if can_reconf and patches.autoreconf:
        print("Patches touch configure.*, adding autoreconf stage")
        for breq in autoreconf_reqs:
            add_buildreq(breq)
    else:
        patches.autoreconf = False

    print("Buildreqs   : ", end="")
    for lic in sorted(buildreqs):
        if count > 4:
            count = 0
            print("\nBuildreqs   : ", end="")
        count = count + 1
        print(lic + " ", end="")
    print("")

Example 68

Project: mtpy Source File: EDLmake6hourfiles.py
Function: main
def main():

    #print '\n\tnot working yet - code under development !!\n'
    #return

    if len(sys.argv) < 3:
        sys.exit('\nNeed at least 4 arguments: \n\n '
            '<path to files> \n <sampling in seconds> \n'
            '<output dir> \n <stationname>\n'
            '[optional: <recursive flag -R>]\n'
            '(set this option for including all subfolders)\n\n')

    print 

    outdir = None
    stationname = None
    recursive = False

    multiple_stations = False

    if len(sys.argv) > 3:
        optionals = sys.argv[3:]
        for o in optionals:
            o = o.strip()
            if o[0] == '-':
                if o[1].lower() == 'r':
                    recursive = True
                continue
            elif outdir is None:
                outdir = o
                continue
            elif stationname is None:
                stationname = o 
                continue
    
    if stationname is not None:
        #check, if it's actually a comma-separated list:
        try:
            stationlist = stationname.split(',')
            if len(stationlist) > 1:
                multiple_stations = True
                stationlist = [i.upper() for i in stationlist]
        except:
            stationlist = [stationname]
    else: stationlist = [None]

    print stationlist 

    pathname_raw = sys.argv[1]
    pathname = op.abspath(op.realpath(pathname_raw))

    if not op.isdir(pathname):
        sys.exit('Data file(s) path not existing: {0}\n'.format(pathname))

    try:
        sampling = float(sys.argv[2])
        if sampling <= 0 : raise
    except:
        sys.exit('Second argument must be sampling interval in seconds (int/float)')

    if recursive is True:
        lo_folders = []
        for i,j,k in os.walk(pathname):
            lof = [op.abspath(op.join(i,f)) for f in j]            
            if stationname is not None:
                for stationname in stationlist:
                    for curr_folder in lof:
                        content_of_folder = os.listdir(curr_folder)
                        #print curr_folder
                        lof_station = [i for i in content_of_folder if stationname.lower() in i.lower()]
                        if len(lof_station) > 0 :
                            lo_folders.append(curr_folder)
        pathname = list(set(lo_folders))

    if len(pathname) == 0:
        sys.exit('\n\tERROR - No (sub-) folders for stations {0} found\n'.format(stationlist))


        
    for stationname in stationlist:
        print '....\n'
        print 'processing station ',stationname.upper()
        # if pathname[0] is not None:
        #     station_pathname = [i for i in pathname if stationname.lower() in i.lower()]
        #     if len(station_pathname) == 0:
        #         station_pathname = None
        # else:
        station_pathname = pathname
        
        try:
            MTfh.EDL_make_Nhour_files(6,station_pathname, sampling, stationname.upper(), outdir)
        except MTex.MTpyError_inputarguments:
            if stationname is None:
                sys.exit('\n\tERROR - No data found in (sub-)folders\n')
            else:
                sys.exit('\n\tERROR - No data found in (sub-)folders for station {0}\n'.format(stationname.upper()))
        except MemoryError:
            sys.exit('\n\tERROR - Not enough memory to store temporary arrays!\n')
        except IOError:
            sys.exit('\n\tERROR - Not enough space on local disk to store output!\n')

        except:
            sys.exit('\n\tERROR - could not process (sub-)folders')
    print '\n'

Example 69

Project: qiime Source File: all_tests.py
def main():
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    if (opts.suppress_unit_tests and opts.suppress_script_usage_tests):
        option_parser.error(
            "You're suppressing both test types. Nothing to run.")

    test_dir = abspath(dirname(__file__))

    unittest_good_pattern = re.compile('OK\s*$')
    application_not_found_pattern = re.compile('ApplicationNotFoundError')
    python_name = 'python'
    bad_tests = []
    missing_application_tests = []

    # Run through all of QIIME's unit tests, and keep track of any files which
    # fail unit tests.
    if not opts.suppress_unit_tests:
        unittest_names = []
        if not opts.unit_test_glob:
            for root, dirs, files in walk(test_dir):
                for name in files:
                    if name.startswith('test_') and name.endswith('.py'):
                        unittest_names.append(join(root, name))
        else:
            for fp in glob(opts.unit_test_glob):
                fn = split(fp)[1]
                if fn.startswith('test_') and fn.endswith('.py'):
                    unittest_names.append(abspath(fp))

        unittest_names.sort()

        for unittest_name in unittest_names:
            print "Testing %s:\n" % unittest_name
            command = '%s %s -v' % (python_name, unittest_name)
            stdout, stderr, return_value = qiime_system_call(command)
            print stderr
            if not unittest_good_pattern.search(stderr):
                if application_not_found_pattern.search(stderr):
                    missing_application_tests.append(unittest_name)
                else:
                    bad_tests.append(unittest_name)

    qiime_test_data_dir = join(get_qiime_project_dir(), 'qiime_test_data')
    qiime_test_data_dir_exists = exists(qiime_test_data_dir)
    if not opts.suppress_script_usage_tests and qiime_test_data_dir_exists:
        if opts.script_usage_tests is not None:
            script_usage_tests = opts.script_usage_tests.split(',')
        else:
            script_usage_tests = None

        # Run the script usage testing functionality
        script_usage_result_summary, has_script_usage_example_failures = \
            run_script_usage_tests(
                test_data_dir=qiime_test_data_dir,
                scripts_dir=get_qiime_scripts_dir(),
                working_dir=get_qiime_temp_dir(),
                verbose=True,
                tests=script_usage_tests,
                force_overwrite=True,
                timeout=400)

    print "==============\nResult summary\n=============="

    if not opts.suppress_unit_tests:
        print "\nUnit test result summary\n------------------------\n"
        if bad_tests:
            print "\nFailed the following unit tests.\n%s" % '\n'.join(bad_tests)

        if missing_application_tests:
            print "\nFailed the following unit tests, in part or whole due " +\
                "to missing external applications.\nDepending on the QIIME features " +\
                "you plan to use, this may not be critical.\n%s"\
                % '\n'.join(missing_application_tests)

        if not (missing_application_tests or bad_tests):
            print "\nAll unit tests passed.\n\n"

    if not opts.suppress_script_usage_tests:
        if qiime_test_data_dir_exists:
            print "\nScript usage test result summary\n--------------------------------\n"
            print script_usage_result_summary
        else:
            print "\nCould not run script usage tests because the directory %s does not exist." % qiime_test_data_dir
        print ""

    # If script usage tests weren't suppressed, the qiime_test_data dir must
    # exist and we can't have any failures.
    script_usage_tests_success = (opts.suppress_script_usage_tests or
                                  (qiime_test_data_dir_exists and
                                   not has_script_usage_example_failures))

    # If any of the unit tests or script usage tests fail, or if we have any
    # missing application errors, use return code 1 (as python's unittest
    # module does to indicate one or more failures).
    return_code = 1
    if (len(bad_tests) == 0 and len(missing_application_tests) == 0 and
            script_usage_tests_success):
        return_code = 0
    return return_code
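
The unit-test discovery above is a plain walk filtered on the test_*.py naming convention. The same collection can be written as a short helper (a sketch, not QIIME's API); pathlib.Path.rglob would be an equivalent modern alternative:

import os

def find_unit_tests(test_dir):
    names = []
    for root, _dirs, files in os.walk(test_dir):
        names.extend(os.path.join(root, f)
                     for f in files
                     if f.startswith('test_') and f.endswith('.py'))
    return sorted(names)

# Equivalent: sorted(str(p) for p in pathlib.Path(test_dir).rglob('test_*.py'))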

Example 70

Project: sublime-config Source File: api_docs.py
def run():
    """
    Looks through the docs/ dir and parses each markdown document, looking for
    sections to update from Python docstrings. Looks for section headers in
    the format:

     - ### `ClassName()` class
     - ##### `.method_name()` method
     - ##### `.attribute_name` attribute
     - ### `function_name()` function

    The markdown content following these section headers up until the next
    section header will be replaced by new markdown generated from the Python
    docstrings of the associated source files.

    By default maps docs/{name}.md to {modulename}/{name}.py. Allows for
    custom mapping via the MD_SOURCE_MAP variable.
    """

    print('Updating API docs...')

    md_files = []
    for root, _, filenames in os.walk(docs_dir):
        for filename in filenames:
            if not filename.endswith('.md'):
                continue
            md_files.append(os.path.join(root, filename))

    parser = CommonMark.DocParser()

    for md_file in md_files:
        md_file_relative = md_file[len(project_dir) + 1:]
        if md_file_relative in MD_SOURCE_MAP:
            py_files = MD_SOURCE_MAP[md_file_relative]
            py_paths = [os.path.join(project_dir, py_file) for py_file in py_files]
        else:
            py_files = [os.path.basename(md_file).replace('.md', '.py')]
            py_paths = [os.path.join(project_dir, module_name, py_files[0])]

            if not os.path.exists(py_paths[0]):
                continue

        with open(md_file, 'rb') as f:
            markdown = f.read().decode('utf-8')

        original_markdown = markdown
        md_lines = list(markdown.splitlines())
        md_ast = parser.parse(markdown)

        last_class = []
        last = {}
        sections = OrderedDict()
        find_sections(md_ast, sections, last, last_class, markdown.count("\n") + 1)

        md_chunks = {}

        for index, py_file in enumerate(py_files):
            py_path = py_paths[index]

            with open(os.path.join(py_path), 'rb') as f:
                code = f.read().decode('utf-8')
                module_ast = ast.parse(code, filename=py_file)
                code_lines = list(code.splitlines())

            for node in ast.iter_child_nodes(module_ast):
                walk_ast(node, code_lines, sections, md_chunks)

        added_lines = 0

        def _replace_md(key, sections, md_chunk, md_lines, added_lines):
            start, end = sections[key]
            start -= 1
            start += added_lines
            end += added_lines
            new_lines = md_chunk.split('\n')
            added_lines += len(new_lines) - (end - start)

            # Ensure a newline above each class header
            if start > 0 and md_lines[start][0:4] == '### ' and md_lines[start - 1][0:1] == '>':
                added_lines += 1
                new_lines.insert(0, '')

            md_lines[start:end] = new_lines
            return added_lines

        for key in sections:
            if key not in md_chunks:
                raise ValueError('No documentation found for %s' % key[1])
            added_lines = _replace_md(key, sections, md_chunks[key], md_lines, added_lines)

        markdown = '\n'.join(md_lines).strip() + '\n'

        if original_markdown != markdown:
            with open(md_file, 'wb') as f:
                f.write(markdown.encode('utf-8'))

Example 71

Project: Panda3D-Shader-Generator Source File: shaderBuilder.py
    def loadPath(self,paths):
        """
        
        called by init, but can be called again if you wish to reload the same paths, or a different one
        
        """
        
        
        libs=[]
        
        for root, dirs, files in itertools.chain.from_iterable(os.walk(path) for path in paths):
            for name in files:
                ext=os.path.splitext(name)[1]
                if ext==".txt":
                    currentFile=join(root, name)
                    for key,xitems in _parseFile(currentFile).iteritems():
                        if key=="node":
                            for items in xitems:
                                if "info" not in items:
                                    print "node missing info section in: "+currentFile
                                else:
                                    
                                    
                                    info=_parseInfoLines(items["info"],currentFile)
                                    
                                    if "name" not in info:
                                        print "invalid info entry missing name in: "+currentFile
                                    else:
                                        name=info["name"]
                                        
                                        shaderInputs=[]
                                        if "shaderinputs" in items:
                                            for s in items["shaderinputs"]:
                                                shaderInputs.append(param.shaderParamFromDefCode(s))
                                        
                                        if "output" in info:
                                            o=info["output"]
                                            assert o in ["True","False"]
                                            isOutPut=o=="True"
                                            assert "stage" in info
                                            stage=info["stage"]
                                        else:
                                            isOutPut=False
                                            stage=None
                                        
                                        inLinks=[]
                                        if "inlinks" in items:
                                            for s in items["inlinks"]:
                                                inLinks.append(param.linkEndFromDefCode(s))
                                        outLinks=[]
                                        if "outlinks" in items:
                                            for s in items["outlinks"]:
                                                outLinks.append(param.linkEndFromDefCode(s))
                                        
                                        
                                        code=""
                                        if "code" in items:
                                            code="\n".join(items["code"])
                                        
                                        node=nodes.metaCodeNode(name,code,shaderInputs,inLinks,outLinks,isOutPut=isOutPut,stage=stage)
                                        if name in self.nodeTypeClassMap:
                                            print "Warning: overwriting node "+repr(self.nodeTypeClassMap[name])+" with "+repr(node)+" from "+currentFile
                                        self.nodeTypeClassMap[name]=node
                                
                        elif key=="lib":
                            libs.append(xitems)
                        else:
                            print "Warning: throwing away invalid majorSection with unrecognized name: "+key+" in file: "+currentFile
                            
        libSource="\n".join(itertools.chain.from_iterable(lib["code"] for lib in itertools.chain.from_iterable(libs) if "code" in lib))
        
        self.libSource=libSource
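
The itertools.chain.from_iterable(os.walk(path) for path in paths) idiom above flattens several walks into a single loop. A self-contained sketch of just that idiom (the extension and the example roots are illustrative):

import itertools
import os

def iter_txt_files(paths):
    """Yield every *.txt file found under any of the given root paths."""
    walks = (os.walk(p) for p in paths)
    for root, _dirs, files in itertools.chain.from_iterable(walks):
        for name in files:
            if os.path.splitext(name)[1] == '.txt':
                yield os.path.join(root, name)

# e.g. list(iter_txt_files(['nodes/', 'extra_nodes/']))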

Example 72

Project: dagbldr Source File: datasets.py
def fetch_fruitspeech():
    """ Check for fruitspeech data

    Recorded by Hakon Sandsmark

    Returns
    -------
    summary : dict
        A dictionary containing data

        summary["data"] : list
            List of list of ints

        summary["specgrams"] : list
            List of arrays in (n_frames, n_features) format

        summary["target_names"] : list
            List of strings

        summary["target"] : list
            List of list of int

        summary["train_indices"] : array
            Indices for training samples

        summary["valid_indices"] : array
            Indices for validation samples

        summary["vocabulary_size"] : int
            Total vocabulary size

        summary["vocabulary"] : string
            The whole vocabulary as a string
    """

    data_path = check_fetch_fruitspeech()
    audio_matches = []
    for root, dirnames, filenames in os.walk(data_path):
        for filename in fnmatch.filter(filenames, '*.wav'):
            audio_matches.append(os.path.join(root, filename))
    all_chars = []
    all_words = []
    all_data = []
    all_specgram_data = []
    for wav_path in audio_matches:
        # Convert chars to int classes
        word = wav_path.split(os.sep)[-1][:-6]
        chars = string_to_character_index(word)
        fs, d = wavfile.read(wav_path)
        d = d.astype("int32")
        # Preprocessing from A. Graves "Towards End-to-End Speech
        # Recognition"
        Pxx = 10. * np.log10(np.abs(stft(d, fftsize=128))).astype(
            theano.config.floatX)
        all_data.append(d)
        all_specgram_data.append(Pxx)
        all_chars.append(chars)
        all_words.append(word)
    vocabulary_size = len(all_vocabulary_chars)
    # Shuffle data
    all_lists = list(safe_zip(all_data, all_specgram_data, all_chars,
                              all_words))
    random_state = np.random.RandomState(1999)
    random_state.shuffle(all_lists)
    all_data, all_specgram_data, all_chars, all_words = zip(*all_lists)
    wordset = list(set(all_words))
    train_matches = []
    valid_matches = []
    for w in wordset:
        matches = [n for n, i in enumerate(all_words) if i == w]
        # Hold out ~25% of the data, keeping some of every class
        train_matches.append(matches[:-4])
        valid_matches.append(matches[-4:])
    train_indices = np.array(sorted(
        [r for i in train_matches for r in i])).astype("int32")
    valid_indices = np.array(sorted(
        [r for i in valid_matches for r in i])).astype("int32")

    # reorganize into contiguous blocks
    def reorg(list_):
        ret = [list_[i] for i in train_indices] + [
            list_[i] for i in valid_indices]
        return np.asarray(ret)
    all_data = reorg(all_data)
    all_specgram_data = reorg(all_specgram_data)
    all_chars = reorg(all_chars)
    all_words = reorg(all_words)
    # after reorganizing finalize indices
    train_indices = np.arange(len(train_indices))
    valid_indices = np.arange(len(valid_indices)) + len(train_indices)
    return {"data": all_data,
            "specgrams": all_specgram_data,
            "target": all_chars,
            "target_names": all_words,
            "train_indices": train_indices,
            "valid_indices": valid_indices,
            "vocabulary_size": vocabulary_size,
            "vocabulary": all_vocabulary_chars}

Example 73

Project: OWASP-ZSC Source File: run.py
def getcommand(commands):

	backup_commands = commands
	crawler = 0
	command_path = ['zsc']
	command = ''
	while True:
		try:
			command = _input('/'.join(command_path), 'any', False)
			if command is None:
				_lets_error
		except:
			warn('interrupted by user!\nExit\n')
			sys.exit(0)
		check = True

		if command.startswith('#'): # allows for comments
			continue

		inContext = ['clear', 'help', 'about', 'version', 'back']
		for option in commands:
			if command == option and command not in inContext:
				crawler += 1
				if crawler is 1:
					commands = commands[option][1]
					command_path.append(option)
				if crawler is 2:
					if command == 'search':
						_search_shellcode(False,0)
						commands = backup_commands
						completer = autocomplete(commands)
						readline.set_completer(completer.complete)
						readline.parse_and_bind('tab: complete')
						crawler = 0
						command_path = ['zsc']
					elif command == 'download':
						_download_shellcode(False,0,'')
						commands = backup_commands
						completer = autocomplete(commands)
						readline.set_completer(completer.complete)
						readline.parse_and_bind('tab: complete')
						crawler = 0
						command_path = ['zsc']
					elif command == 'shell_storm_list':
						_grab_all()
						commands = backup_commands
						completer = autocomplete(commands)
						readline.set_completer(completer.complete)
						readline.parse_and_bind('tab: complete')
						crawler = 0
						command_path = ['zsc']
					elif command == 'generate':
						commands = commands[option]
						command_path.append(option)
					else:
						while True:
							f = []
							import os as OS
							for (dirpath, dirnames, filenames) in OS.walk('.'):
								f.extend(filenames)
								break
							completer = autocomplete(f)
							readline.set_completer(completer.complete)
							filename = _input('filename', 'any', True)
							completer = autocomplete(commands)
							readline.set_completer(completer.complete)
							try:
								content = open(filename, 'rb').read()
								break
							except:
								warn('sorry, cann\'t find file\n')
						commands = commands[option]
						command_path.append(option)
						completer = autocomplete(commands)
						readline.set_completer(completer.complete)
						readline.parse_and_bind('tab: complete')
						t = True
						while t:
							encode = _input('encode', 'any', True)
							for en in commands:
								if encode == en:
									t = False
							if t is True:
								warn('please enter a valid encode name\n')
						obf_code(option, encode, filename, content,False)
						commands = backup_commands
						completer = autocomplete(commands)
						readline.set_completer(completer.complete)
						readline.parse_and_bind('tab: complete')
						crawler = 0
						command_path = ['zsc']
				if crawler is 3:
					os = option
					commands = commands[option]
					command_path.append(option)
				if crawler is 4:
					func = option
					commands = commands[option]
					command_path.append(option)
				if crawler is 5:
					data = []
					backup_option = option
					if option != '':
						options = option.rsplit('&&')
						for o in options:
							data.append(_input(o,'any',True))
						n = 0
						write('\n')
						for o in options:
							info('%s set to "%s"\n' % (o, data[n]))
							n += 1
					run = getattr(
						__import__('lib.generator.%s.%s' % (os, func),
								   fromlist=['run']),
						'run')
					shellcode = run(data)
					write('\n')
					for encode in backup_commands['shellcode'][1]['generate'][
							os][func][backup_option]:
						info(encode + '\n')
					write('\n\n')
					info('enter encode type\n')
					completer = autocomplete(backup_commands['shellcode'][1][
						'generate'][os][func][backup_option])
					readline.set_completer(completer.complete)
					readline.parse_and_bind('tab: complete')
					try:
						encode = _input('/'.join(command_path) + "/encode_type", 'any', False)
						if encode is None:
							_lets_error
					except:
						encode = 'none'
						warn(
							'\n"none" encode selected\n')
					write('\n')
					assembly_code_or_not = _input(
						'Output assembly code?(y or n)', 'any', True)
					if assembly_code_or_not == 'y':
						assembly_code = True
					else:
						assembly_code = False
					if assembly_code is True:
						write('\n'+encode_process(encode, shellcode, os, func) + '\n\n')
					output_shellcode = _input('Output shellcode to screen?(y or n)', 'any', True)
					shellcode_op = op( encode_process(encode, shellcode, os, func), os)
					if output_shellcode == 'y':
						info('Generated shellcode is:\n' + shellcode_op +'\n\n')
					file_or_not = _input('Shellcode output to a .c file?(y or n)', 'any', True)
					if file_or_not == 'y':
						target = _input('Target .c file?', 'any', True)
						file_output(target, func, data, os, encode, shellcode, shellcode_op)
					commands = backup_commands
					completer = autocomplete(commands)
					readline.set_completer(completer.complete)
					readline.parse_and_bind('tab: complete')
					crawler = 0
					command_path = ['zsc']
				completer = autocomplete(commands)
				readline.set_completer(completer.complete)
				readline.parse_and_bind('tab: complete')
				check = False
		if command == 'exit' or command == 'quit':
			write(color.color('reset'))
			sys.exit('Exit')
		elif command == 'update':
			_update(__version__)
			commands = backup_commands
			completer = autocomplete(commands)
			readline.set_completer(completer.complete)
			readline.parse_and_bind('tab: complete')
			crawler = 0
			command_path = ['zsc']
		elif command == 'help':
			_help(help)
		elif command == 'restart':
			commands = backup_commands
			completer = autocomplete(commands)
			readline.set_completer(completer.complete)
			readline.parse_and_bind('tab: complete')
			crawler = 0
			command_path = ['zsc']
		elif command == 'about':
			about()
		elif command == 'version':
			_version()
		elif command == 'clear':
			_clear()
		elif command == 'back':
			if len(command_path) > 1:
				command_path.pop()
				commands = backup_commands
				for option in command_path:
					if option == 'zsc':
						pass
					elif option == command_path[1]:
						commands = commands[option][1]
					else:
						commands = commands[option]
				completer = autocomplete(commands)
				readline.set_completer(completer.complete)
				readline.parse_and_bind('tab: complete')
				crawler -= 1
			else:
				info('Can\'t go back from here!\n')
		else:
			if command != '' and check is True:
				info('Command not found!\n')
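
The "for (dirpath, dirnames, filenames) in OS.walk('.'): ... break" idiom in getcommand collects only the files directly inside the current directory. For a single level, os.listdir plus an isfile check is an equivalent, walk-free alternative; a sketch:

import os

def files_in_dir(path='.'):
    """Names of regular files directly inside path, without recursing."""
    return [name for name in os.listdir(path)
            if os.path.isfile(os.path.join(path, name))]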

Example 74

Project: dexy Source File: process.py
    def add_new_files(self):
        """
        Walk the working directory and add a new dexy document for every newly
        created file found.
        """
        wd = self.workspace()
        self.log_debug("adding new files found in %s for %s" % (wd, self.key))

        add_new_files = self.setting('add-new-files')
        if isinstance(add_new_files, basestring):
            add_new_files = [add_new_files]

        exclude = self.setting('exclude-add-new-files')
        skip_dirs = self.setting('exclude-new-files-from-dir')

        if isinstance(exclude, basestring):
            raise dexy.exceptions.UserFeedback("exclude-add-new-files should be a list, not a string")

        new_files_added = 0
        for dirpath, subdirs, filenames in os.walk(wd):
            # Prune subdirs listed in skip_dirs (exclude-new-files-from-dir setting).
            subdirs[:] = [d for d in subdirs if d not in skip_dirs]

            # Iterate over files in directory.
            for filename in filenames:
                filepath = os.path.normpath(os.path.join(dirpath, filename))
                relpath = os.path.relpath(filepath, wd)
                self.log_debug("Processing %s" % filepath)

                if relpath in self._files_workspace_populated_with:
                    # already have this file
                    continue

                if isinstance(add_new_files, list):
                    is_valid_file_extension = False
                    for pattern in add_new_files:
                        if "*" in pattern:
                            if fnmatch.fnmatch(relpath, pattern):
                                is_valid_file_extension = True
                                continue
                        else:
                            if filename.endswith(pattern):
                                is_valid_file_extension = True
                                continue

                    if not is_valid_file_extension:
                        msg = "Not adding filename %s, does not match patterns: %s"
                        args = (filepath, ", ".join(add_new_files))
                        self.log_debug(msg % args)
                        continue

                elif isinstance(add_new_files, bool):
                    if not add_new_files:
                        msg = "add_new_files method should not be called if setting is False"
                        raise dexy.exceptions.InternalDexyProblem(msg)
                    is_valid_file_extension = True

                else:
                    msg = "add-new-files setting should be list or boolean. Type is %s value is %s"
                    args = (add_new_files.__class__, add_new_files,)
                    raise dexy.exceptions.InternalDexyProblem(msg % args)

                # Check if should be excluded.
                skip_because_excluded = False
                for skip_pattern in exclude:
                    if skip_pattern in filepath:
                        msg = "skipping adding new file %s because it matches exclude %s"
                        args = (filepath, skip_pattern,)
                        self.log_debug(msg % args)
                        skip_because_excluded = True
                        continue

                if skip_because_excluded:
                    continue

                if not is_valid_file_extension:
                    raise Exception("Should not get here unless is_valid_file_extension")

                self.log_debug("Adding %s" % filepath)
                with open(filepath, 'rb') as f:
                    contents = f.read()
                self.add_doc(relpath, contents)
                new_files_added += 1

        if new_files_added > 10:
            self.log_warn("%s additional files added" % (new_files_added))

Example 75

Project: captchacker2 Source File: characters_train_test_SVM.py
Function: generate_simulation_based_model
def generate_simulation_based_model(KERNEL = SIGMOID,TRAINING_FOLDER = 'DBTraining-Simulation_based'):
    CRANGE = [1000]

    for C in CRANGE:
        MODEL_FILE = "simulation_based_NEW_C="+str(C)+"_KERNEL="+str(KERNEL)+".svm"
        print MODEL_FILE
        print """
        ##############################################################################
        ############################    TRAINING    ##################################
        ##############################################################################
        """

        labels = []
        samples = []

        print "LOADING IMAGES..."

        train_elem = '3de2mt'

        #Train everything
        train_elem = ''
        print TRAINING_FOLDER
        for folder, subfolders, files in os.walk(TRAINING_FOLDER):
            if (folder[0] != ".") and (folder[-1] in train_elem or train_elem == ''):
                loaded = False
                for file in [file for file in files if 'bmp' in file]:
                    if not loaded:
                        print "folder", folder, "loaded"
                        loaded = True
                    im = Image.open(os.path.join(folder, file))
                    #print ord(folder[-1])-65
                    labels.append(ord(folder[-1])-65)
                    #print map(lambda e:e/255., list(im.getdata()))
                    #samples.append(map(lambda e:e/255., list(im.getdata())))
                    samples.append(list(im.point(lambda i: (i/255.)).getdata()))
        print "Done.\n"

        print "GENERATING MODEL..."

        problem = svm_problem(labels, samples);
        size = len(samples)

        #param = svm_parameter(C = 10,nr_weight = 2,weight_label = [1,0],weight = [10,1], probability=1)
        #param = svm_parameter(kernel_type = KERNEL, C=C, probability = 1)
        param = svm_parameter('-t %s -c %s -b %s' % (KERNEL, C, 1))


        #kernels : LINEAR, POLY, RBF, and SIGMOID
        #types : C_SVC, NU_SVC, ONE_CLASS, EPSILON_SVR, and NU_SVR

        #model = svm_model(problem,param)
        #model = libsvm.svm_train(problem, param)
        #model = toPyModel(model)
        model = svm_train(problem, param)
        
        #model.save(os.path.join(MODEL_FOLDER, MODEL_FILE))
        svm_save_model(os.path.join(MODEL_FOLDER, MODEL_FILE),model)

        print "Done.\n" 
        return  os.path.join(MODEL_FOLDER, MODEL_FILE)

Example 76

Project: edx-platform Source File: xml_importer.py
Function: import_static_content
def import_static_content(
        course_data_path, static_content_store,
        target_id, subpath='static', verbose=False):

    remap_dict = {}

    # now import all static assets
    static_dir = course_data_path / subpath
    try:
        with open(course_data_path / 'policies/assets.json') as f:
            policy = json.load(f)
    except (IOError, ValueError) as err:
        # xml backed courses won't have this file, only exported courses;
        # so, its absence is not really an exception.
        policy = {}

    verbose = True

    mimetypes.add_type('application/octet-stream', '.sjson')
    mimetypes.add_type('application/octet-stream', '.srt')
    mimetypes_list = mimetypes.types_map.values()

    for dirname, _, filenames in os.walk(static_dir):
        for filename in filenames:

            content_path = os.path.join(dirname, filename)

            if re.match(ASSET_IGNORE_REGEX, filename):
                if verbose:
                    log.debug('skipping static content %s...', content_path)
                continue

            if verbose:
                log.debug('importing static content %s...', content_path)

            try:
                with open(content_path, 'rb') as f:
                    data = f.read()
            except IOError:
                if filename.startswith('._'):
                    # OS X "companion files". See
                    # http://www.diigo.com/annotated/0c936fda5da4aa1159c189cea227e174
                    continue
                # Not a 'hidden file', then re-raise exception
                raise

            # strip away leading path from the name
            fullname_with_subpath = content_path.replace(static_dir, '')
            if fullname_with_subpath.startswith('/'):
                fullname_with_subpath = fullname_with_subpath[1:]
            asset_key = StaticContent.compute_location(target_id, fullname_with_subpath)

            policy_ele = policy.get(asset_key.path, {})

            # During export display name is used to create files, strip away slashes from name
            displayname = escape_invalid_characters(
                name=policy_ele.get('displayname', filename),
                invalid_char_list=['/', '\\']
            )
            locked = policy_ele.get('locked', False)
            mime_type = policy_ele.get('contentType')

            # Check extracted contentType in list of all valid mimetypes
            if not mime_type or mime_type not in mimetypes_list:
                mime_type = mimetypes.guess_type(filename)[0]   # Assign guessed mimetype
            content = StaticContent(
                asset_key, displayname, mime_type, data,
                import_path=fullname_with_subpath, locked=locked
            )

            # first let's save a thumbnail so we can get back a thumbnail location
            thumbnail_content, thumbnail_location = static_content_store.generate_thumbnail(content)

            if thumbnail_content is not None:
                content.thumbnail_location = thumbnail_location

            # then commit the content
            try:
                static_content_store.save(content)
            except Exception as err:
                log.exception(u'Error importing {0}, error={1}'.format(
                    fullname_with_subpath, err
                ))

            # store the remapping information which will be needed
            # to substitute in the module data
            remap_dict[fullname_with_subpath] = asset_key

    return remap_dict
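
The loop above strips the leading static_dir with str.replace and then trims a leading '/'. A small sketch of the same idea using os.path.relpath, which handles the separator for you; iter_relative_files is an illustrative name, not part of the edx-platform code:

import os

def iter_relative_files(static_dir):
    """Yield each file path relative to static_dir, replacing the
    str.replace / leading-slash stripping done above."""
    for dirname, _, filenames in os.walk(static_dir):
        for filename in filenames:
            full_path = os.path.join(dirname, filename)
            yield os.path.relpath(full_path, static_dir)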

Example 77

Project: RITSAR Source File: phsRead.py
def AFRL(directory, pol, start_az, n_az=3):
##############################################################################
#                                                                            #
#  This function reads in the AFRL *.mat files from the user supplied        #
#  directory and exports both the phs and a Python dictionary compatible     #
#  with ritsar.                                                              #
#                                                                            #
##############################################################################
    
    #Check Python version
    version = sys.version_info   
    
    #Get filenames
    walker = os.walk(directory+'/'+pol)
    if version.major < 3:
        w = walker.next()
    else:
        w = walker.__next__()
    prefix = '/'+pol+'/'+w[2][0][0:19]
    az_str = []
    fnames = []
    az = np.arange(start_az, start_az+n_az)
    [az_str.append(str('%03d_'%a))      for a in az]
    [fnames.append(directory+prefix+a+pol+'.mat') for a in az_str]
    
    #Grab n_az phase histories
    phs = []; platform = []
    for fname in fnames:
        #Convert MATLAB structure to Python dictionary
        MATdata = loadmat(fname)['data'][0][0]
        
        data =\
        {
        'fp'    :   MATdata[0],
        'freq'  :   MATdata[1][:,0],
        'x'     :   MATdata[2].T,
        'y'     :   MATdata[3].T,
        'z'     :   MATdata[4].T,
        'r0'    :   MATdata[5][0],
        'th'    :   MATdata[6][0],
        'phi'   :   MATdata[7][0],
        }
        
        #Define phase history
        phs_tmp     = data['fp'].T
        phs.append(phs_tmp)
        
        #Transform data to be compatible with ritsar
        c           = 299792458.0
        nsamples    = int(phs_tmp.shape[1])
        npulses     = int(phs_tmp.shape[0])
        freq        = data['freq']
        pos         = np.hstack((data['x'], data['y'], data['z']))
        k_r         = 4*pi*freq/c
        B_IF        = data['freq'].max()-data['freq'].min()
        delta_r     = c/(2*B_IF)
        delta_t     = 1.0/B_IF
        t           = np.linspace(-nsamples/2, nsamples/2, nsamples)*delta_t
        
        chirprate, f_0, r, p, s\
                    = linregress(t, freq)
                    
        #Vector to scene center at synthetic aperture center
        if np.mod(npulses,2)>0:
            R_c = pos[npulses/2]
        else:
            R_c = np.mean(
                    pos[npulses/2-1:npulses/2+1],
                    axis = 0)
        
        #Save values to dictionary for export
        platform_tmp = \
        {
            'f_0'       :   f_0,
            'freq'      :   freq,
            'chirprate' :   chirprate,
            'B_IF'      :   B_IF,
            'nsamples'  :   nsamples,
            'npulses'   :   npulses,
            'pos'       :   pos,
            'delta_r'   :   delta_r,
            'R_c'       :   R_c,
            't'         :   t,
            'k_r'       :   k_r,
        }
        platform.append(platform_tmp)
    
    #Stack data from different azimuth files
    phs = np.vstack(phs)
    npulses = int(phs.shape[0])
    
    pos = platform[0]['pos']
    for i in range(1, n_az):
        pos = np.vstack((pos, platform[i]['pos']))
                       
    if np.mod(npulses,2)>0:
        R_c = pos[npulses/2]
    else:
        R_c = np.mean(
                pos[npulses/2-1:npulses/2+1],
                axis = 0)
                       
    #Replace Dictionary values
    platform = platform_tmp
    platform['npulses'] =   npulses
    platform['pos']     =   pos
    platform['R_c']     =   R_c
    
    #Synthetic aperture length
    L = norm(pos[-1]-pos[0])

    #Add k_y
    platform['k_y'] = np.linspace(-npulses/2,npulses/2,npulses)*2*pi/L
    
    return(phs, platform)
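
Example 77 only needs the top-level (dirpath, dirnames, filenames) tuple, and branches on the Python version to call walker.next() or walker.__next__(). The built-in next() covers both; a minimal sketch with an assumed first_level helper:

import os

def first_level(directory):
    """Return the (dirpath, dirnames, filenames) tuple for directory itself,
    without descending; next() works on both Python 2 and Python 3."""
    return next(os.walk(directory))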

Example 78

Project: cgat Source File: clean.py
Function: main
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.
    """

    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(version="%prog version: $Id: clean.py 2782 2009-09-10 11:40:29Z andreas $",
                            usage=globals()["__doc__"])

    parser.add_option("-g", "--glob", dest="glob_pattern", type="string",
                      help="glob pattern to use for collecting files [%default].")

    parser.add_option("-n", "--dry-run", dest="dry_run", action="store_true",
                      help="only print out actions, do not execute them [%default].")

    parser.add_option("-f", "--file-pattern", dest="file_pattern", type="string",
                      help="only check files matching this pattern [%default].")

    parser.set_defaults(glob_pattern="data.dir",
                        file_pattern=".out",
                        check_completeness="python",
                        skip_dirs=[],
                        dry_run=False,
                        )

    (options, args) = E.Start(parser,
                              add_pipe_options=True)

    if args:
        starts = args
    elif options.glob_pattern:
        starts = glob.glob(options.glob_pattern)
    else:
        starts = "."

    ndirs, nfiles, ndeleted = 0, 0, 0

    if options.check_completeness == "python":
        isComplete = checkPythonRuns

    rx = re.compile(options.file_pattern)

    for start in starts:
        for root, dirs, files in os.walk(start):

            ndirs += 1
            # exclude directories
            for dir in options.skip_dirs:
                if dir in dirs:
                    dirs.remove(dir)

            for filename in files:
                p = os.path.join(root, filename)
                if rx.search(filename) and not isComplete(p):
                    if options.dry_run:
                        options.stdlog.write("# removing file %s\n" % p)
                    else:
                        os.remove(p)
                    ndeleted += 1

    if options.loglevel >= 1:
        options.stdlog.write("# ndirs=%i, nfiles=%i, ndeleted=%i\n" %
                             (ndirs, nfiles, ndeleted))

    E.Stop()
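
The walk above prunes unwanted directories with dirs.remove(dir) and deletes matching files. A compact sketch of the same pruning idea using slice assignment (dirs[:] = ...), which keeps os.walk from descending into skipped directories; the names and defaults here are illustrative:

import os
import re

def prune_and_collect(start, skip_dirs=(), pattern=r'\.out$'):
    """Walk start, skipping any directory named in skip_dirs (pruned in
    place) and yielding files whose names match pattern."""
    rx = re.compile(pattern)
    for root, dirs, files in os.walk(start):
        dirs[:] = [d for d in dirs if d not in skip_dirs]  # in-place prune
        for name in files:
            if rx.search(name):
                yield os.path.join(root, name)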

Example 79

Project: sparknotebook Source File: spark_ec2.py
def deploy_files(conn, root_dir, opts, master_nodes, slave_nodes, modules):
  active_master = master_nodes[0].public_dns_name

  num_disks = get_num_disks(opts.instance_type)
  hdfs_data_dirs = "/mnt/ephemeral-hdfs/data"
  mapred_local_dirs = "/mnt/hadoop/mrlocal"
  spark_local_dirs = "/mnt/spark"
  if num_disks > 1:
    for i in range(2, num_disks + 1):
      hdfs_data_dirs += ",/mnt%d/ephemeral-hdfs/data" % i
      mapred_local_dirs += ",/mnt%d/hadoop/mrlocal" % i
      spark_local_dirs += ",/mnt%d/spark" % i

  cluster_url = "%s:7077" % active_master

  if "." in opts.spark_version:
    # Pre-built spark & shark deploy
    (spark_v, shark_v) = get_spark_shark_version(opts)
  else:
    # Spark-only custom deploy
    spark_v = "%s|%s" % (opts.spark_git_repo, opts.spark_version)
    shark_v = ""
    modules = filter(lambda x: x != "shark", modules)

  template_vars = {
    "master_list": '\n'.join([i.public_dns_name for i in master_nodes]),
    "active_master": active_master,
    "slave_list": '\n'.join([i.public_dns_name for i in slave_nodes]),
    "cluster_url": cluster_url,
    "hdfs_data_dirs": hdfs_data_dirs,
    "mapred_local_dirs": mapred_local_dirs,
    "spark_local_dirs": spark_local_dirs,
    "swap": str(opts.swap),
    "modules": '\n'.join(modules),
    "spark_version": spark_v,
    "shark_version": shark_v,
    "hadoop_major_version": opts.hadoop_major_version,
    "metastore_user": "hive",
    "metastore_passwd": ''.join(random.SystemRandom().choice(string.uppercase + string.digits) for _ in xrange(10)),
    "spark_worker_instances": "%d" % opts.worker_instances,
    "spark_master_opts": opts.master_opts
  }

  # Create a temp directory in which we will place all the files to be
  # deployed after we substitute template parameters in them
  print root_dir
  tmp_dir = tempfile.mkdtemp()
  for path, dirs, files in os.walk(root_dir):
    if path.find(".svn") == -1:
      dest_dir = os.path.join('/', path[len(root_dir):])
      local_dir = tmp_dir + dest_dir
      if not os.path.exists(local_dir):
        os.makedirs(local_dir)
      for filename in files:
        if filename[0] not in '#.~' and filename[-1] != '~':
          dest_file = os.path.join(dest_dir, filename)
          local_file = tmp_dir + dest_file
          with open(os.path.join(path, filename)) as src:
            with open(local_file, "w") as dest:
              text = src.read()
              for key in template_vars:
                text = text.replace("{{" + key + "}}", template_vars[key])
              dest.write(text)
              dest.close()
  # rsync the whole directory over to the master machine
  command = [
      'rsync', '-rv',
      '-e', stringify_command(ssh_command(opts)),
      "%s/" % tmp_dir,
      "%s@%s:/" % (opts.user, active_master)
    ]
  subprocess.check_call(command)
  # Remove the temp directory we created above
  shutil.rmtree(tmp_dir)
  print tmp_dir
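
deploy_files mirrors root_dir into a temp directory while substituting {{key}} template variables. A stripped-down sketch of that mirroring pattern, assuming a mirror_tree helper and plain text files; it omits the .svn and filename filtering of the original:

import os
import tempfile

def mirror_tree(root_dir, substitutions):
    """Copy root_dir into a fresh temp directory, applying {{key}} -> value
    substitutions to every file, and return the temp directory path."""
    tmp_dir = tempfile.mkdtemp()
    for path, _, files in os.walk(root_dir):
        rel = os.path.relpath(path, root_dir)
        dest_dir = tmp_dir if rel == '.' else os.path.join(tmp_dir, rel)
        if not os.path.isdir(dest_dir):
            os.makedirs(dest_dir)
        for name in files:
            with open(os.path.join(path, name)) as src:
                text = src.read()
            for key, value in substitutions.items():
                text = text.replace('{{' + key + '}}', value)
            with open(os.path.join(dest_dir, name), 'w') as dest:
                dest.write(text)
    return tmp_dir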

Example 80

Project: ursula Source File: ceph_bcache.py
Function: main
def main():
    module = AnsibleModule(
        argument_spec=dict(
            disks=dict(type='list',required=True),
            ssd_device=dict(required=True),
            journal_guid=dict(required=True),
        ),
    )
    disks = module.params.get('disks')
    ssd_device = module.params.get('ssd_device')
    journal_guid = module.params.get('journal_guid')
    changed = False
    uuids_in_order = [None] * len(disks)

    # the disks have symlinks to /dev/bcacheX. we need the disks
    # in increasing order by X.
    for subdir, dirs, files in os.walk('/dev/disk/by-uuid/'):
      for uuid in files:
        disk = os.path.join(subdir, uuid)
        path = os.path.realpath(disk)

        if 'bcache' in path:
          bcache_index = int(path[len(path)-1:])
          uuids_in_order.pop(bcache_index)
          uuids_in_order.insert(bcache_index,uuid)

    for i in range(0, len(uuids_in_order)):

      # running this command with the uuid argument will return the same value each time
      cmd = ['ceph', 'osd', 'create', uuids_in_order[i]]
      rc, out, err = module.run_command(cmd, check_rc=True)
      osd_id = out.rstrip()

      # if first time running 'ceph osd create' against this uuid, create the osd dir
      # and handle rest of activation. if directory exists, the device has already
      # been activated.
      if not os.path.exists('/var/lib/ceph/osd/ceph-' + osd_id):
        os.makedirs('/var/lib/ceph/osd/ceph-' + osd_id)
        changed = True

        bcache_index = int(osd_id) % len(disks)
        partition_index = bcache_index + 1

        cmd = ['mount', '/dev/bcache' + str(bcache_index), '/var/lib/ceph/osd/ceph-' + osd_id]
        rc, out, err = module.run_command(cmd, check_rc=True)

        cmd = ['ceph-osd', '-i', osd_id, '--mkfs', '--mkkey', '--osd-uuid', uuids_in_order[i]]
        rc, out, err = module.run_command(cmd, check_rc=True)

        os.remove('/var/lib/ceph/osd/ceph-' + osd_id + '/journal')

        cmd = ['chown', 'ceph:ceph', '/dev/' + ssd_device + str(partition_index)]
        rc, out, err = module.run_command(cmd, check_rc=True)

        cmd = ['sgdisk', '-t', str(partition_index) + ':' + journal_guid, '/dev/' + ssd_device]
        rc, out, err = module.run_command(cmd, check_rc=True)

        cmd = ['ln', '-s', '/dev/' + ssd_device + str(partition_index), '/var/lib/ceph/osd/ceph-' + osd_id + '/journal']
        rc, out, err = module.run_command(cmd, check_rc=True)

        cmd = ['ceph-osd', '-i', osd_id, '--mkjournal']
        rc, out, err = module.run_command(cmd, check_rc=True)

        cmd = ['umount', '/var/lib/ceph/osd/ceph-' + osd_id]
        rc, out, err = module.run_command(cmd, check_rc=True)

        cmd = ['ceph-disk', 'activate', '/dev/bcache' + str(bcache_index)]
        rc, out, err = module.run_command(cmd, check_rc=True)

        cmd = ['chown', '-R', 'ceph:ceph', '/var/lib/ceph/osd/ceph-' + osd_id]
        rc, out, err = module.run_command(cmd, check_rc=True)

        with open("/etc/fstab", "a") as fstab:
          fstab.write('UUID=' + uuids_in_order[i] + ' /var/lib/ceph/osd/ceph-' + osd_id + ' xfs defaults,noatime,largeio,inode64,swalloc 0 0\n')

    module.exit_json(changed=changed)
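
The module above walks /dev/disk/by-uuid/ and resolves each UUID symlink with os.path.realpath to find the backing bcache device. A minimal sketch of just that mapping step, with an assumed uuid_to_device helper:

import os

def uuid_to_device(by_uuid_dir='/dev/disk/by-uuid/'):
    """Map each UUID symlink found by os.walk to the real device node it
    points at, e.g. {'2f3a...': '/dev/bcache0'}."""
    mapping = {}
    for subdir, _, files in os.walk(by_uuid_dir):
        for uuid in files:
            link = os.path.join(subdir, uuid)
            mapping[uuid] = os.path.realpath(link)
    return mapping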

Example 81

Project: qibuild Source File: sh.py
def iter_directory(directory, filter_fun=None, all=False):
    """Returns a generator for all the files present in a directory,
    relative to this directory.

    Empty directories are ignored.

    By default, do not list hidden files and do not descend into
    hidden directories.

    You can use ``all=True`` to list all the files

    .. note:: Hidden in this context means "starting with a dot"

              (If you want to support MacOS or Windows hidden files
              you are on your own ...)

    If  ``filter_fun`` is given, it will be called with
    ``(filename, dirname)`` optional argument and should return True
    if the directory should be descended into or the filename should be yield

    For instance, with::

        foo
        |__ eggs
        |    |__ c
        |    |__ d
        |__ empty
        |__ spam
            |__a
            |__b

    ``iter_directory(foo)`` yields::

        ["eggs/c", "eggs/d", "spam/a", "spam/b"]

    Note that paths will always be POSIX, even on Windows

    """
    def non_hidden(filename=None, dirname=None):
        if filename:
            return not filename.startswith(".")
        if dirname:
            return not dirname.startswith(".")

    def filter_none(filename=None, dirname=None):
        return True

    if not filter_fun:
        if all:
            filter_fun = filter_none
        else:
            filter_fun = non_hidden

    res = list()
    for root, dirs, files in os.walk(directory, topdown=True):
        new_root = os.path.relpath(root, directory)
        new_root = qisys.sh.to_posix_path(new_root)
        dirs[:] = [x for x in dirs if filter_fun(dirname=x)]
        files = [x for x in files if filter_fun(filename=x)]
        if new_root == "." and not files:
            continue
        if new_root == "." and files:
            for f in files:
                yield f
            continue
        for f in files:
            yield posixpath.join(new_root, f)
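
iter_directory shows the topdown=True idiom: assigning to dirs[:] prunes hidden directories before os.walk descends, and results are yielded as POSIX-style relative paths. A condensed sketch of the same generator, with an illustrative iter_visible_files name:

import os
import posixpath

def iter_visible_files(directory):
    """Yield POSIX-style paths relative to directory, pruning hidden
    directories in place and skipping hidden files."""
    for root, dirs, files in os.walk(directory, topdown=True):
        dirs[:] = [d for d in dirs if not d.startswith('.')]
        rel = os.path.relpath(root, directory).replace(os.sep, '/')
        for name in files:
            if name.startswith('.'):
                continue
            yield name if rel == '.' else posixpath.join(rel, name)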

Example 82

Project: tika-similarity Source File: similarity.py
def main(argv = None):
	if argv is None:
		argv = sys.argv

	try:
		try:
			opts, args = getopt.getopt(argv[1:], 'hvf:c:a:', ['help', 'verbose', 'directory=', 'file=', 'accept=' ])
		except getopt.error, msg:
			raise _Usage(msg)

		if len(opts) ==0:
			raise _Usage(_helpMessage)

		dirFile = ""
		filenames = []
		filename_list = []
		allowed_mime_types = []
		directory_flag = 0

		for option, value in opts:
			if option in ('-h', '--help'):
				raise _Usage(_helpMessage)

			elif option in ('-c', '--file'):
				#extract file names from command line
				if '-c' in argv :
					index_of_file_option = argv.index('-c')
				else :
					index_of_file_option = argv.index('--file')
				filenames = argv[index_of_file_option+1 : ]

			elif option in ('-f', '--directory'):
				dirFile = value
				directory_flag = 1
				for root, dirnames, files in os.walk(dirFile):
					dirnames[:] = [d for d in dirnames if not d.startswith('.')]
					for filename in files:
						if not filename.startswith('.'):							
							filename_list.append(os.path.join(root, filename))

			elif option in ('--accept'):
				#extract accepted mime types from command line
				index_of_mime_type_option = argv.index('--accept')
				allowed_mime_types = argv[index_of_mime_type_option+1 : ]
		
			elif option in ('-v', '--verbose'):
				global _verbose
				_verbose = True

		#format filename
		if directory_flag == 0:
			filenames = [x.strip() for x in filenames]
			filenames = [filenames[k].strip('\'\n') for k in range(len(filenames))]
			for filename in filenames:
				if not os.path.isfile(os.path.join(dirFile, filename)):
					continue
				filename = os.path.join(dirFile, filename) if dirFile else filename
				filename_list.append(filename)

		if len(filename_list) <2 :
			raise _Usage("you need to type in at least two valid files")

		#allow only files with specifed mime types
		if len(allowed_mime_types) != 0:
			filename_list = [filename for filename in filename_list if parser.from_file(filename) and str(parser.from_file(filename)['metadata']['Content-Type'].encode('utf-8')).split('/')[-1] in allowed_mime_types]
		else:
			print "Accepting all MIME Types....."

		union_feature_names = set()
		file_parsed_data = {}
		resemblance_scores = {}

		#count similarity for two given files
		for filename in filename_list:
			# first compute the union of all features
			try:
				parsedData = parser.from_file(filename)
				filename_stripped = filename.replace(",","")
				if parsedData:
					file_parsed_data[filename_stripped] = parsedData["metadata"]
					union_feature_names = union_feature_names | set(parsedData["metadata"].keys())
			except ConnectionError:
				sleep(1)
			except KeyError:
				continue

		total_num_features = len(union_feature_names)



		# now compute the specific resemblance and containment scores
		for filename in file_parsed_data:
			overlap = {}
			overlap = set(file_parsed_data[filename].keys()) & set(union_feature_names)
			resemblance_scores[filename] = float(len(overlap))/total_num_features

		sorted_resemblance_scores = sorted(resemblance_scores.items(), key=operator.itemgetter(1), reverse=True)

		'''print "Resemblance:\n"
		for tuple in sorted_resemblance_scores:
			print tuple[0]+","+str(tuple[1]) + "," + convertUnicode(file_parsed_data[tuple[0]])+'\n'''
		with open("similarity-scores.txt", "w") as f:
			f.write("Resemblance : \n")
			for tuple in sorted_resemblance_scores:
                            f.write(os.path.basename(tuple[0].rstrip(os.sep)) + ","+str(tuple[1]) + "," + tuple[0] + "," + convertUnicode(file_parsed_data[tuple[0]]) + '\n')

	except _Usage, err:
		print >>sys.stderr, sys.argv[0].split('/')[-1] + ': ' + str(err.msg)
		return 2

Example 83

Project: C-PAC Source File: build_sublist.py
def return_bids_template(base_dir, scan_type, creds_path=None):
    '''
    Function that returns the path template of the desired scan type
    from a BIDS dataset

    Parameters
    ----------
    base_dir : string
        base directory of the BIDS dataset
    scan_type : string
        type of scan; e.g. 'anat', 'func', etc.
    creds_path : string (optional); default=None
        filepath to a set of AWS credentials to access a BIDS dataset
        stored on S3 that isn't public

    Returns
    -------
    file_template : string
        regular expression-compatible file template indicating data
        path organization
    '''

    # Import packages
    import os
    from indi_aws import fetch_creds

    # Init variables
    s3_str = 's3://'
    file_path = None

    # If base directory is in S3
    if base_dir.startswith(s3_str):
        bucket_name = base_dir.split('/')[2]
        s3_prefix = '/'.join(base_dir.split('/')[:3])

        # Extract base prefix to search through in S3
        prefix = base_dir.split('*')[0].replace(s3_prefix, '').lstrip('/')

        # Attempt to get bucket
        try:
            bucket = fetch_creds.return_bucket(creds_path, bucket_name)
        except Exception as exc:
            err_msg = 'There was an error in retrieving S3 bucket: %s.\nError: %s'\
                      %(bucket_name, exc)
            raise Exception(err_msg)

        # Get filepaths from S3 with prefix
        print 'Gathering files from S3 to parse...'
        for s3_obj in bucket.objects.filter(Prefix=prefix):
            file_path = s3_obj.key
            scan_dir = file_path.split('/')[-2]
            if scan_dir == scan_type:
                break
    # Else, the base directory is locally stored
    else:
        for root, dirs, files in os.walk(base_dir):
            if file_path:
                break
            for fil in files:
                file_path = os.path.join(root, fil)
                scan_dir = file_path.split('/')[-2]
                if fil.endswith('.nii.gz') and scan_dir == scan_type:
                    break
                else:
                    file_path = None

    # Now replace file_path intermediate dirs with *
    if file_path:
        rel_path = file_path.replace(base_dir, '').lstrip('/')
        interm_dirs = rel_path.split('/')[:-2]
        for imd in interm_dirs:
            file_path = file_path.replace(imd, '*')
    else:
        err_msg = 'Could not find any files in directory, check files!'
        raise Exception(err_msg)

    # Set template as any file *
    file_template = os.path.join(os.path.dirname(file_path), '*.nii.gz')

    # Return file pattern template
    return file_template
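
return_bids_template breaks out of the nested walk with a file_path sentinel once it finds a .nii.gz file in the right scan directory. Returning from a helper function is a simpler way to stop the walk early; a sketch under that assumption (find_first_scan is illustrative):

import os

def find_first_scan(base_dir, scan_type, suffix='.nii.gz'):
    """Return the first file under base_dir whose parent directory is named
    scan_type and whose name ends with suffix, or None if nothing matches."""
    for root, _, files in os.walk(base_dir):
        if os.path.basename(root) != scan_type:
            continue
        for name in files:
            if name.endswith(suffix):
                return os.path.join(root, name)
    return None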

Example 84

Project: qiime Source File: multiple_split_libraries_fastq.py
def main():
    option_parser, opts, args =\
        parse_command_line_parameters(suppress_verbose=True, **script_info)

    input_dir = opts.input_dir
    demultiplexing_method = opts.demultiplexing_method
    parameter_fp = opts.parameter_fp
    read_indicator = opts.read_indicator
    barcode_indicator = opts.barcode_indicator
    mapping_indicator = opts.mapping_indicator
    mapping_extensions = opts.mapping_extensions.split(',')
    sampleid_indicator = opts.sampleid_indicator
    leading_text = opts.leading_text
    trailing_text = opts.trailing_text
    include_input_dir_path = opts.include_input_dir_path
    output_dir = abspath(opts.output_dir)
    remove_filepath_in_name = opts.remove_filepath_in_name
    print_only = opts.print_only

    if remove_filepath_in_name and not include_input_dir_path:
        option_parser.error("If --remove_filepath_in_name enabled, "
            "--include_input_dir_path must be enabled.")

    if opts.parameter_fp:
        with open(opts.parameter_fp, 'U') as parameter_f:
            params_dict = parse_qiime_parameters(parameter_f)
        params_str = get_params_str(params_dict['split_libraries_fastq'])
    else:
        params_dict = {}
        params_str = ""

    create_dir(output_dir)

    all_fastq = []
    all_mapping = []

    extensions = ['.fastq.gz', '.fastq', '.fq.gz', '.fq']

    for root, dir, fps in walk(input_dir):
        for fp in fps:
            for extension in extensions:
                if fp.endswith(extension):
                    all_fastq += [abspath(join(root, fp))]

    if demultiplexing_method == 'mapping_barcode_files':
        for root, dir, fps in walk(input_dir):
            for fp in fps:
                for mapping_extension in mapping_extensions:
                    if fp.endswith(mapping_extension):
                        all_mapping += [abspath(join(root, fp))]

        all_files = get_matching_files(all_fastq, all_mapping,
            read_indicator, barcode_indicator, mapping_indicator)
    else:
        # Filter down files to only the target files, raise error if nothing found
        all_files = filter(all_fastq, read_indicator)
        if not all_files:
            raise ValueError,("No reads detected-please check the values indicated with "
                "the --read_indicator parameter. Set as '*' to include all files, or use "
                "a value such as '*fastqjoin.join*' to detect only the reads that are "
                "joined after join_paired_ends.py.")

    commands = create_commands_slf(all_files, demultiplexing_method, output_dir,
        params_str, leading_text, trailing_text, include_input_dir_path,
        remove_filepath_in_name, sampleid_indicator)

    qiime_config = load_qiime_config()
    if print_only:
        command_handler = print_commands
    else:
        command_handler = call_commands_serially
    logger = WorkflowLogger(generate_log_fp(output_dir),
                            params=params_dict,
                            qiime_config=qiime_config)
    # Call the command handler on the list of commands
    command_handler(commands,
                    status_update_callback=no_status_updates,
                    logger=logger,
                    close_logger_on_success=True)
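
The two walks above test each filename against a list of extensions in an inner loop. str.endswith accepts a tuple, so one call suffices; a minimal sketch with an assumed collect_by_extension helper:

import os

def collect_by_extension(input_dir,
                         extensions=('.fastq.gz', '.fastq', '.fq.gz', '.fq')):
    """Return absolute paths of every file under input_dir whose name ends
    with one of extensions; endswith takes the whole tuple at once."""
    matches = []
    for root, _, files in os.walk(input_dir):
        for name in files:
            if name.endswith(extensions):
                matches.append(os.path.abspath(os.path.join(root, name)))
    return matches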

Example 85

Project: ssf Source File: mergejs.py
Function: run
def run (sourceDirectory, outputFilename = None, configFile = None):
    cfg = None
    if configFile:
        cfg = Config(configFile)

    allFiles = []

    ## Find all the Javascript source files
    for root, dirs, files in os.walk(sourceDirectory):
        for filename in files:
            if filename.endswith(SUFFIX_JAVASCRIPT) and not filename.startswith("."):
                filepath = os.path.join(root, filename)[len(sourceDirectory)+1:]
                filepath = filepath.replace("\\", "/")
                if cfg and cfg.include:
                    if filepath in cfg.include or filepath in cfg.forceFirst:
                        allFiles.append(filepath)
                elif (not cfg) or (not undesired(filepath, cfg.exclude)):
                    allFiles.append(filepath)

    ## Header inserted at the start of each file in the output
    HEADER = "/* " + "=" * 70 + "\n    %s\n" + "   " + "=" * 70 + " */\n\n"

    files = {}

    ## Import file source code
    ## TODO: Do import when we walk the directories above?
    for filepath in allFiles:
        print "Importing: %s" % filepath
        fullpath = os.path.join(sourceDirectory, filepath).strip()
        content = open(fullpath, "U").read() # TODO: Ensure end of line @ EOF?
        files[filepath] = SourceFile(filepath, content) # TODO: Chop path?

    print

    from toposort import toposort

    complete = False
    resolution_pass = 1

    while not complete:
        complete = True

        ## Resolve the dependencies
        print "Resolution pass %s... " % resolution_pass
        resolution_pass += 1 

        for filepath, info in files.items():
            for path in info.requires:
                if not files.has_key(path):
                    complete = False
                    fullpath = os.path.join(sourceDirectory, path).strip()
                    if os.path.exists(fullpath):
                        print "Importing: %s" % path
                        content = open(fullpath, "U").read() # TODO: Ensure end of line @ EOF?
                        files[path] = SourceFile(path, content) # TODO: Chop path?
                    else:
                        raise MissingImport("File '%s' not found (required by '%s')." % (path, filepath))
        
    # create dictionary of dependencies
    dependencies = {}
    for filepath, info in files.items():
        dependencies[filepath] = info.requires

    print "Sorting..."
    order = toposort(dependencies) #[x for x in toposort(dependencies)]

    ## Move forced first and last files to the required position
    if cfg:
        print "Re-ordering files..."
        order = cfg.forceFirst + [item
                     for item in order
                     if ((item not in cfg.forceFirst) and
                         (item not in cfg.forceLast))] + cfg.forceLast
    
    print
    ## Output the files in the determined order
    result = []

    for fp in order:
        f = files[fp]
        print "Exporting: ", f.filepath
        result.append(HEADER % f.filepath)
        source = f.source
        result.append(source)
        if not source.endswith("\n"):
            result.append("\n")

    print "\nTotal files merged: %d " % len(files)

    if outputFilename:
        print "\nGenerating: %s" % (outputFilename)
        open(outputFilename, "w").write("".join(result))
    return "".join(result)

Example 86

Project: neural-fuzzer Source File: triage.py
def triage(cmd, seeds, depth=5, prune=False):
    #gdb_cmd = "env -i ASAN_OPTIONS='abort_on_error=1' gdb -batch -ex 'tty /dev/null' -ex run -ex bt 20 --args @@ 2> /dev/null"
    gdb_cmd = "env -i ASAN_OPTIONS='abort_on_error=1' gdb -batch -ex run -ex 'bt 20' --args @@ 2> /dev/null"

    all_files = []
    dedup_files = dict()

    for x, y, files in os.walk(seeds):
        nfiles = len(files)
        for f in files:
            f = f.replace("(","\(")
            f = f.replace(")","\)")
            f = f.replace("$","\$")
            f = f.replace(",","\,")

            all_files.append(x + "/".join(y) + "/" + f)


    random.shuffle(all_files)
    #all_files = all_files[:1000]
    nfiles = len(all_files)

    for progress, testcase in enumerate(all_files):
        prepared_cmd = cmd.split("@@")
        prepared_cmd = prepared_cmd[0].split(
            " ") + [testcase] + prepared_cmd[1].split(" ")
        prepared_cmd = remove_nils(prepared_cmd)
        #print prepared_cmd
        out = subprocess.check_output(gdb_cmd.replace(
            "@@", " ".join(prepared_cmd)), shell=True)
        #print out
        backtrace = out.split("#")[1:]
        key = ""
        size = os.path.getsize(testcase)
        dkey = 0
        for x in backtrace:

            if dkey == depth:
                break

            if "??" in x or "__" in x:
                continue
            if " in " in x:
                x = remove_nils(x.split(" "))
                key = key + " " + x[3]
                dkey = dkey + 1

            else:
                x = remove_nils(x.split(" "))
                key = key + " " + x[1]
                dkey = dkey + 1

        # print key
        y = dedup_files.get(key, [])
        dedup_files[key] = y + [(testcase,size)]

    out = dict()

    for (k, xs) in dedup_files.items():
        #print "*"+k,
        xs = sorted(xs, key=lambda x: x[1])
        for x in xs[:1]:
            out[k] = x

        #if prune:
        #    for x in xs[1:]:
        #        os.remove(x[0])

        #print ""
    #print out
    return out

Example 87

Project: courtlistener Source File: import_law_box.py
def main():
    parser = argparse.ArgumentParser(
        description='Import the corpus provided by lawbox')
    parser.add_argument('-s', '--simulate', default=False, required=False,
                        action='store_true',
                        help='Run the code in simulate mode, making no permanent changes.')
    parser.add_argument('-d', '--dir', type=readable_dir,
                        help='The directory where the lawbox bulk data can be found.')
    parser.add_argument('-f', '--file', type=str, default="index.txt",
                        required=False, dest="file_name",
                        help="The file that has all the URLs to import, one per line.")
    parser.add_argument('-l', '--line', type=int, default=1, required=False,
                        help='If provided, this will be the line number in the index file where we resume processing.')
    parser.add_argument('-r', '--resume', default=False, required=False,
                        action='store_true',
                        help='Use the saved marker to resume operation where it last failed.')
    parser.add_argument('-x', '--random', default=False, required=False,
                        action='store_true',
                        help='Pick cases randomly rather than serially.')
    parser.add_argument('-m', '--marker', type=str,
                        default='lawbox_progress_marker.txt', required=False,
                        help="The name of the file that tracks the progress (useful if multiple versions run at same time)")
    parser.add_argument('-e', '--end', type=int, required=False,
                        default=2000000,
                        help="An optional endpoint for an importer.")
    args = parser.parse_args()

    if args.dir:
        def case_generator(dir_root):
            """Yield cases, one by one to the importer by recursing and iterating the import directory"""
            for root, dirnames, filenames in os.walk(dir_root):
                for filename in fnmatch.filter(filenames, '*'):
                    yield os.path.join(root, filename)

        cases = case_generator(args.root)
        i = 0
    else:
        def generate_random_line(file_name):
            while True:
                total_bytes = os.stat(file_name).st_size
                random_point = random.randint(0, total_bytes)
                f = open(file_name)
                f.seek(random_point)
                f.readline()  # skip this line to clear the partial line
                yield f.readline().strip()

        def case_generator(line_number):
            """Yield cases from the index file."""
            enumerated_line_number = line_number - 1  # The enumeration is zero-index, but files are one-index.
            index_file = open(args.file_name)
            for i, line in enumerate(index_file):
                if i >= enumerated_line_number:
                    yield line.strip()

        if args.random:
            cases = generate_random_line(args.file_name)
            i = 0
        elif args.resume:
            with open(args.marker) as marker:
                resume_point = int(marker.read().strip())
            cases = case_generator(resume_point)
            i = resume_point
        else:
            cases = case_generator(args.line)
            i = args.line

    for case_path in cases:
        if i % 1000 == 0:
            db.reset_queries()  # Else we leak memory when DEBUG is True

        if 'counter' in DEBUG:  # and i % 1000 == 0:
            log_print("\n%s: Doing case (%s): file://%s" % (
                datetime.datetime.now(), i, case_path))
        try:
            doc = import_law_box_case(case_path)
            duplicates = find_duplicates(doc, case_path)
            if not args.simulate:
                if len(duplicates) == 0:
                    doc.html_lawbox, blocked = anonymize(doc.html)
                    doc.html = ''
                    if blocked:
                        doc.blocked = True
                        doc.date_blocked = now()
                        # Save nothing to the index for now (it'll get done
                        # when we find citations)
                    doc.save(index=False)
                if len(duplicates) == 1:
                    dup_helpers.merge_cases_simple(doc, duplicates[0])
                if len(duplicates) > 1:
                    # complex_merge
                    if 'log_multimerge' in DEBUG:
                        with open('index_multimerge.txt', 'a') as log:
                            log.write('%s\n' % case_path)
            if args.resume:
                # Don't change the progress marker unless you're in resume mode
                with open(args.marker, 'w') as marker:
                    marker.write(
                        str(i + 1))  # Files are one-index, not zero-index
            with open('lawbox_fix_file.pkl', 'wb') as fix_file:
                pickle.dump(fixes, fix_file)
            i += 1
            if i == args.end:
                log_print(
                    "Hit the endpoint after importing number %s. Breaking." % i)
                break
        except Exception, err:
            log_print(traceback.format_exc())
            exit(1)
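
The nested case_generator above pairs os.walk with fnmatch.filter to yield matching files lazily. The same pattern as a standalone sketch (iter_cases and its default pattern are illustrative):

import fnmatch
import os

def iter_cases(dir_root, pattern='*'):
    """Yield every file under dir_root whose name matches pattern, one at a
    time, so the caller can start processing before the walk finishes."""
    for root, dirnames, filenames in os.walk(dir_root):
        for filename in fnmatch.filter(filenames, pattern):
            yield os.path.join(root, filename)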

Example 88

Project: bokeh Source File: test_code_quality.py
def collect_errors():
    errors = []

    def test_this_file(fname, test_file):
        line = None

        for idx, line in enumerate(test_file):
            line = line.decode('utf-8')
            line_no = idx + 1

            if idx == 0 and len(line.strip()) == 0:
                errors.append((message_multi_bof, fname, line_no))
            if line.endswith(" \n") or line.endswith("\t\n"):
                errors.append((message_space, fname, line_no))
            if line.endswith("\r\n") or line.endswith("\r"):
                errors.append((message_carriage, fname, line_no))
            if tab_in_leading(line):
                errors.append((message_tabs, fname, line_no))
            #if len(line) > MAX_LINE_LENGTH:
            #    errors.append((message_too_long, fname, line_no))

        if line is not None:
            if idx > 0 and len(line.strip()) == 0:
                errors.append((message_multi_eof, fname, line_no))
            if not line.endswith('\n'):
                errors.append((message_eof, fname, line_no))

    def test(fname):
        with open(fname, "Urb") as test_file:
            test_this_file(fname, test_file)

    def canonicalize(path):
        return path.replace('/', sep)

    def check_tree(base_path, patterns, dir_exclusions=None, file_exclusions=None):
        dir_exclusions = dir_exclusions or []
        file_exclusions = file_exclusions or []
        base_path = join(TOP_PATH, canonicalize(base_path))
        dir_exclusions = set([ join(base_path, canonicalize(path)) for path in dir_exclusions ])

        for root, dirs, _ in walk(base_path):
            if root in dir_exclusions:
                del dirs[:]
                continue

            for pattern in patterns:
                files = glob(join(root, pattern))
                check_files(files, file_exclusions)

    def check_files(files, file_exclusions=None):
        file_exclusions = file_exclusions or []
        for fname in files:
            if not isabs(fname):
                fname = join(TOP_PATH, fname)

            if not exists(fname) or not isfile(fname):
                continue

            if basename(fname) in file_exclusions:
                continue

            test(fname)

    check_files(["setup.py"])
    check_tree('bin',          ['*'])
    check_tree('bokeh',        ['*.py', '*.html', '*.js'], ["server/static"], ["__conda_version__.py"])
    check_tree('bokehjs',      ['*.coffee', '*.js', '*.ts', '*.less', '*.css', '*.json'], ['build', 'node_modules', 'src/vendor', 'typings'])
    check_tree('conda.recipe', ['*.py', '*.sh', '*.yaml'])
    check_tree('examples',     ['*.py', '*.ipynb'])
    check_tree('scripts',      ['*.py', '*.sh'])
    check_tree('sphinx',       ['*.rst', '*.py'], ['_build', 'source/docs/gallery'])
    check_tree('tests',        ['*.py', '*.js'])

    return errors
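
check_tree drops whole excluded subtrees by clearing dirs with del dirs[:] and then globs each remaining directory for the requested patterns. A compact sketch of that exclusion-plus-glob pattern, with an assumed walk_with_exclusions helper:

import os
from glob import glob
from os.path import join

def walk_with_exclusions(base_path, patterns, dir_exclusions=()):
    """Yield files matching any glob pattern under base_path, skipping whole
    subtrees by clearing dirs in place when the current root is excluded."""
    dir_exclusions = {join(base_path, d) for d in dir_exclusions}
    for root, dirs, _ in os.walk(base_path):
        if root in dir_exclusions:
            del dirs[:]  # do not descend any further here
            continue
        for pattern in patterns:
            for match in glob(join(root, pattern)):
                yield match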

Example 89

Project: mtpy Source File: EDLmakedayfiles.py
Function: main
def main():

    if len(sys.argv) < 3:
        sys.exit('\nNeed at least 2 arguments: \n\n '
            '<path to files> \n <sampling in seconds> \n\n'
            '[optional: <output dir>] \n [optional: <stationname>]\n'
            '[optional: <recursive flag -R>]\n'
            '(set this option for including all subfolders)\n\n')

    outdir = None
    stationname = None
    recursive = False

    multiple_stations = False

    if len(sys.argv) > 3:
        optionals = sys.argv[3:]
        for o in optionals:
            o = o.strip()
            if o[0] == '-':
                if o[1].lower() == 'r':
                    recursive = True
                continue
            elif outdir is None:
                outdir = o
                continue
            elif stationname is None:
                stationname = o 
                continue
    
    if stationname is not None:
        #check, if it's actually a comma-separated list:
        if 1:
            stationlist = stationname.split(',')
            if len(stationlist) > 1:
                multiple_stations = True
                stationlist = [i.upper() for i in stationlist]
        # except:
        #     stationlist = [stationname]
    else: stationlist = [None]

    print stationlist 

    pathname_raw = sys.argv[1]
    pathname = op.abspath(op.realpath(pathname_raw))

    if not op.isdir(pathname):
        sys.exit('Data file(s) path not existing: {0}'.format(pathname))

    try:
        sampling = float(sys.argv[2])
        if sampling <= 0 : raise
    except:
        sys.exit('Second argument must be sampling interval in seconds (int/float)')

    if recursive is True:
        lo_files = []
        for i,j,k in os.walk(pathname):
            lof = [op.abspath(op.join(i,f)) for f in j]            
            if stationname is not None:
                for stationname in stationlist:                    
                    lof_station = [i for i in lof if stationname.lower() in i.lower()]
                    lo_files.extend(lof_station)
        pathname = list(set(lo_files))

    if len(pathname) == 0:
        sys.exit('\n\tERROR - No (sub-) folders for stations {0} found\n'.format(stationlist))
    
        
    for stationname in stationlist:
        print 'processing station ',stationname.upper()
        # if pathname[0] is not None:
        #     station_pathname = [i for i in pathname if stationname.lower() in i.lower()]
        #     if len(station_pathname) == 0:
        #         station_pathname = None
        # else:
        station_pathname = pathname
        
        try:
            MTfh.EDL_make_dayfiles(station_pathname, sampling, stationname.upper(), outdir)
        except MTex.MTpyError_inputarguments:
            if stationname is None:
                sys.exit('\n\tERROR - No data found in (sub-)folders\n')
            else:
                sys.exit('\n\tERROR - No data found in (sub-)folders for station {0}\n'.format(stationname.upper()))
        except:
            sys.exit('\n\tERROR - could not process (sub-)folders')

Example 90

Project: spark-cluster-deployment Source File: spark_ec2.py
def deploy_files(conn, root_dir, opts, master_nodes, slave_nodes, modules):
  active_master = master_nodes[0].public_dns_name

  num_disks = get_num_disks(opts.instance_type)
  hdfs_data_dirs = "/mnt/ephemeral-hdfs/data"
  mapred_local_dirs = "/mnt/hadoop/mrlocal"
  spark_local_dirs = "/mnt/spark"
  if num_disks > 1:
    for i in range(2, num_disks + 1):
      hdfs_data_dirs += ",/mnt%d/ephemeral-hdfs/data" % i
      mapred_local_dirs += ",/mnt%d/hadoop/mrlocal" % i
      spark_local_dirs += ",/mnt%d/spark" % i

  cluster_url = "%s:7077" % active_master

  if "." in opts.spark_version:
    # Pre-built spark & shark deploy
    (spark_v, shark_v) = get_spark_shark_version(opts)
  else:
    # Spark-only custom deploy
    spark_v = "%s|%s" % (opts.spark_git_repo, opts.spark_version)
    shark_v = ""
    modules = filter(lambda x: x != "shark", modules)

  template_vars = {
    "master_list": '\n'.join([i.public_dns_name for i in master_nodes]),
    "active_master": active_master,
    "slave_list": '\n'.join([i.public_dns_name for i in slave_nodes]),
    "cluster_url": cluster_url,
    "hdfs_data_dirs": hdfs_data_dirs,
    "mapred_local_dirs": mapred_local_dirs,
    "spark_local_dirs": spark_local_dirs,
    "swap": str(opts.swap),
    "modules": '\n'.join(modules),
    "spark_version": spark_v,
    "shark_version": shark_v,
    "hadoop_major_version": opts.hadoop_major_version,
    "spark_worker_instances": "%d" % opts.worker_instances,
    "spark_master_opts": opts.master_opts
  }

  # Create a temp directory in which we will place all the files to be
  # deployed after we substitute template parameters in them
  tmp_dir = tempfile.mkdtemp()
  for path, dirs, files in os.walk(root_dir):
    if path.find(".svn") == -1:
      dest_dir = os.path.join('/', path[len(root_dir):])
      local_dir = tmp_dir + dest_dir
      if not os.path.exists(local_dir):
        os.makedirs(local_dir)
      for filename in files:
        if filename[0] not in '#.~' and filename[-1] != '~':
          dest_file = os.path.join(dest_dir, filename)
          local_file = tmp_dir + dest_file
          with open(os.path.join(path, filename)) as src:
            with open(local_file, "w") as dest:
              text = src.read()
              for key in template_vars:
                text = text.replace("{{" + key + "}}", template_vars[key])
              dest.write(text)
              dest.close()
  # rsync the whole directory over to the master machine
  command = [
      'rsync', '-rv',
      '-e', stringify_command(ssh_command(opts)),
      "%s/" % tmp_dir,
      "%s@%s:/" % (opts.user, active_master)
    ]
  subprocess.check_call(command)
  # Remove the temp directory we created above
  shutil.rmtree(tmp_dir)

Example 91

Project: mtpy Source File: EDLmakedayfiles.py
Function: main
def main():

    if len(sys.argv) < 3:
        sys.exit('\nNeed at least 4 arguments: \n\n '
            '<path to files> \n <sampling in seconds> \n'
            '<output dir> \n <stationname>\n'
            '[optional: <recursive flag -R>]\n'
            '(set this option for including all subfolders)\n\n')

    print 

    outdir = None
    stationname = None
    recursive = False

    multiple_stations = False

    if len(sys.argv) > 3:
        optionals = sys.argv[3:]
        for o in optionals:
            o = o.strip()
            if o[0] == '-':
                if o[1].lower() == 'r':
                    recursive = True
                continue
            elif outdir is None:
                outdir = o
                continue
            elif stationname is None:
                stationname = o 
                continue
    
    if stationname is not None:
        #check, if it's actually a comma-separated list:
        try:
            stationlist = stationname.split(',')
            if len(stationlist) > 1:
                multiple_stations = True
                stationlist = [i.upper() for i in stationlist]
        except:
            stationlist = [stationname]
    else: stationlist = [None]

    print stationlist 

    pathname_raw = sys.argv[1]
    pathname = op.abspath(op.realpath(pathname_raw))

    if not op.isdir(pathname):
        sys.exit('Data file(s) path not existing: {0}\n'.format(pathname))

    try:
        sampling = float(sys.argv[2])
        if sampling <= 0 : raise
    except:
        sys.exit('Second argument must be sampling interval in seconds (int/float)')

    if recursive is True:
        lo_folders = []
        for i,j,k in os.walk(pathname):
            lof = [op.abspath(op.join(i,f)) for f in j]            
            if stationname is not None:
                for stationname in stationlist:
                    for curr_folder in lof:
                        content_of_folder = os.listdir(curr_folder)
                        #print curr_folder
                        lof_station = [i for i in content_of_folder if stationname.lower() in i.lower()]
                        if len(lof_station) > 0 :
                            lo_folders.append(curr_folder)
        pathname = list(set(lo_folders))

    if len(pathname) == 0:
        sys.exit('\n\tERROR - No (sub-) folders for stations {0} found\n'.format(stationlist))


        
    for stationname in stationlist:
        print '....\n'
        print 'processing station ',stationname.upper()
        # if pathname[0] is not None:
        #     station_pathname = [i for i in pathname if stationname.lower() in i.lower()]
        #     if len(station_pathname) == 0:
        #         station_pathname = None
        # else:
        station_pathname = pathname
        
        try :
            MTfh.EDL_make_dayfiles(station_pathname, sampling, stationname.upper(), outdir)
        except MTex.MTpyError_inputarguments:
            if stationname is None:
                sys.exit('\n\tERROR - No data found in (sub-)folders\n')
            else:
                sys.exit('\n\tERROR - No data found in (sub-)folders for station {0}\n'.format(stationname.upper()))
        except MemoryError:
            sys.exit('\n\tERROR - Not enough memory to store temporary arrays!\n')
        except:
            sys.exit('\n\tERROR - could not process (sub-)folders')

    print '\n'

Example 92

Project: haoide Source File: package.py
Function: run
    def run(self, dirs):
        self.settings = context.get_settings()

        all_types = {}
        for _dir in dirs:
            for dirpath, dirnames, filenames in os.walk(_dir):
                for filename in filenames:
                    if filename.endswith("-meta.xml"): continue
                    if not filename.endswith(".xml"): continue

                    # Package file name
                    package_xml = os.path.join(dirpath, filename)

                    # Read package.xml content
                    with open(package_xml, "rb") as fp:
                        content = fp.read()

                    """ Combine types sample: [
                        {"ApexClass": ["test"]},
                        {"ApexTrigger": ["test"]}
                    ]
                    """
                    try:
                        _types = util.build_package_types(content)
                    except xml.parsers.expat.ExpatError as ee:
                        message = "%s parse error: %s" % (package_xml, str(ee))
                        Printer.get("error").write(message)
                        if not sublime.ok_cancel_dialog(message, "Skip?"): return
                        continue
                    except KeyError as ex:
                        if self.settings["debug_mode"]:
                            print ("%s is not valid package.xml" % package_xml)
                        continue

                    for _type in _types:
                        members = _types[_type]

                        if _type in all_types:
                            members.extend(all_types[_type])
                            members = list(set(members))
                        
                        all_types[_type] = sorted(members)

        if not all_types:
            Printer.get("error").write_start().write("No available package.xml to combine")
            return

        # print (json.dumps(all_types, indent=4))
        metadata_objects = []
        for _type in all_types:
            metadata_objects.append(
                "<types>%s<name>%s</name></types>" % (
                    "".join(["<members>%s</members>" % m for m in all_types[_type]]),
                    _type
                )
            )

        self.package_xml_content = """<?xml version="1.0" encoding="UTF-8"?>
            <Package xmlns="http://soap.sforce.com/2006/04/metadata">
                {metadata_objects}
                <version>{api_version}.0</version>
            </Package>
        """.format(
            metadata_objects="".join(metadata_objects),
            api_version=self.settings["api_version"]
        )

        package_path = os.path.join(dirs[0], "combined package.xml")
        sublime.active_window().show_input_panel("Input Package.xml Path", 
            package_path, self.on_input_package_path, None, None)

Example 93

Project: girder Source File: lib_test.py
    def testUploadCallbacks(self):
        callbackUser = self.model('user').createUser(
            firstName='Callback', lastName='Last', login='callback',
            password='password', email='[email protected]')
        callbackPublicFolder = six.next(self.model('folder').childFolders(
            parentType='user', parent=callbackUser, user=None, limit=1))
        callbackCounts = {'folder': 0, 'item': 0}
        folders = {}
        items = {}
        folders[self.libTestDir] = False
        folderCount = 1     # 1 for self.libTestDir
        item_count = 0
        for root, dirs, files in os.walk(self.libTestDir):
            for name in files:
                items[os.path.join(root, name)] = False
                item_count += 1
            for name in dirs:
                folders[os.path.join(root, name)] = False
                folderCount += 1

        def folderCallback(folder, filepath):
            self.assertIn(filepath, six.viewkeys(folders))
            folders[filepath] = True
            callbackCounts['folder'] += 1

        def itemCallback(item, filepath):
            self.assertIn(filepath, six.viewkeys(items))
            items[filepath] = True
            callbackCounts['item'] += 1

        self.client.addFolderUploadCallback(folderCallback)
        self.client.addItemUploadCallback(itemCallback)
        self.client.upload(self.libTestDir, callbackPublicFolder['_id'])

        # make sure counts are the same (callbacks not called more than once)
        # and that all folders and files have callbacks called on them
        self.assertEqual(folderCount, callbackCounts['folder'])
        self.assertEqual(item_count, callbackCounts['item'])
        self.assertTrue(all(six.viewvalues(items)))
        self.assertTrue(all(six.viewvalues(folders)))

        # Upload again with reuseExisting on
        existingList = list(self.model('folder').childFolders(
            parentType='folder', parent=callbackPublicFolder,
            user=callbackUser, limit=0))
        self.client.upload(self.libTestDir, callbackPublicFolder['_id'],
                           reuseExisting=True)
        newList = list(self.model('folder').childFolders(
            parentType='folder', parent=callbackPublicFolder,
            user=callbackUser, limit=0))
        self.assertEqual(existingList, newList)
        self.assertEqual(len(newList), 1)
        self.assertEqual([f['name'] for f in self.model('folder').childFolders(
            parentType='folder', parent=newList[0],
            user=callbackUser, limit=0)], ['sub0', 'sub1', 'sub2'])

        # Test upload via a file-like object into a folder
        callbacks = []
        path = os.path.join(self.libTestDir, 'sub0', 'f')
        size = os.path.getsize(path)

        def progressCallback(info):
            callbacks.append(info)

        with open(path) as f:
            with self.assertRaises(girder_client.IncorrectUploadLengthError):
                try:
                    self.client.uploadFile(
                        callbackPublicFolder['_id'], stream=f, name='test',
                        size=size + 1, parentType='folder')
                except girder_client.IncorrectUploadLengthError as exc:
                    self.assertEqual(
                        exc.upload['received'], exc.upload['size'] - 1)
                    upload = self.model('upload').load(exc.upload['_id'])
                    self.assertEqual(upload, None)
                    raise

        with open(path) as f:
            file = self.client.uploadFile(
                callbackPublicFolder['_id'], stream=f, name='test',
                size=size, parentType='folder',
                progressCallback=progressCallback)

        self.assertEqual(len(callbacks), 1)
        self.assertEqual(callbacks[0]['current'], size)
        self.assertEqual(callbacks[0]['total'], size)
        self.assertEqual(file['name'], 'test')
        self.assertEqual(file['size'], size)
        # Files with no extension should fallback to the default MIME type
        self.assertEqual(file['mimeType'], 'application/octet-stream')

        items = list(
            self.model('folder').childItems(folder=callbackPublicFolder))
        self.assertEqual(len(items), 1)
        self.assertEqual(items[0]['name'], 'test')

        files = list(self.model('item').childFiles(items[0]))
        self.assertEqual(len(files), 1)

        # Make sure MIME type propagates correctly when explicitly passed
        with open(path) as f:
            file = self.client.uploadFile(
                callbackPublicFolder['_id'], stream=f, name='test',
                size=size, parentType='folder', mimeType='image/jpeg')
            self.assertEqual(file['mimeType'], 'image/jpeg')

        # Make sure MIME type is guessed based on file name if not passed
        with open(path) as f:
            file = self.client.uploadFile(
                callbackPublicFolder['_id'], stream=f, name='test.txt',
                size=size, parentType='folder')
            self.assertEqual(file['mimeType'], 'text/plain')

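The test above cross-checks its upload callbacks against the number of folders and items found on disk. A minimal sketch of how such expected totals can be gathered with os.walk before comparing them to callback counts; the function name and path are illustrative, not taken from the test:

import os

def count_tree(root):
    """Count directories and files under root, the way a test might
    build expected totals before comparing them to callback counts."""
    folder_count = 0
    file_count = 0
    for dirpath, dirnames, filenames in os.walk(root):
        folder_count += len(dirnames)
        file_count += len(filenames)
    return folder_count, file_count

# Hypothetical usage:
# folders, files = count_tree('path/to/libTestDir')
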
Example 94

Project: CouchPotatoV1 Source File: movie.py
    def _checkMovieExists(self, movie):
        if cherrypy.config.get('config').get('XBMC', 'dbpath'):
            dbfile = None
            for root, dirs, files in os.walk(cherrypy.config.get('config').get('XBMC', 'dbpath')):
                for file in files:
                    if file.startswith('MyVideos'):
                        dbfile = os.path.join(root, file)

            if dbfile:
                #------Opening connection to XBMC DB------
                connXbmc = MySqlite.connect(dbfile)
                if connXbmc:
                    log.debug('Checking if movie exists in XBMC by IMDB id:' + movie.imdb)
                    connXbmc.row_factory = MySqlite.Row
                    cXbmc = connXbmc.cursor()
                    #sqlQuery = 'select c09 from movie where c09="' + movie.imdb + '"'
                    sqlQuery = self._generateSQLQuery(movie)
                    cXbmc.execute(sqlQuery)
                    #------End of Opening connection to XBMC DB------
                    inXBMC = False
                    for rowXbmc in cXbmc: # do a final check just to be sure
                        log.debug('Found in XBMC:' + rowXbmc["c09"])
                        if movie.imdb == rowXbmc["c09"]:
                            inXBMC = True
                        else:
                            inXBMC = False

                    cXbmc.close()

                    if inXBMC:
                        log.info('Movie already exists in XBMC, skipping.')
                        return True
                else:
                    log.info('Could not connect to the XBMC database at ' + cherrypy.config.get('config').get('XBMC', 'dbpath'))
            else:
                log.info('Could not find the XBMC MyVideos db at ' + cherrypy.config.get('config').get('XBMC', 'dbpath'))

        if cherrypy.config.get('config').get('XBMC', 'useWebAPIExistingCheck'):
            xbmc = XBMC()
            #sqlQuery = 'select c09 from movie where c09="' + movie.imdb + '"'
            sqlQuery = self._generateSQLQuery(movie)
            xbmcResultsHosts = xbmc.queryVideoDatabase(sqlQuery)
            
            if xbmcResultsHosts:
                for xmbcResults in xbmcResultsHosts:
                    records = xmbcResults.strip().split("<record>")
                    for xmbcResult in records:
#                        xmbcResult = xmbcResult.strip()
                        xmbcResult = xmbcResult.replace("</record>", "")
#                        xmbcResult = xmbcResult.strip()
                        
                        if xmbcResult == "":
                            continue
                        
                        fields = filter(lambda x: x != "", [field.replace("</field>", "") for field in xmbcResult.split("<field>")])
                    
                        log.debug("fields = %s" % fields)                 
                        c09 = fields[0]
                        if c09==movie.imdb:
                            log.info('Movie already exists in XBMC (web API call), skipping.')
                            return True

        return False

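The walk in this example keeps reassigning dbfile, so it ends up holding whichever MyVideos* file os.walk happens to yield last. A minimal sketch of the same lookup that chooses a match explicitly by modification time; the function name is illustrative and this is not the project's code:

import os

def find_myvideos_db(dbpath):
    """Return the most recently modified file under dbpath whose name
    starts with 'MyVideos', or None if nothing matches."""
    candidates = []
    for root, dirs, files in os.walk(dbpath):
        for name in files:
            if name.startswith('MyVideos'):
                candidates.append(os.path.join(root, name))
    return max(candidates, key=os.path.getmtime, default=None)
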
Example 95

Project: django-comps Source File: views.py
def export_comps(request):
    """
    Returns a zipfile of the rendered HTML templates in the COMPS_DIR
    """
    in_memory = BytesIO()
    zip = ZipFile(in_memory, "a")

    comps = settings.COMPS_DIR
    static = settings.STATIC_ROOT
    context = RequestContext(request, {})
    context['debug'] = False

    # dump static resources
    # TODO: inspect each template and only pull in resources that are used
    for dirname, dirs, filenames in os.walk(static):
        for filename in filenames:
            full_path = os.path.join(dirname, filename)
            rel_path = os.path.relpath(full_path, static)
            content = open(full_path, 'rb').read()
            try:
                ext = os.path.splitext(filename)[1]
            except IndexError:
                pass
            if ext == '.css':
                # convert static refs to relative links
                dotted_rel = os.path.relpath(static, full_path)
                new_rel_path = '{0}{1}'.format(dotted_rel, '/static')
                content = content.replace(b'/static', bytes(new_rel_path, 'utf8'))
            path = os.path.join('static', rel_path)
            zip.writestr(path, content)

    for dirname, dirs, filenames in os.walk(comps):
        for filename in filenames:
            full_path = os.path.join(dirname, filename)
            rel_path = os.path.relpath(full_path, comps)
            template_path = os.path.join(comps.split('/')[-1], rel_path)
            html = render_to_string(template_path, context)
            # convert static refs to relative links
            depth = len(rel_path.split(os.sep)) - 1
            if depth == 0:
                dotted_rel = '.'
            else:
                dotted_rel = ''
                i = 0
                while i < depth:
                    dotted_rel += '../'
                    i += 1
            new_rel_path = '{0}{1}'.format(dotted_rel, '/static')
            html = html.replace('/static', new_rel_path)
            if PY2:
                html = unicode(html)
            zip.writestr(rel_path, html.encode('utf8'))

    for item in zip.filelist:
        item.create_system = 0
    zip.close()

    response = HttpResponse(content_type="application/zip")
    response["Content-Disposition"] = "attachment; filename=comps.zip"
    in_memory.seek(0)
    response.write(in_memory.read())

    return response

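The os.walk pattern at the heart of this view is: walk a root, turn each absolute path into a path relative to that root, and reuse the relative path as the archive name. A minimal standalone sketch of that pattern, assuming nothing beyond the standard library; the function and parameter names are illustrative:

import os
import zipfile
from io import BytesIO

def zip_tree(root, prefix=''):
    """Pack every file under root into an in-memory zip, storing each
    entry under its path relative to root, optionally prefixed."""
    buffer = BytesIO()
    with zipfile.ZipFile(buffer, 'w') as archive:
        for dirname, dirs, filenames in os.walk(root):
            for filename in filenames:
                full_path = os.path.join(dirname, filename)
                rel_path = os.path.relpath(full_path, root)
                archive.write(full_path, os.path.join(prefix, rel_path))
    buffer.seek(0)
    return buffer
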
Example 96

Project: python-steemlib Source File: upload_posts.py
def main() :
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=("Post files into STEEM\n\n"
                "This script goes into the posts directory that "
                "contains subfolders named after the authors.\n"
                "This subfolders contain markdown (*.md) files "
                "from which \n"
                "   * the file name is used as permlink\n"
                "   * the first line of content is subject\n"
                "   * the rest of the content is body\n")
    )
    parser.add_argument('--author',
                        type=str,
                        help='Only publish/update posts of this author')
    parser.add_argument('--permlink',
                        type=str,
                        help='Only publish/update the permlink')
    parser.add_argument('--category',
                        type=str,
                        help='Post in category')
    parser.add_argument('--dir',
                        type=str,
                        help='Directory that holds all posts (default: "posts")')
    parser.add_argument('-d',
                        "--dryrun",
                        help="Do not actually post anything",
                        action="store_true")
    parser.set_defaults(dir="./posts", dryrun=False, category="")
    args = parser.parse_args()

    if not path.isdir(args.dir):
        raise Exception("Directory %s does not exist!" % args.dir)
        
    try:
        client = SteemClient(Config)
    except:
        raise Exception("Couldn't open connection to wallet!")

    if client.wallet.is_locked():
        raise Exception("Wallet is locked! Please unlock it!")

    for (dirpath, dirnames, filenames) in walk(args.dir):
        for f in filenames:
            author = dirpath.split("/")[-1]
            permlink = f.replace(".md", "")
            if args.author and author != args.author:
                continue
            if args.permlink and permlink != args.permlink:
                continue

            content = open(dirpath + "/" + f).read().split("\n")
            subject = content[0].replace("# ", "")
            body = "\n".join(content[2:])

            pprint(client.wallet.post_comment(author,
                                              permlink,
                                              "", args.category,
                                              subject,
                                              body,
                                              "{}",
                                              not args.dryrun))

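The script maps the directory layout onto post metadata: the containing folder is the author, the file name minus .md is the permlink, the first line of content is the subject, and the rest is the body. A minimal sketch of just that walk-and-parse step, without the wallet interaction; the function name is illustrative and the layout assumptions are the ones described in the script's help text:

import os

def iter_posts(posts_dir):
    """Yield (author, permlink, subject, body) for every .md file,
    taking the author from the name of the file's parent directory."""
    for dirpath, dirnames, filenames in os.walk(posts_dir):
        for name in filenames:
            if not name.endswith('.md'):
                continue
            author = os.path.basename(dirpath)
            permlink = name[:-3]
            with open(os.path.join(dirpath, name)) as handle:
                lines = handle.read().split('\n')
            subject = lines[0].replace('# ', '')
            body = '\n'.join(lines[2:])
            yield author, permlink, subject, body
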
Example 97

Project: MongoApp Source File: inotify_c.py
    def read_events(self, event_buffer_size=DEFAULT_EVENT_BUFFER_SIZE):
        """
        Reads events from inotify and yields them.
        """
        # HACK: We need to traverse the directory path
        # recursively and simulate events for newly
        # created subdirectories/files. This will handle
        # mkdir -p foobar/blah/bar; touch foobar/afile

        def _recursive_simulate(src_path):
            events = []
            for root, dirnames, filenames in os.walk(src_path):
                for dirname in dirnames:
                    try:
                        full_path = absolute_path(os.path.join(root, dirname))
                        wd_dir = self._add_watch(full_path, self._event_mask)
                        e = InotifyEvent(
                            wd_dir, InotifyConstants.IN_CREATE | InotifyConstants.IN_ISDIR, 0, dirname, full_path)
                        events.append(e)
                    except OSError:
                        pass
                for filename in filenames:
                    full_path = absolute_path(os.path.join(root, filename))
                    wd_parent_dir = self._wd_for_path[absolute_path(os.path.dirname(full_path))]
                    e = InotifyEvent(
                        wd_parent_dir, InotifyConstants.IN_CREATE, 0, filename, full_path)
                    events.append(e)
            return events

        while True:
            try:
                event_buffer = os.read(self._inotify_fd, event_buffer_size)
            except OSError as e:
                if e.errno == errno.EINTR:
                    continue
            break

        with self._lock:
            event_list = []
            for wd, mask, cookie, name in Inotify._parse_event_buffer(event_buffer):
                if wd == -1:
                    continue
                wd_path = unicode_paths.encode(self._path_for_wd[wd])
                src_path = absolute_path(os.path.join(wd_path, name))
                inotify_event = InotifyEvent(
                    wd, mask, cookie, name, src_path)

                if inotify_event.is_moved_from:
                    self.remember_move_from_event(inotify_event)
                elif inotify_event.is_moved_to:
                    move_src_path = self.source_for_move(inotify_event)
                    if move_src_path in self._wd_for_path:
                        moved_wd = self._wd_for_path[move_src_path]
                        del self._wd_for_path[move_src_path]
                        self._wd_for_path[inotify_event.src_path] = moved_wd
                        self._path_for_wd[moved_wd] = inotify_event.src_path
                    src_path = absolute_path(os.path.join(wd_path, name))
                    inotify_event = InotifyEvent(wd, mask, cookie, name, src_path)

                if inotify_event.is_ignored:
                    # Clean up book-keeping for deleted watches.
                    self._remove_watch_bookkeeping(src_path)
                    continue

                event_list.append(inotify_event)

                if (self.is_recursive and
                        inotify_event.is_directory and
                        inotify_event.is_create):

                    # TODO: When a directory from another part of the
                    # filesystem is moved into a watched directory, this
                    # will not generate events for the directory tree.
                    # We need to coalesce IN_MOVED_TO events and those
                    # IN_MOVED_TO events which don't pair up with
                    # IN_MOVED_FROM events should be marked IN_CREATE
                    # instead relative to this directory.
                    try:
                        self._add_watch(src_path, self._event_mask)
                    except OSError:
                        continue

                    event_list.extend(_recursive_simulate(src_path))

        return event_list

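The _recursive_simulate helper relies on os.walk's default top-down order: each directory is yielded before its contents, so a watch can be registered for a subdirectory before events are synthesized for the files inside it. A minimal sketch of that traversal on its own, detached from inotify; the callback names are illustrative:

import os

def visit_new_tree(src_path, on_dir, on_file):
    """Walk a freshly created tree top-down, calling on_dir for each
    subdirectory (e.g. to add a watch) before on_file is called for
    any file beneath it (e.g. to synthesize a create event)."""
    for root, dirnames, filenames in os.walk(src_path):
        for dirname in dirnames:
            on_dir(os.path.join(root, dirname))
        for filename in filenames:
            on_file(os.path.join(root, filename))
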
Example 98

Project: jasy Source File: Create.py
def massFilePatcher(path, data):
    
    # Convert method with access to local data
    def convertPlaceholder(mo):
        field = mo.group(1)
        value = data.get(field)

        # Verify that None means missing
        if value is None and not data.has(field):
            raise ValueError('No value for placeholder "%s"' % field)
    
        # Requires value being a string
        return str(value)
        
    # Patching files recursively
    Console.info("Patching files...")
    Console.indent()
    for dirPath, dirNames, fileNames in os.walk(path):
        relpath = os.path.relpath(dirPath, path)

        # Filter dotted directories like .git, .bzr, .hg, .svn, etc.
        for dirname in dirNames:
            if dirname.startswith("."):
                dirNames.remove(dirname)
        
        for fileName in fileNames:
            filePath = os.path.join(dirPath, fileName)
            fileRel = os.path.normpath(os.path.join(relpath, fileName))
            
            Console.debug("Processing: %s..." % fileRel)

            fileHandle = open(filePath, "r", encoding="utf-8", errors="surrogateescape")
            fileContent = []
            
            # Parse file line by line to detect binary files early and omit
            # fully loading them into memory
            try:
                isBinary = False

                for line in fileHandle:
                    if '\0' in line:
                        isBinary = True
                        break 
                    else:
                        fileContent.append(line)
        
                if isBinary:
                    Console.debug("Ignoring binary file: %s", fileRel)
                    continue

            except UnicodeDecodeError as ex:
                Console.warn("Can't process file: %s: %s", fileRel, ex)
                continue

            fileContent = "".join(fileContent)

            # Update content with available data
            try:
                resultContent = fieldPattern.sub(convertPlaceholder, fileContent)
            except ValueError as ex:
                Console.warn("Unable to process file %s: %s!", fileRel, ex)
                continue

            # Only write the file if there were any changes applied
            if resultContent != fileContent:
                Console.info("Updating: %s...", Console.colorize(fileRel, "bold"))
                
                fileHandle = open(filePath, "w", encoding="utf-8", errors="surrogateescape")
                fileHandle.write(resultContent)
                fileHandle.close()
                
    Console.outdent()

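Because os.walk hands out dirNames before it descends, deleting entries from that list is what keeps the patcher out of .git, .hg and similar directories; removing items from a list while iterating over it can skip adjacent entries, though. A commonly used, more robust variant of the same in-place pruning, offered as a sketch rather than the project's code:

import os

def iter_non_hidden_files(path):
    """Yield every file path under path, skipping dotted directories.
    Assigning to dirnames[:] mutates the very list os.walk will descend
    into, so pruned directories are never visited at all."""
    for dirpath, dirnames, filenames in os.walk(path):
        dirnames[:] = [d for d in dirnames if not d.startswith('.')]
        for filename in filenames:
            yield os.path.join(dirpath, filename)
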
Example 99

Project: unrpyc Source File: unrpyc.py
def main():
    # python27 unrpyc.py [-c] [-d] [--python-screens|--ast-screens|--no-screens] file [file ...]
    parser = argparse.ArgumentParser(description="Decompile .rpyc files")

    parser.add_argument('-c', '--clobber', dest='clobber', action='store_true',
                        help="overwrites existing output files")

    parser.add_argument('-d', '--dump', dest='dump', action='store_true',
                        help="instead of decompiling, pretty print the ast to a file")

    parser.add_argument('-p', '--processes', dest='processes', action='store', default=cpu_count(),
                        help="use the specified number of processes to decompile")

    parser.add_argument('-t', '--translation-file', dest='translation_file', action='store', default=None,
                        help="use the specified file to translate during decompilation")

    parser.add_argument('-T', '--write-translation-file', dest='write_translation_file', action='store', default=None,
                        help="store translations in the specified file instead of decompiling")

    parser.add_argument('-l', '--language', dest='language', action='store', default='english',
                        help="if writing a translation file, the language of the translations to write")

    parser.add_argument('--sl1-as-python', dest='decompile_python', action='store_true',
                        help="Only dumping and for decompiling screen language 1 screens. "
                        "Convert SL1 Python AST to Python code instead of dumping it or converting it to screenlang.")

    parser.add_argument('--comparable', dest='comparable', action='store_true',
                        help="Only for dumping, remove several false differences when comparing dumps. "
                        "This suppresses attributes that are different even when the code is identical, such as file modification times. ")

    parser.add_argument('--no-pyexpr', dest='no_pyexpr', action='store_true',
                        help="Only for dumping, disable special handling of PyExpr objects, instead printing them as strings. "
                        "This is useful when comparing dumps from different versions of Ren'Py. "
                        "It should only be used if necessary, since it will cause loss of information such as line numbers.")

    parser.add_argument('file', type=str, nargs='+',
                        help="The filenames to decompile. "
                        "All .rpyc files in any directories passed or their subdirectories will also be decompiled.")

    args = parser.parse_args()

    if args.write_translation_file and not args.clobber and path.exists(args.write_translation_file):
        # Fail early to avoid wasting time going through the files
        print "Output translation file already exists. Pass --clobber to overwrite."
        return

    if args.translation_file:
        with open(args.translation_file, 'rb') as in_file:
            args.translations = in_file.read()

    # Expand wildcards
    filesAndDirs = map(glob.glob, args.file)
    # Concatenate lists
    filesAndDirs = list(itertools.chain(*filesAndDirs))

    # Recursively add .rpyc files from any directories passed
    files = []
    for i in filesAndDirs:
        if path.isdir(i):
            for dirpath, dirnames, filenames in walk(i):
                files.extend(path.join(dirpath, j) for j in filenames if len(j) >= 5 and j[-5:] == '.rpyc')
        else:
            files.append(i)

    # Check if we actually have files
    if len(files) == 0:
        parser.print_help();
        parser.error("No script files given.")

    files = map(lambda x: (args, x, path.getsize(x)), files)
    processes = int(args.processes)
    if processes > 1:
        # If a big file starts near the end, there could be a long time with
        # only one thread running, which is inefficient. Avoid this by starting
        # big files first.
        files.sort(key=itemgetter(2), reverse=True)
        results = Pool(int(args.processes), sharelock, [printlock]).map(worker, files, 1)
    else:
        # Decompile in the order Ren'Py loads in
        files.sort(key=itemgetter(1))
        results = map(worker, files)

    if args.write_translation_file:
        print "Writing translations to %s..." % args.write_translation_file
        translated_dialogue = {}
        translated_strings = {}
        good = 0
        bad = 0
        for result in results:
            if not result:
                bad += 1
                continue
            good += 1
            translated_dialogue.update(magic.loads(result[0], class_factory))
            translated_strings.update(result[1])
        with open(args.write_translation_file, 'wb') as out_file:
            magic.safe_dump((args.language, translated_dialogue, translated_strings), out_file)

    else:
        # Check per file if everything went well and report back
        good = results.count(True)
        bad = results.count(False)

    if bad == 0:
        print "Decompilation of %d script file%s successful" % (good, 's' if good>1 else '')
    elif good == 0:
        print "Decompilation of %d file%s failed" % (bad, 's' if bad>1 else '')
    else:
        print "Decompilation of %d file%s successful, but decompilation of %d file%s failed" % (good, 's' if good>1 else '', bad, 's' if bad>1 else '')

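The decompiler accepts both files and directories on the command line and only descends into the latter, collecting anything that ends in .rpyc. A minimal sketch of that collection step on its own; the extension is the one the script looks for, while the function and parameter names are illustrative:

import os

def collect_by_extension(paths, extension='.rpyc'):
    """Expand a mixed list of files and directories into a flat list of
    files with the given extension; plain files are kept as given."""
    collected = []
    for entry in paths:
        if os.path.isdir(entry):
            for dirpath, dirnames, filenames in os.walk(entry):
                collected.extend(os.path.join(dirpath, name)
                                 for name in filenames
                                 if name.endswith(extension))
        else:
            collected.append(entry)
    return collected
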
Example 100

Project: butterflow Source File: cli.py
def main():
    par = argparse.ArgumentParser(usage='butterflow [options] [video]',
                                  add_help=False)
    req = par.add_argument_group('Required arguments')
    gen = par.add_argument_group('General options')
    dsp = par.add_argument_group('Display options')
    vid = par.add_argument_group('Video options')
    mux = par.add_argument_group('Muxing options')
    fgr = par.add_argument_group('Advanced options')

    req.add_argument('video', type=str, nargs='?', default=None,
                     help='Specify the input video')

    gen.add_argument('-h', '--help', action='help',
                     help='Show this help message and exit')
    gen.add_argument('--version', action='store_true',
                     help='Show program\'s version number and exit')
    gen.add_argument('-d', '--devices', action='store_true',
                     help='Show detected OpenCL devices and exit')
    gen.add_argument('-sw', action='store_true',
                     help='Set to force software rendering')
    gen.add_argument('-c', '--cache', action='store_true',
                     help='Show cache information and exit')
    gen.add_argument('--rm-cache', action='store_true',
                     help='Set to clear the cache and exit')
    gen.add_argument('-prb', '--probe', action='store_true',
                     help='Show media file information and exit')
    gen.add_argument('-v', '--verbosity', action='count',
                     help='Set to increase output verbosity')
    gen.add_argument('-q', '--quiet', action='store_true',
                     help='Set to suppress console output')

    dsp.add_argument('-p', '--show-preview', action='store_true',
                     help='Set to show video preview')
    dsp.add_argument('-a', '--add-info', action='store_true',
                     help='Set to embed debugging info into the output video')
    dsp.add_argument('-tt', '--text-type',
                     choices=['light', 'dark', 'stroke'],
                     default=settings['text_type'],
                     help='Specify text type for debugging info, '
                     '(default: %(default)s)')
    dsp.add_argument('-mrk', '--mark-frames', action='store_true',
                     help='Set to mark interpolated frames')

    vid.add_argument('-o', '--output-path', type=str,
                     default=settings['out_path'],
                     help='Specify path to the output video')
    vid.add_argument('-r', '--playback-rate', type=str,
                     help='Specify the playback rate as an integer or a float. '
                     'Fractional forms are acceptable, e.g., 24/1.001 is the '
                     'same as 23.976. To use a multiple of the source '
                     'video\'s rate, follow a number with `x`, e.g., "2x" '
                     'will double the frame rate. The original rate will be '
                     'used by default if nothing is specified.')
    vid.add_argument('-s', '--subregions', type=str,
                     help='Specify rendering subregions in the form: '
                     '"a=TIME,b=TIME,TARGET=VALUE" where TARGET is either '
                     '`fps`, `dur`, `spd`. Valid TIME syntaxes are [hr:m:s], '
                     '[m:s], [s], [s.xxx], or `end`, which signifies to the '
                     'end of the video. You can specify multiple subregions by '
                     'separating them with a colon `:`. A special subregion '
                     'format that conveniently describes the entire clip is '
                     'available in the form: "full,TARGET=VALUE".')
    vid.add_argument('-k', '--keep-subregions', action='store_true',
                     help='Set to render subregions that are not explicitly '
                          'specified')
    vid.add_argument('-vs', '--video-scale', type=str,
                     default=str(settings['video_scale']),
                     help='Specify output video size in the form: '
                     '"WIDTH:HEIGHT" or by using a factor. To keep the '
                     'aspect ratio only specify one component, either width '
                     'or height, and set the other component to -1, '
                     '(default: %(default)s)')
    vid.add_argument('-l', '--lossless', action='store_true',
                     help='Set to use lossless encoding settings')
    vid.add_argument('-sm', '--smooth-motion', action='store_true',
                     help='Set to tune for smooth motion. This mode yields '
                     'artifact-less frames by emphasizing blended frames over '
                     'warping pixels.')

    mux.add_argument('-mux', action='store_true',
                     help='Set to mux the source audio with the output video')

    fgr.add_argument('--fast-pyr', action='store_true',
                     help='Set to use fast pyramids')
    fgr.add_argument('--pyr-scale', type=float,
                     default=settings['pyr_scale'],
                     help='Specify pyramid scale factor, '
                     '(default: %(default)s)')
    fgr.add_argument('--levels', type=int,
                     default=settings['levels'],
                     help='Specify number of pyramid layers, '
                     '(default: %(default)s)')
    fgr.add_argument('--winsize', type=int,
                     default=settings['winsize'],
                     help='Specify averaging window size, '
                     '(default: %(default)s)')
    fgr.add_argument('--iters', type=int,
                     default=settings['iters'],
                     help='Specify number of iterations at each pyramid '
                     'level, (default: %(default)s)')
    fgr.add_argument('--poly-n', type=int,
                     choices=settings['poly_n_choices'],
                     default=settings['poly_n'],
                     help='Specify size of pixel neighborhood, '
                     '(default: %(default)s)')
    fgr.add_argument('--poly-s', type=float,
                     default=settings['poly_s'],
                     help='Specify standard deviation to smooth derivatives, '
                     '(default: %(default)s)')
    fgr.add_argument('-ff', '--flow-filter', choices=['box', 'gaussian'],
                     default=settings['flow_filter'],
                     help='Specify which filter to use for optical flow '
                     'estimation, (default: %(default)s)')

    for i, arg in enumerate(sys.argv):
        if arg[0] == '-' and arg[1].isdigit():
            sys.argv[i] = ' '+arg

    args = par.parse_args()

    fmt = '[butterflow:%(filename)s:%(funcName)s.%(levelname)s]: %(message)s'
    logging.basicConfig(level=settings['loglevel_0'], format=fmt)
    log = logging.getLogger('butterflow')

    if args.verbosity == 1:
        log.setLevel(settings['loglevel_1'])
    if args.verbosity >= 2:
        log.setLevel(settings['loglevel_2'])
    if args.quiet:
        log.setLevel(settings['loglevel_quiet'])
        settings['quiet'] = True

    if args.version:
        print(__version__)
        return 0

    cachedir = settings['tempdir']
    if args.cache:
        nfiles = 0
        sz = 0
        for dirpath, dirnames, filenames in os.walk(cachedir):
            if dirpath == settings['clbdir']:
                continue
            for filename in filenames:
                nfiles += 1
                fp = os.path.join(dirpath, filename)
                sz += os.path.getsize(fp)
        sz = sz / 1024.0**2
        print('{} files, {:.2f} MB'.format(nfiles, sz))
        print('cache @ '+cachedir)
        return 0
    if args.rm_cache:
        if os.path.exists(cachedir):
            import shutil
            shutil.rmtree(cachedir)
        print('cache deleted, done.')
        return 0

    if args.devices:
        ocl.print_ocl_devices()
        return 0

    if not args.video:
        print('no file specified, use: -h for help')
        return 1
    elif not os.path.exists(args.video):
        print('file does not exist')
        return 1

    if args.probe:
        avinfo.print_av_info(args.video)
        return 0

    extension = os.path.splitext(os.path.basename(args.output_path))[1].lower()
    if extension[1:] != 'mp4':
        print('bad out file extension')
        return 0

    av_info = avinfo.get_av_info(args.video)

    use_sw_interpolate = args.sw or not ocl.compat_ocl_device_available()
    if use_sw_interpolate:
        log.warn('not using opencl, ctrl+c to quit')

    if args.flow_filter == 'gaussian':
        args.flow_filter = cv2.OPTFLOW_FARNEBACK_GAUSSIAN
    else:
        args.flow_filter = 0

    if args.smooth_motion:
        args.polys = 0.01

    def optflow_fn(x, y,
                   pyr=args.pyr_scale, levels=args.levels,
                   winsize=args.winsize, iters=args.iters, polyn=args.poly_n,
                   polys=args.poly_s, fast=args.fast_pyr,
                   filt=args.flow_filter):
        if use_sw_interpolate:
            return cv2.calcOpticalFlowFarneback(
                x, y, pyr, levels, winsize, iters, polyn, polys, filt)
        else:
            return motion.ocl_farneback_optical_flow(
                x, y, pyr, levels, winsize, iters, polyn, polys, fast, filt)

    interpolate_fn = None
    if use_sw_interpolate:
        from butterflow.interpolate import sw_interpolate_flow
        interpolate_fn = sw_interpolate_flow
    else:
        interpolate_fn = motion.ocl_interpolate_flow

    try:
        w, h = w_h_from_input_str(args.video_scale, av_info['w'], av_info['h'])
        sequence = sequence_from_input_str(args.subregions,
                                           av_info['duration'],
                                           av_info['frames'])
        rate = rate_from_input_str(args.playback_rate, av_info['rate'])
    except (ValueError, AttributeError) as error:
        print('error: '+str(error))
        return 1

    def nearest_even_int(x):
        return x & ~1

    w1, h1 = av_info['w'], av_info['h']
    w2, h2 = nearest_even_int(w), nearest_even_int(h)

    if w1*h1 > w2*h2:
        scaling_method = settings['scaler_dn']
    elif w1*h1 < w2*h2:
        scaling_method = settings['scaler_up']
    else:
        scaling_method = None

    rnd = Renderer(args.video,
                   args.output_path,
                   sequence,
                   rate,
                   optflow_fn,
                   interpolate_fn,
                   w2,
                   h2,
                   scaling_method,
                   args.lossless,
                   args.keep_subregions,
                   args.show_preview,
                   args.add_info,
                   args.text_type,
                   args.mark_frames,
                   args.mux)

    motion.set_num_threads(settings['ocv_threads'])

    log.info('will render:\n' + str(rnd.sequence))

    success = True
    total_time = 0
    try:
        import timeit
        total_time = timeit.timeit(rnd.render,
                                   setup='import gc;gc.enable()',
                                   number=1)
    except (KeyboardInterrupt, SystemExit):
        success = False
    if success:
        log.info('made: '+args.output_path)
        out_sz = os.path.getsize(args.output_path) / 1024.0**2
        log.info('write ratio: {}/{}, ({:.2f}%) {:.2f} MB'.format(
                 rnd.frs_written,
                 rnd.frs_to_render,
                 rnd.frs_written*100.0/rnd.frs_to_render,
                 out_sz))
        txt = 'frames: {} real, +{} interpolated, +{} dupe, -{} drop'
        if not settings['quiet']:
            print(txt.format(rnd.source_frs,
                             rnd.frs_interpolated,
                             rnd.frs_duped,
                             rnd.frs_dropped))
        log.info('butterflow took {:.3g} mins, done.'.format(total_time / 60))
        return 0
    else:
        log.warn('quit unexpectedly')
        log.warn('files left in cache @ '+settings['tempdir'])
        return 1
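
The --cache branch above walks the cache directory once, counting files and summing their sizes into megabytes while skipping one special subdirectory. A minimal standalone sketch of that accounting; the skip_dirs parameter stands in for settings['clbdir'] and is illustrative:

import os

def cache_stats(cache_dir, skip_dirs=()):
    """Return (file_count, size_in_mb) for everything under cache_dir,
    ignoring any directory whose full path appears in skip_dirs."""
    file_count = 0
    total_bytes = 0
    for dirpath, dirnames, filenames in os.walk(cache_dir):
        if dirpath in skip_dirs:
            continue
        for filename in filenames:
            file_count += 1
            total_bytes += os.path.getsize(os.path.join(dirpath, filename))
    return file_count, total_bytes / 1024.0 ** 2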