json.load

Here are examples of the Python API json.load, taken from open-source projects. By voting up, you can indicate which examples are most useful and appropriate.
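
Before the project examples, here is a minimal, self-contained sketch of the call itself (the settings.json file name is just a placeholder):

import json

# json.load reads a JSON document from an open text-file object and returns
# ordinary Python objects (dict, list, str, int, float, bool, None).
with open("settings.json", "r") as fh:
    settings = json.load(fh)

print(settings.get("output_dir", "."))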

174 Examples

Example 151

Project: Arelle Source File: TableStructure.py
def evaluateTableIndex(modelXbrl, lang=None):
    usgaapRoleDefinitionPattern = re.compile(r"([0-9]+) - (Statement|Disclosure|Schedule|Document) - (.+)")
    ifrsRoleDefinitionPattern = re.compile(r"\[([0-9]+)\] (.+)")
    # build EFM rendering-compatible index
    definitionElrs = dict((modelXbrl.roleTypeDefinition(roleURI, lang), roleType)
                          for roleURI in modelXbrl.relationshipSet(XbrlConst.parentChild).linkRoleUris
                          for roleType in modelXbrl.roleTypes.get(roleURI,()))
    sortedRoleTypes = sorted(definitionElrs.items(), key=lambda item: item[0])
    disclosureSystem = modelXbrl.modelManager.disclosureSystem
    _usgaapStyleELRs = _isJpFsa = _ifrsStyleELRs = False
    if disclosureSystem.validationType == "EFM":
        _usgaapStyleELRs = True
    elif "jp-fsa" in modelXbrl.modelManager.disclosureSystem.names:
        _isJpFsa = True
    else:
        # attempt to determine type
        if any(usgaapRoleDefinitionPattern.match(r[0]) for r in sortedRoleTypes if r[0]):
            _usgaapStyleELRs = True
        elif any(ifrsRoleDefinitionPattern.match(r[0]) for r in sortedRoleTypes if r[0]):
            _ifrsStyleELRs = True
    if _usgaapStyleELRs:
        COVER    = "1Cover"
        STMTS    = "2Financial Statements"
        NOTES    = "3Notes to Financial Statements"
        POLICIES = "4Accounting Policies"
        TABLES   = "5Notes Tables"
        DETAILS  = "6Notes Details"
        UNCATEG  = "7Uncategorized"
        isRR = any(ns.startswith("http://xbrl.sec.gov/rr/") for ns in modelXbrl.namespaceDocs.keys() if ns)
        tableGroup = None
        firstTableLinkroleURI = None
        firstDocumentLinkroleURI = None
        for roleDefinition, roleType in sortedRoleTypes:
            roleType._tableChildren = []
            match = usgaapRoleDefinitionPattern.match(roleDefinition) if roleDefinition else None
            if not match: 
                roleType._tableIndex = (UNCATEG, "", roleType.roleURI)
                continue
            seq, tblType, tblName = match.groups()
            if isRR:
                tableGroup = COVER
            elif not tableGroup:
                tableGroup = ("Paren" in tblName and COVER or tblType == "Statement" and STMTS or
                              "(Polic" in tblName and NOTES or "(Table" in tblName and TABLES or
                              "(Detail" in tblName and DETAILS or COVER)
            elif tableGroup == COVER:
                tableGroup = (tblType == "Statement" and STMTS or "Paren" in tblName and COVER or
                              "(Polic" in tblName and NOTES or "(Table" in tblName and TABLES or
                              "(Detail" in tblName and DETAILS or NOTES)
            elif tableGroup == STMTS:
                tableGroup = ((tblType == "Statement" or "Paren" in tblName) and STMTS or
                              "(Polic" in tblName and NOTES or "(Table" in tblName and TABLES or
                              "(Detail" in tblName and DETAILS or NOTES)
            elif tableGroup == NOTES:
                tableGroup = ("(Polic" in tblName and POLICIES or "(Table" in tblName and TABLES or 
                              "(Detail" in tblName and DETAILS or tblType == "Disclosure" and NOTES or UNCATEG)
            elif tableGroup == POLICIES:
                tableGroup = ("(Table" in tblName and TABLES or "(Detail" in tblName and DETAILS or 
                              ("Paren" in tblName or "(Polic" in tblName) and POLICIES or UNCATEG)
            elif tableGroup == TABLES:
                tableGroup = ("(Detail" in tblName and DETAILS or 
                              ("Paren" in tblName or "(Table" in tblName) and TABLES or UNCATEG)
            elif tableGroup == DETAILS:
                tableGroup = (("Paren" in tblName or "(Detail" in tblName) and DETAILS or UNCATEG)
            else:
                tableGroup = UNCATEG
            if firstTableLinkroleURI is None and tableGroup == COVER:
                firstTableLinkroleURI = roleType.roleURI
            if tblType == "Docuement" and not firstDocuementLinkroleURI:
                firstDocuementLinkroleURI = roleType.roleURI
            roleType._tableIndex = (tableGroup, seq, tblName)

        # flow allocate facts to roles (SEC presentation groups)
        if not modelXbrl.qnameDimensionDefaults: # may not have run validation yet
            from arelle import ValidateXbrlDimensions
            ValidateXbrlDimensions.loadDimensionDefaults(modelXbrl)
        reportedFacts = set() # facts which were shown in a higher-numbered ELR table
        factsByQname = modelXbrl.factsByQname
        reportingPeriods = set()
        nextEnd = None
        deiFact = {}
        for conceptName in ("DocuementPeriodEndDate", "DocuementType", "CurrentFiscalPeriodEndDate"):
            for concept in modelXbrl.nameConcepts[conceptName]:
                for fact in factsByQname[concept.qname]:
                    deiFact[conceptName] = fact
                    if fact.context is not None:
                        reportingPeriods.add((None, fact.context.endDatetime)) # for instant
                        reportingPeriods.add((fact.context.startDatetime, fact.context.endDatetime)) # for startEnd
                        nextEnd = fact.context.startDatetime
                        duration = (fact.context.endDatetime - fact.context.startDatetime).days + 1
                        break
        if "DocuementType" in deiFact:
            fact = deiFact["DocuementType"]
            if fact.xValid >= VALID and "-Q" in fact.xValue or "": # fact may be invalid
                # need quarterly and yr to date durations
                endDatetime = fact.context.endDatetime
                # if within 2 days of end of month use last day of month
                endDatetimeMonth = endDatetime.month
                if (endDatetime + timedelta(2)).month != endDatetimeMonth:
                    # near end of month
                    endOfMonth = True
                    while endDatetime.month == endDatetimeMonth:
                        endDatetime += timedelta(1) # go forward to next month
                else:
                    endOfMonth = False
                startYr = endDatetime.year
                startMo = endDatetime.month - 3
                if startMo <= 0:
                    startMo += 12
                    startYr -= 1
                startDatetime = datetime(startYr, startMo, endDatetime.day, endDatetime.hour, endDatetime.minute, endDatetime.second)
                if endOfMonth:
                    startDatetime -= timedelta(1)
                    endDatetime -= timedelta(1)
                reportingPeriods.add((startDatetime, endDatetime))
                duration = 91
        # find preceding compatible default context periods
        while (nextEnd is not None):
            thisEnd = nextEnd
            prevMaxStart = thisEnd - timedelta(duration * .9)
            prevMinStart = thisEnd - timedelta(duration * 1.1)
            nextEnd = None
            for cntx in modelXbrl.contexts.values():
                if (cntx.isStartEndPeriod and not cntx.qnameDims and thisEnd == cntx.endDatetime and
                    prevMinStart <= cntx.startDatetime <= prevMaxStart):
                    reportingPeriods.add((None, cntx.endDatetime))
                    reportingPeriods.add((cntx.startDatetime, cntx.endDatetime))
                    nextEnd = cntx.startDatetime
                    break
                elif (cntx.isInstantPeriod and not cntx.qnameDims and thisEnd == cntx.endDatetime):
                    reportingPeriods.add((None, cntx.endDatetime))
        stmtReportingPeriods = set(reportingPeriods)       

        sortedRoleTypes.reverse() # now in descending order
        for i, roleTypes in enumerate(sortedRoleTypes):
            roleDefinition, roleType = roleTypes
            # find defined non-default axes in pre hierarchy for table
            tableFacts = set()
            tableGroup, tableSeq, tableName = roleType._tableIndex
            roleURIdims, priItemQNames = EFMlinkRoleURIstructure(modelXbrl, roleType.roleURI)
            for priItemQName in priItemQNames:
                for fact in factsByQname[priItemQName]:
                    cntx = fact.context
                    # non-explicit dims must be default
                    if (cntx is not None and
                        all(dimQn in modelXbrl.qnameDimensionDefaults
                            for dimQn in (roleURIdims.keys() - cntx.qnameDims.keys())) and
                        all(mdlDim.memberQname in roleURIdims[dimQn]
                            for dimQn, mdlDim in cntx.qnameDims.items()
                            if dimQn in roleURIdims)):
                        # the flow-up part, drop
                        cntxStartDatetime = cntx.startDatetime
                        cntxEndDatetime = cntx.endDatetime
                        if (tableGroup != STMTS or
                            (cntxStartDatetime, cntxEndDatetime) in stmtReportingPeriods and
                             (fact not in reportedFacts or
                              all(dimQn not in cntx.qnameDims # unspecified dims are all defaulted if reported elsewhere
                                  for dimQn in (cntx.qnameDims.keys() - roleURIdims.keys())))):
                            tableFacts.add(fact)
                            reportedFacts.add(fact)
            roleType._tableFacts = tableFacts
            
            # find parent if any
            closestParentType = None
            closestParentMatchLength = 0
            for _parentRoleDefinition, parentRoleType in sortedRoleTypes[i+1:]:
                matchLen = parentNameMatchLen(tableName, parentRoleType)
                if matchLen > closestParentMatchLength:
                    closestParentMatchLength = matchLen
                    closestParentType = parentRoleType
            if closestParentType is not None:
                closestParentType._tableChildren.insert(0, roleType)
                
            # remove lesser-matched children if there was a parent match
            unmatchedChildRoles = set()
            longestChildMatchLen = 0
            numChildren = 0
            for childRoleType in roleType._tableChildren:
                matchLen = parentNameMatchLen(tableName, childRoleType)
                if matchLen < closestParentMatchLength:
                    unmatchedChildRoles.add(childRoleType)
                elif matchLen > longestChildMatchLen:
                    longestChildMatchLen = matchLen
                    numChildren += 1
            if numChildren > 1: 
                # remove children that don't have the full match pattern length to parent
                for childRoleType in roleType._tableChildren:
                    if (childRoleType not in unmatchedChildRoles and 
                        parentNameMatchLen(tableName, childRoleType) < longestChildMatchLen):
                        unmatchedChildRoles.add(childRoleType)

            for unmatchedChildRole in unmatchedChildRoles:
                roleType._tableChildren.remove(unmatchedChildRole)

            for childRoleType in roleType._tableChildren:
                childRoleType._tableParent = roleType
                
            unmatchedChildRoles = None # dereference
        
        global UGT_TOPICS
        if UGT_TOPICS is None:
            try:
                from arelle import FileSource
                fh = FileSource.openFileStream(modelXbrl.modelManager.cntlr, 
                                               os.path.join(modelXbrl.modelManager.cntlr.configDir, "ugt-topics.zip/ugt-topics.json"),
                                               'r', 'utf-8')
                UGT_TOPICS = json.load(fh)
                fh.close()
                for topic in UGT_TOPICS:
                    topic[6] = set(topic[6]) # change concept abstracts list into concept abstracts set
                    topic[7] = set(topic[7]) # change concept text blocks list into concept text blocks set
                    topic[8] = set(topic[8]) # change concept names list into concept names set
            except Exception as ex:
                    UGT_TOPICS = None

        if UGT_TOPICS is not None:
            def roleUgtConcepts(roleType):
                roleConcepts = set()
                for rel in modelXbrl.relationshipSet(XbrlConst.parentChild, roleType.roleURI).modelRelationships:
                    if isinstance(rel.toModelObject, ModelConcept):
                        roleConcepts.add(rel.toModelObject.name)
                    if isinstance(rel.fromModelObject, ModelConcept):
                        roleConcepts.add(rel.fromModelObject.name)
                if hasattr(roleType, "_tableChildren"):
                    for _tableChild in roleType._tableChildren:
                        roleConcepts |= roleUgtConcepts(_tableChild)
                return roleConcepts
            topicMatches = {} # topicNum: (best score, roleType)
    
            for roleDefinition, roleType in sortedRoleTypes:
                roleTopicType = 'S' if roleDefinition.startswith('S') else 'D'
                if getattr(roleType, "_tableParent", None) is None:                
                    # rooted tables in reverse order
                    concepts = roleUgtConcepts(roleType)
                    for i, ugtTopic in enumerate(UGT_TOPICS):
                        if ugtTopic[0] == roleTopicType:
                            countAbstracts = len(concepts & ugtTopic[6])
                            countTextBlocks = len(concepts & ugtTopic[7])
                            countLineItems = len(concepts & ugtTopic[8])
                            if countAbstracts or countTextBlocks or countLineItems:
                                _score = (10 * countAbstracts +
                                          1000 * countTextBlocks +
                                          countLineItems / len(concepts))
                                if i not in topicMatches or _score > topicMatches[i][0]:
                                    topicMatches[i] = (_score, roleType)
            for topicNum, scoredRoleType in topicMatches.items():
                _score, roleType = scoredRoleType
                if _score > getattr(roleType, "_tableTopicScore", 0):
                    ugtTopic = UGT_TOPICS[topicNum]
                    roleType._tableTopicScore = _score
                    roleType._tableTopicType = ugtTopic[0]
                    roleType._tableTopicName = ugtTopic[3]
                    roleType._tableTopicCode = ugtTopic[4]
                    # print ("Match score {:.2f} topic {} preGrp {}".format(_score, ugtTopic[3], roleType.definition))
        return (firstTableLinkroleURI or firstDocumentLinkroleURI), None # no restriction on contents linkroles
    elif _isJpFsa:
        # find ELR with only iod:identifierItem subs group concepts
        roleElrs = dict((roleURI, roleType)
                        for roleURI in modelXbrl.relationshipSet(XbrlConst.parentChild).linkRoleUris
                        for roleType in modelXbrl.roleTypes.get(roleURI,()))
        roleIdentifierItems = {}
        for roleURI, roleType in roleElrs.items():
            roleType._tableChildren = []
            relSet = modelXbrl.relationshipSet(XbrlConst.parentChild, roleURI)
            for rootConcept in relSet.rootConcepts:
                if rootConcept.substitutionGroupQname and rootConcept.substitutionGroupQname.localName == "identifierItem":
                    roleIdentifierItems[rootConcept] = roleType
        linkroleUri = None
        for roleURI, roleType in roleElrs.items():
            relSet = modelXbrl.relationshipSet(XbrlConst.parentChild, roleURI)
            def addRoleIdentifiers(fromConcept, parentRoleType, visited):
                for rel in relSet.fromModelObject(fromConcept):
                    _fromConcept = rel.fromModelObject
                    _toConcept = rel.toModelObject
                    if isinstance(_fromConcept, ModelConcept) and isinstance(_toConcept, ModelConcept):
                        _fromSubQn = _fromConcept.substitutionGroupQname
                        _toSubQn = _toConcept.substitutionGroupQname
                        if ((parentRoleType is not None or
                             (_fromSubQn and _fromSubQn.localName == "identifierItem" and _fromConcept in roleIdentifierItems )) and
                            _toSubQn and _toSubQn.localName == "identifierItem" and
                            _toConcept in roleIdentifierItems):
                            if parentRoleType is None:
                                parentRoleType = roleIdentifierItems[_fromConcept]
                            _toRoleType = roleIdentifierItems[_toConcept]
                            if _toConcept not in parentRoleType._tableChildren:
                                parentRoleType._tableChildren.append(_toRoleType)
                            if _toConcept not in visited:
                                visited.add(_toConcept)
                                addRoleIdentifiers(_toConcept, _toRoleType, visited)
                                visited.discard(_toConcept)
                        elif _toConcept not in visited:
                            visited.add(_toConcept)
                            addRoleIdentifiers(_toConcept, parentRoleType, visited)
                            visited.discard(_toConcept)
            for rootConcept in relSet.rootConcepts:
                addRoleIdentifiers(rootConcept, None, set())
                if not linkroleUri and len(roleType._tableChildren) > 0:
                    linkroleUri = roleURI
        return linkroleUri, linkroleUri  # only show linkroleUri in index table   
    elif _ifrsStyleELRs: 
        for roleType in definitionElrs.values():
            roleType._tableChildren = []
        return sortedRoleTypes[0][1], None # first link role in order             
    return None, None
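
The json.load call in this example reads a bundled JSON resource (ugt-topics.json inside a zip) once, caches it in the module-level UGT_TOPICS global, and falls back to None on any error. A reduced sketch of that caching pattern using only the standard library; the topics.zip/topics.json names are hypothetical:

import io
import json
import zipfile

UGT_TOPICS = None  # module-level cache, filled on first successful load

def load_ugt_topics(zip_path="topics.zip", member="topics.json"):
    """Load and cache the topics table; return None if it cannot be read."""
    global UGT_TOPICS
    if UGT_TOPICS is None:
        try:
            with zipfile.ZipFile(zip_path) as zf:
                with io.TextIOWrapper(zf.open(member), encoding="utf-8") as fh:
                    UGT_TOPICS = json.load(fh)
            for topic in UGT_TOPICS:
                # convert the concept-name lists to sets for fast membership tests
                topic[6] = set(topic[6])
                topic[7] = set(topic[7])
                topic[8] = set(topic[8])
        except Exception:
            UGT_TOPICS = None  # mirror the example: any failure leaves the cache empty
    return UGT_TOPICS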

Example 152

Project: grokmirror Source File: fsck.py
def fsck_mirror(name, config, verbose=False, force=False):
    global logger
    logger = logging.getLogger(name)
    logger.setLevel(logging.DEBUG)

    if 'log' in config.keys():
        ch = logging.FileHandler(config['log'])
        formatter = logging.Formatter(
            "[%(process)d] %(asctime)s - %(levelname)s - %(message)s")
        ch.setFormatter(formatter)
        loglevel = logging.INFO

        if 'loglevel' in config.keys():
            if config['loglevel'] == 'debug':
                loglevel = logging.DEBUG

        ch.setLevel(loglevel)
        logger.addHandler(ch)

    ch = logging.StreamHandler()
    formatter = logging.Formatter('%(message)s')
    ch.setFormatter(formatter)

    if verbose:
        ch.setLevel(logging.INFO)
    else:
        ch.setLevel(logging.CRITICAL)

    logger.addHandler(ch)

    # push it into grokmirror to override the default logger
    grokmirror.logger = logger

    logger.info('Running grok-fsck for [%s]' % name)

    # Lock the tree to make sure we only run one instance
    logger.debug('Attempting to obtain lock on %s' % config['lock'])
    flockh = open(config['lock'], 'w')
    try:
        lockf(flockh, LOCK_EX | LOCK_NB)
    except IOError:
        logger.info('Could not obtain exclusive lock on %s' % config['lock'])
        logger.info('Assuming another process is running.')
        return 0

    manifest = grokmirror.read_manifest(config['manifest'])

    if os.path.exists(config['statusfile']):
        logger.info('Reading status from %s' % config['statusfile'])
        stfh = open(config['statusfile'], 'r')
        try:
            # Format of the status file:
            #  {
            #    '/full/path/to/repository': {
            #      'lastcheck': 'YYYY-MM-DD' or 'never',
            #      'nextcheck': 'YYYY-MM-DD',
            #      'lastrepack': 'YYYY-MM-DD',
            #      'fingerprint': 'sha-1',
            #      's_elapsed': seconds,
            #      'quick_repack_count': times,
            #    },
            #    ...
            #  }

            status = json.load(stfh)
        except:
            # Huai le! ("It's broken!")
            logger.critical('Failed to parse %s' % config['statusfile'])
            lockf(flockh, LOCK_UN)
            flockh.close()
            return 1
    else:
        status = {}

    frequency = int(config['frequency'])

    today = datetime.datetime.today()

    # Go through the manifest and compare with status
    for gitdir in manifest.keys():
        fullpath = os.path.join(config['toplevel'], gitdir.lstrip('/'))
        if fullpath not in status.keys():
            # Newly added repository
            # Randomize next check between now and frequency
            delay = random.randint(0, frequency)
            nextdate = today + datetime.timedelta(days=delay)
            nextcheck = nextdate.strftime('%F')
            status[fullpath] = {
                'lastcheck': 'never',
                'nextcheck': nextcheck,
            }
            logger.info('Added new repository %s with next check on %s' % (
                gitdir, nextcheck))

    total_checked = 0
    total_elapsed = 0

    # Go through status and queue checks for all the dirs that are due today
    # (unless --force, which is EVERYTHING)
    todayiso = today.strftime('%F')
    for fullpath in status.keys():
        # Check to make sure it's still in the manifest
        gitdir = fullpath.replace(config['toplevel'], '', 1)
        gitdir = '/' + gitdir.lstrip('/')

        if gitdir not in manifest.keys():
            del status[fullpath]
            logger.info('Removed %s which is no longer in manifest' % gitdir)
            continue

        # If nextcheck is before today, set it to today
        # XXX: If a system comes up after being in downtime for a while, this
        #      may cause pain for them, so perhaps use randomization here?
        nextcheck = datetime.datetime.strptime(status[fullpath]['nextcheck'],
                                               '%Y-%m-%d')

        if force or nextcheck <= today:
            logger.debug('Preparing to check %s' % fullpath)
            # Calculate elapsed seconds
            startt = time.time()
            run_git_fsck(fullpath, config)
            total_checked += 1

            # Did the fingerprint change since last time we repacked?
            oldfpr = None
            if 'fingerprint' in status[fullpath].keys():
                oldfpr = status[fullpath]['fingerprint']

            fpr = grokmirror.get_repo_fingerprint(config['toplevel'], gitdir, force=True)

            if fpr != oldfpr or force:
                full_repack = False
                if not 'quick_repack_count' in status[fullpath].keys():
                    status[fullpath]['quick_repack_count'] = 0

                quick_repack_count = status[fullpath]['quick_repack_count']
                if 'full_repack_every' in config.keys():
                    # but did you set 'full_repack_flags' as well?
                    if 'full_repack_flags' not in config.keys():
                        logger.critical('full_repack_every is set, but not full_repack_flags')
                    else:
                        full_repack_every = int(config['full_repack_every'])
                        # is it anything insane?
                        if full_repack_every < 2:
                            full_repack_every = 2
                            logger.warning('full_repack_every is too low, forced to 2')

                        # is it time to trigger full repack?
                        # We -1 because if we want a repack every 10th time, then we need to trigger
                        # when current repack count is 9.
                        if quick_repack_count >= full_repack_every-1:
                            logger.debug('Time to do full repack on %s' % fullpath)
                            full_repack = True
                            quick_repack_count = 0
                            status[fullpath]['lastfullrepack'] = todayiso
                        else:
                            logger.debug('Repack count for %s not yet reached full repack trigger' % fullpath)
                            quick_repack_count += 1

                run_git_repack(fullpath, config, full_repack)
                run_git_prune(fullpath, config, manifest)
                status[fullpath]['lastrepack'] = todayiso
                status[fullpath]['quick_repack_count'] = quick_repack_count

            else:
                logger.debug('No changes to %s since last run, not repacking' % gitdir)

            endt = time.time()

            total_elapsed += endt-startt

            status[fullpath]['fingerprint'] = fpr
            status[fullpath]['lastcheck'] = todayiso
            status[fullpath]['s_elapsed'] = int(endt - startt)

            if force:
                # Use randomization for next check, again
                delay = random.randint(1, frequency)
            else:
                delay = frequency

            nextdate = today + datetime.timedelta(days=delay)
            status[fullpath]['nextcheck'] = nextdate.strftime('%F')

            # Write status file after each check, so if the process dies, we won't
            # have to recheck all the repos we've already checked
            logger.debug('Updating status file in %s' % config['statusfile'])
            stfh = open(config['statusfile'], 'w')
            json.dump(status, stfh, indent=2)
            stfh.close()

    if not total_checked:
        logger.info('No new repos to check.')
    else:
        logger.info('Repos checked: %s' % total_checked)
        logger.info('Total running time: %s s' % int(total_elapsed))

    lockf(flockh, LOCK_UN)
    flockh.close()
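
The json.load call here sits inside a read-modify-write cycle on a status file: parse it if it exists (treating a parse failure as fatal), update entries as repositories are checked, and json.dump the result back after each check. A minimal sketch of that cycle with a hypothetical status.json path:

import json
import os

def load_status(path="status.json"):
    """Return the saved status dict, or an empty dict if the file does not exist."""
    if not os.path.exists(path):
        return {}
    with open(path, "r") as fh:
        return json.load(fh)  # a parse error here means the file is corrupt

def save_status(status, path="status.json"):
    """Write the status back in a stable, human-readable form."""
    with open(path, "w") as fh:
        json.dump(status, fh, indent=2)

status = load_status()
status.setdefault("/srv/git/example.git", {"lastcheck": "never"})
save_status(status)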

Example 153

Project: realdebrid-CLI Source File: rdcli.py
def main():
    """
    Main program
    """

    base = path.join(path.expanduser('~'), '.config', 'rdcli-py')
    conf_file = path.join(base, 'conf.json')
    cookie_file = path.join(base, 'cookie.txt')

    list_only = False
    test = False
    verbose = True
    timeout = 120

    # make sure the config dir exists
    if not path.exists(base):
        makedirs(base)

    try:
        with open(conf_file, 'r') as conf:
            configuration = load(conf)
    except (IOError, ValueError):
        configuration = {}

    # the default output dir is taken from the config file
    # if it hasn't been configured, then use the current directory
    output_dir = configuration.get('output_dir', getcwd())
    download_password = ''

    worker = RDWorker(cookie_file)

    # parse command-line arguments
    try:
        opts, args = gnu_getopt(argv[1:], 'hviqtlp:o:T:O:', ['config', 'version'])
    except GetoptError as e:
        print str(e)
        print_help()
        exit(1)

    for option, argument in opts:
        if option == '-h':
            print_help()
            exit(0)
        if option == '--version' or option == '-v':
            print_version()
            exit(0)
        if option == '--config':
            config_args = argv[2:]

            if len(config_args) == 0:
                print 'Error: No configuration option supplied'
                exit(1)
            if len(config_args) == 1:
                config_args.append(None)

            if len(config_args) > 2:
                print 'WARNING: the following values have been ignored:', ', '.join(config_args[2:])
                config_args = config_args[0:2]

            config.update_value(*config_args, conf_file=conf_file)
            exit(0)
        elif option == '-i':
            username, password = config.ask_credentials()
            config.save_credentials(username, password, conf_file)
        elif option == '-q':
            if not list_only:
                verbose = False
        elif option == '-t':
            if not list_only:
                test = True
        elif option == '-l':
            list_only = True
            test = False
            verbose = False
        elif option == '-o':
            output_dir = argument
        elif option == '-p':
            download_password = argument
        elif option == '-T':
            timeout = int(argument)
        elif option == '-O':
            filename = argument

    # stop now if no download and no output wanted
    if test and not verbose:
        exit(0)

    if verbose:
        def debug(s):
            print s,
    else:
        def debug(s):
            pass

    # make sure we have something to process
    if len(args) > 0:
        output_dir = path.abspath(path.expanduser(output_dir))
        # ensure we can write in output directory
        if not output_dir == getcwd() and not path.exists(unicode(output_dir)):
            debug('%s no such directory' % unicode(output_dir))
            exit(1)
        else:
            if not access(output_dir, W_OK | X_OK):
                debug('Output directory not writable')
                exit(1)
            else:
                debug(u'Output directory: %s\n' % output_dir)

        # retrieve login info
        try:
            with open(conf_file, 'r') as conf:
                configuration = load(conf)
                username = configuration.get('username', '')
                password = configuration.get('password', '')
        except (KeyError, IOError, ValueError):
            username, password = config.ask_credentials()
            config.save_credentials(username, password, conf_file)

        # login
        try:
            worker.login(username, password)
        except BaseException as e:
            exit('Login failed: %s' % str(e))

        if path.isfile(args[0]):
            with open(args[0], 'r') as f:
                links = f.readlines()
        else:
            links = args[0].splitlines()

        # unrestrict and download
        for link in links:
            link = link.strip()
            debug('\nUnrestricting %s' % link)

            try:
                unrestricted, original_filename = worker.unrestrict(link, download_password)
                debug(u' -> ' + unrestricted + '\n')

                if list_only:
                    print unrestricted
                elif not test:

                    if len(links) == 1:
                        try:
                            fullpath = path.join(output_dir, filename)
                        except NameError:
                            fullpath = path.join(output_dir, original_filename)
                    else:
                        fullpath = path.join(output_dir, original_filename)

                    try:
                        to_mb = lambda b: b / 1048576.
                        to_kb = lambda b: b / 1024.

                        opener = build_opener(HTTPCookieProcessor(worker.cookies))
                        stream = opener.open(unrestricted)
                        info = stream.info().getheaders('Content-Length')

                        total_size = 0
                        downloaded_size = 0

                        if len(info):
                            total_size = float(info[0])
                            start = 'Downloading: %s (%.2f MB)\n' % (fullpath, to_mb(total_size))
                        else:
                            start = 'Downloading: %s (unknown size)\n' % fullpath

                        debug(start)

                        with open(fullpath, 'wb') as output:
                            start = datetime.now()
                            end = datetime.now()

                            if verbose:
                                status = ''

                            while True:
                                try:
                                    content = stream.read(20480)  # 20 KB

                                    if not content:
                                        break

                                    output.write(content)
                                    downloaded_size += len(content)

                                    if verbose:
                                        padding_length = len(status)
                                        last_downloaded = len(content)

                                        if last_downloaded > 1024:
                                            speed = to_mb(last_downloaded) / (datetime.now() - end).total_seconds()
                                            unit = 'MB/s'
                                        else:
                                            speed = to_kb(last_downloaded) / (datetime.now() - end).total_seconds()
                                            unit = 'kB/s'

                                        status = '\r%.3f MB' % to_mb(downloaded_size)

                                        if total_size > 0:
                                            status += '  [%3.2f%%]' % (downloaded_size * 100. / total_size)

                                        status += '  @ %.2f %s' % (speed, unit)

                                        print status.ljust(padding_length),
                                        end = datetime.now()

                                except KeyboardInterrupt:
                                    break

                            output.flush()
                            stream.close()

                        speed = to_mb(downloaded_size) / (end - start).total_seconds()

                        if total_size > 0:
                            final_status = '%.2f MB [%.2f%%] downloaded in %s (%.2f MB/s avg.)' \
                                           % (to_mb(downloaded_size), (downloaded_size * 100. / total_size),
                                              str(end - start).split('.')[0], speed)
                        else:
                            final_status = '%.2f MB downloaded in %s (%.2f MB/s avg.)' \
                                           % (to_mb(downloaded_size), str(end - start).split('.')[0], speed)
                        debug('\r%s\n' % final_status)
                    except BaseException as e:
                        debug('\nDownload failed: %s\n' % e)
            except UnrestrictionError as e:
                debug('-> WARNING, unrestriction failed (%s)' % str(e) + '\n')

        debug('End\n')
        return 0
    else:
        print_help()
        exit(1)
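
In this example json.load is imported as a bare load and wrapped in try/except so that a missing or malformed conf.json simply yields an empty configuration. A small sketch of that tolerant-config pattern; the config path shown is a placeholder:

from json import load
from os import path

def read_config(conf_file=path.expanduser("~/.config/example/conf.json")):
    """Return the configuration dict, or {} when the file is absent or invalid."""
    try:
        with open(conf_file, "r") as conf:
            return load(conf)
    except (IOError, ValueError):
        # IOError: file missing or unreadable; ValueError: not valid JSON
        return {}

configuration = read_config()
output_dir = configuration.get("output_dir", ".")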

Example 154

Project: lrrbot Source File: affc03cb46f5_game_data.py
def upgrade():
	conn = alembic.context.get_context().bind
	meta = sqlalchemy.MetaData(bind=conn)
	meta.reflect()
	users = meta.tables["users"]
	all_users = dict(conn.execute(sqlalchemy.select([users.c.name, users.c.id])).fetchall())

	shows = alembic.op.create_table(
		"shows",
		sqlalchemy.Column("id", sqlalchemy.Integer, primary_key=True),
		sqlalchemy.Column("string_id", sqlalchemy.Text, nullable=False, unique=True),
		sqlalchemy.Column("name", sqlalchemy.Text, nullable=False),
	)

	alembic.op.execute(sqlalchemy.schema.CreateSequence(sqlalchemy.Sequence("games_id_seq", start=-1, increment=-1)))
	games = alembic.op.create_table(
		"games",
		sqlalchemy.Column("id", sqlalchemy.Integer, sqlalchemy.Sequence("game_id_seq"), primary_key=True, server_default=sqlalchemy.func.nextval('games_id_seq')),
		sqlalchemy.Column("name", sqlalchemy.Text, unique=True, nullable=False),
	)
	alembic.op.execute("ALTER SEQUENCE games_id_seq OWNED BY games.id")

	game_per_show_data = alembic.op.create_table(
		"game_per_show_data",
		sqlalchemy.Column("game_id", sqlalchemy.Integer, sqlalchemy.ForeignKey("games.id", ondelete="CASCADE", onupdate="CASCADE"), nullable=False),
		sqlalchemy.Column("show_id", sqlalchemy.Integer, sqlalchemy.ForeignKey("shows.id", ondelete="CASCADE", onupdate="CASCADE"), nullable=False),
		sqlalchemy.Column("display_name", sqlalchemy.Text),
		sqlalchemy.Column("verified", sqlalchemy.Boolean),
	)
	alembic.op.create_primary_key("game_per_show_data_pk", "game_per_show_data", ["game_id", "show_id"])

	stats = alembic.op.create_table(
		"stats",
		sqlalchemy.Column("id", sqlalchemy.Integer, primary_key=True),
		sqlalchemy.Column("string_id", sqlalchemy.Text, nullable=False, unique=True),
		sqlalchemy.Column("singular", sqlalchemy.Text),
		sqlalchemy.Column("plural", sqlalchemy.Text),
		sqlalchemy.Column("emote", sqlalchemy.Text),
	)

	game_stats = alembic.op.create_table(
		"game_stats",
		sqlalchemy.Column("game_id", sqlalchemy.Integer, sqlalchemy.ForeignKey("games.id", ondelete="CASCADE", onupdate="CASCADE"), nullable=False),
		sqlalchemy.Column("show_id", sqlalchemy.Integer, sqlalchemy.ForeignKey("shows.id", ondelete="CASCADE", onupdate="CASCADE"), nullable=False),
		sqlalchemy.Column("stat_id", sqlalchemy.Integer, sqlalchemy.ForeignKey("shows.id", ondelete="CASCADE", onupdate="CASCADE"), nullable=False),
		sqlalchemy.Column("count", sqlalchemy.Integer, nullable=False),
	)
	alembic.op.create_primary_key("game_stats_pk", "game_stats", ["game_id", "show_id", "stat_id"])

	game_votes = alembic.op.create_table(
		"game_votes",
		sqlalchemy.Column("game_id", sqlalchemy.Integer, sqlalchemy.ForeignKey("games.id", ondelete="CASCADE", onupdate="CASCADE"), nullable=False),
		sqlalchemy.Column("show_id", sqlalchemy.Integer, sqlalchemy.ForeignKey("shows.id", ondelete="CASCADE", onupdate="CASCADE"), nullable=False),
		sqlalchemy.Column("user_id", sqlalchemy.Integer, sqlalchemy.ForeignKey("users.id", ondelete="CASCADE", onupdate="CASCADE"), nullable=False),
		sqlalchemy.Column("vote", sqlalchemy.Boolean, nullable=False),
	)
	alembic.op.create_primary_key("game_votes_pk", "game_votes", ["game_id", "show_id", "user_id"])

	disabled_stats = alembic.op.create_table(
		"disabled_stats",
		sqlalchemy.Column("show_id", sqlalchemy.Integer, sqlalchemy.ForeignKey("shows.id", ondelete="CASCADE", onupdate="CASCADE"), nullable=False),
		sqlalchemy.Column("stat_id", sqlalchemy.Integer, sqlalchemy.ForeignKey("stats.id", ondelete="CASCADE", onupdate="CASCADE"), nullable=False),
	)
	alembic.op.create_primary_key("disabled_stats_pk", "disabled_stats", ["show_id", "stat_id"])

	# Move data
	datafile = alembic.context.config.get_section_option("lrrbot", "datafile", "data.json")
	clientid = alembic.context.config.get_section_option("lrrbot", "twitch_clientid")
	with open(datafile) as f:
		data = json.load(f)

	# stats
	alembic.op.bulk_insert(stats, [{
		"string_id": string_id,
		"emote": values.get("emote"),
		"plural": values.get("plural"),
		"singular": values.get("singular"),
	} for string_id, values in data.get("stats", {}).items()])
	all_stats = dict(conn.execute(sqlalchemy.select([stats.c.string_id, stats.c.id])).fetchall())

	# shows
	alembic.op.bulk_insert(shows, [{
		"string_id": show,
		"name": values["name"],
	} for show, values in data.get("shows", {}).items()])
	all_shows = dict(conn.execute(sqlalchemy.select([shows.c.string_id, shows.c.id])).fetchall())

	# games
	def parse_id(id):
		if id is None:
			return None
		try:
			return int(id)
		except ValueError:
			return None
	for show in data.get("shows", {}).values():
		for game_id, game in show.get("games", {}).items():
			game_id = parse_id(game_id) or parse_id(game.get("id"))
			if game_id is None:
				conn.execute("INSERT INTO games (name) VALUES (%(name)s) ON CONFLICT (name) DO NOTHING", {"name": game["name"]})
			else:
				conn.execute("""
					INSERT INTO games (
						id,
						name
					) VALUES (
						%(id)s,
						%(name)s
					) ON CONFLICT (name) DO UPDATE SET
						id = EXCLUDED.id
				""", {"id": game_id, "name": game["name"]})
	all_games = dict(conn.execute(sqlalchemy.select([games.c.name, games.c.id])).fetchall())

	# game_per_show_data
	display_names = []
	for show_id, show in data.get("shows", {}).items():
		for game in show.get("games", {}).values():
			if "display" in game:
				display_names.append({
					"show_id": all_shows[show_id],
					"game_id": parse_id(game.get("id")) or all_games[game["name"]],
					"display_name": game["display"],
				})
	alembic.op.bulk_insert(game_per_show_data, display_names)

	# game_stats
	all_game_stats = []
	for show_id, show in data.get("shows", {}).items():
		for game in show.get("games", {}).values():
			game_id = parse_id(game.get("id")) or all_games[game["name"]]
			for stat, count in game.get("stats", {}).items():
				all_game_stats.append({
					"show_id": all_shows[show_id],
					"game_id": game_id,
					"stat_id": all_stats[stat],
					"count": count,
				})
	alembic.op.bulk_insert(game_stats, all_game_stats)

	# game_votes
	all_votes = []
	with requests.Session() as session:
		for show_id, show in data.get("shows", {}).items():
			for game in show.get("games", {}).values():
				game_id = parse_id(game.get("id")) or all_games[game["name"]]
				for nick, vote in game.get("votes", {}).items():
					if nick not in all_users:
						try:
							req = session.get("https://api.twitch.tv/kraken/users/%s" % urllib.parse.quote(nick), headers={'Client-ID': clientid})
							req.raise_for_status()
							user = req.json()
							all_users[nick] = user["_id"]
							alembic.op.bulk_insert(users, [{
								"id": user["_id"],
								"name": user["name"],
								"display_name": user.get("display_name"),
							}])
						except Exception:
							log.exception("Failed to fetch data for %r", nick)
							all_users[nick] = None
					if all_users[nick] is None:
						continue
					all_votes.append({
						"show_id": all_shows[show_id],
						"game_id": game_id,
						"user_id": all_users[nick],
						"vote": vote,
					})
	alembic.op.bulk_insert(game_votes, all_votes)

	# disabled_stats
	if "swiftlycam" in all_shows:
		for_cameron = []
		if "death" in all_stats:
			for_cameron.append({
				"show_id": all_shows["swiftlycam"],
				"stat_id": all_stats["death"]
			})
		if "tilt" in all_stats:
			for_cameron.append({
				"show_id": all_shows["swiftlycam"],
				"stat_id": all_stats["tilt"]
			})
		if "pave" in all_stats:
			for_cameron.append({
				"show_id": all_shows["swiftlycam"],
				"stat_id": all_stats["pave"],
			})
		alembic.op.bulk_insert(disabled_stats, for_cameron)

	alembic.op.add_column("quotes", sqlalchemy.Column("game_id", sqlalchemy.Integer, sqlalchemy.ForeignKey("games.id", ondelete="CASCADE", onupdate="CASCADE")))
	alembic.op.add_column("quotes", sqlalchemy.Column("show_id", sqlalchemy.Integer, sqlalchemy.ForeignKey("shows.id", ondelete="CASCADE", onupdate="CASCADE")))
	alembic.op.execute("""
		UPDATE quotes
		SET
			show_id = shows.id
		FROM shows
		WHERE quotes.show = shows.name
	""")
	alembic.op.execute("""
		UPDATE quotes
		SET
			game_id = game_per_show_data.game_id
		FROM game_per_show_data
		WHERE quotes.game = game_per_show_data.display_name AND game_per_show_data.show_id = quotes.show_id
	""")
	alembic.op.execute("""
		UPDATE quotes
		SET
			game_id = games.id
		FROM games
		WHERE quotes.game = games.name
	""")
	alembic.op.drop_column("quotes", "game")
	alembic.op.drop_column("quotes", "show")

	data.pop("shows", None)
	data.pop("stats", None)
	with open(datafile, "w") as f:
		json.dump(data, f, indent=2, sort_keys=True)
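
The migration reads the whole data file with json.load, copies the "shows" and "stats" sections into database tables, then pops those sections and rewrites the remainder with json.dump. A stripped-down sketch of that load/transform/rewrite round trip (data.json and the key names are placeholders, and the database work is left out):

import json

def migrate_keys(datafile="data.json", keys=("shows", "stats")):
    """Pull the named sections out of the JSON file and rewrite what is left."""
    with open(datafile) as f:
        data = json.load(f)

    migrated = {k: data.pop(k, None) for k in keys}  # hand these to the DB layer

    with open(datafile, "w") as f:
        json.dump(data, f, indent=2, sort_keys=True)
    return migrated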

Example 155

Project: lrrbot Source File: build_carddb.py
def main():
	if not do_download_file(URL, ZIP_FILENAME) and not os.access(EXTRAS_FILENAME, os.F_OK):
		print("No new version of mtgjson data file")
		return

	print("Reading card data...")
	with zipfile.ZipFile(ZIP_FILENAME) as zfp:
		fp = io.TextIOWrapper(zfp.open(SOURCE_FILENAME))
		mtgjson = json.load(fp)

	try:
		with open(EXTRAS_FILENAME) as fp:
			extracards = json.load(fp)
	except IOError:
		pass
	else:
		# If the set is in both mtgjson and the extra data, use the one from mtgjson
		extracards.update(mtgjson)
		mtgjson = extracards
		del extracards

	print("Processing...")
	cards = metadata.tables["cards"]
	card_multiverse = metadata.tables["card_multiverse"]
	card_collector = metadata.tables["card_collector"]
	with engine.begin() as conn:
		conn.execute(card_multiverse.delete())
		conn.execute(card_collector.delete())
		conn.execute(cards.delete())
		cardid = 0
		for setid, expansion in mtgjson.items():
			release_date = dateutil.parser.parse(expansion.get('releaseDate', '1970-01-01')).date()
			for card in expansion['cards']:
				cardid += 1
				if card['layout'] in ('token', 'plane', 'scheme', 'phenomenon', 'vanguard'):  # don't care about these special cards for now
					continue
				if card['name'] == 'B.F.M. (Big Furry Monster)':  # do this card special
					continue

				cardname, description, multiverseids, collector = process_card(card, expansion)
				if description is None:
					continue

				# Check if there's already a row for this card in the DB
				# (keep the one with the latest release date - it's more likely to have the accurate text in mtgjson)
				rows = conn.execute(sqlalchemy.select([cards.c.id, cards.c.lastprinted])
					.where(cards.c.filteredname == cardname)).fetchall()
				if not rows:
					real_cardid = cardid
					conn.execute(cards.insert(),
						id=real_cardid,
						filteredname=cardname,
						name=card['name'],
						text=description,
						lastprinted=release_date,
					)
				elif rows[0][1] < release_date:
					real_cardid = rows[0][0]
					conn.execute(cards.update().where(cards.c.id == real_cardid),
						name=card["name"],
						text=description,
						lastprinted=release_date,
					)
				else:
					real_cardid = rows[0][0]

				for mid in multiverseids:
					rows = conn.execute(sqlalchemy.select([card_multiverse.c.cardid])
						.where(card_multiverse.c.id == mid)).fetchall()
					if not rows:
						conn.execute(card_multiverse.insert(),
							id=mid,
							cardid=real_cardid,
						)
					elif rows[0][0] != real_cardid:
						rows2 = conn.execute(sqlalchemy.select([cards.c.name]).where(cards.c.id == rows[0][0])).fetchall()
						print("Different names for multiverseid %d: \"%s\" and \"%s\"" % (mid, card['name'], rows2[0][0]))

				if collector:
					rows = conn.execute(sqlalchemy.select([card_collector.c.cardid])
						.where((card_collector.c.setid == setid) & (card_collector.c.collector == collector))).fetchall()
					if not rows:
						conn.execute(card_collector.insert(),
							setid=setid,
							collector=collector,
							cardid=real_cardid,
						)
					elif rows[0][0] != real_cardid:
						rows2 = conn.execute(sqlalchemy.select([cards.c.name]).where(cards.c.id == rows[0][0])).fetchall()
						print("Different names for set %s collector number %s: \"%s\" and \"%s\"" % (setid, collector, card['name'], rows2[0][0]))

		cardid += 1
		conn.execute(cards.insert(),
			id=cardid,
			filteredname="bfmbigfurrymonster",
			name="B.F.M. (Big Furry Monster)""B.F.M. (Big Furry Monster)",
			text="B.F.M. (Big Furry Monster) (BBBBBBBBBBBBBBB) | Summon \u2014 The Biggest, Baddest, Nastiest, Scariest Creature You'll Ever See [99/99] | You must play both B.F.M. cards to put B.F.M. into play. If either B.F.M. card leaves play, sacrifice the other. / B.F.M. can only be blocked by three or more creatures.",
			lastprinted=datetime.date(1998, 8, 11),
		)
		conn.execute(card_multiverse.insert(), [
			{"id": 9780, "cardid": cardid},
			{"id": 9844, "cardid": cardid},
		])
		conn.execute(card_collector.insert(), [
			{"setid": "UGL", "collector": "28", "cardid": cardid},
			{"setid": "UGL", "collector": "29", "cardid": cardid},
		])
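
Two json.load calls feed this importer: one reads the card data straight out of a zip archive through io.TextIOWrapper, the other reads an optional extras file whose sets are overridden by the zipped data when both define the same key. A condensed sketch of that merge; cards.zip, cards.json and extras.json are placeholder names:

import io
import json
import zipfile

def load_card_data(zip_name="cards.zip", member="cards.json", extras_name="extras.json"):
    """Return the merged card data, preferring entries from the zipped source."""
    with zipfile.ZipFile(zip_name) as zfp:
        with io.TextIOWrapper(zfp.open(member), encoding="utf-8") as fp:
            main = json.load(fp)

    try:
        with open(extras_name) as fp:
            extras = json.load(fp)
    except IOError:
        return main  # no extras file; use the zipped data as-is

    extras.update(main)  # on key collisions the zipped data wins
    return extras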

Example 156

Project: greentea Source File: mbed_test_api.py
def get_test_spec(opts):
    """! Closure encapsulating how we get test specification and load it from file of from yotta module
    @return Returns tuple of (test specification, ret code). Test specification == None if test spec load was not successful
    """
    test_spec = None

    # Check if test_spec.json file exist, if so we will pick it up as default file and load it
    test_spec_file_name = opts.test_spec
    test_spec_file_name_list = []

    # Note: test_spec.json will have higher priority than module.json file
    #       so if we are inside directory with module.json and test_spec.json we will use test spec file
    #       instead of using yotta's module.json file

    def get_all_test_specs_from_build_dir(path_to_scan):
        """! Searches for all test_spec.json files
        @param path_to_scan Directory path used to recursively search for test_spec.json
        @result List of locations of test_spec.json
        """
        return [os.path.join(dp, f) for dp, dn, filenames in os.walk(path_to_scan) for f in filenames if f == 'test_spec.json']

    def merge_multiple_test_specifications_from_file_list(test_spec_file_name_list):
        """! For each file in test_spec_file_name_list merge all test specifications into one
        @param test_spec_file_name_list List of paths to different test specifications
        @return TestSpec object with all test specification data inside
        """

        def copy_builds_between_test_specs(source, destination):
            """! Copies build key-value pairs between two test_spec dicts
                @param source Source dictionary
                @param destination Dictionary which will be updated with 'builds' key-values
                @return Dictionary with merged source
            """
            result = destination.copy()
            if 'builds' in source and 'builds' in destination:
                for k in source['builds']:
                    result['builds'][k] = source['builds'][k]
            return result

        merged_test_spec = {}
        for test_spec_file in test_spec_file_name_list:
            gt_logger.gt_log_tab("using '%s'"% test_spec_file)
            try:
                with open(test_spec_file, 'r') as f:
                    test_spec_data = json.load(f)
                    merged_test_spec = copy_builds_between_test_specs(merged_test_spec, test_spec_data)
            except Exception as e:
                gt_logger.gt_log_err("Unexpected error while processing '%s' test specification file"% test_spec_file)
                gt_logger.gt_log_tab(str(e))
                merged_test_spec = {}

        test_spec = TestSpec()
        test_spec.parse(merged_test_spec)
        return test_spec

    # Test specification look-up
    if opts.test_spec:
        # Loading test specification from command line specified file
        gt_logger.gt_log("test specification file '%s' (specified with --test-spec option)"% opts.test_spec)
    elif os.path.exists('test_spec.json'):
        # Test specification file exists in current directory
        gt_logger.gt_log("using 'test_spec.json' from current directory!")
        test_spec_file_name = 'test_spec.json'
    elif 'BUILD' in os.listdir(os.getcwd()):
        # Checking 'BUILD' directory for test specifications
        # Using `os.listdir()` since it preserves case
        test_spec_file_name_list = get_all_test_specs_from_build_dir('BUILD')
    elif os.path.exists('.build'):
        # Checking .build directory for test specifications
        test_spec_file_name_list = get_all_test_specs_from_build_dir('.build')
    elif os.path.exists('mbed-os') and 'BUILD' in os.listdir('mbed-os'):
        # Checking mbed-os/BUILD directory for test specifications
        # Using `os.listdir()` since it preserves case
        test_spec_file_name_list = get_all_test_specs_from_build_dir(os.path.join('mbed-os', 'BUILD'))
    elif os.path.exists(os.path.join('mbed-os', '.build')):
        # Checking mbed-os/.build directory for test specifications
        test_spec_file_name_list = get_all_test_specs_from_build_dir(os.path.join('mbed-os', '.build'))

    # Actual load and processing of test specification from sources
    if test_spec_file_name:
        # Test specification from command line (--test-spec) or default test_spec.json will be used
        gt_logger.gt_log("using '%s' from current directory!"% test_spec_file_name)
        test_spec = TestSpec(test_spec_file_name)
        if opts.list_binaries:
            list_binaries_for_builds(test_spec)
            return None, 0
    elif test_spec_file_name_list:
        # Merge multiple test specs into one and keep calm
        gt_logger.gt_log("using multiple test specifications from current directory!")
        test_spec = merge_multiple_test_specifications_from_file_list(test_spec_file_name_list)
        if opts.list_binaries:
            list_binaries_for_builds(test_spec)
            return None, 0
    elif os.path.exists('module.json'):
        # If inside yotta module load module data and generate test spec
        gt_logger.gt_log("using 'module.json' from current directory!")
        if opts.list_binaries:
            # List available test binaries (names, no extension)
            list_binaries_for_targets()
            return None, 0
        else:
            test_spec = get_test_spec_from_yt_module(opts)
    else:
        gt_logger.gt_log_err("greentea should be run inside a Yotta module or --test-spec switch should be used")
        return None, -1
    return test_spec, 0
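
The noteworthy json.load usage here is the merge helper: every discovered test_spec.json is loaded independently and its 'builds' entries are folded into a single dict. A bare-bones sketch of one way to do that fold (unlike the example above, it skips an unreadable file instead of resetting the whole merge):

import json

def merge_builds(spec_paths):
    """Combine the 'builds' sections of several test_spec.json files into one dict."""
    merged = {"builds": {}}
    for spec_path in spec_paths:
        try:
            with open(spec_path, "r") as f:
                spec = json.load(f)
        except (IOError, ValueError) as e:
            print("skipping %s: %s" % (spec_path, e))  # keep going on bad files
            continue
        merged["builds"].update(spec.get("builds", {}))
    return merged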

Example 157

Project: yotta Source File: pack.py
    def __init__(
            self,
            path,
            description_filename,
            installed_linked,
            schema_filename = None,
            latest_suitable_version = None,
            inherit_shrinkwrap = None
        ):
        # version, , represent versions and specifications, internal
        from yotta.lib import version
        # vcs, , represent version controlled directories, internal
        from yotta.lib import vcs

        # resolve links at creation time, to minimise path lengths:
        self.unresolved_path = path
        self.path = fsutils.realpath(path)
        self.installed_linked = installed_linked
        self.vcs = None
        self.error = None
        self.latest_suitable_version = latest_suitable_version
        self.version = None
        self.description_filename = description_filename
        self.ignore_list_fname = Ignore_List_Fname
        self.ignore_patterns = copy.copy(Default_Publish_Ignore)
        self.origin_info = None
        description_file = os.path.join(path, description_filename)
        if os.path.isfile(description_file):
            try:
                self.description = ordered_json.load(description_file)
                if self.description:
                    if not 'name' in self.description:
                        raise Exception('missing "name"')
                    if 'version' in self.description:
                        self.version = version.Version(self.description['version'])
                    else:
                        raise Exception('missing "version"')
            except Exception as e:
                self.description = OrderedDict()
                self.error = "Description invalid %s: %s" % (description_file, e);
                logger.debug(self.error)
                raise InvalidDescription(self.error)
        else:
            self.error = "No %s file." % description_filename
            self.description = OrderedDict()
        try:
            with open(os.path.join(path, self.ignore_list_fname), 'r') as ignorefile:
                self.ignore_patterns += self._parseIgnoreFile(ignorefile)
        except IOError as e:
            if e.errno != errno.ENOENT:
                raise
        # warn about invalid yotta versions before schema errors (as new yotta
        # might introduce new schema)
        yotta_version_spec = None
        if self.description and self.description.get('yotta', None):
            try:
                yotta_version_spec = version.Spec(self.description['yotta'])
            except ValueError as e:
                logger.warning(
                    "could not parse yotta version spec '%s' from %s: it "+
                    "might require a newer version of yotta",
                    self.description['yotta'],
                    self.description['name']
                )
        if yotta_version_spec is not None:
            import yotta
            yotta_version = version.Version(yotta.__version__)
            if not yotta_version_spec.match(yotta_version):
                self.error = "requires yotta version %s (current version is %s). see http://yottadocs.mbed.com for update instructions" % (
                    str(yotta_version_spec),
                    str(yotta_version)
                )

        if self.description and schema_filename and not self.path in self.schema_errors_displayed:
            self.schema_errors_displayed.add(self.path)
            have_errors = False
            with open(schema_filename, 'r') as schema_file:
                schema = json.load(schema_file)
                validator = jsonschema.Draft4Validator(schema)
                for error in validator.iter_errors(self.description):
                    if not have_errors:
                        logger.warning(u'%s has invalid %s:' % (
                            os.path.split(self.path.rstrip('/'))[1],
                            description_filename
                        ))
                        have_errors = True
                    logger.warning(u"  %s value %s" % (u'.'.join([str(x) for x in error.path]), error.message))
            # for now schema validation errors aren't fatal... will be soon
            # though!
            #if have_errors:
            #    raise InvalidDescription('Invalid %s' % description_filename)
        self.inherited_shrinkwrap = None
        self.shrinkwrap = None
        # we can only apply shrinkwraps to instances with valid descriptions:
        # instances do not become valid after being invalid so this is safe
        # (but it means you cannot trust the shrinkwrap of an invalid
        # component)
        # (note that it is unsafe to use the __bool__ operator on self here as
        # we are not fully constructed)
        if self.description:
            self.inherited_shrinkwrap = inherit_shrinkwrap
            self.shrinkwrap = tryReadJSON(os.path.join(path, Shrinkwrap_Fname), Shrinkwrap_Schema)
            if self.shrinkwrap:
                logger.warning('dependencies of %s are pegged by yotta-shrinkwrap.json', self.getName())
                if self.inherited_shrinkwrap:
                    logger.warning('shrinkwrap in %s overrides inherited shrinkwrap', self.getName())
        #logger.info('%s created with inherited_shrinkwrap %s', self.getName(), self.inherited_shrinkwrap)
        self.vcs = vcs.getVCS(path)
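
The loading and validation above boils down to two json.load calls: one for the package description (order-preserving, via ordered_json) and one for the JSON schema, with validation errors collected rather than raised. A minimal sketch of that pattern, using the standard library's object_pairs_hook in place of ordered_json and the same jsonschema Draft4Validator API as above; the file paths are placeholders:

import json
from collections import OrderedDict

import jsonschema

def load_and_validate(description_path, schema_path):
    # Load the description, preserving key order (what ordered_json.load provides).
    with open(description_path, 'r') as f:
        description = json.load(f, object_pairs_hook=OrderedDict)
    # Load the schema and collect validation errors instead of raising on the first one.
    with open(schema_path, 'r') as f:
        schema = json.load(f)
    validator = jsonschema.Draft4Validator(schema)
    errors = ['%s: %s' % ('.'.join(str(p) for p in e.path), e.message)
              for e in validator.iter_errors(description)]
    return description, errors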

Example 158

Project: berrl Source File: pipehtml.py
def make_bindings_type(filenames,color_input,colorkey,file_dictionary,sidebar,bounds):
	# instantiating string, the main string block for the javascript block of html code
	string = ''

	'''
	# logic for instantiating variable colorkey input 
	if not colorkeyfields == False:
		colorkey = 'selectedText'
	'''

	# iterating through each geojson filename
	count = 0
	for row in filenames:
		color_input = ''
		colorkeyfields = False
		count += 1
		filename = row
		zoomrange = ['','']
		# reading in geojson file into memory
		with open(filename) as data_file:    
   			data = json.load(data_file)
   		#pprint(data)

   		# getting the featuretype which will later dictate what javascript splices are needed
   		data = data['features']
   		data = data[0]
   		featuretype = data['geometry']
   		featuretype = featuretype['type']
		data = data['properties']


		# logic for overwriting colorkey fields if it exists for the filename 
		# in the file dictionary
		try:
			colorkeyfields = file_dictionary[filename][str('colorkeyfields')]
		except KeyError:
			colorkeyfields = False
		except TypeError:
			colorkeyfields = False

		if not colorkeyfields == False:
			if len(colorkeyfields) == 1:
				colorkey = colorkeyfields[0]
				colorkeyfields = False

		try:
			zoomrange = file_dictionary[filename][str('zooms')]
		except KeyError:
			zoomrange = ['','']
		except TypeError:
			zoomrange = ['','']



		# code for if the file_dictionary input isn't false 
		#(i.e. getting the color inputs out of dictionary variable)
		if file_dictionary==False and colorkey == False:			
			# logic for getting the colorline for different feature types
			# the point feature requires a different line of code
			if featuretype == 'Point':
				colorline = get_colorline_marker(color_input)
			else:
				colorline = get_colorline_marker2(color_input)



		# setting minzoom and maxzoom to be sent into js parsing 
		minzoom,maxzoom = zoomrange

		# getting the filter file dictionary if file_dictionary exists
		if not file_dictionary == False:
			filter_file_dictionary = file_dictionary[filename]
		else:
			filter_file_dictionary = False 

		# checking to see if a chart_dictionary exists
		try: 
			chart_dictionary = filter_file_dictionary['chart_dictionary']
		except KeyError:
			chart_dictionary = False
		except TypeError:
			chart_dictionary = False


		# sending min and max zoom into the function that makes the zoom block
		zoomblock = make_zoom_block(minzoom,maxzoom,count,colorkeyfields,bounds,filter_file_dictionary)

		# logic for if a color key is given 
		# HINT look here for rgb raw color integration in a color line
   		if not colorkey == '':
   			if row == filenames[0]:
   				if colorkey == 'selectedText':
   					colorkey = """feature.properties[%s]""" % colorkey
   				else:
   					colorkey = """feature.properties['%s']""" % colorkey
   			if featuretype == 'Point':
   				colorline = get_colorline_marker(str(colorkey))
   			else:
   				colorline = get_colorline_marker2(str(colorkey))


   		# this may be able to be deleted 
   		# test later 
   		# im not sure what its here for
   		if file_dictionary == False and colorkey == '':
	   		if featuretype == 'Point':
	   			colorline = get_colorline_marker(color_input)
	   		else:
	   			colorline = get_colorline_marker2(color_input)
   		if colorkey == '' and colorkeyfields == False:
	   		if featuretype == 'Point':
	   			colorline = get_colorline_marker(color_input)
	   		else:
	   			colorline = get_colorline_marker2(color_input)

   		# iterating through each header 
   		headers = []
   		for row in data:
   			headers.append(str(row))

   		# logic for getting sidebar string that will be added in make_blockstr()
   		if sidebar == True:
   			sidebarstring = make_sidebar_string(headers,chart_dictionary)
   		else:
   			sidebarstring = ''

   		# section of javascript code dedicated to the adding the data layer 
   		if count == 1:
	   		blocky = """
	function add%s() { 
	\n\tfunction addDataToMap%s(data, map) {
	\t\tvar dataLayer = L.geoJson(data);
	\t\tvar map = L.mapbox.map('map', 'mapbox.streets',{
	\t\t\tzoom: 5
	\t\t\t}).fitBounds(dataLayer.getBounds());
	\t\tdataLayer.addTo(map)
	\t}\n""" % (count,count)
		else:
			blocky = """
	function add%s() { 
	\n\tfunction addDataToMap%s(data, map) {
	\t\tvar dataLayer = L.geoJson(data);
	\t\tdataLayer.addTo(map)
	\t}\n""" % (count,count)
		
		# making the string section that locally links the geojson file to the html document
		'''
		if not time == '':
			preloc='\tfunction add%s() {\n' % (str(count))
			loc = """\t$.getJSON('http://localhost:8000/%s',function(data) { addDataToMap%s(data,map); });""" % (filename,count)
			loc = preloc + loc
		else: 
		'''
		loc = """\t$.getJSON('http://localhost:8000/%s',function(data) { addDataToMap%s(data,map); });""" % (filename,count)			
		# creating block to be added to the total or constituent string block
		if featuretype == 'Point':
			bindings = make_bindings(headers,count,colorline,featuretype,zoomblock,filename,sidebarstring,colorkeyfields)+'\n'
			stringblock = blocky + loc + bindings
		else:
			bindings = make_bindings(headers,count,colorline,featuretype,zoomblock,filename,sidebarstring,colorkeyfields)+'\n'
			stringblock = blocky + loc + bindings
		
		# adding the stringblock (one geojson file javascript block) to the total string block
		string += stringblock

	# adding async function to end of string block
	string = string + async_function_call(count)

	return string
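
Stripped of the HTML/JavaScript generation, the json.load step in make_bindings_type just reads a GeoJSON file and inspects its first feature. A minimal sketch of that part alone (the filename is whatever GeoJSON path you pass in):

import json

def read_geojson_headers(filename):
    # Read the whole GeoJSON document into memory.
    with open(filename) as data_file:
        data = json.load(data_file)
    # The first feature determines the geometry type and the property headers.
    first = data['features'][0]
    featuretype = first['geometry']['type']
    headers = [str(key) for key in first['properties']]
    return featuretype, headers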

Example 159

Project: mapit Source File: mapit_UK_import_police_force_areas.py
    def handle_label(self, directory, **options):

        err = False
        for k in ['generation_id', 'area_type_code', 'name_type_code', 'code_type']:
            if options[k]:
                continue
            print("Missing argument '--%s'" % k)
            err = True
        if err:
            sys.exit(1)

        generation_id = options['generation_id']
        area_type_code = options['area_type_code']
        name_type_code = options['name_type_code']
        code_type_code = options['code_type']

        try:
            Country.objects.get(code='E')
            Country.objects.get(code='W')
            Country.objects.get(code='N')
        except Country.DoesNotExist:
            print("England, Wales and Northern Ireland don't exist yet; load the UK fixture first.")
            sys.exit(1)
        welsh_forces = ('dyfed-powys', 'gwent', 'north-wales', 'south-wales')

        # The KML files don't contain the names of each force, but the filenames
        # are the force IDs used by the police API, so we can fetch the names
        # data and save the IDs as codes for future use:
        names_data_filename = os.path.join(DATA_DIRECTORY, "police_force_names.json")
        if not os.path.exists(names_data_filename):
            print(
                "Can't find force names data at %s; trying to fetch it from the police API instead..." %
                names_data_filename)
            url = "http://data.police.uk/api/forces"
            forces = urllib.request.urlopen(url)
            with open(names_data_filename, 'w') as f:
                f.write(forces.read())
            print("...successfully fetched and saved the force names data.")

        with open(names_data_filename) as names_file:
            names_data = json.load(names_file)

        # Map force codes to names for easy lookup:
        codes_to_names = dict((d['id'], d['name']) for d in names_data)

        # Ensure that these types exist already, because if --commit is not
        # specified then mapit_import will prompt for their descriptions
        # for each force:
        try:
            Type.objects.get(code=area_type_code)
            NameType.objects.get(code=name_type_code)
            CodeType.objects.get(code=code_type_code)
        except (Type.DoesNotExist, NameType.DoesNotExist, CodeType.DoesNotExist) as e:
            print(e, "Create the area, name and code types first.")
            sys.exit(1)

        print("Importing police force areas from %s" % directory)

        # mapit_import command kwargs which are common to all forces:
        command_kwargs = {
            'generation_id': generation_id,
            'area_type_code': area_type_code,
            'name_type_code': name_type_code,
            'code_type': code_type_code,
            'name_field': None,
            'code_field': None,
            'use_code_as_id': False,
            'encoding': None,
        }
        for option in ('commit', 'preserve', 'new', 'fix_invalid_polygons'):
            command_kwargs[option] = options[option]

        for kml_file in os.listdir(directory):
            code, extension = os.path.splitext(kml_file)
            if extension.lower() != '.kml':
                continue
            file_path = os.path.join(directory, kml_file)

            country_code = 'E'
            if code in welsh_forces:
                country_code = 'W'
            elif code == 'northern-ireland':
                country_code = 'N'

            try:
                name = codes_to_names[code]
            except KeyError:
                print("Could not find a force name in API JSON data for %s" % code)
                sys.exit(1)

            call_command(
                'mapit_import',
                file_path,
                override_name=name,
                override_code=code,
                country_code=country_code,
                **command_kwargs
            )
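
The force-name handling above is a cache-or-fetch pattern: download the JSON once, save it to disk, then json.load the cached copy on every run. A small sketch of that pattern for Python 3 (where urlopen returns bytes, so the cache is written in binary mode); the path and URL mirror the ones above:

import json
import os
import urllib.request

def load_force_names(path, url="http://data.police.uk/api/forces"):
    # Fetch and cache the JSON only if it is not already on disk.
    if not os.path.exists(path):
        with urllib.request.urlopen(url) as response, open(path, 'wb') as f:
            f.write(response.read())
    # Load the cached file and map force codes to names, as above.
    with open(path) as names_file:
        names_data = json.load(names_file)
    return {d['id']: d['name'] for d in names_data}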

Example 160

Project: yournextrepresentative Source File: 0009_migrate_to_django_popolo.py
def import_from_popit(apps, schema_editor):
    if settings.ELECTION_APP not in ELECTION_APPS_WITH_EXISTING_DATA:
        return
    if settings.RUNNING_TESTS:
        return
    # Create the party sets for this country:
    party_set_from_slug = {}
    party_set_from_name = {}
    for party_set_data in PARTY_SETS_BY_ELECTION_APP.get(
            settings.ELECTION_APP, []
    ):
        PartySet = apps.get_model('candidates', 'partyset')
        party_set = PartySet.objects.create(**party_set_data)
        party_set_from_slug[party_set_data['slug']] = party_set
        party_set_from_name[party_set_data['name']] = party_set
    # Now run the standard import:
    importer = YNRPopItImporter(apps, schema_editor)
    host_and_port = {
        'uk_general_election_2015': 'yournextmp.popit.mysociety.org:80',
        'ar_elections_2015': 'ynr-argentina.popit.mysociety.org:80',
        'bf_elections_2015': 'burkina-faso.popit.mysociety.org:80',
        'st_paul_municipal_2015': 'twincities.popit.mysociety.org:80',
    }[settings.ELECTION_APP]
    url = 'http://{host_and_port}/api/v0.1/export.json'.format(
        host_and_port=host_and_port
    )
    export_filename = get_url_cached(url)
    importer.import_from_export_json(export_filename)
    # Now reset the database sequence for popolo_person's id field,
    # since we've specified the id when creating each person.
    Person = apps.get_model('popolo', 'person')
    reset_sql_list = connection.ops.sequence_reset_sql(
        no_style(), [Person]
    )
    if reset_sql_list:
        cursor = connection.cursor()
        for reset_sql in reset_sql_list:
            cursor.execute(reset_sql)
    # For Argentina, we need the original party JSON to decide on the
    # party sets.
    if settings.ELECTION_APP == 'ar_elections_2015':
        ar_party_id_to_party_sets = {}
        ar_filename = join(
            dirname(__file__), '..', '..', 'elections', 'ar_elections_2015',
            'data', 'all-parties-from-popit.json'
        )
        with open(ar_filename) as f:
            ar_all_party_data = json.load(f)
            for party_data in ar_all_party_data:
                territory = party_data.get('territory')
                if territory:
                    party_set = party_set_from_name[territory]
                    ar_party_id_to_party_sets[party_data['id']] = \
                        [party_set]
                else:
                    ar_party_id_to_party_sets[party_data['id']] = \
                        party_set_from_name.values()

    # And add each party to a party set:
    Organization = apps.get_model('popolo', 'organization')
    for party in Organization.objects.filter(
            classification='Party',
    ).prefetch_related('extra'):
        if settings.ELECTION_APP == 'bf_elections_2015':
            party.party_sets.add(party_set_from_slug['national'])
        elif settings.ELECTION_APP == 'st_paul_municipal_2015':
            party.party_sets.add(party_set_from_slug['st-paul'])
        elif settings.ELECTION_APP == 'uk_general_election_2015':
            register = party.extra.register
            if register == 'Great Britain':
                party.party_sets.add(party_set_from_slug['gb'])
            elif register == 'Northern Ireland':
                party.party_sets.add(party_set_from_slug['ni'])
            else:
                party.party_sets.add(*PartySet.objects.all())
        elif settings.ELECTION_APP == 'ar_elections_2015':
            party_sets = ar_party_id_to_party_sets[party.extra.slug]
            party.party_sets.add(*party_sets)
    # It turns out that there were quite a lot of duplicate
    # memberships in the old YNR PopIt instances, so try to remove any
    # duplicates:
    Membership = apps.get_model('popolo', 'membership')
    for duplicate in Membership.objects.values(
            'label',
            'role',
            'person_id',
            'organization_id',
            'on_behalf_of',
            'post_id',
            'start_date',
            'end_date',
            'role',
            'extra__election__slug') \
        .annotate(Count('id')).filter(id__count__gt=1):
        del duplicate['id__count']
        for membership in Membership.objects.filter(**duplicate)[1:]:
            membership.delete()

    # Also remove any old-style party memberships - these are now
    # represented by the on_behalf_of property of candidacy memberships:
    Membership.objects.filter(
        post__isnull=True,
        organization__classification='Party'
    ).delete()
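
For the Argentina-specific step above, json.load reads a party list and groups each party into party sets by its 'territory' field, falling back to every known set when none is given. A compact sketch of that mapping; party_set_from_name stands in for the dict built earlier in the migration:

import json

def party_sets_by_territory(filename, party_set_from_name):
    with open(filename) as f:
        parties = json.load(f)
    mapping = {}
    for party in parties:
        territory = party.get('territory')
        if territory:
            # A known territory selects exactly one party set.
            mapping[party['id']] = [party_set_from_name[territory]]
        else:
            # No territory: the party can appear in any set.
            mapping[party['id']] = list(party_set_from_name.values())
    return mapping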

Example 161

Project: PySnip Source File: run.py
    def __init__(self, interface, config):
        self.config = config
        if config.get('random_rotation', False):
            self.map_rotator_type = random_choice_cycle
        else:
            self.map_rotator_type = itertools.cycle
        self.default_time_limit = config.get('default_time_limit', 20.0)
        self.default_cap_limit = config.get('cap_limit', 10.0)
        self.advance_on_win = int(config.get('advance_on_win', False))
        self.win_count = itertools.count(1)
        self.bans = NetworkDict()
        try:
            self.bans.read_list(json.load(open(os.path.join(RESOURCE_DIR,'bans.txt'), 'rb')))
        except IOError:
            pass
        self.hard_bans = set() # possible DDoS'ers are added here
        self.player_memory = deque(maxlen = 100)
        self.config = config
        if len(self.name) > MAX_SERVER_NAME_SIZE:
            print '(server name too long; it will be truncated to "%s")' % (
                self.name[:MAX_SERVER_NAME_SIZE])
        self.respawn_time = config.get('respawn_time', 8)
        self.respawn_waves = config.get('respawn_waves', False)
        game_mode = config.get('game_mode', 'ctf')
        if game_mode == 'ctf':
            self.game_mode = CTF_MODE
        elif game_mode == 'tc':
            self.game_mode = TC_MODE
        elif self.game_mode is None:
            raise NotImplementedError('invalid game mode: %s' % game_mode)
        self.game_mode_name = game_mode
        team1 = config.get('team1', {})
        team2 = config.get('team2', {})
        self.team1_name = team1.get('name', 'Blue')
        self.team2_name = team2.get('name', 'Green')
        self.team1_color = tuple(team1.get('color', (0, 0, 196)))
        self.team2_color = tuple(team2.get('color', (0, 196, 0)))
        self.friendly_fire = config.get('friendly_fire', True)
        self.friendly_fire_time = config.get('grief_friendly_fire_time', 2.0)
        self.spade_teamkills_on_grief = config.get('spade_teamkills_on_grief',
            False)
        self.fall_damage = config.get('fall_damage', True)
        self.teamswitch_interval = config.get('teamswitch_interval', 0)
        self.max_players = config.get('max_players', 20)
        self.melee_damage = config.get('melee_damage', 100)
        self.max_connections_per_ip = config.get('max_connections_per_ip', 0)
        self.passwords = config.get('passwords', {})
        self.server_prefix = encode(config.get('server_prefix', '[*]'))
        self.time_announcements = config.get('time_announcements',
            [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 30, 60, 120, 180, 240, 300, 600,
             900, 1200, 1800, 2400, 3000])
        self.balanced_teams = config.get('balanced_teams', None)
        self.login_retries = config.get('login_retries', 1)
        
        # voting configuration
        self.default_ban_time = config.get('default_ban_duration', 24*60)
        
        self.speedhack_detect = config.get('speedhack_detect', True)
        if config.get('user_blocks_only', False):
            self.user_blocks = set()
        self.set_god_build = config.get('set_god_build', False)
        self.debug_log = config.get('debug_log', False)
        if self.debug_log:
            pyspades.debug.open_debug_log(os.path.join(RESOURCE_DIR,'debug.log'))
        ssh = config.get('ssh', {})
        if ssh.get('enabled', False):
            from ssh import RemoteConsole
            self.remote_console = RemoteConsole(self, ssh)
        irc = config.get('irc', {})
        if irc.get('enabled', False):
            from irc import IRCRelay
            self.irc_relay = IRCRelay(self, irc)
        status = config.get('status_server', {})
        if status.get('enabled', False):
            from statusserver import StatusServerFactory
            self.status_server = StatusServerFactory(self, status)
        publish = config.get('ban_publish', {})
        if publish.get('enabled', False):
            from banpublish import PublishServer
            self.ban_publish = PublishServer(self, publish)
        ban_subscribe = config.get('ban_subscribe', {})
        if ban_subscribe.get('enabled', True):
            import bansubscribe
            self.ban_manager = bansubscribe.BanManager(self, ban_subscribe)
        # logfile location in resource dir if not abs path given
        logfile = choose_path(RESOURCE_DIR,config.get('logfile', None))
        if logfile is not None and logfile.strip():
            if config.get('rotate_daily', False):
                create_filename_path(logfile)
                logging_file = DailyLogFile(logfile, '.')
            else:
                logging_file = open_create(logfile, 'a')
            log.addObserver(log.FileLogObserver(logging_file).emit)
            log.msg('pyspades server started on %s' % time.strftime('%c'))
        log.startLogging(sys.stdout) # force twisted logging
        
        self.start_time = reactor.seconds()
        self.end_calls = []
        self.console = create_console(self)

        # check for default password usage
        for group, passwords in self.passwords.iteritems():
            if group in DEFAULT_PASSWORDS:
                for password in passwords:
                    if password in DEFAULT_PASSWORDS[group]:
                        print ("WARNING: FOUND DEFAULT PASSWORD '%s'" \
                               " IN GROUP '%s'" % (password, group))
        
        for password in self.passwords.get('admin', []):
            if not password:
                self.everyone_is_admin = True
        commands.rights.update(config.get('rights', {}))
        
        port = self.port = config.get('port', 32887)
        ServerProtocol.__init__(self, port, interface)
        self.host.receiveCallback = self.receive_callback
        ret = self.set_map_rotation(config['maps'])
        if not ret:
            print 'Invalid map in map rotation (%s), exiting.' % ret.map
            raise SystemExit

        self.update_format()
        self.tip_frequency = config.get('tip_frequency', 0)
        if self.tips is not None and self.tip_frequency > 0:
            reactor.callLater(self.tip_frequency * 60, self.send_tip)

        self.master = config.get('master', True)
        self.set_master()
        
        get_external_ip(config.get('network_interface', '')).addCallback(
            self.got_external_ip)
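
The ban list above is read with json.load(open(...)) inside a try/except IOError, so a missing bans.txt is simply treated as an empty list. The same idea with a with-block (so the file handle is closed explicitly); resource_dir stands in for RESOURCE_DIR:

import json
import os

def load_ban_list(resource_dir, filename='bans.txt'):
    # A missing ban file just means there are no bans yet.
    try:
        with open(os.path.join(resource_dir, filename)) as f:
            return json.load(f)
    except IOError:
        return []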

Example 162

Project: geoinference Source File: app.py
def cross_validate(args): 
	parser = argparse.ArgumentParser(prog='geoinf cross_validate', description='evaluate a geocinference method using cross-validation')
	parser.add_argument('-f', '--force', help='overwrite the output model directory if it already exists')
	parser.add_argument('method_name', help='the method to use')
	parser.add_argument('method_settings', help='a json file containing method-specific configurations')
	parser.add_argument('dataset_dir', help='a directory containing a geoinference dataset')
	parser.add_argument('fold_dir', help='the name of the directory containing information on the cross-validation folds')
	parser.add_argument('results_dir', help='a (non-existent) directory where the evaluation results will be stored')
        parser.add_argument('--fold', nargs=1, 
                            help='runs just that fold from the cross-fold dataset')
        parser.add_argument('--location-source', nargs=1, 
                            help='specifies the source of ground-truth locations')

	args = parser.parse_args(args)

	# confirm that the output directory doesn't exist
#	if os.path.exists(args.results_dir) and not args.force:
#		raise Exception, 'output results_dir cannot already exist'

	if not os.path.exists(args.results_dir): #and not args.force:
		#raise Exception, 'output fold_dir cannot already exist'
                os.mkdir(args.results_dir)


	# load the method
	method = get_method_by_name(args.method_name)

	# load the data
	with open(args.method_settings, 'r') as fh:
		settings = json.load(fh)

        specific_fold_to_run = args.fold
        if specific_fold_to_run:
                specific_fold_to_run = specific_fold_to_run[0]
        location_source = args.location_source
        if location_source:
                logger.debug('Using %s as the source of ground truth location' % location_source)
                location_source = location_source[0]
                settings['location_source'] = location_source

                
        print "running fold %s" % (specific_fold_to_run)

	# Load the folds to be used in the dataset
	cfv_fh = open(os.path.join(args.fold_dir, 'folds.info.tsv'))

	# Each line contains two files specifying the post IDs to be held out
	# from the full dataset (for that fold) and the corresponding file in
	# the fold_dir containing the testing data for that fold
	for line in cfv_fh:
                line = line.strip()
		fold_name, testing_post_ids_file, testing_user_ids_file, testing_users_file = line.split("\t")

                # Skip this fold if the user has told us to run only one fold by name
                if specific_fold_to_run is not None and fold_name != specific_fold_to_run:
                        continue
		
                logger.debug('starting processing of fold %s' % fold_name)
                
		# Read in the post IDs to exclude
		testing_post_ids = set()
		tpi_fh = open(os.path.join(args.fold_dir, testing_post_ids_file.replace('held-out-','')))
		for id_str in tpi_fh:
			testing_post_ids.add(id_str.strip())
		tpi_fh.close()

		# Read in the user IDs to exclude
		testing_user_ids = set()
		tpi_fh = open(os.path.join(args.fold_dir, testing_user_ids_file.replace('held-out-','')))
		for id_str in tpi_fh:
			testing_user_ids.add(id_str.strip())
		tpi_fh.close()

                logger.debug('Loaded %d users whose location data will be held out' % len(testing_user_ids))

		# load the dataset
                training_data = None
                if not location_source is None:
                        training_data = SparseDataset(args.dataset_dir, excluded_users=testing_user_ids, default_location_source=location_source)
                else:
                        training_data = SparseDataset(args.dataset_dir, excluded_users=testing_user_ids)
                
		# load the method
		method = get_method_by_name(args.method_name)
		method_inst = method()
                
                # Create the temporary directory that will hold the model for
                # this fold
                model_dir = os.path.join(args.results_dir, fold_name)
                if not os.path.exists(model_dir):
                        os.mkdir(model_dir)
                
		# Train on the datset, holding out the testing post IDs
		model = method_inst.train_model(settings, training_data, None)

                logger.debug('Finished training during fold %s; beginning testing' % fold_name)

                logger.debug("Reading testing data from %s" % (os.path.join(args.fold_dir,testing_users_file)))

		testing_data = Dataset(args.fold_dir, users_file=os.path.join(args.fold_dir,testing_users_file))

                logger.debug("Writing results to %s" % (os.path.join(args.results_dir, fold_name + ".results.tsv.gz")))
                
		out_fh = gzip.open(os.path.join(args.results_dir, fold_name + ".results.tsv.gz"), 'w')

                num_tested_users = 0
                num_tested_posts = 0
                seen_ids = set()
		for user in testing_data.user_iter():
			user_id = user['user_id']
			posts = user['posts']

			locs = model.infer_posts_locations_by_user(user_id, posts)

			if len(locs) != len(posts):
                                print "#WUT %d != %d" % (len(locs), len(posts))
                        
                        num_located_posts = 0 
                        num_tested_posts += len(posts)
			for loc, post in zip(locs, posts):
                                pid = post['id']
                                if pid in seen_ids:
                                        continue
                                seen_ids.add(pid)
                                if not loc is None:
                                        out_fh.write('%s\t%f\t%f\n' % (post['id'], loc[0], loc[1]))
                                        num_located_posts += 1
                        num_tested_users += 1
                        if num_tested_users % 10000 == 0:
                                logger.debug('During testing of fold %s, processed %d users, %d posts, %d located' % (fold_name, num_tested_users, num_tested_posts, num_located_posts))

		out_fh.close()
                logger.debug('Finished testing of fold %s' % fold_name)

Example 163

Project: modl Source File: hcp_plot.py
def display_explained_variance_density(output_dir):
    dir_list = [join(output_dir, f) for f in os.listdir(output_dir) if
                os.path.isdir(join(output_dir, f))]

    fig = plt.figure(figsize=(fig_width * 0.73, fig_height))
    gs = gridspec.GridSpec(1, 2, width_ratios=[1, 1])
    fig.subplots_adjust(bottom=0.29)
    fig.subplots_adjust(left=0.075)
    fig.subplots_adjust(right=.92)

    results = []
    analyses = []
    ref_time = 1000000
    for dir_name in dir_list:
        try:
            analyses.append(
                json.load(open(join(dir_name, 'analysis.json'), 'r')))
            results.append(
                json.load(open(join(dir_name, 'results.json'), 'r')))
            if results[-1]['reduction'] == 12:
                timings = np.array(results[-1]['timings'])
                diff = timings[1:] - timings[:1]
                ref_time = min(ref_time, np.min(diff))
        except IOError:
            pass
    print(ref_time)
    h_reductions = []
    ax = {}
    ylim = {1e-2: [2.455e8, 2.525e8], 1e-3: [2.3e8, 2.47e8],
            1e-4: [2.16e8, 2.42e8]}
    for i, alpha in enumerate([1e-3, 1e-4]):
        ax[alpha] = fig.add_subplot(gs[:, i])
        if i == 0:
            ax[alpha].set_ylabel('Objective value on test set')
        ax[alpha].annotate('$\\lambda  = 10^{%.0f}$' % log(alpha, 10),
                           xy=(.65, .85),
                           fontsize=8,
                           xycoords='axes fraction')
        ax[alpha].set_xlim([.05, 200])
        ax[alpha].set_ylim(ylim[alpha])

        for tick in ax[alpha].xaxis.get_major_ticks():
            tick.label.set_fontsize(7)
        ax[alpha].set_xscale('log')

        ax[alpha].set_xticks([.1, 1, 10, 100])
        ax[alpha].set_xticklabels(['.1 h', '1 h', '10 h', '100 h'])

        sns.despine(fig=fig, ax=ax[alpha])

        ax[alpha].spines['left'].set_color((.6, .6, .6))
        ax[alpha].spines['bottom'].set_color((.6, .6, .6))
        ax[alpha].xaxis.set_tick_params(color=(.6, .6, .6), which='both')
        ax[alpha].yaxis.set_tick_params(color=(.6, .6, .6), which='both')
        for tick in ax[alpha].xaxis.get_major_ticks():
            tick.label.set_color('black')
        for tick in ax[alpha].yaxis.get_major_ticks():
            tick.label.set_fontsize(6)

            tick.label.set_color('black')
        t = ax[alpha].yaxis.get_offset_text()
        t.set_size(5)
    ax[1e-4].set_xlabel('CPU\ntime', ha='right')
    ax[1e-4].xaxis.set_label_coords(1.15, -0.05)

    colormap = sns.cubehelix_palette(4, start=0, rot=0., hue=1, dark=.3,
                                     light=.7,
                                     reverse=False)
    other_colormap = sns.cubehelix_palette(4, start=0, rot=.5, hue=1, dark=.3,
                                           light=.7,
                                           reverse=False)
    colormap[0] = other_colormap[0]
    colormap_dict = {reduction: color for reduction, color in
                     zip([1, 4, 8, 12],
                         colormap)}

    x_bar = []
    y_bar_objective = []
    y_bar_density = []
    hue_bar = []

    for result, analysis in zip(results, analyses):
        if result['alpha'] != 1e-2 and result['reduction'] != 2:
            print("%s %s" % (result['alpha'], result['reduction']))
            timings = (np.array(analysis['records']) + 1) / int(
                result['reduction']) * 12 * ref_time / 3600
            # timings = np.array(result['timings'])[np.array(analysis['records']) + 1] / 3600
            s, = ax[result[
                'alpha']].plot(
                timings,
                np.array(analysis['objectives']) / 4,
                color=colormap_dict[int(result['reduction'])],
                linewidth=2,
                linestyle='--' if result[
                                      'reduction'] == 1 else '-',
                zorder=result['reduction'] if result[
                                                  'reduction'] != 1 else 100)
            if result['alpha'] == 1e-3:
                h_reductions.append(
                    (s, '%.0f' % result['reduction']))

    handles, labels = list(zip(*h_reductions[::-1]))
    argsort = sorted(range(len(labels)), key=lambda t: int(labels[t]))
    handles = [handles[i] for i in argsort]
    labels = [labels[i] for i in argsort]

    offset = .3
    yoffset = -.05
    legend_vanilla = mlegend.Legend(ax[1e-3], handles[:1], ['No reduction'],
                                    loc='lower left',
                                    ncol=5,
                                    numpoints=1,
                                    handlelength=2,
                                    markerscale=1.4,
                                    bbox_to_anchor=(
                                        0.3 + offset, -.39 + yoffset),
                                    fontsize=8,
                                    frameon=False
                                    )

    legend_ratio = mlegend.Legend(ax[1e-3], handles[1:], labels[1:],
                                  loc='lower left',
                                  ncol=5,
                                  markerscale=1.4,
                                  handlelength=2,
                                  fontsize=8,
                                  bbox_to_anchor=(
                                      0.3 + offset, -.54 + yoffset),
                                  frameon=False
                                  )
    ax[1e-3].annotate('Original online algorithm',
                      xy=(0.28 + offset, -.27 + yoffset),
                      xycoords='axes fraction',
                      horizontalalignment='right', verticalalignment='bottom',
                      fontsize=8)
    ax[1e-3].annotate('Proposed reduction factor $r$',
                      xy=(0.28 + offset, -.42 + yoffset),
                      xycoords='axes fraction',
                      horizontalalignment='right', verticalalignment='bottom',
                      fontsize=8)
    ax[1e-3].add_artist(legend_ratio)
    ax[1e-3].add_artist(legend_vanilla)

    ax[1e-3].annotate('(a) Convergence speed', xy=(0.7, 1.02), ha='center',
                      fontsize=9, va='bottom', xycoords='axes fraction')

    fig.savefig(join(output_dir, 'hcp_bench.pdf'))

    for result, analysis in zip(results, analyses):
        if result['alpha'] != 1e-2 and result['reduction'] != 2:
            x_bar.append(result['alpha'])
            y_bar_objective.append(analysis['objectives'][-1])
            y_bar_density.append(analysis['densities'][-1])
            hue_bar.append(result['reduction'])
    ref_objective = {}
    for objective, alpha, reduction in zip(y_bar_objective, x_bar, hue_bar):
        if reduction == 1:
            ref_objective[alpha] = objective

    for i, (objective, alpha) in enumerate(zip(y_bar_objective, x_bar)):
        y_bar_objective[i] /= ref_objective[alpha]
        y_bar_objective[i] -= 1

    ####################### Final objective
    fig = plt.figure(figsize=(fig_width * 0.27, fig_height))
    fig.subplots_adjust(bottom=0.29)
    fig.subplots_adjust(left=0.05)
    fig.subplots_adjust(right=1.2)
    fig.subplots_adjust(top=0.85)
    gs = gridspec.GridSpec(2, 1, width_ratios=[1, 1], height_ratios=[1.2, 0.8])
    ax_bar_objective = fig.add_subplot(gs[0])
    ax_bar_objective.set_ylim(-0.007, 0.007)
    ax_bar_objective.set_yticks([-0.005, 0, 0.005])
    ax_bar_objective.set_yticklabels(['-0.5\%', '0\%', '0.5\%'])
    ax_bar_objective.tick_params(axis='y', labelsize=6)

    sns.despine(fig=fig, ax=ax_bar_objective, left=True, right=False)

    sns.barplot(x=x_bar, y=y_bar_objective, hue=hue_bar, ax=ax_bar_objective,
                order=[1e-3, 1e-4],
                palette=colormap)
    plt.setp(ax_bar_objective.patches, linewidth=0.1)
    ax_bar_objective.legend_ = None
    ax_bar_objective.get_xaxis().set_visible(False)
    ax_bar_objective.set_xlim([-.5, 1.6])
    ax_bar_objective.annotate('Final\nobjective\ndeviation\n(relative)',
                              xy=(1.28, 0.45), fontsize=7, va='center',
                              xycoords='axes fraction')
    ax_bar_objective.annotate('(Less is better)', xy=(.06, 0.1), fontsize=7,
                              va='center', xycoords='axes fraction')
    ax_bar_objective.yaxis.set_label_position('right')

    ################################## Density
    x_bar = []
    y_bar_density = []
    hue_bar = []
    for result, analysis in zip(results, analyses):
        if result['alpha'] != 1e-2 and result['reduction'] != 2:
            x_bar.append(result['alpha'])
            y_bar_density.append(analysis['densities'][-1])
            hue_bar.append(result['reduction'])

    ax_bar_density = fig.add_subplot(gs[1])
    ax_bar_density.set_yscale('log')
    ax_bar_density.set_ylim(100, 1000)
    ax_bar_density.set_yticks([100, 1000])
    ax_bar_density.set_yticklabels(['100', '1000'])
    ax_bar_density.tick_params(axis='y', labelsize=6)

    sns.barplot(x=x_bar, y=y_bar_density, hue=hue_bar, ax=ax_bar_density,
                order=[1e-3, 1e-4],
                palette=colormap)
    ax_bar_density.set_xticklabels(['$10^{-2}$', '$10^{-3}$', '$10^{-4}$'])
    sns.despine(fig=fig, ax=ax_bar_density, left=True, right=False)
    # ax_bar_density.get_xaxis().set_ticks([])
    ax_bar_density.set_xlim([-.5, 1.6])
    ax_bar_density.set_xlabel('Regularization $\\lambda$')
    ax_bar_density.annotate('$\\frac{\\ell_1}{\\ell_2}(\\mathbf D)$',
                            xy=(1.26, 0.45),
                            fontsize=7, va='center', xycoords='axes fraction')
    ax_bar_density.yaxis.set_label_position('right')

    plt.setp(ax_bar_density.patches, linewidth=0.1)
    ax_bar_density.legend_ = None

    for ax in [ax_bar_density, ax_bar_objective]:
        ax.spines['right'].set_color((.6, .6, .6))
        ax.spines['bottom'].set_color((.6, .6, .6))
        ax.xaxis.set_tick_params(color=(.6, .6, .6), which='both')
        ax.yaxis.set_tick_params(color=(.6, .6, .6), which='both')

    for tic in ax_bar_density.xaxis.get_major_ticks():
        tic.tick1On = tic.tick2On = False
    ax_bar_objective.spines['bottom'].set_position(('data', 0))
    ax_bar_objective.spines['bottom'].set_linewidth(.3)
    ax_bar_objective.annotate('(b) Decomposition quality', xy=(0.7, 1.21),
                              ha='center', va='bottom', fontsize=9,
                              xycoords='axes fraction')

    fig.savefig(expanduser(join(output_dir, 'bar_plot.pdf')))
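
The function above begins by walking the output directory, json.load-ing a pair of analysis.json / results.json files per run, and silently skipping runs where either file is missing. A sketch of that loading loop on its own, using with-blocks rather than bare open() calls:

import json
import os
from os.path import isdir, join

def load_runs(output_dir):
    runs = []
    for name in os.listdir(output_dir):
        dir_name = join(output_dir, name)
        if not isdir(dir_name):
            continue
        try:
            with open(join(dir_name, 'analysis.json')) as f:
                analysis = json.load(f)
            with open(join(dir_name, 'results.json')) as f:
                results = json.load(f)
        except IOError:
            # Incomplete runs (missing either file) are skipped, as above.
            continue
        runs.append((analysis, results))
    return runs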

Example 164

Project: modl Source File: hcp_plot.py
def display_explained_variance_epoch(output_dir):
    dir_list = [join(output_dir, f) for f in os.listdir(output_dir) if
                os.path.isdir(join(output_dir, f))]

    fig = plt.figure()
    gs = gridspec.GridSpec(1, 1, width_ratios=[1])
    fig.set_figwidth(3.25653379549)
    fig.set_figheight(1.3)
    fig.subplots_adjust(bottom=0.105)
    fig.subplots_adjust(top=0.9)
    fig.subplots_adjust(left=0.12)
    fig.subplots_adjust(right=.95)

    results = []
    analyses = []
    ref_time = 1000000
    for dir_name in dir_list:
        try:
            analyses.append(
                json.load(open(join(dir_name, 'analysis.json'), 'r')))
            results.append(
                json.load(open(join(dir_name, 'results.json'), 'r')))
            if results[-1]['reduction'] == 12:
                timings = np.array(results[-1]['timings'])
                diff = timings[1:] - timings[:1]
                ref_time = min(ref_time, np.min(diff))
        except IOError:
            pass
    h_reductions = []
    ax = {}
    ylim = {1e-2: [2.475e8, 2.522e8], 1e-3: [2.3e8, 2.335e8],
            1e-4: [2.16e8, 2.24e8]}
    for i, alpha in enumerate([1e-4]):
        ax[alpha] = fig.add_subplot(gs[:, i])
        if i == 0:
            ax[alpha].set_ylabel('Objective value on test set')
        ax[alpha].set_xlim([50, 4000])

        for tick in ax[alpha].xaxis.get_major_ticks():
            tick.label.set_fontsize(7)
        ax[alpha].set_xscale('log')

        ax[alpha].set_xticks([100, 1000, 1947, 4000])
        ax[alpha].set_xticklabels(['100', '1000', 'Epoch', '4000'])


        ax[alpha].set_ylim(ylim[alpha])
        sns.despine(fig=fig, ax=ax[alpha])

        ax[alpha].spines['left'].set_color((.6, .6, .6))
        ax[alpha].spines['bottom'].set_color((.6, .6, .6))
        ax[alpha].xaxis.set_tick_params(color=(.6, .6, .6), which='both')
        ax[alpha].yaxis.set_tick_params(color=(.6, .6, .6), which='both')
        for tick in ax[alpha].xaxis.get_major_ticks():
            tick.label.set_color('black')
        for tick in ax[alpha].yaxis.get_major_ticks():
            tick.label.set_fontsize(7)

            tick.label.set_color('black')
        t = ax[alpha].yaxis.get_offset_text()
        t.set_size(6)
    ax[1e-4].set_xlabel('Records')
    ax[1e-4].xaxis.set_label_coords(-0.04, -0.047)

    colormap = sns.cubehelix_palette(4, start=0, rot=0., hue=1, dark=.3,
                                     light=.7,
                                     reverse=False)

    other_colormap = sns.cubehelix_palette(4, start=0, rot=.5, hue=1, dark=.3,
                                           light=.7,
                                           reverse=False)
    colormap[0] = other_colormap[0]
    colormap_dict = {reduction: color for reduction, color in
                     zip([1, 4, 8, 12],
                         colormap)}
    for result, analysis in zip(results, analyses):
        if result['alpha'] in [1e-4] and result['reduction'] in [1, 4, 8, 12]:

            print("%s %s" % (result['alpha'], result['reduction']))
            s, = ax[result[
                'alpha']].plot(np.array(analysis['records']),
                               np.array(analysis['objectives']) / 4,
                               color=colormap_dict[result['reduction']],
                               linewidth=1.5,
                               linestyle='--' if result[
                                                     'reduction'] == 1 else '-',
                               zorder=result['reduction'] if result[
                                                                    'reduction'] > 1 else 100)
            h_reductions.append(
                (s, result['reduction']))

    handles, labels = list(zip(*h_reductions[::-1]))
    argsort = sorted(range(len(labels)), key=lambda t: int(labels[t]))
    handles = [handles[i] for i in argsort]
    labels = [('$r=%i$' % labels[i]) for i in argsort]
    labels[0] = 'No reduction\n(original alg.)'

    ax[1e-4].annotate('$\\lambda  = 10^{%.0f}$' % log(alpha, 10),
                       xy=(0.07, 0.07),
                       ha='left',
                       va='bottom',
                       fontsize=8,
                       xycoords='axes fraction')
    legend_ratio = mlegend.Legend(ax[1e-4], handles[0:], labels[0:],
                                  loc='upper right',
                                  ncol=1,
                                  numpoints=1,
                                  handlelength=2,
                                  frameon=False,
                                  bbox_to_anchor=(1, 1.15)
                                  )
    ax[1e-4].add_artist(legend_ratio)

    fig.savefig(join(output_dir, 'hcp_epoch.pdf'))

Example 165

Project: pyon Source File: manhole.py
def main():
    import sys, os, re, errno, json, socket
    from pkg_resources import load_entry_point

    r = re.compile('manhole-(\d+).json')

    if len(sys.argv) == 2:
        mh_file = sys.argv[1]
    else:
        # find manhole file in local dir
        mh_files = [f for f in os.listdir(os.getcwd()) if r.search(f) is not None]
        if len(mh_files) == 0:
            print >>sys.stderr, "No manhole files detected, specify it manually"
            sys.exit(1)
        elif len(mh_files) > 1:

            def legal_manhole_file(f):
                """
                Helper method to check if a process exists and is likely a manhole-able container.

                @return True/False if is a likely container.
                """
                mh_pid = int(r.search(f).group(1))
                try:
                    os.getpgid(mh_pid)
                except OSError as e:
                    if e.errno == errno.ESRCH:
                        return False
                    raise   # unexpected, just re-raise

                # the pid seems legal, now check status of sockets - the pid may be reused
                with open(f) as ff:
                    mh_doc = json.load(ff)

                s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
                try:
                    s.bind((mh_doc['ip'], mh_doc['shell_port']))
                except socket.error as e:
                    if e.errno == errno.EADDRINUSE:
                        return True
                    raise   # unexpected, re-raise
                finally:
                    s.close()

                return False

            # try to see if these are active processes
            legal_mh_files = filter(legal_manhole_file, mh_files)

            if len(legal_mh_files) > 1:
                print >>sys.stderr, "Multiple legal manhole files detected, specify it manually:", legal_mh_files
                sys.exit(1)

            # we found a single legal file, use it
            mh_file = legal_mh_files[0]

            # perform cleanup of stale files
            dead_mh_files = [x for x in mh_files if x not in legal_mh_files]
            for df in dead_mh_files:
                print >>sys.stderr, "Cleaning up stale manhole file", df
                os.unlink(df)

        else:
            mh_file = mh_files[0]

    if not os.access(mh_file, os.R_OK):
        print >>sys.stderr, "Manhole file (%s) does not exist" % mh_file
        sys.exit(1)

    mhpid = r.search(mh_file).group(1)

    # configure branding
    manhole_logo = """
 __   __  _______  __    _  __   __  _______  ___      _______ 
|  |_|  ||   _   ||  |  | ||  | |  ||       ||   |    |       |
|       ||  |_|  ||   |_| ||  |_|  ||   _   ||   |    |    ___|
|       ||       ||       ||       ||  | |  ||   |    |   |___ 
|       ||       ||  _    ||       ||  |_|  ||   |___ |    ___|
| ||_|| ||   _   || | |   ||   _   ||       ||       ||   |___ 
|_|   |_||__| |__||_|  |__||__| |__||_______||_______||_______|
"""

    # manipulate argv!
    sys.argv = [sys.argv[0], "console", "--existing", mh_file,
                "--PromptManager.in_template=>o> ",
                "--PromptManager.in2_template=... ",
                "--PromptManager.out_template=--> ",
                "--TerminalInteractiveShell.banner1=%s" % manhole_logo,
                "--TerminalInteractiveShell.banner2=ION Container Manhole, connected to %s\n" % mhpid]

    ipy_entry = load_entry_point('ipython', 'console_scripts', 'ipython')()
    sys.exit(ipy_entry)
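
The core json.load usage here is small: find the manhole-<pid>.json files in a directory and read the 'ip' and 'shell_port' fields out of each one. A stripped-down sketch of that discovery step (without the liveness checks the example performs):

import json
import os
import re

MANHOLE_RE = re.compile(r'manhole-(\d+)\.json')

def find_manholes(directory='.'):
    entries = []
    for name in os.listdir(directory):
        match = MANHOLE_RE.search(name)
        if match is None:
            continue
        with open(os.path.join(directory, name)) as f:
            doc = json.load(f)
        # Each file records the PID it belongs to plus where its shell listens.
        entries.append((int(match.group(1)), doc['ip'], doc['shell_port']))
    return entries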

Example 166

Project: anvil Source File: opts.py
def parse(previous_settings=None):

    version_str = "%s v%s" % ('anvil', version.version_string())
    help_formatter = SmithyHelpFormatter(width=120)
    parser = OptionParser(version=version_str, formatter=help_formatter,
                          prog='smithy')

    # Root options
    parser.add_option("-v", "--verbose",
                      action="store_true",
                      dest="verbose",
                      default=False,
                      help="make the output logging verbose")

    # Install/start/stop/uninstall specific options
    base_group = OptionGroup(parser, "Action specific options")
    base_group.add_option("-p", "--persona",
                          action="store",
                          type="string",
                          dest="persona_fn",
                          default=sh.joinpths(settings.PERSONA_DIR, 'in-a-box', 'basic.yaml'),
                          metavar="FILE",
                          help="persona yaml file to apply (default: %default)")
    base_group.add_option("-a", "--action",
                          action="store",
                          type="string",
                          dest="action",
                          metavar="ACTION",
                          help="required action to perform: %s" % (_format_list(actions.names())))
    base_group.add_option("-o", "--origins",
                          action="store",
                          type="string",
                          dest="origins_fn",
                          default=sh.joinpths(settings.ORIGINS_DIR, 'master.yaml'),
                          metavar="FILE",
                          help="yaml file describing where to get openstack sources "
                               "from (default: %default)")
    base_group.add_option("--origins-patch",
                          action="store",
                          type="string",
                          dest="origins_patch_fn",
                          default=None,
                          metavar="FILE",
                          help="origins file patch, jsonpath format (rfc6902)")
    base_group.add_option("--distros-patch",
                          action="store",
                          type="string",
                          dest="distros_patch_fn",
                          default=None,
                          metavar="FILE",
                          help="distros file patch, jsonpath format (rfc6902)")
    base_group.add_option("-j", "--jobs",
                          action="store",
                          type="int",
                          dest="jobs",
                          default=multiprocessing.cpu_count() + 1,
                          metavar="JOBS",
                          help="number of building jobs to run simultaneously (default: %default)")
    base_group.add_option("-d", "--directory",
                          action="store",
                          type="string",
                          dest="dir",
                          metavar="DIR",
                          default=_get_default_dir(),
                          help=("empty root DIR or DIR with existing components (default: %default)"))
    base_group.add_option("--tee-file",
                          action="store",
                          type="string",
                          dest="tee_file",
                          metavar="FILE",
                          default='/var/log/anvil.log',
                          help=("location to store tee of output (default: %default)"))
    parser.add_option_group(base_group)

    build_group = OptionGroup(parser, "Build specific options")
    build_group.add_option('-u', "--usr-only",
                           action="store_true",
                           dest="usr_only",
                           default=False,
                           help=("when packaging only store /usr directory"
                                 " (default: %default)"))
    build_group.add_option("--venv-deploy-dir",
                           action="store",
                           type="string",
                           dest="venv_deploy_dir",
                           default=None,
                           help=("for virtualenv builds, make the virtualenv "
                                 "relocatable to a directory different from "
                                 "build directory"))
    build_group.add_option('-c', "--overwrite-configs",
                           action="store_true",
                           dest="overwrite_configs",
                           default=False,
                           help=("When packaging do you want rpm to mark config "
                                 "files with %config or treat them as files and "
                                 "overwrite them each time on rpm install"))
    parser.add_option_group(build_group)

    # Extract only what we care about, these will be passed
    # to the constructor of actions as arguments
    # so don't adjust the naming wily nilly...
    if previous_settings:
        parser.set_defaults(**previous_settings)

    (options, _args) = parser.parse_args()
    values = {}
    values['dir'] = (options.dir or "")
    values['action'] = (options.action or "")
    values['jobs'] = options.jobs
    values['persona_fn'] = options.persona_fn
    values['origins_fn'] = options.origins_fn
    values['verbose'] = options.verbose
    values['usr_only'] = options.usr_only
    values['tee_file'] = options.tee_file
    values['overwrite_configs'] = options.overwrite_configs
    if options.origins_patch_fn:
        with open(options.origins_patch_fn) as fp:
            values['origins_patch'] = json.load(fp)
    if options.distros_patch_fn:
        with open(options.distros_patch_fn) as fp:
            values['distros_patch'] = json.load(fp)
    values['venv_deploy_dir'] = options.venv_deploy_dir
    return values
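
The only json.load calls in parse() are conditional: a patch file is opened and loaded only when the corresponding option was actually given. A minimal sketch of that optional-file pattern; the keys match the ones used above:

import json

def load_optional_patches(origins_patch_fn=None, distros_patch_fn=None):
    values = {}
    if origins_patch_fn:
        with open(origins_patch_fn) as fp:
            values['origins_patch'] = json.load(fp)
    if distros_patch_fn:
        with open(distros_patch_fn) as fp:
            values['distros_patch'] = json.load(fp)
    return values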

Example 167

Project: glance Source File: metadata.py
def _populate_metadata(meta, metadata_path=None, merge=False,
                       prefer_new=False, overwrite=False):
    if not metadata_path:
        metadata_path = CONF.metadata_source_path

    try:
        if isfile(metadata_path):
            json_schema_files = [metadata_path]
        else:
            json_schema_files = [f for f in os.listdir(metadata_path)
                                 if isfile(join(metadata_path, f))
                                 and f.endswith('.json')]
    except OSError as e:
        LOG.error(encodeutils.exception_to_unicode(e))
        return

    if not json_schema_files:
        LOG.error(_LE("Json schema files not found in %s. Aborting."),
                  metadata_path)
        return

    namespaces_table = get_metadef_namespaces_table(meta)
    namespace_rt_table = get_metadef_namespace_resource_types_table(meta)
    objects_table = get_metadef_objects_table(meta)
    tags_table = get_metadef_tags_table(meta)
    properties_table = get_metadef_properties_table(meta)
    resource_types_table = get_metadef_resource_types_table(meta)

    for json_schema_file in json_schema_files:
        try:
            file = join(metadata_path, json_schema_file)
            with open(file) as json_file:
                metadata = json.load(json_file)
        except Exception as e:
            LOG.error(_LE("Failed to parse json file %(file_path)s while "
                          "populating metadata due to: %(error_msg)s"),
                      {"file_path": file,
                       "error_msg": encodeutils.exception_to_unicode(e)})
            continue

        values = {
            'namespace': metadata.get('namespace', None),
            'display_name': metadata.get('display_name', None),
            'description': metadata.get('description', None),
            'visibility': metadata.get('visibility', None),
            'protected': metadata.get('protected', None),
            'owner': metadata.get('owner', 'admin')
        }

        db_namespace = select(
            [namespaces_table.c.id]
        ).where(
            namespaces_table.c.namespace == values['namespace']
        ).select_from(
            namespaces_table
        ).execute().fetchone()

        if db_namespace and overwrite:
            LOG.info(_LI("Overwriting namespace %s"), values['namespace'])
            _clear_namespace_metadata(meta, db_namespace[0])
            db_namespace = None

        if not db_namespace:
            values.update({'created_at': timeutils.utcnow()})
            _insert_data_to_db(namespaces_table, values)

            db_namespace = select(
                [namespaces_table.c.id]
            ).where(
                namespaces_table.c.namespace == values['namespace']
            ).select_from(
                namespaces_table
            ).execute().fetchone()
        elif not merge:
            LOG.info(_LI("Skipping namespace %s. It already exists in the "
                         "database."), values['namespace'])
            continue
        elif prefer_new:
            values.update({'updated_at': timeutils.utcnow()})
            _update_data_in_db(namespaces_table, values,
                               namespaces_table.c.id, db_namespace[0])

        namespace_id = db_namespace[0]

        for resource_type in metadata.get('resource_type_associations', []):
            rt_id = _get_resource_type_id(meta, resource_type['name'])
            if not rt_id:
                val = {
                    'name': resource_type['name'],
                    'created_at': timeutils.utcnow(),
                    'protected': True
                }
                _insert_data_to_db(resource_types_table, val)
                rt_id = _get_resource_type_id(meta, resource_type['name'])
            elif prefer_new:
                val = {'updated_at': timeutils.utcnow()}
                _update_data_in_db(resource_types_table, val,
                                   resource_types_table.c.id, rt_id)

            values = {
                'namespace_id': namespace_id,
                'resource_type_id': rt_id,
                'properties_target': resource_type.get(
                    'properties_target', None),
                'prefix': resource_type.get('prefix', None)
            }
            namespace_resource_type = _get_namespace_resource_type_by_ids(
                meta, namespace_id, rt_id)
            if not namespace_resource_type:
                values.update({'created_at': timeutils.utcnow()})
                _insert_data_to_db(namespace_rt_table, values)
            elif prefer_new:
                values.update({'updated_at': timeutils.utcnow()})
                _update_rt_association(namespace_rt_table, values,
                                       rt_id, namespace_id)

        for property, schema in six.iteritems(metadata.get('properties',
                                                           {})):
            values = {
                'name': property,
                'namespace_id': namespace_id,
                'json_schema': json.dumps(schema)
            }
            property_id = _get_resource_id(properties_table,
                                           namespace_id, property)
            if not property_id:
                values.update({'created_at': timeutils.utcnow()})
                _insert_data_to_db(properties_table, values)
            elif prefer_new:
                values.update({'updated_at': timeutils.utcnow()})
                _update_data_in_db(properties_table, values,
                                   properties_table.c.id, property_id)

        for object in metadata.get('objects', []):
            values = {
                'name': object['name'],
                'description': object.get('description', None),
                'namespace_id': namespace_id,
                'json_schema': json.dumps(
                    object.get('properties', None))
            }
            object_id = _get_resource_id(objects_table, namespace_id,
                                         object['name'])
            if not object_id:
                values.update({'created_at': timeutils.utcnow()})
                _insert_data_to_db(objects_table, values)
            elif prefer_new:
                values.update({'updated_at': timeutils.utcnow()})
                _update_data_in_db(objects_table, values,
                                   objects_table.c.id, object_id)

        for tag in metadata.get('tags', []):
            values = {
                'name': tag.get('name'),
                'namespace_id': namespace_id,
            }
            tag_id = _get_resource_id(tags_table, namespace_id, tag['name'])
            if not tag_id:
                values.update({'created_at': timeutils.utcnow()})
                _insert_data_to_db(tags_table, values)
            elif prefer_new:
                values.update({'updated_at': timeutils.utcnow()})
                _update_data_in_db(tags_table, values,
                                   tags_table.c.id, tag_id)

        LOG.info(_LI("File %s loaded to database."), file)

    LOG.info(_LI("Metadata loading finished"))

Example 168

Project: heat-templates Source File: hook-puppet.py
def main(argv=sys.argv):
    log = logging.getLogger('heat-config')
    handler = logging.StreamHandler(sys.stderr)
    handler.setFormatter(
        logging.Formatter(
            '[%(asctime)s] (%(name)s) [%(levelname)s] %(message)s'))
    log.addHandler(handler)
    log.setLevel('DEBUG')

    prepare_dir(OUTPUTS_DIR)
    prepare_dir(WORKING_DIR)
    os.chdir(WORKING_DIR)

    c = json.load(sys.stdin)

    use_hiera = c['options'].get('enable_hiera', False)
    use_facter = c['options'].get('enable_facter', True)
    modulepath = c['options'].get('modulepath')
    tags = c['options'].get('tags')
    debug = c['options'].get('enable_debug', False)

    facts = {}
    hiera = {}

    fqdn = get_hostname_f(log)
    if fqdn:
        facts['FACTER_fqdn'] = fqdn

    for input in c['inputs']:
        input_name = input['name']
        input_value = input.get('value', '')
        if use_facter:
            fact_name = 'FACTER_%s' % input_name
            facts[fact_name] = input_value
        if use_hiera:
            hiera[input_name] = input_value

    if use_hiera:
        prepare_dir(HIERA_DATADIR)
        hiera_data = os.path.join(HIERA_DATADIR,
                                  'heat_config_%s.json' % c['name'])
        with os.fdopen(os.open(hiera_data,
                               os.O_CREAT | os.O_TRUNC | os.O_WRONLY, 0o600),
                       'w') as hiera_file:
            hiera_file.write(json.dumps(hiera).encode('utf8'))
        facts['FACTER_deploy_config_name'] = c['name']

    fn = os.path.join(WORKING_DIR, '%s.pp' % c['id'])
    heat_outputs_path = os.path.join(OUTPUTS_DIR, c['id'])
    facts['FACTER_heat_outputs_path'] = heat_outputs_path

    env_debug = ' '.join('%s="%s" ' % (k, v) for k, v in facts.items())

    env = os.environ.copy()
    env.update(facts)

    with os.fdopen(os.open(fn, os.O_CREAT | os.O_TRUNC | os.O_WRONLY, 0o700),
                   'w') as f:
        f.write(c.get('config', '').encode('utf-8'))

    cmd = [PUPPET_CMD, 'apply', '--detailed-exitcodes', fn]
    if modulepath:
        cmd.insert(-1, '--modulepath')
        cmd.insert(-1, modulepath)
    if tags:
        cmd.insert(-1, '--tags')
        cmd.insert(-1, tags)
    if debug:
        cmd.insert(-1, '--debug')

    prepare_dir(PUPPET_LOGDIR)
    timestamp = re.sub('[:T]', '-', c['creation_time'])
    base_path = os.path.join(
        PUPPET_LOGDIR, '{timestamp}-{c[id]}'.format(**locals())
    )
    stdout_log = open('{0}-stdout.log'.format(base_path), 'w')
    stderr_log = open('{0}-stderr.log'.format(base_path), 'w')
    log.debug('Running %s %s' % (env_debug, ' '.join(cmd)))
    try:
        subproc = subprocess.Popen(
            cmd, stdout=stdout_log, stderr=stderr_log, env=env
        )
        subproc.wait()
    except OSError:
        log.warn('puppet not installed yet')
        return
    finally:
        stdout_log.close()
        stderr_log.close()

    log.info('Return code %s' % subproc.returncode)
    response = {}
    for i in 'stdout', 'stderr':
        with open('{0}-{1}.log'.format(base_path, i)) as logfile:
            content = logfile.read()
        if content.strip():
            log.info(content)
        response['deploy_{0}'.format(i)] = content

    # returncode of 2 means there were successful changes
    if subproc.returncode in (0, 2):
        returncode = 0
        log.info('Completed %s' % fn)
    else:
        returncode = subproc.returncode
        log.error("Error running %s. [%s]\n" % (fn, subproc.returncode))

    for output in c.get('outputs') or []:
        output_name = output['name']
        try:
            with open('%s.%s' % (heat_outputs_path, output_name)) as out:
                response[output_name] = out.read()
        except IOError:
            pass

    response.update({
        'deploy_status_code': returncode,
    })
    json.dump(response, sys.stdout)
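
The hook above reads one JSON configuration object from stdin and writes one JSON response object to stdout. A stripped-down sketch of that handshake with the actual work omitted (the 'inputs' and 'deploy_status_code' keys mirror the example; everything else is a placeholder):

import json
import sys

def main():
    # One JSON object arrives on stdin ...
    config = json.load(sys.stdin)
    inputs = {i['name']: i.get('value', '') for i in config.get('inputs', [])}

    # ... the real hook would run puppet here ...

    # ... and one JSON object goes back out on stdout.
    json.dump({'deploy_status_code': 0, 'deploy_stdout': repr(inputs)}, sys.stdout)

if __name__ == '__main__':
    main()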

Example 169

Project: glean Source File: test_glean.py
    @mock.patch('platform.dist', new_callable=mock.Mock)
    @mock.patch('subprocess.call', return_value=0, new_callable=mock.Mock)
    @mock.patch('subprocess.check_output', return_value=0,
                new_callable=mock.Mock)
    @mock.patch('os.unlink', return_value=0, new_callable=mock.Mock)
    @mock.patch('os.symlink', return_value=0, new_callable=mock.Mock)
    @mock.patch('os.path.exists', new_callable=mock.Mock)
    @mock.patch('os.listdir', new_callable=mock.Mock)
    @mock.patch('os.system', return_value=0, new_callable=mock.Mock)
    @mock.patch('glean.cmd.open', new_callable=mock.Mock)
    @mock.patch.object(sys, 'argv', ['./glean', '--hostname'])
    def _assert_distro_provider(self, distro, provider, interface,
                                mock_open,
                                mock_os_system,
                                mock_os_listdir,
                                mock_os_path_exists,
                                mock_os_symlink,
                                mock_os_unlink,
                                mock_check_output,
                                mock_call,
                                mock_platform_dist):
        """Main test function

        :param distro: distro to return from "platform.dist"
        :param provider: we will look in fixtures/provider for mocked
                         out files
        :param interface: --interface argument; None for no argument
        """

        mock_platform_dist.return_value = (distro, '', '')

        # These functions are watching the path and faking results
        # based on various things
        # XXX : There are several virtual file-systems available, we
        # might like to look into them and just point ourselves at
        # testing file-systems in the future if this becomes more
        # complex.
        mock_os_path_exists.side_effect = functools.partial(
            self.os_path_exists_side_effect, provider)
        mock_os_listdir.side_effect = functools.partial(
            self.os_listdir_side_effect, provider)
        mock_open.side_effect = functools.partial(
            self.open_side_effect, provider)

        if interface:
            sys.argv.append('--interface=%s' % interface)

        cmd.main()

        output_filename = '%s.%s.network.out' % (provider, distro.lower())
        output_path = os.path.join(sample_data_path, 'test', output_filename)

        # Generate a list of (dest, content) into write_blocks to assert
        write_blocks = []
        lines = open(output_path).readlines()
        write_dest = None
        write_content = None
        for line in lines:
            if line.startswith('### Write '):
                if write_dest is not None:
                    write_blocks.append((write_dest, write_content))
                write_dest = line[len('### Write '):-1]
                write_content = ''
            else:
                write_content += line
        if write_dest is not None:
            write_blocks.append((write_dest, write_content))

        for dest, content in write_blocks:
            if interface and interface not in dest:
                continue
            self.assertNotIn("eth2", dest)
            self.assertIn(dest, self.file_handle_mocks)
            write_handle = self.file_handle_mocks[dest].write
            write_handle.assert_called_once_with(content)

        if self._resolv_unlinked:
            mock_os_unlink.assert_called_once_with('/etc/resolv.conf')

        # Check hostname
        meta_data_path = 'mnt/config/openstack/latest/meta_data.json'
        hostname = None
        with open(os.path.join(sample_data_path, provider,
                               meta_data_path)) as fh:
            meta_data = json.load(fh)
            hostname = meta_data['name']

        mock_call.assert_has_calls([mock.call(['hostname', hostname])])
        if distro.lower() == 'gentoo':
            (self.file_handle_mocks['/etc/conf.d/hostname'].write.
                assert_has_calls([mock.call(hostname)]))
        else:
            self.file_handle_mocks['/etc/hostname'].write.assert_has_calls(
                [mock.call(hostname), mock.call('\n')])

        # Check hosts entry
        hostname_ip = ips[provider]
        calls = [mock.call('%s %s\n' % (hostname_ip, hostname)), ]
        short_hostname = hostname.split('.')[0]
        if hostname != short_hostname:
            calls.append(mock.call('%s %s\n' % (hostname_ip, short_hostname)))

        self.file_handle_mocks['/etc/hosts'].write.assert_has_calls(
            calls, any_order=True)
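
The hostname check near the end of this test reduces to loading a config-drive meta_data.json and reading its 'name' key. A minimal sketch, where sample_root and provider stand in for the fixture layout used by the test:

import json
import os

def read_hostname(sample_root, provider):
    """Return the 'name' field from an OpenStack config-drive meta_data.json."""
    meta_data_path = os.path.join(
        sample_root, provider, 'mnt/config/openstack/latest/meta_data.json')
    with open(meta_data_path) as fh:
        meta_data = json.load(fh)
    return meta_data['name']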

Example 170

Project: billy Source File: update.py
def main():
    try:
        parser = argparse.ArgumentParser(
            description='update billy data',
            parents=[base_arg_parser],
        )

        what = parser.add_argument_group(
            'what to scrape', 'flags that help select what data to scrape')
        scrape = parser.add_argument_group('scraper config',
                                           'settings for the scraper')

        parser.add_argument('module', type=str, help='scraper module (eg. nc)')
        parser.add_argument('--pdb', action='store_true', default=False,
                            help='invoke PDB when exception is raised')
        parser.add_argument('--ipdb', action='store_true', default=False,
                            help='invoke IPDB when exception is raised')
        parser.add_argument('--pudb', action='store_true', default=False,
                            help='invoke PUDB when exception is raised')
        what.add_argument('-s', '--session', action='append',
                          dest='sessions', default=[],
                          help='session(s) to scrape')
        what.add_argument('-t', '--term', action='append', dest='terms',
                          help='term(s) to scrape', default=[])

        for arg in ('upper', 'lower'):
            what.add_argument('--' + arg, action='append_const',
                              dest='chambers', const=arg)
        for arg in ('bills', 'legislators', 'committees',
                    'votes', 'events', 'speeches'):
            what.add_argument('--' + arg, action='append_const', dest='types',
                              const=arg)
        for arg in ('scrape', 'import', 'report', 'session-list'):
            parser.add_argument('--' + arg, dest='actions',
                                action="append_const", const=arg,
                                help='only run %s step' % arg)

        # special modes for debugging
        scrape.add_argument('--nonstrict', action='store_false', dest='strict',
                            default=True, help="don't fail immediately when"
                            " encountering validation warning")
        scrape.add_argument('--fastmode', help="scrape in fast mode",
                            action="store_true", default=False)

        # scrapelib overrides
        scrape.add_argument('-r', '--rpm', action='store', type=int,
                            dest='SCRAPELIB_RPM')
        scrape.add_argument('--timeout', action='store', type=int,
                            dest='SCRAPELIB_TIMEOUT')
        scrape.add_argument('--retries', type=int,
                            dest='SCRAPELIB_RETRY_ATTEMPTS')
        scrape.add_argument('--retry_wait', type=int,
                            dest='SCRAPELIB_RETRY_WAIT_SECONDS')

        args = parser.parse_args()

        if args.pdb or args.pudb or args.ipdb:
            _debugger = pdb
            if args.pudb:
                try:
                    import pudb
                    _debugger = pudb
                except ImportError:
                    pass
            if args.ipdb:
                try:
                    import ipdb
                    _debugger = ipdb
                except ImportError:
                    pass

            # turn on PDB-on-error mode
            # stolen from http://stackoverflow.com/questions/1237379/
            # if this causes problems in interactive mode check that page
            def _tb_info(type, value, tb):
                traceback.print_exception(type, value, tb)
                _debugger.pm()
            sys.excepthook = _tb_info

        # inject scraper paths so scraper module can be found
        for newpath in settings.SCRAPER_PATHS:
            sys.path.insert(0, newpath)

        # get metadata
        module = importlib.import_module(args.module)
        metadata = module.metadata
        module_settings = getattr(module, 'settings', {})
        abbrev = metadata['abbreviation']

        # load module settings, then command line settings
        settings.update(module_settings)
        settings.update(args)

        # make output dir
        args.output_dir = os.path.join(settings.BILLY_DATA_DIR, abbrev)

        # if terms aren't set, use latest
        if not args.terms:
            if args.sessions:
                for session in args.sessions:
                    args.terms.append(
                        term_for_session(metadata['abbreviation'], session,
                                         metadata))
                args.terms = list(set(args.terms or []))
            else:
                latest_term = metadata['terms'][-1]['name']
                args.terms = [latest_term]
        # only set sessions from terms if sessions weren't set
        elif not args.sessions:
            for term in metadata['terms']:
                if term['name'] in args.terms:
                    args.sessions.extend(term['sessions'])
            # dedup sessions
            args.sessions = list(set(args.sessions or []))

        if not args.sessions:
            args.sessions = [metadata['terms'][-1]['sessions'][-1]]

        # determine chambers
        if not args.chambers:
            args.chambers = ['upper', 'lower']

        if not args.actions:
            args.actions = ['scrape', 'import', 'report']

        if not args.types:
            args.types = ['bills', 'legislators', 'votes', 'committees',
                          'alldata']

            if 'events' in metadata['feature_flags']:
                args.types.append('events')

            if 'speeches' in metadata['feature_flags']:
                args.types.append('speeches')

        plan = """billy-update abbr=%s
    actions=%s
    types=%s
    sessions=%s
    terms=%s""" % (args.module, ','.join(args.actions), ','.join(args.types),
                   ','.join(args.sessions), ','.join(args.terms))
        logging.getLogger('billy').info(plan)

        scrape_data = {}

        if 'scrape' in args.actions:
            _clear_scraped_data(args.output_dir)

            # validate then write metadata
            if hasattr(module, 'session_list'):
                session_list = module.session_list()
            else:
                session_list = []
            check_sessions(metadata, session_list)

            try:
                schema_path = os.path.join(os.path.split(__file__)[0],
                                           '../schemas/metadata.json')
                schema = json.load(open(schema_path))

                validator = DatetimeValidator()
                validator.validate(metadata, schema)
            except ValueError as e:
                logging.getLogger('billy').warning(
                    'metadata validation error: ' + str(e))

            run_record = []
            exec_record = {
                "run_record": run_record,
                "args": sys.argv,
            }

            lex = None
            exc_traceback = None

            # start to run scrapers
            exec_start = dt.datetime.utcnow()

            # scraper order matters
            order = ('legislators', 'committees', 'votes', 'bills',
                     'events', 'speeches')
            _traceback = None
            try:
                for stype in order:
                    if stype in args.types:
                        run_record += _run_scraper(stype, args, metadata)
            except Exception as e:
                _traceback = _, _, exc_traceback = sys.exc_info()
                run_record += [{"exception": e, "type": stype}]
                lex = e

            exec_end = dt.datetime.utcnow()
            exec_record['started'] = exec_start
            exec_record['ended'] = exec_end
            scrape_data['scraped'] = exec_record
            scrape_data['abbr'] = abbrev

            for record in run_record:
                if "exception" in record:
                    ex = record['exception']
                    fb = traceback.format_exception(*_traceback)
                    trace = ""
                    for t in fb:
                        trace += t
                    record['exception'] = {
                        "type": ex.__class__.__name__,
                        "message": ex.message,
                        'traceback': trace
                    }
                    scrape_data['failure'] = True
            if lex:
                if 'import' in args.actions:
                    try:
                        db.billy_runs.save(scrape_data, safe=True)
                    except Exception:
                        raise lex, None, exc_traceback
                        # XXX: This should *NEVER* happen, but it has
                        # in the past, so we're going to catch any errors
                        # writing to pymongo, and raise the original
                        # exception rather then let it look like Mongo's fault.
                        # Thanks for catching this, Thom.
                        #
                        # We lose the stack trace, but the Exception is the
                        # same in every other way.
                        #  -- paultag
                raise

        # imports
        if 'import' in args.actions:
            import_report = _do_imports(abbrev, args)
            scrape_data['imported'] = import_report
            # We're tying the run-logging into the import stage - since import
            # already writes to the DB, we might as well throw this in too.
            db.billy_runs.save(scrape_data, safe=True)

        # reports
        if 'report' in args.actions:
            _do_reports(abbrev, args)

        if 'session-list' in args.actions:
            if hasattr(module, 'session_list'):
                print("\n".join(module.session_list()))
            else:
                raise ScrapeError('session_list() is not defined')

    except ScrapeError as e:
        logging.getLogger('billy').critical('Error: %s', e)
        sys.exit(1)
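
The schema load above passes a bare open() call to json.load, leaving the file handle to be closed by the garbage collector. A context manager keeps the same behaviour while closing the handle deterministically; schema_dir here is illustrative:

import json
import os

def load_schema(schema_dir, name='metadata.json'):
    """Load a JSON schema file from schema_dir."""
    schema_path = os.path.join(schema_dir, name)
    with open(schema_path) as fh:
        return json.load(fh)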

Example 171

Project: otm-core Source File: perform_migration.py
    def handle(self, *args, **options):

        if settings.DEBUG:
            self.stdout.write('In order to run this command you must manually '
                              'set DEBUG=False in your settings file. '
                              'Unfortunately, django runs out of memory when '
                              'this command is run in DEBUG mode.')
            return 1

        if options['config_file']:
            config_data = json.load(open(options['config_file'], 'r'))
            for k, v in config_data.items():
                if not options.get(k, None):
                    options[k] = v

        rule_module = (options['rule_module'] or
                       'otm1_migrator.migration_rules.standard_otm1')
        migration_mod = importlib.import_module(rule_module)
        migration_rules = migration_mod.MIGRATION_RULES
        try:
            model_order = migration_mod.MODEL_ORDER
        except AttributeError:
            model_order = ORDER
        try:
            udfs = migration_mod.UDFS
        except AttributeError:
            udfs = {}

        # user photos live on userprofile in otm1
        userphoto_path = options.get('userphoto_path', None)
        user_photo_fixture_specified_but_not_base_path = (
            userphoto_path is None and
            options.get('userphoto_fixture') is not None)

        if user_photo_fixture_specified_but_not_base_path:
            raise MigrationException('Must specify the user photo path to '
                                     'import photos. please include a %s or '
                                     '%s flag when importing.'
                                     % USERPHOTO_ARGS)

        treephoto_path = options.get('treephoto_path', None)
        treephoto_fixture_with_no_path = (
            treephoto_path is None and
            options.get('treephoto_fixture') is not None)

        if treephoto_fixture_with_no_path:
            raise MigrationException('Must specify the tree photo path to '
                                     'import photo')

        ################################################
        # BEGIN SIDE EFFECTS
        ################################################

        migration_event = MigrationEvent.objects.create()

        if options['instance']:
            # initialize system_user??
            instance, __ = self.setup_env(*args, **options)
        else:
            migration_event.status = MigrationEvent.FAILURE
            migration_event.save()
            self.stdout.write('Invalid instance provided.')
            return 1

        create_udfs(udfs, instance)
        add_udfs_to_migration_rules(migration_rules, udfs, instance)

        relic_ids = {model: {} for model in migration_rules}

        def default_partial(fn, *args):
            return partial(fn, migration_rules, migration_event, *args)

        # TODO: should this be merged into MIGRATION_RULES?
        process_fns = {
            'boundary': default_partial(save_boundary),
            'user': default_partial(save_user),
            'audit': default_partial(save_audit, relic_ids),
            'species': default_partial(save_species),
            'plot': default_partial(save_plot),
            'tree': default_partial(save_tree),
            'treephoto': default_partial(save_treephoto, treephoto_path),
            'contenttype': default_partial(process_contenttype),
            'reputation': default_partial(process_reputation),
            'registrationprofile': default_partial(save_registrationprofile),
            'userprofile': default_partial(process_userprofile,
                                           userphoto_path),
            'threadedcomment': default_partial(save_threadedcomment,
                                               relic_ids),
            'comment': default_partial(save_comment, relic_ids),
            'treefavorite': default_partial(save_treefavorite),
        }

        user_relics = OTM1UserRelic.objects.filter(instance=instance)
        model_relics = (OTM1ModelRelic
                        .objects
                        .filter(instance=instance)
                        .iterator())

        comment_relics = (OTM1CommentRelic
                          .objects
                          .filter(instance=instance)
                          .iterator())

        def _rpad_string(desired_length, pad_char, string):
            return string + (desired_length - len(string)) * pad_char

        self.stdout.write(_rpad_string(50, ".", "Reading relics into memory"))
        # dependency_ids is a cache of old pks to new pks; it is inflated
        # from database records for performance.
        for relic in chain(user_relics, model_relics, comment_relics):
            model = relic.otm2_model_name
            otm1_id = relic.otm1_model_id
            relic_ids[model][otm1_id] = relic.otm2_model_id
        self.stdout.write(_rpad_string(50, ".",
                                       "Done reading relics into memory"))

        def _get_json_dict(model_name):
            """
            look for fixtures of the form '<model>_fixture' that
            were passed in as command line args and load them as
            python objects
            """
            option_name = model_name + '_fixture'
            if options[option_name] and os.path.exists(options[option_name]):
                model_file = open(options[option_name], 'r')
                self.stdout.write(
                    "%sSUCCESS" %
                    _rpad_string(50, ".",
                                 "Loaded fixture '%s'" % option_name))
                json_dict = json.load(model_file)
                model_file.close()
            else:
                self.stdout.write(
                    "%sSKIPPING" %
                    _rpad_string(50, ".",
                                 "No valid '%s' fixture " % model_name))
                json_dict = None
            return json_dict

        for model in model_order:
            json_dict = _get_json_dict(model)
            if json_dict:
                # dicts must be sorted by pk for the case of models
                # that have foreign keys to themselves
                sorted_dicts = sorted(json_dict,
                                      key=operator.itemgetter('pk'))
                try:
                    save_objects(migration_rules,
                                 model, sorted_dicts,
                                 relic_ids,
                                 process_fns[model],
                                 instance,
                                 message_receiver=print)
                except MigrationException:
                    migration_event.status = MigrationEvent.FAILURE
                    migration_event.save()
                    raise

        migration_event.status = MigrationEvent.SUCCESS
        migration_event.save()
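
The --config-file handling at the top of this example is a common pattern: parse a JSON file once with json.load and use its values only where the corresponding option was left unset. A minimal sketch with options as a plain dict:

import json

def apply_config_file(options, config_path):
    """Fill unset entries in options from a JSON config file."""
    with open(config_path) as fh:
        config_data = json.load(fh)
    for key, value in config_data.items():
        if not options.get(key):
            options[key] = value
    return options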

Example 172

Project: labuildings Source File: convert.py
def convert(buildingsFile, osmOut):
    with open(buildingsFile) as f:
        features = json.load(f)
    allAddresses = {}
    buildings = []
    buildingShapes = []
    buildingIdx = index.Index()

    # Returns the coordinates for this address
    def keyFromAddress(address):
        return str(address['geometry']['coordinates'][0]) + "," + str(address['geometry']['coordinates'][1])

    for feature in features:
        if feature['geometry']['type'] == 'Polygon' or feature['geometry']['type'] == 'MultiPolygon':
            extra_tags = osm_tags.get_osm_tags(feature)
            feature['properties']['osm'] = extra_tags
            buildings.append(feature)
            shape = asShape(feature['geometry'])
            buildingShapes.append(shape)
            buildingIdx.add(len(buildingShapes) - 1, shape.bounds)

        # These are the addresses that don't overlap any buildings
        elif feature['geometry']['type'] == 'Point':
            # The key is the coordinates of this address. Track how many addresses share these coords.
            key = keyFromAddress(feature)
            if key in allAddresses:
                allAddresses[key].append(feature)
            else:
                allAddresses[key] = [feature]

        else:
            print "geometry of unknown type:", feature['geometry']['type']

    # Generates a new osm id.
    osmIds = dict(node = -1, way = -1, rel = -1)
    def newOsmId(type):
        osmIds[type] = osmIds[type] - 1
        return osmIds[type]

    ## Formats multi part house numbers
    def formatHousenumber(p):
        def suffix(part1, part2, hyphen_type=None):
            #part1 = stripZeroes(part1)
            if not part2:
                return str(part1)
            #part2 = stripZeroes(part2)
            return str(part1) + ' ' + str(part2)
        #def stripZeroes(addr): # strip leading zeroes from numbers
        #    if addr.isdigit():
        #        addr = str(int(addr))
        #    if '-' in addr:
        #        try:
        #            addr2 = addr.split('-')
        #            if len(addr2) == 2:
        #                addr = str(int(addr2[0])) + '-' + str(int(addr2[1])).zfill(2)
        #        except:
        #            pass
        #    return addr
        number = suffix(p['Number'], p['NumSuffix'])
        if p['NumPrefix']:
            number = p['NumPrefix'] + number
        return number

    # Converts an address
    def convertAddress(address):
        result = dict()
        if all (k in address for k in ('Number', 'StreetName')):
            if address['Number']:
                result['addr:housenumber'] = formatHousenumber(address)
            if address['StreetName']:

                # Titlecase
                streetname = address['StreetName'].title()
                if address['StArticle']:
                    streetname = address['StArticle'].title() + " " + streetname
                if address['PreType']:
                    streetname = address['PreType'].title() + " " + streetname
                if address['PreDir']:
                    streetname = address['PreDir'].title() + " " + streetname
                if address['PreMod']:
                    streetname = address['PreMod'].title() + " " + streetname
                if address['PostType']:
                    streetname = streetname + " " + address['PostType'].title()
                if address['PostDir']:
                    streetname = streetname + " " + address['PostDir'].title()
                if address['PostMod']:
                    streetname = streetname + " " + address['PostMod'].title()

                # Fix titlecase on 1St, 2Nd, 3Rd, 4Th, etc
                streetname = re.sub(r"(.*)(\d+)St\s*(.*)", r"\1\2st \3", streetname)
                streetname = re.sub(r"(.*)(\d+)Nd\s*(.*)", r"\1\2nd \3", streetname)
                streetname = re.sub(r"(.*)(\d+)Rd\s*(.*)", r"\1\2rd \3", streetname)
                streetname = re.sub(r"(.*)(\d+)Th\s*(.*)", r"\1\2th \3", streetname)

                # Expand 'St ' -> 'Saint'
                # relevant for:
                #   'St Clair'
                #   'St Louis'
                #   'St James'
                #   'St James Park'
                #   'St Andrews'
                #   'St Nicolas'
                #   'St Cloud'
                #   'St Ambrose'
                #   'St Bonaventure'
                #   'St Joseph'
                #   'St Tropez'
                if streetname[0:3] == 'St ': streetname = 'Saint ' + streetname[3:]
                # Middle name expansions
                streetname = streetname.replace(' St ', ' Street ')
                streetname = streetname.replace(' Rd ', ' Road ')
                streetname = streetname.replace(' Blvd ', ' Boulevard ')
                result['addr:street'] = streetname
            if address['PCITY1']:
                result['addr:city'] = address['PCITY1'].title()
            elif address['LegalComm']:
                result['addr:city'] = address['LegalComm'].title()
            if address['ZipCode']:
                result['addr:postcode'] = str(int(address['ZipCode']))
            if address['UnitName']:
                result['addr:unit'] = address['UnitName']
        return result

    # Distills coincident addresses into one address where possible.
    # Takes an array of addresses and returns an array of 1 or more addresses
    def distillAddresses(addresses):
        # Only distill addresses if the following conditions are true:
        # 1) the addresses share the same coordinates.
        # AND
        # 2a) all the attributes are the same _except_ the unit number/name
        # OR
        # 2b) the street number is the same but the street names are referring to the same thing

        outputAddresses = []

        # First, group the addresses into separate lists for each unique location
        addressesByCoords = {}
        for address in addresses:
            key = keyFromAddress(address)
            if key in addressesByCoords:
                addressesByCoords[key].append(address)
            else:
                addressesByCoords[key] = [address]

        # loop over unique coordinates
        for key in addressesByCoords:
            # Here see if we can collapse any of these addresses at the same coords.

            # addressesByCoords[key] is an array of addresses at this location.

            # We are only looking for the 2 possibilities above (2a) and (2b).
            # If the situation is more complicated, change nothing.
            outputAddresses.extend(distillAddressesAtPoint(addressesByCoords[key]))

        return outputAddresses

    # This function is called by distillAddresses.
    # It assumes all addresses are at the same coordinates.
    # Returns an array of 1 or more addresses
    def distillAddressesAtPoint(addresses):

        if len(addresses) == 1:
            return addresses

        firstAddress = addresses[0]

        # (2a) If the first address is an apartment, see if all the rest are too.

        # NOTE: sometimes an apartment building has a few address points that lack a UnitName...
        # ...so checking for the presence of UnitName in firstAddress wouldn't always work.
        props = firstAddress['properties']
        if debug: print "Testing to see if these are apartments...", '\t'.join([str(props['Number']), str(props['NumSuffix']), str(props['PreType']), str(props['StreetName']), str(props['PostType']), str(props['UnitName'])])
        # Compare subsequent addresses in the array to the first address.
        # Hence, range starts at 1.
        for i in range(1, len(addresses)):
            if not areSameAddressExceptUnit(firstAddress, addresses[i]):
                props = addresses[i]['properties']
                if debug: print "No, this address was different...........", '\t'.join([str(props['Number']), str(props['NumSuffix']), str(props['PreType']), str(props['StreetName']), str(props['PostType']), str(props['UnitName'])])
                #print firstAddress
                #print addresses[i]
                break
            # else, keep going

        else: # else for the `for` statement. Executes only if `break` never did.
            # We checked them all, and they're all the same except UnitName.
            # In this case the apartment data is useless to OSM because the
            # apartment nodes are all on top of each other.
            # So, discard the unit information and return just one address.
            firstAddress['properties']['UnitName'] = None
            if debug: print "Yes they were apartments! Collapsed", len(addresses), "into one"
            return [firstAddress]

        # (2b) Check if the street number is all the same.
        # For this, we use a list of alternative names (like HWY 1, etc)...
        # ...and we need to know which canonical name to keep.
        if debug: print "Testing to see if the street names are synonyms.."
        canonicalStreetName = None
        for i in range(1, len(addresses)):
            props = addresses[i]['properties']
            if not areSameAddressExceptStreet(firstAddress, addresses[i]):
                if debug: print "No, this address was different...........", '\t'.join([str(props['Number']), str(props['NumSuffix']), str(props['PreType']), str(props['StreetName']), str(props['PostType']), str(props['UnitName'])])
                #print firstAddress
                #print addresses[i]
                break
            compoundStreetName = (str(props['PreType']),str(props['StreetName']),str(props['PostType']))
            currentCanonicalStreetName = getCanonicalName(compoundStreetName)
            if currentCanonicalStreetName:
                if debug: print "found canonical name", currentCanonicalStreetName
                if ((currentCanonicalStreetName == canonicalStreetName) or (canonicalStreetName == None)):
                    canonicalStreetName = currentCanonicalStreetName
                else:
                    if debug: print "canonicalStreetNames didn't match:", canonicalStreetName, currentCanonicalStreetName
                    break
            else:
                print "couldn't find canonicalStreetName for", compoundStreetName
                break

        else: # else for the `for` statement. Executes only if `break` never did.
            # We checked them all, and they're all the same except StreetName.
            # If we can determine that they are all the same synonym, we can
            # overwrite the other streetname information and return just one address.
            firstAddress['properties']['PreType'] = canonicalStreetName[0]
            firstAddress['properties']['StreetName'] = canonicalStreetName[1]
            firstAddress['properties']['PostType'] = canonicalStreetName[2]
            if debug: print "Yes they were synonyms! Collapsed", len(addresses), "into one"
            return [firstAddress]

    # This is only executed if neither of the two `else` statements executed
        # for the two `for` statements above. That means we were unable to collapse
        # separate apartments into one, or collapse synonymous street names into one.
        # So, instead of returning just one address, we fail and return all of them.
        return addresses

    def areSameAddressExceptUnit(a1, a2):
        for key in ['NumPrefix', 'Number', 'NumSuffix', 'PreMod', 'PreDir', 'PreType', 'StArticle', 'StreetName', 'PostType', 'PostDir', 'PostMod', 'ZipCode', 'LegalComm', 'PCITY1']:
            if a1['properties'][key] != a2['properties'][key]:
                #print key, a1['properties'][key], "!=", a2['properties'][key]
                return False
        return True

    def areSameAddressExceptStreet(a1, a2):
        for key in ['NumPrefix', 'Number', 'NumSuffix', 'PreMod', 'PreDir', 'StArticle', 'UnitName', 'PostDir', 'PostMod', 'ZipCode', 'LegalComm', 'PCITY1']:
            if a1['properties'][key] != a2['properties'][key]:
                #print key, a1['properties'][key], "!=", a2['properties'][key]
                return False
        return True

    # Sometimes we have identical addresses that differ only by street name.
    # Usually these are because the street name is also a highway. We want to 
    # remove all the highway names and only use the street name for the address
    canonicalNames = {
        ("None", "LINCOLN", "BOULEVARD"): (None, "LINCOLN", "BOULEVARD"),
        ("ROUTE", "1", "None"): (None, "LINCOLN", "BOULEVARD"),
        ("HIGHWAY", "1", "None"): (None, "LINCOLN", "BOULEVARD"),
        ("None", "SR-1", "None"): (None, "LINCOLN", "BOULEVARD"),
        ("None", "PCH", "None"): (None, "LINCOLN", "BOULEVARD"),
    }

    def getCanonicalName(compoundStreetName):
        result = None
        try:
            result = canonicalNames[compoundStreetName]
        except KeyError:
            return None
        return result

    # Appends new node or returns existing if exists.
    nodes = {}
    def appendNewNode(coords, osmXml):
        rlon = int(float(coords[0]*10**7))
        rlat = int(float(coords[1]*10**7))
        if (rlon, rlat) in nodes:
            return nodes[(rlon, rlat)]
        node = etree.Element('node', visible = 'true', id = str(newOsmId('node')))
        node.set('lon', str(Decimal(coords[0])*Decimal(1)))
        node.set('lat', str(Decimal(coords[1])*Decimal(1)))
        nodes[(rlon, rlat)] = node
        osmXml.append(node)
        return node

    # Sometimes we want to force overlapping nodes, such as with addresses.
    # This way they'll show up in JOSM and the contributor can deal with them manually.
    # Otherwise, we might try to apply multiple address tags to the same node...
    # ...which is also incorrect, but harder to detect.
    def appendNewNodeIgnoringExisting(coords, osmXml):
        rlon = int(float(coords[0]*10**7))
        rlat = int(float(coords[1]*10**7))
        #if (rlon, rlat) in nodes:
        #    return nodes[(rlon, rlat)]
        node = etree.Element('node', visible = 'true', id = str(newOsmId('node')))
        node.set('lon', str(Decimal(coords[0])*Decimal(1)))
        node.set('lat', str(Decimal(coords[1])*Decimal(1)))
        nodes[(rlon, rlat)] = node
        osmXml.append(node)
        return node

    def appendNewWay(coords, intersects, osmXml):
        way = etree.Element('way', visible='true', id=str(newOsmId('way')))
        firstNid = 0
        for i, coord in enumerate(coords):
            if i == 0: continue # the first and last coordinate are the same
            node = appendNewNode(coord, osmXml)
            if i == 1: firstNid = node.get('id')
            way.append(etree.Element('nd', ref=node.get('id')))

            # Check each way segment for intersecting nodes
            int_nodes = {}
            try:
                line = LineString([coord, coords[i+1]])
            except IndexError:
                line = LineString([coord, coords[1]])
            for idx, c in enumerate(intersects):
                if line.buffer(0.000001).contains(Point(c[0], c[1])) and c not in coords:
                    t_node = appendNewNode(c, osmXml)
                    for n in way.iter('nd'):
                        if n.get('ref') == t_node.get('id'):
                            break
                    else:
                        int_nodes[t_node.get('id')] = Point(c).distance(Point(coord))
            for n in sorted(int_nodes, key=lambda key: int_nodes[key]): # add intersecting nodes in order
                way.append(etree.Element('nd', ref=n))
            
        way.append(etree.Element('nd', ref=firstNid)) # close way
        osmXml.append(way)
        return way

    # Appends an address to a given node or way.
    def appendAddress(address, element):
    #    # Need to check if these tags already exist on this element
        for k, v in convertAddress(address['properties']).iteritems():
            # TODO: is this doing anything useful?
            #for child in element:
            #    if child.tag == 'tag':
            #        #print k, v
            #        if child.attrib.get('k') == k:
            #            print "found key", k
            #            if child.attrib.get('v') == v:
            #                print "found matching value", v
            element.append(etree.Element('tag', k=k, v=v))

    # Appends a building to a given OSM xml document.
    def appendBuilding(building, shape, address, osmXml):
        # Check for intersecting buildings
        intersects = []
        for i in buildingIdx.intersection(shape.bounds):
            try:
                for c in buildingShapes[i].exterior.coords:
                    if Point(c[0], c[1]).buffer(0.000001).intersects(shape):
                        intersects.append(c)
            except AttributeError:
                for c in buildingShapes[i][0].exterior.coords:
                    if Point(c[0], c[1]).buffer(0.000001).intersects(shape):
                        intersects.append(c)

        # Export building, create multipolygon if there are interior shapes.
        interiors = []
        try:
            way = appendNewWay(list(shape.exterior.coords), intersects, osmXml)
            for interior in shape.interiors:
                interiors.append(appendNewWay(list(interior.coords), [], osmXml))
        except AttributeError:
            way = appendNewWay(list(shape[0].exterior.coords), intersects, osmXml)
            for interior in shape[0].interiors:
                interiors.append(appendNewWay(list(interior.coords), [], osmXml))
        if len(interiors) > 0:
            relation = etree.Element('relation', visible='true', id=str(newOsmId('way')))
            relation.append(etree.Element('member', type='way', role='outer', ref=way.get('id')))
            for interior in interiors:
                relation.append(etree.Element('member', type='way', role='inner', ref=interior.get('id')))
            relation.append(etree.Element('tag', k='type', v='multipolygon'))
            osmXml.append(relation)
            way = relation
        for tag in building['properties']['osm']:
            value = building['properties']['osm'][tag]
            way.append(etree.Element('tag', k=tag, v=value))
        # if 'GeneralUse' in building['properties']:
        #     way.append(etree.Element('tag', k='building', v=building['properties']['GeneralUse']))
        # else:
        #     way.append(etree.Element('tag', k='building', v='yes'))
        # if 'SpecificUs' in building['properties']:
        #     way.append(etree.Element('tag', k='building:use', v=building['properties']['GeneralUse']))
        if 'YearBuilt' in building['properties'] and building['properties']['YearBuilt'] is not None:
            YearBuilt = int(building['properties']['YearBuilt'])
            if YearBuilt > 0:
                    way.append(etree.Element('tag', k='start_date', v=str(YearBuilt)))
        # if 'Specific_1' in building['properties']:
        #         way.append(etree.Element('tag', k='amenity', v=building['properties']['Specific_1']))
        if 'Units' in building['properties'] and building['properties']['Units'] is not None:
            units = int(round(float(building['properties']['Units']), 0))
            if units > 0:
                way.append(etree.Element('tag', k='building:units', v=str(units)))
        if 'HEIGHT' in building['properties']:
            height = round(((building['properties']['HEIGHT'] * 12) * 0.0254), 1)
            if height > 0:
                way.append(etree.Element('tag', k='height', v=str(height)))
        if 'ELEV' in building['properties']:
            elevation = round(((building['properties']['ELEV'] * 12) * 0.0254), 1)
            if elevation > 0:
                way.append(etree.Element('tag', k='ele', v=str(elevation)))
        if 'BLD_ID' in building['properties']:
            way.append(etree.Element('tag', k='lacounty:bld_id', v=str(building['properties']['BLD_ID'])))
        if 'AIN' in building['properties'] and building['properties']['AIN'] is not None:
            way.append(etree.Element('tag', k='lacounty:ain', v=str(building['properties']['AIN'])))
#        if address:
#            appendAddress(address, way)

    # Export buildings & addresses. Only export address with building if there is exactly
    # one address per building. Export remaining addresses as individual nodes.
    # The remaining addresses are added to a dictionary hashed by their coordinates.
    # This way we catch any addresses that have the same coordinates.
    osmXml = etree.Element('osm', version='0.6', generator='[email protected]')
    for i in range(0, len(buildings)):

        buildingAddresses = []
        for address in buildings[i]['properties']['addresses']:
            buildingAddresses.append(address)
        address = None
        if len(buildingAddresses) == 1:
            # There's only one address in the building footprint
            address = buildingAddresses[0]
        elif len(buildingAddresses) > 1:
            # If there are multiple addresses, first try to distill them.
            # If we can distill them to one address, we can still add it to this building.
            distilledAddresses = distillAddresses(buildingAddresses)
            if len(distilledAddresses) == 1:
                # We distilled down to one address. Add it to the building.
                address = distilledAddresses[0]
            else:
                # We could not distill down to one address. Instead export as nodes.
                for address in distilledAddresses:
                    # The key is the coordinates of this address. Track how many addresses share these coords.
                    key = keyFromAddress(address)
                    if key in allAddresses:
                        allAddresses[key].append(address)
                    else:
                        allAddresses[key] = [address]

        appendBuilding(buildings[i], buildingShapes[i], address, osmXml)


    # Export any addresses that aren't the only address for a building.
    if (len(allAddresses) > 0):

        # Iterate over the list of distinct coordinates found in the address data
        for coordskey in allAddresses:
            # if a distinct coordinate has only one associated address,
            # then export that address as a new node
            if len(allAddresses[coordskey]) == 1:
                address = allAddresses[coordskey][0]
                coordinates = address['geometry']['coordinates']
#                node = appendNewNode(coordinates, osmXml) # returns old node if one exists at these coords
#                appendAddress(address, node)

            # If there is more than one address at these coordinates, do something.
            # ...but do what exactly?
            else:
                distilledAddresses = distillAddresses(allAddresses[coordskey])
                if len(distilledAddresses) == 1:
                    # We distilled down to one address. Append it.
                    address = distilledAddresses[0]
                    coordinates = address['geometry']['coordinates']
#                    node = appendNewNode(coordinates, osmXml) # returns old node if one exists at these coords
#                    appendAddress(address, node)
                else:
                    if debug: print "found duplicate coordinates that could not be distilled:", coordskey, "has", len(allAddresses[coordskey]), "addresses"
                    if debug: print '\t'.join(["num", "numsufx", "pretype", "street", "posttype", "unit"])
                    for address in distilledAddresses:
                        # TODO: do something smart here. These are overlapping addresses that we couldn't distill.
                        # TODO: maybe jitter them, or leave stacked but with FIXME?
                        # TODO: For now, we use appendNewNodeIgnoringExisting to pile the nodes on top of each other.
                        #print address
                        props = address['properties']
                        if debug: print '\t'.join([str(props['Number']), str(props['NumSuffix']), str(props['PreType']), str(props['StreetName']), str(props['PostType']), str(props['UnitName'])])
                        coordinates = address['geometry']['coordinates']
#                        node = appendNewNodeIgnoringExisting(coordinates, osmXml) # Force overlapping nodes so JOSM will catch them
#                        appendAddress(address, node)

    with open(osmOut, 'w') as outFile:
        outFile.writelines(tostring(osmXml, pretty_print=True, xml_declaration=True, encoding='UTF-8'))
        print 'Exported ' + osmOut
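
This converter (and the one in the next example) starts by reading an entire GeoJSON export into memory with a single json.load call and then branching on each feature's geometry type. A stripped-down sketch of that first step; tolerating both a bare feature list and a FeatureCollection is an assumption, not something the example requires:

import json

def load_features(geojson_path):
    """Load a GeoJSON file and split its features by geometry type."""
    with open(geojson_path) as fh:
        collection = json.load(fh)
    # Accept either a FeatureCollection or a bare list of features.
    if isinstance(collection, dict):
        features = collection.get('features', [])
    else:
        features = collection
    polygons = [f for f in features
                if f['geometry']['type'] in ('Polygon', 'MultiPolygon')]
    points = [f for f in features if f['geometry']['type'] == 'Point']
    return polygons, points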

Example 173

Project: nycbuildings Source File: convert.py
def convert(buildingsFile, osmOut):
    with open(buildingsFile) as f:
        buildings = json.load(f)
    buildingShapes = []
    buildingIdx = index.Index()
    for building in buildings:
        shape = asShape(building['geometry'])
        buildingShapes.append(shape)
        buildingIdx.add(len(buildingShapes) - 1, shape.bounds)

    # Generates a new osm id.
    osmIds = dict(node = -1, way = -1, rel = -1)
    def newOsmId(type):
        osmIds[type] = osmIds[type] - 1
        return osmIds[type]

    ## Formats multi part house numbers
    def formatHousenumber(p):
        def suffix(part1, part2, hyphen_type=None):
            part1 = stripZeroes(part1)
            if not part2:
                return str(part1)
            part2 = stripZeroes(part2)
            if hyphen_type == 'U': # unit numbers
                return part1 + '-' + part2
            if len(part2) == 1 and part2.isalpha(): # single letter extensions
                return part1 + part2
            return part1 + ' ' + part2 # All others
        def stripZeroes(addr): # strip leading zeroes from numbers
            if addr.isdigit():
                addr = str(int(addr))
            if '-' in addr:
                try:
                    addr2 = addr.split('-')
                    if len(addr2) == 2:
                        addr = str(int(addr2[0])) + '-' + str(int(addr2[1])).zfill(2)
                except:
                    pass
            return addr
        number = suffix(p['HOUSE_NUMB'], p['HOUSE_NU_1'], p['HYPHEN_TYP'])
        return number

    # Converts an address
    def convertAddress(address):
        result = dict()
        if all (k in address for k in ('HOUSE_NUMB', 'STREET_NAM')):
            if address['HOUSE_NUMB']:
                result['addr:housenumber'] = formatHousenumber(address)
            if address['STREET_NAM']:
                streetname = address['STREET_NAM'].title()
                streetname = streetname.replace('F D R ', 'FDR ')
                # Expand Service Road
                # See https://github.com/osmlab/nycbuildings/issues/30
                streetname = re.sub(r"(.*)\bSr\b(.*)", r"\1Service Road\2", streetname)
                # Expand cardinal directions on Service Roads
                streetname = re.sub(r"(.*\bService Road\s)\bN\b(.*)", r"\1North\2", streetname)
                streetname = re.sub(r"(.*\bService Road\s)\bE\b(.*)", r"\1East\2", streetname)
                streetname = re.sub(r"(.*\bService Road\s)\bS\b(.*)", r"\1South\2", streetname)
                streetname = re.sub(r"(.*\bService Road\s)\bW\b(.*)", r"\1West\2", streetname)
                # Expand Expressway on Service Roads
                streetname = re.sub(r"(.*)Expwy\s\bN\b(.*)", r"\1Expressway North\2", streetname)
                streetname = re.sub(r"(.*)Expwy\s\bE\b(.*)", r"\1Expressway East\2", streetname)
                streetname = re.sub(r"(.*)Expwy\s\bS\b(.*)", r"\1Expressway South\2", streetname)
                streetname = re.sub(r"(.*)Expwy\s\bW\b(.*)", r"\1Expressway West\2", streetname)
                streetname = re.sub(r"(.*)Expwy(.*)", r"\1Expressway\2", streetname)
                # Add ordinal suffixes to numerals
                streetname = re.sub(r"(.*)(\d*11)\s+(.*)", r"\1\2th \3", streetname)
                streetname = re.sub(r"(.*)(\d*12)\s+(.*)", r"\1\2th \3", streetname)
                streetname = re.sub(r"(.*)(\d*13)\s+(.*)", r"\1\2th \3", streetname)
                streetname = re.sub(r"(.*)(\d*1)\s+(.*)", r"\1\2st \3", streetname)
                streetname = re.sub(r"(.*)(\d*2)\s+(.*)", r"\1\2nd \3", streetname)
                streetname = re.sub(r"(.*)(\d*3)\s+(.*)", r"\1\2rd \3", streetname)
                streetname = re.sub(r"(.*)(\d+)\s+(.*)", r"\1\2th \3", streetname)
                # Expand 'Ft' -> 'Fort'
                if streetname[0:3] == 'Ft ': streetname = 'Fort ' + streetname[3:]
                # Expand 'St ' -> 'Saint'
                if streetname[0:3] == 'St ': streetname = 'Saint ' + streetname[3:]
                # Expand 'Rev ' -> 'Reverend '
                if streetname[0:4] == 'Rev ': streetname = 'Reverend ' + streetname[4:]
                # Expand 'St John' -> 'Saint John' when it appears mid-name
                streetname = streetname.replace('St John', 'Saint John')
                # Middle name expansions
                streetname = streetname.replace(' St ', ' Street ')
                streetname = streetname.replace(' Rd ', ' Road ')
                streetname = streetname.replace(' Blvd ', ' Boulevard ')
                result['addr:street'] = streetname
            if address['ZIPCODE']:
                result['addr:postcode'] = str(int(address['ZIPCODE']))
        return result

    # Appends a new node, or returns the existing one if it already exists.
    nodes = {}
    def appendNewNode(coords, osmXml):
        rlon = int(float(coords[0]*10**7))
        rlat = int(float(coords[1]*10**7))
        if (rlon, rlat) in nodes:
            return nodes[(rlon, rlat)]
        node = etree.Element('node', visible = 'true', id = str(newOsmId('node')))
        node.set('lon', str(Decimal(coords[0])*Decimal(1)))
        node.set('lat', str(Decimal(coords[1])*Decimal(1)))
        nodes[(rlon, rlat)] = node
        osmXml.append(node)
        return node

    def appendNewWay(coords, intersects, osmXml):
        way = etree.Element('way', visible='true', id=str(newOsmId('way')))
        firstNid = 0
        for i, coord in enumerate(coords):
            if i == 0: continue # the first and last coordinate are the same
            node = appendNewNode(coord, osmXml)
            if i == 1: firstNid = node.get('id')
            way.append(etree.Element('nd', ref=node.get('id')))
            
            # Check each way segment for intersecting nodes
            int_nodes = {}
            try:
                line = LineString([coord, coords[i+1]])
            except IndexError:
                line = LineString([coord, coords[1]])
            for idx, c in enumerate(intersects):
                if line.buffer(0.000001).contains(Point(c[0], c[1])) and c not in coords:
                    t_node = appendNewNode(c, osmXml)
                    for n in way.iter('nd'):
                        if n.get('ref') == t_node.get('id'):
                            break
                    else:
                        int_nodes[t_node.get('id')] = Point(c).distance(Point(coord))
            for n in sorted(int_nodes, key=lambda key: int_nodes[key]): # add intersecting nodes in order
                way.append(etree.Element('nd', ref=n))
            
        way.append(etree.Element('nd', ref=firstNid)) # close way
        osmXml.append(way)
        return way

    # Appends an address to a given node or way.
    def appendAddress(address, element):
        for k, v in convertAddress(address['properties']).iteritems():
            element.append(etree.Element('tag', k=k, v=v))

    # Appends a building to a given OSM xml document.
    def appendBuilding(building, shape, address, osmXml):
        # Check for intersecting buildings
        intersects = []
        for i in buildingIdx.intersection(shape.bounds):
            try:
                for c in buildingShapes[i].exterior.coords:
                    if Point(c[0], c[1]).buffer(0.000001).intersects(shape):
                        intersects.append(c)
            except AttributeError:
                for c in buildingShapes[i][0].exterior.coords:
                    if Point(c[0], c[1]).buffer(0.000001).intersects(shape):
                        intersects.append(c)

        # Export building, create multipolygon if there are interior shapes.
        interiors = []
        try:
            way = appendNewWay(list(shape.exterior.coords), intersects, osmXml)
            for interior in shape.interiors:
                interiors.append(appendNewWay(list(interior.coords), [], osmXml))
        except AttributeError:
            way = appendNewWay(list(shape[0].exterior.coords), intersects, osmXml)
            for interior in shape[0].interiors:
                interiors.append(appendNewWay(list(interior.coords), [], osmXml))
        if len(interiors) > 0:
            relation = etree.Element('relation', visible='true', id=str(newOsmId('way')))
            relation.append(etree.Element('member', type='way', role='outer', ref=way.get('id')))
            for interior in interiors:
                relation.append(etree.Element('member', type='way', role='inner', ref=interior.get('id')))
            relation.append(etree.Element('tag', k='type', v='multipolygon'))
            osmXml.append(relation)
            way = relation
        way.append(etree.Element('tag', k='building', v='yes'))
        if 'HEIGHT_ROO' in building['properties']:
            height = round(((building['properties']['HEIGHT_ROO'] * 12) * 0.0254), 1)
            if height > 0:
                way.append(etree.Element('tag', k='height', v=str(height)))
        if 'BIN' in building['properties']:
            way.append(etree.Element('tag', k='nycdoitt:bin', v=str(building['properties']['BIN'])))
        if address: appendAddress(address, way)

    # Export buildings & addresses. Attach an address to a building only when it is
    # that building's sole address; export the remaining addresses as individual nodes.
    allAddresses = []
    osmXml = etree.Element('osm', version='0.6', generator='[email protected]')
    for i in range(0, len(buildings)):

        # Filter out special addresses categories A and B
        buildingAddresses = []
        for address in buildings[i]['properties']['addresses']:
            if address['properties']['SPECIAL_CO'] not in ['A', 'B']:
                buildingAddresses.append(address)
        address = None
        if len(buildingAddresses) == 1:
            address = buildingAddresses[0]
        else:
            allAddresses.extend(buildingAddresses)

        if int(buildings[i]['properties']['HEIGHT_ROO']) == 0:
            if buildingShapes[i].area > 1e-09:
                appendBuilding(buildings[i], buildingShapes[i], address, osmXml)
        else:
            appendBuilding(buildings[i], buildingShapes[i], address, osmXml)

    # Export any addresses that aren't the only address for a building.
    if (len(allAddresses) > 0):
        for address in allAddresses:
            node = appendNewNode(address['geometry']['coordinates'], osmXml)
            appendAddress(address, node)

    with open(osmOut, 'w') as outFile:
        outFile.writelines(tostring(osmXml, pretty_print=True, xml_declaration=True, encoding='UTF-8'))
        print 'Exported ' + osmOut

Example 174

Project: ansible-pan Source File: module_formatter.py
def main():

    p = optparse.OptionParser(
        version='%prog 1.0',
        usage='usage: %prog [options] arg1 arg2',
        description='Convert Ansible module DOCUMENTATION strings to other formats',
    )

    p.add_option("-A", "--ansible-version",
            action="store",
            dest="ansible_version",
            default="unknown",
            help="Ansible version number")
    p.add_option("-M", "--module-dir",
            action="store",
            dest="module_dir",
            default=MODULEDIR,
            help="Ansible modules/ directory")
    p.add_option("-T", "--template-dir",
            action="store",
            dest="template_dir",
            default="hacking/templates",
            help="directory containing Jinja2 templates")
    p.add_option("-t", "--type",
            action='store',
            dest='type',
            choices=['html', 'latex', 'man', 'rst', 'json'],
            default='latex',
            help="Output type")
    p.add_option("-m", "--module",
            action='append',
            default=[],
            dest='module_list',
            help="Add modules to process in module_dir")
    p.add_option("-v", "--verbose",
            action='store_true',
            default=False,
            help="Verbose")
    p.add_option("-o", "--output-dir",
            action="store",
            dest="output_dir",
            default=None,
            help="Output directory for module files")
    p.add_option("-I", "--includes-file",
            action="store",
            dest="includes_file",
            default=None,
            help="Create a file containing list of processed modules")
    p.add_option("-G", "--generate",
            action="store_true",
            dest="do_boilerplate",
            default=False,
            help="generate boilerplate DOCUMENTATION to stdout")
    p.add_option('-V', action='version', help='Show version number and exit')

    (options, args) = p.parse_args()

#    print "M: %s" % options.module_dir
#    print "t: %s" % options.type
#    print "m: %s" % options.module_list
#    print "v: %s" % options.verbose

    if options.do_boilerplate:
        boilerplate()
        sys.exit(0)

    if not options.module_dir:
        print "Need module_dir"
        sys.exit(1)
    if not os.path.exists(options.module_dir):
        print >>sys.stderr, "Module directory does not exist: %s" % options.module_dir
        sys.exit(1)


    if not options.template_dir:
        print "Need template_dir"
        sys.exit(1)

    env = Environment(loader=FileSystemLoader(options.template_dir),
        variable_start_string="@{",
        variable_end_string="}@",
        trim_blocks=True,
        )

    env.globals['xline'] = rst_xline

    if options.type == 'latex':
        env.filters['jpfunc'] = latex_ify
        template = env.get_template('latex.j2')
        outputname = "%s.tex"
        includecmt = "% generated code\n"
        includefmt = "\\input %s\n"
    if options.type == 'html':
        env.filters['jpfunc'] = html_ify
        template = env.get_template('html.j2')
        outputname = "%s.html"
        includecmt = ""
        includefmt = ""
    if options.type == 'man':
        env.filters['jpfunc'] = man_ify
        template = env.get_template('man.j2')
        outputname = "ansible.%s.3"
        includecmt = ""
        includefmt = ""
    if options.type == 'rst':
        env.filters['jpfunc'] = rst_ify
        env.filters['html_ify'] = html_ify
        env.filters['fmt'] = rst_fmt
        env.filters['xline'] = rst_xline
        template = env.get_template('rst.j2')
        outputname = "%s.rst"
        includecmt = RST_HEADER
        includefmt = "   modules/%s\n"
    if options.type == 'json':
        env.filters['jpfunc'] = json_ify
        outputname = "%s.json"
        includecmt = ""
        includefmt = ""
    if options.type == 'js':
        env.filters['jpfunc'] = js_ify
        template = env.get_template('js.j2')
        outputname = "%s.js"

    if options.includes_file is not None and includefmt != "":
        incfile = open(options.includes_file, "w")
        incfile.write(includecmt)

    # Temporary variable required to generate aggregated content in 'js' format.
    js_data = []
    for module in sorted(os.listdir(options.module_dir)):
        if len(options.module_list):
            if module not in options.module_list:
                continue

        fname = os.path.join(options.module_dir, module)
        extra = os.path.join("inc", "%s.tex" % module)

        if fname.endswith(".swp"):
            continue

        print " processing module source ---> %s" % fname

        if options.type == 'js':
            if fname.endswith(".json"):
                f = open(fname)
                j = json.load(f)
                f.close()
                js_data.append(j)
            continue

        doc, examples = get_docstring(fname, verbose=options.verbose)

        if doc is None and module not in BLACKLIST_MODULES:
            sys.stderr.write("*** ERROR: CORE MODULE MISSING DOCUMENTATION: %s ***\n" % module)
            #sys.exit(1)

        if doc is not None:

            doc['filename']         = fname
            doc['docuri']           = doc['module'].replace('_', '-')
            doc['now_date']         = datetime.date.today().strftime('%Y-%m-%d')
            doc['ansible_version']  = options.ansible_version

            if examples is not None:
                doc['plainexamples'] = examples

            if options.includes_file is not None and includefmt != "":
                incfile.write(includefmt % module)

            if options.verbose:
                print json.dumps(doc, indent=4)


            if options.type == 'latex':
                if os.path.exists(extra):
                    f = open(extra)
                    extradata = f.read()
                    f.close()
                    doc['extradata'] = extradata

            if options.type == 'json':
                text = json.dumps(doc, indent=2)
            else:
                text = template.render(doc)

            return_data(text, options, outputname, module)

    if options.type == 'js':
        docs = {}
        docs['json'] = json.dumps(js_data, indent=2)
        text = template.render(docs)
        return_data(text, options, outputname, 'modules')
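
The 'js' branch above reads each pre-rendered .json file with the usual open/parse/close sequence around json.load(). A minimal sketch of the same pattern using a context manager, so the handle is released even if parsing fails, is given below; the helper and variable names are placeholders rather than part of module_formatter.py.

import json

def load_json_doc(path):
    # Parse one JSON document from disk, mirroring the per-module
    # json.load(f) call in the 'js' branch above.
    with open(path) as f:
        return json.load(f)

# e.g. js_data = [load_json_doc(fname) for fname in json_files]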