Here are examples of the Python API `collections.defaultdict` taken from open-source projects. By voting up, you can indicate which examples are most useful and appropriate.
160 Examples
0
Example 151
Project: geoinference Source File: geocoder.py
def __init__(self,dataset="geonames"):
    """
    Initialize the "reverse_geocoder" and "geocoder" dictionaries from the
    selected gazetteer dataset. "geonames" is the default; "geolite",
    "geo-median", "google", and "dbpedia" are also supported.

    Raises NotImplementedError for an unrecognized dataset name.

    NOTE: this code uses dict.iteritems(), i.e. it is Python 2 code.
    """
    # (rounded_lat, rounded_lon) bin -> list of (lat, lon, name) candidates
    self.reverse_geocoder = defaultdict(list)
    # "city[\tregion][\tcountry]" name -> (lat, lon)
    self.geocoder = {}
    # state abbreviation -> full state name mapping
    # NOTE(review): direction of the mapping assumed from the name; confirm
    self.abbv_to_state = state_abbv_data()
    # Regex matching any state abbreviation as a whole word
    self.state_abbv_regex = re.compile(r'(\b' + (r'\b|\b'.join(self.abbv_to_state.keys())) + r'\b)')
    self.all_city_names = set()
    LOGGER.debug("Geocoder loading city-location mapping from %s" % (dataset))
    # If the user specifies GeoLite data or if they are using GPS data, for
    # which GeoLite is the default gazetteer
    if dataset == "geolite" or dataset == "geo-median":
        data = geolite_data()
        city_to_latlon = {}
        city_name_counts = collections.Counter()
        # NOTE(review): assumes columns [lat, lon, country, region, city];
        # first two rows are skipped (header?) -- confirm against the file
        for line in data[2:]:
            country_name = line[2].lower()
            region_name = line[3].lower()
            city_name = line[4].lower()
            if not city_name:
                continue
            lat = float(line[0])
            lon = float(line[1])
            # Keep track of how many times city names occur
            city_to_latlon[city_name] = (lat,lon)
            city_name_counts[city_name] += 1
            # sets bins of 0.01 accuracy of lat/lon for reverse_geocoding
            rounded_lat = round(lat,2)
            rounded_lon = round(lon,2)
            # builds the geocoder dictionary based on a city\tregion\tcountry format
            if city_name and region_name and country_name:
                city_region_country = city_name+"\t"+region_name+"\t"+country_name
                city_region = city_name+"\t"+region_name
                city_country = city_name+"\t"+country_name
                self.geocoder[city_region_country] = (lat,lon)
                self.geocoder[city_region] = (lat,lon)
                self.geocoder[city_country] = (lat,lon)
                self.reverse_geocoder[(rounded_lat,rounded_lon)].append((lat,lon,city_region_country))
                self.all_city_names.add(city_region_country)
            elif city_name and region_name:
                city_region = city_name+"\t"+region_name
                self.geocoder[city_region] = (lat,lon)
                # NOTE(review): unlike the "google"/"geonames" branches below,
                # this case does not add city_region to all_city_names --
                # possibly an oversight; confirm before relying on it
                self.reverse_geocoder[(rounded_lat,rounded_lon)].append((lat,lon,city_region))
            elif city_name and country_name:
                if city_name == country_name:
                    self.geocoder[city_name] = (lat,lon)
                    self.reverse_geocoder[(rounded_lat,rounded_lon)].append((lat,lon,city_name))
                    self.all_city_names.add(city_name)
                else:
                    city_country = city_name+"\t"+country_name
                    self.geocoder[city_country] = (lat,lon)
                    self.reverse_geocoder[(rounded_lat,rounded_lon)].append((lat,lon,city_country))
                    self.all_city_names.add(city_country)
        # If there was only ever one city with this name, allow it to be an
        # unambiguous lookup with just the city name
        unambiguous_cities = 0
        for city_name, (lat,lon) in city_to_latlon.iteritems():
            if city_name_counts[city_name] == 1:
                self.geocoder[city_name] = (lat,lon)
                unambiguous_cities += 1
        #print "Saw %d unambiguous cities in %s" % (unambiguous_cities, dataset)
    elif dataset == "google":
        data = google_data()
        city_to_latlon = {}
        city_name_counts = collections.Counter()
        # NOTE(review): assumes columns [lat, lon, country, region, ?, ?, city]
        # with one header row skipped -- confirm against the file
        for line in data[1:]:
            # TODO this city name should be formatted the same as incoming tweets
            city_name = line[6].lower()
            if not city_name:
                continue
            country_name = line[2].lower()
            region_name = line[3].lower()
            lat = float(line[0])
            lon = float(line[1])
            rounded_lat = round(lat,2)
            rounded_lon = round(lon,2)
            #self.reverse_geocoder[(rounded_lat,rounded_lon)].append((lat,lon,country_name,region_name,city_name))
            # Keep track of how many times city names occur
            city_to_latlon[city_name] = (lat,lon)
            city_name_counts[city_name] += 1
            if city_name and region_name and country_name:
                city_region_country = city_name+"\t"+region_name+"\t"+country_name
                city_region = city_name+"\t"+region_name
                city_country = city_name+"\t"+country_name
                self.geocoder[city_region_country] = (lat,lon)
                self.geocoder[city_region] = (lat,lon)
                self.geocoder[city_country] = (lat,lon)
                self.reverse_geocoder[(rounded_lat,rounded_lon)].append((lat,lon,city_region_country))
                self.all_city_names.add(city_region_country)
            elif city_name and region_name:
                city_region = city_name+"\t"+region_name
                self.geocoder[city_region] = (lat,lon)
                self.reverse_geocoder[(rounded_lat,rounded_lon)].append((lat,lon,city_region))
                self.all_city_names.add(city_region)
            elif city_name and country_name:
                if city_name == country_name:
                    self.geocoder[city_name] = (lat,lon)
                    self.reverse_geocoder[(rounded_lat,rounded_lon)].append((lat,lon,city_name))
                    self.all_city_names.add(city_name)
                else:
                    city_country = city_name+"\t"+country_name
                    self.geocoder[city_country] = (lat,lon)
                    self.reverse_geocoder[(rounded_lat,rounded_lon)].append((lat,lon,city_country))
                    self.all_city_names.add(city_country)
        # If there was only ever one city with this name, allow it to be an
        # unambiguous lookup with just the city name
        unambiguous_cities = 0
        for city_name, (lat,lon) in city_to_latlon.iteritems():
            if city_name_counts[city_name] == 1:
                self.geocoder[city_name] = (lat,lon)
                unambiguous_cities += 1
        #print "Saw %d unambiguous cities in %s" % (unambiguous_cities, dataset)
    elif dataset == "dbpedia":
        data = dbpedia_data()
        city_to_latlon = {}
        city_name_counts = collections.Counter()
        # NOTE(review): already_entered is populated but never consulted here
        # (dead "no duplicates" guard) -- kept as-is
        already_entered = set()
        line_no = 0
        # NOTE(review): assumes columns [city, states('|'-sep), country, lat, lon]
        for cols in data[1:]:
            line_no += 1
            if line_no % 1000000 == 0:
                LOGGER.debug("currently read %d locations from %s" %
                             (line_no, dataset))
            lat = cols[3]
            lon = cols[4]
            # Guard against weirdness
            if lat == 'NAN' or lon == 'NAN':
                continue
            try:
                lat = float(lat)
                lon = float(lon)
            except ValueError:
                continue
            # Ensure we can use this location if we're not allowing duplicates
            lat_lon = (lat, lon)
            already_entered.add(lat_lon)
            city = cols[0].lower()
            country = cols[2].lower()
            states = cols[1].lower().split('|')
            city_to_latlon[city] = (lat,lon)
            city_name_counts[city] += 1
            # __add_name registers the name in the forward/reverse indices
            self.__add_name(city + "\t" + country, lat_lon)
            if city == country:
                self.__add_name(city, lat_lon)
            for state in states:
                self.__add_name(city + "\t" + state + "\t" + country, lat_lon)
        # Only keep a bare city-name key when the name is unambiguous
        unambiguous_cities = 0
        for city_name, (lat,lon) in city_to_latlon.iteritems():
            if city_name_counts[city_name] == 1:
                self.geocoder[city_name] = (lat,lon)
                unambiguous_cities += 1
    elif dataset == "geonames":
        data = geonames_data()
        city_to_latlon = {}
        city_name_counts = collections.Counter()
        line_no = 0
        # NOTE(review): assumes columns [city, region, country, lat, lon]
        for line in data[1:]:
            # TODO this city name should be formatted the same as incoming tweets
            city_name = line[0].lower()
            if not city_name:
                continue
            line_no += 1
            if line_no % 1000000 == 0:
                LOGGER.debug("currently read %d locations from %s" %
                             (line_no, dataset))
            country_name = line[2].lower()
            region_name = line[1].lower()
            lat = float(line[3])
            lon = float(line[4])
            rounded_lat = round(lat,2)
            rounded_lon = round(lon,2)
            # Keep track of how many times city names occur
            city_to_latlon[city_name] = (lat,lon)
            city_name_counts[city_name] += 1
            if city_name and region_name and country_name:
                city_region_country = city_name+"\t"+region_name+"\t"+country_name
                city_region = city_name+"\t"+region_name
                city_country = city_name+"\t"+country_name
                self.geocoder[city_region_country] = (lat,lon)
                self.geocoder[city_region] = (lat,lon)
                self.geocoder[city_country] = (lat,lon)
                self.reverse_geocoder[(rounded_lat,rounded_lon)].append((lat,lon,city_region_country))
                self.all_city_names.add(city_region_country)
            elif city_name and region_name:
                city_region = city_name+"\t"+region_name
                self.geocoder[city_region] = (lat,lon)
                self.reverse_geocoder[(rounded_lat,rounded_lon)].append((lat,lon,city_region))
                self.all_city_names.add(city_region)
            elif city_name and country_name:
                if city_name == country_name:
                    self.geocoder[city_name] = (lat,lon)
                    self.reverse_geocoder[(rounded_lat,rounded_lon)].append((lat,lon,city_name))
                    self.all_city_names.add(city_name)
                else:
                    city_country = city_name+"\t"+country_name
                    self.geocoder[city_country] = (lat,lon)
                    self.reverse_geocoder[(rounded_lat,rounded_lon)].append((lat,lon,city_country))
                    self.all_city_names.add(city_country)
        # If there was only ever one city with this name, allow it to be an
        # unambiguous lookup with just the city name
        unambiguous_cities = 0
        for city_name, (lat,lon) in city_to_latlon.iteritems():
            if city_name_counts[city_name] == 1:
                self.geocoder[city_name] = (lat,lon)
                unambiguous_cities += 1
        #print "Saw %d unambiguous cities in %s" % (unambiguous_cities, dataset)
    else:
        raise NotImplementedError(dataset)
    # create a lower-case dictionary for noisy lookups
    self.lc_name_to_location = {}
    for name, (lat, lon) in self.geocoder.iteritems():
        self.lc_name_to_location[name.lower()] = (lat, lon)
    LOGGER.debug("Geocoder loaded %d locations from %s" %
                 (len(self.geocoder), dataset))
0
Example 152
Project: ansible-snmp-facts Source File: snmp_facts.py
def main():
    """Ansible module entry point: gather SNMP facts from a host.

    Validates the module arguments, builds the SNMP v2/v2c/v3 auth object,
    polls system and interface MIB variables, and exits the module with an
    ``ansible_facts`` dict (or fails via ``module.fail_json``).
    """
    module = AnsibleModule(
        argument_spec=dict(
            host=dict(required=True),
            version=dict(required=True, choices=['v2', 'v2c', 'v3']),
            community=dict(required=False, default=False),
            username=dict(required=False),
            level=dict(required=False, choices=['authNoPriv', 'authPriv']),
            integrity=dict(required=False, choices=['md5', 'sha']),
            privacy=dict(required=False, choices=['des', 'aes']),
            authkey=dict(required=False),
            privkey=dict(required=False),
            removeplaceholder=dict(required=False)),
        required_together=(['username', 'level', 'integrity', 'authkey'],
                           ['privacy', 'privkey'],),
        supports_check_mode=False)
    m_args = module.params
    if not has_pysnmp:
        module.fail_json(msg='Missing required pysnmp module (check docs)')
    cmdGen = cmdgen.CommandGenerator()
    # Verify that we receive a community when using snmp v2
    if m_args['version'] == "v2" or m_args['version'] == "v2c":
        if m_args['community'] == False:
            module.fail_json(msg='Community not set when using snmp version 2')
    if m_args['version'] == "v3":
        if m_args['username'] == None:
            module.fail_json(msg='Username not set when using snmp version 3')
        if m_args['level'] == "authPriv" and m_args['privacy'] == None:
            module.fail_json(msg='Privacy algorithm not set when using authPriv')
        # Map the requested algorithms onto pysnmp protocol objects
        if m_args['integrity'] == "sha":
            integrity_proto = cmdgen.usmHMACSHAAuthProtocol
        elif m_args['integrity'] == "md5":
            integrity_proto = cmdgen.usmHMACMD5AuthProtocol
        if m_args['privacy'] == "aes":
            privacy_proto = cmdgen.usmAesCfb128Protocol
        elif m_args['privacy'] == "des":
            privacy_proto = cmdgen.usmDESPrivProtocol
    # Use SNMP Version 2
    if m_args['version'] == "v2" or m_args['version'] == "v2c":
        snmp_auth = cmdgen.CommunityData(m_args['community'])
    # Use SNMP Version 3 with authNoPriv
    elif m_args['level'] == "authNoPriv":
        snmp_auth = cmdgen.UsmUserData(m_args['username'], authKey=m_args['authkey'], authProtocol=integrity_proto)
    # Use SNMP Version 3 with authPriv
    else:
        snmp_auth = cmdgen.UsmUserData(m_args['username'], authKey=m_args['authkey'], privKey=m_args['privkey'], authProtocol=integrity_proto, privProtocol=privacy_proto)
    # Use p to prefix OIDs with a dot for polling
    p = DefineOid(dotprefix=True)
    # Use v without a prefix to use with return values
    v = DefineOid(dotprefix=False)
    # Infinitely nesting defaultdict: lets us assign results[a][b][c] directly
    Tree = lambda: defaultdict(Tree)
    results = Tree()
    errorIndication, errorStatus, errorIndex, varBinds = cmdGen.getCmd(
        snmp_auth,
        cmdgen.UdpTransportTarget((m_args['host'], 161)),
        cmdgen.MibVariable(p.sysDescr,),
        cmdgen.MibVariable(p.sysObjectId,),
        cmdgen.MibVariable(p.sysUpTime,),
        cmdgen.MibVariable(p.sysContact,),
        cmdgen.MibVariable(p.sysName,),
        cmdgen.MibVariable(p.sysLocation,),
    )
    if errorIndication:
        module.fail_json(msg=str(errorIndication))
    for oid, val in varBinds:
        current_oid = oid.prettyPrint()
        current_val = val.prettyPrint()
        if current_oid == v.sysDescr:
            results['ansible_sysdescr'] = decode_hex(current_val)
        elif current_oid == v.sysObjectId:
            results['ansible_sysobjectid'] = current_val
        elif current_oid == v.sysUpTime:
            results['ansible_sysuptime'] = current_val
        elif current_oid == v.sysContact:
            results['ansible_syscontact'] = current_val
        elif current_oid == v.sysName:
            results['ansible_sysname'] = current_val
        elif current_oid == v.sysLocation:
            results['ansible_syslocation'] = current_val
    errorIndication, errorStatus, errorIndex, varTable = cmdGen.nextCmd(
        snmp_auth,
        cmdgen.UdpTransportTarget((m_args['host'], 161)),
        cmdgen.MibVariable(p.ifIndex,),
        cmdgen.MibVariable(p.ifDescr,),
        cmdgen.MibVariable(p.ifMtu,),
        cmdgen.MibVariable(p.ifSpeed,),
        cmdgen.MibVariable(p.ifPhysAddress,),
        cmdgen.MibVariable(p.ifAdminStatus,),
        cmdgen.MibVariable(p.ifOperStatus,),
        cmdgen.MibVariable(p.ipAdEntAddr,),
        cmdgen.MibVariable(p.ipAdEntIfIndex,),
        cmdgen.MibVariable(p.ipAdEntNetMask,),
        cmdgen.MibVariable(p.ifAlias,),
    )
    if errorIndication:
        module.fail_json(msg=str(errorIndication))
    interface_indexes = []
    all_ipv4_addresses = []
    ipv4_networks = Tree()
    for varBinds in varTable:
        for oid, val in varBinds:
            current_oid = oid.prettyPrint()
            current_val = val.prettyPrint()
            # Interface table columns: the ifIndex is the last OID component
            if v.ifIndex in current_oid:
                ifIndex = int(current_oid.rsplit('.', 1)[-1])
                results['ansible_interfaces'][ifIndex]['ifindex'] = current_val
                interface_indexes.append(ifIndex)
            if v.ifDescr in current_oid:
                ifIndex = int(current_oid.rsplit('.', 1)[-1])
                results['ansible_interfaces'][ifIndex]['name'] = current_val
            if v.ifMtu in current_oid:
                ifIndex = int(current_oid.rsplit('.', 1)[-1])
                results['ansible_interfaces'][ifIndex]['mtu'] = current_val
            # BUGFIX: this branch previously tested v.ifMtu again (copy-paste
            # error), so 'speed' was set from the MTU response and actual
            # ifSpeed values were never recorded.
            if v.ifSpeed in current_oid:
                ifIndex = int(current_oid.rsplit('.', 1)[-1])
                results['ansible_interfaces'][ifIndex]['speed'] = current_val
            if v.ifPhysAddress in current_oid:
                ifIndex = int(current_oid.rsplit('.', 1)[-1])
                results['ansible_interfaces'][ifIndex]['mac'] = decode_mac(current_val)
            if v.ifAdminStatus in current_oid:
                ifIndex = int(current_oid.rsplit('.', 1)[-1])
                results['ansible_interfaces'][ifIndex]['adminstatus'] = lookup_adminstatus(int(current_val))
            if v.ifOperStatus in current_oid:
                ifIndex = int(current_oid.rsplit('.', 1)[-1])
                results['ansible_interfaces'][ifIndex]['operstatus'] = lookup_operstatus(int(current_val))
            # IP address table columns: the IP is the last four OID components
            if v.ipAdEntAddr in current_oid:
                curIPList = current_oid.rsplit('.', 4)[-4:]
                curIP = ".".join(curIPList)
                ipv4_networks[curIP]['address'] = current_val
                all_ipv4_addresses.append(current_val)
            if v.ipAdEntIfIndex in current_oid:
                curIPList = current_oid.rsplit('.', 4)[-4:]
                curIP = ".".join(curIPList)
                ipv4_networks[curIP]['interface'] = current_val
            if v.ipAdEntNetMask in current_oid:
                curIPList = current_oid.rsplit('.', 4)[-4:]
                curIP = ".".join(curIPList)
                ipv4_networks[curIP]['netmask'] = current_val
            if v.ifAlias in current_oid:
                ifIndex = int(current_oid.rsplit('.', 1)[-1])
                results['ansible_interfaces'][ifIndex]['description'] = current_val
    # Group the collected IPv4 networks by owning interface index
    interface_to_ipv4 = {}
    for ipv4_network in ipv4_networks:
        current_interface = ipv4_networks[ipv4_network]['interface']
        current_network = {
            'address': ipv4_networks[ipv4_network]['address'],
            'netmask': ipv4_networks[ipv4_network]['netmask']
        }
        # (Previously an if/else whose branches were identical.)
        interface_to_ipv4.setdefault(current_interface, []).append(current_network)
    for interface in interface_to_ipv4:
        results['ansible_interfaces'][int(interface)]['ipv4'] = interface_to_ipv4[interface]
    results['ansible_all_ipv4_addresses'] = all_ipv4_addresses
    module.exit_json(ansible_facts=results)
0
Example 153
Project: networkx Source File: gml.py
def parse_gml_lines(lines, label, destringizer):
    """Parse GML into a graph.

    ``lines`` is an iterable of GML text lines; ``label`` names the node
    attribute used to relabel nodes (``'id'`` disables relabeling);
    ``destringizer``, if given, converts string values back to Python objects
    (a ValueError from it leaves the value as a string).
    Returns a Graph/DiGraph/MultiGraph/MultiDiGraph depending on the GML
    ``directed``/``multigraph`` flags. Raises NetworkXError on malformed input.
    """
    def tokenize():
        # Generator of (pattern_index, value, lineno, pos) tokens; the pattern
        # index doubles as the token category used by the parser below.
        patterns = [
            r'[A-Za-z][0-9A-Za-z_]*\b',  # keys
            r'[+-]?(?:[0-9]*\.[0-9]+|[0-9]+\.[0-9]*)(?:[Ee][+-]?[0-9]+)?',  # reals
            r'[+-]?[0-9]+',  # ints
            r'".*?"',  # strings
            r'\[',  # dict start
            r'\]',  # dict end
            r'#.*$|\s+'  # comments and whitespaces
        ]
        tokens = re.compile(
            '|'.join('(' + pattern + ')' for pattern in patterns))
        lineno = 0
        for line in lines:
            length = len(line)
            pos = 0
            while pos < length:
                match = tokens.match(line, pos)
                if match is not None:
                    # Find which alternative matched (group i+1 <-> pattern i)
                    for i in range(len(patterns)):
                        group = match.group(i + 1)
                        if group is not None:
                            if i == 0:  # keys
                                value = group.rstrip()
                            elif i == 1:  # reals
                                value = float(group)
                            elif i == 2:  # ints
                                value = int(group)
                            else:
                                value = group
                            if i != 6:  # skip comments and whitespaces
                                yield (i, value, lineno + 1, pos + 1)
                            pos += len(group)
                            break
                else:
                    raise NetworkXError('cannot tokenize %r at (%d, %d)' %
                                        (line[pos:], lineno + 1, pos + 1))
            lineno += 1
        # Sentinel token so the parser can detect end of input
        yield (None, None, lineno + 1, 1)  # EOF

    def unexpected(curr_token, expected):
        # Raise a uniform syntax error naming what was expected vs. found.
        category, value, lineno, pos = curr_token
        raise NetworkXError(
            'expected %s, found %s at (%d, %d)' %
            (expected, repr(value) if value is not None else 'EOF', lineno,
             pos))

    def consume(curr_token, category, expected):
        # Require the current token to have the given category; advance.
        if curr_token[0] == category:
            return next(tokens)
        unexpected(curr_token, expected)

    def parse_kv(curr_token):
        # Parse a run of key/value pairs; repeated keys accumulate in a list.
        dct = defaultdict(list)
        while curr_token[0] == 0:  # keys
            key = curr_token[1]
            curr_token = next(tokens)
            category = curr_token[0]
            if category == 1 or category == 2:  # reals or ints
                value = curr_token[1]
                curr_token = next(tokens)
            elif category == 3:  # strings
                value = unescape(curr_token[1][1:-1])
                if destringizer:
                    try:
                        value = destringizer(value)
                    except ValueError:
                        pass
                curr_token = next(tokens)
            elif category == 4:  # dict start
                curr_token, value = parse_dict(curr_token)
            else:
                unexpected(curr_token, "an int, float, string or '['")
            dct[key].append(value)
        # Unwrap single-element lists so unique keys map to scalars
        dct = {key: (value if not isinstance(value, list) or len(value) != 1
                     else value[0]) for key, value in dct.items()}
        return curr_token, dct

    def parse_dict(curr_token):
        # '[' key/value pairs ']'
        curr_token = consume(curr_token, 4, "'['")  # dict start
        curr_token, dct = parse_kv(curr_token)
        curr_token = consume(curr_token, 5, "']'")  # dict end
        return curr_token, dct

    def parse_graph():
        # Top-level: exactly one 'graph [...]' and then EOF.
        curr_token, dct = parse_kv(next(tokens))
        if curr_token[0] is not None:  # EOF
            unexpected(curr_token, 'EOF')
        if 'graph' not in dct:
            raise NetworkXError('input contains no graph')
        graph = dct['graph']
        if isinstance(graph, list):
            raise NetworkXError('input contains more than one graph')
        return graph

    tokens = tokenize()
    graph = parse_graph()
    directed = graph.pop('directed', False)
    multigraph = graph.pop('multigraph', False)
    if not multigraph:
        G = nx.DiGraph() if directed else nx.Graph()
    else:
        G = nx.MultiDiGraph() if directed else nx.MultiGraph()
    # Everything except node/edge entries becomes a graph-level attribute
    G.graph.update((key, value) for key, value in graph.items()
                   if key != 'node' and key != 'edge')

    def pop_attr(dct, category, attr, i):
        # Pop a required attribute, reporting which node/edge is missing it.
        try:
            return dct.pop(attr)
        except KeyError:
            raise NetworkXError(
                "%s #%d has no '%s' attribute" % (category, i, attr))

    nodes = graph.get('node', [])
    mapping = {}
    labels = set()
    for i, node in enumerate(nodes if isinstance(nodes, list) else [nodes]):
        # 'id' shadows the builtin here; kept as-is
        id = pop_attr(node, 'node', 'id', i)
        if id in G:
            raise NetworkXError('node id %r is duplicated' % (id,))
        if label != 'id':
            # NOTE: 'label' is rebound to each node's label inside this loop;
            # the final relabel check below relies on it staying != 'id'
            label = pop_attr(node, 'node', 'label', i)
            if label in labels:
                raise NetworkXError('node label %r is duplicated' % (label,))
            labels.add(label)
            mapping[id] = label
        G.add_node(id, **node)
    edges = graph.get('edge', [])
    for i, edge in enumerate(edges if isinstance(edges, list) else [edges]):
        source = pop_attr(edge, 'edge', 'source', i)
        target = pop_attr(edge, 'edge', 'target', i)
        if source not in G:
            raise NetworkXError(
                'edge #%d has an undefined source %r' % (i, source))
        if target not in G:
            raise NetworkXError(
                'edge #%d has an undefined target %r' % (i, target))
        if not multigraph:
            if not G.has_edge(source, target):
                G.add_edge(source, target, **edge)
            else:
                raise nx.NetworkXError(
                    'edge #%d (%r%s%r) is duplicated' %
                    (i, source, '->' if directed else '--', target))
        else:
            key = edge.pop('key', None)
            if key is not None and G.has_edge(source, target, key):
                raise nx.NetworkXError(
                    'edge #%d (%r%s%r, %r) is duplicated' %
                    (i, source, '->' if directed else '--', target, key))
            G.add_edge(source, target, key, **edge)
    if label != 'id':
        G = nx.relabel_nodes(G, mapping)
    return G
0
Example 154
Project: scancode-toolkit Source File: index.py
def _add_rules(self, rules, _ranked_tokens=global_tokens_by_ranks):
    """
    Add a list of Rule objects to the index and constructs optimized and
    immutable index structures.

    Rule ids (rids) are assigned implicitly as positions in the list. Raises
    an Exception if the index was already optimized, and AssertionError if
    duplicate rule texts are found.

    NOTE: uses izip(), i.e. this is Python 2 code.
    """
    if self.optimized:
        raise Exception('Index has been optimized and cannot be updated.')
    # this assigns the rule ids implicitly: this is the index in the list
    self.rules_by_rid = list(rules)
    #######################################################################
    # classify rules, collect tokens and frequencies
    #######################################################################
    # accumulate all rule tokens strings. This is used only during indexing
    token_strings_by_rid = []
    # collect the unique token strings and compute their global frequency
    # This is used only during indexing
    frequencies_by_token = Counter()
    for rid, rul in enumerate(self.rules_by_rid):
        rul_tokens = list(rul.tokens())
        token_strings_by_rid.append(rul_tokens)
        frequencies_by_token.update(rul_tokens)
        # assign the rid to the rule object for sanity
        rul.rid = rid
        # classify rules and build disjuncted sets of rids
        rul_len = rul.length
        if rul.false_positive:
            # false positive rules do not participate in the matches at all
            # they are used only in post-matching filtering
            self.false_positive_rids.add(rid)
            if rul_len > self.largest_false_positive_length:
                self.largest_false_positive_length = rul_len
        elif rul.negative():
            # negative rules are matched early and their exactly matched
            # tokens are removed from the token stream
            self.negative_rids.add(rid)
        elif rul.small():
            # small rules are best matched with a specialized approach
            self.small_rids.add(rid)
        else:
            # regular rules are matched using a common approach
            self.regular_rids.add(rid)
    # Create the tokens lookup structure at once. Note that tokens ids are
    # assigned randomly here at first by unzipping: we get the frequencies
    # and tokens->id at once this way
    tokens_by_tid, frequencies_by_tid = izip(*frequencies_by_token.items())
    self.tokens_by_tid = tokens_by_tid
    self.len_tokens = len_tokens = len(tokens_by_tid)
    assert len_tokens <= MAX_TOKENS, 'Cannot support more than licensedcode.index.MAX_TOKENS: %d' % MAX_TOKENS
    # initial dictionary mapping to old/random token ids
    self.dictionary = dictionary = {ts: tid for tid, ts in enumerate(tokens_by_tid)}
    sparsify(dictionary)
    # replace token strings with arbitrary (and temporary) random integer ids
    self.tids_by_rid = [[dictionary[tok] for tok in rule_tok] for rule_tok in token_strings_by_rid]
    #######################################################################
    # renumber token ids based on frequencies and common words
    #######################################################################
    renumbered = self.renumber_token_ids(frequencies_by_tid, _ranked_tokens)
    self.len_junk, self.dictionary, self.tokens_by_tid, self.tids_by_rid = renumbered
    len_junk, dictionary, tokens_by_tid, tids_by_rid = renumbered
    # tokens with tid >= len_junk are the "good"/high tokens
    self.len_good = len_good = len_tokens - len_junk
    #######################################################################
    # build index structures
    #######################################################################
    len_rules = len(self.rules_by_rid)
    # since we only use these for regular rules, these lists may be sparse
    # their index is the rule rid
    self.high_postings_by_rid = [None for _ in range(len_rules)]
    self.tids_sets_by_rid = [None for _ in range(len_rules)]
    self.tids_msets_by_rid = [None for _ in range(len_rules)]
    # track all duplicate rules: fail and report dupes at once at the end
    dupe_rules_by_hash = defaultdict(list)
    # build closures for methods that populate automatons
    negative_automaton_add = partial(match_aho.add_sequence, automaton=self.negative_automaton)
    rules_automaton_add = partial(match_aho.add_sequence, automaton=self.rules_automaton)
    # build by-rule index structures over the token ids seq of each rule
    for rid, rule_token_ids in enumerate(tids_by_rid):
        rule = self.rules_by_rid[rid]
        # build hashes index and check for duplicates rule texts
        rule_hash = index_hash(rule_token_ids)
        dupe_rules_by_hash[rule_hash].append(rule)
        if rule.false_positive:
            # FP rules are not used for any matching
            # there is nothing else for these rules
            self.false_positive_rid_by_hash[rule_hash] = rid
        else:
            # negative, small and regular
            # update hashes index
            self.rid_by_hash[rule_hash] = rid
            # update high postings index: positions by high tids
            # TODO: this could be optimized with a group_by
            postings = defaultdict(list)
            for pos, tid in enumerate(rule_token_ids):
                if tid >= len_junk:
                    postings[tid].append(pos)
            # OPTIMIZED: for speed and memory: convert postings to arrays
            postings = {tid: array('h', value) for tid, value in postings.items()}
            # OPTIMIZED: for speed, sparsify dict
            sparsify(postings)
            self.high_postings_by_rid[rid] = postings
            # build high and low tids sets and multisets
            rlow_set, rhigh_set, rlow_mset, rhigh_mset = index_token_sets(rule_token_ids, len_junk, len_good)
            self.tids_sets_by_rid[rid] = rlow_set, rhigh_set
            self.tids_msets_by_rid[rid] = rlow_mset, rhigh_mset
            # populate automatons...
            if rule.negative():
                # ... with only the whole rule tokens sequence
                negative_automaton_add(tids=rule_token_ids, rid=rid)
            else:
                # ... or with the whole rule tokens sequence
                rules_automaton_add(tids=rule_token_ids, rid=rid)
                # ... and ngrams: compute ngrams and populate the automaton with ngrams
                if USE_AHO_FRAGMENTS and not rule.is_url and not rule.solid and len(rule_token_ids) > NGRAM_LEN:
                    all_ngrams = ngrams(rule_token_ids, ngram_length=NGRAM_LEN)
                    selected_ngrams = select_ngrams(all_ngrams, with_pos=True)
                    for pos, ngram in selected_ngrams:
                        rules_automaton_add(tids=ngram, rid=rid, start=pos)
            # update rule thresholds
            rule.low_unique = tids_set_counter(rlow_set)
            rule.high_unique = tids_set_counter(rhigh_set)
            rule.length_unique = rule.high_unique + rule.low_unique
            rule.low_length = tids_multiset_counter(rlow_mset)
            rule.high_length = tids_multiset_counter(rhigh_mset)
            assert rule.length == rule.low_length + rule.high_length
    # finalize automatons
    self.negative_automaton.make_automaton()
    self.rules_automaton.make_automaton()
    # sparser dicts for faster lookup
    sparsify(self.rid_by_hash)
    sparsify(self.false_positive_rid_by_hash)
    # report all duplicated rule texts at once
    dupe_rules = [rules for rules in dupe_rules_by_hash.values() if len(rules) > 1]
    if dupe_rules:
        dupe_rule_paths = [['file://' + rule.text_file for rule in rules] for rules in dupe_rules]
        msg = (u'Duplicate rules: \n' + u'\n'.join(map(repr, dupe_rule_paths)))
        raise AssertionError(msg)
    self.optimized = True
0
Example 155
Project: eyed3 Source File: fixup.py
def handleDirectory(self, directory, _):
    """Fix up the tags and file/directory names of one album directory.

    Works through the audio files cached by earlier per-file handling:
    ensures tags exist, resolves album/artist/date info (prompting the user
    where needed), normalizes track numbers, strips USER/PRIV frames, sets
    TLEN and album type, then proposes file/directory renames and cruft
    removals and applies everything unless --dry-run was given.
    """
    if not self._file_cache:
        return
    directory = os.path.abspath(directory)
    print("\n" + Style.BRIGHT + Fore.GREY +
          "Scanning directory%s %s" % (Style.RESET_ALL, directory))

    def _path(af):
        # sort key: the audio file's path
        return af.path

    self._handled_one = True
    # Make sure all of the audio files has a tag.
    for f in self._file_cache:
        if f.tag is None:
            f.initTag()
    audio_files = sorted(list(self._file_cache), key=_path)
    self._file_cache = []
    edited_files = set()
    self._curr_dir_type = self.args.dir_type
    if self._curr_dir_type is None:
        # Infer the directory type when all files agree on one album type
        types = set([a.tag.album_type for a in audio_files])
        if len(types) == 1:
            self._curr_dir_type = types.pop()
    # Check for corrections to LP, EP, COMP
    if (self._curr_dir_type is None and len(audio_files) < EP_MAX_HINT):
        # Do you want EP?
        if False in [a.tag.album_type == EP_TYPE for a in audio_files]:
            if prompt("Only %d audio files, process directory as an EP" %
                      len(audio_files),
                      default=True):
                self._curr_dir_type = EP_TYPE
        else:
            # All files already tagged as EP: no need to ask
            self._curr_dir_type = EP_TYPE
    elif (self._curr_dir_type in (EP_TYPE, DEMO_TYPE) and
          len(audio_files) > EP_MAX_HINT):
        # Do you want LP?
        if prompt("%d audio files is large for type %s, process "
                  "directory as an LP" % (len(audio_files),
                                          self._curr_dir_type),
                  default=True):
            self._curr_dir_type = LP_TYPE
    # Remembered prompt answers (e.g. last artist entered); defaults to None
    last = defaultdict(lambda: None)
    album_artist = None
    artists = set()
    album = None
    if self._curr_dir_type != SINGLE_TYPE:
        album_artist, artists = self._resolveArtistInfo(audio_files)
        print(Fore.BLUE + u"Album artist: " + Style.RESET_ALL +
              (album_artist or u""))
        print(Fore.BLUE + "Artist" + ("s" if len(artists) > 1 else "") +
              ": " + Style.RESET_ALL + u", ".join(artists))
        album = self._getAlbum(audio_files)
        print(Fore.BLUE + "Album: " + Style.RESET_ALL + album)
    rel_date, orel_date, rec_date = self._getDates(audio_files)
    for what, d in [("Release", rel_date),
                    ("Original", orel_date),
                    ("Recording", rec_date)]:
        print(Fore.BLUE + ("%s date: " % what) + Style.RESET_ALL +
              str(d))
    num_audio_files = len(audio_files)
    track_nums = set([f.tag.track_num[0] for f in audio_files])
    # Renumber when the existing track numbers are not exactly 1..N
    fix_track_nums = set(range(1, num_audio_files + 1)) != track_nums
    new_track_nums = []
    dir_type = self._curr_dir_type
    for f in sorted(audio_files, key=_path):
        print(Style.BRIGHT + Fore.GREEN + u"Checking" + Fore.RESET +
              Fore.GREY + (" %s" % os.path.basename(f.path)) +
              Style.RESET_ALL)
        if not f.tag:
            print("\tAdding new tag")
            f.initTag()
            edited_files.add(f)
        tag = f.tag
        if tag.version != ID3_V2_4:
            print("\tConverting to ID3 v2.4")
            tag.version = ID3_V2_4
            edited_files.add(f)
        if (dir_type != SINGLE_TYPE and album_artist != tag.album_artist):
            print(u"\tSetting album artist: %s" % album_artist)
            tag.album_artist = album_artist
            edited_files.add(f)
        if not tag.artist and dir_type in (VARIOUS_TYPE, SINGLE_TYPE):
            # Prompt artist
            tag.artist = prompt("Artist name", default=last["artist"])
            last["artist"] = tag.artist
        elif len(artists) == 1 and tag.artist != artists[0]:
            assert(dir_type != SINGLE_TYPE)
            print(u"\tSetting artist: %s" % artists[0])
            tag.artist = artists[0]
            edited_files.add(f)
        if tag.album != album and dir_type != SINGLE_TYPE:
            print(u"\tSetting album: %s" % album)
            tag.album = album
            edited_files.add(f)
        orig_title = tag.title
        if not tag.title:
            tag.title = prompt("Track title")
        tag.title = tag.title.strip()
        if self.args.fix_case:
            tag.title = _fixCase(tag.title)
        if orig_title != tag.title:
            print(u"\tSetting title: %s" % tag.title)
            edited_files.add(f)
        if dir_type != SINGLE_TYPE:
            # Track numbers
            tnum, ttot = tag.track_num
            update = False
            if ttot != num_audio_files:
                update = True
                ttot = num_audio_files
            if fix_track_nums or not (1 <= tnum <= num_audio_files):
                tnum = None
            # Prompt until a valid, unused track number is given
            while tnum is None:
                tnum = int(prompt("Track #", type_=int))
                if not (1 <= tnum <= num_audio_files):
                    print(Fore.RED + "Out of range: " + Fore.RESET +
                          "1 <= %d <= %d" % (tnum, num_audio_files))
                    tnum = None
                elif tnum in new_track_nums:
                    print(Fore.RED + "Duplicate value: " + Fore.RESET +
                          str(tnum))
                    tnum = None
                else:
                    update = True
            new_track_nums.append(tnum)
            if update:
                tag.track_num = (tnum, ttot)
                print("\tSetting track numbers: %s" % str(tag.track_num))
                edited_files.add(f)
        else:
            # Singles get no track numbers
            if tag.track_num != (None, None):
                tag.track_num = (None, None)
                edited_files.add(f)
        if dir_type != SINGLE_TYPE:
            # Dates
            if rec_date and tag.recording_date != rec_date:
                print("\tSetting %s date (%s)" %
                      ("recording", str(rec_date)))
                tag.recording_date = rec_date
                edited_files.add(f)
            if rel_date and tag.release_date != rel_date:
                print("\tSetting %s date (%s)" % ("release", str(rel_date)))
                tag.release_date = rel_date
                edited_files.add(f)
            if orel_date and tag.original_release_date != orel_date:
                print("\tSetting %s date (%s)" % ("original release",
                                                  str(orel_date)))
                tag.original_release_date = orel_date
                edited_files.add(f)
        # Strip personal/opaque frames
        for frame in list(tag.frameiter(["USER", "PRIV"])):
            print("\tRemoving %s frames: %s" %
                  (frame.id,
                   frame.owner_id if frame.id == b"PRIV" else frame.text))
            tag.frame_set[frame.id].remove(frame)
            edited_files.add(f)
        # Add TLEN (track length in milliseconds)
        tlen = tag.getTextFrame("TLEN")
        real_tlen = f.info.time_secs * 1000
        if tlen is None or int(tlen) != real_tlen:
            print("\tSetting TLEN (%d)" % real_tlen)
            tag.setTextFrame("TLEN", UnicodeType(real_tlen))
            edited_files.add(f)
        # Add custom album type if special and otherwise not able to be
        # determined.
        curr_type = tag.album_type
        if curr_type != dir_type:
            print("\tSetting %s = %s" % (TXXX_ALBUM_TYPE, dir_type))
            tag.album_type = dir_type
            edited_files.add(f)
    try:
        if not self._checkCoverArt(directory, audio_files):
            if not prompt("Proceed without valid cover file", default=True):
                return
    finally:
        self._dir_images = []
    # Determine other changes, like file and/or directory renames
    # so they can be reported before save confirmation.
    # File renaming
    file_renames = []
    if self.args.file_rename_pattern:
        format_str = self.args.file_rename_pattern
    else:
        if dir_type == SINGLE_TYPE:
            format_str = SINGLE_FNAME_FORMAT
        elif dir_type in (VARIOUS_TYPE, COMP_TYPE):
            format_str = VARIOUS_FNAME_FORMAT
        else:
            format_str = NORMAL_FNAME_FORMAT
    for f in audio_files:
        orig_name, orig_ext = os.path.splitext(os.path.basename(f.path))
        new_name = TagTemplate(format_str).substitute(f.tag, zeropad=True)
        if orig_name != new_name:
            printMsg(u"Rename file to %s%s" % (new_name, orig_ext))
            file_renames.append((f, new_name, orig_ext))
    # Directory renaming
    dir_rename = None
    if dir_type != SINGLE_TYPE:
        if self.args.dir_rename_pattern:
            dir_format = self.args.dir_rename_pattern
        else:
            if dir_type == LIVE_TYPE:
                dir_format = LIVE_DNAME_FORMAT
            else:
                dir_format = NORMAL_DNAME_FORMAT
        template = TagTemplate(dir_format,
                               dotted_dates=self.args.dotted_dates)
        pref_dir = template.substitute(audio_files[0].tag, zeropad=True)
        if os.path.basename(directory) != pref_dir:
            new_dir = os.path.join(os.path.dirname(directory), pref_dir)
            printMsg("Rename directory to %s" % new_dir)
            dir_rename = (directory, new_dir)
    # Cruft files to remove
    file_removes = []
    if self._dir_files_to_remove:
        for f in self._dir_files_to_remove:
            print("Remove file: " + os.path.basename(f))
            file_removes.append(f)
    self._dir_files_to_remove = set()
    if not self.args.dry_run:
        confirmed = False
        if (edited_files or file_renames or dir_rename or file_removes):
            confirmed = prompt("\nSave changes", default=True)
        if confirmed:
            for f in edited_files:
                print(u"Saving %s" % os.path.basename(f.path))
                f.tag.save(version=ID3_V2_4, preserve_file_time=True)
            for f, new_name, orig_ext in file_renames:
                printMsg(u"Renaming file to %s%s" % (new_name, orig_ext))
                f.rename(new_name, preserve_file_time=True)
            if file_removes:
                for f in file_removes:
                    printMsg("Removing file %s" % os.path.basename(f))
                    os.remove(f)
            if dir_rename:
                printMsg("Renaming directory to %s" % dir_rename[1])
                s = os.stat(dir_rename[0])
                os.rename(dir_rename[0], dir_rename[1])
                # With a rename use the original access time
                os.utime(dir_rename[1], (s.st_atime, s.st_atime))
    else:
        printMsg("\nNo changes made (run without -n/--dry-run)")
0
Example 156
Project: kmodes Source File: kprototypes.py
def k_prototypes(X, categorical, n_clusters, max_iter, num_dissim, cat_dissim,
                 gamma, init, n_init, verbose):
    """k-prototypes algorithm.

    Clusters mixed numerical/categorical data by combining k-means (for the
    numerical columns) with k-modes (for the categorical columns), weighing
    the categorical dissimilarity by ``gamma``.

    Parameters:
        X: 2-D data array; must be dense (sparse input raises TypeError).
        categorical: index or list of indices of the categorical columns.
        n_clusters: number of clusters (may be reduced if X has fewer
            unique rows).
        max_iter: maximum number of iterations per initialization.
        num_dissim / cat_dissim: dissimilarity functions for the numerical
            and categorical parts respectively.
        gamma: weight of categorical dissimilarity; if None it is estimated
            as half the standard deviation of the numerical columns
            (see Huang [1997]).
        init: 'Huang', 'Cao', 'random', or a list of two initial centroid
            arrays [numerical, categorical].
        n_init: number of random restarts; the lowest-cost run is returned.
        verbose: print progress when truthy.

    Returns:
        (centroids, enc_map, labels, cost, n_iter, gamma) for the best run.
        gamma is returned in case it was automatically determined.
    """
    if sparse.issparse(X):
        raise TypeError("k-prototypes does not support sparse data.")
    if categorical is None or not categorical:
        raise NotImplementedError(
            "No categorical data selected, effectively doing k-means. "
            "Present a list of categorical columns, or use scikit-learn's "
            "KMeans instead."
        )
    if isinstance(categorical, int):
        categorical = [categorical]
    assert len(categorical) != X.shape[1], \
        "All columns are categorical, use k-modes instead of k-prototypes."
    assert max(categorical) < X.shape[1], \
        "Categorical index larger than number of columns."

    ncatattrs = len(categorical)
    nnumattrs = X.shape[1] - ncatattrs
    npoints = X.shape[0]
    assert n_clusters <= npoints, "More clusters than data points?"

    Xnum, Xcat = _split_num_cat(X, categorical)
    Xnum, Xcat = check_array(Xnum), check_array(Xcat, dtype=None)

    # Convert the categorical values in Xcat to integers for speed.
    # Based on the unique values in Xcat, we can make a mapping to achieve this.
    Xcat, enc_map = encode_features(Xcat)

    # Are there more n_clusters than unique rows? Then set the unique
    # rows as initial values and skip iteration.
    unique = get_unique_rows(X)
    n_unique = unique.shape[0]
    if n_unique <= n_clusters:
        max_iter = 0
        n_init = 1
        n_clusters = n_unique
        init = list(_split_num_cat(unique, categorical))
        # Re-encode the categorical part with the mapping computed above so
        # the initial centroids match the encoded data.
        init[1], _ = encode_features(init[1], enc_map)

    # Estimate a good value for gamma, which determines the weighing of
    # categorical values in clusters (see Huang [1997]).
    if gamma is None:
        gamma = 0.5 * Xnum.std()

    all_centroids = []
    all_labels = []
    all_costs = []
    all_n_iters = []
    for init_no in range(n_init):
        # For numerical part of initialization, we don't have a guarantee
        # that there is not an empty cluster, so we need to retry until
        # there is none.
        init_tries = 0
        while True:
            init_tries += 1
            # _____ INIT _____
            if verbose:
                print("Init: initializing centroids")
            if isinstance(init, str) and init == 'Huang':
                centroids = kmodes.init_huang(Xcat, n_clusters, cat_dissim)
            elif isinstance(init, str) and init == 'Cao':
                centroids = kmodes.init_cao(Xcat, n_clusters, cat_dissim)
            elif isinstance(init, str) and init == 'random':
                # NOTE(review): sampled without replace=False, so duplicate
                # seed points are possible; the empty-cluster retry loop
                # below compensates — confirm against upstream behavior.
                seeds = np.random.choice(range(npoints), n_clusters)
                centroids = Xcat[seeds]
            elif isinstance(init, list):
                # Make sure inits are 2D arrays.
                init = [np.atleast_2d(cur_init).T if len(cur_init.shape) == 1
                        else cur_init
                        for cur_init in init]
                assert init[0].shape[0] == n_clusters, \
                    "Wrong number of initial numerical centroids in init " \
                    "({}, should be {}).".format(init[0].shape[0], n_clusters)
                assert init[0].shape[1] == nnumattrs, \
                    "Wrong number of numerical attributes in init ({}, should be {})."\
                    .format(init[0].shape[1], nnumattrs)
                assert init[1].shape[0] == n_clusters, \
                    "Wrong number of initial categorical centroids in init ({}, " \
                    "should be {}).".format(init[1].shape[0], n_clusters)
                assert init[1].shape[1] == ncatattrs, \
                    "Wrong number of categorical attributes in init ({}, should be {})."\
                    .format(init[1].shape[1], ncatattrs)
                centroids = [np.asarray(init[0], dtype=np.float64),
                             np.asarray(init[1], dtype=np.uint8)]
            else:
                raise NotImplementedError("Initialization method not supported.")

            if not isinstance(init, list):
                # Numerical is initialized by drawing from normal distribution,
                # categorical following the k-modes methods.
                meanx = np.mean(Xnum, axis=0)
                stdx = np.std(Xnum, axis=0)
                centroids = [
                    meanx + np.random.randn(n_clusters, nnumattrs) * stdx,
                    centroids
                ]

            if verbose:
                print("Init: initializing clusters")
            membship = np.zeros((n_clusters, npoints), dtype=np.uint8)
            # Keep track of the sum of attribute values per cluster so that we
            # can do k-means on the numerical attributes.
            cl_attr_sum = np.zeros((n_clusters, nnumattrs), dtype=np.float64)
            # cl_attr_freq is a list of lists with dictionaries that contain
            # the frequencies of values per cluster and attribute.
            cl_attr_freq = [[defaultdict(int) for _ in range(ncatattrs)]
                            for _ in range(n_clusters)]
            for ipoint in range(npoints):
                # Initial assignment to clusters
                clust = np.argmin(
                    num_dissim(centroids[0], Xnum[ipoint]) +
                    gamma * cat_dissim(centroids[1], Xcat[ipoint])
                )
                membship[clust, ipoint] = 1
                # Count attribute values per cluster.
                for iattr, curattr in enumerate(Xnum[ipoint]):
                    cl_attr_sum[clust, iattr] += curattr
                for iattr, curattr in enumerate(Xcat[ipoint]):
                    cl_attr_freq[clust][iattr][curattr] += 1

            # If no empty clusters, then consider initialization finalized.
            if membship.sum(axis=1).min() > 0:
                break
            if init_tries == MAX_INIT_TRIES:
                # Could not get rid of empty clusters. Randomly
                # initialize instead.
                init = 'random'
            elif init_tries == RAISE_INIT_TRIES:
                raise ValueError(
                    "Clustering algorithm could not initialize. "
                    "Consider assigning the initial clusters manually."
                )

        # Perform an initial centroid update.
        for ik in range(n_clusters):
            for iattr in range(nnumattrs):
                centroids[0][ik, iattr] = \
                    cl_attr_sum[ik, iattr] / sum(membship[ik, :])
            for iattr in range(ncatattrs):
                centroids[1][ik, iattr] = \
                    get_max_value_key(cl_attr_freq[ik][iattr])

        # _____ ITERATION _____
        if verbose:
            print("Starting iterations...")
        itr = 0
        converged = False
        # FIX: np.Inf was removed in NumPy 2.0; np.inf is the portable spelling.
        cost = np.inf
        while itr <= max_iter and not converged:
            itr += 1
            centroids, moves = _k_prototypes_iter(Xnum, Xcat, centroids,
                                                  cl_attr_sum, cl_attr_freq,
                                                  membship, num_dissim, cat_dissim, gamma)
            # All points seen in this iteration
            labels, ncost = _labels_cost(Xnum, Xcat, centroids,
                                         num_dissim, cat_dissim, gamma)
            # Converged when nothing moved or the cost stopped improving.
            converged = (moves == 0) or (ncost >= cost)
            cost = ncost
            if verbose:
                print("Run: {}, iteration: {}/{}, moves: {}, ncost: {}"
                      .format(init_no + 1, itr, max_iter, moves, ncost))

        # Store results of current run.
        all_centroids.append(centroids)
        all_labels.append(labels)
        all_costs.append(cost)
        all_n_iters.append(itr)

    best = np.argmin(all_costs)
    if n_init > 1 and verbose:
        print("Best run was number {}".format(best + 1))

    # Note: return gamma in case it was automatically determined.
    return all_centroids[best], enc_map, all_labels[best], \
        all_costs[best], all_n_iters[best], gamma
0
Example 157
def next(self, record, curInputBookmark):
    """Return the next aggregated record, if any.

    Parameters:
    ------------------------------------------------------------------------
    record:           The input record (values only) from the input source, or
                      None if the input has reached EOF (this will cause this
                      method to force completion of and return any partially
                      aggregated time period)
    curInputBookmark: The bookmark to the next input record
    retval:
      (outputRecord, inputBookmark)

      outputRecord: the aggregated record
      inputBookmark: a bookmark to the last position from the input that
                     contributed to this aggregated record.

      If we don't have any aggregated records yet, returns (None, None)

    The caller should generally do a loop like this:
      while True:
        inRecord = reader.getNextRecord()
        bookmark = reader.getBookmark()
        (aggRecord, aggBookmark) = aggregator.next(inRecord, bookmark)
        # reached EOF?
        if inRecord is None and aggRecord is None:
          break
        if aggRecord is not None:
          processRecord(aggRecord, aggBookmark)

    This method makes use of the self._slice member variable to build up
    the values we need to aggregate. This is a dict of lists. The keys are
    the field indices and the elements of each list are the values for that
    field. For example:
      self._slice = { 0: [42, 53], 1: [4.0, 5.1] }
    """
    # This will hold the aggregated record we return
    outRecord = None
    # This will hold the bookmark of the last input used within the
    # aggregated record we return.
    retInputBookmark = None

    if record is not None:
        # Increment input count
        self._inIdx += 1

        # Apply the filter, ignore the record if any field is unacceptable
        if self._filter != None and not self._filter[0](self._filter[1], record):
            return (None, None)

        # If no aggregation info just return as-is
        if self._nullAggregation:
            return (record, curInputBookmark)

        # ----------------------------------------------------------------------
        # Do aggregation
        #
        # Remember the very first record time stamp - it will be used as
        # the timestamp for all first records in all sequences to align
        # times for the aggregation/join of sequences.
        #
        # For a set of aggregated records, it will use the beginning of the time
        # window as a timestamp for the set
        #
        t = record[self._timeFieldIdx]

        if self._firstSequenceStartTime == None:
            self._firstSequenceStartTime = t

        # Create initial startTime and endTime if needed
        if self._startTime is None:
            self._startTime = t
        if self._endTime is None:
            self._endTime = self._getEndTime(t)
        assert self._endTime > t

        # ----------------------------------------------------------------------
        # Does this record have a reset signal or sequence Id associated with it?
        # If so, see if we've reached a sequence boundary
        if self._resetFieldIdx is not None:
            resetSignal = record[self._resetFieldIdx]
        else:
            resetSignal = None

        if self._sequenceIdFieldIdx is not None:
            currSequenceId = record[self._sequenceIdFieldIdx]
        else:
            currSequenceId = None

        # A new sequence starts on an explicit reset, on a change of sequence
        # id, or on the very first input record.
        newSequence = (resetSignal == 1 and self._inIdx > 0) \
                        or self._sequenceId != currSequenceId \
                        or self._inIdx == 0

        if newSequence:
            self._sequenceId = currSequenceId

        # --------------------------------------------------------------------
        # We end the aggregation chunk if we go past the end time
        # -OR- we get an out of order record (t < startTime)
        sliceEnded = (t >= self._endTime or t < self._startTime)

        # -------------------------------------------------------------------
        # Time to generate a new output record?
        if (newSequence or sliceEnded) and len(self._slice) > 0:
            # Create aggregated record.
            # Make first record timestamp as the beginning of the time period,
            # in case the first record wasn't falling on the beginning of the period
            for j, f in enumerate(self._fields):
                index = f[0]
                if index == self._timeFieldIdx:
                    self._slice[j][0] = self._startTime
                    break

            # Generate the aggregated record
            outRecord = self._createAggregateRecord()
            retInputBookmark = self._aggrInputBookmark

            # Reset the slice
            self._slice = defaultdict(list)

        # --------------------------------------------------------------------
        # Add current record to slice (Note keeping slices in memory). Each
        # field in the slice is a list of field values from all the sliced
        # records
        for j, f in enumerate(self._fields):
            index = f[0]
            # append the parsed field value to the proper aggregated slice field.
            self._slice[j].append(record[index])
            self._aggrInputBookmark = curInputBookmark

        # --------------------------------------------------------------------
        # If we've encountered a new sequence, start aggregation over again
        if newSequence:
            # TODO: May use self._firstSequenceStartTime as a start for the new
            # sequence (to align all sequences)
            self._startTime = t
            self._endTime = self._getEndTime(t)

        # --------------------------------------------------------------------
        # If a slice just ended, re-compute the start and end time for the
        # next aggregated record
        if sliceEnded:
            # Did we receive an out of order record? If so, go back and iterate
            # till we get to the next end time boundary.
            if t < self._startTime:
                self._endTime = self._firstSequenceStartTime
            while t >= self._endTime:
                self._startTime = self._endTime
                self._endTime = self._getEndTime(self._endTime)

        # If we have a record to return, do it now
        if outRecord is not None:
            return (outRecord, retInputBookmark)

    # ---------------------------------------------------------------------
    # Input reached EOF
    # Aggregate one last time in the end if necessary
    elif self._slice:
        # Make first record timestamp as the beginning of the time period,
        # in case the first record wasn't falling on the beginning of the period
        for j, f in enumerate(self._fields):
            index = f[0]
            if index == self._timeFieldIdx:
                self._slice[j][0] = self._startTime
                break

        outRecord = self._createAggregateRecord()
        retInputBookmark = self._aggrInputBookmark
        self._slice = defaultdict(list)

    # Return aggregated record
    return (outRecord, retInputBookmark)
0
Example 158
Project: nupic Source File: aggregator.py
def __init__(self, aggregationInfo, inputFields, timeFieldName=None,
             sequenceIdFieldName=None, resetFieldName=None, filterInfo=None):
    """Construct an aggregator instance.

    Params:
    - aggregationInfo: a dictionary that contains the following entries
      - fields: a list of pairs. Each pair is a field name and an
        aggregation function (e.g. sum). The function will be used to aggregate
        multiple values during the aggregation period.
      - aggregation period: 0 or more of unit=value fields; allowed units are:
        [years months] | [weeks days hours minutes seconds milliseconds
        microseconds]
        NOTE: years and months are mutually-exclusive with the other units. See
        getEndTime() and _aggregate() for more details.
        Example1: years=1, months=6,
        Example2: hours=1, minutes=30,
        If none of the period fields are specified or if all that are specified
        have values of 0, then aggregation will be suppressed, and the given
        inputFile parameter value will be returned.
    - inputFields: The fields from the data source. This is a sequence of
      `nupic.data.fieldmeta.FieldMetaInfo` instances.
    - timeFieldName: name of the field to use as the time field. If None,
      then the time field will be queried from the reader.
    - sequenceIdFieldName: name of the field to use as the sequenceId. If None,
      then the time field will be queried from the reader.
    - resetFieldName: name of the field to use as the reset field. If None,
      then the time field will be queried from the reader.
    - filterInfo: a structure with rules for filtering records out

    If the input file contains a time field, sequence id field or reset field
    that were not specified in aggregationInfo fields, those fields will be
    added automatically with the following rules:
    1. The order will be R, S, T, rest of the fields
    2. The aggregation function for these will be to pick the first:
       lambda x: x[0]
    """
    # -----------------------------------------------------------------------
    # Save member variables.
    # The same aggregationInfo dict may be used by the caller for generating
    # more datasets (with slight changes), so it is safer to copy it here and
    # all changes made here will not affect the input aggregationInfo
    self._filterInfo = filterInfo
    self._inputFields = inputFields

    # See if this is a null aggregation: either no aggregationInfo at all, or
    # an aggregation period where every unit is zero.
    self._nullAggregation = False
    if aggregationInfo is None:
        self._nullAggregation = True
    else:
        # Missing period units default to 0.
        aggDef = defaultdict(lambda: 0, aggregationInfo)
        if (aggDef['years'] == aggDef['months'] == aggDef['weeks'] ==
                aggDef['days'] == aggDef['hours'] == aggDef['minutes'] ==
                aggDef['seconds'] == aggDef['milliseconds'] ==
                aggDef['microseconds'] == 0):
            self._nullAggregation = True

    # Prepare the field filtering info. The filter allows us to ignore records
    # based on specified min or max values for each field.
    self._filter = initFilter(self._inputFields, self._filterInfo)

    # ----------------------------------------------------------------------
    # Fill in defaults
    self._fields = None
    self._resetFieldIdx = None
    self._timeFieldIdx = None
    self._sequenceIdFieldIdx = None
    self._aggTimeDelta = datetime.timedelta()
    self._aggYears = 0
    self._aggMonths = 0

    # Init state variables used within next()
    self._aggrInputBookmark = None
    self._startTime = None
    self._endTime = None
    self._sequenceId = None
    self._firstSequenceStartTime = None
    self._inIdx = -1
    self._slice = defaultdict(list)

    # ========================================================================
    # Get aggregation params
    # self._fields will be a list of tuples: (fieldIdx, funcPtr, funcParam)
    if not self._nullAggregation:
        # ---------------------------------------------------------------------
        # Verify that all aggregation field names exist in the input
        fieldNames = [f[0] for f in aggregationInfo['fields']]
        readerFieldNames = [f[0] for f in self._inputFields]
        for name in fieldNames:
            if name not in readerFieldNames:
                raise Exception('No such input field: %s' % (name))

        # ---------------------------------------------------------------------
        # Get the indices of the special fields, if given to our constructor
        if timeFieldName is not None:
            self._timeFieldIdx = readerFieldNames.index(timeFieldName)
        if resetFieldName is not None:
            self._resetFieldIdx = readerFieldNames.index(resetFieldName)
        if sequenceIdFieldName is not None:
            self._sequenceIdFieldIdx = readerFieldNames.index(sequenceIdFieldName)

        # ---------------------------------------------------------------------
        # Re-order the fields to match the order in the reader and add in any
        # fields from the reader that were not already in the aggregationInfo
        # fields list.
        self._fields = []
        fieldIdx = -1
        # NOTE: renamed the tuple's second element from `type` (shadows the
        # builtin) to `ftype`; it is not otherwise used.
        for (name, ftype, special) in self._inputFields:
            fieldIdx += 1

            # See if it exists in the aggregationInfo
            found = False
            for field in aggregationInfo['fields']:
                if field[0] == name:
                    aggFunctionName = field[1]
                    found = True
                    break
            if not found:
                aggFunctionName = 'first'

            # Convert to a function pointer and optional params
            (funcPtr, params) = self._getFuncPtrAndParams(aggFunctionName)

            # Add it
            self._fields.append((fieldIdx, funcPtr, params))

            # Is it a special field that we are still looking for?
            if special == FieldMetaSpecial.reset and self._resetFieldIdx is None:
                self._resetFieldIdx = fieldIdx
            if special == FieldMetaSpecial.timestamp and self._timeFieldIdx is None:
                self._timeFieldIdx = fieldIdx
            if (special == FieldMetaSpecial.sequence and
                    self._sequenceIdFieldIdx is None):
                self._sequenceIdFieldIdx = fieldIdx

        assert self._timeFieldIdx is not None, "No time field was found"

        # Create an instance of _AggregationPeriod with the aggregation period
        self._aggTimeDelta = datetime.timedelta(days=aggDef['days'],
                                                hours=aggDef['hours'],
                                                minutes=aggDef['minutes'],
                                                seconds=aggDef['seconds'],
                                                milliseconds=aggDef['milliseconds'],
                                                microseconds=aggDef['microseconds'],
                                                weeks=aggDef['weeks'])
        self._aggYears = aggDef['years']
        self._aggMonths = aggDef['months']
        # years/months are handled separately and cannot be combined with a
        # plain timedelta period.
        if self._aggTimeDelta:
            assert self._aggYears == 0
            assert self._aggMonths == 0
0
Example 159
Project: openstates Source File: bills.py
def scrape(self, session, chambers):
    """Scrape all bills for a legislative session from the GA SOAP API.

    For each piece of legislation: builds a Bill with its votes, actions
    (mapped to openstates action types), sponsors, and document versions,
    then saves it.
    """
    # Maps the suffix of the API's document-type prefix to a bill type.
    bill_type_map = {
        'B': 'bill',
        'R': 'resolution',
        'JR': 'joint resolution',
        'CR': 'concurrent resolution',
    }
    # Maps the leading letter of a document type / action code to a chamber.
    chamber_map = {
        'H': 'lower',
        'S': 'upper',
        'J': 'joint',
        'E': 'other',  # Effective date
    }
    action_code_map = {
        'HI': ['other'],
        'SI': ['other'],
        'HH': ['other'],
        'SH': ['other'],
        'HPF': ['bill:introduced'],
        'HDSAS': ['other'],
        'SPF': ['bill:introduced'],
        'HSR': ['bill:reading:2'],
        'SSR': ['bill:reading:2'],
        'HFR': ['bill:reading:1'],
        'SFR': ['bill:reading:1'],
        'HRECM': ['bill:withdrawn', 'committee:referred'],
        'SRECM': ['bill:withdrawn', 'committee:referred'],
        'SW&C': ['bill:withdrawn', 'committee:referred'],
        'HW&C': ['bill:withdrawn', 'committee:referred'],
        'HRA': ['bill:passed'],
        'SRA': ['bill:passed'],
        'HPA': ['bill:passed'],
        'HRECO': ['other'],
        'SPA': ['bill:passed'],
        'HTABL': ['other'],  # 'House Tabled' - what is this?
        'SDHAS': ['other'],
        'HCFR': ['committee:passed:favorable'],
        'SCFR': ['committee:passed:favorable'],
        'HRAR': ['committee:referred'],
        'SRAR': ['committee:referred'],
        'STR': ['bill:reading:3'],
        'SAHAS': ['other'],
        'SE': ['bill:passed'],
        'SR': ['committee:referred'],
        'HTRL': ['bill:reading:3', 'bill:failed'],
        'HTR': ['bill:reading:3'],
        'S3RLT': ['bill:reading:3', 'bill:failed'],
        'HASAS': ['other'],
        'S3RPP': ['other'],
        'STAB': ['other'],
        'SRECO': ['other'],
        'SAPPT': ['other'],
        'HCA': ['other'],
        'HNOM': ['other'],
        'HTT': ['other'],
        'STT': ['other'],
        'SRECP': ['other'],
        'SCRA': ['other'],
        'SNOM': ['other'],
        'S2R': ['bill:reading:2'],
        'H2R': ['bill:reading:2'],
        'SENG': ['bill:passed'],
        'HENG': ['bill:passed'],
        'HPOST': ['other'],
        'HCAP': ['other'],
        'SDSG': ['governor:signed'],
        'SSG': ['governor:received'],
        'Signed Gov': ['governor:signed'],
        'HDSG': ['governor:signed'],
        'HSG': ['governor:received'],
        'EFF': ['other'],
        'HRP': ['other'],
        'STH': ['other'],
        'HTS': ['other'],
    }

    sid = self.metadata['session_details'][session]['_guid']
    # backoff() retries the SOAP call on transient failures.
    legislation = backoff(
        self.lservice.GetLegislationForSession,
        sid
    )['LegislationIndex']
    for leg in legislation:
        lid = leg['Id']
        instrument = backoff(self.lservice.GetLegislationDetail, lid)
        history = [x for x in instrument['StatusHistory'][0]]

        # The API returns history newest-first; reverse to chronological order.
        actions = reversed([{
            'code': x['Code'],
            'action': x['Description'],
            '_guid': x['Id'],
            'date': x['Date']
        } for x in history])

        guid = instrument['Id']

        # A little bit hacky.
        # NOTE(review): 'Docuement*' keys appear to be misspellings in the
        # upstream API payload itself — do not "correct" them here.
        bill_prefix = instrument['DocuementType']
        bill_chamber = chamber_map[bill_prefix[0]]
        bill_type = bill_type_map[bill_prefix[1:]]

        bill_id = '%s %s' % (
            bill_prefix,
            instrument['Number'],
        )
        if instrument['Suffix']:
            bill_id += instrument['Suffix']

        title = instrument['Caption']
        description = instrument['Summary']

        # Bills without a caption cannot be saved; skip them.
        if title is None:
            continue

        bill = Bill(session, bill_chamber, bill_id, title, type=bill_type,
                    description=description, _guid=guid)

        if instrument['Votes']:
            for vote_ in instrument['Votes']:
                _, vote_ = vote_
                vote_ = backoff(self.vservice.GetVote, vote_[0]['VoteId'])

                vote = Vote(
                    {'House': 'lower', 'Senate': 'upper'}[vote_['Branch']],
                    vote_['Date'],
                    vote_['Caption'] or 'Vote on Bill',
                    (vote_['Yeas'] > vote_['Nays']),
                    vote_['Yeas'],
                    vote_['Nays'],
                    (vote_['Excused'] + vote_['NotVoting']),
                    session=session,
                    bill_id=bill_id,
                    bill_chamber=bill_chamber)
                vote.add_source(self.vsource)

                # Anything other than Yea/Nay is recorded as "other".
                methods = {'Yea': vote.yes, 'Nay': vote.no,}
                for vdetail in vote_['Votes'][0]:
                    whom = vdetail['Member']
                    how = vdetail['MemberVoted']
                    try:
                        m = methods[how]
                    except KeyError:
                        m = vote.other
                    m(whom['Name'])

                bill.add_vote(vote)

        # Group committee names by chamber for attaching to actions below.
        ccommittees = defaultdict(list)
        committees = instrument['Committees']
        if committees:
            for committee in committees[0]:
                ccommittees[{
                    'House': 'lower',
                    'Senate': 'upper',
                }[committee['Type']]].append(committee['Name'])

        for action in actions:
            action_chamber = chamber_map[action['code'][0]]
            try:
                action_types = action_code_map[action['code']]
            except KeyError:
                # Unknown codes are logged and filed as 'other' rather than
                # aborting the scrape.
                error_msg = ('Code {code} for action {action} not '
                             'recognized.'.format(
                                 code=action['code'],
                                 action=action['action']))
                self.logger.warning(error_msg)
                action_types = ['other']

            committees = []
            if any(('committee' in x for x in action_types)):
                committees = [str(x) for x in ccommittees.get(
                    action_chamber, [])]

            bill.add_action(action_chamber, action['action'],
                            action['date'], action_types, committees=committees,
                            _code=action['code'], _code_id=action['_guid'])

        sponsors = []
        if instrument['Authors']:
            sponsors = instrument['Authors']['Sponsorship']
        if 'Sponsors' in instrument and instrument['Sponsors']:
            sponsors += instrument['Sponsors']['Sponsorship']

        sponsors = [
            (x['Type'], self.get_member(x['MemberId'])) for x in sponsors
        ]
        for typ, sponsor in sponsors:
            name = '{First} {Last}'.format(**dict(sponsor['Name']))
            # BUG FIX: sponsorship type was misspelled 'seconday'.
            bill.add_sponsor(
                'primary' if 'Author' in typ else 'secondary',
                name
            )

        for version in instrument['Versions']['DocuementDescription']:
            name, url, doc_id, version_id = [
                version[x] for x in [
                    'Description',
                    'Url',
                    'Id',
                    'Version'
                ]
            ]
            bill.add_version(
                name,
                url,
                mimetype='application/pdf',
                _internal_docuement_id=doc_id,
                _version_id=version_id
            )

        versions = sorted(
            bill['versions'],
            key=lambda x: x['_internal_docuement_id']
        )
        bill['versions'] = versions

        bill.add_source(self.msource)
        bill.add_source(self.lsource)
        bill.add_source(SOURCE_URL.format(**{
            'session': session,
            'bid': guid,
        }))

        self.save_bill(bill)
0
Example 160
Project: unisubs Source File: deploy.py
def setup_images(self):
    """Build the docker image on the builder host and distribute it.

    Streams the built image (``docker save`` on the builder, piped into
    ``docker load`` on every other docker host), then tags it for
    staging/production branches.

    Raises:
        subprocess.CalledProcessError: if the build, save, or load fails.
    """
    timer = LoggingTimer()
    # make sure amara-enterprise is on the correct commit
    subprocess.check_call(['bin/update-integration.py', '--skip-fetch'])
    # BUG FIX: the original line was missing the closing ']' on the
    # argument list, which is a syntax error.
    subprocess.check_call(['bin/build.py', self.image_name])
    timer.log_time('image build')
    # Send the image from builder to the other docker hosts
    log('sending image from builder to docker hosts')
    save_proc = self.docker.Popen(BUILDER_DOCKER_HOST, 'save',
                                  self.image_name, stdout=subprocess.PIPE)
    load_procs = [
        self.docker.Popen(host, 'load', stdin=subprocess.PIPE)
        for host in self.env.docker_hosts()
    ]
    BUF_SIZE = 4096
    # Fan the saved image out to all hosts in parallel, chunk by chunk.
    while True:
        data = save_proc.stdout.read(BUF_SIZE)
        if not data:
            break
        for proc in load_procs:
            proc.stdin.write(data)
    for proc in load_procs:
        proc.stdin.close()
    # BUG FIX: Popen exposes the exit status as `returncode`, not
    # `return_code`, and CalledProcessError requires (returncode, cmd).
    if save_proc.wait() != 0:
        raise subprocess.CalledProcessError(save_proc.returncode,
                                            'docker save')
    for proc in load_procs:
        if proc.wait() != 0:
            raise subprocess.CalledProcessError(proc.returncode,
                                                'docker load')
    timer.log_time('image save/load')
    if self.env.BRANCH in ('staging', 'production'):
        # Tag the image with amara-<branch>:latest
        # We use the tags to run periodic tasks, like updating
        # translations
        for host in [BUILDER_DOCKER_HOST] + self.env.docker_hosts():
            self.docker.run(host, 'tag', self.image_name, self.tag_name())
class ContainerManager(object):
"""Start/stop docker containers """
def __init__(self, env, commit_id, image_name):
self.docker = Docker()
self.env = env
self.commit_id = commit_id
self.image_name = image_name
self.containers_started = []
self.containers_stopped = []
def building_preview(self):
return self.env.BRANCH not in ('staging', 'production')
def app_params(self, limited=False):
"""Get docker params to used for both app containers and workers
"""
aws_access_id, aws_secret_key = self.env.aws_credentials(limited)
params = [
# AWS Auth info
'-e', 'AWS_ACCESS_ID=' + aws_access_id,
'-e', 'AWS_SECRET_KEY=' + aws_secret_key,
# REVISION controls the git revision we check out before starting
# this is actually somewhat redundant since we already copy the
# files into the docker image
'-e', 'REVISION=' + self.env.BRANCH,
# mount the workspace volume inside our container
'-v', '/var/workspace:/var/workspace',
]
if self.env.DB_NAME:
params.extend([
'-e', 'DB_NAME=' + self.env.DB_NAME,
])
if self.building_preview():
# SETTINGS_REVISION controls how to download the
# server_local_settings.py file (see .docker/config_env.sh)
params.extend(['-e', 'SETTINGS_REVISION=staging'])
return params
def interlock_params(self):
if self.env.BRANCH == 'production':
return [
'-e', ('INTERLOCK_DATA={"alias_domains": '
'["www.amara.org"]}'),
'-e', 'NEW_RELIC_APP_NAME=AmaraVPC',
'-e', ('NEW_RELIC_LICENSE_KEY=' +
self.env.NEW_RELIC_LICENSE_KEY)
]
else:
return []
def app_hostname(self):
"""Hostname for app containers.
The hostname sets an entry in the hosts file for the container. But
more importantly, it tells Interlock what web traffic should be routed
to the container.
"""
if self.env.BRANCH == 'production':
return 'amara.org'
else:
return '{}.amara.org'.format(self.env.BRANCH)
def container_name_prefix_for_branch(self):
"""Start of docker container names for this git branch."""
return 'app-amara-{}-'.format(self.env.BRANCH)
def container_name_prefix_for_build(self):
"""Start of docker container names for this particular build. """
# Include both the git commit ID and the build number since both could
# be useful.
return self.container_name_prefix_for_branch() + '{}-{}-'.format(
self.commit_id[:6], self.env.BUILD_NUMBER)
def run_app_command(self, command, argument=None):
"""Run a command using the app container
Use this to run a command that does something then quits like
build_media or migrate.
Args:
command: command to pass to our entrypoint. The entrypoint is a
copy of .docker/entry.sh
"""
cmd_line = [ 'run', '-t', '--rm', ]
cmd_line += self.app_params()
cmd_line += [self.image_name, command]
if argument is not None:
cmd_line += [argument]
self.docker.run(self.env.DOCKER_HOST_1, *cmd_line)
def start_worker_container(self, host, name, command):
"""Start an app contanier running the feed/master worker
Args:
host: docker host
name: docker name suffix of the container. This is what shows up
in docker ps. All names will be prefixed with
container_name_prefix_for_build().
command: command to pass to our entry point (feed_worker, or
master_worker)
"""
host_name = '{}-{}.amara.org'.format(self.env.BRANCH, name)
name = self.container_name_prefix_for_build() + name
cmd_line = [
'run', '-t', '-d',
'-h', host_name,
'--name', name,
'--restart=always',
] + self.app_params(limited=True) + [self.image_name, command]
cid = self.docker.run_and_return_output(host, *cmd_line).strip()
log("container id: {}", cid)
self.containers_started.append(ContainerInfo(host, name, cid))
def start_app_container(self, host, name):
"""Start an app contanier running a web server
Args:
host: docker host
name: docker name suffix of the container. This is what shows up
in docker ps. All names will be prefixed with
container_name_prefix_for_build().
"""
name = self.container_name_prefix_for_build() + name
cmd_line = [
'run', '-t', '-d', '-P',
'-h', self.app_hostname(),
'--name', name,
'--restart=always',
] + self.app_params(limited=True) + self.interlock_params() + [self.image_name]
cid = self.docker.run_and_return_output(host, *cmd_line).strip()
log("container id: {}", cid)
self.containers_started.append(ContainerInfo(host, name, cid))
def start_new_containers(self):
"""Start docker containers for this deploy."""
if self.env.BRANCH == 'production':
# for production we start up many instances, spread across the
# hosts
host_iter = itertools.cycle(self.env.docker_hosts())
for i in range(int(self.env.PRODUCTION_NUM_INSTANCES)):
host = host_iter.next()
self.start_app_container(host, str(i + 1))
elif self.env.BRANCH == 'staging':
# for staging we start up 1 instance per host
for i, host in enumerate(self.env.docker_hosts()):
self.start_app_container(host, str(i + 1))
else:
# for preview branches we start 1 instance on the builder host.
# Also we don't start up the workers
self.start_app_container(BUILDER_DOCKER_HOST, 'preview')
return
self.start_worker_container(self.env.DOCKER_HOST_1, 'master-worker',
'master_worker')
self.start_worker_container(self.env.DOCKER_HOST_2, 'feed-worker',
'feed_worker')
def find_old_containers(self):
"""Find containers started by previous deploys.
Returns a list of (host, container_id) tuples.
"""
old_containers = []
hosts_to_search = [BUILDER_DOCKER_HOST] + self.env.docker_hosts()
for host in hosts_to_search:
for container in self.docker.get_containers(host):
try:
name = container['Names'][0]
except KeyError:
pass
if self.container_name_prefix_for_branch() in name:
cid = container['Id']
old_containers.append(ContainerInfo(host, name, cid))
log("old app servers: {}", old_containers)
return old_containers
def shutdown_old_containers(self, old_containers):
for container in old_containers:
self.docker.run(container.host, 'kill', container.cid)
self.containers_stopped.append(container)
def remove_old_app_servers(self, old_containers):
for container in old_containers:
self.docker.run(container.host, 'rm', '-v', container.cid)
def print_report(self):
    """Log a summary of stopped/started containers plus a copy-pastable
    command line for opening an interactive app shell."""
    row_fmt = "{:<60} {:<60} {:<64}"
    sections = [
        ("------------- Containers Stopped ---------------",
         self.containers_stopped),
        ("------------- Containers Started ---------------",
         self.containers_started),
    ]
    for title, containers in sections:
        log(title)
        log(row_fmt, 'Host', 'Name', 'Container ID')
        for info in containers:
            log(row_fmt, *info)
    log("------------- Shell Command Line ---------------")
    shell_cmd = (['docker', 'run', '-it', '--rm']
                 + self.app_params() + [self.image_name, 'shell'])
    log_nostar(' '.join(shell_cmd))
class Deploy(object):
    """Top-level manager for the deploy."""

    def run(self):
        """Run a full deploy: build images, optionally reset the DB,
        migrate, swap containers, then print the report."""
        self.setup()
        rolling_back = bool(self.env.ROLLBACK_ID)
        if not rolling_back:
            self.image_builder.setup_images()
        if self.env.RESET_DB == 'true':
            # reset_db is only allowed for preview builds.
            if self.container_manager.building_preview():
                self.container_manager.run_app_command("reset_db")
            else:
                log("Not calling reset_db since we are not "
                    "building a preview.")
        if not rolling_back:
            self.container_manager.run_app_command("build_media")
        self.start_and_stop_containers()
        if self.container_manager.building_preview():
            self.container_manager.run_app_command(
                "setup_preview_site",
                argument=self.container_manager.app_hostname())
        self.container_manager.print_report()

    def build(self):
        """Build images and media without starting any containers."""
        self.setup(needs_migrations=False)
        self.image_builder.setup_images()
        self.container_manager.run_app_command("build_media")
        self.container_manager.print_report()

    def stop_old_containers(self):
        """Shut down containers left over from previous deploys."""
        self.setup()
        stale = self.container_manager.find_old_containers()
        self.container_manager.shutdown_old_containers(stale)

    def setup(self, needs_migrations=True):
        """Prepare the environment, image builder and container manager."""
        self.cd_to_project_root()
        self.env = Environment(needs_migrations)
        commit_id = self.get_commit_id()
        self.image_builder = ImageBuilder(self.env, commit_id)
        self.container_manager = ContainerManager(
            self.env, commit_id, self.image_builder.image_name)

    def get_commit_id(self):
        """Return the 40-hex-digit commit to deploy (ROLLBACK_ID or HEAD)."""
        if self.env.ROLLBACK_ID:
            log("Getting commit ID from ROLLBACK_ID")
            commit_id = self.env.ROLLBACK_ID
        else:
            commit_id = subprocess.check_output(
                ["git", "rev-parse", "HEAD"]).strip()
        # Reject anything that is not a full SHA-1 hash.
        if re.match('^[0-9a-f]{40}$', commit_id) is None:
            raise ValueError("Invalid commit id: {}".format(commit_id))
        return commit_id

    def cd_to_project_root(self):
        """chdir to the parent of this script's directory."""
        project_root = os.path.abspath(
            os.path.dirname(os.path.dirname(__file__))
        )
        log('cd to {}', project_root)
        os.chdir(project_root)

    def start_and_stop_containers(self):
        """Swap old containers for new ones, ordering the migration step
        according to the MIGRATIONS strategy."""
        manager = self.container_manager
        old_containers = manager.find_old_containers()
        strategy = self.env.MIGRATIONS
        if strategy == 'DONT_MIGRATE':
            manager.start_new_containers()
            time.sleep(30)
            manager.shutdown_old_containers(old_containers)
        elif strategy == 'MIGRATE_WHILE_RUNNING_OLD_CODE':
            manager.run_app_command('migrate')
            manager.start_new_containers()
            time.sleep(30)
            manager.shutdown_old_containers(old_containers)
        elif strategy == 'STOP_SERVERS_TO_MIGRATE':
            manager.shutdown_old_containers(old_containers)
            manager.run_app_command('migrate')
            manager.start_new_containers()
        else:
            raise ValueError("Unknown MIGRATIONS value: {}".format(
                strategy))
        # give containers some time to shutdown before we remove them
        time.sleep(5)
        manager.remove_old_app_servers(old_containers)
class Cleanup(object):
    """Remove stopped containers and unused images from every docker host."""

    def run(self):
        """Entry point: clean the builder host plus all app hosts."""
        docker_hosts = self.get_docker_hosts()
        self.docker = Docker()
        for host in [BUILDER_DOCKER_HOST] + docker_hosts:
            log("Host: {}", host)
            self.remove_stopped_containers(host)
            self.remove_unused_images(host)

    def get_docker_hosts(self):
        """Return the host list from $DOCKER_HOSTS, or exit(1) if unset."""
        try:
            return os.environ['DOCKER_HOSTS'].split()
        except KeyError:
            log("DOCKER_HOSTS ENV variable missing")
            sys.exit(1)

    def remove_stopped_containers(self, host):
        """Delete every exited container (and its volumes) on host."""
        # Typo fix: "stoped" -> "stopped" in the log message.
        log("checking for stopped containers")
        for container in self.docker.get_containers(host, status=['exited']):
            log("removing stopped container: {}", container['Id'])
            self.docker.run(host, 'rm', '-v', container['Id'])

    def remove_image(self, host, image):
        """Remove one image, logging (not raising) on failure."""
        try:
            self.docker.run(host, 'rmi', image)
        except subprocess.CalledProcessError:
            # Removing an image can fail if another container that uses it
            # was started after we fetched the container list.  Just print
            # a warning and continue on.
            log("Warning: error removing image {}", image)

    def remove_unused_images(self, host):
        """Untag and remove every image not used by any container on host."""
        log("checking for unused images")
        # (Removed an unused `container_ids = []` local from the original.)
        # Map image id -> names of the containers currently using it.
        used_images = collections.defaultdict(list)
        for container in self.docker.get_containers(host):
            cid = container['Id']
            details = self.docker.get_container_details(host, cid)
            used_images[details['Image']].append(details['Name'])
        for image_info in self.docker.get_images(host):
            image = image_info['Id']
            tags = [
                tag for tag in image_info['RepoTags']
                if tag != '<none>:<none>'
            ]
            if self.should_skip_image(image, tags):
                continue
            if image in used_images:
                log("Image {} in use {}", image, used_images[image])
            else:
                for tag in tags:
                    log("Untagging {}", tag)
                    self.remove_image(host, tag)
                # Untagging the last tag may already delete the image.
                if self.docker.image_exists(host, image):
                    log("removing unused image: {}", image)
                    self.remove_image(host, image)
                else:
                    log("image removed from untagging: {}", image)

    def should_skip_image(self, image, tags):
        """Return True when the image carries a ':latest' tag (keep it)."""
        for tag in tags:
            if tag.endswith(":latest"):
                log("skipping {} because of tag {}", image, tag)
                return True
        return False
def main(argv):
    """Dispatch to deploy/stop-deploy/build/cleanup based on argv[1].

    Defaults to 'deploy' when no command is given.  Any uncaught error is
    written to stderr with its traceback and exits with status 1.
    """
    try:
        try:
            command = argv[1].lower()
        except IndexError:
            command = 'deploy'
        if command == 'deploy':
            Deploy().run()
        elif command == 'stop-deploy':
            Deploy().stop_old_containers()
        elif command == 'build':
            Deploy().build()
        elif command == 'cleanup':
            Cleanup().run()
        else:
            # BUG fix: `log` is a function called as log(fmt, *args)
            # throughout this file; the original `log.write(...)` would
            # have raised AttributeError instead of reporting the error.
            log("Unknown command: {}", command)
            sys.exit(1)
    except Exception as e:
        # `except ... as` is valid from Python 2.6 and required by Python 3,
        # unlike the old `except Exception, e` spelling.
        sys.stderr.write("Error: {}\n{}".format(
            e, ''.join(traceback.format_exc())))
        sys.exit(1)
# Script entry point: pass the raw CLI arguments to main(), which
# dispatches on argv[1] (defaulting to 'deploy').
if __name__ == '__main__':
    main(sys.argv)