Here are examples of the Python standard-library function re.sub, taken from open source projects. By voting up, you can indicate which examples are most useful and appropriate.
200 Examples
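Before the examples, a quick refresher on the call being demonstrated. re.sub(pattern, repl, string, count=0, flags=0) returns a copy of string with every non-overlapping match of pattern replaced; repl may be a plain string (with \1-style backreferences) or a callable that receives the match object. A minimal illustration, with sample strings made up for this page:

import re

# String replacement with numbered backreferences
print(re.sub(r'(\w+)@(\w+)', r'\2.\1', 'user@host'))                 # host.user
# Callable replacement: the function is passed the match object
print(re.sub(r'\d+', lambda m: str(int(m.group(0)) * 2), 'a1 b2'))   # a2 b4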
Example 1 (3 votes)
View license@error.context_aware def run(test, params, env): """ This test virsh domtime command and its options. 1) Start a guest with/without guest agent configured; 2) Record guest times; 3) Do some operation to stop VM; 4) Run virsh domtime command with different options; 5) Check the command result; 6) Check the guest times against expectation; 7) Cleanup test environment. """ epoch = datetime.datetime(1970, 1, 1, 0, 0, 0) # Max time can be set with domtime successfully in newer qemu-ga time_max_1 = 3155731199 # Max time can be set with domtime successfully in older qemu-ga time_max_2 = 3155759999 # Max time can be set with domtime bug failed to set RTC in older qemu-ga time_max_3 = 9223372035 def init_time(session): """ Initialize guest RTC time to epoch + 1234567890 and system time one day latter. :param session: Session from which to access guest """ res = virsh.domtime(vm_name, time=1234567890) if res.exit_status: logging.debug("Failed to init time to 1234567890:\n%s", res) status, output = session.cmd_status_output('date -s "1 day"') if status: raise error.TestError("Failed to set guest time:\n%s" % output) def get_host_utc_time(): """ Get host UTC time from date command. """ res = utils.run("date -u") # Strip timezone info from output # e.g. 'Sun Feb 15 07:31:40 CST 2009' -> 'Sun Feb 15 07:31:40 2009' time_str = re.sub(r'\S+ (?=\S+$)', '', res.stdout.strip()) return datetime.datetime.strptime(time_str, r"%a %b %d %H:%M:%S %Y") def run_cmd(session, cmd): """ Run a command in a session and record duration of call. """ start = time.time() output = session.cmd_output(cmd) duration = time.time() - start logging.info('Result of command "%s". Duration: %s. Output:%s', cmd, duration, output.strip()) return output, duration def get_guest_times(session): """ Retrieve different guest time as a dict for checking. Keys: local_hw: Guest RTC time in local timezone local_sys: Guest system time in local timezone utc_sys: Guest system time in UTC domtime: Guest system time in UTC got from virsh domtime command :param session: Session from which to access guest """ times = {} get_begin = time.time() # Guest RTC local timezone time output, _ = run_cmd(session, 'hwclock') time_str, _ = re.search(r"(.+) (\S+ seconds)", output).groups() try: # output format 1: Tue 01 Mar 2016 01:53:46 PM CST # Remove timezone info from output new_str = re.sub(r'\S+$', '', time_str) times['local_hw'] = datetime.datetime.strptime( new_str, r"%a %d %b %Y %I:%M:%S %p") except ValueError: # There are two possible output format for `hwclock` # output format 2: Sat Feb 14 07:31:33 2009 times['local_hw'] = datetime.datetime.strptime( time_str, r"%a %b %d %H:%M:%S %Y") delta = time.time() - get_begin times['local_hw'] -= datetime.timedelta(seconds=delta) # Guest system local timezone time output, _ = run_cmd(session, 'date') # Strip timezone info from output # e.g. 'Sun Feb 15 07:31:40 CST 2009' -> 'Sun Feb 15 07:31:40 2009' time_str = re.sub(r'\S+ (?=\S+$)', '', output.strip()) times['local_sys'] = datetime.datetime.strptime( time_str, r"%a %b %d %H:%M:%S %Y") delta = time.time() - get_begin times['local_sys'] -= datetime.timedelta(seconds=delta) # Guest system UTC timezone time output, _ = run_cmd(session, 'date -u') # Strip timezone info from output # e.g. 
'Sun Feb 15 07:31:40 CST 2009' -> 'Sun Feb 15 07:31:40 2009' time_str = re.sub(r'\S+ (?=\S+$)', '', output.strip()) times['utc_sys'] = datetime.datetime.strptime( time_str, r"%a %b %d %H:%M:%S %Y") delta = time.time() - get_begin times['utc_sys'] -= datetime.timedelta(seconds=delta) # Guest UTC time from virsh domtime res = virsh.domtime(vm_name, pretty=True, ignore_status=True) if not res.exit_status: logging.info('Result of "domtime". Duration: %s. Output:%s', res.duration, res.stdout.strip()) _, time_str = res.stdout.split(" ", 1) times['domtime'] = datetime.datetime.strptime( time_str.strip(), r"%Y-%m-%d %H:%M:%S") delta = time.time() - get_begin times['domtime'] -= datetime.timedelta(seconds=delta) else: logging.debug("Unable to get domain time:\n%s", res) times['domtime'] = None return times, time.time() - get_begin def check_get_success(expected_times): """ Check virsh command get result against expected times :param expected_times: Expected time for checking """ _, time_str = res.stdout.split(" ", 1) if pretty: # Time: 2015-01-13 06:29:18 domtime = datetime.datetime.strptime(time_str.strip(), r"%Y-%m-%d %H:%M:%S") else: # Time: 1421130740 domtime = epoch + datetime.timedelta(seconds=int(time_str)) time_shift = time.time() - start logging.debug("Time shift is %s", time_shift) result_diff = (domtime - expected_times['domtime']).total_seconds() if abs(result_diff) > 2.0: raise error.TestFail("Expect get time %s, but got %s, time " "diff: %s" % (org_times['domtime'], domtime, result_diff)) def check_guest_times(expected_times, cur_times): """ Check guest times after test against expected times :param expected_times: Expected time for checking """ time_shift = time.time() - start logging.debug("Time shift is %s", time_shift) error_msgs = [] for key in cur_times: if cur_times[key] is not None: cur = cur_times[key] expect = expected_times[key] diff = (cur - expect).total_seconds() msg = "For %s, expect get time %s, got %s, time diff: %s" % ( key, expect, cur, diff) logging.debug(msg) if abs(diff) > 2.0: error_msgs.append(msg) if error_msgs: raise error.TestFail('\n'.join(error_msgs)) def check_time(result, org_times, cur_times): """ Check whether domain time has been change accordingly. :param result: virsh domtime CmdResult instance :param org_times: Original guest times """ action = "get" if now or sync or (set_time is not None): action = "set" tz_diff = org_times['local_sys'] - org_times['utc_sys'] logging.debug("Timezone diff on guest is %d hours.", (tz_diff.total_seconds() / 3600)) # Hardware time will never stop logging.info('Add %ss to expected guest time', interval) if action == 'get': expected_times = org_times elif action == 'set': if result.exit_status: # Time not change if domtime fails expected_times = org_times else: # Time change accordingly if succeed. 
if now: utc_time = org_host_time local_time = utc_time + tz_diff elif sync: local_time = org_times["local_hw"] utc_time = local_time - tz_diff elif set_time is not None: utc_time = epoch + datetime.timedelta( seconds=(int(set_time) - guest_duration)) local_time = utc_time + tz_diff expected_times = {} expected_times['local_hw'] = local_time expected_times['local_sys'] = local_time expected_times["utc_sys"] = utc_time expected_times["domtime"] = utc_time # Add interval between two checks of guest time for key in expected_times: if expected_times[key] is not None: expected_times[key] += interval # Hardware time will never stop # Software time will stop if suspended or managed-saved if suspend or managedsave: logging.info('Remove %ss from expected guest software time', stop_time) expected_times["domtime"] -= stop_time expected_times["local_sys"] -= stop_time expected_times["utc_sys"] -= stop_time # Check guest time if domtime succeeded check_guest_times(expected_times, cur_times) # Check if output of domtime is correct if action == 'get' and not result.exit_status: check_get_success(expected_times) def prepare_fail_patts(): """ Predict fail pattern from test parameters. """ fail_patts = [] if not channel: fail_patts.append(r"QEMU guest agent is not configured") if not agent: # For older version fail_patts.append(r"Guest agent not available for now") # For newer version fail_patts.append(r"Guest agent is not responding") if int(now) + int(sync) + int(bool(set_time)) > 1: fail_patts.append(r"Options \S+ and \S+ are mutually exclusive") if shutdown: fail_patts.append(r"domain is not running") if set_time is not None: if int(set_time) < 0: fail_patts.append(r"Invalid argument") elif time_max_1 < int(set_time) <= time_max_2: fail_patts.append(r"Invalid time") elif time_max_2 < int(set_time) <= time_max_3: fail_patts.append(r"Invalid time") elif time_max_3 < int(set_time): fail_patts.append(r"too big for guest agent") return fail_patts def stop_vm(): """ Suspend, managedsave, pmsuspend or shutdown a VM for a period of time """ stop_start = time.time() if suspend: vm.pause() time.sleep(10) vm.resume() elif managedsave: vm.managedsave() time.sleep(10) vm.start() vm.wait_for_login() elif pmsuspend: vm.pmsuspend() time.sleep(10) vm.pmwakeup() vm.wait_for_login() elif shutdown: vm.destroy() # Check real guest stop time stop_seconds = time.time() - stop_start stop_time = datetime.timedelta(seconds=stop_seconds) logging.debug("Guest stopped: %s", stop_time) return stop_time # Check availability of virsh command domtime if not virsh.has_help_command('domtime'): raise error.TestNAError("This version of libvirt does not support " "the domtime test") channel = (params.get("prepare_channel", "yes") == 'yes') agent = (params.get("start_agent", "yes") == 'yes') pretty = (params.get("domtime_pretty", "no") == 'yes') now = (params.get("domtime_now", "no") == 'yes') sync = (params.get("domtime_sync", "no") == 'yes') set_time = params.get("domtime_time", None) shutdown = (params.get("shutdown_vm", "no") == 'yes') suspend = (params.get("suspend_vm", "no") == 'yes') managedsave = (params.get("managedsave_vm", "no") == 'yes') pmsuspend = (params.get("pmsuspend_vm", "no") == 'yes') vm_name = params.get("main_vm") vm = env.get_vm(vm_name) # Backup domain XML xml_backup = vm_xml.VMXML.new_from_inactive_dumpxml(vm_name) try: if pmsuspend: vm_xml.VMXML.set_pm_suspend(vm_name) # Add or remove qemu-agent from guest before test vm.prepare_guest_agent(channel=channel, start=agent) session = vm.wait_for_login() try: if channel 
and agent: init_time(session) # Expected fail message patterns fail_patts = prepare_fail_patts() # Message patterns test should skip when met skip_patts = [ r'The command \S+ has not been found', ] # Record start time start = time.time() # Record host time before testing org_host_time = get_host_utc_time() # Get original guest times org_times, guest_duration = get_guest_times(session) # Run some operations to stop guest system stop_time = stop_vm() # Run command with specified options. res = virsh.domtime(vm_name, now=now, pretty=pretty, sync=sync, time=set_time) libvirt.check_result(res, fail_patts, skip_patts) # Check interval between two check of guest time interval = datetime.timedelta( seconds=(time.time() - start)) logging.debug("Interval between guest checking: %s", interval) if not shutdown: # Get current guest times cur_times, _ = get_guest_times(session) check_time(res, org_times, cur_times) finally: # Sync guest time with host if channel and agent and not shutdown: res = virsh.domtime(vm_name, now=True) if res.exit_status: session.close() raise error.TestError("Failed to recover guest time:\n%s" % res) session.close() finally: # Restore VM XML xml_backup.sync()
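The recurring re.sub call in this example strips the timezone token out of `date`/`hwclock` output so the result can be parsed with strptime. A minimal standalone sketch of just that step, using an illustrative sample string:

import re
import datetime

raw = "Sun Feb 15 07:31:40 CST 2009"   # typical `date` output
# Remove the second-to-last field (the timezone); the lookahead keeps the year
cleaned = re.sub(r'\S+ (?=\S+$)', '', raw.strip())
print(datetime.datetime.strptime(cleaned, "%a %b %d %H:%M:%S %Y"))
# 2009-02-15 07:31:40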
Example 2 (2 votes)
View license@error.context_aware def run(test, params, env): """ Test Step: 1. Boot up guest using the openvswitch bridge 2. Setup related service in test enviroment(http, ftp etc.)(optional) 3. Access the service in guest 4. Setup access control rules in ovs to disable the access 5. Access the service in guest 6. Setup access control rules in ovs to enable the access 7. Access the service in guest 8. Delete the access control rules in ovs 9. Access the service in guest Params: :param test: QEMU test object :param params: Dictionary with the test parameters :param env: Dictionary with test environment. """ def access_service(access_sys, access_targets, disabled, host_ip, ref=False): err_msg = "" err_type = "" for asys in access_sys: for atgt in access_targets: logging.debug("Try to access target %s from %s" % (atgt, asys)) access_params = access_sys[asys] atgt_disabled = access_params['disabled_%s' % atgt] if asys in vms_tags: vm = env.get_vm(asys) session = vm.wait_for_login(timeout=timeout) run_func = session.cmd remote_src = vm ssh_src_ip = vm.get_address() else: run_func = utils.system_output remote_src = "localhost" ssh_src_ip = host_ip if atgt in vms_tags: vm = env.get_vm(atgt) access_re_sub_string = vm.wait_for_get_address(0) else: access_re_sub_string = host_ip access_cmd = re.sub("ACCESS_TARGET", access_re_sub_string, access_params['access_cmd']) ref_cmd = re.sub("ACCESS_TARGET", access_re_sub_string, access_params['ref_cmd']) if access_cmd in ["ssh", "telnet"]: if atgt in vms_tags: target_vm = env.get_vm(atgt) target_ip = target_vm.get_address() else: target_vm = "localhost" target_ip = host_ip out = "" out_err = "" try: out = remote_login(access_cmd, target_ip, remote_src, params, host_ip) stat = 0 except remote.LoginError, err: stat = 1 out_err = "Failed to login %s " % atgt out_err += "from %s, err: %s" % (asys, err.output) try: out += remote_login(access_cmd, ssh_src_ip, target_vm, params, host_ip) except remote.LoginError, err: stat += 1 out_err += "Failed to login %s " % asys out_err += "from %s, err: %s" % (atgt, err.output) if out_err: out = out_err else: try: out = run_func(access_cmd, timeout=op_timeout) stat = 0 check_string = access_params.get("check_from_output") if check_string and check_string in out: stat = 1 except (aexpect.ShellCmdError, error.CmdError, aexpect.ShellTimeoutError), err: if isinstance(err, error.CmdError): out = err.result_obj.stderr stat = err.result_obj.exit_status else: out = err.output if isinstance(err, aexpect.ShellTimeoutError): stat = 1 session.close() session = vm.wait_for_login(timeout=timeout) run_func = session.cmd else: stat = err.status if access_params.get("clean_cmd"): try: run_func(access_params['clean_cmd']) except Exception: pass if disabled and atgt_disabled and stat == 0: err_msg += "Still can access %s after" % atgt err_msg += " disable it from ovs. " err_msg += "Command: %s. " % access_cmd err_msg += "Output: %s" % out if disabled and atgt_disabled and stat != 0: logging.debug("Can not access target as expect.") if not disabled and stat != 0: if ref: err_msg += "Can not access %s at the" % atgt err_msg += " beginning. Please check your setup." err_type = "ref" else: err_msg += "Still can not access %s" % atgt err_msg += " after enable the access" err_msg += "Command: %s. 
" % access_cmd err_msg += "Output: %s" % out if err_msg: session.close() if err_type == "ref": raise error.TestNAError(err_msg) raise error.TestFail(err_msg) if not ref_cmd: session.close() return try: out = run_func(ref_cmd, timeout=op_timeout) stat = 0 except (aexpect.ShellCmdError, error.CmdError, aexpect.ShellTimeoutError), err: if isinstance(err, error.CmdError): out = err.result_obj.stderr stat = err.result_obj.exit_status else: out = err.output if isinstance(err, aexpect.ShellTimeoutError): stat = 1 else: stat = err.status if stat != 0: if ref: err_msg += "Refernce command failed at beginning." err_type = "ref" else: err_msg += "Refernce command failed after setup" err_msg += " the rules" err_msg += "Command: %s. " % ref_cmd err_msg += "Output: %s" % out if err_msg: session.close() if err_type == "ref": raise error.TestNAError(err_msg) raise error.TestFail(err_msg) session.close() def get_acl_cmd(protocol, in_port, action, extra_options): acl_cmd = protocol.strip() acl_cmd += ",in_port=%s" % in_port.strip() if extra_options.strip(): acl_cmd += ",%s" % ",".join(extra_options.strip().split()) if action.strip(): acl_cmd += ",action=%s" % action.strip() return acl_cmd def acl_rules_check(acl_rules, acl_setup_cmd): acl_setup_cmd = re.sub("action=", "actions=", acl_setup_cmd) acl_option = re.split(",", acl_setup_cmd) for line in acl_rules.splitlines(): rule = [_.lower() for _ in re.split("[ ,]", line) if _] item_in_rule = 0 for acl_item in acl_option: if acl_item.lower() in rule: item_in_rule += 1 if item_in_rule == len(acl_option): return True return False def remote_login(client, host, src, params_login, host_ip): src_name = src if src != "localhost": src_name = src.name logging.info("Login %s from %s" % (host, src)) port = params_login["target_port"] username = params_login["username"] password = params_login["password"] prompt = params_login["shell_prompt"] linesep = eval("'%s'" % params_login.get("shell_linesep", r"\n")) quit_cmd = params.get("quit_cmd", "exit") if host == host_ip: # Try to login from guest to host. 
prompt = "^\[.*\][\#\$]\s*$" linesep = "\n" username = params_login["host_username"] password = params_login["host_password"] quit_cmd = "exit" if client == "ssh": # We only support ssh for Linux in this test cmd = ("ssh -o UserKnownHostsFile=/dev/null " "-o StrictHostKeyChecking=no " "-o PreferredAuthentications=password -p %s %[email protected]%s" % (port, username, host)) elif client == "telnet": cmd = "telnet -l %s %s %s" % (username, host, port) else: raise remote.LoginBadClientError(client) if src == "localhost": logging.debug("Login with command %s" % cmd) session = aexpect.ShellSession(cmd, linesep=linesep, prompt=prompt) else: if params_login.get("os_type") == "windows": if client == "telnet": cmd = "C:\\telnet.py %s %s " % (host, username) cmd += "%s \"%s\" && " % (password, prompt) cmd += "C:\\wait_for_quit.py" cmd = "%s || ping 127.0.0.1 -n 5 -w 1000 > nul" % cmd else: cmd += " || sleep 5" session = src.wait_for_login() logging.debug("Sending login command: %s" % cmd) session.sendline(cmd) try: out = remote.handle_prompts(session, username, password, prompt, timeout, debug=True) except Exception, err: session.close() raise err try: session.cmd(quit_cmd) session.close() except Exception: pass return out def setup_service(setup_target): setup_timeout = int(params.get("setup_timeout", 360)) if setup_target == "localhost": setup_func = utils.system_output os_type = "linux" else: setup_vm = env.get_vm(setup_target) setup_session = setup_vm.wait_for_login(timeout=timeout) setup_func = setup_session.cmd os_type = params["os_type"] setup_params = params.object_params(os_type) setup_cmd = setup_params.get("setup_cmd", "service SERVICE restart") prepare_cmd = setup_params.get("prepare_cmd") setup_cmd = re.sub("SERVICE", setup_params.get("service", ""), setup_cmd) error.context("Set up %s service in %s" % (setup_params.get("service"), setup_target), logging.info) if prepare_cmd: setup_func(prepare_cmd, timeout=setup_timeout) setup_func(setup_cmd, timeout=setup_timeout) if setup_target != "localhost": setup_session.close() def stop_service(setup_target): setup_timeout = int(params.get("setup_timeout", 360)) if setup_target == "localhost": setup_func = utils.system_output os_type = "linux" else: setup_vm = env.get_vm(setup_target) setup_session = setup_vm.wait_for_login(timeout=timeout) setup_func = setup_session.cmd os_type = params["os_type"] setup_params = params.object_params(os_type) stop_cmd = setup_params.get("stop_cmd", "service SERVICE stop") cleanup_cmd = setup_params.get("cleanup_cmd") stop_cmd = re.sub("SERVICE", setup_params.get("service", ""), stop_cmd) error.context("Stop %s service in %s" % (setup_params.get("service"), setup_target), logging.info) if stop_cmd: setup_func(stop_cmd, timeout=setup_timeout) if cleanup_cmd: setup_func(cleanup_cmd, timeout=setup_timeout) if setup_target != "localhost": setup_session.close() timeout = int(params.get("login_timeout", '360')) op_timeout = int(params.get("op_timeout", "360")) acl_protocol = params['acl_protocol'] acl_extra_options = params.get("acl_extra_options", "") for vm in env.get_all_vms(): session = vm.wait_for_login(timeout=timeout) if params.get("disable_iptables") == "yes": session.cmd("iptables -F") #session.cmd_status_output("service iptables stop") if params.get("copy_scripts"): root_dir = data_dir.get_root_dir() script_dir = os.path.join(root_dir, "shared", "scripts") tmp_dir = params.get("tmp_dir", "C:\\") for script in params.get("copy_scripts").split(): script_path = os.path.join(script_dir, script) 
vm.copy_files_to(script_path, tmp_dir) session.close() vms_tags = params.objects("vms") br_name = params.get("netdst") if br_name == "private": br_name = params.get("priv_brname", 'autotest-prbr0') for setup_target in params.get("setup_targets", "").split(): setup_service(setup_target) access_targets = params.get("access_targets", "localhost").split() deny_target = params.get("deny_target", "localhost") all_target = params.get("extra_target", "").split() + vms_tags target_port = params["target_port"] vm = env.get_vm(vms_tags[0]) nic = vm.virtnet[0] if_name = nic.ifname params_nic = params.object_params("nic1") if params["netdst"] == "private": params_nic["netdst"] = params_nic.get("priv_brname", "atbr0") host_ip = utils_net.get_host_ip_address(params_nic) if deny_target in vms_tags: deny_vm = env.get_vm(deny_target) deny_vm_ip = deny_vm.wait_for_get_address(0) elif deny_target == "localhost": deny_vm_ip = host_ip if "NW_DST" in acl_extra_options: acl_extra_options = re.sub("NW_DST", deny_vm_ip, acl_extra_options) acl_extra_options = re.sub("TARGET_PORT", target_port, acl_extra_options) access_sys = {} for target in all_target: if target not in access_targets: if target in vms_tags: os_type = params["os_type"] else: os_type = "linux" os_params = params.object_params(os_type) access_param = os_params.object_params(target) check_from_output = access_param.get("check_from_output") access_sys[target] = {} access_sys[target]['access_cmd'] = access_param['access_cmd'] access_sys[target]['ref_cmd'] = access_param.get('ref_cmd', "") access_sys[target]['clean_cmd'] = access_param.get('clean_guest', "") if check_from_output: access_sys[target]['check_from_output'] = check_from_output for tgt in access_targets: tgt_param = access_param.object_params(tgt) acl_disabled = tgt_param.get("acl_disabled") == "yes" access_sys[target]['disabled_%s' % tgt] = acl_disabled error.context("Try to access target before setup the rules", logging.info) access_service(access_sys, access_targets, False, host_ip, ref=True) error.context("Disable the access in ovs", logging.info) br_infos = utils_net.openflow_manager(br_name, "show").stdout if_port = re.findall("(\d+)\(%s\)" % if_name, br_infos) if not if_port: raise error.TestNAError("Can not find %s in bridge %s" % (if_name, br_name)) if_port = if_port[0] acl_cmd = get_acl_cmd(acl_protocol, if_port, "drop", acl_extra_options) utils_net.openflow_manager(br_name, "add-flow", acl_cmd) acl_rules = utils_net.openflow_manager(br_name, "dump-flows").stdout if not acl_rules_check(acl_rules, acl_cmd): raise error.TestFail("Can not find the rules from" " ovs-ofctl: %s" % acl_rules) error.context("Try to acess target to exam the disable rules", logging.info) access_service(access_sys, access_targets, True, host_ip) error.context("Enable the access in ovs", logging.info) acl_cmd = get_acl_cmd(acl_protocol, if_port, "normal", acl_extra_options) utils_net.openflow_manager(br_name, "mod-flows", acl_cmd) acl_rules = utils_net.openflow_manager(br_name, "dump-flows").stdout if not acl_rules_check(acl_rules, acl_cmd): raise error.TestFail("Can not find the rules from" " ovs-ofctl: %s" % acl_rules) error.context("Try to acess target to exam the enable rules", logging.info) access_service(access_sys, access_targets, False, host_ip) error.context("Delete the access rules in ovs", logging.info) acl_cmd = get_acl_cmd(acl_protocol, if_port, "", acl_extra_options) utils_net.openflow_manager(br_name, "del-flows", acl_cmd) acl_rules = utils_net.openflow_manager(br_name, "dump-flows").stdout if 
acl_rules_check(acl_rules, acl_cmd): raise error.TestFail("Still can find the rules from" " ovs-ofctl: %s" % acl_rules) error.context("Try to acess target to exam after delete the rules", logging.info) access_service(access_sys, access_targets, False, host_ip) for setup_target in params.get("setup_targets", "").split(): stop_service(setup_target)
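Nearly all of the re.sub calls above are literal placeholder substitutions in command templates (ACCESS_TARGET, SERVICE, NW_DST, TARGET_PORT), plus one fix-up of ovs-ofctl syntax. A minimal sketch of that pattern, with made-up placeholder values:

import re

access_cmd = re.sub("ACCESS_TARGET", "192.168.122.10", "ping -c 3 ACCESS_TARGET")
setup_cmd = re.sub("SERVICE", "httpd", "service SERVICE restart")
# Flows are added with "action=..." but dumped by ovs-ofctl as "actions=..."
acl_rule = re.sub("action=", "actions=", "tcp,in_port=1,action=drop")
print(access_cmd)   # ping -c 3 192.168.122.10
print(setup_cmd)    # service httpd restart
print(acl_rule)     # tcp,in_port=1,actions=drop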
Example 3 (2 votes)
View licensedef setup_boot_disk(self): if self.unattended_file.endswith('.sif'): dest_fname = 'winnt.sif' setup_file = 'winnt.bat' boot_disk = utils_disk.FloppyDisk(self.floppy, self.qemu_img_binary, self.tmpdir, self.vfd_size) answer_path = boot_disk.get_answer_file_path(dest_fname) self.answer_windows_ini(answer_path) setup_file_path = os.path.join(self.unattended_dir, setup_file) boot_disk.copy_to(setup_file_path) if self.install_virtio == "yes": boot_disk.setup_virtio_win2003(self.virtio_floppy, self.virtio_oemsetup_id) boot_disk.copy_to(self.finish_program) elif self.unattended_file.endswith('.ks'): # Red Hat kickstart install dest_fname = 'ks.cfg' if self.params.get('unattended_delivery_method') == 'integrated': ks_param = 'ks=cdrom:/dev/sr0:/isolinux/%s' % dest_fname kernel_params = self.kernel_params if 'ks=' in kernel_params: kernel_params = re.sub('ks\=[\w\d\:\.\/]+', ks_param, kernel_params) else: kernel_params = '%s %s' % (kernel_params, ks_param) # Standard setting is kickstart disk in /dev/sr0 and # install cdrom in /dev/sr1. As we merge them together, # we need to change repo configuration to /dev/sr0 if 'repo=cdrom' in kernel_params: kernel_params = re.sub('repo\=cdrom[\:\w\d\/]*', 'repo=cdrom:/dev/sr0', kernel_params) self.kernel_params = None boot_disk = utils_disk.CdromInstallDisk( self.cdrom_unattended, self.tmpdir, self.cdrom_cd1_mount, kernel_params) elif self.params.get('unattended_delivery_method') == 'url': if self.unattended_server_port is None: self.unattended_server_port = utils_misc.find_free_port( 8000, 8099, self.url_auto_content_ip) path = os.path.join(os.path.dirname(self.cdrom_unattended), 'ks') boot_disk = RemoteInstall(path, self.url_auto_content_ip, self.unattended_server_port, dest_fname) ks_param = 'ks=%s' % boot_disk.get_url() kernel_params = self.kernel_params if 'ks=' in kernel_params: kernel_params = re.sub('ks\=[\w\d\:\.\/]+', ks_param, kernel_params) else: kernel_params = '%s %s' % (kernel_params, ks_param) # Standard setting is kickstart disk in /dev/sr0 and # install cdrom in /dev/sr1. 
When we get ks via http, # we need to change repo configuration to /dev/sr0 kernel_params = re.sub('repo\=cdrom[\:\w\d\/]*', 'repo=cdrom:/dev/sr0', kernel_params) self.kernel_params = kernel_params elif self.params.get('unattended_delivery_method') == 'cdrom': boot_disk = utils_disk.CdromDisk(self.cdrom_unattended, self.tmpdir) elif self.params.get('unattended_delivery_method') == 'floppy': boot_disk = utils_disk.FloppyDisk(self.floppy, self.qemu_img_binary, self.tmpdir, self.vfd_size) ks_param = 'ks=floppy' kernel_params = self.kernel_params if 'ks=' in kernel_params: # Reading ks from floppy directly doesn't work in some OS, # options 'ks=hd:/dev/fd0' can reading ks from mounted # floppy, so skip repace it; if not re.search("fd\d+", kernel_params): kernel_params = re.sub('ks\=[\w\d\:\.\/]+', ks_param, kernel_params) else: kernel_params = '%s %s' % (kernel_params, ks_param) kernel_params = re.sub('repo\=cdrom[\:\w\d\/]*', 'repo=cdrom:/dev/sr0', kernel_params) self.kernel_params = kernel_params else: raise ValueError("Neither cdrom_unattended nor floppy set " "on the config file, please verify") answer_path = boot_disk.get_answer_file_path(dest_fname) self.answer_kickstart(answer_path) elif self.unattended_file.endswith('.xml'): if "autoyast" in self.kernel_params: # SUSE autoyast install dest_fname = "autoinst.xml" if (self.cdrom_unattended and self.params.get('unattended_delivery_method') == 'cdrom'): boot_disk = utils_disk.CdromDisk(self.cdrom_unattended, self.tmpdir) elif self.floppy: autoyast_param = 'autoyast=device://fd0/autoinst.xml' kernel_params = self.kernel_params if 'autoyast=' in kernel_params: kernel_params = re.sub('autoyast\=[\w\d\:\.\/]+', autoyast_param, kernel_params) else: kernel_params = '%s %s' % ( kernel_params, autoyast_param) self.kernel_params = kernel_params boot_disk = utils_disk.FloppyDisk(self.floppy, self.qemu_img_binary, self.tmpdir, self.vfd_size) else: raise ValueError("Neither cdrom_unattended nor floppy set " "on the config file, please verify") answer_path = boot_disk.get_answer_file_path(dest_fname) self.answer_suse_xml(answer_path) else: # Windows unattended install dest_fname = "autounattend.xml" if self.params.get('unattended_delivery_method') == 'cdrom': boot_disk = utils_disk.CdromDisk(self.cdrom_unattended, self.tmpdir) if self.install_virtio == "yes": boot_disk.setup_virtio_win2008(self.virtio_floppy, self.cdrom_virtio) else: self.cdrom_virtio = None else: boot_disk = utils_disk.FloppyDisk(self.floppy, self.qemu_img_binary, self.tmpdir, self.vfd_size) if self.install_virtio == "yes": boot_disk.setup_virtio_win2008(self.virtio_floppy) answer_path = boot_disk.get_answer_file_path(dest_fname) self.answer_windows_xml(answer_path) boot_disk.copy_to(self.finish_program) else: raise ValueError('Unknown answer file type: %s' % self.unattended_file) boot_disk.close()
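Here re.sub rewrites kernel boot parameters: the ks= argument is repointed at the kickstart location for the chosen delivery method, and repo=cdrom... is forced to /dev/sr0. A minimal sketch of those two substitutions, with an illustrative parameter line:

import re

kernel_params = "quiet ks=floppy repo=cdrom:/dev/sr1"
# Repoint the kickstart argument at the integrated cdrom
kernel_params = re.sub(r'ks=[\w\d:./]+', 'ks=cdrom:/dev/sr0:/isolinux/ks.cfg',
                       kernel_params)
# Repoint the install repo at /dev/sr0 as well
kernel_params = re.sub(r'repo=cdrom[:\w\d/]*', 'repo=cdrom:/dev/sr0', kernel_params)
print(kernel_params)
# quiet ks=cdrom:/dev/sr0:/isolinux/ks.cfg repo=cdrom:/dev/sr0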
Example 4 (2 votes)
View licensedef export(self): """Generate a MATLAB class definition containing the ODEs for the PySB model associated with the exporter. Returns ------- string String containing the MATLAB code for an implementation of the model's ODEs. """ output = StringIO() pysb.bng.generate_equations(self.model) docstring = '' if self.docstring: docstring += self.docstring.replace('\n', '\n % ') # Substitute underscores for any dots in the model name model_name = self.model.name.replace('.', '_') # -- Parameters and Initial conditions ------- # Declare the list of parameters as a struct params_str = 'self.parameters = struct( ...\n'+' '*16 params_str_list = [] for i, p in enumerate(self.model.parameters): # Add parameter to struct along with nominal value cur_p_str = "'%s', %.17g" % (_fix_underscores(p.name), p.value) # Decide whether to continue or terminate the struct declaration: if i == len(self.model.parameters) - 1: cur_p_str += ');' # terminate else: cur_p_str += ', ...' # continue params_str_list.append(cur_p_str) # Format and indent the params struct declaration params_str += ('\n'+' '*16).join(params_str_list) # Fill in an array of the initial conditions based on the named # parameter values initial_values_str = ('initial_values = zeros(1,%d);\n'+' '*12) % \ len(self.model.species) initial_values_str += ('\n'+' '*12).join( ['initial_values(%d) = self.parameters.%s; %% %s' % (i+1, _fix_underscores(ic[1].name), ic[0]) for i, ic in enumerate(self.model.initial_conditions)]) # -- Build observables declaration -- observables_str = 'self.observables = struct( ...\n'+' '*16 observables_str_list = [] for i, obs in enumerate(self.model.observables): # Associate species and coefficient lists with observable names, # changing from zero- to one-based indexing cur_obs_str = "'%s', [%s; %s]" % \ (_fix_underscores(obs.name), ' '.join([str(sp+1) for sp in obs.species]), ' '.join([str(c) for c in obs.coefficients])) # Decide whether to continue or terminate the struct declaration: if i == len(self.model.observables) - 1: cur_obs_str += ');' # terminate else: cur_obs_str += ', ...' # continue observables_str_list.append(cur_obs_str) # Format and indent the observables struct declaration observables_str += ('\n'+' '*16).join(observables_str_list) # -- Build ODEs ------- # Build a stringified list of species species_list = ['%% %s;' % s for i, s in enumerate(self.model.species)] # Build the ODEs as strings from the model.odes array odes_list = ['y(%d,1) = %s;' % (i+1, sympy.ccode(self.model.odes[i])) for i in range(len(self.model.odes))] # Zip the ODEs and species string lists and then flatten them # (results in the interleaving of the two lists) odes_species_list = [item for sublist in zip(species_list, odes_list) for item in sublist] # Flatten to a string and add correct indentation odes_str = ('\n'+' '*12).join(odes_species_list) # Change species names from, e.g., '__s(0)' to 'y0(1)' (note change # from zero-based indexing to 1-based indexing) odes_str = re.sub(r'__s(\d+)', \ lambda m: 'y0(%s)' % (int(m.group(1))+1), odes_str) # Change C code 'pow' function to MATLAB 'power' function odes_str = re.sub(r'pow\(', 'power(', odes_str) # Prepend 'p.' to named parameters and fix any underscores for i, p in enumerate(self.model.parameters): odes_str = re.sub(r'\b(%s)\b' % p.name, 'p.%s' % _fix_underscores(p.name), odes_str) # -- Build final output -- output.write(pad(r""" classdef %(model_name)s %% %(docstring)s %% A class implementing the ordinary differential equations %% for the %(model_name)s model. 
%% %% Save as %(model_name)s.m. %% %% Generated by pysb.export.matlab.MatlabExporter. %% %% Properties %% ---------- %% observables : struct %% A struct containing the names of the observables from the %% PySB model as field names. Each field in the struct %% maps the observable name to a matrix with two rows: %% the first row specifies the indices of the species %% associated with the observable, and the second row %% specifies the coefficients associated with the species. %% For any given timecourse of model species resulting from %% integration, the timecourse for an observable can be %% retrieved using the get_observable method, described %% below. %% %% parameters : struct %% A struct containing the names of the parameters from the %% PySB model as field names. The nominal values are set by %% the constructor and their values can be overriden %% explicitly once an instance has been created. %% %% Methods %% ------- %% %(model_name)s.odes(tspan, y0) %% The right-hand side function for the ODEs of the model, %% for use with MATLAB ODE solvers (see Examples). %% %% %(model_name)s.get_initial_values() %% Returns a vector of initial values for all species, %% specified in the order that they occur in the original %% PySB model (i.e., in the order found in model.species). %% Non-zero initial conditions are specified using the %% named parameters included as properties of the instance. %% Hence initial conditions other than the defaults can be %% used by assigning a value to the named parameter and then %% calling this method. The vector returned by the method %% is used for integration by passing it to the MATLAB %% solver as the y0 argument. %% %% %(model_name)s.get_observables(y) %% Given a matrix of timecourses for all model species %% (i.e., resulting from an integration of the model), %% get the trajectories corresponding to the observables. %% Timecourses are returned as a struct which can be %% indexed by observable name. %% %% Examples %% -------- %% Example integration using default initial and parameter %% values: %% %% >> m = %(model_name)s(); %% >> tspan = [0 100]; %% >> [t y] = ode15s(@m.odes, tspan, m.get_initial_values()); %% %% Retrieving the observables: %% %% >> y_obs = m.get_observables(y) %% properties observables parameters end methods function self = %(model_name)s() %% Assign default parameter values %(params_str)s %% Define species indices (first row) and coefficients %% (second row) of named observables %(observables_str)s end function initial_values = get_initial_values(self) %% Return the vector of initial conditions for all %% species based on the values of the parameters %% as currently defined in the instance. %(initial_values_str)s end function y = odes(self, tspan, y0) %% Right hand side function for the ODEs %% Shorthand for the struct of model parameters p = self.parameters; %(odes_str)s end function y_obs = get_observables(self, y) %% Retrieve the trajectories for the model observables %% from a matrix of the trajectories of all model %% species. %% Initialize the struct of observable timecourses %% that we will return y_obs = struct(); %% Iterate over the observables; observable_names = fieldnames(self.observables); for i = 1:numel(observable_names) obs_matrix = self.observables.(observable_names{i}); species = obs_matrix(1, :); coefficients = obs_matrix(2, :); y_obs.(observable_names{i}) = ... 
y(:, species) * coefficients'; end end end end """, 0) % {'docstring': docstring, 'model_name': model_name, 'params_str':params_str, 'initial_values_str': initial_values_str, 'observables_str': observables_str, 'params_str': params_str, 'odes_str': odes_str}) return output.getvalue()
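The re.sub calls in this exporter rewrite generated C-style ODE strings into MATLAB syntax: species indices shift from zero-based __sN to one-based y0(N+1) via a callable replacement, pow() becomes power(), and named parameters get a p. prefix using \b word boundaries. A minimal sketch of those steps, where the ODE string and parameter names are made up:

import re

ode = "y(1,1) = pow(__s0, 2)*k_deg - __s1*k_syn;"
# Zero-based __sN -> one-based MATLAB y0(N+1)
ode = re.sub(r'__s(\d+)', lambda m: 'y0(%d)' % (int(m.group(1)) + 1), ode)
# C 'pow' -> MATLAB 'power'
ode = re.sub(r'pow\(', 'power(', ode)
# Prefix named parameters with the struct name, using word boundaries
for name in ('k_deg', 'k_syn'):
    ode = re.sub(r'\b%s\b' % name, 'p.%s' % name, ode)
print(ode)   # y(1,1) = power(y0(1), 2)*p.k_deg - y0(2)*p.k_syn;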
Example 5 (2 votes)
View licensedef __init__(self, model, tspan=None, initials=None, param_values=None, verbose=False, **kwargs): super(ScipyOdeSimulator, self).__init__(model, tspan=tspan, initials=initials, param_values=param_values, verbose=verbose, **kwargs) # We'll need to know if we're using the Jacobian when we get to run() self._use_analytic_jacobian = kwargs.get('use_analytic_jacobian', False) self.cleanup = kwargs.get('cleanup', True) integrator = kwargs.get('integrator', 'vode') # Generate the equations for the model pysb.bng.generate_equations(self._model, self.cleanup, self.verbose) def _eqn_substitutions(eqns): """String substitutions on the sympy C code for the ODE RHS and Jacobian functions to use appropriate terms for variables and parameters.""" # Substitute expanded parameter formulas for any named expressions for e in self._model.expressions: eqns = re.sub(r'\b(%s)\b' % e.name, '(' + sympy.ccode( e.expand_expr()) + ')', eqns) # Substitute sums of observable species that could've been added # by expressions for obs in self._model.observables: obs_string = '' for i in range(len(obs.coefficients)): if i > 0: obs_string += "+" if obs.coefficients[i] > 1: obs_string += str(obs.coefficients[i]) + "*" obs_string += "__s" + str(obs.species[i]) if len(obs.coefficients) > 1: obs_string = '(' + obs_string + ')' eqns = re.sub(r'\b(%s)\b' % obs.name, obs_string, eqns) # Substitute 'y[i]' for 'si' eqns = re.sub(r'\b__s(\d+)\b', lambda m: 'y[%s]' % (int(m.group(1))), eqns) # Substitute 'p[i]' for any named parameters for i, p in enumerate(self._model.parameters): eqns = re.sub(r'\b(%s)\b' % p.name, 'p[%d]' % i, eqns) return eqns # ODE RHS ----------------------------------------------- # Prepare the string representations of the RHS equations code_eqs = '\n'.join(['ydot[%d] = %s;' % (i, sympy.ccode(self._model.odes[i])) for i in range(len(self._model.odes))]) code_eqs = _eqn_substitutions(code_eqs) self._test_inline() # If we can't use weave.inline to run the C code, compile it as # Python code instead for use with # exec. Note: C code with array indexing, basic math operations, # and pow() just happens to also # be valid Python. If the equations ever have more complex things # in them, this might fail. if not self._use_inline: code_eqs_py = compile(code_eqs, '<%s odes>' % self._model.name, 'exec') else: for arr_name in ('ydot', 'y', 'p'): macro = arr_name.upper() + '1' code_eqs = re.sub(r'\b%s\[(\d+)\]' % arr_name, '%s(\\1)' % macro, code_eqs) def rhs(t, y, p): ydot = self.ydot # note that the evaluated code sets ydot as a side effect if self._use_inline: weave_inline(code_eqs, ['ydot', 't', 'y', 'p']) else: _exec(code_eqs_py, locals()) return ydot # JACOBIAN ----------------------------------------------- # We'll keep the code for putting together the matrix in Sympy # in case we want to do manipulations of the matrix later (e.g., to # put together the sensitivity matrix) jac_fn = None if self._use_analytic_jacobian: species_names = ['__s%d' % i for i in range(len(self._model.species))] jac_matrix = [] # Rows of jac_matrix are by equation f_i: # [[df1/x1, df1/x2, ..., df1/xn], # [ ... ], # [dfn/x1, dfn/x2, ..., dfn/xn], for eqn in self._model.odes: # Derivatives for f_i... jac_row = [] for species_name in species_names: # ... 
with respect to s_j d = sympy.diff(eqn, species_name) jac_row.append(d) jac_matrix.append(jac_row) # Next, prepare the stringified Jacobian equations jac_eqs_list = [] for i, row in enumerate(jac_matrix): for j, entry in enumerate(row): # Skip zero entries in the Jacobian if entry == 0: continue jac_eq_str = 'jac[%d, %d] = %s;' % ( i, j, sympy.ccode(entry)) jac_eqs_list.append(jac_eq_str) jac_eqs = _eqn_substitutions('\n'.join(jac_eqs_list)) # Try to inline the Jacobian if possible (as above for RHS) if not self._use_inline: jac_eqs_py = compile(jac_eqs, '<%s jacobian>' % self._model.name, 'exec') else: # Substitute array refs with calls to the JAC1 macro for inline jac_eqs = re.sub(r'\bjac\[(\d+), (\d+)\]', r'JAC2(\1, \2)', jac_eqs) # Substitute calls to the Y1 and P1 macros for arr_name in ('y', 'p'): macro = arr_name.upper() + '1' jac_eqs = re.sub(r'\b%s\[(\d+)\]' % arr_name, '%s(\\1)' % macro, jac_eqs) def jacobian(t, y, p): jac = self.jac # note that the evaluated code sets jac as a side effect if self._use_inline: weave_inline(jac_eqs, ['jac', 't', 'y', 'p']); else: _exec(jac_eqs_py, locals()) return jac # Initialize the jacobian argument to None if we're not going to # use it # jac = self.jac as defined in jacobian() earlier # Initialization of matrix for storing the Jacobian self.jac = np.zeros( (len(self._model.odes), len(self._model.species))) jac_fn = jacobian # build integrator options list from our defaults and any kwargs # passed to this function options = {} if self.default_integrator_options.get(integrator): options.update( self.default_integrator_options[integrator]) # default options options.update(kwargs.get('integrator_options', {})) # overwrite # defaults self.opts = options self.ydot = np.ndarray(len(self._model.species)) # Integrator if integrator == 'lsoda': # lsoda is accessed via scipy.integrate.odeint which, # as a function, # requires that we pass its args at the point of call. Thus we need # to stash stuff like the rhs and jacobian functions in self so we # can pass them in later. self.integrator = integrator # lsoda's rhs and jacobian function arguments are in a different # order to other integrators, so we define these shims that swizzle # the argument order appropriately. self.func = lambda t, y, p: rhs(y, t, p) if jac_fn is None: self.jac_fn = None else: self.jac_fn = lambda t, y, p: jac_fn(y, t, p) else: # The scipy.integrate.ode integrators on the other hand are object # oriented and hold the functions and such internally. Once we set # up the integrator object we only need to retain a reference to it # and can forget about the other bits. self.integrator = scipy.integrate.ode(rhs, jac=jac_fn) with warnings.catch_warnings(): warnings.filterwarnings('error', 'No integrator name match') self.integrator.set_integrator(integrator, **options)
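The _eqn_substitutions helper above relies on re.sub with \b word boundaries and numbered backreferences to rewrite generated code: species __sN become y[N], parameters become p[i], and, for the inlined C path, array indexing such as y[0] becomes the macro call Y1(0). A minimal sketch of the macro rewrite, with a made-up equation string:

import re

code_eqs = "ydot[0] = p[1]*y[0] - p[0]*y[1];"
for arr_name in ('ydot', 'y', 'p'):
    macro = arr_name.upper() + '1'   # YDOT1, Y1, P1
    # y[0] -> Y1(0), keeping the index via the \1 backreference
    code_eqs = re.sub(r'\b%s\[(\d+)\]' % arr_name, r'%s(\1)' % macro, code_eqs)
print(code_eqs)   # YDOT1(0) = P1(1)*Y1(0) - P1(0)*Y1(1);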
Example 6 (2 votes)
View licensedef __private(): class Token: def __init__(self, type, value=None): self.type = type self.value = value def __cmp__(self, o): return cmp(self.type, o) def __repr__(self): return self.value or self.type class AST: def __init__(self, type): self.type = type self._kids = [] def __getitem__(self, i): return self._kids[i] def __len__(self): return len(self._kids) def __setslice__(self, low, high, seq): self._kids[low:high] = seq def __cmp__(self, o): return cmp(self.type, o) class GdbMiScannerBase(spark.GenericScanner): def tokenize(self, input): self.rv = [] spark.GenericScanner.tokenize(self, input) return self.rv def t_nl(self, s): r'\n|\r\n' self.rv.append(Token('nl')) def t_whitespace(self, s): r'[ \t\f\v]+' pass def t_symbol(self, s): r',|\{|\}|\[|\]|\=' self.rv.append(Token(s, s)) def t_result_type(self, s): r'\*|\+|\^' self.rv.append(Token('result_type', s)) def t_stream_type(self, s): r'\@|\&|\~' self.rv.append(Token('stream_type', s)) def t_string(self, s): r'[\w-]+' self.rv.append(Token('string', s)) def t_c_string(self, s): r'\".*?(?<![\\\\])\"' inner = self.__unescape(s[1:len(s) - 1]) self.rv.append(Token('c_string', inner)) def t_default(self, s): r'( . | \n )+' raise Exception("Specification error: unmatched input for '%s'" % s) def __unescape(self, s): s = re.sub(r'\\r', r'\r', s) s = re.sub(r'\\n', r'\n', s) s = re.sub(r'\\t', r'\t', s) return re.sub(r'\\(.)', r'\1', s) class GdbMiScanner(GdbMiScannerBase): def t_token(self, s): r'\d+' self.rv.append(Token('token', s)) class GdbMiParser(spark.GenericASTBuilder): def __init__(self): spark.GenericASTBuilder.__init__(self, AST, 'output') def p_output(self, args): """ output ::= record_list record_list ::= generic_record record_list ::= generic_record record_list generic_record ::= result_record generic_record ::= stream_record result_record ::= result_header result_list nl result_record ::= result_header nl result_header ::= token result_type class result_header ::= result_type class result_header ::= token = class result_header ::= = class stream_record ::= stream_type c_string nl result_list ::= , result result_list result_list ::= , result result ::= variable = value class ::= string variable ::= string value ::= const value ::= tuple value ::= list value_list ::= , value value_list ::= , value value_list const ::= c_string tuple ::= { } tuple ::= { result } tuple ::= { result result_list } list ::= [ ] list ::= [ value ] list ::= [ value value_list ] list ::= [ result ] list ::= [ result result_list ] list ::= { value } list ::= { value value_list } """ pass def terminal(self, token): # Homogeneous AST. rv = AST(token.type) rv.value = token.value return rv def nonterminal(self, type, args): # Flatten AST a bit by not making nodes if there's only one child. 
exclude = [ 'record_list' ] if len(args) == 1 and type not in exclude: return args[0] return spark.GenericASTBuilder.nonterminal(self, type, args) def error(self, token, i=0, tokens=None): if i > 2: print('%s %s %s %s' % (tokens[i - 3], tokens[i - 2], tokens[i - 1], tokens[i])) raise Exception("Syntax error at or near %d:'%s' token" % (i, token)) class GdbMiInterpreter(spark.GenericASTTraversal): def __init__(self, ast): spark.GenericASTTraversal.__init__(self, ast) self.postorder() def __translate_type(self, type): table = { '^': 'result', '=': 'notify', '+': 'status', '*': 'exec', '~': 'console', '@': 'target', '&': 'log' } return table[type] def n_result(self, node): # result ::= variable = value node.value = {node[0].value: node[2].value} def n_tuple(self, node): if len(node) == 2: # tuple ::= {} node.value = {} elif len(node) == 3: # tuple ::= { result } node.value = node[1].value elif len(node) == 4: # tuple ::= { result result_list } node.value = node[1].value for result in node[2].value: for n, v in result.items(): if node.value.has_key(n): old = node.value[n] if not isinstance(old, list): node.value[n] = [node.value[n]] node.value[n].append(v) else: node.value[n] = v else: raise Exception('Invalid tuple') def n_list(self, node): if len(node) == 2: # list ::= [] node.value = [] elif len(node) == 3: # list ::= [ value ] node.value = [node[1].value] elif len(node) == 4: # list ::= [ value value_list ] node.value = [node[1].value] + node[2].value #list ::= [ result ] #list ::= [ result result_list ] #list ::= { value } #list ::= { value value_list } def n_value_list(self, node): if len(node) == 2: #value_list ::= , value node.value = [node[1].value] elif len(node) == 3: #value_list ::= , value value_list node.value = [node[1].value] + node[2].value def n_result_list(self, node): if len(node) == 2: # result_list ::= , result node.value = [node[1].value] else: # result_list ::= , result result_list node.value = [node[1].value] + node[2].value def n_result_record(self, node): node.value = node[0].value if len(node) == 3: # result_record ::= result_header result_list nl node.value['results'] = node[1].value elif len(node) == 2: # result_record ::= result_header nl pass def n_result_header(self, node): if len(node) == 3: # result_header ::= token result_type class node.value = { 'token': node[0].value, 'type': self.__translate_type(node[1].value), 'class_': node[2].value, 'record_type': 'result' } elif len(node) == 2: # result_header ::= result_type class node.value = { 'token': None, 'type': self.__translate_type(node[0].value), 'class_': node[1].value, 'record_type': 'result' } def n_stream_record(self, node): # stream_record ::= stream_type c_string nl node.value = { 'type': self.__translate_type(node[0].value), 'value': node[1].value, 'record_type': 'stream' } def n_record_list(self, node): if len(node) == 1: # record_list ::= generic_record node.value = [node[0].value] elif len(node) == 2: # record_list ::= generic_record record_list node.value = [node[0].value] + node[1].value class GdbDynamicObject: def __init__(self, dict_): self.graft(dict_) def __repr__(self): return pprint.pformat(self.__dict__) def __nonzero__(self): return len(self.__dict__) > 0 def __getitem__(self, i): if i == 0 and len(self.__dict__) > 0: return self else: raise IndexError def __getattr__(self, name): if name.startswith('__'): raise AttributeError return None def graft(self, dict_): for name, value in dict_.items(): name = name.replace('-', '_') if isinstance(value, dict): value = GdbDynamicObject(value) 
elif isinstance(value, list): x = value value = [] for item in x: if isinstance(item, dict): item = GdbDynamicObject(item) value.append(item) setattr(self, name, value) class GdbMiRecord: def __init__(self, record): self.result = None for name, value in record[0].items(): name = name.replace('-', '_') if name == 'results': for result in value: if not self.result: self.result = GdbDynamicObject(result) else: # graft this result to self.results self.result.graft(result) else: setattr(self, name, value) def __repr__(self): return pprint.pformat(self.__dict__) return (GdbMiScanner(), GdbMiParser(), GdbMiInterpreter, GdbMiRecord)
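In this GDB/MI scanner, re.sub appears only in the __unescape helper, which turns the escape sequences inside a quoted c_string back into real characters. A standalone sketch of that helper (the sample input is invented, and the replacements use literal control characters rather than escape templates):

import re

def unescape(s):
    # Literal "\r", "\n", "\t" become real control characters...
    s = re.sub(r'\\r', '\r', s)
    s = re.sub(r'\\n', '\n', s)
    s = re.sub(r'\\t', '\t', s)
    # ...then any remaining \X collapses to X (e.g. \" -> ")
    return re.sub(r'\\(.)', r'\1', s)

print(unescape(r'breakpoint at \"main\"\nhit'))
# breakpoint at "main"
# hit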
Example 7 (2 votes)
View licensedef _create_helpers_module(): def to_str(val): """Convert value into string. Return '' if val is None. ex. >>> to_str(None) '' >>> to_str("foo") 'foo' >>> to_str(u"\u65e5\u672c\u8a9e") u'\u65e5\u672c\u8a9e' >>> to_str(123) '123' """ if val is None: return '' if isinstance(val, str): return val return str(val, 'utf-8') def generate_tostrfunc(encoding): """Generate 'to_str' function which encodes unicode to str. ex. import tenjin from tenjin.helpers import escape to_str = tenjin.generate_tostrfunc('utf-8') engine = tenjin.Engine() context = { 'items': [u'AAA', u'BBB', u'CCC'] } output = engine.render('example.pyhtml') print output """ def to_str(val): if val is None: return '' if isinstance(val, str): return val return str(val, 'utf-8') return to_str def echo(string): """add string value into _buf. this is equivarent to '#{string}'.""" frame = sys._getframe(1) context = frame.f_locals context['_buf'].append(string) def start_capture(varname=None): """ start capturing with name. ex. list.rbhtml <html><body> <?py start_capture('itemlist') ?> <ul> <?py for item in list: ?> <li>${item}</li> <?py #end ?> </ul> <?py stop_capture() ?> </body></html> ex. layout.rbhtml <html xml:lang="en" lang="en"> <head> <title>Capture Example</title> </head> <body> <!-- content --> #{itemlist} <!-- /content --> </body> </html> """ frame = sys._getframe(1) context = frame.f_locals context['_buf_tmp'] = context['_buf'] context['_capture_varname'] = varname context['_buf'] = [] def stop_capture(store_to_context=True): """ stop capturing and return the result of capturing. if store_to_context is True then the result is stored into _context[varname]. """ frame = sys._getframe(1) context = frame.f_locals result = ''.join(context['_buf']) context['_buf'] = context.pop('_buf_tmp') varname = context.pop('_capture_varname') if varname: context[varname] = result if store_to_context: context['_context'][varname] = result return result def captured_as(name): """ helper method for layout template. if captured string is found then append it to _buf and return True, else return False. """ frame = sys._getframe(1) context = frame.f_locals if name in context: _buf = context['_buf'] _buf.append(context[name]) return True return False def _p(arg): """ex. '/show/'+_p("item['id']") => "/show/#{item['id']}" """ return '<`#%s#`>' % arg # decoded into #{...} by preprocessor def _P(arg): """ex. 
'<b>%s</b>' % _P("item['id']") => "<b>${item['id']}</b>" """ return '<`$%s$`>' % arg # decoded into ${...} by preprocessor def _decode_params(s): """decode <`#...#`> and <`$...$`> into #{...} and ${...}""" from urllib.parse import unquote dct = { 'lt':'<', 'gt':'>', 'amp':'&', 'quot':'"', '#039':"'", } def unescape(s): #return s.replace('<', '<').replace('>', '>').replace('"', '"').replace(''', "'").replace('&', '&') return re.sub(r'&(lt|gt|quot|amp|#039);', lambda m: dct[m.group(1)], s) s = re.sub(r'%3C%60%23(.*?)%23%60%3E', lambda m: '#{%s}' % unquote(m.group(1)), s) s = re.sub(r'%3C%60%24(.*?)%24%60%3E', lambda m: '${%s}' % unquote(m.group(1)), s) s = re.sub(r'<`#(.*?)#`>', lambda m: '#{%s}' % unescape(m.group(1)), s) s = re.sub(r'<`\$(.*?)\$`>', lambda m: '${%s}' % unescape(m.group(1)), s) s = re.sub(r'<`#(.*?)#`>', r'#{\1}', s) s = re.sub(r'<`\$(.*?)\$`>', r'${\1}', s) return s mod = _create_module('tenjin.helpers') mod.to_str = to_str mod.generate_tostrfunc = generate_tostrfunc mod.echo = echo mod.start_capture = start_capture mod.stop_capture = stop_capture mod.captured_as = captured_as mod._p = _p mod._P = _P mod._decode_params = _decode_params mod.__all__ = ['escape', 'to_str', 'echo', 'generate_tostrfunc', 'start_capture', 'stop_capture', 'captured_as', '_p', '_P', '_decode_params', ] return mod
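The interesting re.sub usage here is in _decode_params, where a callable replacement re-inflates URL-quoted <`#...#`> and <`$...$`> markers back into #{...} and ${...} template expressions. A minimal sketch of one of those substitutions, with an illustrative input string:

import re
from urllib.parse import unquote

s = "/show/%3C%60%23item%5B%27id%27%5D%23%60%3E"
# %3C%60%23 ... %23%60%3E is the URL-quoted form of <`# ... #`>
s = re.sub(r'%3C%60%23(.*?)%23%60%3E',
           lambda m: '#{%s}' % unquote(m.group(1)), s)
print(s)   # /show/#{item['id']}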
Example 8 (2 votes)
View licensedef doStuff(self, channel): # Check what's on for each channel self.send(channel, "whatson") while not self.dataReady("whatson"): pass data = self.recv("whatson") if data == None: pid = None else: pid = data[0] title = data[1] offset = data[2] duration = data[3] expectedstart = data[4] if pid != self.channels[channel]: # Perhaps just do a duplicate scan before creating Twitter stream if pid == None: self.channels[channel] = None Print (channel, ": Off Air") else: self.channels[channel] = pid self.send(["http://www.bbc.co.uk/programmes/" + pid + ".rdf"], "dataout") while not self.dataReady("datain"): pass recvdata = self.recv("datain") if recvdata[0] == "OK": programmedata = recvdata[1] else: # Fake programme data to prevent crash - not ideal programmedata = '<?xml version="1.0" encoding="utf-8"?> \ <rdf:RDF xmlns:rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#" \ xmlns:rdfs = "http://www.w3.org/2000/01/rdf-schema#" \ xmlns:owl = "http://www.w3.org/2002/07/owl#" \ xmlns:foaf = "http://xmlns.com/foaf/0.1/" \ xmlns:po = "http://purl.org/ontology/po/" \ xmlns:mo = "http://purl.org/ontology/mo/" \ xmlns:skos = "http://www.w3.org/2008/05/skos#" \ xmlns:time = "http://www.w3.org/2006/time#" \ xmlns:dc = "http://purl.org/dc/elements/1.1/" \ xmlns:dcterms = "http://purl.org/dc/terms/" \ xmlns:wgs84_pos= "http://www.w3.org/2003/01/geo/wgs84_pos#" \ xmlns:timeline = "http://purl.org/NET/c4dm/timeline.owl#" \ xmlns:event = "http://purl.org/NET/c4dm/event.owl#"> \ </rdf:RDF>' # RDF reader needs to read from a file so write out first # Alternative is to read from a URL, but this lacks proper proxy support filepath = "tempRDF.txt" file = open(filepath, 'w') file.write(programmedata) file.close() g = Graph() # This is a temporary proxy fix. A URL could be put here instead g.parse("tempRDF.txt") # Identify the brand and whether there are any official hashtags twittags = list() for bid in g.subjects(object = rdflib.URIRef('http://purl.org/ontology/po/Brand')): # bid is Brand ID bidmod = bid.replace("#programme","") bidmod = str(bidmod.replace("file:///programmes/","")) if (bidmod in self.officialbrandtags): twittags = self.officialbrandtags[bidmod] break # Identify the series and whether there are any official hashtags if len(twittags) == 0: # Identify the brand and whether there are any official hashtags for sid in g.subjects(object = rdflib.URIRef('http://purl.org/ontology/po/Series')): # sid is Series ID sidmod = sid.replace("#programme","") sidmod = str(sidmod.replace("file:///programmes/","")) if (sidmod in self.officialseriestags): twittags = self.officialseriestags[sidmod] break vidmod = "" so = g.subject_objects(predicate=rdflib.URIRef('http://purl.org/ontology/po/version')) # Pick a version, any version - for this which one doesn't matter for x in so: # vid is version id vid = x[1] vidmod = vid.replace("#programme","") vidmod = vidmod.replace("file:///programmes/","") break # Got version, now get people self.send(["http://www.bbc.co.uk/programmes/" + vidmod + ".rdf"], "dataout") while not self.dataReady("datain"): pass recvdata = self.recv("datain") if recvdata[0] == "OK": versiondata = recvdata[1] else: versiondata = '<?xml version="1.0" encoding="utf-8"?> \ <rdf:RDF xmlns:rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#" \ xmlns:rdfs = "http://www.w3.org/2000/01/rdf-schema#" \ xmlns:owl = "http://www.w3.org/2002/07/owl#" \ xmlns:foaf = "http://xmlns.com/foaf/0.1/" \ xmlns:po = "http://purl.org/ontology/po/" \ xmlns:mo = "http://purl.org/ontology/mo/" \ xmlns:skos = 
"http://www.w3.org/2008/05/skos#" \ xmlns:time = "http://www.w3.org/2006/time#" \ xmlns:dc = "http://purl.org/dc/elements/1.1/" \ xmlns:dcterms = "http://purl.org/dc/terms/" \ xmlns:wgs84_pos= "http://www.w3.org/2003/01/geo/wgs84_pos#" \ xmlns:timeline = "http://purl.org/NET/c4dm/timeline.owl#" \ xmlns:event = "http://purl.org/NET/c4dm/event.owl#"> \ </rdf:RDF>' filepath = "tempRDF.txt" file = open(filepath, 'w') file.write(versiondata) file.close() g = Graph() g.parse("tempRDF.txt") # Identify if this is a change of programme, or the first time we've checked what's on for Print clarity if self.firstrun: Print (channel , ": " + title) else: Print (channel , ": Changed to - " , title) # Minor alterations title = title.replace("&","and") if ":" in title: titlebits = title.split(":") title = titlebits[0] # Saving a copy here so apostrophes etc can be used in the Twitter people search titlesave = title # Remove punctuation for item in """!"#$%()*+,-./;<=>[email protected][\\]?_'`{|}?""": title = title.replace(item,"") keywords = dict() # Save keywords next to a descriptor of what they are keywords[pid] = "PID" # Add official hashtags to the list for tag in twittags: keywords[tag] = "Twitter" # Duplicates will be removed later if string.find(title,"The",0,3) != -1: newtitle = string.replace(re.sub("\s+","",title),"The ","",1) keywords[channel] = "Channel" keywords["#" + string.lower(re.sub("\s+","",title))] = "Title" # Check for and remove year too keywords["#" + string.replace(string.lower(re.sub("\s+","",title))," " + str(date.today().year),"",1)] = "Title" keywords['#' + string.lower(re.sub("\s+","",newtitle))] = "Title" # Check for and remove year too keywords['#' + string.replace(string.lower(re.sub("\s+","",newtitle))," " + str(date.today().year),"",1)] = "Title" else: keywords[channel] = "Channel" keywords["#" + string.lower(re.sub("\s+","",title))] = "Title" keywords["#" + string.replace(string.lower(re.sub("\s+","",title))," " + str(date.today().year),"",1)] = "Title" allwordtitle = string.replace(title,"The ","",1) allwordtitle = allwordtitle.lower() # Remove current year from events allwordtitle = allwordtitle.replace(" " + str(date.today().year),"",1) titlewords = allwordtitle.split() if len(titlewords) > 1: keywords[allwordtitle] = "Title" else: # Trial fix for issue of one word titles producing huge amounts of data keywords[allwordtitle + "^" + "bbc"] = "Title" keywords["#" + re.sub("\s+","",allwordtitle)] = "Title" numwords = dict({"one" : 1, "two" : 2, "three": 3, "four" : 4, "five": 5, "six" : 6, "seven": 7}) for word in numwords: if word in channel.lower() and channel != "asiannetwork": # Bug fix! 
asianne2rk numchannel = string.replace(channel.lower(),word,str(numwords[word])) keywords[numchannel] = "Channel" break if str(numwords[word]) in channel.lower(): numchannel = string.replace(channel.lower(),str(numwords[word]),word) keywords[numchannel] = "Channel" break # Load NameCache (people we've already searched for on Twitter to avoid hammering PeopleSearch) save = False try: homedir = os.path.expanduser("~") file = open(homedir + "/namecache.conf",'r') save = True except IOError: e = sys.exc_info()[1] Print ("Failed to load name cache - will attempt to create a new file: " , e) if save: raw_config = file.read() file.close() try: config = cjson.decode(raw_config) except cjson.DecodeError: e = sys.exc_info()[1] config = dict() else: config = dict() s = g.subjects(predicate=rdflib.URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'),object=rdflib.URIRef('http://purl.org/ontology/po/Role')) for x in s: rid = g.value(predicate=rdflib.URIRef('http://purl.org/ontology/po/role'),object=rdflib.BNode(x)) pid = g.value(subject=rdflib.BNode(rid),predicate=rdflib.URIRef('http://purl.org/ontology/po/participant')) firstname = str(g.value(subject=rdflib.BNode(pid),predicate=rdflib.URIRef('http://xmlns.com/foaf/0.1/givenName'))) lastname = str(g.value(subject=rdflib.BNode(pid),predicate=rdflib.URIRef('http://xmlns.com/foaf/0.1/familyName'))) if ((firstname + " " + lastname) in config): # Found a cached value if config[firstname + " " + lastname] != "": keywords[config[firstname + " " + lastname]] = "Twitter" else: # Not cached yet - new request self.send(firstname + " " + lastname, "search") while not self.dataReady("search"): pass twitdata = self.recv("search") screenname = "" try: for user in twitdata: # Only use this Twitter screen name if there's a good chance they're the person we're after if ("verified" in user): if (user['verified'] == True or user['followers_count'] > 10000) and string.lower(user['name']) == string.lower(firstname + " " + lastname): screenname = user['screen_name'] keywords[screenname] = "Twitter" break except AttributeError: pass config[firstname + " " + lastname] = screenname keywords[firstname + " " + lastname] = "Participant" s = g.subjects(predicate=rdflib.URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'),object=rdflib.URIRef('http://purl.org/ontology/po/Character')) for x in s: character = str(g.value(subject=rdflib.BNode(x),predicate=rdflib.URIRef('http://xmlns.com/foaf/0.1/name'))) rid = g.value(predicate=rdflib.URIRef('http://purl.org/ontology/po/role'),object=rdflib.BNode(x)) pid = g.value(subject=rdflib.BNode(rid),predicate=rdflib.URIRef('http://purl.org/ontology/po/participant')) firstname = str(g.value(subject=rdflib.BNode(pid),predicate=rdflib.URIRef('http://xmlns.com/foaf/0.1/givenName'))) lastname = str(g.value(subject=rdflib.BNode(pid),predicate=rdflib.URIRef('http://xmlns.com/foaf/0.1/familyName'))) # This ^ is a temporary fix until I work out a better DB structure keywords[character + "^" + channel] = "Character" keywords[character + "^" + title] = "Character" if " " in character: # Looks like we have a firstname + surname situation charwords = character.split() if charwords[0] != "Dr" and charwords[0] != "Miss" and charwords[0] != "Mr" and charwords[0] != "Mrs" and charwords[0] != "Ms" and charwords[0] != "The": # As long as the first word isn't a title, add it as a first name # This ^ is a temporary fix until I work out a better DB structure keywords[charwords[0] + "^" + channel] = "Character" keywords[charwords[0] + "^" + title] = 
"Character" elif len(charwords) > 2: # If the first word was a title, and the second word isn't a surname (checked by > 2) add the first name # This ^ is a temporary fix until I work out a better DB structure keywords[charwords[1] + "^" + channel] = "Character" keywords[charwords[1] + "^" + title] = "Character" if ((firstname + " " + lastname) in config): # Found a cached value if config[firstname + " " + lastname] != "": keywords[config[firstname + " " + lastname]] = "Actor" else: # Not cached yet - new request self.send(firstname + " " + lastname, "search") while not self.dataReady("search"): pass twitdata = self.recv("search") screenname = "" try: for user in twitdata: if ("verified" in user): if (user['verified'] == True or user['followers_count'] > 10000) and string.lower(user['name']) == string.lower(firstname + " " + lastname): screenname = user['screen_name'] keywords[screenname] = "Twitter" break except AttributeError: pass config[firstname + " " + lastname] = screenname keywords[firstname + " " + lastname] = "Actor" # Radio appears to have been forgotten about a bit in RDF / scheduling at the mo # So, let's do some extra queries and see if the show title is a person's name on Twitter if "radio" in channel or "6music" in channel or "asiannetwork" in channel or "sportsextra" in channel or "worldservice" in channel: # However, radio shows are often named using the DJ - The cases where this isn't true will cause problems however as they'll be saved in json - DOH! TODO if (titlesave in config): # Found a cached value if config[titlesave] != "": keywords[config[titlesave]] = "Twitter" elif len(titlesave.split()) < 4: # Prevent some shows getting through at least - restricts people's names to three words self.send(titlesave, "search") while not self.dataReady("search"): pass twitdata = self.recv("search") screenname = "" try: for user in twitdata: if ("verified" in user): if (user['verified'] == True or user['followers_count'] > 10000) and string.lower(user['name']) == titlesave.lower(): screenname = user['screen_name'] keywords[screenname] = "Twitter" break except AttributeError: pass config[titlesave] = screenname try: file = open(homedir + "/namecache.conf",'w') raw_config = cjson.encode(config) file.write(raw_config) file.close() except IOError: Print ("Failed to save name cache - could cause rate limit problems") return [keywords,data] else: if pid == None: Print(channel , ": No change - Off Air") else: Print (channel , ": No change - " , title)
2
Example 9
View licensedef doStuff(self, channel): # Check what's on for each channel self.send(channel, "whatson") while not self.dataReady("whatson"): pass data = self.recv("whatson") if data == None: pid = None else: pid = data[0] title = data[1] offset = data[2] duration = data[3] expectedstart = data[4] if pid != self.channels[channel]: # Perhaps just do a duplicate scan before creating Twitter stream if pid == None: self.channels[channel] = None print (channel + ": Off Air") else: self.channels[channel] = pid self.send(["http://www.bbc.co.uk/programmes/" + pid + ".rdf"], "dataout") while not self.dataReady("datain"): pass recvdata = self.recv("datain") if recvdata[0] == "OK": programmedata = recvdata[1] else: # Fake programme data to prevent crash - not ideal programmedata = '<?xml version="1.0" encoding="utf-8"?> \ <rdf:RDF xmlns:rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#" \ xmlns:rdfs = "http://www.w3.org/2000/01/rdf-schema#" \ xmlns:owl = "http://www.w3.org/2002/07/owl#" \ xmlns:foaf = "http://xmlns.com/foaf/0.1/" \ xmlns:po = "http://purl.org/ontology/po/" \ xmlns:mo = "http://purl.org/ontology/mo/" \ xmlns:skos = "http://www.w3.org/2008/05/skos#" \ xmlns:time = "http://www.w3.org/2006/time#" \ xmlns:dc = "http://purl.org/dc/elements/1.1/" \ xmlns:dcterms = "http://purl.org/dc/terms/" \ xmlns:wgs84_pos= "http://www.w3.org/2003/01/geo/wgs84_pos#" \ xmlns:timeline = "http://purl.org/NET/c4dm/timeline.owl#" \ xmlns:event = "http://purl.org/NET/c4dm/event.owl#"> \ </rdf:RDF>' # RDF reader needs to read from a file so write out first # Alternative is to read from a URL, but this lacks proper proxy support filepath = "tempRDF.txt" file = open(filepath, 'w') file.write(programmedata) file.close() g = Graph() # This is a temporary proxy fix. A URL could be put here instead g.parse("tempRDF.txt") # Identify the brand and whether there are any official hashtags twittags = list() for bid in g.subjects(object = rdflib.URIRef('http://purl.org/ontology/po/Brand')): # bid is Brand ID bidmod = bid.replace("#programme","") bidmod = str(bidmod.replace("file:///programmes/","")) if self.officialbrandtags.has_key(bidmod): twittags = self.officialbrandtags[bidmod] break # Identify the series and whether there are any official hashtags if len(twittags) == 0: # Identify the brand and whether there are any official hashtags for sid in g.subjects(object = rdflib.URIRef('http://purl.org/ontology/po/Series')): # sid is Series ID sidmod = sid.replace("#programme","") sidmod = str(sidmod.replace("file:///programmes/","")) if self.officialseriestags.has_key(sidmod): twittags = self.officialseriestags[sidmod] break vidmod = "" so = g.subject_objects(predicate=rdflib.URIRef('http://purl.org/ontology/po/version')) # Pick a version, any version - for this which one doesn't matter for x in so: # vid is version id vid = x[1] vidmod = vid.replace("#programme","") vidmod = vidmod.replace("file:///programmes/","") break # Got version, now get people self.send(["http://www.bbc.co.uk/programmes/" + vidmod + ".rdf"], "dataout") while not self.dataReady("datain"): pass recvdata = self.recv("datain") if recvdata[0] == "OK": versiondata = recvdata[1] else: versiondata = '<?xml version="1.0" encoding="utf-8"?> \ <rdf:RDF xmlns:rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#" \ xmlns:rdfs = "http://www.w3.org/2000/01/rdf-schema#" \ xmlns:owl = "http://www.w3.org/2002/07/owl#" \ xmlns:foaf = "http://xmlns.com/foaf/0.1/" \ xmlns:po = "http://purl.org/ontology/po/" \ xmlns:mo = "http://purl.org/ontology/mo/" \ xmlns:skos = 
"http://www.w3.org/2008/05/skos#" \ xmlns:time = "http://www.w3.org/2006/time#" \ xmlns:dc = "http://purl.org/dc/elements/1.1/" \ xmlns:dcterms = "http://purl.org/dc/terms/" \ xmlns:wgs84_pos= "http://www.w3.org/2003/01/geo/wgs84_pos#" \ xmlns:timeline = "http://purl.org/NET/c4dm/timeline.owl#" \ xmlns:event = "http://purl.org/NET/c4dm/event.owl#"> \ </rdf:RDF>' filepath = "tempRDF.txt" file = open(filepath, 'w') file.write(versiondata) file.close() g = Graph() g.parse("tempRDF.txt") # Identify if this is a change of programme, or the first time we've checked what's on for print clarity if self.firstrun: print (channel + ": " + title) else: print (channel + ": Changed to - " + title) # Minor alterations title = title.replace("&","and") if ":" in title: titlebits = title.split(":") title = titlebits[0] # Saving a copy here so apostrophes etc can be used in the Twitter people search titlesave = title # Remove punctuation for item in """!"#$%()*+,-./;<=>[email protected][\\]?_'`{|}?""": title = title.replace(item,"") keywords = dict() # Save keywords next to a descriptor of what they are keywords[pid] = "PID" # Add official hashtags to the list for tag in twittags: keywords[tag] = "Twitter" # Duplicates will be removed later # If the title has 'The' in it, add hashtags both with and without the 'the' to the keyword list # This simply broadens the list of search terms if string.find(title,"The",0,3) != -1: newtitle = string.replace(re.sub("\s+","",title),"The ","",1) keywords[channel] = "Channel" keywords["#" + string.lower(re.sub("\s+","",title))] = "Title" # Check for and remove year too - some programmes contain a year which may be undesirable from a search point of view keywords["#" + string.replace(string.lower(re.sub("\s+","",title))," " + str(date.today().year),"",1)] = "Title" keywords['#' + string.lower(re.sub("\s+","",newtitle))] = "Title" # Check for and remove year too keywords['#' + string.replace(string.lower(re.sub("\s+","",newtitle))," " + str(date.today().year),"",1)] = "Title" else: keywords[channel] = "Channel" keywords["#" + string.lower(re.sub("\s+","",title))] = "Title" keywords["#" + string.replace(string.lower(re.sub("\s+","",title))," " + str(date.today().year),"",1)] = "Title" allwordtitle = string.replace(title,"The ","",1) allwordtitle = allwordtitle.lower() # Remove current year from events allwordtitle = allwordtitle.replace(" " + str(date.today().year),"",1) titlewords = allwordtitle.split() if len(titlewords) > 1: keywords[allwordtitle] = "Title" else: # Trial fix for issue of one word titles producing huge amounts of data # This occurs for keywords like 'Weather' and 'Breakfast' which aren't BBC limited terms keywords[allwordtitle + "^" + "bbc"] = "Title" keywords["#" + re.sub("\s+","",allwordtitle)] = "Title" # Where a channel uses text for a number, we also want to search using the numeric representation numwords = dict({"one" : 1, "two" : 2, "three": 3, "four" : 4, "five": 5, "six" : 6, "seven": 7}) for word in numwords: if word in channel.lower() and channel != "asiannetwork": # Bug fix! 
asianne2rk numchannel = string.replace(channel.lower(),word,str(numwords[word])) keywords[numchannel] = "Channel" break if str(numwords[word]) in channel.lower(): numchannel = string.replace(channel.lower(),str(numwords[word]),word) keywords[numchannel] = "Channel" break # Load NameCache (people we've already searched for on Twitter to avoid hammering PeopleSearch) save = False try: homedir = os.path.expanduser("~") file = open(homedir + "/namecache.conf",'r') save = True except IOError, e: print ("Failed to load name cache - will attempt to create a new file: " + str(e)) if save: raw_config = file.read() file.close() try: config = cjson.decode(raw_config) except cjson.DecodeError, e: config = dict() else: config = dict() # Find people's names in retrieved RDF s = g.subjects(predicate=rdflib.URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'),object=rdflib.URIRef('http://purl.org/ontology/po/Role')) for x in s: rid = g.value(predicate=rdflib.URIRef('http://purl.org/ontology/po/role'),object=rdflib.BNode(x)) pid = g.value(subject=rdflib.BNode(rid),predicate=rdflib.URIRef('http://purl.org/ontology/po/participant')) firstname = str(g.value(subject=rdflib.BNode(pid),predicate=rdflib.URIRef('http://xmlns.com/foaf/0.1/givenName'))) lastname = str(g.value(subject=rdflib.BNode(pid),predicate=rdflib.URIRef('http://xmlns.com/foaf/0.1/familyName'))) if config.has_key(firstname + " " + lastname): # Found a cached value - this person has been searched for using Twitter if config[firstname + " " + lastname] != "": keywords[config[firstname + " " + lastname]] = "Twitter" else: # Not cached yet - new request to Twitter people search self.send(firstname + " " + lastname, "search") while not self.dataReady("search"): pass twitdata = self.recv("search") screenname = "" try: for user in twitdata: # Only use this Twitter screen name if there's a good chance they're the person we're after if user.has_key('verified'): if (user['verified'] == True or user['followers_count'] > 10000) and string.lower(user['name']) == string.lower(firstname + " " + lastname): screenname = user['screen_name'] keywords[screenname] = "Twitter" break except AttributeError, e: pass config[firstname + " " + lastname] = screenname keywords[firstname + " " + lastname] = "Participant" s = g.subjects(predicate=rdflib.URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'),object=rdflib.URIRef('http://purl.org/ontology/po/Character')) for x in s: character = str(g.value(subject=rdflib.BNode(x),predicate=rdflib.URIRef('http://xmlns.com/foaf/0.1/name'))) rid = g.value(predicate=rdflib.URIRef('http://purl.org/ontology/po/role'),object=rdflib.BNode(x)) pid = g.value(subject=rdflib.BNode(rid),predicate=rdflib.URIRef('http://purl.org/ontology/po/participant')) firstname = str(g.value(subject=rdflib.BNode(pid),predicate=rdflib.URIRef('http://xmlns.com/foaf/0.1/givenName'))) lastname = str(g.value(subject=rdflib.BNode(pid),predicate=rdflib.URIRef('http://xmlns.com/foaf/0.1/familyName'))) # This ^ is a temporary fix until I work out a better DB structure # Character names can sometimes be single common words, like 'James'. 
# For this reason, using this as a search term we require that either the channel name or programme title also appears in the tweet # The ^ signals to later states of this program that the channel name / title doesn't necessarily have to appear next to the character name keywords[character + "^" + channel] = "Character" keywords[character + "^" + title] = "Character" if " " in character: # Looks like we have a firstname + surname situation charwords = character.split() if charwords[0] != "Dr" and charwords[0] != "Miss" and charwords[0] != "Mr" and charwords[0] != "Mrs" and charwords[0] != "Ms" and charwords[0] != "The": # As long as the first word isn't a title, add it as a first name # This ^ is a temporary fix until I work out a better DB structure keywords[charwords[0] + "^" + channel] = "Character" keywords[charwords[0] + "^" + title] = "Character" elif len(charwords) > 2: # If the first word was a title, and the second word isn't a surname (checked by > 2) add the first name # This ^ is a temporary fix until I work out a better DB structure keywords[charwords[1] + "^" + channel] = "Character" keywords[charwords[1] + "^" + title] = "Character" if config.has_key(firstname + " " + lastname): # Found a cached value if config[firstname + " " + lastname] != "": keywords[config[firstname + " " + lastname]] = "Actor" else: # Not cached yet - new request self.send(firstname + " " + lastname, "search") while not self.dataReady("search"): pass twitdata = self.recv("search") screenname = "" try: for user in twitdata: if user.has_key('verified'): if (user['verified'] == True or user['followers_count'] > 10000) and string.lower(user['name']) == string.lower(firstname + " " + lastname): screenname = user['screen_name'] keywords[screenname] = "Twitter" break except AttributeError, e: pass config[firstname + " " + lastname] = screenname keywords[firstname + " " + lastname] = "Actor" # Radio appears to have been forgotten about a bit in RDF / scheduling at the mo # So, let's do some extra queries and see if the show title is a person's name on Twitter if "radio" in channel or "6music" in channel or "asiannetwork" in channel or "sportsextra" in channel or "worldservice" in channel: # However, radio shows are often named using the DJ - The cases where this isn't true will cause problems however as they'll be saved in json - DOH! TODO if config.has_key(titlesave): # Found a cached value if config[titlesave] != "": keywords[config[titlesave]] = "Twitter" elif len(titlesave.split()) < 4: # Prevent some shows getting through at least - restricts people's names to three words self.send(titlesave, "search") while not self.dataReady("search"): pass twitdata = self.recv("search") screenname = "" try: for user in twitdata: if user.has_key('verified'): if (user['verified'] == True or user['followers_count'] > 10000) and string.lower(user['name']) == titlesave.lower(): screenname = user['screen_name'] keywords[screenname] = "Twitter" break except AttributeError, e: pass config[titlesave] = screenname try: file = open(homedir + "/namecache.conf",'w') raw_config = cjson.encode(config) file.write(raw_config) file.close() except IOError, e: print ("Failed to save name cache - could cause rate limit problems") return [keywords,data] else: if pid == None: print(channel + ": No change - Off Air") else: print (channel + ": No change - " + title)
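Example 9 is the Python 2 rendering of the same component: string.lower() and string.replace() wrap the re.sub() calls, dictionaries are checked with has_key(), and exceptions use the old "except IOError, e" syntax. A hedged Python 3 sketch of just the keyword-building step, using str methods in place of the string module (the year is stripped before the whitespace collapse, which simplifies the original ordering, and the sample values are invented):

import re
from datetime import date

def title_keywords(title, channel):
    # Python 3 take on the keyword-building block: str methods replace
    # the string module, the re.sub() call stays the same.
    year_suffix = " " + str(date.today().year)
    keywords = {channel: "Channel"}
    keywords["#" + re.sub(r"\s+", "", title).lower()] = "Title"
    # Variant with the current year removed, e.g. "Glastonbury 2025" -> "#glastonbury"
    keywords["#" + re.sub(r"\s+", "", title.replace(year_suffix, "", 1)).lower()] = "Title"
    if title.startswith("The "):
        # Also index the title without its leading "The"
        keywords["#" + re.sub(r"\s+", "", title[4:]).lower()] = "Title"
    return keywords

print(title_keywords("The One Show", "bbcone"))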
2
Example 10
View licensedef doStuff(self, channel): # Check what's on for each channel self.send(channel, "whatson") while not self.dataReady("whatson"): pass data = self.recv("whatson") if data == None: pid = None else: pid = data[0] title = data[1] offset = data[2] duration = data[3] expectedstart = data[4] if pid != self.channels[channel]: # Perhaps just do a duplicate scan before creating Twitter stream if pid == None: self.channels[channel] = None print (channel + ": Off Air") else: self.channels[channel] = pid self.send(["http://www.bbc.co.uk/programmes/" + pid + ".rdf"], "dataout") while not self.dataReady("datain"): pass recvdata = self.recv("datain") if recvdata[0] == "OK": programmedata = recvdata[1] else: # Fake programme data to prevent crash - not ideal programmedata = '<?xml version="1.0" encoding="utf-8"?> \ <rdf:RDF xmlns:rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#" \ xmlns:rdfs = "http://www.w3.org/2000/01/rdf-schema#" \ xmlns:owl = "http://www.w3.org/2002/07/owl#" \ xmlns:foaf = "http://xmlns.com/foaf/0.1/" \ xmlns:po = "http://purl.org/ontology/po/" \ xmlns:mo = "http://purl.org/ontology/mo/" \ xmlns:skos = "http://www.w3.org/2008/05/skos#" \ xmlns:time = "http://www.w3.org/2006/time#" \ xmlns:dc = "http://purl.org/dc/elements/1.1/" \ xmlns:dcterms = "http://purl.org/dc/terms/" \ xmlns:wgs84_pos= "http://www.w3.org/2003/01/geo/wgs84_pos#" \ xmlns:timeline = "http://purl.org/NET/c4dm/timeline.owl#" \ xmlns:event = "http://purl.org/NET/c4dm/event.owl#"> \ </rdf:RDF>' # RDF reader needs to read from a file so write out first # Alternative is to read from a URL, but this lacks proper proxy support filepath = "tempRDF.txt" file = open(filepath, 'w') file.write(programmedata) file.close() g = Graph() # This is a temporary proxy fix. A URL could be put here instead g.parse("tempRDF.txt") # Identify the brand and whether there are any official hashtags twittags = list() for bid in g.subjects(object = rdflib.URIRef('http://purl.org/ontology/po/Brand')): # bid is Brand ID bidmod = bid.replace("#programme","") bidmod = str(bidmod.replace("file:///programmes/","")) if self.officialbrandtags.has_key(bidmod): twittags = self.officialbrandtags[bidmod] break # Identify the series and whether there are any official hashtags if len(twittags) == 0: # Identify the brand and whether there are any official hashtags for sid in g.subjects(object = rdflib.URIRef('http://purl.org/ontology/po/Series')): # sid is Series ID sidmod = sid.replace("#programme","") sidmod = str(sidmod.replace("file:///programmes/","")) if self.officialseriestags.has_key(sidmod): twittags = self.officialseriestags[sidmod] break vidmod = "" so = g.subject_objects(predicate=rdflib.URIRef('http://purl.org/ontology/po/version')) # Pick a version, any version - for this which one doesn't matter for x in so: # vid is version id vid = x[1] vidmod = vid.replace("#programme","") vidmod = vidmod.replace("file:///programmes/","") break # Got version, now get people self.send(["http://www.bbc.co.uk/programmes/" + vidmod + ".rdf"], "dataout") while not self.dataReady("datain"): pass recvdata = self.recv("datain") if recvdata[0] == "OK": versiondata = recvdata[1] else: versiondata = '<?xml version="1.0" encoding="utf-8"?> \ <rdf:RDF xmlns:rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#" \ xmlns:rdfs = "http://www.w3.org/2000/01/rdf-schema#" \ xmlns:owl = "http://www.w3.org/2002/07/owl#" \ xmlns:foaf = "http://xmlns.com/foaf/0.1/" \ xmlns:po = "http://purl.org/ontology/po/" \ xmlns:mo = "http://purl.org/ontology/mo/" \ xmlns:skos = 
"http://www.w3.org/2008/05/skos#" \ xmlns:time = "http://www.w3.org/2006/time#" \ xmlns:dc = "http://purl.org/dc/elements/1.1/" \ xmlns:dcterms = "http://purl.org/dc/terms/" \ xmlns:wgs84_pos= "http://www.w3.org/2003/01/geo/wgs84_pos#" \ xmlns:timeline = "http://purl.org/NET/c4dm/timeline.owl#" \ xmlns:event = "http://purl.org/NET/c4dm/event.owl#"> \ </rdf:RDF>' filepath = "tempRDF.txt" file = open(filepath, 'w') file.write(versiondata) file.close() g = Graph() g.parse("tempRDF.txt") # Identify if this is a change of programme, or the first time we've checked what's on for print clarity if self.firstrun: print (channel + ": " + title) else: print (channel + ": Changed to - " + title) # Minor alterations title = title.replace("&","and") if ":" in title: titlebits = title.split(":") title = titlebits[0] # Saving a copy here so apostrophes etc can be used in the Twitter people search titlesave = title # Remove punctuation for item in """!"#$%()*+,-./;<=>[email protected][\\]?_'`{|}?""": title = title.replace(item,"") keywords = dict() # Save keywords next to a descriptor of what they are keywords[pid] = "PID" # Add official hashtags to the list for tag in twittags: keywords[tag] = "Twitter" # Duplicates will be removed later if string.find(title,"The",0,3) != -1: newtitle = string.replace(re.sub("\s+","",title),"The ","",1) keywords[channel] = "Channel" keywords["#" + string.lower(re.sub("\s+","",title))] = "Title" # Check for and remove year too keywords["#" + string.replace(string.lower(re.sub("\s+","",title))," " + str(date.today().year),"",1)] = "Title" keywords['#' + string.lower(re.sub("\s+","",newtitle))] = "Title" # Check for and remove year too keywords['#' + string.replace(string.lower(re.sub("\s+","",newtitle))," " + str(date.today().year),"",1)] = "Title" else: keywords[channel] = "Channel" keywords["#" + string.lower(re.sub("\s+","",title))] = "Title" keywords["#" + string.replace(string.lower(re.sub("\s+","",title))," " + str(date.today().year),"",1)] = "Title" allwordtitle = string.replace(title,"The ","",1) allwordtitle = allwordtitle.lower() # Remove current year from events allwordtitle = allwordtitle.replace(" " + str(date.today().year),"",1) titlewords = allwordtitle.split() if len(titlewords) > 1: keywords[allwordtitle] = "Title" else: # Trial fix for issue of one word titles producing huge amounts of data keywords[allwordtitle + "^" + "bbc"] = "Title" keywords["#" + re.sub("\s+","",allwordtitle)] = "Title" numwords = dict({"one" : 1, "two" : 2, "three": 3, "four" : 4, "five": 5, "six" : 6, "seven": 7}) for word in numwords: if word in channel.lower() and channel != "asiannetwork": # Bug fix! 
asianne2rk numchannel = string.replace(channel.lower(),word,str(numwords[word])) keywords[numchannel] = "Channel" break if str(numwords[word]) in channel.lower(): numchannel = string.replace(channel.lower(),str(numwords[word]),word) keywords[numchannel] = "Channel" break # Load NameCache (people we've already searched for on Twitter to avoid hammering PeopleSearch) save = False try: homedir = os.path.expanduser("~") file = open(homedir + "/namecache.conf",'r') save = True except IOError, e: print ("Failed to load name cache - will attempt to create a new file: " + str(e)) if save: raw_config = file.read() file.close() try: config = cjson.decode(raw_config) except cjson.DecodeError, e: config = dict() else: config = dict() s = g.subjects(predicate=rdflib.URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'),object=rdflib.URIRef('http://purl.org/ontology/po/Role')) for x in s: rid = g.value(predicate=rdflib.URIRef('http://purl.org/ontology/po/role'),object=rdflib.BNode(x)) pid = g.value(subject=rdflib.BNode(rid),predicate=rdflib.URIRef('http://purl.org/ontology/po/participant')) firstname = str(g.value(subject=rdflib.BNode(pid),predicate=rdflib.URIRef('http://xmlns.com/foaf/0.1/givenName'))) lastname = str(g.value(subject=rdflib.BNode(pid),predicate=rdflib.URIRef('http://xmlns.com/foaf/0.1/familyName'))) if config.has_key(firstname + " " + lastname): # Found a cached value if config[firstname + " " + lastname] != "": keywords[config[firstname + " " + lastname]] = "Twitter" else: # Not cached yet - new request self.send(firstname + " " + lastname, "search") while not self.dataReady("search"): pass twitdata = self.recv("search") screenname = "" try: for user in twitdata: # Only use this Twitter screen name if there's a good chance they're the person we're after if user.has_key('verified'): if (user['verified'] == True or user['followers_count'] > 10000) and string.lower(user['name']) == string.lower(firstname + " " + lastname): screenname = user['screen_name'] keywords[screenname] = "Twitter" break except AttributeError, e: pass config[firstname + " " + lastname] = screenname keywords[firstname + " " + lastname] = "Participant" s = g.subjects(predicate=rdflib.URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'),object=rdflib.URIRef('http://purl.org/ontology/po/Character')) for x in s: character = str(g.value(subject=rdflib.BNode(x),predicate=rdflib.URIRef('http://xmlns.com/foaf/0.1/name'))) rid = g.value(predicate=rdflib.URIRef('http://purl.org/ontology/po/role'),object=rdflib.BNode(x)) pid = g.value(subject=rdflib.BNode(rid),predicate=rdflib.URIRef('http://purl.org/ontology/po/participant')) firstname = str(g.value(subject=rdflib.BNode(pid),predicate=rdflib.URIRef('http://xmlns.com/foaf/0.1/givenName'))) lastname = str(g.value(subject=rdflib.BNode(pid),predicate=rdflib.URIRef('http://xmlns.com/foaf/0.1/familyName'))) # This ^ is a temporary fix until I work out a better DB structure keywords[character + "^" + channel] = "Character" keywords[character + "^" + title] = "Character" if " " in character: # Looks like we have a firstname + surname situation charwords = character.split() if charwords[0] != "Dr" and charwords[0] != "Miss" and charwords[0] != "Mr" and charwords[0] != "Mrs" and charwords[0] != "Ms" and charwords[0] != "The": # As long as the first word isn't a title, add it as a first name # This ^ is a temporary fix until I work out a better DB structure keywords[charwords[0] + "^" + channel] = "Character" keywords[charwords[0] + "^" + title] = "Character" elif len(charwords) 
> 2: # If the first word was a title, and the second word isn't a surname (checked by > 2) add the first name # This ^ is a temporary fix until I work out a better DB structure keywords[charwords[1] + "^" + channel] = "Character" keywords[charwords[1] + "^" + title] = "Character" if config.has_key(firstname + " " + lastname): # Found a cached value if config[firstname + " " + lastname] != "": keywords[config[firstname + " " + lastname]] = "Actor" else: # Not cached yet - new request self.send(firstname + " " + lastname, "search") while not self.dataReady("search"): pass twitdata = self.recv("search") screenname = "" try: for user in twitdata: if user.has_key('verified'): if (user['verified'] == True or user['followers_count'] > 10000) and string.lower(user['name']) == string.lower(firstname + " " + lastname): screenname = user['screen_name'] keywords[screenname] = "Twitter" break except AttributeError, e: pass config[firstname + " " + lastname] = screenname keywords[firstname + " " + lastname] = "Actor" # Radio appears to have been forgotten about a bit in RDF / scheduling at the mo # So, let's do some extra queries and see if the show title is a person's name on Twitter if "radio" in channel or "6music" in channel or "asiannetwork" in channel or "sportsextra" in channel or "worldservice" in channel: # However, radio shows are often named using the DJ - The cases where this isn't true will cause problems however as they'll be saved in json - DOH! TODO if config.has_key(titlesave): # Found a cached value if config[titlesave] != "": keywords[config[titlesave]] = "Twitter" elif len(titlesave.split()) < 4: # Prevent some shows getting through at least - restricts people's names to three words self.send(titlesave, "search") while not self.dataReady("search"): pass twitdata = self.recv("search") screenname = "" try: for user in twitdata: if user.has_key('verified'): if (user['verified'] == True or user['followers_count'] > 10000) and string.lower(user['name']) == titlesave.lower(): screenname = user['screen_name'] keywords[screenname] = "Twitter" break except AttributeError, e: pass config[titlesave] = screenname try: file = open(homedir + "/namecache.conf",'w') raw_config = cjson.encode(config) file.write(raw_config) file.close() except IOError, e: print ("Failed to save name cache - could cause rate limit problems") return [keywords,data] else: if pid == None: print(channel + ": No change - Off Air") else: print (channel + ": No change - " + title)
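The channel-name handling in these examples swaps spelled-out numbers for digits by looping over a numwords dict with string.replace, skipping "asiannetwork" because it happens to contain "two". The same substitution can be expressed as a single re.sub with a callable replacement; this is a sketch of that alternative, not the code above, and the channel names are illustrative:

import re

NUMWORDS = {"one": "1", "two": "2", "three": "3", "four": "4",
            "five": "5", "six": "6", "seven": "7"}

def digitize_channel(channel):
    # Replace any spelled-out number with its digit using a callable
    # replacement, e.g. "bbcone" -> "bbc1".
    pattern = "|".join(NUMWORDS)  # "one|two|three|four|five|six|seven"
    return re.sub(pattern, lambda m: NUMWORDS[m.group(0)], channel.lower())

print(digitize_channel("bbcone"))        # bbc1
print(digitize_channel("radiofour"))     # radio4
print(digitize_channel("asiannetwork"))  # asianne2rk, the case the original code guards against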
2
Example 11
View licensedef doStuff(self, channel): # Check what's on for each channel self.send(channel, "whatson") while not self.dataReady("whatson"): pass data = self.recv("whatson") if data == None: pid = None else: pid = data[0] title = data[1] offset = data[2] duration = data[3] expectedstart = data[4] if pid != self.channels[channel]: # Perhaps just do a duplicate scan before creating Twitter stream if pid == None: self.channels[channel] = None Print (channel, ": Off Air") else: self.channels[channel] = pid self.send(["http://www.bbc.co.uk/programmes/" + pid + ".rdf"], "dataout") while not self.dataReady("datain"): pass recvdata = self.recv("datain") if recvdata[0] == "OK": programmedata = recvdata[1] else: # Fake programme data to prevent crash - not ideal programmedata = '<?xml version="1.0" encoding="utf-8"?> \ <rdf:RDF xmlns:rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#" \ xmlns:rdfs = "http://www.w3.org/2000/01/rdf-schema#" \ xmlns:owl = "http://www.w3.org/2002/07/owl#" \ xmlns:foaf = "http://xmlns.com/foaf/0.1/" \ xmlns:po = "http://purl.org/ontology/po/" \ xmlns:mo = "http://purl.org/ontology/mo/" \ xmlns:skos = "http://www.w3.org/2008/05/skos#" \ xmlns:time = "http://www.w3.org/2006/time#" \ xmlns:dc = "http://purl.org/dc/elements/1.1/" \ xmlns:dcterms = "http://purl.org/dc/terms/" \ xmlns:wgs84_pos= "http://www.w3.org/2003/01/geo/wgs84_pos#" \ xmlns:timeline = "http://purl.org/NET/c4dm/timeline.owl#" \ xmlns:event = "http://purl.org/NET/c4dm/event.owl#"> \ </rdf:RDF>' # RDF reader needs to read from a file so write out first # Alternative is to read from a URL, but this lacks proper proxy support filepath = "tempRDF.txt" file = open(filepath, 'w') file.write(programmedata) file.close() g = Graph() # This is a temporary proxy fix. A URL could be put here instead g.parse("tempRDF.txt") # Identify the brand and whether there are any official hashtags twittags = list() for bid in g.subjects(object = rdflib.URIRef('http://purl.org/ontology/po/Brand')): # bid is Brand ID bidmod = bid.replace("#programme","") bidmod = str(bidmod.replace("file:///programmes/","")) if self.officialbrandtags.has_key(bidmod): twittags = self.officialbrandtags[bidmod] break # Identify the series and whether there are any official hashtags if len(twittags) == 0: # Identify the brand and whether there are any official hashtags for sid in g.subjects(object = rdflib.URIRef('http://purl.org/ontology/po/Series')): # sid is Series ID sidmod = sid.replace("#programme","") sidmod = str(sidmod.replace("file:///programmes/","")) if self.officialseriestags.has_key(sidmod): twittags = self.officialseriestags[sidmod] break vidmod = "" so = g.subject_objects(predicate=rdflib.URIRef('http://purl.org/ontology/po/version')) # Pick a version, any version - for this which one doesn't matter for x in so: # vid is version id vid = x[1] vidmod = vid.replace("#programme","") vidmod = vidmod.replace("file:///programmes/","") break # Got version, now get people self.send(["http://www.bbc.co.uk/programmes/" + vidmod + ".rdf"], "dataout") while not self.dataReady("datain"): pass recvdata = self.recv("datain") if recvdata[0] == "OK": versiondata = recvdata[1] else: versiondata = '<?xml version="1.0" encoding="utf-8"?> \ <rdf:RDF xmlns:rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#" \ xmlns:rdfs = "http://www.w3.org/2000/01/rdf-schema#" \ xmlns:owl = "http://www.w3.org/2002/07/owl#" \ xmlns:foaf = "http://xmlns.com/foaf/0.1/" \ xmlns:po = "http://purl.org/ontology/po/" \ xmlns:mo = "http://purl.org/ontology/mo/" \ xmlns:skos = 
"http://www.w3.org/2008/05/skos#" \ xmlns:time = "http://www.w3.org/2006/time#" \ xmlns:dc = "http://purl.org/dc/elements/1.1/" \ xmlns:dcterms = "http://purl.org/dc/terms/" \ xmlns:wgs84_pos= "http://www.w3.org/2003/01/geo/wgs84_pos#" \ xmlns:timeline = "http://purl.org/NET/c4dm/timeline.owl#" \ xmlns:event = "http://purl.org/NET/c4dm/event.owl#"> \ </rdf:RDF>' filepath = "tempRDF.txt" file = open(filepath, 'w') file.write(versiondata) file.close() g = Graph() g.parse("tempRDF.txt") # Identify if this is a change of programme, or the first time we've checked what's on for Print clarity if self.firstrun: Print (channel , ": " + title) else: Print (channel , ": Changed to - " , title) # Minor alterations title = title.replace("&","and") if ":" in title: titlebits = title.split(":") title = titlebits[0] # Saving a copy here so apostrophes etc can be used in the Twitter people search titlesave = title # Remove punctuation for item in """!"#$%()*+,-./;<=>[email protected][\\]?_'`{|}?""": title = title.replace(item,"") keywords = dict() # Save keywords next to a descriptor of what they are keywords[pid] = "PID" # Add official hashtags to the list for tag in twittags: keywords[tag] = "Twitter" # Duplicates will be removed later if string.find(title,"The",0,3) != -1: newtitle = string.replace(re.sub("\s+","",title),"The ","",1) keywords[channel] = "Channel" keywords["#" + string.lower(re.sub("\s+","",title))] = "Title" # Check for and remove year too keywords["#" + string.replace(string.lower(re.sub("\s+","",title))," " + str(date.today().year),"",1)] = "Title" keywords['#' + string.lower(re.sub("\s+","",newtitle))] = "Title" # Check for and remove year too keywords['#' + string.replace(string.lower(re.sub("\s+","",newtitle))," " + str(date.today().year),"",1)] = "Title" else: keywords[channel] = "Channel" keywords["#" + string.lower(re.sub("\s+","",title))] = "Title" keywords["#" + string.replace(string.lower(re.sub("\s+","",title))," " + str(date.today().year),"",1)] = "Title" allwordtitle = string.replace(title,"The ","",1) allwordtitle = allwordtitle.lower() # Remove current year from events allwordtitle = allwordtitle.replace(" " + str(date.today().year),"",1) titlewords = allwordtitle.split() if len(titlewords) > 1: keywords[allwordtitle] = "Title" else: # Trial fix for issue of one word titles producing huge amounts of data keywords[allwordtitle + "^" + "bbc"] = "Title" keywords["#" + re.sub("\s+","",allwordtitle)] = "Title" numwords = dict({"one" : 1, "two" : 2, "three": 3, "four" : 4, "five": 5, "six" : 6, "seven": 7}) for word in numwords: if word in channel.lower() and channel != "asiannetwork": # Bug fix! 
asianne2rk numchannel = string.replace(channel.lower(),word,str(numwords[word])) keywords[numchannel] = "Channel" break if str(numwords[word]) in channel.lower(): numchannel = string.replace(channel.lower(),str(numwords[word]),word) keywords[numchannel] = "Channel" break # Load NameCache (people we've already searched for on Twitter to avoid hammering PeopleSearch) save = False try: homedir = os.path.expanduser("~") file = open(homedir + "/namecache.conf",'r') save = True except IOError, e: Print ("Failed to load name cache - will attempt to create a new file: " , e) if save: raw_config = file.read() file.close() try: config = cjson.decode(raw_config) except cjson.DecodeError, e: config = dict() else: config = dict() s = g.subjects(predicate=rdflib.URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'),object=rdflib.URIRef('http://purl.org/ontology/po/Role')) for x in s: rid = g.value(predicate=rdflib.URIRef('http://purl.org/ontology/po/role'),object=rdflib.BNode(x)) pid = g.value(subject=rdflib.BNode(rid),predicate=rdflib.URIRef('http://purl.org/ontology/po/participant')) firstname = str(g.value(subject=rdflib.BNode(pid),predicate=rdflib.URIRef('http://xmlns.com/foaf/0.1/givenName'))) lastname = str(g.value(subject=rdflib.BNode(pid),predicate=rdflib.URIRef('http://xmlns.com/foaf/0.1/familyName'))) if config.has_key(firstname + " " + lastname): # Found a cached value if config[firstname + " " + lastname] != "": keywords[config[firstname + " " + lastname]] = "Twitter" else: # Not cached yet - new request self.send(firstname + " " + lastname, "search") while not self.dataReady("search"): pass twitdata = self.recv("search") screenname = "" try: for user in twitdata: # Only use this Twitter screen name if there's a good chance they're the person we're after if user.has_key('verified'): if (user['verified'] == True or user['followers_count'] > 10000) and string.lower(user['name']) == string.lower(firstname + " " + lastname): screenname = user['screen_name'] keywords[screenname] = "Twitter" break except AttributeError, e: pass config[firstname + " " + lastname] = screenname keywords[firstname + " " + lastname] = "Participant" s = g.subjects(predicate=rdflib.URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'),object=rdflib.URIRef('http://purl.org/ontology/po/Character')) for x in s: character = str(g.value(subject=rdflib.BNode(x),predicate=rdflib.URIRef('http://xmlns.com/foaf/0.1/name'))) rid = g.value(predicate=rdflib.URIRef('http://purl.org/ontology/po/role'),object=rdflib.BNode(x)) pid = g.value(subject=rdflib.BNode(rid),predicate=rdflib.URIRef('http://purl.org/ontology/po/participant')) firstname = str(g.value(subject=rdflib.BNode(pid),predicate=rdflib.URIRef('http://xmlns.com/foaf/0.1/givenName'))) lastname = str(g.value(subject=rdflib.BNode(pid),predicate=rdflib.URIRef('http://xmlns.com/foaf/0.1/familyName'))) # This ^ is a temporary fix until I work out a better DB structure keywords[character + "^" + channel] = "Character" keywords[character + "^" + title] = "Character" if " " in character: # Looks like we have a firstname + surname situation charwords = character.split() if charwords[0] != "Dr" and charwords[0] != "Miss" and charwords[0] != "Mr" and charwords[0] != "Mrs" and charwords[0] != "Ms" and charwords[0] != "The": # As long as the first word isn't a title, add it as a first name # This ^ is a temporary fix until I work out a better DB structure keywords[charwords[0] + "^" + channel] = "Character" keywords[charwords[0] + "^" + title] = "Character" elif len(charwords) > 2: 
# If the first word was a title, and the second word isn't a surname (checked by > 2) add the first name # This ^ is a temporary fix until I work out a better DB structure keywords[charwords[1] + "^" + channel] = "Character" keywords[charwords[1] + "^" + title] = "Character" if config.has_key(firstname + " " + lastname): # Found a cached value if config[firstname + " " + lastname] != "": keywords[config[firstname + " " + lastname]] = "Actor" else: # Not cached yet - new request self.send(firstname + " " + lastname, "search") while not self.dataReady("search"): pass twitdata = self.recv("search") screenname = "" try: for user in twitdata: if user.has_key('verified'): if (user['verified'] == True or user['followers_count'] > 10000) and string.lower(user['name']) == string.lower(firstname + " " + lastname): screenname = user['screen_name'] keywords[screenname] = "Twitter" break except AttributeError, e: pass config[firstname + " " + lastname] = screenname keywords[firstname + " " + lastname] = "Actor" # Radio appears to have been forgotten about a bit in RDF / scheduling at the mo # So, let's do some extra queries and see if the show title is a person's name on Twitter if "radio" in channel or "6music" in channel or "asiannetwork" in channel or "sportsextra" in channel or "worldservice" in channel: # However, radio shows are often named using the DJ - The cases where this isn't true will cause problems however as they'll be saved in json - DOH! TODO if config.has_key(titlesave): # Found a cached value if config[titlesave] != "": keywords[config[titlesave]] = "Twitter" elif len(titlesave.split()) < 4: # Prevent some shows getting through at least - restricts people's names to three words self.send(titlesave, "search") while not self.dataReady("search"): pass twitdata = self.recv("search") screenname = "" try: for user in twitdata: if user.has_key('verified'): if (user['verified'] == True or user['followers_count'] > 10000) and string.lower(user['name']) == titlesave.lower(): screenname = user['screen_name'] keywords[screenname] = "Twitter" break except AttributeError, e: pass config[titlesave] = screenname try: file = open(homedir + "/namecache.conf",'w') raw_config = cjson.encode(config) file.write(raw_config) file.close() except IOError, e: Print ("Failed to save name cache - could cause rate limit problems") return [keywords,data] else: if pid == None: Print(channel , ": No change - Off Air") else: Print (channel , ": No change - " , title)
2
Example 12
View license@ModuleInfo.plugin('wb.util.exportSQLite', caption='Export SQLite CREATE script', input=[wbinputs.currentCatalog()], groups=['Catalog/Utilities', 'Menu/Catalog']) @ModuleInfo.export(grt.INT, grt.classes.db_Catalog) def exportSQLite(cat): """Function to go through all schemata in catalog and rename all FKs of table-objects """ def validate_for_sqlite_export(cat): """Check uniqueness of schema, table and index names. Return 0 on success otherwise return 1 (the export process should abort) """ have_errors = False idt = {} for i, schema in enumerate(cat.schemata): if schema.name in idt: have_errors = True if Workbench.confirm('Name conflict', 'Schemas %d and %d have the same name "%s".' ' Please rename one of them.\n' 'Search for more such errors?' % ( idt[schema.name], i, schema.name)) == 0: return False else: idt[schema.name] = i # Do not continue looking for errors on schema name error if have_errors: return False for schema in cat.schemata: idt = {} for i, tbl in enumerate(schema.tables): if tbl.name == '': have_errors = True if Workbench.confirm('Name conflict', 'Table %d in schema "%s". has no name.' ' Please rename.\n' 'Search for more such errors?' % ( i, schema.name)) == 0: return False if tbl.name in idt: have_errors = True if Workbench.confirm('Name conflict', 'Tables %d and %d in schema "%s"' ' have the same name "%s".' ' Please rename one of them.\n' 'Search for more such errors?' % ( idt[tbl.name], i, schema.name, tbl.name)) == 0: return False else: idt[tbl.name] = i if have_errors: return False for schema in cat.schemata: for tbl in schema.tables: idt = {} for i, column in enumerate(tbl.columns): if column.name == '': have_errors = True if Workbench.confirm('Name conflict', 'Column %d in table "%s"."%s". has no name.' ' Please rename.\n' 'Search for more such errors?' % ( i, schema.name, tbl.name)) == 0: return False if column.name in idt: have_errors = True if Workbench.confirm('Name conflict', 'Columns %d and %d in table "%s"."%s"' ' have the same name "%s".' ' Please rename one of them.\n' 'Search for more such errors?' % ( idt[column.name], i, schema.name, tbl.name, column.name)) == 0: return False else: idt[column.name] = i # Now check indices (except primary/unique) idt = {} for i, index in enumerate(tbl.indices): if index.indexType == 'INDEX': if index.name == '': have_errors = True if Workbench.confirm('Name conflict', 'Index %d in table "%s"."%s". has no name.' ' Please rename.\n' 'Search for more such errors?' % ( i, schema.name, tbl.name)) == 0: return False if index.name in idt: have_errors = True if Workbench.confirm('Name conflict', 'Indices %d and %d in table "%s"."%s"' ' have the same name "%s".' ' Please rename one of them.\n' 'Search for more such errors?' % ( idt[index.name], i, schema.name, tbl.name, column.name)) == 0: return False else: idt[index.name] = i if have_errors: return False return True def is_deferred(fkey): # Hack: if comment starts with "Defer..." 
we make it a deferred FK could # use member 'deferability' (WB has it), but there is no GUI for it return fkey.comment.lstrip().lower()[0:5] == 'defer' def export_table(out, db_name, schema, tbl): if len(tbl.columns) == 0: return out.write('CREATE TABLE %s%s(\n%s' % ( db_name, dq(tbl.name), schema_comment_format(tbl.comment))) primary_key = [i for i in tbl.indices if i.isPrimary == 1] primary_key = primary_key[0] if len(primary_key) > 0 else None pk_column = None if primary_key and len(primary_key.columns) == 1: pk_column = primary_key.columns[0].referencedColumn col_comment = '' for i, column in enumerate(tbl.columns): check, sqlite_type, flags = '', None, None if column.simpleType: sqlite_type = column.simpleType.name flags = column.simpleType.flags else: sqlite_type = column.userType.name flags = column.flags length = column.length # For INTEGER PRIMARY KEY column to become an alias for the rowid # the type needs to be "INTEGER" not "INT" # we fix it for other columns as well if 'INT' in sqlite_type or sqlite_type == 'LONG': sqlite_type = 'INTEGER' length = -1 # Check flags for "unsigned" if 'UNSIGNED' in column.flags: check = dq(column.name) + '>=0' # We even implement ENUM (because we can) if sqlite_type == 'ENUM': sqlite_type = 'TEXT' if column.datatypeExplicitParams: check = (dq(column.name) + ' IN' + column.datatypeExplicitParams) if i > 0: out.write(',' + comment_format(col_comment) + '\n') out.write(' ' + dq(column.name)) # Type is optional in SQLite if sqlite_type != '': out.write(' ' + sqlite_type) # For [VAR]CHAR and such types specify length even though this is # not used in SQLite if length > 0: out.write('(%d)' % length) # Must specify single-column PKs as column-constraints for AI/rowid # behaviour if column == pk_column: out.write(' PRIMARY KEY') if primary_key.columns[0].descend == 1: out.write(' DESC') # Only PK columns can be AI in SQLite if column.autoIncrement == 1: out.write(' AUTOINCREMENT') # Check for NotNull if column.isNotNull == 1: out.write(' NOT NULL') if check != '': out.write(' CHECK(' + check + ')') if column.defaultValue != '': out.write(' DEFAULT ' + column.defaultValue) col_comment = column.comment # For multicolumn PKs if primary_key and not pk_column: out.write(',%s\n PRIMARY KEY(%s)' % ( comment_format(col_comment), print_index_columns(primary_key))) col_comment = '' # Put non-primary, UNIQUE Keys in CREATE TABLE as well (because we can) for index in tbl.indices: if index != primary_key and index.indexType == 'UNIQUE': out.write(',%s\n' % comment_format(col_comment)) col_comment = '' if index.name != '': out.write(' CONSTRAINT %s\n ' % dq(index.name)) out.write(' UNIQUE(%s)' % print_index_columns(index)) for fkey in tbl.foreignKeys: have_fkeys = 1 out.write(',%s\n' % comment_format(col_comment)) col_comment = '' if fkey.name != '': out.write(' CONSTRAINT %s\n ' % dq(fkey.name)) out.write(' FOREIGN KEY(%s)\n' % print_fk_columns(fkey.columns)) out.write(' REFERENCES %s(%s)' % ( dq(fkey.referencedTable.name), print_fk_columns(fkey.referencedColumns))) if fkey.deleteRule in ['RESTRICT', 'CASCADE', 'SET NULL']: out.write('\n ON DELETE ' + fkey.deleteRule) if fkey.updateRule in ['RESTRICT', 'CASCADE', 'SET NULL']: out.write('\n ON UPDATE ' + fkey.updateRule) if is_deferred(fkey): out.write(' DEFERRABLE INITIALLY DEFERRED') out.write(comment_format(col_comment) + '\n);\n') # CREATE INDEX statements for all non-primary, non-unique, non-foreign # indexes for i, index in enumerate(tbl.indices): if index.indexType == 'INDEX': index_name = tbl.name + '.' 
+ index.name if index.name == '': index_name = tbl.name + '.index' + i out.write('CREATE INDEX %s%s ON %s (%s);\n' % ( db_name, dq(index_name), dq(tbl.name), print_index_columns(index))) # Write the INSERTS (currently always) for insert in tbl.inserts().splitlines(): columns_values = '' insert_start = 'insert into `%s`.`%s` (' % (schema.name, tbl.name) if insert[0:len(insert_start)].lower() == insert_start.lower(): columns_values = insert[len(insert_start):] else: raise ExportSQLiteError( 'Error', 'Unrecognized command in insert') last_column = 0 for i, column in enumerate(tbl.columns): column_name = '`' + column.name + '`' if columns_values[0:len(column_name)] == column_name: columns_values = columns_values[len(column_name):] if columns_values[0:1] == ')': columns_values = columns_values[1:] last_column = i break else: if columns_values[0:2] == ', ': columns_values = columns_values[2:] else: raise ExportSQLiteError( 'Error', 'Unrecognized character in column list') else: raise ExportSQLiteError( 'Error', 'Unrecognized column in inserts') out.write('INSERT INTO %s(' % dq(tbl.name)) for i in range(last_column + 1): if i > 0: out.write(',') out.write(dq(tbl.columns[i].name)) if columns_values[0:9].lower() != ' values (': raise ExportSQLiteError( 'Error', 'Unrecognized SQL in insert') columns_values = columns_values[9:] out.write(') VALUES(') out.write(columns_values.replace("\\'", "''")) out.write('\n') def order_tables(out, db_name, schema, unordered, respect_deferredness): have_ordered = False while not have_ordered: if len(unordered) == 0: have_ordered = True for tbl in unordered.values(): has_forward_reference = False for fkey in tbl.foreignKeys: if (fkey.referencedTable.name in unordered and fkey.referencedTable.name != tbl.name and not ( respect_deferredness and is_deferred(fkey))): has_forward_reference = True break if not has_forward_reference: export_table(out, db_name, schema, tbl) del unordered[tbl.name] have_ordered = True def export_schema(out, schema, is_main_schema): if len(schema.tables) == 0: return out.write('\n-- Schema: %s\n' % schema.name) out.write(schema_comment_format(schema.comment)) db_name = '' if not is_main_schema: db_name = dq(schema.name) + '.' out.write('ATTACH "%s" AS %s;\n' % ( safe_file_name(schema.name + '.sdb'), dq(schema.name))) out.write('BEGIN;\n') # Find a valid table order for inserts from FK constraints unordered = {t.name: t for t in schema.tables} # Try treating deferred keys like non-deferred keys first for ordering order_tables(out, db_name, schema, unordered, False) # Now try harder (leave out deferred keys from determining an order) order_tables(out, db_name, schema, unordered, True) # Loop through all remaining tables, if any. Have circular FK refs. # How to handle? 
for tbl in unordered.values(): export_table(out, db_name, schema, tbl) out.write('COMMIT;\n') def print_index_columns(index): s = '' for i, column in enumerate(index.columns): if i > 0: s += ',' s += dq(column.referencedColumn.name) if column.descend == 1: s += ' DESC' return s def print_fk_columns(columns): s = '' for i, column in enumerate(columns): if i > 0: s += ',' s += dq(column.name) return s def dq(ident): """Double quote identifer, replacing " by "" """ return '"' + re.sub(r'"', '""', ident) + '"' def safe_file_name(ident): """Create safe filename from identifer""" def repl(c): return ["%%%02x" % c for c in bytearray(c, 'ascii')] return re.sub(r'[/\:*?"<>|%]', repl, ident) def info_format(header, body): """Format a info field as SQL comment""" body = body.strip() if body == '': return '' elif '\n' in body: # Multiline comment return '-- %s:\n-- %s\n' % ( header, re.sub(r'\n', '\n-- ', body)) else: # Single line return '-- %-14s %s\n' % (header + ':', body) def schema_comment_format(body): """Format a schema or table comment as SQL comment table comments to be stored in SQLite schema """ body = body.strip() if body == '': return '' else: # Multiline comment return '-- %s\n' % re.sub(r'\n', '\n-- ', body) def comment_format(body): body = body.strip() if body == '': return '' elif '\n' in body: # Multiline comment return '\n-- %s' % re.sub(r'\n', '\n-- ', body) else: # Single line return '-- %s' % body if not validate_for_sqlite_export(cat): return 1 out = StringIO.StringIO() out.write(info_format( 'Creator', 'MySQL Workbench %d.%d.%d/ExportSQLite Plugin %s\n' % ( grt.root.wb.info.version.majorNumber, grt.root.wb.info.version.minorNumber, grt.root.wb.info.version.releaseNumber, ModuleInfo.version))) out.write(info_format('Author', grt.root.wb.doc.info.author)) out.write(info_format('Caption', grt.root.wb.doc.info.caption)) out.write(info_format('Project', grt.root.wb.doc.info.project)) out.write(info_format('Changed', grt.root.wb.doc.info.dateChanged)) out.write(info_format('Created', grt.root.wb.doc.info.dateCreated)) out.write(info_format('Description', grt.root.wb.doc.info.description)) out.write('PRAGMA foreign_keys = OFF;\n') # Loop over all catalogs in schema, find main schema main schema is first # nonempty schema or nonempty schema named "main" try: for schema in [(s, s.name == 'main') for s in cat.schemata]: export_schema(out, schema[0], schema[1]) except ExportSQLiteError as e: Workbench.confirm(e.typ, e.message) return 1 sql_text = out.getvalue() out.close() wizard = ExportSQLiteWizard(sql_text) wizard.run() return 0
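Three helpers in this exporter lean on re.sub: dq() doubles any embedded double quotes so an identifier can be safely quoted, the comment formatters prefix each following line of a comment with "-- " to keep it inside an SQL comment, and safe_file_name() percent-encodes awkward characters through a callable replacement. When the replacement argument is callable, re.sub hands it a match object and expects a single string back, which is what the repl() below returns. A minimal sketch of the three patterns, independent of the Workbench plugin API:

import re

def dq(ident):
    # Double-quote an identifier, doubling embedded double quotes.
    return '"' + re.sub(r'"', '""', ident) + '"'

def comment_format(body):
    # Prefix every line after the first with "-- " so multi-line text
    # stays inside SQL comments.
    return '-- %s\n' % re.sub(r'\n', '\n-- ', body.strip())

def safe_file_name(ident):
    # Percent-encode characters that are unsafe in file names; the callable
    # receives a match object and must return a string.
    def repl(match):
        return ''.join('%%%02x' % b for b in match.group(0).encode('ascii'))
    return re.sub(r'[/:*?"<>|%]', repl, ident)

print(dq('my "quoted" table'))                # "my ""quoted"" table"
print(comment_format('line one\nline two'), end='')
print(safe_file_name('sales/2020?.sdb'))      # sales%2f2020%3f.sdb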
2
Example 13
View licensedef SetKernPairsMain( self, sender ): try: thisFont = Glyphs.font # frontmost font groupsUC = { "A" : ["UC_A", "UC_A"], "Aacute" : ["UC_A", "UC_A"], "Abreve" : ["UC_A", "UC_A"], "Acircumflex" : ["UC_A", "UC_A"], "Adieresis" : ["UC_A", "UC_A"], "Agrave" : ["UC_A", "UC_A"], "Amacron" : ["UC_A", "UC_A"], "Aogonek" : ["UC_A", "UC_A"], "Aring" : ["UC_A", "UC_A"], "Aringacute" : ["UC_A", "UC_A"], "Atilde" : ["UC_A", "UC_A"], "AE" : ["UC_AE", "UC_E"], "AEacute" : ["UC_AE", "UC_E"], "B" : ["UC_Stem", "UC_B"], "C" : ["UC_Round", "UC_C"], "Cacute" : ["UC_Round", "UC_C"], "Ccaron" : ["UC_Round", "UC_C"], "Ccedilla" : ["UC_Round", "UC_C"], "Ccircumflex" : ["UC_Round", "UC_C"], "Cdotaccent" : ["UC_Round", "UC_C"], "D" : ["UC_Stem", "UC_Round"], "Eth" : ["UC_Eth", "UC_Round"], "Dcaron" : ["UC_Stem", "UC_Round"], "Dcroat" : ["UC_Eth", "UC_Round"], "E" : ["UC_Stem", "UC_E"], "Eacute" : ["UC_Stem", "UC_E"], "Ebreve" : ["UC_Stem", "UC_E"], "Ecaron" : ["UC_Stem", "UC_E"], "Ecircumflex" : ["UC_Stem", "UC_E"], "Edieresis" : ["UC_Stem", "UC_E"], "Edotaccent" : ["UC_Stem", "UC_E"], "Egrave" : ["UC_Stem", "UC_E"], "Emacron" : ["UC_Stem", "UC_E"], "Eogonek" : ["UC_Stem", "UC_E"], "F" : ["UC_Stem", ""], "G" : ["UC_Round", "UC_G"], "Gbreve" : ["UC_Round", "UC_G"], "Gcircumflex" : ["UC_Round", "UC_G"], "Gcommaaccent" : ["UC_Round", "UC_G"], "Gcaron" : ["UC_Round", "UC_G"], "Gdotaccent" : ["UC_Round", "UC_G"], "H" : ["UC_Stem", "UC_Stem"], "Hbar" : ["UC_Stem", "UC_Stem"], "Hcircumflex" : ["UC_Stem", "UC_Stem"], "I" : ["UC_Stem", "UC_Stem"], "IJ" : ["UC_Stem", "UC_J"], "Iacute" : ["UC_Stem", "UC_Stem"], "Ibreve" : ["UC_Stem", "UC_Stem"], "Icircumflex" : ["UC_Stem", "UC_Stem"], "Idieresis" : ["UC_Stem", "UC_Stem"], "Idotaccent" : ["UC_Stem", "UC_Stem"], "Igrave" : ["UC_Stem", "UC_Stem"], "Imacron" : ["UC_Stem", "UC_Stem"], "Iogonek" : ["UC_Stem", "UC_Stem"], "Itilde" : ["UC_Stem", "UC_Stem"], "J" : ["UC_J", "UC_J"], "Jcircumflex" : ["UC_J", "UC_J"], "K" : ["UC_Stem", "UC_K"], "Kcommaaccent" : ["UC_Stem", "UC_K"], "L" : ["UC_Stem", "UC_L"], "Lacute" : ["UC_Stem", "UC_L"], "Lcaron" : ["UC_Stem", "UC_L"], "Lcommaaccent" : ["UC_Stem", "UC_L"], "Ldot" : ["UC_Stem", ""], "Lslash" : ["UC_Eth", "UC_L"], "M" : ["UC_Stem", "UC_Stem"], "N" : ["UC_Stem", "UC_Stem"], "Nacute" : ["UC_Stem", "UC_Stem"], "Ncaron" : ["UC_Stem", "UC_Stem"], "Ncommaaccent" : ["UC_Stem", "UC_Stem"], "Eng" : ["UC_Stem", ""], "Ntilde" : ["UC_Stem", "UC_Stem"], "O" : ["UC_Round", "UC_Round"], "Oacute" : ["UC_Round", "UC_Round"], "Obreve" : ["UC_Round", "UC_Round"], "Ocircumflex" : ["UC_Round", "UC_Round"], "Odieresis" : ["UC_Round", "UC_Round"], "Ograve" : ["UC_Round", "UC_Round"], "Ohungarumlaut" : ["UC_Round", "UC_Round"], "Omacron" : ["UC_Round", "UC_Round"], "Oslash" : ["UC_Round", "UC_Round"], "Oslashacute" : ["UC_Round", "UC_Round"], "Otilde" : ["UC_Round", "UC_Round"], "OE" : ["UC_Round", "UC_E"], "P" : ["UC_Stem", "UC_P"], "Thorn" : ["UC_Stem", ""], "Q" : ["UC_Round", ""], "R" : ["UC_Stem", "UC_R"], "Racute" : ["UC_Stem", "UC_R"], "Rcaron" : ["UC_Stem", "UC_R"], "Rcommaaccent" : ["UC_Stem", "UC_R"], "S" : ["UC_S", "UC_S"], "Sacute" : ["UC_S", "UC_S"], "Scaron" : ["UC_S", "UC_S"], "Scedilla" : ["UC_S", "UC_S"], "Scircumflex" : ["UC_S", "UC_S"], "Scommaaccent" : ["UC_S", "UC_S"], "T" : ["UC_T", "UC_T"], "Tbar" : ["UC_T", "UC_T"], "Tcaron" : ["UC_T", "UC_T"], "Tcedilla" : ["UC_T", "UC_T"], "Tcommaaccent" : ["UC_T", "UC_T"], "U" : ["UC_U", "UC_U"], "Uacute" : ["UC_U", "UC_U"], "Ubreve" : ["UC_U", "UC_U"], "Ucircumflex" : ["UC_U", "UC_U"], 
"Udieresis" : ["UC_U", "UC_U"], "Ugrave" : ["UC_U", "UC_U"], "Uhungarumlaut" : ["UC_U", "UC_U"], "Umacron" : ["UC_U", "UC_U"], "Uogonek" : ["UC_U", "UC_U"], "Uring" : ["UC_U", "UC_U"], "Utilde" : ["UC_U", "UC_U"], "W" : ["UC_W", "UC_W"], "Wacute" : ["UC_W", "UC_W"], "Wcircumflex" : ["UC_W", "UC_W"], "Wdieresis" : ["UC_W", "UC_W"], "Wgrave" : ["UC_W", "UC_W"], "X" : ["UC_X", "UC_X"], "Y" : ["UC_Y", "UC_Y"], "Yacute" : ["UC_Y", "UC_Y"], "Ycircumflex" : ["UC_Y", "UC_Y"], "Ydieresis" : ["UC_Y", "UC_Y"], "Ygrave" : ["UC_Y", "UC_Y"], "Z" : ["UC_Z", "UC_Z"], "Zacute" : ["UC_Z", "UC_Z"], "Zcaron" : ["UC_Z", "UC_Z"], "Zdotaccent" : ["UC_Z", "UC_Z"], "Schwa" : ["UC_Schwa", "UC_Round"], "A-cy" : ["UC_A", "UC_A"], "Be-cy" : ["UC_Stem", ""], "Ve-cy" : ["UC_Stem", "UC_B"], "Ge-cy" : ["UC_Stem", "UC_T"], "Gje-cy" : ["UC_Stem", "UC_T"], "Gheupturn-cy" : ["UC_Stem", "UC_T"], "De-cy" : ["", "UC_StemTooth"], "Ie-cy" : ["UC_Stem", "Stem_E"], "Iegrave-cy" : ["UC_Stem", "Stem_E"], "Io-cy" : ["UC_Stem", "Stem_E"], "Zhe-cy" : ["UC_Zhe", "UC_K"], "Ze-cy" : ["UC_Ze", "UC_B"], "Ii-cy" : ["UC_Stem", "UC_Stem"], "Iishort-cy" : ["UC_Stem", "UC_Stem"], "Iigrave-cy" : ["UC_Stem", "UC_Stem"], "Ka-cy" : ["UC_Stem", "UC_K"], "Kje-cy" : ["UC_Stem", "UC_K"], "El-cy" : ["UC_El", "UC_Stem"], "Em-cy" : ["UC_Stem", "UC_Stem"], "En-cy" : ["UC_Stem", "UC_Stem"], "O-cy" : ["UC_Round", "UC_Round"], "Pe-cy" : ["UC_Stem", "UC_Stem"], "Er-cy" : ["UC_Stem", "UC_P"], "Es-cy" : ["UC_Round", "UC_C"], "Te-cy" : ["UC_T", "UC_T"], "U-cy" : ["UC_CyrU", "UC_CyrU"], "Ushort-cy" : ["UC_CyrU", "UC_CyrU"], "Ef-cy" : ["UC_Ef", "UC_Ef"], "Ha-cy" : ["UC_X", "UC_X"], "Che-cy" : ["UC_Che", "UC_Stem"], "Tse-cy" : ["UC_Stem", "UC_StemTooth"], "Sha-cy" : ["UC_Stem", "UC_Stem"], "Shcha-cy" : ["UC_Stem", "UC_StemTooth"], "Dzhe-cy" : ["UC_Stem", "UC_Stem"], "Ia-cy" : ["", "UC_Stem"], "Softsign-cy" : ["UC_Stem", "UC_Softsign"], "Hardsign-cy" : ["UC_T", "UC_Softsign"], "Yeru-cy" : ["UC_Stem", "UC_Stem"], "Lje-cy" : ["UC_El", "UC_Softsign"], "Nje-cy" : ["UC_Stem", "UC_Softsign"], "Dze-cy" : ["UC_S", "UC_S"], "E-cy" : ["UC_Round", "UC_C"], "Ereversed-cy" : ["UC_Ze", "UC_Round"], "I-cy" : ["UC_Stem", "UC_Stem"], "Yi-cy" : ["UC_Stem", "UC_Stem"], "Je-cy" : ["UC_J", "UC_J"], "Tshe-cy" : ["UC_T", "UC_Shha"], "Iu-cy" : ["UC_Stem", "UC_Round"], "Dje-cy" : ["UC_T", "UC_Softsign"], "Fita-cy" : ["UC_Round", "UC_Round"], "Izhitsa-cy" : ["UC_V", ""], "Ghestroke-cy" : ["UC_Eth", "UC_Te"], "Ghemiddlehook-cy" : ["UC_Stem", ""], "Zhedescender-cy" : ["UC_Zhe", "UC_K"], "Zedescender-cy" : ["UC_Ze", "UC_B"], "Kadescender-cy" : ["UC_Stem", "UC_K"], "Kaverticalstroke-cy" : ["UC_Stem", "UC_K"], "Kastroke-cy" : ["UC_Stem", "UC_K"], "Kabashkir-cy" : ["UC_T", "UC_K"], "Endescender-cy" : ["UC_Stem", "UC_StemTooth"], "Pemiddlehook-cy" : ["UC_Stem", ""], "Haabkhasian-cy" : ["UC_Round", ""], "Esdescender-cy" : ["UC_Round", "UC_C"], "Tedescender-cy" : ["UC_T", "UC_T"], "Ustrait-cy" : ["UC_Y", "UC_Y"], "Ustraitstroke-cy" : ["UC_Y", "UC_Y"], "Hadescender-cy" : ["UC_X", "UC_X"], "Chedescender-cy" : ["UC_Che", "UC_StemTooth"], "Cheverticalstroke-cy" : ["UC_Che", "UC_Stem"], "Shha-cy" : ["UC_Stem", "UC_Shha"], "Cheabkhasian-cy" : ["UC_Cheabkhaz", "UC_Cheabkhaz"], "Chedescenderabkhasian-cy" : ["UC_Cheabkhaz", "UC_Cheabkhaz"], "Palochka-cy" : ["UC_Stem", "UC_Stem"], "Zhebreve-cy" : ["UC_Zhe", "UC_K"], "Kahook-cy" : ["UC_Stem", ""], "Eltail-cy" : ["UC_El", "UC_Stem"], "Enhook-cy" : ["UC_Stem", "UC_StemHook"], "Entail-cy" : ["UC_Stem", "UC_Stem"], "Chekhakassian-cy" : ["UC_Che", "UC_Stem"], 
"Emtail-cy" : ["UC_Stem", "UC_Stem"], "Abreve-cy" : ["UC_A", "UC_A"], "Adieresis-cy" : ["UC_A", "UC_A"], "Iebreve-cy" : ["UC_Stem", "UC_E"], "Schwa-cy" : ["UC_Schwa", "UC_Round"], "Schwadieresis-cy" : ["UC_Schwa", "UC_Round"], "Zhedieresis-cy" : ["UC_Zhe", "UC_K"], "Zedieresis-cy" : ["UC_Ze", "UC_B"], "Imacron-cy" : ["UC_Stem", "UC_Stem"], "Idieresis-cy" : ["UC_Stem", "UC_Stem"], "Odieresis-cy" : ["UC_Round", "UC_Round"], "Obarred-cy" : ["UC_Round", "UC_Round"], "Obarreddieresis-cy" : ["UC_Round", "UC_Round"], "Edieresis-cy" : ["UC_Ze", "UC_Round"], "Umacron-cy" : ["UC_CyrU", "UC_CyrU"], "Udieresis-cy" : ["UC_CyrU", "UC_CyrU"], "Uhungarumlaut-cy" : ["UC_CyrU", "UC_CyrU"], "Chedieresis-cy" : ["UC_Che", "UC_Stem"], "Ghedescender-cy" : ["UC_Stem", "UC_T"], "Yerudieresis-cy" : ["UC_Stem", "UC_Stem"], "Hahook-cy" : ["UC_X", "UC_X"], "Komide-cy" : ["", "UC_Stem"], "Elhook-cy" : ["UC_El", "UC_StemHook"], "Qa-cy" : ["UC_Round", "UC_Round"], "We-cy" : ["UC_W", ""], "Pedescender-cy" : ["UC_Stem", "UC_StemTooth"], "Shhadescender-cy" : ["UC_Stem", "UC_Shha"], "Ishorttail-cy" : ["UC_Stem", "UC_StemTooth"], "Enghe-cy" : ["UC_Stem", "UC_T"], "Tetse-cy" : ["UC_T", "UC_StemTooth"], "Ertick-cy" : ["UC_Stem","UC_P"], "Aie-cy" : ["", "UC_E"], "Alpha" : ["UC_A", "UC_A"], "Beta" : ["UC_Stem", "UC_B"], "Gamma" : ["UC_Stem", "UC_T"], "Delta" : ["UC_A", "UC_A"], "Epsilon" : ["UC_Stem", "UC_E"], "Zeta" : ["UC_Z", "UC_Z"], "Eta" : ["UC_Stem", "UC_Stem"], "Theta" : ["UC_Round", "UC_Round"], "Iota" : ["UC_Stem", "UC_Stem"], "Kappa" : ["UC_Stem", "UC_K"], "Lambda" : ["UC_A", "UC_A"], "Mu" : ["UC_Stem", "UC_Stem"], "Nu" : ["UC_Stem", "UC_Stem"], "Xi" : ["", "UC_E"], "Omicron" : ["UC_Round", "UC_Round"], "Pi" : ["UC_Stem", "UC_Stem"], "Rho" : ["UC_Stem", "UC_P"], "Sigma" : ["", "UC_E"], "Tau" : ["UC_T", "UC_T"], "Upsilon" : ["UC_Y", "UC_Y"], "Phi" : ["UC_Ef", "UC_Ef"], "Chi" : ["UC_X", "UC_X"], "Omega" : ["UC_Omega", "UC_Omega"], "Alphatonos" : ["", "UC_A"], "Epsilontonos" : ["UC_StemTonos", "UC_E"], "Etatonos" : ["UC_StemTonos", "UC_Stem"], "Iotatonos" : ["UC_StemTonos", "UC_Stem"], "Omicrontonos" : ["", "UC_Round"], "Upsilontonos" : ["", "UC_Y"], "Omegatonos" : ["", "UC_Omega"], "Iotadieresis" : ["UC_Stem", "UC_Stem"], "Upsilondieresis" : ["UC_Y", "UC_Y"] } groupsLCnormal = { "a" : ["lc_a", "lc_a"], "aacute" : ["lc_a", "lc_a"], "abreve" : ["lc_a", "lc_a"], "acircumflex" : ["lc_a", "lc_a"], "adieresis" : ["lc_a", "lc_a"], "agrave" : ["lc_a", "lc_a"], "amacron" : ["lc_a", "lc_a"], "aogonek" : ["lc_a", "lc_a"], "aring" : ["lc_a", "lc_a"], "aringacute" : ["lc_a", "lc_a"], "atilde" : ["lc_a", "lc_a"], "ae" : ["lc_a", "lc_e"], "aeacute" : ["lc_a", "lc_e"], "b" : ["lc_LongStem", "lc_Round"], "c" : ["lc_Round", "lc_c"], "cacute" : ["lc_Round", "lc_c"], "ccaron" : ["lc_Round", "lc_c"], "ccedilla" : ["lc_Round", "lc_c"], "ccircumflex" : ["lc_Round", "lc_c"], "cdotaccent" : ["lc_Round", "lc_c"], "d" : ["lc_Round", "lc_LongStem"], "eth" : ["lc_Round", ""], "dcaron" : ["lc_Round", "lc_Caron"], "dcroat" : ["lc_Round", "lc_LongStem"], "e" : ["lc_Round", "lc_e"], "eacute" : ["lc_Round", "lc_e"], "ebreve" : ["lc_Round", "lc_e"], "ecaron" : ["lc_Round", "lc_e"], "ecircumflex" : ["lc_Round", "lc_e"], "edieresis" : ["lc_Round", "lc_e"], "edotaccent" : ["lc_Round", "lc_e"], "egrave" : ["lc_Round", "lc_e"], "emacron" : ["lc_Round", "lc_e"], "eogonek" : ["lc_Round", "lc_e"], "f" : ["lc_f", "lc_f"], "g" : ["lc_g", "lc_g"], "gbreve" : ["lc_g", "lc_g"], "gcircumflex" : ["lc_g", "lc_g"], "gcommaaccent" : ["lc_g", "lc_g"], "gdotaccent" : ["lc_g", 
"lc_g"], "h" : ["lc_LongStem", "lc_Shoulder"], "hbar" : ["lc_LongStem", "lc_Shoulder"], "hcircumflex" : ["lc_LongStem", "lc_Shoulder"], "i" : ["lc_ShortStem", "lc_ShortStem"], "dotlessi" : ["lc_ShortStem", "lc_ShortStem"], "idotless" : ["lc_ShortStem", "lc_ShortStem"], "iacute" : ["lc_ShortStem", "lc_ShortStem"], "ibreve" : ["lc_ShortStem", "lc_ShortStem"], "icircumflex" : ["lc_ShortStem", "lc_ShortStem"], "idieresis" : ["lc_ShortStem", "lc_ShortStem"], "idotaccent" : ["lc_ShortStem", "lc_ShortStem"], "igrave" : ["lc_ShortStem", "lc_ShortStem"], "ij" : ["lc_ShortStem", "lc_j"], "imacron" : ["lc_ShortStem", "lc_ShortStem"], "iogonek" : ["lc_ShortStem", "lc_ShortStem"], "itilde" : ["lc_ShortStem", "lc_ShortStem"], "j" : ["lc_j", "lc_j"], "dotlessj" : ["lc_j", "lc_j"], "jdotless" : ["lc_j", "lc_j"], "jcircumflex" : ["lc_j", "lc_j"], "k" : ["lc_LongStem", "lc_k"], "kcommaaccent" : ["lc_LongStem", "lc_k"], "kgreenlandic" : ["lc_ShortStem", "lc_k"], "l" : ["lc_LongStem", "lc_LongStem"], "lacute" : ["lc_LongStem", "lc_LongStem"], "lcaron" : ["lc_LongStem", "lc_Caron"], "lcommaaccent" : ["lc_LongStem", "lc_LongStem"], "ldot" : ["lc_LongStem", ""], "lslash" : ["lc_lslash", "lc_lslash"], "m" : ["lc_ShortStem", "lc_Shoulder"], "n" : ["lc_ShortStem", "lc_Shoulder"], "nacute" : ["lc_ShortStem", "lc_Shoulder"], "napostrophe" : ["MSC_quoteright", "lc_Shoulder"], "ncaron" : ["lc_ShortStem", "lc_Shoulder"], "ncommaaccent" : ["lc_ShortStem", "lc_Shoulder"], "eng" : ["lc_ShortStem", "lc_Shoulder"], "ntilde" : ["lc_ShortStem", "lc_Shoulder"], "o" : ["lc_Round", "lc_Round"], "oacute" : ["lc_Round", "lc_Round"], "obreve" : ["lc_Round", "lc_Round"], "ocircumflex" : ["lc_Round", "lc_Round"], "odieresis" : ["lc_Round", "lc_Round"], "ograve" : ["lc_Round", "lc_Round"], "ohungarumlaut" : ["lc_Round", "lc_Round"], "omacron" : ["lc_Round", "lc_Round"], "oslash" : ["lc_Round", "lc_Round"], "oslashacute" : ["lc_Round", "lc_Round"], "otilde" : ["lc_Round", "lc_Round"], "oe" : ["lc_Round", "lc_e"], "p" : ["lc_p", "lc_Round"], "thorn" : ["lc_LongStem", "lc_Round"], "q" : ["lc_Round", ""], "r" : ["lc_ShortStem", "lc_r"], "racute" : ["lc_ShortStem", "lc_r"], "rcaron" : ["lc_ShortStem", "lc_r"], "rcommaaccent" : ["lc_ShortStem", "lc_r"], "s" : ["lc_s", "lc_s"], "sacute" : ["lc_s", "lc_s"], "scaron" : ["lc_s", "lc_s"], "scedilla" : ["lc_s", "lc_s"], "scircumflex" : ["lc_s", "lc_s"], "scommaaccent" : ["lc_s", "lc_s"], "t" : ["lc_t", "lc_t"], "tbar" : ["lc_t", ""], "tcaron" : ["lc_t", "lc_t"], "tcedilla" : ["lc_t", "lc_t"], "tcommaaccent" : ["lc_t", "lc_t"], "u" : ["lc_u", "lc_u"], "uacute" : ["lc_u", "lc_u"], "ubreve" : ["lc_u", "lc_u"], "ucircumflex" : ["lc_u", "lc_u"], "udieresis" : ["lc_u", "lc_u"], "ugrave" : ["lc_u", "lc_u"], "uhungarumlaut" : ["lc_u", "lc_u"], "umacron" : ["lc_u", "lc_u"], "uogonek" : ["lc_u", "lc_u"], "uring" : ["lc_u", "lc_u"], "utilde" : ["lc_u", "lc_u"], "v" : ["lc_vwy", "lc_vwy"], "w" : ["lc_vwy", "lc_vwy"], "wacute" : ["lc_vwy", "lc_vwy"], "wcircumflex" : ["lc_vwy", "lc_vwy"], "wdieresis" : ["lc_vwy", "lc_vwy"], "wgrave" : ["lc_vwy", "lc_vwy"], "x" : ["lc_x", "lc_x"], "y" : ["lc_vwy", "lc_vwy"], "yacute" : ["lc_vwy", "lc_vwy"], "ycircumflex" : ["lc_vwy", "lc_vwy"], "ydieresis" : ["lc_vwy", "lc_vwy"], "ygrave" : ["lc_vwy", "lc_vwy"], "z" : ["lc_z", "lc_z"], "zacute" : ["lc_z", "lc_z"], "zcaron" : ["lc_z", "lc_z"], "zdotaccent" : ["lc_z", "lc_z"], "schwa" : ["lc_schwa", "lc_Round"], "f_f" : ["lc_f", "lc_f"], "f_f_i" : ["lc_f", "lc_ShortStem"], "f_f_l" : ["lc_f", "lc_LongStem"], "f_i" : ["lc_f", 
"lc_ShortStem"], "f_l" : ["lc_f", "lc_LongStem"], "fi" : ["lc_f", "lc_ShortStem"], "fl" : ["lc_f", "lc_LongStem"], "a-cy" : ["lc_a", "lc_a"], "be-cy" : ["", "lc_Round"], "ve-cy" : ["lc_ShortStem", "lc_ze"], "ge-cy" : ["lc_ShortStem", "lc_te"], "gje-cy" : ["lc_ShortStem", "lc_te"], "gheupturn-cy" : ["lc_ShortStem", "lc_te"], "de-cy" : ["", "lc_StemTooth"], "ie-cy" : ["lc_Round", "lc_e"], "iegrave-cy" : ["lc_Round", "lc_e"], "io-cy" : ["lc_Round", "lc_e"], "zhe-cy" : ["lc_zhe", "lc_k"], "ze-cy" : ["lc_ze", "lc_ze"], "ii-cy" : ["lc_ShortStem", "lc_ShortStem"], "iishort-cy" : ["lc_ShortStem", "lc_ShortStem"], "iigrave-cy" : ["lc_ShortStem", "lc_ShortStem"], "ka-cy" : ["lc_ShortStem", "lc_k"], "kje-cy" : ["lc_ShortStem", "lc_k"], "el-cy" : ["lc_el", "lc_ShortStem"], "em-cy" : ["lc_ShortStem", "lc_ShortStem"], "en-cy" : ["lc_ShortStem", "lc_ShortStem"], "o-cy" : ["lc_Round", "lc_Round"], "pe-cy" : ["lc_ShortStem", "lc_ShortStem"], "er-cy" : ["lc_p", "lc_Round"], "es-cy" : ["lc_Round", "lc_c"], "te-cy" : ["lc_te", "lc_te"], "u-cy" : ["lc_vwy", "lc_vwy"], "ushort-cy" : ["lc_vwy", "lc_vwy"], "ef-cy" : ["lc_Round", "lc_Round"], "ha-cy" : ["lc_x", "lc_x"], "che-cy" : ["lc_che", "lc_ShortStem"], "tse-cy" : ["lc_ShortStem", "lc_StemTooth"], "sha-cy" : ["lc_ShortStem", "lc_ShortStem"], "shcha-cy" : ["lc_ShortStem", "lc_StemTooth"], "dzhe-cy" : ["lc_ShortStem", "lc_ShortStem"], "ia-cy" : ["", "lc_ShortStem"], "softsign-cy" : ["lc_ShortStem", "lc_softsign"], "hardsign-cy" : ["lc_te", "lc_softsign"], "yeru-cy" : ["lc_ShortStem", "lc_ShortStem"], "lje-cy" : ["lc_el", "lc_softsign"], "nje-cy" : ["lc_ShortStem", "lc_softsign"], "ereversed-cy" : ["lc_ze", "lc_Round"], "dze-cy" : ["lc_s", "lc_s"], "e-cy" : ["lc_Round", "lc_c"], "yi-cy" : ["lc_ShortStem", "lc_ShortStem"], "i-cy" : ["lc_ShortStem", "lc_ShortStem"], "je-cy" : ["lc_j", "lc_j"], "tshe-cy" : ["lc_LongStem", "lc_Shoulder"], "iu-cy" : ["lc_ShortStem", "lc_Round"], "dje-cy" : ["lc_LongStem", "lc_Shoulder"], "fita-cy" : ["lc_Round", "lc_Round"], "izhitsa-cy" : ["lc_vwy", ""], "ghestroke-cy" : ["lc_ShortStem", "lc_te"], "ghemiddlehook-cy" : ["lc_ShortStem", ""], "zhedescender-cy" : ["lc_zhe", "lc_k"], "zedescender-cy" : ["lc_ze", "lc_ze"], "kadescender-cy" : ["lc_ShortStem", "lc_k"], "kaverticalstroke-cy" : ["lc_ShortStem", "lc_k"], "kastroke-cy" : ["lc_LongStem", "lc_k"], "kabashkir-cy" : ["lc_te", "lc_k"], "endescender-cy" : ["lc_ShortStem", "lc_StemTooth"], "pemiddlehook-cy" : ["lc_ShortStem", ""], "haabkhasian-cy" : ["lc_Round", ""], "esdescender-cy" : ["lc_Round", "lc_c"], "tedescender-cy" : ["lc_te", "lc_te"], "ustrait-cy" : ["lc_vwy", "lc_vwy"], "hadescender-cy" : ["lc_x", "lc_x"], "chedescender-cy" : ["lc_che", "lc_StemTooth"], "cheverticalstroke-cy" : ["lc_che", "lc_ShortStem"], "shha-cy" : ["lc_LongStem", "lc_Shoulder"], "cheabkhasian-cy" : ["lc_cheabkhaz", "lc_e"], "chedescenderabkhasian-cy" : ["lc_cheabkhaz", "lc_e"], "palochka-cy" : ["lc_LongStem", "lc_LongStem"], "zhebreve-cy" : ["lc_zhe", "lc_k"], "kahook-cy" : ["lc_ShortStem", ""], "eltail-cy" : ["lc_el", "lc_StemTooth"], "enhook-cy" : ["lc_ShortStem", "lc_StemHook"], "entail-cy" : ["lc_ShortStem", "lc_StemTooth"], "chekhakassian-cy" : ["lc_che", "lc_ShortStem"], "emtail-cy" : ["lc_ShortStem", "lc_StemTooth"], "abreve-cy" : ["lc_a", "lc_a"], "adieresis-cy" : ["lc_a", "lc_a"], "iebreve-cy" : ["lc_Round", "lc_e"], "schwa-cy" : ["lc_schwa", "lc_Round"], "schwadieresis-cy" : ["lc_schwa", "lc_Round"], "zhedieresis-cy" : ["lc_zhe", "lc_k"], "zedieresis-cy" : ["lc_ze", "lc_ze"], "imacron-cy" : 
["lc_ShortStem", "lc_ShortStem"], "idieresis-cy" : ["lc_ShortStem", "lc_ShortStem"], "odieresis-cy" : ["lc_Round", "lc_Round"], "obarred-cy" : ["lc_Round", "lc_Round"], "obarreddieresis-cy" : ["lc_Round", "lc_Round"], "edieresis-cy" : ["lc_ereversed", "lc_Round"], "umacron-cy" : ["lc_vwy", "lc_vwy"], "udieresis-cy" : ["lc_vwy", "lc_vwy"], "uhungarumlaut-cy" : ["lc_vwy", "lc_vwy"], "chedieresis-cy" : ["lc_che", "lc_ShortStem"], "ghedescender-cy" : ["lc_ShortStem", "lc_te"], "yerudieresis-cy" : ["lc_ShortStem", "lc_ShortStem"], "hahook-cy" : ["lc_x", "lc_x"], "komide-cy" : ["lc_Round", "lc_LongStem"], "elhook-cy" : ["lc_el", "lc_StemHook"], "we-cy" : ["lc_vwy", "lc_vwy"], "pedescender-cy" : ["lc_ShortStem", "lc_StemTooth"], "shhadescender-cy" : ["lc_LongStem", "lc_Shoulder"], "ishorttail-cy" : ["lc_ShortStem", "lc_StemTooth"], "ertick-cy" : ["lc_p", "p"], "enghe-cy" : ["lc_ShortStem", "lc_te"], "tetse-cy" : ["lc_te", "lc_StemTooth"], "aie-cy" : ["lc_a", "lc_e"], "alpha" : ["lc_Round", "lc_alpha"], "delta" : ["", "lc_Round"], "epsilon" : ["lc_epsilon", "lc_epsilon"], "eta" : ["lc_eta", "lc_eta"], "iota" : ["lc_iota", "lc_iota"], "mu" : ["lc_ShortStem", "lc_alpha"], "omicron" : ["lc_Round", "lc_Round"], "rho" : ["", "lc_Round"], "sigmafinal" : ["lc_Round", ""], "sigma" : ["lc_Round", ""], "upsilon" : ["lc_upsilon", "lc_upsilon"], "phi" : ["lc_Round", "lc_Round"], "psi" : ["", "lc_upsilon"], "omega" : ["lc_omega", "lc_upsilon"], "iotatonos" : ["lc_iota", "lc_iota"], "iotadieresis" : ["lc_iota", "lc_iota"], "iotadieresistonos" : ["lc_iota", "lc_iota"], "upsilontonos" : ["lc_upsilon", "lc_upsilon"], "upsilondieresis" : ["lc_upsilon", "lc_upsilon"], "upsilondieresistonos" : ["lc_upsilon", "lc_upsilon"], "omicrontonos" : ["lc_Round", "lc_Round"], "omegatonos" : ["lc_omega", "lc_upsilon"], "alphatonos" : ["lc_Round", "lc_alpha"], "epsilontonos" : ["lc_epsilon", "lc_epsilon"], "etatonos" : ["lc_eta", "lc_eta"] } groupsLCcursive = { "a" : ["lc_Round", "lc_ShortStem"], "aacute" : ["lc_Round", "lc_ShortStem"], "abreve" : ["lc_Round", "lc_ShortStem"], "acircumflex" : ["lc_Round", "lc_ShortStem"], "adieresis" : ["lc_Round", "lc_ShortStem"], "agrave" : ["lc_Round", "lc_ShortStem"], "amacron" : ["lc_Round", "lc_ShortStem"], "aogonek" : ["lc_Round", "lc_ShortStem"], "aring" : ["lc_Round", "lc_ShortStem"], "aringacute" : ["lc_Round", "lc_ShortStem"], "atilde" : ["lc_Round", "lc_ShortStem"], "ae" : ["lc_Round", "lc_e"], "aeacute" : ["lc_Round", "lc_e"], "b" : ["lc_LongStem1", "lc_Round"], "c" : ["lc_Round", "lc_c"], "cacute" : ["lc_Round", "lc_c"], "ccaron" : ["lc_Round", "lc_c"], "ccedilla" : ["lc_Round", "lc_c"], "ccircumflex" : ["lc_Round", "lc_c"], "cdotaccent" : ["lc_Round", "lc_c"], "d" : ["lc_Round", "lc_LongStem"], "eth" : ["lc_Round", ""], "dcaron" : ["lc_Round", "lc_Caron"], "dcroat" : ["lc_Round", "lc_LongStem"], "e" : ["lc_Round", "lc_e"], "eacute" : ["lc_Round", "lc_e"], "ebreve" : ["lc_Round", "lc_e"], "ecaron" : ["lc_Round", "lc_e"], "ecircumflex" : ["lc_Round", "lc_e"], "edieresis" : ["lc_Round", "lc_e"], "edotaccent" : ["lc_Round", "lc_e"], "egrave" : ["lc_Round", "lc_e"], "emacron" : ["lc_Round", "lc_e"], "eogonek" : ["lc_Round", "lc_e"], "f" : ["lc_f", "lc_f"], "g" : ["lc_g", "lc_g"], "gbreve" : ["lc_g", "lc_g"], "gcaron" : ["lc_g", "lc_g"], "gcircumflex" : ["lc_g", "lc_g"], "gcommaaccent" : ["lc_g", "lc_g"], "gdotaccent" : ["lc_g", "lc_g"], "h" : ["lc_LongStem2", "lc_Shoulder"], "hbar" : ["lc_LongStem2", "lc_Shoulder"], "hcircumflex" : ["lc_LongStem2", "lc_Shoulder"], "i" : ["lc_i", 
"lc_ShortStem"], "dotlessi" : ["lc_i", "lc_ShortStem"], "idotless" : ["lc_i", "lc_ShortStem"], "iacute" : ["lc_i", "lc_ShortStem"], "ibreve" : ["lc_i", "lc_ShortStem"], "icircumflex" : ["lc_i", "lc_ShortStem"], "idieresis" : ["lc_i", "lc_ShortStem"], "idotaccent" : ["lc_i", "lc_ShortStem"], "igrave" : ["lc_i", "lc_ShortStem"], "ij" : ["lc_i", "lc_j"], "imacron" : ["lc_i", "lc_ShortStem"], "iogonek" : ["lc_i", "lc_ShortStem"], "itilde" : ["lc_i", "lc_ShortStem"], "j" : ["lc_j", "lc_j"], "dotlessj" : ["lc_j", "lc_j"], "jdotless" : ["lc_j", "lc_j"], "jcircumflex" : ["lc_j", "lc_j"], "k" : ["lc_LongStem2", "lc_k"], "kcommaaccent" : ["lc_LongStem2", "lc_k"], "kgreenlandic" : ["lc_ShortStem", "lc_k"], "l" : ["lc_LongStem1", "lc_LongStem"], "lacute" : ["lc_LongStem1", "lc_LongStem"], "lcaron" : ["lc_LongStem1", "lc_Caron"], "lcommaaccent" : ["lc_LongStem1", "lc_LongStem"], "ldot" : ["lc_LongStem1", ""], "lslash" : ["lc_lslash", "lc_lslash"], "m" : ["lc_ShortStem", "lc_Shoulder"], "n" : ["lc_ShortStem", "lc_Shoulder"], "nacute" : ["lc_ShortStem", "lc_Shoulder"], "napostrophe" : ["MSC_quoteright", "lc_Shoulder"], "ncaron" : ["lc_ShortStem", "lc_Shoulder"], "ncommaaccent" : ["lc_ShortStem", "lc_Shoulder"], "eng" : ["lc_ShortStem", ""], "ntilde" : ["lc_ShortStem", "lc_Shoulder"], "o" : ["lc_Round", "lc_Round"], "oacute" : ["lc_Round", "lc_Round"], "obreve" : ["lc_Round", "lc_Round"], "ocircumflex" : ["lc_Round", "lc_Round"], "odieresis" : ["lc_Round", "lc_Round"], "ograve" : ["lc_Round", "lc_Round"], "ohungarumlaut" : ["lc_Round", "lc_Round"], "omacron" : ["lc_Round", "lc_Round"], "oslash" : ["lc_Round", "lc_Round"], "oslashacute" : ["lc_Round", "lc_Round"], "otilde" : ["lc_Round", "lc_Round"], "oe" : ["lc_Round", "lc_e"], "p" : ["", "lc_Round"], "thorn" : ["", "lc_Round"], "q" : ["lc_Round", ""], "r" : ["lc_ShortStem", "lc_r"], "racute" : ["lc_ShortStem", "lc_r"], "rcaron" : ["lc_ShortStem", "lc_r"], "rcommaaccent" : ["lc_ShortStem", "lc_r"], "s" : ["lc_s", "lc_s"], "sacute" : ["lc_s", "lc_s"], "scaron" : ["lc_s", "lc_s"], "scedilla" : ["lc_s", "lc_s"], "scircumflex" : ["lc_s", "lc_s"], "scommaaccent" : ["lc_s", "lc_s"], "t" : ["lc_t", "lc_t"], "tbar" : ["lc_t", "lc_t"], "tcaron" : ["lc_t", "lc_t"], "tcedilla" : ["lc_t", "lc_t"], "tcommaaccent" : ["lc_t", "lc_t"], "u" : ["lc_i", "lc_ShortStem"], "uacute" : ["lc_i", "lc_ShortStem"], "ubreve" : ["lc_i", "lc_ShortStem"], "ucircumflex" : ["lc_i", "lc_ShortStem"], "udieresis" : ["lc_i", "lc_ShortStem"], "ugrave" : ["lc_i", "lc_ShortStem"], "uhungarumlaut" : ["lc_i", "lc_ShortStem"], "umacron" : ["lc_i", "lc_ShortStem"], "uogonek" : ["lc_i", "lc_ShortStem"], "uring" : ["lc_i", "lc_ShortStem"], "utilde" : ["lc_i", "lc_ShortStem"], "v" : ["lc_vw", "lc_vw"], "w" : ["lc_vw", "lc_vw"], "wacute" : ["lc_vw", "lc_vw"], "wcircumflex" : ["lc_vw", "lc_vw"], "wdieresis" : ["lc_vw", "lc_vw"], "wgrave" : ["lc_vw", "lc_vw"], "x" : ["lc_x", "lc_x"], "y" : ["lc_y", "lc_y"], "yacute" : ["lc_y", "lc_y"], "ycircumflex" : ["lc_y", "lc_y"], "ydieresis" : ["lc_y", "lc_y"], "ygrave" : ["lc_y", "lc_y"], "z" : ["lc_z", "lc_z"], "zacute" : ["lc_z", "lc_z"], "zcaron" : ["lc_z", "lc_z"], "zdotaccent" : ["lc_z", "lc_z"], "schwa" : ["lc_schwa", "lc_Round"], "a-cy" : ["lc_Round", "lc_ShortStem"], "be-cy" : ["", "lc_Round"], "ve-cy" : ["lc_Round", "lc_ze"], "ge-cy" : ["lc_ge", "lc_ge"], "gje-cy" : ["lc_ge", "lc_ge"], "gheupturn-cy" : ["lc_ShortStem", ""], "de-cy" : ["lc_Round", ""], "ie-cy" : ["lc_Round", "lc_e"], "iegrave-cy" : ["lc_Round", "lc_e"], "io-cy" : ["lc_Round", "lc_e"], 
"zhe-cy" : ["lc_zhe", "lc_zhe"], "ze-cy" : ["lc_ze", "lc_ze"], "ii-cy" : ["lc_i", "lc_ShortStem"], "iishort-cy" : ["lc_i", "lc_ShortStem"], "iigrave-cy" : ["lc_i", "lc_ShortStem"], "ka-cy" : ["lc_ShortStem", "lc_k"], "kje-cy" : ["lc_ShortStem", "lc_k"], "el-cy" : ["lc_el", "lc_ShortStem"], "em-cy" : ["lc_el", "lc_ShortStem"], "en-cy" : ["lc_ShortStem", "lc_ShortStem"], "o-cy" : ["lc_Round", "lc_Round"], "pe-cy" : ["lc_ShortStem", "lc_Shoulder"], "er-cy" : ["lc_p", "lc_Round"], "es-cy" : ["lc_Round", "lc_c"], "te-cy" : ["lc_ShortStem", "lc_Shoulder"], "u-cy" : ["lc_vwy", "lc_vwy"], "ushort-cy" : ["lc_vwy", "lc_vwy"], "ef-cy" : ["lc_Round", "lc_Round"], "ha-cy" : ["lc_x", "lc_x"], "che-cy" : ["lc_che", "lc_ShortStem"], "tse-cy" : ["lc_i", "lc_StemTooth"], "sha-cy" : ["lc_i", "lc_ShortStem"], "shcha-cy" : ["lc_i", "lc_StemTooth"], "dzhe-cy" : ["lc_i", "lc_ShortStem"], "ia-cy" : ["", "lc_ShortStem"], "softsign-cy" : ["lc_i", "lc_softsign"], "hardsign-cy" : ["lc_hardsign", "lc_softsign"], "yeru-cy" : ["lc_i", "lc_ShortStem"], "lje-cy" : ["lc_el", "lc_softsign"], "nje-cy" : ["lc_ShortStem", "lc_softsign"], "ereversed-cy" : ["lc_ereversed", "lc_Round"], "e-cy" : ["lc_Round", "lc_c"], "yi-cy" : ["lc_i", "lc_ShortStem"], "i-cy" : ["lc_i", "lc_ShortStem"], "tshe-cy" : ["lc_LongStem", "lc_Shoulder"], "iu-cy" : ["lc_ShortStem", "lc_Round"], "dje-cy" : ["lc_LongStem", ""], "yat-cy" : ["lc_ShortStem", "lc_softsign"], "fita-cy" : ["lc_Round", "lc_Round"], "izhitsa-cy" : ["lc_vwy", ""], "ghestroke-cy" : ["lc_ge", "lc_ge"], "ghemiddlehook-cy" : ["lc_ShortStem", ""], "zhedescender-cy" : ["lc_zhe", "lc_zhe"], "zedescender-cy" : ["lc_ze", "lc_ze"], "kadescender-cy" : ["lc_ShortStem", "lc_k"], "kaverticalstroke-cy" : ["lc_ShortStem", "lc_k"], "kastroke-cy" : ["lc_LongStem", "lc_k"], "kabashkir-cy" : ["lc_hardsign", "lc_k"], "endescender-cy" : ["lc_ShortStem", "lc_StemTooth"], "pemiddlehook-cy" : ["lc_ShortStem", ""], "haabkhasian-cy" : ["lc_Round", ""], "esdescender-cy" : ["lc_Round", "lc_c"], "tedescender-cy" : ["lc_ShortStem", "lc_ShoulderTooth"], "ustrait-cy" : ["lc_ustait", "lc_ustait"], "ustraitstroke-cy" : ["lc_ustait", "lc_ustait"], "hadescender-cy" : ["lc_x", "lc_x"], "chedescender-cy" : ["lc_che", "lc_StemTooth"], "cheverticalstroke-cy" : ["lc_che", "lc_ShortStem"], "shha-cy" : ["lc_LongStem", "lc_Shoulder"], "cheabkhasian-cy" : ["lc_cheabkhaz", "lc_e"], "chedescenderabkhasian-cy" : ["lc_cheabkhaz", "lc_e"], "palochka-cy" : ["lc_LongStem", "lc_LongStem"], "zhebreve-cy" : ["lc_zhe", "lc_zhe"], "kahook-cy" : ["lc_ShortStem", ""], "eltail-cy" : ["lc_el", "lc_StemTooth"], "enhook-cy" : ["lc_ShortStem", "lc_StemHook"], "entail-cy" : ["lc_ShortStem", "lc_StemTooth"], "chekhakassian-cy" : ["lc_che", "lc_ShortStem"], "emtail-cy" : ["lc_el", "lc_StemTooth"], "abreve-cy" : ["lc_Round", "lc_ShortStem"], "adieresis-cy" : ["lc_Round", "lc_ShortStem"], "iebreve-cy" : ["lc_Round", "lc_e"], "schwa-cy" : ["lc_schwa", "lc_Round"], "schwadieresis-cy" : ["lc_schwa", "lc_Round"], "zhedieresis-cy" : ["lc_zhe", "lc_zhe"], "zedieresis-cy" : ["lc_ze", "lc_ze"], "imacron-cy" : ["lc_i", "lc_ShortStem"], "idieresis-cy" : ["lc_i", "lc_ShortStem"], "odieresis-cy" : ["lc_Round", "lc_Round"], "obarred-cy" : ["lc_Round", "lc_Round"], "obarreddieresis-cy" : ["lc_Round", "lc_Round"], "edieresis-cy" : ["lc_ze", "lc_Round"], "umacron-cy" : ["lc_vwy", "lc_vwy"], "udieresis-cy" : ["lc_vwy", "lc_vwy"], "uhungarumlaut-cy" : ["lc_vwy", "lc_vwy"], "chedieresis-cy" : ["lc_che", "lc_ShortStem"], "ghedescender-cy" : ["lc_ge", "lc_ge"], 
"yerudieresis-cy" : ["lc_i", "lc_ShortStem"], "hahook-cy" : ["lc_x", "lc_x"], "komide-cy" : ["lc_Round", "lc_LongStem"], "reversedze-cy" : ["", "lc_c"], "elhook-cy" : ["lc_el", "lc_StemHook"], "we-cy" : ["lc_vwy", ""], "pedescender-cy" : ["lc_ShortStem", "lc_ShoulderTooth"], "shhadescender-cy" : ["lc_LongStem", "lc_ShoulderTooth"], "ishorttail-cy" : ["lc_i", "lc_StemTooth"], "ertick-cy" : ["lc_er", ""], "enghe-cy" : ["lc_ShortStem", ""], "tetse-cy" : ["lc_te", ""], "aie-cy" : ["lc_Round", "lc_e"] } groupsMS = { "colon" : ["MSC_colon", "MSC_colon"], "comma" : ["MSC_period", "MSC_period"], "ellipsis" : ["MSC_period", "MSC_period"], "period" : ["MSC_period", "MSC_period"], "exclam" : ["MSC_exclam", "MSC_eclam"], "exclamdouble" : ["MSC_exclam", "MSC_eclam"], "quotedbl" : ["MSC_VertQuote", "MSC_VertQuote"], "quotesingle" : ["MSC_VertQuote", "MSC_VertQuote"], "semicolon" : ["MSC_colon", "MSC_colon"], "slash" : ["MSC_slash", "MSC_slash"], "braceleft" : ["", "MSC_bracketleft"], "braceright" : ["MSC_bracketright", ""], "bracketleft" : ["", "MSC_bracketleft"], "bracketright" : ["MSC_bracketright", ""], "parenleft" : ["", "MSC_bracketleft"], "parenright" : ["MSC_bracketright", ""], "emdash" : ["MSC_dash", "MSC_dash"], "endash" : ["MSC_dash", "MSC_dash"], "hyphen" : ["MSC_dash", "MSC_dash"], "horizontalbar" : ["MSC_dash", "MSC_dash"], "hyphentwo" : ["MSC_dash", "MSC_dash"], "softhyphen" : ["MSC_dash", "MSC_dash"], "guillemetleft" : ["MSC_guillemetleft", "MSC_guillemetleft"], "guillemetright" : ["MSC_guillemetright", "MSC_guillemetright"], "guilsinglleft" : ["MSC_guillemetleft", "MSC_guillemetleft"], "guilsinglright" : ["MSC_guillemetright", "MSC_guillemetright"], "quotedblbase" : ["MSC_period", "MSC_period"], "quotedblleft" : ["MSC_quoteleft", "MSC_quoteleft"], "quotedblright" : ["MSC_quoteright", "MSC_quoteright"], "quoteleft" : ["MSC_quoteleft", "MSC_quoteleft"], "quoteright" : ["MSC_quoteright", "MSC_quoteright"], "quotesinglbase" : ["MSC_period", "MSC_period"], "questiongreek" : ["MSC_colon", "MSC_colon"], "space" : ["MSC_space", "MSC_space"], "nbspace" : ["MSC_space", "MSC_space"], "divide" : ["MSC_minus", "MSC_minus"], "equal" : ["MSC_equal", "MSC_equal"], "greater" : ["", "MSC_minus"], "less" : ["MSC_minus", ""], "minus" : ["MSC_minus", "MSC_minus"], "notequal" : ["MSC_equal", "MSC_equal"], "percent" : ["MSC_percent", ""], "perthousand" : ["MSC_percent", ""], "plus" : ["MSC_minus", "MSC_minus"] } thisFont.disableUpdateInterface() # suppresses UI updates in Font View isNeeded = {} for glyph in thisFont.glyphs: isNeeded[glyph.name] = False if self.w.radioButton.get() == 1: for layer in thisFont.selectedLayers: isNeeded[layer.parent.name] = True else: for glyph in thisFont.glyphs: isNeeded[glyph.name] = True for key in groupsUC: if thisFont.glyphs[key] and isNeeded[key]: thisFont.glyphs[key].setLeftKerningGroup_(groupsUC[key][0]) thisFont.glyphs[key].setRightKerningGroup_(groupsUC[key][1]) if thisFont.glyphs[key.lower()+".sc"] and isNeeded[key]: thisFont.glyphs[key.lower()+".sc"].setLeftKerningGroup_(re.sub("UC_", "SC_",groupsUC[key][0])) thisFont.glyphs[key.lower()+".sc"].setRightKerningGroup_(re.sub("UC_", "SC_",groupsUC[key][1])) elif thisFont.glyphs[key.lower()+".smcp"] and isNeeded[key]: thisFont.glyphs[key.lower()+".smcp"].setLeftKerningGroup_(re.sub("UC_", "SC_",groupsUC[key][0])) thisFont.glyphs[key.lower()+".smcp"].setRightKerningGroup_(re.sub("UC_", "SC_",groupsUC[key][1])) if sender == self.w.allcapButton: if thisFont.glyphs[key] and isNeeded[key]: try: 
thisFont.glyphs[key.lower()].setLeftKerningGroup_(groupsUC[key][0]) thisFont.glyphs[key.lower()].setRightKerningGroup_(groupsUC[key][1]) except: print key.lower() for key in groupsMS: if thisFont.glyphs[key] and isNeeded[key]: thisFont.glyphs[key].setLeftKerningGroup_(groupsMS[key][0]) thisFont.glyphs[key].setRightKerningGroup_(groupsMS[key][1]) if (thisFont.glyphs[key.lower()+".case"] or thisFont.glyphs[key.lower()+".smcp"]) and isNeeded[key]: thisFont.glyphs[key].setLeftKerningGroup_(re.sub("MSC_", "MSC_UC_",groupsMS[key][0])) thisFont.glyphs[key].setRightKerningGroup_(re.sub("MSC_", "MSC_UC_",groupsMS[key][1])) if sender == self.w.normalButton: for key in groupsLCnormal: if thisFont.glyphs[key] and isNeeded[key]: thisFont.glyphs[key].setLeftKerningGroup_(groupsLCnormal[key][0]) thisFont.glyphs[key].setRightKerningGroup_(groupsLCnormal[key][1]) elif sender == self.w.cursiveButton: for key in groupsLCcursive: if thisFont.glyphs[key] and isNeeded[key]: thisFont.glyphs[key].setLeftKerningGroup_(groupsLCcursive[key][0]) thisFont.glyphs[key].setRightKerningGroup_(groupsLCcursive[key][1]) thisFont.enableUpdateInterface() # re-enables UI updates in Font View self.w.close() # delete if you want window to stay open except Exception, e: # brings macro window to front and reports error: Glyphs.showMacroWindow() print " SetKernPairsMain Error: %s" % e
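The re.sub calls in this example are plain prefix rewrites: when a matching small-cap glyph exists (key.lower() + ".sc" or ".smcp"), the uppercase kerning groups are reused with "UC_" swapped for "SC_", and the punctuation groups get "MSC_" swapped for "MSC_UC_" when .case/.smcp variants are present. A minimal sketch of that idea, using a hypothetical two-entry table in place of the full groupsUC dictionary above:

import re

# Hypothetical miniature of the groupsUC table; the real script covers
# hundreds of glyph names.
groupsUC = {"A": ["UC_A", "UC_A"], "B": ["UC_Stem", "UC_B"]}

for glyph_name, (left_group, right_group) in groupsUC.items():
    # Derive the small-cap group names by swapping the prefix.
    sc_left = re.sub("UC_", "SC_", left_group)    # "UC_Stem" -> "SC_Stem"
    sc_right = re.sub("UC_", "SC_", right_group)
    print(glyph_name.lower() + ".sc", sc_left, sc_right)

Since "UC_" contains no regex metacharacters, str.replace("UC_", "SC_") would give the same result; re.sub is simply the tool the script standardizes on.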
2
Example 14
View licensedef _create_helpers_module(): def to_str(val): """Convert value into string. Return '' if val is None. ex. >>> to_str(None) '' >>> to_str("foo") 'foo' >>> to_str(u"\u65e5\u672c\u8a9e") u'\u65e5\u672c\u8a9e' >>> to_str(123) '123' """ if val is None: return '' if isinstance(val, str): return val if isinstance(val, unicode): return val return str(val) def generate_tostrfunc(encoding): """Generate 'to_str' function which encodes unicode to str. ex. import tenjin from tenjin.helpers import escape to_str = tenjin.generate_tostrfunc('utf-8') engine = tenjin.Engine() context = { 'items': [u'AAA', u'BBB', u'CCC'] } output = engine.render('example.pyhtml') print output """ def to_str(val): if val is None: return '' if isinstance(val, str): return val if isinstance(val, unicode): return val.encode(encoding) return str(val) return to_str def echo(string): """add string value into _buf. this is equivarent to '#{string}'.""" frame = sys._getframe(1) context = frame.f_locals context['_buf'].append(string) def start_capture(varname=None): """ start capturing with name. ex. list.rbhtml <html><body> <?py start_capture('itemlist') ?> <ul> <?py for item in list: ?> <li>${item}</li> <?py #end ?> </ul> <?py stop_capture() ?> </body></html> ex. layout.rbhtml <html xml:lang="en" lang="en"> <head> <title>Capture Example</title> </head> <body> <!-- content --> #{itemlist} <!-- /content --> </body> </html> """ frame = sys._getframe(1) context = frame.f_locals context['_buf_tmp'] = context['_buf'] context['_capture_varname'] = varname context['_buf'] = [] def stop_capture(store_to_context=True): """ stop capturing and return the result of capturing. if store_to_context is True then the result is stored into _context[varname]. """ frame = sys._getframe(1) context = frame.f_locals result = ''.join(context['_buf']) context['_buf'] = context.pop('_buf_tmp') varname = context.pop('_capture_varname') if varname: context[varname] = result if store_to_context: context['_context'][varname] = result return result def captured_as(name): """ helper method for layout template. if captured string is found then append it to _buf and return True, else return False. """ frame = sys._getframe(1) context = frame.f_locals if context.has_key(name): _buf = context['_buf'] _buf.append(context[name]) return True return False def _p(arg): """ex. '/show/'+_p("item['id']") => "/show/#{item['id']}" """ return '<`#%s#`>' % arg # decoded into #{...} by preprocessor def _P(arg): """ex. 
'<b>%s</b>' % _P("item['id']") => "<b>${item['id']}</b>" """ return '<`$%s$`>' % arg # decoded into ${...} by preprocessor def _decode_params(s): """decode <`#...#`> and <`$...$`> into #{...} and ${...}""" from urllib import unquote dct = { 'lt':'<', 'gt':'>', 'amp':'&', 'quot':'"', '#039':"'", } def unescape(s): #return s.replace('<', '<').replace('>', '>').replace('"', '"').replace(''', "'").replace('&', '&') return re.sub(r'&(lt|gt|quot|amp|#039);', lambda m: dct[m.group(1)], s) s = re.sub(r'%3C%60%23(.*?)%23%60%3E', lambda m: '#{%s}' % unquote(m.group(1)), s) s = re.sub(r'%3C%60%24(.*?)%24%60%3E', lambda m: '${%s}' % unquote(m.group(1)), s) s = re.sub(r'<`#(.*?)#`>', lambda m: '#{%s}' % unescape(m.group(1)), s) s = re.sub(r'<`\$(.*?)\$`>', lambda m: '${%s}' % unescape(m.group(1)), s) s = re.sub(r'<`#(.*?)#`>', r'#{\1}', s) s = re.sub(r'<`\$(.*?)\$`>', r'${\1}', s) return s mod = _create_module('tenjin.helpers') mod.to_str = to_str mod.generate_tostrfunc = generate_tostrfunc mod.echo = echo mod.start_capture = start_capture mod.stop_capture = stop_capture mod.captured_as = captured_as mod._p = _p mod._P = _P mod._decode_params = _decode_params mod.__all__ = ['escape', 'to_str', 'echo', 'generate_tostrfunc', 'start_capture', 'stop_capture', 'captured_as', '_p', '_P', '_decode_params', ] return mod
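The _decode_params helper above shows both replacement forms re.sub accepts: a callable, which receives the match object so the replacement can be computed (URL-unquoting or entity-unescaping the captured text), and a template string with backreferences such as r'#{\1}'. A self-contained Python 3 sketch of the same decoding idea (the sample string is made up; the original module targets Python 2 and imports unquote from urllib):

import re
from urllib.parse import unquote

def decode_params(s):
    # Callable replacement: rebuild "#{...}" from the URL-encoded capture.
    s = re.sub(r'%3C%60%23(.*?)%23%60%3E',
               lambda m: '#{%s}' % unquote(m.group(1)), s)
    # Template replacement: \1 pastes the capture into the result.
    s = re.sub(r'<`#(.*?)#`>', r'#{\1}', s)
    return s

print(decode_params('/show/%3C%60%23item%5B%27id%27%5D%23%60%3E'))
# -> /show/#{item['id']}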
2
Example 15
View licensedef fanart(item): logger.info("pelisalacarta.peliculasdk fanart") itemlist = [] url = item.url data = scrapertools.cachePage(url) data = re.sub(r"\n|\r|\t|\s{2}| ","",data) title= scrapertools.get_match(data,'<title>Ver Película(.*?) \(') title= re.sub(r"3D|SBS|-|","",title) title= title.replace('á','a') title= title.replace('Á','A') title= title.replace('é','e') title= title.replace('í','i') title= title.replace('ó','o') title= title.replace('ú','u') title= title.replace('ñ','n') title= title.replace('Crepusculo','Twilight') title= title.replace(' ','%20') url="http://api.themoviedb.org/3/search/movie?api_key="+Tmdb_key+"&query=" + title + "&language=es&include_adult=false" data = scrapertools.cachePage(url) data = re.sub(r"\n|\r|\t|\s{2}| ","",data) patron = '"page":1.*?,"id":(.*?),.*?"backdrop_path":"\\\(.*?)"' matches = re.compile(patron,re.DOTALL).findall(data) if len(matches)==0: extra=item.thumbnail show= item.thumbnail posterdb = item.thumbnail fanart_info = item.thumbnail fanart_trailer = item.thumbnail category= item.thumbnail itemlist.append( Item(channel=item.channel, title=item.title, url=item.url, action="findvideos", thumbnail=item.thumbnail, fanart=item.thumbnail ,extra=extra, show=show, category= category, folder=True) ) else: for id, fan in matches: try: posterdb = scrapertools.get_match(data,'"page":1,.*?"poster_path":"\\\(.*?)"') posterdb = "https://image.tmdb.org/t/p/original" + posterdb except: posterdb = item.thumbnail fanart="https://image.tmdb.org/t/p/original" + fan item.extra= fanart url ="http://api.themoviedb.org/3/movie/"+id+"/images?api_key="+Tmdb_key data = scrapertools.cachePage( url ) data = re.sub(r"\n|\r|\t|\s{2}| ","",data) patron = '"backdrops".*?"file_path":".*?",.*?"file_path":"(.*?)",.*?"file_path":"(.*?)",.*?"file_path":"(.*?)"' matches = re.compile(patron,re.DOTALL).findall(data) if len(matches) == 0: patron = '"backdrops".*?"file_path":"(.*?)",.*?"file_path":"(.*?)",.*?"file_path":"(.*?)"' matches = re.compile(patron,re.DOTALL).findall(data) if len(matches) == 0: fanart_info = item.extra fanart_trailer = item.extra fanart_2 = item.extra for fanart_info, fanart_trailer, fanart_2 in matches: fanart_info = "https://image.tmdb.org/t/p/original" + fanart_info fanart_trailer = "https://image.tmdb.org/t/p/original" + fanart_trailer fanart_2 = "https://image.tmdb.org/t/p/original" + fanart_2 #fanart_2 y arts url ="http://webservice.fanart.tv/v3/movies/"+id+"?api_key=dffe90fba4d02c199ae7a9e71330c987" data = scrapertools.cachePage(url) data = re.sub(r"\n|\r|\t|\s{2}| ","",data) patron = '"hdmovielogo":.*?"url": "([^"]+)"' matches = re.compile(patron,re.DOTALL).findall(data) if '"moviedisc"' in data: disc = scrapertools.get_match(data,'"moviedisc":.*?"url": "([^"]+)"') if '"movieposter"' in data: poster = scrapertools.get_match(data,'"movieposter":.*?"url": "([^"]+)"') if '"moviethumb"' in data: thumb = scrapertools.get_match(data,'"moviethumb":.*?"url": "([^"]+)"') if '"moviebanner"' in data: banner= scrapertools.get_match(data,'"moviebanner":.*?"url": "([^"]+)"') if len(matches)==0: extra= posterdb show = fanart_2 category = item.extra itemlist.append( Item(channel=item.channel, title = item.title , action="findvideos", url=item.url, server="torrent", thumbnail=posterdb, fanart=item.extra, extra=extra, show=show, category= category, folder=True) ) for logo in matches: if '"hdmovieclearart"' in data: clear=scrapertools.get_match(data,'"hdmovieclearart":.*?"url": "([^"]+)"') if '"moviebackground"' in data: extra=clear show= fanart_2 if 
'"moviedisc"' in data: category= disc else: category= clear itemlist.append( Item(channel=item.channel, title = item.title , action="findvideos", url=item.url, server="torrent", thumbnail=logo, fanart=item.extra, extra=extra,show=show, category= category, folder=True) ) else: extra= clear show=fanart_2 if '"moviedisc"' in data: category = disc else: category = clear itemlist.append( Item(channel=item.channel, title = item.title , action="findvideos", url=item.url, server="torrent", thumbnail=logo, fanart=item.extra, extra=extra,show=show, category= category, folder=True) ) if '"moviebackground"' in data: if '"hdmovieclearart"' in data: clear=scrapertools.get_match(data,'"hdmovieclearart":.*?"url": "([^"]+)"') extra=clear show= fanart_2 if '"moviedisc"' in data: category= disc else: category= clear else: extra=logo show= fanart_2 if '"moviedisc"' in data: category= disc else: category= logo itemlist.append( Item(channel=item.channel, title = item.title , action="findvideos", url=item.url, server="torrent", thumbnail=logo, fanart=item.extra, extra=extra,show=show, category= category, folder=True) ) if not '"hdmovieclearart"' in data and not '"moviebackground"' in data: extra= logo show= fanart_2 if '"moviedisc"' in data: category= disc else: category= item.extra itemlist.append( Item(channel=item.channel, title = item.title , action="findvideos", url=item.url, server="torrent", thumbnail=logo, fanart=item.extra,category= category, extra=extra,show=show , folder=True) ) title ="Info" if posterdb == item.thumbnail: if '"movieposter"' in data: thumbnail= poster else: thumbnail = item.thumbnail else: thumbnail = posterdb title = title.replace(title,bbcode_kodi2html("[COLOR skyblue]"+title+"[/COLOR]")) itemlist.append( Item(channel=item.channel, action="info" , title=title , url=item.url, thumbnail=posterdb, fanart=fanart_info, extra = extra, show = show,folder=False )) title= bbcode_kodi2html("[COLOR crimson]Trailer[/COLOR]") if len(item.extra)==0: fanart=item.thumbnail else: fanart = item.extra if '"moviethumb"' in data: thumbnail = thumb else: thumbnail = posterdb if '"moviebanner"' in data: extra= banner else: if 'hdmovieclearart"' in data: extra = clear else: extra = posterdb itemlist.append( Item(channel=item.channel, action="trailer", title=title , url=item.url , thumbnail=thumbnail , fulltitle = item.title , fanart=fanart_trailer, extra=extra, plot = item.plot,folder=True) ) return itemlist
2
Example 16
View licensedef handle(self, connection): P2pproxy.logger.debug('Handling request') hostport = connection.headers['Host'] query = urlparse.urlparse(connection.path).query self.params = urlparse.parse_qs(query) if connection.reqtype == 'channels': # /channels/ branch if len(connection.splittedpath) == 3 and connection.splittedpath[2].split('?')[ 0] == 'play': # /channels/play?id=[id] channel_id = self.get_param('id') if not channel_id: # /channels/play?id=&_=[epoch timestamp] is Torrent-TV widget proxy check # P2pProxy simply closes connection on this request sending Server header, so do we if self.get_param('_'): P2pproxy.logger.debug('Status check') connection.send_response(200) connection.send_header('Access-Control-Allow-Origin', '*') connection.send_header('Connection', 'close') connection.send_header('Content-Type', 'text/plain;charset=utf-8') connection.send_header('Server', 'P2pProxy/1.0.3.1 AceProxy') connection.wfile.write('\r\n') return else: connection.dieWithError() # Bad request return stream_url = None session = TorrentTvApi.auth(config.email, config.password) stream_type, stream = TorrentTvApi.stream_source(session, channel_id) if stream_type == 'torrent': stream_url = re.sub('^(http.+)$', lambda match: '/torrent/' + urllib2.quote(match.group(0), '') + '/stream.mp4', stream) elif stream_type == 'contentid': stream_url = re.sub('^([0-9a-f]{40})', lambda match: '/pid/' + urllib2.quote(match.group(0), '') + '/stream.mp4', stream) connection.path = stream_url connection.splittedpath = stream_url.split('/') connection.reqtype = connection.splittedpath[1].lower() connection.handleRequest(False) elif self.get_param('type') == 'm3u': # /channels/?filter=[filter]&group=[group]&type=m3u connection.send_response(200) connection.send_header('Content-Type', 'application/x-mpegurl') connection.end_headers() param_group = self.get_param('group') param_filter = self.get_param('filter') if not param_filter: param_filter = 'all' # default filter session = TorrentTvApi.auth(config.email, config.password) translations_list = TorrentTvApi.translations(session, param_filter) playlistgen = PlaylistGenerator() P2pproxy.logger.debug('Generating requested m3u playlist') for channel in translations_list: group_id = channel.getAttribute('group') if param_group and param_group != 'all' and param_group != group_id: # filter channels by group continue name = channel.getAttribute('name') group = TorrentTvApi.CATEGORIES[int(group_id)].decode('utf-8') cid = channel.getAttribute('id') logo = channel.getAttribute('logo') if config.fullpathlogo: logo = 'http://torrent-tv.ru/uploads/' + logo fields = {'name': name, 'id': cid, 'url': cid, 'group': group, 'logo': logo} fields['tvgid'] = config.tvgid %fields playlistgen.addItem(fields) P2pproxy.logger.debug('Exporting') header = '#EXTM3U url-tvg="%s" tvg-shift=%d\n' %(config.tvgurl, config.tvgshift) exported = playlistgen.exportm3u(hostport=hostport, header=header) exported = exported.encode('utf-8') connection.wfile.write(exported) else: # /channels/?filter=[filter] param_filter = self.get_param('filter') if not param_filter: param_filter = 'all' # default filter session = TorrentTvApi.auth(config.email, config.password) translations_list = TorrentTvApi.translations(session, param_filter, True) P2pproxy.logger.debug('Exporting') connection.send_response(200) connection.send_header('Access-Control-Allow-Origin', '*') connection.send_header('Connection', 'close') connection.send_header('Content-Length', str(len(translations_list))) 
connection.send_header('Content-Type', 'text/xml;charset=utf-8') connection.end_headers() connection.wfile.write(translations_list) elif connection.reqtype == 'xbmc.pvr': # same as /channels request if len(connection.splittedpath) == 3 and connection.splittedpath[2] == 'playlist': session = TorrentTvApi.auth(config.email, config.password) translations_list = TorrentTvApi.translations(session, 'all', True) P2pproxy.logger.debug('Exporting') connection.send_response(200) connection.send_header('Access-Control-Allow-Origin', '*') connection.send_header('Connection', 'close') connection.send_header('Content-Length', str(len(translations_list))) connection.send_header('Content-Type', 'text/xml;charset=utf-8') connection.end_headers() connection.wfile.write(translations_list) elif connection.reqtype == 'archive': # /archive/ branch if len(connection.splittedpath) == 3 and connection.splittedpath[2] == 'channels': # /archive/channels session = TorrentTvApi.auth(config.email, config.password) archive_channels = TorrentTvApi.archive_channels(session, True) P2pproxy.logger.debug('Exporting') connection.send_response(200) connection.send_header('Access-Control-Allow-Origin', '*') connection.send_header('Connection', 'close') connection.send_header('Content-Length', str(len(archive_channels))) connection.send_header('Content-Type', 'text/xml;charset=utf-8') connection.end_headers() connection.wfile.write(archive_channels) if len(connection.splittedpath) == 3 and connection.splittedpath[2].split('?')[ 0] == 'play': # /archive/play?id=[record_id] record_id = self.get_param('id') if not record_id: connection.dieWithError() # Bad request return stream_url = None session = TorrentTvApi.auth(config.email, config.password) stream_type, stream = TorrentTvApi.archive_stream_source(session, record_id) if stream_type == 'torrent': stream_url = re.sub('^(http.+)$', lambda match: '/torrent/' + urllib2.quote(match.group(0), '') + '/stream.mp4', stream) elif stream_type == 'contentid': stream_url = re.sub('^([0-9a-f]{40})', lambda match: '/pid/' + urllib2.quote(match.group(0), '') + '/stream.mp4', stream) connection.path = stream_url connection.splittedpath = stream_url.split('/') connection.reqtype = connection.splittedpath[1].lower() connection.handleRequest(False) elif self.get_param('type') == 'm3u': # /archive/?type=m3u&date=[param_date]&channel_id=[param_channel] connection.send_response(200) connection.send_header('Content-Type', 'application/x-mpegurl') connection.end_headers() param_date = self.get_param('date') if not param_date: d = date.today() # consider default date as today if not given else: try: param_date = param_date.split('-') d = date(param_date[2], param_date[1], param_date[0]) except IndexError: P2pproxy.logger.error('date param is not correct!') connection.dieWithError() return param_channel = self.get_param('channel_id') if param_channel == '' or not param_channel: P2pproxy.logger.error('Got /archive/ request but no channel_id specified!') connection.dieWithError() return session = TorrentTvApi.auth(config.email, config.password) records_list = TorrentTvApi.records(session, param_channel, d.strftime('%d-%m-%Y')) channels_list = TorrentTvApi.archive_channels(session) playlistgen = PlaylistGenerator() P2pproxy.logger.debug('Generating archive m3u playlist') for record in records_list: record_id = record.getAttribute('record_id') name = record.getAttribute('name') channel_id = record.getAttribute('channel_id') channel_name = '' logo = '' for channel in channels_list: if 
channel.getAttribute('channel_id') == channel_id: channel_name = channel.getAttribute('name') logo = channel.getAttribute('logo') if channel_name != '': name = '(' + channel_name + ') ' + name if logo != '' and config.fullpathlogo: logo = 'http://torrent-tv.ru/uploads/' + logo playlistgen.addItem({'name': name, 'url': record_id, 'logo': logo}) P2pproxy.logger.debug('Exporting') exported = playlistgen.exportm3u(hostport, empty_header=True, archive=True) exported = exported.encode('utf-8') connection.wfile.write(exported) else: # /archive/?date=[param_date]&channel_id=[param_channel] param_date = self.get_param('date') if not param_date: d = date.today() else: try: param_date = param_date.split('-') d = date(param_date[2], param_date[1], param_date[0]) except IndexError: P2pproxy.logger.error('date param is not correct!') connection.dieWithError() return param_channel = self.get_param('channel_id') if param_channel == '' or not param_channel: P2pproxy.logger.error('Got /archive/ request but no channel_id specified!') connection.dieWithError() return session = TorrentTvApi.auth(config.email, config.password) records_list = TorrentTvApi.records(session, param_channel, d.strftime('%d-%m-%Y'), True) P2pproxy.logger.debug('Exporting') connection.send_response(200) connection.send_header('Access-Control-Allow-Origin', '*') connection.send_header('Connection', 'close') connection.send_header('Content-Length', str(len(records_list))) connection.send_header('Content-Type', 'text/xml;charset=utf-8') connection.end_headers() connection.wfile.write(records_list)
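Both the /channels/play and /archive/play branches above use re.sub with a callable replacement to turn whatever the Torrent-TV API hands back into an internal proxy route: an http URL becomes /torrent/<quoted-url>/stream.mp4 and a 40-hex-digit content id becomes /pid/<id>/stream.mp4, with urllib2.quote(..., '') percent-encoding the whole match. A small Python 3 sketch of the same rewriting (the function name and sample URL are illustrative; the original selects the branch from the stream_type reported by the API rather than by inspecting the string):

import re
from urllib.parse import quote

def to_proxy_path(stream):
    if stream.startswith('http'):
        # http source -> /torrent/<quoted url>/stream.mp4
        return re.sub(r'^(http.+)$',
                      lambda m: '/torrent/' + quote(m.group(0), safe='') + '/stream.mp4',
                      stream)
    # 40-hex-digit content id -> /pid/<quoted id>/stream.mp4
    return re.sub(r'^([0-9a-f]{40})',
                  lambda m: '/pid/' + quote(m.group(0), safe='') + '/stream.mp4',
                  stream)

print(to_proxy_path('http://example.org/a.torrent'))
# /torrent/http%3A%2F%2Fexample.org%2Fa.torrent/stream.mp4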
2
Example 17
View licensedef update_foreign_fields(old_id, node): dry_run = '--dry' in sys.argv logger.info('* Updating ForeignFields for node {}->{}'.format(old_id, node)) bns_owner = list(database['boxnodesettings'].find({'owner': old_id})) if bns_owner: logger.info('** Updating {} BoxNodeSettings (owner) {}'.format(old_id, [d['_id'] for d in bns_owner])) for doc in bns_owner: database['boxnodesettings'].find_and_modify( {'_id': doc['_id']}, {'$set':{ 'owner': node._id }} ) bus_og = list(database['boxusersettings'].find({'oauth_grants.{}'.format(old_id): {'$ne': None}})) if bus_og: logger.info('** Updating {} BoxUserSettings (oauth_grants) {}'.format(old_id, [d['_id'] for d in bus_og])) for doc in bus_og: og = doc['oauth_grants'] og[node._id] = og.pop(old_id) database['boxusersettings'].find_and_modify( {'_id': doc['_id']}, {'$set':{ 'oauth_grants': og }} ) advns_o = list(database['addondataversenodesettings'].find({'owner': old_id})) if advns_o: logger.info('** Updating {} AddonDataverseNodeSettings (owner) {}'.format(old_id, [d['_id'] for d in advns_o])) for doc in advns_o: database['addondataversenodesettings'].find_and_modify( {'_id': doc['_id']}, {'$set':{ 'owner': node._id }} ) advus_og = list(database['addondataverseusersettings'].find({'oauth_grants.{}'.format(old_id): {'$ne': None}})) if advus_og: logger.info('** Updating {} AddonDataverseUserSettings (oauth_grants) {}'.format(old_id, [d['_id'] for d in advus_og])) for doc in advus_og: og = doc['oauth_grants'] og[node._id] = og.pop(old_id) database['addondataverseusersettings'].find_and_modify( {'_id': doc['_id']}, {'$set':{ 'oauth_grants': og }} ) dbns_o = list(database['dropboxnodesettings'].find({'owner': old_id})) if dbns_o: logger.info('** Updating {} DropboxNodeSettings (owner) {}'.format(old_id, [d['_id'] for d in dbns_o])) for doc in dbns_o: database['dropboxnodesettings'].find_and_modify( {'_id': doc['_id']}, {'$set':{ 'owner': node._id }} ) dbus_og = list(database['dropboxusersettings'].find({'oauth_grants.{}'.format(old_id): {'$ne': None}})) if dbus_og: logger.info('** Updating {} DropboxUserSettings (oauth_grants) {}'.format(old_id, [d['_id'] for d in dbus_og])) for doc in dbus_og: og = doc['oauth_grants'] og[node._id] = og.pop(old_id) database['dropboxusersettings'].find_and_modify( {'_id': doc['_id']}, {'$set':{ 'oauth_grants': og }} ) afsns_o = list(database['addonfigsharenodesettings'].find({'owner': old_id})) if afsns_o: logger.info('** Updating {} AddonFigShareNodeSettings (owner) {}'.format(old_id, [d['_id'] for d in afsns_o])) for doc in afsns_o: database['addonfigsharenodesettings'].find_and_modify( {'_id': doc['_id']}, {'$set':{ 'owner': node._id }} ) ## Figshare has no oauth_grants fwns_o = list(database['forwardnodesettings'].find({'owner': old_id})) if fwns_o: logger.info('** Updating {} ForwardNodeSettings (owner) {}'.format(old_id, [d['_id'] for d in fwns_o])) for doc in fwns_o: database['forwardnodesettings'].find_and_modify( {'_id': doc['_id']}, {'$set':{ 'owner': node._id }} ) ghns_o = list(database['githubnodesettings'].find({'owner': old_id})) if ghns_o: logger.info('** Updating {} GithubNodeSettings (owner) {}'.format(old_id, [d['_id'] for d in ghns_o])) for doc in ghns_o: database['githubnodesettings'].find_and_modify( {'_id': doc['_id']}, {'$set':{ 'owner': node._id }} ) ghus_og = list(database['githubusersettings'].find({'oauth_grants.{}'.format(old_id): {'$ne': None}})) if ghus_og: logger.info('** Updating {} GithubUserSettings (oauth_grants) {}'.format(old_id, [d['_id'] for d in ghus_og])) for doc in 
ghus_og: og = doc['oauth_grants'] og[node._id] = og.pop(old_id) database['githubusersettings'].find_and_modify( {'_id': doc['_id']}, {'$set':{ 'oauth_grants': og }} ) gdns_o = list(database['googledrivenodesettings'].find({'owner': old_id})) if gdns_o: logger.info('** Updating {} GoogleDriveNodeSettings (owner) {}'.format(old_id, [d['_id'] for d in gdns_o])) for doc in gdns_o: database['googledrivenodesettings'].find_and_modify( {'_id': doc['_id']}, {'$set':{ 'owner': node._id }} ) gdus_og = list(database['googledriveusersettings'].find({'oauth_grants.{}'.format(old_id): {'$ne': None}})) if gdus_og: logger.info('** Updating {} GoogleDriveUserSettings (oauth_grants) {}'.format(old_id, [d['_id'] for d in gdus_og])) for doc in gdus_og: og = doc['oauth_grants'] og[node._id] = og.pop(old_id) database['googledriveusersettings'].find_and_modify( {'_id': doc['_id']}, {'$set':{ 'oauth_grants': og }} ) mns_o = list(database['mendeleynodesettings'].find({'owner': old_id})) if mns_o: logger.info('** Updating {} MendeleyNodeSettings (owner) {}'.format(old_id, [d['_id'] for d in mns_o])) for doc in mns_o: database['mendeleynodesettings'].find_and_modify( {'_id': doc['_id']}, {'$set':{ 'owner': node._id }} ) mus_og = list(database['mendeleyusersettings'].find({'oauth_grants.{}'.format(old_id): {'$ne': None}})) if mus_og: logger.info('** Updating {} MendeleyUserSettings (oauth_grants) {}'.format(old_id, [d['_id'] for d in mus_og])) for doc in mus_og: og = doc['oauth_grants'] og[node._id] = og.pop(old_id) database['mendeleyusersettings'].find_and_modify( {'_id': doc['_id']}, {'$set':{ 'oauth_grants': og }} ) osfsns_o = list(database['osfstoragenodesettings'].find({'owner': old_id})) if osfsns_o: logger.info('** Updating {} OsfStorageNodeSettings (owner) {}'.format(old_id, [d['_id'] for d in osfsns_o])) for doc in osfsns_o: database['osfstoragenodesettings'].find_and_modify( {'_id': doc['_id']}, {'$set':{ 'owner': node._id }} ) ocns_o = list(database['addonowncloudnodesettings'].find({'owner': old_id})) if ocns_o: logger.info('** Updating {} AddonOwnCloudNodeSettings (owner) {}'.format(old_id, [d['_id'] for d in ocns_o])) for doc in ocns_o: database['addonowncloudnodesettings'].find_and_modify( {'_id': doc['_id']}, {'$set':{ 'owner': node._id }} ) ocus_og = list(database['addonowncloudusersettings'].find({'oauth_grants.{}'.format(old_id): {'$ne': None}})) if ocus_og: logger.info('** Updating {} AddonOwnCloudUserSettings (oauth_grants) {}'.format(old_id, [d['_id'] for d in ocus_og])) for doc in ocus_og: og = doc['oauth_grants'] og[node._id] = og.pop(old_id) database['addonowncloudusersettings'].find_and_modify( {'_id': doc['_id']}, {'$set':{ 'oauth_grants': og }} ) s3ns_o = list(database['s3nodesettings'].find({'owner': old_id})) if s3ns_o: logger.info('** Updating {} s3NodeSettings (owner) {}'.format(old_id, [d['_id'] for d in s3ns_o])) for doc in s3ns_o: database['s3nodesettings'].find_and_modify( {'_id': doc['_id']}, {'$set':{ 'owner': node._id }} ) s3us_og = list(database['s3usersettings'].find({'oauth_grants.{}'.format(old_id): {'$ne': None}})) if s3us_og: logger.info('** Updating {} S3UserSettings (oauth_grants) {}'.format(old_id, [d['_id'] for d in s3us_og])) for doc in s3us_og: og = doc['oauth_grants'] og[node._id] = og.pop(old_id) database['s3usersettings'].find_and_modify( {'_id': doc['_id']}, {'$set':{ 'oauth_grants': og }} ) awns_o = list(database['addonwikinodesettings'].find({'owner': old_id})) if awns_o: logger.info('** Updating {} AddonWikiNodeSettings (owner) {}'.format(old_id, [d['_id'] for 
d in awns_o])) for doc in awns_o: database['addonwikinodesettings'].find_and_modify( {'_id': doc['_id']}, {'$set':{ 'owner': node._id }} ) nwp_n = list(database['nodewikipage'].find({'node': old_id})) if nwp_n: logger.info('** Updating {} NodeWikiPage (node) {}'.format(old_id, [d['_id'] for d in nwp_n])) for doc in nwp_n: database['nodewikipage'].find_and_modify( {'_id': doc['_id']}, {'$set':{ 'node': node._id }} ) zns_o = list(database['zoteronodesettings'].find({'owner': old_id})) if zns_o: logger.info('** Updating {} ZoteroNodeSettings (owner) {}'.format(old_id, [d['_id'] for d in zns_o])) for doc in zns_o: database['zoteronodesettings'].find_and_modify( {'_id': doc['_id']}, {'$set':{ 'owner': node._id }} ) zus_og = list(database['zoterousersettings'].find({'oauth_grants.{}'.format(old_id): {'$ne': None}})) if zus_og: logger.info('** Updating {} ZoteroUserSettings (oauth_grants) {}'.format(old_id, [d['_id'] for d in zus_og])) for doc in zus_og: og = doc['oauth_grants'] og[node._id] = og.pop(old_id) database['zoterousersettings'].find_and_modify( {'_id': doc['_id']}, {'$set':{ 'oauth_grants': og }} ) aj_sn = list(database['archivejob'].find({'src_node': old_id})) if aj_sn: logger.info('** Updating {} ArchiveJobs (src_node) {}'.format(old_id, [d['_id'] for d in aj_sn])) for doc in aj_sn: database['archivejob'].find_and_modify( {'_id': doc['_id']}, {'$set':{ 'src_node': node._id }} ) tfn_n = list(database['trashedfilenode'].find({'node': old_id})) if tfn_n: logger.info('** Updating {} TrashedFileNodes (node) {}'.format(old_id, [d['_id'] for d in tfn_n])) for doc in tfn_n: del_on = doc.pop('deleted_on') # Remove non-JSON-serializable datetime fields last_touch = doc.pop('last_touched') hist_mods = [doc['history'][doc['history'].index(h)].pop('modified') for h in doc['history']] replacement = json.loads(re.sub(r'\b{}\b'.format(old_id), node._id, json.dumps(doc))) for i, mod in enumerate(hist_mods): replacement['history'][i]['modified'] = mod database['trashedfilenode'].find_and_modify( {'_id': doc['_id']}, {'$set':{ 'node': replacement['node'], 'history': replacement['history'] }} ) sfn_n = list(database['storedfilenode'].find({'node': old_id})) if sfn_n: logger.info('** Updating {} StoredFileNodes (node) {}'.format(old_id, [d['_id'] for d in sfn_n])) for doc in sfn_n: doc.pop('last_touched') # Remove non-JSON-serializable datetime fields hist_mods = [doc['history'][doc['history'].index(h)].pop('modified') for h in doc['history']] replacement = json.loads(re.sub(r'\b{}\b'.format(old_id), node._id, json.dumps(doc))) for i, mod in enumerate(hist_mods): replacement['history'][i]['modified'] = mod database['storedfilenode'].find_and_modify( {'_id': doc['_id']}, {'$set':{ 'node': replacement['node'], 'history': replacement['history'] }} ) com_n = list(database['comment'].find({'node': old_id})) if com_n: logger.info('** Updating {} Comments (node) {}'.format(old_id, [d['_id'] for d in com_n])) for doc in com_n: database['comment'].find_and_modify( {'_id': doc['_id']}, {'$set':{ 'node': node._id }} ) com_t = list(database['comment'].find({'target': {'$in': [old_id]}})) if com_t: logger.info('** Updating {} Comments (target) {}'.format(old_id, [d['_id'] for d in com_t])) for doc in com_t: targ = doc['target'] targ.insert(targ.index(old_id), node._id) targ.remove(old_id) database['comment'].find_and_modify( {'_id': doc['_id']}, {'$set':{ 'target': targ }} ) com_t = list(database['comment'].find({'root_target': {'$in': [old_id]}})) if com_t: logger.info('** Updating {} Comments (root_target) 
{}'.format(old_id, [d['_id'] for d in com_t])) for doc in com_t: rtarg = doc['root_target'] rtarg.insert(rtarg.index(old_id), node._id) rtarg.remove(old_id) database['comment'].find_and_modify( {'_id': doc['_id']}, {'$set':{ 'root_target': rtarg }} ) nl_on = list(database['nodelog'].find({'original_node': old_id})) if nl_on: logger.info('** Updating {} NodeLogs (original_node) {}'.format(old_id, [d['_id'] for d in nl_on])) for doc in nl_on: database['nodelog'].find_and_modify( {'_id': doc['_id']}, {'$set':{ 'original_node': node._id }} ) nl_n = list(database['nodelog'].find({'node': old_id})) if nl_n: logger.info('** Updating {} NodeLogs (node) {}'.format(old_id, [d['_id'] for d in nl_n])) for doc in nl_n: database['nodelog'].find_and_modify( {'_id': doc['_id']}, {'$set':{ 'node': node._id }} ) nl_pac = list(database['nodelog'].find({'params.auth.callback_url': {'$regex': '/{}/'.format(old_id)}})) if nl_pac: logger.info('** Updating {} NodeLogs (params.auth.callback_url) {}'.format(old_id, [d['_id'] for d in nl_pac])) for doc in nl_pac: params = doc['params'] params['auth']['callback_url'] = params['auth']['callback_url'].replace('{}/'.format(old_id), '{}/'.format(node._id)) database['nodelog'].find_and_modify( {'_id': doc['_id']}, {'$set':{ 'params': params }} ) nl_pn = list(database['nodelog'].find({'params.node': old_id})) if nl_pn: logger.info('** Updating {} NodeLogs (params.node) {}'.format(old_id, [d['_id'] for d in nl_pn])) for doc in nl_pn: params = doc['params'] params['node'] = node._id database['nodelog'].find_and_modify( {'_id': doc['_id']}, {'$set':{ 'params': params }} ) nl_ppar = list(database['nodelog'].find({'params.parent': old_id})) if nl_ppar: logger.info('** Updating {} NodeLogs (params.parent) {}'.format(old_id, [d['_id'] for d in nl_ppar])) for doc in nl_ppar: params = doc['params'] params['parent'] = node._id database['nodelog'].find_and_modify( {'_id': doc['_id']}, {'$set':{ 'params': params }} ) nl_ppro = list(database['nodelog'].find({'params.project': old_id})) if nl_ppro: logger.info('** Updating {} NodeLogs (params.project) {}'.format(old_id, [d['_id'] for d in nl_ppro])) for doc in nl_ppro: params = doc['params'] params['project'] = node._id database['nodelog'].find_and_modify( {'_id': doc['_id']}, {'$set':{ 'params': params }} ) nl_ppn = list(database['nodelog'].find({'params.parent_node': old_id})) if nl_ppn: logger.info('** Updating {} NodeLogs (params.parent_node) {}'.format(old_id, [d['_id'] for d in nl_ppn])) for doc in nl_ppn: params = doc['params'] params['parent_node'] = node._id database['nodelog'].find_and_modify( {'_id': doc['_id']}, {'$set':{ 'params': params }} ) nl_pdn = list(database['nodelog'].find({'params.destination.nid': old_id})) if nl_pdn: logger.info('** Updating {} NodeLogs (params.destination.nid) {}'.format(old_id, [d['_id'] for d in nl_pdn])) for doc in nl_pdn: params = doc['params'] params['destination']['nid'] = node._id if params['destination'].get('url', None): params['destination']['url'] = params['destination']['url'].replace('{}/'.format(old_id), '{}/'.format(node._id)) database['nodelog'].find_and_modify( {'_id': doc['_id']}, {'$set':{ 'params': params }} ) nl_pdr = list(database['nodelog'].find({'params.destination.resource': old_id})) if nl_pdr: logger.info('** Updating {} NodeLogs (params.destination.resource) {}'.format(old_id, [d['_id'] for d in nl_pdr])) for doc in nl_pdr: params = doc['params'] params['destination']['resource'] = node._id database['nodelog'].find_and_modify( {'_id': doc['_id']}, {'$set':{ 'params': 
params }} ) nl_pdni = list(database['nodelog'].find({'params.destination.node._id': old_id})) if nl_pdni: logger.info('** Updating {} NodeLogs (params.destination.node._id) {}'.format(old_id, [d['_id'] for d in nl_pdni])) for doc in nl_pdni: params = doc['params'] params['destination']['node']['_id'] = node._id if params['destination']['node'].get('url', None): params['destination']['node']['url'] = params['destination']['node']['url'].replace('{}/'.format(old_id), '{}/'.format(node._id)) database['nodelog'].find_and_modify( {'_id': doc['_id']}, {'$set':{ 'params': params }} ) nl_ppi = list(database['nodelog'].find({'params.pointer.id': old_id})) if nl_ppi: logger.info('** Updating {} NodeLogs (params.pointer.id) {}'.format(old_id, [d['_id'] for d in nl_ppi])) for doc in nl_ppi: params = doc['params'] params['pointer']['id'] = node._id if params['pointer'].get('url', None): params['pointer']['url'] = params['pointer']['url'].replace('{}/'.format(old_id), '{}/'.format(node._id)) database['nodelog'].find_and_modify( {'_id': doc['_id']}, {'$set':{ 'params': params }} ) nl_psn = list(database['nodelog'].find({'params.source.nid': old_id})) if nl_psn: logger.info('** Updating {} NodeLogs (params.source.nid) {}'.format(old_id, [d['_id'] for d in nl_psn])) for doc in nl_psn: params = doc['params'] params['source']['nid'] = node._id if params['source'].get('url', None): params['source']['url'] = params['source']['url'].replace('{}/'.format(old_id), '{}/'.format(node._id)) database['nodelog'].find_and_modify( {'_id': doc['_id']}, {'$set':{ 'params': params }} ) nl_psni = list(database['nodelog'].find({'params.source.node._id': old_id})) if nl_psni: logger.info('** Updating {} NodeLogs (params.source.node._id) {}'.format(old_id, [d['_id'] for d in nl_psni])) for doc in nl_psni: params = doc['params'] params['source']['node']['_id'] = node._id if params['source']['node'].get('url', None): params['source']['node']['url'] = params['source']['node']['url'].replace('{}/'.format(old_id), '{}/'.format(node._id)) database['nodelog'].find_and_modify( {'_id': doc['_id']}, {'$set':{ 'params': params }} ) nl_psr = list(database['nodelog'].find({'params.source.resource': old_id})) if nl_psr: logger.info('** Updating {} NodeLogs (params.source.resource) {}'.format(old_id, [d['_id'] for d in nl_psr])) for doc in nl_psr: params = doc['params'] params['source']['resource'] = node._id database['nodelog'].find_and_modify( {'_id': doc['_id']}, {'$set':{ 'params': params }} ) nl_ptni = list(database['nodelog'].find({'params.template_node._id': old_id})) if nl_ptni: logger.info('** Updating {} NodeLogs (params.template_node._id) {}'.format(old_id, [d['_id'] for d in nl_ptni])) for doc in nl_ptni: params = doc['params'] params['template_node']['_id'] = node._id if params['template_node'].get('url', None): params['template_node']['url'] = params['template_node']['url'].replace('{}/'.format(old_id), '{}/'.format(node._id)) database['nodelog'].find_and_modify( {'_id': doc['_id']}, {'$set':{ 'params': params }} ) nl_pud = list(database['nodelog'].find({'params.urls.download': {'$regex': '/{}/'.format(old_id)}})) if nl_pud: logger.info('** Updating {} NodeLogs (params.source.node._id) {}'.format(old_id, [d['_id'] for d in nl_pud])) for doc in nl_pud: params = doc['params'] params['urls']['download'] = params['urls']['download'].replace('{}/'.format(old_id), '{}/'.format(node._id)) if params['urls'].get('view', None): params['urls']['view'] = params['urls']['view'].replace('{}/'.format(old_id), '{}/'.format(node._id)) 
database['nodelog'].find_and_modify( {'_id': doc['_id']}, {'$set':{ 'params': params }} ) ptr_n = list(database['pointer'].find({'node': old_id})) if ptr_n: logger.info('** Updating {} Pointers (node) {}'.format(old_id, [d['_id'] for d in ptr_n])) for doc in ptr_n: database['pointer'].find_and_modify( {'_id': doc['_id']}, {'$set':{ 'node': node._id }} ) n_ff = list(database['node'].find({'forked_from': old_id})) if n_ff: logger.info('** Updating {} Nodes (forked_from) {}'.format(old_id, [d['_id'] for d in n_ff])) for doc in n_ff: database['node'].find_and_modify( {'_id': doc['_id']}, {'$set':{ 'forked_from': node._id }} ) n_rf = list(database['node'].find({'registered_from': old_id})) if n_rf: logger.info('** Updating {} Nodes (registered_from) {}'.format(old_id, [d['_id'] for d in n_rf])) for doc in n_rf: database['node'].find_and_modify( {'_id': doc['_id']}, {'$set':{ 'registered_from': node._id }} ) n_root = list(database['node'].find({'root': old_id})) if n_root: logger.info('** Updating {} Nodes (root) {}'.format(old_id, [d['_id'] for d in n_root])) for doc in n_root: database['node'].find_and_modify( {'_id': doc['_id']}, {'$set':{ 'root': node._id }} ) n_par = list(database['node'].find({'parent': old_id})) if n_par: logger.info('** Updating {} Nodes (parent) {}'.format(old_id, [d['_id'] for d in n_par])) for doc in n_par: database['node'].find_and_modify( {'_id': doc['_id']}, {'$set':{ 'parent': node._id }} ) n_cns = list(database['node'].find({'$where': 'if (this.child_node_subscriptions!==undefined){{var keys=Object.keys(this.child_node_subscriptions);for(var i=0;i<keys.length;i+=1){{if(this.child_node_subscriptions[keys[i]].indexOf("{}")!==-1){{return true}}}}}}return false;'.format(old_id)})) if n_cns: docs = list(n_cns) logger.info('** Updating {} Nodes (child_node_subscriptions) {}'.format(old_id, [d['_id'] for d in docs])) for doc in docs: if doc['_id'] in cns_dict_to_update: cns = cns_dict_to_update[doc['_id']] else: cns = doc['child_node_subscriptions'] replacement = json.loads(re.sub(r'\b{}\b'.format(old_id), node._id, json.dumps(cns))) cns_dict_to_update[doc['_id']] = replacement database['node'].find_and_modify( {'_id': doc['_id']}, {'$set':{ 'child_node_subscriptions': replacement }} ) nd_nl = list(database['notificationdigest'].find({'node_lineage': {'$in': [old_id]}})) if nd_nl: logger.info('** Updating {} NotificationDigest (node_lineage) {}'.format(old_id, [d['_id'] for d in nd_nl])) for doc in nd_nl: nl = doc['node_lineage'] nl.insert(nl.index(old_id), node._id) nl.remove(old_id) if doc['message'].find('/{}/'.format(old_id)) != -1: # avoid html regexes message = doc['message'].replace('/{}/'.format(old_id), '/{}/'.format(node._id)) database['notificationdigest'].find_and_modify( {'_id': doc['_id']}, {'$set':{ 'message': message, 'node_lineage': nl }} ) else: database['notificationdigest'].find_and_modify( {'_id': doc['_id']}, {'$set':{ 'node_lineage': nl }} ) ns_i = list(database['notificationsubscription'].find({'_id': {'$regex': old_id}})) if ns_i: logger.info('** Updating {} NotificationSubscription (_id, owner) {}'.format(old_id, [d['_id'] for d in ns_i])) for doc in ns_i: replacement = json.loads(re.sub(r'\b{}\b'.format(old_id), node._id, json.dumps(doc))) new_id = replacement.pop('_id') database['notificationsubscription'].find_and_modify( {'_id': new_id}, {'$set':replacement}, upsert=True ) database['notificationsubscription'].remove({'_id': doc['_id']}) u_uc = list(database['user'].find({'unclaimed_records.{}'.format(old_id): {'$ne': None}})) if u_uc: 
logger.info('** Updating {} Users (unclaimed_records) {}'.format(old_id, [d['_id'] for d in u_uc])) for doc in u_uc: ucr = doc['unclaimed_records'] ucr[node._id] = ucr.pop(old_id) database['user'].find_and_modify( {'_id': doc['_id']}, {'$set':{ 'unclaimed_records': ucr }} ) u_caer = list(database['user'].find({'contributor_added_email_records.{}'.format(old_id): {'$ne': None}})) if u_caer: logger.info('** Updating {} Users (contributor_added_email_records) {}'.format(old_id, [d['_id'] for d in u_caer])) for doc in u_caer: caer = doc['contributor_added_email_records'] caer[node._id] = caer.pop(old_id) database['user'].find_and_modify( {'_id': doc['_id']}, {'$set':{ 'contributor_added_email_records': caer }} ) u_nc = list(database['user'].find({'notifications_configured.{}'.format(old_id): {'$ne': None}})) if u_nc: logger.info('** Updating {} Users (notifications_configured) {}'.format(old_id, [d['_id'] for d in u_nc])) for doc in u_nc: nc = doc['notifications_configured'] nc[node._id] = nc.pop(old_id) database['user'].find_and_modify( {'_id': doc['_id']}, {'$set':{ 'notifications_configured': nc }} ) u_cvt = list(database['user'].find({'comments_viewed_timestamp.{}'.format(old_id): {'$ne': None}})) if u_cvt: logger.info('** Updating {} Users (comments_viewed_timestamp) {}'.format(old_id, [d['_id'] for d in u_cvt])) for doc in u_cvt: nc = doc['comments_viewed_timestamp'] nc[node._id] = nc.pop(old_id) database['user'].find_and_modify( {'_id': doc['_id']}, {'$set':{ 'comments_viewed_timestamp': nc }} ) pc_i = list(database['pagecounters'].find({'_id': {'$regex': ':{}:'.format(old_id)}})) if pc_i: logger.info('** Updating {} PageCounters (_id) {}'.format(old_id, [d['_id'] for d in pc_i])) for doc in pc_i: replacement = json.loads(re.sub(r'\b{}\b'.format(old_id), node._id, json.dumps(doc))) new_id = replacement.pop('_id') database['pagecounters'].find_and_modify( {'_id': new_id}, {'$set':replacement}, upsert=True ) database['pagecounters'].remove({'_id': doc['_id']}) ss_dv = list(database['session'].find({'data.visited': {'$regex': ':{}:'.format(old_id)}})) if ss_dv: logger.info('** Updating {} Session (data) {}'.format(old_id, [d['_id'] for d in ss_dv])) for doc in ss_dv: repl_data = json.loads(re.sub(r'\b{}\b'.format(old_id), node._id, json.dumps(doc['data']))) database['session'].find_and_modify( {'_id': doc['_id']}, {'$set':{ 'data': repl_data }} ) wc_n = list(database['watchconfig'].find({'node': old_id})) if wc_n: logger.info('** Updating {} WatchConfigs (node) {}'.format(old_id, [d['_id'] for d in wc_n])) for doc in wc_n: database['watchconfig'].find_and_modify( {'_id': doc['_id']}, {'$set':{ 'node': node._id }} ) pl_n = list(database['privatelink'].find({'nodes': old_id})) if pl_n: logger.info('** Updating {} PrivateLinks (nodes) {}'.format(old_id, [d['_id'] for d in pl_n])) for d in pl_n: new_nodes = d['nodes'] new_nodes.remove(old_id) new_nodes.append(node._id) database['privatelink'].find_and_modify( {'_id': d['_id']}, {'$set':{ 'nodes': new_nodes }} ) dr_bf = list(database['draftregistration'].find({'branched_from': old_id})) if dr_bf: logger.info('** Updating {} DraftRegistrations (branched_from) {}'.format(old_id, [d['_id'] for d in dr_bf])) for doc in dr_bf: database['draftregistration'].find_and_modify( {'_id': doc['_id']}, {'$set':{ 'branched_from': node._id }} ) dr_rn = list(database['draftregistration'].find({'registered_node': old_id})) if dr_rn: logger.info('** Updating {} DraftRegistrations (registered_node) {}'.format(old_id, [d['_id'] for d in dr_rn])) for doc in dr_rn: 
database['draftregistration'].find_and_modify( {'_id': doc['_id']}, {'$set':{ 'registered_node': node._id }} ) eta_er = list(database['embargoterminationapproval'].find({'embargoed_registration': old_id})) if eta_er: logger.info('** Updating {} EmbargoTerminationApprovals (embargoed_registration) {}'.format(old_id, [d['_id'] for d in eta_er])) for doc in eta_er: database['embargoterminationapproval'].find_and_modify( {'_id': doc['_id']}, {'$set':{ 'embargoed_registration': node._id }} ) ra_su = list(database['registrationapproval'].find({'$where': 'var keys=Object.keys(this.stashed_urls);for(var i=0;i<keys.length;i+=1){{if(this.stashed_urls[keys[i]].view.indexOf("{}")!==-1){{return true}}if(this.stashed_urls[keys[i]].approve.indexOf("{}")!==-1){{return true}}if(this.stashed_urls[keys[i]].reject.indexOf("{}")!==-1){{return true}}}}return false;'.format(old_id, old_id, old_id)})) if ra_su: logger.info('** Updating {} RegistrationApprovals (stashed_urls) {}'.format(old_id, [d['_id'] for d in ra_su])) for doc in ra_su: updated_stash = json.loads(re.sub(r'\b{}\b'.format(old_id), node._id, json.dumps(doc['stashed_urls']))) database['registrationapproval'].find_and_modify( {'_id': doc['_id']}, {'$set':{ 'stashed_urls': updated_stash }} ) idf_r = list(database['identifier'].find({'referent': old_id})) if idf_r: logger.info('** Updating {} Identifiers (referent) {}'.format(old_id, [d['_id'] for d in idf_r])) for doc in idf_r: ref = doc['referent'] ref[1] = 'preprintservice' database['identifier'].find_and_modify( {'_id': doc['_id']}, {'$set':{ 'referent': ref }} ) qm_dn = list(database['queuedmail'].find({'data.nid': old_id})) if qm_dn: logger.info('** Updating {} QueuedMails (data.nid) {}'.format(old_id, [d['_id'] for d in qm_dn])) for doc in qm_dn: repl_data = json.loads(re.sub(r'\b{}\b'.format(old_id), node._id, json.dumps(doc['data']))) database['queuedmail'].find_and_modify( {'_id': doc['_id']}, {'$set':{ 'data': repl_data }} ) ps_n = list(database['preprintservice'].find({'node': old_id})) if ps_n: logger.info('** Updating {} PreprintServices (node) {}'.format(old_id, [d['_id'] for d in ps_n])) for doc in ps_n: database['preprintservice'].find_and_modify( {'_id': doc['_id']}, {'$set':{ 'node': node._id }} )
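The re.sub work in the migration example above is the whole-word id swap, re.sub(r'\b{}\b'.format(old_id), node._id, json.dumps(doc)), run after the non-JSON-serializable datetime fields have been popped. A minimal sketch of that pattern, using made-up ids and a made-up document in place of the MongoDB records the script actually touches:

import json
import re

# Made-up ids and document; the real script pops datetime fields first
# because they are not JSON-serializable.
old_id, new_id = 'abc12', 'xyz89'
doc = {'_id': 'file1', 'node': 'abc12', 'history': [{'path': '/abc12/readme.txt'}]}

# \b limits the replacement to whole-word occurrences of the short guid.
replacement = json.loads(re.sub(r'\b{}\b'.format(old_id), new_id, json.dumps(doc)))
print(replacement)
# {'_id': 'file1', 'node': 'xyz89', 'history': [{'path': '/xyz89/readme.txt'}]}

If old_id could ever contain regex metacharacters, building the pattern with re.escape(old_id) would be the safer variant.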
2
Example 18
View licensedef main(argv=None): if argv is None: argv = sys.argv parser = E.OptionParser( version="%prog version: $Id: r_table2scatter.py 2782 2009-09-10 11:40:29Z andreas $") parser.add_option("-c", "--columns", dest="columns", type="string", help="columns to take from table. Choices are 'all', 'all-but-first' or a ','-separated list of columns.") parser.add_option("--logscale", dest="logscale", type="string", help="log-transform one or both axes [default=%Default].") parser.add_option("-a", "--hardcopy", dest="hardcopy", type="string", help="write hardcopy to file [default=%default].", metavar="FILE") parser.add_option("-f", "--file", dest="input_filename", type="string", help="filename with table data [default=%default].", metavar="FILE") parser.add_option("-2", "--file2", dest="input_filename2", type="string", help="additional data file [default=%default].", metavar="FILE") parser.add_option("-s", "--stats", dest="statistics", type="choice", choices=("correlation", "spearman", "pearson", "count"), help="statistical quantities to compute [default=%default]", action="append") parser.add_option("-p", "--plot", dest="plot", type="choice", choices=("scatter", "pairs", "panel", "bar", "bar-stacked", "bar-besides", "1_vs_x", "matched", "boxplot", "scatter+marginal", "scatter-regression"), help="plots to plot [default=%default]", action="append") parser.add_option("-t", "--threshold", dest="threshold", type="float", help="min threshold to use for counting method [default=%default].") parser.add_option("-o", "--colours", dest="colours", type="int", help="column with colour information [default=%default].") parser.add_option("-l", "--plot-labels", dest="labels", type="string", help="column labels for x and y in matched plots [default=%default].") parser.add_option("-d", "--add-diagonal", dest="add_diagonal", action="store_true", help="add diagonal to plot [default=%default].") parser.add_option("-e", "--plot-legend", dest="legend", type="int", help="column with legend [default=%default].") parser.add_option("-r", "--options", dest="r_options", type="string", help="R plotting options [default=%default].") parser.add_option("--format", dest="format", type="choice", choices=("full", "sparse"), help="output format [default=%default].") parser.add_option("--title", dest="title", type="string", help="""plot title [default=%default].""") parser.add_option("", "--xrange", dest="xrange", type="string", help="x viewing range of plot [default=%default].") parser.add_option("", "--yrange", dest="yrange", type="string", help="y viewing range of plot[default=%default].") parser.add_option("--allow-empty-file", dest="fail_on_empty", action="store_false", help="do not fail on empty input [default=%default].") parser.add_option("--fail-on-empty", dest="fail_on_empty", action="store_true", help="fail on empty input [default=%default].") parser.set_defaults( hardcopy=None, input_filename="", input_filename2=None, columns="all", logscale=None, statistics=[], plot=[], threshold=0.0, labels="x,y", colours=None, diagonal=False, legend=None, title=None, xrange=None, yrange=None, r_options="", fail_on_empty=True, format="full") (options, args) = E.Start(parser) if len(args) == 1 and not options.input_filename: options.input_filename = args[0] if options.columns not in ("all", "all-but-first"): options.columns = [int(x) - 1 for x in options.columns.split(",")] if options.colours: options.colours -= 1 if options.legend: options.legend -= 1 table = {} headers = [] # read data matrix if options.input_filename: lines = 
IOTools.openFile(options.input_filename, "r").readlines() else: # note: this will not work for interactive viewing, but # creating hardcopy plots works. lines = sys.stdin.readlines() lines = [x for x in lines if x[0] != "#"] if len(lines) == 0: if options.fail_on_empty: raise IOError("no input") E.warn("empty input") E.Stop() return matrix, headers, colours, legend = readTable(lines, "matrix", take_columns=options.columns, headers=True, colours=options.colours, row_names=options.legend) if options.input_filename2: # read another matrix (should be of the same format. matrix2, headers2, colours2, legend2 = readTable( lines, "matrix2", take_columns=options.columns, headers=True, colours=options.colours, row_names=options.legend) R.assign("headers", headers) ndata = R("""length( matrix[,1] )""")[0] if options.loglevel >= 1: options.stdlog.write("# read matrix: %ix%i\n" % (len(headers), ndata)) if colours: R.assign("colours", colours) for method in options.statistics: if method == "correlation": cor = R.cor(matrix, use="pairwise.complete.obs") writeMatrix(sys.stdout, cor, headers=headers, format="%5.2f") elif method == "pearson": options.stdout.write("\t".join(("var1", "var2", "coeff", "passed", "pvalue", "n", "method", "alternative")) + "\n") for x in range(len(headers) - 1): for y in range(x + 1, len(headers)): try: result = R( """cor.test( matrix[,%i], matrix[,%i] )""" % (x + 1, y + 1)) except rpy.RPyException as msg: E.warn("correlation not computed for columns %i(%s) and %i(%s): %s" % ( x, headers[x], y, headers[y], msg)) options.stdout.write("%s\t%s\t%s\t%s\t%s\t%i\t%s\t%s\n" % (headers[x], headers[y], "na", "na", "na", 0, "na", "na")) else: options.stdout.write( "%s\t%s\t%6.4f\t%s\t%e\t%i\t%s\t%s\n" % (headers[x], headers[y], result.rx2('estimate').rx2( 'cor')[0], Stats.getSignificance( float(result.rx2('p.value')[0])), result.rx2('p.value')[0], result.rx2('parameter').rx2( 'df')[0], result.rx2('method')[0], result.rx2('alternative')[0])) elif method == "spearman": options.stdout.write("\t".join(("var1", "var2", "coeff", "passed", "pvalue", "method", "alternative")) + "\n") for x in range(len(headers) - 1): for y in range(x + 1, len(headers)): result = R( """cor.test( matrix[,%i], matrix[,%i], method='spearman')""" % (x + 1, y + 1)) options.stdout.write( "%s\t%s\t%6.4f\t%s\t%e\t%i\t%s\t%s\n" % (headers[x], headers[y], result['estimate']['rho'], Stats.getSignificance(float(result['p.value'])), result['p.value'], result['parameter']['df'], result['method'], result['alternative'])) elif method == "count": # number of shared elements > threshold m, r, c = MatlabTools.ReadMatrix(open(options.input_filename, "r"), take=options.columns, headers=True) mask = numpy.greater(m, options.threshold) counts = numpy.dot(numpy.transpose(mask), mask) writeMatrix(options.stdout, counts, headers=c, format="%i") if options.plot: # remove columns that are completely empty if "pairs" in options.plot: colsums = R('''colSums( is.na(matrix ))''') take = [x for x in range(len(colsums)) if colsums[x] != ndata] if take: E.warn("removing empty columns %s before plotting" % str(take)) matrix = R.subset(matrix, select=[x + 1 for x in take]) R.assign("""matrix""", matrix) headers = [headers[x] for x in take] if legend: legend = [headers[x] for x in take] if options.r_options: extra_options = ", %s" % options.r_options else: extra_options = "" if options.legend is not None and len(legend): extra_options += ", legend=c('%s')" % "','".join(legend) if options.labels: xlabel, ylabel = options.labels.split(",") extra_options 
+= ", xlab='%s', ylab='%s'" % (xlabel, ylabel) else: xlabel, ylabel = "", "" if options.colours: extra_options += ", col=colours" if options.logscale: extra_options += ", log='%s'" % options.logscale if options.xrange: extra_options += ", xlim=c(%f,%f)" % tuple( map(float, options.xrange.split(","))) if options.yrange: extra_options += ", ylim=c(%f,%f)" % tuple( map(float, options.yrange.split(","))) if options.hardcopy: if options.hardcopy.endswith(".eps"): R.postscript(options.hardcopy) elif options.hardcopy.endswith(".png"): R.png(options.hardcopy, width=1024, height=768, type="cairo") elif options.hardcopy.endswith(".jpg"): R.jpg(options.hardcopy, width=1024, height=768, type="cairo") for method in options.plot: if ndata < 100: point_size = "1" pch = "o" elif ndata < 1000: point_size = "1" pch = "o" else: point_size = "0.5" pch = "." if method == "scatter": R("""plot( matrix[,1], matrix[,2], cex=%s, pch="o" %s)""" % ( point_size, extra_options)) if method == "scatter-regression": R("""plot( matrix[,1], matrix[,2], cex=%s, pch="o" %s)""" % ( point_size, extra_options)) dat = R( """dat <- data.frame(x = matrix[,1], y = matrix[,2])""") R( """new <- data.frame(x = seq( min(matrix[,1]), max(matrix[,1]), (max(matrix[,1]) - min(matrix[,1])) / 100))""") mod = R("""mod <- lm( y ~ x, dat)""") R("""predict(mod, new, se.fit = TRUE)""") R("""pred.w.plim <- predict(mod, new, interval="prediction")""") R("""pred.w.clim <- predict(mod, new, interval="confidence")""") R( """matpoints(new$x,cbind(pred.w.clim, pred.w.plim[,-1]), lty=c(1,2,2,3,3), type="l")""") R.mtext( "y = %f * x + %f, r=%6.4f, n=%i" % (mod["coefficients"]["x"], mod["coefficients"][ "(Intercept)"], R("""cor( dat )[2]"""), ndata), 3, cex=1.0) elif method == "pairs": if options.add_diagonal: R( """panel.hist <- function( x,y,... ) { points(x,y,...); abline(0,1); }""") else: R( """panel.hist <- function( x,y,... ) { points(x,y,...); }""") # There used to be a argument na_action="na.omit", but # removed this as there appeared error messages saying # "na.action is not a graphical parameter" and the # plots showed occasionally the wrong scale. 
# cex=point_size also caused trouble (error message: # "X11 used font size 8 when 2 was requested" or # similar) if options.colours: R.pairs(matrix, pch=pch, col=colours, main=options.title, panel="panel.hist", labels=headers, cex_labels=2.0) else: R.pairs(matrix, pch=pch, panel="panel.hist", main=options.title, labels=headers, cex_labels=2.0) elif method == "boxplot": extra_options += ",main='%s'" % options.title # set vertical orientation if max([len(x) for x in headers]) > 40 / len(headers): # remove xlabel: extra_options = re.sub(", xlab='[^']+'", "", extra_options) extra_options += ", names.arg=headers, las=2" R( """op <- par(mar=c(11,4,4,2))""") # the 10 allows the names.arg below the barplot R("""boxplot( matrix %s)""" % extra_options) elif method == "bar" or method == "bar-stacked": if not options.colours: extra_options += ", col=rainbow(5)" # set vertical orientation if max([len(x) for x in headers]) > 40 / len(headers): # remove xlabel: extra_options = re.sub(", xlab='[^']+'", "", extra_options) extra_options += ", names.arg=headers, las=2" R( """op <- par(mar=c(11,4,4,2))""") # the 10 allows the names.arg below the barplot R("""barplot(as.matrix(matrix), %s)""" % extra_options) elif method == "bar-besides": if not options.colours: extra_options += ", col=rainbow(%i)" % ndata # set vertical orientation if max([len(x) for x in headers]) > 40 / len(headers): # remove xlabel: extra_options = re.sub(", xlab='[^']+'", "", extra_options) extra_options += ", names.arg=headers, las=2" R( """op <- par(mar=c(11,4,4,2))""") # the 10 allows the names.arg below the barplot R("""barplot(as.matrix(matrix), beside=TRUE %s)""" % extra_options) elif method == "scatter+marginal": if options.title: # set the size of the outer margins - the title needs to be added at the end # after plots have been created R.par(oma=R.c(0, 0, 4, 0)) R("""matrix""") R(""" x <- matrix[,1]; y <- matrix[,2]; xhist <- hist(x, breaks=20, plot=FALSE); yhist <- hist(y, breaks=20, plot=FALSE); top <- max(c(xhist$counts, yhist$counts)); nf <- layout(matrix(c(2,0,1,3),2,2,byrow=TRUE), c(3,1), c(1,3), respect=TRUE ); par(mar=c(3,3,1,1)) ; plot(x, y, cex=%s, pch="o" %s) ; par(mar=c(0,3,1,1)) ; barplot(xhist$counts, axes=FALSE, ylim=c(0, top), space=0 ) ; par(mar=c(3,0,1,1)) ; title(main='%s'); barplot(yhist$counts, axes=FALSE, xlim=c(0, top), space=0, horiz=TRUE ) ; title(main='%s'); """ % (point_size, extra_options, xlabel, ylabel)) if options.title: R.mtext(options.title, 3, outer=True, line=1, cex=1.5) elif method in ("panel", "1_vs_x", "matched"): if method == "panel": pairs = [] for x in range(len(headers) - 1): for y in range(x + 1, len(headers)): pairs.append((x, y)) elif method == "1_vs_x": pairs = [] for x in range(1, len(headers)): pairs.append((0, x)) # print matching columns elif method == "matched": pairs = [] for x in range(len(headers) - 1): for y in range(x + 1, len(headers)): if headers[x] == headers[y]: pairs.append((x, y)) break w = int(math.ceil(math.sqrt(len(pairs)))) h = int(math.ceil(float(len(pairs)) / w)) PosInf = 1e300000 NegInf = -1e300000 xlabel, ylabel = options.labels.split(",") R("""layout(matrix(seq(1,%i), %i, %i, byrow = TRUE))""" % (w * h, w, h)) for a, b in pairs: new_matrix = [x for x in zip( list(matrix[a].values())[0], list(matrix[b].values())[0]) if x[0] not in (float("nan"), PosInf, NegInf) and x[1] not in (float("nan"), PosInf, NegInf)] try: R("""plot(matrix[,%i], matrix[,%i], main='%s versus %s', cex=0.5, pch=".", xlab='%s', ylab='%s' )""" % ( a + 1, b + 1, headers[b], headers[a], xlabel, 
ylabel)) except rpy.RException as msg: print("could not plot %s versus %s: %s" % (headers[b], headers[a], msg)) if options.hardcopy: R['dev.off']() E.info("matrix added as >matrix< in R.") if not options.hardcopy: if options.input_filename: interpreter = code.InteractiveConsole(globals()) interpreter.interact() else: E.info( "can not start new interactive session as input has come from stdin.") E.Stop()
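The re.sub calls in this example edit an accumulated string of R plotting options, for instance re.sub(", xlab='[^']+'", "", extra_options) to drop the x-axis label before the labels are rotated for boxplots and barplots. A minimal sketch with a made-up options string standing in for extra_options:

import re

# Made-up options string standing in for the extra_options built above.
extra_options = ", col=colours, xlab='sample', ylab='count'"

# Remove a previously added xlab='...' fragment, then force vertical labels.
extra_options = re.sub(r", xlab='[^']+'", "", extra_options)
extra_options += ", names.arg=headers, las=2"
print(extra_options)
# , col=colours, ylab='count', names.arg=headers, las=2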
2
Example 19
View licensedef main(argv=None): ''' Process the command line arguments and create the JSON dump. :param argv: List of arguments, as if specified on the command-line. If None, ``sys.argv[1:]`` is used instead. :type argv: list of str ''' # Get command line arguments parser = argparse.ArgumentParser( description="Transfer all projects/repositories from GitLab to Stash. \ Note: This script assumes you have your SSH key \ registered with both GitLab and Stash.", formatter_class=argparse.ArgumentDefaultsHelpFormatter, conflict_handler='resolve') parser.add_argument('gitlab_url', help='The full URL to your GitLab instance.') parser.add_argument('stash_url', help='The full URL to your Stash instance.') parser.add_argument('-p', '--password', help='The password to use to authenticate if token is \ not specified. If password and token are both \ unspecified, you will be prompted to enter a \ password.') parser.add_argument('-P', '--page_size', help='When retrieving result from GitLab, how many \ results should be included in a given page?.', type=int, default=20) parser.add_argument('-s', '--verify_ssl', help='Enable SSL certificate verification', action='store_true') parser.add_argument('-S', '--skip_existing', help='Do not update existing repositories and just \ skip them.', action='store_true') parser.add_argument('-t', '--token', help='The private GitLab API token to use for \ authentication. Either this or username and \ password must be set.') parser.add_argument('-u', '--username', help='The username to use for authentication, if token\ is unspecified.') parser.add_argument('-v', '--verbose', help='Print more status information. For every ' + 'additional time this flag is specified, ' + 'output gets more verbose.', default=0, action='count') parser.add_argument('--version', action='version', version='%(prog)s {0}'.format(__version__)) args = parser.parse_args(argv) args.page_size = max(100, args.page_size) # Convert verbose flag to actually logging level log_levels = [logging.WARNING, logging.INFO, logging.DEBUG] log_level = log_levels[min(args.verbose, 2)] # Make warnings from built-in warnings module get formatted more nicely logging.captureWarnings(True) logging.basicConfig(format=('%(asctime)s - %(name)s - %(levelname)s - ' + '%(message)s'), level=log_level) # Setup authenticated GitLab and Stash instances if args.token: git = GitLab(args.gitlab_url, token=args.token, verify_ssl=args.verify_ssl) else: git = None if not args.username: print('Username: ', end="", file=sys.stderr) args.username = input('').strip() if not args.password: args.password = getpass.getpass('Password: ') stash = stashy.connect(args.stash_url, args.username, args.password) if git is None: git = GitLab(args.gitlab_url, verify_ssl=args.verify_ssl) git.login(args.username, args.password) print('Retrieving existing Stash projects...', end="", file=sys.stderr) sys.stderr.flush() key_set = {proj['key'] for proj in stash.projects} stash_project_names = {proj['name'] for proj in stash.projects} names_to_keys = {proj['name']: proj['key'] for proj in stash.projects} print('done', file=sys.stderr) sys.stderr.flush() updated_projects = set() repo_to_slugs = {} failed_to_clone = set() cwd = os.getcwd() transfer_count = 0 skipped_count = 0 print('Processing GitLab projects...', file=sys.stderr) sys.stderr.flush() for project in gen_all_results(git.getallprojects, per_page=args.page_size): print('\n' + ('=' * 80) + '\n', file=sys.stderr) sys.stderr.flush() proj_name = project['namespace']['name'] # Create Stash project if it 
doesn't already exist if proj_name not in stash_project_names: # Create Stash project key key = proj_name if key.islower(): key = key.title() key = re.sub(r'[^A-Z]', '', key) if len(key) < 2: key = re.sub(r'[^A-Za-z]', '', proj_name)[0:2].upper() added = False suffix = 65 while key in key_set: if not added: key += 'A' else: suffix += 1 key = key[:-1] + chr(suffix) key_set.add(key) # Actually add the project to Stash print('Creating Stash project "%s" with key %s...' % (proj_name, key), end="", file=sys.stderr) sys.stderr.flush() stash.projects.create(key, proj_name) names_to_keys[proj_name] = key stash_project_names.add(proj_name) print('done', file=sys.stderr) sys.stderr.flush() else: key = names_to_keys[proj_name] stash_project = stash.projects[key] # Initialize maping from repository names to slugs for later if key not in repo_to_slugs: repo_to_slugs[key] = {repo['name']: repo['slug'] for repo in stash_project.repos} # Create Stash-compatible name for repository # Repository names are limited to 128 characters. # They must start with a letter or number and may contain spaces, # hyphens, underscores and periods repo_name = project['name'] if not repo_name[0].isalnum(): repo_name = 'A ' + repo_name repo_name = re.sub(r'[^A-Za-z0-9 _.-]', ' ', repo_name) if len(repo_name) > 128: repo_name = repo_name[0:128] # Add repository to Stash project if it's not already there if repo_name not in repo_to_slugs[key]: print('Creating Stash repository "%s" in project "%s"...' % (repo_name, proj_name), end="", file=sys.stderr) sys.stderr.flush() stash_repo = stash_project.repos.create(repo_name) repo_to_slugs[key][repo_name] = stash_repo['slug'] print('done', file=sys.stderr) sys.stderr.flush() elif args.skip_existing: print('Skipping existing Stash repository "%s" in project "%s"' % (repo_name, proj_name), file=sys.stderr) sys.stderr.flush() skipped_count += 1 continue else: print('Updating existing Stash repository "%s" in project "%s"' % (repo_name, proj_name), file=sys.stderr) sys.stderr.flush() repo_slug = repo_to_slugs[key][repo_name] stash_repo = stash_project.repos[repo_slug].get() for clone_link in stash_repo['links']['clone']: if clone_link['name'] == 'ssh': stash_repo_url = clone_link['href'] break with tempfile.TemporaryDirectory() as temp_dir: # Clone repository to temporary directory print('\nCloning GitLab repository...', file=sys.stderr) sys.stderr.flush() try: subprocess.check_call(['git', 'clone', '--mirror', project['ssh_url_to_repo'], temp_dir]) except subprocess.CalledProcessError: print('Failed to clone GitLab repository. 
This usually happens when ' + 'it does not exist.', file=sys.stderr) failed_to_clone.add(project['name_with_namespace']) skipped_count += 1 continue os.chdir(temp_dir) # Check that repository is not empty try: subprocess.check_call(['git', 'log', '--format=oneline', '-1'], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) except subprocess.CalledProcessError: print('Repository is empty, so skipping push to Stash.', file=sys.stderr) skipped_count += 1 else: # Change remote to Stash and push print('\nPushing repository to Stash...', file=sys.stderr) sys.stderr.flush() subprocess.check_call(['git', 'remote', 'set-url', 'origin', stash_repo_url]) subprocess.check_call(['git', 'push', '--mirror']) transfer_count += 1 os.chdir(cwd) updated_projects.add(proj_name) print('\n' + ('=' * 35) + 'SUMMARY' + ('=' * 35), file=sys.stderr) print('{} repositories transferred.\n'.format(transfer_count), file=sys.stderr) print('{} repositories skipped.\n'.format(skipped_count), file=sys.stderr) print('Projects created/updated:', file=sys.stderr) for proj in sorted(updated_projects): print('\t' + proj, file=sys.stderr) print('Repositories that we could not clone:', file=sys.stderr) for repo_name in sorted(failed_to_clone): print('\t' + repo_name, file=sys.stderr)
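The re.sub calls in this example turn GitLab names into Stash-compatible ones: the project key keeps only capital letters (with a two-letter fallback), and the repository name is restricted to letters, digits, spaces, '_', '.' and '-'. A minimal sketch of those two sanitising steps on made-up names (the suffix handling, 'A ' prefix and 128-character truncation from the script are left out):

import re

proj_name = 'data-pipeline tools'    # made-up GitLab namespace name
gitlab_repo = 'data-pipeline/tools#42'   # made-up GitLab project name

# Project key: capitals of the (title-cased) name, or first two letters.
key = proj_name
if key.islower():
    key = key.title()
key = re.sub(r'[^A-Z]', '', key)
if len(key) < 2:
    key = re.sub(r'[^A-Za-z]', '', proj_name)[0:2].upper()

# Repository name: replace anything Stash rejects with a space.
repo_name = re.sub(r'[^A-Za-z0-9 _.-]', ' ', gitlab_repo)

print(key, '|', repo_name)   # DPT | data-pipeline tools 42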
2
Example 20
View licensedef on_task_input(self, task, config): config = self.prepare_config(config) urlconfig = {} urlappend = "?" entries = [] if config['unwatched_only'] and config['section'] != 'recentlyViewedShows' and config['section'] != 'all': urlconfig['unwatched'] = '1' if config['username'] and config['password']: accesstoken = self.plex_get_accesstoken(config) log.debug("Got accesstoken: %s" % accesstoken) urlconfig['X-Plex-Token'] = accesstoken for key in urlconfig: urlappend += '%s=%s&' % (key, urlconfig[key]) if not self.plex_section_is_int(config['section']): try: path = "/library/sections/" r = requests.get("http://%s:%d%s%s" % (config['plexserver'], config['port'], path, urlappend)) except requests.RequestException as e: raise plugin.PluginError('Error retrieving source: %s' % e) dom = parseString(r.text.encode("utf-8")) for node in dom.getElementsByTagName('Directory'): if node.getAttribute('title') == config['section']: config['section'] = int(node.getAttribute('key')) if not self.plex_section_is_int(config['section']): raise plugin.PluginError('Could not find section \'%s\'' % config['section']) log.debug("Fetching http://%s:%d/library/sections/%s/%s%s" % (config['server'], config['port'], config['section'], config['selection'], urlappend)) try: path = "/library/sections/%s/%s" % (config['section'], config['selection']) r = requests.get("http://%s:%d%s%s" % (config['plexserver'], config['port'], path, urlappend)) except requests.RequestException as e: raise plugin.PluginError('There is no section with number %d. (%s)' % (config['section'], e)) dom = parseString(r.text.encode("utf-8")) plexsectionname = dom.getElementsByTagName('MediaContainer')[0].getAttribute('title1') viewgroup = dom.getElementsByTagName('MediaContainer')[0].getAttribute('viewGroup') log.debug("Plex section \"%s\" is a \"%s\" section" % (plexsectionname, viewgroup)) if viewgroup != "movie" and viewgroup != "show" and viewgroup != "episode": raise plugin.PluginError("Section is neither a movie nor tv show section!") domroot = "Directory" titletag = "title" if viewgroup == "episode": domroot = "Video" titletag = "grandparentTitle" thumbtag = "thumb" arttag = "art" seasoncovertag = "parentThumb" covertag = "grandparentThumb" elif viewgroup == "movie": domroot = "Video" titletag = "title" arttag = "art" seasoncovertag = "thumb" covertag = "thumb" if config['fetch'] == "thumb": raise plugin.PluginError("Movie sections does not have any thumbnails to download!") for node in dom.getElementsByTagName(domroot): e = Entry() e['plex_server'] = config['plexserver'] e['plex_port'] = config['port'] e['plex_section'] = config['section'] e['plex_section_name'] = plexsectionname e['plex_episode_thumb'] = '' title = node.getAttribute(titletag) if config['strip_year']: title = re.sub(r'^(.*)\(\d{4}\)(.*)', r'\1\2', title) if config['strip_parens']: title = re.sub(r'\(.*?\)', r'', title) title = title.strip() if config['strip_non_alpha']: title = re.sub(r'[\(\)]', r'', title) title = re.sub(r'&', r'And', title) title = re.sub(r'[^A-Za-z0-9- \']', r'', title) if config['lowercase_title']: title = title.lower() if viewgroup == "show": e['title'] = title e['url'] = 'NULL' entries.append(e) # show ends here. 
continue e['plex_art'] = "http://%s:%d%s%s" % (config['server'], config['port'], node.getAttribute(arttag), urlappend) e['plex_cover'] = "http://%s:%d%s%s" % (config['server'], config['port'], node.getAttribute(covertag), urlappend) e['plex_season_cover'] = "http://%s:%d%s%s" % (config['server'], config['port'], node.getAttribute(seasoncovertag), urlappend) if viewgroup == "episode": e['plex_thumb'] = "http://%s:%d%s%s" % ( config['server'], config['port'], node.getAttribute('thumb'), urlappend) e['series_name'] = title season = int(node.getAttribute('parentIndex')) if node.getAttribute('parentIndex') == node.getAttribute('year'): season = node.getAttribute('originallyAvailableAt') filenamemap = "%s_%s%s_%s_%s_%s.%s" episode = "" e['series_id_type'] = 'date' e['series_date'] = season elif node.getAttribute('index'): episode = int(node.getAttribute('index')) filenamemap = "%s_%02dx%02d_%s_%s_%s.%s" e['series_season'] = season e['series_episode'] = episode e['series_id_type'] = 'ep' e['series_id'] = 'S%02dE%02d' % (season, episode) else: log.debug("Could not get episode number for '%s' (Hint, ratingKey: %s)" % (title, node.getAttribute('ratingKey'))) break elif viewgroup == "movie": filenamemap = "%s_%s_%s_%s.%s" e['plex_duration'] = node.getAttribute('duration') e['plex_summary'] = node.getAttribute('summary') e['plex_userrating'] = node.getAttribute('userrating') e['plex_key'] = node.getAttribute('ratingKey') count = node.getAttribute('viewCount') offset = node.getAttribute('viewOffset') if count: e['plex_status'] = "seen" elif offset: e['plex_status'] = "inprogress" else: e['plex_status'] = "unwatched" for media in node.getElementsByTagName('Media'): vcodec = media.getAttribute('videoCodec') acodec = media.getAttribute('audioCodec') if config['fetch'] == "file" or not config['fetch']: container = media.getAttribute('container') else: container = "jpg" resolution = media.getAttribute('videoResolution') + "p" for part in media.getElementsByTagName('Part'): if config['fetch'] == "file" or not config['fetch']: key = part.getAttribute('key') elif config['fetch'] == "art": key = node.getAttribute(arttag) elif config['fetch'] == "cover": key = node.getAttribute(arttag) elif config['fetch'] == "season_cover": key = node.getAttribute(seasoncovertag) elif config['fetch'] == "thumb": key = node.getAttribute(thumbtag) # key = part.getAttribute('key') duration = part.getAttribute('duration') e['plex_title'] = title if config['original_filename']: filename, fileext = os.path.splitext(basename(part.getAttribute('file'))) if config['fetch'] != 'file': filename += ".jpg" else: filename = "%s.%s" % (filename, fileext) else: if viewgroup == "episode": filename = filenamemap % (title.replace(" ", "."), season, episode, resolution, vcodec, acodec, container) title = filename elif viewgroup == "movie": filename = filenamemap % (title.replace(" ", "."), resolution, vcodec, acodec, container) e['plex_url'] = "http://%s:%d%s%s" % (config['server'], config['port'], key, urlappend) e['plex_path'] = key e['url'] = "http://%s:%d%s%s" % (config['server'], config['port'], key, urlappend) e['plex_duration'] = duration e['filename'] = filename e['title'] = title if key == "": log.debug("Could not find anything in PMS to download. Next!") else: entries.append(e) return entries
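The re.sub chain in this plugin normalises Plex titles before they become entry titles: strip a trailing year, strip parenthesised text, drop stray parentheses, spell out '&', and remove the remaining disallowed characters. A minimal sketch of that chain on a made-up title, with all the strip options enabled:

import re

title = "Foo & Bar (Director's Cut) (2009)"   # made-up title

title = re.sub(r'^(.*)\(\d{4}\)(.*)', r'\1\2', title)   # strip_year
title = re.sub(r'\(.*?\)', r'', title).strip()          # strip_parens
title = re.sub(r'[\(\)]', r'', title)                   # strip_non_alpha ...
title = re.sub(r'&', r'And', title)
title = re.sub(r'[^A-Za-z0-9- \']', r'', title)
print(title.lower())   # foo and bar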
2
Example 21
View licensedef build_annotated_tgm(closest_gene_output,distance_to_tss,logistic_score_output,fasta_file,motif_ids,makeWindow=True,tgm_file='',do_pkl=True): ''' Takes existing tgm, and maps to gene names and TF ids within a specific window ''' from chipsequtil import Fasta ##get fasta file events, since these are columns in the logistic_score matrix seq_ids=Fasta.load(fasta_file,key_func=lambda x: x) ##need to get sequence mids in the order they are processed ##in the file, this is the index into the score_output file ##. ASSUMES GALAXY-formatted FASTA!!!! seq_mids=[] ##list of FASTA regions, in their appropriate order in the file filtered_events={}##gene name of closest gene to event within window for k in seq_ids.keys(): vals=k.split(';') if len(vals)==1: vals=k.split() if ':' in vals[0]: #bed tools used chr,range=vals[0].split(':') low,high=range.split('-') mid=str(int(low)+((int(high)-int(low))/2)) seq_mids.append(chr+':'+mid) elif 'random' not in vals[0]: #galaxy tools used genome,chr,low,high,strand=vals[0].split('_') mid=str(int(low)+((int(high)-int(low))/2)) seq_mids.append(chr+':'+mid) if len(vals)==3: filtered_events[chr+':'+mid]=vals[2] print 'Found %d events, of which %d have gene names'%(len(seq_mids),len(filtered_events)) ##this next section relies on xls ##filter events that are within distance from closest_gene_output to get gene mapping ## filtered_fc={}##FC of events within window, in case we want to use in the future event_indexes=[] ## # ###open the closest_gene_output and determine # try: # cgo=open(closest_gene_output,'rU').readlines() # except: # print "Error opening file:", sys.exc_info()[0] # print "Check to make sure file exists at %s"%(closest_gene_output) # raise # inds=cgo[0].strip().split('\t') # for row in cgo[1:]: # arr=row.strip().split('\t') # if 'geneSymbol' in inds: #this is true if we used an xref file # gene=arr[inds.index('geneSymbol')] # # mid=arr[2]+':'+str(int(arr[3])+(int(arr[4])-int(arr[3]))/2) # else: #otherwise we just gene id # gene=arr[inds.index('knownGeneID')] # #position mapping is different # if 'Position' in inds: #this is for GPS # mid='chr'+arr[inds.index('Position')] # elif 'chrom' in inds: #this is for BED # mid=arr[inds.index('chrom')]+':'+str(int(arr[inds.index('chromStart')])+(int(arr[inds.index('chromEnd')])-int(arr[inds.index('chromStart')]))/2) # else: #this is for MACS # mid=arr[inds.index('chr')]+':'+str(int(arr[inds.index('start')])+(int(arr[inds.index('end')])-int(arr[inds.index('start')]))/2) # #print gene,mid # dist=arr[inds.index('dist from feature')] # try: # sv=arr[inds.index('score')] # except: # try: # sv=arr[inds.index('IPvsCTR')] # except: # fc=0.0 # if sv!='': # fc=float(sv) # else: # next # #check absolute distance if we're doing a window, or negative distance if we're looking upstream # if distance_to_tss=='' or (makeWindow and np.absolute(int(dist))<int(distance_to_tss)) or int(dist)>(-1*int(distance_to_tss)): # # filtered_events[mid]=gene #(this was out of if clause, should it be there?) 
1/2 # if mid in seq_mids: # event_indexes.append(seq_mids.index(mid))##index into fasta file value/maps to array # ##UPDATE: moved these to within if clause - so that unrelated scores are not included # filtered_events[mid]=gene ##gene name of event # filtered_fc[mid]=float(fc) ##fc value of event # # filtered_fc[mid]=float(fc) #see above, 2/2 # print 'Got '+str(len(filtered_events))+' per-gene events within '+distance_to_tss+' bp window out of '+str(len(cgo)) # print 'These map to '+str(len(event_indexes))+' regions in the FASTA file' ##get gene ids, or just use mid of sequence region gene_names=[t for t in set(filtered_events.values())] print gene_names[0:10] #get gene ids for all matrices list loaded in mi_files=motif_ids.split(',') if len(mi_files)>0: #open first motif name file that contains names for each element in TAMO file all_tf_names=[a.strip() for a in open(mi_files[0],'rU').readlines()] if len(mi_files)>1: #if we have additional files, check to see if if names already exist for i,f in enumerate(mi_files): if i==0: next try: #open file and read in extra ids newfs=[a.strip() for a in open(f,'rU').readlines()] except: print "Error opening file:", sys.exc_info()[0] print "Check to make sure file exists at %s"%(f) raise if len(newfs)==len(all_tf_names): #combine existing tf names with these with . delimiter.... all_tf_names=['.'.join((a,b)) for a,b in zip(all_tf_names,newfs)] ##now go through and clean up TF names cleaned_tf_names=[] for i,a in enumerate(all_tf_names): tfn=set([b for b in a.split('.') if '$' not in b and b!='']) if(len(tfn)==0): tfn=a.split('.') # else: # print 'Replacing %s with %s'%(a,'.'.join(tfn)) cleaned_tf_names.append('.'.join(tfn)) all_tf_names=cleaned_tf_names #print len(cleaned_tf_names) ##now actually map events to scores ##load motif matrix scanning output that maps matrices to regions print 'Loading complete motif score file...' event_scores=np.loadtxt(logistic_score_output) print '\t...Loaded!' #create new tgm matrix with approriate file name newmat=np.zeros((len(all_tf_names),len(gene_names)),dtype='float')##fill in gene length),dtype='float') if makeWindow: distance_to_tss=distance_to_tss+'_bpWindow' else: distance_to_tss=distance_to_tss+'_bpUpstream' if tgm_file=='': tgm_file=re.sub('.txt','_'+distance_to_tss+'.tgm',os.path.basename(logistic_score_output)) if do_pkl: pkl_file=re.sub('.tgm','.pkl',tgm_file) else: pkl_file='' ##sort event indexes from seq_mids that are in the filtered_events file event_indexes.sort() #populate matrix with greatest score attributed to that gene/tf combo for ind,arr in enumerate(event_scores): ##name of matrix/motif mat=all_tf_names[ind] #tfnames=[mat] ##here we enumerate which sequences were mapped to a gene within the window for k,val in enumerate(seq_mids):#k in event_indexes: #here we want the event midpoint for the index # val=seq_mids[k] #get score for that index score=arr[k] #now map it to closest gene for that midpoint cg=filtered_events[val] fc=1.0 ##update this if we want to normalize score by fold change score=float(score)*float(fc) ##this should do nothing sine fcgenerally =1 #if len(tfnames)==1: curscore=newmat[all_tf_names.index(mat),gene_names.index(cg)] ##updated to include maximum score!! if np.abs(score)>np.abs(curscore): newmat[all_tf_names.index(mat),gene_names.index(cg)]=score #else: # for t in tfnames: # curscore=newmat[all_tf_names.index(t),gene_names.index(cg)] # ##updated to include maximum score!! 
# if np.abs(float(score))>np.abs(curscore): # newmat[all_tf_names.index(t),gene_names.index(cg)]=float(score) ###save these intermediate files for debugging purposes np.savetxt(tgm_file,newmat) gin=re.sub('.tgm','_geneids.txt',tgm_file) tin=re.sub('.tgm','_tfids.txt',tgm_file) try: open(gin,'w').writelines([g+'\n' for g in gene_names]) open(tin,'w').writelines([t+'\n' for t in all_tf_names]) except: print "Error opening file:", sys.exc_info()[0] print "Check to make sure file exists at %s"%(closest_gene_output) raise if pkl_file!='': zipcmd='python '+os.path.join(progdir,'zipTgms.py')+' '+tgm_file+' '+tin+' '+gin+' --pkl='+pkl_file print 'Compressing matrix file into pkl' print zipcmd os.system(zipcmd) return pkl_file else: return tgm_file
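The re.sub calls in this example all rewrite file names, e.g. re.sub('.txt', '_' + distance_to_tss + '.tgm', ...) and re.sub('.tgm', '_geneids.txt', tgm_file). A minimal sketch with a made-up score file; note that the dots in the patterns are not escaped, so '.txt' really means "any character followed by txt" (re.escape would make this stricter):

import os
import re

logistic_score_output = '/data/scores/motif_scores.txt'   # made-up path
distance_to_tss = '2000_bpWindow'

tgm_file = re.sub('.txt', '_' + distance_to_tss + '.tgm',
                  os.path.basename(logistic_score_output))
gene_file = re.sub('.tgm', '_geneids.txt', tgm_file)
tf_file = re.sub('.tgm', '_tfids.txt', tgm_file)
pkl_file = re.sub('.tgm', '.pkl', tgm_file)

print(tgm_file)    # motif_scores_2000_bpWindow.tgm
print(gene_file)   # motif_scores_2000_bpWindow_geneids.txt
print(tf_file)     # motif_scores_2000_bpWindow_tfids.txt
print(pkl_file)    # motif_scores_2000_bpWindow.pkl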
2
Example 22
View licensedef find_duplicates(doc, case_path): """Return True if it should be saved, else False""" log_print("Running duplicate checks...") # 1. Is the item completely outside of the current corpus? if not needs_dup_check(doc): log_print( " - Not a duplicate: Outside of date range for selected court.") return [] else: log_print( " - Could be a duplicate: Inside of date range for selected court.") # 2. Can we find any duplicates and information about them? stats, candidates = dup_finder.get_dup_stats(doc) if len(candidates) == 0: log_print(" - Not a duplicate: No candidate matches found.") return [] elif len(candidates) == 1: if doc.docket.docket_number and candidates[0].get( 'docketNumber') is not None: # One in the other or vice versa if (re.sub("(\D|0)", "", candidates[0]['docketNumber']) in re.sub("(\D|0)", "", doc.docket.docket_number)) or \ (re.sub("(\D|0)", "", doc.docket.docket_number) in re.sub("(\D|0)", "", candidates[0]['docketNumber'])): log_print( " - Duplicate found: Only one candidate returned and docket number matches.") return [candidates[0]['id']] else: if doc.docket.court_id == 'cit': # CIT documents have neutral citations in the database. Look that up and compare against that. candidate_doc = Document.objects.get( pk=candidates[0]['id']) if doc.citation.neutral_cite and candidate_doc.citation.neutral_cite: if candidate_doc.neutral_cite in doc.docket.docket_number: log_print( ' - Duplicate found: One candidate from CIT and its neutral citation matches the new document\'s docket number.') return [candidates[0]['id']] else: log_print( " - Not a duplicate: Only one candidate but docket number differs.") return [] else: log_print(" - Skipping docket_number dup check.") if doc.case_name == candidates[0].get('caseName'): log_print( " - Duplicate found: Only one candidate and case name is a perfect match.") return [candidates[0]['id']] if dup_helpers.case_name_in_candidate(doc.case_name, candidates[0].get('caseName')): log_print( " - Duplicate found: All words in new document's case name are in the candidate's case name (%s)" % candidates[0].get('caseName')) return [candidates[0]['id']] else: # More than one candidate. if doc.docket.docket_number: dups_by_docket_number = dup_helpers.find_same_docket_numbers(doc, candidates) if len(dups_by_docket_number) > 1: log_print( " - Duplicates found: %s candidates matched by docket number." % len( dups_by_docket_number)) return [can['id'] for can in dups_by_docket_number] elif len(dups_by_docket_number) == 1: log_print( " - Duplicate found: Multiple candidates returned, but one matched by docket number.") return [dups_by_docket_number[0]['id']] else: log_print( " - Could be a duplicate: Unable to find good match via docket number.") else: log_print(" - Skipping docket_number dup check.") # 3. Filter out obviously bad cases and then pass remainder forward for manual review. filtered_candidates, filtered_stats = dup_helpers.filter_by_stats( candidates, stats) log_print(" - %s candidates before filtering. With stats: %s" % ( stats['candidate_count'], stats['cos_sims'])) log_print(" - %s candidates after filtering. 
Using filtered stats: %s" % ( filtered_stats['candidate_count'], filtered_stats['cos_sims'])) if len(filtered_candidates) == 0: log_print( " - Not a duplicate: After filtering no good candidates remained.") return [] elif len(filtered_candidates) == 1 and filtered_stats['cos_sims'][ 0] > 0.93: log_print( " - Duplicate found: One candidate after filtering and cosine similarity is high (%s)" % filtered_stats['cos_sims'][0]) return [filtered_candidates[0]['id']] else: duplicates = [] high_sims_count = len( [sim for sim in filtered_stats['cos_sims'] if sim > 0.98]) low_sims_count = len( [sim for sim in filtered_stats['cos_sims'] if sim < 0.95]) for k in range(0, len(filtered_candidates)): if all([(high_sims_count == 1), # Only one high score (low_sims_count == filtered_stats['candidate_count'] - 1) # All but one have low scores ]): # If only one of the items is very high, then we can ignore the others and assume it's right if filtered_stats['cos_sims'][k] > 0.98: duplicates.append(filtered_candidates[k]['id']) break else: # ignore the others continue else: # Have to determine by "hand" log_print(" %s) Case name: %s" % (k + 1, doc.case_name)) log_print( " %s" % filtered_candidates[k]['caseName']) log_print(" Docket nums: %s" % doc.docket.docket_number) log_print(" %s" % filtered_candidates[k].get( 'docketNumber', 'None')) log_print( " Cosine Similarity: %s" % filtered_stats['cos_sims'][ k]) log_print(" Candidate URL: file://%s" % case_path) log_print(" Match URL: https://www.courtlistener.com%s" % (filtered_candidates[k]['absolute_url'])) choice = raw_input("Is this a duplicate? [Y/n]: ") choice = choice or "y" if choice == 'y': duplicates.append(filtered_candidates[k]['id']) if len(duplicates) == 0: log_print( " - Not a duplicate: Manual determination found no matches.") return [] elif len(duplicates) == 1: log_print( " - Duplicate found: Manual determination found one match.") return [duplicates[0]] elif len(duplicates) > 1: log_print( " - Duplicates found: Manual determination found %s matches." % len( duplicates)) return duplicates
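The docket-number comparison above relies on re.sub("(\D|0)", "", ...) to reduce both docket numbers to their significant digits before testing whether one contains the other. A minimal sketch with made-up docket numbers:

import re

def simplify(docket_number):
    # Drop non-digits and zeros, as in the duplicate check above.
    return re.sub(r"(\D|0)", "", docket_number)

a = simplify('No. 09-1205')      # '9125'
b = simplify('2009-CV-01205')    # '29125'
print(a in b or b in a)          # True, so the two numbers are treated as a match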
2
Example 23
View licensedef derive_datasets(self, input_dataset_dict, stack_output_info, tile_type_info): """ Overrides abstract function in stacker class. Called in Stacker.stack_derived() function. Creates PQA-masked NDVI stack Arguments: fc_dataset_dict: Dict keyed by processing level (e.g. ORTHO, FC, PQA, DEM) containing all tile info which can be used within the function A sample is shown below (including superfluous band-specific information): { 'FC': {'band_name': 'Visible Blue', 'band_tag': 'B10', 'end_datetime': datetime.datetime(2000, 2, 9, 23, 46, 36, 722217), 'end_row': 77, 'level_name': 'FC', 'nodata_value': -999L, 'path': 91, 'satellite_tag': 'LS7', 'sensor_name': 'ETM+', 'start_datetime': datetime.datetime(2000, 2, 9, 23, 46, 12, 722217), 'start_row': 77, 'tile_layer': 1, 'tile_pathname': '/g/data/v10/datacube/EPSG4326_1deg_0.00025pixel/LS7_ETM/150_-025/2000/LS7_ETM_FC_150_-025_2000-02-09T23-46-12.722217.tif', 'x_index': 150, 'y_index': -25}, 'ORTHO': {'band_name': 'Thermal Infrared (Low Gain)', 'band_tag': 'B61', 'end_datetime': datetime.datetime(2000, 2, 9, 23, 46, 36, 722217), 'end_row': 77, 'level_name': 'ORTHO', 'nodata_value': 0L, 'path': 91, 'satellite_tag': 'LS7', 'sensor_name': 'ETM+', 'start_datetime': datetime.datetime(2000, 2, 9, 23, 46, 12, 722217), 'start_row': 77, 'tile_layer': 1, 'tile_pathname': '/g/data/v10/datacube/EPSG4326_1deg_0.00025pixel/LS7_ETM/150_-025/2000/LS7_ETM_ORTHO_150_-025_2000-02-09T23-46-12.722217.tif', 'x_index': 150, 'y_index': -25}, 'PQA': {'band_name': 'Pixel Quality Assurance', 'band_tag': 'PQA', 'end_datetime': datetime.datetime(2000, 2, 9, 23, 46, 36, 722217), 'end_row': 77, 'level_name': 'PQA', 'nodata_value': None, 'path': 91, 'satellite_tag': 'LS7', 'sensor_name': 'ETM+', 'start_datetime': datetime.datetime(2000, 2, 9, 23, 46, 12, 722217), 'start_row': 77, 'tile_layer': 1, 'tile_pathname': '/g/data/v10/datacube/EPSG4326_1deg_0.00025pixel/LS7_ETM/150_-025/2000/LS7_ETM_PQA_150_-025_2000-02-09T23-46-12.722217.tif, 'x_index': 150, 'y_index': -25} } Arguments (Cont'd): stack_output_info: dict containing stack output information. Obtained from stacker object. A sample is shown below stack_output_info = {'x_index': 144, 'y_index': -36, 'stack_output_dir': '/g/data/v10/tmp/ndvi', 'start_datetime': None, # Datetime object or None 'end_datetime': None, # Datetime object or None 'satellite': None, # String or None 'sensor': None} # String or None Arguments (Cont'd): tile_type_info: dict containing tile type information. Obtained from stacker object (e.g: stacker.tile_type_dict[tile_type_id]). A sample is shown below {'crs': 'EPSG:4326', 'file_extension': '.tif', 'file_format': 'GTiff', 'format_options': 'COMPRESS=LZW,BIGTIFF=YES', 'tile_directory': 'EPSG4326_1deg_0.00025pixel', 'tile_type_id': 1L, 'tile_type_name': 'Unprojected WGS84 1-degree at 4000 pixels/degree', 'unit': 'degree', 'x_origin': 0.0, 'x_pixel_size': Decimal('0.00025000000000000000'), 'x_pixels': 4000L, 'x_size': 1.0, 'y_origin': 0.0, 'y_pixel_size': Decimal('0.00025000000000000000'), 'y_pixels': 4000L, 'y_size': 1.0} Function must create one or more GDAL-supported output datasets. Useful functions in the Stacker class include Stacker.get_pqa_mask(), but it is left to the coder to produce exactly what is required for a single slice of the temporal stack of derived quantities. Returns: output_dataset_info: Dict keyed by stack filename containing metadata info for GDAL-supported output datasets created by this function. 
Note that the key(s) will be used as the output filename for the VRT temporal stack and each dataset created must contain only a single band. An example is as follows: {'/g/data/v10/tmp/ndvi/NDVI_stack_150_-025.vrt': {'band_name': 'Normalised Differential Vegetation Index with PQA applied', 'band_tag': 'NDVI', 'end_datetime': datetime.datetime(2000, 2, 9, 23, 46, 36, 722217), 'end_row': 77, 'level_name': 'NDVI', 'nodata_value': None, 'path': 91, 'satellite_tag': 'LS7', 'sensor_name': 'ETM+', 'start_datetime': datetime.datetime(2000, 2, 9, 23, 46, 12, 722217), 'start_row': 77, 'tile_layer': 1, 'tile_pathname': '/g/data/v10/tmp/ndvi/LS7_ETM_NDVI_150_-025_2000-02-09T23-46-12.722217.tif', 'x_index': 150, 'y_index': -25} } """ assert type(input_dataset_dict) == dict, 'input_dataset_dict must be a dict' def create_rgb_tif(input_dataset_path, output_dataset_path, pqa_mask=None, rgb_bands=None, input_no_data_value=-999, output_no_data_value=0, input_range=()): if os.path.exists(output_dataset_path): logger.info('Output dataset %s already exists - skipping', output_dataset_path) return if not self.lock_object(output_dataset_path): logger.info('Output dataset %s already locked - skipping', output_dataset_path) return if not rgb_bands: rgb_bands = [3, 1, 2] scale_factor = 10000.0 / 255.0 # Scale factor to translate from +ve int16 to byte input_gdal_dataset = gdal.Open(input_dataset_path) assert input_gdal_dataset, 'Unable to open input dataset %s' % (input_dataset_path) try: # Create multi-band dataset for masked data logger.debug('output_dataset path = %s', output_dataset_path) gdal_driver = gdal.GetDriverByName('GTiff') log_multiline(logger.debug, gdal_driver.GetMetadata(), 'gdal_driver.GetMetadata()') output_gdal_dataset = gdal_driver.Create(output_dataset_path, input_gdal_dataset.RasterXSize, input_gdal_dataset.RasterYSize, len(rgb_bands), gdal.GDT_Byte, ['INTERLEAVE=PIXEL']) #['INTERLEAVE=PIXEL','COMPRESS=NONE','BIGTIFF=YES']) assert output_gdal_dataset, 'Unable to open input dataset %s' % output_dataset_path output_gdal_dataset.SetGeoTransform(input_gdal_dataset.GetGeoTransform()) output_gdal_dataset.SetProjection(input_gdal_dataset.GetProjection()) dest_band_no = 0 for source_band_no in rgb_bands: dest_band_no += 1 logger.debug('Processing source band %d, destination band %d', source_band_no, dest_band_no) input_band_array = input_gdal_dataset.GetRasterBand(source_band_no).ReadAsArray() input_gdal_dataset.FlushCache() output_band_array = (input_band_array / scale_factor).astype(numpy.byte) output_band_array[numpy.logical_or((input_band_array < 0), (input_band_array > 10000))] = output_no_data_value # Set any out-of-bounds values to no-data if pqa_mask is not None: # Need to perform masking output_band_array[numpy.logical_or((input_band_array == input_no_data_value), ~pqa_mask)] = output_no_data_value # Apply PQA mask and no-data value else: output_band_array[(input_band_array == input_no_data_value)] = output_no_data_value # Re-apply no-data value output_band = output_gdal_dataset.GetRasterBand(dest_band_no) output_band.SetNoDataValue(output_no_data_value) output_band.WriteArray(output_band_array) output_band.FlushCache() output_gdal_dataset.FlushCache() finally: self.unlock_object(output_dataset_path) dtype = {'FC_PV' : gdalconst.GDT_Int16, 'FC_NPV' : gdalconst.GDT_Int16, 'FC_BS' : gdalconst.GDT_Int16} no_data_value = {'FC_PV' : -999, 'FC_NPV' : -999, 'FC_BS' : -999} log_multiline(logger.debug, input_dataset_dict, 'input_dataset_dict', '\t') # Test function to copy ORTHO & FC band datasets 
with pixel quality mask applied # to an output directory for stacking output_dataset_dict = {} fc_dataset_info = input_dataset_dict['FC'] # Only need FC data for NDVI #thermal_dataset_info = input_dataset_dict['ORTHO'] # Could have one or two thermal bands if fc_dataset_info is None: logger.info('FC dataset does not exist') return fc_dataset_path = fc_dataset_info['tile_pathname'] if input_dataset_dict['PQA'] is None: logger.info('PQA dataset for %s does not exist', fc_dataset_path) return # Get a boolean mask from the PQA dataset (use default parameters for mask and dilation) pqa_mask = self.get_pqa_mask(input_dataset_dict['PQA']['tile_pathname']) fc_dataset = gdal.Open(fc_dataset_path) assert fc_dataset, 'Unable to open dataset %s' % fc_dataset band_array = None; # List of outputs to generate from each file output_tag_list = ['FC_PV', 'FC_NPV', 'FC_BS'] input_band_index = 0 for output_tag in output_tag_list: # List of outputs to generate from each file # TODO: Make the stack file name reflect the date range output_stack_path = os.path.join(self.output_dir, re.sub('\+', '', '%s_%+04d_%+04d' % (output_tag, stack_output_info['x_index'], stack_output_info['y_index']))) if stack_output_info['start_datetime']: output_stack_path += '_%s' % stack_output_info['start_datetime'].strftime('%Y%m%d') if stack_output_info['end_datetime']: output_stack_path += '_%s' % stack_output_info['end_datetime'].strftime('%Y%m%d') output_stack_path += '_pqa_stack.vrt' output_tile_path = os.path.join(self.output_dir, re.sub('\.\w+$', tile_type_info['file_extension'], re.sub('FC', output_tag, os.path.basename(fc_dataset_path) ) ) ) # Copy metadata for eventual inclusion in stack file output # This could also be written to the output tile if required output_dataset_info = dict(fc_dataset_info) output_dataset_info['tile_pathname'] = output_tile_path # This is the most important modification - used to find tiles to stack output_dataset_info['band_name'] = '%s with PQA mask applied' % output_tag output_dataset_info['band_tag'] = '%s-PQA' % output_tag output_dataset_info['tile_layer'] = 1 output_dataset_info['nodata_value'] = no_data_value[output_tag] # Check for existing, valid file if self.refresh or not os.path.exists(output_tile_path): if self.lock_object(output_tile_path): # Test for concurrent writes to the same file try: # Read whole fc_dataset into one array. # 62MB for float32 data should be OK for memory depending on what else happens downstream if band_array is None: band_array = fc_dataset.ReadAsArray() # Re-project issues with PQ. REDO the contiguity layer. 
non_contiguous = (band_array < 0).any(0) pqa_mask[non_contiguous] = False gdal_driver = gdal.GetDriverByName(tile_type_info['file_format']) #output_dataset = gdal_driver.Create(output_tile_path, # fc_dataset.RasterXSize, fc_dataset.RasterYSize, # 1, fc_dataset.GetRasterBand(1).DataType, # tile_type_info['format_options'].split(',')) output_dataset = gdal_driver.Create(output_tile_path, fc_dataset.RasterXSize, fc_dataset.RasterYSize, 1, dtype[output_tag], tile_type_info['format_options'].split(',')) assert output_dataset, 'Unable to open output dataset %s'% output_dataset output_dataset.SetGeoTransform(fc_dataset.GetGeoTransform()) output_dataset.SetProjection(fc_dataset.GetProjection()) output_band = output_dataset.GetRasterBand(1) # Calculate each output here # Remember band_array indices are zero-based data_array = band_array[input_band_index].copy() if no_data_value[output_tag]: self.apply_pqa_mask(data_array=data_array, pqa_mask=pqa_mask, no_data_value=no_data_value[output_tag]) gdal_driver = gdal.GetDriverByName(tile_type_info['file_format']) #output_dataset = gdal_driver.Create(output_tile_path, # fc_dataset.RasterXSize, fc_dataset.RasterYSize, # 1, fc_dataset.GetRasterBand(1).DataType, # tile_type_info['format_options'].split(',')) output_dataset = gdal_driver.Create(output_tile_path, fc_dataset.RasterXSize, fc_dataset.RasterYSize, 1, dtype[output_tag], tile_type_info['format_options'].split(',')) assert output_dataset, 'Unable to open output dataset %s'% output_dataset output_dataset.SetGeoTransform(fc_dataset.GetGeoTransform()) output_dataset.SetProjection(fc_dataset.GetProjection()) output_band = output_dataset.GetRasterBand(1) output_band.WriteArray(data_array) output_band.SetNoDataValue(output_dataset_info['nodata_value']) output_band.FlushCache() # This is not strictly necessary - copy metadata to output dataset output_dataset_metadata = fc_dataset.GetMetadata() if output_dataset_metadata: output_dataset.SetMetadata(output_dataset_metadata) log_multiline(logger.debug, output_dataset_metadata, 'output_dataset_metadata', '\t') output_dataset.FlushCache() logger.info('Finished writing dataset %s', output_tile_path) finally: self.unlock_object(output_tile_path) else: logger.info('Skipped locked dataset %s', output_tile_path) sleep(5) #TODO: Find a nicer way of dealing with contention for the same output tile else: logger.info('Skipped existing dataset %s', output_tile_path) output_dataset_dict[output_stack_path] = output_dataset_info input_band_index += 1 # log_multiline(logger.debug, output_dataset_info, 'output_dataset_info', '\t') # End of loop fc_rgb_path = os.path.join(self.output_dir, re.sub('\.\w+$', '.tif', # Write to .tif file re.sub('^LS\d_[^_]+_', '', # Remove satellite & sensor reference to allow proper sorting by filename re.sub('FC', # Write to FC_RGB file 'FC_RGB', os.path.basename(fc_dataset_path) ) ) ) ) logger.info('Creating FC RGB output file %s', fc_rgb_path) create_rgb_tif(input_dataset_path=fc_dataset_path, output_dataset_path=fc_rgb_path, pqa_mask=pqa_mask) log_multiline(logger.debug, output_dataset_dict, 'output_dataset_dict', '\t') # Datasets processed - return info return output_dataset_dict
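Two small re.sub tricks do most of the file naming above: the stack name is formatted with '%+04d' so negative tile indices keep their sign, and the outer re.sub then drops the '+' that positive indices would carry; the tile name is derived from the source basename by swapping the processing-level token and then the extension with nested re.sub calls. A self-contained sketch, with an invented basename and tags:

import re

basename = 'LS7_ETM_FC_150_-025_2000-02-09T23-46-12.722217.tif'  # illustrative only
output_tag = 'FC_PV'
file_extension = '.tif'

# swap the processing-level token, then swap the file extension
tile_name = re.sub(r'\.\w+$', file_extension,
                   re.sub('FC', output_tag, basename))

# '%+04d' keeps the sign for negative indices; re.sub removes the '+' on positive ones
stack_name = re.sub(r'\+', '', '%s_%+04d_%+04d' % (output_tag, 150, -25))

print(tile_name)   # LS7_ETM_FC_PV_150_-025_2000-02-09T23-46-12.722217.tif
print(stack_name)  # FC_PV_150_-025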
2
Example 24
View licensedef derive_datasets(self, input_dataset_dict, stack_output_info, tile_type_info): """ Overrides abstract function in stacker class. Called in Stacker.stack_derived() function. Creates PQA-masked NDVI stack Arguments: nbar_dataset_dict: Dict keyed by processing level (e.g. ORTHO, NBAR, PQA, DEM) containing all tile info which can be used within the function A sample is shown below (including superfluous band-specific information): { 'NBAR': {'band_name': 'Visible Blue', 'band_tag': 'B10', 'end_datetime': datetime.datetime(2000, 2, 9, 23, 46, 36, 722217), 'end_row': 77, 'level_name': 'NBAR', 'nodata_value': -999L, 'path': 91, 'satellite_tag': 'LS7', 'sensor_name': 'ETM+', 'start_datetime': datetime.datetime(2000, 2, 9, 23, 46, 12, 722217), 'start_row': 77, 'tile_layer': 1, 'tile_pathname': '/g/data/v10/datacube/EPSG4326_1deg_0.00025pixel/LS7_ETM/150_-025/2000/LS7_ETM_NBAR_150_-025_2000-02-09T23-46-12.722217.tif', 'x_index': 150, 'y_index': -25}, 'ORTHO': {'band_name': 'Thermal Infrared (Low Gain)', 'band_tag': 'B61', 'end_datetime': datetime.datetime(2000, 2, 9, 23, 46, 36, 722217), 'end_row': 77, 'level_name': 'ORTHO', 'nodata_value': 0L, 'path': 91, 'satellite_tag': 'LS7', 'sensor_name': 'ETM+', 'start_datetime': datetime.datetime(2000, 2, 9, 23, 46, 12, 722217), 'start_row': 77, 'tile_layer': 1, 'tile_pathname': '/g/data/v10/datacube/EPSG4326_1deg_0.00025pixel/LS7_ETM/150_-025/2000/LS7_ETM_ORTHO_150_-025_2000-02-09T23-46-12.722217.tif', 'x_index': 150, 'y_index': -25}, 'PQA': {'band_name': 'Pixel Quality Assurance', 'band_tag': 'PQA', 'end_datetime': datetime.datetime(2000, 2, 9, 23, 46, 36, 722217), 'end_row': 77, 'level_name': 'PQA', 'nodata_value': None, 'path': 91, 'satellite_tag': 'LS7', 'sensor_name': 'ETM+', 'start_datetime': datetime.datetime(2000, 2, 9, 23, 46, 12, 722217), 'start_row': 77, 'tile_layer': 1, 'tile_pathname': '/g/data/v10/datacube/EPSG4326_1deg_0.00025pixel/LS7_ETM/150_-025/2000/LS7_ETM_PQA_150_-025_2000-02-09T23-46-12.722217.tif, 'x_index': 150, 'y_index': -25} } Arguments (Cont'd): stack_output_info: dict containing stack output information. Obtained from stacker object. A sample is shown below stack_output_info = {'x_index': 144, 'y_index': -36, 'stack_output_dir': '/g/data/v10/tmp/ndvi', 'start_datetime': None, # Datetime object or None 'end_datetime': None, # Datetime object or None 'satellite': None, # String or None 'sensor': None} # String or None Arguments (Cont'd): tile_type_info: dict containing tile type information. Obtained from stacker object (e.g: stacker.tile_type_dict[tile_type_id]). A sample is shown below {'crs': 'EPSG:4326', 'file_extension': '.tif', 'file_format': 'GTiff', 'format_options': 'COMPRESS=LZW,BIGTIFF=YES', 'tile_directory': 'EPSG4326_1deg_0.00025pixel', 'tile_type_id': 1L, 'tile_type_name': 'Unprojected WGS84 1-degree at 4000 pixels/degree', 'unit': 'degree', 'x_origin': 0.0, 'x_pixel_size': Decimal('0.00025000000000000000'), 'x_pixels': 4000L, 'x_size': 1.0, 'y_origin': 0.0, 'y_pixel_size': Decimal('0.00025000000000000000'), 'y_pixels': 4000L, 'y_size': 1.0} Function must create one or more GDAL-supported output datasets. Useful functions in the Stacker class include Stacker.get_pqa_mask(), but it is left to the coder to produce exactly what is required for a single slice of the temporal stack of derived quantities. Returns: output_dataset_info: Dict keyed by stack filename containing metadata info for GDAL-supported output datasets created by this function. 
Note that the key(s) will be used as the output filename for the VRT temporal stack and each dataset created must contain only a single band. An example is as follows: {'/g/data/v10/tmp/ndvi/NDVI_stack_150_-025.vrt': {'band_name': 'Normalised Differential Vegetation Index with PQA applied', 'band_tag': 'NDVI', 'end_datetime': datetime.datetime(2000, 2, 9, 23, 46, 36, 722217), 'end_row': 77, 'level_name': 'NDVI', 'nodata_value': None, 'path': 91, 'satellite_tag': 'LS7', 'sensor_name': 'ETM+', 'start_datetime': datetime.datetime(2000, 2, 9, 23, 46, 12, 722217), 'start_row': 77, 'tile_layer': 1, 'tile_pathname': '/g/data/v10/tmp/ndvi/LS7_ETM_NDVI_150_-025_2000-02-09T23-46-12.722217.tif', 'x_index': 150, 'y_index': -25} } """ assert type(input_dataset_dict) == dict, 'nbar_dataset_dict must be a dict' dtype = gdalconst.GDT_Float32 # All output is to be float32 no_data_value = numpy.nan log_multiline(logger.debug, input_dataset_dict, 'input_dataset_dict', '\t') # Test function to copy ORTHO & NBAR band datasets with pixel quality mask applied # to an output directory for stacking output_dataset_dict = {} nbar_dataset_info = input_dataset_dict.get('NBAR') # Only need NBAR data for NDVI #thermal_dataset_info = input_dataset_dict['ORTHO'] # Could have one or two thermal bands # Need to skip tiles which don't have an NBAR tile (i.e. for non-mosaiced FC tiles at W & E sides of test area) if nbar_dataset_info is None: logger.warning('NBAR tile does not exist') return None # Nasty work-around for bad PQA due to missing thermal bands for LS8-OLI if nbar_dataset_info['satellite_tag'] == 'LS8' and nbar_dataset_info['sensor_name'] == 'OLI': logger.debug('Work-around for LS8-OLI PQA issue applied: TILE SKIPPED') return None # Instantiate band lookup object with all required lookup parameters lookup = BandLookup(data_cube=self, lookup_scheme_name='LANDSAT-LS5/7', tile_type_id=tile_type_info['tile_type_id'], satellite_tag=nbar_dataset_info['satellite_tag'], sensor_name=nbar_dataset_info['sensor_name'], level_name=nbar_dataset_info['level_name'] ) nbar_dataset_path = nbar_dataset_info['tile_pathname'] if input_dataset_dict.get('PQA') is None: # No PQA tile available return # Get a boolean mask from the PQA dataset (use default parameters for mask and dilation) pqa_mask = self.get_pqa_mask(input_dataset_dict['PQA']['tile_pathname']) log_multiline(logger.debug, pqa_mask, 'pqa_mask', '\t') nbar_dataset = gdal.Open(nbar_dataset_path) assert nbar_dataset, 'Unable to open dataset %s' % nbar_dataset band_array = None; # List of outputs to generate from each file output_tag_list = ['B', 'G', 'R', 'NIR', 'SWIR1', 'SWIR2', 'NDVI', 'EVI', 'NDSI', 'NDMI', 'SLAVI', 'SATVI'] for output_tag in sorted(output_tag_list): # List of outputs to generate from each file # TODO: Make the stack file name reflect the date range output_stack_path = os.path.join(self.output_dir, re.sub('\+', '', '%s_%+04d_%+04d' % (output_tag, stack_output_info['x_index'], stack_output_info['y_index']))) if stack_output_info['start_datetime']: output_stack_path += '_%s' % stack_output_info['start_datetime'].strftime('%Y%m%d') if stack_output_info['end_datetime']: output_stack_path += '_%s' % stack_output_info['end_datetime'].strftime('%Y%m%d') output_stack_path += '_pqa_stack.vrt' output_tile_path = os.path.join(self.output_dir, re.sub('\.\w+$', tile_type_info['file_extension'], re.sub('NBAR', output_tag, os.path.basename(nbar_dataset_path) ) ) ) # Copy metadata for eventual inclusion in stack file output # This could also be written to the output tile 
if required output_dataset_info = dict(nbar_dataset_info) output_dataset_info['tile_pathname'] = output_tile_path # This is the most important modification - used to find tiles to stack output_dataset_info['band_name'] = '%s with PQA mask applied' % output_tag output_dataset_info['band_tag'] = '%s-PQA' % output_tag output_dataset_info['tile_layer'] = 1 output_dataset_info['nodata_value'] = no_data_value # Check for existing, valid file if self.refresh or not os.path.exists(output_tile_path): if self.lock_object(output_tile_path): # Test for concurrent writes to the same file try: # Read whole nbar_dataset into one array. # 62MB for float32 data should be OK for memory depending on what else happens downstream if band_array is None: # Convert to float32 for arithmetic and scale back to 0~1 reflectance band_array = (nbar_dataset.ReadAsArray().astype(numpy.float32)) / SCALE_FACTOR log_multiline(logger.debug, band_array, 'band_array', '\t') # Adjust bands if required for band_tag in lookup.bands: if lookup.adjustment_multiplier[band_tag] != 1.0 or lookup.adjustment_offset[band_tag] != 0.0: logger.debug('Band values adjusted: %s = %s * %s + %s', band_tag, band_tag, lookup.adjustment_multiplier[band_tag], lookup.adjustment_offset[band_tag]) band_array[lookup.band_index[band_tag]] = band_array[lookup.band_index[band_tag]] * lookup.adjustment_multiplier[band_tag] + lookup.adjustment_offset[band_tag] log_multiline(logger.debug, band_array, 'adjusted band_array', '\t') # Re-project issues with PQ. REDO the contiguity layer. non_contiguous = (band_array < 0).any(0) pqa_mask[non_contiguous] = False log_multiline(logger.debug, pqa_mask, 'enhanced pqa_mask', '\t') gdal_driver = gdal.GetDriverByName(tile_type_info['file_format']) #output_dataset = gdal_driver.Create(output_tile_path, # nbar_dataset.RasterXSize, nbar_dataset.RasterYSize, # 1, nbar_dataset.GetRasterBand(1).DataType, # tile_type_info['format_options'].split(',')) output_dataset = gdal_driver.Create(output_tile_path, nbar_dataset.RasterXSize, nbar_dataset.RasterYSize, 1, dtype, tile_type_info['format_options'].split(',')) assert output_dataset, 'Unable to open output dataset %s'% output_dataset output_dataset.SetGeoTransform(nbar_dataset.GetGeoTransform()) output_dataset.SetProjection(nbar_dataset.GetProjection()) output_band = output_dataset.GetRasterBand(1) # Calculate each output here # Remember band_array indices are zero-based if output_tag in lookup.bands: # One of the band tags # Copy values data_array = band_array[lookup.band_index[output_tag]].copy() elif output_tag == 'NDVI': data_array = numexpr.evaluate("((NIR_array - R_array) / (NIR_array + R_array)) + 1", {'NIR_array': band_array[lookup.band_index['NIR']], 'R_array': band_array[lookup.band_index['R']] }) elif output_tag == 'EVI': data_array = numexpr.evaluate("(2.5 * ((NIR_array - R_array) / (NIR_array + (6 * R_array) - (7.5 * B_array) + 1))) + 1", {'NIR_array': band_array[lookup.band_index['NIR']], 'R_array':band_array[lookup.band_index['R']], 'B_array':band_array[lookup.band_index['B']] }) elif output_tag == 'NDSI': data_array = numexpr.evaluate("((R_array - SWIR1_array) / (R_array + SWIR1_array)) + 1", {'SWIR1_array': band_array[lookup.band_index['SWIR1']], 'R_array': band_array[lookup.band_index['R']] }) elif output_tag == 'NDMI': data_array = numexpr.evaluate("((NIR_array - SWIR1_array) / (NIR_array + SWIR1_array)) + 1", {'SWIR1_array': band_array[lookup.band_index['SWIR1']], 'NIR_array': band_array[lookup.band_index['NIR']] }) elif output_tag == 'SLAVI': data_array = 
numexpr.evaluate("NIR_array / (R_array + SWIR1_array)", {'SWIR1_array': band_array[lookup.band_index['SWIR1']], 'NIR_array': band_array[lookup.band_index['NIR']], 'R_array': band_array[lookup.band_index['R']] }) elif output_tag == 'SATVI': data_array = numexpr.evaluate("(((SWIR1_array - R_array) / (SWIR1_array + R_array + 0.5)) * 1.5 - (SWIR2_array / 2)) + 1", {'SWIR1_array': band_array[lookup.band_index['SWIR1']], 'SWIR2_array':band_array[lookup.band_index['SWIR2']], 'R_array':band_array[lookup.band_index['R']] }) else: raise Exception('Invalid operation') log_multiline(logger.debug, data_array, 'data_array', '\t') if no_data_value: self.apply_pqa_mask(data_array=data_array, pqa_mask=pqa_mask, no_data_value=no_data_value) log_multiline(logger.debug, data_array, 'masked data_array', '\t') gdal_driver = gdal.GetDriverByName(tile_type_info['file_format']) #output_dataset = gdal_driver.Create(output_tile_path, # nbar_dataset.RasterXSize, nbar_dataset.RasterYSize, # 1, nbar_dataset.GetRasterBand(1).DataType, # tile_type_info['format_options'].split(',')) output_dataset = gdal_driver.Create(output_tile_path, nbar_dataset.RasterXSize, nbar_dataset.RasterYSize, 1, dtype, tile_type_info['format_options'].split(',')) assert output_dataset, 'Unable to open output dataset %s'% output_dataset output_dataset.SetGeoTransform(nbar_dataset.GetGeoTransform()) output_dataset.SetProjection(nbar_dataset.GetProjection()) output_band = output_dataset.GetRasterBand(1) output_band.WriteArray(data_array) output_band.SetNoDataValue(output_dataset_info['nodata_value']) output_band.FlushCache() # This is not strictly necessary - copy metadata to output dataset output_dataset_metadata = nbar_dataset.GetMetadata() if output_dataset_metadata: output_dataset.SetMetadata(output_dataset_metadata) log_multiline(logger.debug, output_dataset_metadata, 'output_dataset_metadata', '\t') output_dataset.FlushCache() logger.info('Finished writing dataset %s', output_tile_path) finally: self.unlock_object(output_tile_path) else: logger.info('Skipped locked dataset %s', output_tile_path) sleep(5) #TODO: Find a nicer way of dealing with contention for the same output tile else: logger.info('Skipped existing dataset %s', output_tile_path) output_dataset_dict[output_stack_path] = output_dataset_info # log_multiline(logger.debug, output_dataset_info, 'output_dataset_info', '\t') log_multiline(logger.debug, output_dataset_dict, 'output_dataset_dict', '\t') # NDVI dataset processed - return info return output_dataset_dict
2
Example 25
View licensedef derive_datasets(self, input_dataset_dict, stack_output_info, tile_type_info): """ Overrides abstract function in stacker class. Called in Stacker.stack_derived() function. Creates PQA-masked NDVI stack Arguments: nbar_dataset_dict: Dict keyed by processing level (e.g. ORTHO, NBAR, PQA, DEM) containing all tile info which can be used within the function A sample is shown below (including superfluous band-specific information): { 'NBAR': {'band_name': 'Visible Blue', 'band_tag': 'B10', 'end_datetime': datetime.datetime(2000, 2, 9, 23, 46, 36, 722217), 'end_row': 77, 'level_name': 'NBAR', 'nodata_value': -999L, 'path': 91, 'satellite_tag': 'LS7', 'sensor_name': 'ETM+', 'start_datetime': datetime.datetime(2000, 2, 9, 23, 46, 12, 722217), 'start_row': 77, 'tile_layer': 1, 'tile_pathname': '/g/data/v10/datacube/EPSG4326_1deg_0.00025pixel/LS7_ETM/150_-025/2000/LS7_ETM_NBAR_150_-025_2000-02-09T23-46-12.722217.tif', 'x_index': 150, 'y_index': -25}, 'ORTHO': {'band_name': 'Thermal Infrared (Low Gain)', 'band_tag': 'B61', 'end_datetime': datetime.datetime(2000, 2, 9, 23, 46, 36, 722217), 'end_row': 77, 'level_name': 'ORTHO', 'nodata_value': 0L, 'path': 91, 'satellite_tag': 'LS7', 'sensor_name': 'ETM+', 'start_datetime': datetime.datetime(2000, 2, 9, 23, 46, 12, 722217), 'start_row': 77, 'tile_layer': 1, 'tile_pathname': '/g/data/v10/datacube/EPSG4326_1deg_0.00025pixel/LS7_ETM/150_-025/2000/LS7_ETM_ORTHO_150_-025_2000-02-09T23-46-12.722217.tif', 'x_index': 150, 'y_index': -25}, 'PQA': {'band_name': 'Pixel Quality Assurance', 'band_tag': 'PQA', 'end_datetime': datetime.datetime(2000, 2, 9, 23, 46, 36, 722217), 'end_row': 77, 'level_name': 'PQA', 'nodata_value': None, 'path': 91, 'satellite_tag': 'LS7', 'sensor_name': 'ETM+', 'start_datetime': datetime.datetime(2000, 2, 9, 23, 46, 12, 722217), 'start_row': 77, 'tile_layer': 1, 'tile_pathname': '/g/data/v10/datacube/EPSG4326_1deg_0.00025pixel/LS7_ETM/150_-025/2000/LS7_ETM_PQA_150_-025_2000-02-09T23-46-12.722217.tif, 'x_index': 150, 'y_index': -25} } Arguments (Cont'd): stack_output_info: dict containing stack output information. Obtained from stacker object. A sample is shown below stack_output_info = {'x_index': 144, 'y_index': -36, 'stack_output_dir': '/g/data/v10/tmp/ndvi', 'start_datetime': None, # Datetime object or None 'end_datetime': None, # Datetime object or None 'satellite': None, # String or None 'sensor': None} # String or None Arguments (Cont'd): tile_type_info: dict containing tile type information. Obtained from stacker object (e.g: stacker.tile_type_dict[tile_type_id]). A sample is shown below {'crs': 'EPSG:4326', 'file_extension': '.tif', 'file_format': 'GTiff', 'format_options': 'COMPRESS=LZW,BIGTIFF=YES', 'tile_directory': 'EPSG4326_1deg_0.00025pixel', 'tile_type_id': 1L, 'tile_type_name': 'Unprojected WGS84 1-degree at 4000 pixels/degree', 'unit': 'degree', 'x_origin': 0.0, 'x_pixel_size': Decimal('0.00025000000000000000'), 'x_pixels': 4000L, 'x_size': 1.0, 'y_origin': 0.0, 'y_pixel_size': Decimal('0.00025000000000000000'), 'y_pixels': 4000L, 'y_size': 1.0} Function must create one or more GDAL-supported output datasets. Useful functions in the Stacker class include Stacker.get_pqa_mask(), but it is left to the coder to produce exactly what is required for a single slice of the temporal stack of derived quantities. Returns: output_dataset_info: Dict keyed by stack filename containing metadata info for GDAL-supported output datasets created by this function. 
Note that the key(s) will be used as the output filename for the VRT temporal stack and each dataset created must contain only a single band. An example is as follows: {'/g/data/v10/tmp/ndvi/NDVI_stack_150_-025.vrt': {'band_name': 'Normalised Differential Vegetation Index with PQA applied', 'band_tag': 'NDVI', 'end_datetime': datetime.datetime(2000, 2, 9, 23, 46, 36, 722217), 'end_row': 77, 'level_name': 'NDVI', 'nodata_value': None, 'path': 91, 'satellite_tag': 'LS7', 'sensor_name': 'ETM+', 'start_datetime': datetime.datetime(2000, 2, 9, 23, 46, 12, 722217), 'start_row': 77, 'tile_layer': 1, 'tile_pathname': '/g/data/v10/tmp/ndvi/LS7_ETM_NDVI_150_-025_2000-02-09T23-46-12.722217.tif', 'x_index': 150, 'y_index': -25} } """ assert type(input_dataset_dict) == dict, 'nbar_dataset_dict must be a dict' dtype = {'B10' : gdalconst.GDT_Float32, 'B20' : gdalconst.GDT_Float32, 'B30' : gdalconst.GDT_Float32, 'B40' : gdalconst.GDT_Float32, 'B50' : gdalconst.GDT_Float32, 'B70' : gdalconst.GDT_Float32, 'NDVI' : gdalconst.GDT_Float32, 'EVI' : gdalconst.GDT_Float32, 'NDSI' : gdalconst.GDT_Float32, 'NDMI' : gdalconst.GDT_Float32, 'SLAVI' : gdalconst.GDT_Float32, 'SATVI' : gdalconst.GDT_Float32, 'WATER' : gdalconst.GDT_Int16} no_data_value = {'B10' : numpy.nan, 'B20' : numpy.nan, 'B30' : numpy.nan, 'B40' : numpy.nan, 'B50' : numpy.nan, 'B70' : numpy.nan, 'NDVI' : numpy.nan, 'EVI' : numpy.nan, 'NDSI' : numpy.nan, 'NDMI' : numpy.nan, 'SLAVI' : numpy.nan, 'SATVI' : numpy.nan, 'WATER' : -1} log_multiline(logger.debug, input_dataset_dict, 'nbar_dataset_dict', '\t') # Test function to copy ORTHO & NBAR band datasets with pixel quality mask applied # to an output directory for stacking output_dataset_dict = {} nbar_dataset_info = input_dataset_dict['NBAR'] # Only need NBAR data for NDVI #thermal_dataset_info = input_dataset_dict['ORTHO'] # Could have one or two thermal bands nbar_dataset_path = nbar_dataset_info['tile_pathname'] # Get a boolean mask from the PQA dataset (use default parameters for mask and dilation) pqa_mask = self.get_pqa_mask(input_dataset_dict['PQA']['tile_pathname']) nbar_dataset = gdal.Open(nbar_dataset_path) assert nbar_dataset, 'Unable to open NBAR dataset %s' % nbar_dataset band_array = None; # List of outputs to generate from each file output_tag_list = ['B10', 'B20', 'B30', 'B40', 'B50', 'B70', 'NDVI', 'EVI', 'NDSI', 'NDMI', 'SLAVI', 'SATVI'] for output_tag in sorted(output_tag_list): # List of outputs to generate from each file # TODO: Make the stack file name reflect the date range output_stack_path = os.path.join(self.output_dir, re.sub('\+', '', '%s_%+04d_%+04d' % (output_tag, stack_output_info['x_index'], stack_output_info['y_index']))) if stack_output_info['start_datetime']: output_stack_path += '_%s' % stack_output_info['start_datetime'].strftime('%m%d') if stack_output_info['end_datetime']: output_stack_path += '_%s' % stack_output_info['end_datetime'].strftime('%m%d') output_stack_path += '_pqa_stack.vrt' output_tile_path = os.path.join(self.output_dir, re.sub('\.\w+$', tile_type_info['file_extension'], re.sub('NBAR', output_tag, os.path.basename(nbar_dataset_path) ) ) ) # Copy metadata for eventual inclusion in stack file output # This could also be written to the output tile if required output_dataset_info = dict(nbar_dataset_info) output_dataset_info['tile_pathname'] = output_tile_path # This is the most important modification - used to find tiles to stack output_dataset_info['band_name'] = '%s with PQA mask applied' % output_tag output_dataset_info['band_tag'] = '%s-PQA' % 
output_tag output_dataset_info['tile_layer'] = 1 output_dataset_info['nodata_value'] = no_data_value[output_tag] # Check for existing, valid file if self.refresh or not os.path.exists(output_tile_path): if self.lock_object(output_tile_path): # Test for concurrent writes to the same file try: # Read whole nbar_dataset into one array. # 62MB for float32 data should be OK for memory depending on what else happens downstream if band_array is None: # Convert to float32 for arithmetic and scale back to 0~1 reflectance band_array = (nbar_dataset.ReadAsArray().astype(numpy.float32)) / SCALE_FACTOR # Re-project issues with PQ. REDO the contiguity layer. non_contiguous = (band_array < 0).any(0) pqa_mask[non_contiguous] = False gdal_driver = gdal.GetDriverByName(tile_type_info['file_format']) #output_dataset = gdal_driver.Create(output_tile_path, # nbar_dataset.RasterXSize, nbar_dataset.RasterYSize, # 1, nbar_dataset.GetRasterBand(1).DataType, # tile_type_info['format_options'].split(',')) output_dataset = gdal_driver.Create(output_tile_path, nbar_dataset.RasterXSize, nbar_dataset.RasterYSize, 1, dtype[output_tag], tile_type_info['format_options'].split(',')) logger.debug('gdal_driver.Create(%s, %s, %s, %s, %s, %s', output_tile_path, nbar_dataset.RasterXSize, nbar_dataset.RasterYSize, 1, dtype[output_tag], tile_type_info['format_options'].split(',')) assert output_dataset, 'Unable to open output dataset %s' % output_tile_path output_dataset.SetGeoTransform(nbar_dataset.GetGeoTransform()) output_dataset.SetProjection(nbar_dataset.GetProjection()) output_band = output_dataset.GetRasterBand(1) # Calculate each output here # Remember band_array indices are zero-based if output_tag[0] == 'B': # One of the band tags band_file_no = int(output_tag[1:]) # Look up tile_layer (i.e. 
band number) for specified spectral band in tile dataset tile_layer = self.bands[tile_type_info['tile_type_id']][(nbar_dataset_info['satellite_tag'], nbar_dataset_info['sensor_name'])][band_file_no]['tile_layer'] # Copy values data_array = band_array[tile_layer - 1].copy() elif output_tag == 'NDVI': data_array = numexpr.evaluate("((b4 - b3) / (b4 + b3)) + 1", {'b4':band_array[3], 'b3':band_array[2]}) elif output_tag == 'EVI': data_array = numexpr.evaluate("(2.5 * ((b4 - b3) / (b4 + (6 * b3) - (7.5 * b1) + 1))) + 1", {'b4':band_array[3], 'b3':band_array[2], 'b1':band_array[0]}) elif output_tag == 'NDSI': data_array = numexpr.evaluate("((b3 - b5) / (b3 + b5)) + 1", {'b5':band_array[4], 'b3':band_array[2]}) elif output_tag == 'NDMI': data_array = numexpr.evaluate("((b4 - b5) / (b4 + b5)) + 1", {'b5':band_array[4], 'b4':band_array[3]}) elif output_tag == 'SLAVI': data_array = numexpr.evaluate("b4 / (b3 + b5)", {'b5':band_array[4], 'b4':band_array[3], 'b3':band_array[2]}) elif output_tag == 'SATVI': data_array = numexpr.evaluate("(((b5 - b3) / (b5 + b3 + 0.5)) * 1.5 - (b7 / 2)) + 1", {'b5':band_array[4], 'b7':band_array[5], 'b3':band_array[2]}) elif output_tag == 'WATER': data_array = numpy.zeros(band_array[0].shape, dtype=numpy.int16) #TODO: Call water analysis code here else: raise Exception('Invalid operation') if no_data_value[output_tag]: self.apply_pqa_mask(data_array=data_array, pqa_mask=pqa_mask, no_data_value=no_data_value[output_tag]) gdal_driver = gdal.GetDriverByName(tile_type_info['file_format']) #output_dataset = gdal_driver.Create(output_tile_path, # nbar_dataset.RasterXSize, nbar_dataset.RasterYSize, # 1, nbar_dataset.GetRasterBand(1).DataType, # tile_type_info['format_options'].split(',')) output_dataset = gdal_driver.Create(output_tile_path, nbar_dataset.RasterXSize, nbar_dataset.RasterYSize, 1, dtype[output_tag], tile_type_info['format_options'].split(',')) assert output_dataset, 'Unable to open output dataset %s'% output_dataset output_dataset.SetGeoTransform(nbar_dataset.GetGeoTransform()) output_dataset.SetProjection(nbar_dataset.GetProjection()) output_band = output_dataset.GetRasterBand(1) output_band.WriteArray(data_array) output_band.SetNoDataValue(output_dataset_info['nodata_value']) output_band.FlushCache() # This is not strictly necessary - copy metadata to output dataset output_dataset_metadata = nbar_dataset.GetMetadata() if output_dataset_metadata: output_dataset.SetMetadata(output_dataset_metadata) log_multiline(logger.debug, output_dataset_metadata, 'output_dataset_metadata', '\t') output_dataset.FlushCache() logger.info('Finished writing dataset %s', output_tile_path) finally: self.unlock_object(output_tile_path) else: logger.info('Skipped locked dataset %s', output_tile_path) sleep(5) #TODO: Find a nicer way of dealing with contention for the same output tile else: logger.info('Skipped existing dataset %s', output_tile_path) output_dataset_dict[output_stack_path] = output_dataset_info # log_multiline(logger.debug, output_dataset_info, 'output_dataset_info', '\t') log_multiline(logger.debug, output_dataset_dict, 'output_dataset_dict', '\t') # NDVI dataset processed - return info return output_dataset_dict
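For the plain band outputs above, a 'Bxx' tag is parsed into the spectral band number, mapped through a lookup to the 1-based tile layer, and used as a zero-based index into the array read from the tile. A sketch of that dispatch; the lookup table and array below are invented stand-ins for self.bands[...] and ReadAsArray():

import numpy

tile_layer_lookup = {10: 1, 20: 2, 30: 3, 40: 4, 50: 5, 70: 6}   # band number -> tile layer
band_array = numpy.zeros((6, 4, 4), dtype=numpy.float32)          # (layers, y, x) stand-in

output_tag = 'B40'
if output_tag[0] == 'B':
    band_file_no = int(output_tag[1:])              # 'B40' -> 40
    tile_layer = tile_layer_lookup[band_file_no]    # 40 -> layer 4
    data_array = band_array[tile_layer - 1].copy()  # band_array indices are zero-based
    print(data_array.shape)                         # (4, 4)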
2
Example 26
View licensedef process_module(module, path): hppparser = hp.CppHeaderParser() rstparser = rp.RstParser(hppparser) rstparser.parse(module, path) rst = rstparser.definitions hdrlist = [] for root, dirs, files in os.walk(os.path.join(path, "include")): for filename in fnmatch.filter(files, "*.h*"): hdrlist.append(os.path.join(root, filename)) if module == "gpu": hdrlist.append(os.path.join(path, "..", "core", "include", "opencv2", "core", "cuda_devptrs.hpp")) hdrlist.append(os.path.join(path, "..", "core", "include", "opencv2", "core", "gpumat.hpp")) decls = [] for hname in hdrlist: if not "ts_gtest.h" in hname: decls += hppparser.parse(hname, wmode=False) funcs = [] # not really needed to hardcode all the namespaces. Normally all they are collected automatically namespaces = ['cv', 'cv.gpu', 'cvflann', 'cvflann.anyimpl', 'cvflann.lsh', 'cv.flann', 'cv.linemod', 'cv.detail', 'cvtest', 'perf', 'cv.videostab'] classes = [] structs = [] # collect namespaces and classes/structs for decl in decls: if decl[0].startswith("const"): pass elif decl[0].startswith("class") or decl[0].startswith("struct"): if decl[0][0] == 'c': classes.append(decl) else: structs.append(decl) dotIdx = decl[0].rfind('.') if dotIdx > 0: namespace = decl[0][decl[0].find(' ')+1:dotIdx] if not [c for c in classes if c[0].endswith(namespace)] and not [s for s in structs if s[0].endswith(namespace)]: if namespace not in namespaces: namespaces.append(namespace) else: funcs.append(decl) clsnamespaces = [] # process classes for cl in classes: name = cl[0][cl[0].find(' ')+1:] if name.find('.') < 0 and not name.startswith("Cv"): logerror(ERROR_004_MISSEDNAMESPACE, "class " + name + " from opencv_" + module + " is placed in global namespace but violates C-style naming convention") clsnamespaces.append(name) if do_python_crosscheck and not name.startswith("cv.") and name.startswith("Cv"): clsnamespaces.append("cv." 
+ name[2:]) if name.startswith("cv."): name = name[3:] name = name.replace(".", "::") sns = synonims.get(name, []) sns.append(name) for name in sns: doc = rst.get(name) if not doc: #TODO: class is not documented continue doc[DOCUMENTED_MARKER] = True # verify class marker if not doc.get("isclass"): logerror(ERROR_001_NOTACLASS, "class " + name + " is not marked as \"class\" in documentation", doc) else: # verify base signature = doc.get("class", "") signature = signature.replace(" public ", " ") namespaceIdx = signature.rfind("::") signature = ("class " + signature).strip() hdrsignature = ("class " + name + " " + cl[1]).replace(".", "::").replace("cv::","").strip() if signature != hdrsignature: logerror(ERROR_003_INCORRECTBASE, "invalid base class documentation\ndocumented: " + signature + "\nactual: " + hdrsignature, doc) # process structs for st in structs: name = st[0][st[0].find(' ')+1:] if name.find('.') < 0 and not name.startswith("Cv"): logerror(ERROR_004_MISSEDNAMESPACE, "struct " + name + " from opencv_" + module + " is placed in global namespace but violates C-style naming convention") clsnamespaces.append(name) if name.startswith("cv."): name = name[3:] name = name.replace(".", "::") doc = rst.get(name) if not doc: #TODO: struct is not documented continue doc[DOCUMENTED_MARKER] = True # verify struct marker if not doc.get("isstruct"): logerror(ERROR_002_NOTASTRUCT, "struct " + name + " is not marked as \"struct\" in documentation", doc) else: # verify base signature = doc.get("class", "") signature = signature.replace(", public ", " ").replace(" public ", " ") signature = signature.replace(", protected ", " ").replace(" protected ", " ") signature = signature.replace(", private ", " ").replace(" private ", " ") signature = ("struct " + signature).strip() hdrsignature = (st[0] + " " + st[1]).replace("struct cv.", "struct ").replace(".", "::").strip() if signature != hdrsignature: logerror(ERROR_003_INCORRECTBASE, "invalid base struct documentation\ndocumented: " + signature + "\nactual: " + hdrsignature, doc) print st, doc # process functions and methods flookup = {} for fn in funcs: name = fn[0] parent = None namespace = None for cl in clsnamespaces: if name.startswith(cl + "."): if cl.startswith(parent or ""): parent = cl if parent: name = name[len(parent) + 1:] for nm in namespaces: if parent.startswith(nm + "."): if nm.startswith(namespace or ""): namespace = nm if namespace: parent = parent[len(namespace) + 1:] else: for nm in namespaces: if name.startswith(nm + "."): if nm.startswith(namespace or ""): namespace = nm if namespace: name = name[len(namespace) + 1:] #print namespace, parent, name, fn[0] if not namespace and not parent and not name.startswith("cv") and not name.startswith("icv") and not name.startswith("CV_"): logerror(ERROR_004_MISSEDNAMESPACE, "function " + name + " from opencv_" + module + " is placed in global namespace but violates C-style naming convention") else: fdescr = (namespace, parent, name, fn) flookup_entry = flookup.get(fn[0], []) flookup_entry.append(fdescr) flookup[fn[0]] = flookup_entry if do_python_crosscheck: for name, doc in rst.iteritems(): decls = doc.get("decls") if not decls: continue for signature in decls: if signature[0] == "Python1": pname = signature[1][:signature[1].find('(')] try: fn = getattr(cv2.cv, pname[3:]) docstr = "cv." + fn.__doc__ except AttributeError: logerror(ERROR_005_MISSINGPYFUNC, "could not load documented function: cv2." 
+ pname, doc) continue docstring = docstr sign = signature[1] signature.append(DOCUMENTED_MARKER) # convert old signature to pydoc style if docstring.endswith("*"): docstring = docstring[:-1] s = None while s != sign: s = sign sign = re.sub(r"^(.*\(.*)\(.*?\)(.*\) *->)", "\\1_\\2", sign) s = None while s != sign: s = sign sign = re.sub(r"\s*,\s*([^,]+)\s*=\s*[^,]+\s*(( \[.*\])?)\)", " [, \\1\\2])", sign) sign = re.sub(r"\(\s*([^,]+)\s*=\s*[^,]+\s*(( \[.*\])?)\)", "([\\1\\2])", sign) sign = re.sub(r"\)\s*->\s*", ") -> ", sign) sign = sign.replace("-> convexHull", "-> CvSeq") sign = sign.replace("-> lines", "-> CvSeq") sign = sign.replace("-> boundingRects", "-> CvSeq") sign = sign.replace("-> contours", "-> CvSeq") sign = sign.replace("-> retval", "-> int") sign = sign.replace("-> detectedObjects", "-> CvSeqOfCvAvgComp") def retvalRplace(match): m = match.group(1) m = m.replace("CvScalar", "scalar") m = m.replace("CvMemStorage", "memstorage") m = m.replace("ROIplImage", "image") m = m.replace("IplImage", "image") m = m.replace("ROCvMat", "mat") m = m.replace("CvMat", "mat") m = m.replace("double", "float") m = m.replace("CvSubdiv2DPoint", "point") m = m.replace("CvBox2D", "Box2D") m = m.replace("IplConvKernel", "kernel") m = m.replace("CvHistogram", "hist") m = m.replace("CvSize", "width,height") m = m.replace("cvmatnd", "matND") m = m.replace("CvSeqOfCvConvexityDefect", "convexityDefects") mm = m.split(',') if len(mm) > 1: return "(" + ", ".join(mm) + ")" else: return m docstring = re.sub(r"(?<=-> )(.*)$", retvalRplace, docstring) docstring = docstring.replace("( [, ", "([") if sign != docstring: logerror(ERROR_006_INVALIDPYOLDDOC, "old-style documentation differs from pydoc\npydoc: " + docstring + "\nfixup: " + sign + "\ncvdoc: " + signature[1], doc) elif signature[0] == "Python2": pname = signature[1][4:signature[1].find('(')] cvname = "cv." + pname parent = None for cl in clsnamespaces: if cvname.startswith(cl + "."): if cl.startswith(parent or ""): parent = cl try: if parent: instance, clsname = get_cv2_object(parent) fn = getattr(instance, cvname[len(parent)+1:]) docstr = fn.__doc__ docprefix = "cv2." + clsname + "." else: fn = getattr(cv2, pname) docstr = fn.__doc__ docprefix = "cv2." except AttributeError: if parent: logerror(ERROR_005_MISSINGPYFUNC, "could not load documented member of " + parent + " class: cv2." + pname, doc) else: logerror(ERROR_005_MISSINGPYFUNC, "could not load documented function cv2." 
+ pname, doc) signature.append(DOCUMENTED_MARKER) # stop subsequent errors continue docstrings = [docprefix + s.replace("([, ", "([") for s in docstr.split(" or ")] if not signature[1] in docstrings: pydocs = "\npydoc: ".join(docstrings) logerror(ERROR_007_INVALIDPYDOC, "documentation differs from pydoc\npydoc: " + pydocs + "\ncvdoc: " + signature[1], doc) signature.append(DOCUMENTED_MARKER) # verify C/C++ signatures for name, doc in rst.iteritems(): decls = doc.get("decls") if not decls: continue for signature in decls: if signature[0] == "C" or signature[0] == "C++": if "template" in (signature[2][1] or ""): # TODO find a way to validate templates signature.append(DOCUMENTED_MARKER) continue fd = flookup.get(signature[2][0]) if not fd: if signature[2][0].startswith("cv."): fd = flookup.get(signature[2][0][3:]) if not fd: continue else: signature[2][0] = signature[2][0][3:] if signature[0] == "C": ffd = [f for f in fd if not f[0] and not f[1]] # filter out C++ stuff if not ffd: if fd[0][1]: logerror(ERROR_008_CFUNCISNOTGLOBAL, "function " + fd[0][2] + " is documented as C function but is actually member of " + fd[0][1] + " class", doc) elif fd[0][0]: logerror(ERROR_008_CFUNCISNOTGLOBAL, "function " + fd[0][2] + " is documented as C function but is actually placed in " + fd[0][0] + " namespace", doc) fd = ffd error = None for f in fd: match, error = compareSignatures(signature[2], f[3]) if match: signature.append(DOCUMENTED_MARKER) break if signature[-1] != DOCUMENTED_MARKER: candidates = "\n\t".join([formatSignature(f[3]) for f in fd]) logerror(ERROR_009_OVERLOADNOTFOUND, signature[0] + " function " + signature[2][0].replace(".","::") + " is documented but misses in headers (" + error + ").\nDocumented as:\n\t" + signature[1] + "\nCandidates are:\n\t" + candidates, doc) signature.append(DOCUMENTED_MARKER) # to stop subsequent error on this function # verify that all signatures was found in the library headers for name, doc in rst.iteritems(): # if doc.get(DOCUMENTED_MARKER, False): # continue # this class/struct was found if not doc.get(DOCUMENTED_MARKER, False) and (doc.get("isclass", False) or doc.get("isstruct", False)): if name in doc_signatures_whitelist: continue logerror(ERROR_010_UNKNOWNCLASS, "class/struct " + name + " is mentioned in documentation but is not found in OpenCV headers", doc) for d in doc.get("decls", []): if d[-1] != DOCUMENTED_MARKER: if d[0] == "C" or d[0] =="C++" or (do_python_crosscheck and d[0].startswith("Python")): if d[0][0] == 'C': sname = d[2][0][3:].replace(".", "::") if sname in defines: #TODO: need to find a way to verify #define's continue else: sname = d[1][:d[1].find("(")] prefixes = [x for x in doc_signatures_whitelist if sname.startswith(x)] if prefixes: # TODO: member of template class continue logerror(ERROR_011_UNKNOWNFUNC, d[0] + " function " + sname + " is documented but is not found in OpenCV headers. It is documented as:\n\t" + d[1], doc)
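Two re.sub features carry most of the signature normalisation above: a replacement callable (retvalRplace) that rewrites only the captured return type, anchored by a lookbehind on '-> ', and a fixed-point loop that re-applies a substitution until the string stops changing. A short sketch with a made-up signature and a much smaller type map:

import re

type_map = {'IplImage': 'image', 'CvMat': 'mat', 'double': 'float'}  # toy subset

def retval_replace(match):
    out = match.group(1)
    for old, new in type_map.items():
        out = out.replace(old, new)
    return out

# only the text after '-> ' is rewritten, thanks to the lookbehind
print(re.sub(r"(?<=-> )(.*)$", retval_replace, "cv.GetMat(arr) -> CvMat"))
# cv.GetMat(arr) -> mat

# fixed-point loop: fold ', name=default' arguments into pydoc-style '[, name]' groups
sign = "cv.foo(a, b=1, c=2) -> retval"
prev = None
while prev != sign:
    prev = sign
    sign = re.sub(r"\s*,\s*([^,]+)\s*=\s*[^,]+\s*(( \[.*\])?)\)", " [, \\1\\2])", sign)
print(sign)  # cv.foo(a [, b [, c]]) -> retval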
2
Example 27
View licensedef configure_step(self): """Custom configuration procedure for ALADIN.""" # unset $LIBRARY_PATH set by modules of dependencies, because it may screw up linking if 'LIBRARY_PATH' in os.environ: self.log.debug("Unsetting $LIBRARY_PATH (was: %s)" % os.environ['LIBRARY_PATH']) self.orig_library_path = os.environ.pop('LIBRARY_PATH') # build auxiliary libraries auxlibs_dir = None my_gnu = None if self.toolchain.comp_family() == toolchain.GCC: my_gnu = 'y' # gfortran for var in ['CFLAGS', 'CXXFLAGS', 'F90FLAGS', 'FFLAGS']: flags = os.getenv(var) env.setvar(var, "%s -fdefault-real-8 -fdefault-double-8" % flags) self.log.info("Updated %s to '%s'" % (var, os.getenv(var))) elif self.toolchain.comp_family() == toolchain.INTELCOMP: my_gnu = 'i' # icc/ifort else: raise EasyBuildError("Don't know how to set 'my_gnu' variable in auxlibs build script.") self.log.info("my_gnu set to '%s'" % my_gnu) tmp_installroot = tempfile.mkdtemp(prefix='aladin_auxlibs_') try: cwd = os.getcwd() os.chdir(self.builddir) builddirs = os.listdir(self.builddir) auxlibs_dir = [x for x in builddirs if x.startswith('auxlibs_installer')][0] os.chdir(auxlibs_dir) auto_driver = 'driver_automatic' for line in fileinput.input(auto_driver, inplace=1, backup='.orig.eb'): line = re.sub(r"^(my_gnu\s*=\s*).*$", r"\1%s" % my_gnu, line) line = re.sub(r"^(my_r32\s*=\s*).*$", r"\1n", line) # always 64-bit real precision line = re.sub(r"^(my_readonly\s*=\s*).*$", r"\1y", line) # make libs read-only after build line = re.sub(r"^(my_installroot\s*=\s*).*$", r"\1%s" % tmp_installroot, line) sys.stdout.write(line) run_cmd("./%s" % auto_driver) os.chdir(cwd) except OSError, err: raise EasyBuildError("Failed to build ALADIN: %s", err) # build gmkpack, update PATH and set GMKROOT # we build gmkpack here because a config file is generated in the gmkpack isntall path try: gmkpack_dir = [x for x in builddirs if x.startswith('gmkpack')][0] os.chdir(os.path.join(self.builddir, gmkpack_dir)) qa = { 'Do you want to run the configuration file maker assistant now (y) or later [n] ?': 'n', } run_cmd_qa("./build_gmkpack", qa) os.chdir(cwd) paths = os.getenv('PATH').split(':') paths.append(os.path.join(self.builddir, gmkpack_dir, 'util')) env.setvar('PATH', ':'.join(paths)) env.setvar('GMKROOT', os.path.join(self.builddir, gmkpack_dir)) except OSError, err: raise EasyBuildError("Failed to build gmkpack: %s", err) # generate gmkpack configuration file self.conf_file = 'ALADIN_%s' % self.version self.conf_filepath = os.path.join(self.builddir, 'gmkpack_support', 'arch', '%s.x' % self.conf_file) try: if os.path.exists(self.conf_filepath): os.remove(self.conf_filepath) self.log.info("Removed existing gmpack config file %s" % self.conf_filepath) archdir = os.path.dirname(self.conf_filepath) if not os.path.exists(archdir): mkdir(archdir, parents=True) except OSError, err: raise EasyBuildError("Failed to remove existing file %s: %s", self.conf_filepath, err) mpich = 'n' known_mpi_libs = [toolchain.MPICH, toolchain.MPICH2, toolchain.INTELMPI] if self.toolchain.options.get('usempi', None) and self.toolchain.mpi_family() in known_mpi_libs: mpich = 'y' qpref = 'Please type the ABSOLUTE name of ' qsuff = ', or ignore (environment variables allowed) :' qsuff2 = ', or ignore : (environment variables allowed) :' comp_fam = self.toolchain.comp_family() if comp_fam == toolchain.GCC: gribdir = 'GNU' elif comp_fam == toolchain.INTELCOMP: gribdir = 'INTEL' else: raise EasyBuildError("Don't know which grib lib dir to use for compiler %s", comp_fam) aux_lib_gribex = 
os.path.join(tmp_installroot, gribdir, 'lib', 'libgribex.a') aux_lib_ibm = os.path.join(tmp_installroot, gribdir, 'lib', 'libibmdummy.a') grib_api_lib = os.path.join(get_software_root('grib_api'), 'lib', 'libgrib_api.a') grib_api_f90_lib = os.path.join(get_software_root('grib_api'), 'lib', 'libgrib_api_f90.a') grib_api_inc = os.path.join(get_software_root('grib_api'), 'include') jasperlib = os.path.join(get_software_root('JasPer'), 'lib', 'libjasper.a') mpilib = os.path.join(os.getenv('MPI_LIB_DIR'), os.getenv('MPI_LIB_SHARED')) # netCDF netcdf = get_software_root('netCDF') netcdf_fortran = get_software_root('netCDF-Fortran') if netcdf: netcdfinc = os.path.join(netcdf, 'include') if netcdf_fortran: netcdflib = os.path.join(netcdf_fortran, get_software_libdir('netCDF-Fortran'), 'libnetcdff.a') else: netcdflib = os.path.join(netcdf, get_software_libdir('netCDF'), 'libnetcdff.a') if not os.path.exists(netcdflib): raise EasyBuildError("%s does not exist", netcdflib) else: raise EasyBuildError("netCDF(-Fortran) not available") ldpaths = [ldflag[2:] for ldflag in os.getenv('LDFLAGS').split(' ')] # LDFLAGS have form '-L/path/to' lapacklibs = [] for lib in os.getenv('LAPACK_STATIC_LIBS').split(','): libpaths = [os.path.join(ldpath, lib) for ldpath in ldpaths] lapacklibs.append([libpath for libpath in libpaths if os.path.exists(libpath)][0]) lapacklib = ' '.join(lapacklibs) blaslibs = [] for lib in os.getenv('BLAS_STATIC_LIBS').split(','): libpaths = [os.path.join(ldpath, lib) for ldpath in ldpaths] blaslibs.append([libpath for libpath in libpaths if os.path.exists(libpath)][0]) blaslib = ' '.join(blaslibs) qa = { 'Do you want to run the configuration file maker assistant now (y) or later [n] ?': 'y', 'Do you want to setup your configuration file for MPICH (y/n) [n] ?': mpich, 'Please type the directory name where to find a dummy file mpif.h or ignore :': os.getenv('MPI_INC_DIR'), '%sthe library gribex or emos%s' % (qpref, qsuff2): aux_lib_gribex, '%sthe library ibm%s' % (qpref, qsuff): aux_lib_ibm, '%sthe library grib_api%s' % (qpref, qsuff): grib_api_lib, '%sthe library grib_api_f90%s' % (qpref, qsuff): grib_api_f90_lib, '%sthe JPEG auxilary library if enabled by Grib_api%s' % (qpref, qsuff2): jasperlib, '%sthe library netcdf%s' % (qpref, qsuff): netcdflib, '%sthe library lapack%s' % (qpref, qsuff): lapacklib, '%sthe library blas%s' % (qpref, qsuff): blaslib, '%sthe library mpi%s' % (qpref, qsuff): mpilib, '%sa MPI dummy library for serial executions, or ignore :' % qpref: '', 'Please type the directory name where to find grib_api headers, or ignore :': grib_api_inc, 'Please type the directory name where to find fortint.h or ignore :': '', 'Please type the directory name where to find netcdf headers, or ignore :': netcdfinc, 'Do you want to define CANARI (y/n) [y] ?': 'y', 'Please type the name of the script file used to generate a preprocessed blacklist file, or ignore :': '', 'Please type the name of the script file used to recover local libraries (gget), or ignore :': '', 'Please type the options to tune the gnu compilers, or ignore :': os.getenv('F90FLAGS'), } f90_seq = os.getenv('F90_SEQ') if not f90_seq: # F90_SEQ is only defined when usempi is enabled f90_seq = os.getenv('F90') stdqa = OrderedDict([ (r'Confirm library .* is .*', 'y'), # this one needs to be tried first! 
(r'.*fortran 90 compiler name .*\s*:\n\(suggestions\s*: .*\)', os.getenv('F90')), (r'.*fortran 90 compiler interfaced with .*\s*:\n\(suggestions\s*: .*\)', f90_seq), (r'Please type the ABSOLUTE name of .*library.*, or ignore\s*[:]*\s*[\n]*.*', ''), (r'Please .* to save this draft configuration file :\n.*', '%s.x' % self.conf_file), ]) no_qa = [ ".*ignored.", ] env.setvar('GMKTMP', self.builddir) env.setvar('GMKFILE', self.conf_file) run_cmd_qa("gmkfilemaker", qa, std_qa=stdqa, no_qa=no_qa) # set environment variables for installation dirs env.setvar('ROOTPACK', os.path.join(self.installdir, 'rootpack')) env.setvar('ROOTBIN', os.path.join(self.installdir, 'rootpack')) env.setvar('HOMEPACK', os.path.join(self.installdir, 'pack')) env.setvar('HOMEBIN', os.path.join(self.installdir, 'pack')) # patch config file to include right Fortran compiler flags regex_subs = [(r"^(FRTFLAGS\s*=.*)$", r"\1 %s" % os.getenv('FFLAGS'))] apply_regex_substitutions(self.conf_filepath, regex_subs)
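The driver edits above rely on fileinput's in-place mode: each line is rewritten with re.sub so that the captured "key = " prefix is re-emitted and only the value changes, and whatever is written to stdout replaces the file's contents (the backup suffix keeps the original). A self-contained sketch against a throwaway stand-in file named driver_automatic:

import fileinput
import re
import sys

settings = {'my_gnu': 'y', 'my_r32': 'n', 'my_installroot': '/tmp/aladin_aux'}  # illustrative

# create a tiny stand-in driver file so the sketch runs on its own
with open('driver_automatic', 'w') as handle:
    handle.write("my_gnu = i\nmy_r32 = y\nmy_installroot = /scratch\n")

for line in fileinput.input('driver_automatic', inplace=1, backup='.orig.eb'):
    for key, value in settings.items():
        # \g<1> re-emits the captured "key = " prefix; it is the unambiguous spelling
        # of the \1 backreference used in the listing (safe if the value starts with a digit)
        line = re.sub(r"^(%s\s*=\s*).*$" % key, r"\g<1>%s" % value, line)
    sys.stdout.write(line)

print(open('driver_automatic').read())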
2
Example 28
View licensedef configure_step(self): """Custom configuration procedure for Quantum ESPRESSO.""" if self.toolchain.options.get('openmp', False) or self.cfg['hybrid']: self.cfg.update('configopts', '--enable-openmp') if not self.toolchain.options.get('usempi', None): self.cfg.update('configopts', '--disable-parallel') if not self.cfg['with_scalapack']: self.cfg.update('configopts', '--without-scalapack') repls = [] if self.toolchain.comp_family() in [toolchain.INTELCOMP]: # set preprocessor command (-E to stop after preprocessing, -C to preserve comments) cpp = "%s -E -C" % os.getenv('CC') repls.append(('CPP', cpp, False)) env.setvar('CPP', cpp) # also define $FCCPP, but do *not* include -C (comments should not be preserved when preprocessing Fortran) env.setvar('FCCPP', "%s -E" % os.getenv('CC')) super(EB_QuantumESPRESSO, self).configure_step() # compose list of DFLAGS (flag, value, keep_stuff) # for guidelines, see include/defs.h.README in sources dflags = [] comp_fam_dflags = { toolchain.INTELCOMP: '-D__INTEL', toolchain.GCC: '-D__GFORTRAN -D__STD_F95', } dflags.append(comp_fam_dflags[self.toolchain.comp_family()]) if self.toolchain.options.get('openmp', False): libfft = os.getenv('LIBFFT_MT') else: libfft = os.getenv('LIBFFT') if libfft: if "fftw3" in libfft: dflags.append('-D__FFTW3') else: dflags.append('-D__FFTW') env.setvar('FFTW_LIBS', libfft) if get_software_root('ACML'): dflags.append('-D__ACML') if self.toolchain.options.get('usempi', None): dflags.append('-D__MPI -D__PARA') if self.toolchain.options.get('openmp', False) or self.cfg['hybrid']: dflags.append(" -D__OPENMP") if self.cfg['with_scalapack']: dflags.append(" -D__SCALAPACK") # always include -w to supress warnings dflags.append('-w') repls.append(('DFLAGS', ' '.join(dflags), False)) # complete C/Fortran compiler and LD flags if self.toolchain.options.get('openmp', False) or self.cfg['hybrid']: repls.append(('LDFLAGS', self.toolchain.get_flag('openmp'), True)) repls.append(('(?:C|F90|F)FLAGS', self.toolchain.get_flag('openmp'), True)) # obtain library settings libs = [] for lib in ['BLAS', 'LAPACK', 'FFT', 'SCALAPACK']: if self.toolchain.options.get('openmp', False): val = os.getenv('LIB%s_MT' % lib) else: val = os.getenv('LIB%s' % lib) repls.append(('%s_LIBS' % lib, val, False)) libs.append(val) libs = ' '.join(libs) repls.append(('BLAS_LIBS_SWITCH', 'external', False)) repls.append(('LAPACK_LIBS_SWITCH', 'external', False)) repls.append(('LD_LIBS', os.getenv('LIBS'), False)) self.log.debug("List of replacements to perform: %s" % repls) # patch make.sys file fn = os.path.join(self.cfg['start_dir'], 'make.sys') try: for line in fileinput.input(fn, inplace=1, backup='.orig.eb'): for (k, v, keep) in repls: # need to use [ \t]* instead of \s*, because vars may be undefined as empty, # and we don't want to include newlines if keep: line = re.sub(r"^(%s\s*=[ \t]*)(.*)$" % k, r"\1\2 %s" % v, line) else: line = re.sub(r"^(%s\s*=[ \t]*).*$" % k, r"\1%s" % v, line) # fix preprocessing directives for .f90 files in make.sys if required if self.toolchain.comp_family() in [toolchain.GCC]: line = re.sub(r"\$\(MPIF90\) \$\(F90FLAGS\) -c \$<", "$(CPP) -C $(CPPFLAGS) $< -o $*.F90\n" + "\t$(MPIF90) $(F90FLAGS) -c $*.F90 -o $*.o", line) sys.stdout.write(line) except IOError, err: raise EasyBuildError("Failed to patch %s: %s", fn, err) self.log.debug("Contents of patched %s: %s" % (fn, open(fn, "r").read())) # patch default make.sys for wannier if LooseVersion(self.version) >= LooseVersion("5"): fn = os.path.join(self.cfg['start_dir'], 
'install', 'make_wannier90.sys') else: fn = os.path.join(self.cfg['start_dir'], 'plugins', 'install', 'make_wannier90.sys') try: for line in fileinput.input(fn, inplace=1, backup='.orig.eb'): line = re.sub(r"^(LIBS\s*=\s*).*", r"\1%s" % libs, line) sys.stdout.write(line) except IOError, err: raise EasyBuildError("Failed to patch %s: %s", fn, err) self.log.debug("Contents of patched %s: %s" % (fn, open(fn, "r").read())) # patch Makefile of want plugin wantprefix = 'want-' wantdirs = [d for d in os.listdir(self.builddir) if d.startswith(wantprefix)] if len(wantdirs) > 1: raise EasyBuildError("Found more than one directory with %s prefix, help!", wantprefix) if len(wantdirs) != 0: wantdir = os.path.join(self.builddir, wantdirs[0]) make_sys_in_path = None cand_paths = [os.path.join('conf', 'make.sys.in'), os.path.join('config', 'make.sys.in')] for path in cand_paths: full_path = os.path.join(wantdir, path) if os.path.exists(full_path): make_sys_in_path = full_path break if make_sys_in_path is None: raise EasyBuildError("Failed to find make.sys.in in want directory %s, paths considered: %s", wantdir, ', '.join(cand_paths)) try: for line in fileinput.input(make_sys_in_path, inplace=1, backup='.orig.eb'): # fix preprocessing directives for .f90 files in make.sys if required if self.toolchain.comp_family() in [toolchain.GCC]: line = re.sub("@f90rule@", "$(CPP) -C $(CPPFLAGS) $< -o $*.F90\n" + "\t$(MPIF90) $(F90FLAGS) -c $*.F90 -o $*.o", line) sys.stdout.write(line) except IOError, err: raise EasyBuildError("Failed to patch %s: %s", fn, err) # move non-espresso directories to where they're expected and create symlinks try: dirnames = [d for d in os.listdir(self.builddir) if not d.startswith('espresso')] targetdir = os.path.join(self.builddir, "espresso-%s" % self.version) for dirname in dirnames: shutil.move(os.path.join(self.builddir, dirname), os.path.join(targetdir, dirname)) self.log.info("Moved %s into %s" % (dirname, targetdir)) dirname_head = dirname.split('-')[0] linkname = None if dirname_head == 'sax': linkname = 'SaX' if dirname_head == 'wannier90': linkname = 'W90' elif dirname_head in ['gipaw', 'plumed', 'want', 'yambo']: linkname = dirname_head.upper() if linkname: os.symlink(os.path.join(targetdir, dirname), os.path.join(targetdir, linkname)) except OSError, err: raise EasyBuildError("Failed to move non-espresso directories: %s", err)
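The make.sys patching above uses the same regex in two flavours: with keep, the existing right-hand side is captured and the new flags are appended after it; without keep, the right-hand side is simply overwritten. A small sketch of that choice on invented lines:

import re

def patch_assignment(line, key, value, keep):
    if keep:
        # \1 is the "KEY = " prefix, \2 the existing value; append the new flags
        return re.sub(r"^(%s\s*=[ \t]*)(.*)$" % key, r"\1\2 %s" % value, line)
    # otherwise replace the right-hand side entirely
    return re.sub(r"^(%s\s*=[ \t]*).*$" % key, r"\1%s" % value, line)

print(patch_assignment("LDFLAGS = -g -O2", "LDFLAGS", "-fopenmp", keep=True))
# LDFLAGS = -g -O2 -fopenmp
print(patch_assignment("DFLAGS  = -D__OLD", "DFLAGS", "-D__INTEL -w", keep=False))
# DFLAGS  = -D__INTEL -w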
2
Example 29
View licensedef make_python_identifier(string, namespace=None, reserved_words=None, convert='drop', handle='force'): """ Takes an arbitrary string and creates a valid Python identifier. If the input string is in the namespace, return its value. If the python identifier created is already in the namespace, but the input string is not (ie, two similar strings resolve to the same python identifier) or if the identifier is a reserved word in the reserved_words list, or is a python default reserved word, adds _1, or if _1 is in the namespace, _2, etc. Parameters ---------- string : <basestring> The text to be converted into a valid python identifier namespace : <dictionary> Map of existing translations into python safe identifiers. This is to ensure that two strings are not translated into the same python identifier reserved_words : <list of strings> List of words that are reserved (because they have other meanings in this particular program, such as also being the names of libraries, etc.) convert : <string> Tells the function what to do with characters that are not valid in python identifiers - 'hex' implies that they will be converted to their hexadecimal representation. This is handy if you have variables that have a lot of reserved characters, or you don't want the name to be dependent on when things were added to the namespace - 'drop' implies that they will just be dropped altogether handle : <string> Tells the function how to deal with namespace conflicts - 'force' will create a representation which is not in conflict by appending _n to the resulting variable where n is the lowest number necessary to avoid a conflict - 'throw' will raise an exception Returns ------- identifier : <string> A valid python identifier based on the input string namespace : <dictionary> An updated map of the translations of words to python identifiers, including the passed in 'string'. 
Examples -------- >>> make_python_identifier('Capital') ('capital', {'Capital': 'capital'}) >>> make_python_identifier('multiple words') ('multiple_words', {'multiple words': 'multiple_words'}) >>> make_python_identifier('multiple spaces') ('multiple_spaces', {'multiple spaces': 'multiple_spaces'}) When the name is a python keyword, add '_1' to differentiate it >>> make_python_identifier('for') ('for_1', {'for': 'for_1'}) Remove leading and trailing whitespace >>> make_python_identifier(' whitespace ') ('whitespace', {' whitespace ': 'whitespace'}) Remove most special characters outright: >>> make_python_identifier('h@t tr!ck') ('ht_trck', {'h@t tr!ck': 'ht_trck'}) Replace special characters with their hex representations >>> make_python_identifier('h@t tr!ck', convert='hex') ('h40t_tr21ck', {'h@t tr!ck': 'h40t_tr21ck'}) remove leading digits >>> make_python_identifier('123abc') ('abc', {'123abc': 'abc'}) already in namespace >>> make_python_identifier('Variable$', namespace={'Variable$': 'variable'}) ('variable', {'Variable$': 'variable'}) namespace conflicts >>> make_python_identifier('Variable$', namespace={'Variable@': 'variable'}) ('variable_1', {'Variable@': 'variable', 'Variable$': 'variable_1'}) >>> make_python_identifier('Variable$', namespace={'Variable@': 'variable', >>> 'Variable%': 'variable_1'}) ('variable_2', {'Variable@': 'variable', 'Variable%': 'variable_1', 'Variable$': 'variable_2'}) throw exception instead >>> make_python_identifier('Variable$', namespace={'Variable@': 'variable'}, handle='throw') Traceback (most recent call last): ... NameError: variable already exists in namespace or is a reserved word References ---------- Identifiers must follow the convention outlined here: https://docs.python.org/2/reference/lexical_analysis.html#identifiers """ if namespace is None: namespace = dict() if reserved_words is None: reserved_words = list() if string in namespace: return namespace[string], namespace # create a working copy (and make it lowercase, while we're at it) s = string.lower() # remove leading and trailing whitespace s = s.strip() # Make spaces into underscores s = re.sub('[\\s\\t\\n]+', '_', s) if convert == 'hex': # Convert invalid characters to hex s = ''.join([c.encode("hex") if re.findall('[^0-9a-zA-Z_]', c) else c for c in s]) elif convert == 'drop': # Remove invalid characters s = re.sub('[^0-9a-zA-Z_]', '', s) # Remove leading characters until we find a letter or underscore s = re.sub('^[^a-zA-Z_]+', '', s) # Check that the string is not a python identifier while (s in keyword.kwlist or s in namespace.values() or s in reserved_words): if handle == 'throw': raise NameError(s + ' already exists in namespace or is a reserved word') if handle == 'force': if re.match(".*?_\d+$", s): i = re.match(".*?_(\d+)$", s).groups()[0] s = s.strip('_' + i) + '_' + str(int(i) + 1) else: s += '_1' namespace[string] = s return s, namespace
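The identifier cleanup in this example chains three re.sub calls: whitespace runs become underscores, invalid characters are dropped, and any leading run of non-letters is trimmed. A standalone sketch of just that chain (the function name and sample input are illustrative, not part of the project):

import re

def slugify_identifier(text):
    s = text.lower().strip()
    s = re.sub(r'\s+', '_', s)            # collapse whitespace runs into one underscore
    s = re.sub(r'[^0-9a-zA-Z_]', '', s)   # drop characters not legal in an identifier
    s = re.sub(r'^[^a-zA-Z_]+', '', s)    # identifiers cannot start with a digit
    return s

print(slugify_identifier('123abc rate'))   # abc_rate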
2
Example 30
View licensedef linkify(text, callbacks=DEFAULT_CALLBACKS, skip_pre=False, parse_email=False, tokenizer=HTMLSanitizer): """Convert URL-like strings in an HTML fragment to links. linkify() converts strings that look like URLs or domain names in a blob of text that may be an HTML fragment to links, while preserving (a) links already in the string, (b) urls found in attributes, and (c) email addresses. """ text = force_unicode(text) if not text: return '' parser = html5lib.HTMLParser(tokenizer=tokenizer) forest = parser.parseFragment(text) _seen = set([]) def replace_nodes(tree, new_frag, node, index=0): """ Doesn't really replace nodes, but inserts the nodes contained in new_frag into the treee at position index and returns the number of nodes inserted. If node is passed in, it is removed from the tree """ count = 0 new_tree = parser.parseFragment(new_frag) # capture any non-tag text at the start of the fragment if new_tree.text: if index == 0: tree.text = tree.text or '' tree.text += new_tree.text else: tree[index - 1].tail = tree[index - 1].tail or '' tree[index - 1].tail += new_tree.text # the put in the tagged elements into the old tree for n in new_tree: if n.tag == ETREE_TAG('a'): _seen.add(n) tree.insert(index + count, n) count += 1 # if we got a node to remove... if node is not None: tree.remove(node) return count def strip_wrapping_parentheses(fragment): """Strips wrapping parentheses. Returns a tuple of the following format:: (string stripped from wrapping parentheses, count of stripped opening parentheses, count of stripped closing parentheses) """ opening_parentheses = closing_parentheses = 0 # Count consecutive opening parentheses # at the beginning of the fragment (string). for char in fragment: if char == '(': opening_parentheses += 1 else: break if opening_parentheses: newer_frag = '' # Cut the consecutive opening brackets from the fragment. fragment = fragment[opening_parentheses:] # Reverse the fragment for easier detection of parentheses # inside the URL. reverse_fragment = fragment[::-1] skip = False for char in reverse_fragment: # Remove the closing parentheses if it has a matching # opening parentheses (they are balanced). if (char == ')' and closing_parentheses < opening_parentheses and not skip): closing_parentheses += 1 continue # Do not remove ')' from the URL itself. 
elif char != ')': skip = True newer_frag += char fragment = newer_frag[::-1] return fragment, opening_parentheses, closing_parentheses def apply_callbacks(attrs, new): for cb in callbacks: attrs = cb(attrs, new) if attrs is None: return None return attrs def _render_inner(node): out = ['' if node.text is None else node.text] for subnode in node: out.append(_render(subnode)) if subnode.tail: out.append(subnode.tail) return ''.join(out) def linkify_nodes(tree, parse_text=True): children = len(tree) current_child = -1 # start at -1 to process the parent first while current_child < len(tree): if current_child < 0: node = tree if parse_text and node.text: new_txt = old_txt = node.text if parse_email: new_txt = re.sub(email_re, email_repl, node.text) if new_txt and new_txt != node.text: node.text = '' adj = replace_nodes(tree, new_txt, None, 0) children += adj current_child += adj linkify_nodes(tree, True) continue new_txt = re.sub(url_re, link_repl, new_txt) if new_txt != old_txt: node.text = '' adj = replace_nodes(tree, new_txt, None, 0) children += adj current_child += adj continue else: node = tree[current_child] if parse_text and node.tail: new_tail = old_tail = node.tail if parse_email: new_tail = re.sub(email_re, email_repl, new_tail) if new_tail != node.tail: node.tail = '' adj = replace_nodes(tree, new_tail, None, current_child + 1) # Insert the new nodes made from my tail into # the tree right after me. current_child+1 children += adj continue new_tail = re.sub(url_re, link_repl, new_tail) if new_tail != old_tail: node.tail = '' adj = replace_nodes(tree, new_tail, None, current_child + 1) children += adj if node.tag == ETREE_TAG('a') and not (node in _seen): if not node.get('href', None) is None: attrs = dict(node.items()) _text = attrs['_text'] = _render_inner(node) attrs = apply_callbacks(attrs, False) if attrs is None: # <a> tag replaced by the text within it adj = replace_nodes(tree, _text, node, current_child) current_child -= 1 # pull back current_child by 1 to scan the # new nodes again. 
else: text = force_unicode(attrs.pop('_text')) for attr_key, attr_val in attrs.items(): node.set(attr_key, attr_val) for n in reversed(list(node)): node.remove(n) text = parser.parseFragment(text) node.text = text.text for n in text: node.append(n) _seen.add(node) elif current_child >= 0: if node.tag == ETREE_TAG('pre') and skip_pre: linkify_nodes(node, False) elif not (node in _seen): linkify_nodes(node, True) current_child += 1 def email_repl(match): addr = match.group(0).replace('"', '"') link = { '_text': addr, 'href': 'mailto:{0!s}'.format(addr), } link = apply_callbacks(link, True) if link is None: return addr _href = link.pop('href') _text = link.pop('_text') repl = '<a href="{0!s}" {1!s}>{2!s}</a>' attr = '{0!s}="{1!s}"' attribs = ' '.join(attr.format(k, v) for k, v in link.items()) return repl.format(_href, attribs, _text) def link_repl(match): url = match.group(0) open_brackets = close_brackets = 0 if url.startswith('('): _wrapping = strip_wrapping_parentheses(url) url, open_brackets, close_brackets = _wrapping end = '' m = re.search(punct_re, url) if m: end = m.group(0) url = url[0:m.start()] if re.search(proto_re, url): href = url else: href = ''.join(['http://', url]) link = { '_text': url, 'href': href, } link = apply_callbacks(link, True) if link is None: return '(' * open_brackets + url + ')' * close_brackets _text = link.pop('_text') _href = link.pop('href') repl = '{0!s}<a href="{1!s}" {2!s}>{3!s}</a>{4!s}{5!s}' attr = '{0!s}="{1!s}"' attribs = ' '.join(attr.format(k, v) for k, v in link.items()) return repl.format('(' * open_brackets, _href, attribs, _text, end, ')' * close_brackets) try: linkify_nodes(forest) except RuntimeError as e: # If we hit the max recursion depth, just return what we've got. log.exception('Probable recursion error: {0!r}'.format(e)) return _render(forest)
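Both email_repl and link_repl above are handed to re.sub as callable replacements, which is how linkify() swaps each match for a full anchor tag. A simplified, self-contained sketch of that pattern (the regex below is deliberately naive and is not bleach's real email_re):

import re

simple_email_re = re.compile(r'[\w.+-]+@[\w-]+\.[\w.-]+')

def email_repl(match):
    # the callable receives a match object and returns the replacement text
    addr = match.group(0)
    return '<a href="mailto:{0}">{0}</a>'.format(addr)

text = 'Contact support@example.com for help.'
print(re.sub(simple_email_re, email_repl, text))
# Contact <a href="mailto:support@example.com">support@example.com</a> for help.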
2
Example 31
View licensedef clean_html(content_fn, out_fn, author_names): import lxml.etree # The HTML file contains the entire HTML page from CRS.gov that the report was # scraped from. Extract just the report content, dropping the CRS.gov header/footer. with open(content_fn) as f: content = f.read() # Some reports are invalid HTML with a whole doctype and html node inside # the main report container element. See if this is one of those documents. extract_blockquote = ('<div class="Report"><!DOCTYPE' in content) # Extract the report itself from the whole page. content = html5lib.parse(content, treebuilder="lxml") content = content.find(".//*[@class='Report']") if content is None: raise ValueError("HTML page doesn't contain an element with the Report CSS class") if extract_blockquote: content = content.find("{http://www.w3.org/1999/xhtml}blockquote") if content is None: raise ValueError("HTML page didn't have the expected blockquote.") content.tag = "div" # Remove the XHTML namespace to make processing easier. for tag in [content] + content.findall(".//*"): if isinstance(tag.tag, str): # is an element tag.tag = tag.tag.replace("{http://www.w3.org/1999/xhtml}", "") # Scrub content and adjust some tags. allowed_classes = { 'ReportHeader' } def scrub_text(text): # Scrub crs.gov email addresses from the text. # There's a separate filter later for addresses in mailto: links. text = re.sub(r"[a-zA-Z0-9_!#\$%&\'\*\+\-/=\?\^`\{\|\}~][email protected]\.(loc\.)?gov", "[email address scrubbed]", text) # Scrub CRS telephone numbers --- in 7-xxxx format. We have to exclude # cases that have a precediing digit, because otherwise we match # strings like "2007-2009". But the number can also occur at the start # of a node, so it may be the start of a string. text = re.sub(r"(^|[^\d])7-\d\d\d\d", r"\1[phone number scrubbed]", text) # Scrub all telephone numbers --- in (xxx) xxx-xxxx format. text = re.sub(r"\(\d\d\d\) \d\d\d-\d\d\d\d", "[phone number scrubbed]", text) # Scrub all author names. text = re.sub("|".join([re.escape(an) for an in author_names]), "[author name scrubbed]", text) return text for tag in [content] + content.findall(".//*"): # Skip non-element nodes. if not isinstance(tag.tag, str): continue # Scrub the text. if tag.text is not None: tag.text = scrub_text(tag.text) if tag.tail is not None: tag.tail = scrub_text(tag.tail) css_classes = set(tag.attrib.get('class', '').split(" ")) # Modern reports have a ReportHeader node with title, authors, date, report number, # and an internal link to just past the table of contents. Since we are scrubbing # author names, we must remove at least that. We also want to remove that internal # link and replace the title with an <h1> tag. if "ReportHeader" in css_classes: for node in tag: node_css_classes = set(node.attrib.get('class', '').split(" ")) if "Title" in node_css_classes: node.tag = "h1" elif "CoverDate" in node_css_classes: pass # keep this one else: node.getparent().remove(node) # Older reports had a "titleline" class for the title. if "titleline" in css_classes: tag.tag = "h1" css_classes.add("Title") # so the h1 doesn't get demoted below # Older reports had an "authorline" with author names, which we scrub by # removing completely. if "authorline" in css_classes: tag.getparent().remove(tag) # Older reports had a "Print Version" link, which we can remove. 
if tag.tag == "a" and tag.text == "Print Version": tag.getparent().remove(tag) # Scrub mailto: links, which have author emails, which we want to scrub, # as well as email addresses of other people mentioned in the reports. if 'href' in tag.attrib and tag.attrib['href'].lower().startswith("mailto:"): tag.tag = "span" del tag.attrib['href'] tag.text = "[email address scrubbed]" for n in tag: # remove all child nodes tag.remove(n) # Demote h#s. These seem to occur around the table of contents only. Don't # demote the one we just made above for the title. if tag.tag in ("h1", "h2", "h3", "h4", "h5") and "Title" not in css_classes: tag.tag = "h" + str(int(tag.tag[1:])+1) # Turn some classes into h#s. for cls in css_classes: if cls in ("Heading1", "Heading2", "Heading3", "Heading4", "Heading5"): tag.tag = "h" + str(int(cls[7:])+1) if cls == "SummaryHeading": tag.tag = "h2" # Sanitize CSS classes using the whitelist above. if "class" in tag.attrib: new_classes = " ".join(sorted(set(tag.attrib["class"].split(" ")) & allowed_classes)) if new_classes: tag.attrib["class"] = new_classes else: del tag.attrib["class"] # Serialize back to XHTML. content = lxml.etree.tostring(content, encoding=str, method="html") # Guard against unsafe content. import bleach def link_filter(name, value): if name in ("name", "class"): return True # "name" is for link targets if name == "href" and (value.startswith("http:") or value.startswith("https:") or value.startswith("#")): return True return False def image_filter(name, value): if name in ("class",): return True if name == "src" and (value.startswith("http:") or value.startswith("https:")): return True return False content = bleach.clean( content, tags=["a", "img", "b", "strong", "i", "em", "u", "sup", "sub", "span", "div", "p", "br", "ul", "ol", "li", "table", "thead", "tbody", "tr", "th", "td", "hr", "h1", "h2", "h3", "h4", "h5", "h6"], attributes={ "*": ["title", "class"], "a": link_filter, "img": image_filter, "td": ["colspan", "rowspan"], "th": ["colspan", "rowspan"], } ) # Write it out. with open(out_fn, "w") as f2: f2.write(content)
2
Example 32
View licensedef _loadProgress(self): #print('Progress') #print(self.url) if 'kissanime' in self.url: cookie_file = '/tmp/AnimeWatch/kcookie.txt' elif 'kisscartoon' in self.url: cookie_file = '/tmp/AnimeWatch/kcookieC.txt' elif 'kissasian' in self.url: cookie_file = '/tmp/AnimeWatch/kcookieD.txt' elif 'masterani' in self.url: cookie_file = '/tmp/AnimeWatch/animeSquare.txt' elif 'animeget' in self.url: cookie_file = '/tmp/AnimeWatch/animeget.txt' elif 'animeplace' in self.url: cookie_file = '/tmp/AnimeWatch/animeplace.txt' elif 'moetube' in self.url: cookie_file = '/tmp/AnimeWatch/animeHQ.txt' elif 'nyaa' in self.url: cookie_file = '/tmp/AnimeWatch/nyaa.txt' if 'moetube' in self.url: txt_file = '/tmp/AnimeWatch/moetube.txt' frame = self.mainFrame() html = frame.toHtml() #print(html) if 'var glink = ' in html: if os.path.exists(txt_file): f = open(txt_file,'a') else: f = open(txt_file,'w') f.write(html) f.close() if self.cnt == 0 and os.path.exists(cookie_file) and ('kisscartoon' in self.url or 'kissasian' in self.url): frame = self.mainFrame() html = frame.toHtml() soup = BeautifulSoup(html,'lxml') m = soup.findAll('select',{'id':'selectQuality'}) if m: print(m) arr = [] for i in m: j = i.findAll('option') for k in j: l = k['value'] #print(l) arr.append(l) total_q = len(arr) if arr: print('----------total Different Quality Video------',total_q) if self.quality == 'sd': txt = arr[-1] elif self.quality == 'hd': if total_q == 1: txt = arr[-1] elif total_q == 2: txt = arr[-2] elif total_q == 3 or total_q == 4: txt = arr[-3] elif self.quality == 'sd480p': if total_q == 1: txt = arr[-1] elif total_q == 2 or total_q == 3 or total_q == 4: txt = arr[-2] doc = frame.documentElement() bt = doc.findFirst("select[id=selectQuality]") #txt = arr[-1] bt.evaluateJavaScript('this.value="'+txt+'"') self.cnt = 1 listCookies = self.networkAccessManager().cookieJar().allCookies() #print(listCookies) n = [] m = '' o = '' for cookie in listCookies: k=cookie.toRawForm() #k = getContentUnicode(k) k = re.sub("b'","'",str(k)) #print(k) j = re.findall("'[^']*",k) for i in j: i = re.sub("'",'',i) if 'kissanime.to' in i or 'kissasian.com' in i or 'kisscartoon.me' in i or 'masterani.me' in i or 'animeget.io' in i or 'animeplace.co' in i or 'moetube.net' in i or 'nyaa.se' in i: j = re.findall('expires=[^;]*',i) if j: l = re.sub('expires=','',j[0]) d = datetime.strptime(l,"%a, %d-%b-%Y %H:%M:%S %Z") t = calendar.timegm(d.timetuple()) i = i+'; expiry='+str(int(t)) else: i = i+'; expiry='+str(0) n.append(i) #print(n) cfc='' cfd ='' asp = '' idt = '' test_idt = '' clr = False for i in n: if 'cf_clearance' in i: clr = True #print(n) if clr: for i in n: if 'cf_clearance' in i: cfc = self.cookie_split(i) elif '__cfduid' in i: cfd = self.cookie_split(i) elif 'ASP.NET_SessionId' in i: asp = self.cookie_split(i) elif 'idtz' in i: idt = self.cookie_split(i) if cfc and cfd: #print(cfc) #print(cfd) #print(asp) str1 = cfc['domain']+' '+cfc['HttpOnly']+' '+cfc['path']+' '+'FALSE'+' '+cfc['expiry']+' '+'cf_clearance'+' '+cfc['cf_clearance'] str2 = cfd['domain']+' '+cfd['HttpOnly']+' '+cfd['path']+' '+'FALSE'+' '+cfd['expiry']+' '+'__cfduid'+' '+cfd['__cfduid'] if asp: str3 = asp['domain']+' '+'FALSE'+' '+asp['path']+' '+'FALSE'+' '+asp['expiry']+' '+'ASP.NET_SessionId'+' '+asp['ASP.NET_SessionId'] else: str3 = '' if idt: str3 = idt['domain']+' '+'FALSE'+' '+idt['path']+' '+'FALSE'+' '+idt['expiry']+' '+'idtz'+' '+idt['idtz'] else: str3 = '' if 'kissasian' in self.url: str3 = 'kissasian.com FALSE / FALSE 0 __test' if not 
os.path.exists('/tmp/AnimeWatch'): os.makedirs('/tmp/AnimeWatch') f = open(cookie_file,'w') if str3: f.write(str2+'\n'+str1+'\n'+str3) else: f.write(str2+'\n'+str1) f.close()
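The cookie handling above uses re.sub to strip the b'...' bytes repr that str(cookie.toRawForm()) produces and to turn the expires= attribute into a Unix expiry timestamp. A trimmed-down sketch of that conversion with a made-up cookie string (variable names are illustrative):

import re
import calendar
from datetime import datetime

raw = "b'cf_clearance=abc123; expires=Sun, 15-Feb-2026 07:31:40 GMT; path=/; domain=.example.com'"

cookie = re.sub(r"^b'|'$", '', raw)              # drop the bytes-repr wrapper
found = re.findall('expires=[^;]*', cookie)
if found:
    stamp = re.sub('expires=', '', found[0])
    d = datetime.strptime(stamp, "%a, %d-%b-%Y %H:%M:%S %Z")
    cookie += '; expiry=' + str(calendar.timegm(d.timetuple()))
print(cookie)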
2
Example 33
View licensedef run(self): x = open(os.path.join(self.reportDir, self.msgFile)) msg = email.message_from_file(x) # open the eml file so we can parse ie x.close() ################# Header Information ################### # Get me all the sections then write them as one big sql line dateLine = msg.get('Date') msg_id = int(os.path.basename(self.reportDir)) # Unique id for this email used to cross ref other tables try: fromAdd = msg['from'] # might need to tidy this up a little bit using the address parse option except: fromAdd = msg['from'] stringIt = str(fromAdd) dbFrom = stringIt[stringIt.find("<")+1:stringIt.find(">")] # very messy need to fix this. addDomain = dbFrom[dbFrom.find("@")+1:] try: subjectLine = unicode(msg['subject'], errors = 'replace') except: subjectLine = msg['subject'] x_mailer = msg['X-Mailer'] x_priority = msg['X-Priority'] try: message_id = re.sub('[<>]', '', msg['Message-ID']) except: message_id = msg['Message-ID'] hops = msg.get_all('Received') if hops: for hop in hops: hop = re.sub('[<>]', '', hop) sqlHop = (msg_id, hop) db.parseHops(sqlHop) try: sender = re.sub('[<>]', '', msg.get('From')) # remove <> so it renders correctly in the HTML except: sender = dbFrom try: to_add = re.sub('[<>]', '', msg.get('To')) # except: to_add = msg.get('To') try: cc_add = re.sub('[<>]', '', msg.get('cc')) except: cc_add = msg.get('cc') try: bcc_add = re.sub('[<>]', '', msg.get('Bcc')) except: bcc_add = msg.get('bcc') sqlHeader = ( msg_id, dateLine, sender, addDomain, subjectLine, x_mailer, x_priority, message_id, cc_add, bcc_add, to_add) db.parseHeader(sqlHeader) counter = 0 for part in msg.walk(): if part.get_content_maintype() == 'multipart': continue if part.get_content_type() == 'text/plain': # Plain Text Body contents = part.get_payload(decode=True) links = re.findall(r'(https?://\S+)', contents) link_type = "url" for urls in links: sqlUrl = (msg_id, link_type, urls) db.parseLinks(sqlUrl) from core.cleanHtml import cleanHTML htmlStrip = cleanHTML().safe_html(contents) if htmlStrip is not None: fp = open(os.path.join(self.reportDir, "attatchments", "body.txt"), 'wb') fp.write(htmlStrip.encode('ascii', 'ignore')) fp.close() if part.get_content_type() == 'text/html': # HTML Body contents = part.get_payload(decode=True) soup = BeautifulSoup(contents) for link in soup.find_all('a'): link_type = "url" urls = link.get('href') sqlUrl = (msg_id, link_type, urls) db.parseLinks(sqlUrl) for images in soup.find_all('img'): link_type = "img" image = images.get('src') sqlImg = (msg_id, link_type, image) db.parseLinks(sqlImg) for iframes in soup.find_all('iframe'): link_type = "iframe" frames = "Fix Me" sqlFrames = (msg_id, link_type, frames) db.parseLinks(sqlFrames) from core.cleanHtml import cleanHTML htmlStrip = cleanHTML().safe_html(contents) if htmlStrip is not None: fp = open(os.path.join(self.reportDir, "attatchments", "htmlbody.txt"), 'wb') fp.write(htmlStrip.encode('ascii', 'ignore')) fp.close() if part.get('Content-Disposition') is None: # Actual File attatchments here continue from bs4 import UnicodeDammit filenameraw = str(part.get_filename()) dammit = UnicodeDammit(filenameraw) enctype = dammit.original_encoding if enctype == "ascii": filename = dammit.unicode_markup else: ext = mimetypes.guess_extension(part.get_content_type()) filename = '%s-encoded-File-%s.%s' % (enctype, counter, ext) if filename == 'None': # if theres no name then guess the extension and make something up ext = mimetypes.guess_extension(part.get_content_type()) if not ext: ext = ".bin" filename = 'part-%03d%s' 
% (counter, ext) counter +=1 fp = open(os.path.join(self.reportDir, "attatchments", filename), 'wb') # write the attatchment out to a folder # Deal With Zero Size Files if part.get_payload(decode=True) is None: part_data = "This is a Zero Byte File" fp.write(part_data) fp.close() else: fp.write(part.get_payload(decode=True)) fp.close() part_data = part.get_payload(decode=True) fileSize = os.path.getsize(os.path.join(self.reportDir, "attatchments", filename)) fileExt = os.path.splitext(os.path.join(self.reportDir, "attatchments", filename)) md5Hash = MailHash().HashMD5(part_data) sha256Hash = MailHash().HashSha256(part_data) if ssdeepcheck == '1': # check to see if users has enabled ssdeep try: #gracefull fail if the python wrapper is not installed. ssdHash = MailHash().Hashssdeep(part_data) except: ssdHash = "0" else: ssdHash = "0" import core.yarascan filetoScan = os.path.join(self.reportDir, "attatchments", filename) result = core.yarascan.fileScan(filetoScan, md5Hash, msg_id) match = '0' if result: yaraMatch = '3' match = '3' else: yaraMatch = '0' # database stuff here sqlAttatchments = (msg_id, str(filename), fileExt[1][1:], fileSize, md5Hash, sha256Hash, ssdHash, yaraMatch) db.parseAttatch(sqlAttatchments) sqlYara = (counter, match, msg_id) db.parseYara(sqlYara)
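Most of the re.sub calls in this example just strip < and > from header values so they render cleanly in HTML; the surrounding try/except blocks exist because msg.get() returns None for a missing header and re.sub() raises TypeError when given None. A tiny sketch of that guard (the helper name is made up):

import re

def strip_angles(value):
    try:
        return re.sub('[<>]', '', value)
    except TypeError:   # header absent, value is None
        return value

print(strip_angles('Alice <alice@example.org>'))   # Alice alice@example.org
print(strip_angles(None))                          # None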
2
Example 35
View licensedef test_basic_ensure_statements(self): ensure(range(10)).contains(5) with self.assertRaises(EnsureError): ensure(range(10)).contains(-1) ensure("abc").is_in("abcdef") with self.assertRaises(EnsureError): ensure(range(10)).is_in(-1) ensure("abc").matches("(abc|def)") with self.assertRaises(EnsureError): ensure(range(10)).is_in(-1) x = {x: x for x in range(10)} ok_clauses = ('Ensure(x).contains(0)', 'Ensure(x).contains_all_of(range(10))', 'Ensure(x).contains_no(str)', 'Ensure(x).contains_none_of(range(20, 30))', 'Ensure(x).contains_one_of(range(1))', 'Ensure(x).contains_some_of(range(2))', 'Ensure(x).contains_only(range(10))', 'Ensure(x).does_not_contain(-1)', 'Ensure(x).does_not_equal(range(10))', 'Ensure(x).has_attribute("__iter__")', 'Ensure(x).has_length(10).also.is_nonempty()', 'Ensure(x).has_length(length=10, min=9, max=10)', 'Ensure(x).has_length(max=99.9)', 'Ensure(x).is_nonempty().also.has_length(10)', 'Ensure(x).is_a(collections.Mapping)', 'Ensure(x).is_a_dict_of(int).to(int)', 'Ensure(x).is_a(collections.Mapping).of(int).to(int)', 'Ensure(6).is_greater_than(5)', 'Ensure(6).exceeds(5)', 'Ensure(1.1).is_greater_than_or_equal_to(1.1)', 'Ensure(1.1).is_less_than_or_equal_to(1.1)', 'Ensure(1).is_less_than(1.1)', 'Ensure(1).is_positive()', 'Ensure(1.1).is_a_positive(float)', 'Ensure(-1).is_negative()', 'Ensure(-1).is_a_negative(int)', 'Ensure(0).is_nonnegative()', 'Ensure(0).is_a_nonnegative(int)', 'Ensure(1).is_a_positive(int).which.equals(1.0)', 'Ensure((collections.namedtuple("Thing", ["x"]))(x={})).has_attribute("x").which.is_a(dict)', 'Ensure({1:"a"}).has_key(1).whose_value.has_length(1)', 'Ensure({1: "a", 2: "b", 3: "c"}).has_keys((1, 2))', 'Ensure({1: "a", 2: "b", 3: "c"}).has_only_keys((1, 2, 3))', 'Ensure({}).is_empty()', 'Ensure(os.path.join).called_with("a", "b").returns(os.path.join("a", "b"))', 'Ensure(int).called_with("1100101", base=2).returns(101)', 'Ensure.each_of([1,2,3]).is_an(int)', 'Ensure.each_of([lambda x: x, lambda y: y]).called_with(1).returns(1)', 'Ensure(True).is_none_or.is_an(int)', # See https://www.python.org/dev/peps/pep-0285/ (section 6) 'Ensure(None).is_none_or.is_a_negative(int)', 'Ensure(-5).is_none_or.is_a_negative(int)', 'Ensure({"a": "b"}).is_none_or.has_key("a")', 'Ensure("A").satisfies(str.isupper)', 'Ensure("A").satisfies(".isupper")', 'Ensure("ABC").satisfies(str.startswith, "AB")', 'Ensure("ABC").satisfies(".startswith", "AB")', 'Ensure(3).satisfies(lambda x, y: x < y, y=4)') for clause in ok_clauses: print("Testing OK clause", clause) eval(clause) if 'each_of' not in clause: for sub in r'Check\1.otherwise(Exception)', r'Check\1.or_raise(Exception)', r'Check\1.or_call(self.assertTrue, False)': print("Testing OK clause", re.sub(r'^Ensure(.+)', sub, clause)) eval(re.sub(r'^Ensure(.+)', sub, clause)) bad_clauses = ('Ensure(x).contains(-1)', 'Ensure(x).has_length(10).also.is_empty()', 'Ensure(x).contains_all_of(range(20))', 'Ensure(x).contains_no(int)', 'Ensure(x).contains_none_of(range(0, 30))', 'Ensure(x).contains_one_of(range(2))', 'Ensure(x).contains_some_of(range(20, 30))', 'Ensure(x).contains_only(range(11))', 'Ensure(x).does_not_contain(1)', 'Ensure(x).does_not_equal(x)', 'Ensure(x).does_not_equal(copy.deepcopy(x))', 'Ensure(x).has_attribute("y")', 'Ensure(x).has_length(1)', 'Ensure(x).has_length(length=1, min=9, max=10)', 'Ensure(x).has_length(min=11)', 'Ensure(x).has_length(max=1.1)', 'Ensure(x).is_a(str)', 'Ensure(x).is_empty()', 'Ensure(6).is_greater_than(7)', 'Ensure(6).exceeds(7)', 
'Ensure(1).is_greater_than_or_equal_to(1.1)', 'Ensure(None).is_greater_than_or_equal_to(1.1)', 'Ensure(5).is_less_than_or_equal_to(1)', 'Ensure(1).is_less_than(None)', 'Ensure(0).is_positive()', 'Ensure(1).is_a_positive(float)', 'Ensure(1).is_negative()', 'Ensure(-0).is_a_negative(int)', 'Ensure(-0.1).is_nonnegative()', 'Ensure(None).is_a_nonnegative(int)', 'Ensure({1: "a"}).has_key(1).whose_value.has_length(2)', 'Ensure({1: "a"}).has_keys((1, 2))', 'Ensure({1: "a", 2: "b"}).has_only_keys([1])', 'Ensure({1: "a", 2: "b"}).has_only_keys([1, 2, 3])', 'Ensure([1, 2, 3]).has_only_keys([1, 2, 3])', 'Ensure(os.path.join).called_with("a", "b").returns(None)', 'Ensure(1).is_a_positive(int).which.equals(1.2)', 'Ensure.each_of([lambda x: x, lambda y: y]).called_with(2).returns(1)', 'Ensure(5).is_none_or.is_a_negative(int)', 'Ensure(None).is_a_negative(int)', 'Ensure("a").satisfies(str.isupper)', 'Ensure("a").satisfies(".isupper")', 'Ensure("ABC").satisfies(str.startswith, "Z")', 'Ensure("ABC").satisfies(".startswith", "Z")', 'Ensure(5).satisfies(str.isupper)', 'Ensure(5).satisfies(".isupper")') for clause in bad_clauses: print("Testing bad clause", clause) with self.assertRaises(EnsureError): eval(clause) if 'each_of' not in clause: for sub in r'Check\1.otherwise(Exception)', r'Check\1.or_raise(Exception)', r'Check\1.or_call(self.assertTrue, False)': with self.assertRaises(Exception): print("Testing bad clause", re.sub(r'^Ensure(.+)', sub, clause)) eval(re.sub(r'^Ensure(.+)', sub, clause)) with self.assertRaises(EnsureError): Ensure(x).is_a_dict_of(int).to(str) with self.assertRaises(EnsureError): Ensure(x).is_a_dict_of(str).to(int) with self.assertRaises(EnsureError): Ensure(x).called_with().is_an(int) Ensure(lambda: True).is_callable() Ensure("1.1").is_a_numeric_string() with self.assertRaises(EnsureError): Ensure(b"1").is_a_numeric_string() with self.assertRaises(EnsureError): Ensure("").is_a_numeric_string() with self.assertRaises(EnsureError): Ensure(None).is_a_numeric_string() Ensure(b"1").is_a_numeric_bytestring() Ensure(b"1.1").is_a_numeric_bytestring() with self.assertRaises(EnsureError): Ensure("1").is_a_numeric_bytestring() with self.assertRaises(EnsureError): Ensure(b"").is_a_numeric_bytestring() with self.assertRaises(EnsureError): Ensure(None).is_a_numeric_bytestring() Ensure("1").is_an_integer_string() with self.assertRaises(EnsureError): Ensure("1.1").is_an_integer_string() Ensure(b"1").is_an_integer_bytestring() with self.assertRaises(EnsureError): Ensure(b"1.1").is_an_integer_bytestring() with self.assertRaises(EnsureError): Ensure("1").is_an_integer_bytestring() with self.assertRaises(EnsureError): Ensure(b"").is_an_integer_bytestring() with self.assertRaises(EnsureError): Ensure(None).is_an_integer_bytestring()
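The test above rewrites each 'Ensure(...)' clause into its 'Check(...)' form by capturing everything after the leading Ensure and reusing it with \1 in the replacement template. A standalone sketch of that rewrite:

import re

clause = 'Ensure(x).contains(0)'
for template in (r'Check\1.otherwise(Exception)', r'Check\1.or_raise(Exception)'):
    print(re.sub(r'^Ensure(.+)', template, clause))
# Check(x).contains(0).otherwise(Exception)
# Check(x).contains(0).or_raise(Exception)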
2
Example 37
View licensedef run(self, path_to_output, opts, db, notification=DummyReporter()): from calibre.utils.date import isoformat from calibre.utils.html2text import html2text from calibre.utils.bibtex import BibTeX from calibre.library.save_to_disk import preprocess_template from calibre.utils.date import now as nowf from calibre.utils.logging import default_log as log library_name = os.path.basename(db.library_path) def create_bibtex_entry(entry, fields, mode, template_citation, bibtexdict, db, citation_bibtex=True, calibre_files=True): # Bibtex doesn't like UTF-8 but keep unicode until writing # Define starting chain or if book valid strict and not book return a Fail string bibtex_entry = [] if mode != "misc" and check_entry_book_valid(entry) : bibtex_entry.append(u'@book{') elif mode != "book" : bibtex_entry.append(u'@misc{') else : # case strict book return '' if citation_bibtex : # Citation tag bibtex_entry.append(make_bibtex_citation(entry, template_citation, bibtexdict)) bibtex_entry = [u' '.join(bibtex_entry)] for field in fields: if field.startswith('#'): item = db.get_field(entry['id'],field,index_is_id=True) if isinstance(item, (bool, float, int)): item = repr(item) elif field == 'title_sort': item = entry['sort'] elif field == 'library_name': item = library_name else: item = entry[field] # check if the field should be included (none or empty) if item is None: continue try: if len(item) == 0 : continue except TypeError: pass if field == 'authors' : bibtex_entry.append(u'author = "%s"' % bibtexdict.bibtex_author_format(item)) elif field == 'id' : bibtex_entry.append(u'calibreid = "%s"' % int(item)) elif field == 'rating' : bibtex_entry.append(u'rating = "%s"' % int(item)) elif field == 'size' : bibtex_entry.append(u'%s = "%s octets"' % (field, int(item))) elif field == 'tags' : # A list to flatten bibtex_entry.append(u'tags = "%s"' % bibtexdict.utf8ToBibtex(u', '.join(item))) elif field == 'comments' : # \n removal item = item.replace(u'\r\n',u' ') item = item.replace(u'\n',u' ') # unmatched brace removal (users should use \leftbrace or \rightbrace for single braces) item = bibtexdict.stripUnmatchedSyntax(item, u'{', u'}') # html to text try: item = html2text(item) except: log.warn("Failed to convert comments to text") bibtex_entry.append(u'note = "%s"' % bibtexdict.utf8ToBibtex(item)) elif field == 'isbn' : # Could be 9, 10 or 13 digits bibtex_entry.append(u'isbn = "%s"' % format_isbn(item)) elif field == 'formats' : # Add file path if format is selected formats = [format.rpartition('.')[2].lower() for format in item] bibtex_entry.append(u'formats = "%s"' % u', '.join(formats)) if calibre_files: files = [u':%s:%s' % (format, format.rpartition('.')[2].upper()) for format in item] bibtex_entry.append(u'file = "%s"' % u', '.join(files)) elif field == 'series_index' : bibtex_entry.append(u'volume = "%s"' % int(item)) elif field == 'timestamp' : bibtex_entry.append(u'timestamp = "%s"' % isoformat(item).partition('T')[0]) elif field == 'pubdate' : bibtex_entry.append(u'year = "%s"' % item.year) bibtex_entry.append(u'month = "%s"' % bibtexdict.utf8ToBibtex(strftime("%b", item))) elif field.startswith('#') and isinstance(item, basestring): bibtex_entry.append(u'custom_%s = "%s"' % (field[1:], bibtexdict.utf8ToBibtex(item))) elif isinstance(item, basestring): # elif field in ['title', 'publisher', 'cover', 'uuid', 'ondevice', # 'author_sort', 'series', 'title_sort'] : bibtex_entry.append(u'%s = "%s"' % (field, bibtexdict.utf8ToBibtex(item))) bibtex_entry = u',\n '.join(bibtex_entry) 
bibtex_entry += u' }\n\n' return bibtex_entry def check_entry_book_valid(entry): # Check that the required fields are ok for a book entry for field in ['title', 'authors', 'publisher'] : if entry[field] is None or len(entry[field]) == 0 : return False if entry['pubdate'] is None : return False else : return True def make_bibtex_citation(entry, template_citation, bibtexclass): # define a function to replace the template entry by its value def tpl_replace(objtplname) : tpl_field = re.sub(u'[\{\}]', u'', objtplname.group()) if tpl_field in TEMPLATE_ALLOWED_FIELDS : if tpl_field in ['pubdate', 'timestamp'] : tpl_field = isoformat(entry[tpl_field]).partition('T')[0] elif tpl_field in ['tags', 'authors'] : tpl_field =entry[tpl_field][0] elif tpl_field in ['id', 'series_index'] : tpl_field = str(entry[tpl_field]) else : tpl_field = entry[tpl_field] return tpl_field else: return u'' if len(template_citation) >0 : tpl_citation = bibtexclass.utf8ToBibtex( bibtexclass.ValidateCitationKey(re.sub(u'\{[^{}]*\}', tpl_replace, template_citation))) if len(tpl_citation) >0 : return tpl_citation if len(entry["isbn"]) > 0 : template_citation = u'%s' % re.sub(u'[\D]',u'', entry["isbn"]) else : template_citation = u'%s' % str(entry["id"]) return bibtexclass.ValidateCitationKey(template_citation) self.fmt = path_to_output.rpartition('.')[2] self.notification = notification # Combobox options bibfile_enc = ['utf8', 'cp1252', 'ascii'] bibfile_enctag = ['strict', 'replace', 'ignore', 'backslashreplace'] bib_entry = ['mixed', 'misc', 'book'] # Needed beacause CLI return str vs int by widget try: bibfile_enc = bibfile_enc[opts.bibfile_enc] bibfile_enctag = bibfile_enctag[opts.bibfile_enctag] bib_entry = bib_entry[opts.bib_entry] except: if opts.bibfile_enc in bibfile_enc : bibfile_enc = opts.bibfile_enc else : log.warn("Incorrect --choose-encoding flag, revert to default") bibfile_enc = bibfile_enc[0] if opts.bibfile_enctag in bibfile_enctag : bibfile_enctag = opts.bibfile_enctag else : log.warn("Incorrect --choose-encoding-configuration flag, revert to default") bibfile_enctag = bibfile_enctag[0] if opts.bib_entry in bib_entry : bib_entry = opts.bib_entry else : log.warn("Incorrect --entry-type flag, revert to default") bib_entry = bib_entry[0] if opts.verbose: opts_dict = vars(opts) log("%s(): Generating %s" % (self.name,self.fmt)) if opts.connected_device['is_device_connected']: log(" connected_device: %s" % opts.connected_device['name']) if opts_dict['search_text']: log(" --search='%s'" % opts_dict['search_text']) if opts_dict['ids']: log(" Book count: %d" % len(opts_dict['ids'])) if opts_dict['search_text']: log(" (--search ignored when a subset of the database is specified)") if opts_dict['fields']: if opts_dict['fields'] == 'all': log(" Fields: %s" % ', '.join(FIELDS[1:])) else: log(" Fields: %s" % opts_dict['fields']) log(" Output file will be encoded in %s with %s flag" % (bibfile_enc, bibfile_enctag)) log(" BibTeX entry type is %s with a citation like '%s' flag" % (bib_entry, opts_dict['bib_cit'])) # If a list of ids are provided, don't use search_text if opts.ids: opts.search_text = None data = self.search_sort_db(db, opts) if not len(data): log.error("\nNo matching database entries for search criteria '%s'" % opts.search_text) # Get the requested output fields as a list fields = self.get_output_fields(db, opts) if not len(data): log.error("\nNo matching database entries for search criteria '%s'" % opts.search_text) # Initialize BibTeX class bibtexc = BibTeX() # Entries writing after Bibtex formating (or 
not) if bibfile_enc != 'ascii' : bibtexc.ascii_bibtex = False else : bibtexc.ascii_bibtex = True # Check citation choice and go to default in case of bad CLI if isinstance(opts.impcit, (StringType, UnicodeType)) : if opts.impcit == 'False' : citation_bibtex= False elif opts.impcit == 'True' : citation_bibtex= True else : log.warn("Incorrect --create-citation, revert to default") citation_bibtex= True else : citation_bibtex= opts.impcit # Check add file entry and go to default in case of bad CLI if isinstance(opts.addfiles, (StringType, UnicodeType)) : if opts.addfiles == 'False' : addfiles_bibtex = False elif opts.addfiles == 'True' : addfiles_bibtex = True else : log.warn("Incorrect --add-files-path, revert to default") addfiles_bibtex= True else : addfiles_bibtex = opts.addfiles # Preprocess for error and light correction template_citation = preprocess_template(opts.bib_cit) # Open output and write entries with codecs.open(path_to_output, 'w', bibfile_enc, bibfile_enctag)\ as outfile: # File header nb_entries = len(data) # check in book strict if all is ok else throw a warning into log if bib_entry == 'book' : nb_books = len(filter(check_entry_book_valid, data)) if nb_books < nb_entries : log.warn("Only %d entries in %d are book compatible" % (nb_books, nb_entries)) nb_entries = nb_books # If connected device, add 'On Device' values to data if opts.connected_device['is_device_connected'] and 'ondevice' in fields: for entry in data: entry['ondevice'] = db.catalog_plugin_on_device_temp_mapping[entry['id']]['ondevice'] outfile.write(u'%%%Calibre catalog\n%%%{0} entries in catalog\n\n'.format(nb_entries)) outfile.write(u'@preamble{"This catalog of %d entries was generated by calibre on %s"}\n\n' % (nb_entries, nowf().strftime("%A, %d. %B %Y %H:%M").decode(preferred_encoding))) for entry in data: outfile.write(create_bibtex_entry(entry, fields, bib_entry, template_citation, bibtexc, db, citation_bibtex, addfiles_bibtex))
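make_bibtex_citation() above uses re.sub twice: once with a callback (tpl_replace) to expand {field} placeholders in the citation template, and once with \D to keep only the digits of an ISBN. A self-contained sketch of both, using a made-up entry dict:

import re

entry = {'authors': 'Knuth', 'pubdate': '1968', 'isbn': '978-0-201-03801-3'}

def tpl_replace(match):
    field = re.sub(r'[{}]', '', match.group())   # '{authors}' -> 'authors'
    return str(entry.get(field, ''))

print(re.sub(r'\{[^{}]*\}', tpl_replace, '{authors}{pubdate}'))  # Knuth1968
print(re.sub(r'\D', '', entry['isbn']))                          # 9780201038013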
2
Example 38
View licensedef run(self, path_to_output, opts, db, notification=DummyReporter()): from calibre.utils.date import isoformat from calibre.utils.html2text import html2text from calibre.utils.bibtex import BibTeX from calibre.library.save_to_disk import preprocess_template from calibre.utils.date import now as nowf from calibre.utils.logging import default_log as log library_name = os.path.basename(db.library_path) def create_bibtex_entry(entry, fields, mode, template_citation, bibtexdict, db, citation_bibtex=True, calibre_files=True): # Bibtex doesn't like UTF-8 but keep unicode until writing # Define starting chain or if book valid strict and not book return a Fail string bibtex_entry = [] if mode != "misc" and check_entry_book_valid(entry) : bibtex_entry.append(u'@book{') elif mode != "book" : bibtex_entry.append(u'@misc{') else : # case strict book return '' if citation_bibtex : # Citation tag bibtex_entry.append(make_bibtex_citation(entry, template_citation, bibtexdict)) bibtex_entry = [u' '.join(bibtex_entry)] for field in fields: if field.startswith('#'): item = db.get_field(entry['id'],field,index_is_id=True) if isinstance(item, (bool, float, int)): item = repr(item) elif field == 'title_sort': item = entry['sort'] elif field == 'library_name': item = library_name else: item = entry[field] # check if the field should be included (none or empty) if item is None: continue try: if len(item) == 0 : continue except TypeError: pass if field == 'authors' : bibtex_entry.append(u'author = "%s"' % bibtexdict.bibtex_author_format(item)) elif field == 'id' : bibtex_entry.append(u'calibreid = "%s"' % int(item)) elif field == 'rating' : bibtex_entry.append(u'rating = "%s"' % int(item)) elif field == 'size' : bibtex_entry.append(u'%s = "%s octets"' % (field, int(item))) elif field == 'tags' : # A list to flatten bibtex_entry.append(u'tags = "%s"' % bibtexdict.utf8ToBibtex(u', '.join(item))) elif field == 'comments' : # \n removal item = item.replace(u'\r\n',u' ') item = item.replace(u'\n',u' ') # unmatched brace removal (users should use \leftbrace or \rightbrace for single braces) item = bibtexdict.stripUnmatchedSyntax(item, u'{', u'}') # html to text try: item = html2text(item) except: log.warn("Failed to convert comments to text") bibtex_entry.append(u'note = "%s"' % bibtexdict.utf8ToBibtex(item)) elif field == 'isbn' : # Could be 9, 10 or 13 digits bibtex_entry.append(u'isbn = "%s"' % format_isbn(item)) elif field == 'formats' : # Add file path if format is selected formats = [format.rpartition('.')[2].lower() for format in item] bibtex_entry.append(u'formats = "%s"' % u', '.join(formats)) if calibre_files: files = [u':%s:%s' % (format, format.rpartition('.')[2].upper()) for format in item] bibtex_entry.append(u'file = "%s"' % u', '.join(files)) elif field == 'series_index' : bibtex_entry.append(u'volume = "%s"' % int(item)) elif field == 'timestamp' : bibtex_entry.append(u'timestamp = "%s"' % isoformat(item).partition('T')[0]) elif field == 'pubdate' : bibtex_entry.append(u'year = "%s"' % item.year) bibtex_entry.append(u'month = "%s"' % bibtexdict.utf8ToBibtex(strftime("%b", item))) elif field.startswith('#') and isinstance(item, basestring): bibtex_entry.append(u'custom_%s = "%s"' % (field[1:], bibtexdict.utf8ToBibtex(item))) elif isinstance(item, basestring): # elif field in ['title', 'publisher', 'cover', 'uuid', 'ondevice', # 'author_sort', 'series', 'title_sort'] : bibtex_entry.append(u'%s = "%s"' % (field, bibtexdict.utf8ToBibtex(item))) bibtex_entry = u',\n '.join(bibtex_entry) 
bibtex_entry += u' }\n\n' return bibtex_entry def check_entry_book_valid(entry): # Check that the required fields are ok for a book entry for field in ['title', 'authors', 'publisher'] : if entry[field] is None or len(entry[field]) == 0 : return False if entry['pubdate'] is None : return False else : return True def make_bibtex_citation(entry, template_citation, bibtexclass): # define a function to replace the template entry by its value def tpl_replace(objtplname) : tpl_field = re.sub(u'[\{\}]', u'', objtplname.group()) if tpl_field in TEMPLATE_ALLOWED_FIELDS : if tpl_field in ['pubdate', 'timestamp'] : tpl_field = isoformat(entry[tpl_field]).partition('T')[0] elif tpl_field in ['tags', 'authors'] : tpl_field =entry[tpl_field][0] elif tpl_field in ['id', 'series_index'] : tpl_field = str(entry[tpl_field]) else : tpl_field = entry[tpl_field] return tpl_field else: return u'' if len(template_citation) >0 : tpl_citation = bibtexclass.utf8ToBibtex( bibtexclass.ValidateCitationKey(re.sub(u'\{[^{}]*\}', tpl_replace, template_citation))) if len(tpl_citation) >0 : return tpl_citation if len(entry["isbn"]) > 0 : template_citation = u'%s' % re.sub(u'[\D]',u'', entry["isbn"]) else : template_citation = u'%s' % str(entry["id"]) return bibtexclass.ValidateCitationKey(template_citation) self.fmt = path_to_output.rpartition('.')[2] self.notification = notification # Combobox options bibfile_enc = ['utf8', 'cp1252', 'ascii'] bibfile_enctag = ['strict', 'replace', 'ignore', 'backslashreplace'] bib_entry = ['mixed', 'misc', 'book'] # Needed beacause CLI return str vs int by widget try: bibfile_enc = bibfile_enc[opts.bibfile_enc] bibfile_enctag = bibfile_enctag[opts.bibfile_enctag] bib_entry = bib_entry[opts.bib_entry] except: if opts.bibfile_enc in bibfile_enc : bibfile_enc = opts.bibfile_enc else : log.warn("Incorrect --choose-encoding flag, revert to default") bibfile_enc = bibfile_enc[0] if opts.bibfile_enctag in bibfile_enctag : bibfile_enctag = opts.bibfile_enctag else : log.warn("Incorrect --choose-encoding-configuration flag, revert to default") bibfile_enctag = bibfile_enctag[0] if opts.bib_entry in bib_entry : bib_entry = opts.bib_entry else : log.warn("Incorrect --entry-type flag, revert to default") bib_entry = bib_entry[0] if opts.verbose: opts_dict = vars(opts) log("%s(): Generating %s" % (self.name,self.fmt)) if opts.connected_device['is_device_connected']: log(" connected_device: %s" % opts.connected_device['name']) if opts_dict['search_text']: log(" --search='%s'" % opts_dict['search_text']) if opts_dict['ids']: log(" Book count: %d" % len(opts_dict['ids'])) if opts_dict['search_text']: log(" (--search ignored when a subset of the database is specified)") if opts_dict['fields']: if opts_dict['fields'] == 'all': log(" Fields: %s" % ', '.join(FIELDS[1:])) else: log(" Fields: %s" % opts_dict['fields']) log(" Output file will be encoded in %s with %s flag" % (bibfile_enc, bibfile_enctag)) log(" BibTeX entry type is %s with a citation like '%s' flag" % (bib_entry, opts_dict['bib_cit'])) # If a list of ids are provided, don't use search_text if opts.ids: opts.search_text = None data = self.search_sort_db(db, opts) if not len(data): log.error("\nNo matching database entries for search criteria '%s'" % opts.search_text) # Get the requested output fields as a list fields = self.get_output_fields(db, opts) if not len(data): log.error("\nNo matching database entries for search criteria '%s'" % opts.search_text) # Initialize BibTeX class bibtexc = BibTeX() # Entries writing after Bibtex formating (or 
not) if bibfile_enc != 'ascii' : bibtexc.ascii_bibtex = False else : bibtexc.ascii_bibtex = True # Check citation choice and go to default in case of bad CLI if isinstance(opts.impcit, (StringType, UnicodeType)) : if opts.impcit == 'False' : citation_bibtex= False elif opts.impcit == 'True' : citation_bibtex= True else : log.warn("Incorrect --create-citation, revert to default") citation_bibtex= True else : citation_bibtex= opts.impcit # Check add file entry and go to default in case of bad CLI if isinstance(opts.addfiles, (StringType, UnicodeType)) : if opts.addfiles == 'False' : addfiles_bibtex = False elif opts.addfiles == 'True' : addfiles_bibtex = True else : log.warn("Incorrect --add-files-path, revert to default") addfiles_bibtex= True else : addfiles_bibtex = opts.addfiles # Preprocess for error and light correction template_citation = preprocess_template(opts.bib_cit) # Open output and write entries with codecs.open(path_to_output, 'w', bibfile_enc, bibfile_enctag)\ as outfile: # File header nb_entries = len(data) # check in book strict if all is ok else throw a warning into log if bib_entry == 'book' : nb_books = len(filter(check_entry_book_valid, data)) if nb_books < nb_entries : log.warn("Only %d entries in %d are book compatible" % (nb_books, nb_entries)) nb_entries = nb_books # If connected device, add 'On Device' values to data if opts.connected_device['is_device_connected'] and 'ondevice' in fields: for entry in data: entry['ondevice'] = db.catalog_plugin_on_device_temp_mapping[entry['id']]['ondevice'] outfile.write(u'%%%Calibre catalog\n%%%{0} entries in catalog\n\n'.format(nb_entries)) outfile.write(u'@preamble{"This catalog of %d entries was generated by calibre on %s"}\n\n' % (nb_entries, nowf().strftime("%A, %d. %B %Y %H:%M").decode(preferred_encoding))) for entry in data: outfile.write(create_bibtex_entry(entry, fields, bib_entry, template_citation, bibtexc, db, citation_bibtex, addfiles_bibtex))
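The citation helper above exploits the fact that re.sub accepts a callable as its replacement argument, and it falls back to stripping non-digits from the ISBN. A minimal standalone sketch of both idioms, with a made-up record and template rather than the plugin's real fields:

import re

entry = {"authors": "Doe", "id": 42}   # hypothetical record for illustration
template = "{authors}_{id}"            # hypothetical citation template

def tpl_replace(match):
    # Strip the braces from the matched placeholder, e.g. "{id}" -> "id"
    field = re.sub(r"[{}]", "", match.group())
    return str(entry.get(field, ""))

# re.sub calls tpl_replace once per "{...}" placeholder it finds.
print(re.sub(r"\{[^{}]*\}", tpl_replace, template))   # -> Doe_42

# Fallback citation key: keep only the digits of the ISBN.
print(re.sub(r"\D", "", "978-0-13-468599-1"))         # -> 9780134685991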
2
Example 39
View licensedef start(argv): if len(sys.argv) < 4: usage() sys.exit() try: opts, args = getopt.getopt(argv, "l:d:b:s:vf:nhcte:") except getopt.GetoptError: usage() sys.exit() start = 0 host_ip = [] filename = "" bingapi = "yes" dnslookup = False dnsbrute = False dnstld = False shodan = False vhost = [] virtual = False limit = 100 dnsserver = "" for opt, arg in opts: if opt == '-l': limit = int(arg) elif opt == '-d': word = arg elif opt == '-s': start = int(arg) elif opt == '-v': virtual = "basic" elif opt == '-f': filename = arg elif opt == '-n': dnslookup = True elif opt == '-c': dnsbrute = True elif opt == '-h': shodan = True elif opt == '-e': dnsserver = arg elif opt == '-t': dnstld = True elif opt == '-b': engine = arg if engine not in ("baidu", "bing", "bingapi","dogpile", "google", "googleCSE", "googleplus", "google-profiles","linkedin", "pgp", "twitter", "vhost", "yahoo", "all"): usage() print "Invalid search engine, try with: baidu, bing, bingapi, dogpile, google, googleCSE, googleplus, google-profiles, linkedin, pgp, twitter, vhost, yahoo, all" sys.exit() else: pass if engine == "google": print "[-] Searching in Google:" search = googlesearch.search_google(word, limit, start) search.process() all_emails = search.get_emails() all_hosts = search.get_hostnames() if engine == "googleCSE": print "[-] Searching in Google Custom Search:" search = googleCSE.search_googleCSE(word, limit, start) search.process() search.store_results() all_emails = search.get_emails() all_hosts = search.get_hostnames() elif engine == "bing" or engine == "bingapi": print "[-] Searching in Bing:" search = bingsearch.search_bing(word, limit, start) if engine == "bingapi": bingapi = "yes" else: bingapi = "no" search.process(bingapi) all_emails = search.get_emails() all_hosts = search.get_hostnames() elif engine == "dogpile": print "[-] Searching in Dogpilesearch.." search = dogpilesearch.search_dogpile(word, limit) search.process() all_emails = search.get_emails() all_hosts = search.get_hostnames() elif engine == "pgp": print "[-] Searching in PGP key server.." search = pgpsearch.search_pgp(word) search.process() all_emails = search.get_emails() all_hosts = search.get_hostnames() elif engine == "yahoo": print "[-] Searching in Yahoo.." search = yahoosearch.search_yahoo(word, limit) search.process() all_emails = search.get_emails() all_hosts = search.get_hostnames() elif engine == "baidu": print "[-] Searching in Baidu.." search = baidusearch.search_baidu(word, limit) search.process() all_emails = search.get_emails() all_hosts = search.get_hostnames() elif engine == "googleplus": print "[-] Searching in Google+ .." search = googleplussearch.search_googleplus(word, limit) search.process() people = search.get_people() print "Users from Google+:" print "====================" for user in people: print user sys.exit() elif engine == "twitter": print "[-] Searching in Twitter .." search = twittersearch.search_twitter(word, limit) search.process() people = search.get_people() print "Users from Twitter:" print "====================" for user in people: print user sys.exit() elif engine == "linkedin": print "[-] Searching in Linkedin.." search = linkedinsearch.search_linkedin(word, limit) search.process() people = search.get_people() print "Users from Linkedin:" print "====================" for user in people: print user sys.exit() elif engine == "google-profiles": print "[-] Searching in Google profiles.." 
search = googlesearch.search_google(word, limit, start) search.process_profiles() people = search.get_profiles() print "Users from Google profiles:" print "---------------------------" for users in people: print users sys.exit() elif engine == "all": print "Full harvest.." all_emails = [] all_hosts = [] virtual = "basic" print "[-] Searching in Google.." search = googlesearch.search_google(word, limit, start) search.process() emails = search.get_emails() hosts = search.get_hostnames() all_emails.extend(emails) all_hosts.extend(hosts) print "[-] Searching in PGP Key server.." search = pgpsearch.search_pgp(word) search.process() emails = search.get_emails() hosts = search.get_hostnames() all_hosts.extend(hosts) all_emails.extend(emails) print "[-] Searching in Bing.." bingapi = "no" search = bingsearch.search_bing(word, limit, start) search.process(bingapi) emails = search.get_emails() hosts = search.get_hostnames() all_hosts.extend(hosts) all_emails.extend(emails) print "[-] Searching in Exalead.." search = exaleadsearch.search_exalead(word, limit, start) search.process() emails = search.get_emails() hosts = search.get_hostnames() all_hosts.extend(hosts) all_emails.extend(emails) #Clean up email list, sort and uniq all_emails=sorted(set(all_emails)) #Results############################################################ print "\n\n[+] Emails found:" print "------------------" if all_emails == []: print "No emails found" else: print "\n".join(all_emails) print "\n[+] Hosts found in search engines:" print "------------------------------------" if all_hosts == []: print "No hosts found" else: all_hosts=sorted(set(all_hosts)) print "[-] Resolving hostnames IPs... " full_host = hostchecker.Checker(all_hosts) full = full_host.check() for host in full: ip = host.split(':')[0] print host if host_ip.count(ip.lower()): pass else: host_ip.append(ip.lower()) #DNS reverse lookup################################################# dnsrev = [] if dnslookup == True: print "\n[+] Starting active queries:" analyzed_ranges = [] for x in full: ip = x.split(":")[0] range = ip.split(".") range[3] = "0/24" range = string.join(range, '.') if not analyzed_ranges.count(range): print "[-]Performing reverse lookup in :" + range a = dnssearch.dns_reverse(range, True) a.list() res = a.process() analyzed_ranges.append(range) else: continue for x in res: if x.count(word): dnsrev.append(x) if x not in full: full.append(x) print "Hosts found after reverse lookup:" print "---------------------------------" for xh in dnsrev: print xh #DNS Brute force#################################################### dnsres = [] if dnsbrute == True: print "\n[-] Starting DNS brute force:" a = dnssearch.dns_force(word, dnsserver, verbose=True) res = a.process() print "\n[+] Hosts found after DNS brute force:\n" for y in res: print y dnsres.append(y) if y not in full: full.append(y) #DNS TLD expansion################################################### dnstldres = [] if dnstld == True: print "[-] Starting DNS TLD expansion:" a = dnssearch.dns_tld(word, dnsserver, verbose=True) res = a.process() print "\n[+] Hosts found after DNS TLD expansion:" print "==========================================" for y in res: print y dnstldres.append(y) if y not in full: full.append(y) #Virtual hosts search############################################### if virtual == "basic": print "[+] Virtual hosts:" print "==================" for l in host_ip: search = bingsearch.search_bing(l, limit, start) search.process_vhost() res = search.get_allhostnames() for x in res: x = 
re.sub(r'[[\<\/?]*[\w]*>]*','',x) x = re.sub('<','',x) x = re.sub('>','',x) print l + "\t" + x vhost.append(l + ":" + x) full.append(l + ":" + x) vhost=sorted(set(vhost)) else: pass shodanres = [] shodanvisited = [] if shodan == True: print "[+] Shodan Database search:" for x in full: print x try: ip = x.split(":")[0] if not shodanvisited.count(ip): print "\tSearching for: " + x a = shodansearch.search_shodan(ip) shodanvisited.append(ip) results = a.run() for res in results: shodanres.append( x + "SAPO" + str(res['banner']) + "SAPO" + str(res['port'])) except: pass print "[+] Shodan results:" print "===================" for x in shodanres: print x.split("SAPO")[0] + ":" + x.split("SAPO")[1] else: pass ################################################################### # Here i need to add explosion mode. # Tengo que sacar los TLD para hacer esto. recursion = None if recursion: start = 0 for word in vhost: search = googlesearch.search_google(word, limit, start) search.process() emails = search.get_emails() hosts = search.get_hostnames() print emails print hosts else: pass #Reporting####################################################### if filename != "": try: print "[+] Saving files..." html = htmlExport.htmlExport( all_emails, full, vhost, dnsres, dnsrev, filename, word, shodanres, dnstldres) save = html.writehtml() except Exception as e: print e print "Error creating the file" try: filename = filename.split(".")[0] + ".xml" file = open(filename, 'w') file.write('<?xml version="1.0" encoding="UTF-8"?><theHarvester>') for x in all_emails: file.write('<email>' + x + '</email>') for x in full: x = x.split(":") if len(x) == 2: file.write('<host>' + '<ip>' + x[0] + '</ip><hostname>' + x[1] + '</hostname>' + '</host>') else: file.write('<host>' + x + '</host>') for x in vhost: x = x.split(":") if len(x) == 2: file.write('<vhost>' + '<ip>' + x[0] + '</ip><hostname>' + x[1] + '</hostname>' + '</vhost>') else: file.write('<vhost>' + x + '</vhost>') if shodanres != []: shodanalysis = [] for x in shodanres: res = x.split("SAPO") # print " res[0] " + res[0] # ip/host # print " res[1] " + res[1] # banner/info # print " res[2] " + res[2] # port file.write('<shodan>') #page.h3(res[0]) file.write('<host>' + res[0] + '</host>') #page.a("Port :" + res[2]) file.write('<port>' + res[2] + '</port>') #page.pre(res[1]) file.write('<banner><!--' + res[1] + '--></banner>') reg_server = re.compile('Server:.*') temp = reg_server.findall(res[1]) if temp != []: shodanalysis.append(res[0] + ":" + temp[0]) file.write('</shodan>') if shodanalysis != []: shodanalysis=sorted(set(shodanalysis)) file.write('<servers>') for x in shodanalysis: #page.pre(x) file.write('<server>' + x + '</server>') file.write('</servers>') file.write('</theHarvester>') file.flush() file.close() print "Files saved!" except Exception as er: print "Error saving XML file: " + er sys.exit()
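The virtual-host step above chains several re.sub calls to strip tag fragments from hostnames scraped out of Bing result markup. A simplified sketch of the same clean-up, using a tighter pattern and a made-up value instead of the original character class:

import re

raw = "mail.example.com</strong>"      # hypothetical scraped hostname

host = re.sub(r"</?\w*>", "", raw)     # drop complete opening/closing tags
host = re.sub(r"[<>]", "", host)       # drop any stray angle brackets left behind
print(host)                            # -> mail.example.com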

2
Example 41
View licensedef executemechanize(urldict): url = urldict["url"] url_no = urldict["counter"] #Array of redirections threadlocal.__setattr__('redirection_list', []) # Mechanize Settings br = mechanize.Browser() cj = cookielib.LWPCookieJar() br.set_cookiejar(cj) br.set_handle_equiv(True) br.set_handle_gzip(True) br.set_handle_redirect(True) br.set_handle_referer(False) br.set_handle_robots(False) br.set_debug_responses(True) br.set_debug_redirects(True) br.set_handle_refresh(mechanize._http.HTTPRefreshProcessor(), max_time=0) br.set_proxies(honeypotconfig.proxy) br.encoding = "UTF-8" # Add HTTP Basic/Digest auth username and password for HTTP proxy access. # (equivalent to using "joe:[email protected]" form above) # br.add_proxy_password("username", "password") # Set header, referrer, accept language from honeypotconfig if honeypotconfig.referrer: br.addheaders = [('User-Agent', honeypotconfig.useragent),('Accept', 'text/html,application/xhtml+xml,application/xml,text/javascript;q=0.9,*/*;q=0.8'),('Accept-Language', honeypotconfig.acceptlang),('Accept-Encoding', 'gzip,deflate'),('Referer', honeypotconfig.referrer)] else: br.addheaders = [('User-Agent', honeypotconfig.useragent),('Accept', 'text/html,application/xhtml+xml,application/xml,text/javascript;q=0.9,*/*;q=0.8'),('Accept-Language', honeypotconfig.acceptlang),('Accept-Encoding', 'gzip,deflate'),('Referer', host)] #'https://www.google.com/url?sa=t&rct=j&q=&esrc=s&source='+url)] cj.add_cookie_header(br) try: r = br.open(url, timeout=12.0) # Parse url (url after redirections) scheme, host, path, params, query, fragment = urlparse.urlparse(r.geturl()) # Print redirection route if exist threadlocal.__setattr__('redirect', mechanize._redirection.redirection()) # Extract and format URL extracted = tldextract.extract(url) # print extracted #formatted = "{}.{}".format(extracted.domain, extracted.tld) formatted = "{}.{}.{}".format(extracted.subdomain, extracted.domain, extracted.suffix) print formatted # Extract each link in the redirection list and match it aginst the formatted URL for eachredirect in threadlocal.redirection_list: list_extract = tldextract.extract(eachredirect) list_format = "{}.{}.{}".format(list_extract.subdomain, list_extract.domain, list_extract.suffix) # print list_format if list_format == formatted: pass if not list_format == formatted: if threadlocal.redirection_list: logger.info(str(url_no) + ",\t" + url + ",\t" + "Redirection Route" + ",\t" +str(threadlocal.redirection_list)) break #if threadlocal.redirection_list: #logger.info(str(url_no) + ",\t" + url + ",\t" + "Redirection Route" + ",\t" +str(threadlocal.redirection_list)) # Convert url into valid file name fdirname = urllib.quote_plus(url) if (len(fdirname) > 250): fdirname = fdirname[:247] # Folder Generation # Gets first character of website to store alphabetically first_char = re.sub(r"(http://|https://)?(www.)?", "", url)[:1] second_char = re.sub(r"(http://|https://)?(www.)?", "", url)[1:3] directory_name = os.path.join(honeypotconfig.wdir, honeypotconfig.tmpfolder, first_char, second_char, fdirname) # If using proxy, names directory in the format ip_address:port if honeypotconfig.proxy: proxy_name = re.search(r":\s?['\"](.*)\s?['\"]", str(honeypotconfig.proxy)).group(1) directory_name = os.path.join(honeypotconfig.wdir, proxy_name, first_char, second_char, fdirname) create_directory(directory_name) # Fetch array of javascript url jsurl_list_old, jsurl_list, url_list = js_extraction(br.response().read(), scheme, host) # Remove duplicates jsurl_list_unique = 
set(jsurl_list) del jsurl_list[:] # Modify javascript paths in html if relative path fp = open(os.path.join(directory_name, fdirname), "wb") new_js_path = br.response().read() for link in jsurl_list_old: if not link.lower().startswith(("www.","http://","https://")): js_name=link[link.rfind("/") + 1:] new_js_path = re.sub(re.escape(link), "./javascripts/" + js_name, new_js_path) fp.write(new_js_path) fp.close() del jsurl_list_old[:] # Grab the current extension of the file and check the true extension # Rename if differ current_ext = os.path.splitext(os.path.join(directory_name, fdirname))[1] guess_ext = mimetypes.guess_extension(magic.from_file(os.path.join(directory_name, fdirname), mime=True)) if (guess_ext is not current_ext and guess_ext is not None): os.rename((os.path.join(directory_name, fdirname)), (os.path.join(directory_name, fdirname)) + str(guess_ext)) #Fetching .js Files if len(jsurl_list_unique) != 0: create_directory(os.path.join(directory_name, "javascripts")) for link in jsurl_list_unique: try: r = br.open(link, timeout=12.0) logger.info(str(url_no) + ",\t" + url + ",\tJS retrieve,\t" + link) js_name = link[link.rfind("/") + 1:] response = br.response().read() # If it doesn't end with ".js" eg. "abc.js?key=123" truncate after "?" if not js_name.endswith(".js"): js_name = js_name[0:js_name.rfind("?")] # Writes js file js_file_path = os.path.join(honeypotconfig.wdir, honeypotconfig.tmpfolder, first_char, second_char, fdirname, "javascripts", js_name) if honeypotconfig.proxy: proxyname = re.search(r":\s?['\"](.*)\s?['\"]", str(honeypotconfig.proxy)).group(1) js_file_path = os.path.join(honeypotconfig.wdir, proxyname, first_char, second_char, fdirname, "javascripts", js_name) jswrite = open(js_file_path, 'w') jswrite.write(response) if honeypotconfig.jsbeautifier: jswrite.write("\n====================================================\n") jswrite.write("====================Beautified Below================\n") with open(js_file_path , 'a') as f: beautify_script_string = jsbeautifier.beautify(response, opts) f.write(str(beautify_script_string)) jswrite.close() except Exception, e: try: logger.error(str(url_no) + ",\t" + url.strip() + ",\t" + str(e) + ",\t" + link, extra = {'error_code' : str(e.code)}) except AttributeError: logger.error(str(url_no) + ",\t" + url.strip() + ",\t" + str(e) + ",\t" + link, extra = {'error_code' : ""}) jsurl_list_unique.clear() # Check for executable files and saves them exe_list = [] if exe_crawler: exe_list = exe_extraction(url_list) if len(exe_list) != 0: create_directory(os.path.join(directory_name, "exe")) for link in exe_list: try: # Read header to check for exe size # Only downloads if less than a threshold (set in honeypotconfig) r = urllib2.urlopen(link, timeout=12) size = int(r.headers["Content-Length"]) / 1024 exename = link[link.rfind("/") + 1:] if size < honeypotconfig.exe_max_size: logger.info(str(url_no) + ",\t" + url + ",\t" + "EXE retrieve,\t" + link) exe_file_path = os.path.join(honeypotconfig.wdir, honeypotconfig.tmpfolder, first_char, second_char, fdirname, "exe", exename) if honeypotconfig.proxy: proxyname = re.search(r":\s?['\"](.*)\s?['\"]", str(honeypotconfig.proxy)).group(1) exe_file_path = os.path.join(honeypotconfig.wdir, proxyname, first_char, second_char, fdirname, "exe", js_name) exewrite = open(exe_file_path, 'w') exewrite.write(br.response().read()) exewrite.close() else: logger.info(str(url_no) + ",\t" + url + ",\t" + "EXE " + str(size) + "KB above exe_max_size" + ",\t" + link) except Exception, e: try: 
logger.error(str(url_no) + ",\t" + url.strip() + ",\t" + str(e) + ",\t" + link, extra = {'error_code' : str(e.code)}) except AttributeError: logger.error(str(url_no) + ",\t" + url.strip() + ",\t" + str(e) + ",\t" + link, extra = {'error_code' : ""}) del exe_list[:] del url_list[:] except Exception, e: try: logger.error(str(url_no) + ",\t" + url.strip() + "\tpoop" + ",\t" + str(e), extra = {'error_code' : str(e.code)}) except AttributeError: # logger.error(str(url_no) + ",\t" + url.strip() + "\tpoop" +",\t" + "Error 418: I'm a teapot", extra = {'error_code' : ""}) # else: logger.error(str(url_no) + ",\t" + url.strip() + "\tpoop" +",\t" + str(e), extra = {'error_code' : ""})
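Two re.sub idioms carry most of the weight in the crawler above: deriving a storage bucket from the bare host name, and rewriting literal links with re.escape. A sketch with made-up values (the count=1 and the escaped dot are slight tightenings of the original patterns):

import re

# 1) Strip an optional scheme and "www." prefix so pages can be bucketed by the
#    first characters of the bare host name.
url = "http://www.example.org/index.html"
bare = re.sub(r"(http://|https://)?(www\.)?", "", url, count=1)
print(bare[:1], bare[1:3])             # -> e xa

# 2) Rewrite a literal link found in fetched HTML; re.escape makes the link safe
#    to use as a pattern even though it contains metacharacters such as "?".
html = '<script src="js/app.js?v=1"></script>'
link = "js/app.js?v=1"
print(re.sub(re.escape(link), "./javascripts/app.js", html))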
2
Example 42
View licensedef clone(args): image = args["image"] hostname = args["hostname"] format = None if args.has_key("format"): format = args["format"] conn = libvirt.open(None) dom = conn.lookupByName(image) desc = fromstring(dom.XMLDesc(libvirt.VIR_DOMAIN_XML_SECURE)) sources = [] for disk in desc.findall(".//disk"): if disk.get("device") == "disk": sources.append(disk.find(".//source").get("file")) target_paths = [] for source in sources: target_file = os.path.basename(source) target_file = target_file.replace(image, hostname) target_dir = _select_most_free_dir(conn) if not target_dir: target_dir = os.path.dirname(source) target_paths.append(os.path.join(target_dir, target_file)) cmdline = [ "virt-clone", "-o", image, "-n", hostname, ] for path in target_paths: cmdline.append("-f") cmdline.append(path) proc = subprocess.Popen( cmdline, stdout=subprocess.PIPE, stderr=subprocess.PIPE) stdout, stderr = proc.communicate() message = None status = 1 if proc.returncode: status = 2 message = stderr g = guestfs.GuestFS() for path in target_paths: g.add_drive(path) g.launch() roots = g.inspect_os() mountpoints = g.inspect_get_mountpoints(roots[0]) mountpoints.sort() for mountpoint in mountpoints: g.mount(mountpoint[1], mountpoint[0]) ostype = None if g.exists('/etc/redhat-release'): ostype = 'redhat' elif g.exists('/etc/debian_version'): ostype = 'debian' # TODO: OS 毎に別モジュールにする if ostype == 'redhat': ifcfg = '''DEVICE=%s BOOTPROTO=dhcp ONBOOT=yes TYPE="Ethernet" DHCP_HOSTNAME=%s ''' network = '''NETWORKING=yes HOSTNAME=%s ''' ifcfg0 = ifcfg % ('eth0', hostname) network = network % (hostname) g.write_file('/etc/sysconfig/network-scripts/ifcfg-eth0', ifcfg0, 0) g.write_file('/etc/sysconfig/network', network, 0) g.write_file('/etc/udev/rules.d/70-persistent-net.rules', '', 0) if g.exists('/etc/sysconfig/network-scripts/ifcfg-eth1'): ifcfg1 = ifcfg % ('eth1', re.sub(r"\.pb$", ".pblan", hostname)) g.write_file( '/etc/sysconfig/network-scripts/ifcfg-eth1', ifcfg1, 0) elif ostype == 'debian': g.write_file('/etc/hosts', '127.0.0.1 localhost', 0) g.write_file('/etc/hostname', hostname, 0) g.write_file('/etc/udev/rules.d/70-persistent-net.rules', '', 0) interface = ''' interface "%s" { send host-name "%s"; } ''' eth0 = interface % ('eth0', hostname) eth1 = interface % ('eth1', re.sub(r"\.pb$", ".pblan", hostname)) conf = g.read_file('/etc/dhcp/dhclient.conf') g.write_file('/etc/dhcp/dhclient.conf', conf + eth0 + eth1, 0) shadow = g.read_file("/etc/shadow") g.write_file("/etc/shadow", re.sub( r"^root:[^:]+:", "root:$1$ZJsvbRbB$dWzQZuu8dDFR8wr6PTPjp0:", shadow), 0) if format == "vmdk": grub = g.read_file("/boot/grub/grub.conf") g.write_file("/boot/grub/grub.conf", re.sub( r"console=[^\s]+", "", grub), 0) g.sync() g.umount_all() dom = conn.lookupByName(hostname) if args["start"] and format != "vmdk": dom.create() if format == "vmdk": vmdk_path = "/var/www/html/maglica/%s.vmdk" % hostname cmdline = [ "qemu-img", "convert", "-f", "raw", "-O", "vmdk", target_paths[0], vmdk_path, ] proc = subprocess.Popen( cmdline, stdout=subprocess.PIPE, stderr=subprocess.PIPE) stdout, stderr = proc.communicate() if proc.returncode: status = 2 message = stderr else: message = "Get vmdk file from http://%s/maglica/%s.vmdk" % ( socket.gethostname(), hostname) remove({"name": hostname}) if status == 1 and not message: message = "%s was cloned from %s on %s successfully" % ( hostname, image, socket.gethostname()) return { "message": message, "status": status, }
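The network and shadow-file edits above rely on anchored substitutions: "$" pins the hostname suffix to the end of the string and "^" pins the root entry to the start of the file. A small sketch with made-up values:

import re

# Swap the domain suffix only when it is the final component of the hostname.
print(re.sub(r"\.pb$", ".pblan", "web01.pb"))      # -> web01.pblan
print(re.sub(r"\.pb$", ".pblan", "web01.pb.old"))  # unchanged

# Replace the password hash of the root entry in /etc/shadow-style text.
# re.MULTILINE is added here so "^" would still anchor correctly even if the
# root entry were not the first line.
shadow = "root:$6$oldhash:19000:0:99999:7:::\ndaemon:*:19000:0:99999:7:::"
print(re.sub(r"^root:[^:]+:", "root:NEWHASH:", shadow, flags=re.MULTILINE))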
2
Example 44
View licensedef CmIndexFromPage(urllinkpage): urlinkpage = urllib.urlopen(urllinkpage) #print "urllinkpage ", urllinkpage srlinkpage = urlinkpage.read() urlinkpage.close() # remove comments because they sometimes contain wrong links srlinkpage = re.sub('<!--[\s\S]*?-->', ' ', srlinkpage) # <b>Wednesday 5 November 2003</b> #<td colspan=2><font size=+1><b>Wednesday 5 November 2003</b></font></td> # <a href="../cm199900/cmhansrd/vo000309/debindx/00309-x.htm">Oral Questions and Debates</a> # this was when I didn't use the match objects, and prefered this more direct detction thing datelinks = redateindexlinks.findall(srlinkpage) # read the dates and links in order, and associate last date with each matching link sdate = '' for link1 in datelinks: if link1[0]: odate = re.sub('\s', ' ', link1[0]) if odate == 'Wednesday 1 November' and urllinkpage == 'http://www.publications.parliament.uk/pa/cm/cmhn0611.htm': odate = 'Wednesday 1 November 2006' if odate == 'Tuesday 9 November 2008' and sdate=='': odate = 'Tuesday 9 December 2008' if odate == 'Wednesday 10 November 2008' and sdate=='': odate = 'Wednesday 10 December 2008' if odate == 'Tuesday 8 June 2008' and sdate=='': odate = 'Tuesday 8 July 2008' sdate = mx.DateTime.DateTimeFrom(odate).date continue # these come from the special dates (of ranges) listed from above. # any more of these and I'll have to make special code to handle them if link1[1]: if link1[1][0:22] == "<b>Friday 23 July 2004": odate = "1 Sept 2004" # the date quoted on the wrans page elif link1[1][0:27] == "<b>Friday 17 September 2004": odate = "4 October 2004" # the date quoted on the wrans page else: assert False sdate = mx.DateTime.DateTimeFrom(odate).date continue if link1[2]: odate = re.sub(' ', ' ', link1[3]) if link1[3] == 'Friday, 6 February 2003': odate = '7 February 2003' if link1[3] == 'Thursday, 24th February 1999': odate = '25 February 1999' sdate = mx.DateTime.DateTimeFrom(odate).date if sdate < earliestdate: continue uind = urlparse.urljoin(urllinkpage, re.sub('\s', '', link1[2])) typ = "Votes and Proceedings" elif link1[4]: odate = re.sub('\s+', ' ', link1[5].replace(' ', ' ')) sdate = mx.DateTime.DateTimeFrom(odate).date if sdate < earliestdate: continue uind = urlparse.urljoin(urllinkpage, re.sub('\s', '', link1[4])) typ = "Question Book" elif link1[6]: linkhref = link1[6] linktext = link1[7] # the link types by name if not re.search('debate|westminster|written(?i)', linktext): continue if re.search('Chronology', linktext): # print "Chronology:", link continue # get rid of the new index pages if re.search('/indexes/|cmordbk|/business/', linkhref): continue if (re.search('Written Answers received between Friday 26 May and Thursday 1 June\s+2006', linktext)): odate = '2 June 2006' sdate = mx.DateTime.DateTimeFrom(odate).date if not sdate: raise Exception, 'No date for link 1 in: ' + urllinkpage + ' ' + ','.join(link1) if sdate < earliestdate: continue # take out spaces and linefeeds we don't want uind = urlparse.urljoin(urllinkpage, re.sub('\s', '', linkhref)) typ = string.strip(re.sub('\s+', ' ', linktext)) if typ == 'Recess Written Answers': typ = 'Written Answers' elif link1[8]: linkhref = link1[8] linktext = link1[9] if re.match('Written Answers and Statements received between<br>\s*Monday 4 September and Friday 8 September 2006', linktext): odate = '11 September 2006' elif re.match('Written Answers received between<br>\s*Wednesday 26 July and Friday 1 September 2006', linktext): odate = '4 September 2006' elif re.match('Written Answers and Statements 
received between<br>\s*Monday 11 September and Wednesday 13 September 2006', linktext): odate = '13 September 2006' elif re.match('Written Answers and Statements received between<br>\s*Thursday 14 September and Monday 18 September 2006', linktext): odate = '18 September 2006' elif re.match('Written Answers received between<br>\s*Tuesday 19 September and Friday 29 September 2006', linktext): odate = '2 October 2006' elif re.match('Written Answers received between<br>\s*Wednesday 20 December 2006 and Friday 5 January 2007', linktext): odate = '5 January 2007' elif re.match('Written Answers received between<br>\s*Monday 12 February 2007 and Friday 16 February 2007', linktext): odate = '16 February 2007' elif re.match('Written Answers received between<br>\s*Wednesday 12 February 2007 and Friday 16 February 2007', linktext): odate = '16 February 2007' else: raise Exception, 'No date for link 2 in: ' + urllinkpage + ' ' + ','.join(link1) sdate = mx.DateTime.DateTimeFrom(odate).date uind = urlparse.urljoin(urllinkpage, re.sub('\s', '', linkhref)) typ = 'Written Answers' uind = uind.replace('080227a', '080227') # 21st July 2005 has a link, but there was none if uind == 'http://www.publications.parliament.uk/pa/cm200506/cmhansrd/vo050721/hallindx/50721-x.htm': continue if uind == 'http://www.publications.parliament.uk/pa/cm200708/cmhansrd/cm071203/hallindx/71203-x.htm': continue if uind == 'http://www.publications.parliament.uk/pa/cm200708/cmhansrd/cm080218/hallindx/80218-x.htm': continue if uind == 'http://www.publications.parliament.uk/pa/cm200708/cmhansrd/cm080225/hallindx/80225-x.htm': continue if uind == 'http://www.publications.parliament.uk/pa/cm200708/cmhansrd/cm080229/hallindx/80229-x.htm': continue # 21st June 2005 WHall links to 22nd June if sdate=='2005-06-21' and uind=='http://www.publications.parliament.uk/pa/cm200506/cmhansrd/vo050622/hallindx/50622-x.htm': uind = 'http://www.publications.parliament.uk/pa/cm200506/cmhansrd/vo050621/hallindx/50621-x.htm' # 25th May 2006 WMS links to 4th May if sdate=='2006-05-25' and uind=='http://www.publications.parliament.uk/pa/cm200506/cmhansrd/cm060504/wmsindx/60504-x.htm': uind = 'http://www.publications.parliament.uk/pa/cm200506/cmhansrd/cm060525/wmsindx/60525-x.htm' if sdate=='2007-06-28' and uind=='http://www.publications.parliament.uk/pa/cm200607/cmhansrd/cm070627/wmsindx/70628-x.htm': uind = 'http://www.publications.parliament.uk/pa/cm200607/cmhansrd/cm070628/wmsindx/70628-x.htm' if sdate=='2007-02-26' and uind=='http://www.publications.parliament.uk/pa/cm200607/cmhansrd/cm070129/index/70129-x.htm': uind = 'http://www.publications.parliament.uk/pa/cm200607/cmhansrd/cm070226/index/70226-x.htm' if sdate=='2007-02-26' and uind=='http://www.publications.parliament.uk/pa/cm200607/cmhansrd/cm070129/wmsindx/70129-x.htm': uind = 'http://www.publications.parliament.uk/pa/cm200607/cmhansrd/cm070226/wmsindx/70226-x.htm' if sdate=='2007-02-26' and uind=='http://www.publications.parliament.uk/pa/cm200607/cmhansrd/cm070129/debindx/70129-x.htm': uind = 'http://www.publications.parliament.uk/pa/cm200607/cmhansrd/cm070226/debindx/70226-x.htm' if sdate=='2007-01-19' and uind=='http://www.publications.parliament.uk/pa/cm200607/cmhansrd/cm070126/index/70126-x.htm': uind = 'http://www.publications.parliament.uk/pa/cm200607/cmhansrd/cm070119/index/70119-x.htm' if sdate=='2007-01-19' and uind=='http://www.publications.parliament.uk/pa/cm200607/cmhansrd/cm070126/wmsindx/70126-x.htm': uind = 
'http://www.publications.parliament.uk/pa/cm200607/cmhansrd/cm070119/wmsindx/70119-x.htm' if sdate=='2007-01-19' and uind=='http://www.publications.parliament.uk/pa/cm200607/cmhansrd/cm070126/debindx/70126-x.htm': uind = 'http://www.publications.parliament.uk/pa/cm200607/cmhansrd/cm070119/debindx/70119-x.htm' if sdate=='2007-10-23' and uind=='http://www.publications.parliament.uk/pa/cm200607/cmhansrd/cm071016/debindx/71023-x.htm': uind = 'http://www.publications.parliament.uk/pa/cm200607/cmhansrd/cm071023/debindx/71023-x.htm' if sdate=='2007-11-15' and uind=='http://www.publications.parliament.uk/pa/cm200708/cmhansrd/cm071114/debindx/71115-x.htm': uind = 'http://www.publications.parliament.uk/pa/cm200708/cmhansrd/cm071115/debindx/71115-x.htm' if sdate=='2008-01-15' and uind=='http://www.publications.parliament.uk/pa/cm200708/cmhansrd/cm080116/index/80115-x.htm': uind = 'http://www.publications.parliament.uk/pa/cm200708/cmhansrd/cm080115/index/80115-x.htm' # 7th May 2008 debates links to 8th May if sdate=='2008-05-07' and uind=='http://www.publications.parliament.uk/pa/cm200708/cmhansrd/cm080508/debindx/80508-x.htm': uind = 'http://www.publications.parliament.uk/pa/cm200708/cmhansrd/cm080507/debindx/80507-x.htm' if sdate>='2006-12-05' and sdate<='2006-12-14' and typ=='Westminster Hall': uind = uind.replace('200506', '200607') # check for repeats where the URLs differ if (sdate, typ) in reses: rc = reses[(sdate, typ)] otheruind = rc[0] if otheruind == uind: continue # sometimes they have old links to the cm edition as # well as the vo edition, we pick the newer vo ones # make sure that discrepancies are explainable test1 = uind.replace('cmhansrd/cm', 'cmhansrd/vo') test2 = otheruind.replace('cmhansrd/cm', 'cmhansrd/vo') if test1 != test2: raise Exception, '------\nRepeated link to %s %s differs:\nurl1: %s\nurl2: %s\nfrom index page1: %s\nindex2: %s\n------' % (sdate, typ, uind, otheruind, urllinkpage, rc[1]) # case of two URLs the same only vo/cm differ like this: # (which is a bug in Hansard, should never happen) #http://www.publications.parliament.uk/pa/cm200203/cmhansrd/vo031006/index/31006-x.htm #http://www.publications.parliament.uk/pa/cm200203/cmhansrd/cm031006/index/31006-x.htm # we replace both with just the vo edition: #print "done replace of these two URLs into the vo one\nurl1: %s\nurl2: %s" % (uind, otheruind) uind = test1 reses[(sdate, typ)] = (uind, urllinkpage)
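The index scraper above leans on three small substitutions: stripping HTML comments non-greedily before parsing (they sometimes contain stale links), removing all whitespace from wrapped hrefs, and collapsing whitespace in the visible link text. A standalone sketch with a made-up snippet:

import re

page = '<p>Index</p> <!-- stale link:\n <a href="wrong.htm">x</a> --> rest'
# "[\s\S]" matches across newlines without re.DOTALL, and "*?" keeps the match
# non-greedy so each comment is removed on its own.
print(re.sub(r"<!--[\s\S]*?-->", " ", page))

# Hrefs in these pages are sometimes wrapped, so whitespace is removed from the
# link itself while the visible text is merely collapsed and trimmed.
print(re.sub(r"\s", "", "vo031006/\n  index/31006-x.htm"))  # -> vo031006/index/31006-x.htm
print(re.sub(r"\s+", " ", "Oral   Questions\n and Debates").strip())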
2
Example 46
View license@classmethod def convert_html_to_data(cls, html): # Clean out all the now. pdftohtml puts them to preserve the lines html = re.sub( r' ', ' ', html ) html = re.sub( r' ', ' ', html ) # create a soup out of the html soup = BeautifulSoup( html, convertEntities=BeautifulStoneSoup.HTML_ENTITIES ) if not soup.body: raise Exception, "No <body> was found - output probably isn't HTML" contents = soup.body.contents # counters to use in the loops below br_count = 0 page_number = 1 filtered_contents = [] while len(contents): line = contents.pop(0) # get the tag name if there is one tag_name = line.name if type(line) == Tag else None # count <br> tags - we use two or more in succession to decide that # we've moved on to a new bit of text if tag_name == 'br': br_count += 1 continue # skip empty lines if tag_name == None: text_content = unicode(line) else: text_content = line.text if re.match( r'\s*$', text_content ): continue # For Assembly # check for something that looks like the page number - when found # delete it and the two lines that follow if tag_name == 'b': page_number_match = re.match( r'(\d+)\s{10,}', line.text ) if page_number_match: # up the page number - the match is the page that we are leaving page_number = int(page_number_match.group(0)) + 1 # skip on to the next page while len(contents): item = contents.pop(0) if type(item) == Tag and item.name == 'hr': break continue # For Senate # check for something that looks like the page number if tag_name == 'b': page_number_match = re.search( r'\s{10,}(\d+)', line.text ) if page_number_match: # set the page number - the match is the page that we are on page_number = int(page_number_match.group(0)) continue if tag_name == 'b': if re.search( r'\s*Disclaimer:', line.text ): # This is a disclaimer line that we can skip continue # if br_count > 0: # print 'br_count: ' + str(br_count) # print type( line ) # # if type(line) == Tag: print line.name # print "%s: >>>%s<<<" % (tag_name, text_content) # print '------------------------------------------------------' text_content = text_content.strip() text_content = re.sub( r'\s+', ' ', text_content ) filtered_contents.append(dict( tag_name = tag_name, text_content = text_content, br_count = br_count, page_number = page_number, )) br_count = 0 # go through all the filtered_content and using the br_count determine # when lines should be merged merged_contents = [] for line in filtered_contents: # print line br_count = line['br_count'] # Join lines that have the same tag_name and are not too far apart same_tag_name_test = ( br_count <= 1 and len(merged_contents) and line['tag_name'] == merged_contents[-1]['tag_name'] ) # Italic text in the current unstyled text inline_italic_test = ( br_count == 0 and len(merged_contents) and line['tag_name'] == 'i' and merged_contents[-1]['tag_name'] == None ) # Merge lines tha meet one of the above tests if ( same_tag_name_test or inline_italic_test ): new_content = ' '.join( [ merged_contents[-1]['text_content'], line['text_content'] ] ) new_content = re.sub( r'\s+,', ',', new_content ) merged_contents[-1]['text_content'] = new_content else: merged_contents.append( line ) # now go through and create some meaningful chunks from what we see meaningful_content = [] last_speaker_name = '' last_speaker_title = '' while len(merged_contents): line = merged_contents.pop(0) next_line = merged_contents[0] if len(merged_contents) else None # print '----------------------------------------' # print line # if the content is italic then it is a scene if line['tag_name'] == 
'i': meaningful_content.append({ 'type': 'scene', 'text': line['text_content'], 'page_number': line['page_number'], }) continue # if the content is all caps then it is a heading if line['text_content'] == line['text_content'].upper(): meaningful_content.append({ 'type': 'heading', 'text': line['text_content'], 'page_number': line['page_number'], }) last_speaker_name = '' last_speaker_title = '' continue # It is a speech if we have a speaker and it is not formatted if line['tag_name'] == None and last_speaker_name: # do some quick smarts to see if we can extract a name from the # start of the speech. speech = line['text_content'] matches = re.match( r'\(([^\)]+)\):(.*)', speech ) if matches: last_speaker_title = last_speaker_name last_speaker_name = matches.group(1) speech = matches.group(2) else: # strip leading colons that may have been missed when the # name was extracted (usually the colon was outside the # bold tags around the name) speech = re.sub( r'^:\s*', '', speech) meaningful_content.append({ 'speaker_name': last_speaker_name, 'speaker_title': last_speaker_title, 'text': speech, 'type': 'speech', 'page_number': line['page_number'], }) # print meaningful_content[-1] continue # If it is a bold line and the next line is 'None' and is no # br_count away then we have the start of a speech. if ( line['tag_name'] == 'b' and next_line and next_line['tag_name'] == None and next_line['br_count'] == 0 ): last_speaker_name = line['text_content'].strip(':') last_speaker_title = '' continue meaningful_content.append({ 'type': 'other', 'text': line['text_content'], 'page_number': line['page_number'], }) last_speaker_name = '' last_speaker_title = '' hansard_data = { 'meta': cls.extract_meta_from_transcript( meaningful_content ), 'transcript': meaningful_content, } return hansard_data
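The substitutions doing most of the work in this Hansard parser are small: re.sub(r'\s+', ' ', ...) collapses runs of whitespace inside each extracted line, re.sub(r'\s+,', ',', ...) tidies the stray space left before a comma when two fragments are merged, and re.sub(r'^:\s*', '', ...) strips a colon left behind after the speaker's name is pulled out. A minimal, self-contained sketch of those three calls (the sample strings are invented for illustration):

import re

fragment_a = "The Temporary Deputy Speaker"
fragment_b = " , thank   you for this   opportunity"

# Collapse internal runs of whitespace to a single space
text = re.sub(r'\s+', ' ', (fragment_a + fragment_b).strip())

# Joining fragments can leave a space before a comma; pull the comma back in
text = re.sub(r'\s+,', ',', text)

# A colon that belonged to the speaker's name sometimes leads the speech
speech = re.sub(r'^:\s*', '', ': Thank you, Mr. Speaker.')

print(text)    # The Temporary Deputy Speaker, thank you for this opportunity
print(speech)  # Thank you, Mr. Speaker.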
2
Example 48
View licensedef handle(self, username=None, **options): from slumber.exceptions import HttpClientError from candidates.election_specific import PARTY_DATA, shorten_post_label from candidates.models import PopItPerson from candidates.popit import create_popit_api_object election_data = { 'prv-2015': 'listedescandidatsauxelectionslegislativeslisteprovincialeanptic.csv', 'nat-2015': 'listedescandidatsauxelectionslegislativesanptic.csv' } field_map = { 'prv-2015': { 'region': 1, 'party': 4, 'list_order': 5, 'first_name': 7, 'last_name': 6, 'gender': 8, 'birth_date': 9, 'party_short': 3 }, 'nat-2015': { 'region': 0, 'party': 2, 'list_order': 3, 'first_name': 5, 'last_name': 4, 'gender': 6, 'birth_date': 7, 'party_short': 2 } } api = create_popit_api_object() party_id_missing = {} party_name_to_id = {} for party_id, party_name in PARTY_DATA.party_id_to_name.items(): party_name_to_id[party_name] = party_id for election_id, filename in election_data.items(): csv_filename = join( dirname(__file__), '..', '..', 'data', filename ) fields = field_map[election_id] with codecs.open(csv_filename, 'r', encoding='windows-1252') as f: initial = True for candidate in unicode_csv_reader(f): # skip header line if initial: initial = False continue region = candidate[fields['region']] party = candidate[fields['party']] party_list_order = candidate[fields['list_order']] first_name = string.capwords(candidate[fields['first_name']]) last_name = string.capwords(candidate[fields['last_name']]) gender = candidate[fields['gender']] birth_date = None if candidate[fields['birth_date']] is not None: birth_date = str(dateutil.parser.parse( candidate[fields['birth_date']], dayfirst=True ).date()) name = first_name + ' ' + last_name id = '-'.join([ re.sub('[^\w]*', '', re.sub(r' ', '-', strip_accents(name.lower()))), re.sub('[^\w]*', '', candidate[fields['party_short']].lower()), birth_date ]) # national candidate if region == 'PAYS': region = 'Burkina Faso' election_data, post_data = get_post_data( api, election_id, region ) # debug # tmp = '%s %s %s (%s) - %s (%s)' % ( id, first_name, last_name, party, region, post_data['label'] ) # print(tmp) person = get_existing_popit_person(id) if person: # print("Found an existing person:", person.get_absolute_url()) pass else: print("No existing person, creating a new one:", name) person = PopItPerson() person.set_identifier('import-id', id) person.family_name = last_name person.given_name = first_name person.name = name person.gender = gender if birth_date: person.birth_date = str(birth_date) else: person.birth_date = None standing_in_election = { 'post_id': post_data['id'], 'name': shorten_post_label(post_data['label']), 'party_list_position': party_list_order, } if 'area' in post_data: standing_in_election['mapit_url'] = post_data['area']['identifier'] person.standing_in = { election_data.slug: standing_in_election } change_metadata = get_change_metadata( None, 'Imported candidate from CSV', ) party_comp = re.sub(' +', ' ', party) party_id = UNKNOWN_PARTY_ID if party_comp in party_name_to_id.keys(): party_id = party_name_to_id[party_comp] party = party_comp else: party_id = party_name_to_id['Unknown Party'] party = 'Unknown Party' if party_id == UNKNOWN_PARTY_ID and party_comp not in party_id_missing.keys(): party_id_missing[party_comp] = 1 person.party_memberships = { election_data.slug: { 'id': party_id, 'name': party, 'imported_name': party_comp } } person.record_version(change_metadata) try: person.save_to_popit(api) except HttpClientError as hce: print("Got an 
HttpClientError:", hce.content) raise if len(party_id_missing) > 0: print("Unmatched party names:") for name in party_id_missing.keys(): print(name)
2
Example 49
View licensedef convert(buildingsFile, osmOut): with open(buildingsFile) as f: features = json.load(f) allAddresses = {} buildings = [] buildingShapes = [] buildingIdx = index.Index() # Returns the coordinates for this address def keyFromAddress(address): return str(address['geometry']['coordinates'][0]) + "," + str(address['geometry']['coordinates'][1]) for feature in features: if feature['geometry']['type'] == 'Polygon' or feature['geometry']['type'] == 'MultiPolygon': extra_tags = osm_tags.get_osm_tags(feature) feature['properties']['osm'] = extra_tags buildings.append(feature) shape = asShape(feature['geometry']) buildingShapes.append(shape) buildingIdx.add(len(buildingShapes) - 1, shape.bounds) # These are the addresses that don't overlap any buildings elif feature['geometry']['type'] == 'Point': # The key is the coordinates of this address. Track how many addresses share these coords. key = keyFromAddress(feature) if key in allAddresses: allAddresses[key].append(feature) else: allAddresses[key] = [feature] else: print "geometry of unknown type:", feature['geometry']['type'] # Generates a new osm id. osmIds = dict(node = -1, way = -1, rel = -1) def newOsmId(type): osmIds[type] = osmIds[type] - 1 return osmIds[type] ## Formats multi part house numbers def formatHousenumber(p): def suffix(part1, part2, hyphen_type=None): #part1 = stripZeroes(part1) if not part2: return str(part1) #part2 = stripZeroes(part2) return str(part1) + ' ' + str(part2) #def stripZeroes(addr): # strip leading zeroes from numbers # if addr.isdigit(): # addr = str(int(addr)) # if '-' in addr: # try: # addr2 = addr.split('-') # if len(addr2) == 2: # addr = str(int(addr2[0])) + '-' + str(int(addr2[1])).zfill(2) # except: # pass # return addr number = suffix(p['Number'], p['NumSuffix']) if p['NumPrefix']: number = p['NumPrefix'] + number return number # Converts an address def convertAddress(address): result = dict() if all (k in address for k in ('Number', 'StreetName')): if address['Number']: result['addr:housenumber'] = formatHousenumber(address) if address['StreetName']: # Titlecase streetname = address['StreetName'].title() if address['StArticle']: streetname = address['StArticle'].title() + " " + streetname if address['PreType']: streetname = address['PreType'].title() + " " + streetname if address['PreDir']: streetname = address['PreDir'].title() + " " + streetname if address['PreMod']: streetname = address['PreMod'].title() + " " + streetname if address['PostType']: streetname = streetname + " " + address['PostType'].title() if address['PostDir']: streetname = streetname + " " + address['PostDir'].title() if address['PostMod']: streetname = streetname + " " + address['PostMod'].title() # Fix titlecase on 1St, 2Nd, 3Rd, 4Th, etc streetname = re.sub(r"(.*)(\d+)St\s*(.*)", r"\1\2st \3", streetname) streetname = re.sub(r"(.*)(\d+)Nd\s*(.*)", r"\1\2nd \3", streetname) streetname = re.sub(r"(.*)(\d+)Rd\s*(.*)", r"\1\2rd \3", streetname) streetname = re.sub(r"(.*)(\d+)Th\s*(.*)", r"\1\2th \3", streetname) # Expand 'St ' -> 'Saint' # relevant for: # 'St Clair' # 'St Louis' # 'St James' # 'St James Park' # 'St Andrews' # 'St Nicolas' # 'St Cloud' # 'St Ambrose' # 'St Bonaventure' # 'St Joseph' # 'St Tropez' if streetname[0:3] == 'St ': streetname = 'Saint ' + streetname[3:] # Middle name expansions streetname = streetname.replace(' St ', ' Street ') streetname = streetname.replace(' Rd ', ' Road ') streetname = streetname.replace(' Blvd ', ' Boulevard ') result['addr:street'] = streetname if address['PCITY1']: 
result['addr:city'] = address['PCITY1'].title() elif address['LegalComm']: result['addr:city'] = address['LegalComm'].title() if address['ZipCode']: result['addr:postcode'] = str(int(address['ZipCode'])) if address['UnitName']: result['addr:unit'] = address['UnitName'] return result # Distills coincident addresses into one address where possible. # Takes an array of addresses and returns an array of 1 or more addresses def distillAddresses(addresses): # Only distill addresses if the following conditions are true: # 1) the addresses share the same coordinates. # AND # 2a) all the attributes are the same _except_ the unit number/name # OR # 2b) the street number is the same but the street names are referring to the same thing outputAddresses = [] # First, group the addresses into separate lists for each unique location addressesByCoords = {} for address in addresses: key = keyFromAddress(address) if key in addressesByCoords: addressesByCoords[key].append(address) else: addressesByCoords[key] = [address] # loop over unique coordinates for key in addressesByCoords: # Here see if we can collapse any of these addresses at the same coords. # addressesByCoords[key] is an array of addresses at this location. # We are only looking for the 2 possibilities above (2a) and (2b). # If the situation is more complicated, change nothing. outputAddresses.extend(distillAddressesAtPoint(addressesByCoords[key])) return outputAddresses # This function is called by distillAddresses. # It assumes all addresses are at the same coordinates. # Returns an array of 1 or more addresses def distillAddressesAtPoint(addresses): if len(addresses) == 1: return addresses firstAddress = addresses[0] # (2a) If the first address is an apartment, see if all the rest are too. # NOTE: sometimes an apartment building has a few address points that lack a UnitName... # ...so checking for the presence of UnitName in firstAddress wouldn't always work. props = firstAddress['properties'] if debug: print "Testing to see if these are apartments...", '\t'.join([str(props['Number']), str(props['NumSuffix']), str(props['PreType']), str(props['StreetName']), str(props['PostType']), str(props['UnitName'])]) # Compare subsequent addresses in the array to the first address. # Hence, range starts at 1. for i in range(1, len(addresses)): if not areSameAddressExceptUnit(firstAddress, addresses[i]): props = addresses[i]['properties'] if debug: print "No, this address was different...........", '\t'.join([str(props['Number']), str(props['NumSuffix']), str(props['PreType']), str(props['StreetName']), str(props['PostType']), str(props['UnitName'])]) #print firstAddress #print addresses[i] break # else, keep going else: # else for the `for` statement. Executes only if `break` never did. # We checked them all, and they're all the same except UnitName. # In this case the apartment data is useless to OSM because the # apartment nodes are all on top of each other. # So, discard the unit information and return just one address. firstAddress['properties']['UnitName'] = None if debug: print "Yes they were apartments! Collapsed", len(addresses), "into one" return [firstAddress] # (2b) Check if the street number is all the same. # For this, we use a list of alternative names (like HWY 1, etc)... # ...and we need to know which canonical name to keep. if debug: print "Testing to see if the street names are synonyms.." 
canonicalStreetName = None for i in range(1, len(addresses)): props = addresses[i]['properties'] if not areSameAddressExceptStreet(firstAddress, addresses[i]): if debug: print "No, this address was different...........", '\t'.join([str(props['Number']), str(props['NumSuffix']), str(props['PreType']), str(props['StreetName']), str(props['PostType']), str(props['UnitName'])]) #print firstAddress #print addresses[i] break compoundStreetName = (str(props['PreType']),str(props['StreetName']),str(props['PostType'])) currentCanonicalStreetName = getCanonicalName(compoundStreetName) if currentCanonicalStreetName: if debug: print "found canonical name", currentCanonicalStreetName if ((currentCanonicalStreetName == canonicalStreetName) or (canonicalStreetName == None)): canonicalStreetName = currentCanonicalStreetName else: if debug: print "canonicalStreetNames didn't match:", canonicalStreetName, currentCanonicalStreetName break else: print "couldn't find canonicalStreetName for", compoundStreetName break else: # else for the `for` statement. Executes only if `break` never did. # We checked them all, and they're all the same except StreetName. # If we can determine that they are all the same synonym, we can # overwrite the other streetname information and return just one address. firstAddress['properties']['PreType'] = canonicalStreetName[0] firstAddress['properties']['StreetName'] = canonicalStreetName[1] firstAddress['properties']['PostType'] = canonicalStreetName[2] if debug: print "Yes they were synonyms! Collapsed", len(addresses), "into one" return [firstAddress] # This is only excuted if neither of the two `else` statements executed # for the two `for` statements above. That means we were unable to collapse # separate apartments into one, or collapse synonymous street names into one. # So, instead of returning just one address, we fail and return all of them. return addresses def areSameAddressExceptUnit(a1, a2): for key in ['NumPrefix', 'Number', 'NumSuffix', 'PreMod', 'PreDir', 'PreType', 'StArticle', 'StreetName', 'PostType', 'PostDir', 'PostMod', 'ZipCode', 'LegalComm', 'PCITY1']: if a1['properties'][key] != a2['properties'][key]: #print key, a1['properties'][key], "!=", a2['properties'][key] return False return True def areSameAddressExceptStreet(a1, a2): for key in ['NumPrefix', 'Number', 'NumSuffix', 'PreMod', 'PreDir', 'StArticle', 'UnitName', 'PostDir', 'PostMod', 'ZipCode', 'LegalComm', 'PCITY1']: if a1['properties'][key] != a2['properties'][key]: #print key, a1['properties'][key], "!=", a2['properties'][key] return False return True # Sometimes we have identical addresses that differ only by street name. # Usually these are because the street name is also a highway. We want to # remove all the highway names and only use the street name for the address canonicalNames = { ("None", "LINCOLN", "BOULEVARD"): (None, "LINCOLN", "BOULEVARD"), ("ROUTE", "1", "None"): (None, "LINCOLN", "BOULEVARD"), ("HIGHWAY", "1", "None"): (None, "LINCOLN", "BOULEVARD"), ("None", "SR-1", "None"): (None, "LINCOLN", "BOULEVARD"), ("None", "PCH", "None"): (None, "LINCOLN", "BOULEVARD"), } def getCanonicalName(compoundStreetName): result = None try: result = canonicalNames[compoundStreetName] except KeyError: return None return result # Appends new node or returns existing if exists. 
nodes = {} def appendNewNode(coords, osmXml): rlon = int(float(coords[0]*10**7)) rlat = int(float(coords[1]*10**7)) if (rlon, rlat) in nodes: return nodes[(rlon, rlat)] node = etree.Element('node', visible = 'true', id = str(newOsmId('node'))) node.set('lon', str(Decimal(coords[0])*Decimal(1))) node.set('lat', str(Decimal(coords[1])*Decimal(1))) nodes[(rlon, rlat)] = node osmXml.append(node) return node # Sometimes we want to force overlapping nodes, such as with addresses. # This way they'll show up in JOSM and the contributor can deal with them manually. # Otherwise, we might try to apply multiple address tags to the same node... # ...which is also incorrect, but harder to detect. def appendNewNodeIgnoringExisting(coords, osmXml): rlon = int(float(coords[0]*10**7)) rlat = int(float(coords[1]*10**7)) #if (rlon, rlat) in nodes: # return nodes[(rlon, rlat)] node = etree.Element('node', visible = 'true', id = str(newOsmId('node'))) node.set('lon', str(Decimal(coords[0])*Decimal(1))) node.set('lat', str(Decimal(coords[1])*Decimal(1))) nodes[(rlon, rlat)] = node osmXml.append(node) return node def appendNewWay(coords, intersects, osmXml): way = etree.Element('way', visible='true', id=str(newOsmId('way'))) firstNid = 0 for i, coord in enumerate(coords): if i == 0: continue # the first and last coordinate are the same node = appendNewNode(coord, osmXml) if i == 1: firstNid = node.get('id') way.append(etree.Element('nd', ref=node.get('id'))) # Check each way segment for intersecting nodes int_nodes = {} try: line = LineString([coord, coords[i+1]]) except IndexError: line = LineString([coord, coords[1]]) for idx, c in enumerate(intersects): if line.buffer(0.000001).contains(Point(c[0], c[1])) and c not in coords: t_node = appendNewNode(c, osmXml) for n in way.iter('nd'): if n.get('ref') == t_node.get('id'): break else: int_nodes[t_node.get('id')] = Point(c).distance(Point(coord)) for n in sorted(int_nodes, key=lambda key: int_nodes[key]): # add intersecting nodes in order way.append(etree.Element('nd', ref=n)) way.append(etree.Element('nd', ref=firstNid)) # close way osmXml.append(way) return way # Appends an address to a given node or way. def appendAddress(address, element): # # Need to check if these tags already exist on this element for k, v in convertAddress(address['properties']).iteritems(): # TODO: is this doing anything useful? #for child in element: # if child.tag == 'tag': # #print k, v # if child.attrib.get('k') == k: # print "found key", k # if child.attrib.get('v') == v: # print "found matching value", v element.append(etree.Element('tag', k=k, v=v)) # Appends a building to a given OSM xml document. def appendBuilding(building, shape, address, osmXml): # Check for intersecting buildings intersects = [] for i in buildingIdx.intersection(shape.bounds): try: for c in buildingShapes[i].exterior.coords: if Point(c[0], c[1]).buffer(0.000001).intersects(shape): intersects.append(c) except AttributeError: for c in buildingShapes[i][0].exterior.coords: if Point(c[0], c[1]).buffer(0.000001).intersects(shape): intersects.append(c) # Export building, create multipolygon if there are interior shapes. 
interiors = [] try: way = appendNewWay(list(shape.exterior.coords), intersects, osmXml) for interior in shape.interiors: interiors.append(appendNewWay(list(interior.coords), [], osmXml)) except AttributeError: way = appendNewWay(list(shape[0].exterior.coords), intersects, osmXml) for interior in shape[0].interiors: interiors.append(appendNewWay(list(interior.coords), [], osmXml)) if len(interiors) > 0: relation = etree.Element('relation', visible='true', id=str(newOsmId('way'))) relation.append(etree.Element('member', type='way', role='outer', ref=way.get('id'))) for interior in interiors: relation.append(etree.Element('member', type='way', role='inner', ref=interior.get('id'))) relation.append(etree.Element('tag', k='type', v='multipolygon')) osmXml.append(relation) way = relation for tag in building['properties']['osm']: value = building['properties']['osm'][tag] way.append(etree.Element('tag', k=tag, v=value)) # if 'GeneralUse' in building['properties']: # way.append(etree.Element('tag', k='building', v=building['properties']['GeneralUse'])) # else: # way.append(etree.Element('tag', k='building', v='yes')) # if 'SpecificUs' in building['properties']: # way.append(etree.Element('tag', k='building:use', v=building['properties']['GeneralUse'])) if 'YearBuilt' in building['properties'] and building['properties']['YearBuilt'] is not None: YearBuilt = int(building['properties']['YearBuilt']) if YearBuilt > 0: way.append(etree.Element('tag', k='start_date', v=str(YearBuilt))) # if 'Specific_1' in building['properties']: # way.append(etree.Element('tag', k='amenity', v=building['properties']['Specific_1'])) if 'Units' in building['properties'] and building['properties']['Units'] is not None: units = int(round(float(building['properties']['Units']), 0)) if units > 0: way.append(etree.Element('tag', k='building:units', v=str(units))) if 'HEIGHT' in building['properties']: height = round(((building['properties']['HEIGHT'] * 12) * 0.0254), 1) if height > 0: way.append(etree.Element('tag', k='height', v=str(height))) if 'ELEV' in building['properties']: elevation = round(((building['properties']['ELEV'] * 12) * 0.0254), 1) if elevation > 0: way.append(etree.Element('tag', k='ele', v=str(elevation))) if 'BLD_ID' in building['properties']: way.append(etree.Element('tag', k='lacounty:bld_id', v=str(building['properties']['BLD_ID']))) if 'AIN' in building['properties'] and building['properties']['AIN'] is not None: way.append(etree.Element('tag', k='lacounty:ain', v=str(building['properties']['AIN']))) # if address: # appendAddress(address, way) # Export buildings & addresses. Only export address with building if there is exactly # one address per building. Export remaining addresses as individual nodes. # The remaining addresses are added to a dictionary hashed by their coordinates. # This way we catch any addresses that have the same coordinates. osmXml = etree.Element('osm', version='0.6', generator='[email protected]') for i in range(0, len(buildings)): buildingAddresses = [] for address in buildings[i]['properties']['addresses']: buildingAddresses.append(address) address = None if len(buildingAddresses) == 1: # There's only one address in the building footprint address = buildingAddresses[0] elif len(buildingAddresses) > 1: # If there are multiple addresses, first try to distill them. # If we can distill them to one address, we can still add it to this building. distilledAddresses = distillAddresses(buildingAddresses) if len(distilledAddresses) == 1: # We distilled down to one address. 
Add it to the building. address = distilledAddresses[0] else: # We could not distilled down to one address. Instead export as nodes. for address in distilledAddresses: # The key is the coordinates of this address. Track how many addresses share these coords. key = keyFromAddress(address) if key in allAddresses: allAddresses[key].append(address) else: allAddresses[key] = [address] appendBuilding(buildings[i], buildingShapes[i], address, osmXml) # Export any addresses that aren't the only address for a building. if (len(allAddresses) > 0): # Iterate over the list of distinct coordinates found in the address data for coordskey in allAddresses: # if a distinct coordinate has only one associated address, # then export that address as a new node if len(allAddresses[coordskey]) == 1: address = allAddresses[coordskey][0] coordinates = address['geometry']['coordinates'] # node = appendNewNode(coordinates, osmXml) # returns old node if one exists at these coords # appendAddress(address, node) # If there is more than one address at these coordinates, do something. # ...but do what exactly? else: distilledAddresses = distillAddresses(allAddresses[coordskey]) if len(distilledAddresses) == 1: # We distilled down to one address. Append it. address = distilledAddresses[0] coordinates = address['geometry']['coordinates'] # node = appendNewNode(coordinates, osmXml) # returns old node if one exists at these coords # appendAddress(address, node) else: if debug: print "found duplicate coordinates that could not be distilled:", coordskey, "has", len(allAddresses[coordskey]), "addresses" if debug: print '\t'.join(["num", "numsufx", "pretype", "street", "posttype", "unit"]) for address in distilledAddresses: # TODO: do something smart here. These are overlapping addresses that we couldn't distill. # TODO: maybe jitter them, or leave stacked but with FIXME? # TODO: For now, we use appendNewNodeIgnoringExisting to pile the nodes on top of each other. #print address props = address['properties'] if debug: print '\t'.join([str(props['Number']), str(props['NumSuffix']), str(props['PreType']), str(props['StreetName']), str(props['PostType']), str(props['UnitName'])]) coordinates = address['geometry']['coordinates'] # node = appendNewNodeIgnoringExisting(coordinates, osmXml) # Force overlapping nodes so JOSM will catch them # appendAddress(address, node) with open(osmOut, 'w') as outFile: outFile.writelines(tostring(osmXml, pretty_print=True, xml_declaration=True, encoding='UTF-8')) print 'Exported ' + osmOut
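A compact illustration from this converter is the ordinal clean-up in convertAddress: str.title() capitalizes the letter after a digit ('4th' becomes '4Th'), and one re.sub per suffix uses numbered group references in the replacement to put the lowercase form back while keeping the surrounding words intact. A minimal sketch of just that step (the street names are invented):

import re

def fix_ordinals(streetname):
    # Undo title-casing of ordinal suffixes: "4Th" -> "4th", "23Rd" -> "23rd"
    streetname = re.sub(r"(.*)(\d+)St\s*(.*)", r"\1\2st \3", streetname)
    streetname = re.sub(r"(.*)(\d+)Nd\s*(.*)", r"\1\2nd \3", streetname)
    streetname = re.sub(r"(.*)(\d+)Rd\s*(.*)", r"\1\2rd \3", streetname)
    streetname = re.sub(r"(.*)(\d+)Th\s*(.*)", r"\1\2th \3", streetname)
    return streetname

print(fix_ordinals("West 4Th Street"))   # West 4th Street
print(fix_ordinals("East 23Rd Avenue"))  # East 23rd Avenue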
0
Example 50
View licensedef __init__(self, **args): """Initializes a new netsnmpAgent instance. "args" is a dictionary that can contain the following optional parameters: - AgentName : The agent's name used for registration with net-snmp. - MasterSocket : The transport specification of the AgentX socket of the running snmpd instance to connect to (see the "LISTENING ADDRESSES" section in the snmpd(8) manpage). Change this if you want to use eg. a TCP transport or access a custom snmpd instance, eg. as shown in run_simple_agent.sh, or for automatic testing. - PersistenceDir: The directory to use to store persistence information. Change this if you want to use a custom snmpd instance, eg. for automatic testing. - MIBFiles : A list of filenames of MIBs to be loaded. Required if the OIDs, for which variables will be registered, do not belong to standard MIBs and the custom MIBs are not located in net-snmp's default MIB path (/usr/share/snmp/mibs). - UseMIBFiles : Whether to use MIB files at all or not. When False, the parser for MIB files will not be initialized, so neither system-wide MIB files nor the ones provided in the MIBFiles argument will be in use. - LogHandler : An optional Python function that will be registered with net-snmp as a custom log handler. If specified, this function will be called for every log message net-snmp itself generates, with parameters as follows: 1. a string indicating the message's priority: one of "Emergency", "Alert", "Critical", "Error", "Warning", "Notice", "Info" or "Debug". 2. the actual log message. Note that heading strings such as "Warning: " and "Error: " will be stripped off since the priority level is explicitly known and can be used to prefix the log message, if desired. Trailing linefeeds will also have been stripped off. If undefined, log messages will be written to stderr instead. """ # Default settings defaults = { "AgentName" : os.path.splitext(os.path.basename(sys.argv[0]))[0], "MasterSocket" : None, "PersistenceDir": None, "UseMIBFiles" : True, "MIBFiles" : None, "LogHandler" : None, } for key in defaults: setattr(self, key, args.get(key, defaults[key])) if self.UseMIBFiles and self.MIBFiles is not None and type(self.MIBFiles) not in (list, tuple): self.MIBFiles = (self.MIBFiles,) # Initialize status attribute -- until start() is called we will accept # SNMP object registrations self._status = netsnmpAgentStatus.REGISTRATION # Unfortunately net-snmp does not give callers of init_snmp() (used # in the start() method) any feedback about success or failure of # connection establishment. But for AgentX clients this information is # quite essential, thus we need to implement some more or less ugly # workarounds. # For net-snmp 5.7.x, we can derive success and failure from the log # messages it generates. Normally these go to stderr, in the absence # of other so-called log handlers. Alas we define a callback function # that we will register with net-snmp as a custom log handler later on, # hereby effectively gaining access to the desired information. def _py_log_handler(majorID, minorID, serverarg, clientarg): # "majorID" and "minorID" are the callback IDs with which this # callback function was registered. They are useful if the same # callback was registered multiple times. 
# Both "serverarg" and "clientarg" are pointers that can be used to # convey information from the calling context to the callback # function: "serverarg" gets passed individually to every call of # snmp_call_callbacks() while "clientarg" was initially passed to # snmp_register_callback(). # In this case, "majorID" and "minorID" are always the same (see the # registration code below). "serverarg" needs to be cast back to # become a pointer to a "snmp_log_message" C structure (passed by # net-snmp's log_handler_callback() in snmplib/snmp_logging.c) while # "clientarg" will be None (see the registration code below). logmsg = ctypes.cast(serverarg, snmp_log_message_p) # Generate textual description of priority level priorities = { LOG_EMERG: "Emergency", LOG_ALERT: "Alert", LOG_CRIT: "Critical", LOG_ERR: "Error", LOG_WARNING: "Warning", LOG_NOTICE: "Notice", LOG_INFO: "Info", LOG_DEBUG: "Debug" } msgprio = priorities[logmsg.contents.priority] # Strip trailing linefeeds and in addition "Warning: " and "Error: " # from msgtext as these conditions are already indicated through # msgprio msgtext = re.sub( "^(Warning|Error): *", "", u(logmsg.contents.msg.rstrip(b"\n")) ) # Intercept log messages related to connection establishment and # failure to update the status of this netsnmpAgent object. This is # really an ugly hack, introducing a dependency on the particular # text of log messages -- hopefully the net-snmp guys won't # translate them one day. if msgprio == "Warning" \ or msgprio == "Error" \ and re.match("Failed to .* the agentx master agent.*", msgtext): # If this was the first connection attempt, we consider the # condition fatal: it is more likely that an invalid # "MasterSocket" was specified than that we've got concurrency # issues with our agent being erroneously started before snmpd. if self._status == netsnmpAgentStatus.FIRSTCONNECT: self._status = netsnmpAgentStatus.CONNECTFAILED # No need to log this message -- we'll generate our own when # throwing a netsnmpAgentException as consequence of the # ECONNECT return 0 # Otherwise we'll stay at status RECONNECTING and log net-snmp's # message like any other. net-snmp code will keep retrying to # connect. elif msgprio == "Info" \ and re.match("AgentX subagent connected", msgtext): self._status = netsnmpAgentStatus.CONNECTED elif msgprio == "Info" \ and re.match("AgentX master disconnected us.*", msgtext): self._status = netsnmpAgentStatus.RECONNECTING # If "LogHandler" was defined, call it to take care of logging. # Otherwise print all log messages to stderr to resemble net-snmp # standard behavior (but add log message's associated priority in # plain text as well) if self.LogHandler: self.LogHandler(msgprio, msgtext) else: print("[{0}] {1}".format(msgprio, msgtext)) return 0 # We defined a Python function that needs a ctypes conversion so it can # be called by C code such as net-snmp. That's what SNMPCallback() is # used for. However we also need to store the reference in "self" as it # will otherwise be lost at the exit of this function so that net-snmp's # attempt to call it would end in nirvana... self._log_handler = SNMPCallback(_py_log_handler) # Now register our custom log handler with majorID SNMP_CALLBACK_LIBRARY # and minorID SNMP_CALLBACK_LOGGING. if libnsa.snmp_register_callback( SNMP_CALLBACK_LIBRARY, SNMP_CALLBACK_LOGGING, self._log_handler, None ) != SNMPERR_SUCCESS: raise netsnmpAgentException( "snmp_register_callback() failed for _netsnmp_log_handler!" 
) # Finally the net-snmp logging system needs to be told to enable # logging through callback functions. This will actually register a # NETSNMP_LOGHANDLER_CALLBACK log handler that will call out to any # callback functions with the majorID and minorID shown above, such as # ours. libnsa.snmp_enable_calllog() # Unfortunately our custom log handler above is still not enough: in # net-snmp 5.4.x there were no "AgentX master disconnected" log # messages yet. So we need another workaround to be able to detect # disconnects for this release. Both net-snmp 5.4.x and 5.7.x support # a callback mechanism using the "majorID" SNMP_CALLBACK_APPLICATION and # the "minorID" SNMPD_CALLBACK_INDEX_STOP, which we can abuse for our # purposes. Again, we start by defining a callback function. def _py_index_stop_callback(majorID, minorID, serverarg, clientarg): # For "majorID" and "minorID" see our log handler above. # "serverarg" is a disguised pointer to a "netsnmp_session" # structure (passed by net-snmp's subagent_open_master_session() and # agentx_check_session() in agent/mibgroup/agentx/subagent.c). We # can ignore it here since we have a single session only anyway. # "clientarg" will be None again (see the registration code below). # We only care about SNMPD_CALLBACK_INDEX_STOP as our custom log # handler above already took care of all other events. if minorID == SNMPD_CALLBACK_INDEX_STOP: self._status = netsnmpAgentStatus.RECONNECTING return 0 # Convert it to a C callable function and store its reference self._index_stop_callback = SNMPCallback(_py_index_stop_callback) # Register it with net-snmp if libnsa.snmp_register_callback( SNMP_CALLBACK_APPLICATION, SNMPD_CALLBACK_INDEX_STOP, self._index_stop_callback, None ) != SNMPERR_SUCCESS: raise netsnmpAgentException( "snmp_register_callback() failed for _netsnmp_index_callback!" ) # No enabling necessary here # Make us an AgentX client if libnsa.netsnmp_ds_set_boolean( NETSNMP_DS_APPLICATION_ID, NETSNMP_DS_AGENT_ROLE, 1 ) != SNMPERR_SUCCESS: raise netsnmpAgentException( "netsnmp_ds_set_boolean() failed for NETSNMP_DS_AGENT_ROLE!" ) # Use an alternative transport specification to connect to the master? # Defaults to "/var/run/agentx/master". # (See the "LISTENING ADDRESSES" section in the snmpd(8) manpage) if self.MasterSocket: if libnsa.netsnmp_ds_set_string( NETSNMP_DS_APPLICATION_ID, NETSNMP_DS_AGENT_X_SOCKET, b(self.MasterSocket) ) != SNMPERR_SUCCESS: raise netsnmpAgentException( "netsnmp_ds_set_string() failed for NETSNMP_DS_AGENT_X_SOCKET!" ) # Use an alternative persistence directory? if self.PersistenceDir: if libnsa.netsnmp_ds_set_string( NETSNMP_DS_LIBRARY_ID, NETSNMP_DS_LIB_PERSISTENT_DIR, b(self.PersistenceDir) ) != SNMPERR_SUCCESS: raise netsnmpAgentException( "netsnmp_ds_set_string() failed for NETSNMP_DS_LIB_PERSISTENT_DIR!" ) # Initialize net-snmp library (see netsnmp_agent_api(3)) if libnsa.init_agent(b(self.AgentName)) != 0: raise netsnmpAgentException("init_agent() failed!") # Initialize MIB parser if self.UseMIBFiles: libnsa.netsnmp_init_mib() # If MIBFiles were specified (ie. MIBs that can not be found in # net-snmp's default MIB directory /usr/share/snmp/mibs), read # them in so we can translate OID strings to net-snmp's internal OID # format. if self.UseMIBFiles and self.MIBFiles: for mib in self.MIBFiles: if libnsa.read_mib(b(mib)) == 0: raise netsnmpAgentException("netsnmp_read_module({0}) " + "failed!".format(mib)) # Initialize our SNMP object registry self._objs = defaultdict(dict)
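The re.sub in this agent's log handler strips the redundant 'Warning: ' or 'Error: ' prefix from net-snmp's message text, since the priority is already delivered as a separate field, and re.match then recognises the connection-state messages. (Note that the original condition msgprio == "Warning" or msgprio == "Error" and re.match(...) parses as A or (B and C), so every Warning-level message takes that branch; the sketch below parenthesises the narrower reading.) A stripped-down sketch of the string handling only, without the ctypes plumbing; the sample message follows the wording used in the code above but is otherwise illustrative:

import re

def clean_log_message(raw, priority):
    # Drop trailing newlines and the leading "Warning: "/"Error: " prefix;
    # the priority is reported separately, so the prefix is redundant.
    text = re.sub(r"^(Warning|Error): *", "", raw.rstrip("\n"))

    if priority in ("Warning", "Error") and \
            re.match(r"Failed to .* the agentx master agent.*", text):
        state = "CONNECTFAILED"
    elif priority == "Info" and re.match(r"AgentX subagent connected", text):
        state = "CONNECTED"
    elif priority == "Info" and re.match(r"AgentX master disconnected us.*", text):
        state = "RECONNECTING"
    else:
        state = None
    return text, state

print(clean_log_message("Warning: Failed to connect to the agentx master agent ([NIL]):\n", "Warning"))
# ('Failed to connect to the agentx master agent ([NIL]):', 'CONNECTFAILED')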