sys.stdout.write

Here are examples of the Python API `sys.stdout.write`, taken from open source projects. By voting up, you can indicate which examples are most useful and appropriate.

200 Examples 7

Example 1

Project: paramnormal
Source File: ipython_directive.py
View license
    def process_input(self, data, input_prompt, lineno):
        """
        Process data block for INPUT token.

        Every input line is handed to ``self.process_input_line`` (except in
        verbatim mode), whatever was written to ``self.cout`` meanwhile is
        collected, and tracebacks / recorded warnings are echoed on
        ``sys.stdout`` unless the block opted out of that.

        Parameters
        ----------
        data : tuple
            ``(decorator, input, rest)``: the decorator of the block (or
            ``None``), the input source text and the text following it.
        input_prompt : str
            Prompt put in front of the first formatted input line.
        lineno : int
            Its number of digits sets the width of the continuation prompt.

        Returns
        -------
        tuple
            ``(ret, input_lines, output, is_doctest, decorator, image_file,
            image_directive)``; ``ret`` is the list of formatted lines.
        """
        decorator, source, rest = data
        image_file = None
        image_directive = None

        has_decorator = decorator is not None
        is_verbatim = decorator == '@verbatim' or self.is_verbatim
        is_doctest = ((has_decorator and decorator.startswith('@doctest'))
                      or self.is_doctest)
        is_suppress = decorator == '@suppress' or self.is_suppress
        is_okexcept = decorator == '@okexcept' or self.is_okexcept
        is_okwarning = decorator == '@okwarning' or self.is_okwarning
        is_savefig = has_decorator and decorator.startswith('@savefig')

        # Tell the output buffer which encodings to try when converting the
        # execution output to unicode; ``output_encoding`` is expected to be
        # set on the instance before this method runs.
        self.cout.set_encodings(self.output_encoding)

        input_lines = source.split('\n')
        if len(input_lines) > 1 and input_lines[-1] != "":
            # make sure there's a blank line so splitter buffer gets reset
            input_lines.append('')

        continuation = '   %s:' % ('.' * (len(str(lineno)) + 2))

        if is_savefig:
            image_file, image_directive = self.process_image(decorator)

        ret = []
        is_semicolon = False

        # Hold the execution count, if requested to do so.
        store_history = not (is_suppress and self.hold_count)

        # Note: catch_warnings is not thread safe
        with warnings.catch_warnings(record=True) as ws:
            for i, line in enumerate(input_lines):
                if line.endswith(';'):
                    is_semicolon = True

                if i == 0:
                    # process the first input line
                    if is_verbatim:
                        self.process_input_line('')
                        self.IP.execution_count += 1  # increment it anyway
                    else:
                        # only submit the line in non-verbatim mode
                        self.process_input_line(line,
                                                store_history=store_history)
                    formatted_line = '%s %s' % (input_prompt, line)
                else:
                    # process a continuation line
                    if not is_verbatim:
                        self.process_input_line(line,
                                                store_history=store_history)
                    formatted_line = '%s %s' % (continuation, line)

                if not is_suppress:
                    ret.append(formatted_line)

        if not is_suppress and rest.strip() and is_verbatim:
            # the "rest" is the standard output of the input, which needs
            # to be added in verbatim mode
            ret.append(rest)

        self.cout.seek(0)
        output = self.cout.read()
        if not is_suppress and not is_semicolon:
            ret.append(output)
        elif is_semicolon:  # get spacing right
            ret.append('')

        # context information
        filename = self.state.document.current_source
        lineno = self.state.document.current_line

        # output any exceptions raised during execution to stdout
        # unless :okexcept: has been specified.
        if not is_okexcept and "Traceback" in output:
            s = "\nException in %s at block ending on line %s\n" % (filename, lineno)
            s += "Specify :okexcept: as an option in the ipython:: block to suppress this message\n"
            sys.stdout.write('\n\n>>>' + ('-' * 73))
            sys.stdout.write(s)
            sys.stdout.write(output)
            sys.stdout.write('<<<' + ('-' * 73) + '\n\n')

        # output any warning raised during execution to stdout
        # unless :okwarning: has been specified.
        if not is_okwarning:
            for w in ws:
                s = "\nWarning in %s at block ending on line %s\n" % (filename, lineno)
                s += "Specify :okwarning: as an option in the ipython:: block to suppress this message\n"
                sys.stdout.write('\n\n>>>' + ('-' * 73))
                sys.stdout.write(s)
                sys.stdout.write('-' * 76 + '\n')
                s = warnings.formatwarning(w.message, w.category,
                                           w.filename, w.lineno, w.line)
                sys.stdout.write(s)
                sys.stdout.write('<<<' + ('-' * 73) + '\n')

        # Rewind before truncating: read() left the position at the end of
        # the buffer and io.StringIO.truncate() does not move it, so without
        # the seek the next write would be preceded by NUL padding.
        self.cout.seek(0)
        self.cout.truncate(0)
        return (ret, input_lines, output, is_doctest, decorator, image_file,
                image_directive)

Example 2

Project: pysmc
Source File: _smc.py
View license
    def initialize(self, gamma, particle_approximation=None,
                   num_mcmc_per_particle=10):
        """
        Initialize SMC at a particular ``gamma``.

        The method has basically three ways of initializing the particles:

        + If ``particle_approximation`` is not ``None``,
          then it is assumed to contain the
          particles at the corresponding value of ``gamma``.
        + If ``particle_approximation`` is ``None``, ``gamma`` is zero and
          the MCMC sampler has a method called ``draw_from_prior()`` that
          does not raise ``AttributeError``, then it is called to
          initialize the particles.
        + In any other case, MCMC sampling is used to initialize the particles.
          We are assuming that the MCMC sampler has already been tuned for
          that particular gamma and that a sufficient burn-in period has
          passed. Then we record the current state as the first particle, we
          sample ``num_mcmc_per_particle`` times and record the second
          particle, and so on.

        :param gamma:               The initial ``gamma`` parameter. It must, of
                                    course, be within the right range of
                                    ``gamma``.
        :type gamma:                float
        :param particle_approximation:  Object whose ``particles`` and
                                        ``log_w`` attributes are adopted as
                                        the current particles and log
                                        weights. When using MPI, we are
                                        assuming that each one of the CPU's
                                        has its own collection of particles.
        :type particle_approximation:   :class:`pysmc.ParticleApproximation`
        :param num_mcmc_per_particle:   This parameter is ignored if
                                        ``particle_approximation`` is not
                                        ``None``. If the only way to
                                        initialize the particles is to use
                                        MCMC, then this is the number of
                                        MCMC samples we drop before getting
                                        a SMC particle.
        :returns:                   ``None``.
        """
        verbose = self.verbose > 0
        if verbose:
            print('------------------------')
            print('START SMC Initialization')
            print('------------------------')
            print('- initializing at %s : %s' % (self.gamma_name, gamma))
        # Zero out the MCMC step counter
        self._total_num_mcmc = 0
        # Set gamma
        self._set_gamma(gamma)
        # Set the weights and ESS
        self.log_w.fill(-math.log(self.num_particles))
        self._ess = float(self.num_particles)

        if particle_approximation is not None:
            if verbose:
                print('- initializing with a particle approximation.')
            self._particles = particle_approximation.particles
            self._log_w = particle_approximation.log_w
            self._ess = self._get_ess_at(self.log_w)
            # Nothing is gathered or stored in the database on this path.
            return

        self.particles[0] = self.mcmc_sampler.get_state()
        try:
            if verbose:
                sys.stdout.write(
                        '- initializing by sampling from the prior: ')
            if not gamma == 0.:
                # Sampling from the prior is only attempted at gamma == 0;
                # raising here reuses the MCMC fallback of the handler below.
                raise AttributeError()
            for i in range(1, self.my_num_particles):
                self.mcmc_sampler.draw_from_prior()
                self.particles[i] = self.mcmc_sampler.get_state()
            if verbose:
                sys.stdout.write('SUCCESS\n')
        except AttributeError:
            if verbose:
                sys.stdout.write('FAILURE\n')
                print('- initializing via MCMC')
                if self.use_mpi:
                    total_samples = (self.my_num_particles
                                     * num_mcmc_per_particle)
                    print('- taking a total of %s samples per process'
                          % (total_samples,))
                else:
                    total_samples = (self.num_particles
                                     * num_mcmc_per_particle)
                    print('- taking a total of %s samples' % (total_samples,))
                print('- creating a particle every %s'
                      % (num_mcmc_per_particle,))
                # The bar is created but never updated (see the TODO below).
                pb = pymc.progressbar.ProgressBar(self.num_particles *
                                                  num_mcmc_per_particle)
            # Only rank 0 keeps the first particle
            if self.rank == 0:
                start_idx = 1
            else:
                start_idx = 0
            for i in range(start_idx, self.my_num_particles):
                self.mcmc_sampler.sample(num_mcmc_per_particle)
                self.particles[i] = self.mcmc_sampler.get_state()
                self._total_num_mcmc += num_mcmc_per_particle
                # TODO: Find bug in PyMC bar
                #if self.verbose > 0:
                #    pb.update((i + 2) * self.size * num_mcmc_per_particle)
            if verbose:
                print('')

        pa = self.get_particle_approximation().gather()
        sm_params = self.mcmc_sampler.get_params(comm=self.comm)
        if self.update_db and self.rank == 0:
            self.db.add(self.gamma, pa, sm_params)
            self.db.commit()
        if verbose:
            print('----------------------')
            print('END SMC Initialization')
            print('----------------------')

Example 3

Project: raspberry_pwn
Source File: sqlharvest.py
View license
def main():
    """
    Harvest table names from SQL files found through the search engine.

    Result pages are fetched starting at the page index stored in
    ``CONFIG_FILE``; every linked file is downloaded (unless its declared
    size exceeds ``MAX_FILE_SIZE``) and scanned with ``REGEX_RESULT``. The
    loop ends when a result page lists the same files as the previous one
    or when the user interrupts it. In every case the table counts are
    written to ``TABLES_FILE`` (most frequent first) and the page index is
    written back to ``CONFIG_FILE``.
    """
    tables = dict()
    cookies = cookielib.CookieJar()
    cookie_processor = urllib2.HTTPCookieProcessor(cookies)
    opener = urllib2.build_opener(cookie_processor)
    opener.addheaders = [("User-Agent", USER_AGENT)]

    conn = opener.open(SEARCH_URL)
    page = conn.read()  # set initial cookie values

    config = ConfigParser.ConfigParser()
    config.read(CONFIG_FILE)

    if not config.has_section("options"):
        config.add_section("options")
    if not config.has_option("options", "index"):
        config.set("options", "index", "0")

    i = int(config.get("options", "index"))

    # Best effort: resume from a previous run's counts when they can be read.
    try:
        with open(TABLES_FILE, 'r') as f:
            for line in f:
                if ',' in line:
                    temp = line.split(',')
                    tables[temp[0]] = int(temp[1])
    except (IOError, ValueError):
        # Missing/unreadable file or a malformed count: keep what was
        # loaded so far. Interrupts are no longer swallowed here.
        pass

    socket.setdefaulttimeout(TIMEOUT)

    files, old_files = None, None
    try:
        while True:
            abort = False
            old_files = files
            files = []

            try:
                conn = opener.open("%s&q=%s&start=%d&sa=N" % (SEARCH_URL, QUERY.replace(' ', '+'), i * 10))
                page = conn.read()
                for match in re.finditer(REGEX_URLS, page):
                    files.append(urllib.unquote(match.group(1)))
                    if len(files) >= 10:
                        break
                # The same links twice in a row means there are no more pages.
                abort = (files == old_files)

            except KeyboardInterrupt:
                raise

            except Exception as msg:
                print(msg)

            if abort:
                break

            sys.stdout.write("\n---------------\n")
            sys.stdout.write("Result page #%d\n" % (i + 1))
            sys.stdout.write("---------------\n")

            for sqlfile in files:
                print(sqlfile)

                try:
                    req = urllib2.Request(sqlfile)
                    response = urllib2.urlopen(req)

                    if "Content-Length" in response.headers:
                        if int(response.headers.get("Content-Length")) > MAX_FILE_SIZE:
                            continue

                    page = response.read()
                    found = False
                    counter = 0

                    for match in re.finditer(REGEX_RESULT, page):
                        counter += 1
                        table = match.group("result").strip().strip("`\"'").replace('"."', ".").replace("].[", ".").strip('[]')

                        if table and not any(token in table for token in ('>', '<', '--', ' ')):
                            found = True
                            sys.stdout.write('*')  # one star per accepted name
                            tables[table] = tables.get(table, 0) + 1

                    if found:
                        sys.stdout.write("\n")

                except KeyboardInterrupt:
                    raise

                except Exception as msg:
                    print(msg)

            # The loop above never breaks, so the page index always advances.
            i += 1

    except KeyboardInterrupt:
        pass

    finally:
        with open(TABLES_FILE, 'w+') as f:
            ranked = sorted(tables.items(), key=itemgetter(1), reverse=True)
            for table, count in ranked:
                f.write("%s,%d\n" % (table, count))

        # NOTE(review): after a completed page ``i`` already names the next
        # page to fetch, so storing ``i + 1`` looks like it skips one page on
        # the next run -- confirm this is intended before changing it.
        config.set("options", "index", str(i + 1))
        with open(CONFIG_FILE, 'w+') as f:
            config.write(f)

Example 4

Project: pyfilesystem
Source File: fscp.py
View license
    def do_run(self, options, args):
        """Run the action on every source in ``args[:-1]`` with ``args[-1]`` as destination.

        The sources are scanned first to build the list of directories and
        files to process, then the work is handed to ``options.threads``
        worker threads through a queue. Returns ``1`` when fewer than two
        arguments are given, when the destination is a file or when a source
        does not exist; otherwise returns ``None``.
        """
        self.options = options
        if len(args) < 2:
            self.error("at least two filesystems required\n")
            return 1

        sources, destination = args[:-1], args[-1]

        dst_fs, dst_path = self.open_fs(destination, writeable=True, create_dir=True)

        if dst_path is not None and dst_fs.isfile(dst_path):
            self.error('Destination must be a directory\n')
            return 1

        if dst_path:
            dst_fs = dst_fs.makeopendir(dst_path)

        show_progress = options.progress
        pending = []

        if show_progress:
            sys.stdout.write(self.progress_bar(len(sources), 0, 'scanning...'))
            sys.stdout.flush()

        # --- scan phase: collect (kind, fs, path, destination path) entries
        self.root_dirs = []
        for index, fs_url in enumerate(sources):
            src_fs, src_path = self.open_fs(fs_url)
            if src_path is None:
                src_path = '/'

            if iswildcard(src_path):
                for matched in src_fs.listdir(wildcard=src_path, full=True):
                    pending.append((self.FILE, src_fs, matched, matched))
            elif src_fs.isdir(src_path):
                self.root_dirs.append((src_fs, src_path))
                tree_fs = src_fs.opendir(src_path)
                for dir_path, file_names in tree_fs.walk():
                    if dir_path not in ('', '/'):
                        pending.append((self.DIR, tree_fs, dir_path, dir_path))
                    dir_fs = tree_fs.opendir(dir_path)
                    for file_name in file_names:
                        pending.append((self.FILE, dir_fs, file_name,
                                        pathjoin(dir_path, file_name)))
            elif src_fs.exists(src_path):
                pending.append((self.FILE, src_fs, src_path, src_path))
            else:
                self.error('%s is not a file or directory\n' % src_path)
                return 1

            if show_progress:
                sys.stdout.write(self.progress_bar(len(sources), index + 1, 'scanning...'))
                sys.stdout.flush()

        if show_progress:
            sys.stdout.write(self.progress_bar(len(pending), 0, self.get_verb()))
            sys.stdout.flush()

        # With several workers the directories are created up front and only
        # the file entries are queued.
        if self.options.threads > 1:
            dir_entries = [entry for entry in pending if entry[0] == self.DIR]
            pending = [entry for entry in pending if entry[0] == self.FILE]
            for entry in dir_entries:
                dst_fs.makedir(entry[2], allow_recreate=True, recursive=True)

        self.lock = threading.RLock()

        self.total_files = len(pending)
        self.done_files = 0

        # --- work phase
        file_queue = queue.Queue()
        workers = [FileOpThread(self.get_action(),
                                'T%i' % number,
                                dst_fs,
                                file_queue,
                                self.on_done,
                                self.on_error)
                   for number in xrange(options.threads)]

        for worker in workers:
            worker.start()

        self.action_errors = []
        complete = False
        try:
            for resource in pending:
                file_queue.put(resource)

            # Poll instead of file_queue.join(): join() would keep a
            # KeyboardInterrupt from being seen until the queue is finished.
            while not file_queue.empty():
                time.sleep(0)
                if self.any_error():
                    raise SystemExit

        except KeyboardInterrupt:
            options.progress = False
            self.output("\nCancelling...\n")

        except SystemExit:
            options.progress = False

        finally:
            sys.stdout.flush()
            for worker in workers:
                worker.finish_event.set()
            for worker in workers:
                worker.join()
            complete = True
            if not self.any_error():
                self.post_actions()

        dst_fs.close()

        if self.action_errors:
            for failure in self.action_errors:
                self.error(self.wrap_error(unicode(failure)) + '\n')
            sys.stdout.flush()
        elif complete and options.progress:
            sys.stdout.write(self.progress_bar(self.total_files, self.done_files, ''))
            sys.stdout.write('\n')
            sys.stdout.flush()

Example 5

Project: scikit-rf
Source File: ipython_directive.py
View license
    def process_input(self, data, input_prompt, lineno):
        """
        Process data block for INPUT token.

        Each input line goes through ``self.process_input_line`` (skipped in
        verbatim mode); the text accumulated in ``self.cout`` is then read
        back, and tracebacks / recorded warnings are reported on
        ``sys.stdout`` unless ``okexcept`` / ``okwarning`` is in effect.

        Parameters
        ----------
        data : tuple
            ``(decorator, input, rest)``: the decorator of the block (or
            ``None``), the input source text and the text following it.
        input_prompt : str
            Prompt put in front of the first formatted input line.
        lineno : int
            Its number of digits sets the width of the continuation prompt.

        Returns
        -------
        tuple
            ``(ret, input_lines, output, is_doctest, decorator, image_file,
            image_directive)``; ``ret`` is the list of formatted lines.
        """
        decorator, source, rest = data
        image_file = None
        image_directive = None

        has_decorator = decorator is not None
        is_verbatim = decorator == '@verbatim' or self.is_verbatim
        is_doctest = ((has_decorator and decorator.startswith('@doctest'))
                      or self.is_doctest)
        is_suppress = decorator == '@suppress' or self.is_suppress
        is_okexcept = decorator == '@okexcept' or self.is_okexcept
        is_okwarning = decorator == '@okwarning' or self.is_okwarning
        is_savefig = has_decorator and decorator.startswith('@savefig')

        input_lines = source.split('\n')
        if len(input_lines) > 1 and input_lines[-1] != "":
            # make sure there's a blank line so splitter buffer gets reset
            input_lines.append('')

        continuation = '   %s:' % ('.' * (len(str(lineno)) + 2))

        if is_savefig:
            image_file, image_directive = self.process_image(decorator)

        ret = []
        is_semicolon = False

        # Hold the execution count, if requested to do so.
        store_history = not (is_suppress and self.hold_count)

        # Note: catch_warnings is not thread safe
        with warnings.catch_warnings(record=True) as ws:
            for i, line in enumerate(input_lines):
                if line.endswith(';'):
                    is_semicolon = True

                if i == 0:
                    # process the first input line
                    if is_verbatim:
                        self.process_input_line('')
                        self.IP.execution_count += 1  # increment it anyway
                    else:
                        # only submit the line in non-verbatim mode
                        self.process_input_line(line,
                                                store_history=store_history)
                    formatted_line = '%s %s' % (input_prompt, line)
                else:
                    # process a continuation line
                    if not is_verbatim:
                        self.process_input_line(line,
                                                store_history=store_history)
                    formatted_line = '%s %s' % (continuation, line)

                if not is_suppress:
                    ret.append(formatted_line)

        if not is_suppress and rest.strip() and is_verbatim:
            # the "rest" is the standard output of the input, which needs
            # to be added in verbatim mode
            ret.append(rest)

        self.cout.seek(0)
        output = self.cout.read()
        if not is_suppress and not is_semicolon:
            ret.append(output)
        elif is_semicolon:  # get spacing right
            ret.append('')

        # context information; the placeholders are used when the directive
        # has no state attached
        filename = "Unknown"
        lineno = 0
        if self.directive.state:
            filename = self.directive.state.document.current_source
            lineno = self.directive.state.document.current_line

        # output any exceptions raised during execution to stdout
        # unless :okexcept: has been specified.
        if not is_okexcept and "Traceback" in output:
            s = "\nException in %s at block ending on line %s\n" % (filename, lineno)
            s += "Specify :okexcept: as an option in the ipython:: block to suppress this message\n"
            sys.stdout.write('\n\n>>>' + ('-' * 73))
            sys.stdout.write(s)
            sys.stdout.write(output)
            sys.stdout.write('<<<' + ('-' * 73) + '\n\n')

        # output any warning raised during execution to stdout
        # unless :okwarning: has been specified.
        if not is_okwarning:
            for w in ws:
                s = "\nWarning in %s at block ending on line %s\n" % (filename, lineno)
                s += "Specify :okwarning: as an option in the ipython:: block to suppress this message\n"
                sys.stdout.write('\n\n>>>' + ('-' * 73))
                sys.stdout.write(s)
                sys.stdout.write(('-' * 76) + '\n')
                s = warnings.formatwarning(w.message, w.category,
                                           w.filename, w.lineno, w.line)
                sys.stdout.write(s)
                sys.stdout.write('<<<' + ('-' * 73) + '\n')

        # Rewind before truncating: read() left the position at the end of
        # the buffer and io.StringIO.truncate() does not move it, so without
        # the seek the next write would be preceded by NUL padding.
        self.cout.seek(0)
        self.cout.truncate(0)
        return (ret, input_lines, output, is_doctest, decorator, image_file,
                image_directive)

Example 6

Project: D-TECT
Source File: d-tect.py
View license
def dtect():
	print("  ____   _____ _____ ____ _____ ")
	print(" |  _ \ |_   _| ____/ ___|_   _|")
	print(" | | | |__| | |  _|| |     | |  ")
	print(" | |_| |__| | | |__| |___  | |  ")
	print(" |____/   |_| |_____\____| |_|  v1.0")
	print("") 
	print(" D-TECT - Pentest the Modern Web")
	print(" Author: Shawar Khan - ( https://shawarkhan.com )")
	print("")
	def menu():
		global filedetector,wpenumerator,subdomainscan,portscan,wpscan,xssscanner,wpbackupscan,sqliscanner
		print(" -- "+boldwhite+"Menu"+reset+" -- \n \n  1. 	"+boldwhite+"WordPress Username Enumerator"+reset+"   \n  2. 	"+boldwhite+"Sensitive File Detector"+reset+"        \n  3. 	"+boldwhite+"Sub-Domain Scanner"+reset+"\n  4. 	"+boldwhite+"Port Scanner"+reset+"        \n  5. 	"+boldwhite+"Wordpress Scanner\n"+reset+"  6. 	"+boldwhite+"Cross-Site Scripting [ XSS ] Scanner\n"+reset+"  7.    "+boldwhite+"Wordpress Backup Grabber\n"+reset+"  8.    "+boldwhite+"SQL Injection [ SQLI ] Scanner\n"+reset)
		option = raw_input("[+] Select Option\n    > ")
		if option == "1":
			wpenumerator = "on"
		elif option == "2":
			filedetector = "on"
		elif option == "3":
			subdomainscan = "on"
		elif option == "4":
			portscan = "on"
		elif option == "5":
			wpscan = "on"
		elif option == "6":
			xssscanner = "on"
		elif option == "7":
			wpbackupscan = "on"
		elif option == "8":
			sqliscanner = "on"
		else:
			print("[+] Incorrect Option selected")
			menu()

	def sock(i,secretswitch=0):
		secret = secretswitch
		global data,page,sourcecode
		if redirect == 1:
			data = host+i
		else:
			data = host.strip("/")+'/'+i
		page = urllib.urlopen(data)
		sourcecode = page.read()
		if secret == "1":
			return sourcecode
	def cloudflare():
		data = host #+'/'
		page = urllib.urlopen(data)
		pagesource = page.read()
		if "used CloudFlare to restrict access</title>" in pagesource:
			print("[!] Cloudflare blocked the IP")
			again()
	def alive():
		try:
			global page,splithost,ip
			data = host#+'/'
			page = urllib.urlopen(data)
			source = page.read()
			splithost = str(data.split("://")[1].split("/")[0])
			ip = socket.gethostbyname(splithost)
			print("[i] "+green+"Site is up!"+reset)
			print("  \n[+] Target Info:\n | URL: "+boldwhite+"%s"+reset+"\n | IP: "+boldwhite+"%s"+reset+"\n  ")%(data,ip)
			print("[+] Checking if any Cloudflare is blocking access...")
			cloudflare()
			redirectcheck()
		except(IOError):
			print("[!] "+red+"Error connecting to site! Site maybe down."+reset)
			again()
	def responseheadercheck():
		print('')
		headers 		= ['set-cookie','x-cache','Location','Date','Content-Type','Content-Length','Connection','Etag','Expires','Last-Modified','Pragma','Vary','Cache-Control','X-Pingback','Accept-Ranges']
		headersfound 	= []
		interesting 	= []
		caution 		= []
		cj = 0
		for i in page.headers:
			if i.lower() in str(headers).lower():
				pass
			elif i == "server":
				structure = str(i)+" : "+str(page.headers[i])
				headersfound.append(structure)
				structure = "Server : "+boldwhite+str(page.headers[i])+reset
				interesting.append(structure)
			elif i == "x-powered-by":
				structure = str(i)+" : "+str(page.headers[i])
				headersfound.append(structure)
				structure = "Powered by: "+boldwhite+str(page.headers[i])+reset
				interesting.append(structure)
			elif i == "x-frame-options":
				cj = 1
				pass
			else:
				structure = str(i)+" : "+str(page.headers[i])
				headersfound.append(structure)
		if cj == 0:
			caution.append("[!]"+red+" X-Frame-Options header Missing\n"+reset+"[!] "+red+"Page might be vulnerable to "+boldred+"Click Jacking\n"+reset+"[!] "+boldred+page.geturl()+reset+"\n[i] About ClickJacking: [ "+green+"https://www.owasp.org/index.php/Clickjacking"+reset+" ]")
		print("[+] Interesting Headers Found:")
		for i in headersfound:
			print(" | %s")%(i)
		if len(interesting) != 0:
			print("\n[i] Information from Headers:")
			for i in interesting:
				print(" | %s")%i
		print('')
		if cj == 0:
			print(caution[0])
		print('')
	def parameterarrange(payload):
		parsedurl = urlparse.urlparse(host)
		parameters = urlparse.parse_qsl(parsedurl.query, keep_blank_values=True)
		parameternames = []
		parametervalues = []

		for m in parameters:
			parameternames.append(m[0])
			parametervalues.append(m[1])


		for n in parameters:
			try:
				print("Checking '%s' parameter")%n[0]
				index = parameternames.index(n[0])
				original = parametervalues[index]
				parametervalues[index] = payload
				return urllib.urlencode(dict(zip(parameternames,parametervalues)))
				parametervalues[index] = original
			except(KeyError):
				pass
	def SQLIscan(site):
		print("[+] [ SQLI ] Scanner Started...\n")
		vuln = []
		payloads = {
					'2':'"',
					'1':'\''
		}
		errors = {
					'MySQL':'You have an error in your SQL syntax;',
					'Oracle':'SQL command not properly ended',
					'MSSQL':'Unclosed quotation mark after the character string',
					'PostgreSQL':'syntax error at or near'
		}
		path = urlparse.urlparse(site).scheme+"://"+urlparse.urlparse(site).netloc+urlparse.urlparse(site).path
		parsedurl = urlparse.urlparse(host)
		parameters = urlparse.parse_qsl(parsedurl.query, keep_blank_values=True)
		parameternames = []
		parametervalues = []

		for m in parameters:
			parameternames.append(m[0])
			parametervalues.append(m[1])


		for n in parameters:
			found = 0
			print("[+] Checking '%s' parameter")%n[0]
			try:
				for i in payloads:	
					pay = payloads[i]
					index = parameternames.index(n[0])
					original = parametervalues[index]
					parametervalues[index] = pay
					modifiedurl = urllib.urlencode(dict(zip(parameternames,parametervalues)))
					parametervalues[index] = original
					modifiedparams = modifiedurl
					payload = urllib.quote_plus(payloads[i])
					u = urllib.urlopen(path+"?"+modifiedparams)
					source = u.read()
					#print ("[+] Checking HTML Context...")
					
					for i in errors:
						if errors[i] in source:#htmlcode[0].contents[0]:
							dbfound = " | Back-End Database: "+green+str(i)+reset
							found = 1
							break
					if found != 1:
						break
			except(KeyError):
				pass

			if found == 1:
				print("[!] "+red+"SQL Injection Vulnerability Found!"+reset)
				print dbfound
				vuln.append("'"+n[0]+"'")
				found = 0
		if len(vuln) != 0:
			print(" | Vulnerable Parameter/s:"), 
			for i in vuln:
				print(i),

		else:
			print("[!] Not Vulnerable")
	def XSSscan(site):
		print("[+] [ XSS ] Scanner Started...")
		vuln = []
		payloads = {
					'3':'d4rk();"\'\\/}{d4rk',
					'2':'d4rk</script><script>alert(1)</script>d4rk',
					'1':'<d4rk>'
		}
		path = urlparse.urlparse(site).scheme+"://"+urlparse.urlparse(site).netloc+urlparse.urlparse(site).path
		parsedurl = urlparse.urlparse(host)
		parameters = urlparse.parse_qsl(parsedurl.query, keep_blank_values=True)
		parameternames = []
		parametervalues = []

		for m in parameters:
			parameternames.append(m[0])
			parametervalues.append(m[1])


		for n in parameters:
			found = 0
			print(" | Checking '%s' parameter")%n[0]
			try:
				for i in payloads:	
					pay = payloads[i]
					index = parameternames.index(n[0])
					original = parametervalues[index]
					parametervalues[index] = pay
					modifiedurl = urllib.urlencode(dict(zip(parameternames,parametervalues)))
					parametervalues[index] = original
					modifiedparams = modifiedurl
					payload = urllib.quote_plus(payloads[i])
					u = urllib.urlopen(path+"?"+modifiedparams)
					source = u.read()
					code = BeautifulSoup(source)
					if str(i) == str(1):
						#print ("[+] Checking HTML Context...")
						if payloads[i] in source:#htmlcode[0].contents[0]:
							#print("[+] XSS Vulnerability Found.")
							found = 1
					script = code.findAll('script')
					if str(i) == str(3) or str(i) == str(2):
						#print("[+] Checking JS Context...")
						if str(i) == str(3):
							#JS Context
							for p in range(len(script)):
								try:
									if pay in script[p].contents[0]:
										#print("[+] XSS Vulnerability Found")
										found = 1
								except(IndexError):
									pass
						if str(i) == str(2):
							if payloads['2'] in source:
								#	print("[+] XSS Vulnerability Found")
								found = 1
			except(KeyError):
				pass

			if found == 1:
				vuln.append("'"+n[0]+"'")
				found = 0
		if len(vuln) != 0:
			print("[!] "+red+"Vulnerable Parameter/s:"+reset), 
			for i in vuln:
				print(boldred+i+reset),
		else:
			print("[!] Not Vulnerable")
	def portscanner():
		"""Ask for a port specification and TCP-connect scan it on ``ip``.

		Accepted specifications (read with ``raw_input``):
		  * ``23,80,120`` - a comma separated list of ports
		  * ``23-80``     - an inclusive range
		  * ``23``        - a single port
		  * ``all``       - ports 20 to 5000 inclusive

		Anything else prints a syntax error, hands control to ``again()``
		and returns without scanning.  Open ports are printed together with
		the service name registered for them, or ``unknown`` when the local
		services database has no entry.
		"""
		print("[i] Syntax	:	Function")
		print("    23,80,120	:	Scans Specific Ports, e.g, Scans Port 23,80 and 120")
		print("    23-80	:	Scans a Range of Ports, e.g, Scans Port from 23 to 80")
		print("    23   	:	Scans a single port, e.g, Scans Port 23")
		print("    all  	:	Scans all ports from 20 to 5000")
		print(" ")
		portoption = raw_input("[+] Enter Range or Port:\n    > ")

		# Each branch yields: the ports to probe, the counter value shown
		# before the first probe, and the right-hand side of "x / y".
		if ',' in portoption:
			fields = [field.strip() for field in portoption.split(',')]
			if not all(field.isdigit() for field in fields):
				print("[!] Incorrect Syntax!")
				again()
				return
			targets = [int(field) for field in fields]
			progress = 0
			ports = len(targets)
		elif '-' in portoption:
			bounds = [bound.strip() for bound in portoption.split('-')]
			# Validate BEFORE converting so "23-" or "a-b" is reported as a
			# syntax error instead of raising ValueError.
			if len(bounds) != 2 or not (bounds[0].isdigit() and bounds[1].isdigit()):
				print("[!] Incorrect Syntax!")
				again()
				return
			targets = range(int(bounds[0]), int(bounds[1]) + 1)
			progress = int(bounds[0]) - 1
			ports = int(bounds[1])
		elif portoption == 'all':
			# Inclusive upper bound, matching the help text above.
			targets = range(20, 5001)
			progress = 19
			ports = 5000
		elif portoption.isdigit():
			targets = [int(portoption)]
			progress = 0
			ports = 1
		else:
			print("[+] Incorrect Syntax!")
			again()
			return

		print("[+] Scanning %s Port/s on Target: %s" % (len(targets), ip))
		protocolname = 'tcp'
		open_ports = 0
		for port in targets:
			progress += 1
			sys.stdout.write("\r[+] Progress %i / %s ..." % (progress, ports))
			sys.stdout.flush()
			portconnect = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
			try:
				response = portconnect.connect_ex((ip, port))
			finally:
				# Release the descriptor even if connect_ex raises.
				portconnect.close()
			if response != 0:
				continue
			try:
				service = socket.getservbyport(port, protocolname)
			except socket.error:
				# Raised when no service is registered for this port.
				service = 'unknown'
			print(('\n | Port: '+boldwhite+'%d'+reset+' \n | Status: '+green+'OPEN'+reset+'\n | Service: '+boldwhite+'%s'+reset+'\n') % (port, service))
			open_ports += 1
		if open_ports == 0:
			print("\n | "+red+"No Open Ports Found!"+reset)
	def subdomainscanner():
		"""Brute-force common sub-domain labels of ``splithost`` through DNS.

		Every label of the built-in word list is prefixed to ``splithost``
		and resolved.  Names that resolve are printed with the primary host
		name and address returned by the resolver; a name resolving to
		127.0.0.1 is additionally reported as exposed to same-site
		scripting.  Names that fail to resolve are skipped silently.
		"""
		import sys
		print("\n[+] Subdomain Scanner Start!")
		# NOTE(review): "printer", "printserv", "printserver" and "sprint"
		# were restored from entries mangled into 'print >> sys.stdout,...';
		# the duplicated "domain" entry is kept exactly as found.
		wordlist = [
			"mail","localhost","blog","forum","0","01","02","03","1","10","11","12","13","14","15","16","17","18","19","2","20","3","3com","4","5","6","7","8","9","ILMI",
			"a","a.auth-ns","a01","a02","a1","a2","abc","about","ac","academico","acceso","access","accounting","accounts","acid","activestat","ad","adam","adkit","admin","administracion","administrador","administrator","administrators","admins","ads","adserver","adsl","ae","af","affiliate","affiliates","afiliados","ag","agenda","agent","ai","aix","ajax","ak","akamai","al","alabama","alaska","albuquerque","alerts","alpha","alterwind","am","amarillo","americas","an","anaheim","analyzer","announce","announcements","antivirus","ao","ap","apache","apollo","app","app01","app1","apple","application","applications","apps","appserver","aq","ar","archie","arcsight","argentina","arizona","arkansas","arlington","as","as400","asia","asterix","at","athena","atlanta","atlas","att","au","auction","austin","auth","auto","av","aw","ayuda","az",
			"b","b.auth-ns","b01","b02","b1","b2","b2b","b2c","ba","back","backend","backup","baker","bakersfield","balance","balancer","baltimore","banking","bayarea","bb","bbdd","bbs","bd","bdc","be","bea","beta","bf","bg","bh","bi","billing","biz","biztalk","bj","black","blackberry","blogs","blue","bm","bn","bnc","bo","bob","bof","boise","bolsa","border","boston","boulder","boy","br","bravo","brazil","britian","broadcast","broker","bronze","brown","bs","bsd","bsd0","bsd01","bsd02","bsd1","bsd2","bt","bug","buggalo","bugs","bugzilla","build","bulletins","burn","burner","buscador","buy","bv","bw","by","bz",
			"c","c.auth-ns","ca","cache","cafe","calendar","california","call","calvin","canada","canal","canon","careers","catalog","cc","cd","cdburner","cdn","cert","certificates","certify","certserv","certsrv","cf","cg","cgi","ch","channel","channels","charlie","charlotte","chat","chats","chatserver","check","checkpoint","chi","chicago","ci","cims","cincinnati","cisco","citrix","ck","cl","class","classes",
			"classifieds","classroom","cleveland","clicktrack","client","clientes","clients","club","clubs","cluster","clusters","cm","cmail","cms","cn","co","cocoa","code","coldfusion","colombus","colorado","columbus","com","commerce","commerceserver","communigate","community","compaq","compras","con","concentrator","conf","conference","conferencing","confidential","connect","connecticut","consola","console","consult","consultant","consultants","consulting","consumer","contact","content","contracts","core","core0","core01","corp","corpmail","corporate","correo","correoweb","cortafuegos","counterstrike","courses","cr","cricket","crm","crs","cs","cso","css","ct","cu",
			"cust1","cust10","cust100","cust101","cust102","cust103","cust104","cust105","cust106","cust107","cust108","cust109","cust11","cust110","cust111","cust112","cust113","cust114","cust115","cust116","cust117","cust118","cust119","cust12","cust120","cust121","cust122","cust123","cust124","cust125","cust126","cust13","cust14","cust15","cust16","cust17","cust18","cust19",
			"cust2","cust20","cust21","cust22","cust23","cust24","cust25","cust26","cust27","cust28","cust29","cust3","cust30","cust31","cust32","cust33","cust34","cust35","cust36","cust37","cust38","cust39","cust4","cust40","cust41","cust42","cust43","cust44","cust45","cust46","cust47","cust48","cust49",
			"cust5","cust50","cust51","cust52","cust53","cust54","cust55","cust56","cust57","cust58","cust59","cust6","cust60","cust61","cust62","cust63","cust64","cust65","cust66","cust67","cust68","cust69","cust7","cust70","cust71","cust72","cust73","cust74","cust75","cust76","cust77","cust78","cust79",
			"cust8","cust80","cust81","cust82","cust83","cust84","cust85","cust86","cust87","cust88","cust89","cust9","cust90","cust91","cust92","cust93","cust94","cust95","cust96","cust97","cust98","cust99","customer","customers","cv","cvs","cx","cy","cz",
			"d","dallas","data","database","database01","database02","database1","database2","databases","datastore","datos","david","db","db0","db01",
			"db02","db1","db2","dc","de","dealers","dec","def","default","defiant","delaware","dell","delta","delta1","demo","demonstration","demos","denver","depot","des","desarrollo","descargas","design","designer","detroit","dev","dev0","dev01","dev1","devel","develop","developer","developers","development","device","devserver","devsql","dhcp","dial","dialup","digital","dilbert","dir","direct","directory","disc","discovery","discuss","discussion","discussions","disk","disney","distributer","distributers","dj","dk","dm","dmail","dmz","dnews","dns","dns-2","dns0","dns1","dns2","dns3","do","docs","documentacion","documentos","domain","domains","dominio","domino","dominoweb","doom","download","downloads","downtown","dragon","drupal","dsl","dyn","dynamic","dynip","dz",
			"e","e-com","e-commerce","e0","eagle","earth","east","ec","echo","ecom","ecommerce","edi","edu","education","edward","ee","eg","eh","ejemplo","elpaso","email","employees","empresa","empresas","en","enable","eng","eng01","eng1","engine","engineer","engineering","enterprise","epsilon","er","erp","es","esd","esm","espanol","estadisticas","esx","et","eta","europe","events","domain","exchange","exec","extern","external","extranet",
			"f","f5","falcon","farm","faststats","fax","feedback","feeds","fi","field","file","files","fileserv","fileserver","filestore","filter","find","finger","firewall","fix","fixes","fj","fk","fl","flash","florida","flow","fm","fo","foobar","formacion","foro","foros","fortworth","forums","foto","fotos","foundry","fox","foxtrot","fr","france","frank","fred","freebsd","freebsd0","freebsd01","freebsd02","freebsd1","freebsd2","freeware","fresno","front","frontdesk","fs","fsp","ftp","ftp-","ftp0","ftp2","ftp_","ftpserver","fw","fw-1","fw1","fwsm","fwsm0","fwsm01","fwsm1",
			"g","ga","galeria","galerias","galleries","gallery","games","gamma","gandalf","gate","gatekeeper","gateway","gauss","gd","ge","gemini","general","george","georgia","germany","gf","gg","gh","gi","gl","glendale","gm","gmail","gn","go",
			"gold","goldmine","golf","gopher","gp","gq","gr","green","group","groups","groupwise","gs","gsx","gt","gu","guest","gw","gw1","gy",
			"h","hal","halflife","hawaii","hello","help","helpdesk","helponline","henry","hermes","hi","hidden","hk","hm","hn","hobbes","hollywood","home","homebase","homer","honeypot","honolulu","host","host1","host3","host4","host5","hotel","hotjobs","houstin","houston","howto","hp","hpov","hr","ht","http","https","hu","hub","humanresources",
			"i","ia","ias","ibm","ibmdb","id","ida","idaho","ids","ie","iis","il","illinois","im","images","imail","imap","imap4","img","img0","img01","img02","in","inbound","inc","include","incoming","india","indiana","indianapolis","info","informix","inside","install","int","intern","internal","international","internet","intl","intranet","invalid","investor","investors","invia","invio","io","iota","iowa","iplanet","ipmonitor","ipsec","ipsec-gw","iq","ir","irc","ircd","ircserver","ireland","iris","irvine","irving","is","isa","isaserv","isaserver","ism","israel","isync","it","italy","ix",
			"j","japan","java","je","jedi","jm","jo","jobs","john","jp","jrun","juegos","juliet","juliette","juniper",
			"k","kansas","kansascity","kappa","kb","ke","kentucky","kerberos","keynote","kg","kh","ki","kilo","king","km","kn","knowledgebase","knoxville","koe","korea","kp","kr","ks","kw","ky","kz",
			"l","la","lab","laboratory","labs","lambda","lan","laptop","laserjet","lasvegas","launch","lb","lc","ldap","legal","leo","li","lib","library","lima","lincoln","link","linux","linux0","linux01","linux02","linux1","linux2","lista","lists","listserv","listserver","live","lk","load","loadbalancer","local","log","log0","log01","log02","log1","log2","logfile","logfiles","logger","logging","loghost","login","logs","london","longbeach","losangeles","lotus","louisiana","lr","ls","lt","lu","luke","lv","ly","lyris",
			"m","ma","mac","mac1","mac10","mac11","mac2","mac3","mac4","mac5","mach","macintosh","madrid","mail2","mailer","mailgate","mailhost","mailing","maillist",
			"maillists","mailroom","mailserv","mailsite","mailsrv","main","maine","maint","mall","manage","management","manager","manufacturing","map","mapas","maps","marketing","marketplace","mars","marvin","mary","maryland","massachusetts","master","max","mc","mci","md","mdaemon","me","media","member","members","memphis","mercury","merlin","messages","messenger","mg","mgmt","mh","mi","miami","michigan","mickey","midwest","mike","milwaukee","minneapolis","minnesota","mirror","mis","mississippi","missouri","mk","ml","mm","mn","mngt","mo","mobile","mom","monitor","monitoring","montana","moon","moscow","movies","mozart","mp","mp3","mpeg","mpg","mq","mr","mrtg","ms","ms-exchange","ms-sql","msexchange","mssql","mssql0","mssql01","mssql1","mt","mta","mtu","mu","multimedia","music","mv","mw","mx","my","mysql","mysql0","mysql01","mysql1","mz",
			"n","na","name","names","nameserv","nameserver","nas","nashville","nat","nc","nd","nds","ne","nebraska","neptune","net","netapp","netdata","netgear","netmeeting","netscaler","netscreen","netstats","network","nevada","new","newhampshire","newjersey","newmexico","neworleans","news","newsfeed","newsfeeds","newsgroups","newton","newyork","newzealand","nf","ng","nh","ni","nigeria","nj","nl","nm","nms","nntp","no","node","nokia","nombres","nora","north","northcarolina","northdakota","northeast","northwest","noticias","novell","november","np","nr","ns","ns-","ns0","ns01","ns02","ns1","ns2","ns3","ns4","ns5","ns_","nt","nt4","nt40","ntmail","ntp","ntserver","nu","null","nv","ny","nz",
			"o","oakland","ocean","odin","office","offices","oh","ohio","ok","oklahoma","oklahomacity","old","om","omaha","omega","omicron","online","ontario","open","openbsd","openview","operations","ops","ops0","ops01","ops02","ops1","ops2","opsware","or","oracle","orange","order","orders","oregon","orion","orlando","oscar","out","outbound","outgoing","outlook","outside","ov","owa","owa01","owa02","owa1","owa2","ows","oxnard",
			"p","pa","page","pager","pages","paginas","papa","paris",
			"parners","partner","partners","patch","patches","paul","payroll","pbx","pc","pc01","pc1","pc10","pc101","pc11","pc12","pc13","pc14","pc15","pc16","pc17","pc18","pc19","pc2","pc20","pc21","pc22","pc23","pc24","pc25","pc26","pc27","pc28","pc29","pc3","pc30","pc31","pc32","pc33","pc34","pc35","pc36","pc37","pc38","pc39","pc4","pc40","pc41","pc42","pc43","pc44","pc45","pc46","pc47","pc48","pc49","pc5","pc50","pc51","pc52","pc53","pc54","pc55","pc56","pc57","pc58","pc59","pc6","pc60","pc7","pc8","pc9",
			"pcmail","pda","pdc","pe","pegasus","pennsylvania","peoplesoft","personal","pf","pg","pgp","ph","phi","philadelphia","phoenix","phoeniz","phone","phones","photos","pi","pics","pictures","pink","pipex-gw","pittsburgh","pix","pk","pki","pl","plano","platinum","pluto","pm","pm1","pn","po","policy","polls","pop","pop3","portal","portals","portfolio","portland","post","posta","posta01","posta02","posta03","postales","postoffice",
			"ppp1","ppp10","ppp11","ppp12","ppp13","ppp14","ppp15","ppp16","ppp17","ppp18","ppp19","ppp2","ppp20","ppp21","ppp3","ppp4","ppp5","ppp6","ppp7","ppp8","ppp9","pptp","pr","prensa","press","printer","printserv","printserver",
			"priv","privacy","private","problemtracker","products","profiles","project","projects","promo","proxy","prueba","pruebas","ps","psi","pss","pt","pub","public","pubs","purple","pw","py",
			"q","qa","qmail","qotd","quake","quebec","queen","quotes",
			"r","r01","r02","r1","r2","ra","radio","radius","rapidsite","raptor","ras","rc","rcs","rd","re","read","realserver","recruiting","red","redhat","ref","reference","reg","register","registro","registry","regs","relay","rem","remote","remstats","reports","research","reseller","reserved","resumenes","rho","rhodeisland","ri","ris","rmi","ro","robert","romeo","root","rose","route","router","router1","rs","rss","rtelnet","rtr","rtr01","rtr1","ru","rune","rw","rwhois",
			"s","s1","s2","sa","sac","sacramento","sadmin","safe","sales","saltlake","sam","san","sanantonio","sandiego","sanfrancisco","sanjose","saskatchewan","saturn","sb","sbs","sc","scanner","schedules","scotland","scotty","sd","se","search","seattle","sec","secret","secure","secured","securid","security","sendmail","seri","serv","serv2","server","server1","servers","service","services","servicio","servidor","setup","sg","sh","shared","sharepoint","shareware","shipping","shop","shoppers","shopping","si","siebel","sierra","sigma","signin","signup","silver","sim","sirius","site","sj","sk","skywalker","sl","slackware","slmail","sm","smc","sms","smtp","smtphost","sn","sniffer","snmp","snmpd","snoopy","snort","so","socal","software","sol","solaris","solutions","soporte","source","sourcecode","sourcesafe","south","southcarolina","southdakota","southeast","southwest","spain","spam","spider","spiderman","splunk","spock","spokane","springfield","sprint",
			"sqa","sql","sql0","sql01","sql1","sql7","sqlserver","squid","sr","ss","ssh","ssl","ssl0","ssl01","ssl1","st","staff","stage","staging","start","stat","static","statistics","stats","stlouis","stock","storage","store","storefront","streaming","stronghold","strongmail","studio","submit","subversion","sun","sun0","sun01","sun02","sun1","sun2","superman","supplier","suppliers","support","sv","sw","sw0","sw01","sw1","sweden","switch","switzerland","sy","sybase","sydney","sysadmin","sysback","syslog","syslogs","system","sz",
			"t","tacoma","taiwan","talk","tampa","tango","tau","tc","tcl","td","team","tech","technology","techsupport","telephone","telephony","telnet","temp","tennessee","terminal","terminalserver","termserv","test","test2k","testbed","testing","testlab","testlinux","testo","testserver","testsite","testsql","testxp","texas","tf","tftp","tg","th","thailand","theta","thor","tienda","tiger","time","titan","tivoli","tj","tk","tm","tn","to","tokyo","toledo","tom","tool","tools","toplayer","toronto","tour","tp","tr","tracker","train","training","transfers","trinidad","trinity","ts","ts1","tt","tucson","tulsa","tumb","tumblr","tunnel","tv","tw","tx","tz",
			"u","ua","uddi","ug","uk","um","uniform","union","unitedkingdom","unitedstates","unix","unixware","update","updates","upload","ups","upsilon","uranus","urchin","us","usa","usenet","user","users","ut","utah","utilities","uy","uz",
			"v","va","vader","vantive","vault","vc","ve","vega","vegas","vend","vendors","venus","vermont","vg","vi","victor","video","videos","viking","violet","vip","virginia","vista","vm","vmserver","vmware","vn","vnc","voice","voicemail","voip","voyager","vpn","vpn0","vpn01","vpn02","vpn1","vpn2","vt","vu",
			"w","w1","w2","w3","wa","wais","wallet","wam","wan","wap","warehouse","washington","wc3","web","webaccess","webadmin","webalizer","webboard","webcache","webcam","webcast","webdev","webdocs","webfarm","webhelp","weblib","weblogic","webmail","webmaster","webproxy","webring","webs","webserv",
			"webserver","webservices","website","websites","websphere","websrv","websrvr","webstats","webstore","websvr","webtrends","welcome","west","westvirginia","wf","whiskey","white","whois","wi","wichita","wiki","wililiam","win","win01","win02","win1","win2","win2000","win2003","win2k","win2k3","windows","windows01","windows02","windows1","windows2","windows2000","windows2003","windowsxp","wingate","winnt","winproxy","wins","winserve","winxp","wire","wireless","wisconsin","wlan","wordpress","work","world","write","ws","ws1","ws10","ws11","ws12","ws13","ws2","ws3","ws4","ws5","ws6","ws7","ws8","ws9","wusage","wv","ww","www","www-","www-01","www-02","www-1","www-2","www-int","www0","www01","www02","www1","www2","www3","www_","wwwchat","wwwdev","wwwmail","wy","wyoming",
			"x","x-ray","xi","xlogan","xmail","xml","xp","y","yankee","ye","yellow","young","yt","yu","z","z-log","za","zebra","zera","zeus","zlog","zm","zulu","zw",
		]
		total = len(wordlist)
		for progress, label in enumerate(wordlist, 1):
			sys.stdout.write("\r[+] Progress %i / %s ..." % (progress, total))
			sys.stdout.flush()
			candidate = label+'.'+splithost
			try:
				s = socket.gethostbyname(candidate)
				if not s:
					continue
				so = socket.gethostbyname_ex(candidate)
			except (socket.gaierror, socket.herror):
				# Name does not resolve: not a sub-domain, move on.
				continue
			print("\n[+] Subdomain found!\n | Subdomain: %s.%s \n | Nameserver: %s\n | IP: %s" % (label, splithost, so[0], s))
			if s == '127.0.0.1':
				print("[!] "+red+"Sub-domain is vulnerable to "+boldred+"Same-Site Scripting! "+reset+"\n[!] About Same-Site Scripting:\n[!] [ "+green+"https://www.acunetix.com/vulnerabilities/web/same-site-scripting"+reset+" ] ")
			print('')
	def enumform(listofIDs,listofnames):
		"""Print the enumerated users as a two-column ASCII table.

		listofIDs   -- user IDs as strings, shown centred in a 6-wide column
		listofnames -- usernames, paired positionally with ``listofIDs``

		The username column is as wide as the longest name but never
		narrower than 12 characters.  Empty lists print just the header.
		"""
		# Folding the minimum width into max() also covers an empty name
		# list, which max(listofnames, key=len) would reject with ValueError.
		lengthofnames = max([12] + [len(name) for name in listofnames])
		border = "\t+-" + '-' * 6 + '-+-' + '-' * lengthofnames + "-+"
		print("[i] "+green+"Found the following Username/s:"+reset)
		print(border)
		print("\t| "+'ID/s'.center(6, ' ')+' | '+'Username/s'.center(lengthofnames, ' ')+" |")
		print(border)
		for user_id, name in zip(listofIDs, listofnames):
			print('\t| '+user_id.center(6, ' ')+" | "+name.center(lengthofnames, ' ')+' |')
		print(border)
		print("")
	def wpbackupscanner():
		"""Look for leftover copies of wp-config.php on the target.

		Each candidate file name is requested through ``sock``; when the
		response status is 200 the file is fetched again with ``sock(name, "1")``
		and reported as a backup if the returned text contains a
		``define('DB_PASSWORD'`` statement.  CTRL+C stops the search early;
		whatever was found so far is still listed.
		"""
		backups = ['wp-config.php~','wp-config.php.txt','wp-config.php.save','.wp-config.php.swp','wp-config.php.swp','wp-config.php.swo','wp-config.php_bak','wp-config.bak','wp-config.php.bak','wp-config.save','wp-config.old','wp-config.php.old','wp-config.php.orig','wp-config.orig','wp-config.php.original','wp-config.original','wp-config.txt']
		print("[+] Scan Started")
		print("[+] Searching Wordpress Backups...")
		print("[?] Note: Press CTRL+C to skip\n  ")
		hits = []
		try:
			for progress, candidate in enumerate(backups, 1):
				sys.stdout.write("\r[+] Progress %i / %s ..." % (progress, len(backups)))
				sys.stdout.flush()
				sock(candidate)
				if page.getcode() != 200:
					continue
				detecting = sock(candidate, "1")
				if "define('DB_PASSWORD'" in detecting:
					# ``data`` is read right after the fetch, as before.
					hits.append((candidate, data))
		except(KeyboardInterrupt):
			print("\n[+] File detection skipped")
		print('')
		for ifile, iurl in hits:
			print(("[!] "+boldred+"Backup Found!\n"+reset+" | "+red+"Filename: "+boldred+"%s"+reset+"\n | "+red+"URL: "+boldred+"%s\n"+reset) % (ifile, iurl))
	def wpenumeration():
		"""Enumerate WordPress usernames through ``?author=N`` redirects.

		Author IDs 1 to 30 are requested in order.  A request that is not
		redirected ends the enumeration; otherwise the second-to-last path
		segment of the redirect target is recorded as the username.  The
		results are printed with ``enumform``.  If ``host`` itself redirects,
		the global ``host`` is updated to the redirect target first.
		"""
		import time
		global d4rk,dr1,host
		landing = urllib.urlopen(host)
		landed_url = landing.geturl()
		if landed_url != host:
			print("[i] The remote host redirects to '"+str(landed_url)+"' \n    Following the redirection...")
			host = landed_url
		print(("\n[+] Scan Started : "+lightgreen+"%s"+reset) % time.strftime("%c"))
		print("[+] Enumeration Usernames...")
		listofusernames = []
		listofids = []
		for author_id in range(1, 31):
			authorlink = host+"?author="+str(author_id)
			response = urllib.urlopen(authorlink)
			response.read()
			target = response.geturl()
			if target == authorlink:
				# No redirect for this ID: stop enumerating.
				break
			# Keep the last two path segments and take the first of them.
			listofusernames.append(target.split("/")[-2:][0])
			listofids.append(str(author_id))
		d4rk = dr1+str(1)+str(33)+str(7)
		if listofusernames:
			enumform(listofids,listofusernames)
		else:
			print("[+] "+red+"No Usernames detected"+reset)
		print("[+] Enumeration Completed.")
		print(("[+] Scan Ended : "+lightgreen+"%s"+reset) % time.strftime("%c"))
	def wpscanner():
		"""Detect WordPress on the target and run the WordPress checks.

		Detection is tried in two steps:
		  1. ``wp-admin/`` redirecting to the login page: run username
		     enumeration.
		  2. otherwise ``wp-content/index.php`` answering 200: run the backup
		     scanner followed by username enumeration.
		If neither matches, "No Wordpress Detected" is printed.
		"""
		# The former ``wpenumeration == "on"`` tests compared the function
		# object with a string and were therefore always False; the dead
		# branches are removed and the effective behaviour is kept.
		# NOTE(review): the switch ``wpenumerator`` may have been intended.
		print("  \n[+] Detecting Wordpress")
		sock('wp-admin/')
		if "wp-login.php?redirect_to" in page.geturl():
			print(green+"[i] "+green+"Wordpress Detected!"+reset)
			wpenumeration()
			return

		sock('wp-content/index.php')
		# NOTE(review): ``"" in page.read()`` holds for any body, so only the
		# status code decides here; the body is still read as before.
		if page.getcode() == 200 and "" in page.read():
			print("[!] "+green+"Wordpress Detected!"+reset)
			wpbackupscanner()
			wpenumeration()
			return

		print("[!] "+red+"No Wordpress Detected"+reset)
	def redirectcheck():
		"""Check whether ``host`` redirects and offer to adopt the new URL.

		Resets the global ``redirect`` to 0, opens ``host`` and compares the
		final URL with it.  On a redirect the user is asked whether the
		target should become the default host; answering "y" updates the
		global ``host`` and sets ``redirect`` to 1.
		"""
		global redirect,host
		redirect = 0
		print("[+] Checking Redirection")
		response = urllib.urlopen(host)
		final_url = response.geturl()
		if final_url == host:
			print("[+] URL isn't redirecting")
			return
		answer = raw_input("[i] "+boldgrey+"Host redirects to "+str(final_url)+reset+" \n    Set this as default Host? [Y/N]:\n    > ")
		if answer.lower() == "y":
			host = final_url
			redirect = 1
	def again():
		"""Reset every scan switch to "off" and ask whether to exit or rerun.

		"a" starts ``dtect()`` again, "e" calls ``exit()``; any other answer
		prints an error and repeats the question.
		"""
		global wpenumerator,filedetector,subdomainscan,portscan,wpscan,xssscanner,wpbackupscan,sqliscanner
		# -- Switches Reset --
		wpenumerator = filedetector = subdomainscan = portscan = "off"
		wpscan = xssscanner = wpbackupscan = sqliscanner = "off"
		# -- Switches Reset --
		while True:
			inp = raw_input("\n[+] [E]xit or launch [A]gain? (e/a)").lower()
			if inp == 'a':
				dtect()
				return
			if inp == 'e':
				exit()
				return
			print("[!] Incorrect option selected")

# -- Program Structure Start -- 
	# Main flow: show the menu, read the target, then run every scan whose
	# switch is "on" before asking whether to start over.
	menu()
	try:
		global host
		host = raw_input("[+] Enter Domain \n    e.g, site.com\n    > ")
		# Default to plain HTTP when the user gave no scheme.
		if not ('https://' in host or 'http://' in host):
			host = "http://"+host
		print("[+] Checking Status...")
		alive()
		responseheadercheck()
		if xssscanner == "on":
			XSSscan(host)
		if sqliscanner == "on":
			SQLIscan(host)
		if wpbackupscan == "on":
			wpbackupscanner()
		if filedetector == "on":
			files = ['robots.txt','crossdomain.xml','.htaccess','clientaccesspolicy.xml','infophp.php','log.txt','logs.txt','CHANGELOG.txt','awstats/data/']
			print("[+] Scan Started")
			print("[+] Searching sensitive files...")
			print("[?] Note: Press CTRL+C to skip\n  ")
			try:
				for i in files:
					sock(i)
					if i == "awstats/data/":
						# The directory listing is recognised by its title.
						if "<title>Index of /awstats/data</title>" in sourcecode:
							print("[!] awstats detected!\n[!] URL: %s" % (data))
					elif page.getcode() == 200:
						print("[!] File Found!\n | Name: %s\n | URL: %s\n" % (i,data))
			except(KeyboardInterrupt):
				print("\n[+] File detection skipped")
		if wpenumerator == "on":
			print("  \n[+] Detecting Wordpress")
			wp = 0
			i = 'wp-admin/'
			sock(i)
			if "wp-login.php?redirect_to" in page.geturl():
				wp = 1
				print(green+"[i] "+green+"Wordpress Detected!"+reset)
				wpenumeration()
			else:
				# Second attempt: probe a file that WordPress installs ship.
				i = 'wp-content/index.php'
				sock(i)
				if page.getcode() == 200 and "" in page.read():
					print("[!] "+green+"Wordpress Detected!"+reset)
					wp = 1
					wpenumeration()
			if wp == 0:
				print("[!] "+red+"No Wordpress Detected"+reset)
		if wpscan == "on":
			wpscanner()
		if subdomainscan == "on":
			subdomainscanner()
		if portscan == "on":
			portscanner()
		again()
	except(KeyboardInterrupt) as Exit:
		print("\n[+] Exiting...")
		sys.exit()

Example 7

Project: bonding
Source File: bonding.py
View license
def _iface_received_probe(sender, recv_iface, frame, frame_type, payload):
    """Return True if ``recv_iface`` receives ``frame`` sent through ``sender``.

    A raw socket bound to ``recv_iface`` is switched to promiscuous mode,
    the frame is sent up to three times and the first 60 bytes received
    each time are compared with the expected EtherType and payload.  The
    interface flags read before the probe are written back afterwards and
    the receiving socket is always closed.
    """
    s2 = socket.socket(socket.AF_PACKET, socket.SOCK_RAW,
                       socket.htons(ETH_P_ALL))
    try:
        s2.setsockopt(socket.SOL_SOCKET, SO_BINDTODEVICE, recv_iface + '\0')
        s2.bind((recv_iface, 0))
        s2.settimeout(TIMEOUT)

        # Remember the current flags so the exact previous state can be
        # restored, including a promiscuous bit that was already set.
        ifreq = fcntl.ioctl(s2.fileno(), SIOCGIFFLAGS,
                            struct.pack('256s', recv_iface[:15]))
        (original_flags,) = struct.unpack('16xH', ifreq[:18])
        fcntl.ioctl(s2.fileno(), SIOCSIFFLAGS,
                    struct.pack('16sH', recv_iface,
                                original_flags | IFF_PROMISC))
        try:
            # Try sending and receiving 3 times to give us better chances
            # of catching the send; generally the first attempt succeeds.
            for _ in range(3):
                try:
                    sender.sendall(frame)
                except (socket.timeout, socket.error):
                    continue
                try:
                    data = s2.recv(60)
                except (socket.timeout, socket.error):
                    continue
                # Bytes 12-13 hold the EtherType, the payload follows.
                if data[14:] == payload and data[12:14] == frame_type:
                    return True
            return False
        finally:
            fcntl.ioctl(s2.fileno(), SIOCSIFFLAGS,
                        struct.pack('16sH', recv_iface, original_flags))
    finally:
        s2.close()


def peers(quiet=True):
    """Find which network interfaces can see each other's Ethernet frames.

    Every interface that is neither loopback nor a bonding master is
    enabled, then each one in turn broadcasts a raw frame with a random
    payload while the others listen for it.  An interface that receives
    the frame is recorded as a peer of the sender and is not used as a
    sender itself afterwards.

    Root privileges are required; without them a message is printed and
    an empty dict is returned.

    Args:
        quiet: When false, progress text and dots are written to stdout.

    Returns:
        dict mapping a sending interface name to the list of interface
        names that received its probe frame.

    Raises:
        SystemExit: if enabling an interface fails with IOError.
    """
    if os.geteuid() != 0:
        print ('%sroot privileges are needed to properly check for bonding '
               'peers. Skipping...%s' % (RED, RESET))
        return {}

    syslog.openlog('bonding')
    syslog.syslog('Scanning for bonding interface peers')

    ifaces = get_iface_list()

    # Enable all normal interfaces
    if not quiet:
        sys.stdout.write('Enabling interfaces')
        sys.stdout.flush()
    for iface in ifaces:
        if is_iface_loopback(iface) or is_iface_master(iface):
            continue
        if not quiet:
            sys.stdout.write('.')
            sys.stdout.flush()
        syslog.syslog('Enabling interface %s' % iface)
        try:
            set_iface_flag(iface, IFF_UP)
        except IOError as e:
            raise SystemExit('%s %s. This generally indicates a misconfigured '
                             'interface' % (e, iface))

    if not quiet:
        print('\nSleeping 5 seconds for switch port negotiation...')
    time.sleep(5)

    if not quiet:
        sys.stdout.write('Scanning')
        sys.stdout.flush()
    secondaries = []
    groups = {}
    for send_iface in ifaces:
        if not quiet:
            sys.stdout.write('.')
            sys.stdout.flush()
        if (is_iface_loopback(send_iface) or is_iface_master(send_iface) or
                send_iface in secondaries):
            continue

        # Static data for frame payload that includes the sending interface
        static = 'IF%sIF' % send_iface
        # Build the rest of the payload using random data
        payload = '%s%s' % (static, os.urandom(46 - len(static)))
        # Broadcast FF:FF:FF:FF:FF:FF
        dst_mac = '\xff\xff\xff\xff\xff\xff'
        if USEREALSRCMAC:
            # The real MAC address of the sending interface
            src_mac = get_mac_addr_raw(send_iface)
        else:
            # Invalid source MAC
            src_mac = '\x00\x00\x00\x00\x00\x00'
        # Unregistered EtherType, in this case for Interface Peer Discovery
        frame_type = '\x50\x44'
        # The frame is identical for every receiver, so build it once.
        frame = '%s%s%s%s' % (dst_mac, src_mac, frame_type, payload)

        # Set up the sending interface socket
        s1 = socket.socket(socket.AF_PACKET, socket.SOCK_RAW,
                           socket.htons(ETH_P_ALL))
        try:
            s1.setsockopt(socket.SOL_SOCKET, SO_BINDTODEVICE,
                          send_iface + '\0')
            s1.setsockopt(socket.SOL_SOCKET, socket.SO_BROADCAST, 1)
            s1.bind((send_iface, 0))
            s1.setblocking(0)

            for recv_iface in ifaces:
                if not quiet:
                    sys.stdout.write('.')
                    sys.stdout.flush()
                if (is_iface_loopback(recv_iface) or
                        is_iface_master(recv_iface) or
                        recv_iface == send_iface):
                    continue

                if _iface_received_probe(s1, recv_iface, frame, frame_type,
                                         payload):
                    groups.setdefault(send_iface, []).append(recv_iface)
                    secondaries.append(recv_iface)
        finally:
            # Release the sender even if a probe raised.
            s1.close()

    for iface in sorted(groups.keys()):
        syslog.syslog('Interface group: %s %s' %
                      (iface, ' '.join(groups[iface])))

    syslog.syslog('Scan for bonding interface peers completed')

    if not quiet:
        print('Done')
    return groups

Example 8

Project: iktomi
Source File: base.py
View license
def manage(commands, argv=None, delim=':'):
    '''
    Parses argv and runs the necessary command. Is to be used in manage.py file.

    Accept a dict with digest name as keys and instances of
    :class:`Cli<iktomi.management.commands.Cli>`
    objects as values.

    The format of command is the following::

        ./manage.py digest_name:command_name[ arg1[ arg2[...]]][ --key1=kwarg1[...]]

    where command_name is a part of digest instance method name, args and kwargs
    are passed to the method. For details, see
    :class:`Cli<iktomi.management.commands.Cli>` docs.

    :param commands: dict mapping digest names to digests; byte-string keys
        are decoded as utf-8
    :param argv: argument list to parse, ``sys.argv`` is used if None
    :param delim: separator between the digest name and the command name;
        only the first occurrence splits, the rest belongs to the command name

    A ``--key`` option given without ``=value`` is passed as ``key=True``.

    Terminates through ``sys.exit`` with an error message if no command is
    given, the digest or command is unknown, or a ``Cli`` digest is called
    without a command name. In shell auto-completion mode (environment
    variable ``IKTOMI_AUTO_COMPLETE`` or ``DJANGO_AUTO_COMPLETE`` is set)
    the suggestions are written to stdout and the process exits with 0.
    '''

    commands = {(k.decode('utf-8') if isinstance(k, six.binary_type) else k): v
                for k, v in commands.items()}

    # Default django autocompletion script is registered to manage.py
    # We use the same name for this script and it seems to be ok
    # to implement the same interface
    def perform_auto_complete(commands):
        from .lazy import LazyCli
        # COMP_WORDS holds the command line being completed (first word is
        # the script itself), COMP_CWORD the index of the word under cursor
        cwords = os.environ['COMP_WORDS'].split()[1:]
        cword = int(os.environ['COMP_CWORD'])

        try:
            curr = cwords[cword - 1]
        except IndexError:
            curr = ''

        suggest = []
        if len(cwords) > 1 and cwords[0] in commands:
            value = commands[cwords[0]]
            if isinstance(value, LazyCli):
                value = value.get_digest()

            for cmd_name, _ in value.get_funcs():
                # NOTE(review): drops a fixed 8-character prefix of the method
                # name, presumably "command_" -- confirm against Cli.get_funcs
                suggest.append(cmd_name[8:])
            if curr == ":":
                curr = ''
        else:
            suggest += list(commands.keys()) + [x + ":" for x in commands.keys()]
        suggest.sort()
        output = u" ".join([x for x in suggest if x.startswith(curr)])
        sys.stdout.write(output)

    auto_complete = 'IKTOMI_AUTO_COMPLETE' in os.environ or \
                    'DJANGO_AUTO_COMPLETE' in os.environ
    if auto_complete:
        perform_auto_complete(commands)
        sys.exit(0)

    argv = sys.argv if argv is None else argv
    if len(argv) > 1:
        cmd_name = argv[1]
        raw_args = argv[2:]
        args, kwargs = [], {}
        # parsing params
        for item in raw_args:
            if item.startswith('--'):
                # "--key=value" -> kwargs[key] = value, "--key" -> kwargs[key] = True
                key, sep, value = item[2:].partition('=')
                kwargs[key] = value if sep else True
            else:
                args.append(item)

        # trying to get command instance
        if delim in cmd_name:
            # split on the first delimiter only: a command name containing
            # the delimiter again must not blow up with an unpacking
            # ValueError, it is reported as "not found" below instead
            digest_name, command = cmd_name.split(delim, 1)
        else:
            digest_name = cmd_name
            command = None
        try:
            digest = commands[digest_name]
        except KeyError:
            _command_list(commands)
            sys.exit('ERROR: Command "{}" not found'.format(digest_name))
        try:
            if command is None:
                if isinstance(digest, Cli):
                    help_ = digest.description(argv[0], digest_name)
                    sys.stdout.write(help_)
                    sys.exit('ERROR: "{}" command digest requires command name'\
                                .format(digest_name))
                digest(*args, **kwargs)
            else:
                digest(command, *args, **kwargs)
        except CommandNotFound:
            help_ = digest.description(argv[0], digest_name)
            sys.stdout.write(help_)
            sys.exit('ERROR: Command "{}:{}" not found'.format(digest_name, command))
    else:
        _command_list(commands)
        sys.exit('Please provide any command')

Example 9

Project: kamaelia_
Source File: spam_grab.py
View license
    def main(self):
        """
        Interactive spam clean-up loop, written as a generator.

        Yields the results of self.getMailStats(), self.getMessageHeaders(),
        self.grabStoreSpam() and self.deleteMessage() to whatever drives this
        generator, and reads self.stat_mails / self.stat_size / self.headers
        after the corresponding yields.
        NOTE(review): presumably those attributes are filled in by the yielded
        calls before the generator is resumed -- confirm against the scheduler.

        Mail ids are walked downwards from self.stat_mails in windows. Every
        message in a window is classified from its headers as a deletion
        candidate, whitelisted, or "grey" (neither). The user is then prompted
        on stdin before anything is deleted; typing 'quit' at any prompt ends
        the loop. Finally ["QUIT"] is sent to the "outbox".
        """

        yield self.getMailStats()
        

        print "Number of emails waiting for us:", self.stat_mails
        print "Size of inbox", self.stat_size
        
        self.spamcount = self.getSpamStoreMeta()
    
        # start from the highest mail id and work towards id 1
        lower = self.stat_mails

        print self.whitelist        
        while lower > 1:
            deletions = []
            greyzone = []
            higher = lower
#            lower = max(1, lower-50)
#            lower = max(1, lower-200)
            lower = max(1, lower-600)

            # The always-true block below keeps moving the window further
            # down; only the last higher/lower pair assigned decides which
            # range is scanned in this pass, the ids stepped over are skipped.
            # NOTE(review): looks like a hand-edited way to jump past ranges
            # that were already processed -- confirm before relying on it.
            if 1:
                higher = lower
                lower = max(1, lower-600)

                higher = lower
                lower = max(1, lower-600)

                higher = lower
                lower = max(1, lower-600)

                higher = lower
                lower = max(1, lower-600)

                higher = lower
                lower = max(1, lower-600)

                higher = lower
                lower = max(1, lower-600)

                higher = lower
                lower = max(1, lower-600)

                higher = lower+200
                lower = max(1, lower-500)

#            if 0:
                higher = lower
                lower = max(1, lower-600)

            print "lower, higher",lower, higher
            l = 0
            for mailid in range(lower, higher+1):
                l +=1
                # break the progress line after every 100 messages
                if (l % 100) == 0: print
    #            print "Retrieving HEADERS of mail", mailid
                yield self.getMessageHeaders(mailid)

    #            print "-------- HEADERS RECEIVED --------"
                delete = False
                whitelisted = False

                for sender in self.headers.get("from",[]):
                    # once one sender is whitelisted the remaining senders
                    # are not examined any more
                    if whitelisted:
                        continue
                    if self.blacklisted_sender(sender):
                        delete = True
                    # whitelist wins over blacklist for the same sender
                    if self.whitelisted_sender(sender):
                        delete = False
                        whitelisted = True
                        continue
                    if "mail delivery subsystem" in sender:
                        delete = True
                    if "system administrator" in sender:
                        if "undeliverable" in self.headers["subject"][0]:
                            delete = True

                    for phrase in self.phrases: # hideously inefficient, but works
                        if phrase in self.headers["subject"][0]:
                            delete = True
                    if not delete: # handled differently now
                        pass 
#                        print self.headers["subject"][0]

                # one progress character per message: "D" = deletion candidate
                if delete:
                    deletions.append( (mailid, self.headers["from"], self.headers) )
                    sys.stdout.write("D")
                else:
                    sys.stdout.write(".")
                sys.stdout.flush()

                # neither flagged for deletion nor whitelisted: report as grey
                if not delete and not whitelisted:
#                    print "THIS /MAY/ BE SPAM", self.headers["subject"][0]
                    greyzone.append( (mailid, self.headers.get("from",[]), self.headers) )
            print

            if len(deletions) != 0:

                print 
                print "============ CANDIDATES FOR DELETION ============"
                pprint.pprint( [ (ID, FROM, HEADERS.get("subject",[]) ) for (ID, FROM, HEADERS) in deletions ])
                print "TOTAL Suggested", len(deletions)
                
                print "To delete these, don't type 'quit'"

                # 'quit' stops everything, 'skip' keeps the candidates,
                # any other input confirms the deletion
                X = raw_input()
                if X.lower() == "quit":
                   break
                if X.lower() != "skip":
                    for deletion in deletions:
                        ID, FROM, HEADERS = deletion
                        sys.stdout.write(".")
                        sys.stdout.flush()
                        # store a copy first, then delete from the server
                        yield self.grabStoreSpam(ID)
    #                    print "SPAM GRABBED"
    #                    print "INCREASING SPAMCOUNT"
                        # f is the counter value before the increment; it is
                        # the name printed for the stored copy below
                        f = str(self.spamcount)
                        self.spamcount +=1
                        self.storeSpamStoreMeta(self.spamcount)
                        print "DELETING SPAM FROM SERVER, you still have mail",ID,"here", "SPAMSTORE/"+f
                        yield self.deleteMessage(ID)

                        #print self.result
                    print "RECOMMENDED DELETIONS COMPLETE"
                    print "To delete more, don't type 'quit'"
                    X = raw_input()
                    if X.lower() == "quit":
                       break
                else:
                    print "skipping, moving on"
                    deletions = []
        
            if len(greyzone) != 0:
                print "============ EMAILS WHICH ARE GREY ============"
#                pprint.pprint( [ (ID, FROM, HEADERS["subject"]) for (ID, FROM, HEADERS) in greyzone ])
                for (ID, FROM, HEADERS) in greyzone:
                    senders = []
                    for sender in FROM:
                        # reduce "Name <addr>" to the bare addr between < and >
                        if ("<" in sender) and (">" in sender):
                            sender = sender[sender.find("<")+1:sender.rfind(">")]
                        senders.append(sender)
                    print " ".join(senders), ":", "".join( HEADERS["subject"]), ":", "".join(FROM)

                
                print "JUST SENDERS ---------------------------------------"
                # unique bare sender addresses, in order of first appearance
                allsenders = []
                for senders in [ FROM for (ID, FROM, HEADERS) in greyzone ]:
                    for sender in senders:
                        if ("<" in sender) and (">" in sender):
                             sender = sender[sender.find("<")+1:sender.rfind(">")]
                        if sender not in allsenders:
                            allsenders.append(sender)
                for sender in allsenders:
                    print sender

                print "JUST SUBJECTS --------------------------------------"
                for subjects in [ HEADERS["subject"] for (ID, FROM, HEADERS) in greyzone ]:
                    for subject in subjects:
                        print subject
                
                print "=====End of report on currently grey mails====="
                print "To keep doing, don't type quit"
                X = raw_input()
                if X.lower() == "quit":
                    break
        print "Done, call again"
        self.send(["QUIT"], "outbox")

Example 10

Project: nrvr-commander
Source File: ssh.py
View license
    def __init__(self, sshParameters, argv,
                 exceptionIfNotZero=True,
                 connectTimeoutSeconds=None,
                 maxConnectionRetries=10,
                 tickerForRetry=True,
                 checkForPermissionDenied=False):
        """Create new SshCommand instance.
        
        Will wait until completed.
        
        Captures returncode, and output.
        
        Output may contain extraneous leading or trailing newlines and whitespace.
        
        Example use::
        
            example = SshCommand(exampleSshParameters, ["ls", "-al"])
            print "returncode=" + str(example.returncode)
            print "output=" + example.output
        
        sshParameters
            an SshParameters instance.
        
        argv
            list of command and arguments passed to ssh.
            
            If given a string instead of a list then fixed by argv=argv.split() making a list.
            That may only work as expected for some commands on some platforms.
            It should work for a command without arguments.
            
            Hence if you don't want a string split, pass it in wrapped as sole item of a list.
        
        exceptionIfNotZero
            whether to raise SshCommandException if ssh exits with a returncode other than 0.
        
        connectTimeoutSeconds
            if given, passed to ssh as option ConnectTimeout.
        
        maxConnectionRetries
            how many times to attempt connecting while waiting for a password prompt.
            
            A value of 0 or None means retrying without limit.
            
            Retrying applies only if sshParameters has a password,
            without a password the command is run exactly once.
        
        tickerForRetry
            whether to print progress dots to stdout while retrying to connect.
        
        checkForPermissionDenied
            whether to kill ssh if early in its output it reports permission denied
            and prompts for a password again.
        
        Raises Exception if module pty isn't available, if failing to connect,
        or if the host key hasn't been accepted yet.
        
        Raises SshCommandException if ssh did not exit normally, or
        if exceptionIfNotZero and returncode isn't 0."""
        if not _gotPty:
            # cannot use ssh if no pty
            raise Exception("must have module pty available to use ssh command"
                            ", which is known to be available in Python 2.6 on Linux, but not on Windows")
        #
        if isinstance(argv, basestring):
            argv = argv.split()
        maxConnectionRetries = int(maxConnectionRetries)
        #
        self._ipaddress = sshParameters.ipaddress
        self._argv = argv
        self._user = sshParameters.user
        self._pwd = sshParameters.pwd
        self._exceptionIfNotZero = exceptionIfNotZero
        self._connectTimeoutSeconds = connectTimeoutSeconds
        # -1 never counts down to 0, hence means retrying without limit
        self._connectionRetriesRemaining = maxConnectionRetries if maxConnectionRetries else -1
        self._output = ""
        self._returncode = None
        #
        ticked = False
        while self._connectionRetriesRemaining:
            self._connectionRetriesRemaining -= 1
            # fork and connect child to a pseudo-terminal
            self._pid, self._fd = pty.fork()
            if self._pid == 0:
                # in child process
                sshOptions = ["-l", self._user]
                if connectTimeoutSeconds:
                    sshOptions.extend(["-o", "ConnectTimeout=" + str(connectTimeoutSeconds)])
                sshOptions.append(self._ipaddress)
                try:
                    os.execvp("ssh", ["ssh"] + sshOptions + self._argv)
                except Exception as e:
                    # stderr of the child is the pseudo-terminal,
                    # hence the parent gets to see this in the output
                    sys.stderr.write("cannot execute ssh: " + str(e) + "\n")
                    sys.stderr.flush()
                finally:
                    # only gets here if exec failed,
                    # the forked child must never go on running the code of the parent
                    os._exit(127)
            else:
                # in parent process
                if self._pwd:
                    # if given a password then apply
                    promptedForPassword = False
                    outputTillPrompt = ""
                    # look for password prompt
                    while not promptedForPassword:
                        try:
                            newOutput = os.read(self._fd, 1024)
                            if not len(newOutput):
                                # end has been reached
                                if not self._connectionRetriesRemaining:
                                    # was raise Exception("unexpected end of output from ssh")
                                    raise Exception("failing to connect via ssh\n" + 
                                                    outputTillPrompt)
                                if tickerForRetry:
                                    if not ticked:
                                        # first time only printing
                                        sys.stdout.write("retrying to connect via ssh [")
                                    sys.stdout.write(".")
                                    sys.stdout.flush()
                                    ticked = True
                                break # break out of while not promptedForPassword:
                            # ssh has been observed returning "\r\n" for newline, but we want "\n"
                            newOutput = SshCommand._crLfRegex.sub("\n", newOutput)
                            outputTillPrompt += newOutput
                            if SshCommand._acceptPromptRegex.search(outputTillPrompt):
                                # e.g. "Are you sure you want to continue connecting (yes/no)? "
                                raise Exception("cannot proceed unless having accepted host key\n" +
                                                outputTillPrompt +
                                                '\nE.g. invoke SshCommand.acceptKnownHostKey(SshParameters("{0}",user,pwd)).'.format(self._ipaddress))
                            if SshCommand._pwdPromptRegex.search(outputTillPrompt):
                                # e.g. "10.123.45.67's password: "
                                promptedForPassword = True
                        except EnvironmentError:
                            # e.g. "@    WARNING: REMOTE HOST IDENTIFICATION HAS CHANGED!     @" and closing
                            raise Exception("failing to connect via ssh\n" + 
                                            outputTillPrompt)
                    # NOTE(review): when retrying, the pty file descriptor and the
                    # ended child of this attempt aren't closed or waited for
                    if not promptedForPassword: # i.e. if got here from breaking out of while not promptedForPassword:
                        continue # continue at while self._connectionRetriesRemaining:
                    else: # promptedForPassword is normal
                        # if connecting then no more retries,
                        # maxConnectionRetries is meant for retrying connecting only
                        self._connectionRetriesRemaining = 0
                    os.write(self._fd, self._pwd + "\n")
                else:
                    # without a password there is no prompt to wait for,
                    # hence no retrying of connecting either;
                    # must end the loop, else the command would be run again
                    # for every remaining retry after it has completed
                    self._connectionRetriesRemaining = 0
                # look for output
                endOfOutput = False
                outputSincePrompt = ""
                try:
                    while not endOfOutput:
                        try:
                            newOutput = os.read(self._fd, 1024)
                            if len(newOutput):
                                outputSincePrompt += newOutput
                            else:
                                # end has been reached
                                endOfOutput = True
                            if checkForPermissionDenied:
                                # seen stderr "Permission denied, please try again."
                                # and a repeat of stdout "10.123.45.67's password: "
                                if len(outputSincePrompt) <= 128: # limit to early in output
                                    if SshCommand._permissionDeniedRegex.search(outputSincePrompt) and SshCommand._pwdPromptRegex.search(outputSincePrompt):
                                        os.kill(self._pid, signal.SIGKILL)
                        except EnvironmentError:
                            # some ideas maybe at http://bugs.python.org/issue5380
                            # seen when pty closes OSError: [Errno 5] Input/output error;
                            # for any other error too we accept what we got so far, for now
                            endOfOutput = True
                finally:
                    # remove any leading space (maybe there after "password:" prompt) and
                    # remove first newline (is there after entering password and "\n")
                    self._output = re.sub(SshCommand._removeLeadingSpaceAndFirstNewlineRegex, r"\1", outputSincePrompt)
                    #
                    # get returncode
                    signalled = False
                    try:
                        ignorePidAgain, waitEncodedStatusIndication = os.waitpid(self._pid, 0)
                        if os.WIFEXITED(waitEncodedStatusIndication):
                            # normal exit(status) call
                            self._returncode = os.WEXITSTATUS(waitEncodedStatusIndication)
                        else:
                            # e.g. os.WIFSIGNALED or os.WIFSTOPPED
                            # less common case
                            signalled = True
                            self._returncode = -1
                        # raise an exception if asked to and there is a reason
                        exceptionMessage = ""
                        if signalled:
                            # less common case
                            exceptionMessage += "ssh did not exit normally"
                        elif self._exceptionIfNotZero and self._returncode:
                            exceptionMessage += "returncode: " + str(self._returncode)
                        if exceptionMessage:
                            commandDescription = "ipaddress: " + self._ipaddress
                            commandDescription += "\ncommand:\n\t" + self._argv[0]
                            if len(self._argv) > 1:
                                commandDescription += "\narguments:\n\t" + "\n\t".join(self._argv[1:])
                            else:
                                commandDescription += "\nno arguments"
                            commandDescription += "\nuser: " + self._user
                            exceptionMessage = commandDescription + "\n" + exceptionMessage
                            exceptionMessage += "\noutput:\n" + self._output
                            raise SshCommandException(exceptionMessage)
                    except OSError:
                        # supposedly can occur
                        self._returncode = -1
                        raise SshCommandException("ssh did not exit normally")
        if ticked:
            # final printing
            sys.stdout.write("]\n")
            sys.stdout.flush()

Example 11

Project: statsmodels
Source File: ipython_directive.py
View license
    def process_input(self, data, input_prompt, lineno):
        """
        Process data block for INPUT token.

        `data` is a ``(decorator, input, rest)`` triple: the optional
        ``@...`` decorator (or None), the input source text, and the output
        text that followed the input in the block. `input_prompt` prefixes
        the first input line; `lineno` only determines the width of the
        continuation prompt.

        Every input line is submitted through ``self.process_input_line``
        (an empty line is submitted instead in verbatim mode), whatever was
        captured in ``self.cout`` is collected, and tracebacks and warnings
        are echoed to stdout unless allowed by okexcept / okwarning.

        Returns the tuple ``(ret, input_lines, processed_output, is_doctest,
        decorator, image_file, image_directive)`` where `ret` is the list of
        lines to emit for this input.
        """
        decorator, input_text, rest = data
        image_file = None
        image_directive = None

        is_verbatim = decorator == '@verbatim' or self.is_verbatim
        is_doctest = (decorator is not None and
                      decorator.startswith('@doctest')) or self.is_doctest
        is_suppress = decorator == '@suppress' or self.is_suppress
        is_okexcept = decorator == '@okexcept' or self.is_okexcept
        is_okwarning = decorator == '@okwarning' or self.is_okwarning
        is_savefig = decorator is not None and \
                     decorator.startswith('@savefig')

        input_lines = input_text.split('\n')
        if len(input_lines) > 1 and input_lines[-1] != "":
            # make sure there's a blank line so splitter buffer gets reset
            input_lines.append('')

        # continuation prompt: dots as wide as the line number plus two
        continuation = '   %s:' % ('.' * (len(str(lineno)) + 2))

        if is_savefig:
            image_file, image_directive = self.process_image(decorator)

        ret = []
        is_semicolon = False

        # Hold the execution count, if requested to do so.
        store_history = not (is_suppress and self.hold_count)

        # Note: catch_warnings is not thread safe
        with warnings.catch_warnings(record=True) as ws:
            for i, line in enumerate(input_lines):
                if line.endswith(';'):
                    is_semicolon = True

                if i == 0:
                    # process the first input line
                    if is_verbatim:
                        self.process_input_line('')
                        self.IP.execution_count += 1 # increment it anyway
                    else:
                        # only submit the line in non-verbatim mode
                        self.process_input_line(line, store_history=store_history)
                    formatted_line = '%s %s' % (input_prompt, line)
                else:
                    # process a continuation line
                    if not is_verbatim:
                        self.process_input_line(line, store_history=store_history)

                    formatted_line = '%s %s' % (continuation, line)

                if not is_suppress:
                    ret.append(formatted_line)

        if not is_suppress and len(rest.strip()) and is_verbatim:
            # The "rest" is the standard output of the input. This needs to be
            # added when in verbatim mode. If there is no "rest", then we don't
            # add it, as the new line will be added by the processed output.
            ret.append(rest)

        # Fetch the processed output. (This is not the submitted output.)
        self.cout.seek(0)
        processed_output = self.cout.read()
        if not is_suppress and not is_semicolon:
            #
            # In IPythonDirective.run, the elements of `ret` are eventually
            # combined such that '' entries correspond to newlines. So if
            # `processed_output` is equal to '', then adding it to `ret`
            # ensures that there is a blank line between consecutive inputs
            # that have no outputs, as in:
            #
            #    In [1]: x = 4
            #
            #    In [2]: x = 5
            #
            # When there is processed output, it has a '\n' at the tail end. So
            # adding the output to `ret` will provide the necessary spacing
            # between consecutive input/output blocks, as in:
            #
            #   In [1]: x
            #   Out[1]: 5
            #
            #   In [2]: x
            #   Out[2]: 5
            #
            # When there is stdout from the input, it also has a '\n' at the
            # tail end, and so this ensures proper spacing as well. E.g.:
            #
            #   In [1]: print x
            #   5
            #
            #   In [2]: x = 5
            #
            # When in verbatim mode, `processed_output` is empty (because
            # nothing was passed to IP). Sometimes the submitted code block has
            # an Out[] portion and sometimes it does not. When it does not, we
            # need to ensure proper spacing, so we have to add '' to `ret`.
            # However, if there is an Out[] in the submitted code, then we do
            # not want to add a newline as `process_output` has stuff to add.
            # The difficulty is that `process_input` doesn't know if
            # `process_output` will be called---so it doesn't know if there is
            # Out[] in the code block. This requires that we include a hack in
            # `process_block`. See the comments there.
            #
            ret.append(processed_output)
        elif is_semicolon:
            # Make sure there is a newline after the semicolon.
            ret.append('')

        # context information
        filename = "Unknown"
        block_lineno = 0
        if self.directive.state:
            filename = self.directive.state.document.current_source
            block_lineno = self.directive.state.document.current_line

        # output any exceptions raised during execution to stdout
        # unless :okexcept: has been specified.
        if not is_okexcept and "Traceback" in processed_output:
            s =  "\nException in %s at block ending on line %s\n" % (filename, block_lineno)
            s += "Specify :okexcept: as an option in the ipython:: block to suppress this message\n"
            sys.stdout.write('\n\n>>>' + ('-' * 73))
            sys.stdout.write(s)
            sys.stdout.write(processed_output)
            sys.stdout.write('<<<' + ('-' * 73) + '\n\n')

        # output any warning raised during execution to stdout
        # unless :okwarning: has been specified.
        if not is_okwarning:
            for w in ws:
                s =  "\nWarning in %s at block ending on line %s\n" % (filename, block_lineno)
                s += "Specify :okwarning: as an option in the ipython:: block to suppress this message\n"
                sys.stdout.write('\n\n>>>' + ('-' * 73))
                sys.stdout.write(s)
                sys.stdout.write(('-' * 76) + '\n')
                s = warnings.formatwarning(w.message, w.category,
                                           w.filename, w.lineno, w.line)
                sys.stdout.write(s)
                sys.stdout.write('<<<' + ('-' * 73) + '\n')

        # Reset the capture buffer for the next input. The read() above left
        # the stream position at the end and truncate() does not move it, so
        # rewind first; otherwise an io.StringIO pads the next write with NUL
        # characters up to the stale position.
        self.cout.seek(0)
        self.cout.truncate(0)

        return (ret, input_lines, processed_output,
                is_doctest, decorator, image_file, image_directive)

Example 12

Project: pySecurityCenter
Source File: population.py
View license
def _add_entries(session, host_id, timestamp, packages):
    '''
    Store one Entry per ``(name, version)`` pair in *packages* for the host
    with id *host_id*, committing after every entry.  A version of None
    leaves the version attribute of the Entry untouched.
    '''
    for name, version in packages:
        entry = Entry()
        entry.name = name
        if version is not None:
            entry.version = version
        entry.timestamp = timestamp
        entry.host_id = host_id
        session.add(entry)
        session.commit()


def gen(sc, asset, expire):
    '''
    Database population function.

    What we are doing here is trying to interpret the output of plugin IDs
    20811 and 22869 and use that information to help populate the database
    with individualized entries of the software that is installed on the host.
    This information will later be used to build the report.

    sc      -- connection object; its ``analysis`` method is queried for
               the plugin output and for the per-host summary.
    asset   -- id of the asset to query; also stored as the host's asset_id.
    expire  -- age in days after which stored entries are deleted.

    Progress is written to stdout, one line per processed result.
    '''

    # The following regex patterns are used to pull out the needed fields from
    # Plugin ID 20811
    redate = re.compile(r'\[installed on (\d{4})/(\d{1,2})/(\d{1,2})\]')
    reinvdate = re.compile(r'\[installed on (\d{1,2})/(\d{1,2})/(\d{4})\]')
    rever = re.compile(r'\[version (.*?)\]')
    resw = re.compile(r'^([\w\s\.\(\-\)\+]*)')
    s = Session()
    ts = datetime.datetime.now()
    for vuln in sc.analysis(('pluginID','=','20811,22869'),
                            ('asset', '=', {'id': str(asset)}),
                            tool='vulndetails'):

        # First we need to get the host information...
        nh = False
        host = s.query(Host).filter_by(ip=vuln['ip']).first()
        if not host:
            host = Host()
            nh = True
        hdata = sc.analysis(('ip', '=', vuln['ip']),tool='sumip')[0]
        host.ip = vuln['ip']
        host.name = vuln['netbiosName']
        host.cpe = hdata['osCPE']
        host.dns = hdata['dnsName']
        host.asset_id = asset
        if nh:
            s.add(host)
        else:
            s.merge(host)
        s.commit()
        sys.stdout.write('%4d\t%-16s\t%-40s' % (host.id, host.ip, host.dns))
        sys.stdout.flush()

        if vuln['pluginID'] == '22869':
            # The package listing format depends on the operating system
            # named in the plugin text; pick the matching pattern.
            text = vuln['pluginText']
            if 'CentOS Linux system' in text or 'Red Hat Linux system' in text:
                names = re.findall(r'  ([a-zA-Z0-9\.\-]*)\|', text)
                _add_entries(s, host.id, ts, [(name, None) for name in names])
            elif 'SunOS 5.10' in text:
                names = re.findall(r'Patch: ([^ ]*)', text)
                _add_entries(s, host.id, ts, [(name, None) for name in names])
            elif 'Solaris 11 system' in text:
                # this pattern captures both the name and the version
                _add_entries(s, host.id, ts,
                             re.findall(r'([\w\/]+)\W+([0-9\.\-]+).*\n', text))
            elif 'Mac OS X system' in text:
                names = re.findall(r'  ([a-zA-Z0-9\.\-\_]*\.pkg)\n', text)
                _add_entries(s, host.id, ts, [(name, None) for name in names])
            else:
                sys.stdout.write('\t[NO FORMATTER]')
                sys.stdout.flush()

        if vuln['pluginID'] == '20811':
            # in_software / in_patches track which section of the plugin
            # text the current line belongs to
            in_software = False
            in_patches = False
            sw = None
            s.commit()
            for line in vuln['pluginText'].split('\n'):
                if '</plugin_output>' in line:
                    continue
                if line == u'The following software are installed on the remote host :':
                    in_software = True
                    in_patches = False
                    continue
                if line == u'The following updates are installed :':
                    in_patches = True
                    continue

                if in_software and line != '':
                    names = resw.findall(line)
                    vers = rever.findall(line)
                    dates = redate.findall(line)
                    new = Entry()
                    if len(names) > 0: new.name = names[0].strip()
                    if len(vers) > 0: new.version = vers[0]
                    try:
                        if len(dates) > 0:
                            # year/month/day
                            new.date = datetime.date(year=int(dates[0][0]),
                                                     month=int(dates[0][1]),
                                                     day=int(dates[0][2]))
                        else:
                            # month/day/year
                            dates = reinvdate.findall(line)
                            if len(dates) > 0:
                                new.date = datetime.date(year=int(dates[0][2]),
                                                         month=int(dates[0][0]),
                                                         day=int(dates[0][1]))
                    except ValueError:
                        # the digits do not form a valid calendar date;
                        # keep the entry, just without an install date
                        pass
                    if in_patches:
                        if line[:2] != '  ':
                            # an unindented line names the software the
                            # following indented update lines belong to
                            sw = line.strip(':').strip()
                            continue
                        else:
                            new.name = '%s (%s)' % (new.name, sw)

                    new.timestamp = ts
                    new.host_id = host.id
                    s.add(new)
        s.commit()
        sys.stdout.write('\tdone\n')
        sys.stdout.flush()
    s.commit()

    # Now to expire the old data out...
    exp = datetime.datetime.now() - datetime.timedelta(days=expire)
    # parenthesized single argument prints the same on Python 2 and 3
    print(exp)

    # First to delete the aged out entries
    for entry in s.query(Entry).filter(Entry.timestamp < exp).all():
        s.delete(entry)
    s.commit()

    # Next to delete any hosts that we arent pulling info for anymore...
    for host in s.query(Host).all():
        if len(host.entries) == 0:
            s.delete(host)
    s.commit()
    s.close()

Example 13

Project: info-flow-experiments
Source File: reader.py
View license
def read_old_log(log_file):
    """Parse a ``||``-delimited experiment log into one record per round.

    The first line of the file selects the layout.  If its first field is
    ``g`` the log is in the old format: two treatments named ``'0'`` and
    ``'1'``, one unit per remaining field, and ``g`` as the round marker.
    Otherwise the first line carries the number of units (field 1) and of
    treatments (field 2), the second line lists the treatment names, and
    ``assign`` is the round marker.

    The file is then scanned from the top.  Each line starting with the
    round marker begins a new round; every other recognised line updates
    the containers of the round in progress.  Lines that match no known
    tag are tried as old-style ad records and ignored if that fails.

    Args:
        log_file: path of the log file to read.

    Returns:
        A ``(par_adv, treatnames)`` tuple.  ``par_adv`` holds one dict per
        round with the keys ``'advector'``, ``'newsvector'``,
        ``'assignment'``, ``'xf'``, ``'intvector'``, ``'break'``,
        ``'loadtimes'``, ``'reloads'`` and ``'errors'``.  ``treatnames`` is
        the list of treatment names.
    """
    separator = r"\|\|"

    # Pass 1: read only the header to learn the layout of the log.
    with open(log_file, "r") as fo:
        chunks = re.split(separator, fo.readline())
        if chunks[0] == 'g':
            old = True
            gmarker = 'g'
            treatments = 2
            treatnames = ['0', '1']
            samples = len(chunks) - 1
        else:
            old = False
            gmarker = 'assign'
            treatments = int(chunks[2])
            samples = int(chunks[1])
            chunks = re.split(separator, fo.readline())
            treatnames = [name.strip() for name in chunks[1:]]
    assert treatments == len(treatnames)
    for i in range(treatments):
        # Same text the former ``print "Treatment ", i, " = ", name`` emitted.
        sys.stdout.write("Treatment  %s  =  %s\n" % (i, treatnames[i]))

    def blank_round():
        """Return empty per-unit vectors and zeroed counters for one round."""
        ads, interests, stories = [], [], []
        for _ in range(samples):
            ads.append(adVector.AdVector())
            interests.append(interest.Interests())
            stories.append(news.NewsVector())
        return (ads, interests, stories,
                [timedelta(minutes=0)] * samples, [0] * samples, [0] * samples)

    adv, ints, newsv, loadtimes, reloads, errors = blank_round()
    xvfbfails = []
    breakout = False
    par_adv = []
    ass = []
    seen_marker = False

    # Pass 2: scan the whole file; the context manager guarantees the handle
    # is closed even if a malformed line raises.
    sys.stdout.write("Scanning ads")
    with open(log_file, "r") as fo:
        for line in fo:
            chunks = re.split(separator, line)
            chunks[-1] = chunks[-1].rstrip()
            kind = chunks[0]
            # Lines without a separator (e.g. blank lines) have no second
            # field; they fall through to the best-effort branch below
            # instead of raising IndexError.
            tag = chunks[1] if len(chunks) > 1 else None

            if kind == gmarker:
                if seen_marker:
                    # A later marker closes the round in progress.
                    par_adv.append({'advector': adv, 'newsvector': newsv, 'assignment': ass,
                                    'xf': xvfbfails, 'intvector': ints, 'break': breakout,
                                    'loadtimes': loadtimes, 'reloads': reloads, 'errors': errors})
                    sys.stdout.write(".")
                    sys.stdout.flush()
                    adv, ints, newsv, loadtimes, reloads, errors = blank_round()
                    xvfbfails = []
                    breakout = False
                seen_marker = True
                # Old logs list the assignment right after the marker.
                ass = chunks[1:] if old else chunks[2:]
                assert len(ass) == samples
                apply_labels_to_vecs(adv, ints, newsv, ass, samples, treatments)
            elif kind == 'Xvfbfailure':
                xvfbfails.append(chunks[1])
            elif tag == 'breakingout':
                breakout = True
            elif tag == 'loadtime':
                t = datetime.strptime(chunks[2], "%H:%M:%S.%f")
                delta = timedelta(hours=t.hour, minutes=t.minute, seconds=t.second)
                loadtimes[int(chunks[3])] += delta
            elif tag == 'reload':
                reloads[int(chunks[2])] += 1
            elif tag == 'errorcollecting':
                errors[int(chunks[2])] += 1
            elif tag == 'prepref':
                ints[int(chunks[4])].remove_interest()
            elif tag == 'pref':
                ints[int(chunks[4])].set_from_string(chunks[3])
            elif kind == 'news':
                ind_news = news.News({'Time': datetime.strptime(chunks[3], "%Y-%m-%d %H:%M:%S.%f"),
                                      'Title': chunks[4], 'Agency': chunks[5], 'Ago': chunks[6],
                                      'Body': chunks[7].rstrip(), 'Label': chunks[2]})
                newsv[int(chunks[1])].add(ind_news)
            elif kind == 'ad':
                ind_ad = ad.Ad({'Time': datetime.strptime(chunks[3], "%Y-%m-%d %H:%M:%S.%f"),
                                'Title': chunks[4], 'URL': chunks[5],
                                'Body': chunks[6].rstrip(), 'cat': "", 'Label': chunks[2]})
                adv[int(chunks[1])].add(ind_ad)
            else:
                # Best effort for old log files: treat the line as an ad
                # record and skip it if it does not parse.
                try:
                    ind_ad = ad.Ad({'Time': datetime.strptime(chunks[2], "%Y-%m-%d %H:%M:%S.%f"),
                                    'Title': chunks[3], 'URL': chunks[4],
                                    'Body': chunks[5].rstrip(), 'cat': "", 'label': chunks[1]})
                    adv[int(chunks[0])].add(ind_ad)
                except Exception:
                    pass

    # The last round has no closing marker, so it is recorded here.
    par_adv.append({'advector': adv, 'newsvector': newsv, 'assignment': ass,
                    'xf': xvfbfails, 'intvector': ints, 'break': breakout,
                    'loadtimes': loadtimes, 'reloads': reloads, 'errors': errors})
    sys.stdout.write(".Scanning complete\n")
    sys.stdout.flush()
    return par_adv, treatnames

Example 14

Project: onigiri
Source File: onigiri.py
View license
    def acquire_ram(self, victim, alternative):
        """Acquire a physical-memory image for each ``:pmem`` target of *victim*.

        For every entry of ``victim.Targets`` whose ``TargetName`` ends in
        ``:pmem`` an image is written below
        ``<self.out_path>\\<victim.MachineNameOrIP>\\pmem``.  When
        ``self.skip`` is set and an image (``.dd4.001`` or ``.dmp``) already
        exists, the target is skipped.

        Args:
            victim: object exposing ``Targets`` and ``MachineNameOrIP``.
            alternative: if true, start a local DumpIt listener and launch
                DumpIt remotely through PsExec (up to three attempts);
                otherwise log in to the target and image the mapped
                physical drive with ``self.ftk_path``.

        Raises:
            SystemExit: via ``sys.exit(1)`` whenever an acquisition step
                fails (after the failure has been logged).
        """
        targets = victim.Targets
        pm = re.compile(r'.*:pmem$')
        self.logger.debug('Issue Discovery Request...')
        for target in targets:
            if not pm.search(target.TargetName):
                continue
            self.logger.info('Physical Memory found: {0} (DiskType={1})'.format(target.TargetName, target.DiskType))

            dest_path = self.out_path + "\\" + victim.MachineNameOrIP
            img_path = dest_path + "\\pmem"
            if self.skip and (os.path.exists(img_path + '.dd4.001') or os.path.exists(img_path + '.dmp')):
                self.logger.info('the RAM image already exists, so skip the acquisition ({0})'.format(img_path))
                continue
            if not os.path.exists(dest_path):
                os.mkdir(dest_path)

            if alternative:
                self.logger.info('acquiring mapped physical memory using PsExec&DumpIt...')
                # The image is received uncompressed; an earlier note in this
                # code says /lznt1 does not work through the network.
                cmd_listen = [self.dumpit_path, '/l', '/f', img_path + '.dmp']
                self.logger.debug('DumpIt Listener cmdline: {}'.format(' '.join(cmd_listen)))
                proc_listen = subprocess.Popen(cmd_listen, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                for i in range(3):
                    self.logger.info('trying... {0}'.format(i+1))
                    dest_host = socket.gethostbyname(socket.gethostname())
                    cmd_psexec = [self.psexec_path, r'\\' + victim.MachineNameOrIP, '-accepteula', '-c', '-f', '-u', self.domain + '\\' + self.user,
                            '-p', self.password, '-r', 'onigiri', self.dumpit_path, '/t', dest_host, '/a', '/d']
                    self.logger.debug('PsExec cmdline: {}'.format(' '.join(cmd_psexec)))
                    proc_psexec = subprocess.Popen(cmd_psexec, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

                    # Poll instead of blocking so the growing image size can
                    # be shown as a progress indicator.
                    # NOTE(review): the child's pipes are only drained after it
                    # exits; a very chatty PsExec could stall on a full pipe.
                    sleep(2)
                    while proc_psexec.poll() is None:
                        sleep(0.1)
                        try:
                            size = os.path.getsize(img_path + '.dmp')
                            sys.stdout.write('\r...{:8d}MB'.format(size // (1024 * 1024)))
                        except OSError:
                            # The listener may not have created the file yet.
                            self.logger.debug('WindowsError: os.path.getsize for {}'.format(img_path + '.dmp'))
                            sleep(1)

                    sys.stdout.write('\r\t\t ...Done.\n')

                    # Collect PsExec's stderr so a failed attempt can be
                    # reported; previously the error text was logged from a
                    # variable that had never been assigned.
                    _, stderr_data = proc_psexec.communicate()
                    if proc_psexec.returncode == 0:
                        break
                    self.logger.error(stderr_data)
                    self.logger.error('PsExec&DumpIt failed.')
                self.logger.debug('PsExec returncode={0}'.format(proc_psexec.returncode))
                if proc_psexec.returncode != 0:
                    proc_listen.terminate()
                    self.logger.critical('RAM acquisition failed (PsExec&DumpIt).')
                    self.logger.error("check with the cmdline: {0}".format(' '.join(cmd_psexec)))
                    sys.exit(1)
                else:
                    _, stderr_data = proc_listen.communicate()
                self.logger.debug('DumpIt Listener returncode={0}'.format(proc_listen.returncode))
                if proc_listen.returncode != 0:
                    self.logger.error(stderr_data)
                    self.logger.critical('RAM acquisition failed (DumpIt Listener).')
                    self.logger.error("check with the cmdline: {0}".format(' '.join(cmd_listen)))
                    sys.exit(1)
                self.logger.info('RAM crashdump image saved: {0}'.format(img_path + '.dmp'))

            else:
                try:
                    self.logger.debug('Login to F-Response Disk...')
                    target.Login()
                except win32com.client.pywintypes.com_error:
                    self.logger.critical('Login to F-Response Disk failed. Aborted in the previous acquisition? Please check the status on GUI console and logout the pmem manually.')
                    sys.exit(1)
                if target.PhysicalDiskMapping == -1:
                    self.logger.critical('PhysicalDiskMapping failed due to timing issue. Simply try again.')
                    sys.exit(1)
                device = r'\\.\PhysicalDrive' + str(target.PhysicalDiskMapping)
                self.logger.info('acquiring mapped physical memory using F-Response&FTKImager ({0})...'.format(device))
                cmd = [self.ftk_path, device, img_path]

                self.logger.debug('FTKImager cmdline: {}'.format(' '.join(cmd)))
                proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, bufsize=-1)
                # Progress is read from the child's stderr through io.open;
                # the original author noted that iterating with readline did
                # not seem to handle '\r'-terminated updates.
                with io.open(proc.stderr.fileno(), closefd=False) as stream:
                    for line in stream:
                        if 'MB' in line or 'complete' in line:
                            sys.stdout.write('\r' + line.rstrip('\n'))
                sys.stdout.write('\n')
                proc.wait()
                self.logger.debug('Remove F-Response Disk...')
                target.Logout()
                self.logger.debug('returncode={0}'.format(proc.returncode))
                if proc.returncode != 0:
                    self.logger.critical('RAM acquisition failed (F-Response&FTKImager).')
                    self.logger.error("check with the cmdline: {0}".format(' '.join(cmd)))
                    sys.exit(1)
                self.logger.info('RAM raw image saved: {0}'.format(img_path + '.dd4.001'))

Example 15

Project: engarde
Source File: ipython_directive.py
View license
    def process_input(self, data, input_prompt, lineno):
        """
        Process data block for INPUT token.

        Parameters
        ----------
        data : tuple
            ``(decorator, input, rest)``: the optional ``@...`` decorator
            (or None), the input source text, and the text that followed it.
        input_prompt : str
            Prompt placed in front of the first input line.
        lineno : int
            Prompt number; only its printed width is used, to size the
            continuation prompt.

        Returns
        -------
        tuple
            ``(ret, input_lines, output, is_doctest, decorator, image_file,
            image_directive)`` where ``ret`` is the list of formatted lines
            and ``output`` is everything read back from ``self.cout``.

        """
        decorator, input_text, rest = data
        image_file = None
        image_directive = None

        has_decorator = decorator is not None
        is_verbatim = decorator == '@verbatim' or self.is_verbatim
        is_doctest = ((has_decorator and decorator.startswith('@doctest'))
                      or self.is_doctest)
        is_suppress = decorator == '@suppress' or self.is_suppress
        is_okexcept = decorator == '@okexcept' or self.is_okexcept
        is_okwarning = decorator == '@okwarning' or self.is_okwarning
        is_savefig = has_decorator and decorator.startswith('@savefig')

        # set the encodings to be used by DecodingStringIO
        # to convert the execution output into unicode if
        # needed. this attrib is set by IpythonDirective.run()
        # based on the specified block options.
        self.cout.set_encodings(self.output_encoding)

        input_lines = input_text.split('\n')

        if len(input_lines) > 1 and input_lines[-1] != "":
            # make sure there's a blank line so splitter buffer gets reset
            input_lines.append('')

        continuation = '   %s:' % ('.' * (len(str(lineno)) + 2))

        if is_savefig:
            image_file, image_directive = self.process_image(decorator)

        ret = []
        is_semicolon = False

        # Hold the execution count, if requested to do so.
        store_history = not (is_suppress and self.hold_count)

        # Note: catch_warnings is not thread safe
        with warnings.catch_warnings(record=True) as ws:
            for i, line in enumerate(input_lines):
                if line.endswith(';'):
                    is_semicolon = True

                if i == 0:
                    # process the first input line
                    if is_verbatim:
                        self.process_input_line('')
                        self.IP.execution_count += 1 # increment it anyway
                    else:
                        # only submit the line in non-verbatim mode
                        self.process_input_line(line, store_history=store_history)
                    formatted_line = '%s %s' % (input_prompt, line)
                else:
                    # process a continuation line
                    if not is_verbatim:
                        self.process_input_line(line, store_history=store_history)

                    formatted_line = '%s %s' % (continuation, line)

                if not is_suppress:
                    ret.append(formatted_line)

        if not is_suppress and len(rest.strip()) and is_verbatim:
            # the "rest" is the standard output of the
            # input, which needs to be added in
            # verbatim mode
            ret.append(rest)

        self.cout.seek(0)
        output = self.cout.read()
        if not is_suppress and not is_semicolon:
            ret.append(output)
        elif is_semicolon: # get spacing right
            ret.append('')

        # context information
        filename = self.state.document.current_source
        lineno = self.state.document.current_line

        # output any exceptions raised during execution to stdout
        # unless :okexcept: has been specified.
        if not is_okexcept and "Traceback" in output:
            s = "\nException in %s at block ending on line %s\n" % (filename, lineno)
            s += "Specify :okexcept: as an option in the ipython:: block to suppress this message\n"
            sys.stdout.write('\n\n>>>' + ('-' * 73))
            sys.stdout.write(s)
            sys.stdout.write(output)
            sys.stdout.write('<<<' + ('-' * 73) + '\n\n')

        # output any warning raised during execution to stdout
        # unless :okwarning: has been specified.
        if not is_okwarning:
            for w in ws:
                s = "\nWarning in %s at block ending on line %s\n" % (filename, lineno)
                s += "Specify :okwarning: as an option in the ipython:: block to suppress this message\n"
                sys.stdout.write('\n\n>>>' + ('-' * 73))
                sys.stdout.write(s)
                sys.stdout.write('-' * 76 + '\n')
                s = warnings.formatwarning(w.message, w.category,
                                           w.filename, w.lineno, w.line)
                sys.stdout.write(s)
                sys.stdout.write('<<<' + ('-' * 73) + '\n')

        # Rewind before truncating: io.StringIO.truncate() leaves the stream
        # position where read() put it, so without the seek the next block's
        # output would be written after a run of NUL padding.
        self.cout.seek(0)
        self.cout.truncate(0)
        return (ret, input_lines, output, is_doctest, decorator, image_file,
                    image_directive)

Example 16

Project: astor
Source File: rtrip.py
View license
def convert(srctree, dsttree=dsttree, readonly=False, dumpall=False):
    """Round-trip every Python file found under srctree through its AST.

    Each file is parsed, regenerated as source text and parsed again; the
    two trees are stripped and compared.  Unless ``readonly`` is set, the
    destination tree is wiped first and the regenerated source is written
    into it, mirroring the source layout.  Textual dumps of the trees are
    written next to the output (or, in ``readonly`` mode with ``dumpall``,
    printed to stdout) when the dumps differ or ``dumpall`` is set.

    Problems are reported through ``logging``; nothing is returned.
    """

    allow_ast_comparison()

    read_ast = code_to_ast.parse_file
    list_sources = code_to_ast.find_py_files
    srctree = os.path.normpath(srctree)
    writing = not readonly

    if writing:
        dsttree = os.path.normpath(dsttree)
        logging.info('')
        logging.info('Trashing ' + dsttree)
        shutil.rmtree(dsttree, True)

    src_leftovers = set()
    dst_leftovers = set()
    badfiles = set()
    broken = []
    # TODO: When issue #26 resolved, remove UnicodeDecodeError
    unparsable = (SyntaxError, UnicodeDecodeError)

    def save_text(path, text):
        """Write text to path, recording the path if it cannot be encoded."""
        try:
            with open(path, 'w') as out:
                out.write(text)
        except UnicodeEncodeError:
            badfiles.add(path)

    current_dir = None

    for srcpath, fname in list_sources(srctree, dsttree if writing else None):
        # A new source directory needs its counterpart in the output tree.
        if writing and srcpath != current_dir:
            current_dir = srcpath
            if srcpath < srctree:
                assert srctree.startswith(srcpath)
                dstpath = dsttree
            else:
                dstpath = srcpath.replace(srctree, dsttree, 1)
                if not dstpath.startswith(dsttree):
                    raise ValueError("%s not a subdirectory of %s" %
                                     (dstpath, dsttree))
            os.makedirs(dstpath)

        srcfname = os.path.join(srcpath, fname)
        logging.info('Converting %s' % srcfname)
        try:
            srcast = read_ast(srcfname)
        except unparsable:
            badfiles.add(srcfname)
            continue

        dsttxt = to_source(srcast)

        if writing:
            dstfname = os.path.join(dstpath, fname)
            save_text(dstfname, dsttxt)

        # Sanity check: the regenerated source must parse back to the
        # same tree.
        try:
            if readonly:
                dstast = ast.parse(dsttxt)
            else:
                dstast = read_ast(dstfname)
        except SyntaxError:
            dstast = []
        src_leftovers.update(strip_tree(srcast))
        dst_leftovers.update(strip_tree(dstast))
        if not (dumpall or srcast != dstast):
            continue

        srcdump = dump_tree(srcast)
        dstdump = dump_tree(dstast)
        bad = srcdump != dstdump
        logging.warning('    calculating dump -- %s' %
                        ('bad' if bad else 'OK'))
        if bad:
            broken.append(srcfname)
        if not (dumpall or bad):
            continue

        if writing:
            stem = dstfname[:-3]
            save_text(stem + '.srcdmp', srcdump)
            save_text(stem + '.dstdmp', dstdump)
        elif dumpall:
            indent = '\n    '
            if dstdump == srcdump:
                new_ast_text = '(same as old)'
            else:
                new_ast_text = dstdump.replace('\n', indent)
            sys.stdout.write('\n\nAST:\n\n    ')
            sys.stdout.write(srcdump.replace('\n', indent))
            sys.stdout.write('\n\nDecompile:\n\n    ')
            sys.stdout.write(dsttxt.replace('\n', indent))
            sys.stdout.write('\n\nNew AST:\n\n    ')
            sys.stdout.write(new_ast_text)
            sys.stdout.write('\n')

    if badfiles:
        logging.warning('\nFiles not processed due to syntax errors:')
        for name in sorted(badfiles):
            logging.warning('    %s' % name)
    if broken:
        logging.warning('\nFiles failed to round-trip to AST:')
        for name in broken:
            logging.warning('    %s' % name)

    # Attributes that strip_tree is expected to remove; anything else is
    # reported as an error.
    ok_to_strip = set(
        'col_offset _precedence _use_parens lineno _p_op _pp'.split())
    bad_nodes = (dst_leftovers | src_leftovers) - ok_to_strip
    if bad_nodes:
        logging.error('\nERROR -- UNKNOWN NODES STRIPPED: %s' % bad_nodes)
    logging.info('\n')

Example 17

Project: fixofx
Source File: webunittest.py
View license
    def fetch(self, url, postdata=None, server=None, port=None, protocol=None,
                    ok_codes=None):
        '''Run a single test request to the indicated url. Use the POST data
        if supplied.

        Raises failureException if the returned data contains any of the
        strings indicated to be Error Content.
        Returns a HTTPReponse object wrapping the response from the server.
        '''
        # see if the url is fully-qualified (not just a path)
        t_protocol, t_server, t_url, x, t_args, x = urlparse.urlparse(url)
        if t_server:
            protocol = t_protocol
            if ':' in t_server:
                server, port = t_server.split(':')
            else:
                server = t_server
                if protocol == 'http':
                    port = '80'
                else:
                    port = '443'
            url = t_url
            if t_args:
                url = url + '?' + t_args
            # ignore the machine name if the URL is for localhost
            if t_server == 'localhost':
                server = None
        elif not server:
            # no server was specified with this fetch, or in the URL, so
            # see if there's a base URL to use.
            base = self.get_base_url()
            if base:
                t_protocol, t_server, t_url, x, x, x = urlparse.urlparse(base)
                if t_protocol:
                    protocol = t_protocol
                if t_server:
                    server = t_server
                if t_url:
                    url = urlparse.urljoin(t_url, url)

        # TODO: allow override of the server and port from the URL!
        if server is None: server = self.server
        if port is None: port = self.port
        if protocol is None: protocol = self.protocol
        if ok_codes is None: ok_codes = self.expect_codes

        if protocol == 'http':
            handler = self.scheme_handlers.get('http')
            h = handler(server, int(port))

            if int(port) == 80:
               host_header = server
            else: 
               host_header = '%s:%s'%(server, port)
        elif protocol == 'https':
            #if httpslib is None:
                #raise ValueError, "Can't fetch HTTPS: M2Crypto not installed"
            handler = self.scheme_handlers.get('https')
            h = handler(server, int(port))
            
            if int(port) == 443:
               host_header = server
            else: 
               host_header = '%s:%s'%(server, port)
        else:
            raise ValueError, protocol

        params = None
        if postdata:
            for field,value in postdata.items():
                if type(value) == type({}):
                    postdata[field] = []
                    for k,selected in value.items():
                        if selected: postdata[field].append(k)

            # Do a post with the data file
            params = mimeEncode(postdata)
            h.putrequest('POST', url)
            h.putheader('Content-type', 'multipart/form-data; boundary=%s'%
                boundary)
            h.putheader('Content-length', str(len(params)))
        else:
            # Normal GET
            h.putrequest('GET', url)

        # Other Full Request headers
        if self.authinfo:
            h.putheader('Authorization', "Basic %s"%self.authinfo)
        h.putheader('Host', host_header)

        # Send cookies
        #  - check the domain, max-age (seconds), path and secure
        #    (http://www.ietf.org/rfc/rfc2109.txt)
        cookies_used = []
        cookie_list = []
        for domain, cookies in self.cookies.items():
            # check cookie domain
            if not server.endswith(domain):
                continue
            for path, cookies in cookies.items():
                # check that the path matches
                urlpath = urlparse.urlparse(url)[2]
                if not urlpath.startswith(path) and not (path == '/' and
                        urlpath == ''):
                    continue
                for sendcookie in cookies.values():
                    # and that the cookie is or isn't secure
                    if sendcookie['secure'] and protocol != 'https':
                        continue
                    # TODO: check max-age
                    cookie_list.append("%s=%s;"%(sendcookie.key,
                        sendcookie.coded_value))
                    cookies_used.append(sendcookie.key)

        if cookie_list:
            h.putheader('Cookie', ' '.join(cookie_list))

        # check that we sent the cookies we expected to
        if self.expect_cookies is not None:
            assert cookies_used == self.expect_cookies, \
                "Didn't use all cookies (%s expected, %s used)"%(
                self.expect_cookies, cookies_used)

        # finish the headers
        h.endheaders()

        if params is not None:
            h.send(params)

        # handle the reply
        errcode, errmsg, headers = h.getreply()

        # get the body and save it
        f = h.getfile()
        g = cStringIO.StringIO()
        d = f.read()
        while d:
            g.write(d)
            d = f.read()
        response = HTTPResponse(self.cookies, protocol, server, port, url,
            errcode, errmsg, headers, g.getvalue(), self.error_content)
        f.close()

        if errcode not in ok_codes:
            if VERBOSE:
                sys.stdout.write('e')
                sys.stdout.flush()
            raise HTTPError(response)

        # decode the cookies
        if self.accept_cookies:
            try:
                # decode the cookies and update the cookies store
                cookie.decodeCookies(url, server, headers, self.cookies)
            except:
                if VERBOSE:
                    sys.stdout.write('c')
                    sys.stdout.flush()
                raise

        # Check errors
        if self.error_content:
            data = response.body
            for content in self.error_content:
                if data.find(content) != -1:
                    msg = "Matched error: %s"%content
                    if hasattr(self, 'results') and self.results:
                        self.writeError(url, msg)
                    self.log('Matched error'+`(url, content)`, data)
                    if VERBOSE:
                        sys.stdout.write('c')
                        sys.stdout.flush()
                    raise self.failureException, msg

        if VERBOSE:
            sys.stdout.write('_')
            sys.stdout.flush()
        return response

Example 18

Project: mavelous
Source File: miniterm.py
View license
    def writer(self):
        """\
        Loop and copy console->serial until EXITCHARCTER character is
        found. When MENUCHARACTER is found, interpret the next key
        locally.
        """
        menu_active = False
        try:
            while self.alive:
                try:
                    b = console.getkey()
                except KeyboardInterrupt:
                    b = serial.to_bytes([3])
                c = character(b)
                if menu_active:
                    if c == MENUCHARACTER or c == EXITCHARCTER: # Menu character again/exit char -> send itself
                        self.serial.write(b)                    # send character
                        if self.echo:
                            sys.stdout.write(c)
                    elif c == '\x15':                       # CTRL+U -> upload file
                        sys.stderr.write('\n--- File to upload: ')
                        sys.stderr.flush()
                        console.cleanup()
                        filename = sys.stdin.readline().rstrip('\r\n')
                        if filename:
                            try:
                                file = open(filename, 'r')
                                sys.stderr.write('--- Sending file %s ---\n' % filename)
                                while True:
                                    line = file.readline().rstrip('\r\n')
                                    if not line:
                                        break
                                    self.serial.write(line)
                                    self.serial.write('\r\n')
                                    # Wait for output buffer to drain.
                                    self.serial.flush()
                                    sys.stderr.write('.')   # Progress indicator.
                                sys.stderr.write('\n--- File %s sent ---\n' % filename)
                            except IOError, e:
                                sys.stderr.write('--- ERROR opening file %s: %s ---\n' % (filename, e))
                        console.setup()
                    elif c in '\x08hH?':                    # CTRL+H, h, H, ? -> Show help
                        sys.stderr.write(get_help_text())
                    elif c == '\x12':                       # CTRL+R -> Toggle RTS
                        self.rts_state = not self.rts_state
                        self.serial.setRTS(self.rts_state)
                        sys.stderr.write('--- RTS %s ---\n' % (self.rts_state and 'active' or 'inactive'))
                    elif c == '\x04':                       # CTRL+D -> Toggle DTR
                        self.dtr_state = not self.dtr_state
                        self.serial.setDTR(self.dtr_state)
                        sys.stderr.write('--- DTR %s ---\n' % (self.dtr_state and 'active' or 'inactive'))
                    elif c == '\x02':                       # CTRL+B -> toggle BREAK condition
                        self.break_state = not self.break_state
                        self.serial.setBreak(self.break_state)
                        sys.stderr.write('--- BREAK %s ---\n' % (self.break_state and 'active' or 'inactive'))
                    elif c == '\x05':                       # CTRL+E -> toggle local echo
                        self.echo = not self.echo
                        sys.stderr.write('--- local echo %s ---\n' % (self.echo and 'active' or 'inactive'))
                    elif c == '\x09':                       # CTRL+I -> info
                        self.dump_port_settings()
                    elif c == '\x01':                       # CTRL+A -> cycle escape mode
                        self.repr_mode += 1
                        if self.repr_mode > 3:
                            self.repr_mode = 0
                        sys.stderr.write('--- escape data: %s ---\n' % (
                            REPR_MODES[self.repr_mode],
                        ))
                    elif c == '\x0c':                       # CTRL+L -> cycle linefeed mode
                        self.convert_outgoing += 1
                        if self.convert_outgoing > 2:
                            self.convert_outgoing = 0
                        self.newline = NEWLINE_CONVERISON_MAP[self.convert_outgoing]
                        sys.stderr.write('--- line feed %s ---\n' % (
                            LF_MODES[self.convert_outgoing],
                        ))
                    elif c in 'pP':                         # P -> change port
                        sys.stderr.write('\n--- Enter port name: ')
                        sys.stderr.flush()
                        console.cleanup()
                        try:
                            port = sys.stdin.readline().strip()
                        except KeyboardInterrupt:
                            port = None
                        console.setup()
                        if port and port != self.serial.port:
                            # reader thread needs to be shut down
                            self._stop_reader()
                            # save settings
                            settings = self.serial.getSettingsDict()
                            try:
                                try:
                                    new_serial = serial.serial_for_url(port, do_not_open=True)
                                except AttributeError:
                                    # happens when the installed pyserial is older than 2.5. use the
                                    # Serial class directly then.
                                    new_serial = serial.Serial()
                                    new_serial.port = port
                                # restore settings and open
                                new_serial.applySettingsDict(settings)
                                new_serial.open()
                                new_serial.setRTS(self.rts_state)
                                new_serial.setDTR(self.dtr_state)
                                new_serial.setBreak(self.break_state)
                            except Exception, e:
                                sys.stderr.write('--- ERROR opening new port: %s ---\n' % (e,))
                                new_serial.close()
                            else:
                                self.serial.close()
                                self.serial = new_serial
                                sys.stderr.write('--- Port changed to: %s ---\n' % (self.serial.port,))
                            # and restart the reader thread
                            self._start_reader()
                    elif c in 'bB':                         # B -> change baudrate
                        sys.stderr.write('\n--- Baudrate: ')
                        sys.stderr.flush()
                        console.cleanup()
                        backup = self.serial.baudrate
                        try:
                            self.serial.baudrate = int(sys.stdin.readline().strip())
                        except ValueError, e:
                            sys.stderr.write('--- ERROR setting baudrate: %s ---\n' % (e,))
                            self.serial.baudrate = backup
                        else:
                            self.dump_port_settings()
                        console.setup()
                    elif c == '8':                          # 8 -> change to 8 bits
                        self.serial.bytesize = serial.EIGHTBITS
                        self.dump_port_settings()
                    elif c == '7':                          # 7 -> change to 8 bits
                        self.serial.bytesize = serial.SEVENBITS
                        self.dump_port_settings()
                    elif c in 'eE':                         # E -> change to even parity
                        self.serial.parity = serial.PARITY_EVEN
                        self.dump_port_settings()
                    elif c in 'oO':                         # O -> change to odd parity
                        self.serial.parity = serial.PARITY_ODD
                        self.dump_port_settings()
                    elif c in 'mM':                         # M -> change to mark parity
                        self.serial.parity = serial.PARITY_MARK
                        self.dump_port_settings()
                    elif c in 'sS':                         # S -> change to space parity
                        self.serial.parity = serial.PARITY_SPACE
                        self.dump_port_settings()
                    elif c in 'nN':                         # N -> change to no parity
                        self.serial.parity = serial.PARITY_NONE
                        self.dump_port_settings()
                    elif c == '1':                          # 1 -> change to 1 stop bits
                        self.serial.stopbits = serial.STOPBITS_ONE
                        self.dump_port_settings()
                    elif c == '2':                          # 2 -> change to 2 stop bits
                        self.serial.stopbits = serial.STOPBITS_TWO
                        self.dump_port_settings()
                    elif c == '3':                          # 3 -> change to 1.5 stop bits
                        self.serial.stopbits = serial.STOPBITS_ONE_POINT_FIVE
                        self.dump_port_settings()
                    elif c in 'xX':                         # X -> change software flow control
                        self.serial.xonxoff = (c == 'X')
                        self.dump_port_settings()
                    elif c in 'rR':                         # R -> change hardware flow control
                        self.serial.rtscts = (c == 'R')
                        self.dump_port_settings()
                    else:
                        sys.stderr.write('--- unknown menu character %s --\n' % key_description(c))
                    menu_active = False
                elif c == MENUCHARACTER: # next char will be for menu
                    menu_active = True
                elif c == EXITCHARCTER: 
                    self.stop()
                    break                                   # exit app
                elif c == '\n':
                    self.serial.write(self.newline)         # send newline character(s)
                    if self.echo:
                        sys.stdout.write(c)                 # local echo is a real newline in any case
                        sys.stdout.flush()
                else:
                    self.serial.write(b)                    # send byte
                    if self.echo:
                        sys.stdout.write(c)
                        sys.stdout.flush()
        except:
            self.alive = False
            raise

Example 19

Project: rPGA
Source File: discover.py
View license
  def discover_junctions_bychrom(self,chrom,hetsnps,snpids,editpositions):
    """Report junctions specific to hap1, hap2, both haplotypes or the reference.

    Alignments on ``'chr' + str(chrom)`` are read from ``self.hap1Bam``,
    ``self.hap2Bam`` and ``self.refBam``.  For the two haplotype files,
    alignments whose ``NH`` tag is greater than 1 are skipped and the rest
    are bucketed by the category code returned by
    ``self.haplotype_specific_read`` (the codes used here are 0, 2 and 3;
    their exact meaning is defined by that helper).  Junction coordinates are
    then collected per source and a junction is reported when it

    * is absent from the other sources (or, for the combined list, present
      in both haplotypes but absent from the reference),
    * is supported by more than one distinct alignment start position, and
    * has at least one entry from ``self.get_splicesite_snp``.

    Args:
      chrom: chromosome identifier without the ``chr`` prefix.
      hetsnps: passed through to ``self.haplotype_specific_read``.
      snpids: passed through to ``self.get_splicesite_snp``.
      editpositions: passed through to ``self.haplotype_specific_read``.

    Returns:
      A 4-tuple of strings (hap1, hap2, hap1-and-hap2, reference).  Each
      string holds newline-joined, space-separated lines of the form
      ``chr<chrom> <start> <end> J_<n>_<class>_<snps> <strand> <freq>``.
    """
    region = 'chr'+str(chrom)

    def load_haplotype(bam, hap_index):
      # Bucket read names by category code and keep the alignments per name.
      names_by_category, reads_by_name = defaultdict(list), defaultdict(list)
      for read in bam.fetch(region):
        tags = self.get_tags(read)
        if int(tags['NH'])>1: ## read is multimapped, deal with separately
          continue
        category,_editpos,_snppos,_refalt = self.haplotype_specific_read(
          read,hetsnps,hap_index,editpositions)
        names_by_category[category].append(read.qname)
        reads_by_name[read.qname].append(read)
      return names_by_category, reads_by_name

    def same_starts(primary, other):
      # True when every alignment in `primary` starts where its counterpart
      # in `other` starts.  If `other` has fewer alignments the positions
      # cannot all match; the previous index-based check raised IndexError
      # in that situation.
      return len(other) >= len(primary) and all(
        a.pos == b.pos for a, b in zip(primary, other))

    def select_specific(candidates, own_reads, other_reads):
      # Keep candidates that are also aligned in the other haplotype at the
      # same start positions but with strictly fewer mismatches here.
      chosen = set()
      for qname in candidates:
        if qname not in other_reads:
          continue
        mine, theirs = own_reads[qname], other_reads[qname]
        if (same_starts(mine, theirs) and
            self.num_mismatches(mine[0]) < self.num_mismatches(theirs[0])):
          chosen.add(qname)
      return chosen

    bam1 = bam2 = bamr = None
    try:
      bam1 = pysam.Samfile(self.hap1Bam)
      bam2 = pysam.Samfile(self.hap2Bam)

      sys.stdout.write( "Reading in hap1 bam file: "+ self.hap1Bam + "\n")
      snpreads1, reads1 = load_haplotype(bam1, 0)
      sys.stdout.write( "Reading in hap2 bam file: "+ self.hap2Bam+"\n")
      snpreads2, reads2 = load_haplotype(bam2, 1)

      sys.stdout.write("Assign haplotype specific reads\n")
      # Sets keep the membership tests below constant-time; only membership
      # is ever needed, so ordering is irrelevant.
      hap2_candidates = set(snpreads2[0])
      conflicting = set(q for q in snpreads1[0] if q in hap2_candidates)
      conflicting.update(snpreads1[2], snpreads2[2])
      multi1, multi2 = set(snpreads1[3]), set(snpreads2[3])
      candidates1 = set(q for q in snpreads1[0]
                        if q not in conflicting and q not in multi2)
      candidates2 = set(q for q in snpreads2[0]
                        if q not in conflicting and q not in multi1)
      spec1 = select_specific(candidates1, reads1, reads2)
      spec2 = select_specific(candidates2, reads2, reads1)

      geneGroup,gtf,geneInfo = self.read_in_gtf()
      bamr = pysam.Samfile(self.refBam,"rb")

      # junctions[source][(start, end)] -> set of supporting start positions
      junctions = defaultdict(lambda: defaultdict(set))
      for label, reads, other_spec in (('1', reads1, spec2), ('2', reads2, spec1)):
        for qname in reads:
          if qname in other_spec or qname in conflicting:
            continue
          for read in reads[qname]:
            for start,end in self.get_junction_coordinates(read):
              junctions[label][start,end].add(read.pos)

      for read in bamr.fetch(region):
        if read.qname in conflicting:
          continue
        for start,end in self.get_junction_coordinates(read):
          junctions['R'][start,end].add(read.pos)
    finally:
      # The handles stay open until all reads have been processed and are
      # released even when one of the steps above raises.
      for handle in (bam1, bam2, bamr):
        if handle is not None:
          handle.close()

    # Number of distinct supporting start positions per junction and source.
    nR = {}
    for label in ('1','2','R'):
      nR[label] = {j:len(junctions[label][j]) for j in junctions[label]}

    def has_splicesite_snp(j):
      return len(self.get_splicesite_snp(j[0],j[1],snpids))>0

    specific = {}
    specific['1'] = [j for j in nR['1']
                     if j not in nR['2'] and j not in nR['R']
                     and nR['1'][j]>1 and has_splicesite_snp(j)]
    specific['2'] = [j for j in nR['2']
                     if j not in nR['1'] and j not in nR['R']
                     and nR['2'][j]>1 and has_splicesite_snp(j)]
    specific['12'] = [j for j in nR['1']
                      if j in nR['2'] and j not in nR['R']
                      and nR['1'][j]>1 and nR['2'][j]>1 and has_splicesite_snp(j)]
    specific['R'] = [j for j in nR['R']
                     if j not in nR['1'] and j not in nR['2']
                     and nR['R'][j]>1 and has_splicesite_snp(j)]

    def overlapping_support(label, start, end):
      # Summed support of the junctions of `label` that overlap (start, end)
      # and whose characterize_junction() class is "R".
      return sum(count for j, count in nR[label].items()
                 if (j[0]<end and j[1]>start and
                     self.characterize_junction(str(chrom),j[0],j[1],geneGroup,gtf,geneInfo)[0]=="R"))

    def bed_line(counter, start, end, freq):
      n_or_r,strand = self.characterize_junction(str(chrom),start,end,geneGroup,gtf,geneInfo)
      snp = ','.join(self.get_splicesite_snp(start,end,snpids))
      return (region+' '+str(start)+' '+str(end)+' J_'+str(counter)+'_'+n_or_r+'_'+snp
              +' '+str(strand)+' '+str(freq))

    bed = defaultdict(list)
    for label in ('1','2','R'):
      for counter,(start,end) in enumerate(specific[label], 1):
        support = overlapping_support(label,start,end)
        if support > 0:
          freq = float(nR[label][start,end])/float(support)
        else:
          freq = 1  # int on purpose: the emitted text stays "1", not "1.0"
        bed[label].append(bed_line(counter,start,end,freq))

    # Junctions shared by both haplotypes: report the mean of the two
    # per-haplotype frequencies.
    for counter,(start,end) in enumerate(specific['12'], 1):
      freqs = []
      for label in ('1','2'):
        support = overlapping_support(label,start,end)
        freqs.append(1.0 if support == 0
                     else float(nR[label][start,end])/float(support))
      bed['12'].append(bed_line(counter,start,end,(freqs[0]+freqs[1])/2))

    return '\n'.join(bed['1']),'\n'.join(bed['2']),'\n'.join(bed['12']), '\n'.join(bed['R'])

Example 20

Project: LasagneNLP
Source File: bi_lstm.py
View license
def main():
    """Train and evaluate a bi-directional LSTM sequence labeller.

    Command-line entry point.  It parses the options, loads the
    train/dev/test data with ``data_processor.load_dataset_sequence_labeling``,
    builds the network (input layer -> BiLSTM -> softmax dense layer) and
    trains for at most 1000 epochs.

    After each epoch the dev set is evaluated.  Unless the epoch is worse
    than the best seen so far on *both* summed dev loss and dev accuracy, the
    test set is evaluated as well and the best-loss / best-accuracy test
    results are updated.  Training stops once ``--patience`` consecutive
    epochs were worse on both counts.  The learning rate is decayed as
    ``learning_rate / (1 + epoch * decay_rate)`` and the training function is
    recompiled with it after every epoch.
    """
    parser = argparse.ArgumentParser(description='Tuning with bi-directional LSTM')
    parser.add_argument('--fine_tune', action='store_true', help='Fine tune the word embeddings')
    parser.add_argument('--embedding', choices=['word2vec', 'glove', 'senna'], help='Embedding for words',
                        required=True)
    parser.add_argument('--embedding_dict', default='data/word2vec/GoogleNews-vectors-negative300.bin',
                        help='path for embedding dict')
    parser.add_argument('--batch_size', type=int, default=10, help='Number of sentences in each batch')
    parser.add_argument('--num_units', type=int, default=100, help='Number of hidden units in LSTM')
    parser.add_argument('--learning_rate', type=float, default=0.1, help='Learning rate')
    parser.add_argument('--decay_rate', type=float, default=0.1, help='Decay rate of learning rate')
    parser.add_argument('--grad_clipping', type=float, default=0, help='Gradient clipping')
    parser.add_argument('--gamma', type=float, default=1e-6, help='weight for regularization')
    parser.add_argument('--peepholes', action='store_true', help='Peepholes for LSTM')
    parser.add_argument('--oov', choices=['random', 'embedding'], help='Embedding for oov word', required=True)
    parser.add_argument('--update', choices=['sgd', 'momentum', 'nesterov'], help='update algorithm', default='sgd')
    parser.add_argument('--regular', choices=['none', 'l2'], help='regularization for training',
                        required=True)
    parser.add_argument('--dropout', action='store_true', help='Apply dropout layers')
    # Previously hard-coded to 5; the default keeps that behaviour.
    parser.add_argument('--patience', type=int, default=5, help='Patience for early stopping')
    parser.add_argument('--output_prediction', action='store_true', help='Output predictions to temp files')
    parser.add_argument('--train')  # "data/POS-penn/wsj/split1/wsj1.train.original"
    parser.add_argument('--dev')  # "data/POS-penn/wsj/split1/wsj1.dev.original"
    parser.add_argument('--test')  # "data/POS-penn/wsj/split1/wsj1.test.original"

    args = parser.parse_args()

    def construct_input_layer():
        """Return the network's first layer; uses names bound later in main()."""
        if fine_tune:
            # Inputs are indices that are looked up in the embedding table.
            layer_input = lasagne.layers.InputLayer(shape=(None, max_length), input_var=input_var, name='input')
            layer_embedding = lasagne.layers.EmbeddingLayer(layer_input, input_size=alphabet_size,
                                                            output_size=embedd_dim,
                                                            W=embedd_table, name='embedding')
            return layer_embedding
        else:
            # Inputs already carry an embedding dimension.
            layer_input = lasagne.layers.InputLayer(shape=(None, max_length, embedd_dim), input_var=input_var,
                                                    name='input')
            return layer_input

    logger = utils.get_logger("BiLSTM")
    fine_tune = args.fine_tune
    oov = args.oov
    regular = args.regular
    embedding = args.embedding
    embedding_path = args.embedding_dict
    train_path = args.train
    dev_path = args.dev
    test_path = args.test
    update_algo = args.update
    grad_clipping = args.grad_clipping
    peepholes = args.peepholes
    gamma = args.gamma
    output_predict = args.output_prediction
    dropout = args.dropout
    patience = args.patience

    X_train, Y_train, mask_train, X_dev, Y_dev, mask_dev, X_test, Y_test, mask_test, \
    embedd_table, label_alphabet, _, _, _, _ = data_processor.load_dataset_sequence_labeling(train_path, dev_path,
                                                                                             test_path, oov=oov,
                                                                                             fine_tune=fine_tune,
                                                                                             embedding=embedding,
                                                                                             embedding_path=embedding_path)
    num_labels = label_alphabet.size() - 1

    logger.info("constructing network...")
    # create variables
    target_var = T.imatrix(name='targets')
    mask_var = T.matrix(name='masks', dtype=theano.config.floatX)
    if fine_tune:
        input_var = T.imatrix(name='inputs')
        num_data, max_length = X_train.shape
        alphabet_size, embedd_dim = embedd_table.shape
    else:
        input_var = T.tensor3(name='inputs', dtype=theano.config.floatX)
        num_data, max_length, embedd_dim = X_train.shape

    # construct input and mask layers
    layer_incoming = construct_input_layer()

    layer_mask = lasagne.layers.InputLayer(shape=(None, max_length), input_var=mask_var, name='mask')

    # construct bi-lstm
    num_units = args.num_units
    bi_lstm = build_BiLSTM(layer_incoming, num_units, mask=layer_mask, grad_clipping=grad_clipping,
                           peepholes=peepholes, dropout=dropout)

    # reshape bi-rnn to [batch * max_length, num_units]
    bi_lstm = lasagne.layers.reshape(bi_lstm, (-1, [2]))

    # construct output layer (dense layer with softmax)
    layer_output = lasagne.layers.DenseLayer(bi_lstm, num_units=num_labels, nonlinearity=nonlinearities.softmax,
                                             name='softmax')

    # get output of bi-rnn shape=[batch * max_length, #label]
    prediction_train = lasagne.layers.get_output(layer_output)
    prediction_eval = lasagne.layers.get_output(layer_output, deterministic=True)
    final_prediction = T.argmax(prediction_eval, axis=1)

    # flat target_var to vector
    target_var_flatten = target_var.flatten()
    # flat mask_var to vector
    mask_var_flatten = mask_var.flatten()

    # compute loss
    num_loss = mask_var_flatten.sum(dtype=theano.config.floatX)
    # for training, we use mean of loss over number of labels
    loss_train = lasagne.objectives.categorical_crossentropy(prediction_train, target_var_flatten)
    loss_train = (loss_train * mask_var_flatten).sum(dtype=theano.config.floatX) / num_loss
    # l2 regularization?
    if regular == 'l2':
        l2_penalty = lasagne.regularization.regularize_network_params(layer_output, lasagne.regularization.l2)
        loss_train = loss_train + gamma * l2_penalty

    loss_eval = lasagne.objectives.categorical_crossentropy(prediction_eval, target_var_flatten)
    loss_eval = (loss_eval * mask_var_flatten).sum(dtype=theano.config.floatX) / num_loss

    # compute number of correct labels
    corr_train = lasagne.objectives.categorical_accuracy(prediction_train, target_var_flatten)
    corr_train = (corr_train * mask_var_flatten).sum(dtype=theano.config.floatX)

    corr_eval = lasagne.objectives.categorical_accuracy(prediction_eval, target_var_flatten)
    corr_eval = (corr_eval * mask_var_flatten).sum(dtype=theano.config.floatX)

    # Create update expressions for training.
    # hyper parameters to tune: learning rate, momentum, regularization.
    batch_size = args.batch_size
    learning_rate = args.learning_rate
    decay_rate = args.decay_rate
    momentum = 0.9
    params = lasagne.layers.get_all_params(layer_output, trainable=True)
    updates = utils.create_updates(loss_train, params, update_algo, learning_rate, momentum=momentum)

    # Compile a function performing a training step on a mini-batch
    train_fn = theano.function([input_var, target_var, mask_var], [loss_train, corr_train, num_loss], updates=updates)
    # Compile a second function evaluating the loss and accuracy of network
    eval_fn = theano.function([input_var, target_var, mask_var], [loss_eval, corr_eval, num_loss, final_prediction])

    def evaluate(X, Y, mask, prediction_file):
        """Run eval_fn over one data set.

        Returns (loss summed over batches weighted by the batch mask total,
        number of correct labels, mask total).  Predictions are written to
        ``prediction_file`` per batch when --output_prediction is set.
        """
        total_err = 0.0
        total_corr = 0.0
        total = 0
        for batch in utils.iterate_minibatches(X, Y, masks=mask, batch_size=batch_size):
            inputs, targets, masks, _ = batch
            err, corr, num, predictions = eval_fn(inputs, targets, masks)
            total_err += err * num
            total_corr += corr
            total += num
            if output_predict:
                utils.output_predictions(predictions, targets, masks, prediction_file, label_alphabet)
        return total_err, total_corr, total

    def report(name, err, corr, total):
        """Print the loss/accuracy summary line for the data set called ``name``."""
        print('%s loss: %.4f, corr: %d, total: %d, acc: %.2f%%' % (
            name, err / total, corr, total, corr * 100 / total))

    # Finally, launch the training loop.
    logger.info(
        "Start training: %s with regularization: %s(%f), dropout: %s, fine tune: %s (#training data: %d, batch size: %d, clip: %.1f, peepholes: %s)..." \
        % (
            update_algo, regular, (0.0 if regular == 'none' else gamma), dropout, fine_tune, num_data, batch_size,
            grad_clipping,
            peepholes))
    num_batches = num_data // batch_size
    num_epochs = 1000
    best_loss = 1e+12
    best_acc = 0.0
    best_epoch_loss = 0
    best_epoch_acc = 0
    best_loss_test_err = 0.
    best_loss_test_corr = 0.
    best_acc_test_err = 0.
    best_acc_test_corr = 0.
    stop_count = 0
    lr = learning_rate
    for epoch in range(1, num_epochs + 1):
        print('Epoch %d (learning rate=%.4f, decay rate=%.4f): ' % (epoch, lr, decay_rate))
        train_err = 0.0
        train_corr = 0.0
        train_total = 0
        start_time = time.time()
        num_back = 0
        train_batches = 0
        for batch in utils.iterate_minibatches(X_train, Y_train, masks=mask_train, batch_size=batch_size, shuffle=True):
            inputs, targets, masks, _ = batch
            err, corr, num = train_fn(inputs, targets, masks)
            train_err += err * num
            train_corr += corr
            train_total += num
            train_batches += 1
            time_ave = (time.time() - start_time) / train_batches
            # num_batches is rounded down, so the batch counter can overtake
            # it; clamp to keep the estimate from going negative.
            time_left = max(num_batches - train_batches, 0) * time_ave

            # update log: erase the previous progress text and rewrite it in place
            sys.stdout.write("\b" * num_back)
            log_info = 'train: %d/%d loss: %.4f, acc: %.2f%%, time left (estimated): %.2fs' % (
                min(train_batches * batch_size, num_data), num_data,
                train_err / train_total, train_corr * 100 / train_total, time_left)
            sys.stdout.write(log_info)
            # No newline is written, so without an explicit flush the text
            # would stay in the stdout buffer.
            sys.stdout.flush()
            num_back = len(log_info)
        # update training log after each epoch
        sys.stdout.write("\b" * num_back)
        print('train: %d/%d loss: %.4f, acc: %.2f%%, time: %.2fs' % (
            min(train_batches * batch_size, num_data), num_data,
            train_err / train_total, train_corr * 100 / train_total, time.time() - start_time))

        # evaluate performance on dev data
        dev_err, dev_corr, dev_total = evaluate(X_dev, Y_dev, mask_dev, 'tmp/dev%d' % epoch)
        report('dev', dev_err, dev_corr, dev_total)

        if best_loss < dev_err and best_acc > dev_corr / dev_total:
            # worse than the best so far on both loss and accuracy
            stop_count += 1
        else:
            update_loss = False
            update_acc = False
            stop_count = 0
            if best_loss > dev_err:
                update_loss = True
                best_loss = dev_err
                best_epoch_loss = epoch
            if best_acc < dev_corr / dev_total:
                update_acc = True
                best_acc = dev_corr / dev_total
                best_epoch_acc = epoch

            # evaluate on test data whenever the dev result is not worse on both counts
            test_err, test_corr, test_total = evaluate(X_test, Y_test, mask_test, 'tmp/test%d' % epoch)
            report('test', test_err, test_corr, test_total)

            if update_loss:
                best_loss_test_err = test_err
                best_loss_test_corr = test_corr
            if update_acc:
                best_acc_test_err = test_err
                best_acc_test_corr = test_corr

        # stop once `patience` consecutive epochs were worse on both dev loss and dev accuracy.
        if stop_count == patience:
            break

        # re-compile a function with new learning rate for training
        lr = learning_rate / (1.0 + epoch * decay_rate)
        updates = utils.create_updates(loss_train, params, update_algo, lr, momentum=momentum)
        train_fn = theano.function([input_var, target_var, mask_var], [loss_train, corr_train, num_loss],
                                   updates=updates)

    # print best performance on test data.
    # test_total comes from the most recent test evaluation.
    logger.info("final best loss test performance (at epoch %d)" % (best_epoch_loss))
    report('test', best_loss_test_err, best_loss_test_corr, test_total)
    logger.info("final best acc test performance (at epoch %d)" % (best_epoch_acc))
    report('test', best_acc_test_err, best_acc_test_corr, test_total)

Example 21

Project: LasagneNLP
Source File: bi_lstm_cnn.py
View license
def main():
    """Train and evaluate a bi-directional LSTM-CNN sequence labeller.

    Parses the command line, loads the train/dev/test splits through
    ``data_processor.load_dataset_sequence_labeling`` (word and character
    inputs), builds the network with ``build_BiLSTM_CNN`` followed by a
    softmax dense layer, and runs up to 1000 training epochs.

    After every epoch the dev set is scored.  Unless the accumulated dev loss
    and the dev accuracy are *both* worse than the best values seen so far,
    the test set is scored too and its figures are remembered for the epoch
    with the best dev loss and the epoch with the best dev accuracy.  Training
    stops early once ``--patience`` consecutive epochs were worse on both
    criteria.  The remembered test figures are printed at the end.
    """
    parser = argparse.ArgumentParser(description='Tuning with bi-directional LSTM-CNN')
    parser.add_argument('--fine_tune', action='store_true', help='Fine tune the word embeddings')
    parser.add_argument('--embedding', choices=['word2vec', 'glove', 'senna'], help='Embedding for words',
                        required=True)
    parser.add_argument('--embedding_dict', default='data/word2vec/GoogleNews-vectors-negative300.bin',
                        help='path for embedding dict')
    parser.add_argument('--batch_size', type=int, default=10, help='Number of sentences in each batch')
    parser.add_argument('--num_units', type=int, default=100, help='Number of hidden units in LSTM')
    parser.add_argument('--num_filters', type=int, default=20, help='Number of filters in CNN')
    parser.add_argument('--learning_rate', type=float, default=0.1, help='Learning rate')
    parser.add_argument('--decay_rate', type=float, default=0.1, help='Decay rate of learning rate')
    parser.add_argument('--grad_clipping', type=float, default=0, help='Gradient clipping')
    parser.add_argument('--gamma', type=float, default=1e-6, help='weight for regularization')
    parser.add_argument('--peepholes', action='store_true', help='Peepholes for LSTM')
    parser.add_argument('--oov', choices=['random', 'embedding'], help='Embedding for oov word', required=True)
    parser.add_argument('--update', choices=['sgd', 'momentum', 'nesterov', 'adadelta'], help='update algorithm', default='sgd')
    parser.add_argument('--regular', choices=['none', 'l2'], help='regularization for training', required=True)
    parser.add_argument('--dropout', action='store_true', help='Apply dropout layers')
    parser.add_argument('--patience', type=int, default=5, help='Patience for early stopping')
    parser.add_argument('--output_prediction', action='store_true', help='Output predictions to temp files')
    parser.add_argument('--train')  # "data/POS-penn/wsj/split1/wsj1.train.original"
    parser.add_argument('--dev')  # "data/POS-penn/wsj/split1/wsj1.dev.original"
    parser.add_argument('--test')  # "data/POS-penn/wsj/split1/wsj1.test.original"

    args = parser.parse_args()

    # NOTE: the nested helpers below are closures over locals of main() that
    # are only assigned further down (input_var, max_length, embedd_table, ...).
    # They must therefore not be called before those assignments.

    def construct_input_layer():
        """Return the word-level input layer.

        With fine tuning the input is a matrix of word indices fed through a
        trainable embedding layer initialised from ``embedd_table``; otherwise
        the input already carries the embedding vectors.
        """
        if fine_tune:
            layer_input = lasagne.layers.InputLayer(shape=(None, max_length), input_var=input_var, name='input')
            layer_embedding = lasagne.layers.EmbeddingLayer(layer_input, input_size=alphabet_size,
                                                            output_size=embedd_dim,
                                                            W=embedd_table, name='embedding')
            return layer_embedding
        else:
            layer_input = lasagne.layers.InputLayer(shape=(None, max_length, embedd_dim), input_var=input_var,
                                                    name='input')
            return layer_input

    def construct_char_input_layer():
        """Return the character-level input layer (embedded and dim-shuffled)."""
        layer_char_input = lasagne.layers.InputLayer(shape=(None, max_sent_length, max_char_length),
                                                     input_var=char_input_var, name='char-input')
        # merge the batch and sentence axes, keeping the character axis
        layer_char_input = lasagne.layers.reshape(layer_char_input, (-1, [2]))
        layer_char_embedding = lasagne.layers.EmbeddingLayer(layer_char_input, input_size=char_alphabet_size,
                                                             output_size=char_embedd_dim, W=char_embedd_table,
                                                             name='char_embedding')
        # swap the last two axes of the embedded characters
        layer_char_input = lasagne.layers.DimshuffleLayer(layer_char_embedding, pattern=(0, 2, 1))
        return layer_char_input

    def erase_progress(num_chars):
        """Wipe the last ``num_chars`` characters of the current console line.

        A backspace only moves the cursor; without overwriting with blanks a
        shorter message would leave the tail of the previous one visible.
        """
        sys.stdout.write("\b" * num_chars)
        sys.stdout.write(" " * num_chars)
        sys.stdout.write("\b" * num_chars)

    logger = utils.get_logger("BiLSTM-CNN")
    fine_tune = args.fine_tune
    oov = args.oov
    regular = args.regular
    embedding = args.embedding
    embedding_path = args.embedding_dict
    train_path = args.train
    dev_path = args.dev
    test_path = args.test
    update_algo = args.update
    grad_clipping = args.grad_clipping
    peepholes = args.peepholes
    num_filters = args.num_filters
    gamma = args.gamma
    output_predict = args.output_prediction
    dropout = args.dropout

    X_train, Y_train, mask_train, X_dev, Y_dev, mask_dev, X_test, Y_test, mask_test, \
    embedd_table, label_alphabet, \
    C_train, C_dev, C_test, char_embedd_table = data_processor.load_dataset_sequence_labeling(train_path, dev_path,
                                                                                              test_path, oov=oov,
                                                                                              fine_tune=fine_tune,
                                                                                              embedding=embedding,
                                                                                              embedding_path=embedding_path,
                                                                                              use_character=True)
    # NOTE(review): the "- 1" presumably discounts a reserved entry of the
    # label alphabet -- confirm against the alphabet implementation.
    num_labels = label_alphabet.size() - 1

    logger.info("constructing network...")
    # create variables
    target_var = T.imatrix(name='targets')
    mask_var = T.matrix(name='masks', dtype=theano.config.floatX)
    if fine_tune:
        input_var = T.imatrix(name='inputs')
        num_data, max_length = X_train.shape
        alphabet_size, embedd_dim = embedd_table.shape
    else:
        input_var = T.tensor3(name='inputs', dtype=theano.config.floatX)
        num_data, max_length, embedd_dim = X_train.shape
    char_input_var = T.itensor3(name='char-inputs')
    num_data_char, max_sent_length, max_char_length = C_train.shape
    char_alphabet_size, char_embedd_dim = char_embedd_table.shape
    # word-level and character-level inputs must describe the same sentences
    assert (max_length == max_sent_length)
    assert (num_data == num_data_char)

    # construct input and mask layers
    layer_incoming1 = construct_char_input_layer()
    layer_incoming2 = construct_input_layer()

    layer_mask = lasagne.layers.InputLayer(shape=(None, max_length), input_var=mask_var, name='mask')

    # construct bi-rnn-cnn
    num_units = args.num_units
    bi_lstm_cnn = build_BiLSTM_CNN(layer_incoming1, layer_incoming2, num_units, mask=layer_mask,
                                   grad_clipping=grad_clipping, peepholes=peepholes, num_filters=num_filters,
                                   dropout=dropout)

    # reshape bi-rnn-cnn to [batch * max_length, num_units]
    bi_lstm_cnn = lasagne.layers.reshape(bi_lstm_cnn, (-1, [2]))

    # construct output layer (dense layer with softmax)
    layer_output = lasagne.layers.DenseLayer(bi_lstm_cnn, num_units=num_labels, nonlinearity=nonlinearities.softmax,
                                             name='softmax')

    # get output of bi-lstm-cnn shape=[batch * max_length, #label]
    prediction_train = lasagne.layers.get_output(layer_output)
    prediction_eval = lasagne.layers.get_output(layer_output, deterministic=True)
    final_prediction = T.argmax(prediction_eval, axis=1)

    # flatten target_var to a vector
    target_var_flatten = target_var.flatten()
    # flatten mask_var to a vector
    mask_var_flatten = mask_var.flatten()

    # compute loss
    num_loss = mask_var_flatten.sum(dtype=theano.config.floatX)
    # for training, we use the masked mean of the loss over the number of labels
    loss_train = lasagne.objectives.categorical_crossentropy(prediction_train, target_var_flatten)
    loss_train = (loss_train * mask_var_flatten).sum(dtype=theano.config.floatX) / num_loss
    # optional l2 regularization
    if regular == 'l2':
        l2_penalty = lasagne.regularization.regularize_network_params(layer_output, lasagne.regularization.l2)
        loss_train = loss_train + gamma * l2_penalty

    loss_eval = lasagne.objectives.categorical_crossentropy(prediction_eval, target_var_flatten)
    loss_eval = (loss_eval * mask_var_flatten).sum(dtype=theano.config.floatX) / num_loss

    # compute number of correct labels (masked positions do not count)
    corr_train = lasagne.objectives.categorical_accuracy(prediction_train, target_var_flatten)
    corr_train = (corr_train * mask_var_flatten).sum(dtype=theano.config.floatX)

    corr_eval = lasagne.objectives.categorical_accuracy(prediction_eval, target_var_flatten)
    corr_eval = (corr_eval * mask_var_flatten).sum(dtype=theano.config.floatX)

    # Create update expressions for training.
    # hyper parameters to tune: learning rate, momentum, regularization.
    batch_size = args.batch_size
    # adadelta ignores --learning_rate and always starts from 1.0
    learning_rate = 1.0 if update_algo == 'adadelta' else args.learning_rate
    decay_rate = args.decay_rate
    momentum = 0.9
    params = lasagne.layers.get_all_params(layer_output, trainable=True)
    updates = utils.create_updates(loss_train, params, update_algo, learning_rate, momentum=momentum)

    # Compile a function performing a training step on a mini-batch
    train_fn = theano.function([input_var, target_var, mask_var, char_input_var], [loss_train, corr_train, num_loss],
                               updates=updates)
    # Compile a second function evaluating the loss and accuracy of network
    eval_fn = theano.function([input_var, target_var, mask_var, char_input_var],
                              [loss_eval, corr_eval, num_loss, final_prediction])

    # Finally, launch the training loop.
    logger.info(
        "Start training: %s with regularization: %s(%f), dropout: %s, fine tune: %s (#training data: %d, batch size: %d, clip: %.1f, peepholes: %s)..."
        % (
        update_algo, regular, (0.0 if regular == 'none' else gamma), dropout, fine_tune, num_data, batch_size, grad_clipping,
        peepholes))
    # Round up: the final, possibly partial, mini-batch is processed as well
    # (see the min(...) clamp in the progress line), so flooring here would
    # make the remaining-time estimate negative on the last batch.
    num_batches = (num_data + batch_size - 1) // batch_size
    num_epochs = 1000
    best_loss = 1e+12
    best_acc = 0.0
    best_epoch_loss = 0
    best_epoch_acc = 0
    best_loss_test_err = 0.
    best_loss_test_corr = 0.
    best_acc_test_err = 0.
    best_acc_test_corr = 0.
    stop_count = 0
    lr = learning_rate
    patience = args.patience
    for epoch in range(1, num_epochs + 1):
        print('Epoch %d (learning rate=%.4f, decay rate=%.4f): ' % (epoch, lr, decay_rate))
        train_err = 0.0
        train_corr = 0.0
        train_total = 0
        start_time = time.time()
        num_back = 0
        train_batches = 0
        for batch in utils.iterate_minibatches(X_train, Y_train, masks=mask_train, char_inputs=C_train,
                                               batch_size=batch_size, shuffle=True):
            inputs, targets, masks, char_inputs = batch
            err, corr, num = train_fn(inputs, targets, masks, char_inputs)
            # err is a per-token mean, so weight it by the token count
            train_err += err * num
            train_corr += corr
            train_total += num
            train_batches += 1
            time_ave = (time.time() - start_time) / train_batches
            time_left = (num_batches - train_batches) * time_ave

            # update log: replace the previous progress line in place
            erase_progress(num_back)
            log_info = 'train: %d/%d loss: %.4f, acc: %.2f%%, time left (estimated): %.2fs' % (
                min(train_batches * batch_size, num_data), num_data,
                train_err / train_total, train_corr * 100 / train_total, time_left)
            sys.stdout.write(log_info)
            # no newline is written, so flush to make the progress visible now
            sys.stdout.flush()
            num_back = len(log_info)
        # update training log after each epoch
        erase_progress(num_back)
        print('train: %d/%d loss: %.4f, acc: %.2f%%, time: %.2fs' % (
            min(train_batches * batch_size, num_data), num_data,
            train_err / train_total, train_corr * 100 / train_total, time.time() - start_time))

        # evaluate performance on dev data
        dev_err = 0.0
        dev_corr = 0.0
        dev_total = 0
        for batch in utils.iterate_minibatches(X_dev, Y_dev, masks=mask_dev, char_inputs=C_dev, batch_size=batch_size):
            inputs, targets, masks, char_inputs = batch
            err, corr, num, predictions = eval_fn(inputs, targets, masks, char_inputs)
            dev_err += err * num
            dev_corr += corr
            dev_total += num
            if output_predict:
                utils.output_predictions(predictions, targets, masks, 'tmp/dev%d' % epoch, label_alphabet)

        print('dev loss: %.4f, corr: %d, total: %d, acc: %.2f%%' % (
            dev_err / dev_total, dev_corr, dev_total, dev_corr * 100 / dev_total))

        # Only an epoch that is worse on BOTH dev loss and dev accuracy counts
        # towards early stopping; anything else resets the counter.
        if best_loss < dev_err and best_acc > dev_corr / dev_total:
            stop_count += 1
        else:
            update_loss = False
            update_acc = False
            stop_count = 0
            if best_loss > dev_err:
                update_loss = True
                best_loss = dev_err
                best_epoch_loss = epoch
            if best_acc < dev_corr / dev_total:
                update_acc = True
                best_acc = dev_corr / dev_total
                best_epoch_acc = epoch

            # evaluate on test data when better performance detected
            test_err = 0.0
            test_corr = 0.0
            test_total = 0
            for batch in utils.iterate_minibatches(X_test, Y_test, masks=mask_test, char_inputs=C_test,
                                                   batch_size=batch_size):
                inputs, targets, masks, char_inputs = batch
                err, corr, num, predictions = eval_fn(inputs, targets, masks, char_inputs)
                test_err += err * num
                test_corr += corr
                test_total += num
                if output_predict:
                    utils.output_predictions(predictions, targets, masks, 'tmp/test%d' % epoch, label_alphabet)

            print('test loss: %.4f, corr: %d, total: %d, acc: %.2f%%' % (
                test_err / test_total, test_corr, test_total, test_corr * 100 / test_total))

            if update_loss:
                best_loss_test_err = test_err
                best_loss_test_corr = test_corr
            if update_acc:
                best_acc_test_err = test_err
                best_acc_test_corr = test_corr

        # stop once `patience` consecutive epochs were worse than the best
        # dev loss and the best dev accuracy.
        if stop_count == patience:
            break

        # re-compile a function with new learning rate for training
        # (adadelta keeps its initial rate, so nothing to rebuild there)
        if update_algo != 'adadelta':
            lr = learning_rate / (1.0 + epoch * decay_rate)
            updates = utils.create_updates(loss_train, params, update_algo, lr, momentum=momentum)
            train_fn = theano.function([input_var, target_var, mask_var, char_input_var],
                                        [loss_train, corr_train, num_loss],
                                        updates=updates)

    # print best performance on test data.
    # test_total still holds the token count of the most recent test pass.
    logger.info("final best loss test performance (at epoch %d)" % best_epoch_loss)
    print('test loss: %.4f, corr: %d, total: %d, acc: %.2f%%' % (
        best_loss_test_err / test_total, best_loss_test_corr, test_total, best_loss_test_corr * 100 / test_total))
    logger.info("final best acc test performance (at epoch %d)" % best_epoch_acc)
    print('test loss: %.4f, corr: %d, total: %d, acc: %.2f%%' % (
        best_acc_test_err / test_total, best_acc_test_corr, test_total, best_acc_test_corr * 100 / test_total))

Example 22

Project: LasagneNLP
Source File: bi_lstm_cnn_crf.py
View license
def main():
    """Train and evaluate a bi-directional LSTM-CNN-CRF sequence labeller.

    Parses the command line, loads the train/dev/test splits through
    ``data_processor.load_dataset_sequence_labeling`` (word and character
    inputs), builds the network with ``build_BiLSTM_CNN_CRF`` and runs up to
    1000 training epochs using ``crf_loss`` / ``crf_accuracy``.

    After every epoch the dev set is scored.  Unless the accumulated dev loss
    and the dev accuracy are *both* worse than the best values seen so far,
    the test set is scored too and its figures are remembered for the epoch
    with the best dev loss and the epoch with the best dev accuracy.  Training
    stops early once ``--patience`` consecutive epochs were worse on both
    criteria.  The remembered test figures are printed at the end.
    """
    parser = argparse.ArgumentParser(description='Tuning with bi-directional LSTM-CNN-CRF')
    parser.add_argument('--fine_tune', action='store_true', help='Fine tune the word embeddings')
    parser.add_argument('--embedding', choices=['word2vec', 'glove', 'senna', 'random'], help='Embedding for words',
                        required=True)
    parser.add_argument('--embedding_dict', default=None, help='path for embedding dict')
    parser.add_argument('--batch_size', type=int, default=10, help='Number of sentences in each batch')
    parser.add_argument('--num_units', type=int, default=100, help='Number of hidden units in LSTM')
    parser.add_argument('--num_filters', type=int, default=20, help='Number of filters in CNN')
    parser.add_argument('--learning_rate', type=float, default=0.1, help='Learning rate')
    parser.add_argument('--decay_rate', type=float, default=0.1, help='Decay rate of learning rate')
    parser.add_argument('--grad_clipping', type=float, default=0, help='Gradient clipping')
    parser.add_argument('--gamma', type=float, default=1e-6, help='weight for regularization')
    parser.add_argument('--peepholes', action='store_true', help='Peepholes for LSTM')
    parser.add_argument('--oov', choices=['random', 'embedding'], help='Embedding for oov word', required=True)
    parser.add_argument('--update', choices=['sgd', 'momentum', 'nesterov', 'adadelta'], help='update algorithm',
                        default='sgd')
    parser.add_argument('--regular', choices=['none', 'l2'], help='regularization for training', required=True)
    parser.add_argument('--dropout', action='store_true', help='Apply dropout layers')
    parser.add_argument('--patience', type=int, default=5, help='Patience for early stopping')
    parser.add_argument('--output_prediction', action='store_true', help='Output predictions to temp files')
    parser.add_argument('--train')  # "data/POS-penn/wsj/split1/wsj1.train.original"
    parser.add_argument('--dev')  # "data/POS-penn/wsj/split1/wsj1.dev.original"
    parser.add_argument('--test')  # "data/POS-penn/wsj/split1/wsj1.test.original"

    args = parser.parse_args()

    # NOTE: the nested helpers below are closures over locals of main() that
    # are only assigned further down (input_var, max_length, embedd_table, ...).
    # They must therefore not be called before those assignments.

    def construct_input_layer():
        """Return the word-level input layer.

        With fine tuning the input is a matrix of word indices fed through a
        trainable embedding layer initialised from ``embedd_table``; otherwise
        the input already carries the embedding vectors.
        """
        if fine_tune:
            layer_input = lasagne.layers.InputLayer(shape=(None, max_length), input_var=input_var, name='input')
            layer_embedding = lasagne.layers.EmbeddingLayer(layer_input, input_size=alphabet_size,
                                                            output_size=embedd_dim,
                                                            W=embedd_table, name='embedding')
            return layer_embedding
        else:
            layer_input = lasagne.layers.InputLayer(shape=(None, max_length, embedd_dim), input_var=input_var,
                                                    name='input')
            return layer_input

    def construct_char_input_layer():
        """Return the character-level input layer (embedded and dim-shuffled)."""
        layer_char_input = lasagne.layers.InputLayer(shape=(None, max_sent_length, max_char_length),
                                                     input_var=char_input_var, name='char-input')
        # merge the batch and sentence axes, keeping the character axis
        layer_char_input = lasagne.layers.reshape(layer_char_input, (-1, [2]))
        layer_char_embedding = lasagne.layers.EmbeddingLayer(layer_char_input, input_size=char_alphabet_size,
                                                             output_size=char_embedd_dim, W=char_embedd_table,
                                                             name='char_embedding')
        # swap the last two axes of the embedded characters
        layer_char_input = lasagne.layers.DimshuffleLayer(layer_char_embedding, pattern=(0, 2, 1))
        return layer_char_input

    def erase_progress(num_chars):
        """Wipe the last ``num_chars`` characters of the current console line.

        A backspace only moves the cursor; without overwriting with blanks a
        shorter message would leave the tail of the previous one visible.
        """
        sys.stdout.write("\b" * num_chars)
        sys.stdout.write(" " * num_chars)
        sys.stdout.write("\b" * num_chars)

    logger = utils.get_logger("BiLSTM-CNN-CRF")
    fine_tune = args.fine_tune
    oov = args.oov
    regular = args.regular
    embedding = args.embedding
    embedding_path = args.embedding_dict
    train_path = args.train
    dev_path = args.dev
    test_path = args.test
    update_algo = args.update
    grad_clipping = args.grad_clipping
    peepholes = args.peepholes
    num_filters = args.num_filters
    gamma = args.gamma
    output_predict = args.output_prediction
    dropout = args.dropout

    X_train, Y_train, mask_train, X_dev, Y_dev, mask_dev, X_test, Y_test, mask_test, \
    embedd_table, label_alphabet, \
    C_train, C_dev, C_test, char_embedd_table = data_processor.load_dataset_sequence_labeling(train_path, dev_path,
                                                                                              test_path, oov=oov,
                                                                                              fine_tune=fine_tune,
                                                                                              embedding=embedding,
                                                                                              embedding_path=embedding_path,
                                                                                              use_character=True)
    # NOTE(review): the "- 1" presumably discounts a reserved entry of the
    # label alphabet -- confirm against the alphabet implementation.
    num_labels = label_alphabet.size() - 1

    logger.info("constructing network...")
    # create variables
    target_var = T.imatrix(name='targets')
    mask_var = T.matrix(name='masks', dtype=theano.config.floatX)
    if fine_tune:
        input_var = T.imatrix(name='inputs')
        num_data, max_length = X_train.shape
        alphabet_size, embedd_dim = embedd_table.shape
    else:
        input_var = T.tensor3(name='inputs', dtype=theano.config.floatX)
        num_data, max_length, embedd_dim = X_train.shape
    char_input_var = T.itensor3(name='char-inputs')
    num_data_char, max_sent_length, max_char_length = C_train.shape
    char_alphabet_size, char_embedd_dim = char_embedd_table.shape
    # word-level and character-level inputs must describe the same sentences
    assert (max_length == max_sent_length)
    assert (num_data == num_data_char)

    # construct input and mask layers
    layer_incoming1 = construct_char_input_layer()
    layer_incoming2 = construct_input_layer()

    layer_mask = lasagne.layers.InputLayer(shape=(None, max_length), input_var=mask_var, name='mask')

    # construct bi-rnn-cnn
    num_units = args.num_units

    bi_lstm_cnn_crf = build_BiLSTM_CNN_CRF(layer_incoming1, layer_incoming2, num_units, num_labels, mask=layer_mask,
                                           grad_clipping=grad_clipping, peepholes=peepholes, num_filters=num_filters,
                                           dropout=dropout)

    logger.info("Network structure: hidden=%d, filter=%d" % (num_units, num_filters))

    # number of unmasked tokens in a batch (used for accuracy)
    num_tokens = mask_var.sum(dtype=theano.config.floatX)

    # get output of bi-lstm-cnn-crf shape [batch, length, num_labels, num_labels]
    energies_train = lasagne.layers.get_output(bi_lstm_cnn_crf)
    energies_eval = lasagne.layers.get_output(bi_lstm_cnn_crf, deterministic=True)

    # compute loss: mean of the CRF loss
    loss_train = crf_loss(energies_train, target_var, mask_var).mean()
    loss_eval = crf_loss(energies_eval, target_var, mask_var).mean()
    # optional l2 regularization
    if regular == 'l2':
        l2_penalty = lasagne.regularization.regularize_network_params(bi_lstm_cnn_crf, lasagne.regularization.l2)
        loss_train = loss_train + gamma * l2_penalty

    # number of correct labels (masked positions do not count)
    _, corr_train = crf_accuracy(energies_train, target_var)
    corr_train = (corr_train * mask_var).sum(dtype=theano.config.floatX)
    prediction_eval, corr_eval = crf_accuracy(energies_eval, target_var)
    corr_eval = (corr_eval * mask_var).sum(dtype=theano.config.floatX)

    # Create update expressions for training.
    # hyper parameters to tune: learning rate, momentum, regularization.
    batch_size = args.batch_size
    # adadelta ignores --learning_rate and always starts from 1.0
    learning_rate = 1.0 if update_algo == 'adadelta' else args.learning_rate
    decay_rate = args.decay_rate
    momentum = 0.9
    params = lasagne.layers.get_all_params(bi_lstm_cnn_crf, trainable=True)
    updates = utils.create_updates(loss_train, params, update_algo, learning_rate, momentum=momentum)

    # Compile a function performing a training step on a mini-batch
    train_fn = theano.function([input_var, target_var, mask_var, char_input_var], [loss_train, corr_train, num_tokens],
                               updates=updates)
    # Compile a second function evaluating the loss and accuracy of network
    eval_fn = theano.function([input_var, target_var, mask_var, char_input_var],
                              [loss_eval, corr_eval, num_tokens, prediction_eval])

    # Finally, launch the training loop.
    logger.info(
        "Start training: %s with regularization: %s(%f), dropout: %s, fine tune: %s (#training data: %d, batch size: %d, clip: %.1f, peepholes: %s)..."
        % (
            update_algo, regular, (0.0 if regular == 'none' else gamma), dropout, fine_tune, num_data, batch_size,
            grad_clipping,
            peepholes))
    # Round up: the final, possibly partial, mini-batch is processed as well
    # (the epoch asserts that every training instance was seen), so flooring
    # here would make the remaining-time estimate negative on the last batch.
    num_batches = (num_data + batch_size - 1) // batch_size
    num_epochs = 1000
    best_loss = 1e+12
    best_acc = 0.0
    best_epoch_loss = 0
    best_epoch_acc = 0
    best_loss_test_err = 0.
    best_loss_test_corr = 0.
    best_acc_test_err = 0.
    best_acc_test_corr = 0.
    stop_count = 0
    lr = learning_rate
    patience = args.patience
    for epoch in range(1, num_epochs + 1):
        print('Epoch %d (learning rate=%.4f, decay rate=%.4f): ' % (epoch, lr, decay_rate))
        train_err = 0.0
        train_corr = 0.0
        train_total = 0
        train_inst = 0
        start_time = time.time()
        num_back = 0
        train_batches = 0
        for batch in utils.iterate_minibatches(X_train, Y_train, masks=mask_train, char_inputs=C_train,
                                               batch_size=batch_size, shuffle=True):
            inputs, targets, masks, char_inputs = batch
            err, corr, num = train_fn(inputs, targets, masks, char_inputs)
            # err is a mean over the batch, so weight it by the batch size
            train_err += err * inputs.shape[0]
            train_corr += corr
            train_total += num
            train_inst += inputs.shape[0]
            train_batches += 1
            time_ave = (time.time() - start_time) / train_batches
            time_left = (num_batches - train_batches) * time_ave

            # update log: replace the previous progress line in place
            erase_progress(num_back)
            log_info = 'train: %d/%d loss: %.4f, acc: %.2f%%, time left (estimated): %.2fs' % (
                min(train_batches * batch_size, num_data), num_data,
                train_err / train_inst, train_corr * 100 / train_total, time_left)
            sys.stdout.write(log_info)
            # no newline is written, so flush to make the progress visible now
            sys.stdout.flush()
            num_back = len(log_info)
        # update training log after each epoch
        assert train_inst == num_data
        erase_progress(num_back)
        print('train: %d/%d loss: %.4f, acc: %.2f%%, time: %.2fs' % (
            min(train_batches * batch_size, num_data), num_data,
            train_err / num_data, train_corr * 100 / train_total, time.time() - start_time))

        # evaluate performance on dev data
        dev_err = 0.0
        dev_corr = 0.0
        dev_total = 0
        dev_inst = 0
        for batch in utils.iterate_minibatches(X_dev, Y_dev, masks=mask_dev, char_inputs=C_dev, batch_size=batch_size):
            inputs, targets, masks, char_inputs = batch
            err, corr, num, predictions = eval_fn(inputs, targets, masks, char_inputs)
            dev_err += err * inputs.shape[0]
            dev_corr += corr
            dev_total += num
            dev_inst += inputs.shape[0]
            if output_predict:
                utils.output_predictions(predictions, targets, masks, 'tmp/dev%d' % epoch, label_alphabet,
                                         is_flattened=False)

        print('dev loss: %.4f, corr: %d, total: %d, acc: %.2f%%' % (
            dev_err / dev_inst, dev_corr, dev_total, dev_corr * 100 / dev_total))

        # Only an epoch that is worse on BOTH dev loss and dev accuracy counts
        # towards early stopping; anything else resets the counter.
        if best_loss < dev_err and best_acc > dev_corr / dev_total:
            stop_count += 1
        else:
            update_loss = False
            update_acc = False
            stop_count = 0
            if best_loss > dev_err:
                update_loss = True
                best_loss = dev_err
                best_epoch_loss = epoch
            if best_acc < dev_corr / dev_total:
                update_acc = True
                best_acc = dev_corr / dev_total
                best_epoch_acc = epoch

            # evaluate on test data when better performance detected
            test_err = 0.0
            test_corr = 0.0
            test_total = 0
            test_inst = 0
            for batch in utils.iterate_minibatches(X_test, Y_test, masks=mask_test, char_inputs=C_test,
                                                   batch_size=batch_size):
                inputs, targets, masks, char_inputs = batch
                err, corr, num, predictions = eval_fn(inputs, targets, masks, char_inputs)
                test_err += err * inputs.shape[0]
                test_corr += corr
                test_total += num
                test_inst += inputs.shape[0]
                if output_predict:
                    utils.output_predictions(predictions, targets, masks, 'tmp/test%d' % epoch, label_alphabet,
                                             is_flattened=False)

            print('test loss: %.4f, corr: %d, total: %d, acc: %.2f%%' % (
                test_err / test_inst, test_corr, test_total, test_corr * 100 / test_total))

            if update_loss:
                best_loss_test_err = test_err
                best_loss_test_corr = test_corr
            if update_acc:
                best_acc_test_err = test_err
                best_acc_test_corr = test_corr

        # stop once `patience` consecutive epochs were worse than the best
        # dev loss and the best dev accuracy.
        if stop_count == patience:
            break

        # re-compile a function with new learning rate for training
        # (adadelta keeps its initial rate, so nothing to rebuild there)
        if update_algo != 'adadelta':
            lr = learning_rate / (1.0 + epoch * decay_rate)
            updates = utils.create_updates(loss_train, params, update_algo, lr, momentum=momentum)
            train_fn = theano.function([input_var, target_var, mask_var, char_input_var],
                                        [loss_train, corr_train, num_tokens],
                                        updates=updates)

    # print best performance on test data.
    # test_inst / test_total still hold the counts of the most recent test pass.
    logger.info("final best loss test performance (at epoch %d)" % best_epoch_loss)
    print('test loss: %.4f, corr: %d, total: %d, acc: %.2f%%' % (
        best_loss_test_err / test_inst, best_loss_test_corr, test_total, best_loss_test_corr * 100 / test_total))
    logger.info("final best acc test performance (at epoch %d)" % best_epoch_acc)
    print('test loss: %.4f, corr: %d, total: %d, acc: %.2f%%' % (
        best_acc_test_err / test_inst, best_acc_test_corr, test_total, best_acc_test_corr * 100 / test_total))

Example 23

Project: LasagneNLP
Source File: bi_lstm_highcnn.py
View license
def main():
    """Train and evaluate a bi-directional LSTM-HighCNN sequence labeller.

    Parses the command-line options, loads the train/dev/test splits through
    ``data_processor.load_dataset_sequence_labeling`` (with character inputs
    enabled), builds the network with ``build_BiLSTM_HighCNN`` followed by a
    softmax output layer, compiles Theano training/evaluation functions and
    runs up to 1000 training epochs.

    After each epoch the dev set is scored.  Unless both the dev loss and the
    dev accuracy are worse than the best values seen so far, the test set is
    scored as well, and the test figures are remembered for the epochs with the
    lowest dev loss and the highest dev accuracy.  Training stops early after
    ``--patience`` consecutive epochs in which both dev metrics were worse than
    their best.  For every update algorithm except ``adadelta`` the learning
    rate is decayed as ``learning_rate / (1 + epoch * decay_rate)`` and the
    training function is re-compiled after each epoch.

    Progress is written to stdout; the final summary is reported through the
    logger and stdout.  Returns nothing.
    """
    parser = argparse.ArgumentParser(description='Tuning with bi-directional LSTM-HighCNN')
    parser.add_argument('--fine_tune', action='store_true', help='Fine tune the word embeddings')
    parser.add_argument('--embedding', choices=['word2vec', 'glove', 'senna'], help='Embedding for words',
                        required=True)
    parser.add_argument('--embedding_dict', default='data/word2vec/GoogleNews-vectors-negative300.bin',
                        help='path for embedding dict')
    parser.add_argument('--batch_size', type=int, default=10, help='Number of sentences in each batch')
    parser.add_argument('--num_units', type=int, default=100, help='Number of hidden units in LSTM')
    parser.add_argument('--num_filters', type=int, default=20, help='Number of filters in CNN')
    parser.add_argument('--learning_rate', type=float, default=0.1, help='Learning rate')
    parser.add_argument('--decay_rate', type=float, default=0.1, help='Decay rate of learning rate')
    parser.add_argument('--grad_clipping', type=float, default=0, help='Gradient clipping')
    parser.add_argument('--gamma', type=float, default=1e-6, help='weight for regularization')
    parser.add_argument('--peepholes', action='store_true', help='Peepholes for LSTM')
    parser.add_argument('--oov', choices=['random', 'embedding'], help='Embedding for oov word', required=True)
    parser.add_argument('--update', choices=['sgd', 'momentum', 'nesterov', 'adadelta'], help='update algorithm', default='sgd')
    parser.add_argument('--regular', choices=['none', 'l2'], help='regularization for training', required=True)
    parser.add_argument('--dropout', action='store_true', help='Apply dropout layers')
    parser.add_argument('--patience', type=int, default=5, help='Patience for early stopping')
    parser.add_argument('--output_prediction', action='store_true', help='Output predictions to temp files')
    parser.add_argument('--train')  # "data/POS-penn/wsj/split1/wsj1.train.original"
    parser.add_argument('--dev')  # "data/POS-penn/wsj/split1/wsj1.dev.original"
    parser.add_argument('--test')  # "data/POS-penn/wsj/split1/wsj1.test.original"

    args = parser.parse_args()

    # NOTE: the two builders below are closures over locals of main() (fine_tune, input_var,
    # max_length, embedd_table, ...) that are only assigned further down, so they must not be
    # called before those assignments have run.
    def construct_input_layer():
        """Return the word-level input layer (with an embedding layer on top when fine-tuning)."""
        if fine_tune:
            layer_input = lasagne.layers.InputLayer(shape=(None, max_length), input_var=input_var, name='input')
            layer_embedding = lasagne.layers.EmbeddingLayer(layer_input, input_size=alphabet_size,
                                                            output_size=embedd_dim,
                                                            W=embedd_table, name='embedding')
            return layer_embedding
        else:
            layer_input = lasagne.layers.InputLayer(shape=(None, max_length, embedd_dim), input_var=input_var,
                                                    name='input')
            return layer_input

    def construct_char_input_layer():
        """Return the character-level input: reshaped, embedded, then dim-shuffled with pattern (0, 2, 1)."""
        layer_char_input = lasagne.layers.InputLayer(shape=(None, max_sent_length, max_char_length),
                                                     input_var=char_input_var, name='char-input')
        layer_char_input = lasagne.layers.reshape(layer_char_input, (-1, [2]))
        layer_char_embedding = lasagne.layers.EmbeddingLayer(layer_char_input, input_size=char_alphabet_size,
                                                             output_size=char_embedd_dim, W=char_embedd_table,
                                                             name='char_embedding')
        layer_char_input = lasagne.layers.DimshuffleLayer(layer_char_embedding, pattern=(0, 2, 1))
        return layer_char_input

    logger = utils.get_logger("BiLSTM-HighCNN")
    fine_tune = args.fine_tune
    oov = args.oov
    regular = args.regular
    embedding = args.embedding
    embedding_path = args.embedding_dict
    train_path = args.train
    dev_path = args.dev
    test_path = args.test
    update_algo = args.update
    grad_clipping = args.grad_clipping
    peepholes = args.peepholes
    num_filters = args.num_filters
    gamma = args.gamma
    output_predict = args.output_prediction
    dropout = args.dropout

    X_train, Y_train, mask_train, X_dev, Y_dev, mask_dev, X_test, Y_test, mask_test, \
    embedd_table, label_alphabet, \
    C_train, C_dev, C_test, char_embedd_table = data_processor.load_dataset_sequence_labeling(train_path, dev_path,
                                                                                              test_path, oov=oov,
                                                                                              fine_tune=fine_tune,
                                                                                              embedding=embedding,
                                                                                              embedding_path=embedding_path,
                                                                                              use_character=True)
    num_labels = label_alphabet.size() - 1

    logger.info("constructing network...")
    # create variables
    target_var = T.imatrix(name='targets')
    mask_var = T.matrix(name='masks', dtype=theano.config.floatX)
    if fine_tune:
        # word indices; the embedding table is part of the network
        input_var = T.imatrix(name='inputs')
        num_data, max_length = X_train.shape
        alphabet_size, embedd_dim = embedd_table.shape
    else:
        # 3-D float input: X_train carries an embedding dimension as its last axis
        input_var = T.tensor3(name='inputs', dtype=theano.config.floatX)
        num_data, max_length, embedd_dim = X_train.shape
    char_input_var = T.itensor3(name='char-inputs')
    num_data_char, max_sent_length, max_char_length = C_train.shape
    char_alphabet_size, char_embedd_dim = char_embedd_table.shape
    # word-level and character-level data must describe the same sentences
    assert (max_length == max_sent_length)
    assert (num_data == num_data_char)

    # construct input and mask layers
    layer_incoming1 = construct_char_input_layer()
    layer_incoming2 = construct_input_layer()

    layer_mask = lasagne.layers.InputLayer(shape=(None, max_length), input_var=mask_var, name='mask')

    # construct bi-rnn-cnn
    num_units = args.num_units
    bi_lstm_cnn = build_BiLSTM_HighCNN(layer_incoming1, layer_incoming2, num_units, mask=layer_mask,
                                       grad_clipping=grad_clipping, peepholes=peepholes, num_filters=num_filters,
                                       dropout=dropout)

    # reshape bi-rnn-cnn to [batch * max_length, num_units]
    bi_lstm_cnn = lasagne.layers.reshape(bi_lstm_cnn, (-1, [2]))

    # construct output layer (dense layer with softmax)
    layer_output = lasagne.layers.DenseLayer(bi_lstm_cnn, num_units=num_labels, nonlinearity=nonlinearities.softmax,
                                             name='softmax')

    # get output of bi-lstm-cnn shape=[batch * max_length, #label]
    prediction_train = lasagne.layers.get_output(layer_output)
    prediction_eval = lasagne.layers.get_output(layer_output, deterministic=True)
    final_prediction = T.argmax(prediction_eval, axis=1)

    # flat target_var to vector
    target_var_flatten = target_var.flatten()
    # flat mask_var to vector
    mask_var_flatten = mask_var.flatten()

    # compute loss
    # num_loss is the sum of the mask, used as the normaliser for the masked loss
    num_loss = mask_var_flatten.sum(dtype=theano.config.floatX)
    # for training, we use mean of loss over number of labels
    loss_train = lasagne.objectives.categorical_crossentropy(prediction_train, target_var_flatten)
    loss_train = (loss_train * mask_var_flatten).sum(dtype=theano.config.floatX) / num_loss
    # l2 regularization?
    if regular == 'l2':
        l2_penalty = lasagne.regularization.regularize_network_params(layer_output, lasagne.regularization.l2)
        loss_train = loss_train + gamma * l2_penalty

    loss_eval = lasagne.objectives.categorical_crossentropy(prediction_eval, target_var_flatten)
    loss_eval = (loss_eval * mask_var_flatten).sum(dtype=theano.config.floatX) / num_loss

    # compute number of correct labels
    corr_train = lasagne.objectives.categorical_accuracy(prediction_train, target_var_flatten)
    corr_train = (corr_train * mask_var_flatten).sum(dtype=theano.config.floatX)

    corr_eval = lasagne.objectives.categorical_accuracy(prediction_eval, target_var_flatten)
    corr_eval = (corr_eval * mask_var_flatten).sum(dtype=theano.config.floatX)

    # Create update expressions for training.
    # hyper parameters to tune: learning rate, momentum, regularization.
    batch_size = args.batch_size
    # adadelta ignores --learning_rate and always starts from 1.0
    learning_rate = 1.0 if update_algo == 'adadelta' else args.learning_rate
    decay_rate = args.decay_rate
    momentum = 0.9
    params = lasagne.layers.get_all_params(layer_output, trainable=True)
    updates = utils.create_updates(loss_train, params, update_algo, learning_rate, momentum=momentum)

    # Compile a function performing a training step on a mini-batch
    train_fn = theano.function([input_var, target_var, mask_var, char_input_var], [loss_train, corr_train, num_loss],
                               updates=updates)
    # Compile a second function evaluating the loss and accuracy of network
    eval_fn = theano.function([input_var, target_var, mask_var, char_input_var],
                              [loss_eval, corr_eval, num_loss, final_prediction])

    # Finally, launch the training loop.
    logger.info(
        "Start training: %s with regularization: %s(%f), dropout: %s, fine tune: %s (#training data: %d, batch size: %d, clip: %.1f, peepholes: %s)..."
        % (
            update_algo, regular, (0.0 if regular == 'none' else gamma), dropout, fine_tune, num_data, batch_size, grad_clipping,
            peepholes))
    # Ceiling division: count the trailing partial batch too, otherwise the
    # "time left" estimate goes negative on the last batch whenever num_data is
    # not a multiple of batch_size.  (Assumes the minibatch iterator yields that
    # partial batch, as the min(...) clamp in the progress log suggests.)
    num_batches = (num_data + batch_size - 1) // batch_size
    num_epochs = 1000
    best_loss = 1e+12
    best_acc = 0.0
    best_epoch_loss = 0
    best_epoch_acc = 0
    best_loss_test_err = 0.
    best_loss_test_corr = 0.
    best_acc_test_err = 0.
    best_acc_test_corr = 0.
    stop_count = 0
    lr = learning_rate
    patience = args.patience
    for epoch in range(1, num_epochs + 1):
        print('Epoch %d (learning rate=%.4f, decay rate=%.4f): ' % (epoch, lr, decay_rate))
        train_err = 0.0
        train_corr = 0.0
        train_total = 0
        start_time = time.time()
        num_back = 0  # length of the progress line currently on screen
        train_batches = 0
        for batch in utils.iterate_minibatches(X_train, Y_train, masks=mask_train, char_inputs=C_train,
                                               batch_size=batch_size, shuffle=True):
            inputs, targets, masks, char_inputs = batch
            err, corr, num = train_fn(inputs, targets, masks, char_inputs)
            # err is normalised by num inside the graph; scale back to a sum before accumulating
            train_err += err * num
            train_corr += corr
            train_total += num
            train_batches += 1
            time_ave = (time.time() - start_time) / train_batches
            time_left = (num_batches - train_batches) * time_ave

            # update log: backspace over the previous progress line, then rewrite it
            sys.stdout.write("\b" * num_back)
            log_info = 'train: %d/%d loss: %.4f, acc: %.2f%%, time left (estimated): %.2fs' % (
                min(train_batches * batch_size, num_data), num_data,
                train_err / train_total, train_corr * 100 / train_total, time_left)
            sys.stdout.write(log_info)
            num_back = len(log_info)
        # update training log after each epoch
        sys.stdout.write("\b" * num_back)
        print('train: %d/%d loss: %.4f, acc: %.2f%%, time: %.2fs' % (
            min(train_batches * batch_size, num_data), num_data,
            train_err / train_total, train_corr * 100 / train_total, time.time() - start_time))

        # evaluate performance on dev data
        dev_err = 0.0
        dev_corr = 0.0
        dev_total = 0
        for batch in utils.iterate_minibatches(X_dev, Y_dev, masks=mask_dev, char_inputs=C_dev, batch_size=batch_size):
            inputs, targets, masks, char_inputs = batch
            err, corr, num, predictions = eval_fn(inputs, targets, masks, char_inputs)
            dev_err += err * num
            dev_corr += corr
            dev_total += num
            if output_predict:
                utils.output_predictions(predictions, targets, masks, 'tmp/dev%d' % epoch, label_alphabet)

        print('dev loss: %.4f, corr: %d, total: %d, acc: %.2f%%' % (
            dev_err / dev_total, dev_corr, dev_total, dev_corr * 100 / dev_total))

        # best_loss is compared against the summed (not averaged) dev loss
        if best_loss < dev_err and best_acc > dev_corr / dev_total:
            # both dev metrics are worse than their best: one more epoch without progress
            stop_count += 1
        else:
            update_loss = False
            update_acc = False
            stop_count = 0
            if best_loss > dev_err:
                update_loss = True
                best_loss = dev_err
                best_epoch_loss = epoch
            if best_acc < dev_corr / dev_total:
                update_acc = True
                best_acc = dev_corr / dev_total
                best_epoch_acc = epoch

            # evaluate on test data when better performance detected
            test_err = 0.0
            test_corr = 0.0
            test_total = 0
            for batch in utils.iterate_minibatches(X_test, Y_test, masks=mask_test, char_inputs=C_test,
                                                   batch_size=batch_size):
                inputs, targets, masks, char_inputs = batch
                err, corr, num, predictions = eval_fn(inputs, targets, masks, char_inputs)
                test_err += err * num
                test_corr += corr
                test_total += num
                if output_predict:
                    utils.output_predictions(predictions, targets, masks, 'tmp/test%d' % epoch, label_alphabet)

            print('test loss: %.4f, corr: %d, total: %d, acc: %.2f%%' % (
                test_err / test_total, test_corr, test_total, test_corr * 100 / test_total))

            if update_loss:
                best_loss_test_err = test_err
                best_loss_test_corr = test_corr
            if update_acc:
                best_acc_test_err = test_err
                best_acc_test_corr = test_corr

        # stop once both dev loss and dev accuracy have been worse than their
        # best for `patience` consecutive epochs.
        if stop_count == patience:
            break

        # re-compile a function with new learning rate for training
        if update_algo != 'adadelta':
            lr = learning_rate / (1.0 + epoch * decay_rate)
            updates = utils.create_updates(loss_train, params, update_algo, lr, momentum=momentum)
            train_fn = theano.function([input_var, target_var, mask_var, char_input_var],
                                       [loss_train, corr_train, num_loss],
                                       updates=updates)

    # print best performance on test data.
    # test_total still holds the count from the most recent test evaluation
    logger.info("final best loss test performance (at epoch %d)" % best_epoch_loss)
    print('test loss: %.4f, corr: %d, total: %d, acc: %.2f%%' % (
        best_loss_test_err / test_total, best_loss_test_corr, test_total, best_loss_test_corr * 100 / test_total))
    logger.info("final best acc test performance (at epoch %d)" % best_epoch_acc)
    print('test loss: %.4f, corr: %d, total: %d, acc: %.2f%%' % (
        best_acc_test_err / test_total, best_acc_test_corr, test_total, best_acc_test_corr * 100 / test_total))

Example 24

Project: LasagneNLP
Source File: bi_rnn.py
View license
def main():
    """Train and evaluate a bi-directional RNN sequence labeller.

    Parses the command-line options, loads the train/dev/test splits through
    ``data_processor.load_dataset_sequence_labeling``, builds the network with
    ``build_BiRNN`` followed by a softmax output layer, compiles Theano
    training/evaluation functions and runs up to 1000 training epochs.

    After each epoch the dev set is scored.  Unless both the dev loss and the
    dev accuracy are worse than the best values seen so far, the test set is
    scored as well, and the test figures are remembered for the epochs with the
    lowest dev loss and the highest dev accuracy.  Training stops early after
    ``--patience`` (default 5) consecutive epochs in which both dev metrics
    were worse than their best.  The learning rate is decayed as
    ``learning_rate / (1 + epoch * decay_rate)`` and the training function is
    re-compiled after each epoch.

    Progress is written to stdout; the final summary is reported through the
    logger and stdout.  Returns nothing.
    """
    parser = argparse.ArgumentParser(description='Tuning with bi-directional RNN')
    parser.add_argument('--fine_tune', action='store_true', help='Fine tune the word embeddings')
    parser.add_argument('--embedding', choices=['word2vec', 'glove', 'senna'], help='Embedding for words',
                        required=True)
    parser.add_argument('--embedding_dict', default='data/word2vec/GoogleNews-vectors-negative300.bin',
                        help='path for embedding dict')
    parser.add_argument('--batch_size', type=int, default=10, help='Number of sentences in each batch')
    parser.add_argument('--num_units', type=int, default=100, help='Number of hidden units in RNN')
    parser.add_argument('--learning_rate', type=float, default=0.1, help='Learning rate')
    parser.add_argument('--decay_rate', type=float, default=0.1, help='Decay rate of learning rate')
    parser.add_argument('--grad_clipping', type=float, default=0, help='Gradient clipping')
    parser.add_argument('--gamma', type=float, default=1e-6, help='weight for regularization')
    parser.add_argument('--oov', choices=['random', 'embedding'], help='Embedding for oov word', required=True)
    parser.add_argument('--update', choices=['sgd', 'momentum', 'nesterov'], help='update algorithm', default='sgd')
    parser.add_argument('--regular', choices=['none', 'l2'], help='regularization for training',
                        required=True)
    parser.add_argument('--dropout', action='store_true', help='Apply dropout layers')
    # Previously hard-coded to 5; exposed as an option with the same default.
    parser.add_argument('--patience', type=int, default=5, help='Patience for early stopping')
    parser.add_argument('--output_prediction', action='store_true', help='Output predictions to temp files')
    parser.add_argument('--train')  # "data/POS-penn/wsj/split1/wsj1.train.original"
    parser.add_argument('--dev')  # "data/POS-penn/wsj/split1/wsj1.dev.original"
    parser.add_argument('--test')  # "data/POS-penn/wsj/split1/wsj1.test.original"

    args = parser.parse_args()

    # NOTE: this builder is a closure over locals of main() (fine_tune, input_var, max_length,
    # embedd_table, ...) that are only assigned further down, so it must not be called before
    # those assignments have run.
    def construct_input_layer():
        """Return the word-level input layer (with an embedding layer on top when fine-tuning)."""
        if fine_tune:
            layer_input = lasagne.layers.InputLayer(shape=(None, max_length), input_var=input_var, name='input')
            layer_embedding = lasagne.layers.EmbeddingLayer(layer_input, input_size=alphabet_size,
                                                            output_size=embedd_dim, W=embedd_table, name='embedding')
            return layer_embedding
        else:
            layer_input = lasagne.layers.InputLayer(shape=(None, max_length, embedd_dim), input_var=input_var,
                                                    name='input')
            return layer_input

    logger = utils.get_logger("BiRNN")
    fine_tune = args.fine_tune
    oov = args.oov
    regular = args.regular
    embedding = args.embedding
    embedding_path = args.embedding_dict
    train_path = args.train
    dev_path = args.dev
    test_path = args.test
    update_algo = args.update
    grad_clipping = args.grad_clipping
    gamma = args.gamma
    output_predict = args.output_prediction
    dropout = args.dropout

    # the last four values returned by the loader are not used by this model
    X_train, Y_train, mask_train, X_dev, Y_dev, mask_dev, X_test, Y_test, mask_test, \
    embedd_table, label_alphabet, _, _, _, _ = data_processor.load_dataset_sequence_labeling(train_path, dev_path,
                                                                                             test_path, oov=oov,
                                                                                             fine_tune=fine_tune,
                                                                                             embedding=embedding,
                                                                                             embedding_path=embedding_path)
    num_labels = label_alphabet.size() - 1

    logger.info("constructing network...")
    # create variables
    target_var = T.imatrix(name='targets')
    mask_var = T.matrix(name='masks', dtype=theano.config.floatX)
    if fine_tune:
        # word indices; the embedding table is part of the network
        input_var = T.imatrix(name='inputs')
        num_data, max_length = X_train.shape
        alphabet_size, embedd_dim = embedd_table.shape
    else:
        # 3-D float input: X_train carries an embedding dimension as its last axis
        input_var = T.tensor3(name='inputs', dtype=theano.config.floatX)
        num_data, max_length, embedd_dim = X_train.shape

    # construct input and mask layers
    layer_incoming = construct_input_layer()

    layer_mask = lasagne.layers.InputLayer(shape=(None, max_length), input_var=mask_var, name='mask')

    # construct bi-rnn
    num_units = args.num_units
    bi_rnn = build_BiRNN(layer_incoming, num_units, mask=layer_mask, grad_clipping=grad_clipping,
                         dropout=dropout)

    # reshape bi-rnn to [batch * max_length, num_units]
    bi_rnn = lasagne.layers.reshape(bi_rnn, (-1, [2]))

    # construct output layer (dense layer with softmax)
    layer_output = lasagne.layers.DenseLayer(bi_rnn, num_units=num_labels, nonlinearity=nonlinearities.softmax,
                                             name='softmax')

    # get output of bi-rnn shape=[batch * max_length, #label]
    prediction_train = lasagne.layers.get_output(layer_output)
    prediction_eval = lasagne.layers.get_output(layer_output, deterministic=True)
    final_prediction = T.argmax(prediction_eval, axis=1)

    # flat target_var to vector
    target_var_flatten = target_var.flatten()
    # flat mask_var to vector
    mask_var_flatten = mask_var.flatten()

    # compute loss
    # num_loss is the sum of the mask, used as the normaliser for the masked loss
    num_loss = mask_var_flatten.sum(dtype=theano.config.floatX)
    # for training, we use mean of loss over number of labels
    loss_train = lasagne.objectives.categorical_crossentropy(prediction_train, target_var_flatten)
    loss_train = (loss_train * mask_var_flatten).sum(dtype=theano.config.floatX) / num_loss
    ############################################
    # l2 regularization?
    if regular == 'l2':
        l2_penalty = lasagne.regularization.regularize_network_params(layer_output, lasagne.regularization.l2)
        loss_train = loss_train + gamma * l2_penalty
    # dima regularization?
    # if regular == 'dima':
    #     params_regular = utils.get_all_params_by_name(layer_output, name=['forward.hidden_to_hidden.W',
    #                                                                       'backward.hidden_to_hidden.W'])
    #     dima_penalty = lasagne.regularization.apply_penalty(params_regular, dima)
    #     loss_train = loss_train + gamma * dima_penalty

    loss_eval = lasagne.objectives.categorical_crossentropy(prediction_eval, target_var_flatten)
    loss_eval = (loss_eval * mask_var_flatten).sum(dtype=theano.config.floatX) / num_loss

    # compute number of correct labels
    corr_train = lasagne.objectives.categorical_accuracy(prediction_train, target_var_flatten)
    corr_train = (corr_train * mask_var_flatten).sum(dtype=theano.config.floatX)

    corr_eval = lasagne.objectives.categorical_accuracy(prediction_eval, target_var_flatten)
    corr_eval = (corr_eval * mask_var_flatten).sum(dtype=theano.config.floatX)

    # Create update expressions for training.
    # hyper parameters to tune: learning rate, momentum, regularization.
    batch_size = args.batch_size
    learning_rate = args.learning_rate
    decay_rate = args.decay_rate
    momentum = 0.9
    params = lasagne.layers.get_all_params(layer_output, trainable=True)
    updates = utils.create_updates(loss_train, params, update_algo, learning_rate, momentum=momentum)

    # Compile a function performing a training step on a mini-batch
    train_fn = theano.function([input_var, target_var, mask_var], [loss_train, corr_train, num_loss], updates=updates)
    # Compile a second function evaluating the loss and accuracy of network
    eval_fn = theano.function([input_var, target_var, mask_var], [loss_eval, corr_eval, num_loss, final_prediction])

    # Finally, launch the training loop.
    logger.info(
        "Start training: %s with regularization: %s(%f), dropout: %s, fine tune: %s (#training data: %d, batch size: %d, clip: %.1f)..."
        % (
            update_algo, regular, (0.0 if regular == 'none' else gamma), dropout, fine_tune, num_data, batch_size,
            grad_clipping))
    # Ceiling division: count the trailing partial batch too, otherwise the
    # "time left" estimate goes negative on the last batch whenever num_data is
    # not a multiple of batch_size.  (Assumes the minibatch iterator yields that
    # partial batch, as the min(...) clamp in the progress log suggests.)
    num_batches = (num_data + batch_size - 1) // batch_size
    num_epochs = 1000
    best_loss = 1e+12
    best_acc = 0.0
    best_epoch_loss = 0
    best_epoch_acc = 0
    best_loss_test_err = 0.
    best_loss_test_corr = 0.
    best_acc_test_err = 0.
    best_acc_test_corr = 0.
    stop_count = 0
    lr = learning_rate
    patience = args.patience
    for epoch in range(1, num_epochs + 1):
        print('Epoch %d (learning rate=%.4f, decay rate=%.4f): ' % (epoch, lr, decay_rate))
        train_err = 0.0
        train_corr = 0.0
        train_total = 0
        start_time = time.time()
        num_back = 0  # length of the progress line currently on screen
        train_batches = 0
        for batch in utils.iterate_minibatches(X_train, Y_train, masks=mask_train, batch_size=batch_size, shuffle=True):
            inputs, targets, masks, _ = batch
            err, corr, num = train_fn(inputs, targets, masks)
            # err is normalised by num inside the graph; scale back to a sum before accumulating
            train_err += err * num
            train_corr += corr
            train_total += num
            train_batches += 1
            time_ave = (time.time() - start_time) / train_batches
            time_left = (num_batches - train_batches) * time_ave

            # update log: backspace over the previous progress line, then rewrite it
            sys.stdout.write("\b" * num_back)
            log_info = 'train: %d/%d loss: %.4f, acc: %.2f%%, time left (estimated): %.2fs' % (
                min(train_batches * batch_size, num_data), num_data,
                train_err / train_total, train_corr * 100 / train_total, time_left)
            sys.stdout.write(log_info)
            num_back = len(log_info)
        # update training log after each epoch
        sys.stdout.write("\b" * num_back)
        print('train: %d/%d loss: %.4f, acc: %.2f%%, time: %.2fs' % (
            min(train_batches * batch_size, num_data), num_data,
            train_err / train_total, train_corr * 100 / train_total, time.time() - start_time))

        # evaluate performance on dev data
        dev_err = 0.0
        dev_corr = 0.0
        dev_total = 0
        for batch in utils.iterate_minibatches(X_dev, Y_dev, masks=mask_dev, batch_size=batch_size):
            inputs, targets, masks, _ = batch
            err, corr, num, predictions = eval_fn(inputs, targets, masks)
            dev_err += err * num
            dev_corr += corr
            dev_total += num
            if output_predict:
                utils.output_predictions(predictions, targets, masks, 'tmp/dev%d' % epoch, label_alphabet)

        print('dev loss: %.4f, corr: %d, total: %d, acc: %.2f%%' % (
            dev_err / dev_total, dev_corr, dev_total, dev_corr * 100 / dev_total))

        # best_loss is compared against the summed (not averaged) dev loss
        if best_loss < dev_err and best_acc > dev_corr / dev_total:
            # both dev metrics are worse than their best: one more epoch without progress
            stop_count += 1
        else:
            update_loss = False
            update_acc = False
            stop_count = 0
            if best_loss > dev_err:
                update_loss = True
                best_loss = dev_err
                best_epoch_loss = epoch
            if best_acc < dev_corr / dev_total:
                update_acc = True
                best_acc = dev_corr / dev_total
                best_epoch_acc = epoch

            # evaluate on test data when better performance detected
            test_err = 0.0
            test_corr = 0.0
            test_total = 0
            for batch in utils.iterate_minibatches(X_test, Y_test, masks=mask_test, batch_size=batch_size):
                inputs, targets, masks, _ = batch
                err, corr, num, predictions = eval_fn(inputs, targets, masks)
                test_err += err * num
                test_corr += corr
                test_total += num
                if output_predict:
                    utils.output_predictions(predictions, targets, masks, 'tmp/test%d' % epoch, label_alphabet)

            print('test loss: %.4f, corr: %d, total: %d, acc: %.2f%%' % (
                test_err / test_total, test_corr, test_total, test_corr * 100 / test_total))

            if update_loss:
                best_loss_test_err = test_err
                best_loss_test_corr = test_corr
            if update_acc:
                best_acc_test_err = test_err
                best_acc_test_corr = test_corr

        # stop once both dev loss and dev accuracy have been worse than their
        # best for `patience` consecutive epochs.
        if stop_count == patience:
            break

        # re-compile a function with new learning rate for training
        lr = learning_rate / (1.0 + epoch * decay_rate)
        updates = utils.create_updates(loss_train, params, update_algo, lr, momentum=momentum)
        train_fn = theano.function([input_var, target_var, mask_var], [loss_train, corr_train, num_loss],
                                   updates=updates)

    # print best performance on test data.
    # test_total still holds the count from the most recent test evaluation
    logger.info("final best loss test performance (at epoch %d)" % best_epoch_loss)
    print('test loss: %.4f, corr: %d, total: %d, acc: %.2f%%' % (
        best_loss_test_err / test_total, best_loss_test_corr, test_total, best_loss_test_corr * 100 / test_total))
    logger.info("final best acc test performance (at epoch %d)" % best_epoch_acc)
    print('test loss: %.4f, corr: %d, total: %d, acc: %.2f%%' % (
        best_acc_test_err / test_total, best_acc_test_corr, test_total, best_acc_test_corr * 100 / test_total))

Example 25

Project: bits
Source File: readline.py
View license
def _readline(prompt=""):
    """Read one interactively edited line, drawing it on every terminal.

    Writes ``prompt`` to ``sys.stdout`` and then loops on
    ``bits.input.get_key()``, applying each key to an in-memory line buffer
    and redrawing that buffer on each of the ``bits.get_term_count()``
    terminals.

    Returns:
        The line buffer followed by a newline when carriage return, newline
        or Ctrl-O is pressed; the empty string when Ctrl-D, Ctrl-Z or Esc is
        pressed while the buffer is empty.

    Side effects:
        Rebinds the module globals ``width``, ``height``, ``line_x``,
        ``line_y``, ``buffer_max``, ``kill_accumulate`` and ``ctrl_o_index``,
        and appends the accepted line to ``history``.  An ``IOError`` raised
        while handling a key is ignored and the loop continues.
    """
    global width, height, line_x, line_y, buffer_max, history, kill_ring, kill_accumulate, ctrl_o_index, completer

    # NOTE(review): presumably these context managers suspend output logging
    # and paging while the line is being edited -- confirm in redirect/pager.
    with redirect.nolog():
        with pager.nopager():
            sys.stdout.write(prompt)

            line_buffer = ''
            pos = 0
            # Length of the buffer as last drawn; used to blank leftover characters.
            prev_len = 0

            # Per-terminal geometry and the cursor position right after the prompt.
            term_count = bits.get_term_count()
            width = [0] * term_count
            height = [0] * term_count
            line_x = [0] * term_count
            line_y = [0] * term_count

            for term in range(term_count):
                width[term], height[term] = bits.get_width_height(term)
                line_x[term], line_y[term] = bits.get_xy(term)

            # Smallest capacity over all terminals, computed from each terminal's
            # size and the column where the editable line starts.
            buffer_max = min((width[term] - 2 - line_x[term]) + ((height[term] - 1) * (width[term] - 1)) for term in range(term_count))

            # history_index == len(history) denotes the new, not yet stored line.
            history_index = len(history)
            # Maps a history index to the (text, cursor) last shown for it during this call.
            history_state = dict()
            completer_state = 0
            last_yank_start = None
            kill_accumulate = False

            # A previous call ended with Ctrl-O: start on the history entry
            # that follows the one accepted then, if it exists.
            if ctrl_o_index is not None:
                if ctrl_o_index < len(history):
                    history_index = ctrl_o_index
                    line_buffer = history[history_index]
                    pos = len(line_buffer)
                ctrl_o_index = None

            while True:
                # Update history
                history_state[history_index] = (line_buffer, pos)

                try:
                    # clear any characters after the current line buffer
                    trailing_len = prev_len - len(line_buffer)
                    if trailing_len > 0:
                        for term in range(term_count):
                            trailing_x, trailing_y = PositionCursor(len(line_buffer), line_x[term], line_y[term], term)
                            print_buffer(" " * trailing_len, trailing_x, trailing_y, term)
                    prev_len = len(line_buffer)

                    for term in range(term_count):
                        # print the current line buffer
                        print_buffer(line_buffer, line_x[term], line_y[term], term)
                        # move the cursor to location of pos within the line buffer
                        PositionCursor(pos, line_x[term], line_y[term], term)

                    c = bits.input.get_key()

                    # Shorthands for building key values to compare against c.
                    key = bits.input.key
                    def ctrl(k):
                        return key(k, ctrl=True)

                    # Reset states that depend on last key
                    if c != key('y', alt=True):
                        last_yank_start = None
                    if c not in (ctrl('k'), ctrl('u'), ctrl('w')):
                        kill_accumulate = False

                    # Accept the line.
                    if c == key('\r') or c == key('\n') or c == ctrl('o'):
                        # Store the line, except that an empty line is not stored
                        # when history is empty or already ends with an empty entry.
                        if line_buffer or (history and history[-1]):
                            history.append(line_buffer)
                        if c == ctrl('o'): # Ctrl-O
                            # Remember where the next call should resume in history.
                            ctrl_o_index = history_index + 1
                        sys.stdout.write('\n')
                        return line_buffer + '\n'

                    if not (c == key('\t') or c == ctrl('i')):
                        # reset completer state to force restart of the completer
                        completer_state = 0

                    if c == key(bits.input.KEY_HOME) or c == ctrl('a'):
                        # start of line
                        pos = 0
                    elif c == key(bits.input.KEY_LEFT) or c == ctrl('b'):
                        # left
                        if pos != 0:
                            pos -= 1
                    elif c == ctrl('d'):
                        # EOF on an empty buffer; otherwise delete the character at the cursor
                        if len(line_buffer) == 0:
                            return ""
                        if pos < len(line_buffer):
                            line_buffer, pos = delete_char(line_buffer, pos)
                    elif c == key(bits.input.KEY_DELETE):
                        # delete the character at the cursor
                        if pos < len(line_buffer):
                            line_buffer, pos = delete_char(line_buffer, pos)
                    elif c == key(bits.input.KEY_END) or c == ctrl('e'):
                        # end of line
                        pos = len(line_buffer)
                    elif c == key(bits.input.KEY_RIGHT) or c == ctrl('f'):
                        # right
                        if pos != len(line_buffer):
                            pos += 1
                    elif c == key('\b') or c == ctrl('h'):
                        # backspace
                        line_buffer, pos = delete_char_left(line_buffer, pos)
                    elif c == key('\t') or c == ctrl('i'):
                        # tab completion
                        if completer is not None:
                            if completer_state != 0:
                                # The previous key was also a completion: remove the
                                # candidate it inserted.  (This loop rebinds c, which
                                # is not read again before the next get_key().)
                                for c in range(len(current_completion)):
                                    line_buffer, pos = delete_char_left(line_buffer, pos)
                            else:
                                # First completion: cut the text between the previous
                                # space and the cursor out of the buffer and keep it
                                # as the string handed to the completer.
                                cur = pos
                                while pos != 0 and line_buffer[pos-1] != ' ':
                                    pos -= 1
                                saved_str = line_buffer[pos:cur]
                                line_buffer = line_buffer[:pos] + line_buffer[cur:]
                            current_completion = completer(saved_str, completer_state)
                            completer_state += 1
                            if current_completion is not None:
                                for c in current_completion:
                                    line_buffer, pos = insert_char(line_buffer, c, pos)
                            else:
                                # None from the completer: put the saved text back
                                # and restart completion on the next tab.
                                for c in saved_str:
                                    line_buffer, pos = insert_char(line_buffer, c, pos)
                                completer_state = 0
                    elif c == ctrl('k'):
                        # delete from current to end of line
                        killed_text = line_buffer[pos:]
                        line_buffer = line_buffer[:pos]
                        add_to_kill_ring(killed_text, to_right=True)
                    elif c == ctrl('l'):
                        # clear screen
                        bits.clear_screen()
                        sys.stdout.write(prompt)
                        # the line now starts wherever the re-printed prompt ended
                        for term in range(term_count):
                            line_x[term], line_y[term] = bits.get_xy(term);
                    elif c == key(bits.input.KEY_DOWN) or c == ctrl('n'):
                        # Next line in history
                        if history_index < len(history):
                            history_index += 1
                            # Prefer the version edited during this call; fall back to
                            # an empty new line or the stored history entry.
                            if history_index == len(history):
                                line_buffer, pos = history_state.get(history_index, ('', 0))
                            else:
                                line_buffer, pos = history_state.get(history_index, (history[history_index], len(history[history_index])))
                    elif c == key(bits.input.KEY_UP) or c == ctrl('p'):
                        # Previous line in history
                        if history_index > 0:
                            history_index -= 1
                            line_buffer, pos = history_state.get(history_index, (history[history_index], len(history[history_index])))
                    elif c == ctrl('u'):
                        # delete from current to beginning of line
                        killed_text = line_buffer[:pos]
                        line_buffer = line_buffer[pos:]
                        pos = 0
                        add_to_kill_ring(killed_text, to_right=False)
                    elif c == ctrl(bits.input.KEY_LEFT):
                        # Move left by word
                        while pos != 0 and not line_buffer[pos-1].isalnum():
                            pos -= 1
                        while pos != 0 and line_buffer[pos-1].isalnum():
                            pos -= 1
                    elif c == ctrl(bits.input.KEY_RIGHT):
                        # Move right by word
                        end = len(line_buffer)
                        while pos != end and not line_buffer[pos].isalnum():
                            pos += 1
                        while pos != end and line_buffer[pos].isalnum():
                            pos += 1
                    elif c == ctrl('w'):
                        # delete previous word; note that this uses a different
                        # definition of "word" than Ctrl-Left and Ctrl-Right.
                        cur = pos
                        while pos != 0 and line_buffer[pos-1] == ' ':
                            pos -= 1
                        while pos != 0 and line_buffer[pos-1] != ' ':
                            pos -= 1
                        killed_text = line_buffer[pos:cur]
                        line_buffer = line_buffer[:pos] + line_buffer[cur:]
                        add_to_kill_ring(killed_text, to_right=False)
                    elif c == ctrl('y'):
                        # Yank
                        if kill_ring:
                            line_buffer, last_yank_start, pos = insert_string(line_buffer, kill_ring[-1], pos)
                    elif c == key('y', alt=True):
                        # If immediately after yank, rotate kill ring and yank
                        # the new top instead.
                        if last_yank_start is not None:
                            line_buffer = line_buffer[:last_yank_start] + line_buffer[pos:]
                            pos = last_yank_start
                            kill_ring.insert(0, kill_ring.pop()) # Rotate
                            line_buffer, last_yank_start, pos = insert_string(line_buffer, kill_ring[-1], pos)
                    elif c == ctrl('z') or c == key(bits.input.KEY_ESC):
                        # abandon input, but only when nothing has been typed
                        if len(line_buffer) == 0:
                            return ""
                    elif c.key in key_hooks:
                        # caller-registered hook for this key
                        key_hooks[c.key]()
                    elif not(c.ctrl) and not(c.alt) and isinstance(c.key, basestring) and c.key in string.printable:
                        # printable
                        try:
                            line_buffer, pos = insert_char(line_buffer, c.key.encode('ascii'), pos)
                        except UnicodeError:
                            # a key that cannot be encoded as ASCII is dropped
                            pass
                    else:
                        # any other key is ignored
                        pass

                except IOError:
                    # Best effort: ignore the failure and redraw on the next iteration.
                    pass

Example 26

Project: deepy
Source File: server.py
View license
    def handle_control(self, req, worker_id):
        """
        Handles a control_request received from a worker.

        Args:
            req: either the string 'next', or a message that is probed with
                ``in`` for one of the keys 'eval_done', 'valid_done',
                'train_done', 'get_num_batches_done', 'get_easgd_alpha',
                'sync_hyperparams', 'init_schedule' or 'set_names'.
            worker_id: identifier of the requesting worker; used for the
                prepared-worker bookkeeping and in log messages.

        Returns:
            string or dict: response

            '' - nothing to tell the worker (default)
            'get_num_batches' - the number of training batches is not known yet
            'stop' - the worker should quit
            'wait' - wait for 1 second
            'eval' - evaluate on valid and test set to start a new epoch
            'sync_hyperparams' - set learning rate
            'valid' - evaluate on valid and test set, then save the params
            'train' - train next batches

            For a 'get_easgd_alpha' request the stored alpha value is returned.
        """
        if self.start_time is None: self.start_time = time.time()
        response = ""

        if req == 'next':
            if self.num_train_batches == 0:
                response = "get_num_batches"
            elif self._done:
                response = "stop"
                self.worker_is_done(worker_id)
            elif self._evaluating:
                response = 'wait'
            elif not self.batch_pool:
                # End of one iter
                if self._train_costs:
                    with self._lock:
                        # Return to column 0 so the log line overwrites the progress line.
                        sys.stdout.write("\r")
                        sys.stdout.flush()
                        mean_costs = []
                        for i in range(len(self._training_names)):
                            mean_costs.append(np.mean([c[i] for c in self._train_costs]))
                        self.log("train   (epoch={:2d}) {}".format(
                            self.epoch,
                            self.get_monitor_string(zip(self._training_names, mean_costs)))
                        )
                response = {'eval': None, 'best_valid_cost': self._best_valid_cost}
                self._evaluating = True
            else:
                # Continue training
                if worker_id not in self.prepared_worker_pool:
                    # First contact from this worker: hand it the hyperparameters.
                    response = {"sync_hyperparams": self.feed_hyperparams()}
                    self.prepared_worker_pool.add(worker_id)
                elif self._iters_from_last_valid >= self._valid_freq:
                    response = {'valid': None, 'best_valid_cost': self._best_valid_cost}
                    self._iters_from_last_valid = 0
                else:
                    response = {"train": self.feed_batches()}
        elif 'eval_done' in req:
            with self._lock:
                self._evaluating = False
                sys.stdout.write("\r")
                sys.stdout.flush()
                if 'test_costs' in req and req['test_costs']:
                    self.log("test    (epoch={:2d}) {}".format(
                        self.epoch,
                        self.get_monitor_string(req['test_costs']))
                    )
                # Guard on the validation costs themselves; the previous check
                # of req['test_costs'] raised KeyError when only validation
                # costs were reported.
                if 'valid_costs' in req and req['valid_costs']:
                    valid_J = req['valid_costs'][0][1]
                    if valid_J < self._best_valid_cost:
                        self._best_valid_cost = valid_J
                        star_str = "*"
                    else:
                        star_str = ""
                    # Log in both cases so that a new best (marked with "*")
                    # is reported as well.
                    self.log("valid   (epoch={:2d}) {} {} (worker {})".format(
                        self.epoch,
                        self.get_monitor_string(req['valid_costs']),
                        star_str,
                        worker_id))
                    # if star_str and 'auto_save' in req and req['auto_save']:
                    #     self.log("(worker {}) save the model to {}".format(
                    #         worker_id,
                    #         req['auto_save']
                    #     ))
                continue_training = self.prepare_epoch()
                self._epoch_start_time = time.time()
                if not continue_training:
                    self._done = True
                    self.log("training time {:.4f}s".format(time.time() - self.start_time))
                    response = "stop"
        elif 'valid_done' in req:
            with self._lock:
                sys.stdout.write("\r")
                sys.stdout.flush()
                if 'valid_costs' in req:
                    valid_J = req['valid_costs'][0][1]
                    if valid_J < self._best_valid_cost:
                        self._best_valid_cost = valid_J
                        star_str = "*"
                    else:
                        star_str = ""
                    self.log("valid   ( dryrun ) {} {} (worker {})".format(
                        self.get_monitor_string(req['valid_costs']),
                        star_str,
                        worker_id
                    ))
                    # if star_str and 'auto_save' in req and req['auto_save']:
                    #     self.log("(worker {}) save the model to {}".format(
                    #         worker_id,
                    #         req['auto_save']
                    #     ))
        elif 'train_done' in req:
            costs = req['costs']
            self._train_costs.append(costs)
            # "\x1b[2K" erases the current terminal line before the progress
            # text is rewritten in place.
            sys.stdout.write("\x1b[2K\r> %d%% | J=%.2f | %.1f batch/s" % (
                self._current_iter * 100 / self.num_train_batches,
                costs[0], float(len(self._train_costs)*self.step_len)/(time.time() - self._epoch_start_time)))
            sys.stdout.flush()
        elif 'get_num_batches_done' in req:
            self.num_train_batches = req['get_num_batches_done']
        elif 'get_easgd_alpha' in req:
            response = self._easgd_alpha
        elif 'sync_hyperparams' in req:
            response = {"sync_hyperparams": self.feed_hyperparams()}
        elif 'init_schedule' in req:
            with self._lock:
                sys.stdout.write("\r")
                sys.stdout.flush()
                self.log("worker {} connected".format(worker_id))
                # The schedule is only taken over while still in epoch 0.
                if self.epoch == 0:
                    schedule_params = req['init_schedule']
                    sch_str = " ".join("{}={}".format(a, b) for (a, b) in schedule_params.items())
                    self.log("initialize the schedule with {}".format(sch_str))
                    for key, val in schedule_params.items():
                        # Falsy values (None, 0, '') leave the current setting untouched.
                        if not val: continue
                        if key == 'learning_rate':
                            self._lr = val
                        elif key == 'start_halving_at':
                            self.epoch_start_halving = val
                        elif key == 'end_at':
                            self.end_at = val
                        elif key == 'step_len':
                            self.step_len = val
                        elif key == 'valid_freq':
                            self._valid_freq = val

        elif 'set_names' in req:
            self._training_names = req['training_names']
            self._evaluation_names = req['evaluation_names']


        return response

Example 27

Project: NaNoGenLab
Source File: advanced-spoonerizer.py
View license
def main(argv):
    """Spoonerize the text files named in ``argv`` and write the result to stdout.

    ``argv`` is a full argument vector: ``argv[0]`` is skipped, options are
    parsed with ``optparse`` (the module docstring is the usage text) and the
    remaining arguments are the input file names.

    The input is split into words and then into clauses.  Within each clause
    every pair of words is scored by ``calculate_schooner_spore`` after their
    initial consonants have been exchanged; the best-scoring exchange, if
    any, is applied before the clause is written out with ``sentencify``.
    """
    optparser = OptionParser(__doc__)
    optparser.add_option("--debug", default=False, action='store_true',
                         help="show me the SchoonerSpores[tm]")
    optparser.add_option("--exclude-dictionary", default='',
                         help="comma-separated list of words that will not be "
                              "considered to be dictionary words")
    optparser.add_option("--disable-picking", default='',
                         help="comma-separated list of words that will be "
                              "not be picked from sentences")
    optparser.add_option("--disable-swapping", default='',
                         help="comma-separated list of colon-separated "
                              "pairs of words that will be "
                              "not be considered for swapping")
    optparser.add_option("--dictionary-words-only", default=False,
                         action='store_true',
                         help="only swap words when both words are "
                              "dictionary words")
    optparser.add_option("--remove-quotes", default=False, action='store_true',
                         help="strip double quotes from input words")
    (options, args) = optparser.parse_args(argv[1:])

    filenames = args

    load_dictionary(options.exclude_dictionary.split(','))
    disable_picking = set(
        w.upper() for w in options.disable_picking.split(',')
    )
    # Each entry is an unordered pair of upper-cased words.  upper() must be
    # called here: storing the bound method meant no pair could ever match.
    disable_swapping = set(
        frozenset(z.upper() for z in x.split(':'))
        for x in options.disable_swapping.split(',')
    )

    words = []

    for filename in filenames:
        with open(filename, 'r') as f:
            for line in f:
                line = line.strip().replace('--', '-- ')
                words.extend(line.split())
                # A blank line ends a paragraph.  Consecutive blank lines
                # collapse into one break, and blank lines before the first
                # word are ignored (words[-1] used to raise IndexError there).
                if line == '' and words and words[-1] is not PARAGRAPH_BREAK:
                    words.append(PARAGRAPH_BREAK)

    BASE_CLAUSE_ENDERS = ['.', '!', '?', ';', ':', ',', '--']
    # Also accept each ender followed by a closing quote character.
    CLAUSE_ENDERS = tuple(
        BASE_CLAUSE_ENDERS +
        [c + '"' for c in BASE_CLAUSE_ENDERS] +
        [c + "'" for c in BASE_CLAUSE_ENDERS]
    )

    sentences = []  # actually clauses. :/
    sentence = []
    for word in words:
        if word is PARAGRAPH_BREAK:
            # Flush the clause in progress before recording the break.
            if sentence:
                sentences.append(sentence)
                sentence = []
            sentences.append(PARAGRAPH_BREAK)
            continue
        if options.remove_quotes:
            if word.startswith(('"', "'")):
                word = word[1:]
            if word.endswith(('"', "'")):
                word = word[:-1]
        sentence.append(word)
        # The listed abbreviations end in '.' but do not end a clause.
        if (word not in ('Mr.', 'Mrs.', 'Dr.') and
            word.endswith(CLAUSE_ENDERS)):
            sentences.append(sentence)
            sentence = []

    sentences.append(sentence)

    for sentence in sentences:
        if sentence is PARAGRAPH_BREAK:
            sys.stdout.write('\n\n')
            continue
        scores = {}  # frozenset of two (word, new_word, pos) tuples -> score
        for (pos1, word1) in enumerate(sentence):
            for (pos2, word2) in enumerate(sentence):
                clean_word1 = clean(word1)
                clean_word2 = clean(word2)
                if clean_word1 == clean_word2:
                    continue
                if len(clean_word1) <= 2 or len(clean_word2) <= 2:
                    continue
                if clean_word1 in disable_picking or clean_word2 in disable_picking:
                    continue

                if frozenset([clean_word1, clean_word2]) in disable_swapping:
                    continue

                (pre1, cons1, base1) = strip_initial_consonants(word1)
                (pre2, cons2, base2) = strip_initial_consonants(word2)
                if len(cons1) == 0 and len(cons2) == 0:
                    continue
                if cons1.upper() == cons2.upper():
                    continue

                # Exchange the initial consonant clusters of the two words.
                new1 = pre1 + cons2 + base1
                new2 = pre2 + cons1 + base2

                pair = frozenset([(word1, new1, pos1), (word2, new2, pos2)])

                scores[pair] = calculate_schooner_spore(
                    cons1, word1, new1, pos1,
                    cons2, word2, new2, pos2,
                    dictionary_words_only=options.dictionary_words_only
                )

        if options.debug:
            # Written with sys.stdout.write so the block runs on Python 2 and
            # 3; the output is the same as the former print statements.
            ranked = sorted(
                [(score, pair) for pair, score in scores.items()],
                reverse=True
            )
            sys.stdout.write(' '.join(sentence) + '\n')
            for (score, pair) in ranked:
                sys.stdout.write('%s %s\n' % (score, pair))
            sys.stdout.write('\n')

        best_score = AWFUL_SCORE
        best_pair = None
        for pair, score in scores.items():
            if score > best_score:
                best_score = score
                best_pair = pair

        if best_pair is not None and best_score != AWFUL_SCORE:
            # Apply the winning exchange in place, keeping each word's casing.
            best_pair = list(best_pair)
            (word1, new1, pos1) = best_pair[0]
            (word2, new2, pos2) = best_pair[1]
            new1 = adjust_case(new1, word1)
            new2 = adjust_case(new2, word2)
            sentence[pos2] = new2
            sentence[pos1] = new1
        sys.stdout.write(sentencify(sentence))

Example 28

Project: cgstudiomap
Source File: miniterm.py
View license
    def writer(self):
        """\
        Loop and copy console->serial until EXITCHARCTER character is
        found. When MENUCHARACTER is found, interpret the next key
        locally.

        Keys are read with console.getkey(); a KeyboardInterrupt while
        reading is forwarded to the serial port as byte 3.  If the loop
        is left through any exception, self.alive is cleared and the
        exception is re-raised.
        """
        menu_active = False
        try:
            while self.alive:
                try:
                    b = console.getkey()
                except KeyboardInterrupt:
                    b = serial.to_bytes([3])
                c = character(b)
                if menu_active:
                    if c == MENUCHARACTER or c == EXITCHARCTER: # Menu character again/exit char -> send itself
                        self.serial.write(b)                    # send character
                        if self.echo:
                            sys.stdout.write(c)
                    elif c == '\x15':                       # CTRL+U -> upload file
                        sys.stderr.write('\n--- File to upload: ')
                        sys.stderr.flush()
                        console.cleanup()
                        filename = sys.stdin.readline().rstrip('\r\n')
                        if filename:
                            try:
                                # The with block closes the file even if a write
                                # fails.  Iterating to EOF sends blank lines too,
                                # instead of ending the upload at the first one.
                                with open(filename, 'r') as upload:
                                    sys.stderr.write('--- Sending file %s ---\n' % filename)
                                    for line in upload:
                                        self.serial.write(line.rstrip('\r\n'))
                                        self.serial.write('\r\n')
                                        # Wait for output buffer to drain.
                                        self.serial.flush()
                                        sys.stderr.write('.')   # Progress indicator.
                                sys.stderr.write('\n--- File %s sent ---\n' % filename)
                            except IOError as e:
                                sys.stderr.write('--- ERROR opening file %s: %s ---\n' % (filename, e))
                        console.setup()
                    elif c in '\x08hH?':                    # CTRL+H, h, H, ? -> Show help
                        sys.stderr.write(get_help_text())
                    elif c == '\x12':                       # CTRL+R -> Toggle RTS
                        self.rts_state = not self.rts_state
                        self.serial.setRTS(self.rts_state)
                        sys.stderr.write('--- RTS %s ---\n' % ('active' if self.rts_state else 'inactive'))
                    elif c == '\x04':                       # CTRL+D -> Toggle DTR
                        self.dtr_state = not self.dtr_state
                        self.serial.setDTR(self.dtr_state)
                        sys.stderr.write('--- DTR %s ---\n' % ('active' if self.dtr_state else 'inactive'))
                    elif c == '\x02':                       # CTRL+B -> toggle BREAK condition
                        self.break_state = not self.break_state
                        self.serial.setBreak(self.break_state)
                        sys.stderr.write('--- BREAK %s ---\n' % ('active' if self.break_state else 'inactive'))
                    elif c == '\x05':                       # CTRL+E -> toggle local echo
                        self.echo = not self.echo
                        sys.stderr.write('--- local echo %s ---\n' % ('active' if self.echo else 'inactive'))
                    elif c == '\x09':                       # CTRL+I -> info
                        self.dump_port_settings()
                    elif c == '\x01':                       # CTRL+A -> cycle escape mode
                        self.repr_mode += 1
                        if self.repr_mode > 3:
                            self.repr_mode = 0
                        sys.stderr.write('--- escape data: %s ---\n' % (
                            REPR_MODES[self.repr_mode],
                        ))
                    elif c == '\x0c':                       # CTRL+L -> cycle linefeed mode
                        self.convert_outgoing += 1
                        if self.convert_outgoing > 2:
                            self.convert_outgoing = 0
                        self.newline = NEWLINE_CONVERISON_MAP[self.convert_outgoing]
                        sys.stderr.write('--- line feed %s ---\n' % (
                            LF_MODES[self.convert_outgoing],
                        ))
                    elif c in 'pP':                         # P -> change port
                        dump_port_list()
                        sys.stderr.write('--- Enter port name: ')
                        sys.stderr.flush()
                        console.cleanup()
                        try:
                            port = sys.stdin.readline().strip()
                        except KeyboardInterrupt:
                            port = None
                        console.setup()
                        if port and port != self.serial.port:
                            # reader thread needs to be shut down
                            self._stop_reader()
                            # save settings
                            settings = self.serial.getSettingsDict()
                            # Stays None if no port object could be created, so the
                            # error handler below never touches an unbound name.
                            new_serial = None
                            try:
                                try:
                                    new_serial = serial.serial_for_url(port, do_not_open=True)
                                except AttributeError:
                                    # happens when the installed pyserial is older than 2.5. use the
                                    # Serial class directly then.
                                    new_serial = serial.Serial()
                                    new_serial.port = port
                                # restore settings and open
                                new_serial.applySettingsDict(settings)
                                new_serial.open()
                                new_serial.setRTS(self.rts_state)
                                new_serial.setDTR(self.dtr_state)
                                new_serial.setBreak(self.break_state)
                            except Exception as e:
                                sys.stderr.write('--- ERROR opening new port: %s ---\n' % (e,))
                                if new_serial is not None:
                                    new_serial.close()
                            else:
                                self.serial.close()
                                self.serial = new_serial
                                sys.stderr.write('--- Port changed to: %s ---\n' % (self.serial.port,))
                            # and restart the reader thread
                            self._start_reader()
                    elif c in 'bB':                         # B -> change baudrate
                        sys.stderr.write('\n--- Baudrate: ')
                        sys.stderr.flush()
                        console.cleanup()
                        backup = self.serial.baudrate
                        try:
                            self.serial.baudrate = int(sys.stdin.readline().strip())
                        except ValueError as e:
                            sys.stderr.write('--- ERROR setting baudrate: %s ---\n' % (e,))
                            self.serial.baudrate = backup
                        else:
                            self.dump_port_settings()
                        console.setup()
                    elif c == '8':                          # 8 -> change to 8 bits
                        self.serial.bytesize = serial.EIGHTBITS
                        self.dump_port_settings()
                    elif c == '7':                          # 7 -> change to 7 bits
                        self.serial.bytesize = serial.SEVENBITS
                        self.dump_port_settings()
                    elif c in 'eE':                         # E -> change to even parity
                        self.serial.parity = serial.PARITY_EVEN
                        self.dump_port_settings()
                    elif c in 'oO':                         # O -> change to odd parity
                        self.serial.parity = serial.PARITY_ODD
                        self.dump_port_settings()
                    elif c in 'mM':                         # M -> change to mark parity
                        self.serial.parity = serial.PARITY_MARK
                        self.dump_port_settings()
                    elif c in 'sS':                         # S -> change to space parity
                        self.serial.parity = serial.PARITY_SPACE
                        self.dump_port_settings()
                    elif c in 'nN':                         # N -> change to no parity
                        self.serial.parity = serial.PARITY_NONE
                        self.dump_port_settings()
                    elif c == '1':                          # 1 -> change to 1 stop bits
                        self.serial.stopbits = serial.STOPBITS_ONE
                        self.dump_port_settings()
                    elif c == '2':                          # 2 -> change to 2 stop bits
                        self.serial.stopbits = serial.STOPBITS_TWO
                        self.dump_port_settings()
                    elif c == '3':                          # 3 -> change to 1.5 stop bits
                        self.serial.stopbits = serial.STOPBITS_ONE_POINT_FIVE
                        self.dump_port_settings()
                    elif c in 'xX':                         # X -> change software flow control
                        # upper case enables, lower case disables
                        self.serial.xonxoff = (c == 'X')
                        self.dump_port_settings()
                    elif c in 'rR':                         # R -> change hardware flow control
                        # upper case enables, lower case disables
                        self.serial.rtscts = (c == 'R')
                        self.dump_port_settings()
                    else:
                        sys.stderr.write('--- unknown menu character %s --\n' % key_description(c))
                    # the menu only applies to the single key after MENUCHARACTER
                    menu_active = False
                elif c == MENUCHARACTER: # next char will be for menu
                    menu_active = True
                elif c == EXITCHARCTER:
                    self.stop()
                    break                                   # exit app
                elif c == '\n':
                    self.serial.write(self.newline)         # send newline character(s)
                    if self.echo:
                        sys.stdout.write(c)                 # local echo is a real newline in any case
                        sys.stdout.flush()
                else:
                    self.serial.write(b)                    # send byte
                    if self.echo:
                        sys.stdout.write(c)
                        sys.stdout.flush()
        except:
            # Deliberately bare: whatever ends the loop (including
            # KeyboardInterrupt/SystemExit) must clear the flag before
            # the exception propagates.
            self.alive = False
            raise

Example 29

Project: cgstudiomap
Source File: miniterm.py
View license
    def writer(self):
        """\
        Loop and copy console->serial until EXITCHARCTER character is
        found. When MENUCHARACTER is found, interpret the next key
        locally.

        Keys read from ``console.getkey()`` are forwarded to ``self.serial``
        unless the previous key was MENUCHARACTER, in which case the key is
        dispatched as a menu command (file upload, help, RTS/DTR/BREAK
        toggles, port/baudrate/parity/... changes).  A KeyboardInterrupt
        while waiting for a key is forwarded as byte 0x03.

        Any exception leaving the loop clears ``self.alive`` before being
        re-raised.
        """
        menu_active = False
        try:
            while self.alive:
                try:
                    b = console.getkey()
                except KeyboardInterrupt:
                    b = serial.to_bytes([3])
                c = character(b)
                if menu_active:
                    if c == MENUCHARACTER or c == EXITCHARCTER: # Menu character again/exit char -> send itself
                        self.serial.write(b)                    # send character
                        if self.echo:
                            sys.stdout.write(c)
                    elif c == '\x15':                       # CTRL+U -> upload file
                        sys.stderr.write('\n--- File to upload: ')
                        sys.stderr.flush()
                        console.cleanup()
                        filename = sys.stdin.readline().rstrip('\r\n')
                        if filename:
                            try:
                                upload = open(filename, 'r')
                                try:
                                    sys.stderr.write('--- Sending file %s ---\n' % filename)
                                    # Iterate to real EOF: a blank line inside the
                                    # file must not end the transfer early.
                                    for raw_line in upload:
                                        line = raw_line.rstrip('\r\n')
                                        if line:
                                            self.serial.write(line)
                                        self.serial.write('\r\n')
                                        # Wait for output buffer to drain.
                                        self.serial.flush()
                                        sys.stderr.write('.')   # Progress indicator.
                                    sys.stderr.write('\n--- File %s sent ---\n' % filename)
                                finally:
                                    # always release the file handle, even if the
                                    # serial write fails half way through
                                    upload.close()
                            except IOError as e:
                                sys.stderr.write('--- ERROR opening file %s: %s ---\n' % (filename, e))
                        console.setup()
                    elif c in '\x08hH?':                    # CTRL+H, h, H, ? -> Show help
                        sys.stderr.write(get_help_text())
                    elif c == '\x12':                       # CTRL+R -> Toggle RTS
                        self.rts_state = not self.rts_state
                        self.serial.setRTS(self.rts_state)
                        sys.stderr.write('--- RTS %s ---\n' % (self.rts_state and 'active' or 'inactive'))
                    elif c == '\x04':                       # CTRL+D -> Toggle DTR
                        self.dtr_state = not self.dtr_state
                        self.serial.setDTR(self.dtr_state)
                        sys.stderr.write('--- DTR %s ---\n' % (self.dtr_state and 'active' or 'inactive'))
                    elif c == '\x02':                       # CTRL+B -> toggle BREAK condition
                        self.break_state = not self.break_state
                        self.serial.setBreak(self.break_state)
                        sys.stderr.write('--- BREAK %s ---\n' % (self.break_state and 'active' or 'inactive'))
                    elif c == '\x05':                       # CTRL+E -> toggle local echo
                        self.echo = not self.echo
                        sys.stderr.write('--- local echo %s ---\n' % (self.echo and 'active' or 'inactive'))
                    elif c == '\x09':                       # CTRL+I -> info
                        self.dump_port_settings()
                    elif c == '\x01':                       # CTRL+A -> cycle escape mode
                        self.repr_mode += 1
                        if self.repr_mode > 3:
                            self.repr_mode = 0
                        sys.stderr.write('--- escape data: %s ---\n' % (
                            REPR_MODES[self.repr_mode],
                        ))
                    elif c == '\x0c':                       # CTRL+L -> cycle linefeed mode
                        self.convert_outgoing += 1
                        if self.convert_outgoing > 2:
                            self.convert_outgoing = 0
                        self.newline = NEWLINE_CONVERISON_MAP[self.convert_outgoing]
                        sys.stderr.write('--- line feed %s ---\n' % (
                            LF_MODES[self.convert_outgoing],
                        ))
                    elif c in 'pP':                         # P -> change port
                        dump_port_list()
                        sys.stderr.write('--- Enter port name: ')
                        sys.stderr.flush()
                        console.cleanup()
                        try:
                            port = sys.stdin.readline().strip()
                        except KeyboardInterrupt:
                            port = None
                        console.setup()
                        if port and port != self.serial.port:
                            # reader thread needs to be shut down
                            self._stop_reader()
                            # save settings
                            settings = self.serial.getSettingsDict()
                            # stays None if creating the port object itself fails,
                            # so the error handler below does not hit a NameError
                            new_serial = None
                            try:
                                try:
                                    new_serial = serial.serial_for_url(port, do_not_open=True)
                                except AttributeError:
                                    # happens when the installed pyserial is older than 2.5. use the
                                    # Serial class directly then.
                                    new_serial = serial.Serial()
                                    new_serial.port = port
                                # restore settings and open
                                new_serial.applySettingsDict(settings)
                                new_serial.open()
                                new_serial.setRTS(self.rts_state)
                                new_serial.setDTR(self.dtr_state)
                                new_serial.setBreak(self.break_state)
                            except Exception as e:
                                sys.stderr.write('--- ERROR opening new port: %s ---\n' % (e,))
                                if new_serial is not None:
                                    new_serial.close()
                            else:
                                self.serial.close()
                                self.serial = new_serial
                                sys.stderr.write('--- Port changed to: %s ---\n' % (self.serial.port,))
                            # and restart the reader thread
                            self._start_reader()
                    elif c in 'bB':                         # B -> change baudrate
                        sys.stderr.write('\n--- Baudrate: ')
                        sys.stderr.flush()
                        console.cleanup()
                        backup = self.serial.baudrate
                        try:
                            self.serial.baudrate = int(sys.stdin.readline().strip())
                        except ValueError as e:
                            sys.stderr.write('--- ERROR setting baudrate: %s ---\n' % (e,))
                            self.serial.baudrate = backup
                        else:
                            self.dump_port_settings()
                        console.setup()
                    elif c == '8':                          # 8 -> change to 8 bits
                        self.serial.bytesize = serial.EIGHTBITS
                        self.dump_port_settings()
                    elif c == '7':                          # 7 -> change to 7 bits
                        self.serial.bytesize = serial.SEVENBITS
                        self.dump_port_settings()
                    elif c in 'eE':                         # E -> change to even parity
                        self.serial.parity = serial.PARITY_EVEN
                        self.dump_port_settings()
                    elif c in 'oO':                         # O -> change to odd parity
                        self.serial.parity = serial.PARITY_ODD
                        self.dump_port_settings()
                    elif c in 'mM':                         # M -> change to mark parity
                        self.serial.parity = serial.PARITY_MARK
                        self.dump_port_settings()
                    elif c in 'sS':                         # S -> change to space parity
                        self.serial.parity = serial.PARITY_SPACE
                        self.dump_port_settings()
                    elif c in 'nN':                         # N -> change to no parity
                        self.serial.parity = serial.PARITY_NONE
                        self.dump_port_settings()
                    elif c == '1':                          # 1 -> change to 1 stop bits
                        self.serial.stopbits = serial.STOPBITS_ONE
                        self.dump_port_settings()
                    elif c == '2':                          # 2 -> change to 2 stop bits
                        self.serial.stopbits = serial.STOPBITS_TWO
                        self.dump_port_settings()
                    elif c == '3':                          # 3 -> change to 1.5 stop bits
                        self.serial.stopbits = serial.STOPBITS_ONE_POINT_FIVE
                        self.dump_port_settings()
                    elif c in 'xX':                         # X -> change software flow control
                        self.serial.xonxoff = (c == 'X')    # upper case enables, lower case disables
                        self.dump_port_settings()
                    elif c in 'rR':                         # R -> change hardware flow control
                        self.serial.rtscts = (c == 'R')     # upper case enables, lower case disables
                        self.dump_port_settings()
                    else:
                        sys.stderr.write('--- unknown menu character %s --\n' % key_description(c))
                    # a menu command is always exactly one key long
                    menu_active = False
                elif c == MENUCHARACTER: # next char will be for menu
                    menu_active = True
                elif c == EXITCHARCTER:
                    self.stop()
                    break                                   # exit app
                elif c == '\n':
                    self.serial.write(self.newline)         # send newline character(s)
                    if self.echo:
                        sys.stdout.write(c)                 # local echo is a real newline in any case
                        sys.stdout.flush()
                else:
                    self.serial.write(b)                    # send byte
                    if self.echo:
                        sys.stdout.write(c)
                        sys.stdout.flush()
        except:
            # deliberately bare: whatever ends the loop (including
            # KeyboardInterrupt/SystemExit) must clear the alive flag
            # before the exception propagates
            self.alive = False
            raise

Example 30

Project: ibis
Source File: ipython_directive.py
View license
    def process_input(self, data, input_prompt, lineno):
        """
        Process data block for INPUT token.

        Parameters
        ----------
        data : tuple
            ``(decorator, input, rest)``; ``decorator`` may be None or a
            string such as ``'@verbatim'``, ``'@suppress'``, ``'@okexcept'``,
            ``'@okwarning'``, ``'@doctest ...'`` or ``'@savefig ...'``.
        input_prompt : str
            Prompt text prepended to the first input line.
        lineno : int
            Used only to size the continuation prompt; it is later
            overwritten with the document's current line for messages.

        Returns
        -------
        tuple
            ``(ret, input_lines, output, is_doctest, decorator, image_file,
            image_directive)`` where ``ret`` is the list of formatted
            lines/output collected for the block and ``output`` is whatever
            was captured in ``self.cout`` while the lines were executed.
        """
        decorator, input, rest = data
        image_file = None
        image_directive = None

        is_verbatim = decorator=='@verbatim' or self.is_verbatim
        is_doctest = (decorator is not None and \
                     decorator.startswith('@doctest')) or self.is_doctest
        is_suppress = decorator=='@suppress' or self.is_suppress
        is_okexcept = decorator=='@okexcept' or self.is_okexcept
        is_okwarning = decorator=='@okwarning' or self.is_okwarning
        is_savefig = decorator is not None and \
                     decorator.startswith('@savefig')

        # hand the configured output encodings to the capture buffer
        # before anything is executed
        self.cout.set_encodings(self.output_encoding)

        input_lines = input.split('\n')

        if len(input_lines) > 1 and input_lines[-1] != "":
            input_lines.append('') # make sure there's a blank line
                                   # so splitter buffer gets reset

        continuation = '   %s:'%''.join(['.']*(len(str(lineno))+2))

        if is_savefig:
            image_file, image_directive = self.process_image(decorator)

        ret = []
        is_semicolon = False

        # Hold the execution count, if requested to do so.
        store_history = not (is_suppress and self.hold_count)

        # Note: catch_warnings is not thread safe
        with warnings.catch_warnings(record=True) as ws:
            for i, line in enumerate(input_lines):
                if line.endswith(';'):
                    is_semicolon = True

                if i == 0:
                    # process the first input line
                    if is_verbatim:
                        self.process_input_line('')
                        self.IP.execution_count += 1 # increment it anyway
                    else:
                        # only submit the line in non-verbatim mode
                        self.process_input_line(line, store_history=store_history)
                    formatted_line = '%s %s'%(input_prompt, line)
                else:
                    # process a continuation line
                    if not is_verbatim:
                        self.process_input_line(line, store_history=store_history)

                    formatted_line = '%s %s'%(continuation, line)

                if not is_suppress:
                    ret.append(formatted_line)

        if not is_suppress and rest.strip() and is_verbatim:
            # the "rest" is the standard output of the
            # input, which needs to be added in
            # verbatim mode
            ret.append(rest)

        self.cout.seek(0)
        output = self.cout.read()
        if not is_suppress and not is_semicolon:
            ret.append(output)
        elif is_semicolon: # get spacing right
            ret.append('')

        # context information
        filename = self.state.document.current_source
        lineno = self.state.document.current_line

        # output any exceptions raised during execution to stdout
        # unless :okexcept: has been specified.
        if not is_okexcept and "Traceback" in output:
            s =  "\nException in %s at block ending on line %s\n" % (filename, lineno)
            s += "Specify :okexcept: as an option in the ipython:: block to suppress this message\n"
            sys.stdout.write('\n\n>>>' + ('-' * 73))
            sys.stdout.write(s)
            sys.stdout.write(output)
            sys.stdout.write('<<<' + ('-' * 73) + '\n\n')

        # output any warning raised during execution to stdout
        # unless :okwarning: has been specified.
        if not is_okwarning:
            for w in ws:
                s =  "\nWarning in %s at block ending on line %s\n" % (filename, lineno)
                s += "Specify :okwarning: as an option in the ipython:: block to suppress this message\n"
                sys.stdout.write('\n\n>>>' + ('-' * 73))
                sys.stdout.write(s)
                sys.stdout.write('-' * 76 + '\n')
                s=warnings.formatwarning(w.message, w.category,
                                         w.filename, w.lineno, w.line)
                sys.stdout.write(s)
                sys.stdout.write('<<<' + ('-' * 73) + '\n')

        # Rewind before truncating: the read() above left the stream
        # position at the end, and truncate() on io streams does not move
        # it, so the next block's output would otherwise be written at the
        # stale offset (padding the buffer with NULs).
        self.cout.seek(0)
        self.cout.truncate(0)
        return (ret, input_lines, output, is_doctest, decorator, image_file,
                    image_directive)

Example 31

Project: elijah-openstack
Source File: cloudlet_client.py
View license
def request_import_basevm(server_address, token,
                          endpoint, glance_endpoint,
                          import_filepath, basevm_name):
    """Import a packaged base VM (zip file) and upload its parts to glance.

    The package is unpacked into a temporary directory, a matching flavor
    is looked up (or created), and the memory snapshot, disk hash, memory
    hash and finally the disk image are uploaded as four glance images.
    The temporary directory and every file handle opened here are released
    whether or not the upload succeeds.

    :param server_address: passed through to ``get_list``/``create_flavor``
    :param token: auth token used for the API calls and the glance client
    :param endpoint: passed through to ``get_list``/``create_flavor``
    :param glance_endpoint: object exposing ``scheme`` and ``netloc``, used
        to build the glance URL
    :param import_filepath: path of the base VM zip package
    :param basevm_name: prefix for the uploaded image names and for the
        flavor name if a new flavor has to be created
    :returns: the glance image object created for the base disk
    :raises CloudletClientError: if an image with the same base hash value
        is already registered
    """
    (base_hashvalue, disk_name, memory_name, diskhash_name, memoryhash_name) = \
        PackagingUtil._get_basevm_attribute(import_filepath)

    # upload handles opened by _create_param; closed in the finally below
    open_files = []

    def _create_param(filepath, image_name, image_type, disk_size, mem_size):
        # Build the keyword arguments for one gclient.images.create() call.
        properties = {
            "image_type": "snapshot",
            "image_location": "snapshot",
            CLOUDLET_TYPE.PROPERTY_KEY_CLOUDLET: "True",
            CLOUDLET_TYPE.PROPERTY_KEY_CLOUDLET_TYPE: image_type,
            CLOUDLET_TYPE.PROPERTY_KEY_BASE_UUID: base_hashvalue,
            }
        data = open(filepath, "rb")
        open_files.append(data)
        param = {
            "name": "%s" % image_name,
            "data": data,
            "size": os.path.getsize(filepath),
            "is_public": True,
            "disk_format": "raw",
            "container_format": "bare",
            "min_disk": disk_size,
            "min_ram": mem_size,
            "properties": properties,
            }
        return param

    # check duplicated base VM
    image_list = get_list(server_address, token, endpoint, "images")
    for image in image_list:
        properties = image.get("metadata", None)
        if not properties:
            continue
        if properties.get(CLOUDLET_TYPE.PROPERTY_KEY_CLOUDLET_TYPE) != \
                CLOUDLET_TYPE.IMAGE_TYPE_BASE_DISK:
            continue
        base_sha256_uuid = properties.get(CLOUDLET_TYPE.PROPERTY_KEY_BASE_UUID)
        if base_sha256_uuid == base_hashvalue:
            msg = "Duplicated base VM is already exists on the system\n"
            msg += "Image UUID of duplicated Base VM: %s\n" % image['id']
            raise CloudletClientError(msg)

    # decompress files
    temp_dir = mkdtemp(prefix="cloudlet-base-")
    try:
        sys.stdout.write(
            "Decompressing zipfile(%s) to temp dir(%s)\n" %
            (import_filepath, temp_dir))
        zipbase = zipfile.ZipFile(
            _FileFile("file:///%s" % os.path.abspath(import_filepath)), 'r')
        try:
            zipbase.extractall(temp_dir)
        finally:
            zipbase.close()
        disk_path = os.path.join(temp_dir, disk_name)
        memory_path = os.path.join(temp_dir, memory_name)
        diskhash_path = os.path.join(temp_dir, diskhash_name)
        memoryhash_path = os.path.join(temp_dir, memoryhash_name)

        # create new flavor if nothing matches
        with open(memory_path) as memory_file:
            memory_header = elijah_memory_util._QemuMemoryHeader(memory_file)
            libvirt_xml_str = memory_header.xml
        cpu_count, memory_size_mb = get_resource_size(libvirt_xml_str)
        # float division so that ceil() really rounds a partial GiB up;
        # integer division would floor first and under-report the size
        disk_gb = int(math.ceil(
            os.path.getsize(disk_path) / float(1024 ** 3)))
        flavor_list = get_list(server_address, token, endpoint, "flavors")
        flavor_ref, flavor_id = find_matching_flavor(flavor_list, cpu_count,
                                                     memory_size_mb, disk_gb)
        if flavor_id is None:
            flavor_name = "cloudlet-flavor-%s" % basevm_name
            flavor_ref, flavor_id = create_flavor(server_address,
                                                  token,
                                                  endpoint,
                                                  cpu_count,
                                                  memory_size_mb,
                                                  disk_gb,
                                                  flavor_name)
            sys.stdout.write("Create new flavor for the base VM\n")

        # upload Base VM
        disk_param = _create_param(disk_path, basevm_name + "-disk",
                                   CLOUDLET_TYPE.IMAGE_TYPE_BASE_DISK,
                                   disk_gb, memory_size_mb)
        memory_param = _create_param(memory_path, basevm_name + "-memory",
                                     CLOUDLET_TYPE.IMAGE_TYPE_BASE_MEM,
                                     disk_gb, memory_size_mb)
        diskhash_param = _create_param(diskhash_path, basevm_name + "-diskhash",
                                       CLOUDLET_TYPE.IMAGE_TYPE_BASE_DISK_HASH,
                                       disk_gb, memory_size_mb)
        memoryhash_param = _create_param(memoryhash_path, basevm_name + "-memhash",
                                         CLOUDLET_TYPE.IMAGE_TYPE_BASE_MEM_HASH,
                                         disk_gb, memory_size_mb)
        url = "://".join((glance_endpoint.scheme, glance_endpoint.netloc))
        # NOTE(review): insecure=True presumably disables TLS certificate
        # verification for the glance connection -- confirm this is intended.
        gclient = glance_client.Client('1', url, token=token, insecure=True)
        sys.stdout.write("upload base memory to glance\n")
        glance_memory = gclient.images.create(**memory_param)
        sys.stdout.write("upload base disk hash to glance\n")
        glance_diskhash = gclient.images.create(**diskhash_param)
        sys.stdout.write("upload base memory hash to glance\n")
        glance_memoryhash = gclient.images.create(**memoryhash_param)

        # upload Base disk at the last to have references for other image files
        glance_ref = {
            CLOUDLET_TYPE.IMAGE_TYPE_BASE_MEM: glance_memory.id,
            CLOUDLET_TYPE.IMAGE_TYPE_BASE_DISK_HASH: glance_diskhash.id,
            CLOUDLET_TYPE.IMAGE_TYPE_BASE_MEM_HASH: glance_memoryhash.id,
            CLOUDLET_TYPE.PROPERTY_KEY_BASE_RESOURCE:
            libvirt_xml_str.replace("\n", "")  # API cannot send '\n'
            }
        disk_param['properties'].update(glance_ref)
        sys.stdout.write("upload base disk to glance\n")
        glance_disk = gclient.images.create(**disk_param)

        return glance_disk
    finally:
        # release upload handles and delete temp dir, also on failure
        for handle in open_files:
            handle.close()
        if os.path.exists(temp_dir):
            shutil.rmtree(temp_dir)

Example 32

Project: ptsa
Source File: timeseries.py
View license
    def resampled(self, resampled_rate, window=None,
                  loop_axis=None, num_mp_procs=0, pad_to_pow2=False):
        """
        Resample the data and reset all the time ranges.

        Uses the resample function from scipy.  This method seems to
        be more accurate than the decimate method.

        Parameters
        ----------
        resampled_rate : {float}
            New sample rate to resample to.
        window : {None,str,float,tuple}, optional
            See scipy.signal.resample for details
        loop_axis: {None,str,int}, optional
            Sometimes it might be faster to loop over an axis.
        num_mp_procs: int, optional
            Whether to try and use multiprocessing to loop over axis.
            0 means no multiprocessing
            >0 specifies num procs to use
            None means yes, and use all possible procs
        pad_to_pow2: bool, optional
            Pad along the time dimension to the next power of 2 so
            that the resampling is much faster (experimental).

        Returns
        -------
        ts : {TimeSeries}
            A TimeSeries instance with the resampled data.

        Notes
        -----
        When looping over an axis, a running index is written to
        stdout as a progress indicator.

        See Also
        --------
        scipy.signal.resample
        """
        # resample the data, getting new time range
        time_range = self[self.tdim]
        new_length = int(np.round(len(time_range)*resampled_rate/self.samplerate))

        if pad_to_pow2:
            padded_length = 2**next_pow2(len(time_range))
            padded_new_length = int(np.round(padded_length*resampled_rate/self.samplerate))
            # extend the time axis past its end, continuing with the
            # spacing of the last two samples
            time_range = np.hstack([time_range,
                                    (np.arange(1,padded_length-len(time_range)+1)*np.diff(time_range[-2:]))+time_range[-1]])

        if loop_axis is None:
            # just do standard method on all data at once
            if pad_to_pow2:
                newdat,new_time_range = resample(pad_to_next_pow2(np.asarray(self),axis=self.taxis),
                                                 padded_new_length, t=time_range,
                                                 axis=self.taxis, window=window)
            else:
                newdat,new_time_range = resample(np.asarray(self),
                                                 new_length, t=time_range,
                                                 axis=self.taxis, window=window)

        else:
            # loop over specified axis
            # get the loop axis name and length
            loop_dim = self.get_dim_name(loop_axis)
            loop_dim_len = len(self[loop_dim])
            # specify empty boolean index (builtin bool: the np.bool alias
            # no longer exists in current NumPy releases)
            ind = np.zeros(loop_dim_len,dtype=bool)
            newdat = []
            use_mp = has_mp and num_mp_procs != 0
            if use_mp:
                po = mp.Pool(num_mp_procs)

            for i in range(loop_dim_len):
                # select exactly one entry along the loop axis
                ind[i] = True
                dat = self.select(**{loop_dim:ind})
                taxis = dat.taxis
                if use_mp:
                    # start async proc
                    if pad_to_pow2:
                        dat = pad_to_next_pow2(np.asarray(dat), axis=dat.taxis)
                        newdat.append(po.apply_async(resample,
                                                     (np.asarray(dat), padded_new_length, time_range,
                                                      taxis, window)))
                    else:
                        newdat.append(po.apply_async(resample,
                                                     (np.asarray(dat), new_length, time_range,
                                                      taxis, window)))
                else:
                    # just call on that dataset
                    sys.stdout.write('%d '%i)
                    sys.stdout.flush()
                    if pad_to_pow2:
                        dat = pad_to_next_pow2(np.asarray(dat), axis=dat.taxis)
                        ndat,new_time_range = resample(np.asarray(dat), padded_new_length, t=time_range,
                                                       axis=taxis, window=window)
                    else:
                        ndat,new_time_range = resample(np.asarray(dat), new_length, t=time_range,
                                                       axis=taxis, window=window)
                    newdat.append(ndat)
                ind[i] = False
            if use_mp:
                # aggregate mp results
                po.close()
                results = []
                for i, pending in enumerate(newdat):
                    sys.stdout.write('%d '%i)
                    sys.stdout.flush()
                    results.append(pending.get())
                newdat = [res[0] for res in results]
                # take the time range of the last result explicitly rather
                # than through a leftover loop variable
                new_time_range = results[-1][1]

            # concatenate the new data
            newdat = np.concatenate(newdat,axis=self.get_axis(loop_axis))

            sys.stdout.write('\n')
            sys.stdout.flush()

        # remove pad if we padded it
        if pad_to_pow2:
            newdat = newdat.take(range(new_length),axis=self.taxis)
            new_time_range = new_time_range[:new_length]

        # set the time dimension
        newdims = self.dims.copy()
        attrs = self.dims[self.taxis]._attrs.copy()
        # drop the required attrs from the copy; Dim() is given the new
        # time range and the name explicitly below
        for k in self.dims[self.taxis]._required_attrs.keys():
            attrs.pop(k,None)
        newdims[self.taxis] = Dim(new_time_range,
                                  self.dims[self.taxis].name,
                                  **attrs)

        attrs = self._attrs.copy()
        for k in self._required_attrs.keys():
            attrs.pop(k,None)
        return TimeSeries(newdat, self.tdim, resampled_rate,
                          dims=newdims, **attrs)

Example 33

Project: ptsa
Source File: lmer.py
View license
    def __init__(self, fe_formula, re_formula,
                 re_group, dep_data, ind_data, 
                 factors=None, row_mask=None,
                 use_ranks=False, use_norm=True,
                 memmap=False, memmap_dir=None,
                 resid_formula=None,
                 null_formula=None, num_null_boot=0,
                 svd_terms=None, use_ssvd=False,
                 #nperms=500, nboot=100, 
                 n_jobs=1, verbose=10,
                 lmer_opts=None):
        """
        """
        if verbose>0:
            sys.stdout.write('Initializing...')
            sys.stdout.flush()
            start_time = time.time()

        # save the formula
        self._formula_str = fe_formula + ' + ' + re_formula

        # see if there's a resid formula
        if resid_formula:
            # the random effects are the same
            self._resid_formula_str = resid_formula + ' + ' + re_formula
        else:
            self._resid_formula_str = None

        # see if there's a null formula
        if null_formula:
            # the random effects are the same
            self._null_formula_str = null_formula + ' + ' + re_formula
        else:
            self._null_formula_str = None
        self._num_null_boot = num_null_boot

        # save whether using ranks
        self._use_ranks = use_ranks

        # see whether to use sparse svd
        self._use_ssvd = use_ssvd

        # see if memmapping
        self._memmap = memmap

        # save job info
        self._n_jobs = n_jobs
        self._verbose = verbose

        # eventually fill the feature shape
        self._feat_shape = None

        # fill A,M,O,D
        self._A = {}
        self._M = {}
        self._O = {}
        self._D = {}
        O = []

        # loop over unique grouping var
        self._re_group = re_group
        if isinstance(ind_data, dict):
            # groups are the keys
            self._groups = np.array(ind_data.keys())
        else:
            # groups need to be extracted from the recarray
            self._groups = np.unique(ind_data[re_group])
        for g in self._groups:
            # get that subj inds
            if isinstance(ind_data,dict):
                # the index is just the group into that dict
                ind_ind = g
            else:
                # select the rows based on the group
                ind_ind = ind_data[re_group]==g

            # process the row mask
            if row_mask is None:
                # no mask, so all good
                row_ind = np.ones(len(ind_data[ind_ind]),dtype=np.bool)
            elif isinstance(row_mask, dict):
                # pull the row_mask from the dict
                row_ind = row_mask[g]
            else:
                # index into it with ind_ind
                row_ind = row_mask[ind_ind]
            
            # extract that group's A,M,O
            # first save the observations (rows of A)
            self._O[g] = ind_data[ind_ind][row_ind]
            if use_ranks:
                # loop over non-factors and rank them
                for n in self._O[g].dtype.names:
                    if (n in factors) or isinstance(self._O[g][n][0],str):
                        continue
                    self._O[g][n] = rankdata(self._O[g][n])
            O.append(self._O[g])

            # eventually allow for dict of data files for dep_data
            if isinstance(dep_data,dict):
                # the index is just the group into that dict
                dep_ind = g
            else:
                # select the rows based on the group
                dep_ind = ind_ind

            # save feature shape if necessary
            if self._feat_shape is None:
                self._feat_shape = dep_data[dep_ind].shape[1:]

            # Save D index into data
            self._D[g] = dep_data[dep_ind][row_ind]
            # reshape it
            self._D[g] = self._D[g].reshape((self._D[g].shape[0],-1))
            if use_ranks:
                if verbose>0:
                    sys.stdout.write('Ranking %s...'%(str(g)))
                    sys.stdout.flush()

                for i in xrange(self._D[g].shape[1]):
                    self._D[g][:,i] = rankdata(self._D[g][:,i])

            # reshape M, so we don't have to do it repeatedly
            self._M[g] = self._D[g].copy() #dep_data[ind].reshape((dep_data[ind].shape[0],-1))
                
            # normalize M
            if use_norm:
                self._M[g] -= self._M[g].mean(0)
                self._M[g] /= np.sqrt((self._M[g]**2).sum(0))

            # determine A from the model.matrix
            rdf = DataFrame({k:(FactorVector(self._O[g][k]) 
                                if k in factors else self._O[g][k]) 
                             for k in self._O[g].dtype.names})
            
            # model spec as data frame
            ms = r['data.frame'](r_model_matrix(Formula(fe_formula), data=rdf))

            cols = list(r['names'](ms))
            if svd_terms is None:
                self._svd_terms = [c for c in cols if not 'Intercept' in c]
            else:
                self._svd_terms = svd_terms
            self._A[g] = np.concatenate([np.array(ms.rx(c)) 
                                         for c in self._svd_terms]).T
                                         #for c in cols if not 'Intercept' in c]).T

            if use_ranks:
                for i in xrange(self._A[g].shape[1]):
                    self._A[g][:,i] = rankdata(self._A[g][:,i])

            # normalize A
            if True: #use_norm:
                self._A[g] -= self._A[g].mean(0)
                self._A[g] /= np.sqrt((self._A[g]**2).sum(0))

            # memmap if desired
            if self._memmap:
                self._M[g] = _memmap_array(self._M[g], memmap_dir)
                self._D[g] = _memmap_array(self._D[g], memmap_dir)

        # concat the Os together and make an LMER instance
        #O = np.concatenate(O)
        #self._O = np.vstack(O)
        #self._O = np.array(O)
        self._O = O
        if lmer_opts is None:
            lmer_opts = {}
        self._lmer_opts = lmer_opts
        self._factors = factors
        #self._lmer = LMER(self._formula_str, O, factors=factors, **lmer_opts)

        # prepare for the perms and boots
        self._perms = []
        self._boots = []
        self._tp = []
        self._tb = []

        if verbose>0:
            sys.stdout.write('Done (%.2g sec)\n'%(time.time()-start_time))
            sys.stdout.write('Processing actual data...')
            sys.stdout.flush()
            start_time = time.time()

        global _global_meld
        _global_meld[id(self)] = self

        # run for actual data (returns both perm and boot vals)
        self._R = None
        self._ss = None
        self._mer = None
        self._mer_null = None
        tp,tb,R,feat_mask,ss,mer,mer_null = _eval_model(id(self),None, None)
        self._R = R
        self._tp.append(tp)
        self._tb.append(tb)
        self._feat_mask = feat_mask
        self._ss = ss
        self._mer = mer
        self._mer_null = mer_null

        if verbose>0:
            sys.stdout.write('Done (%.2g sec)\n'%(time.time()-start_time))
            sys.stdout.flush()

Example 34

Project: ptsa
Source File: meld.py
View license
    def __init__(self, fe_formula, re_formula,
                 re_group, dep_data, ind_data,
                 factors=None, row_mask=None,
                 dep_mask=None,
                 use_ranks=False, use_norm=True,
                 memmap=False, memmap_dir=None,
                 resid_formula=None,
                 svd_terms=None, feat_thresh=0.05,
                 feat_nboot=1000, do_tfce=False,
                 connectivity=None, shape=None,
                 dt=.01, E=2/3., H=2.0,
                 n_jobs=1, verbose=10,
                 lmer_opts=None):
        """
        Build the per-group A, M, O and D structures and call
        `_eval_model` once for the actual (unpermuted) data.

        dep_data can be an array or a dict of arrays (possibly
        memmapped), one for each group.

        ind_data can be a rec_array for each group or one large rec_array
        with a grouping variable.

        fe_formula and re_formula are joined with ' + ' to form the
        model formula; resid_formula, when given, is joined with
        re_formula in the same way.

        re_group is the field of ind_data holding the grouping variable
        (only used for indexing when ind_data is a single rec_array).

        factors lists the ind_data fields that are wrapped in
        FactorVector for the R data frame and skipped when ranking.
        None means "no factors".

        row_mask may be None (keep every row), a dict keyed by group, or
        an array indexed with the same per-group row selection as
        ind_data.

        dep_mask is a boolean mask over the feature dimensions of
        dep_data; when None every feature is kept.

        svd_terms selects the model-matrix columns that make up A; when
        None every column whose name does not contain 'Intercept' is
        used.

        NOTE(review): `shape` is accepted but never read in this method
        -- confirm whether it is still needed.
        """
        if verbose > 0:
            sys.stdout.write('Initializing...')
            sys.stdout.flush()
            start_time = time.time()

        # `factors` defaults to None, but the membership tests below need
        # a container; testing against None raised TypeError for every
        # caller relying on the default.  The caller's original value is
        # still what gets stored on self._factors.
        factor_names = () if factors is None else factors

        # save the formula
        self._formula_str = fe_formula + ' + ' + re_formula

        # see if there's a resid formula
        if resid_formula:
            # the random effects are the same
            self._resid_formula_str = resid_formula + ' + ' + re_formula
        else:
            self._resid_formula_str = None

        # save whether using ranks
        self._use_ranks = use_ranks

        # see the thresh for keeping a feature
        self._feat_thresh = feat_thresh
        self._feat_nboot = feat_nboot
        self._do_tfce = do_tfce
        self._connectivity = connectivity
        self._dt = dt
        self._E = E
        self._H = H

        # see if memmapping
        self._memmap = memmap

        # save job info
        self._n_jobs = n_jobs
        self._verbose = verbose

        # eventually fill the feature shape
        self._feat_shape = None

        # handle the dep_mask
        self._dep_mask = dep_mask

        # fill A,M,O,D
        self._A = {}
        self._M = {}
        self._O = {}
        self._D = {}
        O = []

        # loop over unique grouping var
        self._re_group = re_group
        if isinstance(ind_data, dict):
            # groups are the keys
            self._groups = np.array(ind_data.keys())
        else:
            # groups need to be extracted from the recarray
            self._groups = np.unique(ind_data[re_group])
        for g in self._groups:
            # get that subj inds
            if isinstance(ind_data, dict):
                # the index is just the group into that dict
                ind_ind = g
            else:
                # select the rows based on the group
                ind_ind = ind_data[re_group] == g

            # process the row mask
            if row_mask is None:
                # no mask, so all good (builtin bool: np.bool was only
                # an alias for it and is gone from recent NumPy)
                row_ind = np.ones(len(ind_data[ind_ind]), dtype=bool)
            elif isinstance(row_mask, dict):
                # pull the row_mask from the dict
                row_ind = row_mask[g]
            else:
                # index into it with ind_ind
                row_ind = row_mask[ind_ind]

            # extract that group's A,M,O
            # first save the observations (rows of A)
            self._O[g] = ind_data[ind_ind][row_ind]
            if use_ranks:
                # loop over non-factors and rank them
                for n in self._O[g].dtype.names:
                    if (n in factor_names) or \
                       isinstance(self._O[g][n][0], str):
                        continue
                    self._O[g][n] = rankdata(self._O[g][n])
            O.append(self._O[g])

            # eventually allow for dict of data files for dep_data
            if isinstance(dep_data, dict):
                # the index is just the group into that dict
                dep_ind = g
            else:
                # select the rows based on the group
                dep_ind = ind_ind

            # save feature shape if necessary
            if self._feat_shape is None:
                self._feat_shape = dep_data[dep_ind].shape[1:]

            # handle the mask
            if self._dep_mask is None:
                self._dep_mask = np.ones(self._feat_shape,
                                         dtype=bool)

            # create the connectivity (will mask later); only done when
            # TFCE is requested, none was supplied, and the mask drops
            # at least one feature
            if self._do_tfce and self._connectivity is None and \
               (len(self._dep_mask.flatten()) > self._dep_mask.sum()):
                # create the connectivity
                self._connectivity = cluster.sparse_dim_connectivity([cluster.simple_neighbors_1d(n)
                                                                      for n in self._feat_shape])

            # Save D index into data (apply row and feature masks).
            # This will also reshape it
            self._D[g] = dep_data[dep_ind][row_ind][:, self._dep_mask].copy()

            # reshape it
            #self._D[g] = self._D[g].reshape((self._D[g].shape[0], -1))
            if use_ranks:
                if verbose > 0:
                    sys.stdout.write('Ranking %s...' % (str(g)))
                    sys.stdout.flush()

                for i in xrange(self._D[g].shape[1]):
                    # rank it
                    self._D[g][:, i] = rankdata(self._D[g][:, i])

                    # normalize it
                    self._D[g][:, i] = ((self._D[g][:, i] - 1) /
                                        (len(self._D[g][:, i]) - 1))

            # save M from D so we can have a normalized version
            # (copied before the NaN replacement below, so M keeps NaNs)
            self._M[g] = self._D[g].copy()

            # remove any NaN's in dep_data
            self._D[g][np.isnan(self._D[g])] = 0.0

            # normalize M
            if use_norm:
                self._M[g] -= self._M[g].mean(0)
                self._M[g] /= np.sqrt((self._M[g]**2).sum(0))

            # determine A from the model.matrix
            rdf = DataFrame({k: (FactorVector(self._O[g][k])
                                 if k in factor_names else self._O[g][k])
                             for k in self._O[g].dtype.names})

            # model spec as data frame
            ms = r['data.frame'](r_model_matrix(Formula(fe_formula), data=rdf))

            cols = list(r['names'](ms))
            if svd_terms is None:
                self._svd_terms = [c for c in cols
                                   if 'Intercept' not in c]
            else:
                self._svd_terms = svd_terms

            # self._A[g] = np.vstack([ms[c] #np.array(ms.rx(c))
            self._A[g] = np.concatenate([np.array(ms.rx(c))
                                         for c in self._svd_terms]).T

            if use_ranks:
                for i in xrange(self._A[g].shape[1]):
                    # rank it
                    self._A[g][:, i] = rankdata(self._A[g][:, i])

                    # normalize it
                    self._A[g][:, i] = ((self._A[g][:, i] - 1) /
                                        (len(self._A[g][:, i]) - 1))

            # normalize A (always, regardless of use_norm)
            if True:  # use_norm:
                self._A[g] -= self._A[g].mean(0)
                self._A[g] /= np.sqrt((self._A[g]**2).sum(0))

            # memmap if desired
            if self._memmap:
                self._M[g] = _memmap_array(self._M[g], memmap_dir,
                                           unique_id=str(g))
                self._D[g] = _memmap_array(self._D[g], memmap_dir,
                                           unique_id=str(g))

        # save the new O (replaces the per-group dict with a list in
        # group order)
        self._O = O
        if lmer_opts is None:
            lmer_opts = {}
        self._lmer_opts = lmer_opts
        self._factors = factors

        # mask the connectivity
        if self._do_tfce and (len(self._dep_mask.flatten()) > self._dep_mask.sum()):
            self._connectivity = self._connectivity.tolil()[self._dep_mask.flatten()][:,self._dep_mask.flatten()].tocoo()

        # prepare for the perms and boots and jackknife
        self._perms = []
        self._tp = []
        self._tb = []
        self._tj = []
        self._pfmask = []

        if verbose > 0:
            sys.stdout.write('Done (%.2g sec)\n' % (time.time()-start_time))
            sys.stdout.write('Processing actual data...')
            sys.stdout.flush()
            start_time = time.time()

        # register this instance so _eval_model can be handed id(self)
        # instead of the object itself
        global _global_meld
        _global_meld[id(self)] = self

        # run for actual data (returns both perm and boot vals)
        self._R = None
        self._ss = None
        self._mer = None
        tp, tb, R, feat_mask, ss, mer = _eval_model(id(self), None)
        self._R = R
        self._tp.append(tp)
        self._tb.append(tb)
        self._feat_mask = feat_mask
        self._fmask = ~feat_mask[0]
        self._pfmask.append(~feat_mask[0])
        self._ss = ss
        self._mer = mer

        if verbose > 0:
            sys.stdout.write('Done (%.2g sec)\n' % (time.time()-start_time))
            sys.stdout.flush()

Example 35

Project: core
Source File: howmanynodes.py
View license
def main():
    usagestr = "usage: %prog [-h] [options] [args]"
    parser = optparse.OptionParser(usage = usagestr)
    parser.set_defaults(waittime = 0.2, numnodes = 0, bridges = 0, retries = 0,
                        logfile = None, services = None)

    parser.add_option("-w", "--waittime", dest = "waittime", type = float,
                      help = "number of seconds to wait between node creation" \
                      " (default = %s)" % parser.defaults["waittime"])
    parser.add_option("-n", "--numnodes", dest = "numnodes", type = int,
                      help = "number of nodes (default = unlimited)")
    parser.add_option("-b", "--bridges", dest = "bridges", type = int,
                      help = "number of nodes per bridge; 0 = one bridge " \
                      "(def. = %s)" % parser.defaults["bridges"])
    parser.add_option("-r", "--retry", dest = "retries", type = int,
                      help = "number of retries on error (default = %s)" % \
                      parser.defaults["retries"])
    parser.add_option("-l", "--log", dest = "logfile", type = str,
                      help = "log memory usage to this file (default = %s)" % \
                      parser.defaults["logfile"])
    parser.add_option("-s", "--services", dest = "services", type = str,
                      help = "pipe-delimited list of services added to each " \
                      "node (default = %s)\n(Example: 'zebra|OSPFv2|OSPFv3|" \
                      "vtysh|IPForward')" % parser.defaults["services"])

    def usage(msg = None, err = 0):
        sys.stdout.write("\n")
        if msg:
            sys.stdout.write(msg + "\n\n")
        parser.print_help()
        sys.exit(err)

    (options, args) = parser.parse_args()

    for a in args:
        sys.stderr.write("ignoring command line argument: '%s'\n" % a)

    start = datetime.datetime.now()
    prefix = ipaddr.IPv4Prefix("10.83.0.0/16")

    print "Testing how many network namespace nodes this machine can create."
    print " - %s" % linuxversion()
    mem = memfree()
    print " - %.02f GB total memory (%.02f GB swap)" % \
            (mem['total']/GBD, mem['stotal']/GBD)
    print " - using IPv4 network prefix %s" % prefix
    print " - using wait time of %s" % options.waittime
    print " - using %d nodes per bridge" % options.bridges
    print " - will retry %d times on failure" % options.retries
    print " - adding these services to each node: %s" % options.services
    print " "

    lfp = None
    if options.logfile is not None:
        # initialize a csv log file header
        lfp = open(options.logfile, "a")
        lfp.write("# log from howmanynodes.py %s\n" % time.ctime())
        lfp.write("# options = %s\n#\n" % options)
        lfp.write("# numnodes,%s\n" % ','.join(MEMKEYS))
        lfp.flush()

    session = pycore.Session(persistent=True)
    switch = session.addobj(cls = pycore.nodes.SwitchNode)
    switchlist.append(switch)
    print "Added bridge %s (%d)." % (switch.brname, len(switchlist))

    i = 0
    retry_count = options.retries
    while True:
        i += 1
        # optionally add a bridge (options.bridges nodes per bridge)
        try:
            if options.bridges > 0 and switch.numnetif() >= options.bridges:
                switch = session.addobj(cls = pycore.nodes.SwitchNode)
                switchlist.append(switch)
                print "\nAdded bridge %s (%d) for node %d." % \
                       (switch.brname, len(switchlist), i)
        except Exception, e:
            print "At %d bridges (%d nodes) caught exception:\n%s\n" % \
                    (len(switchlist), i-1, e)
            break
        # create a node
        try:
            n = session.addobj(cls = pycore.nodes.LxcNode, name = "n%d" % i)
            n.newnetif(switch, ["%s/%s" % (prefix.addr(i), prefix.prefixlen)])
            n.cmd([SYSCTL_BIN, "net.ipv4.icmp_echo_ignore_broadcasts=0"])
            if options.services is not None:
                session.services.addservicestonode(n, "", options.services,
                                                   verbose=False)
                n.boot()
            nodelist.append(n)
            if i % 25 == 0:
                print "\n%s nodes created " % i,
                mem = memfree()
                free = mem['free'] + mem['buff'] + mem['cached']
                swap = mem['stotal'] - mem['sfree']
                print "(%.02f/%.02f GB free/swap)" % (free/GBD , swap/GBD),
                if lfp:
                    lfp.write("%d," % i)
                    lfp.write("%s\n" % ','.join(str(mem[x]) for x in MEMKEYS))
                    lfp.flush()
            else:
                sys.stdout.write(".")
            sys.stdout.flush()
            time.sleep(options.waittime)
        except Exception, e:
            print "At %d nodes caught exception:\n" % i, e
            if retry_count > 0:
                print "\nWill retry creating node %d." % i
                shutil.rmtree(n.nodedir, ignore_errors = True)
                retry_count -= 1
                i -= 1
                time.sleep(options.waittime)
                continue
            else:
                print "Stopping at %d nodes!" % i
                break

        if i == options.numnodes:
            print "Stopping at %d nodes due to numnodes option." % i
            break
        # node creation was successful at this point
        retry_count = options.retries

    if lfp:
        lfp.flush()
        lfp.close()

    print "elapsed time: %s" % (datetime.datetime.now() - start)
    print "Use the core-cleanup script to remove nodes and bridges."

Example 36

View license
    def parseMessage(self, msgId, rawmsg, dlc, flg, time):
        """
        Dispatch one message to the handler registered for its id.

        msgId selects the handler; rawmsg is passed to it unchanged.
        dlc, flg and time are only used for the debug trace (dlc is the
        number of payload entries dumped).

        Returns self.data (and resets it to a fresh dict) when message
        1512 is handled through the default branch; otherwise returns an
        empty dict.  Unknown ids are ignored.
        """
        # message id -> bound handler
        handlers = {
            1248: self.status_one,
            1249: self.status_two,
            1250: self.status_three,
            1251: self.status_four,
        }
        # ids 1280..1343 inclusive all share the track handler
        handlers.update(dict.fromkeys(xrange(1280, 1344), self.track_msg))
        handlers.update({
            1344: self.track_status_msg,
            1488: self.validation_msg_one,
            1489: self.validation_msg_two,
            1508: self.additional_status_one,
            1509: self.additional_status_two,
            1510: self.additional_status_three,
            1511: self.additional_status_four,
            1512: self.additional_status_five,
        })

        # the `== True` comparison is deliberate: only True/1 enable tracing
        tracing = self.debug == True
        payload = rawmsg
        result = {}

        if tracing:
            sys.stdout.write("In radar_data_parser and this is a message\n")
            sys.stdout.write("msgId: %9d  time: %9d  flg: 0x%02x  dlc: %d " % (msgId, time, flg, dlc))

        # dump the first dlc payload entries when tracing
        for pos in xrange(dlc):
            if not tracing:
                continue
            sys.stdout.write(" 0x%0.2x " % (payload[pos]))

        if tracing:
            sys.stdout.write("\n")

        if msgId not in handlers:
            # not a message we know how to parse
            return result

        handler = handlers[msgId]

        if 1280 <= msgId <= 1343:
            # track messages also receive their own id
            handler(msgId, payload)
            return result

        if tracing:
            sys.stdout.write("In radar_data_parser and this is msgId %d\n" % (msgId))

        if msgId == 1344:
            # track status messages are numbered with a running counter
            handler(self.msg_counter, payload)
            self.msg_counter += 1
        elif msgId > 1344 and self.msg_counter > 0:
            # first later message after track status: reset the counter
            handler(payload)
            self.msg_counter = 0
        else:
            handler(payload)
            if msgId == 1512:
                # hand the accumulated data to the caller
                result = self.data
                self.data = {}  # Start with a fresh object
        return result

Example 37

View license
    def parseMessage(self, msgId, rawmsg, dlc, flg, time):
        """
        Decode one raw message and dispatch it to the handler registered
        for its id.

        The first dlc items of rawmsg are each unpacked as one unsigned
        byte and *prepended* to the decoded list, so handlers receive the
        bytes in the reverse of their order in rawmsg.  flg and time are
        only used for the debug trace.

        When message 1512 is handled through the default branch,
        self.data is printed as JSON (comma-prefixed for every record
        after the first) and reset to a fresh dict.  Unknown ids are
        ignored.  Returns None.
        """
        # message id -> bound handler
        msgToFunc = {
            1248: self.status_one,
            1249: self.status_two,
            1250: self.status_three,
            1251: self.status_four,
        }
        # ids 1280..1343 inclusive all share the track handler
        msgToFunc.update(dict.fromkeys(range(1280, 1344), self.track_msg))
        msgToFunc.update({
            1344: self.track_status_msg,
            1488: self.validation_msg_one,
            1489: self.validation_msg_two,
            1508: self.additional_status_one,
            1509: self.additional_status_two,
            1510: self.additional_status_three,
            1511: self.additional_status_four,
            1512: self.additional_status_five,
        })

        # the `== True` comparison is kept so that, as before, only
        # True/1 enable tracing
        debug = self.debug == True
        if debug:
            sys.stdout.write("In radar_data_parser and this is a message\n")
            sys.stdout.write("msgId: %9d  time: %9d  flg: 0x%02x  dlc: %d " % (msgId, time, flg, dlc))

        msg = []
        for i in range(dlc):
            value = int(struct.unpack('B', rawmsg[i])[0])
            # prepend: the decoded list ends up in reverse order
            msg.insert(0, value)
            if debug:
                # Trace the byte just decoded.  Indexing msg[i] here would
                # always hit the last element of the prepended list, i.e.
                # the first byte, and print it dlc times.
                sys.stdout.write(" 0x%0.2x " % (value))
        if debug:
            sys.stdout.write("\n")

        if msgId not in msgToFunc:
            # not a message we know how to parse
            return

        handler = msgToFunc[msgId]

        if msgId >= 1280 and msgId <= 1343:
            # track messages also receive their own id
            handler(msgId, msg)
            return

        if debug:
            sys.stdout.write("In radar_data_parser and this is msgId %d\n" % (msgId))

        if msgId == 1344:
            # track status messages are numbered with a running counter
            handler(self.msg_counter, msg)
            self.msg_counter += 1
        elif msgId > 1344 and self.msg_counter > 0:
            # first later message after track status: reset the counter
            handler(msg)
            self.msg_counter = 0
        else:
            handler(msg)
            if msgId == 1512:
                # Emit the accumulated record; every record after the
                # first is prefixed with ", ".  A single parenthesised
                # argument prints the same text under the print statement
                # and the print function.
                if self.first == True:
                    print(json.dumps(self.data))
                    self.first = False
                else:
                    print(", " + json.dumps(self.data))
                self.data = {}  # Start with a fresh object

Example 38

View license
    def process_input(self, data, input_prompt, lineno):
        """
        Process data block for INPUT token.

        data is a (decorator, input, rest) triple: the optional
        '@...' decorator (or None), the source text to run, and the text
        that followed the input in the block.  input_prompt is prefixed
        to the first line; lineno only determines the width of the
        continuation prompt.

        Returns a tuple
        (ret, input_lines, processed_output, is_doctest, decorator,
        image_file, image_directive), where ret holds the formatted
        lines to emit and processed_output is whatever was captured in
        self.cout while the lines ran.
        """
        decorator, source_text, rest = data
        image_file = None
        image_directive = None

        has_decorator = decorator is not None
        is_verbatim = decorator == '@verbatim' or self.is_verbatim
        is_doctest = ((has_decorator and decorator.startswith('@doctest'))
                      or self.is_doctest)
        is_suppress = decorator == '@suppress' or self.is_suppress
        is_okexcept = decorator == '@okexcept' or self.is_okexcept
        is_okwarning = decorator == '@okwarning' or self.is_okwarning
        is_savefig = has_decorator and decorator.startswith('@savefig')

        input_lines = source_text.split('\n')
        if len(input_lines) > 1 and input_lines[-1] != "":
            # make sure there's a blank line so splitter buffer gets reset
            input_lines.append('')

        continuation = '   %s:' % ''.join(['.'] * (len(str(lineno)) + 2))

        if is_savefig:
            image_file, image_directive = self.process_image(decorator)

        ret = []
        is_semicolon = False

        # Hold the execution count, if requested to do so.
        store_history = not (is_suppress and self.hold_count)

        # Note: catch_warnings is not thread safe
        with warnings.catch_warnings(record=True) as ws:
            for i, line in enumerate(input_lines):
                if line.endswith(';'):
                    is_semicolon = True

                if i == 0:
                    # process the first input line
                    if is_verbatim:
                        self.process_input_line('')
                        self.IP.execution_count += 1  # increment it anyway
                    else:
                        # only submit the line in non-verbatim mode
                        self.process_input_line(line,
                                                store_history=store_history)
                    formatted_line = '%s %s' % (input_prompt, line)
                else:
                    # process a continuation line
                    if not is_verbatim:
                        self.process_input_line(line,
                                                store_history=store_history)

                    formatted_line = '%s %s' % (continuation, line)

                if not is_suppress:
                    ret.append(formatted_line)

        if not is_suppress and len(rest.strip()) and is_verbatim:
            # The "rest" is the standard output of the input. This needs to be
            # added when in verbatim mode. If there is no "rest", then we don't
            # add it, as the new line will be added by the processed output.
            ret.append(rest)

        # Fetch the processed output. (This is not the submitted output.)
        self.cout.seek(0)
        processed_output = self.cout.read()
        if not is_suppress and not is_semicolon:
            #
            # In IPythonDirective.run, the elements of `ret` are eventually
            # combined such that '' entries correspond to newlines. So if
            # `processed_output` is equal to '', then adding it to `ret`
            # ensures that there is a blank line between consecutive inputs
            # that have no outputs, as in:
            #
            #    In [1]: x = 4
            #
            #    In [2]: x = 5
            #
            # When there is processed output, it has a '\n' at the tail end. So
            # adding the output to `ret` will provide the necessary spacing
            # between consecutive input/output blocks, as in:
            #
            #   In [1]: x
            #   Out[1]: 5
            #
            #   In [2]: x
            #   Out[2]: 5
            #
            # When there is stdout from the input, it also has a '\n' at the
            # tail end, and so this ensures proper spacing as well. E.g.:
            #
            #   In [1]: print x
            #   5
            #
            #   In [2]: x = 5
            #
            # When in verbatim mode, `processed_output` is empty (because
            # nothing was passed to IP). Sometimes the submitted code block has
            # an Out[] portion and sometimes it does not. When it does not, we
            # need to ensure proper spacing, so we have to add '' to `ret`.
            # However, if there is an Out[] in the submitted code, then we do
            # not want to add a newline as `process_output` has stuff to add.
            # The difficulty is that `process_input` doesn't know if
            # `process_output` will be called---so it doesn't know if there is
            # Out[] in the code block. This requires that we include a hack in
            # `process_block`. See the comments there.
            #
            ret.append(processed_output)
        elif is_semicolon:
            # Make sure there is a newline after the semicolon.
            ret.append('')

        # context information (separate names: the `lineno` argument is
        # the prompt number, not a position in the source document)
        src_file = "Unknown"
        src_line = 0
        if self.directive.state:
            src_file = self.directive.state.document.current_source
            src_line = self.directive.state.document.current_line

        # output any exceptions raised during execution to stdout
        # unless :okexcept: has been specified.
        if not is_okexcept and "Traceback" in processed_output:
            s =  "\nException in %s at block ending on line %s\n" % (src_file, src_line)
            s += "Specify :okexcept: as an option in the ipython:: block to suppress this message\n"
            sys.stdout.write('\n\n>>>' + ('-' * 73))
            sys.stdout.write(s)
            sys.stdout.write(processed_output)
            sys.stdout.write('<<<' + ('-' * 73) + '\n\n')

        # output any warning raised during execution to stdout
        # unless :okwarning: has been specified.
        if not is_okwarning:
            for w in ws:
                s =  "\nWarning in %s at block ending on line %s\n" % (src_file, src_line)
                s += "Specify :okwarning: as an option in the ipython:: block to suppress this message\n"
                sys.stdout.write('\n\n>>>' + ('-' * 73))
                sys.stdout.write(s)
                sys.stdout.write(('-' * 76) + '\n')
                s = warnings.formatwarning(w.message, w.category,
                                           w.filename, w.lineno, w.line)
                sys.stdout.write(s)
                sys.stdout.write('<<<' + ('-' * 73) + '\n')

        # Empty the capture buffer for the next block.  Rewind first:
        # io.StringIO.truncate() does not move the stream position, so
        # without the seek the next write would start at the old end and
        # the gap would be padded with NUL characters.
        self.cout.seek(0)
        self.cout.truncate(0)

        return (ret, input_lines, processed_output,
                is_doctest, decorator, image_file, image_directive)

Example 39

View license
    def create_data(self, username, password, usernum, kind, prefix,
                    endaga_version):
        """Populate the database with one fake user and randomized data.

        Saves, in order: a user, several towers (each with timeseries stats
        and system events), one extra "no-data" tower, subscribers with
        attached numbers, usage events, credit transactions and a pool of
        unattached numbers.  Progress messages are written to stdout.

        Args:
            username: login name of the new user; also used to build the
                user's email address.
            password: password set on the new user.
            usernum: integer formatted (``%d``) into the generated IMSIs.
            kind: ``kind`` stored on the numbers attached to subscribers.
            prefix: string prepended to four random digits to form each
                subscriber number.
            endaga_version: version recorded in each tower's package
                versions (after ``bts.sortable_version``), or None to
                record no version.
        """
        # Create a user.
        sys.stdout.write('creating user: %s %s %s..\n' % (
            username, password, usernum))
        # The previous template, "%[email protected]", raised ValueError
        # because "%[" is not a valid conversion.
        # NOTE(review): the original domain is not recoverable from this
        # file; example.com is a reserved placeholder -- confirm.
        user = User(username=username, email="%s@example.com" % username)
        user.set_password(password)
        user.save()

        # Get user profile and add some credit.
        sys.stdout.write('setting user profile..\n')
        user_profile = UserProfile.objects.get(user=user)
        user_profile.save()

        # Add some towers.
        towers_to_add = random.randint(4, 7)
        added_towers = []
        sys.stdout.write('adding %s towers..\n' % towers_to_add)

        for index in range(towers_to_add):
            nickname = None
            if random.random() < 0.5:
                nickname = 'Test Tower %s' % index
            bts = BTS(uuid=str(uuid.uuid4()), nickname=nickname, secret='mhm',
                      inbound_url='http://localhost:8090',
                      network=user_profile.network)
            added_towers.append(bts)
            # Set the last_active time and uptime randomly.
            random_seconds = random.randint(0, 24*60*60)
            random_date = (timezone.now() -
                           datetime.timedelta(seconds=random_seconds))
            bts.last_active = random_date
            bts.uptime = random.randint(24*60*60, 100*24*60*60)
            bts.status = random.choice(['no-data', 'active', 'inactive'])
            bts.save()
            # Set the metapackage version.  This has to be done after initially
            # creating the BTS or the post-create hook will override.
            # The conversion goes into a local so the caller-supplied value
            # is converted once per tower instead of being re-converted from
            # the previous tower's already-converted result.
            sortable_endaga_version = endaga_version
            if endaga_version is not None:
                sortable_endaga_version = bts.sortable_version(endaga_version)
            versions = {
                'endaga_version': sortable_endaga_version,
                'freeswitch_version': None,
                'gsm_version': None,
                'python_endaga_core_version': None,
                'python_gsm_version': None,
            }
            bts.package_versions = json.dumps(versions)
            bts.save()
            # Add some TimeseriesStats for each tower.
            stats_to_add = random.randint(100, 1000)
            sys.stdout.write(
                'adding %s TimeseriesStats..\n' % stats_to_add)
            for _ in range(stats_to_add):
                date = (
                    timezone.now() -
                    datetime.timedelta(seconds=random.randint(0, 7*24*60*60)))
                key = random.choice(stats_app.views.TIMESERIES_STAT_KEYS)
                if key in ('noise_rssi_db', 'noise_ms_rssi_target_db'):
                    value = random.randint(-75, -20)
                elif 'percent' in key:
                    value = random.randint(0, 100)
                elif 'bytes' in key:
                    value = random.randint(0, 10000)
                else:
                    value = random.randint(0, 10)
                stat = TimeseriesStat(key=key, value=value, date=date, bts=bts,
                                      network=user_profile.network)
                stat.save()
            # Add some SystemEvents for each tower (either small or large number)
            number_of_events = [0, 1, 2, 5, 18, 135, 264]
            events_to_add = random.choice(number_of_events)
            sys.stdout.write('adding %s SystemEvents..\n' % events_to_add)
            for _ in range(events_to_add):
                # Actual events should be in order. But we should support
                # out-of-order events just in case
                date = (
                    timezone.now() -
                    datetime.timedelta(seconds=random.randint(0, 7*24*60*60)))
                event = SystemEvent(date=date, bts=bts,
                                    type=random.choice(['bts up', 'bts down']))
                event.save()

        # Make at least one BTS active recently.  `bts` is the last tower
        # created above (at least four are always created).
        bts.last_active = timezone.now()
        bts.status = 'active'
        bts.save()
        # Make one BTS in the no-data state.  It reuses the package versions
        # dict built for the last tower.
        bts = BTS(uuid=str(uuid.uuid4()), nickname='No-data tower', secret='z',
                  inbound_url='http://localhost:5555',
                  network=user_profile.network,
                  package_versions=json.dumps(versions))
        bts.save()

        # Add some subscribers.
        sys.stdout.write("adding subscribers and numbers..\n")
        added_subscribers = []
        for index in range(random.randint(3, 20)):
            imsi = "IMSI%d999900000000%s" % (usernum, index)
            if random.random() < 0.5:
                name = "test name %s" % index
            else:
                name = ''
            balance = random.randint(40000000, 60000000)
            state = "active"
            bts = BTS.objects.filter(
                network=user_profile.network).order_by('?').first()
            subscriber = Subscriber(network=user_profile.network, imsi=imsi,
                                    name=name, balance=balance, state=state,
                                    bts=bts, last_camped=bts.last_active)
            subscriber.save()
            added_subscribers.append(subscriber)
            # And attach some numbers.
            for _ in range(random.randint(1, 5)):
                msisdn = int(prefix + str(random.randint(1000, 9999)))
                number = Number(
                    number=msisdn, state="inuse", network=user_profile.network,
                    kind=kind, subscriber=subscriber)
                number.save()

        # Add one last subscriber so we have at least one sub with no activity.
        imsi = "IMSI%d8888000000000" % usernum
        name = 'test name (no activity)'
        subscriber = Subscriber(network=user_profile.network, imsi=imsi,
                                bts=bts, name=name, balance=1000,
                                state='active')
        subscriber.save()

        # Add some UsageEvents attached to random subscribers.
        events_to_add = random.randint(100, 4000)
        sys.stdout.write("adding %s usage events..\n" % events_to_add)
        all_destinations = list(Destination.objects.all())
        # (event kind, tariff) pairs; constant, so built once outside the loop.
        event_kinds = [
            ('outside_sms', 10000), ('incoming_sms', 2000),
            ('local_sms', 4000),
            ('local_recv_sms', 1000), ('free_sms', 0),
            ('error_sms', 0),
            ('outside_call', 8000), ('incoming_call', 3000),
            ('local_call', 2000),
            ('local_recv_call', 1000),
            ('free_call', 0), ('error_call', 0), ('gprs', 5000)]
        with transaction.atomic():
            for _ in range(events_to_add):
                random_sub = random.choice(added_subscribers)
                time_delta = datetime.timedelta(
                    minutes=random.randint(0, 60000))
                date = (timezone.now() - time_delta)
                (event_kind, tariff) = random.choice(event_kinds)
                to_number = billsec = up_bytes = call_duration = None
                from_number = down_bytes = timespan = change = None
                if 'call' in event_kind:
                    billsec = random.randint(0, 120)
                    change = tariff * billsec
                    call_duration = billsec + random.randint(0, 10)
                    to_number = str(random.randint(1234567890, 9876543210))
                    from_number = str(random.randint(1234567890, 9876543210))
                    reason = '%s sec call to %s (%s)' % (billsec, to_number,
                                                         event_kind)
                elif 'sms' in event_kind:
                    change = tariff
                    to_number = str(random.randint(1234567890, 9876543210))
                    from_number = str(random.randint(1234567890, 9876543210))
                    reason = '%s to %s' % (event_kind, to_number)
                elif event_kind == 'gprs':
                    up_bytes = random.randint(20000, 400000)
                    down_bytes = random.randint(20000, 400000)
                    # Explicit floor division keeps the whole-KiB result
                    # regardless of the interpreter's "/" semantics.
                    change = (down_bytes // 1024) * tariff
                    timespan = 60
                    reason = 'gprs_usage, %sB uploaded, %sB downloaded' % (
                        up_bytes, down_bytes)
                old_amount = random_sub.balance
                random_sub.change_balance(change)
                usage_event = UsageEvent(
                    subscriber=random_sub, bts=random.choice(added_towers),
                    date=date, kind=event_kind,
                    reason=reason, oldamt=old_amount,
                    newamt=random_sub.balance, change=-change, billsec=billsec,
                    call_duration=call_duration, uploaded_bytes=up_bytes,
                    downloaded_bytes=down_bytes,
                    timespan=timespan, to_number=to_number,
                    from_number=from_number,
                    destination=random.choice(all_destinations), tariff=tariff)
                try:
                    usage_event.save()
                except DataError:
                    # Best effort: show the offending query and carry on.
                    from django.db import connection
                    sys.stdout.write('%s\n' % (connection.queries[-1],))
                random_sub.save()
            # Create one more UE with a negative "oldamt" to test display
            # handling of such events.  It reuses the subscriber and date of
            # the last event generated above.
            usage_event = UsageEvent(
                subscriber=random_sub, bts=random.choice(added_towers),
                date=date, kind='local_sms',
                reason='negative oldamt', oldamt=-200000,
                newamt=0, change=200000,
                billsec=0, to_number='19195551234',
                destination=random.choice(all_destinations))
            usage_event.save()

        # Add some transaction history.
        sys.stdout.write("adding transactions..\n")
        for _ in range(random.randint(10, 50)):
            time_delta = datetime.timedelta(
                minutes=random.randint(0, 60000))
            date = (timezone.now() - time_delta)
            new_transaction = Transaction(
                ledger=user_profile.network.ledger, kind='credit',
                reason='Automatic Recharge',
                amount=1e3*random.randint(1000, 100000),
                created=date,
            )
            new_transaction.save()

        # And some floating numbers for release testing.
        sys.stdout.write("adding floating phone numbers..\n")
        for num in random.sample(range(10000, 99999), 300):
            # need to be e164, that's what we use
            msisdn = int('155555%s' % str(num))
            state = random.choice(('available', 'pending'))
            number_kind = random.choice(('number.nexmo.monthly',
                                         'number.telecom.permanent'))
            number = Number(
                number=msisdn, state=state, kind=number_kind, country_id='US')
            number.save()

Example 40

Project: MIDS
Source File: Movies1MDataset.py
View license
def _report_progress(label, done, total, last_report):
    """Write a carriage-return progress line at most about once per second.

    Returns the time of the most recent report: ``last_report`` unchanged
    when nothing was written, otherwise the current time.
    """
    now = time.time()
    if now <= last_report + 1:
        return last_report
    percent = float(done) / float(total) * 100.0
    sys.stdout.write("\t\t\t\t\t\t\t\t\t\r")
    sys.stdout.write("%s csv data process %f %%\r" % (label, percent))
    # The line ends in "\r", not "\n", so a line-buffered stdout would
    # otherwise hold it back.
    sys.stdout.flush()
    return now


def _write_quoted_csv(frame, path):
    """Write ``frame`` to ``path`` as headerless, fully quoted ';' CSV (UTF-8)."""
    import inspect

    # pandas 1.5 renamed ``line_terminator`` to ``lineterminator`` and
    # pandas 2.0 dropped the old spelling; use whichever this pandas accepts.
    accepted = inspect.signature(frame.to_csv).parameters
    if "lineterminator" in accepted:
        terminator_kw = "lineterminator"
    else:
        terminator_kw = "line_terminator"
    frame.to_csv(
        path_or_buf=path,
        sep=";",
        header=False,
        index=False,
        encoding="utf-8",
        quoting=csv.QUOTE_ALL,
        quotechar='"',
        doublequote=True,
        **{terminator_kw: "\n"}
    )


def convert_csv(consts):
    """Load the raw movie data set, enrich it and write three CSV files.

    Reads the timezone, movies, users and ratings files named by ``consts``,
    then:

    * movies: adds ``year``, parsed from a trailing ``(NNNN)`` in the name,
      defaulting to ``consts.min_year``;
    * users: adds ``latitude``, ``longitude``, ``timezone`` and ``dts``
      copied from the timezone table when exactly one row matches the
      user's zip code (zeros otherwise);
    * ratings: adds ``wday``, ``yday`` and ``year`` computed from the
      timestamp in the rating user's offset, when exactly one user matches.

    Args:
        consts: object providing ``timezone_path``, ``movies_path``,
            ``users_path``, ``ratings_path``, ``min_year`` and the output
            paths ``users_csv_file_name``, ``movies_csv_file_name`` and
            ``ratings_csv_file_name``.

    Returns:
        None.  Results are written to the three output files; status and
        progress go to stdout.
    """
    print("converting csv...")
    # The builtin ``str`` replaces the ``numpy.str`` alias, which was
    # deprecated in NumPy 1.20 and later removed.
    timezones_cvs = pandas.read_csv(
        consts.timezone_path,
        dtype={
            'zip': str,
            'city': str,
            'state': str,
            'latitude': numpy.float32,
            'longitude': numpy.float32,
            'timezone': numpy.int32,
            'dst': numpy.int32,
        },
        index_col=False,
    )
    print("timezone data was loaded")
    movies_cvs = pandas.read_csv(
        consts.movies_path,
        sep=";",
        header=None,
        quotechar='"',
        encoding="cp1251",
        names=("MovieID", "Name", "Genders"),
        dtype={
            'MovieID': numpy.int32,
            'Name': str,
            'Genders': str,
        },
        index_col=False,
    )
    print("movies data was loaded")
    users_cvs = pandas.read_csv(
        consts.users_path,
        sep=";",
        header=None,
        quotechar='"',
        encoding="cp1251",
        names=("UserID", "Gender", "Age", "Occupation", "ZipCode"),
        dtype={
            'UserID': numpy.int32,
            'Gender': str,
            'Age': numpy.int32,
            'Occupation': numpy.int32,
            "ZipCode": str,
        },
        index_col=False,
    )
    print("users data was loaded")
    ratings_cvs = pandas.read_csv(
        consts.ratings_path,
        sep=";",
        header=None,
        quotechar='"',
        encoding="cp1251",
        names=("UserID", "MovieID", "Rating", "Timestamp"),
        dtype={
            'UserID': numpy.int32,
            'MovieID': numpy.int32,
            'Rating': numpy.float32,
            'Timestamp': numpy.int32,
        },
        index_col=False,
    )
    print("ratings data was loaded")

    last_report = time.time()

    # --- movies: release year from a trailing "(NNNN)" in the name ---------
    year_pattern = re.compile(r"\((\d+)\)$")
    movies_cvs['year'] = int(consts.min_year)
    movie_total = movies_cvs.shape[0]
    # Every row is visited; the previous ``shape[0]-1`` bound skipped the
    # last one.
    for i in range(movie_total):
        found = year_pattern.search(str(movies_cvs.at[i, "Name"]))
        if found:
            movies_cvs.at[i, 'year'] = int(found.group(1))
        last_report = _report_progress("movies", i, movie_total, last_report)
    print("movies cvs data was prepared")

    # --- users: location and timezone looked up by zip code ----------------
    users_cvs['latitude'] = float(0)
    users_cvs['longitude'] = float(0)
    users_cvs['timezone'] = int(0)
    users_cvs['dts'] = int(0)
    user_total = users_cvs.shape[0]
    for i in range(user_total):
        zipcode = users_cvs.at[i, 'ZipCode']
        zc = timezones_cvs[timezones_cvs['zip'].isin([zipcode])]
        # Only an unambiguous match is used.
        if len(zc) == 1:
            # ``.iloc[0]`` extracts the scalar; calling int()/float() on a
            # one-element Series is deprecated in pandas.
            users_cvs.at[i, 'timezone'] = int(zc['timezone'].iloc[0])
            users_cvs.at[i, 'latitude'] = float(zc['latitude'].iloc[0])
            users_cvs.at[i, 'longitude'] = float(zc['longitude'].iloc[0])
            users_cvs.at[i, 'dts'] = int(zc['dst'].iloc[0])
        last_report = _report_progress("users", i, user_total, last_report)
    print("users cvs data was prepared")

    # --- ratings: calendar fields in the rating user's offset --------------
    ratings_cvs["wday"] = int(0)
    ratings_cvs["yday"] = int(0)
    ratings_cvs["year"] = int(consts.min_year)

    # Offset per user, restricted to user ids that occur exactly once (the
    # same "exactly one match" rule as before), built once instead of
    # scanning the whole users frame for every rating.
    unique_users = users_cvs.drop_duplicates(subset="UserID", keep=False)
    tz_by_user = dict(zip(unique_users["UserID"].tolist(),
                          unique_users["timezone"].tolist()))

    rating_total = ratings_cvs.shape[0]
    for i in range(rating_total):
        user_id = int(ratings_cvs.at[i, "UserID"])
        if user_id in tz_by_user:
            # NOTE(review): the constant -2 shift is carried over unchanged;
            # its meaning is not evident from this code -- confirm.
            offset_hours = int(tz_by_user[user_id]) - 2
            zone = datetime.timezone(datetime.timedelta(hours=offset_hours))
            stamp = int(ratings_cvs.at[i, "Timestamp"])
            tt = datetime.datetime.fromtimestamp(stamp, zone).timetuple()
            ratings_cvs.at[i, "wday"] = tt.tm_wday
            ratings_cvs.at[i, "yday"] = tt.tm_yday
            ratings_cvs.at[i, "year"] = tt.tm_year
        last_report = _report_progress("ratings", i, rating_total, last_report)
    print("ratings cvs data was prepared")

    _write_quoted_csv(users_cvs, consts.users_csv_file_name)
    _write_quoted_csv(movies_cvs, consts.movies_csv_file_name)
    _write_quoted_csv(ratings_cvs, consts.ratings_csv_file_name)
    print("converting done")
    return

Example 41

Project: tools-iuc
Source File: htseqsams2mx.py
View license
def htseqMX(gff_filename, sam_filenames, colnames, sam_exts, sam_bais, opts):
    """
    Count reads per GFF feature for several sam/bam files at once.

    Code taken from count.py in Simon Anders HTSeq distribution
    Wrapped in a loop to accept multiple bam/sam files and their names from galaxy to
    produce a matrix of contig counts by sample for downstream use in edgeR and DESeq tools

    Parameters:
        gff_filename: path of the GFF file; must exist (checked by assert).
        sam_filenames: paths of the sam/bam inputs, one per sample.
        colnames: sample names, parallel to ``sam_filenames``.
        sam_exts: per-sample extension; the value 'bam' selects the BAM reader.
        sam_bais: per-sample index path; a non-empty value causes the bam and
            its index to be hard-linked into the working directory as
            ``<name>_TEMP.bam`` / ``<name>_TEMP.bai`` before reading.
        opts: options object; the attributes read here are ``quiet``,
            ``stranded``, ``mapqMin``, ``feature_type``, ``id_attribute``,
            ``filter_extras`` and ``mode``.

    Returns the tuple
    ``(counts, empty, ambiguous, lowqual, notaligned, nonunique, filtered, nreads)``
    where ``counts`` maps each feature id to a list holding one count per
    sample and the remaining items are totals over all samples.

    Side effects: replaces ``warnings.showwarning`` for the process, and on
    any failure while counting writes the error to stderr and exits with
    status 1.
    """
    class UnknownChrom( Exception ):
        """Signals a read aligned to a chromosome that is not in the GFF."""
        pass

    def my_showwarning( message, category, filename, lineno=None, line=None ):
        # Route warnings to stdout as a single short line.
        sys.stdout.write( "Warning: %s\n" % message )

    def invert_strand( iv ):
        """Return a copy of ``iv`` with its strand flipped ('+' <-> '-')."""
        iv2 = iv.copy()
        if iv2.strand == "+":
            iv2.strand = "-"
        elif iv2.strand == "-":
            iv2.strand = "+"
        else:
            raise ValueError("Illegal strand")
        return iv2

    def matched_intervals( alignment, invert ):
        """Lazily yield the reference intervals of non-empty "M" cigar ops,
        strand-inverted when ``invert`` is true."""
        if invert:
            return ( invert_strand( co.ref_iv ) for co in alignment.cigar if co.type == "M" and co.size > 0 )
        return ( co.ref_iv for co in alignment.cigar if co.type == "M" and co.size > 0 )

    def count_reads_in_features( sam_filenames, colnames, gff_filename, opts ):
        """ Hacked version of htseq count.py
        """
        if opts.quiet:
            warnings.filterwarnings( action="ignore", module="HTSeq" )
        features = HTSeq.GenomicArrayOfSets( "auto", opts.stranded != "no" )
        mapqMin = int(opts.mapqMin)
        reverse = opts.stranded == "reverse"
        counts = {}
        nreads = 0
        empty = 0
        ambiguous = 0
        notaligned = 0
        lowqual = 0
        nonunique = 0
        filtered = 0  # new filter_extras - need a better way to do this - independent filter tool?
        gff = HTSeq.GFF_Reader( gff_filename )
        # Pre-set so the summary line below also works for a GFF with no
        # lines (the loop variable would otherwise be unbound).
        i = 0
        try:
            for i, f in enumerate(gff):
                if f.type == opts.feature_type:
                    try:
                        feature_id = f.attr[ opts.id_attribute ]
                    except KeyError:
                        try:
                            feature_id = f.attr[ 'gene_id' ]
                        except KeyError:
                            sys.exit( "Feature at row %d %s does not contain a '%s' attribute OR a gene_id attribute - faulty GFF?" %
                                      ( (i + 1), f.name, opts.id_attribute ) )
                    if opts.stranded != "no" and f.iv.strand == ".":
                        sys.exit( "Feature %s at %s does not have strand information but you are "
                                  "running htseq-count in stranded mode. Use '--stranded=no'." %
                                  ( f.name, f.iv ) )
                    features[ f.iv ] += feature_id
                    counts[ feature_id ] = [0 for x in colnames]  # we use sami as an index here to bump counts later
        except:
            # Report where in the GFF things went wrong, then propagate.
            sys.stderr.write( "Error occured in %s.\n" % gff.get_line_number_string() )
            raise

        if not opts.quiet:
            sys.stdout.write( "%d GFF lines processed.\n" % i )

        if len( counts ) == 0 and not opts.quiet:
            sys.stdout.write( "Warning: No features of type '%s' found.\n" % opts.feature_type )
        for sami, sam_filename in enumerate(sam_filenames):
            colname = colnames[sami]
            isbam = sam_exts[sami] == 'bam'
            hasbai = sam_bais[sami] > ''
            if hasbai:
                # Link the bam and its index side by side under matching names.
                # NOTE(review): these links are never removed here -- confirm
                # the working directory is cleaned up by the caller.
                tempname = os.path.splitext(os.path.basename(sam_filename))[0]
                tempbam = '%s_TEMP.bam' % tempname
                tempbai = '%s_TEMP.bai' % tempname
                os.link(sam_filename, tempbam)
                os.link(sam_bais[sami], tempbai)
            try:
                if isbam:
                    if hasbai:
                        read_seq = HTSeq.BAM_Reader( tempbam )
                    else:
                        read_seq = HTSeq.BAM_Reader( sam_filename )
                else:
                    read_seq = HTSeq.SAM_Reader( sam_filename )
                # Peek at the first read to learn whether the data is paired;
                # the builtin next() works on both Python 2 and 3.
                first_read = next( iter( read_seq ) )
                pe_mode = first_read.paired_end
            except:
                # The message keeps its own trailing "\n" plus the newline the
                # former print statement appended.
                sys.stderr.write( "Error occured when reading first line of %s file %s colname=%s \n" %
                                  ( "bam" if isbam else "sam", sam_filename, colname ) + "\n" )
                raise

            try:
                if pe_mode:
                    read_seq_pe_file = read_seq
                    read_seq = HTSeq.pair_SAM_alignments( read_seq )
                for seqi, r in enumerate(read_seq):
                    nreads += 1
                    if not pe_mode:
                        if not r.aligned:
                            notaligned += 1
                            continue
                        try:
                            # Drop the read when any extra tag is set.  The
                            # old inner-loop ``continue`` only advanced to the
                            # next tag, so the read was counted as filtered
                            # (possibly several times) yet still processed.
                            if any( r.optional_field( extra ) for extra in opts.filter_extras ):
                                filtered += 1
                                continue
                            if r.optional_field( "NH" ) > 1:
                                nonunique += 1
                                continue
                        except KeyError:
                            # A missing optional tag is not an error.
                            pass
                        if r.aQual < mapqMin:
                            lowqual += 1
                            continue
                        iv_seq = matched_intervals( r, reverse )
                    else:
                        if r[0] is not None and r[0].aligned:
                            iv_seq = matched_intervals( r[0], reverse )
                        else:
                            iv_seq = tuple()
                        if r[1] is not None and r[1].aligned:
                            # The second mate uses the opposite strand handling.
                            iv_seq = itertools.chain( iv_seq, matched_intervals( r[1], not reverse ) )
                        else:
                            if r[0] is None or not r[0].aligned:
                                notaligned += 1
                                continue
                        try:
                            if ( r[0] is not None and r[0].optional_field( "NH" ) > 1 ) or \
                                    ( r[1] is not None and r[1].optional_field( "NH" ) > 1 ):
                                nonunique += 1
                                continue
                        except KeyError:
                            pass
                        if ( r[0] and r[0].aQual < mapqMin ) or ( r[1] and r[1].aQual < mapqMin ):
                            lowqual += 1
                            continue

                    try:
                        if opts.mode == "union":
                            fs = set()
                            for iv in iv_seq:
                                if iv.chrom not in features.chrom_vectors:
                                    raise UnknownChrom
                                for iv2, fs2 in features[ iv ].steps():
                                    fs = fs.union( fs2 )
                        elif opts.mode == "intersection-strict" or opts.mode == "intersection-nonempty":
                            fs = None
                            for iv in iv_seq:
                                if iv.chrom not in features.chrom_vectors:
                                    raise UnknownChrom
                                for iv2, fs2 in features[ iv ].steps():
                                    if len(fs2) > 0 or opts.mode == "intersection-strict":
                                        if fs is None:
                                            fs = fs2.copy()
                                        else:
                                            fs = fs.intersection( fs2 )
                        else:
                            sys.exit( "Illegal overlap mode %s" % opts.mode )
                        if fs is None or len( fs ) == 0:
                            empty += 1
                        elif len( fs ) > 1:
                            ambiguous += 1
                        else:
                            ck = list(fs)[0]
                            counts[ck][sami] += 1  # end up with counts for each sample as a list
                    except UnknownChrom:
                        if not pe_mode:
                            rr = r
                        else:
                            rr = r[0] if r[0] is not None else r[1]
                        empty += 1
                        if not opts.quiet:
                            sys.stdout.write( ( "Warning: Skipping read '%s', because chromosome " +
                                                "'%s', to which it has been aligned, did not appear in the GFF file.\n" ) %
                                              ( rr.read.name, iv.chrom ) )
            except:
                # Report the position in the input, then propagate.
                if not pe_mode:
                    sys.stderr.write( "Error occured in %s.\n" % read_seq.get_line_number_string() )
                else:
                    sys.stderr.write( "Error occured in %s.\n" % read_seq_pe_file.get_line_number_string() )
                raise

            if not opts.quiet:
                sys.stdout.write( "%d sam %s processed for %s.\n" % ( seqi, "lines " if not pe_mode else "line pairs", colname ) )
        return counts, empty, ambiguous, lowqual, notaligned, nonunique, filtered, nreads

    warnings.showwarning = my_showwarning
    assert os.path.isfile(gff_filename), '## unable to open supplied gff file %s' % gff_filename
    try:
        counts, empty, ambiguous, lowqual, notaligned, nonunique, filtered, nreads = count_reads_in_features( sam_filenames, colnames, gff_filename, opts)
    except:
        # Top-level boundary: the bare except is kept so that sys.exit()
        # calls made while counting are also reported in this format.
        exc_value, exc_tb = sys.exc_info()[1:3]
        last_frame = traceback.extract_tb( exc_tb )[-1]
        sys.stderr.write( "Error: %s\n" % str( exc_value ) )
        sys.stderr.write( "[Exception type: %s, raised in %s:%d]\n" %
                          ( exc_value.__class__.__name__,
                            os.path.basename( last_frame[0] ),
                            last_frame[1] ) )
        sys.exit( 1 )
    return counts, empty, ambiguous, lowqual, notaligned, nonunique, filtered, nreads

Example 42

Project: jhbuild
Source File: sysdeps.py
View license
    def run(self, config, options, args, help=None):

        def fmt_details(pkg_config, req_version, installed_version):
            fmt_list = []
            if pkg_config:
                fmt_list.append(pkg_config)
            if req_version:
                fmt_list.append(_('required=%s') % req_version)
            if installed_version and installed_version != 'unknown':
                fmt_list.append(_('installed=%s') % installed_version)
            # Translators: This is used to separate items of package metadata
            fmt_str = _(', ').join(fmt_list)
            if fmt_str:
                return _('(%s)') % fmt_str
            else:
                return ''

        config.set_from_cmdline_options(options)

        module_set = jhbuild.moduleset.load(config)
        modules = args or config.modules
        module_list = module_set.get_full_module_list(modules, config.skip)

        if options.dump_all:
            for module in module_list:
                if (isinstance(module, SystemModule) or isinstance(module.branch, TarballBranch) and
                                                        module.pkg_config is not None):
                    if module.pkg_config is not None:
                        print 'pkgconfig:{0}'.format(module.pkg_config[:-3]) # remove .pc

                    if module.systemdependencies is not None:
                        for dep_type, value, altdeps in module.systemdependencies:
                            sys.stdout.write('{0}:{1}'.format(dep_type, value))
                            for dep_type, value, empty in altdeps:
                                sys.stdout.write(',{0}:{1}'.format(dep_type, value))
                            sys.stdout.write('\n')

            return

        module_state = module_set.get_module_state(module_list)

        have_new_enough = False
        have_too_old = False

        if options.dump:
            for module, (req_version, installed_version, new_enough, systemmodule) in module_state.iteritems():
                if new_enough:
                    continue

                if installed_version is not None and systemmodule:
                    # it's already installed but it's too old and we
                    # don't know how to build a new one for ourselves
                    have_too_old = True

                # request installation in two cases:
                #   1) we don't know how to build it
                #   2) we don't want to build it ourselves
                #
                # partial_build is on by default so this check will only
                # fail if someone explicitly turned it off
                if systemmodule or config.partial_build:
                    assert (module.pkg_config or module.systemdependencies)

                    if module.pkg_config is not None:
                        print 'pkgconfig:{0}'.format(module.pkg_config[:-3]) # remove .pc

                    if module.systemdependencies is not None:
                        for dep_type, value, altdeps in module.systemdependencies:
                            sys.stdout.write('{0}:{1}'.format(dep_type, value))
                            for dep_type, value, empty in altdeps:
                                sys.stdout.write(',{0}:{1}'.format(dep_type, value))
                            sys.stdout.write('\n')

            if have_too_old:
                return 1

            return

        print _('System installed packages which are new enough:')
        for module,(req_version, installed_version, new_enough, systemmodule) in module_state.iteritems():
            if (installed_version is not None) and new_enough and (config.partial_build or systemmodule):
                have_new_enough = True
                print ('    %s %s' % (module.name,
                                      fmt_details(module.pkg_config,
                                                  req_version,
                                                  installed_version)))
        if not have_new_enough:
            print _('  (none)')

        print _('Required packages:')
        print _('  System installed packages which are too old:')
        for module, (req_version, installed_version, new_enough, systemmodule) in module_state.iteritems():
            if (installed_version is not None) and (not new_enough) and systemmodule:
                have_too_old = True
                print ('    %s %s' % (module.name,
                                      fmt_details(module.pkg_config,
                                                  req_version,
                                                  installed_version)))
        if not have_too_old:
            print _('    (none)')

        print _('  No matching system package installed:')
        uninstalled = []
        for module, (req_version, installed_version, new_enough, systemmodule) in module_state.iteritems():
            if installed_version is None and (not new_enough) and systemmodule:
                print ('    %s %s' % (module.name,
                                      fmt_details(module.pkg_config,
                                                  req_version,
                                                  installed_version)))
                if module.pkg_config is not None:
                    uninstalled.append((module.name, 'pkgconfig', module.pkg_config[:-3])) # remove .pc
                elif module.systemdependencies is not None:
                    for dep_type, value, altdeps in module.systemdependencies:
                        uninstalled.append((module.name, dep_type, value))
        if len(uninstalled) == 0:
            print _('    (none)')

        have_too_old = False

        if config.partial_build:
            print _('Optional packages: (JHBuild will build the missing packages)')
            print _('  System installed packages which are too old:')
            for module, (req_version, installed_version, new_enough, systemmodule) in module_state.iteritems():
                if (installed_version is not None) and (not new_enough) and (not systemmodule):
                    have_too_old = True
                    print ('    %s %s' % (module.name,
                                          fmt_details(module.pkg_config,
                                                      req_version,
                                                      installed_version)))
            if not have_too_old:
                print _('    (none)')

            print _('  No matching system package installed:')
            for module,(req_version, installed_version, new_enough, systemmodule) in module_state.iteritems():
                if installed_version is None and (not new_enough) and (not systemmodule):
                    print ('    %s %s' % (module.name,
                                          fmt_details(module.pkg_config,
                                                      req_version,
                                                      installed_version)))
                    if module.pkg_config is not None:
                        uninstalled.append((module.name, 'pkgconfig', module.pkg_config[:-3])) # remove .pc

            if len(uninstalled) == 0:
                print _('    (none)')

        if options.install:
            installer = SystemInstall.find_best()
            if installer is None:
                # FIXME: This should be implemented per Colin's design:
                # https://bugzilla.gnome.org/show_bug.cgi?id=682104#c3
                if cmds.has_command('apt-get'):
                    raise FatalError(_("%(cmd)s is required to install "
                                       "packages on this system. Please "
                                       "install %(cmd)s.")
                                     % {'cmd' : 'apt-file'})

                raise FatalError(_("Don't know how to install packages on this system"))

            if len(uninstalled) == 0:
                logging.info(_("No uninstalled system dependencies to install for modules: %r") % (modules, ))
            else:
                logging.info(_("Installing dependencies on system: %s") % \
                               ' '.join(pkg[0] for pkg in uninstalled))
                installer.install(uninstalled)

Example 43

Project: golismero
Source File: sqlharvest.py
View license
def main():
    """Harvest table names from SQL files found through a web search.

    Pages through the results of ``SEARCH_URL``/``QUERY`` ten hits at a time,
    downloads each hit (skipping those whose advertised ``Content-Length``
    exceeds ``MAX_FILE_SIZE``) and counts every table name matched by
    ``REGEX_RESULT``.

    The counters are seeded from ``TABLES_FILE`` and the starting result page
    from the ``index`` option of ``CONFIG_FILE``.  Both files are rewritten on
    the way out, including when the run is interrupted with Ctrl-C.
    """
    tables = dict()
    cookies = cookielib.CookieJar()
    cookie_processor = urllib2.HTTPCookieProcessor(cookies)
    opener = urllib2.build_opener(cookie_processor)
    opener.addheaders = [("User-Agent", USER_AGENT)]

    conn = opener.open(SEARCH_URL)
    page = conn.read()  # set initial cookie values

    config = ConfigParser.ConfigParser()
    config.read(CONFIG_FILE)

    if not config.has_section("options"):
        config.add_section("options")
    if not config.has_option("options", "index"):
        config.set("options", "index", "0")

    i = int(config.get("options", "index"))

    # Seed the counters from a previous run.  This is best effort: a missing
    # or malformed file only means we keep whatever was parsed so far.  The
    # handler is deliberately narrow so Ctrl-C and SystemExit still propagate.
    try:
        with open(TABLES_FILE, 'r') as f:
            for line in f:
                if ',' in line:
                    temp = line.split(',')
                    tables[temp[0]] = int(temp[1])
    except (IOError, OSError, ValueError):
        pass

    socket.setdefaulttimeout(TIMEOUT)

    files, old_files = None, None
    try:
        while True:
            abort = False
            old_files = files
            files = []

            try:
                conn = opener.open("%s&q=%s&start=%d&sa=N" % (SEARCH_URL, QUERY.replace(' ', '+'), i * 10))
                page = conn.read()
                for match in re.finditer(REGEX_URLS, page):
                    files.append(urllib.unquote(match.group(1)))
                    if len(files) >= 10:
                        break
                # the same hits twice in a row means we ran out of results
                abort = (files == old_files)

            except KeyboardInterrupt:
                raise

            except Exception as msg:
                print(msg)

            if abort:
                break

            sys.stdout.write("\n---------------\n")
            sys.stdout.write("Result page #%d\n" % (i + 1))
            sys.stdout.write("---------------\n")

            for sqlfile in files:
                print(sqlfile)

                try:
                    req = urllib2.Request(sqlfile)
                    response = urllib2.urlopen(req)

                    if "Content-Length" in response.headers:
                        if int(response.headers.get("Content-Length")) > MAX_FILE_SIZE:
                            continue

                    page = response.read()
                    found = False
                    counter = 0

                    for match in re.finditer(REGEX_RESULT, page):
                        counter += 1
                        table = match.group("result").strip().strip("`\"'").replace('"."', ".").replace("].[", ".").strip('[]')

                        # discard matches that are clearly not plain identifiers
                        if table and not any(_ in table for _ in ('>', '<', '--', ' ')):
                            found = True
                            sys.stdout.write('*')
                            tables[table] = tables.get(table, 0) + 1

                    if found:
                        sys.stdout.write("\n")

                except KeyboardInterrupt:
                    raise

                except Exception as msg:
                    print(msg)

            # the loop above never breaks, so every finished page advances the index
            i += 1

    except KeyboardInterrupt:
        pass

    finally:
        # persist the counters (most frequent first) and the page index
        with open(TABLES_FILE, 'w+') as f:
            tables = sorted(tables.items(), key=itemgetter(1), reverse=True)
            for table, count in tables:
                f.write("%s,%d\n" % (table, count))

        config.set("options", "index", str(i + 1))
        with open(CONFIG_FILE, 'w+') as f:
            config.write(f)

Example 44

Project: gratipay.com
Source File: fake_data.py
View license
def populate_db(db, num_participants=100, ntips=200, num_teams=5, num_transfers=5000):
    """Populate DB with fake data.

    Creates, in order: participants, teams (plus one extra team named
    "Gratipay"), payment instructions, elsewhere accounts, tips, payments,
    transfers, and finally one ``paydays`` row (with matching exchanges) for
    every week between the earliest and latest generated record.

    :param db: database handle handed to the ``fake_*`` helpers
    :param num_participants: number of participants to create; when more than
        one is requested, the last one has identity info stored and
        verification set for the countries with codes 'NP' and 'VA'
    :param ntips: number of tips to create, and the maximum number of payment
        instructions
    :param num_teams: number of teams whose owners are sampled from the
        participants
    :param num_transfers: number of transfers to create
    """
    print("Making Participants")
    make_flag_tester = num_participants > 1

    # leave one slot free for the flag tester when there is room for it
    regular_count = num_participants - 1 if make_flag_tester else num_participants
    participants = [fake_participant(db) for i in xrange(regular_count)]

    if make_flag_tester:
        # make a participant for testing weird flags
        flag_tester = fake_participant(db, random_identities=False)
        participants.append(flag_tester)

        nepal = db.one("SELECT id FROM countries WHERE code='NP'")
        flag_tester.store_identity_info(nepal, 'nothing-enforced', {})
        flag_tester.set_identity_verification(nepal, True)

        vatican = db.one("SELECT id FROM countries WHERE code='VA'")
        flag_tester.store_identity_info(vatican, 'nothing-enforced', {})
        flag_tester.set_identity_verification(vatican, True)

    print("Making Teams")
    teams = [fake_team(db, teamowner) for teamowner in random.sample(participants, num_teams)]

    # Creating a fake Gratipay Team
    teamowner = random.choice(participants)
    teams.append(fake_team(db, teamowner, "Gratipay"))

    print("Making Payment Instructions")
    npayment_instructions = 0
    payment_instructions = []
    for participant in participants:
        for team in teams:
            # eliminate self-payment
            if participant.username == team.owner:
                continue
            npayment_instructions += 1
            if npayment_instructions > ntips:
                break
            payment_instructions.append(fake_payment_instruction(db, participant, team))
        if npayment_instructions > ntips:
            break

    print("Making Elsewheres")
    for p in participants:
        # All participants get between 1 and 3 elsewheres
        num_elsewheres = random.randint(1, 3)
        for platform_name in random.sample(PLATFORMS, num_elsewheres):
            fake_elsewhere(db, p, platform_name)

    print("Making Tips")
    tips = []
    for i in xrange(ntips):
        tipper, tippee = random.sample(participants, 2)
        tips.append(fake_tip(db, tipper, tippee))

    # Payments: one 'to-team' payment per instruction, then one
    # 'to-participant' payment per team carrying everything it received
    payments = []
    paymentcount = 0
    team_amounts = defaultdict(int)
    for payment_instruction in payment_instructions:
        participant = Participant.from_id(payment_instruction['participant_id'])
        team = Team.from_id(payment_instruction['team_id'])
        amount = payment_instruction['amount']
        assert participant.username != team.owner
        paymentcount += 1
        sys.stdout.write("\rMaking Payments (%i)" % (paymentcount))
        sys.stdout.flush()
        payments.append(fake_payment(db, participant.username, team.slug, amount, 'to-team'))
        team_amounts[team.slug] += amount
    for team in teams:
        paymentcount += 1
        sys.stdout.write("\rMaking Payments (%i)" % (paymentcount))
        sys.stdout.flush()
        payments.append(fake_payment(db, team.owner, team.slug, team_amounts[team.slug], 'to-participant'))
    print("")

    # Transfers
    transfers = []
    for i in xrange(num_transfers):
        sys.stdout.write("\rMaking Transfers (%i/%i)" % (i+1, num_transfers))
        sys.stdout.flush()
        tipper, tippee = random.sample(participants, 2)
        transfers.append(fake_transfer(db, tipper, tippee))
    print("")

    # Paydays
    # First determine the boundaries - min and max date
    min_date = min(min(x['ctime'] for x in payment_instructions + tips),
                   min(x['timestamp'] for x in payments + transfers))
    max_date = max(max(x['ctime'] for x in payment_instructions + tips),
                   max(x['timestamp'] for x in payments + transfers))
    # iterate through min_date, max_date one week at a time
    payday_counter = 1
    date = min_date
    paydays_total = (max_date - min_date).days // 7 + 1
    one_second = datetime.timedelta(seconds=1)
    while date < max_date:
        sys.stdout.write("\rMaking Paydays (%i/%i)" % (payday_counter, paydays_total))
        sys.stdout.flush()
        payday_counter += 1
        end_date = date + datetime.timedelta(days=7)

        # everything that happened in the half-open window [date, end_date)
        week_tips = [x for x in tips if date <= x['ctime'] < end_date]
        week_transfers = [x for x in transfers if date <= x['timestamp'] < end_date]
        week_payment_instructions = [x for x in payment_instructions
                                     if date <= x['ctime'] < end_date]
        week_payments = [x for x in payments if date <= x['timestamp'] < end_date]
        week_payments_to_teams = [x for x in week_payments if x['direction'] == 'to-team']
        week_payments_to_owners = [x for x in week_payments
                                   if x['direction'] == 'to-participant']

        for p in participants:
            transfers_in = [x for x in week_transfers if x['tippee'] == p.username]
            payments_in = [x for x in week_payments_to_owners
                           if x['participant'] == p.username]
            transfers_out = [x for x in week_transfers if x['tipper'] == p.username]
            payments_out = [x for x in week_payments_to_teams
                            if x['participant'] == p.username]
            amount_in = sum(t['amount'] for t in transfers_in + payments_in)
            amount_out = sum(t['amount'] for t in transfers_out + payments_out)
            amount = amount_out - amount_in
            if amount == 0:
                # balanced week: no exchange to record, so no fee to compute
                continue
            fee = abs((amount * D('0.02')).quantize(D('.01')))
            fake_exchange(
                db=db,
                participant=p,
                amount=amount,
                fee=fee,
                timestamp=(end_date - one_second)
            )

        # everybody who took part in anything this week
        actives = set()
        for xfers in week_tips, week_transfers:
            actives.update(x['tipper'] for x in xfers)
            actives.update(x['tippee'] for x in xfers)
        actives.update(x['participant_id'] for x in week_payment_instructions)
        actives.update(x['participant'] for x in week_payments)

        payday = {
            'ts_start': date,
            'ts_end': end_date,
            'nusers': len(actives),
            'volume': sum(x['amount'] for x in week_transfers)
        }
        insert_fake_data(db, "paydays", **payday)
        date = end_date
    print("")

Example 45

View license
    def configure_step(self):
        """Custom configuration procedure for Quantum ESPRESSO.

        Adds configure options derived from the toolchain and easyconfig
        settings, runs the parent configure step, and then patches the
        generated files in place (each original is kept with an ``.orig.eb``
        suffix):

        * ``make.sys``: DFLAGS, compiler/linker flags and library settings
        * ``make_wannier90.sys``: the LIBS line
        * ``make.sys.in`` of a ``want-*`` directory in the build dir, if any

        Finally every non-``espresso*`` directory in the build dir is moved
        into ``espresso-<version>`` and symlinked under its expected name.

        Raises EasyBuildError when a file cannot be patched, when more than
        one ``want-*`` directory exists, when ``make.sys.in`` is not found in
        it, or when moving/symlinking the directories fails.
        """

        if self.toolchain.options.get('openmp', False) or self.cfg['hybrid']:
            self.cfg.update('configopts', '--enable-openmp')

        if not self.toolchain.options.get('usempi', None):
            self.cfg.update('configopts', '--disable-parallel')

        if not self.cfg['with_scalapack']:
            self.cfg.update('configopts', '--without-scalapack')

        repls = []

        if self.toolchain.comp_family() in [toolchain.INTELCOMP]:
            # set preprocessor command (-E to stop after preprocessing, -C to preserve comments)
            cpp = "%s -E -C" % os.getenv('CC')
            repls.append(('CPP', cpp, False))
            env.setvar('CPP', cpp)

            # also define $FCCPP, but do *not* include -C (comments should not be preserved when preprocessing Fortran)
            env.setvar('FCCPP', "%s -E" % os.getenv('CC'))

        super(EB_QuantumESPRESSO, self).configure_step()

        # compose list of DFLAGS (flag, value, keep_stuff)
        # for guidelines, see include/defs.h.README in sources
        dflags = []

        comp_fam_dflags = {
            toolchain.INTELCOMP: '-D__INTEL',
            toolchain.GCC: '-D__GFORTRAN -D__STD_F95',
        }
        dflags.append(comp_fam_dflags[self.toolchain.comp_family()])

        if self.toolchain.options.get('openmp', False):
            libfft = os.getenv('LIBFFT_MT')
        else:
            libfft = os.getenv('LIBFFT')
        if libfft:
            if "fftw3" in libfft:
                dflags.append('-D__FFTW3')
            else:
                dflags.append('-D__FFTW')
            env.setvar('FFTW_LIBS', libfft)

        if get_software_root('ACML'):
            dflags.append('-D__ACML')

        if self.toolchain.options.get('usempi', None):
            dflags.append('-D__MPI -D__PARA')

        if self.toolchain.options.get('openmp', False) or self.cfg['hybrid']:
            dflags.append(" -D__OPENMP")

        if self.cfg['with_scalapack']:
            dflags.append(" -D__SCALAPACK")

        # always include -w to supress warnings
        dflags.append('-w')

        repls.append(('DFLAGS', ' '.join(dflags), False))

        # complete C/Fortran compiler and LD flags
        if self.toolchain.options.get('openmp', False) or self.cfg['hybrid']:
            repls.append(('LDFLAGS', self.toolchain.get_flag('openmp'), True))
            repls.append(('(?:C|F90|F)FLAGS', self.toolchain.get_flag('openmp'), True))

        # obtain library settings; the multi-threaded variants are used with OpenMP
        lib_var_suffix = '_MT' if self.toolchain.options.get('openmp', False) else ''
        libs = []
        for lib in ['BLAS', 'LAPACK', 'FFT', 'SCALAPACK']:
            val = os.getenv('LIB%s%s' % (lib, lib_var_suffix))
            repls.append(('%s_LIBS' % lib, val, False))
            libs.append(val)
        libs = ' '.join(libs)

        repls.append(('BLAS_LIBS_SWITCH', 'external', False))
        repls.append(('LAPACK_LIBS_SWITCH', 'external', False))
        repls.append(('LD_LIBS', os.getenv('LIBS'), False))

        self.log.debug("List of replacements to perform: %s" % repls)

        # with GCC, .f90 files are run through the C preprocessor explicitly
        # before being compiled; this is the two-line make rule that does it
        fix_f90_rule = self.toolchain.comp_family() in [toolchain.GCC]
        f90_rule = ("$(CPP) -C $(CPPFLAGS) $< -o $*.F90\n" +
                    "\t$(MPIF90) $(F90FLAGS) -c $*.F90 -o $*.o")

        # patch make.sys file
        # (with inplace=1, whatever is written to stdout becomes the new file content)
        fn = os.path.join(self.cfg['start_dir'], 'make.sys')
        try:
            for line in fileinput.input(fn, inplace=1, backup='.orig.eb'):
                for (k, v, keep) in repls:
                    # need to use [ \t]* instead of \s*, because vars may be undefined as empty,
                    # and we don't want to include newlines
                    if keep:
                        line = re.sub(r"^(%s\s*=[ \t]*)(.*)$" % k, r"\1\2 %s" % v, line)
                    else:
                        line = re.sub(r"^(%s\s*=[ \t]*).*$" % k, r"\1%s" % v, line)

                # fix preprocessing directives for .f90 files in make.sys if required
                if fix_f90_rule:
                    line = re.sub(r"\$\(MPIF90\) \$\(F90FLAGS\) -c \$<", f90_rule, line)

                sys.stdout.write(line)
        except IOError as err:
            raise EasyBuildError("Failed to patch %s: %s", fn, err)

        with open(fn, "r") as patched:
            self.log.debug("Contents of patched %s: %s" % (fn, patched.read()))

        # patch default make.sys for wannier
        if LooseVersion(self.version) >= LooseVersion("5"):
            fn = os.path.join(self.cfg['start_dir'], 'install', 'make_wannier90.sys')
        else:
            fn = os.path.join(self.cfg['start_dir'], 'plugins', 'install', 'make_wannier90.sys')
        try:
            for line in fileinput.input(fn, inplace=1, backup='.orig.eb'):
                line = re.sub(r"^(LIBS\s*=\s*).*", r"\1%s" % libs, line)

                sys.stdout.write(line)

        except IOError as err:
            raise EasyBuildError("Failed to patch %s: %s", fn, err)

        with open(fn, "r") as patched:
            self.log.debug("Contents of patched %s: %s" % (fn, patched.read()))

        # patch Makefile of want plugin
        wantprefix = 'want-'
        wantdirs = [d for d in os.listdir(self.builddir) if d.startswith(wantprefix)]

        if len(wantdirs) > 1:
            raise EasyBuildError("Found more than one directory with %s prefix, help!", wantprefix)

        if len(wantdirs) != 0:
            wantdir = os.path.join(self.builddir, wantdirs[0])
            make_sys_in_path = None
            cand_paths = [os.path.join('conf', 'make.sys.in'), os.path.join('config', 'make.sys.in')]
            for path in cand_paths:
                full_path = os.path.join(wantdir, path)
                if os.path.exists(full_path):
                    make_sys_in_path = full_path
                    break
            if make_sys_in_path is None:
                raise EasyBuildError("Failed to find make.sys.in in want directory %s, paths considered: %s",
                                     wantdir, ', '.join(cand_paths))

            try:
                for line in fileinput.input(make_sys_in_path, inplace=1, backup='.orig.eb'):
                    # fix preprocessing directives for .f90 files in make.sys if required:
                    # substitute the @f90rule@ placeholder with the explicit rule
                    if fix_f90_rule:
                        line = re.sub("@f90rule@", f90_rule, line)

                    sys.stdout.write(line)
            except IOError as err:
                # report the file that was actually being patched here
                raise EasyBuildError("Failed to patch %s: %s", make_sys_in_path, err)

        # move non-espresso directories to where they're expected and create symlinks
        special_linknames = {'sax': 'SaX', 'wannier90': 'W90'}
        uppercase_linknames = ['gipaw', 'plumed', 'want', 'yambo']
        try:
            dirnames = [d for d in os.listdir(self.builddir) if not d.startswith('espresso')]
            targetdir = os.path.join(self.builddir, "espresso-%s" % self.version)
            for dirname in dirnames:
                shutil.move(os.path.join(self.builddir, dirname), os.path.join(targetdir, dirname))
                self.log.info("Moved %s into %s" % (dirname, targetdir))

                dirname_head = dirname.split('-')[0]
                linkname = special_linknames.get(dirname_head)
                if linkname is None and dirname_head in uppercase_linknames:
                    linkname = dirname_head.upper()
                if linkname:
                    os.symlink(os.path.join(targetdir, dirname), os.path.join(targetdir, linkname))

        except OSError as err:
            raise EasyBuildError("Failed to move non-espresso directories: %s", err)

Example 46

Project: liffy
Source File: liffy.py
View license
def main():
    """Command-line entry point: check the target, then run one technique.

    Prints the banner, parses the command line, requests the scheme and host
    part of ``--url`` to see whether the target answers with status 200, and
    then runs the first selected technique in this order: data, input,
    expect, environ, access, ssh, filter.

    Exits with status 0 after printing a usage hint when no arguments or no
    ``--url`` are given, and likewise when no technique is selected.
    """
    # Terminal Colors
    t = Terminal()

    def stamp(paint):
        """Return the current time as a '[timestamp] ' prefix coloured by *paint*."""
        return paint("[{0}] ".format(datetime.datetime.now()))

    def banner():
        # raw string: the backslashes are part of the ASCII art
        print(t.cyan(r"""

    .____    .__  _____  _____
    |    |   |__|/ ____\/ ____\__.__.
    |    |   |  \   __\   __<   |  |
    |    |___|  ||  |   |  |  \___  |
    |_______ \__||__|   |__|  / ____| v1.2
        \/                \/

"""))

    def progressbar():

        bar_width = 70
        sys.stdout.write(t.cyan("[{0}]  ".format(datetime.datetime.now())) + " " * bar_width)
        sys.stdout.flush()
        # move the cursor back to the start of the (still empty) bar
        sys.stdout.write("\b" * (bar_width + 1))

        for w in xrange(bar_width):
            time.sleep(0.01)
            sys.stdout.write(".")
            sys.stdout.flush()

        sys.stdout.write("\n")

    def usage():
        """Print the usage hint and exit."""
        print(stamp(t.red) + "Not Enough Arguments!")
        print(stamp(t.red) + "Example: ./liffy.py --url \
        http://target/files.php?file= --data\n")
        sys.exit(0)

    #---------------------------------------------------------------------------------------------------

    banner()

    # sys.argv always contains the script name, so "no arguments" means length 1
    if len(sys.argv) < 2:
        usage()

    #---------------------------------------------------------------------------------------------------

    # Command Line Arguments

    parser = argparse.ArgumentParser()
    parser.add_argument("--url", help="target url")
    parser.add_argument("--data", help="data technique", action="store_true")
    parser.add_argument("--input", help="input technique", action="store_true")
    parser.add_argument("--expect", help="expect technique", action="store_true")
    parser.add_argument("--environ", help="/proc/self/environ technique", action="store_true")
    parser.add_argument("--access", help="access logs technique", action="store_true")
    parser.add_argument("--ssh", help="auth logs technique", action="store_true")
    parser.add_argument("--filter", help="filter technique", action="store_true")
    parser.add_argument("--location", help="path to target file (access log, auth log, etc.)")
    parser.add_argument("--nostager", help="execute payload directly, do not use stager", action="store_true")
    parser.add_argument("--relative", help="use path traversal sequences for attack", action="store_true")
    parser.add_argument("--cookies", help="session cookies")
    args = parser.parse_args()

    #---------------------------------------------------------------------------------------------------

    # Assign argument values

    url = args.url
    nostager = args.nostager
    relative = args.relative
    c = args.cookies

    # --url is optional for argparse but nothing below works without it
    if not url:
        usage()

    #---------------------------------------------------------------------------------------------------

    # Check to make sure target is actually up

    print(stamp(t.cyan) + "Checking Target: {0}".format(url))
    parsed = urlparse.urlsplit(url)
    domain = parsed.scheme + "://" + parsed.netloc
    progressbar()

    try:
        r = requests.get(domain)
        if r.status_code != 200:
            print(stamp(t.red) + "Did Not Receive Correct Response From Target URL!")
        else:
            print(stamp(t.red) + "Target URL Looks Good!")
            if args.data:
                print(stamp(t.red) + "Data Technique Selected!")
                technique = core.Data(url, nostager, c)
                technique.execute_data()
            elif args.input:
                print(stamp(t.red) + "Input Technique Selected!")
                technique = core.Input(url, nostager, c)
                technique.execute_input()
            elif args.expect:
                print(stamp(t.red) + "Expect Technique Selected!")
                technique = core.Expect(url, nostager, c)
                technique.execute_expect()
            elif args.environ:
                print(stamp(t.red) + "/proc/self/environ Technique Selected!")
                technique = core.Environ(url, nostager, relative, c)
                technique.execute_environ()
            elif args.access:
                if not args.location:
                    print(stamp(t.red) + "Log Location Not Provided! Using Default")
                    location = '/var/log/apache2/access.log'
                else:
                    location = args.location
                technique = core.Logs(url, location, nostager, relative, c)
                technique.execute_logs()
            elif args.ssh:
                if not args.location:
                    print(stamp(t.red) + "Log Location Not Provided! Using Default")
                    location = '/var/log/auth.log'
                else:
                    location = args.location
                technique = core.SSHLogs(url, location, relative, c)
                technique.execute_ssh()
            elif args.filter:
                print(stamp(t.red) + "Filter Technique Selected!")
                technique = core.Filter(url, c)
                technique.execute_filter()
            else:
                print(stamp(t.red) + "Technique Not Selected!")
                sys.exit(0)
    except requests.RequestException as e:
        # RequestException is the base class of HTTPError and also covers
        # connection failures and timeouts, i.e. the "target is down" case
        print(t.red("[{0}] HTTP Error!".format(datetime.datetime.now())) + str(e))

Example 47

Project: weeman
Source File: shell.py
View license
def shell():
    """
        The shell, parse command line args,
        and set variables.

        Commands are read from the prompt in a loop until "q"/"quit" is
        entered or standard input ends. "set" updates the module-level
        settings declared global below and appends most of them to
        history.log; "run" calls clone() and serve() on a
        weeman(url, port) instance.
    """
    global url
    global port
    global action_url
    global user_agent
    global html_file
    global external_js

    print_startup()

    # Append mode creates the file when it is missing and keeps existing
    # entries, so no exists()/st_size branching is needed. The context
    # manager guarantees the log is flushed and closed on every exit path.
    with open("history.log", "a") as history:
        while True:
            try:
                # for Re-complete
                complete(array)
                an = raw_input("\033[01;37m>>> \033[00m") or "help"
                prompt = an.split()
                if not prompt:
                    # Whitespace-only input splits to an empty list.
                    print("Error: What? try help.")
                elif prompt[0] in (";", "clear"):
                    print("\033[H\033[J")
                elif prompt[0] in ("q", "quit"):
                    printt(2,"bye bye!")
                    break
                elif prompt[0] in ("help", "?"):
                    if len(prompt) > 1:
                        print_help_option(str(prompt[1]))
                    else:
                        print_help()
                elif prompt[0] == "show":
                    sys.stdout.write("\033[01;37m\t")
                    print("-" * 20)
                    print("\turl          : %s " %url)
                    print("\tport         : %d " %(port))
                    print("\taction_url   : %s " %(action_url))
                    print("\tuser_agent   : %s " %(user_agent))
                    print("\thtml_file    : %s " %(html_file))
                    print("\texternal_js  : %s " %(external_js))
                    sys.stdout.write("\t")
                    print("-" * 20)
                    sys.stdout.write("\033[01;00m")
                elif prompt[0] == "set":
                    # A missing option name or value raises IndexError, which
                    # the handler below turns into a "please provide option"
                    # message.
                    option = prompt[1]
                    if option == "port":
                        port = int(prompt[2])
                        ## Check if port == 80 and not running as root
                        if port == 80 and os.getuid() != 0:
                            printt(2, "Permission denied, to bind port 80, you need to run weeman as root.")
                        history.write("port = %s\n" %port)
                    elif option == "url":
                        url = str(prompt[2])
                        history.write("url = %s\n" %url)
                    elif option == "action_url":
                        action_url = str(prompt[2])
                        history.write("action_url = %s\n" %action_url)
                    elif option == "user_agent":
                        # The user agent may contain spaces: everything after
                        # the option name is the value. Joining the remaining
                        # tokens avoids the leading blanks (and the stripping
                        # of any literal "user_agent" inside the value) that
                        # the old concatenate-then-replace approach produced.
                        ua_tokens = prompt[2:]
                        if not ua_tokens:
                            raise IndexError("missing value for user_agent")
                        user_agent = " ".join(ua_tokens)
                        history.write("user_agent = %s\n" %user_agent)
                    elif option == "html_file":
                        # NOTE(review): unlike the other options, html_file is
                        # not recorded in history.log - confirm whether that
                        # is intentional.
                        html_file = str(prompt[2])
                    elif option == "external_js":
                        external_js = str(prompt[2])
                        history.write("external_js = %s\n" %external_js)
                elif prompt[0] in ("run", "r"):
                    if not url:
                        printt(3, "Error: \'url\' can't be \'None\', please use \'set\'.")
                    elif not action_url:
                        printt(3, "Error: \'action_url\' can't be \'None\', please use \'set\'.")
                    else:
                        # Here we start the server (:
                        s = weeman(url,port)
                        s.clone()
                        s.serve()
                elif prompt[0] in ("banner", "b"):
                    print_startup()
                elif prompt[0] == "framework":
                    fw = framework()
                    fw.shell()
                else:
                    print("Error: \'%s\' What? try help." %prompt[0])

            except KeyboardInterrupt:
                s = weeman(url,port)
                s.cleanup()
                print("\nInterrupt ...")
            except EOFError:
                # Standard input is exhausted (Ctrl-D or a closed pipe).
                # Without this, the generic handler below would report the
                # error and the loop would hit the same EOF forever.
                print("")
                printt(2,"bye bye!")
                break
            except IndexError:
                if prompt[0] == "help" or prompt[0] == "?":
                    print_help()
                else:
                    printt(3, "Error: please provide option for \'%s\'." %prompt[0])
            except Exception as e:
                printt(3, "Error: (%s)" %(str(e)))

Example 48

Project: pius
Source File: signer.py
View license
  def sign_all_uids(self, key, level):
    '''The main function that signs all the UIDs on a given key.

    Every UID returned by self.get_uids(key) that is neither revoked ('r')
    nor expired ('e') is signed and exported; depending on
    self.encrypt_outfiles and self.mail the result is also encrypted and
    mailed. Progress for each UID is written to stdout on a single line.

    Each processed uid dict gets a 'result' entry (True/False) and, when
    encryption is requested, an 'enc_file' entry (None on failure).

    Returns True if at least one UID was signed, False otherwise. Calls
    sys.exit(1) on AgentError, PassphraseError or NoSelfKeyError.
    '''
    signed_any_uids = False
    uids = self.get_uids(key)
    single = len(uids) == 1
    print('  There %s %s UID%s on this key to sign' % (
        'is' if single else 'are', len(uids), '' if single else 's'
    ))

    # From the user key ring make a clean copy
    self.export_clean_key(key)
    for uid in uids:
      if uid['status'] == 'r':
        print('  Skipping revoked uid %s' % uid['index'])
        continue
      elif uid['status'] == 'e':
        print('  Skipping expired uid %s' % uid['index'])
        continue
      # No newline here: the status words ('signed', ', encrypted', ...) are
      # appended to this line as each step succeeds.
      sys.stdout.write('  UID %s (%s): ' % (uid['index'], uid['id']))

      # Make sure we have a clean keyring, and then import the key we care
      # about
      self.clean_working_keyring()
      self.import_clean_key(key)

      # Sign the key...
      if self.mode in (MODE_CACHE_PASSPHRASE, MODE_AGENT):
        try:
          res = self.sign_uid(key, uid['index'], level)
        except AgentError:
          print('\ngpg-agent problems, bailing out!')
          sys.exit(1)
        except PassphraseError:
          print('\nThe passphrase that worked a moment ago now doesn\'t work.'
                ' I\'m bailing out!')
          sys.exit(1)
        except NoSelfKeyError:
          print('\nWe don\'t have our own key, according to GnuPG.')
          # No need to say anything else
          sys.exit(1)
      else:
        res = self.sign_uid_expect(key, uid['index'], level)
      if not res:
        uid['result'] = False
        continue
      sys.stdout.write('signed')
      uid['result'] = True
      signed_any_uids = True

      # Export the signed key...
      self.export_signed_uid(key, uid['file'])

      # If requested, encrypt the signed key...
      if self.encrypt_outfiles:
        try:
          uid['enc_file'] = self._outfile_path(
              self.encrypt_signed_uid(key, uid['file'])
          )
          sys.stdout.write(', encrypted')
        except EncryptionKeyError:
          print('\nEncryption failed due to invalid key error. User may not'
                ' have an encryption subkey or it may be expired.')
          uid['enc_file'] = None
          # If we can't encrypt, we don't want to mail - even if we're using
          # PGP/Mime the encryption for that will also fail. So we move on to
          # the next key
          continue

      # If requested, send keys out. Note this doesn't depend on
      # encrypt_outfiles, because if we use PGP/Mime, the default, the email
      # itself is encrypted
      if self.mail:
        try:
          if uid['email'] is None:
            print('  WARNING: No email for %s, cannot send key.' % uid['id'])
            continue
          # This is ugly. The mailer needs to be able to call
          # encrypt_and_sign_file() to generate the PGP/MIME file, so we pass
          # ourselves in and it can call it...
          self.mailer.send_sig_mail(self.signer, key, uid, self)
          sys.stdout.write(', mailed')
        except MailSendError as msg:
          print('\nThere was a problem talking to the mail server (%s): %s'
                % (self.mail_host, msg))

      # add a newline to all the sys.stdout.write()s
      print('')

      # remove the signed file, if it exists (it might not, if it's
      # expired, the user chose not to sign it, etc.)
      # But don't do this if the ONLY action we're performing is creating those
      # files - then the desired result is these files.
      if self.encrypt_outfiles or self.mail:
        if os.path.exists(uid['file']):
          os.unlink(uid['file'])

    if self.verbose:
      self.print_filenames(uids)

    # Remove the clean keyfile we temporarily created
    self.clean_clean_key(key)
    return signed_any_uids

Example 49

Project: DragonPy
Source File: pager.py
View license
def _manual_test_console():
    """Interactive, manual walkthrough of console output behaviour.

    Prints a series of explanations and demonstrations, pausing for a key
    press (via getch()) between steps, and finishes by paging this module's
    own source through page(). Nothing is returned; the function is meant to
    be observed by a person at a terminal.
    """
    print("\nconsole size: width %s, height %s" % (getwidth(), getheight()))
    echo("--<enter>--")
    getch()
    echo("\n")

    print("\nsys.stdout.write() doesn't insert newlines automatically,")
    print("that's why it is used for console output in non-trivial")
    print("cases here.\n")
    sys.stdout.write("--<enter>--")
    sys.stdout.flush()
    getch()
    print("\rHowever, sys.stdout.write() requires explicit flushing")
    print("to make the output immediately appear on the screen.")
    print("echo() function from this module does this automatically.")
    echo("\n--<enter>--")
    getch()

    print("\n\nThe following test outputs string equal to the width of the\n"
          "screen and waits for you to press <enter>. It behaves\n"
          "differently on Linux and Windows - W. scrolls the window and\n"
          "places cursor on the next line immediately, while L. window\n"
          "doesn't scroll until the next character is output.\n"
         )
    print("Tested on:")
    print("  Windows Vista - cmd.exe console")
    print("  Debian Lenny - native terminal")
    print("  Debian Lenny - PuTTY SSH terminal from Windows Vista")
    echo("\n--<enter>--")
    getch()
    echo("\n")

    # A line exactly as wide as the console: '<', width-2 dashes, '>'.
    echo("<" + "-"*(getwidth()-2) + ">")
    getch()
    print("^ note there is no newline when the next character is printed")
    print("")
    print("At least this part works similar on all platforms. It is just\n"
          "the state of the console after the last character on the line\n"
          "is printed that is different.")
    print("")
    echo("--<enter>--")
    getch()
    print("")

    print("\nBut there is one special case.")
    print("")
    print("It is when the next character is a newline.")
    print("")
    print("The following test prints line equal to the width of the\n"
          "console, waits for <enter>, then outputs newline '\\n',\n"
          "waits for another key press, then outputs 'x' char.")
    print("")
    echo("--<enter>--")
    getch()
    print("")

    echo("<" + "-"*(getwidth()-2) + ">")
    getch()
    echo("\n")
    getch()
    echo("x")
    getch()

    print("\n^ here is the difference:")
    print("")
    print("On Windows you will get:\n"
          "  <----------->\n"
          "  \n"
          "  x")
    print("")
    print("Linux will show you:\n"
          "  <----------->\n"
          "  x")
    print("")
    echo("--<enter>--")
    getch()
    print("")

    print("\nThe next test will fill the screen with '1' digits\n"
          "numbering each line staring from 1.")
    print("")
    print("It works the same on Linux and Windows, because the next\n"
          "character after the last on the line is not linefeed.\n")
    echo("--<enter>--")
    getch()
    print("")
    numwidth = len(str(getwidth()))
    strlen = getwidth() - numwidth - 2 # 2 = '. ' after the line number
    filler = '1' * strlen
    for i in range(getheight()-1):     # -1 to leave last line for --<enter>--
        lineno = ("%" + str(numwidth) + "s. ") % (i+1)
        # Deliberately no newline: each write is one full console line.
        sys.stdout.write(lineno + filler)
    echo("--<enter>--")
    getch()
    print("")

    print("\nNext test prints this source code using page() function")
    print("")
    echo("--<enter>--")
    getch()
    print("")
    # Close the source file once paging is done instead of leaking the handle.
    with open(__file__) as content:
        page(content)
    echo("--<enter>--")
    getch()
    print("")

Example 50

Project: apogee
Source File: turbospec.py
View license
def turbosynth(*args,**kwargs):
    """
    NAME:
       turbosynth
    PURPOSE:
       Run a Turbospectrum synthesis (direct interface to the Turbospectrum code; use 'synth' for a general routine that generates the non-continuum-normalized spectrum, convolves with the LSF and macroturbulence, and optionally continuum normalizes the output)
    INPUT ARGUMENTS:
       lists with abundances:
          [Atomic number1,diff1]
          [Atomic number2,diff2]
          ...
          [Atomic numberM,diffM]
    SYNTHESIS KEYWORDS:
       isotopes= ('solar') use 'solar' or 'arcturus' isotope ratios; can also be a dictionary with isotope ratios (e.g., isotopes= {'6.012':'0.9375','6.013':'0.0625'})
       wmin, wmax, dw= (15000.000, 17000.000, 0.10000000) spectral synthesis limits and step of calculation
       babsma_wmin, babsma_wmax= (wmin,wmax) allows opacity limits to be different (broader) than for the synthesis itself
       costheta= (1.) cosine of the viewing angle
    LINELIST KEYWORDS:
          air= (True) if True, perform the synthesis in air wavelengths (affects the default Hlinelist, nothing else; output is in air if air, vacuum otherwise); set to False at your own risk, as Turbospectrum expects the linelist in air wavelengths!)
          Hlinelist= (None) Hydrogen linelists to use; can be set to the path of a linelist file or to the name of an APOGEE linelist; if None, then we first search for the Hlinedata.vac in the APOGEE linelist directory (if air=False) or we use the internal Turbospectrum Hlinelist (if air=True)
       linelist= (None) molecular and atomic linelists to use; can be set to the path of a linelist file or to the name of an APOGEE linelist, or lists of such files; if a single filename is given, the code will first search for files with extensions '.atoms', '.molec' or that start with 'turboatoms.' and 'turbomolec.'
    ATMOSPHERE KEYWORDS:
       modelatm= (None) model-atmosphere instance (required)
       vmicro= (2.) microturbulence (km/s)
       modelopac= (None) 
                  (a) if set to an existing filename: assume babsma_lu has already been run and use this continuous opacity in bsyn_lu
                  (b) if set to a non-existing filename: store the continuous opacity in this file
    MISCELLANEOUS KEYWORDS:
       dr= data release
       verbose= (False) if True, do not redirect the output of babsma_lu and bsyn_lu to /dev/null
       saveTurboInput= if set to a string, the input to and output from Turbospectrum will be saved as a tar.gz file with this name; can be a filename in the current directory or a full path
    OUTPUT:
       (wavelengths,cont-norm. spectrum, spectrum (nwave))
    RAISES:
       ValueError for inconsistent wavelength ranges, too many wavelengths, unrecognized isotopes=, a missing or non-Atmosphere modelatm=, or a missing linelist
       RuntimeError if trimming the linelists or tar-zipping the Turbospectrum input fails
    HISTORY:
       2015-04-13 - Written - Bovy (IAS)
    """
    # Get the spectral synthesis limits
    wmin= kwargs.pop('wmin',_WMIN_DEFAULT)
    wmax= kwargs.pop('wmax',_WMAX_DEFAULT)
    dw= kwargs.pop('dw',_DW_DEFAULT)
    babsma_wmin= kwargs.pop('babsma_wmin',wmin)
    babsma_wmax= kwargs.pop('babsma_wmax',wmax)
    if babsma_wmin > wmin or babsma_wmax < wmax:
        raise ValueError("Opacity wavelength range must encompass the synthesis range")
    # Number of wavelength steps; the comparison used to sit inside ceil()
    if numpy.ceil((wmax-wmin)/dw) > 150000:
        raise ValueError('Too many wavelengths for Turbospectrum synthesis, reduce the wavelength step dw (to, e.g., 0.016)')
    costheta= kwargs.pop('costheta',1.)
    # Linelists
    Hlinelist= kwargs.pop('Hlinelist',None)
    linelist= kwargs.pop('linelist',None)
    # Parse isotopes
    isotopes= kwargs.pop('isotopes','solar')
    if isinstance(isotopes,str) and isotopes.lower() == 'solar':
        isotopes= {}
    elif isinstance(isotopes,str) and isotopes.lower() == 'arcturus':
        isotopes= {'6.012':'0.9375',
                   '6.013':'0.0625'}
    elif not isinstance(isotopes,dict):
        raise ValueError("'isotopes=' input not understood, should be 'solar', 'arcturus', or a dictionary")
    # Get the model atmosphere; validate it *before* creating the temporary 
    # directory such that bad input does not leave a stray directory behind
    modelatm= kwargs.pop('modelatm',None)
    if modelatm is None:
        # The atmosphere is used unconditionally below
        raise ValueError('modelatm= input needs to be set to an Atmosphere instance')
    if isinstance(modelatm,str) and os.path.exists(modelatm):
        raise ValueError('modelatm= input is an existing filename, but you need to give an Atmosphere object instead')
    elif isinstance(modelatm,str):
        raise ValueError('modelatm= input needs to be an Atmosphere instance')
    # Check temperature
    if modelatm._teff > 7000.:
        warnings.warn('Turbospectrum does not include all necessary physics to model stars hotter than about 7000 K; proceed with caution',RuntimeWarning)
    # We will run in a subdirectory of the current directory
    tmpDir= tempfile.mkdtemp(dir=os.getcwd())
    # Write atmosphere to file
    modelfilename= os.path.join(tmpDir,'atm.mod')
    modelatm.writeto(modelfilename,turbo=True)
    # Get the name of the linelists
    if Hlinelist is None:
        if kwargs.get('air',True):
            Hlinelist= 'DATA/Hlinedata' # will be symlinked
        else:
            Hlinelist= appath.linelistPath('Hlinedata.vac',
                                           dr=kwargs.get('dr',None))
    if not os.path.exists(Hlinelist) and not Hlinelist == 'DATA/Hlinedata':
        Hlinelist= appath.linelistPath(Hlinelist,
                                       dr=kwargs.get('dr',None))
    if not os.path.exists(Hlinelist) and not kwargs.get('air',True):
        print("Hlinelist in vacuum linelist not found, using Turbospectrum's, which is in air...")
        Hlinelist= 'DATA/Hlinedata' # will be symlinked
    linelistfilenames= [Hlinelist]
    # NOTE(review): only a single string is handled here; a list passed as 
    # linelist= ends up in the ValueError below - confirm against the docstring
    if isinstance(linelist,str):
        if os.path.exists(linelist):
            linelistfilenames.append(linelist)
        else:
            # Try finding the linelist
            atomlinelistfilename= appath.linelistPath(\
                '%s.atoms' % linelist,
                dr=kwargs.get('dr',None))
            moleclinelistfilename= appath.linelistPath(\
                '%s.molec' % linelist,
                dr=kwargs.get('dr',None))
            if os.path.exists(atomlinelistfilename) \
                    and os.path.exists(moleclinelistfilename):
                linelistfilenames.append(atomlinelistfilename)
                linelistfilenames.append(moleclinelistfilename)
            else:
                atomlinelistfilename= appath.linelistPath(\
                    'turboatoms.%s' % linelist,
                    dr=kwargs.get('dr',None))
                moleclinelistfilename= appath.linelistPath(\
                    'turbomolec.%s' % linelist,
                    dr=kwargs.get('dr',None))
                if not os.path.exists(atomlinelistfilename) \
                        and '201404080919' in atomlinelistfilename \
                        and kwargs.get('air',True):
                    download.linelist(os.path.basename(atomlinelistfilename),
                                      dr=kwargs.get('dr',None))
                if not os.path.exists(moleclinelistfilename) \
                        and '201404080919' in moleclinelistfilename \
                        and kwargs.get('air',True):
                    download.linelist(os.path.basename(moleclinelistfilename),
                                      dr=kwargs.get('dr',None))
                if os.path.exists(atomlinelistfilename) \
                        and os.path.exists(moleclinelistfilename):
                    linelistfilenames.append(atomlinelistfilename)
                    linelistfilenames.append(moleclinelistfilename)
    if linelist is None or len(linelistfilenames) == 1:
        os.remove(modelfilename)
        os.rmdir(tmpDir)
        raise ValueError('linelist= must be set (see documentation) and given linelist must exist (either as absolute path or in the linelist directory)')
    # Link the Turbospectrum DATA directory
    os.symlink(os.getenv('TURBODATA'),os.path.join(tmpDir,'DATA'))
    # Cut the linelist to the desired wavelength range, if necessary,
    # Skipped because it is unnecessary, but left in case we still want to 
    # use it
    rmLinelists= False
    for ll, linelistfilename in enumerate(linelistfilenames[1:]):
        if not _CUTLINELIST: continue #SKIP
        if wmin == _WMIN_DEFAULT and wmax == _WMAX_DEFAULT: continue
        rmLinelists= True
        with open(os.path.join(tmpDir,'cutlines.awk'),'w') as awkfile:
            awkfile.write('($1>%.3f && $1<%.3f) || ( substr($1,1,1) == "'
                          %(wmin-7.,wmax+7.) +"'"+'")\n')
        try:
            # Both handles are closed whether or not awk succeeds
            with open(os.path.join(tmpDir,'lines.tmp'),'w') as keeplines:
                with open(os.devnull,'w') as awkstderr:
                    subprocess.check_call(['awk','-f','cutlines.awk',
                                           linelistfilename],
                                          cwd=tmpDir,stdout=keeplines,
                                          stderr=awkstderr)
        except subprocess.CalledProcessError:
            os.remove(os.path.join(tmpDir,'lines.tmp'))
            os.remove(os.path.join(tmpDir,'DATA'))
            raise RuntimeError("Removing unnecessary linelist entries failed ...")
        finally:
            os.remove(os.path.join(tmpDir,'cutlines.awk'))
        # Remove elements that aren't used altogether, adjust nlines
        with open(os.path.join(tmpDir,'lines.tmp'),'r') as infile:
            lines= infile.readlines()
        nl_list= [l[0] == "'" for l in lines]
        nl= numpy.array(nl_list,dtype='int')
        nl_list.append(True)
        nl_list.append(True)
        nlines= [numpy.sum(1-nl[ii:nl_list[ii+2:].index(True)+ii+2]) 
                 for ii in range(len(nl))]
        with open(os.path.join(tmpDir,os.path.basename(linelistfilename)),
                  'w') \
                as outfile:
            for ii, line in enumerate(lines):
                if ii < len(lines)-2:
                    if not lines[ii][0] == "'":
                        outfile.write(lines[ii])
                    elif not (lines[ii+2][0] == "'" and lines[ii+1][0] == "'"):
                        if lines[ii+1][0] == "'":
                            # Adjust nlines                       
                            outfile.write(lines[ii].replace(lines[ii].split()[-1]+'\n',
                                                            '%i\n' % nlines[ii]))
                        else:
                            outfile.write(lines[ii])
                else:
                    if not lines[ii][0] == "'": outfile.write(lines[ii])
        os.remove(os.path.join(tmpDir,'lines.tmp'))
        # cp the linelists to the temporary directory
        # NOTE(review): the destination is the same path the trimmed file was 
        # just written to, so this overwrites it with the original linelist; 
        # confirm which of the two is intended
        shutil.copy(linelistfilename,tmpDir)
        # ll indexes linelistfilenames[1:], so the matching entry of the full 
        # list is ll+1 (index 0 is the hydrogen linelist and must be kept)
        linelistfilenames[ll+1]= os.path.basename(linelistfilename)
    # Parse the abundances
    if len(args) == 0: #special case that there are *no* differences
        args= ([26,0.],)
    indiv_abu= {}
    for arg in args:
        indiv_abu[arg[0]]= arg[1]+solarabundances._ASPLUND05[arg[0]]\
            +modelatm._metals
        if arg[0] == 6: indiv_abu[arg[0]]+= modelatm._cm
        if arg[0] in [8,10,12,14,16,18,20,22]: indiv_abu[arg[0]]+= modelatm._am
    modelopac= kwargs.get('modelopac',None)
    if modelopac is None or \
            (isinstance(modelopac,str) and not os.path.exists(modelopac)):
        # Now write the script file for babsma_lu
        scriptfilename= os.path.join(tmpDir,'babsma.par')
        modelopacname= os.path.join(tmpDir,'mopac')
        _write_script(scriptfilename,
                      babsma_wmin,babsma_wmax,dw,
                      None,
                      modelfilename,
                      None,
                      modelopacname,
                      modelatm._metals,
                      modelatm._am,
                      indiv_abu,
                      kwargs.get('vmicro',2.),
                      None,None,None,bsyn=False)
        # Run babsma
        sys.stdout.write('\r'+"Running Turbospectrum babsma_lu ...\r")
        sys.stdout.flush()
        devnull= None
        if kwargs.get('verbose',False):
            stdout= None
            stderr= None
        else:
            devnull= open(os.devnull,'w')
            stdout= devnull
            stderr= subprocess.STDOUT
        # NOTE(review): Popen/communicate never raise CalledProcessError, so 
        # the former except-clause for it was unreachable (and called 
        # os.remove with two arguments); the exit status of babsma_lu is not 
        # inspected here
        try:
            p= subprocess.Popen(['babsma_lu'],
                                cwd=tmpDir,
                                stdin=subprocess.PIPE,
                                stdout=stdout,
                                stderr=stderr)
            with open(os.path.join(tmpDir,'babsma.par'),'r') as parfile:
                for line in parfile:
                    p.stdin.write(line.encode('utf-8'))
            p.communicate()
        finally:
            if devnull is not None:
                devnull.close()
            if os.path.exists(os.path.join(tmpDir,'babsma.par')) \
                    and not 'saveTurboInput' in kwargs:
                os.remove(os.path.join(tmpDir,'babsma.par'))
            sys.stdout.write('\r'+download._ERASESTR+'\r')
            sys.stdout.flush()
        if isinstance(modelopac,str):
            shutil.copy(modelopacname,modelopac)
    else:
        shutil.copy(modelopac,tmpDir)
        modelopacname= os.path.join(tmpDir,os.path.basename(modelopac))
    # Now write the script file for bsyn_lu
    scriptfilename= os.path.join(tmpDir,'bsyn.par')
    outfilename= os.path.join(tmpDir,'bsyn.out')
    _write_script(scriptfilename,
                  wmin,wmax,dw,
                  costheta,
                  modelfilename,
                  None,
                  modelopacname,
                  modelatm._metals,
                  modelatm._am,
                  indiv_abu,
                  None,
                  outfilename,
                  isotopes,
                  linelistfilenames,
                  bsyn=True)
    # Run bsyn
    sys.stdout.write('\r'+"Running Turbospectrum bsyn_lu ...\r")
    sys.stdout.flush()
    devnull= None
    if kwargs.get('verbose',False):
        stdout= None
        stderr= None
    else:
        devnull= open(os.devnull,'w')
        stdout= devnull
        stderr= subprocess.STDOUT
    # NOTE(review): as for babsma_lu, the exit status of bsyn_lu is not 
    # inspected (the former CalledProcessError handler could never trigger)
    try:
        p= subprocess.Popen(['bsyn_lu'],
                            cwd=tmpDir,
                            stdin=subprocess.PIPE,
                            stdout=stdout,
                            stderr=stderr)
        with open(os.path.join(tmpDir,'bsyn.par'),'r') as parfile:
            for line in parfile:
                p.stdin.write(line.encode('utf-8'))
        p.communicate()
    finally:
        if devnull is not None:
            devnull.close()
        if 'saveTurboInput' in kwargs:
            turbosavefilename= kwargs['saveTurboInput']
            if os.path.dirname(turbosavefilename) == '':
                turbosavefilename= os.path.join(os.getcwd(),turbosavefilename)
            try:
                subprocess.check_call(['tar','cvzf',turbosavefilename,
                                       os.path.basename(os.path.normpath(tmpDir))])
            except subprocess.CalledProcessError:
                raise RuntimeError("Tar-zipping the Turbospectrum input and output failed; you will have to manually delete the temporary directory ...")
            # Need to remove babsma.par, bc not removed above
            if os.path.exists(os.path.join(tmpDir,'babsma.par')):
                os.remove(os.path.join(tmpDir,'babsma.par'))
        if os.path.exists(os.path.join(tmpDir,'bsyn.par')):
            os.remove(os.path.join(tmpDir,'bsyn.par'))
        if os.path.exists(modelopacname):
            os.remove(modelopacname)
        if os.path.exists(modelopacname+'.mod'):
            os.remove(modelopacname+'.mod')
        if os.path.exists(os.path.join(tmpDir,'DATA')):
            os.remove(os.path.join(tmpDir,'DATA'))
        if os.path.exists(os.path.join(tmpDir,'dummy-output.dat')):
            os.remove(os.path.join(tmpDir,'dummy-output.dat'))
        if os.path.exists(modelfilename):
            os.remove(modelfilename)
        if rmLinelists:
            # These entries are basenames of the copies placed in tmpDir
            for linelistfilename in linelistfilenames[1:]:
                os.remove(os.path.join(tmpDir,linelistfilename))
        sys.stdout.write('\r'+download._ERASESTR+'\r')
        sys.stdout.flush()
    # Now read the output
    turboOut= numpy.loadtxt(outfilename)
    # Clean up
    os.remove(outfilename)
    os.rmdir(tmpDir)
    # Return wav, cont-norm, full spectrum
    return (turboOut[:,0],turboOut[:,1],turboOut[:,2])