re.search

Here are examples of the Python API re.search, taken from open-source projects. By voting up, you can indicate which examples are most useful and appropriate.

200 Examples (page 7)

Example 1

Project: pexpect
Source File: monitor.py
View license
def main():
    """Log into a remote host over SSH and print a small system report.

    Reads hostname/username/password from the -s/-u/-p options, prompting
    for any that are missing, logs in with pexpect, forces the remote
    shell prompt to a unique '[PEXPECT]$ ' marker so command output can be
    delimited reliably, then runs uname, uptime (parsed), iostat, vmstat,
    free (Linux only), df and lsof, echoing each command's output.
    """

    global COMMAND_PROMPT, TERMINAL_PROMPT, TERMINAL_TYPE, SSH_NEWKEY
    ######################################################################
    ## Parse the options, arguments, get ready, etc.
    ######################################################################
    try:
        optlist, args = getopt.getopt(sys.argv[1:], 'h?s:u:p:', ['help','h','?'])
    except Exception as e:
        print(str(e))
        exit_with_usage()
    options = dict(optlist)
    if len(args) > 1:
        exit_with_usage()

    if [elem for elem in options if elem in ['-h','--h','-?','--?','--help']]:
        print("Help:")
        exit_with_usage()

    if '-s' in options:
        host = options['-s']
    else:
        host = raw_input('hostname: ')
    if '-u' in options:
        user = options['-u']
    else:
        user = raw_input('username: ')
    if '-p' in options:
        password = options['-p']
    else:
        password = getpass.getpass('password: ')

    #
    # Login via SSH
    #
    child = pexpect.spawn('ssh -l %s %s'%(user, host))
    i = child.expect([pexpect.TIMEOUT, SSH_NEWKEY, COMMAND_PROMPT, '(?i)password'])
    if i == 0: # Timeout
        print('ERROR! could not login with SSH. Here is what SSH said:')
        print(child.before, child.after)
        print(str(child))
        sys.exit (1)
    if i == 1: # In this case SSH does not have the public key cached.
        child.sendline ('yes')
        child.expect ('(?i)password')
    if i == 2:
        # This may happen if a public key was setup to automatically login.
        # But beware, the COMMAND_PROMPT at this point is very trivial and
        # could be fooled by some output in the MOTD or login message.
        pass
    if i == 3:
        child.sendline(password)
        # Now we are either at the command prompt or
        # the login process is asking for our terminal type.
        i = child.expect ([COMMAND_PROMPT, TERMINAL_PROMPT])
        if i == 1:
            child.sendline (TERMINAL_TYPE)
            child.expect (COMMAND_PROMPT)
    #
    # Set command prompt to something more unique.
    #
    # Raw strings: '\[' and '\$' are regex escapes, not string escapes;
    # unraw they trigger invalid-escape warnings on modern Python.
    COMMAND_PROMPT = r"\[PEXPECT\]\$ "
    child.sendline (r"PS1='[PEXPECT]\$ '") # In case of sh-style
    i = child.expect ([pexpect.TIMEOUT, COMMAND_PROMPT], timeout=10)
    if i == 0:
        print("# Couldn't set sh-style prompt -- trying csh-style.")
        child.sendline (r"set prompt='[PEXPECT]\$ '")
        i = child.expect ([pexpect.TIMEOUT, COMMAND_PROMPT], timeout=10)
        if i == 0:
            print("Failed to set command prompt using sh or csh style.")
            print("Response was:")
            print(child.before)
            sys.exit (1)

    # Now we should be at the command prompt and ready to run some commands.
    print('---------------------------------------')
    print('Report of commands run on remote host.')
    print('---------------------------------------')

    # Run uname.
    child.sendline ('uname -a')
    child.expect (COMMAND_PROMPT)
    print(child.before)
    # NOTE(review): under Python 3, child.before may be bytes unless the
    # spawn was created with an encoding= argument — confirm before
    # relying on the 'in' / .lower() string operations below.
    if 'linux' in child.before.lower():
        LINUX_MODE = 1
    else:
        LINUX_MODE = 0

    # Run and parse 'uptime'.
    child.sendline ('uptime')
    child.expect(r'up\s+(.*?),\s+([0-9]+) users?,\s+load averages?: ([0-9]+\.[0-9][0-9]),?\s+([0-9]+\.[0-9][0-9]),?\s+([0-9]+\.[0-9][0-9])')
    duration, users, av1, av5, av15 = child.match.groups()
    days = '0'
    hours = '0'
    mins = '0'
    if 'day' in duration:
        child.match = re.search(r'([0-9]+)\s+day',duration)
        days = str(int(child.match.group(1)))
    if ':' in duration:
        child.match = re.search(r'([0-9]+):([0-9]+)',duration)
        hours = str(int(child.match.group(1)))
        mins = str(int(child.match.group(2)))
    if 'min' in duration:
        child.match = re.search(r'([0-9]+)\s+min',duration)
        mins = str(int(child.match.group(1)))
    print()
    # NOTE(review): days/hours/mins are parsed above but the report prints
    # the raw 'duration' string instead — possibly intentional.
    print('Uptime: %s days, %s users, %s (1 min), %s (5 min), %s (15 min)' % (
        duration, users, av1, av5, av15))
    child.expect (COMMAND_PROMPT)

    # Run iostat.
    child.sendline ('iostat')
    child.expect (COMMAND_PROMPT)
    print(child.before)

    # Run vmstat.
    child.sendline ('vmstat')
    child.expect (COMMAND_PROMPT)
    print(child.before)

    # Run free.
    if LINUX_MODE:
        child.sendline ('free') # Linux systems only.
        child.expect (COMMAND_PROMPT)
        print(child.before)

    # Run df.
    child.sendline ('df')
    child.expect (COMMAND_PROMPT)
    print(child.before)

    # Run lsof.
    child.sendline ('lsof')
    child.expect (COMMAND_PROMPT)
    print(child.before)

#    # Run netstat
#    child.sendline ('netstat')
#    child.expect (COMMAND_PROMPT)
#    print child.before

#    # Run MySQL show status.
#    child.sendline ('mysql -p -e "SHOW STATUS;"')
#    child.expect (PASSWORD_PROMPT_MYSQL)
#    child.sendline (password_mysql)
#    child.expect (COMMAND_PROMPT)
#    print
#    print child.before

    # Now exit the remote host.
    child.sendline ('exit')
    index = child.expect([pexpect.EOF, "(?i)there are stopped jobs"])
    if index==1:
        child.sendline("exit")
        # Fix: bare EOF is undefined here — use the module attribute,
        # matching the pexpect.EOF usage two lines above.
        child.expect(pexpect.EOF)

Example 2

Project: pwn_plug_sources
Source File: sms_launch.py
View license
def launch():
	"""Drive the SET SMS-spoofing menu.

	Lets the user pick a predefined SMS template or type a one-off
	message, then choose a delivery service (SohoOS, Lleida.net,
	SMSGANG or the Android emulator) and sends the message to each
	number in 'phones' (defined elsewhere in this module — verify).

	NOTE(review): Python 2 code (print statements, file(), raw_input).
	"""
	while 1:
		print("""
  1.  Pre-Defined Template
  2.  One-Time Use SMS

  99. Cancel and return to SMS Spoofing Menu
""")
		template_choice = raw_input(core.setprompt(["7"], "Use a predefined template or craft a one time SMS?"))
		# if predefined template go here
		if template_choice == '1':
			# set path for
			path = 'src/templates/sms/'
			filewrite=file("src/program_junk/sms.templates", "w")
			counter=0
			# Pull all files in the templates directory
			for infile in glob.glob(os.path.join(path, '*.template')):
				infile=infile.split("/")
				# grab just the filename
				infile=infile[3]
				counter=counter+1
				# put it in a format we can use later in a file
				filewrite.write(infile+" "+str(counter)+"\n")
			# close the file
			filewrite.close()
			# read in formatted filenames
			fileread=file("src/program_junk/sms.templates","r").readlines()
			print "Below is a list of available templates:\n"
			for line in fileread:
				line=line.rstrip()
				line=line.split(" ")
				filename=line[0]
				# read in file
				fileread2=file("src/templates/sms/%s" % (filename),"r").readlines()
				for line2 in fileread2:
					match=re.search("SUBJECT=", line2)
					if match:
						line2=line2.rstrip()
						line2=line2.split("=")
						line2=line2[1]
						# strip double quotes
						line2=line2.replace('"', "")
						# display results back
						print line[1]+": "+line2
	
			# allow user to select template
			choice=raw_input(core.setprompt(["7"], "Select template"))
			for line in fileread:
				# split based off of space
				line=line.split(" ")
				# NOTE(review): 'choice' is used as a regex pattern, not an
				# exact match — e.g. choice "1" also matches "10"; verify intent.
				# search for the choice
				match=re.search(str(choice), line[1])
				if match:
					extract=line[0]
					fileopen=file("src/templates/sms/"+str(extract), "r").readlines()
					for line2 in fileopen:
						match2=re.search("ORIGIN=", line2)
						if match2:
							origin=line2.replace('"', "")
							origin=origin.split("=")
							origin=origin[1]
						match3=re.search("SUBJECT=", line2)
						if match3:
							subject=line2.replace('"', "")
							subject=subject.split("=")
							subject=subject[1]
						match4=re.search("BODY=", line2)
						if match4:
							body=line2.replace('"', "")
							body=body.replace(r'\n', " \n ")
							body=body.split("=")
							body=body[1]
	
	  		break;
		if template_choice == '2':
			try:
				origin = raw_input(core.setprompt(["7"], "Source number phone"))
				body = raw_input(core.setprompt(["7"], "Body of the message, hit return for a new line. Control+c when finished"))
				# HACK: the sentinel string never matches, so this loop only
				# ends when the user hits Control+C (KeyboardInterrupt).
				while body != 'sdfsdfihdsfsodhdsofh':
					try:
						body+=("\n")
						body+=raw_input("Next line of the body: ")
					except KeyboardInterrupt: break
			except KeyboardInterrupt: pass
			break;
	
	
		if template_choice == '99': 
			break;
	
	if template_choice != '3':
		while 1:
			print("""
 Service Selection

 There are diferent services you can use for the SMS spoofing, select
 your own.

  1.  SohoOS (buggy)
  2.  Lleida.net (pay)
  3.  SMSGANG (pay)
  4.  Android Emulator (need to install Android Emulator)

  99. Cancel and return to SMS Spoofing Menu
""")
			service_option = raw_input(core.setprompt(["7"], ""))
			# exit 
			if service_option == '1':
				break
			if service_option == '2':
				break
			if service_option == '3': 
				break
			if service_option == '4':
				break
			if service_option == '99':
				break
			
	if template_choice != '3' and service_option != '99':
		#sohoOS service
		if service_option == '1':
			for to in phones:		
				send_sohoos_sms(to.rstrip(), origin.rstrip(), body.rstrip())
			# Finish here then return to main menu
			core.PrintStatus("SET has completed!")
			core.ReturnContinue()
		
		#Lleida.net service
		if service_option == '2':
			user = raw_input(core.setprompt(["7"], "Your Lleida.net user"))
			password = raw_input(core.setprompt(["7"], "Your Lleida.net password"))
			email = raw_input(core.setprompt(["7"], "Email for the receipt (optional)"))
			for to in phones:
				send_lleidanet_sms(to.rstrip(), origin.rstrip(), body.rstrip(), user, password, email)
			# Finish here then return to main menu
			core.PrintStatus("SET has completed!")
			core.ReturnContinue()
			
		#SMSGANG service
		if service_option == '3':
			pincode = raw_input(core.setprompt(["7"], "Your SMSGANG pincode"))
			for to in phones:
				send_smsgang_sms(to.rstrip(), origin.rstrip(), body.rstrip(), pincode)
			# Finish here then return to main menu
			core.PrintStatus("SET has completed!")
			core.ReturnContinue()

		# Android Emulator service
		if service_option == '4':
			for to in phones:		
				send_android_emu_sms(origin.rstrip(), body.rstrip())
			# Finish here then return to main menu
			core.PrintStatus("SET has completed!")
			core.ReturnContinue()
Example 3

Project: raspberry_pwn
Source File: target.py
View license
def _setRequestParams():
    """
    Check and set the parameters and perform checks on 'data' option for
    HTTP method POST.

    Populates conf.parameters / conf.paramDict for every injectable place
    (GET, POST, URI, Cookie, headers, custom injection marks), asking the
    user how to treat custom injection marks and any recognized POST
    payload format (JSON, JSON-like, array-like, XML/SOAP, multipart).
    Raises SqlmapSyntaxException / SqlmapGenericException when no testable
    parameter can be established.
    """

    if conf.direct:
        conf.parameters[None] = "direct connection"
        return

    testableParameters = False

    # Perform checks on GET parameters
    if conf.parameters.get(PLACE.GET):
        parameters = conf.parameters[PLACE.GET]
        paramDict = paramToDict(PLACE.GET, parameters)

        if paramDict:
            conf.paramDict[PLACE.GET] = paramDict
            testableParameters = True

    # Perform checks on POST parameters
    if conf.method == HTTPMETHOD.POST and conf.data is None:
        errMsg = "HTTP POST method depends on HTTP data value to be posted"
        raise SqlmapSyntaxException(errMsg)

    if conf.data is not None:
        conf.method = HTTPMETHOD.POST if not conf.method or conf.method == HTTPMETHOD.GET else conf.method

        # re.sub callback: inserts the custom injection mark (via 'repl')
        # for the matched parameter, honoring --test-parameter filtering,
        # and expands any \g<...> backreferences left in 'repl'.
        def process(match, repl):
            retVal = match.group(0)

            if not (conf.testParameter and match.group("name") not in conf.testParameter):
                retVal = repl
                while True:
                    _ = re.search(r"\\g<([^>]+)>", retVal)
                    if _:
                        retVal = retVal.replace(_.group(0), match.group(int(_.group(1)) if _.group(1).isdigit() else _.group(1)))
                    else:
                        break

            return retVal

        if kb.processUserMarks is None and CUSTOM_INJECTION_MARK_CHAR in conf.data:
            message = "custom injection marking character ('%s') found in option " % CUSTOM_INJECTION_MARK_CHAR
            message += "'--data'. Do you want to process it? [Y/n/q] "
            test = readInput(message, default="Y")
            if test and test[0] in ("q", "Q"):
                raise SqlmapUserQuitException
            else:
                kb.processUserMarks = not test or test[0] not in ("n", "N")

                if kb.processUserMarks and "=%s" % CUSTOM_INJECTION_MARK_CHAR in conf.data:
                    warnMsg = "it seems that you've provided empty parameter value(s) "
                    warnMsg += "for testing. Please, always use only valid parameter values "
                    warnMsg += "so sqlmap could be able to run properly"
                    logger.warn(warnMsg)

        # Auto-detect well-known POST body formats and mark injectable spots
        if not (kb.processUserMarks and CUSTOM_INJECTION_MARK_CHAR in conf.data):
            if re.search(JSON_RECOGNITION_REGEX, conf.data):
                message = "JSON data found in %s data. " % conf.method
                message += "Do you want to process it? [Y/n/q] "
                test = readInput(message, default="Y")
                if test and test[0] in ("q", "Q"):
                    raise SqlmapUserQuitException
                # NOTE(review): unlike the 'q' check above, this branch indexes
                # test[0] without checking that test is non-empty — relies on
                # readInput's "Y" default; verify readInput never returns "".
                elif test[0] not in ("n", "N"):
                    conf.data = conf.data.replace(CUSTOM_INJECTION_MARK_CHAR, ASTERISK_MARKER)
                    conf.data = re.sub(r'("(?P<name>[^"]+)"\s*:\s*"[^"]+)"', functools.partial(process, repl=r'\g<1>%s"' % CUSTOM_INJECTION_MARK_CHAR), conf.data)
                    conf.data = re.sub(r'("(?P<name>[^"]+)"\s*:\s*)(-?\d[\d\.]*\b)', functools.partial(process, repl=r'\g<0>%s' % CUSTOM_INJECTION_MARK_CHAR), conf.data)
                    match = re.search(r'(?P<name>[^"]+)"\s*:\s*\[([^\]]+)\]', conf.data)
                    if match and not (conf.testParameter and match.group("name") not in conf.testParameter):
                        _ = match.group(2)
                        _ = re.sub(r'("[^"]+)"', '\g<1>%s"' % CUSTOM_INJECTION_MARK_CHAR, _)
                        _ = re.sub(r'(\A|,|\s+)(-?\d[\d\.]*\b)', '\g<0>%s' % CUSTOM_INJECTION_MARK_CHAR, _)
                        conf.data = conf.data.replace(match.group(0), match.group(0).replace(match.group(2), _))
                    kb.postHint = POST_HINT.JSON

            elif re.search(JSON_LIKE_RECOGNITION_REGEX, conf.data):
                message = "JSON-like data found in %s data. " % conf.method
                message += "Do you want to process it? [Y/n/q] "
                test = readInput(message, default="Y")
                if test and test[0] in ("q", "Q"):
                    raise SqlmapUserQuitException
                elif test[0] not in ("n", "N"):
                    conf.data = conf.data.replace(CUSTOM_INJECTION_MARK_CHAR, ASTERISK_MARKER)
                    conf.data = re.sub(r"('(?P<name>[^']+)'\s*:\s*'[^']+)'", functools.partial(process, repl=r"\g<1>%s'" % CUSTOM_INJECTION_MARK_CHAR), conf.data)
                    conf.data = re.sub(r"('(?P<name>[^']+)'\s*:\s*)(-?\d[\d\.]*\b)", functools.partial(process, repl=r"\g<0>%s" % CUSTOM_INJECTION_MARK_CHAR), conf.data)
                    kb.postHint = POST_HINT.JSON_LIKE

            elif re.search(ARRAY_LIKE_RECOGNITION_REGEX, conf.data):
                message = "Array-like data found in %s data. " % conf.method
                message += "Do you want to process it? [Y/n/q] "
                test = readInput(message, default="Y")
                if test and test[0] in ("q", "Q"):
                    raise SqlmapUserQuitException
                elif test[0] not in ("n", "N"):
                    conf.data = conf.data.replace(CUSTOM_INJECTION_MARK_CHAR, ASTERISK_MARKER)
                    conf.data = re.sub(r"(=[^%s]+)" % DEFAULT_GET_POST_DELIMITER, r"\g<1>%s" % CUSTOM_INJECTION_MARK_CHAR, conf.data)
                    kb.postHint = POST_HINT.ARRAY_LIKE

            elif re.search(XML_RECOGNITION_REGEX, conf.data):
                message = "SOAP/XML data found in %s data. " % conf.method
                message += "Do you want to process it? [Y/n/q] "
                test = readInput(message, default="Y")
                if test and test[0] in ("q", "Q"):
                    raise SqlmapUserQuitException
                elif test[0] not in ("n", "N"):
                    conf.data = conf.data.replace(CUSTOM_INJECTION_MARK_CHAR, ASTERISK_MARKER)
                    conf.data = re.sub(r"(<(?P<name>[^>]+)( [^<]*)?>)([^<]+)(</\2)", functools.partial(process, repl=r"\g<1>\g<4>%s\g<5>" % CUSTOM_INJECTION_MARK_CHAR), conf.data)
                    kb.postHint = POST_HINT.SOAP if "soap" in conf.data.lower() else POST_HINT.XML

            elif re.search(MULTIPART_RECOGNITION_REGEX, conf.data):
                message = "Multipart like data found in %s data. " % conf.method
                message += "Do you want to process it? [Y/n/q] "
                test = readInput(message, default="Y")
                if test and test[0] in ("q", "Q"):
                    raise SqlmapUserQuitException
                elif test[0] not in ("n", "N"):
                    conf.data = conf.data.replace(CUSTOM_INJECTION_MARK_CHAR, ASTERISK_MARKER)
                    conf.data = re.sub(r"(?si)((Content-Disposition[^\n]+?name\s*=\s*[\"'](?P<name>[^\n]+?)[\"']).+?)(((\r)?\n)+--)", functools.partial(process, repl=r"\g<1>%s\g<4>" % CUSTOM_INJECTION_MARK_CHAR), conf.data)
                    kb.postHint = POST_HINT.MULTIPART

        if not kb.postHint:
            if CUSTOM_INJECTION_MARK_CHAR in conf.data:  # later processed
                pass
            else:
                place = PLACE.POST

                conf.parameters[place] = conf.data
                paramDict = paramToDict(place, conf.data)

                if paramDict:
                    conf.paramDict[place] = paramDict
                    testableParameters = True
        else:
            if CUSTOM_INJECTION_MARK_CHAR not in conf.data:  # in case that no usable parameter values has been found
                conf.parameters[PLACE.POST] = conf.data

    kb.processUserMarks = True if (kb.postHint and CUSTOM_INJECTION_MARK_CHAR in conf.data) else kb.processUserMarks

    # Offer URI-based injection when no GET/POST parameters were found
    if re.search(URI_INJECTABLE_REGEX, conf.url, re.I) and not any(place in conf.parameters for place in (PLACE.GET, PLACE.POST)) and not kb.postHint and not CUSTOM_INJECTION_MARK_CHAR in (conf.data or ""):
        warnMsg = "you've provided target URL without any GET "
        warnMsg += "parameters (e.g. www.site.com/article.php?id=1) "
        warnMsg += "and without providing any POST parameters "
        warnMsg += "through --data option"
        logger.warn(warnMsg)

        message = "do you want to try URI injections "
        message += "in the target URL itself? [Y/n/q] "
        test = readInput(message, default="Y")

        if not test or test[0] not in ("n", "N"):
            conf.url = "%s%s" % (conf.url, CUSTOM_INJECTION_MARK_CHAR)
            kb.processUserMarks = True
        elif test[0] in ("q", "Q"):
            raise SqlmapUserQuitException

    # Turn each custom injection mark (in URI, POST body or headers) into a
    # distinct testable "parameter" entry in conf.paramDict
    for place, value in ((PLACE.URI, conf.url), (PLACE.CUSTOM_POST, conf.data), (PLACE.CUSTOM_HEADER, str(conf.httpHeaders))):
        _ = re.sub(PROBLEMATIC_CUSTOM_INJECTION_PATTERNS, "", value or "") if place == PLACE.CUSTOM_HEADER else value or ""
        if CUSTOM_INJECTION_MARK_CHAR in _:
            if kb.processUserMarks is None:
                lut = {PLACE.URI: '-u', PLACE.CUSTOM_POST: '--data', PLACE.CUSTOM_HEADER: '--headers/--user-agent/--referer/--cookie'}
                message = "custom injection marking character ('%s') found in option " % CUSTOM_INJECTION_MARK_CHAR
                message += "'%s'. Do you want to process it? [Y/n/q] " % lut[place]
                test = readInput(message, default="Y")
                if test and test[0] in ("q", "Q"):
                    raise SqlmapUserQuitException
                else:
                    kb.processUserMarks = not test or test[0] not in ("n", "N")

                    if kb.processUserMarks and "=%s" % CUSTOM_INJECTION_MARK_CHAR in _:
                        warnMsg = "it seems that you've provided empty parameter value(s) "
                        warnMsg += "for testing. Please, always use only valid parameter values "
                        warnMsg += "so sqlmap could be able to run properly"
                        logger.warn(warnMsg)

            if not kb.processUserMarks:
                if place == PLACE.URI:
                    query = urlparse.urlsplit(value).query
                    if query:
                        parameters = conf.parameters[PLACE.GET] = query
                        paramDict = paramToDict(PLACE.GET, parameters)

                        if paramDict:
                            conf.url = conf.url.split('?')[0]
                            conf.paramDict[PLACE.GET] = paramDict
                            testableParameters = True
                elif place == PLACE.CUSTOM_POST:
                    conf.parameters[PLACE.POST] = conf.data
                    paramDict = paramToDict(PLACE.POST, conf.data)

                    if paramDict:
                        conf.paramDict[PLACE.POST] = paramDict
                        testableParameters = True

            else:
                conf.parameters[place] = value
                conf.paramDict[place] = OrderedDict()

                if place == PLACE.CUSTOM_HEADER:
                    for index in xrange(len(conf.httpHeaders)):
                        header, value = conf.httpHeaders[index]
                        if CUSTOM_INJECTION_MARK_CHAR in re.sub(PROBLEMATIC_CUSTOM_INJECTION_PATTERNS, "", value):
                            parts = value.split(CUSTOM_INJECTION_MARK_CHAR)
                            for i in xrange(len(parts) - 1):
                                conf.paramDict[place]["%s #%d%s" % (header, i + 1, CUSTOM_INJECTION_MARK_CHAR)] = "%s,%s" % (header, "".join("%s%s" % (parts[j], CUSTOM_INJECTION_MARK_CHAR if i == j else "") for j in xrange(len(parts))))
                            conf.httpHeaders[index] = (header, value.replace(CUSTOM_INJECTION_MARK_CHAR, ""))
                else:
                    parts = value.split(CUSTOM_INJECTION_MARK_CHAR)

                    for i in xrange(len(parts) - 1):
                        conf.paramDict[place]["%s#%d%s" % (("%s " % kb.postHint) if kb.postHint else "", i + 1, CUSTOM_INJECTION_MARK_CHAR)] = "".join("%s%s" % (parts[j], CUSTOM_INJECTION_MARK_CHAR if i == j else "") for j in xrange(len(parts)))

                    if place == PLACE.URI and PLACE.GET in conf.paramDict:
                        del conf.paramDict[PLACE.GET]
                    elif place == PLACE.CUSTOM_POST and PLACE.POST in conf.paramDict:
                        del conf.paramDict[PLACE.POST]

                testableParameters = True

    # Strip the marks out of the stored options once they've been consumed
    if kb.processUserMarks:
        for item in ("url", "data", "agent", "referer", "cookie"):
            if conf.get(item):
                conf[item] = conf[item].replace(CUSTOM_INJECTION_MARK_CHAR, "")

    # Perform checks on Cookie parameters
    if conf.cookie:
        conf.parameters[PLACE.COOKIE] = conf.cookie
        paramDict = paramToDict(PLACE.COOKIE, conf.cookie)

        if paramDict:
            conf.paramDict[PLACE.COOKIE] = paramDict
            testableParameters = True

    # Perform checks on header values
    if conf.httpHeaders:
        for httpHeader, headerValue in conf.httpHeaders:
            # Url encoding of the header values should be avoided
            # Reference: http://stackoverflow.com/questions/5085904/is-ok-to-urlencode-the-value-in-headerlocation-value

            httpHeader = httpHeader.title()

            if httpHeader == HTTP_HEADER.USER_AGENT:
                conf.parameters[PLACE.USER_AGENT] = urldecode(headerValue)

                condition = any((not conf.testParameter, intersect(conf.testParameter, USER_AGENT_ALIASES)))

                if condition:
                    conf.paramDict[PLACE.USER_AGENT] = {PLACE.USER_AGENT: headerValue}
                    testableParameters = True

            elif httpHeader == HTTP_HEADER.REFERER:
                conf.parameters[PLACE.REFERER] = urldecode(headerValue)

                condition = any((not conf.testParameter, intersect(conf.testParameter, REFERER_ALIASES)))

                if condition:
                    conf.paramDict[PLACE.REFERER] = {PLACE.REFERER: headerValue}
                    testableParameters = True

            elif httpHeader == HTTP_HEADER.HOST:
                conf.parameters[PLACE.HOST] = urldecode(headerValue)

                condition = any((not conf.testParameter, intersect(conf.testParameter, HOST_ALIASES)))

                if condition:
                    conf.paramDict[PLACE.HOST] = {PLACE.HOST: headerValue}
                    testableParameters = True

    if not conf.parameters:
        errMsg = "you did not provide any GET, POST and Cookie "
        errMsg += "parameter, neither an User-Agent, Referer or Host header value"
        raise SqlmapGenericException(errMsg)

    elif not testableParameters:
        errMsg = "all testable parameters you provided are not present "
        errMsg += "within the given request data"
        raise SqlmapGenericException(errMsg)

    # CSRF token handling: verify the declared token exists, or offer to
    # track a parameter that looks like one
    if conf.csrfToken:
        if not any(conf.csrfToken in _ for _ in (conf.paramDict.get(PLACE.GET, {}), conf.paramDict.get(PLACE.POST, {}))) and not conf.csrfToken in set(_[0].lower() for _ in conf.httpHeaders) and not conf.csrfToken in conf.paramDict.get(PLACE.COOKIE, {}):
            errMsg = "CSRF protection token parameter '%s' not " % conf.csrfToken
            errMsg += "found in provided GET, POST, Cookie or header values"
            raise SqlmapGenericException(errMsg)
    else:
        for place in (PLACE.GET, PLACE.POST, PLACE.COOKIE):
            for parameter in conf.paramDict.get(place, {}):
                if any(parameter.lower().count(_) for _ in CSRF_TOKEN_PARAMETER_INFIXES):
                    message = "%s parameter '%s' appears to hold CSRF protection token. " % (place, parameter)
                    message += "Do you want sqlmap to automatically update it in further requests? [y/N] "
                    test = readInput(message, default="N")
                    if test and test[0] in ("y", "Y"):
                        conf.csrfToken = parameter
                    break
Example 4

Project: raspberry_pwn
Source File: monitor.py
View license
def main():

    global COMMAND_PROMPT, TERMINAL_PROMPT, TERMINAL_TYPE, SSH_NEWKEY
    ######################################################################
    ## Parse the options, arguments, get ready, etc.
    ######################################################################
    try:
        optlist, args = getopt.getopt(sys.argv[1:], 'h?s:u:p:', ['help','h','?'])
    except Exception, e:
        print str(e)
        exit_with_usage()
    options = dict(optlist)
    if len(args) > 1:
        exit_with_usage()

    if [elem for elem in options if elem in ['-h','--h','-?','--?','--help']]:
        print "Help:"
        exit_with_usage()

    if '-s' in options:
        host = options['-s']
    else:
        host = raw_input('hostname: ')
    if '-u' in options:
        user = options['-u']
    else:
        user = raw_input('username: ')
    if '-p' in options:
        password = options['-p']
    else:
        password = getpass.getpass('password: ')

    #
    # Login via SSH
    #
    child = pexpect.spawn('ssh -l %s %s'%(user, host))
    i = child.expect([pexpect.TIMEOUT, SSH_NEWKEY, COMMAND_PROMPT, '(?i)password'])
    if i == 0: # Timeout
        print 'ERROR! could not login with SSH. Here is what SSH said:'
        print child.before, child.after
        print str(child)
        sys.exit (1)
    if i == 1: # In this case SSH does not have the public key cached.
        child.sendline ('yes')
        child.expect ('(?i)password')
    if i == 2:
        # This may happen if a public key was setup to automatically login.
        # But beware, the COMMAND_PROMPT at this point is very trivial and
        # could be fooled by some output in the MOTD or login message.
        pass
    if i == 3:
        child.sendline(password)
        # Now we are either at the command prompt or
        # the login process is asking for our terminal type.
        i = child.expect ([COMMAND_PROMPT, TERMINAL_PROMPT])
        if i == 1:
            child.sendline (TERMINAL_TYPE)
            child.expect (COMMAND_PROMPT)
    #
    # Set command prompt to something more unique.
    #
    COMMAND_PROMPT = "\[PEXPECT\]\$ "
    child.sendline ("PS1='[PEXPECT]\$ '") # In case of sh-style
    i = child.expect ([pexpect.TIMEOUT, COMMAND_PROMPT], timeout=10)
    if i == 0:
        print "# Couldn't set sh-style prompt -- trying csh-style."
        child.sendline ("set prompt='[PEXPECT]\$ '")
        i = child.expect ([pexpect.TIMEOUT, COMMAND_PROMPT], timeout=10)
        if i == 0:
            print "Failed to set command prompt using sh or csh style."
            print "Response was:"
            print child.before
            sys.exit (1)

    # Now we should be at the command prompt and ready to run some commands.
    print '---------------------------------------'
    print 'Report of commands run on remote host.'
    print '---------------------------------------'

    # Run uname.
    child.sendline ('uname -a')
    child.expect (COMMAND_PROMPT)
    print child.before
    if 'linux' in child.before.lower():
        LINUX_MODE = 1
    else:
        LINUX_MODE = 0

    # Run and parse 'uptime'.
    child.sendline ('uptime')
    child.expect('up\s+(.*?),\s+([0-9]+) users?,\s+load averages?: ([0-9]+\.[0-9][0-9]),?\s+([0-9]+\.[0-9][0-9]),?\s+([0-9]+\.[0-9][0-9])')
    duration, users, av1, av5, av15 = child.match.groups()
    days = '0'
    hours = '0'
    mins = '0'
    if 'day' in duration:
        child.match = re.search('([0-9]+)\s+day',duration)
        days = str(int(child.match.group(1)))
    if ':' in duration:
        child.match = re.search('([0-9]+):([0-9]+)',duration)
        hours = str(int(child.match.group(1)))
        mins = str(int(child.match.group(2)))
    if 'min' in duration:
        child.match = re.search('([0-9]+)\s+min',duration)
        mins = str(int(child.match.group(1)))
    print
    print 'Uptime: %s days, %s users, %s (1 min), %s (5 min), %s (15 min)' % (
        duration, users, av1, av5, av15)
    child.expect (COMMAND_PROMPT)

    # Run iostat.
    child.sendline ('iostat')
    child.expect (COMMAND_PROMPT)
    print child.before

    # Run vmstat.
    child.sendline ('vmstat')
    child.expect (COMMAND_PROMPT)
    print child.before

    # Run free.
    if LINUX_MODE:
        child.sendline ('free') # Linux systems only.
        child.expect (COMMAND_PROMPT)
        print child.before

    # Run df.
    child.sendline ('df')
    child.expect (COMMAND_PROMPT)
    print child.before
    
    # Run lsof.
    child.sendline ('lsof')
    child.expect (COMMAND_PROMPT)
    print child.before

#    # Run netstat
#    child.sendline ('netstat')
#    child.expect (COMMAND_PROMPT)
#    print child.before

#    # Run MySQL show status.
#    child.sendline ('mysql -p -e "SHOW STATUS;"')
#    child.expect (PASSWORD_PROMPT_MYSQL)
#    child.sendline (password_mysql)
#    child.expect (COMMAND_PROMPT)
#    print
#    print child.before

    # Now exit the remote host.
    child.sendline ('exit')
    index = child.expect([pexpect.EOF, "(?i)there are stopped jobs"])
    if index==1:
        child.sendline("exit")
        child.expect(EOF)

Example 5

View license
def run(test, params, env):
    """
    Test command: virsh domif-setlink and domif-getlink.

    The command   set and get link state of a virtual interface
    1. Prepare test environment.
    2. Perform virsh domif-setlink and domif-getlink operation.
    3. Recover test environment.
    4. Confirm the test result.
    """

    def domif_setlink(vm, device, operation, options):
        """
        Set the domain link state

        :param vm : domain name
        :param device : domain virtual interface
        :param opration : domain virtual interface state
        :param options : some options like --config

        """

        return virsh.domif_setlink(vm, device, operation, options, debug=True)

    def domif_getlink(vm, device, options):
        """
        Get the domain link state

        :param vm : domain name
        :param device : domain virtual interface
        :param options : some options like --config

        """

        return virsh.domif_getlink(vm, device, options,
                                   ignore_status=True, debug=True)

    def guest_cmd_check(cmd, session, pattern):
        """
        Check cmd output with pattern in session
        """
        try:
            cmd_status, output = session.cmd_status_output(cmd, timeout=10)
            logging.info("exit: %s, output: %s",
                         cmd_status, output)
            return re.search(pattern, output)
        except (aexpect.ShellTimeoutError, aexpect.ShellStatusError), e:
            logging.debug(e)
            return re.search(pattern, str(e.__str__))

    def guest_if_state(if_name, session):
        """
        Get the domain link state from the guest
        """
        # Get link state by ethtool
        cmd = "ethtool %s" % if_name
        pattern = "Link detected: ([a-zA-Z]+)"
        ret = guest_cmd_check(cmd, session, pattern)
        if ret:
            return ret.group(1) == "yes"
        else:
            return False

    def check_update_device(vm, if_name, session):
        """
        Change link state by upadte-device command, Check the results
        """
        vmxml = vm_xml.VMXML.new_from_dumpxml(vm.name)

        # Get interface xml object
        iface = vmxml.get_devices(device_type="interface")[0]
        if iface.address:
            del iface.address

        # Change link state to up
        iface.link_state = "up"
        iface.xmltreefile.write()
        ret = virsh.update_device(vm.name, iface.xml,
                                  ignore_status=True, debug=True)
        if ret.exit_status:
            logging.error("Failed to update device to up state")
            return False
        if not guest_if_state(if_name, session):
            logging.error("Guest link should be up now")
            return False

        # Change link state to down
        iface.link_state = "down"
        iface.xmltreefile.write()
        ret = virsh.update_device(vm.name, iface.xml,
                                  ignore_status=True, debug=True)
        if ret.exit_status:
            logging.error("Failed to update device to down state")
            return False
        if guest_if_state(if_name, session):
            logging.error("Guest link should be down now")
            return False

        # Passed all test
        return True

    vm_name = []
    # vm_name list:first element for original name in config
    vm_name.append(params.get("main_vm", "avocado-vt-vm1"))
    vm = env.get_vm(vm_name[0])
    options = params.get("if_options", "--config")
    start_vm = params.get("start_vm", "no")
    domain = params.get("domain", "name")
    if_device = params.get("if_device", "net")
    if_name = params.get("if_name", "vnet0")
    if_operation = params.get("if_operation", "up")
    status_error = params.get("status_error", "no")
    mac_address = vm.get_virsh_mac_address(0)
    check_link_state = "yes" == params.get("check_link_state", "no")
    check_link_by_update_device = "yes" == params.get(
        "excute_update_device", "no")
    device = "vnet0"
    username = params.get("username")
    password = params.get("password")

    # Back up xml file.
    vm_xml_file = os.path.join(test.tmpdir, "vm.xml")
    virsh.dumpxml(vm_name[0], extra="--inactive", to_file=vm_xml_file)

    # Vm status
    if start_vm == "yes" and vm.is_dead():
        vm.start()

    elif start_vm == "no" and vm.is_alive():
        vm.destroy()

    # vm_name list: second element for 'domain' in virsh command
    if domain == "ID":
        # Get ID for the running domain
        vm_name.append(vm.get_id())
    elif domain == "UUID":
        # Get UUID for the domain
        vm_name.append(vm.get_uuid())
    elif domain == "no_match_UUID":
        # Generate a random UUID
        vm_name.append(uuid.uuid1())
    elif domain == "no_match_name":
        # Generate a random string as domain name
        vm_name.append(utils_misc.generate_random_string(6))
    elif domain == " ":
        # Set domain name empty
        vm_name.append("''")
    else:
        # Set domain name
        vm_name.append(vm_name[0])

    try:
        # Test device net or mac address
        if if_device == "net" and vm.is_alive():
            device = if_name
            # Get all vm's interface device
            device = vm_xml.VMXML.get_net_dev(vm_name[0])[0]

        elif if_device == "mac":
            device = mac_address

        # Test no exist device
        if if_device == "no_exist_net":
            device = "vnet-1"
        elif if_device == "no_exist_mac":
            # Generate random mac address for negative test
            device = utils_net.VirtIface.complete_mac_address("01:02")
        elif if_device == " ":
            device = "''"

        # Setlink opertation
        result = domif_setlink(vm_name[1], device, if_operation, options)
        status = result.exit_status
        logging.info("Setlink done")

        # Getlink opertation
        get_result = domif_getlink(vm_name[1], device, options)
        getlink_output = get_result.stdout.strip()

        # Check the getlink command output
        if status_error == "no":
            if not re.search(if_operation, getlink_output):
                raise error.TestFail("Getlink result should "
                                     "equal with setlink operation")

        logging.info("Getlink done")
        # If --config is given should restart the vm then test link status
        if options == "--config" and vm.is_alive():
            vm.destroy()
            vm.start()
            logging.info("Restart VM")

        elif start_vm == "no":
            vm.start()

        error_msg = None
        if status_error == "no":
            # Serial login the vm to check link status
            # Start vm check the link statue
            session = vm.wait_for_serial_login(username=username,
                                               password=password)
            guest_if_name = utils_net.get_linux_ifname(session, mac_address)

            # Check link state in guest
            if check_link_state:
                if (if_operation == "up" and
                        not guest_if_state(guest_if_name, session)):
                    error_msg = "Link state should be up in guest"
                if (if_operation == "down" and
                        guest_if_state(guest_if_name, session)):
                    error_msg = "Link state should be down in guest"

            # Test of setting link state by update_device command
            if check_link_by_update_device:
                if not check_update_device(vm, guest_if_name, session):
                    error_msg = "Check update_device failed"

            # Set the link up make host connect with vm
            domif_setlink(vm_name[0], device, "up", "")
            if not utils_misc.wait_for(
                    lambda: guest_if_state(guest_if_name, session), 5):
                error_msg = "Link state isn't up in guest"

            # Ignore status of this one
            cmd = 'ifdown %s' % guest_if_name
            pattern = "Device '%s' successfully disconnected" % guest_if_name
            guest_cmd_check(cmd, session, pattern)

            cmd = 'ifup %s' % guest_if_name
            pattern = "Determining IP information for %s" % guest_if_name
            pattern += "|Connection successfully activated"
            if not guest_cmd_check(cmd, session, pattern):
                error_msg = ("Could not bring up interface %s inside guest"
                             % guest_if_name)
        else:  # negative test
            # stop guest, so state is always consistent on next start
            vm.destroy()

        if error_msg:
            raise error.TestFail(error_msg)

        # Check status_error
        if status_error == "yes":
            if status:
                logging.info("Expected error (negative testing). Output: %s",
                             result.stderr.strip())

            else:
                raise error.TestFail("Unexpected return code %d "
                                     "(negative testing)" % status)
        elif status_error != "no":
            raise error.TestError("Invalid value for status_error '%s' "
                                  "(must be 'yes' or 'no')" % status_error)
    finally:
        # Recover VM.
        if vm.is_alive():
            vm.destroy(gracefully=False)
        virsh.undefine(vm_name[0])
        virsh.define(vm_xml_file)
        os.remove(vm_xml_file)

Example 6

Project: tp-libvirt
Source File: virsh_snapshot_disk.py
View license
def run(test, params, env):
    """
    Test virsh snapshot command when disk in all kinds of type.

    (1). Init the variables from params.
    (2). Create a image by specified format.
    (3). Attach disk to vm.
    (4). Snapshot create.
    (5). Snapshot revert.
    (6). cleanup.
    """
    # Init variables.
    vm_name = params.get("main_vm", "avocado-vt-vm1")
    vm = env.get_vm(vm_name)
    vm_state = params.get("vm_state", "running")
    image_format = params.get("snapshot_image_format", "qcow2")
    snapshot_del_test = "yes" == params.get("snapshot_del_test", "no")
    status_error = ("yes" == params.get("status_error", "no"))
    snapshot_from_xml = ("yes" == params.get("snapshot_from_xml", "no"))
    snapshot_current = ("yes" == params.get("snapshot_current", "no"))
    snapshot_revert_paused = ("yes" == params.get("snapshot_revert_paused",
                                                  "no"))
    replace_vm_disk = "yes" == params.get("replace_vm_disk", "no")
    disk_source_protocol = params.get("disk_source_protocol")
    vol_name = params.get("vol_name")
    tmp_dir = data_dir.get_tmp_dir()
    pool_name = params.get("pool_name", "gluster-pool")
    brick_path = os.path.join(tmp_dir, pool_name)
    multi_gluster_disks = "yes" == params.get("multi_gluster_disks", "no")

    # Pool variables.
    snapshot_with_pool = "yes" == params.get("snapshot_with_pool", "no")
    pool_name = params.get("pool_name")
    pool_type = params.get("pool_type")
    pool_target = params.get("pool_target")
    emulated_image = params.get("emulated_image", "emulated-image")
    vol_format = params.get("vol_format")
    lazy_refcounts = "yes" == params.get("lazy_refcounts")
    options = params.get("snapshot_options", "")
    export_options = params.get("export_options", "rw,no_root_squash,fsid=0")

    # Set volume xml attribute dictionary, extract all params start with 'vol_'
    # which are for setting volume xml, except 'lazy_refcounts'.
    vol_arg = {}
    for key in params.keys():
        if key.startswith('vol_'):
            if key[4:] in ['capacity', 'allocation', 'owner', 'group']:
                vol_arg[key[4:]] = int(params[key])
            else:
                vol_arg[key[4:]] = params[key]
    vol_arg['lazy_refcounts'] = lazy_refcounts

    supported_pool_list = ["dir", "fs", "netfs", "logical", "iscsi",
                           "disk", "gluster"]
    if snapshot_with_pool:
        if pool_type not in supported_pool_list:
            # BUGFIX: report pool_type (the value actually tested), not
            # pool_target, in the skip message.
            raise error.TestNAError("%s not in support list %s" %
                                    (pool_type, supported_pool_list))

    # Do xml backup for final recovery
    vmxml_backup = libvirt_xml.VMXML.new_from_inactive_dumpxml(vm_name)
    # Some variable for xmlfile of snapshot.
    snapshot_memory = params.get("snapshot_memory", "internal")
    snapshot_disk = params.get("snapshot_disk", "internal")
    no_memory_snap = "yes" == params.get("no_memory_snap", "no")

    # Skip 'qed' cases for libvirt version greater than 1.1.0
    if libvirt_version.version_compare(1, 1, 0):
        if vol_format == "qed" or image_format == "qed":
            raise error.TestNAError("QED support changed, check bug: "
                                    "https://bugzilla.redhat.com/show_bug.cgi"
                                    "?id=731570")

    if not libvirt_version.version_compare(1, 2, 7):
        # As bug 1017289 closed as WONTFIX, the support only
        # exist on 1.2.7 and higher
        if disk_source_protocol == 'gluster':
            raise error.TestNAError("Snapshot on glusterfs not support in "
                                    "current version. Check more info with "
                                    "https://bugzilla.redhat.com/buglist.cgi?"
                                    "bug_id=1017289,1032370")

    # Init snapshot_name
    snapshot_name = None
    snapshot_external_disk = []
    snapshot_xml_path = None
    del_status = None
    image = None
    pvt = None
    # Get a tmp dir
    snap_cfg_path = "/var/lib/libvirt/qemu/snapshot/%s/" % vm_name
    try:
        if replace_vm_disk:
            utlv.set_vm_disk(vm, params, tmp_dir)
            if multi_gluster_disks:
                new_params = params.copy()
                new_params["pool_name"] = "gluster-pool2"
                new_params["vol_name"] = "gluster-vol2"
                new_params["disk_target"] = "vdf"
                new_params["image_convert"] = 'no'
                utlv.set_vm_disk(vm, new_params, tmp_dir)

        if snapshot_with_pool:
            # Create dst pool for create attach vol img
            pvt = utlv.PoolVolumeTest(test, params)
            pvt.pre_pool(pool_name, pool_type, pool_target,
                         emulated_image, image_size="1G",
                         pre_disk_vol=["20M"],
                         source_name=vol_name,
                         export_options=export_options)

            if pool_type in ["iscsi", "disk"]:
                # iscsi and disk pool did not support create volume in libvirt,
                # logical pool could use libvirt to create volume but volume
                # format is not supported and will be 'raw' as default.
                pv = libvirt_storage.PoolVolume(pool_name)
                vols = pv.list_volumes().keys()
                if vols:
                    vol_name = vols[0]
                else:
                    raise error.TestNAError("No volume in pool: %s" % pool_name)
            else:
                # Set volume xml file
                volxml = libvirt_xml.VolXML()
                newvol = volxml.new_vol(**vol_arg)
                vol_xml = newvol['xml']

                # Run virsh_vol_create to create vol
                logging.debug("create volume from xml: %s" % newvol.xmltreefile)
                cmd_result = virsh.vol_create(pool_name, vol_xml,
                                              ignore_status=True,
                                              debug=True)
                if cmd_result.exit_status:
                    raise error.TestNAError("Failed to create attach volume.")

            cmd_result = virsh.vol_path(vol_name, pool_name, debug=True)
            if cmd_result.exit_status:
                raise error.TestNAError("Failed to get volume path from pool.")
            img_path = cmd_result.stdout.strip()

            if pool_type in ["logical", "iscsi", "disk"]:
                # Use qemu-img to format logical, iscsi and disk block device
                if vol_format != "raw":
                    cmd = "qemu-img create -f %s %s 10M" % (vol_format,
                                                            img_path)
                    cmd_result = utils.run(cmd, ignore_status=True)
                    if cmd_result.exit_status:
                        raise error.TestNAError("Failed to format volume, %s" %
                                                cmd_result.stdout.strip())
            extra = "--persistent --subdriver %s" % vol_format
        else:
            # Create a image.
            params['image_name'] = "snapshot_test"
            params['image_format'] = image_format
            params['image_size'] = "1M"
            image = qemu_storage.QemuImg(params, tmp_dir, "snapshot_test")
            img_path, _ = image.create(params)
            extra = "--persistent --subdriver %s" % image_format

        if not multi_gluster_disks:
            # Do the attach action.
            out = utils.run("qemu-img info %s" % img_path)
            logging.debug("The img info is:\n%s" % out.stdout.strip())
            result = virsh.attach_disk(vm_name, source=img_path, target="vdf",
                                       extra=extra, debug=True)
            if result.exit_status:
                raise error.TestNAError("Failed to attach disk %s to VM."
                                        "Detail: %s." % (img_path, result.stderr))

        # Create snapshot.
        if snapshot_from_xml:
            snap_xml = libvirt_xml.SnapshotXML()
            snapshot_name = "snapshot_test"
            snap_xml.snap_name = snapshot_name
            snap_xml.description = "Snapshot Test"
            if not no_memory_snap:
                if "--disk-only" not in options:
                    if snapshot_memory == "external":
                        memory_external = os.path.join(tmp_dir,
                                                       "snapshot_memory")
                        snap_xml.mem_snap_type = snapshot_memory
                        snap_xml.mem_file = memory_external
                        snapshot_external_disk.append(memory_external)
                    else:
                        snap_xml.mem_snap_type = snapshot_memory

            # Add all disks into xml file.
            vmxml = libvirt_xml.VMXML.new_from_inactive_dumpxml(vm_name)
            disks = vmxml.devices.by_device_tag('disk')
            new_disks = []
            for src_disk_xml in disks:
                disk_xml = snap_xml.SnapDiskXML()
                disk_xml.xmltreefile = src_disk_xml.xmltreefile
                del disk_xml.device
                del disk_xml.address
                disk_xml.snapshot = snapshot_disk
                disk_xml.disk_name = disk_xml.target['dev']

                # Only qcow2 works as external snapshot file format, update it
                # here
                driver_attr = disk_xml.driver
                driver_attr.update({'type': 'qcow2'})
                disk_xml.driver = driver_attr

                if snapshot_disk == 'external':
                    new_attrs = disk_xml.source.attrs
                    if disk_xml.source.attrs.has_key('file'):
                        new_file = "%s.snap" % disk_xml.source.attrs['file']
                        snapshot_external_disk.append(new_file)
                        new_attrs.update({'file': new_file})
                        hosts = None
                    elif disk_xml.source.attrs.has_key('name'):
                        new_name = "%s.snap" % disk_xml.source.attrs['name']
                        new_attrs.update({'name': new_name})
                        hosts = disk_xml.source.hosts
                    elif (disk_xml.source.attrs.has_key('dev') and
                          disk_xml.type_name == 'block'):
                        # Use local file as external snapshot target for block type.
                        # As block device will be treat as raw format by default,
                        # it's not fit for external disk snapshot target. A work
                        # around solution is use qemu-img again with the target.
                        disk_xml.type_name = 'file'
                        del new_attrs['dev']
                        new_file = "%s/blk_src_file.snap" % tmp_dir
                        snapshot_external_disk.append(new_file)
                        new_attrs.update({'file': new_file})
                        hosts = None

                    new_src_dict = {"attrs": new_attrs}
                    if hosts:
                        new_src_dict.update({"hosts": hosts})
                    disk_xml.source = disk_xml.new_disk_source(**new_src_dict)
                else:
                    del disk_xml.source

                new_disks.append(disk_xml)

            snap_xml.set_disks(new_disks)
            snapshot_xml_path = snap_xml.xml
            logging.debug("The snapshot xml is: %s" % snap_xml.xmltreefile)

            options += " --xmlfile %s " % snapshot_xml_path

            if vm_state == "shut off":
                vm.destroy(gracefully=False)

            snapshot_result = virsh.snapshot_create(
                vm_name, options, debug=True)
            out_err = snapshot_result.stderr.strip()
            if snapshot_result.exit_status:
                if status_error:
                    return
                else:
                    if re.search("live disk snapshot not supported with this "
                                 "QEMU binary", out_err):
                        raise error.TestNAError(out_err)

                    if libvirt_version.version_compare(1, 2, 5):
                        # As commit d2e668e in 1.2.5, internal active snapshot
                        # without memory state is rejected. Handle it as SKIP
                        # for now. This could be supported in future by bug:
                        # https://bugzilla.redhat.com/show_bug.cgi?id=1103063
                        if re.search("internal snapshot of a running VM" +
                                     " must include the memory state",
                                     out_err):
                            raise error.TestNAError("Check Bug #1083345, %s" %
                                                    out_err)

                    raise error.TestFail("Failed to create snapshot. Error:%s."
                                         % out_err)
        else:
            snapshot_result = virsh.snapshot_create(vm_name, options,
                                                    debug=True)
            if snapshot_result.exit_status:
                if status_error:
                    return
                else:
                    raise error.TestFail("Failed to create snapshot. Error:%s."
                                         % snapshot_result.stderr.strip())
            snapshot_name = re.search(
                "\d+", snapshot_result.stdout.strip()).group(0)

            if snapshot_current:
                snap_xml = libvirt_xml.SnapshotXML()
                new_snap = snap_xml.new_from_snapshot_dumpxml(vm_name,
                                                              snapshot_name)
                # update an element
                new_snap.creation_time = snapshot_name
                snapshot_xml_path = new_snap.xml
                options += "--redefine %s --current" % snapshot_xml_path
                snapshot_result = virsh.snapshot_create(vm_name,
                                                        options, debug=True)
                if snapshot_result.exit_status:
                    raise error.TestFail("Failed to create snapshot --current."
                                         "Error:%s." %
                                         snapshot_result.stderr.strip())

        if status_error:
            if not snapshot_del_test:
                raise error.TestFail("Success to create snapshot in negative"
                                     " case\nDetail: %s" % snapshot_result)

        # Touch a file in VM.
        if vm.is_dead():
            vm.start()
        session = vm.wait_for_login()

        # Init a unique name for tmp_file.
        tmp_file = tempfile.NamedTemporaryFile(prefix=("snapshot_test_"),
                                               dir="/tmp")
        tmp_file_path = tmp_file.name
        tmp_file.close()

        echo_cmd = "echo SNAPSHOT_DISK_TEST >> %s" % tmp_file_path
        status, output = session.cmd_status_output(echo_cmd)
        logging.debug("The echo output in domain is: '%s'", output)
        if status:
            raise error.TestFail("'%s' run failed with '%s'" %
                                 (tmp_file_path, output))
        status, output = session.cmd_status_output("cat %s" % tmp_file_path)
        logging.debug("File created with content: '%s'", output)

        session.close()

        # As only internal snapshot revert works now, let's only do revert
        # with internal, and move the all skip external cases back to pass.
        # After external also supported, just move the following code back.
        if snapshot_disk == 'internal':
            # Destroy vm for snapshot revert.
            if not libvirt_version.version_compare(1, 2, 3):
                virsh.destroy(vm_name)
            # Revert snapshot.
            revert_options = ""
            if snapshot_revert_paused:
                revert_options += " --paused"
            revert_result = virsh.snapshot_revert(vm_name, snapshot_name,
                                                  revert_options,
                                                  debug=True)
            if revert_result.exit_status:
                # Attempts to revert external snapshots will FAIL with an error
                # "revert to external disk snapshot not supported yet" or "revert
                # to external snapshot not supported yet" since d410e6f. Thus,
                # let's check for that and handle as a SKIP for now. Check bug:
                # https://bugzilla.redhat.com/show_bug.cgi?id=1071264
                if re.search("revert to external \w* ?snapshot not supported yet",
                             revert_result.stderr):
                    raise error.TestNAError(revert_result.stderr.strip())
                else:
                    raise error.TestFail("Revert snapshot failed. %s" %
                                         revert_result.stderr.strip())

            if vm.is_dead():
                raise error.TestFail("Revert snapshot failed.")

            if snapshot_revert_paused:
                if vm.is_paused():
                    vm.resume()
                else:
                    raise error.TestFail("Revert command successed, but VM is not "
                                         "paused after reverting with --paused"
                                         "  option.")
            # login vm.
            session = vm.wait_for_login()
            # Check the result of revert.
            status, output = session.cmd_status_output("cat %s" % tmp_file_path)
            logging.debug("After revert cat file output='%s'", output)
            if not status:
                raise error.TestFail("Tmp file exists, revert failed.")

            # Close the session.
            session.close()

        # Test delete snapshot without "--metadata", delete external disk
        # snapshot will fail for now.
        # Only do this when snapshot create succeeded, which is filtered in
        # the cfg file.
        if snapshot_del_test:
            if snapshot_name:
                del_result = virsh.snapshot_delete(vm_name, snapshot_name,
                                                   debug=True,
                                                   ignore_status=True)
                del_status = del_result.exit_status
                snap_xml_path = snap_cfg_path + "%s.xml" % snapshot_name
                if del_status:
                    if not status_error:
                        raise error.TestFail("Failed to delete snapshot.")
                    else:
                        if not os.path.exists(snap_xml_path):
                            raise error.TestFail("Snapshot xml file %s missing"
                                                 % snap_xml_path)
                else:
                    if status_error:
                        err_msg = "Snapshot delete succeed but expect fail."
                        raise error.TestFail(err_msg)
                    else:
                        if os.path.exists(snap_xml_path):
                            raise error.TestFail("Snapshot xml file %s still"
                                                 % snap_xml_path + " exist")

    finally:
        if vm.is_alive():
            vm.destroy(gracefully=False)
        virsh.detach_disk(vm_name, target="vdf", extra="--persistent")
        if image:
            image.remove()
        if del_status and snapshot_name:
            virsh.snapshot_delete(vm_name, snapshot_name, "--metadata")
        for disk in snapshot_external_disk:
            if os.path.exists(disk):
                os.remove(disk)
        vmxml_backup.sync("--snapshots-metadata")

        libvirtd = utils_libvirtd.Libvirtd()
        if disk_source_protocol == 'gluster':
            utlv.setup_or_cleanup_gluster(False, vol_name, brick_path)
            if multi_gluster_disks:
                brick_path = os.path.join(tmp_dir, "gluster-pool2")
                utlv.setup_or_cleanup_gluster(False, "gluster-vol2", brick_path)
            libvirtd.restart()

        if snapshot_xml_path:
            if os.path.exists(snapshot_xml_path):
                os.unlink(snapshot_xml_path)
        if pvt:
            try:
                pvt.cleanup_pool(pool_name, pool_type, pool_target,
                                 emulated_image, source_name=vol_name)
            except error.TestFail, detail:
                libvirtd.restart()
                logging.error(str(detail))

Example 7

Project: tp-qemu
Source File: boot_from_device.py
View license
@error.context_aware
def run(test, params, env):
    """
    QEMU boot from device:

    1) Start guest from device(hd/usb/scsi-hd)
    2) Check the boot result
    3) Log into the guest if it's up
    4) Shutdown the guest if it's up

    :param test: QEMU test object
    :param params: Dictionary with the test parameters
    :param env: Dictionary with test environment.
    """

    def create_cdroms():
        """
        Create 'test' cdrom with one file on it
        """

        # 10M of random data is packed into an ISO image; the scratch file
        # is removed once the image has been generated.
        logging.info("creating test cdrom")
        cdrom_test = params.get("cdrom_test")
        cdrom_test = utils_misc.get_path(data_dir.get_data_dir(), cdrom_test)
        utils.run("dd if=/dev/urandom of=test bs=10M count=1")
        utils.run("mkisofs -o %s test" % cdrom_test)
        utils.run("rm -f test")

    def cleanup_cdroms():
        """
        Removes created cdrom
        """

        logging.info("cleaning up temp cdrom images")
        cdrom_test = utils_misc.get_path(
            data_dir.get_data_dir(), params.get("cdrom_test"))
        os.remove(cdrom_test)

    def preprocess_remote_storage():
        """
        Prepare remote ISCSI storage for block image, and login session for
        iscsi device.
        """
        image_name = params.get("images").split()[0]
        base_dir = params.get("images_base_dir", data_dir.get_data_dir())
        iscsidevice = qemu_storage.Iscsidev(params, base_dir, image_name)
        iscsidevice.setup()

    def postprocess_remote_storage():
        """
        Logout from target.
        """
        image_name = params.get("images").split()[0]
        base_dir = params.get("images_base_dir", data_dir.get_data_dir())
        iscsidevice = qemu_storage.Iscsidev(params, base_dir, image_name)
        iscsidevice.cleanup()

    def cleanup(dev_name):
        # Dispatch teardown by device kind; other device types need no cleanup.
        if dev_name == "scsi-cd":
            cleanup_cdroms()
        elif dev_name == "iscsi-dev":
            postprocess_remote_storage()

    def check_boot_result(boot_fail_info, device_name):
        """
        Check boot result, and logout from iscsi device if boot from iscsi.
        """

        # Poll the serial console until the first expected message appears
        # or the overall timeout elapses.
        logging.info("Wait for display and check boot info.")
        infos = boot_fail_info.split(';')
        start = time.time()
        while True:
            console_str = vm.serial_console.get_stripped_output()
            match = re.search(infos[0], console_str)
            if match or time.time() > start + timeout:
                break
            time.sleep(1)
        logging.info("Try to boot from '%s'" % device_name)
        try:
            # NOTE(review): this condition reads `dev_name` from the enclosing
            # scope, not the `device_name` parameter — same value at the only
            # call site, but confirm before refactoring.
            if dev_name == "hard-drive" or (dev_name == "scsi-hd" and not
                                            params.get("image_name_stg")):
                error.context("Log into the guest to verify it's up",
                              logging.info)
                session = vm.wait_for_login(timeout=timeout)
                session.close()
                vm.destroy()
                return

            output = vm.serial_console.get_stripped_output()

            # Every expected message must appear on the console, otherwise
            # the boot from the requested device is considered failed.
            for i in infos:
                if not re.search(i, output):
                    raise error.TestFail("Could not boot from"
                                         " '%s'" % device_name)
        finally:
            cleanup(device_name)

    timeout = int(params.get("login_timeout", 360))
    boot_menu_key = params.get("boot_menu_key", 'f12')
    boot_menu_hint = params.get("boot_menu_hint")
    boot_fail_info = params.get("boot_fail_info")
    boot_device = params.get("boot_device")
    dev_name = params.get("dev_name")
    # Device-specific preparation: cdrom and iscsi devices must exist before
    # the VM is (re)started, so the VM is preprocessed explicitly for them.
    if dev_name == "scsi-cd":
        create_cdroms()
        params["start_vm"] = "yes"
        env_process.preprocess_vm(test, params, env, params.get("main_vm"))
        vm = env.get_vm(params["main_vm"])
        vm.verify_alive()
    elif dev_name == "iscsi-dev":
        preprocess_remote_storage()
        params["start_vm"] = "yes"
        env_process.preprocess_vm(test, params, env, params.get("main_vm"))
        vm = env.get_vm(params["main_vm"])
        vm.verify_alive()
    else:
        vm = env.get_vm(params["main_vm"])
        vm.verify_alive()
    if boot_device:
        # Wait for the firmware boot-menu hint, then open the menu and pick
        # the entry matching `boot_device`.
        match = False
        start = time.time()
        while True:
            console_str = vm.serial_console.get_stripped_output()
            match = re.search(boot_menu_hint, console_str)
            if match or time.time() > start + timeout:
                break
            time.sleep(1)
        if not match:
            cleanup(dev_name)
            raise error.TestFail("Could not get boot menu message. "
                                 "Excepted Result: '%s', Actual result: '%s'"
                                 % (boot_menu_hint, console_str))

        # Send boot menu key in monitor.
        vm.send_key(boot_menu_key)

        # Menu entries look like "1. <name>"; collect the names.
        output = vm.serial_console.get_stripped_output()
        boot_list = re.findall("^\d+\. (.*)\s", output, re.M)

        if not boot_list:
            cleanup(dev_name)
            raise error.TestFail("Could not get boot entries list.")

        logging.info("Got boot menu entries: '%s'", boot_list)
        for i, v in enumerate(boot_list, start=1):
            if re.search(boot_device, v, re.I):
                logging.info("Start guest from boot entry '%s'" % boot_device)
                vm.send_key(str(i))
                break
        else:
            # for/else: no entry matched the requested boot device.
            raise error.TestFail("Could not get any boot entry match "
                                 "pattern '%s'" % boot_device)

    check_boot_result(boot_fail_info, dev_name)

Example 8

Project: RMG-Py
Source File: fluxdiagram.py
View license
def generateFluxDiagram(reactionModel, times, concentrations, reactionRates, outputDirectory, centralSpecies=None, speciesDirectory=None, settings=None):
    """
    For a given `reactionModel` and simulation results stored as arrays of
    `times`, species `concentrations`, and `reactionRates`, generate a series
    of flux diagrams as frames of an animation, then stitch them together into
    a movie. The individual frames and the final movie are saved on disk at
    `outputDirectory.`

    If `centralSpecies` (a species label) is given, only species directly
    connected to it are drawn; otherwise the highest-flux pairs are kept.
    `settings`, when provided, overrides the module-level diagram limits.
    """
    global maximumNodeCount, maximumEdgeCount, timeStep, concentrationTolerance, speciesRateTolerance
    # Allow user defined settings for flux diagram generation if given
    if settings:
        maximumNodeCount = settings['maximumNodeCount']       
        maximumEdgeCount = settings['maximumEdgeCount']  
        timeStep = settings['timeStep']
        concentrationTolerance = settings['concentrationTolerance']   
        speciesRateTolerance = settings['speciesRateTolerance']
    
    # Get the species and reactions corresponding to the provided concentrations and reaction rates
    speciesList = reactionModel.core.species[:]
    numSpecies = len(speciesList)
    reactionList = reactionModel.core.reactions[:]
    numReactions = len(reactionList)
    
    #search index of central species:
    # NOTE(review): if no species label matches, centralSpeciesIndex stays
    # unbound and the branch below would raise UnboundLocalError — confirm
    # callers always pass a valid label.
    if centralSpecies is not None:
        for i, species in enumerate(speciesList):
            if species.label == centralSpecies:
                centralSpeciesIndex = i
                break 
    
    # Compute the rates between each pair of species (big matrix warning!)
    # speciesRates is antisymmetric in its last two axes: a positive entry
    # at (t, r, p) is net flux from reactant r to product p at time t.
    speciesRates = numpy.zeros((len(times),numSpecies,numSpecies), numpy.float64)
    for index, reaction in enumerate(reactionList):
        rate = reactionRates[:,index]
        if not reaction.pairs: reaction.generatePairs()
        for reactant, product in reaction.pairs:
            reactantIndex = speciesList.index(reactant)
            productIndex = speciesList.index(product)
            speciesRates[:,reactantIndex,productIndex] += rate
            speciesRates[:,productIndex,reactantIndex] -= rate
    
    # Determine the maximum concentration for each species and the maximum overall concentration
    maxConcentrations = numpy.max(numpy.abs(concentrations), axis=0)
    maxConcentration = numpy.max(maxConcentrations)
    
    # Determine the maximum rate for each species-species pair and the maximum overall species-species rate
    maxSpeciesRates = numpy.max(numpy.abs(speciesRates), axis=0)
    maxSpeciesRate = numpy.max(maxSpeciesRates)
    # Flattened indices of pair rates in ascending order; iterated from the
    # end below to visit the largest rates first.
    speciesIndex = maxSpeciesRates.reshape((numSpecies*numSpecies)).argsort()
    
    # Determine the nodes and edges to keep
    nodes = []; edges = []
    if centralSpecies is None:
        # Keep the highest-flux pairs until the node or edge budget is hit.
        for i in range(numSpecies*numSpecies):
            productIndex, reactantIndex = divmod(speciesIndex[-i-1], numSpecies)
            if reactantIndex > productIndex:
                # Both reactant -> product and product -> reactant are in this list,
                # so only keep one of them
                continue
            if maxSpeciesRates[reactantIndex, productIndex] == 0:
                break
            if reactantIndex not in nodes and len(nodes) < maximumNodeCount: nodes.append(reactantIndex)
            if productIndex not in nodes and len(nodes) < maximumNodeCount: nodes.append(productIndex)
            if len(nodes) > maximumNodeCount: 
                break
            edges.append([reactantIndex, productIndex])
            if len(edges) >= maximumEdgeCount:
                break
    else:
        # Keep only pairs in which the central species participates.
        nodes.append(centralSpeciesIndex)
        for index, reaction in enumerate(reactionList):
            for reactant, product in reaction.pairs:
                reactantIndex = speciesList.index(reactant)
                productIndex = speciesList.index(product)
                if maxSpeciesRates[reactantIndex, productIndex] == 0:
                    break
                if len(nodes) > maximumNodeCount or len(edges) >= maximumEdgeCount: 
                    break
                if reactantIndex == centralSpeciesIndex: 
                    if productIndex not in nodes:
                        nodes.append(productIndex)
                        edges.append([reactantIndex, productIndex])
                if productIndex == centralSpeciesIndex: 
                    if reactantIndex not in nodes:
                        nodes.append(reactantIndex)
                        edges.append([reactantIndex, productIndex])
    # Create the master graph
    # First we're going to generate the coordinates for all of the nodes; for
    # this we use the thickest pen widths for all nodes and edges 
    graph = pydot.Dot('flux_diagram', graph_type='digraph', overlap="false")
    graph.set_rankdir('LR')
    graph.set_fontname('sans')
    graph.set_fontsize('10')
    
    # Add a node for each species
    # (maximumNodePenWidth / maximumEdgePenWidth / program are presumably
    # module-level settings defined elsewhere in this file.)
    for index in nodes:
        species = speciesList[index]
        node = pydot.Node(name=str(species))
        node.set_penwidth(maximumNodePenWidth)
        graph.add_node(node)
        # Try to use an image instead of the label
        speciesIndex = str(species) + '.png'
        imagePath = ''
        if not speciesDirectory or not os.path.exists(speciesDirectory): 
            continue
        for root, dirs, files in os.walk(speciesDirectory):
            for f in files:
                if f.endswith(speciesIndex):
                    imagePath = os.path.join(root, f)
                    break
        if os.path.exists(imagePath):
            node.set_image(imagePath)
            node.set_label(" ")
    # Add an edge for each species-species rate
    for reactantIndex, productIndex in edges:
        if reactantIndex in nodes and productIndex in nodes:
            reactant = speciesList[reactantIndex]
            product = speciesList[productIndex]
            edge = pydot.Edge(str(reactant), str(product))
            edge.set_penwidth(maximumEdgePenWidth)
            graph.add_edge(edge) 
    
    # Generate the coordinates for all of the nodes using the specified program
    graph = pydot.graph_from_dot_data(graph.create_dot(prog=program))[0]
    
    # Now iterate over the time points, setting the pen widths appropriately
    # This should preserve the coordinates of the nodes from frame to frame
    frameNumber = 1
    for t in range(len(times)):
        # Update the nodes
        # Pen width scales logarithmically with concentration, hitting zero
        # at the tolerance and the maximum width at the peak concentration.
        slope = -maximumNodePenWidth / math.log10(concentrationTolerance)
        for index in nodes:
            species = speciesList[index]         
            # Graphviz requires quoting when the name contains characters
            # outside [a-zA-Z0-9_].
            if re.search(r'^[a-zA-Z0-9_]*$',str(species)) is not None:
                species_string = str(species)
            else:
                # species name contains special characters                
                species_string = '"{0}"'.format(str(species))
                
            node = graph.get_node(species_string)[0]
            concentration = concentrations[t,index] / maxConcentration
            if concentration < concentrationTolerance:
                penwidth = 0.0
            else:
                penwidth = round(slope * math.log10(concentration) + maximumNodePenWidth,3)
            node.set_penwidth(penwidth)
        # Update the edges
        slope = -maximumEdgePenWidth / math.log10(speciesRateTolerance)
        for index in range(len(edges)):
            reactantIndex, productIndex = edges[index]
            if reactantIndex in nodes and productIndex in nodes:
                reactant = speciesList[reactantIndex]
                product = speciesList[productIndex]
                
                if re.search(r'^[a-zA-Z0-9_]*$',str(reactant)) is not None:
                    reactant_string = str(reactant)
                else:
                    reactant_string = '"{0}"'.format(str(reactant))
                    
                if re.search(r'^[a-zA-Z0-9_]*$',str(product)) is not None:
                    product_string = str(product)
                else:
                    product_string = '"{0}"'.format(str(product))
                    
                edge = graph.get_edge(reactant_string, product_string)[0]
                # Determine direction of arrow based on sign of rate
                speciesRate = speciesRates[t,reactantIndex,productIndex] / maxSpeciesRate
                if speciesRate < 0:
                    edge.set_dir("back")
                    speciesRate = -speciesRate
                else:
                    edge.set_dir("forward")
                # Set the edge pen width
                if speciesRate < speciesRateTolerance:
                    penwidth = 0.0
                    edge.set_dir("none")
                else:
                    penwidth = round(slope * math.log10(speciesRate) + maximumEdgePenWidth,3)
                edge.set_penwidth(penwidth)
        # Save the graph at this time to a dot file and a PNG image
        if times[t] == 0:
            label = 't = 0 s'
        else:
            label = 't = 10^{0:.1f} s'.format(math.log10(times[t]))
        graph.set_label(label)
        # Hold the first and last frames longer so the movie pauses at the
        # start and end (framesPerSecond/initialPadding/finalPadding are
        # presumably module-level constants).
        if t == 0:
            repeat = framesPerSecond * initialPadding
        elif t == len(times) - 1:
            repeat = framesPerSecond * finalPadding
        else:
            repeat = 1
        for r in range(repeat):
            graph.write_dot(os.path.join(outputDirectory, 'flux_diagram_{0:04d}.dot'.format(frameNumber)))
            graph.write_png(os.path.join(outputDirectory, 'flux_diagram_{0:04d}.png'.format(frameNumber)))
            frameNumber += 1
    
    # Use ffmpeg to stitch the PNG images together into a movie
    import subprocess
    
    command = ['ffmpeg',
               '-framerate', '{0:d}'.format(framesPerSecond), # Duration of each image
               '-i', 'flux_diagram_%04d.png',                 # Input file format
               '-c:v', 'mpeg4',                               # Encoder
               '-r', '30',                                    # Video framerate
               '-pix_fmt', 'yuv420p',                         # Pixel format
               'flux_diagram.avi']                            # Output filename
    
    subprocess.check_call(command, cwd=outputDirectory)

Example 9

Project: virtmgr
Source File: views.py
View license
def pool(request, host_id, pool):
	"""
	Django view: manage a libvirt storage pool on a KVM host.

	Connects to the host over qemu+tcp (password auth from the session or
	the stored Host credentials), then handles pool lifecycle actions
	(create/start/stop/delete/refresh) and volume actions
	(create/clone/delete) submitted via POST. Every action and libvirt
	error is persisted through add_error().

	:param request: Django HttpRequest
	:param host_id: primary key of the Host row owned by request.user
	:param pool: storage pool name, or 'new_stg_pool' for the create form
	"""

	if not request.user.is_authenticated():
		return HttpResponseRedirect('/')

	kvm_host = Host.objects.get(user=request.user.id, id=host_id)

	def add_error(msg, type_err):
		# Persist a log entry ('user' action or 'libvirt' error) for this host.
		error_msg = Log(host_id=host_id,
			            type=type_err,
			            message=msg,
			            user_id=request.user.id
			            )
		error_msg.save()

	def get_storages():
		# Map pool name -> active flag for both running and defined pools.
		try:
			storages = {}
			for name in conn.listStoragePools():
				stg = conn.storagePoolLookupByName(name)
				status = stg.isActive()
				storages[name] = status
			for name in conn.listDefinedStoragePools():
				stg = conn.storagePoolLookupByName(name)
				status = stg.isActive()
				storages[name] = status
			return storages
		except libvirt.libvirtError as e:
			add_error(e, 'libvirt')
			return "error"

	def vm_conn():
		# Open an authenticated qemu+tcp connection to the host.
		try:
			flags = [libvirt.VIR_CRED_AUTHNAME, libvirt.VIR_CRED_PASSPHRASE]
			auth = [flags, creds, None]
			uri = 'qemu+tcp://' + kvm_host.ipaddr + '/system'
			conn = libvirt.openAuth(uri, auth, 0)
			return conn
		except libvirt.libvirtError as e:
			add_error(e, 'libvirt')
			return "error"

	def get_vms():
		# Map domain name -> state for both running and defined domains.
		try:
			vname = {}
			for dom_id in conn.listDomainsID():
				dom_id = int(dom_id)
				dom = conn.lookupByID(dom_id)
				vname[dom.name()] = dom.info()[0]
			for name in conn.listDefinedDomains():
				dom = conn.lookupByName(name)
				vname[dom.name()] = dom.info()[0]
			return vname
		except libvirt.libvirtError as e:
			add_error(e, 'libvirt')
			return "error"

	# Credentials callback for libvirt.openAuth: use session-supplied
	# login/password when the Host row has none stored.
	if not kvm_host.login or not kvm_host.passwd:
		def creds(credentials, user_data):
			for credential in credentials:
				if credential[0] == libvirt.VIR_CRED_AUTHNAME:
					credential[4] = request.session['login_kvm']
					if len(credential[4]) == 0:
						credential[4] = credential[3]
				elif credential[0] == libvirt.VIR_CRED_PASSPHRASE:
					credential[4] = request.session['passwd_kvm']
				else:
					return -1
			return 0
	else:
		def creds(credentials, user_data):
			for credential in credentials:
				if credential[0] == libvirt.VIR_CRED_AUTHNAME:
					credential[4] = kvm_host.login
					if len(credential[4]) == 0:
						credential[4] = credential[3]
				elif credential[0] == libvirt.VIR_CRED_PASSPHRASE:
					credential[4] = kvm_host.passwd
				else:
					return -1
			return 0

	def get_conn_pool(pool):
		try:
			stg = conn.storagePoolLookupByName(pool)
			return stg
		except libvirt.libvirtError as e:
			add_error(e, 'libvirt')
			return "error"

	def pool_start():
		try:
			stg.create(0)
		except libvirt.libvirtError as e:
			add_error(e, 'libvirt')
			return "error"

	def pool_stop():
		try:
			stg.destroy()
		except libvirt.libvirtError as e:
			add_error(e, 'libvirt')
			return "error"

	def pool_delete():
		try:
			stg.undefine()
		except libvirt.libvirtError as e:
			add_error(e, 'libvirt')
			return "error"

	def pool_refresh():
		try:
			stg.refresh(0)
		except libvirt.libvirtError as e:
			add_error(e, 'libvirt')
			return "error"

	def get_stg_info(get):
		# Fetch one facet of the pool: 'info' (info list + usage percent),
		# 'status' (active flag), 'start' (autostart flag), 'list' (volumes).
		try:
			if get == "info":
				if stg.info()[3] == 0:
					percent = 0
				else:
					percent = (stg.info()[2] * 100) / stg.info()[1]
				stg_info = stg.info()
				stg_info.append(percent)
				return stg_info
			elif get == "status":
				return stg.isActive()
			elif get == "start":
				return stg.autostart()
			elif get == "list":
				return stg.listVolumes()
		except libvirt.libvirtError as e:
			add_error(e, 'libvirt')
			return "error"

	def get_type():
		try:
			xml = stg.XMLDesc(0)
			return util.get_xml_path(xml, "/pool/@type")
		except libvirt.libvirtError as e:
			add_error(e, 'libvirt')
			return "error"

	def get_target_path():
		try:
			xml = stg.XMLDesc(0)
			return util.get_xml_path(xml, "/pool/target/path")
		except libvirt.libvirtError as e:
			add_error(e, 'libvirt')
			return "error"

	def delete_volume(img):
		try:
			vol = stg.storageVolLookupByName(img)
			vol.delete(0)
		except libvirt.libvirtError as e:
			add_error(e, 'libvirt')
			return "error"

	def stg_set_autostart(pool):
		try:
			stg = conn.storagePoolLookupByName(pool)
			stg.setAutostart(1)
		except libvirt.libvirtError as e:
			add_error(e, 'libvirt')
			return "error"

	def create_volume(img, size_max):
		# Create an empty qcow2 volume of size_max GiB in the current pool.
		try:
			size_max = int(size_max) * 1073741824
			xml = """
				<volume>
					<name>%s.img</name>
					<capacity>%s</capacity>
					<allocation>0</allocation>
					<target>
						<format type='qcow2'/>
					</target>
				</volume>""" % (img, size_max)
			stg.createXML(xml,0)
		except libvirt.libvirtError as e:
			add_error(e, 'libvirt')
			return "error"

	def create_stg_pool(name_pool, path_pool):
		try:
			xml = """
				<pool type='dir'>
					<name>%s</name>
						<target>
							<path>%s</path>
						</target>
				</pool>""" % (name_pool, path_pool)
			conn.storagePoolDefineXML(xml,0)
		except libvirt.libvirtError as e:
			add_error(e, 'libvirt')
			return "error"

	def clone_volume(img, new_img):
		# Clone an existing volume into a new qcow2 volume of the same pool.
		try:
			vol = stg.storageVolLookupByName(img)
			xml = """
				<volume>
					<name>%s</name>
					<capacity>0</capacity>
					<allocation>0</allocation>
					<target>
						<format type='qcow2'/>
					</target>
				</volume>""" % (new_img)
			stg.createXMLFrom(xml, vol, 0)
		except libvirt.libvirtError as e:
			add_error(e, 'libvirt')
			return "error"

	def get_vl_info(listvol):
		# Map volume name -> (size, format) for every volume in the pool.
		try:
			volinfo = {}
			if stg.isActive() != 0:
				for name in listvol:
					vol = stg.storageVolLookupByName(name)
					xml = vol.XMLDesc(0)
					size = vol.info()[1]
					vol_format = util.get_xml_path(xml, "/volume/target/format/@type")
					volinfo[name] = size, vol_format
			return volinfo
		except libvirt.libvirtError as e:
			add_error(e, 'libvirt')
			return "error"

	conn = vm_conn()
	errors = []

	# NOTE(review): conn is only closed on the fall-through path at the
	# bottom; every early return leaks the connection — consider try/finally.
	if conn == "error":
		return HttpResponseRedirect('/overview/%s/' % (host_id))

	pools = get_storages()
	all_vm = get_vms()

	if pool != 'new_stg_pool':
		stg = get_conn_pool(pool)
		status = get_stg_info('status')
		if status == 1:
			pool_refresh()
			info = get_stg_info('info')
			stype = get_type()
			spath = get_target_path()
			start = get_stg_info('start')
			listvol = get_stg_info('list')
			volinfo = get_vl_info(listvol)
			hdd_size = range(1,321)
		errors = []

	if request.method == 'POST':
		if request.POST.get('new_stg_pool',''):
			name_pool = request.POST.get('name_pool','')
			path_pool = request.POST.get('path_pool','')
			simbol = re.search('[^a-zA-Z0-9\_]+', name_pool)
			if len(name_pool) > 20:
				msg = _('The name of the storage pool must not exceed 20 characters')
				errors.append(msg)
			if simbol:
				msg = _('The name of the storage pool must not contain any characters and Russian characters')
				errors.append(msg)
			if not name_pool:
				msg = _('Enter the name of the pool')
				errors.append(msg)
			if not path_pool:
				msg = _('Enter the path of the pool')
				errors.append(msg)
			if not errors:
				# BUGFIX: was `is "error"` — identity comparison on a string
				# literal only works by CPython interning accident; use ==.
				if create_stg_pool(name_pool, path_pool) == "error":
					msg = _('Such a pool already exists')
					errors.append(msg)
				else:
					stg = get_conn_pool(name_pool)
					stg_set_autostart(name_pool)
					# BUGFIX: was `is "error"` — same identity-comparison bug.
					if pool_start() == "error":
						msg = _('Pool is created, but when I run the pool fails, you may specify the path does not exist')
						errors.append(msg)
						return HttpResponseRedirect('/storage/%s/%s/' % (host_id, name_pool))
					else:
						msg = _('Creating a storage pool: ')
						msg = msg + name_pool
						add_error(msg,'user')
						return HttpResponseRedirect('/storage/%s/%s/' % (host_id, name_pool))
				if errors:
					return render_to_response('storage.html', locals())
		if request.POST.get('stop_pool',''):
			pool_stop()
			msg = _('Stop storage pool: ')
			msg = msg + pool
			add_error(msg,'user')
			return HttpResponseRedirect('/storage/%s/%s/' % (host_id, pool))
		if request.POST.get('start_pool',''):
			pool_start()
			msg = _('Start storage pool: ')
			msg = msg + pool
			add_error(msg,'user')
			return HttpResponseRedirect('/storage/%s/%s/' % (host_id, pool))
		if request.POST.get('del_pool',''):
			pool_delete()
			msg = _('Delete storage pool: ')
			msg = msg + pool
			add_error(msg,'user')
			return HttpResponseRedirect('/storage/%s/' % (host_id))
		if request.POST.get('vol_del',''):
			img = request.POST['img']
			delete_volume(img)
			msg = _('Delete image: ')
			msg = msg + img
			add_error(msg,'user')
			return HttpResponseRedirect('/storage/%s/%s/' % (host_id, pool))
		if request.POST.get('vol_add',''):
			img = request.POST.get('img','')
			size_max = request.POST.get('size_max','')
			simbol = re.search('[^a-zA-Z0-9\_]+', img)
			if len(img) > 20:
				msg = _('The name of the images must not exceed 20 characters')
				errors.append(msg)
			if simbol:
				msg = _('The name of the image must not contain any characters and Russian characters')
				errors.append(msg)
			if not img:
				msg = _('Enter image name')
				errors.append(msg)
			if not size_max:
				msg = _('Enter image size')
				errors.append(msg)
			if not errors:
				create_volume(img, size_max)
				msg = _('Create image: ')
				msg = msg + img + '.img'
				add_error(msg,'user')
				return HttpResponseRedirect('/storage/%s/%s/' % (host_id, pool))
		if request.POST.get('vol_clone',''):
			img = request.POST.get('img','')
			new_img = request.POST.get('new_img','')
			simbol = re.search('[^a-zA-Z0-9\_]+', new_img)
			new_img = new_img + '.img'
			if new_img == '.img':
				msg = _('Enter image name')
				errors.append(msg)
			if len(new_img) > 20:
				msg = _('The name of the images must not exceed 20 characters')
				errors.append(msg)
			if simbol:
				msg = _('The name of the image must not contain any characters and Russian characters')
				errors.append(msg)
			if new_img in listvol:
				msg = _('The image of the same name already exists')
				errors.append(msg)
			if re.search('.ISO', img) or re.search('.iso', img):
				msg = _('You can only clone a virtual machine images')
				errors.append(msg)
			if not errors:
				clone_volume(img, new_img)
				msg = _('Cloning image: ')
				msg = msg + img + ' => ' + new_img
				add_error(msg,'user')
				return HttpResponseRedirect('/storage/%s/%s/' % (host_id, pool))

	conn.close()

	return render_to_response('storage.html', locals())

Example 10

Project: ru
Source File: default.py
View license
def GetVideo(url):
	"""Resolve a hosting-page URL to a direct, playable video URL.

	Dispatches on the hosting site (vk.com, moonwalk, rutube, mail.ru,
	youtube, moevideo) and returns the stream URL — possibly with
	'|'-appended urlencoded HTTP headers — or False on failure.
	"""
	# --- vk.com / vkontakte: scrape flashvars, authorize if needed ---
	if re.search('vk\.com|vkontakte\.ru', url):
		http = GET(url)
		soup = bs(http, from_encoding = "windows-1251")
		#sdata1 = soup.find('div', class_ = "scroll_fix_wrap", id = "page_wrap")
		# This div is present when the video is restricted/removed and needs login.
		rmdata = soup.find('div', style = "position:absolute; top:50%; text-align:center; right:0pt; left:0pt; font-family:Tahoma; font-size:12px; color:#FFFFFF;")
		if rmdata:
			rmdata = rmdata.find('div', style = False, class_ = False)
			if rmdata.br: rmdata.br.replace_with(" ")
			rmdata = "".join(list(rmdata.strings)).strip().encode('utf-8')
			print rmdata
			vk_email = Addon.getSetting('vk_email')
			vk_pass = Addon.getSetting('vk_pass')
			# 'изъято' == content taken down; without credentials just show the message.
			if 'изъято' in rmdata or not vk_email:
				ShowMessage("ВКонтакте", rmdata, times = 20000)
				return False
			oid, id = re.findall('oid=([-0-9]*)&id=([0-9]*)', url)[0]
			url = 'http://vk.com/video' + oid + '_' + id
			#print url
			# Log in and retry the page with the remixsid session cookie.
			from vk_auth import vk_auth as vk
			vks = vk(vk_email, vk_pass)
			crid = vks.get_remixsid_cookie()
			if crid:
				if debug_mode: ShowMessage("ВКонтакте", "Применена авторизация")
			else:
				ShowMessage("ВКонтакте", "Ошибка авторизации")
				print "ошибка авторизации вконтакте"
				return False
			#print crid
			html = GET(url, headers = {"Cookie": crid})
			#print html
			rec = re.findall('var vars = ({.+?});', html)
			if rec:
				rec = rec[0]
				rec = rec.replace('\\', '')
			else:
				ShowMessage("ВКонтакте", "Видео недоступно")
				#print "видео недоступно"
				#if gebug_mode: print html
				return False
			#print 'rec: ' + str(rec)
			fvs = json.loads(rec, encoding = "windows-1251")
			#print json.dumps(fvs, indent = 1).encode('utf-8')
		else:
			# Public video: flashvars come from the embedded player <param>.
			rec = soup.find_all('param', {'name': 'flashvars'})[0]['value']
			fvs = urlparse.parse_qs(rec)
		#print json.dumps(fvs, indent = 1).encode('utf-8')
		uid = fvs['uid'][0]
		vtag = fvs['vtag'][0]
		#host = fvs['host'][0]
		#vid = fvs['vid'][0]
		#oid = fvs['oid'][0]
		# 'hd' flashvar maps to the best available quality tier.
		q_list = {None: '240', '1': '360', '2': '480', '3': '720'}
		hd = fvs['hd'] if 'hd' in fvs else None
		if isinstance(hd, list): hd = hd[0]
		if isinstance(hd, float): hd = str(int(hd))
		print q_list[hd] + "p"
		#burl = host + 'u' + uid + '/videos/' + vtag + '.%s.mp4'
		#q_url_map = {q: burl % q for q in q_list.values()}
		#print q_url_map
		url = fvs['url' + q_list[hd]]
		if isinstance(url, list): url = url[0]
		#url = url.replace('vk.me', 'vk.com')
		# Strip the 'v6' marker when the host is not a literal IPv4 address.
		sr = urlparse.urlsplit(url)
		if not IsIPv4(sr[1]):
			ipv = '6'
			url = url.replace('v6', '', 1)
		else: ipv = '4'
		if debug_mode: print 'IPv' + ipv
		#print url
		return url
	
	# --- moonwalk.cc: create a session, get an f4m/m3u8 manifest ---
	elif re.search('moonwalk\.cc|37\.220\.36\.\d{1,3}|serpens\.nl', url):
		page = GET(url)
		token = re.findall("video_token: '(.*?)'", page)[0]
		access_key = re.findall("access_key: '(.*?)'", page)[0]
		d_id = re.findall("d_id: (\d*)", page)[0]
		#referer = re.findall(r'player_url = "(.+?\.swf)";', page)[0]
		referer = url
		post = urllib.urlencode({"video_token": token, "access_key": access_key, "d_id": d_id, "content_type": 'movie'})
		#print post
		page = GET('http://moonwalk.cc/sessions/create_session', post = post, opts = 'xmlhttp', ref = url, headers = None)
		#print page
		page = json.loads(page)
		if use_ahds:
			url = page["manifest_f4m"]
		else:
			url = page["manifest_m3u8"]
		
		# Player headers are appended after '|' for Kodi's URL-with-headers syntax.
		headers = {'User-Agent': UA, 'Connection': 'Keep-Alive', 'Referer': 'http://37.220.36.28/static/player/player_base.swf'}
		url += '|' + urllib.urlencode(headers)
		#print url
		return url
	
	# --- rutube.ru: pull the m3u8 link out of the (HTML-escaped) page ---
	elif 'rutube.ru' in url:
		data = GET(url)
		#print data
		import HTMLParser
		hp = HTMLParser.HTMLParser()
		data = hp.unescape(data)
		match = re.compile('"m3u8": "(.+?)"').findall(data)
		#print match
		if len(match) > 0:
			url = match[0]
			return url
	
	# --- mail.ru: fetch metadata JSON, pick best quality, carry video_key cookie ---
	elif re.search('api\.video\.mail\.ru|videoapi\.my\.mail\.ru', url):
		data = GET(url)
		#match = re.compile('videoSrc = "(.+?)",').findall(data)
		match = re.compile('"metadataUrl":"(.+?)"').findall(data)
		if len(match) > 0:
			url = match[0]
		else:
			print "Mail.ru video parser is failed"
			ShowMessage(addon_name, "Mail.ru video parser is failed")
			return False
		data = GET(url, opts = 'headers')
		# The stream URL is only valid with the video_key cookie from this response.
		video_key_c = data[1].getheader('Set-Cookie')
		video_key_c = re.compile('(video_key=.+?;)').findall(video_key_c)
		if len(video_key_c) > 0:
			video_key_c = video_key_c[0]
		else:
			print "Mail.ru video parser is failed"
			ShowMessage(addon_name, "Mail.ru video parser is failed")
			return False
		jsdata = json.loads(data[0])
		vlist = jsdata['videos']
		# Sort by 'key' so the last entry is the highest quality variant.
		vlist.sort(key = lambda i: i['key'])
		vdata = vlist[-1]
		url = vdata['url']
		headers = {'Cookie': video_key_c}
		url += '|' + urllib.urlencode(headers)
		return url
	
	# --- youtube: hand the video id off to the youtube plugin ---
	elif 'youtube.com' in url:
		if '/embed/' in url:
			if debug_mode: print 'embed'
			video_id = re.findall('embed/(.+)\??', url)[0]
		else:
			finder = url.find('=')
			video_id = url[finder + 1:]
		url = 'plugin://plugin.video.youtube/?action=play_video&videoid=%s' % (video_id)
		print url
		return url
	
	# --- moevideo/playreplay/videochart: JSON-RPC-style /data endpoint ---
	elif re.search('moevideo\.net|playreplay\.net|videochart\.net', url):
		o = urlparse.urlparse(url)
		#print o
		uid = re.findall('http://(?:.+?)/framevideo/(.+?)\?', url)
		if uid: uid = uid[0]
		post = urllib.urlencode({"r": '[["file/flv_link",{"uid":"%s"}]]' % (uid)})
		purl = urlparse.urlunsplit((o.scheme, o.netloc, '/data', '' , ''))
		#print purl
		page = GET(purl, post = post)
		#print page
		page = json.loads(page)
		#print json.dumps(page, indent = 1).encode('utf-8')
		url = page['data'][0]['link']
		return url
		
	else:
		# Unknown hosting site (Russian UI message preserved verbatim).
		ShowMessage(addon_name, "Неизвестный видеохостинг: " + url)
		print "Неизвестный видеохостинг: " + url
		return False

Example 11

Project: SiCKRAGE
Source File: rarbg.py
View license
    def search(self, search_params, search_mode='eponly', epcount=0, age=0, epObj=None):
        """Query the RARBG API for torrents matching *search_params*.

        :param search_params: dict mapping a mode ('RSS', 'Season' or
            'Episode') to a list of search strings.
        :param search_mode: accepted for interface compatibility; unused here.
        :param epcount: accepted for interface compatibility; unused here.
        :param age: accepted for interface compatibility; unused here.
        :param epObj: optional episode object; when given, its show's
            indexer id enables TVDB-id based searches.
        :return: list of (title, download_url, size, seeders, leechers)
            tuples, sorted by seeders (descending) within each mode.
        """
        results = []
        items = {'Season': [], 'Episode': [], 'RSS': []}

        # A valid API token is required before any request.
        if not self.login():
            return results

        if epObj is not None:
            ep_indexerid = epObj.show.indexerid
            ep_indexer = epObj.show.indexer
        else:
            ep_indexerid = None
            ep_indexer = None

        for mode in search_params.keys():  # Mode = RSS, Season, Episode
            sickrage.srCore.srLogger.debug("Search Mode: %s" % mode)
            for search_string in search_params[mode]:

                if mode != 'RSS':
                    sickrage.srCore.srLogger.debug("Search string: %s " % search_string)

                # Build the base request URL for this mode.
                if mode == 'RSS':
                    searchURL = self.urls['listing'] + self.defaultOptions
                elif mode == 'Season':
                    if ep_indexer == INDEXER_TVDB:
                        searchURL = self.urls['search_tvdb'] % (ep_indexerid, search_string) + self.defaultOptions
                    else:
                        searchURL = self.urls['search'] % (search_string) + self.defaultOptions
                elif mode == 'Episode':
                    if ep_indexer == INDEXER_TVDB:
                        searchURL = self.urls['search_tvdb'] % (ep_indexerid, search_string) + self.defaultOptions
                    else:
                        searchURL = self.urls['search'] % (search_string) + self.defaultOptions
                else:
                    sickrage.srCore.srLogger.error("Invalid search mode: %s " % mode)
                    # Bugfix: skip this search string. Previously execution
                    # fell through and used searchURL, which is undefined on
                    # the first iteration (NameError) or stale afterwards.
                    continue

                # Append optional filter/sort parameters.
                if self.minleech:
                    searchURL += self.urlOptions['leechers'].format(min_leechers=int(self.minleech))

                if self.minseed:
                    searchURL += self.urlOptions['seeders'].format(min_seeders=int(self.minseed))

                if self.sorting:
                    searchURL += self.urlOptions['sorting'].format(sorting=self.sorting)

                if self.ranked:
                    searchURL += self.urlOptions['ranked'].format(ranked=int(self.ranked))

                sickrage.srCore.srLogger.debug("Search URL: %s" % searchURL)

                try:
                    # Up to 3 attempts; StopIteration aborts this search
                    # string entirely (caught below).
                    retry = 3
                    while retry > 0:
                        # Honor the provider's rate limit: wait (at most
                        # ~15s) until the earliest allowed request time.
                        time_out = 0
                        while (datetime.datetime.now() < self.next_request) and time_out <= 15:
                            time_out += 1
                            time.sleep(1)

                        self.next_request = datetime.datetime.now() + datetime.timedelta(seconds=10)

                        try:
                            data = sickrage.srCore.srWebSession.get(searchURL + self.urlOptions['token'].format(token=self.token)).text
                        except Exception:
                            sickrage.srCore.srLogger.debug("No data returned from provider")
                            raise StopIteration

                        # The API reports errors as plain-text markers in the
                        # response body; inspect them before parsing.
                        if re.search('ERROR', data):
                            sickrage.srCore.srLogger.debug("Error returned from provider")
                            raise StopIteration
                        if re.search('No results found', data):
                            sickrage.srCore.srLogger.debug("No results found")
                            raise StopIteration
                        if re.search('Invalid token set!', data):
                            sickrage.srCore.srLogger.warning("Invalid token!")
                            return results
                        if re.search('Too many requests per minute. Please try again later!', data):
                            sickrage.srCore.srLogger.warning("Too many requests per minute")
                            retry -= 1
                            time.sleep(10)
                            continue
                        if re.search('Cant find search_tvdb in database. Are you sure this imdb exists?', data):
                            sickrage.srCore.srLogger.warning(
                                "No results found. The tvdb id: %s do not exist on provider" % ep_indexerid)
                            raise StopIteration
                        if re.search('Invalid token. Use get_token for a new one!', data):
                            sickrage.srCore.srLogger.debug("Invalid token, retrieving new token")
                            retry -= 1
                            self.token = None
                            self.tokenExpireDate = None
                            if not self.login():
                                sickrage.srCore.srLogger.debug("Failed retrieving new token")
                                return results
                            sickrage.srCore.srLogger.debug("Using new token")
                            continue

                        # No error found break
                        break
                    else:
                        # while-else: all retries exhausted without a break.
                        sickrage.srCore.srLogger.debug("Retried 3 times without getting results")
                        continue
                except StopIteration:
                    continue

                try:
                    # The response embeds a JSON array of result objects;
                    # extract just that array before decoding.
                    data = re.search(r'\[\{\"title\".*\}\]', data)
                    if data is not None:
                        data_json = json.loads(data.group())
                    else:
                        data_json = {}
                except Exception:
                    sickrage.srCore.srLogger.error("JSON load failed: %s" % traceback.format_exc())
                    sickrage.srCore.srLogger.debug("JSON load failed. Data dump: %s" % data)
                    continue

                try:
                    for item in data_json:
                        try:
                            title = item['title']
                            download_url = item['download']
                            size = convert_size(item['size'])
                            seeders = item['seeders']
                            leechers = item['leechers']
                            # pubdate = item['pubdate']

                            # Skip results missing a title or download link.
                            if not all([title, download_url]):
                                continue

                            item = title, download_url, size, seeders, leechers
                            if mode != 'RSS':
                                sickrage.srCore.srLogger.debug("Found result: %s " % title)
                            items[mode].append(item)

                        except Exception:
                            sickrage.srCore.srLogger.debug("Skipping invalid result. JSON item: {}".format(item))

                except Exception:
                    sickrage.srCore.srLogger.error("Failed parsing provider. Traceback: %s" % traceback.format_exc())

            # For each search mode sort all the items by seeders
            items[mode].sort(key=lambda tup: tup[3], reverse=True)
            results += items[mode]

        return results

Example 12

Project: MininGit
Source File: GitParser.py
View license
    def _parse_line(self, line):
        """Parse a single line of `git log` output, updating parser state.

        Dispatches on the line type via the precompiled regexes in
        self.patterns (commit header, committer, author, dates, file
        actions); any unmatched line is appended to the current commit's
        message. Mutates self.commit, self.branch and self.branches.
        """
        # Nothing to do for missing/empty lines.
        if line is None or line == '':
            return

        # Ignore
        for patt in self.patterns['ignore']:
            if patt.match(line):
                return

        # Commit
        match = self.patterns['commit'].match(line)
        if match:
            # A new commit header means the previous commit is complete;
            # flush it to the handler first (unless it sits on an svn tag).
            if self.commit is not None:
                # Skip commits on svn tags
                if self.branch.tail.svn_tag is None:
                    self.handler.commit(self.branch.tail.commit)

            if self.patterns['replace-commit'].search(line):
                printdbg("Skipping commit, because it's a replacement")
                self.commit = None

                return

            self.commit = Commit()
            self.commit.revision = match.group(1)

            parents = match.group(3)
            if parents:
                parents = parents.split()
            git_commit = self.GitCommit(self.commit, parents)

            # If a specific branch has been configured, there
            # won't be any decoration, so a branch needs to be
            # created
            if Config().branch is not None:
                self.branch = self.GitBranch(self.GitBranch.LOCAL,
                                             Config().branch,
                                             git_commit)

            decorate = match.group(5)
            branch = None
            if decorate:
                # Remote branch
                m = re.search(self.patterns['branch'], decorate)
                if m:
                    branch = self.GitBranch(self.GitBranch.REMOTE, m.group(2),
                                            git_commit)
                    printdbg("Branch '%s' head at acommit %s",
                             (branch.name, self.commit.revision))
                else:
                    # Local Branch
                    m = re.search(self.patterns['local-branch'], decorate)
                    if m:
                        branch = self.GitBranch(self.GitBranch.LOCAL,
                                                m.group(1), git_commit)
                        printdbg("Commit %s on local branch '%s'",
                                 (self.commit.revision, branch.name))
                        # If local branch was merged we just ignore this
                        # decoration
                        if self.branch and \
                        self.branch.is_my_parent(git_commit):
                            printdbg("Local branch '%s' was merged",
                                     (branch.name,))
                            branch = None
                    else:
                        # Stash
                        m = re.search(self.patterns['stash'], decorate)
                        if m:
                            branch = self.GitBranch(self.GitBranch.STASH,
                                                    "stash", git_commit)
                            printdbg("Commit %s on stash",
                                     (self.commit.revision,))
                # Tag
                m = re.search(self.patterns['tag'], decorate)
                if m:
                    self.commit.tags = [m.group(1)]
                    printdbg("Commit %s tagged as '%s'",
                             (self.commit.revision, self.commit.tags[0]))

            if branch is not None and self.branch is not None:
                # Detect empty branches. Ideally, the head of a branch
                # can't have children. When this happens is because the
                # branch is empty, so we just ignore such branch
                if self.branch.is_my_parent(git_commit):
                    printout("Warning: Detected empty branch '%s', " + \
                             "it'll be ignored", (branch.name,))
                    branch = None

            if len(self.branches) >= 2:
                # If current commit is the start point of a new branch
                # we have to look at all the current branches since
                # we haven't inserted the new branch yet.
                # If not, look at all other branches excluding the current one
                for i, b in enumerate(self.branches):
                    if i == 0 and branch is None:
                        continue

                    if b.is_my_parent(git_commit):
                        # We assume current branch is always the last one
                        # AFAIK there's no way to make sure this is right
                        printdbg("Start point of branch '%s' at commit %s",
                                 (self.branches[0].name, self.commit.revision))
                        self.branches.pop(0)
                        self.branch = b

            if self.branch and self.branch.tail.svn_tag is not None and \
            self.branch.is_my_parent(git_commit):
                # There's a pending tag in previous commit
                pending_tag = self.branch.tail.svn_tag
                printdbg("Move pending tag '%s' from previous commit %s " + \
                         "to current %s", (pending_tag,
                                           self.branch.tail.commit.revision,
                                           self.commit.revision))
                if self.commit.tags and pending_tag not in self.commit.tags:
                    self.commit.tags.append(pending_tag)
                else:
                    self.commit.tags = [pending_tag]
                self.branch.tail.svn_tag = None

            if branch is not None:
                self.branch = branch

                # Insert master always at the end
                if branch.is_remote() and branch.name == 'master':
                    self.branches.append(self.branch)
                else:
                    self.branches.insert(0, self.branch)
            else:
                self.branch.set_tail(git_commit)

            if parents and len(parents) > 1 and not Config().analyze_merges:
                #Skip merge commits
                self.commit = None

            return
        elif self.commit is None:
            # No current commit: remaining line types only make sense
            # inside a commit, so bail out.
            return

        # Committer
        match = self.patterns['committer'].match(line)
        if match:
            self.commit.committer = Person()
            self.commit.committer.name = match.group(1)
            self.commit.committer.email = match.group(2)
            self.handler.committer(self.commit.committer)

            return

        # Author
        match = self.patterns['author'].match(line)
        if match:
            self.commit.author = Person()
            self.commit.author.name = match.group(1)
            self.commit.author.email = match.group(2)
            self.handler.author(self.commit.author)

            return

        # Commit Date
        match = self.patterns['commit-date'].match(line)
        if match:
            self.commit.commit_date = datetime.datetime(*(time.strptime(\
                match.group(1).strip(" "), "%a %b %d %H:%M:%S %Y")[0:6]))

            return

        # Author Date
        match = self.patterns['author-date'].match(line)
        if match:
            self.commit.author_date = datetime.datetime(*(time.strptime(\
                match.group(1).strip(" "), "%a %b %d %H:%M:%S %Y")[0:6]))

            return

        # File
        match = self.patterns['file'].match(line)
        if match:
            action = Action()
            action.type = match.group(1)
            action.f1 = match.group(2)

            self.commit.actions.append(action)
            self.handler.file(action.f1)

            return

        # File moved/copied
        match = self.patterns['file-moved'].match(line)
        if match:
            action = Action()
            type = match.group(1)
            # Git reports renames as 'R'; this parser stores them as 'V'.
            if type == 'R':
                action.type = 'V'
            else:
                action.type = type
            action.f1 = match.group(3)
            action.f2 = match.group(2)
            action.rev = self.commit.revision

            self.commit.actions.append(action)
            self.handler.file(action.f1)

            return

        # Message
        self.commit.message += line + '\n'

        # NOTE(review): 'assert True' can never fail, so this line is a
        # no-op; presumably 'assert False' was intended as an unreachable
        # marker — confirm before relying on it.
        assert True, "Not match for line %s" % (line)

Example 13

Project: maltrail
Source File: update.py
View license
def update_trails(server=None, force=False, offline=False):
    """
    Update trails from feeds

    When `server` is given, the trails file is downloaded from that
    UPDATE_SERVER URL; otherwise, if the local TRAILS_FILE is missing,
    stale (older than config.UPDATE_PERIOD or older than any trail
    definition file) or `force` is set, every feed module under trails/
    (plus static/custom definitions) is run, the collected entries are
    normalized and whitelisted, and the result is written to TRAILS_FILE
    as CSV rows of (trail, info, reference).

    Returns the resulting dict mapping trail -> (info, reference).
    """

    trails = {}
    duplicates = {}

    if server:
        print "[i] retrieving trails from provided 'UPDATE_SERVER' server..."
        _ = retrieve_content(server)
        if not _:
            exit("[!] unable to retrieve data from '%s'" % server)
        else:
            with _fopen(TRAILS_FILE, "w+b") as f:
                f.write(_)
            trails = load_trails()

    # Collect every trail definition file; their mtimes decide staleness.
    trail_files = set()
    for dirpath, dirnames, filenames in os.walk(os.path.abspath(os.path.join(ROOT_DIR, "trails"))) :
        for filename in filenames:
            trail_files.add(os.path.abspath(os.path.join(dirpath, filename)))

    if config.CUSTOM_TRAILS_DIR:
        for dirpath, dirnames, filenames in os.walk(os.path.abspath(os.path.join(ROOT_DIR, os.path.expanduser(config.CUSTOM_TRAILS_DIR)))) :
            for filename in filenames:
                trail_files.add(os.path.abspath(os.path.join(dirpath, filename)))

    try:
        if not os.path.isdir(USERS_DIR):
            os.makedirs(USERS_DIR, 0755)
    except Exception, ex:
        exit("[!] something went wrong during creation of directory '%s' ('%s')" % (USERS_DIR, ex))

    _chown(USERS_DIR)

    # Rebuild only when forced, the trails file is missing/empty/stale, or
    # any trail definition file is newer than the cached TRAILS_FILE.
    if not trails and (force or not os.path.isfile(TRAILS_FILE) or (time.time() - os.stat(TRAILS_FILE).st_mtime) >= config.UPDATE_PERIOD or os.stat(TRAILS_FILE).st_size == 0 or any(os.stat(_).st_mtime > os.stat(TRAILS_FILE).st_mtime for _ in trail_files)):
        print "[i] updating trails (this might take a while)..."

        if not offline and (force or config.USE_FEED_UPDATES):
            sys.path.append(os.path.abspath(os.path.join(ROOT_DIR, "trails", "feeds")))
            filenames = sorted(glob.glob(os.path.join(sys.path[-1], "*.py")))
        else:
            filenames = []

        sys.path.append(os.path.abspath(os.path.join(ROOT_DIR, "trails")))
        filenames += [os.path.join(sys.path[-1], "static")]
        filenames += [os.path.join(sys.path[-1], "custom")]

        filenames = [_ for _ in filenames if "__init__.py" not in _]

        for i in xrange(len(filenames)):
            filename = filenames[i]

            try:
                module = __import__(os.path.basename(filename).split(".py")[0])
            except (ImportError, SyntaxError), ex:
                print "[x] something went wrong during import of feed file '%s' ('%s')" % (filename, ex)
                continue

            # Each feed module exposes a fetch() returning {trail: (info, reference)}.
            for name, function in inspect.getmembers(module, inspect.isfunction):
                if name == "fetch":
                    print(" [o] '%s'%s" % (module.__url__, " " * 20 if len(module.__url__) < 20 else ""))
                    sys.stdout.write("[?] progress: %d/%d (%d%%)\r" % (i, len(filenames), i * 100 / len(filenames)))
                    sys.stdout.flush()
                    try:
                        results = function()
                        for item in results.items():
                            if item[0].startswith("www.") and '/' not in item[0]:
                                item = [item[0][len("www."):], item[1]]
                            if item[0] in trails:
                                if item[0] not in duplicates:
                                    duplicates[item[0]] = set((trails[item[0]][1],))
                                duplicates[item[0]].add(item[1][1])
                            # Prefer high-priority references/keywords over
                            # low-priority entries when trails collide.
                            if not (item[0] in trails and (any(_ in item[1][0] for _ in LOW_PRIORITY_INFO_KEYWORDS) or trails[item[0]][1] in HIGH_PRIORITY_REFERENCES)) or (item[1][1] in HIGH_PRIORITY_REFERENCES and "history" not in item[1][0]) or any(_ in item[1][0] for _ in HIGH_PRIORITY_INFO_KEYWORDS):
                                trails[item[0]] = item[1]
                        if not results and "abuse.ch" not in module.__url__:
                            print "[x] something went wrong during remote data retrieval ('%s')" % module.__url__
                    except Exception, ex:
                        print "[x] something went wrong during processing of feed file '%s' ('%s')" % (filename, ex)

        # basic cleanup
        # NOTE: Python 2 dict.keys() returns a list snapshot, so deleting
        # entries while iterating is safe here.
        for key in trails.keys():
            if key not in trails:
                continue
            if not key or re.search(r"\A(?i)\.?[a-z]+\Z", key) and not any(_ in trails[key][1] for _ in ("custom", "static")):
                del trails[key]
                continue
            if re.search(r"\A\d+\.\d+\.\d+\.\d+\Z", key):
                if any(_ in trails[key][0] for _ in ("parking site", "sinkhole")) and key in duplicates:
                    del duplicates[key]
                if trails[key][0] == "malware":
                    trails[key] = ("potential malware site", trails[key][1])
            if trails[key][0] == "ransomware":
                trails[key] = ("ransomware (malware)", trails[key][1])
            # Normalize keys: strip "www.", query strings, doubled slashes
            # and upper-case characters, re-inserting under the clean key.
            if key.startswith("www.") and '/' not in key:
                _ = trails[key]
                del trails[key]
                key = key[len("www."):]
                if key:
                    trails[key] = _
            if '?' in key:
                _ = trails[key]
                del trails[key]
                key = key.split('?')[0]
                if key:
                    trails[key] = _
            if '//' in key:
                _ = trails[key]
                del trails[key]
                key = key.replace('//', '/')
                trails[key] = _
            if key != key.lower():
                _ = trails[key]
                del trails[key]
                key = key.lower()
                trails[key] = _
            if key in duplicates:
                _ = trails[key]
                others = sorted(duplicates[key] - set((_[1],)))
                if others and " (+" not in _[1]:
                    trails[key] = (_[0], "%s (+%s)" % (_[1], ','.join(others)))

        read_whitelist()

        # Drop whitelisted, bad-prefix, CDN-IP and non-UTF8 entries.
        for key in trails.keys():
            if check_whitelisted(key) or any(key.startswith(_) for _ in BAD_TRAIL_PREFIXES):
                del trails[key]
            elif re.search(r"\A\d+\.\d+\.\d+\.\d+\Z", key) and cdn_ip(key):
                del trails[key]
            else:
                try:
                    key.decode("utf8")
                    trails[key][0].decode("utf8")
                    trails[key][1].decode("utf8")
                except UnicodeDecodeError:
                    del trails[key]

        try:
            if trails:
                with _fopen(TRAILS_FILE, "w+b") as f:
                    writer = csv.writer(f, delimiter=',', quotechar='\"', quoting=csv.QUOTE_MINIMAL)
                    for trail in trails:
                        writer.writerow((trail, trails[trail][0], trails[trail][1]))

        except Exception, ex:
            print "[x] something went wrong during trails file write '%s' ('%s')" % (TRAILS_FILE, ex)

        print "[i] update finished%s" % (40 * " ")

    return trails

Example 14

Project: maltrail
Source File: sensor.py
View license
def _process_packet(packet, sec, usec, ip_offset):
    """
    Processes single (raw) IP layer data
    """

    global _connect_sec
    global _last_syn
    global _last_logged_syn
    global _last_udp
    global _last_logged_udp
    global _last_dns_exhaustion
    global _subdomains_sec

    try:
        if len(_result_cache) > MAX_RESULT_CACHE_ENTRIES:
            _result_cache.clear()

        if config.USE_HEURISTICS:
            if _locks.connect_sec:
                _locks.connect_sec.acquire()

            connect_sec = _connect_sec
            _connect_sec = sec

            if _locks.connect_sec:
                _locks.connect_sec.release()

            if sec > connect_sec:
                for key in _connect_src_dst:
                    if len(_connect_src_dst[key]) > PORT_SCANNING_THRESHOLD:
                        _src_ip, _dst_ip = key.split('~')
                        if not check_whitelisted(_src_ip):
                            for _ in _connect_src_details[key]:
                                log_event((sec, usec, _src_ip, _[2], _dst_ip, _[3], PROTO.TCP, TRAIL.IP, _src_ip, "potential port scanning", "(heuristic)"), packet)

                _connect_src_dst.clear()
                _connect_src_details.clear()

        ip_data = packet[ip_offset:]
        ip_version = ord(ip_data[0]) >> 4
        localhost_ip = LOCALHOST_IP[ip_version]

        if ip_version == 0x04:  # IPv4
            ip_header = struct.unpack("!BBHHHBBH4s4s", ip_data[:20])
            iph_length = (ip_header[0] & 0xf) << 2
            protocol = ip_header[6]
            src_ip = socket.inet_ntoa(ip_header[8])
            dst_ip = socket.inet_ntoa(ip_header[9])
        elif ip_version == 0x06:  # IPv6
            # Reference: http://chrisgrundemann.com/index.php/2012/introducing-ipv6-understanding-ipv6-addresses/
            ip_header = struct.unpack("!BBHHBB16s16s", ip_data[:40])
            iph_length = 40
            protocol = ip_header[4]
            src_ip = inet_ntoa6(ip_header[6])
            dst_ip = inet_ntoa6(ip_header[7])
        else:
            return

        if protocol == socket.IPPROTO_TCP:  # TCP
            src_port, dst_port, _, _, doff_reserved, flags = struct.unpack("!HHLLBB", ip_data[iph_length:iph_length+14])

            if flags != 2 and config.plugin_functions:
                if dst_ip in trails:
                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.IP, dst_ip, trails[dst_ip][0], trails[dst_ip][1]), packet, skip_write=True)
                elif src_ip in trails and dst_ip != localhost_ip:
                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.IP, src_ip, trails[src_ip][0], trails[src_ip][1]), packet, skip_write=True)

            if flags == 2:  # SYN set (only)
                _ = _last_syn
                _last_syn = (sec, src_ip, src_port, dst_ip, dst_port)
                if _ == _last_syn:  # skip bursts
                    return

                if dst_ip in trails:
                    _ = _last_logged_syn
                    _last_logged_syn = _last_syn
                    if _ != _last_logged_syn:
                        log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.IP, dst_ip, trails[dst_ip][0], trails[dst_ip][1]), packet)

                elif src_ip in trails and dst_ip != localhost_ip:
                    _ = _last_logged_syn
                    _last_logged_syn = _last_syn
                    if _ != _last_logged_syn:
                        log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.IP, src_ip, trails[src_ip][0], trails[src_ip][1]), packet)

                if config.USE_HEURISTICS:
                    if dst_ip != localhost_ip:
                        key = "%s~%s" % (src_ip, dst_ip)
                        if key not in _connect_src_dst:
                            _connect_src_dst[key] = set()
                            _connect_src_details[key] = set()
                        _connect_src_dst[key].add(dst_port)
                        _connect_src_details[key].add((sec, usec, src_port, dst_port))

            else:
                tcph_length = doff_reserved >> 4
                h_size = iph_length + (tcph_length << 2)
                tcp_data = ip_data[h_size:]

                if tcp_data.startswith("HTTP/"):
                    if any(_ in tcp_data[:tcp_data.find("\r\n\r\n")] for _ in ("X-Sinkhole:", "X-Malware-Sinkhole:", "Server: You got served", "Server: Apache 1.0/SinkSoft", "sinkdns.org")) or "\r\n\r\nsinkhole" in tcp_data:
                        log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.IP, src_ip, "sinkhole response (malware)", "(heuristic)"), packet)
                    else:
                        index = tcp_data.find("<title>")
                        if index >= 0:
                            title = tcp_data[index + len("<title>"):tcp_data.find("</title>", index)]
                            if all(_ in title.lower() for _ in ("this domain", "has been seized")):
                                log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.IP, title, "seized domain (suspicious)", "(heuristic)"), packet)

                    content_type = None
                    first_index = tcp_data.find("\r\nContent-Type:")
                    if first_index >= 0:
                        first_index = first_index + len("\r\nContent-Type:")
                        last_index = tcp_data.find("\r\n", first_index)
                        if last_index >= 0:
                            content_type = tcp_data[first_index:last_index].strip().lower()

                    if content_type and content_type in SUSPICIOUS_CONTENT_TYPES:
                        log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.HTTP, content_type, "content type (suspicious)", "(heuristic)"), packet)

                method, path = None, None
                index = tcp_data.find("\r\n")
                if index >= 0:
                    line = tcp_data[:index]
                    if line.count(' ') == 2 and " HTTP/" in line:
                        method, path, _ = line.split(' ')

                if method and path:
                    post_data = None
                    host = dst_ip
                    first_index = tcp_data.find("\r\nHost:")

                    if first_index >= 0:
                        first_index = first_index + len("\r\nHost:")
                        last_index = tcp_data.find("\r\n", first_index)
                        if last_index >= 0:
                            host = tcp_data[first_index:last_index]
                            host = host.strip().lower()
                            if host.endswith(":80"):
                                host = host[:-3]
                            if host and host[0].isalpha() and dst_ip in trails:
                                log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.IP, "%s (%s)" % (dst_ip, host.split(':')[0]), trails[dst_ip][0], trails[dst_ip][1]), packet)
                            elif config.CHECK_HOST_DOMAINS and not host.replace('.', "").isdigit():
                                _check_domain(host, sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, packet)
                    elif config.USE_HEURISTICS and config.CHECK_MISSING_HOST:
                        log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.HTTP, "%s%s" % (host, path), "missing host header (suspicious)", "(heuristic)"), packet)

                    index = tcp_data.find("\r\n\r\n")
                    if index >= 0:
                        post_data = tcp_data[index + 4:]

                    if config.USE_HEURISTICS and dst_port == 80 and path.startswith("http://") and not _check_domain_whitelisted(urlparse.urlparse(path).netloc.split(':')[0]):
                        log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.HTTP, path, "potential proxy probe (suspicious)", "(heuristic)"), packet)
                        return
                    elif "://" in path:
                        url = path.split("://", 1)[1]

                        if '/' not in url:
                            url = "%s/" % url

                        host, path = url.split('/', 1)
                        if host.endswith(":80"):
                            host = host[:-3]
                        path = "/%s" % path
                        proxy_domain = host.split(':')[0]
                        _check_domain(proxy_domain, sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, packet)
                    elif method == "CONNECT":
                        if '/' in path:
                            host, path = path.split('/', 1)
                            path = "/%s" % path
                        else:
                            host, path = path, '/'
                        if host.endswith(":80"):
                            host = host[:-3]
                        url = "%s%s" % (host, path)
                        proxy_domain = host.split(':')[0]
                        _check_domain(proxy_domain, sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, packet)
                    else:
                        url = "%s%s" % (host, path)

                    if config.USE_HEURISTICS:
                        user_agent, result = None, None

                        first_index = tcp_data.find("\r\nUser-Agent:")
                        if first_index >= 0:
                            first_index = first_index + len("\r\nUser-Agent:")
                            last_index = tcp_data.find("\r\n", first_index)
                            if last_index >= 0:
                                user_agent = tcp_data[first_index:last_index]
                                user_agent = urllib.unquote(user_agent).strip()

                        if user_agent:
                            result = _result_cache.get(user_agent)
                            if result is None:
                                if not any(_ in user_agent for _ in WHITELIST_UA_KEYWORDS):
                                    match = re.search(SUSPICIOUS_UA_REGEX, user_agent)
                                    if match:
                                        def _(value):
                                            return value.replace('(', "\\(").replace(')', "\\)")

                                        parts = user_agent.split(match.group(0), 1)

                                        if len(parts) > 1 and parts[0] and parts[-1]:
                                            result = _result_cache[user_agent] = "%s (%s)" % (_(match.group(0)), _(user_agent))
                                        else:
                                            result = _result_cache[user_agent] = _(match.group(0)).join(("(%s)" if part else "%s") % _(part) for part in parts)
                                if not result:
                                    _result_cache[user_agent] = False

                            if result:
                                log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.UA, result, "user agent (suspicious)", "(heuristic)"), packet)

                    checks = [path.rstrip('/')]
                    if '?' in path:
                        checks.append(path.split('?')[0].rstrip('/'))

                    _ = os.path.splitext(checks[-1])
                    if _[1]:
                        checks.append(_[0])

                    if checks[-1].count('/') > 1:
                        checks.append(checks[-1][:checks[-1].rfind('/')])
                        checks.append(checks[0][checks[0].rfind('/'):].split('?')[0])

                    for check in filter(None, checks):
                        for _ in ("", host):
                            check = "%s%s" % (_, check)
                            if check in trails:
                                parts = url.split(check)
                                other = ("(%s)" % _ if _ else _ for _ in parts)
                                trail = check.join(other)
                                log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.URL, trail, trails[check][0], trails[check][1]))
                                return

                    if config.USE_HEURISTICS:
                        unquoted_path = urllib.unquote(path)
                        unquoted_post_data = urllib.unquote(post_data or "")
                        for char in SUSPICIOUS_HTTP_REQUEST_FORCE_ENCODE_CHARS:
                            replacement = SUSPICIOUS_HTTP_REQUEST_FORCE_ENCODE_CHARS[char]
                            path = path.replace(char, replacement)
                            if post_data:
                                post_data = post_data.replace(char, replacement)

                        if not _check_domain_whitelisted(host):
                            if not any(_ in unquoted_path.lower() for _ in WHITELIST_HTTP_REQUEST_PATHS):
                                if any(_ in unquoted_path for _ in SUSPICIOUS_HTTP_REQUEST_PRE_CONDITION):
                                    found = _result_cache.get(unquoted_path)
                                    if found is None:
                                        for desc, regex in SUSPICIOUS_HTTP_REQUEST_REGEXES:
                                            if re.search(regex, unquoted_path, re.I | re.DOTALL):
                                                found = desc
                                                break
                                        _result_cache[unquoted_path] = found or ""
                                    if found:
                                        trail = "%s(%s)" % (host, path)
                                        log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.URL, trail, "%s (suspicious)" % found, "(heuristic)"), packet)
                                        return

                                if any(_ in unquoted_post_data for _ in SUSPICIOUS_HTTP_REQUEST_PRE_CONDITION):
                                    found = _result_cache.get(unquoted_post_data)
                                    if found is None:
                                        for desc, regex in SUSPICIOUS_HTTP_REQUEST_REGEXES:
                                            if re.search(regex, unquoted_post_data, re.I | re.DOTALL):
                                                found = desc
                                                break
                                        _result_cache[unquoted_post_data] = found or ""
                                    if found:
                                        trail = "%s(%s \(%s %s\))" % (host, path, method, post_data.strip())
                                        log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.HTTP, trail, "%s (suspicious)" % found, "(heuristic)"), packet)
                                        return

                            if '.' in path:
                                _ = urlparse.urlparse("http://%s" % url)  # dummy scheme
                                path = path.lower()
                                filename = _.path.split('/')[-1]
                                name, extension = os.path.splitext(filename)
                                trail = "%s(%s)" % (host, path)
                                if extension and extension in SUSPICIOUS_DIRECT_DOWNLOAD_EXTENSIONS and not any(_ in path for _ in WHITELIST_DIRECT_DOWNLOAD_KEYWORDS) and '=' not in _.query and len(name) < 10:
                                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.URL, trail, "direct %s download (suspicious)" % extension, "(heuristic)"), packet)
                                elif filename in WEB_SHELLS:
                                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.URL, trail, "potential web shell (suspicious)", "(heuristic)"), packet)
                                else:
                                    for desc, regex in SUSPICIOUS_HTTP_PATH_REGEXES:
                                        if re.search(regex, filename, re.I):
                                            log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.URL, trail, "%s (suspicious)" % desc, "(heuristic)"), packet)
                                            break

        elif protocol == socket.IPPROTO_UDP:  # UDP
            _ = ip_data[iph_length:iph_length + 4]
            if len(_) < 4:
                return

            src_port, dst_port = struct.unpack("!HH", _)

            _ = _last_udp
            _last_udp = (sec, src_ip, src_port, dst_ip, dst_port)
            if _ == _last_udp:  # skip bursts
                return

            if src_port != 53 and dst_port != 53:  # not DNS
                if dst_ip in trails:
                    trail = dst_ip
                elif src_ip in trails:
                    trail = src_ip
                else:
                    trail = None

                if trail:
                    _ = _last_logged_udp
                    _last_logged_udp = _last_udp
                    if _ != _last_logged_udp:
                        log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.IP, trail, trails[trail][0], trails[trail][1]), packet)

            else:
                dns_data = ip_data[iph_length + 8:]

                # Reference: http://www.ccs.neu.edu/home/amislove/teaching/cs4700/fall09/handouts/project1-primer.pdf
                if len(dns_data) > 6:
                    qdcount = struct.unpack("!H", dns_data[4:6])[0]
                    if qdcount > 0:
                        offset = 12
                        query = ""

                        while len(dns_data) > offset:
                            length = ord(dns_data[offset])
                            if not length:
                                query = query[:-1]
                                break
                            query += dns_data[offset + 1:offset + length + 1] + '.'
                            offset += length + 1

                        query = query.lower()

                        if not query or '.' not in query or not all(_ in VALID_DNS_CHARS for _ in query) or any(_ in query for _ in (".intranet.",)) or any(query.endswith(_) for _ in IGNORE_DNS_QUERY_SUFFIXES):
                            return

                        parts = query.split('.')

                        if ord(dns_data[2]) == 0x01:  # standard query
                            type_, class_ = struct.unpack("!HH", dns_data[offset + 1:offset + 5])

                            if len(parts) > 2:
                                domain = '.'.join(parts[-2:])

                                if not _check_domain_whitelisted(domain):  # e.g. <hash>.hashserver.cs.trendmicro.com
                                    if (sec - (_subdomains_sec or 0)) > DAILY_SECS:
                                        _subdomains.clear()
                                        _dns_exhausted_domains.clear()
                                        _subdomains_sec = sec

                                    subdomains = _subdomains.get(domain)

                                    if not subdomains:
                                        subdomains = _subdomains[domain] = set()

                                    if len(subdomains) < DNS_EXHAUSTION_THRESHOLD:
                                        subdomains.add('.'.join(parts[:-2]))
                                    else:
                                        if (sec - (_last_dns_exhaustion or 0)) > 60:
                                            trail = "(%s).%s" % ('.'.join(parts[:-2]), '.'.join(parts[-2:]))
                                            log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.DNS, trail, "potential dns exhaustion (suspicious)", "(heuristic)"), packet)
                                            _dns_exhausted_domains.add(domain)
                                            _last_dns_exhaustion = sec

                                        return

                            # Reference: http://en.wikipedia.org/wiki/List_of_DNS_record_types
                            if type_ not in (12, 28) and class_ == 1:  # Type not in (PTR, AAAA), Class IN
                                if dst_ip in trails:
                                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.IP, "%s (%s)" % (dst_ip, query), trails[dst_ip][0], trails[dst_ip][1]), packet)
                                elif src_ip in trails:
                                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.IP, src_ip, trails[src_ip][0], trails[src_ip][1]), packet)

                                _check_domain(query, sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, packet)

                        elif config.USE_HEURISTICS:
                            if ord(dns_data[2]) & 0x80:  # standard response
                                if ord(dns_data[3]) == 0x80:  # recursion available, no error
                                    _ = offset + 5
                                    try:
                                        while _ < len(dns_data):
                                            if ord(dns_data[_]) & 0xc0 != 0 and dns_data[_ + 2] == "\00" and dns_data[_ + 3] == "\x01":  # Type A
                                                break
                                            else:
                                                _ += 12 + struct.unpack("!H", dns_data[_ + 10: _ + 12])[0]

                                        _ = dns_data[_ + 12:_ + 16]
                                        if _:
                                            answer = socket.inet_ntoa(_)
                                            if answer in trails:
                                                _ = trails[answer]
                                                if "sinkhole" in _[0]:
                                                    trail = "(%s).%s" % ('.'.join(parts[:-1]), '.'.join(parts[-1:]))
                                                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.DNS, trail, "sinkholed by %s (malware)" % _[0].split(" ")[1], "(heuristic)"), packet)  # (e.g. kitro.pl, devomchart.com, jebena.ananikolic.su, vuvet.cn)
                                                elif "parking" in _[0]:
                                                    trail = "(%s).%s" % ('.'.join(parts[:-1]), '.'.join(parts[-1:]))
                                                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.DNS, trail, "parked site (suspicious)", "(heuristic)"), packet)
                                    except IndexError:
                                        pass

                                elif ord(dns_data[3]) == 0x83:  # recursion available, no such name
                                    if '.'.join(parts[-2:]) not in _dns_exhausted_domains and not _check_domain_whitelisted(query) and not _check_domain_member(query, trails):
                                        if parts[-1].isdigit():
                                            return

                                        if not (len(parts) > 4 and all(_.isdigit() and int(_) < 256 for _ in parts[:4])):  # generic check for DNSBL IP lookups
                                            for _ in filter(None, (query, "*.%s" % '.'.join(parts[-2:]) if query.count('.') > 1 else None)):
                                                if _ not in NO_SUCH_NAME_COUNTERS or NO_SUCH_NAME_COUNTERS[_][0] != sec / 3600:
                                                    NO_SUCH_NAME_COUNTERS[_] = [sec / 3600, 1, set()]
                                                else:
                                                    NO_SUCH_NAME_COUNTERS[_][1] += 1
                                                    NO_SUCH_NAME_COUNTERS[_][2].add(query)

                                                    if NO_SUCH_NAME_COUNTERS[_][1] > NO_SUCH_NAME_PER_HOUR_THRESHOLD:
                                                        if _.startswith("*."):
                                                            log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.DNS, "%s%s" % ("(%s)" % ','.join(item.replace(_[1:], "") for item in NO_SUCH_NAME_COUNTERS[_][2]), _[1:]), "excessive no such domain (suspicious)", "(heuristic)"), packet)
                                                            for item in NO_SUCH_NAME_COUNTERS[_][2]:
                                                                try:
                                                                    del NO_SUCH_NAME_COUNTERS[item]
                                                                except KeyError:
                                                                    pass
                                                        else:
                                                            log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.DNS, _, "excessive no such domain (suspicious)", "(heuristic)"), packet)

                                                        try:
                                                            del NO_SUCH_NAME_COUNTERS[_]
                                                        except KeyError:
                                                            pass

                                                        break

                                            if len(parts) > 2:
                                                part = parts[0] if parts[0] != "www" else parts[1]
                                                trail = "(%s).%s" % ('.'.join(parts[:-2]), '.'.join(parts[-2:]))
                                            elif len(parts) == 2:
                                                part = parts[0]
                                                trail = "(%s).%s" % (parts[0], parts[1])
                                            else:
                                                part = query
                                                trail = query

                                            if part and '-' not in part:
                                                result = _result_cache.get(part)

                                                if result is None:
                                                    # Reference: https://github.com/exp0se/dga_detector
                                                    probabilities = (float(part.count(c)) / len(part) for c in set(_ for _ in part))
                                                    entropy = -sum(p * math.log(p) / math.log(2.0) for p in probabilities)
                                                    if entropy > SUSPICIOUS_DOMAIN_ENTROPY_THRESHOLD:
                                                        result = "entropy threshold no such domain (suspicious)"

                                                    if not result:
                                                        if sum(_ in CONSONANTS for _ in part) > SUSPICIOUS_DOMAIN_CONSONANT_THRESHOLD:
                                                            result = "consonant threshold no such domain (suspicious)"

                                                    _result_cache[part] = result or False

                                                if result:
                                                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.DNS, trail, result, "(heuristic)"), packet)

        elif protocol in IPPROTO_LUT:  # non-TCP/UDP (e.g. ICMP)
            if protocol == socket.IPPROTO_ICMP:
                if ord(ip_data[iph_length]) != 0x08:  # Non-echo request
                    return
            elif protocol == socket.IPPROTO_ICMPV6:
                if ord(ip_data[iph_length]) != 0x80:  # Non-echo request
                    return

            if dst_ip in trails:
                log_event((sec, usec, src_ip, '-', dst_ip, '-', IPPROTO_LUT[protocol], TRAIL.IP, dst_ip, trails[dst_ip][0], trails[dst_ip][1]), packet)
            elif src_ip in trails:
                log_event((sec, usec, src_ip, '-', dst_ip, '-', IPPROTO_LUT[protocol], TRAIL.IP, src_ip, trails[src_ip][0], trails[src_ip][1]), packet)

    except struct.error:
        pass

    except Exception:
        if config.SHOW_DEBUG:
            traceback.print_exc()

Example 15

Project: timestring
Source File: Range.py
View license
    def __init__(self, start, end=None, offset=None, start_of_week=0, tz=None, verbose=False):
        """Build an inclusive (start, end) pair of Dates from flexible input.

        `start` can be a <timestring.Date>, a <datetime>, or a string in one
        of several forms: "infinity", "between X and Y" / "X to Y", a
        postgresql tsrange/tstzrange literal, or a natural-language
        expression matched by TIMESTRING_RE (e.g. "next 2 weeks",
        "this month", "january").

        :param start: beginning of the range (required).
        :param end: optional explicit end; when omitted it is derived from
            `start` (defaulting to a 24 hour span).
        :param offset: datetime field overrides forwarded to Date().
        :param start_of_week: weekday the week starts on.  NOTE(review):
            not referenced in this method — presumably used by callers.
        :param tz: timezone name applied to parsed dates.
        :param verbose: when True, print the regex groups that matched.
        :raises TimestringInvalid: if `start` is missing or unparseable.
        """
        self._dates = []
        pgoffset = None

        if start is None:
            raise TimestringInvalid("Range object requires a start value")

        # Coerce anything that is not already a Date/datetime into a string
        # so the regex-based parsing below can run on it.
        if not isinstance(start, (Date, datetime)):
            start = str(start)
        if end and not isinstance(end, (Date, datetime)):
            end = str(end)

        if start and end:
            """start and end provided
            """
            self._dates = (Date(start, tz=tz), Date(end, tz=tz))

        elif start == 'infinity':
            # end was not provided
            self._dates = (Date('infinity'), Date('infinity'))

        elif re.search(r'(\s(and|to)\s)', start):
            """Both sides were provided in the start
            """
            start = re.sub(r'^(between|from)\s', '', start.lower())
            # Both arguments found in start variable
            r = tuple(re.split(r'(\s(and|to)\s)', start.strip()))
            self._dates = (Date(r[0], tz=tz), Date(r[-1], tz=tz))

        elif re.match(r"(\[|\()((\"\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}(\.\d+)?(\+|\-)\d{2}\")|infinity),((\"\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}(\.\d+)?(\+|\-)\d{2}\")|infinity)(\]|\))", start):
            """postgresql tsrange and tstzranges support
            """
            start, end = tuple(re.sub(r'[^\w\s\-\:\.\+\,]', '', start).split(','))
            self._dates = (Date(start), Date(end))

        else:
            now = datetime.now()
            # no tz info but offset provided, we are UTC so convert

            if re.search(r"(\+|\-)\d{2}$", start):
                # postgresql tsrange and tstzranges
                pgoffset = re.search(r"(\+|\-)\d{2}$", start).group() + " hours"

            # tz info provided
            if tz:
                now = now.replace(tzinfo=pytz.timezone(str(tz)))

            # Parse the natural-language expression.
            res = TIMESTRING_RE.search(start)
            if res:
                group = res.groupdict()
                if verbose:
                    print(dict(map(lambda a: (a, group.get(a)), filter(lambda a: group.get(a), group))))
                if (group.get('delta') or group.get('delta_2')) is not None:
                    delta = (group.get('delta') or group.get('delta_2')).lower()

                    # always start w/ today
                    start = Date("today", offset=offset, tz=tz)

                    # make delta, e.g. "2 weeks"
                    di = "%s %s" % (str(int(group['num'] or 1)), delta)

                    # this           [   x  ]
                    if group['ref'] == 'this':

                        if delta.startswith('y'):
                            start = Date(datetime(now.year, 1, 1), offset=offset, tz=tz)

                        # month
                        elif delta.startswith('month'):
                            start = Date(datetime(now.year, now.month, 1), offset=offset, tz=tz)

                        # week
                        elif delta.startswith('w'):
                            start = Date("today", offset=offset, tz=tz) - (str(Date("today", tz=tz).date.weekday())+' days')

                        # day
                        elif delta.startswith('d'):
                            start = Date("today", offset=offset, tz=tz)

                        # hour
                        elif delta.startswith('h'):
                            start = Date("today", offset=dict(hour=now.hour+1), tz=tz)

                        # minute, second
                        elif delta.startswith('m') or delta.startswith('s'):
                            start = Date("now", tz=tz)

                        else:
                            raise TimestringInvalid("Not a valid time reference")

                        end = start + di

                    #next          x [      ]
                    elif group['ref'] == 'next':
                        if int(group['num'] or 1) > 1:
                            di = "%s %s" % (str(int(group['num'] or 1) - 1), delta)
                        end = start + di

                    # ago             [     ] x
                    elif group.get('ago') or group['ref'] == 'last' and int(group['num'] or 1) == 1:
                        end = start - di

                    # last & no ref   [    x]
                    else:
                        # need to include today with this reference
                        if not (delta.startswith('h') or delta.startswith('m') or delta.startswith('s')):
                            start = Range('today', offset=offset, tz=tz).end
                        end = start - di

                elif group.get('month_1'):
                    # a single month of this year
                    start = Date(start, offset=offset, tz=tz)
                    start = start.replace(day=1)
                    end = start + '1 month'

                elif group.get('year_5'):
                    # a whole year
                    start = Date(start, offset=offset, tz=tz)
                    start = start.replace(day=1, month=1)
                    end = start + '1 year'

                else:
                    # after all else, we set the end to + 1 day
                    start = Date(start, offset=offset, tz=tz)
                    end = start + '1 day'

            else:
                raise TimestringInvalid("Invalid timestring request")

            if end is None:
                # no end provided, so assume 24 hours
                end = start + '24 hours'

            if start > end:
                # flip them if this is so
                start, end = copy(end), copy(start)

            if pgoffset:
                start = start - pgoffset
                if end != 'infinity':
                    end = end - pgoffset

            self._dates = (start, end)

        if self._dates[0] > self._dates[1]:
            # NOTE(review): when the pair is still inverted, upstream extends
            # the *end* by a day rather than swapping — preserved as-is.
            self._dates = (self._dates[0], self._dates[1] + '1 day')

Example 16

Project: taolib
Source File: ceas.py
View license
def main():
    """Run the CEAS pipeline.

    Reads the gene annotation table and ChIP regions (BED), annotates the
    ChIP regions against a genome background, optionally runs wig-based
    profiling chromosome by chromosome, and writes an R script
    (<options.name>.R) that draws the resulting plots into
    <options.name>.pdf.

    All inputs come from the validated command line options.
    NOTE: Python 2 only as written (uses file.xreadlines()).
    """

    # read the options and validate them
    options=opt_validate(prepare_optparser())

    # CEAS run
    # read the gene annotation table
    jobcount=1
    info("#%d read the gene table..." %jobcount)

    # read
    GeneT=inout.GeneTable()
    GeneT.read(Host=options.Host,User=options.User,Db=options.Db,annotation='refGene',which=options.which)
    GeneT.sort()
    chroms_GeneT=GeneT.get_chroms()
    jobcount+=1

    # read ChIP regions
    info("#%d read the bed file of ChIP regions..." %jobcount)
    Cbed=inout.Bed()
    Cbed.read(options.bed)
    Csampler=sampler.ChIPSampler()
    ChIP=Csampler.sample(Cbed,resolution=options.chip_res)
    del Cbed
    jobcount+=1

    # read regions of interest if it is given
    if options.ebed:
        info("#%d read the bed file of regions of interest..." %jobcount)
        roi=inout.Bed()
        roi.read(options.ebed)
        jobcount+=1
    else: roi=None

    # if wig profiling is not being run.
    if not options.bg:

        # iterate through chromosomes of the gene table
        info("#%d read the pre-computed genome bg annotation..." %jobcount)
        GenomeBGS=tables.SummaryGBG(name='GenomeBGS')
        GenomeBGS.readdb(Db=options.gdb)
        # NOTE(review): `gdb` is not defined anywhere in this function --
        # presumably `options.gdb` (as used two lines above) was intended;
        # this line raises NameError as written.  Confirm upstream.
        GP=_interpoloate_gbg(gdb,options.promoter,options.bipromoter,options.downstream)
        chroms_bg=GP.get_chroms()

        # if any regions of interest are given
        if options.ebed:
            GP=_get_bgroi(GP,GenomeBGS,roi=roi,bg_res=options.bg_res)

        # annotate ChIP regions
        info('#%d annotate the ChIP regions...' %jobcount)
        Annot=annotator.Annotator()
        ChIPA=Annot.annotate(genome_coordinates=ChIP,gene_table=GeneT,roi=roi,prom=options.promoter,biprom=options.bipromoter,down=options.downstream,gene_div=(3,5))
        CS,CP=Annot.summarize(ChIPA)
        # make the table complete with missing chromsomes, if there are
        annotator.make_table_complete(CS,chroms_bg)
        annotator.make_table_complete(CP,chroms_bg)
        # get the pvalues
        CPval=annotator.estimate_pvals(GP,CS,CP)
        jobcount+=1

        # open outfile
        info('#%d write a R script of CEAS...' %jobcount)
        ofhd=open(options.name+'.R','w')
        pdfname=options.name+'.pdf'
        # the first part of CEAS R script. Because wig profiling is not run, just terminate
        rscript=R.pdf(pdfname,height=11.5,width=8.5)
        rscript+=inout.draw_CEAS(GP,CP,CPval,bg_res=options.bg_res,chip_res=options.chip_res,prom=options.promoter,biprom=options.bipromoter,down=options.downstream,gene_div=(3,5))
        ofhd.write(rscript)    # write CEAS

    # when wig profiling is running
    if options.pf:

        if options.bg:
            GenomeBGS=tables.Summary()

        # if gene subsets are given
        if options.gn_groups:
            subsets=inout.read_gene_subsets(options.gn_groups)

        chrom=''
        chrcount=1
        prof=profiler.WigProfiler()
        FIRST=True
        # NOTE: xreadlines() is Python 2 only; the wig file is streamed
        # line by line and processed one chromosome at a time.
        for line in open(options.wig,'r').xreadlines():
            if not line: continue
            # read a chromosome
            if re.search(r'track',line):
                try:
                    description=re.search(r'description="(\w+)"\s',line).group(1)
                except AttributeError:
                    pass
                continue
            if re.search(r'chrom=(\w+)\s',line):
                newchrom=re.search(r'chrom=(\w+)\s',line).group(1)
                try:
                    newchrom=inout.standard_chroms[newchrom]
                except KeyError:
                    pass
                continue
            # NOTE(review): if a data line precedes the first chrom= header,
            # `newchrom` below is unbound -- assumes well-formed wig input.
            l=line.strip().split()

        # the beginning
            if chrom=='' and chrom!=newchrom:
                # if the chromosome is not in gene table, continue
                chrom=newchrom
                if chrom in chroms_GeneT: # only if the new chromosome is in the chroms of gene table, a wig object is initiated.
                    info("#%d-%d work on %s..." %(jobcount,chrcount,chrom))
                    input=inout.Wig()
                    input.add_line(chrom,l)
                    chrcount+=1
            elif chrom!='' and chrom!=newchrom:    # new chromosome
                if chrom in chroms_GeneT:
                    # do genome BG annotation
                    if options.bg:
                        Sampler=sampler.GenomeSampler()
                        Annotator=annotator.Annotator()
                        GA=Annotator.annotate(Sampler.sample(input,resolution=options.bg_res),GeneT,roi=roi,prom=options.promoter,biprom=options.bipromoter,down=options.downstream,gene_div=(3,5))
                        tempS,tempP=Annotator.summarize(GA)
                        GenomeBGS.add_row(chrom,tempS.get_row(chrom))

                    # wig profiling
                    names,breaks,upstreams,downstreams,metagene_breaks,metagenes,metaexon_breaks,metaexons,metaintron_breaks,metaintrons=prof.profile(input,GeneT,rel_pos=options.rel_dist,metagenesize=options.metagene_size,step=options.pf_res,which=options.which,exonratio=0.5,emask=options.emask,imask=options.imask)

                    # get average of this chromosome
                    avg_up,upcount=corelib.mean_col_by_col(upstreams,counts=True)
                    avg_down,downcount=corelib.mean_col_by_col(downstreams,counts=True)
                    avg_mg,genecount=corelib.mean_col_by_col(metagenes,counts=True)
                    avg_me,exoncount=corelib.mean_col_by_col(metaexons,counts=True)
                    avg_mi,introncount=corelib.mean_col_by_col(metaintrons,counts=True)

                    if not FIRST:    # if not first chromosome
                        # fold this chromosome's averages into the running
                        # weighted means (weights = per-column counts)
                        avg_upstream,avg_upcount=corelib.weight_mean_col_by_col([avg_upstream,avg_up],[avg_upcount,upcount],counts=True)
                        avg_downstream,avg_downcount=corelib.weight_mean_col_by_col([avg_downstream,avg_down],[avg_downcount,upcount],counts=True)
                        avg_metagene,avg_genecount=corelib.weight_mean_col_by_col([avg_metagene,avg_mg],[avg_genecount,genecount],counts=True)
                        avg_metaexon,avg_exoncount=corelib.weight_mean_col_by_col([avg_metaexon,avg_me],[avg_exoncount,exoncount],counts=True)
                        avg_metaintron,avg_introncount=corelib.weight_mean_col_by_col([avg_metaintron,avg_mi],[avg_introncount,introncount],counts=True)
                        del avg_up,avg_down,avg_mg,avg_me,avg_mi,upcount,downcount,genecount,exoncount,introncount

                        if options.gn_groups:    # when gene sub-gropus are given
                            ixs,subsets=profiler.get_gene_indicies(names,subsets)
                            avg_ups,upcs,avg_downs,downcs,avg_mgs,gcs,avg_mes,ecs,avg_mis,ics=profiler.select_profiles_chr_by_chr(ixs,upstreams,downstreams,metagenes,metaexons,metaintrons)
                            avg_upstreams,avg_upcounts=profiler.weight_mean_profiles_chr_by_chr(avg_upstreams,avg_upcounts,avg_ups,upcs)
                            avg_downstreams,avg_downcounts=profiler.weight_mean_profiles_chr_by_chr(avg_downstreams,avg_downcounts,avg_downs,downcs)
                            avg_metagenes,avg_genecounts=profiler.weight_mean_profiles_chr_by_chr(avg_metagenes,avg_genecounts,avg_mgs,gcs)
                            avg_metaexons,avg_exoncounts=profiler.weight_mean_profiles_chr_by_chr(avg_metaexons,avg_exoncounts,avg_mes,ecs)
                            avg_metaintrons,avg_introncounts=profiler.weight_mean_profiles_chr_by_chr(avg_metaintrons,avg_introncounts,avg_mis,ics)
                            del avg_ups,avg_downs,avg_mgs,avg_mes,avg_mis,upcs,downcs,gcs,ecs,ics

                    else:   # if first chromosome
                        avg_upstream=avg_up
                        avg_downstream=avg_down
                        avg_metagene=avg_mg
                        avg_metaexon=avg_me
                        avg_metaintron=avg_mi
                        avg_upcount=upcount
                        avg_downcount=downcount
                        avg_genecount=genecount
                        avg_exoncount=exoncount
                        avg_introncount=introncount

                        if options.gn_groups:
                            ixs,subsets=profiler.get_gene_indicies(names,subsets)
                            avg_upstreams,avg_upcounts,avg_downstreams,avg_downcounts,avg_metagenes,avg_genecounts,avg_metaexons,avg_exoncounts,avg_metaintrons,avg_introncounts=profiler.select_profiles_chr_by_chr(ixs,upstreams,downstreams,metagenes,metaexons,metaintrons)
                        FIRST=False

                    del upstreams,downstreams,metagenes,metaexons,metaintrons

                # set chrom to the new chromosome
                chrom=newchrom
                if chrom in chroms_GeneT:    # only if the new chromosome is in the chroms of gene table, a wig object is initiated.
                    info("#%d-%d work on %s..." %(jobcount,chrcount,chrom))
                    input=inout.Wig()
                    input.add_line(chrom,l)
                    chrcount+=1
            else:    # in the middle of chromosome
                if chrom in chroms_GeneT:   # only if the new chromosome is in the chroms of gene table, the wig object is updated.
                    input.add_line(chrom,l)

    # do profiling for the last chromosome
        if chrom in chroms_GeneT:

            if options.bg:
                Sampler=sampler.GenomeSampler()
                Annotator=annotator.Annotator()
                GA=Annotator.annotate(Sampler.sample(input,resolution=options.bg_res),GeneT,roi=roi,prom=options.promoter,biprom=options.bipromoter,down=options.downstream,gene_div=(3,5))
                tempS,tempP=Annotator.summarize(GA)
                GenomeBGS.add_row(chrom,tempS.get_row(chrom))
                GenomeBGS.summarize()
                GP=GenomeBGS.get_p()

                if options.ebed:
                    GP=_get_bgroi(GP,GenomeBGS,roi=roi,bg_res=options.bg_res)

            # profiling
            names,breaks,upstreams,downstreams,metagene_breaks,metagenes,metaexon_breaks,metaexons,metaintron_breaks,metaintrons=prof.profile(input,GeneT,rel_pos=options.rel_dist,metagenesize=options.metagene_size,step=options.pf_res,which=options.which,exonratio=0.5,emask=options.emask,imask=options.imask)
            del input
            # get average of this chromosome
            avg_up,upcount=corelib.mean_col_by_col(upstreams,counts=True)
            avg_down,downcount=corelib.mean_col_by_col(downstreams,counts=True)
            avg_mg,genecount=corelib.mean_col_by_col(metagenes,counts=True)
            avg_me,exoncount=corelib.mean_col_by_col(metaexons,counts=True)
            avg_mi,introncount=corelib.mean_col_by_col(metaintrons,counts=True)

            if not FIRST:    # the first chromosome profiling
                avg_upstream,avg_upcount=corelib.weight_mean_col_by_col([avg_upstream,avg_up],[avg_upcount,upcount],counts=True)
                avg_downstream,avg_downcount=corelib.weight_mean_col_by_col([avg_downstream,avg_down],[avg_downcount,upcount],counts=True)
                avg_metagene,avg_genecount=corelib.weight_mean_col_by_col([avg_metagene,avg_mg],[avg_genecount,genecount],counts=True)
                avg_metaexon,avg_exoncount=corelib.weight_mean_col_by_col([avg_metaexon,avg_me],[avg_exoncount,exoncount],counts=True)
                avg_metaintron,avg_introncount=corelib.weight_mean_col_by_col([avg_metaintron,avg_mi],[avg_introncount,introncount],counts=True)
                del avg_up,avg_down,avg_mg,avg_me,avg_mi,upcount,downcount,genecount,exoncount,introncount

                if options.gn_groups:
                    ixs,subsets=profiler.get_gene_indicies(names,subsets)
                    avg_ups,upcs,avg_downs,downcs,avg_mgs,gcs,avg_mes,ecs,avg_mis,ics=profiler.select_profiles_chr_by_chr(ixs,upstreams,downstreams,metagenes,metaexons,metaintrons)
                    avg_upstreams,avg_upcounts=profiler.weight_mean_profiles_chr_by_chr(avg_upstreams,avg_upcounts,avg_ups,upcs)
                    avg_downstreams,avg_downcounts=profiler.weight_mean_profiles_chr_by_chr(avg_downstreams,avg_downcounts,avg_downs,downcs)
                    avg_metagenes,avg_genecounts=profiler.weight_mean_profiles_chr_by_chr(avg_metagenes,avg_genecounts,avg_mgs,gcs)
                    avg_metaexons,avg_exoncounts=profiler.weight_mean_profiles_chr_by_chr(avg_metaexons,avg_exoncounts,avg_mes,ecs)
                    avg_metaintrons,avg_introncounts=profiler.weight_mean_profiles_chr_by_chr(avg_metaintrons,avg_introncounts,avg_mis,ics)
                    del avg_ups,avg_downs,avg_mgs,avg_mes,avg_mis,upcs,downcs,gcs,ecs,ics
            else:
                avg_upstream=avg_up
                avg_downstream=avg_down
                avg_metagene=avg_mg
                avg_metaexon=avg_me
                avg_metaintron=avg_mi
                avg_upcount=upcount
                avg_downcount=downcount
                avg_genecount=genecount
                avg_exoncount=exoncount
                avg_introncount=introncount
                if options.gn_groups:
                    ixs,subsets=profiler.get_gene_indicies(names,subsets)
                    avg_upstreams,avg_upcounts,avg_downstreams,avg_downcounts,avg_metagenes,avg_genecounts,avg_metaexons,avg_exoncounts,avg_metaintrons,avg_introncounts=profiler.select_profiles_chr_by_chr(ixs,upstreams,downstreams,metagenes,metaexons,metaintrons)

            del upstreams,downstreams,metagenes,metaexons,metaintrons
        jobcount+=1

        if options.bg:
            info('#%d annotate ChIP regions...' %jobcount)
            Annot=annotator.Annotator()
            ChIPA=Annot.annotate(genome_coordinates=ChIP,gene_table=GeneT,roi=roi,prom=options.promoter,biprom=options.bipromoter,down=options.downstream,gene_div=(3,5))
            CS,CP=Annot.summarize(ChIPA)
            CPval=annotator.estimate_pvals(GP,CS,CP)
            jobcount+=1

            info('#%d write R script of CEAS and wig profiling...' %jobcount)
            ofhd=open(options.name+'.R','w')
            pdfname=options.name+'.pdf'
            # the first part of CEAS R script. Because wig profiling is not run, just terminate
            rscript=R.pdf(pdfname,height=11.5,width=8.5)
            rscript+=inout.draw_CEAS(GP,CP,CPval,bg_res=options.bg_res,chip_res=options.chip_res,prom=options.promoter,biprom=options.bipromoter,down=options.downstream,gene_div=(3,5))
            ofhd.write(rscript)    # writing CEAS
        else:
            info('#%d append R script of wig profiling...' %jobcount)

        # write R script
        if options.gn_groups:
            # append the profiles of all genes
            avg_upstreams.append(avg_upstream)
            avg_downstreams.append(avg_downstream)
            avg_metagenes.append(avg_metagene)
            avg_metaexons.append(avg_metaexon)
            avg_metaintrons.append(avg_metaintron)

            rscript=inout.draw_profile_plots(breaks,avg_upstreams,avg_downstreams,metagene_breaks,avg_metagenes,metaexon_breaks,avg_metaexons,metaintron_breaks,avg_metaintrons,metagene_breaks_lim=[-1000,1000],legends=options.gn_names)
        else:
            rscript=inout.draw_profile_plot(breaks,avg_upstream,avg_downstream,metagene_breaks,avg_metagene,metaexon_breaks,avg_metaexon,metaintron_breaks,avg_metaintron,metagene_breaks_lim=[-1000,1000])
        ofhd.write(rscript)    # write wig profiling

    # NOTE(review): `ofhd` is only opened inside the `not options.bg` and
    # `options.bg and options.pf` branches; with bg=True and pf=False this
    # line raises NameError -- confirm the option combinations allowed
    # by opt_validate.
    ofhd.write(R.devoff())
    ofhd.close()

    info ('#... cong! Run R on %s!' %(options.name+'.R'))

Example 17

Project: typhon-vx
Source File: napalmex2.py
View license
def add_datfile(datfile, tmsi, mcnc, cellid):
	"""Parse one captured burst file and queue its keystreams for cracking.

	datfile -- capture file name (relative to newpath)
	tmsi    -- TMSI, used to first try reusing previously cracked keys
	mcnc    -- MCC+MNC, selects the per-operator statistics database
	cellid  -- cell id (currently unused, see XXX below)

	Side effects: inserts burst pairs into the in-memory keystreams table,
	moves plaintext captures to plainpath and updates the keys table.
	"""
	global plain
	# hotfix, sometimes napalmex is trying to move empty file
	if (not datfile):
		return
	if not decrypt(datfile, tmsi):
		# BUG FIX: the original called os.path.isfile("..."%gsmpath,mcnc)
		# (missing tuple parens -> TypeError), used the undefined name
		# `s.popen` with an unbalanced paren in the else branch, and passed
		# five arguments to the four-placeholder default-db format string.
		if os.path.isfile("%s/stat/operators/%s.db"%(gsmpath,mcnc)):
			f=os.popen(gsmpath+"/bin/gsm_convert -s %s/stat/operators/%s.db -f %s/%s %s  | sort -rn | tr \"+\" \"\n\" | cut -d \" \" -f 2- | grep S.CCH/DL"%(gsmpath,mcnc,newpath,datfile,cflags))
		else:
			# no per-operator statistics yet: fall back to the default db
			f=os.popen(gsmpath+"/bin/gsm_convert -s %s/stat/operators/default.db -f %s/%s %s  | sort -rn | tr \"+\" \"\n\" | cut -d \" \" -f 2- | grep S.CCH/DL"%(gsmpath,newpath,datfile,cflags))

		# XXX todo: use cellid
		# XXX todo: what to do if operator is not found / if we do not want to crack sdcch downlink for example, ...

		bursts_pos=0 # burst position in frame
		prev_keystream="" # for verifying the very last burst in each frame
		prev_frameno=""
		for line in f.readlines():
			m=re.search('^S.* .* (.*): (.*)', line) # 1 - framenumber, 2 - guessed_keystream
			if(m):
							# if this is the first burst of the frame
							# there is no keystream to check againt yet
				if(bursts_pos%4 == 1):	# if this is the second burst of the frame
							# we need to add the first burst too
					insert_burst(datfile, prev_keystream, prev_frameno, m.group(2), m.group(1), 0, 0)
				if(bursts_pos%4 > 0):   # and then just add the current burst
					insert_burst(datfile, m.group(2), m.group(1), prev_keystream, prev_frameno, 0, 0)
				prev_keystream=m.group(2)
				prev_frameno=m.group(1)
				bursts_pos+=1
		f.close()
		if bursts_pos == 0:
			# no keystream returned, assume decoding of the whole
			# capture succeeded (no encryption used)
			print("PLAIN %s"%datfile)
			os.system("mv %s/%s %s"%(newpath,datfile,plainpath))
			cur.execute("update keys set status=4 where file like '%s'"%datfile)
			plain+=1
		else:
			cur.execute("update keys set status=1 where file like '%s'"%datfile)
	con.commit()
	sqlko_internal.commit()

def submit_burst(row):
	"""Send one queued keystream (row[1]) to the Kraken and stamp it submitted."""
	keystream = row[1]
	now = int(time.time())
	sock.send("crack %s\n"%keystream)
	# NOTE(review): SQL is built by string interpolation throughout this
	# script; parameterized queries would be safer.
	tsql.execute("update keystreams set submitted=%d where keystream like '%s';"%(now,keystream))
# used for generating plots

def job_finished(m):
	"""Handle a 'crack #<jobnum> took' line from the cracker.

	m.group(1) is the job number.  Removes the finished job from the
	in-memory queue; when it was the last outstanding keystream for its
	file, the file is marked failed (status=3) and moved to badpath.
	"""
	global failed
	tsql.execute("select * from keystreams where jobnum = '%s'"%m.group(1))
	row=tsql.fetchone()
	if row == None:
		print("Strange, cracker returned job I did not submit!")
		return
	datfile=row[0]
	tsql.execute("select count(1) from keystreams where filename = '%s'"%datfile)
	if tsql.fetchone()[0] == 1:
		# last keystream of this file and no key was found -> failed
		cur.execute("update keys set status=3 where file like '%s' and key like '%s'"%(datfile,emptykey))
		if os.path.exists("%s/%s"%(newpath,datfile)):
			failed+=1
			print("FAILED %s"%datfile)
			os.system("mv %s/%s %s"%(newpath,datfile,badpath))
		con.commit()
	tsql.execute("delete from keystreams where jobnum = %s"%m.group(1))
	sqlko_internal.commit()

def state_found(m):
	"""Handle a 'Found <state> @ <bitpos> #<jobnum>' line from the cracker.

	Match groups: 1 - secret state; 2 - bit position; 3 - job number.
	"""
	jobnum = int(m.group(3))
	tsql.execute("select * from keystreams where jobnum = %d"%jobnum)
	entry = tsql.fetchone()
	if entry is None:
		print("Strange, cracker solved job I did not submit!")
	else:
		backclock(m.group(1), m.group(2), entry[2], entry[3], entry[4], entry[0], entry[7])

def key_found(key, datfile):
	"""Record a cracked key for datfile and move the capture to crackedpath.

	Duplicate results (the capture was already moved away) are counted in
	dup_results and the cracked counter is rolled back.
	"""
	global dup_results
	global cracked
	cur.execute("update keys set key='%s', status=2 where file like '%s';"%(key,datfile))
	con.commit()
	if os.path.exists("%s/%s"%(newpath,datfile)):
		cur.execute("select timestamp from keys where file like '%s';"%(datfile))
		row = cur.fetchone()
		try:
#			if row[0].isnumeric(): # sometimes something else (?) is returned
			delay = time.time() - int(row[0])
		except ValueError:
			delay=0
			print("BUG")
		# BUG FIX: delay was computed above but a literal 0 was printed.
		print("CRACKED %s, delay = %i"%(datfile, delay))
		os.system("mv %s/%s %s/"%(newpath,datfile,crackedpath))
#		os.system("./run %s %s %i"%(datfile,key,delay))
		tsql.execute("delete from keystreams where filename='%s' and submitted = -1"%(datfile))
	else:
		print("DUP %s"%datfile)
		dup_results+=1
		cracked-=1

def decrypt(datfile, tmsi):
	"""Try every previously cracked key seen for this TMSI against datfile.

	Returns True (after recording the hit via key_found) when an old key
	decrypts the capture, False otherwise.
	"""
	global reused
	cursor = con.cursor()
	# XXX optimization per BTS, per time (e.g. week lasting session)
	cursor.execute("select distinct(key) from keys where key not like '%s' and tmsi like '%s'"%(emptykey, tmsi))
	for row in iter(cursor.fetchone, None):
		candidate = row[0]
		status = os.system(gsmpath+"/bin/gsm_convert -f %s -k %s 2>/dev/null |grep -q KEY_OK"%(datfile,candidate))
		if status == 0:
			reused+=1
			key_found(candidate, datfile)
			return True
	return False

def usage():
	print sys.argv[0]+" [options]"
	print "If run without filename, starts automated cracking of current session"
	print "Options:"
	print " -h, --help : This help"
	print " -u, --uplink : Try to crack uplink"
	print " -p text, --plaintext text : Use provided plaintext"
	print " -s filename, --single filename: Try to crack just single bursts file"
	print " -r 1/0, --reusing 1/0 : Try to reuse previously cracked keys (default on)"
	print " -f 1/0, --failed-reusing 1/0: Try to use new keys on previously failed bursts (default off)"


	return

# ---- command line parsing ----------------------------------------------
try:
	opts, args = getopt.getopt(sys.argv[1:], "hc:us:r:f:", ["help", "cflags=","uplink","single=","reusing=","failed-reusing="])

except getopt.GetoptError:
	usage()
	sys.exit(2)
for opt, arg in opts:
	if opt in ("-h", "--help"):
		usage()
		sys.exit()
	elif opt in ("-u", "--uplink"):
		fflags="u"
		cflags+=" -u"
	elif opt in ("-c", "--cflags"):
		cflags+=arg
	elif opt in ("-m", "--module"):
		# NOTE(review): "-m"/"--module" is not declared in the getopt spec
		# above, so this branch is unreachable as written; likewise
		# "-p/--plaintext" is advertised by usage() but never parsed.
		mymod=arg
	elif opt in ("-s", "--single"):
		# crack just single file
		key=prepare_for_kraken(arg)
		print arg+"\t"+key
		sys.exit(0)
	elif opt in ("-r", "--reusing"):
		if arg == '0':
			reusing=0
		if arg == '1':
			reusing=1
	elif opt in ("-f", "--failed-reusing"):
		if arg == '0':
			failed_reusing=0
		if arg == '1':
			failed_reusing=1
	else:
		assert False, "unhandled option"

# automated mode
# create keys.db if the database does not exist yet
con = None
try:
	con=lite.connect(gsmsession+'keys.db');
#	con=lite.connect(gsmsession+'keys.db', isolation_level=None)

	cur = con.cursor()
	cur.execute('SELECT count(1) from keys')
	data = cur.fetchone()

except lite.Error, e:
	# Python 2 except syntax.  First run: the SELECT above fails because
	# the table does not exist yet, so create the schema here.
	# NOTE(review): if lite.connect itself raised, `cur` is unbound in this
	# handler and a NameError would mask the original error -- confirm.
	cur.execute("CREATE TABLE keys(timestamp INT, file TEXT, tmsi text, key text, status INT, cid INT, mncc INT)")
	cur.execute("CREATE INDEX idx_file on keys(file);");
	cur.execute("CREATE INDEX idx_key on keys(key);");
	cur.execute("CREATE INDEX idx_status on keys(status);");
	cur.execute("CREATE INDEX idx_tmsi on keys(tmsi);"); # XXX test performance

	con.commit()

# reset jobs that were in flight (status=1) when we last crashed
cur.execute("update keys set status=0 where status=1 and key like '%s'"%emptykey) # recovery after crash
con.commit()

# internal database: in-memory work queue of keystreams pending/submitted
# to the Kraken cracker (submitted=-1 means not yet sent)
sqlko_internal=lite.connect(":memory:");
#sqlko_internal=lite.connect(":memory:",isolation_level=None)
tsql = sqlko_internal.cursor()
tsql.execute("CREATE TABLE keystreams(filename TEXT,keystream TEXT,framecount INT,keystream2 TEXT,framecount2 INT,priority INT,serial INT,uplink INT,jobnum INT,submitted INT)")

tsql.execute("CREATE INDEX idx_filename on keystreams(filename);");
tsql.execute("CREATE INDEX idx_keystreams on keystreams(keystream);");
tsql.execute("CREATE INDEX idx_jobnum on keystreams(jobnum);");
tsql.execute("CREATE INDEX idx_submitted on keystreams(submitted);");

sqlko_internal.commit()
#sqlko_internal.close()
# open connection to the Kraken
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
#sock.settimeout(gsmkrakentimeout)
sock.connect((gsmkrakenhost, gsmkrakenport))

#hack, not optimal (blocking & not timeouting)
# non-blocking socket; reads in the main loop rely on the except fallback
sock.setblocking(0)
sfile=sock.makefile("rb")

#main loop: poll the keys db for new captures, keep the Kraken queue full,
#and process cracker output, forever
laststat=time.time()
while (1):
	# Part 1: get new files
#       tsql.execute("select count(distinct filename) from keystreams")
#       dat_add = dat_maxadd # tsql.fetchone()[0]
#       if (dat_add > 0):
#               cur.execute("select file,tmsi from keys where key like '%s' and status=0 order by timestamp desc limit %i"%(emptykey,dat_add))
#               result=cur.fetchall()
#               i=0
#               while True:
#                       if i>=len(result):
#                               break
#                       row = result[i]
#                       add_datfile(row[0],row[1])
#                       i+=1
	cur.execute("select file,tmsi,mcnc,cellid from keys where key like '%s' and status=0 order by timestamp desc limit %i"%(emptykey, dat_maxadd))
	result=cur.fetchall()
	for i in range(len(result)):
		row = result[i]
		tsql.execute("select count(1) from keystreams");
		bursts_to_add=burst_maxqueue-tsql.fetchone()[0];
		if bursts_to_add > 0:
			add_datfile(row[0],row[1],row[2],row[3])
		else:
			break

	# Part 2: send parsed bursts for cracking
	tsql.execute("select count(1) from keystreams where submitted != -1")
	to_crack=kraken_burstmaxcrack-tsql.fetchone()[0]
	tsql.execute("select filename,keystream,MIN(serial) from keystreams k where submitted = -1 group by filename order by priority desc, (select count(1) from keystreams where k.filename = filename and submitted <> -1), serial desc limit %i"%to_crack)
#	tsql.execute("select * from keystreams where submitted = -1 group by filename order by priority desc, serial desc")
	result=tsql.fetchall()
	for i in range(to_crack):
		if i>=len(result):
			break
		row = result[i]
		submit_burst(row)

	sqlko_internal.commit()

	# Part 3: remove stuck bursts
#	tstamp=int(time.time()-kraken_job_timeout)
#	tsql.execute("select count(*) from keystreams where submitted < %s and submitted > 0"%tstamp)
#	row = tsql.fetchone()
#	if row[0] > 0:
#		orphans+=int(row[0])
#		print("Removing %s orphans"%row[0])
#		tsql.execute("delete from keystreams where submitted < %s and submitted > 0"%tstamp)

	# Part 4: read returned data from cracker & process them
	# (socket is non-blocking, so readline raises when no data is ready
	# and the bare except breaks out of the drain loop)
	while True:
		try:
			line=sfile.readline().strip()
		except:
			line=""
#			time.sleep(napalmexdelay)
			break
		m=re.search('crack #(.*) took',line)
		if(m):
			job_finished(m)
			bursts_recv+=1
		m=re.search('Found (.*) @ (.*) #(.*)  ',line)
		if(m):
			state_found(m)
		m=re.search('Cracking #(.*) (.*)',line)
		if(m):
			tsql.execute("update keystreams set jobnum=%d where keystream like '%s';"%(int(m.group(1)),m.group(2)))
			sqlko_internal.commit()

	if laststat+napalmexdelay<time.time():
		stat()
		laststat=time.time()
	else: # don't do cycles more often than delay
		time.sleep(laststat+napalmexdelay-time.time())

Example 18

Project: ictf-framework
Source File: exploit.py
View license
    def execute(self, ip, port, flag_id):
        from multiprocessing import Process, Queue
        from collections import defaultdict
        from twisted.internet.protocol import Protocol, ClientFactory
        from twisted.internet import reactor
        from twisted.protocols.basic import LineReceiver
        from sys import stdout
        import sys
        import re
        import logging

        NUM_FILES_PER_AGENT = 15

        class File(object):
            def __init__(self, name, size, value):
                self.name = name
                self.size = size
                self.value = value
            def __repr__(self):
                return "Name:{0}, Size:{1}, Value:{2}".format(\
                        self.name, self.size, self.value)

        class Solver(object):
            def __init__(self, host, port, room_id):
                self.agents = []
                self.host = host
                self.port = port
                self.room_id = room_id
                self.flag = None
                reactor.connectTCP(self.host, port, AgentFactory(self))
                reactor.connectTCP(self.host, port, AgentFactory(self))
                reactor.connectTCP(self.host, port, AgentFactory(self))

            def add_agent(self, agent):
                self.agents.append(agent)

            def solve_individual(self):
                # Begin communication
                self.agents[0].msg_snowden("hi")
                reactor.callLater(.1, self.tell_agents_to_send)

            def tell_agents_to_send(self):
                for agent in self.agents:
                    agent.send_files()

            def check_if_ready(self):
                received_files = map(lambda agent: agent.received_all_files(), self.agents)
                if all(received_files):
                    self.solve_individual()


        def memoize(f):
            """ Memoization decorator for functions taking one or more arguments. """
            class memodict(dict):
                def __init__(self, f):
                    self.f = f
                def __call__(self, *args):
                    return self[args]
                def __missing__(self, key):
                    ret = self[key] = self.f(*key)
                    return ret
            return memodict(f)

        class Agent(LineReceiver):
            delimiter = "\n"

            def __init__(self, solver):
                self.solver = solver
                self.solver.add_agent(self)
                self.identity = None
                self.files = []
                self.bandwidth = 0
                self.committed = False

            def received_all_files(self):
                return len(self.files) == NUM_FILES_PER_AGENT

            def determine_identity(self, data):
                find_self = re.search('(\w+) has joined', data)
                if find_self:
                    self.identity = find_self.group(1)

            # list -- | Remaining Bandwidth: 16079 KB
            def determine_bandwidth(self, data):
                find_bandwidth = re.search('(\d+)', data)
                if find_bandwidth:
                    self.bandwidth = int(find_bandwidth.group(1))
                    logging.debug("{0} has bandwidth {1}".format(self.identity,
                            self.bandwidth))

            def msg_snowden(self,msg):
                cmd = "/msg E.Snowden {0}".format(msg)
                if msg == "DONE" and not self.committed:
                    logging.debug("{0} types {1}".format(self.identity, cmd))
                    self.sendLine(cmd)
                    self.committed = True
                elif msg != "DONE":
                    self.sendLine("/msg E.Snowden {0}".format(msg))

            def send_files(self):
                value, files_to_send = self.knapsack(self.files, self.bandwidth)
                for myfile in files_to_send:
                    self.send_file(myfile.name, "E.Snowden")
                    self.files.remove(myfile)
                self.bandwidth -= sum(map(lambda sent_file: sent_file.size,
                                    files_to_send))
                logging.debug("Bandwidth for {0} is now {1}".format(self.identity,
                        self.bandwidth))
                logging.debug("Remaining Files for {0} is {1}".format(self.identity,
                        len(self.files)))

                if len(self.files) >= 3:
                    for agent, my_file in zip(self.solver.agents, self.files[:3]):
                        self.send_file(my_file.name, agent.identity)
                    self.msg_snowden("DONE")

            def send_file (self, file_name, agent):
                cmd = "/send {0} {1}".format(agent, file_name)
                logging.debug("{0} types {1}".format(agent, cmd))
                self.sendLine(cmd)

            def show_up_for_work(self):
                self.sendLine("1")

            def enter_room(self):
                self.sendLine(self.solver.room_id)

            def begin_mission(self):
                # Request the file listing once everyone has arrived.
                self.sendLine("/list")

            def received_file(self, line):
                valid_file = re.search('(\S+)\s+(\d+)KB\s+(\d*)', line)
                if valid_file:
                    new_file = File(valid_file.group(1), int(valid_file.group(2)),
                                    int(valid_file.group(3)))
                    self.files.append(new_file)
                    self.solver.check_if_ready()

            # send -- | *Received File: MasteringTheInternet.ppt from Agent2 *
            def receive_file_from_friend(self, line):
                """Handle a file pushed to us by a teammate: if it fits under
                our remaining bandwidth, relay it to E.Snowden and commit."""
                hit = re.search('Received File:\s+(\S+)\((\d+)\)', line)
                if not hit:
                    return
                fname, size_text = hit.groups()
                if int(size_text) < self.bandwidth:
                    self.send_file(fname, "E.Snowden")
                    self.msg_snowden("DONE")

            def lineReceived(self, data):
                """Dispatch one line from the server to the matching handler.

                The branches are substring tests and their ORDER matters: the
                "Remaining Bandwidth" exclusion must run before the generic
                "list -- |" file-row branch.
                """
                if "Enter the number" in data:
                    self.show_up_for_work()
                elif "Enter your room id" in data:
                    self.enter_room()
                elif self.identity is None and "has joined" in data:
                    self.determine_identity(data)
                elif "Everyone has arrived" in data:
                    self.begin_mission()
                elif "Remaining Bandwidth" not in data and "list -- |" in data:
                    # Strip the "list -- | " prefix before parsing the file row.
                    self.received_file(data[12:])
                elif "Remaining Bandwidth" in data:
                    self.determine_bandwidth(data)
                elif "Received File" in data:
                    self.receive_file_from_friend(data)
                # think your boss will want this: test
                elif "boss will want" in data:
                    # The flag is the last whitespace-delimited token of the line.
                    logging.debug(data)
                    flag = re.search('(\S+)$', data)
                    if flag:
                        self.solver.flag = flag.group(1)
                    reactor.stop()

            def knapsack(self, files, maxweight):
                """Classic 0/1 knapsack over the agent's files.

                Returns (best total value, list of chosen files) such that the
                chosen files' sizes sum to at most maxweight.
                """
                @memoize
                def best(i, cap):
                    # Best achievable value using only the first i files with
                    # at most cap bandwidth remaining.
                    if i == 0:
                        return 0
                    item = files[i - 1]
                    without = best(i - 1, cap)
                    if item.size > cap:
                        return without
                    return max(without, best(i - 1, cap - item.size) + item.value)

                # Walk the memo table backwards to recover which files were taken.
                remaining = maxweight
                picked = []
                for i in xrange(len(files), 0, -1):
                    if best(i, remaining) != best(i - 1, remaining):
                        picked.append(files[i - 1])
                        remaining -= files[i - 1].size
                picked.reverse()
                return best(len(files), maxweight), picked

        class AgentFactory(ClientFactory):
            # Twisted factory that spawns Agent protocol instances, all bound
            # to one shared Solver.
            def __init__(self, solver):
                self.solver = solver

            def buildProtocol(self, addr):
                # Called by Twisted once per new connection.
                return Agent(self.solver)

        def run(host, port, room_id, queue):
            # Child-process entry point: build a Solver, spin the Twisted
            # reactor until an agent stops it, then ship the flag back to the
            # parent through the multiprocessing queue.
            logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.CRITICAL)
            # Connect three agents
            solver = Solver(host, port, room_id)
            reactor.run()
            #return solver.flag
            queue.put(solver.flag)

                
        flag = ''
        error = 0
        error_msg = ''

        try:
            # Run the solver in a separate process: reactor.run() can only be
            # started once per process, so isolating it lets this checker be
            # invoked repeatedly. The flag comes back through the queue.
            queue = Queue()
            p = Process(target=run, args=(ip, port, flag_id, queue))
            p.start()
            p.join()
            flag = queue.get()
        except Exception as e:
            error = -1 #down
            error_msg = str(e)

        self.flag = flag
        self.error = error
        self.error_msg = error_msg

Example 19

Project: bamsurgeon
Source File: addsv.py
View license
def makemut(args, bedline, alignopts):
    """Spike one structural variant (described by a single BED line) into a BAM.

    Assembles contigs over the target interval, applies the requested
    mutation(s) (INS / INV / DEL / DUP / TRN) to the best contig, simulates
    reads from the mutated contig and remaps them to the reference.

    Returns (path_to_mutated_reads_bam, path_to_exclude_file), or
    (None, None) on any abort/failure condition.
    """

    # Deterministic per-interval seeding: offset the user seed by the BED start.
    if args.seed is not None: random.seed(int(args.seed) + int(bedline.strip().split()[1]))

    # Unique mutation id derived from the whole BED line.
    mutid = '_'.join(map(str, bedline.strip().split()))
    try:
        bamfile = pysam.Samfile(args.bamFileName, 'rb')
        reffile = pysam.Fastafile(args.refFasta)
        logfn = '_'.join(map(os.path.basename, bedline.strip().split())) + ".log"
        logfile = open('addsv_logs_' + os.path.basename(args.outBamFile) + '/' + os.path.basename(args.outBamFile) + '_' + logfn, 'w')
        exclfile = args.tmpdir + '/' + '.'.join((mutid, 'exclude', str(uuid4()), 'txt'))
        exclude = open(exclfile, 'w')

        # optional CNV file
        cnv = None
        if (args.cnvfile):
            cnv = pysam.Tabixfile(args.cnvfile, 'r')

        # temporary file to hold mutated reads
        outbam_mutsfile = args.tmpdir + '/' + '.'.join((mutid, str(uuid4()), "muts.bam"))

        # BED columns: chrom, start, end, then the action specification.
        c = bedline.strip().split()
        chrom  = c[0]
        start  = int(c[1])
        end    = int(c[2])
        araw   = c[3:len(c)] # INV, DEL, INS seqfile.fa TSDlength, DUP

        # translocation specific
        trn_chrom = None
        trn_start = None
        trn_end   = None

        is_transloc = c[3] == 'TRN'

        # For translocations, pad both breakpoint windows by 3 kb.
        if is_transloc:
            start -= 3000
            end   += 3000
            if start < 0: start = 0

            trn_chrom = c[4]
            trn_start = int(c[5]) - 3000
            trn_end   = int(c[5]) + 3000
            if trn_start < 0: trn_start = 0

        # Multiple comma-separated actions may apply to the same interval.
        actions = map(lambda x: x.strip(),' '.join(araw).split(','))

        svfrac = float(args.svfrac) # default, can be overridden by cnv file

        if cnv: # CNV file is present
            if chrom in cnv.contigs:
                for cnregion in cnv.fetch(chrom,start,end):
                    cn = float(cnregion.strip().split()[3]) # expect chrom,start,end,CN
                    sys.stdout.write("INFO\t" + now() + "\t" + mutid + "\t" + ' '.join(("copy number in sv region:",chrom,str(start),str(end),"=",str(cn))) + "\n")
                    # Scale the variant allele fraction by the copy number.
                    svfrac = 1.0/float(cn)
                    assert svfrac <= 1.0
                    sys.stdout.write("INFO\t" + now() + "\t" + mutid + "\tadjusted MAF: " + str(svfrac) + "\n")

        print "INFO\t" + now() + "\t" + mutid + "\tinterval:", c
        print "INFO\t" + now() + "\t" + mutid + "\tlength:", end-start

        # modify start and end if interval is too short
        minctglen = int(args.minctglen)

        # adjust if minctglen is too short
        if minctglen < 3*int(args.maxlibsize):
            minctglen = 3*int(args.maxlibsize)

        # Grow the interval symmetrically until it reaches minctglen.
        if end-start < minctglen:
            adj   = minctglen - (end-start)
            start = start - adj/2
            end   = end + adj/2

            print "INFO\t" + now() + "\t" + mutid + "\tnote: interval size was too short, adjusted: %s:%d-%d" % (chrom,start,end)

        dfrac = discordant_fraction(args.bamFileName, chrom, start, end)
        print "INFO\t" + now() + "\t" + mutid + "\tdiscordant fraction:", dfrac

        maxdfrac = 0.1 # FIXME make a parameter
        # NOTE(review): the comparison uses the literal .1 rather than
        # maxdfrac -- keep them in sync if maxdfrac ever changes.
        if dfrac > .1: 
            sys.stderr.write("WARN\t" + now() + "\t" + mutid + "\tdiscordant fraction > " + str(maxdfrac) + " aborting mutation!\n")
            return None, None

        # Locally assemble contigs over the (padded) interval.
        contigs = ar.asm(chrom, start, end, args.bamFileName, reffile, int(args.kmersize), args.tmpdir, mutid=mutid, debug=args.debug)

        trn_contigs = None
        if is_transloc:
            trn_contigs = ar.asm(trn_chrom, trn_start, trn_end, args.bamFileName, reffile, int(args.kmersize), args.tmpdir, mutid=mutid, debug=args.debug)

        # NOTE(review): sorted(contigs)[-1] raises on an empty contig list, and
        # maxcontig.seq is accessed before the `maxcontig is None` check below
        # -- confirm ar.asm() can never return empty/None-containing results.
        maxcontig = sorted(contigs)[-1]

        trn_maxcontig = None
        if is_transloc: trn_maxcontig = sorted(trn_contigs)[-1]

        # be strict about contig quality
        if re.search('N', maxcontig.seq):
            sys.stderr.write("WARN\t" + now() + "\t" + mutid + "\tcontig dropped due to ambiguous base (N), aborting mutation.\n")
            return None, None

        if is_transloc and re.search('N', trn_maxcontig.seq):
            sys.stderr.write("WARN\t" + now() + "\t" + mutid + "\tcontig dropped due to ambiguous base (N), aborting mutation.\n")
            return None, None

        if maxcontig is None:
            sys.stderr.write("WARN\t" + now() + "\t" + mutid + "\tmaxcontig has length 0, aborting mutation!\n")
            return None, None

        if is_transloc and trn_maxcontig is None:
            sys.stderr.write("WARN\t" + now() + "\t" + mutid + "\ttransloc maxcontig has length 0, aborting mutation!\n")
            return None, None

        print "INFO\t" + now() + "\t" + mutid + "\tbest contig length:", sorted(contigs)[-1].len

        if is_transloc:
            print "INFO\t" + now() + "\t" + mutid + "\tbest transloc contig length:", sorted(trn_contigs)[-1].len

        # trim contig to get best ungapped aligned region to ref.
        maxcontig, refseq, alignstats, refstart, refend, qrystart, qryend, tgtstart, tgtend = trim_contig(mutid, chrom, start, end, maxcontig, reffile)

        if maxcontig is None:
            sys.stderr.write("WARN\t" + now() + "\t" + mutid + "\tbest contig did not have sufficent match to reference, aborting mutation.\n")
            return None, None
    
        print "INFO\t" + now() + "\t" + mutid + "\tstart, end, tgtstart, tgtend, refstart, refend:", start, end, tgtstart, tgtend, refstart, refend

        if is_transloc:
            trn_maxcontig, trn_refseq, trn_alignstats, trn_refstart, trn_refend, trn_qrystart, trn_qryend, trn_tgtstart, trn_tgtend = trim_contig(mutid, trn_chrom, trn_start, trn_end, trn_maxcontig, reffile)
            print "INFO\t" + now() + "\t" + mutid + "\ttrn_start, trn_end, trn_tgtstart, trn_tgtend, trn_refstart, trn_refend:", trn_start, trn_end, trn_tgtstart, trn_tgtend, trn_refstart, trn_refend

        # is there anough room to make mutations?
        if maxcontig.len < 3*int(args.maxlibsize):
            sys.stderr.write("WARN\t" + now() + "\t" + mutid + "\tbest contig too short to make mutation!\n")
            return None, None

        if is_transloc and trn_maxcontig.len < 3*int(args.maxlibsize):
            sys.stderr.write("WARN\t" + now() + "\t" + mutid + "\tbest transloc contig too short to make mutation!\n")
            return None, None

        # make mutation in the largest contig
        mutseq = ms.MutableSeq(maxcontig.seq)

        if is_transloc: trn_mutseq = ms.MutableSeq(trn_maxcontig.seq)

        # support for multiple mutations
        for actionstr in actions:
            a = actionstr.split()
            action = a[0]

            print "INFO\t" + now() + "\t" + mutid + "\taction: ", actionstr, action

            # Per-action parameters, reset for each action in the list.
            insseqfile = None
            insseq = ''
            tsdlen = 0  # target site duplication length
            ndups = 0   # number of tandem dups
            dsize = 0.0 # deletion size fraction
            dlen = 0
            ins_motif = None

            if action == 'INS':
                assert len(a) > 1 # insertion syntax: INS <file.fa> [optional TSDlen]
                insseqfile = a[1]
                if not (os.path.exists(insseqfile) or insseqfile == 'RND'): # not a file... is it a sequence? (support indel ins.)
                    assert re.search('^[ATGCatgc]*$',insseqfile) # make sure it's a sequence
                    insseq = insseqfile.upper()
                    insseqfile = None
                if len(a) > 2: # field 5 for insertion is TSD Length
                    tsdlen = int(a[2])

                if len(a) > 3: # field 5 for insertion is motif, format = 'NNNN/NNNN where / is cut site
                    ins_motif = a[3]
                    assert '^' in ins_motif, 'insertion motif specification requires cut site defined by ^'

            if action == 'DUP':
                if len(a) > 1:
                    ndups = int(a[1])
                else:
                    ndups = 1

            if action == 'DEL':
                if len(a) > 1:
                    dsize = float(a[1])
                    if dsize > 1.0: # if DEL size is not a fraction, interpret as bp
                        # since DEL 1 is default, if DEL 1 is specified, interpret as 1 bp deletion
                        dlen = int(dsize)
                        dsize = 1.0
                else:
                    dsize = 1.0

            if action == 'TRN':
                pass


            # Log the contig sequence before applying the mutation.
            logfile.write(">" + chrom + ":" + str(refstart) + "-" + str(refend) + " BEFORE\n" + str(mutseq) + "\n")

            if action == 'INS':
                # Default insertion point is the contig midpoint unless a
                # motif-defined cut site was requested.
                inspoint = mutseq.length()/2
                if ins_motif is not None:
                    inspoint = mutseq.find_site(ins_motif, left_trim=int(args.maxlibsize), right_trim=int(args.maxlibsize))

                if insseqfile: # seq in file
                    if insseqfile == 'RND':
                        assert args.inslib is not None # insertion library needs to exist
                        insseqfile = random.choice(args.inslib.keys())
                        print "INFO\t" + now() + "\t" + mutid + "\tchose sequence from insertion library: " + insseqfile
                        mutseq.insertion(inspoint, args.inslib[insseqfile], tsdlen)

                    else:
                        mutseq.insertion(inspoint, singleseqfa(insseqfile, mutid=mutid), tsdlen)

                else: # seq is input
                    mutseq.insertion(inspoint, insseq, tsdlen)

                logfile.write("\t".join(('ins',chrom,str(refstart),str(refend),action,str(mutseq.length()),str(inspoint),str(insseqfile),str(tsdlen),str(svfrac))) + "\n")

            elif action == 'INV':
                # Invert everything except a maxlibsize margin at each end.
                invstart = int(args.maxlibsize)
                invend = mutseq.length() - invstart
                mutseq.inversion(invstart,invend)
                logfile.write("\t".join(('inv',chrom,str(refstart),str(refend),action,str(mutseq.length()),str(invstart),str(invend),str(svfrac))) + "\n")

            elif action == 'DEL':
                delstart = int(args.maxlibsize)
                delend = mutseq.length() - delstart
                if dlen == 0: # bp size not specified, delete fraction of contig
                    dlen = int((float(delend-delstart) * dsize)+0.5) 

                # Center the deletion within the allowed window.
                dadj = delend-delstart-dlen
                if dadj < 0:
                    dadj = 0
                    sys.stderr.write("WARN\t" + now() + "\t" + mutid + "\twarning: deletion of length 0\n")

                delstart += dadj/2
                delend   -= dadj/2

                mutseq.deletion(delstart,delend)
                logfile.write("\t".join(('del',chrom,str(refstart),str(refend),action,str(mutseq.length()),str(delstart),str(delend),str(dlen),str(svfrac))) + "\n")

            elif action == 'DUP':
                dupstart = int(args.maxlibsize)
                dupend = mutseq.length() - dupstart
                mutseq.duplication(dupstart,dupend,ndups)
                logfile.write("\t".join(('dup',chrom,str(refstart),str(refend),action,str(mutseq.length()),str(dupstart),str(dupend),str(ndups),str(svfrac))) + "\n")

            elif action == 'TRN':
                # Fuse the two contigs at their midpoints.
                mutseq.fusion(mutseq.length()/2, trn_mutseq, trn_mutseq.length()/2)
                logfile.write("\t".join(('trn',chrom,str(refstart),str(refend),action,str(mutseq.length()),trn_chrom,str(trn_refstart),str(trn_refend),str(trn_mutseq.length()),str(svfrac))) + "\n")

            else:
                raise ValueError("ERROR\t" + now() + "\t" + mutid + "\t: mutation not one of: INS,INV,DEL,DUP,TRN\n")

            logfile.write(">" + chrom + ":" + str(refstart) + "-" + str(refend) +" AFTER\n" + str(mutseq) + "\n")

        pemean, pesd = float(args.ismean), float(args.issd) 
        print "INFO\t" + now() + "\t" + mutid + "\tset paired end mean distance: " + str(args.ismean)
        print "INFO\t" + now() + "\t" + mutid + "\tset paired end distance stddev: " + str(args.issd)

        # simulate reads
        (fq1, fq2) = runwgsim(maxcontig, mutseq.seq, svfrac, actions, exclude, pemean, pesd, args.tmpdir, mutid=mutid, seed=args.seed, trn_contig=trn_maxcontig)

        outreads = aligners.remap_fastq(args.aligner, fq1, fq2, args.refFasta, outbam_mutsfile, alignopts, mutid=mutid, threads=1)

        if outreads == 0:
            sys.stderr.write("WARN\t" + now() + "\t" + mutid + "\toutbam " + outbam_mutsfile + " has no mapped reads!\n")
            return None, None

        print "INFO\t" + now() + "\t" + mutid + "\ttemporary bam: " + outbam_mutsfile

        exclude.close()
        bamfile.close()

        return outbam_mutsfile, exclfile

    except Exception, e:
        # Catch-all so one failed interval does not kill the whole batch.
        sys.stderr.write("*"*60 + "\nencountered error in mutation spikein: " + bedline + "\n")
        traceback.print_exc(file=sys.stderr)
        sys.stderr.write("*"*60 + "\n")
        return None, None

Example 20

Project: cgat
Source File: timeseries2diffgenes.py
View license
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.

    Runs differential-expression testing on a count table whose columns are
    named `condition`.`time`.`replicate`, either per-timepoint against time 0
    (--method=timepoint) or between conditions at matched timepoints
    (--method=condition). Result tables are written to --results-directory.
    """

    if argv is None:
        argv = sys.argv

    # setup command line parser
    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    parser.add_option("-t", "--test", dest="test", type="string",
                      help="supply help")

    parser.add_option("--results-directory", dest="res_dir",
                      type="string", help="directory to write results"
                      "tables to")

    parser.add_option("--alpha", dest="alpha", type="string",
                      help="statistical significance p-value threshold")

    parser.add_option("--method", dest="method", type="string",
                      help="analysis design. "
                      "either timepoint or condition")

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv)

    try:
        infile = argv[-1]
        IOTools.openFile(infile, "r")
        # check for compression state
        if infile.split(".")[-1] == "gz":
            comp = "gzip"
        else:
            comp = None

    except IOError:
        # no readable file argument -- fall back to stdin
        infile = options.stdin
        # check for compression state
        if infile.name.split(".")[-1] == "gz":
            comp = "gzip"
        else:
            comp = None

    alpha = float(options.alpha)
    res_dir = options.res_dir

    count_table = pd.read_table(infile,
                                sep="\t",
                                index_col=0,
                                header=0,
                                compression=comp)
    # column names are expected in the form `condition`.`time`.`replicate`
    columns = count_table.columns
    conditions = set([x.split(".")[0] for x in columns])
    times = set([x.split(".")[1] for x in columns])

    data_dict = {}
    cond_times = [x for x in itertools.product(conditions, times)]
    base_col = {}
    time_dict = {}

    if options.method == "timepoint":

        # assumes all column names are in the form
        # `condition`:`time`:`replicate`
        # use `condition`.`time` as dictionary keys

        for x in cond_times:
            c_t = "%s.%s" % (x[0], x[1])
            # BUGFIX: was re.search(c_t, k); the "." separator in c_t is a
            # regex wildcard and could over-match column names -- use a
            # literal substring test instead.
            cols = [k for k in count_table.columns if c_t in k]
            if x[1] == '000':
                base_col[c_t] = count_table[cols]
            else:
                time_dict[c_t] = count_table[cols]

        # pair every baseline (time 000) frame with every later timepoint
        for bt in itertools.product(list(base_col.keys()),
                                    list(time_dict.keys())):
            df = pd.merge(left=base_col[bt[0]],
                          right=time_dict[bt[1]],
                          how='outer',
                          left_index=True,
                          right_index=True)
            time = int(bt[1].split(".")[1])
            data_dict["%s_0_%i" % (bt[0].split(".")[0],
                                   time)] = df

        for each in list(data_dict.keys()):
            df_ = data_dict[each]
            outfile = "%s/%s-time.tsv" % (res_dir,
                                          each)
            res_frame = TS.timepointDESeq2(df_,
                                           each,
                                           alpha,
                                           res_dir)
            res_frame.to_csv(outfile,
                             sep="\t",
                             index_label="gene_id")

    elif options.method == "condition":

        # assumes all column names are in the form
        # `condition`:`time`:`replicate`
        # use `condition`.`time` as dictionary keys

        for x in cond_times:
            c_t = "%s.%s" % (x[0], x[1])
            # BUGFIX: literal substring match instead of re.search (see above)
            cols = [k for k in count_table.columns if c_t in k]
            if int(x[1]) == 0:
                base_col[c_t] = count_table[cols]
            else:
                time_dict[c_t] = count_table[cols]

        # make a dataframe for each 0:time point combination
        # for all conditions, index on `condition:0_time`

        base_keys = list(base_col.keys())
        time_keys = list(time_dict.keys())
        for k in conditions:
            for x in itertools.product(base_keys, time_keys):
                # BUGFIX: literal containment instead of re.search(k, ...):
                # condition names are used verbatim, not as regex patterns
                if k in x[0] and k in x[1]:
                    df = pd.merge(left=base_col[x[0]],
                                  right=time_dict[x[1]],
                                  how='outer',
                                  left_index=True,
                                  right_index=True)
                    time = int(x[1].split(".")[1])
                    data_dict["%s.0_%i" % (x[0].split(".")[0],
                                           time)] = df
                else:
                    pass

        time_span = set([x.split(".")[1] for x in list(data_dict.keys())])

        # merge the two conditions' frames for every pairwise comparison
        all_dict = {}
        for cond in itertools.combinations(conditions, 2):
            c1 = cond[0]
            c2 = cond[1]
            for x in time_span:
                key1 = "%s.%s" % (c1, x)
                key2 = "%s.%s" % (c2, x)
                df = pd.merge(left=data_dict[key1],
                              right=data_dict[key2],
                              how='outer',
                              left_index=True,
                              right_index=True)
                all_dict["%s_%s.%s-diff" % (c1, c2, x)] = df

        for each in list(all_dict.keys()):

            df = all_dict[each]
            outfile = "%s/%s-cond.tsv" % (res_dir,
                                          each)
            res_frame = TS.conditionDESeq2(df,
                                           each,
                                           alpha,
                                           res_dir)
            res_frame.to_csv(outfile, sep="\t", index_label="gene_id")

    # write footer and output benchmark information.
    E.Stop()

Example 21

Project: bcbio-nextgen
Source File: collectl.py
View license
def _parse_raw(fp, start_tstamp, end_tstamp):
    """Parse a raw collectl capture into (hardware, data).

    hardware: {'num_cpus': ..., 'memory': ...} read from the file header.
    data: {tstamp: {'cpu': ..., 'disk': ..., 'mem': ..., 'net': ..., 'proc': ...}}
    containing only samples with start_tstamp <= tstamp <= end_tstamp.
    """
    import progressbar
    widgets = [
        os.path.basename(fp.name), ': ',
        progressbar.Bar(marker='-', left='[', right=']'), ' ',
        progressbar.Percentage(), ' ', progressbar.ETA(),
    ]
    # We don't know what the file's uncompressed size will wind up being,
    # so take an educated guess and ignore the AssertionError later on
    # if it winds up being bigger than we guess.
    bar = progressbar.ProgressBar(
        widgets=widgets, maxval=os.path.getsize(fp.name) * 15)
    bar.start()
    bar.update(0)

    tstamp = 0
    hardware = {}
    data = {}
    for line in fp:
        # Sample separator, e.g. ">>> 1403626642.001 <<<" (dot now escaped;
        # it was previously an any-char wildcard).
        matches = re.search(r'^>>> (\d+)\.\d+ <<<', line)
        if matches:
            try:
                bar.update(fp.tell())
            except AssertionError:
                pass

            tstamp = int(matches.group(1))
            # BUGFIX: this condition used `or`, which is true for every
            # timestamp and therefore created (empty) entries for samples
            # outside the requested window.
            if start_tstamp <= tstamp <= end_tstamp:
                data[tstamp] = {
                    'disk': {},
                    'mem': {},
                    'net': {},
                    'proc': {},
                }
            continue

        # Header lines describing the host hardware.
        if line.startswith('# SubSys: '):
            matches = re.search(r'\sNumCPUs: (\d+)\s+', line)
            if matches:
                hardware['num_cpus'] = int(matches.group(1))
            continue
        if line.startswith('# Kernel: '):
            matches = re.search(r'\sMemory: (\d+)\s+kB', line)
            if matches:
                # kB -> GB, rounded up.
                hardware['memory'] = int(math.ceil(float(matches.group(1)) / math.pow(1024.0, 2.0)))
            continue

        # Skip data lines belonging to out-of-window samples.
        if (tstamp < start_tstamp) or (tstamp > end_tstamp):
            continue

        if line.startswith('cpu '):
            # Don't know what the last two fields are, but they
            # always seem to be 0, and collectl doesn't parse them
            # in formatit::dataAnalyze().
            # (local renamed from `sys` to avoid shadowing the sys module)
            (title, user, nice, system, idle, wait, irq,
             soft, steal) = line.split()[:9]
            data[tstamp]['cpu'] = {
                 'user': user,
                 'nice': nice,
                 'sys': system,
                 'idle': idle,
                 'wait': wait,
                 'irq': irq,
                 'soft': soft,
                 'steal': steal,
            }
        elif line.startswith('disk '):
            # Fields follow the /proc/diskstats layout.
            (title, major, minor, node,
             num_reads, reads_merged, sectors_read, msec_spent_reading,
             num_writes, writes_merged, sectors_written, msec_spent_writing,
             iops_in_progress, msec_spent_on_iops,
             weighted_msec_spent_on_iops) = line.split()
            data[tstamp]['disk'][node] = {
                'num_reads': num_reads,
                'reads_merged': reads_merged,
                'sectors_read': sectors_read,
                'msec_spent_reading': msec_spent_reading,
                'num_writes': num_writes,
                'writes_merged': writes_merged,
                'sectors_written': sectors_written,
                'msec_spent_writing': msec_spent_writing,
                'iops_in_progress': iops_in_progress,
                'msec_spent_on_iops': msec_spent_on_iops,
                'weighted_msec_spent_on_iops': weighted_msec_spent_on_iops,
            }
        elif line.startswith('Net '):
            # Older kernel versions don't have whitespace after
            # the interface colon:
            #
            #   Net   eth0:70627391
            #
            # unlike newer kernels:
            #
            #   Net   eth0: 415699541
            line = re.sub(r'^(Net\s+[^:]+):', r'\1: ', line)

            (title, iface,
             rbyte, rpkt, rerr, rdrop, rfifo,
             rframe, rcomp, rmulti,
             tbyte, tpkt, terr, tdrop, tfifo,
             tcoll, tcarrier, tcomp) = line.split()
            iface = iface.replace(':', '')
            data[tstamp]['net'][iface] = {
                 'rbyte': rbyte,
                 'rpkt': rpkt,
                 'rerr': rerr,
                 'rdrop': rdrop,
                 'rfifo': rfifo,
                 'rframe': rframe,
                 'rcomp': rcomp,
                 'rmulti': rmulti,
                 'tbyte': tbyte,
                 'tpkt': tpkt,
                 'terr': terr,
                 'tdrop': tdrop,
                 'tfifo': tfifo,
                 'tcoll': tcoll,
                 'tcarrier': tcarrier,
                 'tcomp': tcomp,
            }
        elif line.startswith('MemTotal:'):
            title, amount, unit = line.split()
            data[tstamp]['mem']['total'] = amount
        elif line.startswith('MemFree:'):
            title, amount, unit = line.split()
            data[tstamp]['mem']['free'] = amount
        elif line.startswith('Buffers:'):
            title, amount, unit = line.split()
            data[tstamp]['mem']['buffers'] = amount
        elif line.startswith('Cached:'):
            title, amount, unit = line.split()
            data[tstamp]['mem']['cached'] = amount
        # We don't currently do anything with process data,
        # so don't bother parsing it.
        elif False and line.startswith('proc:'):
            title_pid, rest = line.split(None, 1)
            title, pid = title_pid.split(':')

            if pid not in data[tstamp]['proc']:
                data[tstamp]['proc'][pid] = {}

            if rest.startswith('cmd '):
                title, cmd = rest.split(None, 1)
                data[tstamp]['proc'][pid]['cmd'] = cmd
            elif rest.startswith('io read_bytes: '):
                value = rest.split(':')[1].strip()
                data[tstamp]['proc'][pid]['read_bytes'] = value
            elif rest.startswith('io write_bytes: '):
                value = rest.split(':')[1].strip()
                data[tstamp]['proc'][pid]['write_bytes'] = value

    bar.finish()
    return hardware, data

Example 22

Project: ScraXBRL
Source File: XMLExtract.py
View license
	def make_calc_tree(self, calc_arcs, calc_locs, role_name, title):
		"""Generate a calculation tree for a specific role 
		   and create the ordered priority and weight."""
		
		root = []
		to_list = []
		from_list = []
		from_to_pair = [] #(parent, child)
		locs_pairs = []
		for cl in calc_locs:
			tmp_cl_raw = cl.get('xlink:href')
			tmp_cl = os.path.splitext(tmp_cl_raw)[1]
			if '#' in tmp_cl:
				tmp_cl_idx = tmp_cl.index('#')
				tmp_cl = tmp_cl[tmp_cl_idx+1:]
				pfx_loc = self.get_pfx_gen(tmp_cl, 'ins_t')
				name_loc = self.get_name_gen(tmp_cl, 'ins_t')
				locs_pairs.append((pfx_loc, name_loc))
		for i in calc_arcs:
			#Start to list
			xlink_to = i.get('xlink:to')
			pfx_to = self.get_pfx_gen(xlink_to, 'ins_t')
			name_to = self.get_name_gen(xlink_to, 'ins_t')
			if not pfx_to and not name_to:
				name_to = xlink_to
				for lp in locs_pairs:
					if name_to == lp[1]:
						pfx_to = lp[0]
						break
			if not pfx_to:
				continue
			if pfx_to and not name_to:
				tmp_to_store = xlink_to
				tmp_to = xlink_to.lower()
				idx_name_s = tmp_to.index(pfx_to)
				xlink_to = xlink_to[idx_name_s:]
				try:
					reg_xt = '(?<={0})[^\W_]*'.format(pfx_to)
					reg_xt_ex = re.search(reg_xt, xlink_to)
					name_to = reg_xt_ex.group(0)
				except AttributeError:
					try:
						reg_xt = '(?<={0})[^\W_]*'.format(pfx_to.upper())
						reg_xt_ex = re.search(reg_xt, xlink_to)
						name_to = reg_xt_ex.group(0)
					except AttributeError:
						pass
			if not name_to:
				tmp_pfx = self.get_name_gen(xlink_to, 'ins')
				if tmp_pfx:
					tmp_nt = xlink_to.lower()
					end_pfx = tmp_nt.index(tmp_pfx) + len(tmp_pfx)
					name_to = xlink_to[end_pfx:]
					if '_' in name_to:
						xt_idx = name_to.index('_')
						name_to = name_to[:xt_idx]
				if not name_to:
					if '_' not in xlink_to and ':' not in xlink_to and '-' not in xlink_to:
						if isinstance(xlink_to, str):
							if tmp_pfx == None:
								name_to = xlink_to
					if not name_to:
						if pfx_to:
							if '_' not in xlink_to and ':' not in xlink_to and '-' not in xlink_to:
								if isinstance(xlink_to, str):
									name_to = xlink_to
						if not name_to:
							continue
			#Get order and weight
			order = i.get('order')
			order = float(order)
			weight = i.get('weight')
			weight = float(weight)
			if name_to not in to_list:
				to_list.append((pfx_to, name_to, order, weight))
			#Start From List
			xlink_from = i.get('xlink:from')
			pfx_from = self.get_pfx_gen(xlink_from, 'ins_t')
			name_from = self.get_name_gen(xlink_from, 'ins_t')
			if not pfx_from and not name_from:
				name_from = xlink_from
				for lp in locs_pairs:
					if name_from == lp[1]:
						pfx_from = lp[0]
						break
			if not pfx_from:
				continue
			if pfx_from and not name_from:
				tmp_from_store = xlink_from
				pfx_from = self.get_pfx_gen(xlink_from, 'ins_t')
				tmp_from = xlink_from.lower()
				idx_name_s = tmp_from.index(pfx_from)
				xlink_from = xlink_from[idx_name_s:]
				try:
					reg_xt = '(?<={0})[^\W_]*'.format(pfx_from)
					reg_xt_ex = re.search(reg_xt, xlink_from)
					name_from = reg_xt_ex.group(0)
				except:
					try:
						reg_xt = '(?<={0})[^\W_]*'.format(pfx_to.upper())
						reg_xt_ex = re.search(reg_xt, xlink_from)
						name_from = reg_xt_ex.group(0)
					except AttributeError:
						pass
			if not name_from:
				tmp_pfx = self.get_name_gen(xlink_from, 'ins')
				if tmp_pfx:
					tmp_nt = xlink_from.lower()
					end_pfx = tmp_nt.index(tmp_pfx) + len(tmp_pfx)
					name_from = xlink_from[end_pfx:]
					if '_' in name_from:
						xt_idx = name_from.index('_')
						name_from = name_from[:xt_idx]
				if not name_from:
					if '_' not in xlink_from and ':' not in xlink_from and '-' not in xlink_from:
						if isinstance(xlink_from, str):
							if tmp_pfx == None:
								name_from = xlink_from
					if not name_from:
						if pfx_from:
							if '_' not in xlink_from and ':' not in xlink_from and '-' not in xlink_from:
								if isinstance(xlink_from, str):
									name_from = xlink_from
						if not name_from:
							continue
			if name_from not in from_list:
				from_list.append((pfx_from, name_from, order, weight))
			from_to_pair.append((name_from, name_to, order, weight))
		for i in from_list:
			in_to_list = False
			for x in to_list:
				if i[:2] == x[:2]:
					in_to_list = True
			if not in_to_list:	
				root.append(i)
		root = list(set(root))
		root.sort(key=lambda tup: tup[2])
		to_list.sort(key=lambda tup: tup[2])
		from_list.sort(key=lambda tup: tup[2])
		if len(root) == 0:
			if len(to_list) > 0 or len(from_list) > 0:
				if len(to_list) == 1 and len(from_list) == 0:
					root = to_list
					root = list(set(root))
				elif len(from_list) == 1 and len(to_list) == 0:
					root = from_list
					root = list(set(root))
				elif len(from_list) == 0:
					root = to_list
					root = list(set(root))
				elif len(to_list) == 0:
					root = from_list
					root = list(set(root))
				elif len(from_list) == 1 and len(to_list) > 1:
					root = from_list
					root = list(set(root))
				elif len(from_list) < len(to_list):
					root = from_list
					root = list(set(root))
				elif to_list == from_list:
					root = from_list
					root = list(set(root))
				else:
					pass
		self.data['cal']['roles'][role_name] = OrderedDict()
		self.data['cal']['roles'][role_name]['title_name'] = title
		self.data['cal']['roles'][role_name]['tree'] = OrderedDict()
		self.data['cal']['roles'][role_name]['from_to'] = from_to_pair
		self.data['cal']['roles'][role_name]['root'] = root
		unique_tmp = from_list + to_list
		unique = list(set(unique_tmp))
		self.data['cal']['roles'][role_name]['unique'] = unique
		key_ctx_list = []
		for i in root:
			self.data['cal']['roles'][role_name]['tree'][i[1]] = OrderedDict()
			self.data['cal']['roles'][role_name]['tree'][i[1]]['pfx'] = i[0]
			self.data['cal']['roles'][role_name]['tree'][i[1]]['sub'] = OrderedDict()
			try:
				tmp_root = self.data['ins']['facts'][i[0]][i[1].lower()]['val_by_date']
			except KeyError:
				try:
					tmp_root = self.data['ins']['facts'][self.symbol.lower()][i[1].lower()]['val_by_date']
				except KeyError:
					try:
						tmp_root = self.data['ins']['facts']['us-gaap'][i[1].lower()]['val_by_date']
					except KeyError:
						continue
			root_val = OrderedDict()
			tr_keys = tmp_root.keys()
			tmp_ctx_list = []
			for trk in tr_keys:
				tmps_val = tmp_root[trk]
				root_val[trk] = tmps_val[0][0]
				if (trk, tmps_val[0][1]) not in tmp_ctx_list:
					tmp_ctx_list.append((trk, tmps_val[0][1]))
			if (i[:2], tuple(tmp_ctx_list)) not in key_ctx_list:
				key_ctx_list.append((i[:2], tuple(tmp_ctx_list)))
			self.data['cal']['roles'][role_name]['tree'][i[1]]['val'] = root_val
			label = self.find_label_str((i[0], i[1]))
			self.data['cal']['roles'][role_name]['tree'][i[1]]['label'] = label
		for i in to_list:
			try:
				line = self.get_lineage(root, from_to_pair, i[1])
			except RuntimeError:
				self.data['no_lineage'].append(i)
				return False
			line_root = line[0]
			use_ctx = None
			for kcl in key_ctx_list:
				if kcl[0][1] == line_root:
					use_ctx = kcl[1]
			val = OrderedDict()
			if use_ctx == None:
				val = None
			else:
				for uc in use_ctx:
					try:
						tmp_val = self.data['ins']['facts'][i[0]][i[1].lower()]['val_by_date'][uc[0]]
					except KeyError:
						continue
					vals = None
					for tv in tmp_val:
						if tv[1] == uc[1]:
							vals = (tv[0])
					val[uc[0]] = vals
			self.gen_dict_path('cal', line, role_name, i[0], (i[2], i[3], val))

Example 23

Project: ScraXBRL
Source File: XMLExtract.py
View license
	def make_pre_tree(self, pre_arcs, pre_locs, role_name, title):
		"""Generate a presentation tree for a specific role
		   and populate it with values.

		Arguments:
		pre_arcs -- presentationArc elements belonging to this role
		pre_locs -- loc elements for this role; their hrefs are used as a
		   fallback to recover (prefix, name) pairs when an arc endpoint
		   label cannot be parsed directly
		role_name -- key under self.data['pre']['roles'] to populate
		title -- human-readable role title stored with the tree
		"""
		root = []
		to_list = []
		from_list = []
		from_to_pair = []  # (parent, child, order, preferred label)
		locs_pairs = []  # (prefix, name) pairs recovered from loc hrefs
		for pl in pre_locs:
			tmp_pl_raw = pl.get('xlink:href')
			tmp_pl = os.path.splitext(tmp_pl_raw)[1]
			if '#' in tmp_pl:
				# The fragment after '#' names the concept (e.g. us-gaap_Assets).
				tmp_pl_idx = tmp_pl.index('#')
				tmp_pl = tmp_pl[tmp_pl_idx+1:]
				pfx_loc = self.get_pfx_gen(tmp_pl, 'ins_t')
				name_loc = self.get_name_gen(tmp_pl, 'ins_t')
				locs_pairs.append((pfx_loc, name_loc))
		for i in pre_arcs:
			# Get preferred label; attribute casing/namespacing varies between
			# filings, so try every known spelling before defaulting to 'label'.
			label_str = i.get('preferredLabel')
			if label_str is None:
				label_str = i.get('preferredlabel')
				if label_str is None:
					label_str = i.get('xlink:preferredLabel')
					if label_str is None:
						label_str = i.get('xlink:preferredlabel')
			if label_str is None:
				label_str = 'label'
			else:
				label_str = os.path.split(label_str)[1]
			# Resolve the arc's 'to' endpoint into (prefix, name).
			xlink_to = i.get('xlink:to')
			pfx_to = self.get_pfx_gen(xlink_to, 'ins_t')
			name_to = self.get_name_gen(xlink_to, 'ins_t')
			if not pfx_to and not name_to:
				# Could not parse the label directly; fall back to the loc table.
				name_to = xlink_to
				for lp in locs_pairs:
					if name_to == lp[1]:
						pfx_to = lp[0]
						break
			if not pfx_to:
				continue
			if pfx_to and not name_to:
				tmp_to = xlink_to.lower()
				idx_name_s = tmp_to.index(pfx_to)
				xlink_to = xlink_to[idx_name_s:]
				try:
					# Word characters (excluding '_') immediately after the prefix.
					reg_xt = r'(?<={0})[^\W_]*'.format(pfx_to)
					reg_xt_ex = re.search(reg_xt, xlink_to)
					name_to = reg_xt_ex.group(0)
				except AttributeError:
					try:
						# Retry with the upper-cased prefix (some filings
						# upper-case the prefix inside arc labels).
						reg_xt = r'(?<={0})[^\W_]*'.format(pfx_to.upper())
						reg_xt_ex = re.search(reg_xt, xlink_to)
						name_to = reg_xt_ex.group(0)
					except AttributeError:
						pass
			if not name_to:
				tmp_pfx = self.get_name_gen(xlink_to, 'ins')
				if tmp_pfx:
					tmp_nt = xlink_to.lower()
					end_pfx = tmp_nt.index(tmp_pfx) + len(tmp_pfx)
					name_to = xlink_to[end_pfx:]
					if '_' in name_to:
						xt_idx = name_to.index('_')
						name_to = name_to[:xt_idx]
				if not name_to:
					# Last resort: accept the raw label when it carries no
					# separator characters at all.
					if '_' not in xlink_to and ':' not in xlink_to and '-' not in xlink_to:
						if isinstance(xlink_to, str):
							if tmp_pfx is None:
								name_to = xlink_to
					if not name_to:
						if pfx_to:
							if '_' not in xlink_to and ':' not in xlink_to and '-' not in xlink_to:
								if isinstance(xlink_to, str):
									name_to = xlink_to
						if not name_to:
							continue
			#Get order
			order = i.get('order')
			# NOTE(review): to_list holds 4-tuples, so testing the bare name
			# string for membership is always False and duplicates may be
			# appended; preserved as-is because downstream code de-duplicates
			# with list(set(...)).
			if name_to not in to_list:
				to_list.append((pfx_to, name_to, order, label_str))
			# Resolve the arc's 'from' endpoint the same way.
			xlink_from = i.get('xlink:from')
			pfx_from = self.get_pfx_gen(xlink_from, 'ins_t')
			name_from = self.get_name_gen(xlink_from, 'ins_t')
			if not pfx_from and not name_from:
				name_from = xlink_from
				for lp in locs_pairs:
					if name_from == lp[1]:
						pfx_from = lp[0]
						break
			if not pfx_from:
				continue
			if pfx_from and not name_from:
				tmp_from = xlink_from.lower()
				idx_name_s = tmp_from.index(pfx_from)
				xlink_from = xlink_from[idx_name_s:]
				try:
					reg_xt = r'(?<={0})[^\W_]*'.format(pfx_from)
					reg_xt_ex = re.search(reg_xt, xlink_from)
					name_from = reg_xt_ex.group(0)
				except AttributeError:
					try:
						# BUGFIX: this retry previously formatted the pattern
						# with pfx_to.upper() (copy-paste from the 'to'
						# branch); the 'from' prefix is the one being matched.
						reg_xt = r'(?<={0})[^\W_]*'.format(pfx_from.upper())
						reg_xt_ex = re.search(reg_xt, xlink_from)
						name_from = reg_xt_ex.group(0)
					except AttributeError:
						pass
			if not name_from:
				tmp_pfx = self.get_name_gen(xlink_from, 'ins')
				if tmp_pfx:
					tmp_nt = xlink_from.lower()
					end_pfx = tmp_nt.index(tmp_pfx) + len(tmp_pfx)
					name_from = xlink_from[end_pfx:]
					if '_' in name_from:
						xt_idx = name_from.index('_')
						name_from = name_from[:xt_idx]
				if not name_from:
					if '_' not in xlink_from and ':' not in xlink_from and '-' not in xlink_from:
						if isinstance(xlink_from, str):
							if tmp_pfx is None:
								name_from = xlink_from
					if not name_from:
						if pfx_from:
							if '_' not in xlink_from and ':' not in xlink_from and '-' not in xlink_from:
								if isinstance(xlink_from, str):
									name_from = xlink_from
						if not name_from:
							continue
			if name_from not in from_list:
				from_list.append((pfx_from, name_from, order, label_str))
			from_to_pair.append((name_from, name_to, order, label_str))
		# Roots are concepts that appear as a parent but never as a child.
		for i in from_list:
			in_to_list = False
			for x in to_list:
				if i[:2] == x[:2]:
					in_to_list = True
			if not in_to_list:
				root.append(i)
		root = list(set(root))
		root.sort(key=lambda tup: tup[2])
		to_list.sort(key=lambda tup: tup[2])
		from_list.sort(key=lambda tup: tup[2])
		if len(root) == 0:
			# No clear root found; fall back to heuristics over the two lists.
			if len(to_list) > 0 or len(from_list) > 0:
				if len(to_list) == 1 and len(from_list) == 0:
					root = to_list
					root = list(set(root))
				elif len(from_list) == 1 and len(to_list) == 0:
					root = from_list
					root = list(set(root))
				elif len(from_list) == 0:
					root = to_list
					root = list(set(root))
				elif len(to_list) == 0:
					root = from_list
					root = list(set(root))
				elif len(from_list) == 1 and len(to_list) > 1:
					root = from_list
					root = list(set(root))
				elif len(from_list) < len(to_list):
					root = from_list
					root = list(set(root))
				elif to_list == from_list:
					root = from_list
					root = list(set(root))
				else:
					pass
		# Store the skeleton for this role.
		self.data['pre']['roles'][role_name] = OrderedDict()
		self.data['pre']['roles'][role_name]['title_name'] = title
		self.data['pre']['roles'][role_name]['tree'] = OrderedDict()
		self.data['pre']['roles'][role_name]['from_to'] = from_to_pair
		self.data['pre']['roles'][role_name]['root'] = root
		unique_tmp = from_list + to_list
		unique = list(set(unique_tmp))
		self.data['pre']['roles'][role_name]['unique'] = unique
		# Create a tree entry (prefix, empty 'sub', label) for every root.
		for i in root:
			self.data['pre']['roles'][role_name]['tree'][i[1]] = OrderedDict()
			self.data['pre']['roles'][role_name]['tree'][i[1]]['pfx'] = i[0]
			self.data['pre']['roles'][role_name]['tree'][i[1]]['sub'] = OrderedDict()
			try:
				label = self.find_label_str((i[0], i[1]))
			except KeyError:
				label = OrderedDict()
				label[0] = i[1]
			if len(label) > 1:
				# Multiple candidate labels: collapse duplicates, then prefer
				# the arc's preferred label role, falling back to 'label'.
				label_list = []
				try:
					label_keys = label.keys()
					for k in label_keys:
						if label[k] not in label_list:
							label_list.append(label[k])
				except AttributeError:
					label_list.append(label)
				if len(label_list) == 1:
					label = label_list[0]
				else:
					try:
						label = label[i[3]]
					except KeyError:
						label = label['label']
			else:
				# NOTE(review): indexing keys() relies on Python 2 dict.keys()
				# returning a list; breaks on Python 3 — confirm target runtime.
				label_keys = label.keys()
				label = label[label_keys[0]]
			self.data['pre']['roles'][role_name]['tree'][i[1]]['label'] = label
		# Attach every child concept under its lineage path with its values.
		for i in to_list:
			try:
				line = self.get_lineage(root, from_to_pair, i[1])
			except RuntimeError:
				self.data['no_lineage'].append(i)
				continue
			# Find the first ancestor in the lineage that has instance facts.
			line_root = None
			ins_keys = self.data['ins']['facts'].keys()
			for ik in ins_keys:
				found_lr = False
				for l in line:
					try:
						self.data['ins']['facts'][ik][l.lower()]
						line_root = (ik, l)
						found_lr = True
						break
					except KeyError:
						continue
				if found_lr:
					break
			# Collect the (date, context) pairs reported for that ancestor.
			key_ctx_list = []
			try:
				tmp_root = self.data['ins']['facts'][line_root[0]][line_root[1].lower()]['val_by_date']
				root_val = OrderedDict()
				tr_keys = tmp_root.keys()
				tmp_ctx_list = []
				for trk in tr_keys:
					tmps_val = tmp_root[trk]
					root_val[trk] = tmps_val[0][0]
					if (trk, tmps_val[0][1]) not in tmp_ctx_list:
						tmp_ctx_list.append((trk, tmps_val[0][1]))
				# NOTE(review): the membership test keys on i[:2] while the
				# append uses line_root[:2]; harmless here because
				# key_ctx_list is freshly emptied each iteration.
				if (i[:2], tuple(tmp_ctx_list)) not in key_ctx_list:
					key_ctx_list.append((line_root[:2], tuple(tmp_ctx_list)))
			except (KeyError, TypeError):
				pass
			# Pull this concept's values for the ancestor's contexts.
			try:
				use_ctx = None
				for kcl in key_ctx_list:
					if kcl[0] == line_root:
						use_ctx = kcl[1]
				val = OrderedDict()
				for uc in use_ctx:
					try:
						tmp_val = self.data['ins']['facts'][i[0]][i[1].lower()]['val_by_date'][uc[0]]
					except KeyError:
						continue
					vals = None
					for tv in tmp_val:
						if tv[1] == uc[1]:
							vals = (tv[0])
					val[uc[0]] = vals
			except TypeError:
				# use_ctx stayed None (no contexts found); val remains empty.
				pass
			# Resolve the display label for the (order, val, label) record.
			try:
				label = self.find_label_str((i[0], i[1]))
			except KeyError:
				label = OrderedDict()
				label[0] = i[1]
			if len(label) > 1:
				try:
					label = label[i[3]]
				except KeyError:
					try:
						label = label['label']
					except KeyError:
						label = i[1]
				except TypeError:
					label = label
			else:
				label_keys = label.keys()
				label = label[label_keys[0]]
			self.gen_dict_path('pre', line, role_name, i[0], (i[2], val, label))
			# Resolve the label a second time and store it under its own path
			# entry (the first resolution may have rebound 'label' to a value).
			try:
				label = self.find_label_str((i[0], i[1]))
			except KeyError:
				label = OrderedDict()
				label[0] = i[1]
			if len(label) > 1:
				try:
					label = label[i[3]]
				except KeyError:
					try:
						label = label['label']
					except KeyError:
						label = i[1]
				except TypeError:
					label = label
			else:
				label_keys = label.keys()
				label = label[label_keys[0]]
			self.gen_dict_path('pre', line, role_name, i[0], label)

Example 24

Project: CouchPotatoServer
Source File: indexcreator.py
View license
    def parse(self, data, name=None):
        """Parse an index definition written in the simplified index DSL.

        data -- raw definition text; must contain a ``make_key_value:``
                section and may contain a ``make_key:`` section (in either
                order).  Everything before those sections is treated as
                index properties.
        name -- optional index name; a unique one is generated when omitted.

        Returns the result of ``self.parse_ex()``.  Raises
        IndexCreatorFunctionException / IndexCreatorValueException on
        malformed input.
        """
        if not name:
            # No explicit name: generate a unique one so indexes don't clash.
            self.name = "_" + uuid.uuid4().hex
        else:
            self.name = name

        # --- Parser state and token tables -------------------------------
        self.ind = 0
        self.stage = 0
        self.logic = ['and', 'or', 'in']
        self.logic2 = ['&', '|']
        # Properties each supported index class accepts in the header section.
        self.allowed_props = {'TreeBasedIndex': ['type', 'name', 'key_format', 'node_capacity', 'pointer_format', 'meta_format'],
                              'HashIndex': ['type', 'name', 'key_format', 'hash_lim', 'entry_line_format'],
                              'MultiHashIndex': ['type', 'name', 'key_format', 'hash_lim', 'entry_line_format'],
                              'MultiTreeBasedIndex': ['type', 'name', 'key_format', 'node_capacity', 'pointer_format', 'meta_format']
                              }
        # DSL-callable helpers: name -> (call-prefix parts, call-suffix parts).
        self.funcs = {'md5': (['md5'], ['.digest()']),
                      'len': (['len'], []),
                      'str': (['str'], []),
                      'fix_r': (['self.fix_r'], []),
                      'prefix': (['self.prefix'], []),
                      'infix': (['self.infix'], []),
                      'suffix': (['self.suffix'], [])
                      }
        # Extra import lines required when a given helper is used.
        self.handle_int_imports = {'infix': "from itertools import izip\n"}

        # Source bodies for helpers that must be emitted into the generated
        # index class (the bool flags whether the body was already emitted).
        self.funcs_with_body = {'fix_r':
                                ("""    def fix_r(self,s,l):
        e = len(s)
        if e == l:
            return s
        elif e > l:
            return s[:l]
        else:
            return s.rjust(l,'_')\n""", False),
                                'prefix':
                                ("""    def prefix(self,s,m,l,f):
        t = len(s)
        if m < 1:
            m = 1
        o = set()
        if t > l:
            s = s[:l]
            t = l
        while m <= t:
            o.add(s.rjust(f,'_'))
            s = s[:-1]
            t -= 1
        return o\n""", False),
                                'suffix':
                                ("""    def suffix(self,s,m,l,f):
        t = len(s)
        if m < 1:
            m = 1
        o = set()
        if t > l:
            s = s[t-l:]
            t = len(s)
        while m <= t:
            o.add(s.rjust(f,'_'))
            s = s[1:]
            t -= 1
        return o\n""", False),
                                'infix':
                                ("""    def infix(self,s,m,l,f):
        t = len(s)
        o = set()
        for x in xrange(m - 1, l):
            t = (s, )
            for y in xrange(0, x):
                t += (s[y + 1:],)
            o.update(set(''.join(x).rjust(f, '_').lower() for x in izip(*t)))
        return o\n""", False)}
        # Literals the DSL treats as a null value.
        self.none = ['None', 'none', 'null']
        # Tokens that assign a property value in the header section.
        self.props_assign = ['=', ':']
        # For each operator below: which token types may legally appear
        # adjacent to which (used to validate the token stream).
        self.all_adj_num_comp = {token.NUMBER: (
            token.NUMBER, token.NAME, '-', '('),
            token.NAME: (token.NUMBER, token.NAME, '-', '('),
            ')': (token.NUMBER, token.NAME, '-', '(')
        }

        self.all_adj_num_op = {token.NUMBER: (token.NUMBER, token.NAME, '('),
                               token.NAME: (token.NUMBER, token.NAME, '('),
                               ')': (token.NUMBER, token.NAME, '(')
                               }
        self.allowed_adjacent = {
            "<=": self.all_adj_num_comp,
            ">=": self.all_adj_num_comp,
            ">": self.all_adj_num_comp,
            "<": self.all_adj_num_comp,

            "==": {token.NUMBER: (token.NUMBER, token.NAME, '('),
                   token.NAME: (token.NUMBER, token.NAME, token.STRING, '('),
                   token.STRING: (token.NAME, token.STRING, '('),
                   ')': (token.NUMBER, token.NAME, token.STRING, '('),
                   ']': (token.NUMBER, token.NAME, token.STRING, '(')
                   },

            "+": {token.NUMBER: (token.NUMBER, token.NAME, '('),
                  token.NAME: (token.NUMBER, token.NAME, token.STRING, '('),
                  token.STRING: (token.NAME, token.STRING, '('),
                  ')': (token.NUMBER, token.NAME, token.STRING, '('),
                  ']': (token.NUMBER, token.NAME, token.STRING, '(')
                  },

            "-": {token.NUMBER: (token.NUMBER, token.NAME, '('),
                  token.NAME: (token.NUMBER, token.NAME, '('),
                  ')': (token.NUMBER, token.NAME, '('),
                  '<': (token.NUMBER, token.NAME, '('),
                  '>': (token.NUMBER, token.NAME, '('),
                  '<=': (token.NUMBER, token.NAME, '('),
                  '>=': (token.NUMBER, token.NAME, '('),
                  '==': (token.NUMBER, token.NAME, '('),
                  ']': (token.NUMBER, token.NAME, '(')
                  },
            "*": self.all_adj_num_op,
            "/": self.all_adj_num_op,
            "%": self.all_adj_num_op,
            ",": {token.NUMBER: (token.NUMBER, token.NAME, token.STRING, '{', '[', '('),
                  token.NAME: (token.NUMBER, token.NAME, token.STRING, '(', '{', '['),
                  token.STRING: (token.NAME, token.STRING, token.NUMBER, '(', '{', '['),
                  ')': (token.NUMBER, token.NAME, token.STRING, '(', '{', '['),
                  ']': (token.NUMBER, token.NAME, token.STRING, '(', '{', '['),
                  '}': (token.NUMBER, token.NAME, token.STRING, '(', '{', '[')
                  }
        }

        # True when s contains only digits, arithmetic operators, parens
        # and whitespace (i.e. a purely numeric expression).
        def is_num(s):
            m = re.search('[^0-9*()+\-\s/]+', s)
            return not m

        # Match object when s is a quoted string literal, else None.
        def is_string(s):
            m = re.search('\s*(?P<a>[\'\"]+).*?(?P=a)\s*', s)
            return m
        # Split the input into [properties, make_key_value body, ...].
        data = re.split('make_key_value\:', data)

        if len(data) < 2:
            raise IndexCreatorFunctionException(
                "Couldn't find a definition of make_key_value function!\n")

        # make_key: may appear before or after make_key_value: — normalize
        # data to [props, make_key_value body, make_key body].
        spl1 = re.split('make_key\:', data[0])
        spl2 = re.split('make_key\:', data[1])

        self.funcs_rev = False

        if len(spl1) > 1:
            # make_key: came first; remember the order for error line numbers.
            data = [spl1[0]] + [data[1]] + [spl1[1]]
            self.funcs_rev = True
        elif len(spl2) > 1:
            data = [data[0]] + spl2
        else:
            # No make_key: section — default make_key to the identity 'key'.
            data.append("key")

        # A section equal to its own leading whitespace is empty; report the
        # error with a line number computed from the surrounding sections.
        if data[1] == re.search('\s*', data[1], re.S | re.M).group(0):
            raise IndexCreatorFunctionException("Empty function body ",
                                                len(re.split('\n', data[0])) + (len(re.split('\n', data[2])) if self.funcs_rev else 1) - 1)
        if data[2] == re.search('\s*', data[2], re.S | re.M).group(0):
            raise IndexCreatorFunctionException("Empty function body ",
                                                len(re.split('\n', data[0])) + (1 if self.funcs_rev else len(re.split('\n', data[1]))) - 1)
        if data[0] == re.search('\s*', data[0], re.S | re.M).group(0):
            raise IndexCreatorValueException("You didn't set any properity or you set them not at the begining of the code\n")

        # Work line-by-line from here on.
        data = [re.split(
            '\n', data[0]), re.split('\n', data[1]), re.split('\n', data[2])]
        self.cnt_lines = (len(data[0]), len(data[1]), len(data[2]))
        ind = 0
        self.predata = data
        self.data = [[], [], []]
        # Auto-quote bare (non-numeric) property values in the header,
        # except for the 'type' and 'name' properties.
        for i, v in enumerate(self.predata[0]):
            for k, w in enumerate(self.predata[0][i]):
                if self.predata[0][i][k] in self.props_assign:
                    if not is_num(self.predata[0][i][k + 1:]) and self.predata[0][i].strip()[:4] != 'type' and self.predata[0][i].strip()[:4] != 'name':
                        s = self.predata[0][i][k + 1:]
                        self.predata[0][i] = self.predata[0][i][:k + 1]

                        m = re.search('\s+', s.strip())
                        if not is_string(s) and not m:
                            s = "'" + s.strip() + "'"
                        self.predata[0][i] += s
                        break

        # Strip blank lines and validate bracket/quote enclosures per section.
        for n, i in enumerate(self.predata):
            for k in i:
                k = k.strip()
                if k:
                    self.data[ind].append(k)
                    self.check_enclosures(k, n)
            ind += 1

        return self.parse_ex()

Example 25

Project: stonix
Source File: DisableGUILogon.py
View license
    def fix(self):
        '''
        Apply the enabled configuration items:
        CI1/CI3 - make the system boot to a non-graphical default
        (systemd target, debian/ubuntu display-manager disable, or an
        inittab runlevel-3 entry);
        CI3 - additionally remove the X Window System packages;
        CI2 - only when CI3 is not set: disable the xfs service and keep
        the X server from listening on TCP.

        @author: Eric Ball
        @param self - essential if you override this definition
        @return: bool - True if fix is successful, False if it isn't
                 (returns None early when no CI for this rule is enabled)
        '''
        try:
            # Nothing to do unless at least one CI is enabled.
            if not self.ci1.getcurrvalue() and not self.ci2.getcurrvalue() \
               and not self.ci3.getcurrvalue():
                return
            success = True
            results = ""
            # Delete past state change records from previous fix
            self.iditerator = 0
            eventlist = self.statechglogger.findrulechanges(self.rulenumber)
            for event in eventlist:
                self.statechglogger.deleteentry(event)

            if self.ci1.getcurrvalue() or self.ci3.getcurrvalue():
                if self.initver == "systemd":
                    # systemd: make the non-graphical multi-user target the
                    # default boot target.
                    cmd = ["/bin/systemctl", "set-default",
                           "multi-user.target"]
                    if not self.ch.executeCommand(cmd):
                        success = False
                        results += '"systemctl set-default multi-user.target"' \
                                   + " did not succeed\n"
                    else:
                        # Record the command that restores the graphical
                        # default — presumably replayed on undo.
                        self.iditerator += 1
                        myid = iterate(self.iditerator, self.rulenumber)
                        commandstring = "/bin/systemctl set-default " + \
                                        "graphical.target"
                        event = {"eventtype": "commandstring",
                                 "command": commandstring}
                        self.statechglogger.recordchgevent(myid, event)

                elif self.initver == "debian":
                    # debian: disable every known display manager init script.
                    dmlist = ["gdm", "gdm3", "lightdm", "xdm", "kdm"]
                    for dm in dmlist:
                        cmd = ["update-rc.d", "-f", dm, "disable"]
                        if not self.ch.executeCommand(cmd):
                            results += "Failed to disable desktop " + \
                                       "manager " + dm
                        else:
                            self.iditerator += 1
                            myid = iterate(self.iditerator, self.rulenumber)
                            event = {"eventtype":   "servicehelper",
                                     "servicename": dm,
                                     "startstate":  "enabled",
                                     "endstate":    "disabled"}
                            self.statechglogger.recordchgevent(myid, event)

                elif self.initver == "ubuntu":
                    # ubuntu (upstart): write "manual" to lightdm's override
                    # file so the display manager is not started automatically.
                    ldmover = "/etc/init/lightdm.override"
                    tmpfile = ldmover + ".tmp"
                    if not os.path.exists(ldmover):
                        createFile(ldmover, self.logger)
                        self.iditerator += 1
                        myid = iterate(self.iditerator, self.rulenumber)
                        event = {"eventtype": "creation", "filepath": ldmover}
                        self.statechglogger.recordchgevent(myid, event)
                    writeFile(tmpfile, "manual", self.logger)
                    self.iditerator += 1
                    myid = iterate(self.iditerator, self.rulenumber)
                    event = {"eventtype": "conf", "filepath": ldmover}
                    self.statechglogger.recordchgevent(myid, event)
                    self.statechglogger.recordfilechange(ldmover, tmpfile,
                                                         myid)
                    os.rename(tmpfile, ldmover)
                    resetsecon(ldmover)

                    # Set the GRUB default kernel args to just "quiet" via the
                    # key-value editor, then regenerate the grub config.
                    grub = "/etc/default/grub"
                    if not os.path.exists(grub):
                        createFile(grub, self.logger)
                        self.iditerator += 1
                        myid = iterate(self.iditerator, self.rulenumber)
                        event = {"eventtype": "creation", "filepath": grub}
                        self.statechglogger.recordchgevent(myid, event)
                    tmppath = grub + ".tmp"
                    data = {"GRUB_CMDLINE_LINUX_DEFAULT": '"quiet"'}
                    editor = KVEditorStonix(self.statechglogger, self.logger,
                                            "conf", grub, tmppath, data,
                                            "present", "closedeq")
                    editor.report()
                    if editor.fixables:
                        if editor.fix():
                            debug = "kveditor fix ran successfully\n"
                            self.logger.log(LogPriority.DEBUG, debug)
                            if editor.commit():
                                debug = "kveditor commit ran successfully\n"
                                self.logger.log(LogPriority.DEBUG, debug)
                            else:
                                error = "kveditor commit did not run " + \
                                        "successfully\n"
                                self.logger.log(LogPriority.ERROR, error)
                                success = False
                        else:
                            error = "kveditor fix did not run successfully\n"
                            self.logger.log(LogPriority.ERROR, error)
                            success = False
                    cmd = "update-grub"
                    self.ch.executeCommand(cmd)

                else:
                    # SysV fallback: force the default runlevel to 3 in
                    # /etc/inittab (replace an existing initdefault line or
                    # append one).
                    inittab = "/etc/inittab"
                    tmpfile = inittab + ".tmp"
                    if os.path.exists(inittab):
                        initText = open(inittab, "r").read()
                        initre = r"id:\d:initdefault:"
                        if re.search(initre, initText):
                            initText = re.sub(initre, "id:3:initdefault:",
                                              initText)
                            writeFile(tmpfile, initText, self.logger)
                            self.iditerator += 1
                            myid = iterate(self.iditerator, self.rulenumber)
                            event = {"eventtype": "conf", "filepath": inittab}
                            self.statechglogger.recordchgevent(myid, event)
                            self.statechglogger.recordfilechange(inittab,
                                                                 tmpfile, myid)
                            os.rename(tmpfile, inittab)
                            resetsecon(inittab)
                        else:
                            initText += "\nid:3:initdefault:\n"
                            writeFile(tmpfile, initText, self.logger)
                            self.iditerator += 1
                            myid = iterate(self.iditerator, self.rulenumber)
                            event = {"eventtype": "conf", "filepath": inittab}
                            self.statechglogger.recordchgevent(myid, event)
                            self.statechglogger.recordfilechange(inittab,
                                                                 tmpfile, myid)
                            os.rename(tmpfile, inittab)
                            resetsecon(inittab)
                    else:
                        results += inittab + " not found, no other init " + \
                            "system found. If you are using a supported " + \
                            "Linux OS, please report this as a bug\n"

            if self.ci3.getcurrvalue():
                # Due to automatic removal of dependent packages, the full
                # removal of X and related packages cannot be undone
                if re.search("opensuse", self.myos):
                    cmd = ["zypper", "-n", "rm", "-u", "xorg-x11*", "kde*",
                           "xinit*"]
                    self.ch.executeCommand(cmd)
                elif re.search("debian|ubuntu", self.myos):
                    cmd = ["apt-get", "purge", "-y", "--force-yes", "unity.*",
                           "xserver.*", "gnome.*", "x11.*", "lightdm.*",
                           "libx11.*", "libqt.*"]
                    self.ch.executeCommand(cmd)
                    cmd2 = ["apt-get", "autoremove", "-y"]
                    self.ch.executeCommand(cmd2)
                elif re.search("fedora", self.myos):
                    # Fedora does not use the same group packages as other
                    # RHEL-based OSs. Removing this package will remove the X
                    # Windows system, just less efficiently than using a group
                    self.ph.remove("xorg-x11-server-Xorg")
                    self.ph.remove("xorg-x11-xinit*")
                else:
                    # RHEL-like default: convert yum groups, then remove the
                    # X Window System group.
                    cmd = ["yum", "groups", "mark", "convert"]
                    self.ch.executeCommand(cmd)
                    self.ph.remove("xorg-x11-xinit")
                    cmd2 = ["yum", "groupremove", "-y", "X Window System"]
                    if not self.ch.executeCommand(cmd2):
                        success = False
                        results += '"yum groupremove -y X Window System" ' + \
                                   'command failed\n'
            # Since LOCKDOWNX depends on having X installed, and REMOVEX
            # completely removes X from the system, LOCKDOWNX fix will only be
            # executed if REMOVEX is not.
            elif self.ci2.getcurrvalue():
                if self.sh.disableservice("xfs"):
                    self.iditerator += 1
                    myid = iterate(self.iditerator, self.rulenumber)
                    event = {"eventtype":   "servicehelper",
                             "servicename": "xfs",
                             "startstate":  "enabled",
                             "endstate":    "disabled"}
                    self.statechglogger.recordchgevent(myid, event)
                else:
                    success = False
                    results += "STONIX was unable to disable the xfs service\n"

                if not self.xservSecure:
                    # Ensure the X server is started with -nolisten tcp.
                    # NOTE(review): the "[email protected]" token below looks like a
                    # text-extraction artifact (likely "$@" in the upstream
                    # source) — confirm against the original project before
                    # relying on this string.
                    serverrcString = "exec X :0 -nolisten tcp [email protected]"
                    if not os.path.exists(self.serverrc):
                        createFile(self.serverrc, self.logger)
                        self.iditerator += 1
                        myid = iterate(self.iditerator, self.rulenumber)
                        event = {"eventtype": "creation",
                                 "filepath": self.serverrc}
                        self.statechglogger.recordchgevent(myid, event)
                        writeFile(self.serverrc, serverrcString, self.logger)
                    else:
                        open(self.serverrc, "a").write(serverrcString)

            self.rulesuccess = success
            if self.rulesuccess:
                self.detailedresults = "DisableGUILogon fix has been run " + \
                                       "to completion"
            else:
                self.detailedresults = "DisableGUILogon fix has been run " + \
                                       "but not to completion\n" + results
        except (KeyboardInterrupt, SystemExit):
            # User initiated exit
            raise
        except Exception:
            self.rulesuccess = False
            self.detailedresults += "\n" + traceback.format_exc()
            self.logdispatch.log(LogPriority.ERROR, self.detailedresults)
        self.formatDetailedResults("fix", self.rulesuccess,
                                   self.detailedresults)
        self.logdispatch.log(LogPriority.INFO, self.detailedresults)
        return self.rulesuccess

Example 26

Project: stonix
Source File: ReqAuthSingleUserMode.py
View license
    def fix(self):
        '''
        The fix method will apply the required settings to the system.
        self.rulesuccess will be updated if the rule does not succeed.
        Enter the correct config entry in EITHER /etc/inittab OR
        /etc/default/sulogin OR /etc/ttys OR /etc/sysconfig/init to require
        authentication with single-user mode.

        @return: self.rulesuccess - True if the fix ran to completion
        @author bemalmbe
        '''
        try:
            # honor the configuration item: do nothing unless the user has
            # enabled this fix
            if not self.ci.getcurrvalue():
                return

            success = True
            self.detailedresults = ""

            # clear out event history so only the latest fix is recorded
            self.iditerator = 0
            eventlist = self.statechglogger.findrulechanges(self.rulenumber)
            for event in eventlist:
                self.statechglogger.deleteentry(event)

            # there is no way to disable the requirement of a password for
            # apt-get systems so no need to do anything; zypper systems are
            # excluded for the same reason
            if not self.ph.manager == "apt-get" and not self.ph.manager == \
                                                                      "zypper":

                # solution for bsd: mark console/tty lines "insecure" in
                # /etc/ttys so init demands the root password in single-user
                # mode
                if self.ph.manager == "freebsd":
                    fp = "/etc/ttys"
                    tfp = fp + ".tmp"
                    created = False
                    badfile = False
                    if not os.path.exists(fp):
                        createFile(fp)
                        created = True
                    if os.path.exists(fp):
                        if not created:
                            # we check if file was previously created above
                            # if so, we don't want to record a permission
                            # change event, since the undo will be file deletion
                            if not checkPerms(fp, [0, 0, 420], self.logger):
                                self.iditerator += 1
                                myid = iterate(self.iditerator, self.rulenumber)
                                if setPerms(fp, [0, 0, 420], self.logger,
                                            self.statechglogger, myid):
                                    self.detailedresults += "Successfully " + \
                                        "corrected permissions on file: " + \
                                        fp + "\n"
                                else:
                                    self.detailedresults += "Was not able to " + \
                                        "successfully set permissions on file: " + \
                                        fp + "\n"
                                    success = False

                        # read in file
                        contents = readFile(fp, self.logger)
                        tempstring = ""
                        for line in contents:
                            # search for any line beginning with tty
                            if re.search("^tty", line):
                                linesplit = line.split()
                                try:
                                    # the 5th field holds the secure/insecure
                                    # flag; flip "secure" to "insecure"
                                    if linesplit[4] == "secure":
                                        # BUGFIX: this was a comparison (==)
                                        # that silently did nothing; it must
                                        # be an assignment for the rewrite to
                                        # actually change the flag
                                        linesplit[4] = "insecure"
                                        badfile = True
                                        tempstring += " ".join(linesplit) + "\n"
                                    else:
                                        tempstring += line
                                except IndexError:
                                    debug = traceback.format_exc() + "\n"
                                    debug += "Index out of range on line: " + line + "\n"
                                    self.logger.log(LogPriority.DEBUG, debug)
                            else:
                                tempstring += line

                        # check to see if badfile is true which is set when
                        # checking contents of the file, if badfile is false
                        # we found everything in the file we needed, so no need
                        # to change
                        if badfile:
                            if writeFile(tfp, tempstring, self.logger):
                                self.iditerator += 1
                                myid = iterate(self.iditerator,
                                               self.rulenumber)
                                # if the file wasn't created earlier, then we
                                # will record the change event as a file change
                                if not created:
                                    event = {"eventtype": "conf",
                                             "filepath": fp}
                                    self.statechglogger.recordchgevent(myid,
                                                                      event)
                                    self.statechglogger.recordfilechange(fp,
                                                                        tfp, myid)
                                # if file was created earlier, then we will
                                # record the change event as a file creation
                                # so that undo event will be a file deletion
                                # (BUGFIX: this branch previously recorded
                                # "conf", which would make the undo restore
                                # content instead of deleting the file)
                                else:
                                    event = {"eventtype": "creation",
                                             "filepath": fp}
                                    self.statechglogger.recordchgevent(myid,
                                                                      event)
                                self.detailedresults += "corrected contents " + \
                                    "and wrote to file: " + fp + "\n"
                                os.rename(tfp, fp)
                                os.chown(fp, 0, 0)
                                os.chmod(fp, 420)
                                resetsecon(fp)
                            else:
                                self.detailedresults += "Unable to " + \
                                    "successfully write the file: " + fp + "\n"
                                success = False

                # solution for RHEL-family: ensure SINGLE=/sbin/sulogin in
                # /etc/sysconfig/init so single-user mode requires auth
                if self.ph.manager == "yum":
                    tempstring = ""
                    fp = "/etc/sysconfig/init"
                    tfp = fp + ".tmp"
                    created = False
                    badfile = False
                    if not os.path.exists(fp):
                        createFile(fp)
                        created = True
                    if os.path.exists(fp):
                        if not created:
                            # we check if file was previously created above
                            # if so, we don't want to record a permission
                            # change event, since the undo will be file deletion
                            if not checkPerms(fp, [0, 0, 420], self.logger):
                                self.iditerator += 1
                                myid = iterate(self.iditerator, self.rulenumber)
                                if setPerms(fp, [0, 0, 420], self.logger,
                                            self.statechglogger, myid):
                                    self.detailedresults += "Successfully " + \
                                        "corrected permissions on file: " + \
                                        fp + "\n"
                                else:
                                    self.detailedresults += "Was not able to " + \
                                        "successfully set permissions on file: " + \
                                        fp + "\n"
                                    success = False
                        contents = readFile(fp, self.logger)
                        if contents:
                            linefound = False
                            for line in contents:
                                if re.search("^SINGLE", line.strip()):
                                    if re.search("=", line):
                                        temp = line.split("=")
                                        try:
                                            if temp[1].strip() == "/sbin/sulogin":
                                                tempstring += line
                                                linefound = True
                                        except IndexError:
                                            # NOTE(review): this sets the
                                            # report-time attribute from fix;
                                            # presumably should be
                                            # success = False -- confirm
                                            self.compliant = False
                                            debug = traceback.format_exc() + "\n"
                                            debug += "Index out of range on line: " + line + "\n"
                                            self.logger.log(LogPriority.DEBUG, debug)
                                else:
                                    tempstring += line
                            if not linefound:
                                badfile = True
                                tempstring += "SINGLE=/sbin/sulogin\n"

                        # check to see if badfile is true which is set when
                        # checking contents of the file, if badfile is false
                        # we found everything in the file we needed, so no need
                        # to change
                        if badfile:
                            if writeFile(tfp, tempstring, self.logger):
                                self.iditerator += 1
                                myid = iterate(self.iditerator,
                                               self.rulenumber)
                                # if the file wasn't created earlier, then we
                                # will record the change event as a file change
                                if not created:
                                    event = {"eventtype": "conf",
                                             "filepath": fp}
                                    self.statechglogger.recordchgevent(myid,
                                                                      event)
                                    self.statechglogger.recordfilechange(fp,
                                                                        tfp, myid)
                                # if file was created earlier, then we will
                                # record the change event as a file creation
                                # so that undo event will be a file deletion
                                else:
                                    event = {"eventtype": "creation",
                                             "filepath": fp}
                                    self.statechglogger.recordchgevent(myid,
                                                                      event)
                                self.detailedresults += "corrected contents " + \
                                    "and wrote to file: " + fp + "\n"
                                os.rename(tfp, fp)
                                os.chown(fp, 0, 0)
                                os.chmod(fp, 420)
                                resetsecon(fp)
                            else:
                                self.detailedresults += "Unable to " + \
                                    "successfully write the file: " + fp + "\n"
                                success = False

                # (removed ~70 lines of dead commented-out zypper handling:
                # zypper systems are already excluded by the guard above)

                # solution for solaris systems: ensure PASSREQ=YES in
                # /etc/default/sulogin
                if self.ph.manager == "solaris":
                    tempstring = ""
                    fp = "/etc/default/sulogin"
                    tfp = fp + ".tmp"
                    created = False
                    badfile = False

                    if not os.path.exists(fp):
                        createFile(fp)
                        created = True
                    if os.path.exists(fp):
                        if not created:
                            # we check if file was previously created above
                            # if so, we don't want to record a permission
                            # change event, since the undo will be file deletion
                            if not checkPerms(fp, [0, 0, 420], self.logger):
                                self.iditerator += 1
                                myid = iterate(self.iditerator, self.rulenumber)
                                if setPerms(fp, [0, 0, 420], self.logger,
                                            self.statechglogger, myid):
                                    self.detailedresults += "Successfully " + \
                                        "corrected permissions on file: " + \
                                        fp + "\n"
                                else:
                                    self.detailedresults += "Was not able to " + \
                                        "successfully set permissions on file: " + \
                                        fp + "\n"
                                    success = False
                        contents = readFile(fp, self.logger)
                        if contents:
                            linefound = False
                            for line in contents:
                                if re.search("^PASSREQ", line.strip()):
                                    if re.search("=", line):
                                        temp = line.split("=")
                                        try:
                                            if temp[1].strip() == "YES":
                                                tempstring += line
                                                linefound = True
                                        except IndexError:
                                            debug = traceback.format_exc() + "\n"
                                            debug += "Index out of range on line: " + line + "\n"
                                            self.logger.log(LogPriority.DEBUG, debug)
                                else:
                                    tempstring += line
                            if not linefound:
                                badfile = True
                                tempstring += "PASSREQ=YES\n"
                        # check to see if badfile is true which is set when
                        # checking contents of the file, if badfile is false
                        # we found everything in the file we needed, so no need
                        # to change
                        if badfile:
                            if writeFile(tfp, tempstring, self.logger):
                                self.iditerator += 1
                                myid = iterate(self.iditerator,
                                               self.rulenumber)
                                # if the file wasn't created earlier, then we
                                # will record the change event as a file change
                                if not created:
                                    event = {"eventtype": "conf",
                                             "filepath": fp}
                                    self.statechglogger.recordchgevent(myid,
                                                                      event)
                                    self.statechglogger.recordfilechange(fp,
                                                                        tfp, myid)
                                # if file was created earlier, then we will
                                # record the change event as a file creation
                                # so that undo event will be a file deletion
                                else:
                                    event = {"eventtype": "creation",
                                             "filepath": fp}
                                    self.statechglogger.recordchgevent(myid,
                                                                      event)
                                self.detailedresults += "corrected contents " + \
                                    "and wrote to file: " + fp + "\n"
                                os.rename(tfp, fp)
                                os.chown(fp, 0, 0)
                                os.chmod(fp, 420)
                                resetsecon(fp)
                            else:
                                self.detailedresults += "Unable to " + \
                                    "successfully write the file: " + fp + "\n"
                                success = False
            self.rulesuccess = success
        except (KeyboardInterrupt, SystemExit):
            # User initiated exit
            raise
        except Exception:
            self.rulesuccess = False
            self.detailedresults += "\n" + traceback.format_exc()
            self.logdispatch.log(LogPriority.ERROR, self.detailedresults)
        self.formatDetailedResults("fix", self.rulesuccess,
                                   self.detailedresults)
        self.logdispatch.log(LogPriority.INFO, self.detailedresults)
        return self.rulesuccess

Example 27

Project: stonix
Source File: SecureHomeDir.py
View license
    def reportOther(self):
        debug = ""
        uidmin = ""
        compliant = True
        templist = []
        remove = []
        grpvals = ("7", "2", "3", "6")
        unwanted = ["/root", "/", "/usr", "/var", "/lib", "/bin", "/sbin",
                    "/run", "/etc"]
        #we will look for home directories in /etc/passwd and in /home
        if self.environ.getosfamily() == "solaris":
            homebase = "/export/home/"
        else:
            homebase = "/home/"
        #read in /etc/passwd
        contents = readFile("/etc/passwd", self.logger)
        if not contents:
            self.detailedresults += "the /etc/passwd file is blank.  This \
rule cannot be run at all.\n"
            self.formatDetailedResults("report", False, self.detailedresults)
            compliant = False

        #user is root
        elif self.environ.geteuid() == 0:
            if os.path.exists("/etc/login.defs"):
                logindefs = readFile("/etc/login.defs", self.logger)
                for line in logindefs:
                    if re.search("^UID_MIN", line):
                        line = re.sub("\s+", " ", line)
                        temp = line.split()
                        try:
                            if isinstance(int(temp[1]), int):
                                uidmin = int(temp[1])
                        except IndexError:
                            compliant = False
                            debug = traceback.format_exc() + "\n"
                            debug += "Index out of range on line: " + line + "\n"

            #add home directories found in /etc/passwd
            if not uidmin:
                uidmin = 100
            for line in contents:
                if re.search("^#", line) or re.match("^\s*$", line):
                    continue
                if re.search(":", line):
                    temp = line.split(":")
                    try:
                        if re.search("/", temp[5]):
                            if int(temp[2]) >= uidmin and int(temp[2]) != 65534:
                                self.homedirs.append(temp[5])
                        else:
                            debug = "the /etc/passwd file is not in the \
correct format as of the line: " + line + "\n"
                    except IndexError:
                        compliant = False
                        debug = traceback.format_exc() + "\n"
                        debug += "Index out of range on line: " + line + "\n"

            #add home directories found
            output = os.listdir(homebase)
            for item in output:
                if item == "lost+found":
                    continue
                home = homebase + item
                if home not in self.homedirs:
                    self.homedirs.append(home)

            #clean up self.homedirs to not contain any of the root dirs
            for item in self.homedirs:
                for item2 in unwanted:
                    if item2 == "/":
                        if re.search("^" + item2 + "$", item):
                            remove.append(item)
                            break
                    elif re.search("^" + item2, item):
                        remove.append(item)
                        break
            for item in remove:
                self.homedirs.remove(item)

            #let's look at the home directories we found
            if self.homedirs:
                for home in self.homedirs:
                    user = ""
                    if not os.path.exists(home):
                        compliant = False
                        debug += "This home directory doesn't exist: " + home + "\n"
                        continue

                    #here we get just the username from the home directory
                    temphome = home.split("/")
                    try:
                        if temphome[-1] == "":
                            temphome.pop(-1)
                        if temphome[0] == "":
                            temphome.pop(0)
                        user = str(temphome[-1])
                    except IndexError:
                        compliant = False
                        debug += traceback.format_exc() + "\n"
                        debug += "Index out of range on line: " + line + "\n"
                        continue
                    output = os.listdir(homebase)
                    for directory in output:
                        templist = []
                        #we found the current user's home directory
                        if (user == directory):
                            try:
                                #get info about user's directory
                                statdata = os.stat(homebase + directory)

                                #permission returns in integer format
                                mode = stat.S_IMODE(statdata.st_mode)

                                #convert permission to octal format
                                octval = oct(mode)

                                #remove leading 0
                                octval = re.sub("^0", "", octval)

                                #split numeric integer value of permissions
                                #into separate numbers
                                perms = list(octval)

                                grpval = perms[1]
                                world = perms[2]
                                #if the group value equals any of the 
                                #following numbers, it is group writeable
                                if grpval in grpvals:
                                    templist.append("gw")
                                    compliant = False
                                if world != "0":
                                    templist.append("wr")
                                    compliant = False
                                if templist:
                                    self.wrong[home] = templist
                                    break
                            except IndexError:
                                compliant = False
                                debug += traceback.format_exc() + "\n"
                                debug += "Index out of range on line: " + line + "\n"
                                break

        else:
            user = os.getlogin()
            output = os.listdir(homebase)
            for line in output:
                #the current user has a home directory!
                if line == user:
                    home = homebase + line
                    try:
                        #get file information each file
                        statdata = os.stat(home)

                        #permission returns in integer format
                        mode = stat.S_IMODE(statdata.st_mode)

                        #convert permission to octal format
                        octval = oct(mode)

                        #remove leading 0
                        octval = re.sub("^0", "", octval)

                        #split numeric integer value of permissions
                        #into separate numbers
                        perms = list(octval)

                        #group permissions
                        grpval = perms[1]

                        #other permissions(world)
                        world = perms[2]
                        #if the group value equals any of the 
                        #following numbers, it is group writeable
                        if grpval in grpvals:
                            templist.append("gw")
                            compliant = False
                        if world != "0":
                            templist.append("wr")
                            compliant = False
                        if templist:
                            self.wrong[home] = templist
                            break
                    except IndexError:
                        compliant = False
                        debug += traceback.format_exc() + "\n"
                        debug += "Index out of range on line: " + line + "\n"
        if debug:
            self.logger.log(LogPriority.DEBUG, debug)
        return compliant

Example 28

View license
def prep_group_analysis_workflow(c, resource, subject_infos):
    """Build and run the FSL/nipype group-level analysis workflow for one output file.

    Called once per derivative output file during group analysis.  For each
    model configuration listed in ``c.modelConfigs`` it: validates and prunes
    the group subject list, optionally merges motion measures into the
    phenotypic file, calls ``create_fsl_model`` to generate the FSL model
    files, and finally assembles and runs a nipype workflow that performs the
    group analysis and sinks its outputs.

    Parameters:
        c: pipeline configuration object (provides runScrubbing,
           scrubbingThreshold, modelConfigs, outputDirectory, etc.).
        resource: name of the derivative the group analysis is being run on.
        subject_infos: iterable of (pipeline_id, subject_id, scan_id, path)
           tuples, one per subject output file.

    Raises:
        Exception: on malformed subject lists, unreadable config/parameter
           files, empty model lists, or missing model/subject-list paths.
    """

    #
    # this function runs once per output file during group analysis
    #

    # p_id = a list of pipeline IDs, i.e. the name of the output folder for
    #        the strat

    # s_ids = a list of all the subject IDs

    # scan_ids = a list of scan IDs

    # s_paths = a list of all of the filepaths of this particular output
    #           file that prep_group_analysis_workflow is being called for

    p_id, s_ids, scan_ids, s_paths = (list(tup) for tup in zip(*subject_infos))


    # set this to False for now
    fTest = False

    def get_phenotypic_file(phenotypic_file, m_dict, m_list, mod_path, sub_id):
        """Merge motion measures into the phenotypic CSV and write a new copy.

        Reads ``phenotypic_file``, fills in any measure columns listed in
        ``m_list`` that are empty using the per-subject values in ``m_dict``,
        and writes the resulting table (columns restored to original order)
        to ``mod_path``.  Returns the path of the new phenotypic file.
        """
        import csv
        reader = csv.reader(open(phenotypic_file, 'rU'))
        columns = {}
        order = {}
        count = 0
        headers = reader.next()

        for h in headers:
            columns[h] =[]
            order[h] = count
            count+=1

        for r in reader:
            for h, v in zip(headers, r):
                if v:
                    columns[h].append(str(v))

        if m_dict:
            for measure in m_list:

                print '\n\nMeasure: ', measure, '\n\n'

                if measure in headers:
                    # check whether the measure column (e.g. 'MeanFD') is empty
                    if len(columns[measure]) < 1:

                        print '\n\ncolumns[sub_id]: ', columns[sub_id], '\n\n'

                        for sub in columns[sub_id]:

                            if m_dict.get(sub):
                                if m_dict.get(sub).get(measure):
                                    columns[measure].append(m_dict[sub][measure])
                                else:
                                    raise Exception("Couldn't find %s value for subject %s"%(measure,sub))
                            else:
                                raise Exception("Couldn't find subject %s in the parameter file"%sub)


        print '\n\ncolumns[measure]: ', columns, '\n\n'

        # re-assemble rows with columns in their original (header) order
        b = zip(*([k] + columns[k] for k in sorted(columns, key=order.get)))


        try:
            os.makedirs(mod_path)
        except:
            print "%s already exists"%(mod_path)

        new_phenotypic_file = os.path.join(mod_path, os.path.basename(phenotypic_file))

        a = csv.writer(open(new_phenotypic_file, 'w'))

        for col in b:
            a.writerow(list(col))

        return new_phenotypic_file

    # END get_phenotypic_file function



    threshold_val = None
    measure_dict = None
    measure_list = ['MeanFD', 'MeanFD_Jenkinson', 'MeanDVARS']
    model_sub_list = []


    if 1 in c.runScrubbing:

        #get scrubbing threshold

        # prefer the threshold embedded in the output path, fall back to the
        # single configured value
        if re.search('(?<=/_threshold_)\d+.\d+',s_paths[0]):

            threshold_val = re.search('(?<=/_threshold_)\d+.\d+',s_paths[0]).group(0)

        elif len(c.scrubbingThreshold) == 1:

            threshold_val = c.scrubbingThreshold[0]

        else:
            print "Found Multiple threshold value "


        print "scrubbing threshold_val -->", threshold_val

    else:

        print "No scrubbing enabled."

        if len(c.scrubbingThreshold) == 1:
            threshold_val = c.scrubbingThreshold[0]




    import yaml    

    for config in c.modelConfigs:

        print c.modelConfigs
        print config

        try:
            conf = Configuration(yaml.load(open(os.path.realpath(config), 'r')))
        except:
            raise Exception("Error in reading %s configuration file" % config)


        group_sublist = open(conf.subject_list, 'r')

        sublist_items = group_sublist.readlines()

        # drop blank lines and '#' comment lines from the subject list
        subject_list = [line.rstrip('\n') for line in sublist_items \
                              if not (line == '\n') and not line.startswith('#')]

        # list of subject paths which DO exist
        exist_paths = []




        ''' begin iteration through group subject list for processing '''

        for sub in subject_list:

            # let's check to make sure the subject list is formatted for
            # repeated measures properly if repeated measures is enabled and
            # vice versa
            if (c.repeatedMeasures == True) and (',' not in sub):
                print '\n\n'
                print '[!] CPAC says: The group analysis subject list is ' \
                        'not inthe appropriate format for repeated ' \
                        'measures.\n'
                print 'Please use the appropriate format as described in ' \
                        'the CPAC User Guide or turn off Repeated Measures ' \
                        'in the CPAC pipeline configuration editor, found ' \
                        'in the \'Group Analysis Settings\' tab of the ' \
                        'pipeline configuration editor.\n'
                print 'NOTE: CPAC generates a properly-formatted group ' \
                        'analysis subject list meant for running repeated ' \
                        'measures when you create your original subject ' \
                        'list. Look for \'subject_list_group_analysis_' \
                        'repeated_measures.txt\' in the directory where ' \
                        'you created your subject list.\n\n'
                raise Exception

            elif (c.repeatedMeasures == False) and (',' in sub):
                print '\n\n'
                print '[!] CPAC says: It looks like your group analysis ' \
                        'subject list is formatted for running repeated ' \
                        'measures, but \'Run Repeated Measures\' is not ' \
                        'enabled in the pipeline configuration, found in ' \
                        'the \'Group Analysis Settings\' tab of the ' \
                        'pipeline configuration editor.\n'
                print 'Double-check your pipeline configuration?\n\n'
                raise Exception



            ''' process subject ids for repeated measures, if it is on '''
            # if repeated measures is being run and the subject list
            # is a list of subject IDs and scan IDs concatenated
            if (c.repeatedMeasures == True):

                # sub.count(',') equals 1 when there is either multiple scans
                # or multiple sessions but not both, for repeated measures

                # sub.count(',') equals 2 when there are multiple sessions
                # AND scans, for repeated measures

                if sub.count(',') == 1:
                    sub_id = sub.split(',',1)[0]
                    other_id = sub.split(',',1)[1]

                elif sub.count(',') == 2:
                    sub_id = sub.split(',',2)[0]
                    scan_id = sub.split(',',2)[1]
                    session_id = sub.split(',',2)[2]



            ''' drop subjects from the group subject list '''
            # check the path files in path_files_here folder in the subject's
            # output folder - and drop any subjects from the group analysis
            # subject list which do not exist in the paths to the output files

            for path in s_paths:

                if (c.repeatedMeasures == True):

                    if sub.count(',') == 1:
                        if (sub_id in path) and (other_id in path):
                            exist_paths.append(sub)

                    elif sub.count(',') == 2:
                        if (sub_id in path) and (scan_id in path) and \
                                (session_id in path):
                            exist_paths.append(sub)

                else:

                    if sub in path:
                        exist_paths.append(sub)
 




        # check to see if any derivatives of subjects are missing
        if len(list(set(subject_list) - set(exist_paths))) >0:
            print "List of outputs missing for subjects:"
            print list(set(subject_list) - set(exist_paths))
            print "..for derivatives:"
            print resource
            print "..at paths:"
            print os.path.dirname(s_paths[0]).replace(s_ids[0], '*')



        # create the path string for the group analysis output
        out_dir = os.path.dirname(s_paths[0]).split(p_id[0] + '/')
        out_dir = os.path.join(conf.output_dir, out_dir[1])
        out_dir = out_dir.replace(s_ids[0], 'group_analysis_results_%s/_grp_model_%s'%(p_id[0],conf.model_name))

        mod_path = os.path.join(out_dir, 'model_files')


        if not os.path.isdir(mod_path):
            os.makedirs(mod_path)




        ''' write the new subject list '''
        new_sub_file = os.path.join(mod_path, os.path.basename(conf.subject_list))

        try:

            f = open(new_sub_file, 'w')
         
            for sub in exist_paths:
                print >>f, sub
        
            f.close()

        except:

            print "Error: Could not open subject list file: ", new_sub_file
            raise Exception


        conf.update('subject_list',new_sub_file)

        sub_id = conf.subject_id_label



        # NOTE(review): measure_dict is initialized to None above and never
        # reassigned in this function, so this branch appears unreachable as
        # written — confirm whether measure_dict was meant to be populated.
        if measure_dict != None:
            conf.update('pheno_file',get_phenotypic_file(conf.pheno_file, measure_dict, measure_list, mod_path, sub_id))

        print 'conf updated pheno: ', conf.pheno_file, '\n\n'


        print "Model config dictionary ->"
        print conf.__dict__



        # Run 'create_fsl_model' script to extract phenotypic data from
        # the phenotypic file for each of the subjects in the subject list



        ''' get the motion statistics parameter file, if present '''
        # get the parameter file so it can be passed to create_fsl_model.py
        # so MeanFD or other measures can be included in the design matrix
        parameter_file = os.path.join(c.outputDirectory, p_id[0], '%s_threshold_%s_all_params.csv'%(scan_ids[0].strip('_'),threshold_val))

        if 1 in c.runGenerateMotionStatistics:

            if not os.path.exists(parameter_file):
                print '\n\n[!] CPAC says: Could not open the parameter file. ' \
                      'If Generate Motion Statistics is enabled, this can ' \
                      'usually be found in the output directory of your ' \
                      'individual-level analysis runs.\n'
                print 'Path not found: ', parameter_file, '\n\n'
                raise Exception

        elif (1 not in c.runGenerateMotionStatistics) and (os.path.exists(parameter_file)):

            if not os.path.exists(parameter_file):
                print '\n\n[!] CPAC says: Could not open the parameter file. ' \
                      'If Generate Motion Statistics is enabled, this can ' \
                      'usually be found in the output directory of your ' \
                      'individual-level analysis runs.\n'
                print 'Path not found: ', parameter_file, '\n\n'
                raise Exception

        else:

            def no_measures_error(measure):
                # abort if a motion measure is in the design formula but no
                # parameter file exists to supply it
                print '\n\n[!] CPAC says: The measure %s was included in ' \
                      'your group analysis design matrix formula, but ' \
                      'Generate Motion Statistics was not run during ' \
                      'individual-level analysis.\n' % measure
                print 'Please run Generate Motion Statistics if you wish ' \
                      'to include this measure in your model.\n'
                print 'If you HAVE completed a run with this option ' \
                      'enabled, then you are seeing this error because ' \
                      'the motion parameter file normally created by this ' \
                      'option is missing.\n\n'
                raise Exception

            for measure in measure_list:
                if (measure in conf.design_formula):
                    no_measures_error(measure)

            parameter_file = None



        ''' run create_fsl_model.py to generate the group analysis models '''
        # path to the pipeline folder to be passed to create_fsl_model.py
        # so that certain files like output_means.csv can be accessed
        pipeline_path = os.path.join(c.outputDirectory, p_id[0])

        # the current output that cpac_group_analysis_pipeline.py and
        # create_fsl_model.py is currently being run for
        current_output = s_paths[0].replace(pipeline_path, '').split('/')[2]


        try:

            from CPAC.utils import create_fsl_model

            create_fsl_model.run(conf, fTest, parameter_file, pipeline_path, current_output, True)

            #print >>diag, "> Runs create_fsl_model."
            #print >>diag, ""

        except Exception, e:

            print "FSL Group Analysis model not successfully created - error in create_fsl_model script"
            #print "Error ->", e
            raise



        model_sub_list.append((conf.output_dir, conf.subject_list))



    if len(model_sub_list) == 0:
        raise Exception("no model found")





    ''' start group analysis '''

    print '\n\nPreparing the group analysis workflow..\n\n'

    for model_sub in model_sub_list:

        #print >>diag, "Current model_sub: ", model_sub
        #print >>diag, ""

        model, subject_list = model_sub


        if not os.path.exists(model):
            raise Exception("path to model %s doesn't exist"%model)

        if not os.path.exists(subject_list):
            raise Exception("path to input subject list %s is invalid" % subject_list)

        #if c.mixedScanAnalysis == True:
        #    wf = pe.Workflow(name = 'group_analysis/%s/grp_model_%s'%(resource, os.path.basename(model)))
        #else:


        # s_paths is a list of paths to each subject's derivative (of the current
        # derivative gpa is being run on) - s_paths_dirList is a list of each directory
        # in this path separated into list elements
        s_paths_dirList = s_paths[0].split('/')

        currentDerivativeFile = s_paths_dirList[-1]

        currentDerivative = currentDerivativeFile.split('.')[0]

        currentDerivative = currentDerivative.replace('#', '_')


        strgy_path = os.path.dirname(s_paths[0]).split(scan_ids[0])[1]

        for ch in ['.']:
            if ch in strgy_path:
                strgy_path = strgy_path.replace(ch, '_')

        # create nipype-workflow-name-friendly strgy_path
        # (remove special characters)
        strgy_path_name = strgy_path.replace('/', '__')



        wf = pe.Workflow(name = currentDerivative) 

        workDir = c.workingDirectory + '/group_analysis__%s__grp_model_%s__%s' % (resource, conf.model_name, scan_ids[0])
        workDir = workDir + '/' + strgy_path_name

        wf.base_dir = workDir
        wf.config['execution'] = {'hash_method': 'timestamp', 'crashdump_dir': os.path.abspath(c.crashLogDirectory)}
        log_dir = os.path.join(conf.output_dir, 'logs', 'group_analysis', resource, 'model_%s' % (conf.model_name))


        if not os.path.exists(log_dir):
            os.makedirs(log_dir)
        else:
            print "log_dir already exist"




        # enable logging

        from nipype import config
        from nipype import logging

        config.update_config({'logging': {'log_directory': log_dir,
                              'log_to_file': True}})

        # Temporarily disable until solved
        #logging.update_logging(config)

        iflogger = logging.getLogger('interface')




        ''' create the list of paths to all output files to go to model '''
        # create the 'ordered_paths' list, which is a list of all of the
        # output paths of the output files being included in the current
        # group-level analysis model
        #     'ordered_paths' is later connected to the 'zmap_files' input
        #     of the group analysis workflow - the files listed in this list
        #     are merged into the merged 4D file that goes into group analysis

        group_sublist = open(subject_list, 'r')
        sublist_items = group_sublist.readlines()

        input_subject_list = [line.rstrip('\n') for line in sublist_items \
                              if not (line == '\n') and not line.startswith('#')]

        ordered_paths = []
        pathcount = 0
        subcount = 0
        for sub in input_subject_list:

            subcount += 1

            if (c.repeatedMeasures == True):

                # sub.count(',') equals 1 when there is either multiple scans
                # or multiple sessions but not both, for repeated measures

                # sub.count(',') equals 2 when there are multiple sessions
                # AND scans, for repeated measures

                if sub.count(',') == 1:
                    sub_id = sub.split(',',1)[0]
                    other_id = sub.split(',',1)[1]

                elif sub.count(',') == 2:
                    sub_id = sub.split(',',2)[0]
                    scan_id = sub.split(',',2)[1]
                    session_id = sub.split(',',2)[2]


            for path in s_paths:

                if (c.repeatedMeasures == True):

                    # if repeated measures is enabled, make sure all of the
                    # relevant indicators are in the path before adding it
                    # to 'ordered_paths', i.e. the session and/or scan IDs

                    if sub.count(',') == 1:
                        if (sub_id in path) and (other_id in path):
                            pathcount += 1
                            ordered_paths.append(path)

                    elif sub.count(',') == 2:
                        if (sub_id in path) and (scan_id in path) and \
                                (session_id in path):
                            pathcount += 1
                            ordered_paths.append(path)

                else:
                    if sub in path:
                        pathcount += 1
                        ordered_paths.append(path)




        print 'S_paths length: ', len(s_paths)

        print "Ordered paths length (number of subjects): ", len(ordered_paths)

        print "input_subject_list -> %s" % input_subject_list

        print "strgy_path: ", strgy_path


        if len(ordered_paths) == 0:
            print '\n\n\n[!] CPAC says: None of the subjects listed in the ' \
                  'group analysis subject list were found to have outputs ' \
                  'produced by individual-level analysis.\n\nEnsure that ' \
                  'the subjects listed in your group analysis subject list ' \
                  'are the same as the ones included in the individual-' \
                  'level analysis you are running group-level analysis for.' \
                  '\n\n\n'
            raise Exception



        # gp_flow
        # Extracts the model files (.con, .grp, .mat, .fts) from the model
        # directory and sends them to the create_group_analysis workflow gpa_wf

        gp_flow = create_grp_analysis_dataflow("gp_dataflow_%s" % currentDerivative)
        gp_flow.inputs.inputspec.grp_model = model
        gp_flow.inputs.inputspec.fTest = fTest



        # gpa_wf
        # Creates the actual group analysis workflow

        gpa_wf = create_group_analysis(fTest, "gp_analysis_%s" % currentDerivative)

        gpa_wf.inputs.inputspec.zmap_files = ordered_paths
        gpa_wf.inputs.inputspec.z_threshold = c.zThreshold
        gpa_wf.inputs.inputspec.p_threshold = c.pThreshold
        gpa_wf.inputs.inputspec.parameters = (c.FSLDIR, 'MNI152')

        print "group model: ", model
        print "f test: ", fTest
        print "z threshold: ", c.zThreshold
        print "p threshold: ", c.pThreshold
        print "parameters: ", (c.FSLDIR, 'MNI152')


        wf.connect(gp_flow, 'outputspec.mat',
                   gpa_wf, 'inputspec.mat_file')
        wf.connect(gp_flow, 'outputspec.con',
                   gpa_wf, 'inputspec.con_file')
        wf.connect(gp_flow, 'outputspec.grp',
                    gpa_wf, 'inputspec.grp_file')


        if fTest:
            wf.connect(gp_flow, 'outputspec.fts',
                       gpa_wf, 'inputspec.fts_file')



        # ds
        # Creates the datasink node for group analysis

        ds = pe.Node(nio.DataSink(), name='gpa_sink')

        # for SCA ROI outputs, nest the sink under the ROI-number subfolder
        if 'sca_roi' in resource:
            out_dir = os.path.join(out_dir, \
              re.search('ROI_number_(\d)+',os.path.splitext(os.path.splitext(os.path.basename(s_paths[0]))[0])[0]).group(0))

        if 'centrality' in resource:
            names = ['degree_centrality_binarize', 'degree_centrality_weighted', \
                     'eigenvector_centrality_binarize', 'eigenvector_centrality_weighted', \
                     'lfcd_binarize', 'lfcd_weighted']
            for name in names:
                if name in os.path.basename(s_paths[0]):
                    out_dir = os.path.join(out_dir, name)
                    break

        if 'tempreg_maps_z_files' in resource:
            out_dir = os.path.join(out_dir, \
                re.search('\w*[#]*\d+', os.path.splitext(os.path.splitext(os.path.basename(s_paths[0]))[0])[0]).group(0))

#         if c.mixedScanAnalysis == True:
#             out_dir = re.sub(r'(\w)*scan_(\w)*(\d)*(\w)*[/]', '', out_dir)

        ds.inputs.base_directory = out_dir
        ds.inputs.container = ''

        # flatten nested output directories produced by the workflow nodes
        ds.inputs.regexp_substitutions = [(r'(?<=rendered)(.)*[/]','/'),
                                          (r'(?<=model_files)(.)*[/]','/'),
                                          (r'(?<=merged)(.)*[/]','/'),
                                          (r'(?<=stats/clusterMap)(.)*[/]','/'),
                                          (r'(?<=stats/unthreshold)(.)*[/]','/'),
                                          (r'(?<=stats/threshold)(.)*[/]','/'),
                                          (r'_cluster(.)*[/]',''),
                                          (r'_slicer(.)*[/]',''),
                                          (r'_overlay(.)*[/]','')]

        '''
        if 1 in c.runSymbolicLinks:
    
    
            link_node = pe.MapNode(interface=util.Function(
                                input_names=['in_file',
                                            'resource'],
                                    output_names=[],
                                    function=prepare_gp_links),
                                    name='link_gp_', iterfield=['in_file'])
            link_node.inputs.resource = resource
            wf.connect(ds, 'out_file', link_node, 'in_file')
        '''



        ########datasink connections#########
        if fTest:
            wf.connect(gp_flow, 'outputspec.fts',
                       ds, '[email protected]') 

        wf.connect(gp_flow, 'outputspec.mat',
                   ds, '[email protected]' )
        wf.connect(gp_flow, 'outputspec.con',
                   ds, '[email protected]')
        wf.connect(gp_flow, 'outputspec.grp',
                   ds, '[email protected]')
        wf.connect(gpa_wf, 'outputspec.merged',
                   ds, 'merged')
        wf.connect(gpa_wf, 'outputspec.zstats',
                   ds, 'stats.unthreshold')
        wf.connect(gpa_wf, 'outputspec.zfstats',
                   ds,'[email protected]')
        wf.connect(gpa_wf, 'outputspec.fstats',
                   ds,'[email protected]')
        wf.connect(gpa_wf, 'outputspec.cluster_threshold_zf',
                   ds, 'stats.threshold')
        wf.connect(gpa_wf, 'outputspec.cluster_index_zf',
                   ds,'stats.clusterMap')
        wf.connect(gpa_wf, 'outputspec.cluster_localmax_txt_zf',
                   ds, '[email protected]')
        wf.connect(gpa_wf, 'outputspec.overlay_threshold_zf',
                   ds, 'rendered')
        wf.connect(gpa_wf, 'outputspec.rendered_image_zf',
                   ds, '[email protected]')
        wf.connect(gpa_wf, 'outputspec.cluster_threshold',
                   ds,  '[email protected]')
        wf.connect(gpa_wf, 'outputspec.cluster_index',
                   ds, '[email protected]')
        wf.connect(gpa_wf, 'outputspec.cluster_localmax_txt',
                   ds, '[email protected]')
        wf.connect(gpa_wf, 'outputspec.overlay_threshold',
                   ds, '[email protected]')
        wf.connect(gpa_wf, 'outputspec.rendered_image',
                   ds, '[email protected]')

        ######################################

        # Run the actual group analysis workflow
        wf.run()

        '''
        except:

            print "Error: Group analysis workflow run command did not complete successfully."
            print "subcount: ", subcount
            print "pathcount: ", pathcount
            print "sublist: ", sublist_items
            print "input subject list: "
            print "conf: ", conf.subjectListFile
            
            raise Exception
        '''

        print "**Workflow finished for model %s and resource %s"%(os.path.basename(model), resource)

Example 29

Project: Flexget
Source File: series.py
View license
    def parse(self, data=None, field=None, quality=None):
        """Parse a series title into name, identifier, and quality components.

        Matches the configured (or auto-generated) series name regexps against
        ``self.data``, strips the name and quality, then tries the identifier
        modes in order — date, episode (S/E), SEE-style numbering, id regexps,
        sequence regexps, and finally "special" — setting ``self.id``,
        ``self.id_type``, ``self.season``/``self.episode`` (where applicable)
        and ``self.valid`` on success.

        Parameters:
            data: raw title text to parse (falls back to ``self.data``).
            field: name of the field the data came from (stored on self).
            quality: pre-determined quality; if given it overrides detection.

        Raises:
            ParseWarning: when no data is supplied, the data looks like an
                episode pack, or no identifier of the expected style is found.
        """
        # Clear the output variables before parsing
        self._reset()
        self.field = field
        if quality:
            self.quality = quality
        if data:
            self.data = data
        if not self.data:
            raise ParseWarning(self, 'No data supplied to parse.')
        if not self.name:
            log.debug('No name for series `%s` supplied, guessing name.', self.data)
            if not self.guess_name():
                log.debug('Could not determine a series name')
                return
            log.debug('Series name for %s guessed to be %s', self.data, self.name)

        # check if data appears to be unwanted (abort)
        if self.parse_unwanted(self.remove_dirt(self.data)):
            raise ParseWarning(self, '`{data}` appears to be an episode pack'.format(data=self.data))

        name = self.remove_dirt(self.name)

        log.debug('name: %s data: %s', name, self.data)

        # name end position
        name_start = 0
        name_end = 0

        # regexp name matching
        if not self.name_regexps:
            # if we don't have name_regexps, generate one from the name
            self.name_regexps = ReList(
                name_to_re(name, self.ignore_prefixes, self) for name in [self.name] + self.alternate_names)
            # With auto regex generation, the first regex group captures the name
            self.re_from_name = True
        # try all specified regexps on this data
        for name_re in self.name_regexps:
            match = re.search(name_re, self.data)
            if match:
                match_start, match_end = match.span(1 if self.re_from_name else 0)
                # Always pick the longest matching regex
                if match_end > name_end:
                    name_start, name_end = match_start, match_end
                log.debug('NAME SUCCESS: %s matched to %s', name_re.pattern, self.data)
        if not name_end:
            # leave this invalid
            log.debug('FAIL: name regexps %s do not match %s',
                      [regexp.pattern for regexp in self.name_regexps], self.data)
            return

        # remove series name from raw data, move any prefix to end of string
        data_stripped = self.data[name_end:] + ' ' + self.data[:name_start]
        data_stripped = data_stripped.lower()
        log.debug('data stripped: %s', data_stripped)

        # allow group(s)
        if self.allow_groups:
            for group in self.allow_groups:
                group = group.lower()
                # release groups appear either bracketed '[grp]' or dashed '-grp'
                for fmt in ['[%s]', '-%s']:
                    if fmt % group in data_stripped:
                        log.debug('%s is from group %s', self.data, group)
                        self.group = group
                        data_stripped = data_stripped.replace(fmt % group, '')
                        break
                if self.group:
                    break
            else:
                log.debug('%s is not from groups %s', self.data, self.allow_groups)
                return  # leave invalid

        # Find quality and clean from data
        log.debug('parsing quality ->')
        quality = qualities.Quality(data_stripped)
        if quality:
            # Remove quality string from data
            log.debug('quality detected, using remaining data `%s`', quality.clean_text)
            data_stripped = quality.clean_text
        # Don't override passed in quality
        if not self.quality:
            self.quality = quality

        # Remove unwanted words from data for ep / id parsing
        data_stripped = self.remove_words(data_stripped, self.remove, not_in_word=True)

        data_parts = re.split('[\W_]+', data_stripped)

        for part in data_parts[:]:
            if part in self.propers:
                self.proper_count += 1
                data_parts.remove(part)
            elif part == 'fastsub':
                # Subtract 5 to leave room for fastsub propers before the normal release
                self.proper_count -= 5
                data_parts.remove(part)
            elif part in self.specials:
                self.special = True
                data_parts.remove(part)

        data_stripped = ' '.join(data_parts).strip()

        log.debug("data for date/ep/id parsing '%s'", data_stripped)

        # Try date mode before ep mode
        if self.identified_by in ['date', 'auto']:
            date_match = self.parse_date(data_stripped)
            if date_match:
                if self.strict_name:
                    # strict mode: identifier must immediately follow the name
                    if date_match['match'].start() > 1:
                        return
                self.id = date_match['date']
                self.id_groups = date_match['match'].groups()
                self.id_type = 'date'
                self.valid = True
                if not (self.special and self.prefer_specials):
                    return
            else:
                log.debug('-> no luck with date_regexps')

        if self.identified_by in ['ep', 'auto'] and not self.valid:
            ep_match = self.parse_episode(data_stripped)
            if ep_match:
                # strict_name
                if self.strict_name:
                    if ep_match['match'].start() > 1:
                        return

                if ep_match['end_episode'] and ep_match['end_episode'] > ep_match['episode'] + 2:
                    # This is a pack of too many episodes, ignore it.
                    log.debug('Series pack contains too many episodes (%d). Rejecting',
                              ep_match['end_episode'] - ep_match['episode'])
                    return

                self.season = ep_match['season']
                self.episode = ep_match['episode']
                if ep_match['end_episode']:
                    self.episodes = (ep_match['end_episode'] - ep_match['episode']) + 1
                else:
                    self.episodes = 1
                self.id_type = 'ep'
                self.valid = True
                if not (self.special and self.prefer_specials):
                    return
            else:
                log.debug('-> no luck with ep_regexps')

            if self.identified_by == 'ep':
                # we should be getting season, ep !
                # try to look up idiotic numbering scheme 101,102,103,201,202
                # ressu: Added matching for 0101, 0102... It will fail on
                #        season 11 though
                log.debug('ep identifier expected. Attempting SEE format parsing.')
                match = re.search(self.re_not_in_word(r'(\d?\d)(\d\d)'), data_stripped, re.IGNORECASE | re.UNICODE)
                if match:
                    # strict_name
                    if self.strict_name:
                        if match.start() > 1:
                            return

                    self.season = int(match.group(1))
                    self.episode = int(match.group(2))
                    log.debug(self)
                    self.id_type = 'ep'
                    self.valid = True
                    return
                else:
                    log.debug('-> no luck with SEE')

        # Check id regexps
        if self.identified_by in ['id', 'auto'] and not self.valid:
            for id_re in self.id_regexps:
                match = re.search(id_re, data_stripped)
                if match:
                    # strict_name
                    if self.strict_name:
                        if match.start() > 1:
                            return
                    found_id = '-'.join(g for g in match.groups() if g)
                    if not found_id:
                        # If match groups were all blank, don't accept this match
                        continue
                    self.id = found_id
                    self.id_type = 'id'
                    self.valid = True
                    log.debug('found id \'%s\' with regexp \'%s\'', self.id, id_re.pattern)
                    if not (self.special and self.prefer_specials):
                        return
                    else:
                        break
            else:
                log.debug('-> no luck with id_regexps')

        # Other modes are done, check for unwanted sequence ids
        if self.parse_unwanted_sequence(data_stripped):
            return

        # Check sequences last as they contain the broadest matches
        if self.identified_by in ['sequence', 'auto'] and not self.valid:
            for sequence_re in self.sequence_regexps:
                match = re.search(sequence_re, data_stripped)
                if match:
                    # strict_name
                    if self.strict_name:
                        if match.start() > 1:
                            return
                    # First matching group is the sequence number
                    try:
                        self.id = int(match.group(1))
                    except ValueError:
                        # non-numeric sequence, e.g. roman numerals
                        self.id = self.roman_to_int(match.group(1))
                    self.season = 0
                    self.episode = self.id
                    # If anime style version was found, overwrite the proper count with it
                    if 'version' in match.groupdict():
                        if match.group('version'):
                            self.proper_count = int(match.group('version')) - 1
                    self.id_type = 'sequence'
                    self.valid = True
                    log.debug('found id \'%s\' with regexp \'%s\'', self.id, sequence_re.pattern)
                    if not (self.special and self.prefer_specials):
                        return
                    else:
                        break
            else:
                log.debug('-> no luck with sequence_regexps')

        # No id found, check if this is a special
        if self.special or self.assume_special:
            # Attempt to set id as the title of the special
            self.id = data_stripped or 'special'
            self.id_type = 'special'
            self.valid = True
            log.debug('found special, setting id to \'%s\'', self.id)
            return
        if self.valid:
            return

        msg = 'Title `%s` looks like series `%s` but cannot find ' % (self.data, self.name)
        if self.identified_by == 'auto':
            msg += 'any series numbering.'
        else:
            msg += 'a(n) `%s` style identifier.' % self.identified_by
        raise ParseWarning(self, msg)

Example 30

Project: golismero
Source File: target.py
View license
def _setRequestParams():
    """
    Check and set the parameters and perform checks on 'data' option for
    HTTP method POST.

    Populates conf.parameters / conf.paramDict for every injectable place
    (GET, POST, URI, cookie, custom headers, User-Agent/Referer/Host) and
    flips ``testableParameters`` whenever at least one usable parameter is
    found.  Raises SqlmapGenericException if nothing testable remains.
    """

    # Direct database connection (-d): no HTTP request, nothing to parse.
    if conf.direct:
        conf.parameters[None] = "direct connection"
        return

    testableParameters = False

    # Perform checks on GET parameters
    if conf.parameters.get(PLACE.GET):
        parameters = conf.parameters[PLACE.GET]
        paramDict = paramToDict(PLACE.GET, parameters)

        if paramDict:
            conf.paramDict[PLACE.GET] = paramDict
            testableParameters = True

    # Perform checks on POST parameters
    if conf.method == HTTPMETHOD.POST and conf.data is None:
        errMsg = "HTTP POST method depends on HTTP data value to be posted"
        raise SqlmapSyntaxException(errMsg)

    if conf.data is not None:
        # Presence of --data implies POST unless an explicit non-GET method
        # was already chosen.
        conf.method = HTTPMETHOD.POST if not conf.method or conf.method == HTTPMETHOD.GET else conf.method

        # re.sub replacement callback used by the JSON/SOAP/multipart
        # branches below.  Returns the replacement template 'repl' (with
        # the injection mark appended) only when the matched parameter
        # name is selected by -p/--test-parameter; otherwise leaves the
        # match untouched.  Backreferences of the form \g<N> / \g<name>
        # inside 'repl' are expanded manually here, since the replacement
        # string is produced by this callback rather than by re.sub itself.
        def process(match, repl):
            retVal = match.group(0)

            if not (conf.testParameter and match.group("name") not in conf.testParameter):
                retVal = repl
                while True:
                    _ = re.search(r"\\g<([^>]+)>", retVal)
                    if _:
                        retVal = retVal.replace(_.group(0), match.group(int(_.group(1)) if _.group(1).isdigit() else _.group(1)))
                    else:
                        break

            return retVal

        # Ask once whether user-placed custom injection marks ('*') in
        # --data should be honored; remembered in kb.processUserMarks.
        if kb.processUserMarks is None and CUSTOM_INJECTION_MARK_CHAR in conf.data:
            message = "custom injection marking character ('%s') found in option " % CUSTOM_INJECTION_MARK_CHAR
            message += "'--data'. Do you want to process it? [Y/n/q] "
            test = readInput(message, default="Y")
            if test and test[0] in ("q", "Q"):
                raise SqlmapUserQuitException
            else:
                kb.processUserMarks = not test or test[0] not in ("n", "N")

        # No user marks to honor: try to auto-recognize a structured POST
        # body (JSON, SOAP/XML, multipart) and insert marks after each
        # candidate value.
        if not (kb.processUserMarks and CUSTOM_INJECTION_MARK_CHAR in conf.data):
            if re.search(JSON_RECOGNITION_REGEX, conf.data):
                message = "JSON like data found in %s data. " % conf.method
                message += "Do you want to process it? [Y/n/q] "
                test = readInput(message, default="Y")
                if test and test[0] in ("q", "Q"):
                    raise SqlmapUserQuitException
                elif test[0] not in ("n", "N"):
                    # Neutralize any pre-existing marks, then mark every
                    # string value and every numeric value in the JSON body.
                    conf.data = conf.data.replace(CUSTOM_INJECTION_MARK_CHAR, ASTERISK_MARKER)
                    conf.data = re.sub(r'("(?P<name>[^"]+)"\s*:\s*"[^"]+)"', functools.partial(process, repl=r'\g<1>%s"' % CUSTOM_INJECTION_MARK_CHAR), conf.data)
                    conf.data = re.sub(r'("(?P<name>[^"]+)"\s*:\s*)(-?\d[\d\.]*\b)', functools.partial(process, repl=r'\g<0>%s' % CUSTOM_INJECTION_MARK_CHAR), conf.data)
                    kb.postHint = POST_HINT.JSON

            elif re.search(SOAP_RECOGNITION_REGEX, conf.data):
                message = "SOAP/XML like data found in %s data. " % conf.method
                message += "Do you want to process it? [Y/n/q] "
                test = readInput(message, default="Y")
                if test and test[0] in ("q", "Q"):
                    raise SqlmapUserQuitException
                elif test[0] not in ("n", "N"):
                    # Mark the text content of each XML element
                    # (<name ...>value</name>).
                    conf.data = conf.data.replace(CUSTOM_INJECTION_MARK_CHAR, ASTERISK_MARKER)
                    conf.data = re.sub(r"(<(?P<name>[^>]+)( [^<]*)?>)([^<]+)(</\2)", functools.partial(process, repl=r"\g<1>\g<4>%s\g<5>" % CUSTOM_INJECTION_MARK_CHAR), conf.data)
                    kb.postHint = POST_HINT.SOAP if "soap" in conf.data.lower() else POST_HINT.XML

            elif re.search(MULTIPART_RECOGNITION_REGEX, conf.data):
                message = "Multipart like data found in %s data. " % conf.method
                message += "Do you want to process it? [Y/n/q] "
                test = readInput(message, default="Y")
                if test and test[0] in ("q", "Q"):
                    raise SqlmapUserQuitException
                elif test[0] not in ("n", "N"):
                    # Mark the payload of each multipart section, keyed by
                    # its Content-Disposition name.
                    conf.data = conf.data.replace(CUSTOM_INJECTION_MARK_CHAR, ASTERISK_MARKER)
                    conf.data = re.sub(r"(?si)((Content-Disposition[^\n]+?name\s*=\s*[\"'](?P<name>[^\n]+?)[\"']).+?)(((\r)?\n)+--)", functools.partial(process, repl=r"\g<1>%s\g<4>" % CUSTOM_INJECTION_MARK_CHAR), conf.data)
                    kb.postHint = POST_HINT.MULTIPART

        if not kb.postHint:
            if CUSTOM_INJECTION_MARK_CHAR in conf.data:  # later processed
                pass
            else:
                # Plain urlencoded body: treat it as regular POST parameters.
                place = PLACE.POST

                conf.parameters[place] = conf.data
                paramDict = paramToDict(place, conf.data)

                if paramDict:
                    conf.paramDict[place] = paramDict
                    testableParameters = True
        else:
            if CUSTOM_INJECTION_MARK_CHAR not in conf.data:  # in case that no usable parameter values has been found
                conf.parameters[PLACE.POST] = conf.data

    # Marks inserted by the auto-recognition above are always processed.
    kb.processUserMarks = True if (kb.postHint and CUSTOM_INJECTION_MARK_CHAR in conf.data) else kb.processUserMarks

    # Bare URL with no parameters anywhere: offer injecting into the URI
    # itself by appending a mark to it.
    if re.search(URI_INJECTABLE_REGEX, conf.url, re.I) and not any(place in conf.parameters for place in (PLACE.GET, PLACE.POST)) and not kb.postHint and not CUSTOM_INJECTION_MARK_CHAR in (conf.data or ""):
        warnMsg = "you've provided target URL without any GET "
        warnMsg += "parameters (e.g. www.site.com/article.php?id=1) "
        warnMsg += "and without providing any POST parameters "
        warnMsg += "through --data option"
        logger.warn(warnMsg)

        message = "do you want to try URI injections "
        message += "in the target URL itself? [Y/n/q] "
        test = readInput(message, default="Y")

        if not test or test[0] not in ("n", "N"):
            conf.url = "%s%s" % (conf.url, CUSTOM_INJECTION_MARK_CHAR)
            kb.processUserMarks = True
        elif test[0] in ("q", "Q"):
            raise SqlmapUserQuitException

    # Expand injection marks found in the URL, the (possibly pre-marked)
    # POST body and the HTTP headers into one pseudo-parameter per mark.
    for place, value in ((PLACE.URI, conf.url), (PLACE.CUSTOM_POST, conf.data), (PLACE.CUSTOM_HEADER, str(conf.httpHeaders))):
        # Header values may legitimately contain the mark character
        # (e.g. inside problematic patterns); strip those before checking.
        _ = re.sub(PROBLEMATIC_CUSTOM_INJECTION_PATTERNS, "", value or "") if place == PLACE.CUSTOM_HEADER else value or ""
        if CUSTOM_INJECTION_MARK_CHAR in _:
            if kb.processUserMarks is None:
                lut = {PLACE.URI: '-u', PLACE.CUSTOM_POST: '--data', PLACE.CUSTOM_HEADER: '--headers/--user-agent/--referer/--cookie'}
                message = "custom injection marking character ('%s') found in option " % CUSTOM_INJECTION_MARK_CHAR
                message += "'%s'. Do you want to process it? [Y/n/q] " % lut[place]
                test = readInput(message, default="Y")
                if test and test[0] in ("q", "Q"):
                    raise SqlmapUserQuitException
                else:
                    kb.processUserMarks = not test or test[0] not in ("n", "N")

            if not kb.processUserMarks:
                # Marks declined: fall back to ordinary GET/POST parsing.
                if place == PLACE.URI:
                    query = urlparse.urlsplit(value).query
                    if query:
                        parameters = conf.parameters[PLACE.GET] = query
                        paramDict = paramToDict(PLACE.GET, parameters)

                        if paramDict:
                            conf.url = conf.url.split('?')[0]
                            conf.paramDict[PLACE.GET] = paramDict
                            testableParameters = True
                elif place == PLACE.CUSTOM_POST:
                    conf.parameters[PLACE.POST] = conf.data
                    paramDict = paramToDict(PLACE.POST, conf.data)

                    if paramDict:
                        conf.paramDict[PLACE.POST] = paramDict
                        testableParameters = True

            else:
                conf.parameters[place] = value
                conf.paramDict[place] = OrderedDict()

                if place == PLACE.CUSTOM_HEADER:
                    # One pseudo-parameter per mark per header value; the
                    # stored value keeps exactly one mark at the split point.
                    for index in xrange(len(conf.httpHeaders)):
                        header, value = conf.httpHeaders[index]
                        if CUSTOM_INJECTION_MARK_CHAR in re.sub(PROBLEMATIC_CUSTOM_INJECTION_PATTERNS, "", value):
                            parts = value.split(CUSTOM_INJECTION_MARK_CHAR)
                            for i in xrange(len(parts) - 1):
                                conf.paramDict[place]["%s #%d%s" % (header, i + 1, CUSTOM_INJECTION_MARK_CHAR)] = "%s,%s" % (header, "".join("%s%s" % (parts[j], CUSTOM_INJECTION_MARK_CHAR if i == j else "") for j in xrange(len(parts))))
                            conf.httpHeaders[index] = (header, value.replace(CUSTOM_INJECTION_MARK_CHAR, ""))
                else:
                    parts = value.split(CUSTOM_INJECTION_MARK_CHAR)

                    for i in xrange(len(parts) - 1):
                        conf.paramDict[place]["%s#%d%s" % (("%s " % kb.postHint) if kb.postHint else "", i + 1, CUSTOM_INJECTION_MARK_CHAR)] = "".join("%s%s" % (parts[j], CUSTOM_INJECTION_MARK_CHAR if i == j else "") for j in xrange(len(parts)))

                    # Custom places supersede the generic GET/POST ones.
                    if place == PLACE.URI and PLACE.GET in conf.paramDict:
                        del conf.paramDict[PLACE.GET]
                    elif place == PLACE.CUSTOM_POST and PLACE.POST in conf.paramDict:
                        del conf.paramDict[PLACE.POST]

                testableParameters = True

    # Marks have been recorded in conf.paramDict; remove them from the
    # raw option values so requests are sent clean.
    if kb.processUserMarks:
        for item in ("url", "data", "agent", "referer", "cookie"):
            if conf.get(item):
                conf[item] = conf[item].replace(CUSTOM_INJECTION_MARK_CHAR, "")

    # Perform checks on Cookie parameters
    if conf.cookie:
        conf.parameters[PLACE.COOKIE] = conf.cookie
        paramDict = paramToDict(PLACE.COOKIE, conf.cookie)

        if paramDict:
            conf.paramDict[PLACE.COOKIE] = paramDict
            testableParameters = True

    # Perform checks on header values
    if conf.httpHeaders:
        for httpHeader, headerValue in conf.httpHeaders:
            # Url encoding of the header values should be avoided
            # Reference: http://stackoverflow.com/questions/5085904/is-ok-to-urlencode-the-value-in-headerlocation-value

            httpHeader = httpHeader.title()

            if httpHeader == HTTP_HEADER.USER_AGENT:
                conf.parameters[PLACE.USER_AGENT] = urldecode(headerValue)

                # Testable only when not excluded by -p (or its aliases).
                condition = any((not conf.testParameter, intersect(conf.testParameter, USER_AGENT_ALIASES)))

                if condition:
                    conf.paramDict[PLACE.USER_AGENT] = {PLACE.USER_AGENT: headerValue}
                    testableParameters = True

            elif httpHeader == HTTP_HEADER.REFERER:
                conf.parameters[PLACE.REFERER] = urldecode(headerValue)

                condition = any((not conf.testParameter, intersect(conf.testParameter, REFERER_ALIASES)))

                if condition:
                    conf.paramDict[PLACE.REFERER] = {PLACE.REFERER: headerValue}
                    testableParameters = True

            elif httpHeader == HTTP_HEADER.HOST:
                conf.parameters[PLACE.HOST] = urldecode(headerValue)

                condition = any((not conf.testParameter, intersect(conf.testParameter, HOST_ALIASES)))

                if condition:
                    conf.paramDict[PLACE.HOST] = {PLACE.HOST: headerValue}
                    testableParameters = True

    if not conf.parameters:
        errMsg = "you did not provide any GET, POST and Cookie "
        errMsg += "parameter, neither an User-Agent, Referer or Host header value"
        raise SqlmapGenericException(errMsg)

    elif not testableParameters:
        errMsg = "all testable parameters you provided are not present "
        errMsg += "within the given request data"
        raise SqlmapGenericException(errMsg)

Example 31

Project: PrimCom
Source File: h.py
View license
@requires(cfg.EDITOR)
def menu():
    """Interactive REPL: read a command, dispatch, repeat.

    Recognizes built-in commands (help, quit, clear, ...), "prefix:value"
    commands (go:, wp:, lib:, ...), numeric selections of previous search
    hits, and falls back to a tag search for anything else.  Order of the
    elif chain matters: more specific prefixes must be tested before more
    general patterns.  Relies on module-level state (last_key, SearchHits,
    tag2keys, cfg, start_time) — assumed defined elsewhere in this file.
    """
    # Startup time, printed once before entering the loop.
    print("[{0:.3f}s]".format(time.time() - start_time), end='\n')
    #
    while True:
        try:
            #inp = raw_input(bold('pc> ')).strip()
            # Prompt shows the current working directory.
            inp = raw_input(bold('{prompt}> '.format(prompt=os.getcwd()))).strip()
        except (KeyboardInterrupt, EOFError):
            print()
            my_exit(0)
        if len(inp) == 0:
            continue
        # --- fixed commands -------------------------------------------------
        if inp in ('h', 'help()'):
            info()
        elif inp in ('q', 'qq', ':q', ':x', 'quit()', 'exit()'):
            my_exit(0)
        elif inp in ('c', 'clear()'):
            os.system('clear')
            print_header()
        elif inp in ('light()', 'dark()'):
            if inp == 'light()':
                cfg.g.BACKGROUND = cfg.LIGHT
            else:
                cfg.g.BACKGROUND = cfg.DARK
        elif inp in ('t', 'tags()', 'all()', 'd'):
            SearchHits.show_tag_list(tag2keys.keys())
        elif inp == 'p':
            os.system("python")
        elif inp == 'p3':
            os.system("python3")
        elif inp == 'bpy':
            os.system("bpython")
        elif inp == 'last()':
            print(last_key)
        # '!!' repeats the last action; '!cmd' runs cmd in the shell.
        elif inp == '!!':
            if last_key:
                perform_action(last_key)
        elif inp.startswith('!'):
            cmd = inp[1:]
            os.system(cmd)
        # --- operations on the last selected entry --------------------------
        elif inp == 'edit()':
            if last_key:
                edit(last_key)
        elif inp == 'gedit()':
            if last_key:
                gedit(last_key)
        elif inp == 'less()':
            if last_key:
                less(last_key)
        elif inp in ('urls()', 'links()'):
            if last_key:
                show_urls(last_key)
        elif inp in ('cb()', 'tocb()'):
            if last_key:
                to_clipboards(last_key)
        elif inp == 'path()':
            if last_key:
                path_to_clipboards(last_key)
        elif inp == "doc()":
            if last_key:
                show_doc(last_key)
        elif inp == 'json.reload()':
            read_json()
        elif inp in ('json.view()', 'json.edit()'):
            if last_key:
                view_edit_json(last_key)
                read_json()
        elif inp in ("json.edit(this)", "jet()"):
            if last_key:
                edit_entry(last_key)
        # --- misc. applications ---------------------------------------------
        elif inp == 'reddit()':
            reddit.reddit()
        elif inp == 'conferences()':
            conferences.conferences()
        elif inp == 'mute()':
            apps.radio.radio(None, stop=True)
        elif inp == 'myip()':
            my_ip.show_my_ip()
        elif inp in ('v', 'version()'):
            version()
        elif inp == 'commands()':
            show_commands()
        elif inp == 'add()':
            add_item()
            read_json()
        elif inp == 'hits()':
            SearchHits.show_tag_list()
        # --- "prefix:value" web searches ------------------------------------
        elif inp.startswith("pymotw:"):
            site = "pymotw.com"
            cmd_go1(inp[inp.find(':')+1:], site=site)
        elif inp.startswith("go:"):
            cmd_google(inp[inp.find(':')+1:])
        elif inp.startswith("go1:"):
            cmd_go1(inp[inp.find(':')+1:])
        elif inp.startswith("imdb:"):
            site = "imdb.com"
            cmd_go1(inp[inp.find(':')+1:], site=site)
        elif inp.startswith("amazon:"):
            site = "amazon.com"
            cmd_go1(inp[inp.find(':')+1:], site=site)
        elif inp.startswith("youtube:"):
            cmd_youtube(inp[inp.find(':')+1:])
        elif inp.startswith("wp:"):
            site = "wikipedia.org"
            cmd_go1(inp[inp.find(':')+1:], site=site)
        # "lib:" and "lib2:" both point at the Python 2 docs.
        elif inp.startswith("lib:") or inp.startswith("lib2:"):
            site = "docs.python.org/2/library/"
            cmd_go1(inp[inp.find(':')+1:], site=site)
        elif inp.startswith("lib3:"):
            site = "docs.python.org/3/library/"
            cmd_go1(inp[inp.find(':')+1:], site=site)
        elif inp.startswith("golib:"):
            site = "http://golang.org/pkg/"
            lib = inp[inp.find(':')+1:]
            open_url(urljoin(site, lib))
        elif inp.startswith("shorten:"):
            urlshortener.shorten_url(inp[inp.find(':')+1:])
        elif inp.startswith("def:"):
            cmd_def(inp[inp.find(':')+1:])
        elif inp.startswith("pep:"):
            open_pep(inp[inp.find(':')+1:])
        elif inp == 'pep()':
            open_pep(None)
        elif inp == 'show()':
            show.show()
        elif inp == 'numbers()':
            toggle_line_numbers()
        # Line selections like "l1,3-5.py" against the last entry's file.
        elif re.search(r"^l([\d,-]+)\.(sh|py|py2|py3|cb|cb\(>\))$", inp):
            fname = key_to_file(last_key)
            selected_lines.process_selected_lines(inp, fname)
        elif inp == 'cd' or inp.startswith('cd '):
            change_dir(inp)
        elif inp == 'pwd()':
            print(os.getcwd())
        elif inp == 'userpass()':
            username_password()
        elif inp == 'apps()':
            apps.menu.main()
        elif inp == 'k':
            os.system("konsole 2>/dev/null &")
        elif inp.startswith("filter:"):
            term = inp[inp.find(':')+1:]
            if last_key:
                perform_action(last_key, term)
        elif inp.startswith("app:"):
            val = inp[inp.find(':')+1:]
            if not val:
                apps.menu.main()
            else:
                apps.menu.start_app(val)
        # shortcuts
        elif inp == 'radio()':
            apps.menu.start_app_by_shortcut('radio')
        # disabled, always show the search hits
        #elif inp in tag2keys:
        #    tag = inp
        #    command(tag)
        # --- numeric / dotted selection of search hits ----------------------
        # A bare number selects a hit from the last search (1-based).
        elif re.search(r'^\d+$', inp):
            try:
                index = int(inp) - 1
                if index < 0:
                    raise IndexError
                tag = SearchHits.hits[index].tag
                command(tag)
            except IndexError:
                print("out of range...")
        # "N.what" inspects one aspect (doc, url, tags, ...) of hit N.
        elif re.search(r'^\d+\.(doc|action|tags|json|url|link|key|jet|edit)(\(\))?$', inp):
            try:
                pos = inp.find('.')
                index = int(inp[:pos]) - 1
                what = inp[pos+1:].rstrip("()")
                if index < 0:
                    raise IndexError
                hit = SearchHits.hits[index]
                hit.inspect(what)
            except IndexError:
                print("out of range...")
        # "this.what" inspects the same aspects of the last selected entry.
        elif re.search(r'^this.(doc|action|tags|json|url|link|key|jet|edit)(\(\))?$', inp):
            try:
                if not last_key:
                    raise NoLastKeyError
                pos = inp.find('.')
                what = inp[pos+1:].rstrip("()")
                hit = Hit(tag=None, key=last_key)
                hit.inspect(what)
            except NoLastKeyError:
                pass
        elif inp == 'pid()':
            pidcheck.pid_alert()
        elif inp == 'debug()':
            debug(None)
        elif inp == 'song()':
            print("Playing:", apps.radio.get_song())
        else:
            # Fallback: treat the input as a (case-insensitive) tag search.
            if len(inp) == 1:
                print("too short...")
            else:
                inp = inp.lower()
                SearchHits.show_hint(inp)

Example 32

Project: karesansui
Source File: memory.py
View license
def create_memory_graph(_, lang, graph_dir, rrd_dir, start, end, dev=None, type=None):
    """Render a stacked memory-usage PNG graph from collectd RRD files.

    Reads the free/cached/buffered/used memory RRDs under ``rrd_dir`` and
    writes a randomly named PNG into ``graph_dir``, returning its path
    (or "" if any RRD file is unreadable).

    :param _: gettext-style translation function used for all labels
    :param lang: language key into DEFAULT_LANGS (date format lookup)
    :param graph_dir: output directory for the generated PNG
    :param rrd_dir: base directory holding memory/memory-*.rrd files
    :param start: graph start time (passed straight to rrdtool)
    :param end: graph end time (passed straight to rrdtool)
    :param dev: unused here — presumably kept for a common graphing
        signature shared with other resource graphs; TODO confirm
    :param type: unused here (also shadows the builtin ``type``)

    NOTE(review): the ``.encode("utf-8")`` calls indicate this code
    targets Python 2 (str/unicode split); under Python 3 they would
    produce bytes and break the %-formatting.
    """
    # Random file name so concurrent requests do not clobber each other.
    graph_filename = "%s.png" % (generate_phrase(12,'abcdefghijklmnopqrstuvwxyz'))
    graph_filepath = '%s/%s' % (graph_dir, graph_filename)

    rrd_filepath = ("%s/memory/memory-%s.rrd" % (rrd_dir, "free"),
                    "%s/memory/memory-%s.rrd" % (rrd_dir, "cached"),
                    "%s/memory/memory-%s.rrd" % (rrd_dir, "buffered"),
                    "%s/memory/memory-%s.rrd" % (rrd_dir, "used"),
                    )

    # Bail out early (empty path) if any of the four RRDs is missing.
    for filepath in rrd_filepath:
        if is_readable(filepath) is False:
            return ""

    legend_header_label = {"min":_('Min'),
                           "max":_('Max'),
                           "ave":_('Ave'),
                           "last":_('Last'),
                           }

    # Non-ASCII translations must leave the monospace <tt> span, otherwise
    # the pango-markup legend columns misalign.
    for key in legend_header_label.keys():
        if re.search(u"[^a-zA-Z0-9]", legend_header_label[key]):
            legend_header_label[key] = "</tt>%s<tt>" % (legend_header_label[key].encode("utf-8"))
        else:
            legend_header_label[key] = "%s" % (legend_header_label[key].encode("utf-8"))

    legend_header = "<tt>                      %s         %s         %s         %s</tt>" % (legend_header_label['min'],
                                                                                            legend_header_label['max'],
                                                                                            legend_header_label['ave'],
                                                                                            legend_header_label['last']
                                                                                            )

    title = _('Memory')
    # Same monospace trick for the title: ASCII-only titles get <tt>.
    if re.search(u"[^a-zA-Z0-9_\-\. ]", title):
        title = "%s" % (title.encode("utf-8"))
    else:
        title = "<tt>%s</tt>" % (title.encode("utf-8"))

    legend = {"used"     : _('Used'),
              "buffered" : _('Buffered'),
              "cached"   : _('Cached'),
              "free"     : _('Free'),
              }

    # Per-series legend labels: padding widths differ per key so the
    # GPRINT columns line up; non-ASCII labels again escape the <tt> span.
    reg = re.compile(u"[^a-zA-Z0-9_\-\. ]")
    for key in legend.keys():
        if key == "used":
            if reg.search(legend[key]):
                legend[key] = "</tt>%s   <tt>    " % (legend[key].encode("utf-8"))
            else:
                legend[key] = "%s        " % (legend[key].encode("utf-8"))
        elif key == "buffered":
            if reg.search(legend[key]):
                legend[key] = "</tt>%s <tt>     " % (legend[key].encode("utf-8"))
            else:
                legend[key] = "%s    " % (legend[key].encode("utf-8"))
        elif key == "cached":
            if reg.search(legend[key]):
                legend[key] = "</tt>%s<tt>     " % (legend[key].encode("utf-8"))
            else:
                legend[key] = "%s      " % (legend[key].encode("utf-8"))
        elif key == "free":
            if reg.search(legend[key]):
                legend[key] = "</tt>%s   <tt>     " % (legend[key].encode("utf-8"))
            else:
                legend[key] = "%s        " % (legend[key].encode("utf-8"))
        else:
            legend[key] = "%s" % (legend[key].encode("utf-8"))

    created_label = _('Graph created')
    if re.search(u"[^a-zA-Z0-9 ]", created_label):
        created_label = "</tt>%s<tt>" % (created_label.encode("utf-8"))
    else:
        created_label = "%s" % (created_label.encode("utf-8"))

    created_time = "%s" % (datetime.datetime.today().strftime(DEFAULT_LANGS[lang]['DATE_FORMAT'][1]))
    # Colons are field separators in rrdtool COMMENT strings — escape them.
    created_time = re.sub(r':', '\:', created_time)

    legend_footer = "<tt>%s \: %s</tt>" % (created_label, created_time)

    # Stacked areas: used + buffered + cached + free, with min/max/avg/last
    # GPRINT columns for each series.
    data = rrdtool.graph(graph_filepath,
    "--imgformat", "PNG",
    "--font", "TITLE:0:IPAexGothic",
    "--font", "LEGEND:0:IPAexGothic",
    "--pango-markup",
    "--width", "550",
    "--height", "350",
    "--full-size-mode",
    "--color", "BACK#FFFFFF",
    "--color", "CANVAS#FFFFFF",
    "--color", "SHADEA#FFFFFF",
    "--color", "SHADEB#FFFFFF",
    "--color", "GRID#DDDDDD",
    "--color", "MGRID#CCCCCC",
    "--color", "FONT#555555",
    "--color", "FRAME#FFFFFF",
    "--color", "ARROW#FFFFFF",
                         "--title", title,
                         "--vertical-label", _('Bytes').encode("utf-8"),
                         "--lower-limit", "0",
                         "--rigid",
                         "--start", start,
                         "--end",  end,
                         #"--legend-direction", "bottomup",
                         "DEF:free=%s:value:AVERAGE" % (rrd_filepath[0]),
                         "DEF:cached=%s:value:AVERAGE" % (rrd_filepath[1]),
                         "DEF:buffered=%s:value:AVERAGE" % (rrd_filepath[2]),
                         "DEF:used=%s:value:AVERAGE" % (rrd_filepath[3]),
                         "COMMENT:%s\\r" % legend_footer,
                         "COMMENT:<tt>---------------------------------------------------------------------------</tt>\\n",
                         # TRANSLATORS:
                         #  Item names for the memory graph.
                         #  Translating them to Japanese breaks the layout,
                         #  but that will be fixed later, so leaving them
                         #  as-is is fine.
                         "AREA:used#80AA00:<tt>%s</tt>" % (legend['used']),
                         "GPRINT:used:MIN:<tt>%8.1lf %s</tt>",
                         "GPRINT:used:MAX:<tt>%8.1lf %s</tt>",
                         "GPRINT:used:AVERAGE:<tt>%8.1lf %s</tt>",
                         "GPRINT:used:LAST:<tt>%8.1lf %s</tt>\\n",
                         "STACK:buffered#E7EF00:<tt>%s</tt>" % (legend['buffered']),
                         "GPRINT:buffered:MIN:<tt>%8.1lf %s</tt>",
                         "GPRINT:buffered:MAX:<tt>%8.1lf %s</tt>",
                         "GPRINT:buffered:AVERAGE:<tt>%8.1lf %s</tt>",
                         "GPRINT:buffered:LAST:<tt>%8.1lf %s</tt>\\n",
                         "STACK:cached#B3EF00:<tt>%s</tt>" % (legend['cached']),
                         "GPRINT:cached:MIN:<tt>%8.1lf %s</tt>",
                         "GPRINT:cached:MAX:<tt>%8.1lf %s</tt>",
                         "GPRINT:cached:AVERAGE:<tt>%8.1lf %s</tt>",
                         "GPRINT:cached:LAST:<tt>%8.1lf %s</tt>\\n",
                         "STACK:free#FFFFFF:<tt>%s</tt>" % (legend['free']),
                         "GPRINT:free:MIN:<tt>%8.1lf %s</tt>",
                         "GPRINT:free:MAX:<tt>%8.1lf %s</tt>",
                         "GPRINT:free:AVERAGE:<tt>%8.1lf %s</tt>",
                         "GPRINT:free:LAST:<tt>%8.1lf %s</tt>\\n",
                         "COMMENT:%s\\n" % (legend_header),
                         "COMMENT: \\n",
                         )

    return graph_filepath

Example 33

Project: pkgbuilder
Source File: wrapper.py
View license
def wrapper(source='AUTO'):
    """A wrapper for pacman and PKGBUILDer."""
    # Because I need to work with -S and nothing else, I am going to use
    # regular expressions on the argument list.  Sorry.
    if source == 'AUTO':
        argst = sys.argv[1:]
    else:
        argst = source

    log = logging.getLogger('pbwrapper')
    if '--debug' in argst:
        DS.debugmode()
    elif '--debugpb' in argst:
        DS.debugmode()
        argst.remove("--debugpb")
        sys.argv.remove("--debugpb")

    log.info('*** PBwrapper v{0} (PKGBUILDer '
             '{1})'.format(__wrapperversion__, __version__))

    if (('-L' in argst) or ('--unlock' in argst) or (re.search('-[a-zA-Z]*L',
                                                               ' '.join(argst))
                                                     is not None)):
        try:
            os.remove('/var/lib/pacman/db.lck')
            exit(0)
        except OSError as e:
            DS.fancy_error('[-L --unlock] ' + e.strerror)
            exit(1)

    if (('-S' in argst) or ('--sync' in argst) or (re.search('-[a-zA-Z]*S',
                                                             ' '.join(argst))
                                                   is not None)):
        # The user has requested -S.
        # -l/--list is in not in *a because it takes over the whole package
        # list, and that is a workaround.
        log.debug('Got -S, preparing to parse arguments...')
        pacmanshort = ['f', 'g', 'l', 'p', 'q']
        pacmanlong = ['asdeps', 'asexplicit', 'dbonly', 'downloadonly',
                      'force', 'groups', 'list', 'needed', 'nodeps',
                      'noprogressbar', 'noscriptlet', 'print', 'quiet',
                      'verbose']
        pacmanshorta = ['b', 'r']
        pacmanlonga = ['arch', 'cachedir', 'config', 'dbpath', 'gpgdir',
                       'hookdir', 'ignoregroup', 'logfile',
                       'print-format', 'root', 'assume-installed']

        pbshort = ['D', 'C', 'F']
        pblong = ['fetch', 'userfetch', 'vcsupgrade', 'novcsupgrade', 'colors',
                  'nocolors', 'depcheck', 'nodepcheck', 'validation',
                  'novalidation', 'install', 'buildonly', 'pgpcheck',
                  'skippgpcheck', 'deep', 'shallow', 'noclean', 'nodebug']

        commonshort = ['S', 'd', 'i', 's', 'v', 'w']
        commonlong = ['debug', 'info', 'search', 'sync', 'confirm',
                      'noconfirm']
        commonlongl = ['ignore']
        commonshortc = ['c', 'y', 'u']
        commonlongc = ['clean', 'refresh', 'sysupgrade']

        ignoredshort = ['L']
        ignoredlong = ['unlock']

        allpacman = pacmanshort + pacmanlong + pacmanshorta + pacmanlonga
        allpb = pbshort + pblong  # + pbshorta + pblonga
        allcommon = commonshort + commonlong + commonlongl + commonshortc + commonlongc

        allshort = pacmanshort + pbshort + commonshort
        alllong = pacmanlong + pblong + commonlong

        allshortc = commonshortc
        alllongc = commonlongc
        allcountable = allshortc + alllongc

        parser = argparse.ArgumentParser(add_help=False, usage=_('%(prog)s'
                                         ' <operation> [...]'),
                                         argument_default=argparse.SUPPRESS)
        parser.add_argument('-h', '--help', action='store_true',
                            default=False, dest='help')
        parser.add_argument('-V', '--version', action='store_true',
                            default=False, dest='version')

        for i in allshort + ignoredshort:
            parser.add_argument('-' + i, action='store_true', default=False,
                                dest=i)

        for i in alllong + ignoredlong:
            parser.add_argument('--' + i, action='store_true', default=False,
                                dest=i)

        for i in allshortc:
            parser.add_argument('-' + i, action='count', default=0, dest=i)

        for i in alllongc:
            parser.add_argument('--' + i, action='count', default=0, dest=i)

        for i in pacmanshorta:
            parser.add_argument('-' + i, action='store', nargs=1,
                                default='NIL', dest=i)

        for i in pacmanlonga:
            parser.add_argument('--' + i, action='store', nargs=1,
                                default='NIL', dest=i)

        for i in commonlongl:
            parser.add_argument('--' + i, action='append', dest=i)

        parser.add_argument('pkgnames', action='store', nargs='*')

        # Starting actual work.

        if source != 'AUTO':
            args = parser.parse_args(source)
        else:
            args = parser.parse_args()

        log.debug('Arguments parsed.  {0}'.format(args.__dict__))

        try:
            pkgnames = args.pkgnames
        except AttributeError:
            pkgnames = []

        execargs = []
        pacargs = []
        pbargs = []

        for k, v in args.__dict__.items():
            if v is not False:
                # == This argument has been provided.
                if k in allcountable:
                    # == This is a countable argument.
                    if k in allshortc:
                        for x in range(v):
                            execargs.append('-' + k)
                    elif k in alllongc:
                        for x in range(v):
                            execargs.append('--' + k)
                elif v:
                    # == This argument doesn't have a value.
                    if k in allshort:
                        execargs.append('-' + k)
                    elif k in alllong:
                        execargs.append('--' + k)

        for i in execargs:
            if i[1:] in allshort + allshortc:
                s = i[1:]
            elif i[2:] in alllong + alllongc:
                s = i[2:]
            else:
                raise SanityError('argparse broke')

            if s in allcommon:
                pacargs.append(i)
                pbargs.append(i)

            if s in allpacman:
                pacargs.append(i)
            elif s in allpb:
                pbargs.append(i)

        for k, v in args.__dict__.items():
            if v is not False and v != 'NIL':
                # == This argument can take values and has one.
                if k in pacmanshorta:
                    pacargs.append('-' + k)
                    pacargs.extend(v)
                elif k in pacmanlonga:
                    pacargs.append('--' + k)
                    pacargs.extend(v)
                elif k in commonlongl:
                    for vi in v:
                        pacargs.append('--' + k)
                        pacargs.append(vi)
                        pbargs.append('--' + k)
                        pbargs.append(vi)

        log.debug('Preparing to run pacman and/or PKGBUILDer...')

        if args.search or args.s:
            log.debug('Got -s.')
            if args.pkgnames:
                log.info('Running pacman.')
                DS.run_command([DS.paccommand] + pacargs + pkgnames)
                log.info('Running pkgbuilder (pkgbuilder.__main__.main()).')
                pbmain(pbargs + pkgnames)
            else:
                log.info('Nothing to do — args.pkgnames is empty.')

            exit()
        elif args.l or args.list:
            log.debug('Got -l.')
            log.info('Running pacman.')
            DS.run_command([DS.paccommand] + pacargs + pkgnames)
            exit()
        elif args.u or args.sysupgrade:
            log.debug('Got -u.')
            log.info('Running pacman.')
            DS.sudo([DS.paccommand] + pacargs)
            log.info('Running pkgbuilder (pkgbuilder.__main__.main()).')
            pbmain(pbargs, quit=False)
        elif args.y or args.refresh:
            log.debug('Got -y.')
            log.info('Running pacman.')
            DS.sudo([DS.paccommand] + pacargs)
        elif args.help:
            show_help()
            exit()
        elif args.version:
            show_version()
            exit()

        log.debug('Generating AUR packages list...')
        pbpkgnames = []
        info = pkgbuilder.utils.info(pkgnames)

        names = [i.name for i in info]
        pbpkgnames = [n for n in pkgnames if n in names]
        pacmanpkgnames = [i for i in pkgnames if i not in pbpkgnames]

        droppable = ['-u', '-y', '--sysupgrade', '--refresh']

        pacargs = [i for i in pacargs if i not in droppable]
        pbargs = [i for i in pbargs if i not in droppable]
        log.debug('Generated.')

        if pacmanpkgnames != []:
            log.info('Running pacman.')
            DS.sudo([DS.paccommand] + pacargs + pacmanpkgnames)
        else:
            log.info('No repo packages in the list.')

        if pbpkgnames != []:
            log.info('Running pkgbuilder (pkgbuilder.main.main()).')
            pbmain(pbargs + pbpkgnames)
        else:
            log.info('No AUR packages in the list.')

        sanitycheck = pacmanpkgnames + pbpkgnames
        if len(sanitycheck) != len(pkgnames):
            log.info('Running pacman due to failed sanity check.')
            sanityargs = [item for item in pkgnames if (item not in
                          sanitycheck)]
            DS.sudo([DS.paccommand] + pacargs + sanityargs)
    elif (('-F' in argst) or ('--fetch' in argst) or
          ('--userfetch' in argst) or
          ('-X' in argst) or ('--runtx' in argst) or
          (re.search('-[a-zA-Z]*F', ' '.join(argst)) is not None) or
          (re.search('-[a-zA-Z]*X', ' '.join(argst)) is not None)):
        # pkgbuilder -F, --fetch / --userfetch / -X, --runtx.
        pbmain(argst)
    elif ('-h' in argst) or ('--help' in argst):
        show_help()
    elif ('-V' in argst) or ('--version' in argst):
        show_version()
    elif 'UTshibboleet' in argst:
        if argst[0] == 'unittests' and argst[1] == 'UTshibboleet':
            # http://xkcd.com/806/
            pass
        else:
            print('Please don’t use the reserved UTshibboleet argument.')

    elif (('-Q' in argst) or ('--query' in argst) or (re.search(
            '-[a-zA-Z]*Q', ''.join(argst)) is not None)):
        DS.run_command([DS.paccommand] + argst)
    else:
        DS.sudo([DS.paccommand] + argst)

Example 34

Project: pkgbuilder
Source File: wrapper.py
View license
def wrapper(source='AUTO'):
    """A wrapper for pacman and PKGBUILDer.

    Dispatches the requested operation to pacman, to PKGBUILDer, or to
    both.  ``-S`` is parsed fully with argparse so its options and package
    names can be split between the two backends; every other operation is
    detected directly on the raw argument list, including grouped short
    options such as ``-Syu`` (matched with regular expressions on the
    space-joined argument string).

    :param source: the argument list to process, or ``'AUTO'`` to use
                   ``sys.argv[1:]``.
    """
    # Because I need to work with -S and nothing else, I am going to use
    # regular expressions on the argument list.  Sorry.
    if source == 'AUTO':
        argst = sys.argv[1:]
    else:
        argst = source

    log = logging.getLogger('pbwrapper')
    if '--debug' in argst:
        DS.debugmode()
    elif '--debugpb' in argst:
        # --debugpb enables debug mode for the wrapper only and must not
        # be forwarded to the wrapped programs, so drop it from both the
        # local copy and sys.argv (argparse may re-read the latter).
        DS.debugmode()
        argst.remove("--debugpb")
        sys.argv.remove("--debugpb")

    log.info('*** PBwrapper v{0} (PKGBUILDer '
             '{1})'.format(__wrapperversion__, __version__))

    # Join the arguments once with spaces so grouped short options
    # (e.g. ``-Syu``) can be detected with one regex per operation letter.
    # The space separator keeps argument boundaries intact; joining with
    # an empty string would let letters from *adjacent* arguments form
    # spurious matches.
    joined = ' '.join(argst)

    if (('-L' in argst) or ('--unlock' in argst) or
            (re.search('-[a-zA-Z]*L', joined) is not None)):
        # pacman -L/--unlock: just remove the pacman database lock file.
        try:
            os.remove('/var/lib/pacman/db.lck')
            exit(0)
        except OSError as e:
            DS.fancy_error('[-L --unlock] ' + e.strerror)
            exit(1)

    if (('-S' in argst) or ('--sync' in argst) or
            (re.search('-[a-zA-Z]*S', joined) is not None)):
        # The user has requested -S.
        # -l/--list is in not in *a because it takes over the whole package
        # list, and that is a workaround.
        log.debug('Got -S, preparing to parse arguments...')
        # Boolean options understood only by pacman.
        pacmanshort = ['f', 'g', 'l', 'p', 'q']
        pacmanlong = ['asdeps', 'asexplicit', 'dbonly', 'downloadonly',
                      'force', 'groups', 'list', 'needed', 'nodeps',
                      'noprogressbar', 'noscriptlet', 'print', 'quiet',
                      'verbose']
        # pacman-only options that take exactly one value.
        pacmanshorta = ['b', 'r']
        pacmanlonga = ['arch', 'cachedir', 'config', 'dbpath', 'gpgdir',
                       'hookdir', 'ignoregroup', 'logfile',
                       'print-format', 'root', 'assume-installed']

        # Boolean options understood only by PKGBUILDer.
        pbshort = ['D', 'C', 'F']
        pblong = ['fetch', 'userfetch', 'vcsupgrade', 'novcsupgrade', 'colors',
                  'nocolors', 'depcheck', 'nodepcheck', 'validation',
                  'novalidation', 'install', 'buildonly', 'pgpcheck',
                  'skippgpcheck', 'deep', 'shallow', 'noclean', 'nodebug']

        # Options shared by both programs: plain booleans, list-valued
        # (--ignore may repeat) and countable (-yy, -uu, ...).
        commonshort = ['S', 'd', 'i', 's', 'v', 'w']
        commonlong = ['debug', 'info', 'search', 'sync', 'confirm',
                      'noconfirm']
        commonlongl = ['ignore']
        commonshortc = ['c', 'y', 'u']
        commonlongc = ['clean', 'refresh', 'sysupgrade']

        # -L/--unlock was already handled above; accept it silently here.
        ignoredshort = ['L']
        ignoredlong = ['unlock']

        allpacman = pacmanshort + pacmanlong + pacmanshorta + pacmanlonga
        allpb = pbshort + pblong  # + pbshorta + pblonga
        allcommon = commonshort + commonlong + commonlongl + commonshortc + commonlongc

        allshort = pacmanshort + pbshort + commonshort
        alllong = pacmanlong + pblong + commonlong

        allshortc = commonshortc
        alllongc = commonlongc
        allcountable = allshortc + alllongc

        parser = argparse.ArgumentParser(add_help=False, usage=_('%(prog)s'
                                         ' <operation> [...]'),
                                         argument_default=argparse.SUPPRESS)
        parser.add_argument('-h', '--help', action='store_true',
                            default=False, dest='help')
        parser.add_argument('-V', '--version', action='store_true',
                            default=False, dest='version')

        for i in allshort + ignoredshort:
            parser.add_argument('-' + i, action='store_true', default=False,
                                dest=i)

        for i in alllong + ignoredlong:
            parser.add_argument('--' + i, action='store_true', default=False,
                                dest=i)

        for i in allshortc:
            parser.add_argument('-' + i, action='count', default=0, dest=i)

        for i in alllongc:
            parser.add_argument('--' + i, action='count', default=0, dest=i)

        for i in pacmanshorta:
            parser.add_argument('-' + i, action='store', nargs=1,
                                default='NIL', dest=i)

        for i in pacmanlonga:
            parser.add_argument('--' + i, action='store', nargs=1,
                                default='NIL', dest=i)

        for i in commonlongl:
            parser.add_argument('--' + i, action='append', dest=i)

        parser.add_argument('pkgnames', action='store', nargs='*')

        # Starting actual work.

        if source != 'AUTO':
            args = parser.parse_args(source)
        else:
            args = parser.parse_args()

        log.debug('Arguments parsed.  {0}'.format(args.__dict__))

        try:
            pkgnames = args.pkgnames
        except AttributeError:
            # argument_default=SUPPRESS: the attribute is absent when no
            # package names were given on the command line.
            pkgnames = []

        execargs = []
        pacargs = []
        pbargs = []

        # Rebuild the flag-style (boolean and countable) arguments into
        # ``execargs`` exactly as they would appear on a command line.
        for k, v in args.__dict__.items():
            if v is not False:
                # == This argument has been provided.
                if k in allcountable:
                    # == This is a countable argument.
                    if k in allshortc:
                        for x in range(v):
                            execargs.append('-' + k)
                    elif k in alllongc:
                        for x in range(v):
                            execargs.append('--' + k)
                elif v:
                    # == This argument doesn't have a value.
                    if k in allshort:
                        execargs.append('-' + k)
                    elif k in alllong:
                        execargs.append('--' + k)

        # Route each rebuilt flag to pacman, PKGBUILDer, or both.
        for i in execargs:
            if i[1:] in allshort + allshortc:
                s = i[1:]
            elif i[2:] in alllong + alllongc:
                s = i[2:]
            else:
                raise SanityError('argparse broke')

            if s in allcommon:
                pacargs.append(i)
                pbargs.append(i)

            if s in allpacman:
                pacargs.append(i)
            elif s in allpb:
                pbargs.append(i)

        # Value-taking arguments keep their values next to the flag.
        for k, v in args.__dict__.items():
            if v is not False and v != 'NIL':
                # == This argument can take values and has one.
                if k in pacmanshorta:
                    pacargs.append('-' + k)
                    pacargs.extend(v)
                elif k in pacmanlonga:
                    pacargs.append('--' + k)
                    pacargs.extend(v)
                elif k in commonlongl:
                    for vi in v:
                        pacargs.append('--' + k)
                        pacargs.append(vi)
                        pbargs.append('--' + k)
                        pbargs.append(vi)

        log.debug('Preparing to run pacman and/or PKGBUILDer...')

        if args.search or args.s:
            log.debug('Got -s.')
            if args.pkgnames:
                log.info('Running pacman.')
                DS.run_command([DS.paccommand] + pacargs + pkgnames)
                log.info('Running pkgbuilder (pkgbuilder.__main__.main()).')
                pbmain(pbargs + pkgnames)
            else:
                log.info('Nothing to do — args.pkgnames is empty.')

            exit()
        elif args.l or args.list:
            log.debug('Got -l.')
            log.info('Running pacman.')
            DS.run_command([DS.paccommand] + pacargs + pkgnames)
            exit()
        elif args.u or args.sysupgrade:
            log.debug('Got -u.')
            log.info('Running pacman.')
            DS.sudo([DS.paccommand] + pacargs)
            log.info('Running pkgbuilder (pkgbuilder.__main__.main()).')
            pbmain(pbargs, quit=False)
        elif args.y or args.refresh:
            log.debug('Got -y.')
            log.info('Running pacman.')
            DS.sudo([DS.paccommand] + pacargs)
        elif args.help:
            show_help()
            exit()
        elif args.version:
            show_version()
            exit()

        # Plain install: split the requested names into AUR packages
        # (those the AUR metadata query knows) and repository packages.
        log.debug('Generating AUR packages list...')
        pbpkgnames = []
        info = pkgbuilder.utils.info(pkgnames)

        names = [i.name for i in info]
        pbpkgnames = [n for n in pkgnames if n in names]
        pacmanpkgnames = [i for i in pkgnames if i not in pbpkgnames]

        # -u/-y were already acted upon above; do not run them again now.
        droppable = ['-u', '-y', '--sysupgrade', '--refresh']

        pacargs = [i for i in pacargs if i not in droppable]
        pbargs = [i for i in pbargs if i not in droppable]
        log.debug('Generated.')

        if pacmanpkgnames != []:
            log.info('Running pacman.')
            DS.sudo([DS.paccommand] + pacargs + pacmanpkgnames)
        else:
            log.info('No repo packages in the list.')

        if pbpkgnames != []:
            log.info('Running pkgbuilder (pkgbuilder.main.main()).')
            pbmain(pbargs + pbpkgnames)
        else:
            log.info('No AUR packages in the list.')

        # Names that matched neither backend are handed to pacman so the
        # user still sees its error message for them.
        sanitycheck = pacmanpkgnames + pbpkgnames
        if len(sanitycheck) != len(pkgnames):
            log.info('Running pacman due to failed sanity check.')
            sanityargs = [item for item in pkgnames if (item not in
                          sanitycheck)]
            DS.sudo([DS.paccommand] + pacargs + sanityargs)
    elif (('-F' in argst) or ('--fetch' in argst) or
          ('--userfetch' in argst) or
          ('-X' in argst) or ('--runtx' in argst) or
          (re.search('-[a-zA-Z]*F', joined) is not None) or
          (re.search('-[a-zA-Z]*X', joined) is not None)):
        # pkgbuilder -F, --fetch / --userfetch / -X, --runtx.
        pbmain(argst)
    elif ('-h' in argst) or ('--help' in argst):
        show_help()
    elif ('-V' in argst) or ('--version' in argst):
        show_version()
    elif 'UTshibboleet' in argst:
        if argst[0] == 'unittests' and argst[1] == 'UTshibboleet':
            # http://xkcd.com/806/
            pass
        else:
            print('Please don’t use the reserved UTshibboleet argument.')

    elif (('-Q' in argst) or ('--query' in argst) or
          # FIX: this previously used ''.join(argst), unlike every other
          # grouped-option check; the empty join merges adjacent arguments
          # and could match a '-...Q' spanning two of them.
          (re.search('-[a-zA-Z]*Q', joined) is not None)):
        DS.run_command([DS.paccommand] + argst)
    else:
        DS.sudo([DS.paccommand] + argst)

Example 35

Project: ItChat
Source File: client.py
View license
    def __produce_msg(self, l):
        """Normalize a list of raw web-WeChat messages into typed dicts.

        For every raw message ``m``, a ``'Type'``/``'Text'`` (and sometimes
        ``'FileName'``) pair is derived from ``m['MsgType']`` and merged
        into the message; the list of augmented messages is returned.
        Media messages (pictures, voice, video, attachments) get a
        zero-argument download callback as their ``'Text'`` instead of a
        plain string.

        :param l: list of raw message dicts from the sync response.
        :returns: the same messages, each merged with the derived keys.
        """
        rl = []
        srl = [40, 43, 50, 52, 53, 9999]
        # 40 msg, 43 videochat, 50 VOIPMSG, 52 voipnotifymsg, 53 webwxvoipnotifymsg, 9999 sysnotice
        for m in l:
            if '@@' in m['FromUserName'] or '@@' in m['ToUserName']:
                self.__produce_group_chat(m)
            else:
                tools.msg_formatter(m, 'Content')
            if m['MsgType'] == 1: # words
                if m['Url']:
                    # A text message with a Url is a shared location; pull
                    # the "name (address)" part out of the content.
                    regx = r'(.+?\(.+?\))'
                    data = re.search(regx, m['Content'])
                    data = 'Map' if data is None else data.group(1)
                    msg = {
                        'Type': 'Map',
                        'Text': data,}
                else:
                    msg = {
                        'Type': 'Text',
                        'Text': m['Content'],}
            elif m['MsgType'] == 3 or m['MsgType'] == 47: # picture
                download_fn = self.__get_download_fn(
                    '%s/webwxgetmsgimg' % self.loginInfo['url'], m['NewMsgId'])
                msg = {
                    'Type'     : 'Picture',
                    'FileName' : '%s.%s'%(time.strftime('%y%m%d-%H%M%S', time.localtime()),
                        'png' if m['MsgType'] == 3 else 'gif'),
                    'Text'     : download_fn, }
            elif m['MsgType'] == 34: # voice
                download_fn = self.__get_download_fn(
                    '%s/webwxgetvoice' % self.loginInfo['url'], m['NewMsgId'])
                msg = {
                    'Type': 'Recording',
                    'FileName' : '%s.mp4' % time.strftime('%y%m%d-%H%M%S', time.localtime()),
                    'Text': download_fn,}
            elif m['MsgType'] == 37: # friends
                msg = {
                    'Type': 'Friends',
                    'Text': {
                        'status'        : m['Status'],
                        'userName'      : m['RecommendInfo']['UserName'],
                        'ticket'        : m['Ticket'],
                        'userInfo' : m['RecommendInfo'], }, }
            elif m['MsgType'] == 42: # name card
                msg = {
                    'Type': 'Card',
                    'Text': m['RecommendInfo'], }
            elif m['MsgType'] == 49: # sharing
                if m['AppMsgType'] == 6:
                    cookiesList = {name:data for name,data in self.s.cookies.items()}
                    # FIX: the original nested callback closed over ``msg``
                    # (and ``cookiesList``), but ``msg`` is rebound right
                    # below — and again on every loop iteration — so by the
                    # time the callback ran it read the wrong dict (KeyError
                    # on 'FromUserName').  A factory gives each attachment
                    # its own bindings, frozen at creation time.
                    def make_download_atta(sender, mediaId, fileName, dataTicket):
                        def download_atta(attaDir=None):
                            url = self.loginInfo['fileUrl'] + '/webwxgetmedia'
                            params = {
                                'sender': sender,
                                'mediaid': mediaId,
                                'filename': fileName,
                                'fromuser': self.loginInfo['wxuin'],
                                'pass_ticket': 'undefined',
                                'webwx_data_ticket': dataTicket,}
                            r = self.s.get(url, params=params, stream=True)
                            tempStorage = io.BytesIO()
                            for block in r.iter_content(1024):
                                tempStorage.write(block)
                            if attaDir is None: return tempStorage.getvalue()
                            with open(attaDir, 'wb') as f: f.write(tempStorage.getvalue())
                        return download_atta
                    msg = {
                        'Type': 'Attachment',
                        'Text': make_download_atta(
                            m['FromUserName'], m['MediaId'], m['FileName'],
                            cookiesList['webwx_data_ticket']), }
                elif m['AppMsgType'] == 17:
                    msg = {
                        'Type': 'Note',
                        'Text': m['FileName'], }
                elif m['AppMsgType'] == 2000:
                    # Money transfer: second CDATA block holds the summary.
                    regx = r'\[CDATA\[(.+?)\][\s\S]+?\[CDATA\[(.+?)\]'
                    data = re.search(regx, m['Content'])
                    if data:
                        data = data.group(2).split(u'。')[0]
                    else:
                        data = 'You may found detailed info in Content key.'
                    msg = {
                        'Type': 'Note',
                        'Text': data, }
                else:
                    msg = {
                        'Type': 'Sharing',
                        'Text': m['FileName'], }
            elif m['MsgType'] == 51: # phone init
                msg = {
                    'Type': 'Init',
                    'Text': m['ToUserName'], }
            elif m['MsgType'] == 62: # tiny video
                msgId = m['MsgId']
                # FIX: bind msgId as a default argument.  ``msgId`` is
                # rebound on every video message, so a plain closure would
                # download the *last* video for all earlier callbacks.
                def download_video(videoDir=None, msgId=msgId):
                    url = '%s/webwxgetvideo' % self.loginInfo['url']
                    params = {
                        'msgid': msgId,
                        'skey': self.loginInfo['skey'],}
                    headers = {'Range': 'bytes=0-'}
                    r = self.s.get(url, params=params, headers=headers, stream=True)
                    tempStorage = io.BytesIO()
                    for block in r.iter_content(1024):
                        tempStorage.write(block)
                    if videoDir is None: return tempStorage.getvalue()
                    with open(videoDir, 'wb') as f: f.write(tempStorage.getvalue())
                msg = {
                    'Type': 'Video',
                    'FileName' : '%s.mp4' % time.strftime('%y%m%d-%H%M%S', time.localtime()),
                    'Text': download_video, }
            elif m['MsgType'] == 10000:
                msg = {
                    'Type': 'Note',
                    'Text': m['Content'],}
            elif m['MsgType'] == 10002:
                # System message wrapped in CDATA; strip the backslashes.
                regx = r'\[CDATA\[(.+?)\]\]'
                data = re.search(regx, m['Content'])
                data = 'System message' if data is None else data.group(1).replace('\\', '')
                msg = {
                    'Type': 'Note',
                    'Text': data, }
            elif m['MsgType'] in srl:
                msg = {
                    'Type': 'Useless',
                    'Text': 'UselessMsg', }
            else:
                # Unknown type: report it once, then treat further
                # occurrences in this batch as useless.
                out.print_line('MsgType Unknown: %s\n%s'%(m['MsgType'], str(m)), False)
                srl.append(m['MsgType'])
                msg = {
                    'Type': 'Useless',
                    'Text': 'UselessMsg', }
            m = dict(m, **msg)
            rl.append(m)
        return rl

Example 36

Project: mieru
Source File: bakatsuki.py
View license
def processHTML(fullName, folderName):
  """Process the raw HTML from Bakatsuki to a form usable for making EPUBs.

  Reads <folderName>/<folderName>.html, switches into the EPUB content
  folder and writes the EPUB skeleton around the extracted novel text:
  content.opf, the chapter xhtml, toc.ncx, the title page, mimetype,
  container.xml, the stylesheet and the Adobe page template.

  NOTE(review): this function calls os.chdir(), changing the process-wide
  working directory as a side effect; every relative path below depends
  on that.

  :param fullName: full (display) name of the novel; used for logging only
  :param folderName: directory containing "<folderName>.html"; receives
                     the EPUB folder tree
  """

  # Parser state flags (0/1 used as booleans), toggled by marker lines
  # found while scanning the HTML.
  EXP = 0 #end of site
  HEADER = 0 #start of novel text
  WIKI_TABLE = 0 #end of novel text

  print("opening novel: %s" % fullName)
  print("directory: %s" % folderName)
  filename = folderName + ".html"
  print("filename: %s" % filename)
  # folder name is fullName
  # filename is fullName + .html
  chapterFile = open(os.path.join(folderName,filename), "rt", encoding='utf-8')
  novel = chapterFile.read()
  chapterFile.close()
  # get title of the novel
  TITLE = getTitle(novel)

  # switch to the EPUB content folder
  toplevelFolder = os.path.join(folderName, EPUB_TOPLEVEL_FOLDER)
  assurePath(toplevelFolder)
  print("switching to: %s" % toplevelFolder)
  os.chdir(toplevelFolder)
  # make sure all the needed folders exist
  assurePath(EPUB_METADATA_FOLDER)
  assurePath(EPUB_DATA_FOLDER)
  assurePath(EPUB_IMAGE_FOLDER)
  assurePath(EPUB_STYLE_FOLDER)
  assurePath(EPUB_TEXT_FOLDER)

  ### Start the content.opf file, add title ###
  # NOTE(review): opened without an explicit encoding (unlike the xhtml
  # files below) — confirm the platform default encoding is acceptable.
  contentFile = open(EPUB_CONTENT_FILE, 'w')
  contentFile.write("\
<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n\
<package xmlns=\"http://www.idpf.org/2007/opf\" unique-identifier=\"BookID\" version=\"2.0\">\n\
    <metadata xmlns:dc=\"http://purl.org/dc/elements/1.1/\" xmlns:opf=\"http://www.idpf.org/2007/opf\">\n\
        <dc:title>" + TITLE + "</dc:title>\n\
	<dc:language>en</dc:language>\n\
        <dc:rights>Public Domain</dc:rights>\n\
        <dc:creator opf:role=\"aut\">disciple961</dc:creator>\n\
        <dc:publisher>baka-tsuki.com</dc:publisher>\n\
        <dc:identifier id=\"BookID\" opf:scheme=\"UUID\">015ffaec-9340-42f8-b163-a0c5ab7d0611</dc:identifier>\n\
        <meta name=\"Sigil version\" content=\"0.2.4\"/>\n\
    </metadata>\n\
    <manifest>\n\
    <item id=\"ncx\" href=\"toc.ncx\" media-type=\"application/x-dtbncx+xml\"/>\n\
")
  ### content.opf still open. get_images procedure adds lines if pictures are present ###

  ### Start the chapter file with novel text, add title ###
  chapterFile = open(EPUB_CHAPTER_FILE, 'w', encoding='utf-8')
  chapterFile.write("\
<?xml version=\"1.0\" encoding=\"utf-8\"?>\n\
<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.1//EN\"\n\
  \"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd\">\n\
\n\
<html xmlns=\"http://www.w3.org/1999/xhtml\">\n\
<head>\n\
  <title>" + TITLE + "</title>\n\
  <link rel=\"stylesheet\" href=\"../Styles/stylesheet.css\" type=\"text/css\" />\n\
  <link rel=\"stylesheet\" type=\"application/vnd.adobe-page-template+xml\" href=\"../Styles/page-template.xpgt\" />\n\
</head>\n\
\n\
<body>\n\
  <div>\n\
")
  ### chapter file still open ###

  # Main extraction loop: copy the lines between the first mw-headline
  # marker (sets HEADER) and the first wikitable (sets WIKI_TABLE).
  # Image lines in that window are delegated to getImages(), which writes
  # into both the still-open chapter and content files.
  for line in novel.splitlines():
    if HEADER == 0:
      if re.search(r'<h.> <span class="mw-headline"', line) is not None:
        HEADER = 1
    if WIKI_TABLE == 0:
      if re.search(r'<table class="wikitable"', line) is not None:
        WIKI_TABLE = 1
    if (HEADER == 1) and (WIKI_TABLE == 0):
      if re.search('src="/project/images/thumb/', line) is not None:
        #print (re.search('src="/project/images/thumb/', line))
        getImages(line, contentFile, chapterFile)
        ### if the line contains an image, the procedure adds the appropriate tags into the chapter.xhtml and content.opf files ###
      else:
        chapterFile.write(line)
        chapterFile.write("\n")
        ### if the line doesnt contain a picture, it contains text and will be added into the chapter.xhtml ###
    if re.search('</html>', line) is not None:
      # NOTE(review): EXP is set here but never read again in this
      # function — confirm whether it is still needed.
      EXP = 1

  ### end of the chapter file ###
  chapterFile.write("\n\
  </div>\n\
</body>\n\
</html>\n\
")
  chapterFile.close()
  ### end of the chapter file ###

  ### end of the content.opf file ###
  # NOTE(review): the spine below references chap02.xhtml, but the
  # manifest only declares chap01.xhtml — confirm this is intended.
  contentFile.write("\
        <item id=\"page-template.xpgt\" href=\"Styles/page-template.xpgt\" media-type=\"application/vnd.adobe-page-template+xml\"/>\n\
        <item id=\"stylesheet.css\" href=\"Styles/stylesheet.css\" media-type=\"text/css\"/>\n\
        <item id=\"chap01.xhtml\" href=\"Text/chap01.xhtml\" media-type=\"application/xhtml+xml\"/>\n\
        <item id=\"title_page.xhtml\" href=\"Text/title_page.xhtml\" media-type=\"application/xhtml+xml\"/>\n\
    </manifest>\n\
    <spine toc=\"ncx\">\n\
        <itemref idref=\"title_page.xhtml\"/>\n\
        <itemref idref=\"chap01.xhtml\"/>\n\
        <itemref idref=\"chap02.xhtml\"/>\n\
    </spine>\n\
</package>\n\
")
  contentFile.close()
  ### end of the content.opf file ###


  ### adds title to the table of contents ###
  tocFile = open(EPUB_TOC_FILE, 'w')
  tocFile.write("\
<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n\
<!DOCTYPE ncx PUBLIC \"-//NISO//DTD ncx 2005-1//EN\"\n\
   \"http://www.daisy.org/z3986/2005/ncx-2005-1.dtd\">\n\
\n\
<ncx xmlns=\"http://www.daisy.org/z3986/2005/ncx/\" version=\"2005-1\">\n\
    <head>\n\
        <meta name=\"dtb:uid\" content=\"015ffaec-9340-42f8-b163-a0c5ab7d0611\"/>\n\
        <meta name=\"dtb:depth\" content=\"1\"/>\n\
        <meta name=\"dtb:totalPageCount\" content=\"0\"/>\n\
        <meta name=\"dtb:maxPageNumber\" content=\"0\"/>\n\
    </head>\n\
    <docTitle>\n\
        <text>" + TITLE + "</text>\n\
    </docTitle>\n\
    <navMap>\n\
        <navPoint id=\"navPoint-1\" playOrder=\"1\">\n\
            <navLabel>\n\
                <text>" + TITLE + "</text>\n\
            </navLabel>\n\
            <content src=\"Text/title_page.xhtml\"/>\n\
        </navPoint>\n\
        <navPoint id=\"navPoint-2\" playOrder=\"2\">\n\
            <navLabel>\n\
                <text>" + TITLE + "</text>\n\
            </navLabel>\n\
            <content src=\"Text/chap01.xhtml\"/>\n\
        </navPoint>\n\
    </navMap>\n\
</ncx>\n\
")
  tocFile.close()

  # remove the Baka-Tsuki suffix from the title
  TITLE = TITLE.replace('- Baka-Tsuki', '')

  ### adds title to the title page ###
  titleFile = open(EPUB_TITLE_PAGE_FILE, 'w', encoding='utf-8')
  titleFile.write("\
<?xml version=\"1.0\" encoding=\"utf-8\"?>\n\
<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.1//EN\"\n\
  \"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd\">\n\
\n\
<html xmlns=\"http://www.w3.org/1999/xhtml\">\n\
<head>\n\
  <title>" + TITLE + "</title>\n\
  <link rel=\"stylesheet\" href=\"../Styles/stylesheet.css\" type=\"text/css\" />\n\
  <link rel=\"stylesheet\" type=\"application/vnd.adobe-page-template+xml\" href=\"../Styles/page-template.xpgt\" />\n\
</head>\n\
\n\
<body>\n\
  <div>\n\
    <h2 id=\"heading_id_2\">" + TITLE + "</h2>\n\
    <h2 id=\"heading_id_3\">Baka-Tsuki</h2>\n\
  </div>\n\
</body>\n\
</html>\n\
")
  titleFile.close()




  ### write the EPUB mimetype file ###
  mimeFile = open(EPUB_MIME_FILE, 'w')
  mimeFile.write(EPUB_MIME)
  mimeFile.close()

  ### write META-INF/container.xml pointing at content.opf ###
  containerFile = open(EPUB_CONTAINER_FILE, 'w')
  containerFile.write("<?xml version=\"1.0\"?>\n \
<container version=\"1.0\" xmlns=\"urn:oasis:names:tc:opendocument:xmlns:container\">\n \
   <rootfiles>\n \
        <rootfile full-path=\"OEBPS/content.opf\" media-type=\"application/oebps-package+xml\"/>\n \
   </rootfiles>\n \
</container>")
  containerFile.close()

  ### write the shared CSS stylesheet ###
  stylesheetFile = open(EPUB_STYLESHEET_FILE, 'w')
  stylesheetFile.write("\
/* Style Sheet */\n\
/* This defines styles and classes used in the book */\n\
body { margin-left: 5%; margin-right: 5%; margin-top: 5%; margin-bottom: 5%; text-align: justify; }\n\
pre { font-size: x-small; }\n\
h1 { text-align: center; }\n\
h2 { text-align: center; }\n\
h3 { text-align: center; }\n\
h4 { text-align: center; }\n\
h5 { text-align: center; }\n\
h6 { text-align: center; }\n\
.CI {\n\
    text-align:center;\n\
    margin-top:0px;\n\
    margin-bottom:0px;\n\
    padding:0px;\n\
    }\n\
.center   {text-align: center;}\n\
.smcap    {font-variant: small-caps;}\n\
.u        {text-decoration: underline;}\n\
.bold     {font-weight: bold;}\n\
")
  stylesheetFile.close()

  ### write the Adobe page template (responsive column layout) ###
  templateFile = open(EPUB_STYLE_TEMPLATE_FILE, 'w')
  templateFile.write("\
<ade:template xmlns=\"http://www.w3.org/1999/xhtml\" xmlns:ade=\"http://ns.adobe.com/2006/ade\"\n\
		 xmlns:fo=\"http://www.w3.org/1999/XSL/Format\">\n\
\n\
  <fo:layout-master-set>\n\
   <fo:simple-page-master master-name=\"single_column\">\n\
		<fo:region-body margin-bottom=\"3pt\" margin-top=\"0.5em\" margin-left=\"3pt\" margin-right=\"3pt\"/>\n\
    </fo:simple-page-master>\n\
  \n\
    <fo:simple-page-master master-name=\"single_column_head\">\n\
		<fo:region-before extent=\"8.3em\"/>\n\
		<fo:region-body margin-bottom=\"3pt\" margin-top=\"6em\" margin-left=\"3pt\" margin-right=\"3pt\"/>\n\
    </fo:simple-page-master>\n\
\n\
    <fo:simple-page-master master-name=\"two_column\"	margin-bottom=\"0.5em\" margin-top=\"0.5em\" margin-left=\"0.5em\" margin-right=\"0.5em\">\n\
		<fo:region-body column-count=\"2\" column-gap=\"10pt\"/>\n\
    </fo:simple-page-master>\n\
\n\
    <fo:simple-page-master master-name=\"two_column_head\" margin-bottom=\"0.5em\" margin-left=\"0.5em\" margin-right=\"0.5em\">\n\
		<fo:region-before extent=\"8.3em\"/>\n\
		<fo:region-body column-count=\"2\" margin-top=\"6em\" column-gap=\"10pt\"/>\n\
    </fo:simple-page-master>\n\
\n\
    <fo:simple-page-master master-name=\"three_column\" margin-bottom=\"0.5em\" margin-top=\"0.5em\" margin-left=\"0.5em\" margin-right=\"0.5em\">\n\
		<fo:region-body column-count=\"3\" column-gap=\"10pt\"/>\n\
    </fo:simple-page-master>\n\
\n\
    <fo:simple-page-master master-name=\"three_column_head\" margin-bottom=\"0.5em\" margin-top=\"0.5em\" margin-left=\"0.5em\" margin-right=\"0.5em\">\n\
		<fo:region-before extent=\"8.3em\"/>\n\
		<fo:region-body column-count=\"3\" margin-top=\"6em\" column-gap=\"10pt\"/>\n\
    </fo:simple-page-master>\n\
\n\
    <fo:page-sequence-master>\n\
        <fo:repeatable-page-master-alternatives>\n\
            <fo:conditional-page-master-reference master-reference=\"three_column_head\" page-position=\"first\" ade:min-page-width=\"80em\"/>\n\
            <fo:conditional-page-master-reference master-reference=\"three_column\" ade:min-page-width=\"80em\"/>\n\
            <fo:conditional-page-master-reference master-reference=\"two_column_head\" page-position=\"first\" ade:min-page-width=\"50em\"/>\n\
            <fo:conditional-page-master-reference master-reference=\"two_column\" ade:min-page-width=\"50em\"/>\n\
            <fo:conditional-page-master-reference master-reference=\"single_column_head\" page-position=\"first\" />\n\
            <fo:conditional-page-master-reference master-reference=\"single_column\"/>\n\
        </fo:repeatable-page-master-alternatives>\n\
    </fo:page-sequence-master>\n\
\n\
  </fo:layout-master-set>\n\
\n\
  <ade:style>\n\
    <ade:styling-rule selector=\".title_box\" display=\"adobe-other-region\" adobe-region=\"xsl-region-before\"/>\n\
  </ade:style>\n\
\n\
</ade:template>\n\
")
  templateFile.close()

Example 37

Project: mieru
Source File: bakatsuki.py
View license
def processHTML(fullName: str, folderName: str) -> None:
  """Process the raw HTML from Bakatsuki to a form usable for making EPUBs.

  Reads <folderName>/<folderName>.html and writes out the EPUB file tree:
  content.opf, toc.ncx, the chapter and title-page XHTML files, the
  stylesheet, an Adobe page template, the mimetype file and container.xml.

  NOTE(review): this function os.chdir()s into the EPUB content folder and
  never restores the previous working directory - confirm that callers
  expect this side effect.

  fullName   -- display name of the novel (used only for logging here)
  folderName -- working folder; the input HTML is <folderName>/<folderName>.html
  """

  # State flags driven by the line scan further below.
  EXP = 0 #set once the closing </html> tag is seen (never read afterwards)
  HEADER = 0 #becomes 1 at the first wiki headline, i.e. start of novel text
  WIKI_TABLE = 0 #becomes 1 at the first wikitable, i.e. end of novel text

  print("opening novel: %s" % fullName)
  print("directory: %s" % folderName)
  filename = folderName + ".html"
  print("filename: %s" % filename)
  # folder name is fullName
  # filename is fullName + .html
  # NOTE(review): the two comments above disagree with the code, which
  # builds the filename from folderName - verify which is intended.
  chapterFile = open(os.path.join(folderName,filename), "rt", encoding='utf-8')
  novel = chapterFile.read()
  chapterFile.close()
  # get title of the novel
  TITLE = getTitle(novel)

  # switch to the EPUB content folder
  toplevelFolder = os.path.join(folderName, EPUB_TOPLEVEL_FOLDER)
  assurePath(toplevelFolder)
  print("switching to: %s" % toplevelFolder)
  os.chdir(toplevelFolder)
  # make sure all the needed folders exist
  # (all EPUB_* paths below are relative to the toplevel folder from here on)
  assurePath(EPUB_METADATA_FOLDER)
  assurePath(EPUB_DATA_FOLDER)
  assurePath(EPUB_IMAGE_FOLDER)
  assurePath(EPUB_STYLE_FOLDER)
  assurePath(EPUB_TEXT_FOLDER)

  ### Start the content.opf file, add title ###
  contentFile = open(EPUB_CONTENT_FILE, 'w')
  contentFile.write("\
<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n\
<package xmlns=\"http://www.idpf.org/2007/opf\" unique-identifier=\"BookID\" version=\"2.0\">\n\
    <metadata xmlns:dc=\"http://purl.org/dc/elements/1.1/\" xmlns:opf=\"http://www.idpf.org/2007/opf\">\n\
        <dc:title>" + TITLE + "</dc:title>\n\
	<dc:language>en</dc:language>\n\
        <dc:rights>Public Domain</dc:rights>\n\
        <dc:creator opf:role=\"aut\">disciple961</dc:creator>\n\
        <dc:publisher>baka-tsuki.com</dc:publisher>\n\
        <dc:identifier id=\"BookID\" opf:scheme=\"UUID\">015ffaec-9340-42f8-b163-a0c5ab7d0611</dc:identifier>\n\
        <meta name=\"Sigil version\" content=\"0.2.4\"/>\n\
    </metadata>\n\
    <manifest>\n\
    <item id=\"ncx\" href=\"toc.ncx\" media-type=\"application/x-dtbncx+xml\"/>\n\
")
  ### content.opf still open. get_images procedure adds lines if pictures are present ###

  ### Start the chapter file with novel text, add title ###
  chapterFile = open(EPUB_CHAPTER_FILE, 'w', encoding='utf-8')
  chapterFile.write("\
<?xml version=\"1.0\" encoding=\"utf-8\"?>\n\
<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.1//EN\"\n\
  \"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd\">\n\
\n\
<html xmlns=\"http://www.w3.org/1999/xhtml\">\n\
<head>\n\
  <title>" + TITLE + "</title>\n\
  <link rel=\"stylesheet\" href=\"../Styles/stylesheet.css\" type=\"text/css\" />\n\
  <link rel=\"stylesheet\" type=\"application/vnd.adobe-page-template+xml\" href=\"../Styles/page-template.xpgt\" />\n\
</head>\n\
\n\
<body>\n\
  <div>\n\
")
  ### chapter file still open ###

  # Scan the downloaded page line by line and copy only the novel body
  # (between the first wiki headline and the first wikitable) into the
  # chapter file; lines containing thumbnail images are routed through
  # getImages() instead of being copied verbatim.
  for line in novel.splitlines():
    if HEADER == 0:
      if re.search(r'<h.> <span class="mw-headline"', line) is not None:
        HEADER = 1
    if WIKI_TABLE == 0:
      if re.search(r'<table class="wikitable"', line) is not None:
        WIKI_TABLE = 1
    if (HEADER == 1) and (WIKI_TABLE == 0):
      if re.search('src="/project/images/thumb/', line) is not None:
        #print (re.search('src="/project/images/thumb/', line))
        getImages(line, contentFile, chapterFile)
        ### if the line contains an image, the procedure adds the appropriate tags into the chapter.xhtml and content.opf files ###
      else:
        chapterFile.write(line)
        chapterFile.write("\n")
        ### if the line doesn't contain a picture, it contains text and will be added into the chapter.xhtml ###
    if re.search('</html>', line) is not None:
      EXP = 1

  ### end of the chapter file ###
  chapterFile.write("\n\
  </div>\n\
</body>\n\
</html>\n\
")
  chapterFile.close()
  ### end of the chapter file ###

  ### end of the content.opf file ###
  # NOTE(review): the spine below lists chap02.xhtml but the manifest only
  # declares chap01.xhtml - confirm readers tolerate the dangling idref.
  contentFile.write("\
        <item id=\"page-template.xpgt\" href=\"Styles/page-template.xpgt\" media-type=\"application/vnd.adobe-page-template+xml\"/>\n\
        <item id=\"stylesheet.css\" href=\"Styles/stylesheet.css\" media-type=\"text/css\"/>\n\
        <item id=\"chap01.xhtml\" href=\"Text/chap01.xhtml\" media-type=\"application/xhtml+xml\"/>\n\
        <item id=\"title_page.xhtml\" href=\"Text/title_page.xhtml\" media-type=\"application/xhtml+xml\"/>\n\
    </manifest>\n\
    <spine toc=\"ncx\">\n\
        <itemref idref=\"title_page.xhtml\"/>\n\
        <itemref idref=\"chap01.xhtml\"/>\n\
        <itemref idref=\"chap02.xhtml\"/>\n\
    </spine>\n\
</package>\n\
")
  contentFile.close()
  ### end of the content.opf file ###


  ### adds title to the table of contents ###
  tocFile = open(EPUB_TOC_FILE, 'w')
  tocFile.write("\
<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n\
<!DOCTYPE ncx PUBLIC \"-//NISO//DTD ncx 2005-1//EN\"\n\
   \"http://www.daisy.org/z3986/2005/ncx-2005-1.dtd\">\n\
\n\
<ncx xmlns=\"http://www.daisy.org/z3986/2005/ncx/\" version=\"2005-1\">\n\
    <head>\n\
        <meta name=\"dtb:uid\" content=\"015ffaec-9340-42f8-b163-a0c5ab7d0611\"/>\n\
        <meta name=\"dtb:depth\" content=\"1\"/>\n\
        <meta name=\"dtb:totalPageCount\" content=\"0\"/>\n\
        <meta name=\"dtb:maxPageNumber\" content=\"0\"/>\n\
    </head>\n\
    <docTitle>\n\
        <text>" + TITLE + "</text>\n\
    </docTitle>\n\
    <navMap>\n\
        <navPoint id=\"navPoint-1\" playOrder=\"1\">\n\
            <navLabel>\n\
                <text>" + TITLE + "</text>\n\
            </navLabel>\n\
            <content src=\"Text/title_page.xhtml\"/>\n\
        </navPoint>\n\
        <navPoint id=\"navPoint-2\" playOrder=\"2\">\n\
            <navLabel>\n\
                <text>" + TITLE + "</text>\n\
            </navLabel>\n\
            <content src=\"Text/chap01.xhtml\"/>\n\
        </navPoint>\n\
    </navMap>\n\
</ncx>\n\
")
  tocFile.close()

  # remove the Baka-Tsuki suffix from the title
  TITLE = TITLE.replace('- Baka-Tsuki', '')

  ### adds title to the title page ###
  titleFile = open(EPUB_TITLE_PAGE_FILE, 'w', encoding='utf-8')
  titleFile.write("\
<?xml version=\"1.0\" encoding=\"utf-8\"?>\n\
<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.1//EN\"\n\
  \"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd\">\n\
\n\
<html xmlns=\"http://www.w3.org/1999/xhtml\">\n\
<head>\n\
  <title>" + TITLE + "</title>\n\
  <link rel=\"stylesheet\" href=\"../Styles/stylesheet.css\" type=\"text/css\" />\n\
  <link rel=\"stylesheet\" type=\"application/vnd.adobe-page-template+xml\" href=\"../Styles/page-template.xpgt\" />\n\
</head>\n\
\n\
<body>\n\
  <div>\n\
    <h2 id=\"heading_id_2\">" + TITLE + "</h2>\n\
    <h2 id=\"heading_id_3\">Baka-Tsuki</h2>\n\
  </div>\n\
</body>\n\
</html>\n\
")
  titleFile.close()




  # write the (fixed-content) EPUB mimetype file
  mimeFile = open(EPUB_MIME_FILE, 'w')
  mimeFile.write(EPUB_MIME)
  mimeFile.close()

  # write META-INF/container.xml pointing at content.opf
  containerFile = open(EPUB_CONTAINER_FILE, 'w')
  containerFile.write("<?xml version=\"1.0\"?>\n \
<container version=\"1.0\" xmlns=\"urn:oasis:names:tc:opendocument:xmlns:container\">\n \
   <rootfiles>\n \
        <rootfile full-path=\"OEBPS/content.opf\" media-type=\"application/oebps-package+xml\"/>\n \
   </rootfiles>\n \
</container>")
  containerFile.close()

  # write the shared CSS used by both XHTML files above
  stylesheetFile = open(EPUB_STYLESHEET_FILE, 'w')
  stylesheetFile.write("\
/* Style Sheet */\n\
/* This defines styles and classes used in the book */\n\
body { margin-left: 5%; margin-right: 5%; margin-top: 5%; margin-bottom: 5%; text-align: justify; }\n\
pre { font-size: x-small; }\n\
h1 { text-align: center; }\n\
h2 { text-align: center; }\n\
h3 { text-align: center; }\n\
h4 { text-align: center; }\n\
h5 { text-align: center; }\n\
h6 { text-align: center; }\n\
.CI {\n\
    text-align:center;\n\
    margin-top:0px;\n\
    margin-bottom:0px;\n\
    padding:0px;\n\
    }\n\
.center   {text-align: center;}\n\
.smcap    {font-variant: small-caps;}\n\
.u        {text-decoration: underline;}\n\
.bold     {font-weight: bold;}\n\
")
  stylesheetFile.close()

  # write the Adobe page template (column layout rules used by ADE readers)
  templateFile = open(EPUB_STYLE_TEMPLATE_FILE, 'w')
  templateFile.write("\
<ade:template xmlns=\"http://www.w3.org/1999/xhtml\" xmlns:ade=\"http://ns.adobe.com/2006/ade\"\n\
		 xmlns:fo=\"http://www.w3.org/1999/XSL/Format\">\n\
\n\
  <fo:layout-master-set>\n\
   <fo:simple-page-master master-name=\"single_column\">\n\
		<fo:region-body margin-bottom=\"3pt\" margin-top=\"0.5em\" margin-left=\"3pt\" margin-right=\"3pt\"/>\n\
    </fo:simple-page-master>\n\
  \n\
    <fo:simple-page-master master-name=\"single_column_head\">\n\
		<fo:region-before extent=\"8.3em\"/>\n\
		<fo:region-body margin-bottom=\"3pt\" margin-top=\"6em\" margin-left=\"3pt\" margin-right=\"3pt\"/>\n\
    </fo:simple-page-master>\n\
\n\
    <fo:simple-page-master master-name=\"two_column\"	margin-bottom=\"0.5em\" margin-top=\"0.5em\" margin-left=\"0.5em\" margin-right=\"0.5em\">\n\
		<fo:region-body column-count=\"2\" column-gap=\"10pt\"/>\n\
    </fo:simple-page-master>\n\
\n\
    <fo:simple-page-master master-name=\"two_column_head\" margin-bottom=\"0.5em\" margin-left=\"0.5em\" margin-right=\"0.5em\">\n\
		<fo:region-before extent=\"8.3em\"/>\n\
		<fo:region-body column-count=\"2\" margin-top=\"6em\" column-gap=\"10pt\"/>\n\
    </fo:simple-page-master>\n\
\n\
    <fo:simple-page-master master-name=\"three_column\" margin-bottom=\"0.5em\" margin-top=\"0.5em\" margin-left=\"0.5em\" margin-right=\"0.5em\">\n\
		<fo:region-body column-count=\"3\" column-gap=\"10pt\"/>\n\
    </fo:simple-page-master>\n\
\n\
    <fo:simple-page-master master-name=\"three_column_head\" margin-bottom=\"0.5em\" margin-top=\"0.5em\" margin-left=\"0.5em\" margin-right=\"0.5em\">\n\
		<fo:region-before extent=\"8.3em\"/>\n\
		<fo:region-body column-count=\"3\" margin-top=\"6em\" column-gap=\"10pt\"/>\n\
    </fo:simple-page-master>\n\
\n\
    <fo:page-sequence-master>\n\
        <fo:repeatable-page-master-alternatives>\n\
            <fo:conditional-page-master-reference master-reference=\"three_column_head\" page-position=\"first\" ade:min-page-width=\"80em\"/>\n\
            <fo:conditional-page-master-reference master-reference=\"three_column\" ade:min-page-width=\"80em\"/>\n\
            <fo:conditional-page-master-reference master-reference=\"two_column_head\" page-position=\"first\" ade:min-page-width=\"50em\"/>\n\
            <fo:conditional-page-master-reference master-reference=\"two_column\" ade:min-page-width=\"50em\"/>\n\
            <fo:conditional-page-master-reference master-reference=\"single_column_head\" page-position=\"first\" />\n\
            <fo:conditional-page-master-reference master-reference=\"single_column\"/>\n\
        </fo:repeatable-page-master-alternatives>\n\
    </fo:page-sequence-master>\n\
\n\
  </fo:layout-master-set>\n\
\n\
  <ade:style>\n\
    <ade:styling-rule selector=\".title_box\" display=\"adobe-other-region\" adobe-region=\"xsl-region-before\"/>\n\
  </ade:style>\n\
\n\
</ade:template>\n\
")
  templateFile.close()

Example 38

Project: mantaray
Source File: volatility_mr.py
View license
def volatility_mr(case_number, root_folder_path,  evidence, selected_profile, selected_plugin, selected_plugin_descr, complete_plugin_list):
	"""Run the user-selected Volatility plugins against a memory image.

	Creates <root_folder_path>/Volatility, runs each plugin in
	``selected_plugin`` through ``vol.py`` (one output text file per
	plugin), logs skips/errors to Volatility_logfile.txt, and finally
	converts every .txt output file to DOS line endings with unix2dos.

	Args:
		case_number: case identifier (unused here; kept for interface
			compatibility with callers).
		root_folder_path: parent folder for the "Volatility" output folder.
		evidence: path to the memory image passed to ``vol.py -f``.
		selected_profile: Volatility --profile value (e.g. "WinXPSP2x86").
		selected_plugin: list of plugin names to run.
		selected_plugin_descr: plugin descriptions, indexed parallel to
			``complete_plugin_list``.
		complete_plugin_list: full plugin-name list used to look up each
			plugin's description.
	"""
	#create output folder path
	folder_path = root_folder_path + "/" + "Volatility"
	check_for_folder(folder_path, "NONE")

	#open a log file for output
	log_file = folder_path + "/Volatility_logfile.txt"
	outfile = open(log_file, 'wt+')

	Image_Path = evidence

	# Single-quote the image path in case of spaces; all commands below are
	# run through the shell.
	# NOTE(review): shell=True with interpolated paths is injection-prone if
	# any path can contain a single quote - consider shell=False argument
	# lists in a future revision.
	quoted_path = "'" + Image_Path + "'"

	#See Volatility Commands reference data (/usr/share/mantaray/docs/VolatilityUsage23.rst) for more information\n

	# Reporting lists (not all of these are consumed yet).
	win_plugins_complete = []
	win_plugins_not_supported = []
	win_plugins_skipped = []
	win_plugins_error = []

	#print banner - MR
	print("\nMantaRay > " + version)
	print("mantarayforensics.com/forums/")
	print("[email protected]")
	print("github.com/mantarayforensics/mantaray\n")

	#print banner - Vol
	print("Volatility v2.4")
	print("volatilityfoundation.org")
	print("volatility-labs.blogspot.com")
	print("github.com/volatilityfoundation/volatility\n")

	print("Processing requested plugins:")

	#run selected plugins
	for plugin in selected_plugin:

		# Plugins with advanced options this wrapper cannot drive.
		if plugin in suppress_list:
			num_index = complete_plugin_list.index(plugin)
			descr = selected_plugin_descr[num_index]
			print("\nRunning " + plugin + "...")
			print(descr + "...")
			print("The plugin " + plugin + " is not supported...")
			print("This plugin has advanced features.  Run manually...")
			outfile.write("The plugin " + plugin + " is not supported...\n")
			outfile.write("This plugin has advanced features.  Run manually...\n\n")
			win_plugins_not_supported.append(plugin)
			continue

		# Plugins not wired up in this release.
		if plugin in plugin_not_currently_supported:
			num_index = complete_plugin_list.index(plugin)
			descr = selected_plugin_descr[num_index]
			print("\nRunning " + plugin + "...")
			print(descr + "...")
			print("The plugin " + plugin + " is not currently supported...")
			print("Support may be added in a future release...")
			print("Check GitHub for updates...")
			print("github.com/mantarayforensics/mantaray")
			print("Currently running:",version)
			outfile.write("The plugin " + plugin + " is not currently supported.\n")
			outfile.write("Support may be added in a future release.\n")
			outfile.write("Check GitHub for updates...\n")
			outfile.write("github.com/mantarayforensics/mantaray\n")
			outfile.write("The plugin was skipped.\n\n")
			win_plugins_skipped.append(plugin)
			continue

		# pstotal needs several passes (DOT full graph, DOT hidden-only
		# graph, plain text) plus Graphviz rendering of both DOT files.
		if plugin == 'pstotal':
			num_index = complete_plugin_list.index('pstotal')
			descr = selected_plugin_descr[num_index]
			plugin = 'pstotal.dot.full-graph'
			print("\nRunning pstotal...")

			pstotal_command = "vol.py --profile=" + selected_profile + " -f " + quoted_path \
			+ " pstotal --output=dot > " + "'" + folder_path + \
			"/pstotal.dot.full-graph.txt" + "'"
			print("Processing DOT output for full process graph...")
			output = Popen([pstotal_command], shell=True, stderr=PIPE)
			error_logging(outfile,folder_path,selected_profile,plugin,output,win_plugins_error)

			pstotal_hidden_command = "vol.py --profile=" + selected_profile + " -f " + quoted_path \
			+ " pstotal --output=dot -S -C > " + "'" + folder_path + \
			"/pstotal.dot.hidden-only-graph.txt" + "'"
			print("Processing DOT output for only hidden process graph...")
			output = Popen([pstotal_hidden_command], shell=True, stderr=PIPE)
			plugin = 'pstotal.dot.hidden-only-graph'
			error_logging(outfile,folder_path,selected_profile,plugin,output,win_plugins_error)

			pstotal_text_command = "vol.py --profile=" + selected_profile + " -f " + quoted_path + \
			" pstotal --output=text > " + "'" + folder_path + \
			"/pstotal.text-only.txt" + "'"
			print("Processing text output for hidden processes...")
			output = Popen([pstotal_text_command], shell=True, stderr=PIPE)
			plugin = 'pstotal.text-only'
			error_logging(outfile,folder_path,selected_profile,plugin,output,win_plugins_error)

			pstotal_graphviz_command1 = "dot -Tpng " + "'" + folder_path + "/pstotal.dot.full-graph.txt" \
			+ "'" + " -o " + "'" + folder_path + \
			"/pstotal.dot.full-graph.png" + "'"
			print("Running Graphviz to create full graph (PNG)...")
			output = Popen([pstotal_graphviz_command1], shell=True, stderr=PIPE)
			plugin = 'pstotal.dot.full-graph'
			error_logging(outfile,folder_path,selected_profile,plugin,output,win_plugins_error)

			pstotal_graphviz_command2 = "dot -Tpng " + "'" + folder_path + "/pstotal.dot.hidden-only-graph.txt" \
			+ "'" + " -o " + "'" + folder_path + \
			"/pstotal.dot.hidden-only-graph.png" + "'"
			print("Running Graphviz to create hidden graph (PNG)...")
			output = Popen([pstotal_graphviz_command2], shell=True, stderr=PIPE)
			plugin = 'pstotal.dot.hidden-only-graph'
			error_logging(outfile,folder_path,selected_profile,plugin,output,win_plugins_error)

			plugin = 'pstotal'
			win_plugins_complete.append(plugin)
			continue

		# OS-gated plugins: skip silently when the profile does not match.
		xp_2003_only_plugins = ['connections', 'evtlogs']

		if plugin in xp_2003_only_plugins:
			if re.search('XP', selected_profile) or re.search('2003', selected_profile):
				print("\nRunning [Windows XP and 2003 Only] plugin...")
			else:
				continue

		xp_only_plugins = ['sockets','sockscan']

		if plugin in xp_only_plugins:
			if re.search('XP', selected_profile):
				print("\nRunning [Windows XP Only] plugin...")
			else:
				continue

		vista_and_newer_only_plugins = ['netscan','pooltracker']

		if plugin in vista_and_newer_only_plugins:
			os_support = ['Vista','Win7','Win8']
			# BUG FIX: the original used a for/else with no break, so the
			# else branch always executed and these plugins were always
			# skipped regardless of the profile.
			if any(re.search(os_type, selected_profile) for os_type in os_support):
				print("\nRunning Vista and newer only plugin...")
			else:
				continue

		try:
			num_index = complete_plugin_list.index(plugin)
			descr = selected_plugin_descr[num_index]
			print("\nRunning " + plugin + "...")
			print(descr)
			processing_command = "vol.py --profile=" + selected_profile + " -f " + quoted_path + " " + plugin + " > " \
			+ "'" + folder_path + "/" + plugin + ".txt"+"'"
			output = Popen([processing_command], shell=True, stderr=PIPE)
			error_logging(outfile,folder_path,selected_profile,plugin,output,win_plugins_error)
			win_plugins_complete.append(plugin)

		except OSError as error:
			# BUG FIX: the original referenced an undefined name ('pluin')
			# and concatenated the exception object directly to a str,
			# which raises TypeError inside the handler.
			print("The plugin " + plugin + " experienced an OSError and failed, see log file...\n")
			outfile.write("The plugin " + plugin + " experienced an OSError and failed.\n")
			outfile.write(str(error) + "\n")

	#close outfile
	outfile.close()

	#change dir into output folder
	os.chdir(folder_path)

	# Normalise every .txt output file to DOS (CRLF) line endings so the
	# reports open cleanly on Windows.
	for root, dirs, files in os.walk(folder_path):
		for filenames in files:
			#get file extension
			fileName, fileExtension = os.path.splitext(filenames)
			if(fileExtension.lower() == ".txt"):
				full_path = os.path.join(root,filenames)
				quoted_full_path = "'" +full_path+"'"
				print("Running Unix2dos against file: " + filenames)
				unix2dos_command = "sudo unix2dos " + quoted_full_path
				subprocess.call([unix2dos_command], shell=True)

Example 39

Project: mantaray
Source File: volatility_mr.py
View license
def volatility_mr(case_number, root_folder_path,  evidence, selected_profile, selected_plugin, selected_plugin_descr, complete_plugin_list):
	"""Run the user-selected Volatility plugins against a memory image.

	Creates <root_folder_path>/Volatility, runs each plugin in
	``selected_plugin`` through ``vol.py`` (one output text file per
	plugin), logs skips/errors to Volatility_logfile.txt, and finally
	converts every .txt output file to DOS line endings with unix2dos.

	Args:
		case_number: case identifier (unused here; kept for interface
			compatibility with callers).
		root_folder_path: parent folder for the "Volatility" output folder.
		evidence: path to the memory image passed to ``vol.py -f``.
		selected_profile: Volatility --profile value (e.g. "WinXPSP2x86").
		selected_plugin: list of plugin names to run.
		selected_plugin_descr: plugin descriptions, indexed parallel to
			``complete_plugin_list``.
		complete_plugin_list: full plugin-name list used to look up each
			plugin's description.
	"""
	#create output folder path
	folder_path = root_folder_path + "/" + "Volatility"
	check_for_folder(folder_path, "NONE")

	#open a log file for output
	log_file = folder_path + "/Volatility_logfile.txt"
	outfile = open(log_file, 'wt+')

	Image_Path = evidence

	# Single-quote the image path in case of spaces; all commands below are
	# run through the shell.
	# NOTE(review): shell=True with interpolated paths is injection-prone if
	# any path can contain a single quote - consider shell=False argument
	# lists in a future revision.
	quoted_path = "'" + Image_Path + "'"

	#See Volatility Commands reference data (/usr/share/mantaray/docs/VolatilityUsage23.rst) for more information\n

	# Reporting lists (not all of these are consumed yet).
	win_plugins_complete = []
	win_plugins_not_supported = []
	win_plugins_skipped = []
	win_plugins_error = []

	#print banner - MR
	print("\nMantaRay > " + version)
	print("mantarayforensics.com/forums/")
	print("[email protected]")
	print("github.com/mantarayforensics/mantaray\n")

	#print banner - Vol
	print("Volatility v2.4")
	print("volatilityfoundation.org")
	print("volatility-labs.blogspot.com")
	print("github.com/volatilityfoundation/volatility\n")

	print("Processing requested plugins:")

	#run selected plugins
	for plugin in selected_plugin:

		# Plugins with advanced options this wrapper cannot drive.
		if plugin in suppress_list:
			num_index = complete_plugin_list.index(plugin)
			descr = selected_plugin_descr[num_index]
			print("\nRunning " + plugin + "...")
			print(descr + "...")
			print("The plugin " + plugin + " is not supported...")
			print("This plugin has advanced features.  Run manually...")
			outfile.write("The plugin " + plugin + " is not supported...\n")
			outfile.write("This plugin has advanced features.  Run manually...\n\n")
			win_plugins_not_supported.append(plugin)
			continue

		# Plugins not wired up in this release.
		if plugin in plugin_not_currently_supported:
			num_index = complete_plugin_list.index(plugin)
			descr = selected_plugin_descr[num_index]
			print("\nRunning " + plugin + "...")
			print(descr + "...")
			print("The plugin " + plugin + " is not currently supported...")
			print("Support may be added in a future release...")
			print("Check GitHub for updates...")
			print("github.com/mantarayforensics/mantaray")
			print("Currently running:",version)
			outfile.write("The plugin " + plugin + " is not currently supported.\n")
			outfile.write("Support may be added in a future release.\n")
			outfile.write("Check GitHub for updates...\n")
			outfile.write("github.com/mantarayforensics/mantaray\n")
			outfile.write("The plugin was skipped.\n\n")
			win_plugins_skipped.append(plugin)
			continue

		# pstotal needs several passes (DOT full graph, DOT hidden-only
		# graph, plain text) plus Graphviz rendering of both DOT files.
		if plugin == 'pstotal':
			num_index = complete_plugin_list.index('pstotal')
			descr = selected_plugin_descr[num_index]
			plugin = 'pstotal.dot.full-graph'
			print("\nRunning pstotal...")

			pstotal_command = "vol.py --profile=" + selected_profile + " -f " + quoted_path \
			+ " pstotal --output=dot > " + "'" + folder_path + \
			"/pstotal.dot.full-graph.txt" + "'"
			print("Processing DOT output for full process graph...")
			output = Popen([pstotal_command], shell=True, stderr=PIPE)
			error_logging(outfile,folder_path,selected_profile,plugin,output,win_plugins_error)

			pstotal_hidden_command = "vol.py --profile=" + selected_profile + " -f " + quoted_path \
			+ " pstotal --output=dot -S -C > " + "'" + folder_path + \
			"/pstotal.dot.hidden-only-graph.txt" + "'"
			print("Processing DOT output for only hidden process graph...")
			output = Popen([pstotal_hidden_command], shell=True, stderr=PIPE)
			plugin = 'pstotal.dot.hidden-only-graph'
			error_logging(outfile,folder_path,selected_profile,plugin,output,win_plugins_error)

			pstotal_text_command = "vol.py --profile=" + selected_profile + " -f " + quoted_path + \
			" pstotal --output=text > " + "'" + folder_path + \
			"/pstotal.text-only.txt" + "'"
			print("Processing text output for hidden processes...")
			output = Popen([pstotal_text_command], shell=True, stderr=PIPE)
			plugin = 'pstotal.text-only'
			error_logging(outfile,folder_path,selected_profile,plugin,output,win_plugins_error)

			pstotal_graphviz_command1 = "dot -Tpng " + "'" + folder_path + "/pstotal.dot.full-graph.txt" \
			+ "'" + " -o " + "'" + folder_path + \
			"/pstotal.dot.full-graph.png" + "'"
			print("Running Graphviz to create full graph (PNG)...")
			output = Popen([pstotal_graphviz_command1], shell=True, stderr=PIPE)
			plugin = 'pstotal.dot.full-graph'
			error_logging(outfile,folder_path,selected_profile,plugin,output,win_plugins_error)

			pstotal_graphviz_command2 = "dot -Tpng " + "'" + folder_path + "/pstotal.dot.hidden-only-graph.txt" \
			+ "'" + " -o " + "'" + folder_path + \
			"/pstotal.dot.hidden-only-graph.png" + "'"
			print("Running Graphviz to create hidden graph (PNG)...")
			output = Popen([pstotal_graphviz_command2], shell=True, stderr=PIPE)
			plugin = 'pstotal.dot.hidden-only-graph'
			error_logging(outfile,folder_path,selected_profile,plugin,output,win_plugins_error)

			plugin = 'pstotal'
			win_plugins_complete.append(plugin)
			continue

		# OS-gated plugins: skip silently when the profile does not match.
		xp_2003_only_plugins = ['connections', 'evtlogs']

		if plugin in xp_2003_only_plugins:
			if re.search('XP', selected_profile) or re.search('2003', selected_profile):
				print("\nRunning [Windows XP and 2003 Only] plugin...")
			else:
				continue

		xp_only_plugins = ['sockets','sockscan']

		if plugin in xp_only_plugins:
			if re.search('XP', selected_profile):
				print("\nRunning [Windows XP Only] plugin...")
			else:
				continue

		vista_and_newer_only_plugins = ['netscan','pooltracker']

		if plugin in vista_and_newer_only_plugins:
			os_support = ['Vista','Win7','Win8']
			# BUG FIX: the original used a for/else with no break, so the
			# else branch always executed and these plugins were always
			# skipped regardless of the profile.
			if any(re.search(os_type, selected_profile) for os_type in os_support):
				print("\nRunning Vista and newer only plugin...")
			else:
				continue

		try:
			num_index = complete_plugin_list.index(plugin)
			descr = selected_plugin_descr[num_index]
			print("\nRunning " + plugin + "...")
			print(descr)
			processing_command = "vol.py --profile=" + selected_profile + " -f " + quoted_path + " " + plugin + " > " \
			+ "'" + folder_path + "/" + plugin + ".txt"+"'"
			output = Popen([processing_command], shell=True, stderr=PIPE)
			error_logging(outfile,folder_path,selected_profile,plugin,output,win_plugins_error)
			win_plugins_complete.append(plugin)

		except OSError as error:
			# BUG FIX: the original referenced an undefined name ('pluin')
			# and concatenated the exception object directly to a str,
			# which raises TypeError inside the handler.
			print("The plugin " + plugin + " experienced an OSError and failed, see log file...\n")
			outfile.write("The plugin " + plugin + " experienced an OSError and failed.\n")
			outfile.write(str(error) + "\n")

	#close outfile
	outfile.close()

	#change dir into output folder
	os.chdir(folder_path)

	# Normalise every .txt output file to DOS (CRLF) line endings so the
	# reports open cleanly on Windows.
	for root, dirs, files in os.walk(folder_path):
		for filenames in files:
			#get file extension
			fileName, fileExtension = os.path.splitext(filenames)
			if(fileExtension.lower() == ".txt"):
				full_path = os.path.join(root,filenames)
				quoted_full_path = "'" +full_path+"'"
				print("Running Unix2dos against file: " + filenames)
				unix2dos_command = "sudo unix2dos " + quoted_full_path
				subprocess.call([unix2dos_command], shell=True)

Example 40

Project: econsensus
Source File: process_email.py
View license
    def _process_email(self, mail, verbosity): # pylint: disable=R0914
        logger = logging.getLogger('econsensus')

        #handle multipart mails, cycle through mail 
        #until find text type with a full payload.
        if mail.is_multipart():
            for message in mail.get_payload():
                if message.get_content_maintype() == 'text':
                    msg_string = self._strip_string(message.get_payload(), verbosity)
                    if msg_string:
                        break
        else:
            msg_string = self._strip_string(mail.get_payload(), verbosity)       
        
        if not msg_string:
            logger.error("[EMAIL REJECTED] From '%s' Reason: Email payload empty" % mail['From'])
            return
        
        #Must match email 'from' address to user
        from_match = re.search('([\w\-\.][email protected]\w[\w\-]+\.+[\w\-]+)', mail['From'])
        if from_match:
            self._print_if_verbose(verbosity, "Found email 'from' '%s'" % from_match.group(1))
            try:
                user = User.objects.get(email=from_match.group(1))
            except ObjectDoesNotExist:
                logger.error("[EMAIL REJECTED] From '%s' Reason: id '%s' does not correspond to any known User" \
                             % (mail['From'], from_match.group(1)))
                return
            except MultipleObjectsReturned:
                logger.error("[EMAIL REJECTED] From '%s' Reason: Query returned several Users for id '%s'" \
                             % (mail['From'], from_match.group(1)))
                return
            self._print_if_verbose(verbosity, "Matched email to user '%s'" % user)
        else:
            logger.error("[EMAIL REJECTED] From '%s' Reason: Unrecognised email address format" % mail['From'])
            return
        
        #Must match email 'to' address to organization
        org_match = re.search('([\w\-\.]+)@\w[\w\-]+\.+[\w\-]+', mail['To'])
        if org_match:
            self._print_if_verbose(verbosity, "Found email 'to' '%s'" % org_match.group(1))
            try:
                organization = Organization.objects.get(slug=org_match.group(1))
            except ObjectDoesNotExist:
                logger.error("[EMAIL REJECTED] From '%s' Reason: id '%s' does not correspond to any known Organization" \
                             % (mail['From'], org_match.group(1)))
                return
            except MultipleObjectsReturned:
                logger.error("[EMAIL REJECTED] From '%s' Reason: Query returned several Organizations for id '%s'" \
                             % (mail['From'], org_match.group(1)))
                return
            self._print_if_verbose(verbosity, "Matched email to organization '%s'" % organization.name)
        else:
            logger.error("[EMAIL REJECTED] From '%s' Reason: Couldn't pull Organization from '%s'" % (mail['From'], mail['To']))
            return

        #User must be a member of the Organization
        if organization not in Organization.active.get_for_user(user):
            self._print_if_verbose(verbosity, "User %s is not a member of Organization %s" % (user.username, organization.name))
            logger.error("[EMAIL REJECTED] From '%s' Reason: User '%s' is not a member of Organization '%s'" \
                         % (mail['From'], user.username, organization.name))
            return

        #Look for feedback types in the message body
        rating = Feedback.COMMENT_STATUS                    
        description = msg_string                        
        parse_feedback = re.match('(\w+)\s*:\s*([\s\S]*)', msg_string, re.IGNORECASE)
        if parse_feedback:
            description = parse_feedback.group(2)
            rating_match = re.match('question|danger|concerns|consent|comment', parse_feedback.group(1), re.IGNORECASE)
            if rating_match:
                self._print_if_verbose(verbosity, "Found feedback rating '%s'" % rating_match.group())
                rating = dict(Feedback.RATING_CHOICES).values().index(rating_match.group().lower())

        # Determine whether email is in reply to a notification
        subject_match = re.search('\[EC#(\d+)(?:\\\\(\d+)(?:\\\\(\d+))?)?\]', mail['Subject'])
        if subject_match:
            #Check that the user has the right to comment against the decision.
            if subject_match.group(1):
                self._print_if_verbose(verbosity, "Found decision id '%s' in Subject" % subject_match.group(1))
                try:
                    decision = Decision.objects.get(pk=subject_match.group(1))
                except ObjectDoesNotExist:
                    logger.error("[EMAIL REJECTED] From '%s' Reason: id '%s' does not correspond to any known Decision" \
                                 % (mail['From'], subject_match.group(1)))
                    return
                except MultipleObjectsReturned:
                    logger.error("[EMAIL REJECTED] From '%s' Reason: Query returned several Decisions for id '%s'" \
                                 % (mail['From'], subject_match.group(1)))
                    return
                if user not in decision.organization.users.all():
                    logger.error("[EMAIL REJECTED] From '%s' Reason: User cannot reply to decision #%s because they are not a member of that organization." \
                                 % (mail['From'], subject_match.group(1)))
                    return
   
            #process comment or feedback against feedback
            if subject_match.group(2):
                self._print_if_verbose(verbosity, "Found feedback id '%s' in Subject" % subject_match.group(2))
                try:
                    feedback = Feedback.objects.get(pk=subject_match.group(2))
                except ObjectDoesNotExist:
                    logger.error("[EMAIL REJECTED] From '%s' Reason: id '%s' does not correspond to any known Feedback" \
                                 % (mail['From'], subject_match.group(2)))
                    return
                except MultipleObjectsReturned:
                    logger.error("[EMAIL REJECTED] From '%s' Reason: Query returned more than one Feedback for id '%s'" \
                                 % (mail['From'], subject_match.group(2)))
                    return
                
                if parse_feedback and rating_match:
                    decision = feedback.decision
                    self._print_if_verbose(verbosity, "Creating feedback with rating '%s' and description '%s'." % (rating, description))
                    feedback = Feedback(author=user, decision=decision, rating=rating, description=description)
                    feedback.save()
                    logger.info("User '%s' added feedback via email to decision #%s" % (user, decision.id))
                    self._print_if_verbose(verbosity, "Found corresponding object '%s'" % decision.excerpt)
                else:
                    comment_text = msg_string                
                    self._print_if_verbose(verbosity, "Creating comment '%s'." % (comment_text))
                    comment = Comment(user=user,
                                     user_name=user.get_full_name(),
                                     user_email=user.email,
                                     comment = comment_text,
                                     content_object=feedback, 
                                     object_pk=feedback.id,
                                     content_type=ContentType.objects.get(app_label="publicweb", model="feedback"),
                                     submit_date = timezone.now(),
                                     site = Site.objects.get_current())
                    comment.save()
                    logger.info("User '%s' added comment via email to feedback #%s" % (user, feedback.id))
                    self._print_if_verbose(verbosity, "Found corresponding object '%s'" % feedback.description)
            
            #process feedback against decision
            elif subject_match.group(1):
                self._print_if_verbose(verbosity, "Creating feedback with rating '%s' and description '%s'." % (rating, description))
                feedback = Feedback(author=user, decision=decision, rating=rating, description=description)
                feedback.save()
                logger.info("User '%s' added feedback via email to decision #%s" % (user, decision.id))
                self._print_if_verbose(verbosity, "Found corresponding object '%s'" % decision.excerpt)
                
            else:
                self._print_if_verbose(verbosity, "No id found in message subject: %s" % mail['Subject'])                
                logger.error("[EMAIL REJECTED] From '%s' Reason: No id present." \
                             % mail['From'])
        # Email was not in reply to a notification so create a new proposal
        else:
            proposal_match = re.search('proposal', mail['Subject'], re.IGNORECASE)
            if proposal_match:
                decision = Decision(author=user, editor=user, status=Decision.PROPOSAL_STATUS, organization=organization, \
                                    description=msg_string)
                decision.save()
                self._print_if_verbose(verbosity, "User '%s' created decision #%s via email" % (user, decision.id))                
                logger.info("User '%s' created decision #%s via email" % (user, decision.id))

            else:
                logger.error("[EMAIL REJECTED] From '%s' Reason: Email was not in reply to a notification and body didn't contain keyword 'proposal'" \
                             % mail['From'])

Example 41

Project: econsensus
Source File: process_email.py
View license
    def _process_email(self, mail, verbosity): # pylint: disable=R0914
        """Process one inbound email into a Decision, Feedback or Comment.

        The sender address must map to a known ``User`` and the recipient's
        local part to an ``Organization`` slug that the user belongs to.
        A subject marker of the form ``[EC#<decision>\\<feedback>]`` routes
        the mail as a reply (feedback on a decision, or a comment/feedback
        on existing feedback); otherwise a subject containing 'proposal'
        creates a new Decision.  Every rejection is logged and the method
        simply returns.
        """
        logger = logging.getLogger('econsensus')

        # Handle multipart mails: cycle through the payloads until we find
        # a text part with a non-empty body.  msg_string is initialised so
        # a multipart mail with no text part is rejected below instead of
        # raising NameError.
        msg_string = None
        if mail.is_multipart():
            for message in mail.get_payload():
                if message.get_content_maintype() == 'text':
                    msg_string = self._strip_string(message.get_payload(), verbosity)
                    if msg_string:
                        break
        else:
            msg_string = self._strip_string(mail.get_payload(), verbosity)

        if not msg_string:
            logger.error("[EMAIL REJECTED] From '%s' Reason: Email payload empty" % mail['From'])
            return

        # Must match the email 'from' address to a User.  The pattern
        # captures the whole address (mirrors the recipient pattern below).
        from_match = re.search(r'([\w\-\.]+@\w[\w\-]+\.+[\w\-]+)', mail['From'])
        if from_match:
            self._print_if_verbose(verbosity, "Found email 'from' '%s'" % from_match.group(1))
            try:
                user = User.objects.get(email=from_match.group(1))
            except ObjectDoesNotExist:
                logger.error("[EMAIL REJECTED] From '%s' Reason: id '%s' does not correspond to any known User" \
                             % (mail['From'], from_match.group(1)))
                return
            except MultipleObjectsReturned:
                logger.error("[EMAIL REJECTED] From '%s' Reason: Query returned several Users for id '%s'" \
                             % (mail['From'], from_match.group(1)))
                return
            self._print_if_verbose(verbosity, "Matched email to user '%s'" % user)
        else:
            logger.error("[EMAIL REJECTED] From '%s' Reason: Unrecognised email address format" % mail['From'])
            return

        # Must match the email 'to' local part to an Organization slug.
        org_match = re.search(r'([\w\-\.]+)@\w[\w\-]+\.+[\w\-]+', mail['To'])
        if org_match:
            self._print_if_verbose(verbosity, "Found email 'to' '%s'" % org_match.group(1))
            try:
                organization = Organization.objects.get(slug=org_match.group(1))
            except ObjectDoesNotExist:
                logger.error("[EMAIL REJECTED] From '%s' Reason: id '%s' does not correspond to any known Organization" \
                             % (mail['From'], org_match.group(1)))
                return
            except MultipleObjectsReturned:
                logger.error("[EMAIL REJECTED] From '%s' Reason: Query returned several Organizations for id '%s'" \
                             % (mail['From'], org_match.group(1)))
                return
            self._print_if_verbose(verbosity, "Matched email to organization '%s'" % organization.name)
        else:
            logger.error("[EMAIL REJECTED] From '%s' Reason: Couldn't pull Organization from '%s'" % (mail['From'], mail['To']))
            return

        # The user must be a member of the organization they are mailing.
        if organization not in Organization.active.get_for_user(user):
            self._print_if_verbose(verbosity, "User %s is not a member of Organization %s" % (user.username, organization.name))
            logger.error("[EMAIL REJECTED] From '%s' Reason: User '%s' is not a member of Organization '%s'" \
                         % (mail['From'], user.username, organization.name))
            return

        # Look for a feedback-type prefix ("question: ...", "consent: ...")
        # in the message body; default to a plain comment rating.
        rating = Feedback.COMMENT_STATUS
        description = msg_string
        rating_match = None
        parse_feedback = re.match(r'(\w+)\s*:\s*([\s\S]*)', msg_string, re.IGNORECASE)
        if parse_feedback:
            description = parse_feedback.group(2)
            rating_match = re.match('question|danger|concerns|consent|comment', parse_feedback.group(1), re.IGNORECASE)
            if rating_match:
                self._print_if_verbose(verbosity, "Found feedback rating '%s'" % rating_match.group())
                # list(...) keeps this working on Python 3, where dict views
                # have no index() method; behaviour is unchanged on Python 2.
                rating = list(dict(Feedback.RATING_CHOICES).values()).index(rating_match.group().lower())

        # Determine whether the email is in reply to a notification, via
        # the [EC#<decision>\<feedback>\<comment>] marker in the subject.
        subject_match = re.search(r'\[EC#(\d+)(?:\\(\d+)(?:\\(\d+))?)?\]', mail['Subject'])
        if subject_match:
            # Check that the user has the right to comment against the decision.
            if subject_match.group(1):
                self._print_if_verbose(verbosity, "Found decision id '%s' in Subject" % subject_match.group(1))
                try:
                    decision = Decision.objects.get(pk=subject_match.group(1))
                except ObjectDoesNotExist:
                    logger.error("[EMAIL REJECTED] From '%s' Reason: id '%s' does not correspond to any known Decision" \
                                 % (mail['From'], subject_match.group(1)))
                    return
                except MultipleObjectsReturned:
                    logger.error("[EMAIL REJECTED] From '%s' Reason: Query returned several Decisions for id '%s'" \
                                 % (mail['From'], subject_match.group(1)))
                    return
                if user not in decision.organization.users.all():
                    logger.error("[EMAIL REJECTED] From '%s' Reason: User cannot reply to decision #%s because they are not a member of that organization." \
                                 % (mail['From'], subject_match.group(1)))
                    return

            # Process a comment, or feedback, against existing feedback.
            if subject_match.group(2):
                self._print_if_verbose(verbosity, "Found feedback id '%s' in Subject" % subject_match.group(2))
                try:
                    feedback = Feedback.objects.get(pk=subject_match.group(2))
                except ObjectDoesNotExist:
                    logger.error("[EMAIL REJECTED] From '%s' Reason: id '%s' does not correspond to any known Feedback" \
                                 % (mail['From'], subject_match.group(2)))
                    return
                except MultipleObjectsReturned:
                    logger.error("[EMAIL REJECTED] From '%s' Reason: Query returned more than one Feedback for id '%s'" \
                                 % (mail['From'], subject_match.group(2)))
                    return

                if parse_feedback and rating_match:
                    # A rated body creates new feedback on the same decision.
                    decision = feedback.decision
                    self._print_if_verbose(verbosity, "Creating feedback with rating '%s' and description '%s'." % (rating, description))
                    feedback = Feedback(author=user, decision=decision, rating=rating, description=description)
                    feedback.save()
                    logger.info("User '%s' added feedback via email to decision #%s" % (user, decision.id))
                    self._print_if_verbose(verbosity, "Found corresponding object '%s'" % decision.excerpt)
                else:
                    # No rating prefix: attach the whole body as a comment.
                    comment_text = msg_string
                    self._print_if_verbose(verbosity, "Creating comment '%s'." % (comment_text))
                    comment = Comment(user=user,
                                     user_name=user.get_full_name(),
                                     user_email=user.email,
                                     comment = comment_text,
                                     content_object=feedback, 
                                     object_pk=feedback.id,
                                     content_type=ContentType.objects.get(app_label="publicweb", model="feedback"),
                                     submit_date = timezone.now(),
                                     site = Site.objects.get_current())
                    comment.save()
                    logger.info("User '%s' added comment via email to feedback #%s" % (user, feedback.id))
                    self._print_if_verbose(verbosity, "Found corresponding object '%s'" % feedback.description)

            # Process feedback directly against a decision.
            elif subject_match.group(1):
                self._print_if_verbose(verbosity, "Creating feedback with rating '%s' and description '%s'." % (rating, description))
                feedback = Feedback(author=user, decision=decision, rating=rating, description=description)
                feedback.save()
                logger.info("User '%s' added feedback via email to decision #%s" % (user, decision.id))
                self._print_if_verbose(verbosity, "Found corresponding object '%s'" % decision.excerpt)

            else:
                self._print_if_verbose(verbosity, "No id found in message subject: %s" % mail['Subject'])
                logger.error("[EMAIL REJECTED] From '%s' Reason: No id present." \
                             % mail['From'])
        # Email was not in reply to a notification, so create a new proposal
        # if the subject asks for one.
        else:
            proposal_match = re.search('proposal', mail['Subject'], re.IGNORECASE)
            if proposal_match:
                decision = Decision(author=user, editor=user, status=Decision.PROPOSAL_STATUS, organization=organization, \
                                    description=msg_string)
                decision.save()
                self._print_if_verbose(verbosity, "User '%s' created decision #%s via email" % (user, decision.id))
                logger.info("User '%s' created decision #%s via email" % (user, decision.id))

            else:
                logger.error("[EMAIL REJECTED] From '%s' Reason: Email was not in reply to a notification and body didn't contain keyword 'proposal'" \
                             % mail['From'])

Example 42

Project: CVSAnalY
Source File: GitParser.py
View license
    def _parse_line(self, line):
        """Parse one line of ``git log`` output, accumulating commit state.

        Dispatches on the pre-compiled regexes in ``self.patterns``:
        commit headers, committer/author lines, dates, file actions and
        file moves/copies each update ``self.commit`` / ``self.branch``
        and notify ``self.handler``.  Any line that matches nothing is
        appended to the current commit's message.
        """
        # Blank input carries no information.
        if line is None or line == '':
            return

        # Lines matching any 'ignore' pattern are dropped outright.
        for patt in self.patterns['ignore']:
            if patt.match(line):
                return

        # Commit header: flush the previously accumulated commit (unless it
        # sits on an svn tag) and start building a new one.
        match = self.patterns['commit'].match(line)
        if match:
            if self.commit is not None and self.branch is not None:
                if self.branch.tail.svn_tag is None:  # Skip commits on svn tags
                    self.handler.commit(self.branch.tail.commit)

            self.commit = Commit()
            self.commit.revision = match.group(1)

            # Parent hashes, if present, are whitespace-separated.
            parents = match.group(3)
            if parents:
                parents = parents.split()
                self.commit.parents = parents
            git_commit = self.GitCommit(self.commit, parents)

            # The decoration (ref names) tells us which branch/tag this
            # commit heads, if any.
            decorate = match.group(5)
            branch = None
            if decorate:
                # Remote branch
                m = re.search(self.patterns['branch'], decorate)
                if m:
                    branch = self.GitBranch(self.GitBranch.REMOTE, m.group(1), git_commit)
                    printdbg("Branch '%s' head at acommit %s", (branch.name, self.commit.revision))
                else:
                    # Local Branch
                    m = re.search(self.patterns['local-branch'], decorate)
                    if m:
                        branch = self.GitBranch(self.GitBranch.LOCAL, m.group(1), git_commit)
                        printdbg("Commit %s on local branch '%s'", (self.commit.revision, branch.name))
                        # If local branch was merged we just ignore this decoration
                        if self.branch and self.branch.is_my_parent(git_commit):
                            printdbg("Local branch '%s' was merged", (branch.name,))
                            branch = None
                    else:
                        # Stash
                        m = re.search(self.patterns['stash'], decorate)
                        if m:
                            branch = self.GitBranch(self.GitBranch.STASH, "stash", git_commit)
                            printdbg("Commit %s on stash", (self.commit.revision,))
                # Tag
                m = re.search(self.patterns['tag'], decorate)
                if m:
                    self.commit.tags = [m.group(1)]
                    printdbg("Commit %s tagged as '%s'", (self.commit.revision, self.commit.tags[0]))

            # No decoration and no current branch: synthesize a placeholder.
            if not branch and not self.branch:
                branch = self.GitBranch(self.GitBranch.LOCAL, "(no-branch)", git_commit)
                printdbg("Commit %s on unknown local branch '%s'", (self.commit.revision, branch.name))

            # This part of code looks weird at first sight so here is a small description what it does:
            #
            # * self.branch is the branch to which the last inspected commit belonged to
            # * branch is the branch of the current parsed commit
            #
            # This check is only to find branches which are fully merged into a already analyzed branch
            #
            # For more detailed information see https://github.com/MetricsGrimoire/CVSAnalY/issues/64
            if branch is not None and self.branch is not None:
                # Detect empty branches.
                # Ideally, the head of a branch can't have children.
                # When this happens is because the branch is empty, so we just ignore such branch.
                if self.branch.is_my_parent(git_commit):
                    printout(
                        "Info: Branch '%s' will be ignored, because it was already merged in an active one.",
                        (branch.name,)
                    )
                    branch = None

            if len(self.branches) >= 2:
                # If current commit is the start point of a new branch
                # we have to look at all the current branches since
                # we haven't inserted the new branch yet.
                # If not, look at all other branches excluding the current one
                for i, b in enumerate(self.branches):
                    if i == 0 and branch is None:
                        continue

                    if b.is_my_parent(git_commit):
                        # We assume current branch is always the last one
                        # AFAIK there's no way to make sure this is right
                        printdbg("Start point of branch '%s' at commit %s",
                                 (self.branches[0].name, self.commit.revision))
                        self.branches.pop(0)
                        self.branch = b

            if self.branch and self.branch.tail.svn_tag is not None and self.branch.is_my_parent(git_commit):
                # There's a pending tag in previous commit
                pending_tag = self.branch.tail.svn_tag
                printdbg("Move pending tag '%s' from previous commit %s to current %s", (pending_tag,
                                                                                         self.branch.tail.commit.revision,
                                                                                         self.commit.revision))
                if self.commit.tags and pending_tag not in self.commit.tags:
                    self.commit.tags.append(pending_tag)
                else:
                    self.commit.tags = [pending_tag]
                self.branch.tail.svn_tag = None

            if branch is not None:
                self.branch = branch

                # Insert master always at the end
                if branch.name == 'master':
                    self.branches.append(self.branch)
                else:
                    self.branches.insert(0, self.branch)
            else:
                if self.branch is not None:
                    self.branch.set_tail(git_commit)
            return

        # Committer line: record the committer identity.
        match = self.patterns['committer'].match(line)
        if match:
            self.commit.committer = Person()
            self.commit.committer.name = match.group(1)
            self.commit.committer.email = match.group(2)
            self.handler.committer(self.commit.committer)
            return

        # Author line: record the author identity.
        match = self.patterns['author'].match(line)
        if match:
            self.commit.author = Person()
            self.commit.author.name = match.group(1)
            self.commit.author.email = match.group(2)
            self.handler.author(self.commit.author)
            return

        # Commit date
        match = self.patterns['date'].match(line)
        if match:
            self.commit.date = datetime.datetime(
                *(time.strptime(match.group(1).strip(" "), "%a %b %d %H:%M:%S %Y")[0:6]))
            # datetime.datetime.strptime not supported by Python2.4
            #self.commit.date = datetime.datetime.strptime (match.group (1).strip (" "), "%a %b %d %H:%M:%S %Y")

            # match.group(2) represents the timezone. E.g. -0300, +0200, +0430 (Afghanistan)
            # This string will be parsed to int and recalculated into seconds (60 * 60)
            # NOTE(review): int("+0430") == 430, so 430*3600/100 = 15480s,
            # but +04:30 is 16200s — wrong for non-whole-hour offsets; and
            # on Python 3 the '/' yields a float, not an int.  TODO confirm
            # downstream consumers tolerate both.
            self.commit.date_tz = (((int(match.group(2))) * 60 * 60) / 100)
            return

        # Author date
        match = self.patterns['author_date'].match(line)
        if match:
            self.commit.author_date = datetime.datetime(
                *(time.strptime(match.group(1).strip(" "), "%a %b %d %H:%M:%S %Y")[0:6]))
            # datetime.datetime.strptime not supported by Python2.4
            #self.commit.author_date = datetime.datetime.strptime (match.group (1).strip (" "), "%a %b %d %H:%M:%S %Y")

            # match.group(2) represents the timezone. E.g. -0300, +0200, +0430 (Afghanistan)
            # This string will be parsed to int and recalculated into seconds (60 * 60)
            # NOTE(review): same minutes-quantization issue as date_tz above.
            self.commit.author_date_tz = (((int(match.group(2))) * 60 * 60) / 100)
            return

        # File action line (e.g. "M path"); a multi-character status means a
        # merge action, which is only kept when it contains a modification.
        match = self.patterns['file'].match(line)
        if match:
            action = Action()
            type = match.group(1)
            if len(type) > 1:
                # merge actions
                if 'M' in type:
                    type = 'M'
                else:
                    # ignore merge actions without 'M'
                    return

            action.type = type
            action.f1 = match.group(2)

            self.commit.actions.append(action)
            self.handler.file(action.f1)
            return

        # File moved/copied line; renames ('R') are stored as type 'V'.
        match = self.patterns['file-moved'].match(line)
        if match:
            action = Action()
            type = match.group(1)
            if type == 'R':
                action.type = 'V'
            else:
                action.type = type
            action.f1 = match.group(3)
            action.f2 = match.group(2)
            action.rev = self.commit.revision

            self.commit.actions.append(action)
            self.handler.file(action.f1)

            return

        # This is a workaround for a bug in the GNOME Git migration
        # There are commits on tags not correctly detected like this one:
        # http://git.gnome.org/cgit/evolution/commit/?id=b8e52acac2b9fc5414a7795a73c74f7ee4eeb71f
        # We want to ignore commits on tags since it doesn't make any sense in Git
        if self.is_gnome:
            match = self.patterns['svn-tag'].match(line.strip())
            if match:
                printout("Warning: detected a commit on a svn tag: %s", (match.group(0),))
                tag = match.group(1)
                if self.commit.tags and tag in self.commit.tags:
                    # The commit will be ignored, so move the tag
                    # to the next (previous in history) commit
                    self.branch.tail.svn_tag = tag

        # Fallback: unmatched lines are part of the commit message.
        self.commit.message += line + '\n'

        # NOTE(review): 'assert True' can never fire; presumably
        # 'assert False' was intended as an unreachable-line guard.
        assert True, "Not match for line %s" % (line)

Example 43

Project: CVSAnalY
Source File: GitParser.py
View license
    def _parse_line(self, line):
        if line is None or line == '':
            return

        # Ignore
        for patt in self.patterns['ignore']:
            if patt.match(line):
                return

        # Commit
        match = self.patterns['commit'].match(line)
        if match:
            if self.commit is not None and self.branch is not None:
                if self.branch.tail.svn_tag is None:  # Skip commits on svn tags
                    self.handler.commit(self.branch.tail.commit)

            self.commit = Commit()
            self.commit.revision = match.group(1)

            parents = match.group(3)
            if parents:
                parents = parents.split()
                self.commit.parents = parents
            git_commit = self.GitCommit(self.commit, parents)

            decorate = match.group(5)
            branch = None
            if decorate:
                # Remote branch
                m = re.search(self.patterns['branch'], decorate)
                if m:
                    branch = self.GitBranch(self.GitBranch.REMOTE, m.group(1), git_commit)
                    printdbg("Branch '%s' head at acommit %s", (branch.name, self.commit.revision))
                else:
                    # Local Branch
                    m = re.search(self.patterns['local-branch'], decorate)
                    if m:
                        branch = self.GitBranch(self.GitBranch.LOCAL, m.group(1), git_commit)
                        printdbg("Commit %s on local branch '%s'", (self.commit.revision, branch.name))
                        # If local branch was merged we just ignore this decoration
                        if self.branch and self.branch.is_my_parent(git_commit):
                            printdbg("Local branch '%s' was merged", (branch.name,))
                            branch = None
                    else:
                        # Stash
                        m = re.search(self.patterns['stash'], decorate)
                        if m:
                            branch = self.GitBranch(self.GitBranch.STASH, "stash", git_commit)
                            printdbg("Commit %s on stash", (self.commit.revision,))
                # Tag
                m = re.search(self.patterns['tag'], decorate)
                if m:
                    self.commit.tags = [m.group(1)]
                    printdbg("Commit %s tagged as '%s'", (self.commit.revision, self.commit.tags[0]))

            if not branch and not self.branch:
                branch = self.GitBranch(self.GitBranch.LOCAL, "(no-branch)", git_commit)
                printdbg("Commit %s on unknown local branch '%s'", (self.commit.revision, branch.name))

            # This part of code looks wired at first time so here is a small description what it does:
            #
            # * self.branch is the branch to which the last inspected commit belonged to
            # * branch is the branch of the current parsed commit
            #
            # This check is only to find branches which are fully merged into a already analyzed branch
            #
            # For more detailed information see https://github.com/MetricsGrimoire/CVSAnalY/issues/64
            if branch is not None and self.branch is not None:
                # Detect empty branches.
                # Ideally, the head of a branch can't have children.
                # When this happens is because the branch is empty, so we just ignore such branch.
                if self.branch.is_my_parent(git_commit):
                    printout(
                        "Info: Branch '%s' will be ignored, because it was already merged in an active one.",
                        (branch.name,)
                    )
                    branch = None

            if len(self.branches) >= 2:
                # If current commit is the start point of a new branch
                # we have to look at all the current branches since
                # we haven't inserted the new branch yet.
                # If not, look at all other branches excluding the current one
                for i, b in enumerate(self.branches):
                    if i == 0 and branch is None:
                        continue

                    if b.is_my_parent(git_commit):
                        # We assume current branch is always the last one
                        # AFAIK there's no way to make sure this is right
                        printdbg("Start point of branch '%s' at commit %s",
                                 (self.branches[0].name, self.commit.revision))
                        self.branches.pop(0)
                        self.branch = b

            if self.branch and self.branch.tail.svn_tag is not None and self.branch.is_my_parent(git_commit):
                # There's a pending tag in previous commit
                pending_tag = self.branch.tail.svn_tag
                printdbg("Move pending tag '%s' from previous commit %s to current %s", (pending_tag,
                                                                                         self.branch.tail.commit.revision,
                                                                                         self.commit.revision))
                if self.commit.tags and pending_tag not in self.commit.tags:
                    self.commit.tags.append(pending_tag)
                else:
                    self.commit.tags = [pending_tag]
                self.branch.tail.svn_tag = None

            if branch is not None:
                self.branch = branch

                # Insert master always at the end
                if branch.name == 'master':
                    self.branches.append(self.branch)
                else:
                    self.branches.insert(0, self.branch)
            else:
                if self.branch is not None:
                    self.branch.set_tail(git_commit)
            return

        # Committer
        match = self.patterns['committer'].match(line)
        if match:
            self.commit.committer = Person()
            self.commit.committer.name = match.group(1)
            self.commit.committer.email = match.group(2)
            self.handler.committer(self.commit.committer)
            return

        # Author
        match = self.patterns['author'].match(line)
        if match:
            self.commit.author = Person()
            self.commit.author.name = match.group(1)
            self.commit.author.email = match.group(2)
            self.handler.author(self.commit.author)
            return

        # Commit date
        match = self.patterns['date'].match(line)
        if match:
            self.commit.date = datetime.datetime(
                *(time.strptime(match.group(1).strip(" "), "%a %b %d %H:%M:%S %Y")[0:6]))
            # datetime.datetime.strptime not supported by Python2.4
            #self.commit.date = datetime.datetime.strptime (match.group (1).strip (" "), "%a %b %d %H:%M:%S %Y")

            # match.group(2) represents the timezone. E.g. -0300, +0200, +0430 (Afghanistan)
            # This string will be parsed to int and recalculated into seconds (60 * 60)
            self.commit.date_tz = (((int(match.group(2))) * 60 * 60) / 100)
            return

        # Author date
        match = self.patterns['author_date'].match(line)
        if match:
            self.commit.author_date = datetime.datetime(
                *(time.strptime(match.group(1).strip(" "), "%a %b %d %H:%M:%S %Y")[0:6]))
            # datetime.datetime.strptime not supported by Python2.4
            #self.commit.author_date = datetime.datetime.strptime (match.group (1).strip (" "), "%a %b %d %H:%M:%S %Y")

            # match.group(2) represents the timezone. E.g. -0300, +0200, +0430 (Afghanistan)
            # This string will be parsed to int and recalculated into seconds (60 * 60)
            self.commit.author_date_tz = (((int(match.group(2))) * 60 * 60) / 100)
            return

        # File
        match = self.patterns['file'].match(line)
        if match:
            action = Action()
            type = match.group(1)
            if len(type) > 1:
                # merge actions
                if 'M' in type:
                    type = 'M'
                else:
                    # ignore merge actions without 'M'
                    return

            action.type = type
            action.f1 = match.group(2)

            self.commit.actions.append(action)
            self.handler.file(action.f1)
            return

        # File moved/copied
        match = self.patterns['file-moved'].match(line)
        if match:
            action = Action()
            type = match.group(1)
            if type == 'R':
                action.type = 'V'
            else:
                action.type = type
            action.f1 = match.group(3)
            action.f2 = match.group(2)
            action.rev = self.commit.revision

            self.commit.actions.append(action)
            self.handler.file(action.f1)

            return

        # This is a workaround for a bug in the GNOME Git migration
        # There are commits on tags not correctly detected like this one:
        # http://git.gnome.org/cgit/evolution/commit/?id=b8e52acac2b9fc5414a7795a73c74f7ee4eeb71f
        # We want to ignore commits on tags since it doesn't make any sense in Git
        if self.is_gnome:
            match = self.patterns['svn-tag'].match(line.strip())
            if match:
                printout("Warning: detected a commit on a svn tag: %s", (match.group(0),))
                tag = match.group(1)
                if self.commit.tags and tag in self.commit.tags:
                    # The commit will be ignored, so move the tag
                    # to the next (previous in history) commit
                    self.branch.tail.svn_tag = tag

        # Message
        self.commit.message += line + '\n'

        assert True, "Not match for line %s" % (line)

Example 44

Project: socorro
Source File: Dibbler.py
View license
    def found_terminator(self):
        """Asynchat override: called when the current terminator is reached.

        The terminator is either the blank line ending the HTTP headers,
        or - while reading a POST body - an integer equal to the body's
        Content-Length.  Parses the request line, query/body parameters
        and headers, performs HTTP Basic/Digest authentication if the
        server requires it, then dispatches to the plugin "methlet"
        named after the URL path (e.g. '/eggs.gif' -> onEggsGif).

        NOTE(review): this is Python 2 code (iteritems, print >>,
        urlparse/StringIO modules).
        """
        # Parse the HTTP request.
        requestLine, headers = (self._request+'\r\n').split('\r\n', 1)
        try:
            method, url, version = requestLine.strip().split()
        except ValueError:
            # Request line did not split into exactly three tokens.
            self.writeError(400, "Malformed request: '%s'" % requestLine)
            self.close_when_done()
            return

        # Parse the URL, and deal with POST vs. GET requests.
        method = method.upper()
        unused, unused, path, unused, query, unused = urlparse.urlparse(url)
        cgiParams = cgi.parse_qs(query, keep_blank_values=True)
        if self.get_terminator() == '\r\n\r\n' and method == 'POST':
            # We need to read the body - set a numeric async_chat terminator
            # equal to the Content-Length.
            match = re.search(r'(?i)content-length:\s*(\d+)', headers)
            # NOTE(review): if a POST carries no Content-Length header,
            # `match` is None and the next line raises AttributeError.
            contentLength = int(match.group(1))
            if contentLength > 0:
                self.set_terminator(contentLength)
                # Re-append the header/body separator that split() consumed,
                # so the buffered request stays parseable on re-entry.
                self._request = self._request + '\r\n\r\n'
                return

        # Have we just read the body of a POSTed request?  Decode the body,
        # which will contain parameters and possibly uploaded files.
        if type(self.get_terminator()) is type(1):
            # An integer terminator means we were collecting a POST body;
            # restore the normal header terminator for the next request.
            self.set_terminator('\r\n\r\n')
            body = self._request.split('\r\n\r\n', 1)[1]
            # NOTE(review): as with Content-Length above, a missing
            # Content-Type header would make `match` None and crash here.
            match = re.search(r'(?i)content-type:\s*([^\r\n]+)', headers)
            contentTypeHeader = match.group(1)
            contentType, pdict = cgi.parse_header(contentTypeHeader)
            if contentType == 'multipart/form-data':
                # multipart/form-data - probably a file upload.
                bodyFile = StringIO.StringIO(body)
                cgiParams.update(cgi.parse_multipart(bodyFile, pdict))
            else:
                # A normal x-www-form-urlencoded.
                cgiParams.update(cgi.parse_qs(body, keep_blank_values=True))

        # Convert the cgi params into a simple dictionary.
        # (parse_qs maps each name to a *list* of values; keep the first.)
        params = {}
        for name, value in cgiParams.iteritems():
            params[name] = value[0]

        # Parse the headers.
        # NOTE(review): both regex groups always participate, so
        # .groups(2) behaves like .groups() - the default of 2 is unused.
        headersRegex = re.compile('([^:]*):\s*(.*)')
        headersDict = dict([headersRegex.match(line).groups(2)
                           for line in headers.split('\r\n')
                           if headersRegex.match(line)])

        # HTTP Basic/Digest Authentication support.
        serverAuthMode = self._server.requestAuthenticationMode()
        if serverAuthMode != HTTPServer.NO_AUTHENTICATION:
            # The server wants us to authenticate the user.
            authResult = False
            authHeader = headersDict.get('Authorization')
            if authHeader:
                # Header shape: "<mode> <credentials>", e.g. "Basic dXNlcjpwdw==".
                authMatch = re.search('(\w+)\s+(.*)', authHeader)
                authenticationMode, login = authMatch.groups()

                if authenticationMode == HTTPServer.BASIC_AUTHENTICATION:
                    authResult = self._basicAuthentication(login)
                elif authenticationMode == HTTPServer.DIGEST_AUTHENTICATION:
                    authResult = self._digestAuthentication(login, method)
                else:
                    print >> sys.stderr, "Unknown mode: %s" % authenticationMode

            if not authResult:
                self.writeUnauthorizedAccess(serverAuthMode)

        # Find and call the methlet.  '/eggs.gif' becomes 'onEggsGif'.
        if path == '/':
            path = '/Home'
        pieces = path[1:].split('.')
        name = 'on' + ''.join([piece.capitalize() for piece in pieces])
        for plugin in self._server._plugins:
            if hasattr(plugin, name):
                # The plugin's APIs (`write`, etc) reflect back to us via
                # `plugin._handler`.
                plugin._handler = self
                try:
                    # Call the methlet.
                    getattr(plugin, name)(**params)
                    if self._bufferedHeaders:
                        # The methlet returned without writing anything other
                        # than headers.  This isn't unreasonable - it might
                        # have written a 302 or something.  Flush the buffered
                        # headers
                        self.write(None)
                except:
                    # The methlet raised an exception - send the traceback to
                    # the browser, unless it's SystemExit in which case we let
                    # it go.
                    eType, eValue, eTrace = sys.exc_info()
                    if eType == SystemExit:
                        # Close all the listeners so that no further incoming
                        # connections appear.
                        contextMap = self._context._map
                        for dispatcher in contextMap.values():
                            if isinstance(dispatcher, Listener):
                                dispatcher.close()

                        # Let any existing connections close down first.  This
                        # has happened when all we have left are _HTTPHandlers
                        # (this one plus any others that are using keep-alive;
                        # none of the others can be actually doing any work
                        # because *we're* the one doing the work).
                        def isProtected(dispatcher):
                            return not isinstance(dispatcher, _HTTPHandler)

                        while len(filter(isProtected, contextMap.values())) > 0:
                            asyncore.poll(timeout=1, map=contextMap)

                        raise SystemExit

                    message = """<h3>500 Server error</h3><pre>%s</pre>"""
                    details = traceback.format_exception(eType, eValue, eTrace)
                    details = '\n'.join(details)
                    self.writeError(500, message % cgi.escape(details))
                plugin._handler = None
                break
        else:
            self.onUnknown(path, params)

        # `close_when_done` and `Connection: close` ensure that we don't
        # support keep-alives or pipelining.  There are problems with some
        # browsers, for instance with extra characters being appended after
        # the body of a POSTed request.
        self.close_when_done()

Example 45

View license
    def handle_label(self, input_filename, **options):

        if options['test']:
            import doctest
            failure_count, _ = doctest.testmod(sys.modules[__name__])
            sys.exit(0 if failure_count == 0 else 1)

        global VERBOSE
        VERBOSE = options['verbose']

        geocode_cache = get_geocode_cache()

        na_member_lookup = get_na_member_lookup()

        # Ensure that all the required kinds and other objects exist:

        ok_constituency_office, _ = OrganisationKind.objects.get_or_create(
            slug='constituency-office',
            name='Constituency Office')
        ok_constituency_area, _ = OrganisationKind.objects.get_or_create(
            slug='constituency-area',
            name='Constituency Area')

        pk_constituency_office, _ = PlaceKind.objects.get_or_create(
            slug='constituency-office',
            name='Constituency Office')
        pk_constituency_area, _ = PlaceKind.objects.get_or_create(
            slug='constituency-area',
            name='Constituency Area')

        ck_address, _ = ContactKind.objects.get_or_create(
            slug='address',
            name='Address')
        ck_email, _ = ContactKind.objects.get_or_create(
            slug='email',
            name='Email')
        ck_fax, _ = ContactKind.objects.get_or_create(
            slug='fax',
            name='Fax')
        ck_telephone, _ = ContactKind.objects.get_or_create(
            slug='voice',
            name='Voice')

        pt_constituency_contact, _ = PositionTitle.objects.get_or_create(
            slug='constituency-contact',
            name='Constituency Contact')
        pt_administrator, _ = PositionTitle.objects.get_or_create(
            slug='administrator',
            name='Administrator')

        ork_has_office, _ = OrganisationRelationshipKind.objects.get_or_create(
            name='has_office')

        organisation_content_type = ContentType.objects.get_for_model(Organisation)

        contact_source = "Data from the party via Geoffrey Kilpin"

        mapit_current_generation = Generation.objects.current()

        with_physical_addresses = 0
        geolocated = 0

        created_administrators = {}

        # There's at least one duplicate row, so detect and ignore any duplicates:
        rows_already_done = set()

        try:

            with open(input_filename) as fp:
                reader = csv.DictReader(fp)
                for row in reader:
                    # Make sure there's no leading or trailing
                    # whitespace, and we have unicode strings:
                    row = dict((k, row[k].decode('UTF-8').strip()) for k in row)
                    # Extract each column:
                    party_code = row['Party Code']
                    name = row['Name']
                    manual_lonlat = row['Manually Geocoded LonLat']
                    province = row['Province']
                    office_or_area = row['Type']
                    party = row['Party']
                    administrator = row['Administrator']
                    telephone = row['Tel']
                    fax = row['Fax']
                    physical_address = row['Physical Address']
                    email = row['E-mail']
                    municipality = row['Municipality']

                    abbreviated_party = party
                    m = re.search(r'\((?:|.*, )([A-Z\+]+)\)', party)
                    if m:
                        abbreviated_party = m.group(1)

                    unique_row_id = (party_code, name, party)

                    if unique_row_id in rows_already_done:
                        continue
                    else:
                        rows_already_done.add(unique_row_id)

                    # Collapse whitespace in the name to a single space:
                    name = re.sub(r'(?ms)\s+', ' ', name)

                    mz_party = Organisation.objects.get(name=party)

                    # At various points, constituency office or areas
                    # have been created with the wrong terminology, so
                    # look for any variant of the names:
                    title_data = {'party': abbreviated_party,
                                  'type': office_or_area,
                                  'party_code': party_code,
                                  'name': name}
                    possible_formats = [
                        u'{party} Constituency Area ({party_code}): {name}',
                        u'{party} Constituency Office ({party_code}): {name}',
                        u'{party} Constituency Area: {name}',
                        u'{party} Constituency Office: {name}']
                    org_slug_possibilities = [slugify(fmt.format(**title_data))
                                              for fmt in possible_formats]

                    if party_code:
                        organisation_name = u"{party} Constituency {type} ({party_code}): {name}".format(**title_data)
                    else:
                        organisation_name = u"{party} Constituency {type}: {name}".format(**title_data)

                    places_to_add = []
                    contacts_to_add = []
                    people_to_add = []
                    administrators_to_add = []

                    for contact_kind, value, in ((ck_email, email),
                                                 (ck_telephone, telephone),
                                                 (ck_fax, fax)):
                        if value:
                            contacts_to_add.append({
                                    'kind': contact_kind,
                                    'value': value,
                                    'source': contact_source})

                    if office_or_area == 'Office':
                        constituency_kind = ok_constituency_office

                        if physical_address:

                            # Sometimes there's lots of whitespace
                            # that splits the physical address from a
                            # P.O. Box address, so look for those cases:
                            pobox_address = None
                            m = re.search(r'(?ms)^(.*)\s{5,}(.*)$', physical_address)
                            if m:
                                physical_address = m.group(1).strip()
                                pobox_address = m.group(2).strip()

                            with_physical_addresses += 1
                            physical_address = physical_address.rstrip(',') + ", South Africa"
                            try:
                                verbose("physical_address: " + physical_address.encode('UTF-8'))
                                if manual_lonlat:
                                    verbose("using manually specified location: " + manual_lonlat)
                                    lon, lat = map(float, manual_lonlat.split(","))
                                else:
                                    lon, lat, geocode_cache = geocode(physical_address, geocode_cache, VERBOSE)
                                    verbose("maps to:")
                                    verbose("http://maps.google.com/maps?q=%f,%f" % (lat, lon))
                                geolocated += 1

                                place_name = u'Approximate position of ' + organisation_name
                                places_to_add.append({
                                    'name': place_name,
                                    'slug': slugify(place_name),
                                    'kind': pk_constituency_office,
                                    'location': Point(lon, lat)})

                                contacts_to_add.append({
                                        'kind': ck_address,
                                        'value': physical_address,
                                        'source': contact_source})

                            except LocationNotFound:
                                verbose("XXX no results found for: " + physical_address)

                            if pobox_address is not None:
                                contacts_to_add.append({
                                        'kind': ck_address,
                                        'value': pobox_address,
                                        'source': contact_source})

                            # Deal with the different formats of MP
                            # and MPL names for different parties:
                            for representative_type in ('MP', 'MPL'):
                                if party in ('African National Congress (ANC)',
                                             "African Peoples' Convention (APC)",
                                             "Azanian People's Organisation (AZAPO)",
                                             'Minority Front (MF)',
                                             'United Christian Democratic Party (UCDP)',
                                             'United Democratic Movement (UDM)',
                                             'African Christian Democratic Party (ACDP)'):
                                    name_strings = re.split(r'\s{4,}',row[representative_type])
                                    for name_string in name_strings:
                                        person = find_pombola_person(name_string, na_member_lookup, VERBOSE)
                                        if person:
                                            people_to_add.append(person)
                                elif party in ('Congress of the People (COPE)',
                                               'Freedom Front + (Vryheidsfront+, FF+)'):
                                    for contact in re.split(r'\s*;\s*', row[representative_type]):
                                        # Strip off the phone number
                                        # and email address before
                                        # resolving:
                                        person = find_pombola_person(
                                            re.sub(r'(?ms)\s*\d.*', '', contact),
                                            na_member_lookup,
                                            VERBOSE
                                        )
                                        if person:
                                            people_to_add.append(person)
                                else:
                                    raise Exception, "Unknown party '%s'" % (party,)

                        if municipality:
                            mapit_municipality = get_mapit_municipality(
                                municipality, province
                            )

                            if mapit_municipality:
                                place_name = u'Municipality associated with ' + organisation_name
                                places_to_add.append({
                                    'name': place_name,
                                    'slug': slugify(place_name),
                                    'kind': pk_constituency_office,
                                    'mapit_area': mapit_municipality})

                    elif office_or_area == 'Area':
                        # At the moment it's only for DA that these
                        # Constituency Areas exist, so check that assumption:
                        if party != 'Democratic Alliance (DA)':
                            raise Exception, "Unexpected party %s with Area" % (party)
                        constituency_kind = ok_constituency_area
                        province = fix_province_name(province)
                        mapit_province = Area.objects.get(
                            type__code='PRV',
                            generation_high__gte=mapit_current_generation,
                            generation_low__lte=mapit_current_generation,
                            name=province)
                        place_name = 'Unknown sub-area of %s known as %s' % (
                            province,
                            organisation_name)
                        places_to_add.append({
                                'name': place_name,
                                'slug': slugify(place_name),
                                'kind': pk_constituency_area,
                                'mapit_area': mapit_province})

                        for representative_type in ('MP', 'MPL'):
                            for contact in re.split(r'(?ms)\s*;\s*', row[representative_type]):
                                person = find_pombola_person(contact, na_member_lookup, VERBOSE)
                                if person:
                                    people_to_add.append(person)

                    else:
                        raise Exception, "Unknown type %s" % (office_or_area,)

                    # The Administrator column might have multiple
                    # administrator contacts, separated by
                    # semi-colons.  Each contact may have notes about
                    # them in brackets, and may be followed by more
                    # than one phone number, separated by slashes.
                    if administrator and administrator.lower() != 'vacant':
                        for administrator_contact in re.split(r'\s*;\s*', administrator):
                            # Strip out any bracketed notes:
                            administrator_contact = re.sub(r'\([^\)]*\)', '', administrator_contact)
                            # Extract any phone number at the end:
                            m = re.search(r'^([^0-9]*)([0-9\s/]*)$', administrator_contact)
                            phone_numbers = []
                            if m:
                                administrator_contact, phones = m.groups()
                                phone_numbers = [s.strip() for s in re.split(r'\s*/\s*', phones)]
                            administrator_contact = administrator_contact.strip()
                            # If there's no name after that, just skip this contact
                            if not administrator_contact:
                                continue
                            administrator_contact = re.sub(r'\s+', ' ', administrator_contact)
                            tuple_to_add = (administrator_contact,
                                            tuple(s for s in phone_numbers
                                                  if s and s != nonexistent_phone_number))
                            verbose("administrator name '%s', numbers '%s'" % tuple_to_add)
                            administrators_to_add.append(tuple_to_add)

                    organisation_kwargs = {
                        'name': organisation_name,
                        'slug': slugify(organisation_name),
                        'kind': constituency_kind}

                    # Check if this office appears to exist already:

                    identifier = None
                    identifier_scheme = "constituency-office/%s/" % (abbreviated_party,)

                    try:
                        if party_code:
                            # If there's something's in the "Party Code"
                            # column, we can check for an identifier and
                            # get the existing object reliable through that.
                            identifier = Identifier.objects.get(identifier=party_code,
                                                                scheme=identifier_scheme)
                            org = identifier.content_object
                        else:
                            # Otherwise use the slug we intend to use, and
                            # look for an existing organisation:
                            org = Organisation.objects.get(slug__in=org_slug_possibilities,
                                                           kind=constituency_kind)
                    except ObjectDoesNotExist:
                        org = Organisation()
                        if party_code:
                            identifier = Identifier(identifier=party_code,
                                                    scheme=identifier_scheme,
                                                    content_type=organisation_content_type)

                    # Make sure we set the same attributes and save:
                    for k, v in organisation_kwargs.items():
                        setattr(org, k, v)

                    if options['commit']:
                        org.save()
                        if party_code:
                            identifier.object_id = org.id
                            identifier.save()

                        # Replace all places associated with this
                        # organisation and re-add them:
                        org.place_set.all().delete()
                        for place_dict in places_to_add:
                            org.place_set.create(**place_dict)

                        # Replace all contact details associated with this
                        # organisation, and re-add them:
                        org.contacts.all().delete()
                        for contact_dict in contacts_to_add:
                            org.contacts.create(**contact_dict)

                        # Remove previous has_office relationships,
                        # between this office and any party, then re-add
                        # this one:
                        OrganisationRelationship.objects.filter(
                            organisation_b=org).delete()
                        OrganisationRelationship.objects.create(
                            organisation_a=mz_party,
                            kind=ork_has_office,
                            organisation_b=org)

                        # Remove all Membership relationships between this
                        # organisation and other people, then recreate them:
                        org.position_set.filter(title=pt_constituency_contact).delete()
                        for person in people_to_add:
                            org.position_set.create(
                                person=person,
                                title=pt_constituency_contact,
                                category='political')

                        # Remove any administrators for this organisation:
                        for position in org.position_set.filter(title=pt_administrator):
                            for contact in position.person.contacts.all():
                                contact.delete()
                            position.person.delete()
                            position.delete()
                        # And create new administrators:
                        for administrator_tuple in administrators_to_add:
                            administrator_name, phone_numbers = administrator_tuple
                            if administrator_tuple in created_administrators:
                                person = created_administrators[administrator_tuple]
                            else:
                                person = Person.objects.create(legal_name=administrator_name,
                                                               slug=slugify(administrator_name))
                                created_administrators[administrator_tuple] = person
                                for phone_number in phone_numbers:
                                    person.contacts.create(kind=ck_telephone,
                                                           value=phone_number,
                                                           source=contact_source)
                            Position.objects.create(person=person,
                                                    organisation=org,
                                                    title=pt_administrator,
                                                    category='political')

        finally:
            write_geocode_cache(geocode_cache)

        verbose("Geolocated %d out of %d physical addresses" % (geolocated, with_physical_addresses))

Example 46

View license
    def handle_label(self, input_filename, **options):
        """Import party constituency offices and areas from a CSV file.

        Each row of the CSV describes either a constituency 'Office' or
        a constituency 'Area' for a South African political party.  For
        every row an Organisation (with associated Places, Contacts,
        MP/MPL Positions and administrator Persons) is created or
        updated -- but database writes only happen when the 'commit'
        option is set.

        NOTE(review): this is Python 2 code (``row[k].decode('UTF-8')``
        and old-style ``raise Exception, msg``); it will not run
        unmodified under Python 3.
        """

        # Run the module's doctests and exit immediately if requested.
        if options['test']:
            import doctest
            failure_count, _ = doctest.testmod(sys.modules[__name__])
            sys.exit(0 if failure_count == 0 else 1)

        global VERBOSE
        VERBOSE = options['verbose']

        # Cache of address -> location results; always written back at
        # the end (see the try/finally below), even if the import fails.
        geocode_cache = get_geocode_cache()

        na_member_lookup = get_na_member_lookup()

        # Ensure that all the required kinds and other objects exist:

        ok_constituency_office, _ = OrganisationKind.objects.get_or_create(
            slug='constituency-office',
            name='Constituency Office')
        ok_constituency_area, _ = OrganisationKind.objects.get_or_create(
            slug='constituency-area',
            name='Constituency Area')

        pk_constituency_office, _ = PlaceKind.objects.get_or_create(
            slug='constituency-office',
            name='Constituency Office')
        pk_constituency_area, _ = PlaceKind.objects.get_or_create(
            slug='constituency-area',
            name='Constituency Area')

        ck_address, _ = ContactKind.objects.get_or_create(
            slug='address',
            name='Address')
        ck_email, _ = ContactKind.objects.get_or_create(
            slug='email',
            name='Email')
        ck_fax, _ = ContactKind.objects.get_or_create(
            slug='fax',
            name='Fax')
        ck_telephone, _ = ContactKind.objects.get_or_create(
            slug='voice',
            name='Voice')

        pt_constituency_contact, _ = PositionTitle.objects.get_or_create(
            slug='constituency-contact',
            name='Constituency Contact')
        pt_administrator, _ = PositionTitle.objects.get_or_create(
            slug='administrator',
            name='Administrator')

        ork_has_office, _ = OrganisationRelationshipKind.objects.get_or_create(
            name='has_office')

        organisation_content_type = ContentType.objects.get_for_model(Organisation)

        contact_source = "Data from the party via Geoffrey Kilpin"

        mapit_current_generation = Generation.objects.current()

        # Counters reported in the summary line at the end.
        with_physical_addresses = 0
        geolocated = 0

        # Map (name, phone-number tuple) -> Person, so that the same
        # administrator appearing on several rows gets only one Person.
        created_administrators = {}

        # There's at least one duplicate row, so detect and ignore any duplicates:
        rows_already_done = set()

        try:

            with open(input_filename) as fp:
                reader = csv.DictReader(fp)
                for row in reader:
                    # Make sure there's no leading or trailing
                    # whitespace, and we have unicode strings:
                    row = dict((k, row[k].decode('UTF-8').strip()) for k in row)
                    # Extract each column:
                    party_code = row['Party Code']
                    name = row['Name']
                    manual_lonlat = row['Manually Geocoded LonLat']
                    province = row['Province']
                    office_or_area = row['Type']
                    party = row['Party']
                    administrator = row['Administrator']
                    telephone = row['Tel']
                    fax = row['Fax']
                    physical_address = row['Physical Address']
                    email = row['E-mail']
                    municipality = row['Municipality']

                    # Pull the abbreviation out of a party name like
                    # 'Democratic Alliance (DA)' or, for names such as
                    # 'Freedom Front + (Vryheidsfront+, FF+)', the part
                    # after the last comma inside the brackets.
                    abbreviated_party = party
                    m = re.search(r'\((?:|.*, )([A-Z\+]+)\)', party)
                    if m:
                        abbreviated_party = m.group(1)

                    unique_row_id = (party_code, name, party)

                    if unique_row_id in rows_already_done:
                        continue
                    else:
                        rows_already_done.add(unique_row_id)

                    # Collapse whitespace in the name to a single space:
                    name = re.sub(r'(?ms)\s+', ' ', name)

                    mz_party = Organisation.objects.get(name=party)

                    # At various points, constituency office or areas
                    # have been created with the wrong terminology, so
                    # look for any variant of the names:
                    title_data = {'party': abbreviated_party,
                                  'type': office_or_area,
                                  'party_code': party_code,
                                  'name': name}
                    possible_formats = [
                        u'{party} Constituency Area ({party_code}): {name}',
                        u'{party} Constituency Office ({party_code}): {name}',
                        u'{party} Constituency Area: {name}',
                        u'{party} Constituency Office: {name}']
                    org_slug_possibilities = [slugify(fmt.format(**title_data))
                                              for fmt in possible_formats]

                    if party_code:
                        organisation_name = u"{party} Constituency {type} ({party_code}): {name}".format(**title_data)
                    else:
                        organisation_name = u"{party} Constituency {type}: {name}".format(**title_data)

                    # Everything for this row is accumulated in these
                    # lists first, and only written to the database in
                    # the 'commit' section at the bottom of the loop.
                    places_to_add = []
                    contacts_to_add = []
                    people_to_add = []
                    administrators_to_add = []

                    # Queue whichever simple contact details are present:
                    for contact_kind, value, in ((ck_email, email),
                                                 (ck_telephone, telephone),
                                                 (ck_fax, fax)):
                        if value:
                            contacts_to_add.append({
                                    'kind': contact_kind,
                                    'value': value,
                                    'source': contact_source})

                    if office_or_area == 'Office':
                        constituency_kind = ok_constituency_office

                        if physical_address:

                            # Sometimes there's lots of whitespace
                            # that splits the physical address from a
                            # P.O. Box address, so look for those cases:
                            pobox_address = None
                            m = re.search(r'(?ms)^(.*)\s{5,}(.*)$', physical_address)
                            if m:
                                physical_address = m.group(1).strip()
                                pobox_address = m.group(2).strip()

                            with_physical_addresses += 1
                            physical_address = physical_address.rstrip(',') + ", South Africa"
                            try:
                                verbose("physical_address: " + physical_address.encode('UTF-8'))
                                if manual_lonlat:
                                    verbose("using manually specified location: " + manual_lonlat)
                                    lon, lat = map(float, manual_lonlat.split(","))
                                else:
                                    # Geocoding may raise LocationNotFound,
                                    # handled below.
                                    lon, lat, geocode_cache = geocode(physical_address, geocode_cache, VERBOSE)
                                    verbose("maps to:")
                                    verbose("http://maps.google.com/maps?q=%f,%f" % (lat, lon))
                                geolocated += 1

                                place_name = u'Approximate position of ' + organisation_name
                                places_to_add.append({
                                    'name': place_name,
                                    'slug': slugify(place_name),
                                    'kind': pk_constituency_office,
                                    'location': Point(lon, lat)})

                                contacts_to_add.append({
                                        'kind': ck_address,
                                        'value': physical_address,
                                        'source': contact_source})

                            except LocationNotFound:
                                verbose("XXX no results found for: " + physical_address)

                            if pobox_address is not None:
                                contacts_to_add.append({
                                        'kind': ck_address,
                                        'value': pobox_address,
                                        'source': contact_source})

                            # Deal with the different formats of MP
                            # and MPL names for different parties:
                            for representative_type in ('MP', 'MPL'):
                                if party in ('African National Congress (ANC)',
                                             "African Peoples' Convention (APC)",
                                             "Azanian People's Organisation (AZAPO)",
                                             'Minority Front (MF)',
                                             'United Christian Democratic Party (UCDP)',
                                             'United Democratic Movement (UDM)',
                                             'African Christian Democratic Party (ACDP)'):
                                    # These parties separate multiple names
                                    # with runs of whitespace:
                                    name_strings = re.split(r'\s{4,}',row[representative_type])
                                    for name_string in name_strings:
                                        person = find_pombola_person(name_string, na_member_lookup, VERBOSE)
                                        if person:
                                            people_to_add.append(person)
                                elif party in ('Congress of the People (COPE)',
                                               'Freedom Front + (Vryheidsfront+, FF+)'):
                                    for contact in re.split(r'\s*;\s*', row[representative_type]):
                                        # Strip off the phone number
                                        # and email address before
                                        # resolving:
                                        person = find_pombola_person(
                                            re.sub(r'(?ms)\s*\d.*', '', contact),
                                            na_member_lookup,
                                            VERBOSE
                                        )
                                        if person:
                                            people_to_add.append(person)
                                else:
                                    raise Exception, "Unknown party '%s'" % (party,)

                        if municipality:
                            mapit_municipality = get_mapit_municipality(
                                municipality, province
                            )

                            if mapit_municipality:
                                place_name = u'Municipality associated with ' + organisation_name
                                places_to_add.append({
                                    'name': place_name,
                                    'slug': slugify(place_name),
                                    'kind': pk_constituency_office,
                                    'mapit_area': mapit_municipality})

                    elif office_or_area == 'Area':
                        # At the moment it's only for DA that these
                        # Constituency Areas exist, so check that assumption:
                        if party != 'Democratic Alliance (DA)':
                            raise Exception, "Unexpected party %s with Area" % (party)
                        constituency_kind = ok_constituency_area
                        province = fix_province_name(province)
                        mapit_province = Area.objects.get(
                            type__code='PRV',
                            generation_high__gte=mapit_current_generation,
                            generation_low__lte=mapit_current_generation,
                            name=province)
                        place_name = 'Unknown sub-area of %s known as %s' % (
                            province,
                            organisation_name)
                        places_to_add.append({
                                'name': place_name,
                                'slug': slugify(place_name),
                                'kind': pk_constituency_area,
                                'mapit_area': mapit_province})

                        for representative_type in ('MP', 'MPL'):
                            for contact in re.split(r'(?ms)\s*;\s*', row[representative_type]):
                                person = find_pombola_person(contact, na_member_lookup, VERBOSE)
                                if person:
                                    people_to_add.append(person)

                    else:
                        raise Exception, "Unknown type %s" % (office_or_area,)

                    # The Administrator column might have multiple
                    # administrator contacts, separated by
                    # semi-colons.  Each contact may have notes about
                    # them in brackets, and may be followed by more
                    # than one phone number, separated by slashes.
                    if administrator and administrator.lower() != 'vacant':
                        for administrator_contact in re.split(r'\s*;\s*', administrator):
                            # Strip out any bracketed notes:
                            administrator_contact = re.sub(r'\([^\)]*\)', '', administrator_contact)
                            # Extract any phone number at the end:
                            m = re.search(r'^([^0-9]*)([0-9\s/]*)$', administrator_contact)
                            phone_numbers = []
                            if m:
                                administrator_contact, phones = m.groups()
                                phone_numbers = [s.strip() for s in re.split(r'\s*/\s*', phones)]
                            administrator_contact = administrator_contact.strip()
                            # If there's no name after that, just skip this contact
                            if not administrator_contact:
                                continue
                            administrator_contact = re.sub(r'\s+', ' ', administrator_contact)
                            # Drop empty and known-placeholder phone numbers:
                            tuple_to_add = (administrator_contact,
                                            tuple(s for s in phone_numbers
                                                  if s and s != nonexistent_phone_number))
                            verbose("administrator name '%s', numbers '%s'" % tuple_to_add)
                            administrators_to_add.append(tuple_to_add)

                    organisation_kwargs = {
                        'name': organisation_name,
                        'slug': slugify(organisation_name),
                        'kind': constituency_kind}

                    # Check if this office appears to exist already:

                    identifier = None
                    identifier_scheme = "constituency-office/%s/" % (abbreviated_party,)

                    try:
                        if party_code:
                            # If there's something in the "Party Code"
                            # column, we can check for an identifier and
                            # get the existing object reliably through that.
                            identifier = Identifier.objects.get(identifier=party_code,
                                                                scheme=identifier_scheme)
                            org = identifier.content_object
                        else:
                            # Otherwise use the slug we intend to use, and
                            # look for an existing organisation:
                            org = Organisation.objects.get(slug__in=org_slug_possibilities,
                                                           kind=constituency_kind)
                    except ObjectDoesNotExist:
                        org = Organisation()
                        if party_code:
                            identifier = Identifier(identifier=party_code,
                                                    scheme=identifier_scheme,
                                                    content_type=organisation_content_type)

                    # Make sure we set the same attributes and save:
                    for k, v in organisation_kwargs.items():
                        setattr(org, k, v)

                    if options['commit']:
                        org.save()
                        if party_code:
                            identifier.object_id = org.id
                            identifier.save()

                        # Replace all places associated with this
                        # organisation and re-add them:
                        org.place_set.all().delete()
                        for place_dict in places_to_add:
                            org.place_set.create(**place_dict)

                        # Replace all contact details associated with this
                        # organisation, and re-add them:
                        org.contacts.all().delete()
                        for contact_dict in contacts_to_add:
                            org.contacts.create(**contact_dict)

                        # Remove previous has_office relationships,
                        # between this office and any party, then re-add
                        # this one:
                        OrganisationRelationship.objects.filter(
                            organisation_b=org).delete()
                        OrganisationRelationship.objects.create(
                            organisation_a=mz_party,
                            kind=ork_has_office,
                            organisation_b=org)

                        # Remove all Membership relationships between this
                        # organisation and other people, then recreate them:
                        org.position_set.filter(title=pt_constituency_contact).delete()
                        for person in people_to_add:
                            org.position_set.create(
                                person=person,
                                title=pt_constituency_contact,
                                category='political')

                        # Remove any administrators for this organisation:
                        for position in org.position_set.filter(title=pt_administrator):
                            for contact in position.person.contacts.all():
                                contact.delete()
                            position.person.delete()
                            position.delete()
                        # And create new administrators:
                        for administrator_tuple in administrators_to_add:
                            administrator_name, phone_numbers = administrator_tuple
                            if administrator_tuple in created_administrators:
                                person = created_administrators[administrator_tuple]
                            else:
                                person = Person.objects.create(legal_name=administrator_name,
                                                               slug=slugify(administrator_name))
                                created_administrators[administrator_tuple] = person
                                for phone_number in phone_numbers:
                                    person.contacts.create(kind=ck_telephone,
                                                           value=phone_number,
                                                           source=contact_source)
                            Position.objects.create(person=person,
                                                    organisation=org,
                                                    title=pt_administrator,
                                                    category='political')

        finally:
            # Persist the geocode cache even if the import failed
            # part-way, so successful lookups aren't repeated next run.
            write_geocode_cache(geocode_cache)

        verbose("Geolocated %d out of %d physical addresses" % (geolocated, with_physical_addresses))

Example 47

View license
    def mirror_from_api(self):
        """Mirror all data from a remote candidates API into the local DB.

        Objects are fetched and recreated in dependency order: extra
        fields, simple/complex Popolo fields, area types, party sets,
        organizations (then their parent links), areas (then their
        parent links), elections, posts, post-elections, persons,
        memberships and images; finally the database sequences are
        reset.  Objects are created rather than updated, so this
        assumes the target tables start out empty.
        """
        for extra_field in self.get_api_results('extra_fields'):
            with show_data_on_error('extra_field', extra_field):
                del extra_field['url']
                models.ExtraField.objects.create(**extra_field)
        for simple_field in self.get_api_results('simple_fields'):
            with show_data_on_error('simple_field', simple_field):
                simple_field.pop('url', None)
                models.SimplePopoloField.objects.create(**simple_field)
        for complex_field in self.get_api_results('complex_fields'):
            with show_data_on_error('complex_field', complex_field):
                complex_field.pop('url', None)
                models.ComplexPopoloField.objects.create(**complex_field)
        for area_type_data in self.get_api_results('area_types'):
            with show_data_on_error('area_type_data', area_type_data):
                del area_type_data['url']
                emodels.AreaType.objects.create(**area_type_data)
        party_sets_by_slug = {}
        for party_set_data in self.get_api_results('party_sets'):
            with show_data_on_error('party_set_data', party_set_data):
                del party_set_data['url']
                party_set = models.PartySet.objects.create(**party_set_data)
                party_sets_by_slug[party_set.slug] = party_set
        # Parent links can refer to organizations not yet created, so
        # record them here and resolve them after this loop.
        organization_to_parent = {}
        for organization_data in self.get_api_results('organizations'):
            with show_data_on_error('organization_data', organization_data):
                o = pmodels.Organization.objects.create(
                    name=organization_data['name'],
                    classification=organization_data['classification'],
                    founding_date=organization_data['founding_date'],
                    dissolution_date=organization_data['dissolution_date'],
                )
                models.OrganizationExtra.objects.create(
                    base=o,
                    slug=organization_data['id'],
                    register=organization_data['register'],
                )
                for party_set_data in organization_data['party_sets']:
                    with show_data_on_error('party_set_data', party_set_data):
                        party_set = party_sets_by_slug[party_set_data['slug']]
                        o.party_sets.add(party_set)
                self.add_related(
                    o, pmodels.Identifier, organization_data['identifiers']
                )
                self.add_related(
                    o, pmodels.ContactDetail, organization_data['contact_details']
                )
                self.add_related(
                    o, pmodels.OtherName, organization_data['other_names']
                )
                self.add_related(
                    o, pmodels.Link, organization_data['links']
                )
                self.add_related(
                    o, pmodels.Source, organization_data['sources']
                )
                # Save any parent:
                if organization_data['parent']:
                    organization_to_parent[organization_data['id']] = \
                        organization_data['parent']['id']
        # Set any parent organizations:
        for child_slug, parent_slug in organization_to_parent.items():
            child = pmodels.Organization.objects.get(extra__slug=child_slug)
            parent = pmodels.Organization.objects.get(extra__slug=parent_slug)
            child.parent = parent
            child.save()
        area_to_parent = {}
        for area_data in self.get_api_results('areas'):
            with show_data_on_error('area_data', area_data):
                a = pmodels.Area.objects.create(
                    id=area_data['id'],
                    classification=area_data['classification'],
                    identifier=area_data['identifier'],
                    name=area_data['name'],
                )
                # Fixed: this previously passed 'o' (the last
                # organization from the loop above) instead of the
                # area just created, attaching area identifiers to
                # the wrong object.
                self.add_related(
                    a, pmodels.Identifier, area_data['other_identifiers']
                )
                ae = models.AreaExtra(base=a)
                if area_data['type']:
                    area_type_id = area_data['type']['id']
                    at = emodels.AreaType.objects.get(id=area_type_id)
                    ae.type = at
                ae.save()
                # Save any parent:
                if area_data['parent']:
                    # The API currently (v0.9) returns a URL in the
                    # 'parent' field, although the existing code was
                    # written to expect a dictionary containing the
                    # ID.  Support either representation in this script:
                    if isinstance(area_data['parent'], string_types):
                        m = re.search(r'/areas/(\d+)', area_data['parent'])
                        if not m:
                            # (Typo fixed in this message: was "extra".)
                            msg = "Couldn't extract area ID from parent URL"
                            raise Exception(msg)
                        area_to_parent[area_data['id']] = int(m.group(1))
                    else:
                        area_to_parent[area_data['id']] = \
                            area_data['parent']['id']
        # Set any parent areas:
        for child_id, parent_id in area_to_parent.items():
            child = pmodels.Area.objects.get(id=child_id)
            parent = pmodels.Area.objects.get(id=parent_id)
            child.parent = parent
            child.save()
        for election_data in self.get_api_results('elections'):
            with show_data_on_error('election_data', election_data):
                kwargs = {
                    k: election_data[k]
                    for k in (
                            'name',
                            'winner_membership_role',
                            'candidate_membership_role',
                            'election_date',
                            'for_post_role',
                            'current',
                            'use_for_candidate_suggestions',
                            'area_generation',
                            'party_lists_in_use',
                            'default_party_list_members_to_show',
                            'show_official_documents',
                            'ocd_division',
                            'description',
                    )
                }
                e = emodels.Election(slug=election_data['id'], **kwargs)
                election_org = election_data['organization']
                if election_org:
                    e.organization = pmodels.Organization.objects.get(
                        extra__slug=election_org['id']
                    )
                e.save()
                for area_type_data in election_data['area_types']:
                    e.area_types.add(
                        emodels.AreaType.objects.get(pk=area_type_data['id'])
                    )
        for post_data in self.get_api_results('posts'):
            with show_data_on_error('post_data', post_data):
                p = pmodels.Post(
                    label=post_data['label'],
                    role=post_data['role'],
                )
                p.organization = pmodels.Organization.objects.get(
                    extra__slug=post_data['organization']['id']
                )
                area_data = post_data['area']
                if area_data:
                    p.area = pmodels.Area.objects.get(id=area_data['id'])
                p.save()
                pe = models.PostExtra(
                    base=p,
                    slug=post_data['id'],
                    candidates_locked=post_data['candidates_locked'],
                    group=post_data['group'],
                )
                if post_data.get('party_set'):
                    party_set_data = post_data['party_set']
                    pe.party_set = \
                        models.PartySet.objects.get(pk=party_set_data['id'])
                pe.save()
                for election_data in post_data['elections']:
                    election = \
                        emodels.Election.objects.get(slug=election_data['id'])
                    models.PostExtraElection.objects.get_or_create(
                        postextra=pe,
                        election=election
                    )
        for post_election_data in self.get_api_results('post_elections'):
            with show_data_on_error('post_election_data', post_election_data):
                pe_election = models.PostExtraElection.objects.get(
                    postextra__slug=post_election_data['post']['id'],
                    election__slug=post_election_data['election']['id'],
                )
                pe_election.winner_count = post_election_data['winner_count']
                pe_election.save()
        extra_fields = {
            ef.key: ef for ef in models.ExtraField.objects.all()
        }
        for person_data in self.get_api_results('persons'):
            with show_data_on_error('person_data', person_data):
                kwargs = {
                    k: person_data[k] for k in
                    (
                        'id',
                        'name',
                        'honorific_prefix',
                        'honorific_suffix',
                        'sort_name',
                        'email',
                        'gender',
                        'birth_date',
                        'death_date',
                    )
                }
                p = pmodels.Person.objects.create(**kwargs)
                self.add_related(
                    p, pmodels.Identifier, person_data['identifiers']
                )
                self.add_related(
                    p, pmodels.ContactDetail, person_data['contact_details']
                )
                self.add_related(
                    p, pmodels.OtherName, person_data['other_names']
                )
                self.add_related(
                    p, pmodels.Link, person_data['links']
                )
                kwargs = {
                    'base': p,
                    'versions': json.dumps(person_data['versions'])
                }
                pe = models.PersonExtra.objects.create(**kwargs)
                # Look for any data in ExtraFields
                for extra_field_data in person_data['extra_fields']:
                    p.extra_field_values.create(
                        field=extra_fields[extra_field_data['key']],
                        value=extra_field_data['value'],
                    )

        for m_data in self.get_api_results('memberships'):
            with show_data_on_error('m_data', m_data):
                kwargs = {
                    k: m_data[k] for k in
                    ('label', 'role', 'start_date', 'end_date')
                }
                kwargs['person'] = pmodels.Person.objects.get(
                    pk=m_data['person']['id']
                )
                if m_data.get('on_behalf_of'):
                    kwargs['on_behalf_of'] = pmodels.Organization.objects.get(
                        extra__slug=m_data['on_behalf_of']['id']
                    )
                if m_data.get('organization'):
                    kwargs['organization'] = pmodels.Organization.objects.get(
                        extra__slug=m_data['organization']['id']
                    )
                if m_data.get('post'):
                    kwargs['post'] = pmodels.Post.objects.get(
                        extra__slug=m_data['post']['id']
                    )
                m = pmodels.Membership.objects.create(**kwargs)
                kwargs = {
                    'base': m,
                    'elected': m_data['elected'],
                    'party_list_position': m_data['party_list_position'],
                }
                if m_data.get('election'):
                    kwargs['election'] = emodels.Election.objects.get(
                        slug=m_data['election']['id']
                    )
                models.MembershipExtra.objects.create(**kwargs)
        for image_data in self.get_api_results('images'):
            with show_data_on_error('image_data', image_data):
                # Work out which object the image is attached to from
                # its API URL, e.g. api/v0.9/persons/1234/ .
                endpoint, object_id = re.search(
                    r'api/v0.9/(\w+)/([^/]*)/',
                    image_data['content_object']
                ).groups()
                if endpoint == 'organizations':
                    django_object = models.OrganizationExtra.objects.get(
                        slug=object_id
                    )
                elif endpoint == 'persons':
                    django_object = models.PersonExtra.objects.get(
                        base__id=object_id
                    )
                else:
                    msg = "Image referring to unhandled endpoint {0}"
                    raise Exception(msg.format(endpoint))
                suggested_filename = re.search(
                    r'/([^/]+)$',
                    image_data['image_url']
                ).group(1)
                full_url = self.base_url + image_data['image_url']
                image_filename = self.get_url_cached(full_url)
                extension = self.get_image_extension(image_filename)
                # Skip files that don't look like a supported image:
                if not extension:
                    continue
                models.ImageExtra.objects.update_or_create_from_file(
                    image_filename,
                    join('images', suggested_filename),
                    md5sum=image_data['md5sum'] or '',
                    defaults = {
                        'uploading_user': self.get_user_from_username(
                            image_data['uploading_user']
                        ),
                        'copyright': image_data['copyright'] or '',
                        'notes': image_data['notes'] or '',
                        'user_copyright': image_data['user_copyright'] or '',
                        'user_notes': image_data['user_notes'] or '',
                        'base__source': image_data['source'] or '',
                        'base__is_primary': image_data['is_primary'],
                        'base__object_id': django_object.id,
                        'base__content_type_id':
                        ContentType.objects.get_for_model(django_object).id
                    }
                )
        # After inserting rows with explicit primary keys, the DB's
        # auto-increment sequences must be reset or later inserts
        # would collide with mirrored IDs.
        reset_sql_list = connection.ops.sequence_reset_sql(
            no_style(), [
                emodels.AreaType, models.PartySet, pmodels.Area,
                emodels.Election, Image, models.ExtraField,
                models.SimplePopoloField, models.ComplexPopoloField,
                pmodels.Person,
            ]
        )
        if reset_sql_list:
            cursor = connection.cursor()
            for reset_sql in reset_sql_list:
                cursor.execute(reset_sql)
Example 48

View license
    def mirror_from_api(self):
        """Mirror all data from the remote v0.9 API into the local database.

        Fetches every endpoint's results in dependency order (extra/simple/
        complex fields, area types, party sets, organizations, areas,
        elections, posts, post-elections, people, memberships, images) and
        recreates each object locally.  Parent links for organizations and
        areas are resolved in a second pass, since a parent may appear in
        the API results after its child.  Finally, database sequences are
        reset for models that were created with explicit primary keys.

        Bug fixed: in the areas loop, ``add_related`` was called with ``o``
        (the last organization from the previous loop) instead of the area
        ``a`` just created, so area identifiers were attached to the wrong
        object.
        """
        for extra_field in self.get_api_results('extra_fields'):
            with show_data_on_error('extra_field', extra_field):
                del extra_field['url']
                models.ExtraField.objects.create(**extra_field)
        for simple_field in self.get_api_results('simple_fields'):
            with show_data_on_error('simple_field', simple_field):
                simple_field.pop('url', None)
                models.SimplePopoloField.objects.create(**simple_field)
        for complex_field in self.get_api_results('complex_fields'):
            with show_data_on_error('complex_field', complex_field):
                complex_field.pop('url', None)
                models.ComplexPopoloField.objects.create(**complex_field)
        for area_type_data in self.get_api_results('area_types'):
            with show_data_on_error('area_type_data', area_type_data):
                del area_type_data['url']
                emodels.AreaType.objects.create(**area_type_data)
        # Keep party sets by slug so organizations can be linked to them below.
        party_sets_by_slug = {}
        for party_set_data in self.get_api_results('party_sets'):
            with show_data_on_error('party_set_data', party_set_data):
                del party_set_data['url']
                party_set = models.PartySet.objects.create(**party_set_data)
                party_sets_by_slug[party_set.slug] = party_set
        # Record child-slug -> parent-slug; parents are linked in a second
        # pass because a parent may not exist yet on the first pass.
        organization_to_parent = {}
        for organization_data in self.get_api_results('organizations'):
            with show_data_on_error('organization_data', organization_data):
                o = pmodels.Organization.objects.create(
                    name=organization_data['name'],
                    classification=organization_data['classification'],
                    founding_date=organization_data['founding_date'],
                    dissolution_date=organization_data['dissolution_date'],
                )
                models.OrganizationExtra.objects.create(
                    base=o,
                    slug=organization_data['id'],
                    register=organization_data['register'],
                )
                for party_set_data in organization_data['party_sets']:
                    with show_data_on_error('party_set_data', party_set_data):
                        party_set = party_sets_by_slug[party_set_data['slug']]
                        o.party_sets.add(party_set)
                self.add_related(
                    o, pmodels.Identifier, organization_data['identifiers']
                )
                self.add_related(
                    o, pmodels.ContactDetail, organization_data['contact_details']
                )
                self.add_related(
                    o, pmodels.OtherName, organization_data['other_names']
                )
                self.add_related(
                    o, pmodels.Link, organization_data['links']
                )
                self.add_related(
                    o, pmodels.Source, organization_data['sources']
                )
                # Save any parent:
                if organization_data['parent']:
                    organization_to_parent[organization_data['id']] = \
                        organization_data['parent']['id']
        # Set any parent organizations:
        for child_slug, parent_slug in organization_to_parent.items():
            child = pmodels.Organization.objects.get(extra__slug=child_slug)
            parent = pmodels.Organization.objects.get(extra__slug=parent_slug)
            child.parent = parent
            child.save()
        # Same two-pass approach for area parents, keyed by area ID.
        area_to_parent = {}
        for area_data in self.get_api_results('areas'):
            with show_data_on_error('area_data', area_data):
                a = pmodels.Area.objects.create(
                    id=area_data['id'],
                    classification=area_data['classification'],
                    identifier=area_data['identifier'],
                    name=area_data['name'],
                )
                # BUG FIX: this previously passed 'o' (the last organization
                # created above), attaching every area's identifiers to that
                # organization instead of the area itself.
                self.add_related(
                    a, pmodels.Identifier, area_data['other_identifiers']
                )
                ae = models.AreaExtra(base=a)
                if area_data['type']:
                    area_type_id = area_data['type']['id']
                    at = emodels.AreaType.objects.get(id=area_type_id)
                    ae.type = at
                ae.save()
                # Save any parent:
                if area_data['parent']:
                    # The API currently (v0.9) returns a URL in the
                    # 'parent' field, although the existing code was
                    # written to expect a dictionary containing the
                    # ID.  Support either representation in this script:
                    if isinstance(area_data['parent'], string_types):
                        m = re.search(r'/areas/(\d+)', area_data['parent'])
                        if not m:
                            msg = "Couldn't extract area ID from parent URL"
                            raise Exception(msg)
                        area_to_parent[area_data['id']] = int(m.group(1))
                    else:
                        area_to_parent[area_data['id']] = \
                            area_data['parent']['id']
        # Set any parent areas:
        for child_id, parent_id in area_to_parent.items():
            child = pmodels.Area.objects.get(id=child_id)
            parent = pmodels.Area.objects.get(id=parent_id)
            child.parent = parent
            child.save()
        for election_data in self.get_api_results('elections'):
            with show_data_on_error('election_data', election_data):
                kwargs = {
                    k: election_data[k]
                    for k in (
                            'name',
                            'winner_membership_role',
                            'candidate_membership_role',
                            'election_date',
                            'for_post_role',
                            'current',
                            'use_for_candidate_suggestions',
                            'area_generation',
                            'party_lists_in_use',
                            'default_party_list_members_to_show',
                            'show_official_documents',
                            'ocd_division',
                            'description',
                    )
                }
                e = emodels.Election(slug=election_data['id'], **kwargs)
                election_org = election_data['organization']
                if election_org:
                    e.organization = pmodels.Organization.objects.get(
                        extra__slug=election_org['id']
                    )
                e.save()
                for area_type_data in election_data['area_types']:
                    e.area_types.add(
                        emodels.AreaType.objects.get(pk=area_type_data['id'])
                    )
        for post_data in self.get_api_results('posts'):
            with show_data_on_error('post_data', post_data):
                p = pmodels.Post(
                    label=post_data['label'],
                    role=post_data['role'],
                )
                p.organization = pmodels.Organization.objects.get(
                    extra__slug=post_data['organization']['id']
                )
                area_data = post_data['area']
                if area_data:
                    p.area = pmodels.Area.objects.get(id=area_data['id'])
                p.save()
                pe = models.PostExtra(
                    base=p,
                    slug=post_data['id'],
                    candidates_locked=post_data['candidates_locked'],
                    group=post_data['group'],
                )
                if post_data.get('party_set'):
                    party_set_data = post_data['party_set']
                    pe.party_set = \
                        models.PartySet.objects.get(pk=party_set_data['id'])
                pe.save()
                for election_data in post_data['elections']:
                    election = \
                        emodels.Election.objects.get(slug=election_data['id'])
                    models.PostExtraElection.objects.get_or_create(
                        postextra=pe,
                        election=election
                    )
        for post_election_data in self.get_api_results('post_elections'):
            with show_data_on_error('post_election_data', post_election_data):
                pe_election = models.PostExtraElection.objects.get(
                    postextra__slug=post_election_data['post']['id'],
                    election__slug=post_election_data['election']['id'],
                )
                pe_election.winner_count = post_election_data['winner_count']
                pe_election.save()
        extra_fields = {
            ef.key: ef for ef in models.ExtraField.objects.all()
        }
        for person_data in self.get_api_results('persons'):
            with show_data_on_error('person_data', person_data):
                kwargs = {
                    k: person_data[k] for k in
                    (
                        'id',
                        'name',
                        'honorific_prefix',
                        'honorific_suffix',
                        'sort_name',
                        'email',
                        'gender',
                        'birth_date',
                        'death_date',
                    )
                }
                p = pmodels.Person.objects.create(**kwargs)
                self.add_related(
                    p, pmodels.Identifier, person_data['identifiers']
                )
                self.add_related(
                    p, pmodels.ContactDetail, person_data['contact_details']
                )
                self.add_related(
                    p, pmodels.OtherName, person_data['other_names']
                )
                self.add_related(
                    p, pmodels.Link, person_data['links']
                )
                kwargs = {
                    'base': p,
                    'versions': json.dumps(person_data['versions'])
                }
                pe = models.PersonExtra.objects.create(**kwargs)
                # Look for any data in ExtraFields
                for extra_field_data in person_data['extra_fields']:
                    p.extra_field_values.create(
                        field=extra_fields[extra_field_data['key']],
                        value=extra_field_data['value'],
                    )

        for m_data in self.get_api_results('memberships'):
            with show_data_on_error('m_data', m_data):
                kwargs = {
                    k: m_data[k] for k in
                    ('label', 'role', 'start_date', 'end_date')
                }
                kwargs['person'] = pmodels.Person.objects.get(
                    pk=m_data['person']['id']
                )
                if m_data.get('on_behalf_of'):
                    kwargs['on_behalf_of'] = pmodels.Organization.objects.get(
                        extra__slug=m_data['on_behalf_of']['id']
                    )
                if m_data.get('organization'):
                    kwargs['organization'] = pmodels.Organization.objects.get(
                        extra__slug=m_data['organization']['id']
                    )
                if m_data.get('post'):
                    kwargs['post'] = pmodels.Post.objects.get(
                        extra__slug=m_data['post']['id']
                    )
                m = pmodels.Membership.objects.create(**kwargs)
                kwargs = {
                    'base': m,
                    'elected': m_data['elected'],
                    'party_list_position': m_data['party_list_position'],
                }
                if m_data.get('election'):
                    kwargs['election'] = emodels.Election.objects.get(
                        slug=m_data['election']['id']
                    )
                models.MembershipExtra.objects.create(**kwargs)
        for image_data in self.get_api_results('images'):
            with show_data_on_error('image_data', image_data):
                # The content_object URL encodes which endpoint the image
                # belongs to, e.g. .../api/v0.9/persons/1234/
                endpoint, object_id = re.search(
                    r'api/v0.9/(\w+)/([^/]*)/',
                    image_data['content_object']
                ).groups()
                if endpoint == 'organizations':
                    django_object = models.OrganizationExtra.objects.get(
                        slug=object_id
                    )
                elif endpoint == 'persons':
                    django_object = models.PersonExtra.objects.get(
                        base__id=object_id
                    )
                else:
                    msg = "Image referring to unhandled endpoint {0}"
                    raise Exception(msg.format(endpoint))
                # Use the last path component of the image URL as a filename.
                suggested_filename = re.search(
                    r'/([^/]+)$',
                    image_data['image_url']
                ).group(1)
                full_url = self.base_url + image_data['image_url']
                image_filename = self.get_url_cached(full_url)
                extension = self.get_image_extension(image_filename)
                # Skip images whose type can't be determined:
                if not extension:
                    continue
                models.ImageExtra.objects.update_or_create_from_file(
                    image_filename,
                    join('images', suggested_filename),
                    md5sum=image_data['md5sum'] or '',
                    defaults = {
                        'uploading_user': self.get_user_from_username(
                            image_data['uploading_user']
                        ),
                        'copyright': image_data['copyright'] or '',
                        'notes': image_data['notes'] or '',
                        'user_copyright': image_data['user_copyright'] or '',
                        'user_notes': image_data['user_notes'] or '',
                        'base__source': image_data['source'] or '',
                        'base__is_primary': image_data['is_primary'],
                        'base__object_id': django_object.id,
                        'base__content_type_id':
                        ContentType.objects.get_for_model(django_object).id
                    }
                )
        # Objects above were created with explicit primary keys, so the
        # database sequences must be reset or future inserts will collide.
        reset_sql_list = connection.ops.sequence_reset_sql(
            no_style(), [
                emodels.AreaType, models.PartySet, pmodels.Area,
                emodels.Election, Image, models.ExtraField,
                models.SimplePopoloField, models.ComplexPopoloField,
                pmodels.Person,
            ]
        )
        if reset_sql_list:
            cursor = connection.cursor()
            for reset_sql in reset_sql_list:
                cursor.execute(reset_sql)

Example 49

Project: yap
Source File: yap_exe.py
View license
def execute_chunk(
        input_file_list_local,
        inp1,
        inp2,
        chunk_number,
	myrank,
        workflow_prov,
	eqp_dict):
    '''
    Executes preprocess commands for chunked data and passes to the alignment stage
    Takes chunked input data, filename list, chunk number, rank of the processor     
    and provenance list to append log data.
    ''' 
    # variable declaration
    input_filename_local = input_file_list_local[0]
    input_filename_local_2 = input_file_list_local[1]
    file_name = input_file_list_local[2]
    err_chunk_file = wd.err_log_path + "/" + file_name + \
        "_log_temp/" + file_name + "_" + str(chunk_number).zfill(6)
    stat_chunk_file = wd.stat_log_path + "/" + file_name + \
        "_log_temp/" + file_name + "_" + str(chunk_number).zfill(6)
    myhost = os.getenv('HOSTNAME')
    yap_file_io.write_data("HOSTNAME: " + str(myhost) + "\n", err_chunk_file)
    yap_file_io.write_data("HOSTNAME: " + str(myhost) + "\n", stat_chunk_file)
    yap_file_io.write_data("CHUNK NUMBER: " + str(chunk_number) + "\n", err_chunk_file)
    yap_file_io.write_data("CHUNK NUMBER: " + str(chunk_number) + "\n", stat_chunk_file)
    seqs_arr1 = []
    seqs_arr2 = []
    read_length = wd.max_read_length
    barcode_seqstruct_dict1 = {}
    barcode_seqstruct_dict2 = {}
    barcode_output_dict = {}
    aligner_out_str = ''
    sort_order = ''
    barcode_flag = 'False'
    sort_order = wd.alignment_sort_order
    # convert the input data based on format given in workflow configuration
    if wd.input_file_format == "qseq" or wd.input_file_format != wd.preprocess_output_file_format:
        inp1 = yap_tools.convert_format(inp1)
        if wd.paired_end_data == 'yes':
            inp2 = yap_tools.convert_format(inp2)
    if wd.run_preprocess_analysis == 'yes':
	str_out = "-"*20 + "PREPROCESS STARTED" +"\t" + str(time.strftime("%Y/%m/%d %H:%M:%S", time.localtime())) + "-"*20 + "\n"
	yap_file_io.write_data(str_out,err_chunk_file)
	yap_file_io.write_data(str_out,stat_chunk_file)
        # Run barcode splitter as first preprocess step
        for jj in range(0, len(wd.preprocess_cmd_arr)):
            preprocess_cmd_name = wd.preprocess_cmd_arr[jj][2][0][0]
            preprocess_cmd = wd.preprocess_cmd_arr[jj][2][0][1]
            if re.search('fastx_barcode_splitter', preprocess_cmd_name) is not None:
                barcode_flag = 'True'
                print "Entering " + preprocess_cmd_name + " : Filename=", input_filename_local, " chunk number=", chunk_number, "\n"
		str_out= "YAP_COMMAND: " + preprocess_cmd + "\n" + "INPUT FILE: " + input_filename_local
		yap_file_io.write_data(str_out,err_chunk_file)
		yap_file_io.write_data(str_out,stat_chunk_file)
                barcode_seqstruct_dict1, workflow_prov = yap_preprocess.fastx_barcode_splitter(
                    inp1, wd.preprocess_output_file_format, preprocess_cmd, workflow_prov, err_chunk_file, stat_chunk_file)
                yap_file_io.write_data("_" * 30 + "\n", err_chunk_file)
                yap_file_io.write_data("_" * 30 + "\n", stat_chunk_file)
                barcode_seqstruct_dict1["no_barcode_specified"] = ''
                print "Exiting " + preprocess_cmd_name + " : Filename=", input_filename_local, " chunk number=", chunk_number, "\n"
                if wd.paired_end_data == 'yes':
                    print "Entering " + preprocess_cmd_name + " : Filename=", input_filename_local_2, " chunk number=", chunk_number, "\n"
		    str_out= "YAP_COMMAND: " + preprocess_cmd + "\n" + "INPUT FILE: " + input_filename_local_2
		    yap_file_io.write_data(str_out,err_chunk_file)
		    yap_file_io.write_data(str_out,stat_chunk_file)
                    barcode_seqstruct_dict2, workflow_prov = yap_preprocess.fastx_barcode_splitter(
                        inp2,wd.preprocess_output_file_format , preprocess_cmd, workflow_prov, err_chunk_file, stat_chunk_file)
                    yap_file_io.write_data("_" * 30 + "\n", err_chunk_file)
                    yap_file_io.write_data("_" * 30 + "\n", stat_chunk_file)
                    barcode_seqstruct_dict2["no_barcode_specified"] = ''
                    print "Exiting " + preprocess_cmd_name + " : Filename=", input_filename_local, " chunk number=", chunk_number, "\n"
                break
        if barcode_flag == 'False':
            #if no barcode command; then create dictionary with one barcode tag
            barcode_seqstruct_dict1["no_barcode_specified"] = inp1
            barcode_seqstruct_dict2["no_barcode_specified"] = inp2
    else:
        #if no preprocess stage specified; then create dictionary with one barcode tag
        barcode_seqstruct_dict1["no_barcode_specified"] = inp1
        barcode_seqstruct_dict2["no_barcode_specified"] = inp2
    #iterate over the barcode dictionary 
    for barcode, inp1 in barcode_seqstruct_dict1.iteritems():
        run_unique_reads = 'False'
        barcode_value = yap_tools.rename_barcode(barcode)
        if wd.paired_end_data == "yes":
            inp2 = barcode_seqstruct_dict2[barcode]
        preprocessed_data_dict = {}
	#intialize matrix for basecount analysis
        aligner_output_str_local = ''
        basecount_matrix_local1 = numpy.zeros(
            (int(read_length), 5), dtype=numpy.int)
        basecount_matrix_local2 = numpy.zeros(
            (int(read_length), 5), dtype=numpy.int)
        barcode_output_dict.setdefault(barcode, [basecount_matrix_local1, basecount_matrix_local2])
        #set output file paths
        barcode_dir_path = wd.workflow_output_path + "/" + file_name + "/" + barcode
        preprocess_dir_path = barcode_dir_path + "/" + "preprocess_output"
        if wd.data_distribution_method != "file_based":
            if barcode_value != '':
                preprocess_out_filename1 = preprocess_dir_path + "/" + barcode_value + "_" + file_name + \
                    "_" + str(chunk_number).zfill(6) + "_" + \
                    str(myrank) + "_preprocessed_data_1.txt"
                preprocess_out_filename2 = preprocess_dir_path + "/" + barcode_value + "_" + file_name + \
                    "_" + str(chunk_number).zfill(6) + "_" + \
                    str(myrank) + "_preprocessed_data_2.txt"
            else:
                preprocess_out_filename1 = preprocess_dir_path + "/" + file_name + "_" + \
                    str(chunk_number).zfill(6) + "_" + \
                    str(myrank) + "_preprocessed_data_1.txt"
                preprocess_out_filename2 = preprocess_dir_path + "/" + file_name + "_" + \
                    str(chunk_number).zfill(6) + "_" + \
                    str(myrank) + "_preprocessed_data_2.txt"
        else:
            if barcode_value != '':
                preprocess_out_filename1 = preprocess_dir_path + "/" + \
                    "preprocess_data" + "_" + file_name + \
                    "_" + barcode_value + "_1.txt"
                preprocess_out_filename2 = preprocess_dir_path + "/" + \
                    "preprocess_data" + "_" + file_name + \
                    "_" + barcode_value + "_2.txt"
            else:
                preprocess_out_filename1 = preprocess_dir_path + "/" + \
                    "preprocess_data" + "_" + file_name + "_1.txt"
                preprocess_out_filename2 = preprocess_dir_path + "/" + \
                    "preprocess_data" + "_" + file_name + "_2.txt"
        aligner_dir_path = barcode_dir_path + "/" + "aligner_output"
        if barcode_value != '':
            aligner_output_filename = aligner_dir_path + "/" + "aligner_" + \
                file_name + "_" + barcode_value + \
                "_" + str(chunk_number).zfill(6)
        else:
            aligner_output_filename = aligner_dir_path + "/" + \
                "aligner_" + file_name + "_" + str(chunk_number).zfill(6)

        for jj in range(0, len(wd.preprocess_cmd_arr)):
            preprocess_cmd_name = wd.preprocess_cmd_arr[jj][2][0][1]
            preprocess_cmd = wd.preprocess_cmd_arr[jj][2][0][1]
            # skip fastqc and fastq screen and barcode splitter as they are
            # already executed
            if (re.search('fastqc', preprocess_cmd_name) is not None) or (re.search('fastq_screen', preprocess_cmd_name) is not None)or(re.search('fastx_barcode_splitter',
                                                                                                                                                  preprocess_cmd_name) is not None):
                pass
            else:
                if re.search('calculate_basecount_metrics', preprocess_cmd_name) is not None:
		    #excecute basecount calculation
                    basecount_matrix_local1, workflow_prov = yap_tools.qc_basecount(
                        inp1, workflow_prov)
                    basecount_matrix_local2, workflow_prov = yap_tools.qc_basecount(
                        inp2, workflow_prov)
                elif re.search('fastx_clipper', preprocess_cmd_name) is not None:
		    """
		    Check for fastx clipper as special case and execute.
		    This is because fastx clipper execution has been optimized by providing contaminants for every file,
		    instead of just applying contaminants universally. 
		    """ 
                    run_unique_reads = 'True'
                    if input_filename_local in wd.contaminant_dict.keys():
                        contaminants_arr1 = wd.contaminant_dict[
                            input_filename_local]
                        print "Entering " + preprocess_cmd_name + " : Filename=", input_filename_local, " chunk number=", chunk_number, "\n"
                        index = 0
                        for index in range(0, len(contaminants_arr1)):
			    #iterate over all the contaminants for this file 
                            fastx_clipper_cmd = preprocess_cmd
                            contaminant1 = contaminants_arr1[index].strip("\n")
                            if inp1 != '':
                                cont_replace = " -a " + contaminant1
                                fastx_clipper_cmd = fastx_clipper_cmd.replace(
                                    'pipe1', " - ") + " -a " + contaminant1
                                inp1 = yap_tools.multiproc_function(
                                    fastx_clipper_cmd, inp1, int(
                                        wd.format_specific_lines), '', err_chunk_file, stat_chunk_file)
                                yap_log.merge_multiproc_files(
                                    fastx_clipper_cmd,
                                    input_filename_local,
                                    barcode,
                                    err_chunk_file,
                                    stat_chunk_file)
                            if inp1 == '':
                                break
                        print "Exiting " + preprocess_cmd_name + " : Filename=", input_filename_local, " chunk number=", chunk_number, "\n"
                    if wd.paired_end_data == 'yes':
                        if input_filename_local_2 in wd.contaminant_dict.keys():
			    #repeat fastx clipper for the paired end
                            contaminants_arr2 = wd.contaminant_dict[
                                input_filename_local_2]
                            print "Entering " + preprocess_cmd_name + " : Filename=", input_filename_local_2, " chunk number=", chunk_number, "\n"
                            index = 0
                            for index in range(0, len(contaminants_arr2)):
                                fastx_clipper_cmd = preprocess_cmd
                                contaminant2 = contaminants_arr2[
                                    index].strip("\n")
                                if inp2 != '':
                                    cont_replace = " -a " + contaminant2
                                    fastx_clipper_cmd = fastx_clipper_cmd.replace(
                                        'pipe1',
                                        " - ") + " -a " + contaminant2
                                    inp2 = yap_tools.multiproc_function(
                                        fastx_clipper_cmd, inp2, int(
                                            wd.format_specific_lines), '', err_chunk_file, stat_chunk_file)
                                    yap_log.merge_multiproc_files(
                                        fastx_clipper_cmd,
                                        input_filename_local_2,
                                        barcode,
                                        err_chunk_file,
                                        stat_chunk_file)
                                if inp2 == '':
                                    break
                            print "Exiting " + preprocess_cmd_name + " : Filename=", input_filename_local_2, " chunk number=", chunk_number, "\n"
                elif re.search('eqp_rename_reads',preprocess_cmd_name) != None:
                        # this section renames reads according to specific format, applies to in-house use, neglect otherwise
                        inp1_arr = inp1.splitlines(1)
                        inp1=''
                        inp2_arr = inp2.splitlines(1)
                        inp2=''
                        read_count=1
                        if wd.data_distribution_method == "file_based":
                                if eqp_dict.has_key("eqp_read_counter"):
                                        if len(eqp_dict["eqp_read_counter"]) > 0:
                                                file_name, read_count = eqp_dict["eqp_read_counter"]
                                                if file_name !=  input_filename_local:
                                                        read_count = 1
                        format_lines = int(wd.format_specific_lines)
                        for i in range(0,len(inp1_arr),format_lines):
                                if wd.paired_end_data == 'yes':
                                        if (len(inp1_arr[i+1].strip("\n").replace('A','')) >= 5) and (len(inp2_arr[i+1].strip("\n").replace('A','')) >= 5) and (len(inp1_arr[i+1].strip("\n").replace('T','')) >= 5) and (len(inp2_arr[i+1].strip("\n").replace('T','')) >= 5) :
                                                inp1 += '@F'+str(read_count).zfill(9)+'/1'+'\n'
                                                inp2 += '@F'+str(read_count).zfill(9)+'/2'+'\n'
                                                for jj in range (1,format_lines):
                                                        inp1 += inp1_arr[i+jj]
                                                        inp2 += inp2_arr[i+jj]
                                                read_count += 1
                                else:
                                        if (len(inp1_arr[i+1].strip("\n").replace('A','')) >= 5) and (len(inp1_arr[i+1].strip("\n").replace('T','')) >= 5):
                                                inp1_arr[i] = '@F'+str(read_count).zfill(9)+'/1'+'\n'
                                                for jj in range (1,format_lines):
                                                        inp1 += inp1_arr[i+jj]
                                                read_count += 1
                        eqp_dict["eqp_read_counter"] = [ input_filename_local, read_count]
                        inp1_arr = []
                        inp2_arr = []
                else:
		    #set the flag to remove umatched pair after preprocesing 
                    run_unique_reads = 'True'
                    print "Entering " + preprocess_cmd_name + " : Filename=", input_filename_local, " chunk number=", chunk_number, "\n"
		    #for all other preprocess commands execute this section
                    if inp1 != '':
                        preprocess_cmd = preprocess_cmd.replace('pipe1', ' - ')
                        inp1 = yap_tools.multiproc_function(
                            preprocess_cmd, inp1, int(
                                wd.format_specific_lines), '', err_chunk_file, stat_chunk_file)
                        yap_log.merge_multiproc_files(
                            preprocess_cmd,
                            input_filename_local,
                            barcode,
                            err_chunk_file,
                            stat_chunk_file)
                    print "Exiting " + preprocess_cmd_name + " : Filename=", input_filename_local, " chunk number=", chunk_number, "\n"
                    if wd.paired_end_data == 'yes':
                        preprocess_cmd = preprocess_cmd.replace('pipe1', ' - ')
                        print "Entering " + preprocess_cmd_name + " : Filename=", input_filename_local_2, " chunk number=", chunk_number, "\n"
                        if inp2 != '':
                            inp2 = yap_tools.multiproc_function(
                                preprocess_cmd, inp2, int(
                                    wd.format_specific_lines), '', err_chunk_file, stat_chunk_file)
                            yap_log.merge_multiproc_files(
                                preprocess_cmd,
                                input_filename_local_2,
                                barcode,
                                err_chunk_file,
                                stat_chunk_file)
                        print "Exiting " + preprocess_cmd_name + " : Filename=", input_filename_local_2, " chunk number=", chunk_number, "\n"
        if wd.paired_end_data == 'yes':
            if run_unique_reads == 'True':
		#remove all the umatched pairs from two chunks belonging to the same sample
		#this is because each chunk goes through command separately, not as a pair.
                if inp1 != '' and inp2 != '':
                    inp1, inp2 = yap_tools.find_unique_set(
                        inp1.splitlines(1), inp2.splitlines(1))
	if wd.run_preprocess_analysis  == 'yes':
		#write log data
		str_out="-"*20 + "PREPROCESS FINISHED" + "\t" + str(time.strftime("%Y/%m/%d %H:%M:%S", time.localtime())) + "-"*20 + "\n"
		yap_file_io.write_data(str_out, err_chunk_file)
		yap_file_io.write_data(str_out, stat_chunk_file)
        if wd.data_distribution_method != "file_based":
	    #if the workflow is not filebased; then pass the chunks for alignment.
            if wd.run_reference_alignment == 'yes':
		str_out="-"*20 + "ALIGNMENT STARTED" + "\t" + str(time.strftime("%Y/%m/%d %H:%M:%S", time.localtime())) + "-"*20 + "\n"
		yap_file_io.write_data(str_out, err_chunk_file)
		yap_file_io.write_data(str_out, stat_chunk_file)
                if (wd.paired_end_data == 'yes' and inp1 != '' and inp2 != '') or (wd.paired_end_data != 'yes' and inp1 != ''):
                    print "Entering Alignment: Filename=", input_filename_local, "barcode=", barcode, " chunk number=", chunk_number, "\n"
                    if wd.paired_end_data == 'yes':
                        workflow_prov.append(
                            "INPUT: " +
                            input_filename_local +
                            " and " +
                            input_filename_local_2 +
                            " chunk number= " +
                            str(chunk_number))
                        aligner_out_str, workflow_prov = yap_aligner.run_aligner(
                            inp1, inp2,aligner_output_filename, chunk_number, myrank,workflow_prov, err_chunk_file, stat_chunk_file)
                    else:
                        workflow_prov.append(
                            "INPUT: " +
                            input_filename_local +
                            " chunk number= " +
                            str(chunk_number))
                        aligner_out_str, workflow_prov = yap_aligner.run_aligner(
                            inp1, '', aligner_output_filename, chunk_number,myrank,workflow_prov, err_chunk_file, stat_chunk_file)
                    rm_cmd = "rm " + aligner_output_filename + "*.sai"
                    if len(glob.glob(aligner_output_filename + "*.sai")) > 0:
                        prm = Popen(rm_cmd, shell='False').wait()
                    if len(glob.glob(aligner_output_filename + "*.head")) > 0:
                        prm = Popen(rm_cmd, shell='False').wait()

                else:
                    	print "Exiting Alignment: Filename=", input_filename_local, "barcode=", barcode, " chunk number=", chunk_number, "\n"
		str_out="-"*20 + "ALIGNMENT FINISHED" + "\t" + str(time.strftime("%Y/%m/%d %H:%M:%S", time.localtime())) + "-"*20+ "\n"
		yap_file_io.write_data(str_out, err_chunk_file)
		yap_file_io.write_data(str_out, stat_chunk_file)
            if wd.run_preprocess_analysis == 'yes':
                if wd.write_preprocessed_data == 'yes':
		    #write preprocessed data to file
                    yap_file_io.write_data(inp1, preprocess_out_filename1)
                    if wd.paired_end_data == "yes":
                        yap_file_io.write_data(inp2, preprocess_out_filename2)
                else:
		    #else empty input data chunk
                    inp1 = ''
                    inp2 = ''
            else:
		#else empty input data chunk
                inp1 = ''
                inp2 = ''

        else:
	    #if workflow is filebased; then write preprocessed data to file
            if wd.run_preprocess_analysis == "yes":
                if wd.write_preprocessed_data == 'yes' or wd.run_reference_alignment == "yes":
                    yap_file_io.write_data(inp1, preprocess_out_filename1)
                    if wd.paired_end_data == "yes":
                        yap_file_io.write_data(inp2, preprocess_out_filename2)
        barcode_output_dict[barcode][0] = basecount_matrix_local1
        barcode_output_dict[barcode][1] = basecount_matrix_local2
    return barcode_output_dict, workflow_prov

Example 50

Project: yap
Source File: yap_exe.py
View license
def execute_chunk(
        input_file_list_local,
        inp1,
        inp2,
        chunk_number,
	myrank,
        workflow_prov,
	eqp_dict):
    '''
    Executes preprocess commands for chunked data and passes to the alignment stage
    Takes chunked input data, filename list, chunk number, rank of the processor     
    and provenance list to append log data.
    ''' 
    # variable declaration
    input_filename_local = input_file_list_local[0]
    input_filename_local_2 = input_file_list_local[1]
    file_name = input_file_list_local[2]
    err_chunk_file = wd.err_log_path + "/" + file_name + \
        "_log_temp/" + file_name + "_" + str(chunk_number).zfill(6)
    stat_chunk_file = wd.stat_log_path + "/" + file_name + \
        "_log_temp/" + file_name + "_" + str(chunk_number).zfill(6)
    myhost = os.getenv('HOSTNAME')
    yap_file_io.write_data("HOSTNAME: " + str(myhost) + "\n", err_chunk_file)
    yap_file_io.write_data("HOSTNAME: " + str(myhost) + "\n", stat_chunk_file)
    yap_file_io.write_data("CHUNK NUMBER: " + str(chunk_number) + "\n", err_chunk_file)
    yap_file_io.write_data("CHUNK NUMBER: " + str(chunk_number) + "\n", stat_chunk_file)
    seqs_arr1 = []
    seqs_arr2 = []
    read_length = wd.max_read_length
    barcode_seqstruct_dict1 = {}
    barcode_seqstruct_dict2 = {}
    barcode_output_dict = {}
    aligner_out_str = ''
    sort_order = ''
    barcode_flag = 'False'
    sort_order = wd.alignment_sort_order
    # convert the input data based on format given in workflow configuration
    if wd.input_file_format == "qseq" or wd.input_file_format != wd.preprocess_output_file_format:
        inp1 = yap_tools.convert_format(inp1)
        if wd.paired_end_data == 'yes':
            inp2 = yap_tools.convert_format(inp2)
    if wd.run_preprocess_analysis == 'yes':
	str_out = "-"*20 + "PREPROCESS STARTED" +"\t" + str(time.strftime("%Y/%m/%d %H:%M:%S", time.localtime())) + "-"*20 + "\n"
	yap_file_io.write_data(str_out,err_chunk_file)
	yap_file_io.write_data(str_out,stat_chunk_file)
        # Run barcode splitter as first preprocess step
        for jj in range(0, len(wd.preprocess_cmd_arr)):
            preprocess_cmd_name = wd.preprocess_cmd_arr[jj][2][0][0]
            preprocess_cmd = wd.preprocess_cmd_arr[jj][2][0][1]
            if re.search('fastx_barcode_splitter', preprocess_cmd_name) is not None:
                barcode_flag = 'True'
                print "Entering " + preprocess_cmd_name + " : Filename=", input_filename_local, " chunk number=", chunk_number, "\n"
		str_out= "YAP_COMMAND: " + preprocess_cmd + "\n" + "INPUT FILE: " + input_filename_local
		yap_file_io.write_data(str_out,err_chunk_file)
		yap_file_io.write_data(str_out,stat_chunk_file)
                barcode_seqstruct_dict1, workflow_prov = yap_preprocess.fastx_barcode_splitter(
                    inp1, wd.preprocess_output_file_format, preprocess_cmd, workflow_prov, err_chunk_file, stat_chunk_file)
                yap_file_io.write_data("_" * 30 + "\n", err_chunk_file)
                yap_file_io.write_data("_" * 30 + "\n", stat_chunk_file)
                barcode_seqstruct_dict1["no_barcode_specified"] = ''
                print "Exiting " + preprocess_cmd_name + " : Filename=", input_filename_local, " chunk number=", chunk_number, "\n"
                if wd.paired_end_data == 'yes':
                    print "Entering " + preprocess_cmd_name + " : Filename=", input_filename_local_2, " chunk number=", chunk_number, "\n"
		    str_out= "YAP_COMMAND: " + preprocess_cmd + "\n" + "INPUT FILE: " + input_filename_local_2
		    yap_file_io.write_data(str_out,err_chunk_file)
		    yap_file_io.write_data(str_out,stat_chunk_file)
                    barcode_seqstruct_dict2, workflow_prov = yap_preprocess.fastx_barcode_splitter(
                        inp2,wd.preprocess_output_file_format , preprocess_cmd, workflow_prov, err_chunk_file, stat_chunk_file)
                    yap_file_io.write_data("_" * 30 + "\n", err_chunk_file)
                    yap_file_io.write_data("_" * 30 + "\n", stat_chunk_file)
                    barcode_seqstruct_dict2["no_barcode_specified"] = ''
                    print "Exiting " + preprocess_cmd_name + " : Filename=", input_filename_local, " chunk number=", chunk_number, "\n"
                break
        if barcode_flag == 'False':
            #if no barcode command; then create dictionary with one barcode tag
            barcode_seqstruct_dict1["no_barcode_specified"] = inp1
            barcode_seqstruct_dict2["no_barcode_specified"] = inp2
    else:
        #if no preprocess stage specified; then create dictionary with one barcode tag
        barcode_seqstruct_dict1["no_barcode_specified"] = inp1
        barcode_seqstruct_dict2["no_barcode_specified"] = inp2
    #iterate over the barcode dictionary 
    for barcode, inp1 in barcode_seqstruct_dict1.iteritems():
        run_unique_reads = 'False'
        barcode_value = yap_tools.rename_barcode(barcode)
        if wd.paired_end_data == "yes":
            inp2 = barcode_seqstruct_dict2[barcode]
        preprocessed_data_dict = {}
	#intialize matrix for basecount analysis
        aligner_output_str_local = ''
        basecount_matrix_local1 = numpy.zeros(
            (int(read_length), 5), dtype=numpy.int)
        basecount_matrix_local2 = numpy.zeros(
            (int(read_length), 5), dtype=numpy.int)
        barcode_output_dict.setdefault(barcode, [basecount_matrix_local1, basecount_matrix_local2])
        #set output file paths
        barcode_dir_path = wd.workflow_output_path + "/" + file_name + "/" + barcode
        preprocess_dir_path = barcode_dir_path + "/" + "preprocess_output"
        if wd.data_distribution_method != "file_based":
            if barcode_value != '':
                preprocess_out_filename1 = preprocess_dir_path + "/" + barcode_value + "_" + file_name + \
                    "_" + str(chunk_number).zfill(6) + "_" + \
                    str(myrank) + "_preprocessed_data_1.txt"
                preprocess_out_filename2 = preprocess_dir_path + "/" + barcode_value + "_" + file_name + \
                    "_" + str(chunk_number).zfill(6) + "_" + \
                    str(myrank) + "_preprocessed_data_2.txt"
            else:
                preprocess_out_filename1 = preprocess_dir_path + "/" + file_name + "_" + \
                    str(chunk_number).zfill(6) + "_" + \
                    str(myrank) + "_preprocessed_data_1.txt"
                preprocess_out_filename2 = preprocess_dir_path + "/" + file_name + "_" + \
                    str(chunk_number).zfill(6) + "_" + \
                    str(myrank) + "_preprocessed_data_2.txt"
        else:
            if barcode_value != '':
                preprocess_out_filename1 = preprocess_dir_path + "/" + \
                    "preprocess_data" + "_" + file_name + \
                    "_" + barcode_value + "_1.txt"
                preprocess_out_filename2 = preprocess_dir_path + "/" + \
                    "preprocess_data" + "_" + file_name + \
                    "_" + barcode_value + "_2.txt"
            else:
                preprocess_out_filename1 = preprocess_dir_path + "/" + \
                    "preprocess_data" + "_" + file_name + "_1.txt"
                preprocess_out_filename2 = preprocess_dir_path + "/" + \
                    "preprocess_data" + "_" + file_name + "_2.txt"
        aligner_dir_path = barcode_dir_path + "/" + "aligner_output"
        if barcode_value != '':
            aligner_output_filename = aligner_dir_path + "/" + "aligner_" + \
                file_name + "_" + barcode_value + \
                "_" + str(chunk_number).zfill(6)
        else:
            aligner_output_filename = aligner_dir_path + "/" + \
                "aligner_" + file_name + "_" + str(chunk_number).zfill(6)

        for jj in range(0, len(wd.preprocess_cmd_arr)):
            preprocess_cmd_name = wd.preprocess_cmd_arr[jj][2][0][1]
            preprocess_cmd = wd.preprocess_cmd_arr[jj][2][0][1]
            # skip fastqc and fastq screen and barcode splitter as they are
            # already executed
            if (re.search('fastqc', preprocess_cmd_name) is not None) or (re.search('fastq_screen', preprocess_cmd_name) is not None)or(re.search('fastx_barcode_splitter',
                                                                                                                                                  preprocess_cmd_name) is not None):
                pass
            else:
                if re.search('calculate_basecount_metrics', preprocess_cmd_name) is not None:
		    #excecute basecount calculation
                    basecount_matrix_local1, workflow_prov = yap_tools.qc_basecount(
                        inp1, workflow_prov)
                    basecount_matrix_local2, workflow_prov = yap_tools.qc_basecount(
                        inp2, workflow_prov)
                elif re.search('fastx_clipper', preprocess_cmd_name) is not None:
		    """
		    Check for fastx clipper as special case and execute.
		    This is because fastx clipper execution has been optimized by providing contaminants for every file,
		    instead of just applying contaminants universally. 
		    """ 
                    run_unique_reads = 'True'
                    if input_filename_local in wd.contaminant_dict.keys():
                        contaminants_arr1 = wd.contaminant_dict[
                            input_filename_local]
                        print "Entering " + preprocess_cmd_name + " : Filename=", input_filename_local, " chunk number=", chunk_number, "\n"
                        index = 0
                        for index in range(0, len(contaminants_arr1)):
			    #iterate over all the contaminants for this file 
                            fastx_clipper_cmd = preprocess_cmd
                            contaminant1 = contaminants_arr1[index].strip("\n")
                            if inp1 != '':
                                cont_replace = " -a " + contaminant1
                                fastx_clipper_cmd = fastx_clipper_cmd.replace(
                                    'pipe1', " - ") + " -a " + contaminant1
                                inp1 = yap_tools.multiproc_function(
                                    fastx_clipper_cmd, inp1, int(
                                        wd.format_specific_lines), '', err_chunk_file, stat_chunk_file)
                                yap_log.merge_multiproc_files(
                                    fastx_clipper_cmd,
                                    input_filename_local,
                                    barcode,
                                    err_chunk_file,
                                    stat_chunk_file)
                            if inp1 == '':
                                break
                        print "Exiting " + preprocess_cmd_name + " : Filename=", input_filename_local, " chunk number=", chunk_number, "\n"
                    if wd.paired_end_data == 'yes':
                        if input_filename_local_2 in wd.contaminant_dict.keys():
			    #repeat fastx clipper for the paired end
                            contaminants_arr2 = wd.contaminant_dict[
                                input_filename_local_2]
                            print "Entering " + preprocess_cmd_name + " : Filename=", input_filename_local_2, " chunk number=", chunk_number, "\n"
                            index = 0
                            for index in range(0, len(contaminants_arr2)):
                                fastx_clipper_cmd = preprocess_cmd
                                contaminant2 = contaminants_arr2[
                                    index].strip("\n")
                                if inp2 != '':
                                    cont_replace = " -a " + contaminant2
                                    fastx_clipper_cmd = fastx_clipper_cmd.replace(
                                        'pipe1',
                                        " - ") + " -a " + contaminant2
                                    inp2 = yap_tools.multiproc_function(
                                        fastx_clipper_cmd, inp2, int(
                                            wd.format_specific_lines), '', err_chunk_file, stat_chunk_file)
                                    yap_log.merge_multiproc_files(
                                        fastx_clipper_cmd,
                                        input_filename_local_2,
                                        barcode,
                                        err_chunk_file,
                                        stat_chunk_file)
                                if inp2 == '':
                                    break
                            print "Exiting " + preprocess_cmd_name + " : Filename=", input_filename_local_2, " chunk number=", chunk_number, "\n"
                elif re.search('eqp_rename_reads',preprocess_cmd_name) != None:
                        # this section renames reads according to specific format, applies to in-house use, neglect otherwise
                        inp1_arr = inp1.splitlines(1)
                        inp1=''
                        inp2_arr = inp2.splitlines(1)
                        inp2=''
                        read_count=1
                        if wd.data_distribution_method == "file_based":
                                if eqp_dict.has_key("eqp_read_counter"):
                                        if len(eqp_dict["eqp_read_counter"]) > 0:
                                                file_name, read_count = eqp_dict["eqp_read_counter"]
                                                if file_name !=  input_filename_local:
                                                        read_count = 1
                        format_lines = int(wd.format_specific_lines)
                        for i in range(0,len(inp1_arr),format_lines):
                                if wd.paired_end_data == 'yes':
                                        if (len(inp1_arr[i+1].strip("\n").replace('A','')) >= 5) and (len(inp2_arr[i+1].strip("\n").replace('A','')) >= 5) and (len(inp1_arr[i+1].strip("\n").replace('T','')) >= 5) and (len(inp2_arr[i+1].strip("\n").replace('T','')) >= 5) :
                                                inp1 += '@F'+str(read_count).zfill(9)+'/1'+'\n'
                                                inp2 += '@F'+str(read_count).zfill(9)+'/2'+'\n'
                                                for jj in range (1,format_lines):
                                                        inp1 += inp1_arr[i+jj]
                                                        inp2 += inp2_arr[i+jj]
                                                read_count += 1
                                else:
                                        if (len(inp1_arr[i+1].strip("\n").replace('A','')) >= 5) and (len(inp1_arr[i+1].strip("\n").replace('T','')) >= 5):
                                                inp1_arr[i] = '@F'+str(read_count).zfill(9)+'/1'+'\n'
                                                for jj in range (1,format_lines):
                                                        inp1 += inp1_arr[i+jj]
                                                read_count += 1
                        eqp_dict["eqp_read_counter"] = [ input_filename_local, read_count]
                        inp1_arr = []
                        inp2_arr = []
                else:
		    #set the flag to remove umatched pair after preprocesing 
                    run_unique_reads = 'True'
                    print "Entering " + preprocess_cmd_name + " : Filename=", input_filename_local, " chunk number=", chunk_number, "\n"
		    #for all other preprocess commands execute this section
                    if inp1 != '':
                        preprocess_cmd = preprocess_cmd.replace('pipe1', ' - ')
                        inp1 = yap_tools.multiproc_function(
                            preprocess_cmd, inp1, int(
                                wd.format_specific_lines), '', err_chunk_file, stat_chunk_file)
                        yap_log.merge_multiproc_files(
                            preprocess_cmd,
                            input_filename_local,
                            barcode,
                            err_chunk_file,
                            stat_chunk_file)
                    print "Exiting " + preprocess_cmd_name + " : Filename=", input_filename_local, " chunk number=", chunk_number, "\n"
                    if wd.paired_end_data == 'yes':
                        preprocess_cmd = preprocess_cmd.replace('pipe1', ' - ')
                        print "Entering " + preprocess_cmd_name + " : Filename=", input_filename_local_2, " chunk number=", chunk_number, "\n"
                        if inp2 != '':
                            inp2 = yap_tools.multiproc_function(
                                preprocess_cmd, inp2, int(
                                    wd.format_specific_lines), '', err_chunk_file, stat_chunk_file)
                            yap_log.merge_multiproc_files(
                                preprocess_cmd,
                                input_filename_local_2,
                                barcode,
                                err_chunk_file,
                                stat_chunk_file)
                        print "Exiting " + preprocess_cmd_name + " : Filename=", input_filename_local_2, " chunk number=", chunk_number, "\n"
        if wd.paired_end_data == 'yes':
            if run_unique_reads == 'True':
		#remove all the umatched pairs from two chunks belonging to the same sample
		#this is because each chunk goes through command separately, not as a pair.
                if inp1 != '' and inp2 != '':
                    inp1, inp2 = yap_tools.find_unique_set(
                        inp1.splitlines(1), inp2.splitlines(1))
	if wd.run_preprocess_analysis  == 'yes':
		#write log data
		str_out="-"*20 + "PREPROCESS FINISHED" + "\t" + str(time.strftime("%Y/%m/%d %H:%M:%S", time.localtime())) + "-"*20 + "\n"
		yap_file_io.write_data(str_out, err_chunk_file)
		yap_file_io.write_data(str_out, stat_chunk_file)
        if wd.data_distribution_method != "file_based":
	    #if the workflow is not filebased; then pass the chunks for alignment.
            if wd.run_reference_alignment == 'yes':
		str_out="-"*20 + "ALIGNMENT STARTED" + "\t" + str(time.strftime("%Y/%m/%d %H:%M:%S", time.localtime())) + "-"*20 + "\n"
		yap_file_io.write_data(str_out, err_chunk_file)
		yap_file_io.write_data(str_out, stat_chunk_file)
                if (wd.paired_end_data == 'yes' and inp1 != '' and inp2 != '') or (wd.paired_end_data != 'yes' and inp1 != ''):
                    print "Entering Alignment: Filename=", input_filename_local, "barcode=", barcode, " chunk number=", chunk_number, "\n"
                    if wd.paired_end_data == 'yes':
                        workflow_prov.append(
                            "INPUT: " +
                            input_filename_local +
                            " and " +
                            input_filename_local_2 +
                            " chunk number= " +
                            str(chunk_number))
                        aligner_out_str, workflow_prov = yap_aligner.run_aligner(
                            inp1, inp2,aligner_output_filename, chunk_number, myrank,workflow_prov, err_chunk_file, stat_chunk_file)
                    else:
                        workflow_prov.append(
                            "INPUT: " +
                            input_filename_local +
                            " chunk number= " +
                            str(chunk_number))
                        aligner_out_str, workflow_prov = yap_aligner.run_aligner(
                            inp1, '', aligner_output_filename, chunk_number,myrank,workflow_prov, err_chunk_file, stat_chunk_file)
                    rm_cmd = "rm " + aligner_output_filename + "*.sai"
                    if len(glob.glob(aligner_output_filename + "*.sai")) > 0:
                        prm = Popen(rm_cmd, shell='False').wait()
                    if len(glob.glob(aligner_output_filename + "*.head")) > 0:
                        prm = Popen(rm_cmd, shell='False').wait()

                else:
                    	print "Exiting Alignment: Filename=", input_filename_local, "barcode=", barcode, " chunk number=", chunk_number, "\n"
		str_out="-"*20 + "ALIGNMENT FINISHED" + "\t" + str(time.strftime("%Y/%m/%d %H:%M:%S", time.localtime())) + "-"*20+ "\n"
		yap_file_io.write_data(str_out, err_chunk_file)
		yap_file_io.write_data(str_out, stat_chunk_file)
            if wd.run_preprocess_analysis == 'yes':
                if wd.write_preprocessed_data == 'yes':
		    #write preprocessed data to file
                    yap_file_io.write_data(inp1, preprocess_out_filename1)
                    if wd.paired_end_data == "yes":
                        yap_file_io.write_data(inp2, preprocess_out_filename2)
                else:
		    #else empty input data chunk
                    inp1 = ''
                    inp2 = ''
            else:
		#else empty input data chunk
                inp1 = ''
                inp2 = ''

        else:
	    #if workflow is filebased; then write preprocessed data to file
            if wd.run_preprocess_analysis == "yes":
                if wd.write_preprocessed_data == 'yes' or wd.run_reference_alignment == "yes":
                    yap_file_io.write_data(inp1, preprocess_out_filename1)
                    if wd.paired_end_data == "yes":
                        yap_file_io.write_data(inp2, preprocess_out_filename2)
        barcode_output_dict[barcode][0] = basecount_matrix_local1
        barcode_output_dict[barcode][1] = basecount_matrix_local2
    return barcode_output_dict, workflow_prov