numpy.float64

Here are examples of the Python API numpy.float64 taken from open source projects. By voting up, you can indicate which examples are most useful and appropriate.

168 Examples
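
Before the project examples, a minimal sketch of the most common numpy.float64 patterns may be useful; the variable names below are illustrative and not drawn from any of the projects that follow.

import numpy

# numpy.float64 is used as a dtype when creating or converting arrays.
a = numpy.zeros(4, dtype=numpy.float64)           # array of 64-bit floats
b = numpy.array([1, 2, 3]).astype(numpy.float64)  # cast an integer array to float64

# It is also a scalar type and can be compared against an array's dtype.
x = numpy.float64(0.1)
assert a.dtype == numpy.float64
assert isinstance(x, float)  # numpy.float64 subclasses Python's built-in float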

Example 1

Project: Yeppp Source File: sse_add_test.py
    def test_add_V64fV64f_V64f(self):
        a_tmp = self.a.astype(numpy.float64)
        b_tmp = self.b.astype(numpy.float64)
        c = numpy.empty([self.n]).astype(numpy.float64)
        a_ptr = a_tmp.ctypes.data_as(ctypes.POINTER(ctypes.c_double))
        b_ptr = b_tmp.ctypes.data_as(ctypes.POINTER(ctypes.c_double))
        c_ptr = c.ctypes.data_as(ctypes.POINTER(ctypes.c_double))

        func = sse.add.yepCore_Add.yepCore_Add_V64fV64f_V64f.load()
        self.assertEqual(func(a_ptr, b_ptr, c_ptr, self.n), 0)

        for i in range(self.n):
            self.assertEqual(a_tmp[i] + b_tmp[i], c[i])
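
The pattern above, casting the operands to numpy.float64 and exposing their buffers as ctypes double pointers, applies to any C routine that expects double* arguments. A self-contained sketch of the same preparation follows; the loop stands in for the real extension call (func(a_ptr, b_ptr, c_ptr, n) in the Yeppp test) and only demonstrates that the pointers alias the numpy buffers.

import ctypes
import numpy

n = 8
a = numpy.random.rand(n).astype(numpy.float64)
b = numpy.random.rand(n).astype(numpy.float64)
c = numpy.empty(n, dtype=numpy.float64)

# Contiguous float64 buffers exposed as double* for a C routine.
a_ptr = a.ctypes.data_as(ctypes.POINTER(ctypes.c_double))
b_ptr = b.ctypes.data_as(ctypes.POINTER(ctypes.c_double))
c_ptr = c.ctypes.data_as(ctypes.POINTER(ctypes.c_double))

# Stand-in for the extension call: writing through the pointer updates the array.
for i in range(n):
    c_ptr[i] = a_ptr[i] + b_ptr[i]

assert numpy.allclose(c, a + b)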

Example 2

Project: chainer Source File: test_matmul.py
    def setUp(self):
        self.x1 = numpy.random.uniform(.5, 1, (m, k)).astype(numpy.float64)
        self.x2 = numpy.random.uniform(.5, 1, (k, n)).astype(numpy.float64)
        self.gy = numpy.random.uniform(-1, 1, (m, n)).astype(numpy.float64)
        self.op = lambda x, y: F.matmul(x, y)
        self.forward_answer = numpy.dot(self.x1, self.x2)

Example 3

Project: scimath Source File: interpolate.py
def window_average(x, y, new_x, width=10.0):
    bad_index = None
    x = make_array_safe(x, numpy.float64)
    y = make_array_safe(y, numpy.float64)
    new_x = make_array_safe(new_x, numpy.float64)
    width = float(width)
    assert len(y.shape) < 3, "function only works with 1D or 2D arrays"
    if len(y.shape) == 2:
        new_y = numpy.zeros((y.shape[0], len(new_x)), numpy.float64)
        for i in range(len(new_y)):
            _interpolate.window_average_ddddd(x, y[i], new_x, new_y[i],
                                              width)
    else:
        new_y = numpy.zeros(len(new_x), numpy.float64)
        _interpolate.window_average_ddddd(x, y, new_x, new_y, width)

    return new_y
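
The averaging itself happens in the compiled _interpolate.window_average_ddddd routine. As a rough illustration only, a pure-numpy sketch of a windowed average is shown below; it assumes a window of total width "width" centered on each new x value and handles only the 1D case, so it may not match the exact convention of the scimath routine.

import numpy

def window_average_py(x, y, new_x, width=10.0):
    # Mean of y over points whose x lies within +/- width/2 of each new_x (1D only).
    x = numpy.asarray(x, dtype=numpy.float64)
    y = numpy.asarray(y, dtype=numpy.float64)
    new_x = numpy.asarray(new_x, dtype=numpy.float64)
    new_y = numpy.empty(len(new_x), dtype=numpy.float64)
    for i, cx in enumerate(new_x):
        mask = numpy.abs(x - cx) <= width / 2.0
        new_y[i] = y[mask].mean() if mask.any() else numpy.nan
    return new_y

x = numpy.linspace(0.0, 100.0, 101)
y = numpy.sin(x / 10.0)
new_y = window_average_py(x, y, numpy.array([25.0, 50.0]), width=10.0)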

Example 4

Project: RMG-Py Source File: simpleTest.py
Function: test_solve
    def testSolve(self):
        """
        Test the simple batch reactor with a simple kinetic model. Here we
        choose a kinetic model consisting of the hydrogen abstraction reaction
        CH4 + C2H5 <=> CH3 + C2H6.
        """
        CH4 = Species(
            molecule=[Molecule().fromSMILES("C")],
            thermo=ThermoData(Tdata=([300,400,500,600,800,1000,1500],"K"), Cpdata=([ 8.615, 9.687,10.963,12.301,14.841,16.976,20.528],"cal/(mol*K)"), H298=(-17.714,"kcal/mol"), S298=(44.472,"cal/(mol*K)"))
            )
        CH3 = Species(
            molecule=[Molecule().fromSMILES("[CH3]")],
            thermo=ThermoData(Tdata=([300,400,500,600,800,1000,1500],"K"), Cpdata=([ 9.397,10.123,10.856,11.571,12.899,14.055,16.195],"cal/(mol*K)"), H298=(  9.357,"kcal/mol"), S298=(45.174,"cal/(mol*K)"))
            )
        C2H6 = Species(
            molecule=[Molecule().fromSMILES("CC")],
            thermo=ThermoData(Tdata=([300,400,500,600,800,1000,1500],"K"), Cpdata=([12.684,15.506,18.326,20.971,25.500,29.016,34.595],"cal/(mol*K)"), H298=(-19.521,"kcal/mol"), S298=(54.799,"cal/(mol*K)"))
            )
        C2H5 = Species(
            molecule=[Molecule().fromSMILES("C[CH2]")],
            thermo=ThermoData(Tdata=([300,400,500,600,800,1000,1500],"K"), Cpdata=([11.635,13.744,16.085,18.246,21.885,24.676,29.107],"cal/(mol*K)"), H298=( 29.496,"kcal/mol"), S298=(56.687,"cal/(mol*K)"))
            )
        

        rxn1 = Reaction(reactants=[C2H6,CH3], products=[C2H5,CH4], kinetics=Arrhenius(A=(686.375*6,'m^3/(mol*s)'), n=4.40721, Ea=(7.82799,'kcal/mol'), T0=(298.15,'K')))

        coreSpecies = [CH4,CH3,C2H6,C2H5]
        edgeSpecies = []
        coreReactions = [rxn1]
        edgeReactions = []

        T = 1000; P = 1.0e5
        rxnSystem = SimpleReactor(T, P, initialMoleFractions={C2H5: 0.1, CH3: 0.1, CH4: 0.4, C2H6: 0.4}, termination=[])

        rxnSystem.initializeModel(coreSpecies, coreReactions, edgeSpecies, edgeReactions)

        tlist = numpy.array([10**(i/10.0) for i in range(-130, -49)], numpy.float64)

        # Integrate to get the solution at each time point
        t = []; y = []; reactionRates = []; speciesRates = []
        for t1 in tlist:
            rxnSystem.advance(t1)
            t.append(rxnSystem.t)
            # You must make a copy of y because it is overwritten by DASSL at
            # each call to advance()
            y.append(rxnSystem.y.copy())
            reactionRates.append(rxnSystem.coreReactionRates.copy())
            speciesRates.append(rxnSystem.coreSpeciesRates.copy())

        # Convert the solution vectors to numpy arrays
        t = numpy.array(t, numpy.float64)
        y = numpy.array(y, numpy.float64)
        reactionRates = numpy.array(reactionRates, numpy.float64)
        speciesRates = numpy.array(speciesRates, numpy.float64)
        V = constants.R * rxnSystem.T.value_si * numpy.sum(y) / rxnSystem.P.value_si       

        # Check that we're computing the species fluxes correctly
        for i in range(t.shape[0]):
            self.assertAlmostEqual(reactionRates[i,0], speciesRates[i,0], delta=1e-6*reactionRates[i,0])
            self.assertAlmostEqual(reactionRates[i,0], -speciesRates[i,1], delta=1e-6*reactionRates[i,0])
            self.assertAlmostEqual(reactionRates[i,0], -speciesRates[i,2], delta=1e-6*reactionRates[i,0])
            self.assertAlmostEqual(reactionRates[i,0], speciesRates[i,3], delta=1e-6*reactionRates[i,0])
        
        # Check that we've reached equilibrium 
        self.assertAlmostEqual(reactionRates[-1,0], 0.0, delta=1e-2)
        
        #######        
        # Unit test for the jacobian function:
        # Solve a reaction system and check if the analytical jacobian matches the finite difference jacobian
        
        H2 = Species(
            molecule=[Molecule().fromSMILES("[H][H]")],
            thermo=ThermoData(Tdata=([300,400,500,600,800,1000,1500],"K"), Cpdata=([6.89,6.97,6.99,7.01,7.08,7.22,7.72],"cal/(mol*K)"), H298=( 0,"kcal/mol"), S298=(31.23,"cal/(mol*K)"))
            )
        
        rxnList = []
        rxnList.append(Reaction(reactants=[C2H6], products=[CH3,CH3], kinetics=Arrhenius(A=(686.375*6,'1/s'), n=4.40721, Ea=(7.82799,'kcal/mol'), T0=(298.15,'K'))))
        rxnList.append(Reaction(reactants=[CH3,CH3], products=[C2H6], kinetics=Arrhenius(A=(686.375*6,'m^3/(mol*s)'), n=4.40721, Ea=(7.82799,'kcal/mol'), T0=(298.15,'K'))))
        
        rxnList.append(Reaction(reactants=[C2H6,CH3], products=[C2H5,CH4], kinetics=Arrhenius(A=(46.375*6,'m^3/(mol*s)'), n=3.40721, Ea=(6.82799,'kcal/mol'), T0=(298.15,'K'))))        
        rxnList.append(Reaction(reactants=[C2H5,CH4], products=[C2H6,CH3], kinetics=Arrhenius(A=(46.375*6,'m^3/(mol*s)'), n=3.40721, Ea=(6.82799,'kcal/mol'), T0=(298.15,'K'))))        
        
        rxnList.append(Reaction(reactants=[C2H5,CH4], products=[CH3,CH3,CH3], kinetics=Arrhenius(A=(246.375*6,'m^3/(mol*s)'), n=1.40721, Ea=(3.82799,'kcal/mol'), T0=(298.15,'K'))))       
        rxnList.append(Reaction(reactants=[CH3,CH3,CH3], products=[C2H5,CH4], kinetics=Arrhenius(A=(246.375*6,'m^6/(mol^2*s)'), n=1.40721, Ea=(3.82799,'kcal/mol'), T0=(298.15,'K'))))#        
        
        rxnList.append(Reaction(reactants=[C2H6,CH3,CH3], products=[C2H5,C2H5,H2], kinetics=Arrhenius(A=(146.375*6,'m^6/(mol^2*s)'), n=2.40721, Ea=(8.82799,'kcal/mol'), T0=(298.15,'K'))))
        rxnList.append(Reaction(reactants=[C2H5,C2H5,H2], products=[C2H6,CH3,CH3], kinetics=Arrhenius(A=(146.375*6,'m^6/(mol^2*s)'), n=2.40721, Ea=(8.82799,'kcal/mol'), T0=(298.15,'K'))))
        
        rxnList.append(Reaction(reactants=[C2H6,C2H6], products=[CH3,CH4,C2H5], kinetics=Arrhenius(A=(1246.375*6,'m^3/(mol*s)'), n=0.40721, Ea=(8.82799,'kcal/mol'), T0=(298.15,'K'))))
        rxnList.append(Reaction(reactants=[CH3,CH4,C2H5], products=[C2H6,C2H6], kinetics=Arrhenius(A=(46.375*6,'m^6/(mol^2*s)'), n=0.10721, Ea=(8.82799,'kcal/mol'), T0=(298.15,'K'))))
        

        for rxn in rxnList:
            coreSpecies = [CH4,CH3,C2H6,C2H5,H2]
            edgeSpecies = []
            coreReactions = [rxn]
            
            rxnSystem0 = SimpleReactor(T,P,initialMoleFractions={CH4:0.2,CH3:0.1,C2H6:0.35,C2H5:0.15, H2:0.2},termination=[])
            rxnSystem0.initializeModel(coreSpecies, coreReactions, edgeSpecies, edgeReactions)
            dydt0 = rxnSystem0.residual(0.0, rxnSystem0.y, numpy.zeros(rxnSystem0.y.shape))[0]
            numCoreSpecies = len(coreSpecies)
            dN = .000001*sum(rxnSystem0.y)
            dN_array = dN*numpy.eye(numCoreSpecies)
            
            dydt = []
            for i in range(numCoreSpecies):
                rxnSystem0.y[i] += dN 
                dydt.append(rxnSystem0.residual(0.0, rxnSystem0.y, numpy.zeros(rxnSystem0.y.shape))[0])
                rxnSystem0.y[i] -= dN  # reset y to original y0
            
            # Let the solver compute the jacobian       
            solverJacobian = rxnSystem0.jacobian(0.0, rxnSystem0.y, dydt0, 0.0)     
            # Compute the jacobian using finite differences
            jacobian = numpy.zeros((numCoreSpecies, numCoreSpecies))
            for i in range(numCoreSpecies):
                for j in range(numCoreSpecies):
                    jacobian[i,j] = (dydt[j][i]-dydt0[i])/dN
                    self.assertAlmostEqual(jacobian[i,j], solverJacobian[i,j], delta=abs(1e-4*jacobian[i,j]))
        
        #print 'Solver jacobian'
        #print solverJacobian
        #print 'Numerical jacobian'
        #print jacobian
        
        ###
        # Unit test for the compute rate derivative
        rxnList = []
        rxnList.append(Reaction(reactants=[C2H6], products=[CH3,CH3], kinetics=Arrhenius(A=(686.375e6,'1/s'), n=4.40721, Ea=(7.82799,'kcal/mol'), T0=(298.15,'K')))) 
        rxnList.append(Reaction(reactants=[C2H6,CH3], products=[C2H5,CH4], kinetics=Arrhenius(A=(46.375*6,'m^3/(mol*s)'), n=3.40721, Ea=(6.82799,'kcal/mol'), T0=(298.15,'K'))))        
        rxnList.append(Reaction(reactants=[C2H6,CH3,CH3], products=[C2H5,C2H5,H2], kinetics=Arrhenius(A=(146.375*6,'m^6/(mol^2*s)'), n=2.40721, Ea=(8.82799,'kcal/mol'), T0=(298.15,'K'))))
        
        
        coreSpecies = [CH4,CH3,C2H6,C2H5,H2]
        edgeSpecies = []
        coreReactions = rxnList
        
        rxnSystem0 = SimpleReactor(T,P,initialMoleFractions={CH4:0.2,CH3:0.1,C2H6:0.35,C2H5:0.15, H2:0.2},termination=[])
        rxnSystem0.initializeModel(coreSpecies, coreReactions, edgeSpecies, edgeReactions)
        dfdt0 = rxnSystem0.residual(0.0, rxnSystem0.y, numpy.zeros(rxnSystem0.y.shape))[0]
        solver_dfdk = rxnSystem0.computeRateDerivative()
        #print 'Solver d(dy/dt)/dk'
        #print solver_dfdk
        
        integrationTime = 1e-8
        rxnSystem0.termination.append(TerminationTime((integrationTime,'s')))
        rxnSystem0.simulate(coreSpecies, coreReactions, [], [], 0, 1, 0)

        y0 = rxnSystem0.y
        
        dfdk = numpy.zeros((numCoreSpecies,len(rxnList)))   # d(dy/dt)/dk
        
        for i in range(len(rxnList)):
            k0 = rxnList[i].getRateCoefficient(T,P)
            rxnList[i].kinetics.A.value_si = rxnList[i].kinetics.A.value_si*(1+1e-3)               
            dk = rxnList[i].getRateCoefficient(T,P) - k0

            rxnSystem = SimpleReactor(T,P,initialMoleFractions={CH4:0.2,CH3:0.1,C2H6:0.35,C2H5:0.15, H2:0.2},termination=[])
            rxnSystem.initializeModel(coreSpecies, coreReactions, edgeSpecies, edgeReactions)

            dfdt = rxnSystem.residual(0.0, rxnSystem.y, numpy.zeros(rxnSystem.y.shape))[0]  
            dfdk[:,i]=(dfdt-dfdt0)/dk          
            
            
            rxnSystem.termination.append(TerminationTime((integrationTime,'s')))
            rxnSystem.simulate(coreSpecies, coreReactions, [], [], 0, 1, 0)
            
            rxnList[i].kinetics.A.value_si = rxnList[i].kinetics.A.value_si/(1+1e-3)  # reset A factor
            
        for i in range(numCoreSpecies):
            for j in range(len(rxnList)):
                self.assertAlmostEqual(dfdk[i,j], solver_dfdk[i,j], delta=abs(1e-3*dfdk[i,j]))
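
The Jacobian check in this test is ordinary forward finite differencing: perturb one state variable at a time and divide the change in the residual by the step size dN. The same idea, detached from the RMG-Py reactor, looks like the sketch below; the function finite_difference_jacobian and the test function f are illustrative, not part of RMG-Py.

import numpy

def finite_difference_jacobian(f, y0, h=1e-6):
    # Approximate J[i, j] = d f_i / d y_j by forward differences with step h.
    y0 = numpy.asarray(y0, dtype=numpy.float64)
    f0 = numpy.asarray(f(y0), dtype=numpy.float64)
    jac = numpy.zeros((f0.size, y0.size), dtype=numpy.float64)
    for j in range(y0.size):
        y = y0.copy()
        y[j] += h
        jac[:, j] = (numpy.asarray(f(y), dtype=numpy.float64) - f0) / h
    return jac

# f(y) = [y0*y1, y0 + y1**2] has the analytical Jacobian [[y1, y0], [1, 2*y1]].
f = lambda y: numpy.array([y[0] * y[1], y[0] + y[1] ** 2])
J = finite_difference_jacobian(f, [2.0, 3.0])
assert numpy.allclose(J, [[3.0, 2.0], [1.0, 6.0]], atol=1e-4)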

Example 5

Project: hifive Source File: get_hic_interval.py
def run(args):
    if not args.image is None and args.pdf and "pyx" not in sys.modules.keys():
        parser.error("-p/--pdf requires the package 'pyx'")
    hic = HiC(args.project, 'r', silent=args.silent)
    if 'binned' in hic.fends['/'].attrs and hic.fends['/'].attrs['binned'] is not None:
        binned = True
        chr_indices = 'bin_indices'
        fends = 'bins'
    else:
        binned = False
        chr_indices = 'chr_indices'
        fends = 'fends'
    if args.stop == 0 or args.stop is None:
        maxstop = hic.fends[fends]['stop'][hic.fends[chr_indices][hic.chr2int[args.chrom] + 1] - 1]
    else:
        maxstop = args.stop
    if args.stop is None:
        args.stop = maxstop
    if args.start is None:
        args.start = hic.fends[fends]['start'][hic.fends[chr_indices][hic.chr2int[args.chrom]]]
    if not args.chrom2 is None:
        if args.stop2 == 0 or args.stop2 is None:
            maxstop2 = hic.fends[fends]['stop'][hic.fends[chr_indices][hic.chr2int[args.chrom2] + 1] - 1]
        else:
            maxstop2 = args.stop2
        if args.stop2 is None:
            args.stop2 = maxstop2
        if args.start2 is None:
            args.start2 = hic.fends[fends]['start'][hic.fends[chr_indices][hic.chr2int[args.chrom2]]]
    else:
        if args.maxdist is None:
            args.maxdist = 0
        if args.maxdist == 0 or args.maxdist >= (maxstop - args.start) / 2:
            arraytype = 'upper'
        else:
            arraytype = 'compact'
    kwargs = {}
    for arg in args.keywords:
        temp = arg.split("=")
        if temp[1] in ["True", "TRUE", "true"]:
            temp[1] = True
        elif temp[1] in ["False", "FALSE", "false"]:
            temp[1] = False
        elif temp[1][0] == "(":
            temp[1] = temp[1].strip('()').split(',')
            for i in range(len(temp[1])):
                temp[1][i] = int(temp[1][i])
            temp[1] = tuple(temp[1])
        elif temp[1][0] == "[":
            temp[1] = temp[1].strip('[]').split(',')
            for i in range(len(temp[1])):
                temp[1][i] = int(temp[1][i])
        else:
            try:
                temp[1] = int(temp[1])
            except:
                # strip off extra characters introduced by galaxy into color format
                temp[1] = temp[1].replace('__pd__','')
        kwargs[temp[0]] = temp[1]
    if not args.chrom2 is None:
        data, mapping1, mapping2 = hic.trans_heatmap(chrom1=args.chrom, chrom2=args.chrom2, binsize=args.binsize,
                                                     start1=args.start, stop1=args.stop, start2=args.start2,
                                                     stop2=args.stop2, datatype=args.datatype,
                                                     maxdistance=args.maxdist, returnmapping=True, skipfiltered=True,
                                                     dynamically_binned=args.dynamic,
                                                     expansion_binsize=args.expbinsize, minobservations=args.minobs,
                                                     searchdistance=args.search, removefailed=args.remove)
    else:
        data, mapping = hic.cis_heatmap(chrom=args.chrom, binsize=args.binsize, start=args.start,
                                        stop=args.stop, datatype=args.datatype, arraytype=arraytype,
                                        maxdistance=args.maxdist, returnmapping=True, skipfiltered=True,
                                        dynamically_binned=args.dynamic, expansion_binsize=args.expbinsize,
                                        minobservations=args.minobs, searchdistance=args.search,
                                        removefailed=args.remove)
    output = open(args.output, 'w')
    if args.matrix:
        if args.chrom2 is None:
            diag = int(hic.binned is not None)
            if arraytype == 'upper':
                temp = numpy.zeros((mapping.shape[0], mapping.shape[0]), dtype=numpy.float64)
                indices = numpy.triu_indices(mapping.shape[0], 1 - diag)
                where = numpy.where(data[:, 1] > 0)[0]
                temp[indices[0][where], indices[1][where]] = data[where, 0] / data[where, 1]
                temp[indices[1][where], indices[0][where]] += data[where, 0] / data[where, 1]
            else:
                temp = numpy.zeros((mapping.shape[0], mapping.shape[0]), dtype=numpy.float64)
                for i in range(mapping.shape[0] - 1 + diag):
                    where = numpy.where(data[i, :, 1] > 0)[0]
                    temp[i, where + i + 1 - diag] = data[i, where, 0] / data[i, where, 1]
                indices = numpy.triu_indices(mapping.shape[0], 1 - diag)
                temp[indices[1], indices[0]] += temp[indices]
        else:
            temp = numpy.zeros((data.shape[0], data.shape[1]), dtype=numpy.float64)
            where = numpy.where(data[:, :, 1] > 0)
            temp[where] = data[where[0], where[1], 0] / data[where[0], where[1], 1]
        if args.datatype == 'raw':
            for i in range(temp.shape[0]):
                tempout = []
                for j in range(temp.shape[1]):
                    tempout.append("%i" % temp[i, j])
                print >> output, '\t'.join(tempout)
        else:
            for i in range(temp.shape[0]):
                tempout = []
                for j in range(temp.shape[1]):
                    tempout.append("%0.6f" % temp[i, j])
                print >> output, '\t'.join(tempout)
    else:
        if args.chrom2 is None:
            diag = binned
            if arraytype == 'upper':
                pos = 0
                for i in range(mapping.shape[0] - 1 + diag):
                    for j in range(i + 1 - diag, mapping.shape[0]):
                        if data[pos, 0] > 0.0 and data[pos, 1] > 0.0:
                            print >> output, "chr%s\t%i\t%i\tchr%s\t%i\t%i\t%f" % (args.chrom, mapping[i, 0],
                                                                           mapping[i, 1], args.chrom,
                                                                           mapping[j, 0], mapping[j, 1],
                                                                           numpy.log2(data[pos, 0] / data[pos, 1]))
                        pos += 1
            else:
                for i in range(mapping.shape[0] - 1 + diag):
                    for pos in range(min(mapping.shape[0] - i - 1 + diag, data.shape[1])):
                        j = i + pos + 1 - diag
                        if data[i, pos, 0] > 0.0 and data[i, pos, 1] > 0.0:
                            print >> output, "chr%s\t%i\t%i\tchr%s\t%i\t%i\t%f" % (args.chrom, mapping[i, 0],
                                                                            mapping[i, 1], args.chrom,
                                                                            mapping[j, 0], mapping[j, 1],
                                                                            numpy.log2(data[i, pos, 0] /
                                                                            data[i, pos, 1]))
        else:
            for i in range(mapping1.shape[0]):
                for j in range(mapping2.shape[0]):
                    if data[i, j, 0] > 0.0 and data[i, j, 1] > 0.0:
                        print >> output, "chr%s\t%i\t%i\tchr%s\t%i\t%i\t%f" % (args.chrom,
                                                                        mapping1[i, 0], mapping1[i, 1],
                                                                        args.chrom2, mapping2[j, 0], mapping2[j, 1],
                                                                        numpy.log2(data[i, j, 0] / data[i, j, 1]))
    output.close()
    if not args.image is None:
        width = max(5.0, (args.stop - args.start) / 1000000.)
        if args.datatype == 'enrichment':
            symmetricscaling = True
        else:
            symmetricscaling = False
        if 'symmetricscaling' in kwargs:
            symmetricscaling = kwargs['symmetricscaling']
        if not args.chrom2 is None:
            img, minscore, maxscore = plot_full_array(data, returnscale=True, symmetricscaling=symmetricscaling,
                                                      silent=args.silent, **kwargs)
            offset = 0.0
            height = (width / data.shape[0]) * data.shape[1]
        elif arraytype == 'compact':
            if args.rotate:
                img, minscore, maxscore = plot_diagonal_from_compact_array(data, returnscale=True,
                                          symmetricscaling=symmetricscaling, silent=args.silent,
                                          diagonal_included=diag, **kwargs)
                offset = width / 2. / (data.shape[0] * 2 - 1 + diag)
                height = width / (data.shape[0] * 2.0 - 2) * data.shape[1]
            else:
                img, minscore, maxscore = plot_compact_array(data, returnscale=True,
                                          symmetricscaling=symmetricscaling, silent=args.silent,
                                          diagonal_included=diag, **kwargs)
                offset = 0.0
                height = width
        else:
            if args.rotate:
                img, minscore, maxscore = plot_diagonal_from_upper_array(data, returnscale=True,
                                          symmetricscaling=symmetricscaling, silent=args.silent,
                                          diagonal_included=diag, **kwargs)
                offset = width / 2. / (mapping.shape[0] * 2 - 1 + diag)
                height = width / 2.
            else:
                img, minscore, maxscore = plot_upper_array(data, returnscale=True,
                                          symmetricscaling=symmetricscaling, silent=args.silent,
                                          diagonal_included=diag, **kwargs)
                offset = 0.0
                height = width
        if args.pdf:
            c = canvas.canvas()
            if args.chrom2 is None:
                c1 = canvas.canvas([canvas.clip(path.rect(0, 0, width, height))])
                c1.insert(bitmap.bitmap(-offset, -offset, img, width=width))
            else:
                c1 = canvas.canvas([canvas.clip(path.rect(0, 0, width, height))])
                c1.insert(bitmap.bitmap(-offset, -offset, img, width=width))
            c.insert(c1)
            if args.ticks and args.binsize > 0:
                if args.chrom2 is None:
                    c.stroke(path.line(0, 0, width, 0))
                    xmin = (mapping[0, 0] + mapping[0, 1]) / 2
                    xmax = (mapping[-1, 0] + mapping[-1, 1]) / 2
                    #order = int(floor(log10(xmax - xmin))) - 1
                    #step = int(floor((xmax - xmin) / (10.0 ** order))) * 10 ** order
                    
                    order = int(floor(log10((xmax - xmin) / (width * 2.0))))
                    step = int(floor((xmax - xmin) / (width * 2.0) / (10.0 ** order))) * 10 ** order
                    values = numpy.arange(((xmin - 1) / step + 1) * step, (xmax / step) * step + 1, step)
                    ticks = (values - float(mapping[0, 0] + mapping[0, 1]) / 2) / (mapping[-1, 0] -
                                                                                   mapping[0, 0]) * width
                    for i in range(values.shape[0]):
                        c.stroke(path.line(ticks[i], 0, ticks[i], -0.25), [style.linewidth.Thin])
                        c.text(ticks[i], -0.3, "%0.2e" % values[i],
                               [text.valign.middle, text.halign.left, text.size(-2), trafo.rotate(-90)])
                    if not args.rotate:
                        c.stroke(path.line(width, 0, width, height))
                        for i in range(values.shape[0]):
                            c.stroke(path.line(width, height - ticks[i], width + 0.25, height - ticks[i]), [style.linewidth.Thin])
                            c.text(width + 0.3, height - ticks[i], "%0.2e" % values[i], [text.valign.middle, text.halign.left,
                                                                              text.size(-2)])
                else:
                    c.stroke(path.line(0, 0, width, 0))
                    xmin = (mapping1[0, 0] + mapping1[0, 1]) / 2
                    xmax = (mapping1[-1, 0] + mapping1[-1, 1]) / 2
                    order = int(floor(log10((xmax - xmin) / (width * 2.0))))
                    step = int(floor((xmax - xmin) / (width * 2.0) / (10.0 ** order))) * 10 ** order
                    values = numpy.arange(((xmin - 1) / step + 1) * step, (xmax / step) * step + 1, step)
                    ticks = (values - float(mapping1[0, 0] + mapping1[0, 1]) / 2) / (mapping1[-1, 0] -
                                                                                     mapping1[0, 0]) * width
                    for i in range(values.shape[0]):
                        c.stroke(path.line(ticks[i], 0, ticks[i], -0.25), [style.linewidth.Thin])
                        c.text(ticks[i], -0.3, "%0.2e" % values[i],
                               [text.valign.middle, text.halign.left, text.size(-2), trafo.rotate(-90)])
                    c.stroke(path.line(0, 0, width, 0))
                    xmin = (mapping2[0, 0] + mapping2[0, 1]) / 2
                    xmax = (mapping2[-1, 0] + mapping2[-1, 1]) / 2
                    order = int(floor(log10((xmax - xmin) / (width * 2.0))))
                    step = int(floor((xmax - xmin) / (width * 2.0) / (10.0 ** order))) * 10 ** order
                    values = numpy.arange(((xmin - 1) / step + 1) * step, (xmax / step) * step + 1, step)
                    ticks = (values - float(mapping2[0, 0] + mapping2[0, 1]) / 2) / (mapping2[-1, 0] -
                                                                                     mapping2[0, 0]) * height
                    for i in range(values.shape[0]):
                        c.stroke(path.line(width, height - ticks[i], width + 0.25, height - ticks[i]), [style.linewidth.Thin])
                        c.text(width + 0.3, height - ticks[i], "%0.2e" % values[i],
                               [text.valign.middle, text.halign.left, text.size(-2)])
            if args.legend:
                if 'min_color' in kwargs:
                    min_color = kwargs['min_color']
                else:
                    min_color = "0000ff"
                if 'mid_color' in kwargs:
                    mid_color = kwargs['mid_color']
                else:
                    mid_color = "ffffff"
                if 'max_color' in kwargs:
                    max_color = kwargs['max_color']
                else:
                    max_color = "ff0000"
                if 'logged' in kwargs:
                    logged = kwargs['logged']
                else:
                    logged = True
                c.insert(plot_key(min_score=minscore, max_score=maxscore, height=0.25, width=min(5., width),
                                  orientation='top', num_ticks=5, min_color=min_color,
                                  mid_color=mid_color, max_color=max_color,
                                  log_display=False), [trafo.translate(width * 0.5 - min(2.5, width * 0.5), height + 0.25)])
                if logged:
                    label = "Log2 "
                else:
                    label = ""
                if args.datatype == 'enrichment':
                    c.text(width * 0.5, height + 0.8, "%sEnrichment" % label, [text.halign.center, text.valign.bottom,
                                                                       text.size(-2)])
                elif args.datatype == 'raw':
                    c.text(width * 0.5, height + 0.8, "%sCounts" % label, [text.halign.center, text.valign.bottom,
                                                                   text.size(-2)])
                else:
                    c.text(width * 0.5, height + 0.8, "%sNormalized Counts" % label,
                           [text.halign.center, text.valign.bottom, text.size(-2)])

            c.writePDFfile(args.image)
            if len(args.image.split('.')) <= 1 or args.image.split('.')[-1] != 'pdf':
                subprocess.call('mv %s.pdf %s' % (args.image, args.image), shell=True)

        else:
            img_format = args.image.split('.')[-1].upper()
            if img_format not in ['PNG', 'TIF', 'JPG', 'JPEG']:
                img_format = 'PNG'
            img.save(args.image, img_format)

Example 6

Project: polar2grid Source File: ll2cr.py
Function: ll2cr
def ll2cr(lon_arr, lat_arr, grid_info, fill_in=numpy.nan, inplace=True):
    """Project longitude and latitude points to columns and rows in the specified grid in place

    :param lon_arr: Numpy array of longitude floats
    :param lat_arr: Numpy array of latitude floats
    :param grid_info: dictionary of grid information (see below)
    :param fill_in: Fill value for input longitude and latitude arrays and used for output
    :returns: tuple(points_in_grid, cols_out, rows_out)

    The provided grid info must have the following parameters (optional grids mean dynamic):

        - proj4_definition
        - cell_width
        - cell_height
        - width (optional/None)
        - height (optional/None)
        - origin_x (optional/None)
        - origin_y (optional/None)

    Steps taken in this function:

        1. Convert (lon, lat) points to (X, Y) points in the projection space
        2. If grid is missing some parameters (dynamic grid), then fill them in
        3. Convert (X, Y) points to (column, row) points in the grid space
    """
    p = grid_info["proj4_definition"]
    cw = grid_info["cell_width"]
    ch = grid_info["cell_height"]
    w = grid_info.get("width", None)
    h = grid_info.get("height", None)
    ox = grid_info.get("origin_x", None)
    oy = grid_info.get("origin_y", None)
    is_static = None not in [w, h, ox, oy]

    # C code requires 64-bit floats and to do in place it must be writeable, otherwise we need to make a copy
    is_double = lon_arr.dtype == numpy.float64 and lat_arr.dtype == numpy.float64
    is_writeable = lon_arr.flags.writeable and lat_arr.flags.writeable
    lon_orig = lon_arr
    lat_orig = lat_arr
    copy_result = False
    # XXX: Check that automatic casting can't be done on the C side
    if not is_double or not is_writeable:
        LOG.debug("Copying longitude and latitude arrays because inplace processing could not be done")
        copy_result = True
        lon_arr = lon_arr.astype(numpy.float64)
        lat_arr = lat_arr.astype(numpy.float64)

    if is_static:
        LOG.debug("Running static version of ll2cr...")
        points_in_grid = _ll2cr.ll2cr_static(lon_arr, lat_arr, fill_in, p, cw, ch, w, h, ox, oy)
    else:
        LOG.debug("Running dynamic version of ll2cr...")
        results = _ll2cr.ll2cr_dynamic(lon_arr, lat_arr, fill_in, p, cw, ch,
                                    width=w, height=h, origin_x=ox, origin_y=oy)
        points_in_grid, lon_arr, lat_arr, origin_x, origin_y, width, height = results
        # edit the grid info dictionary in place
        grid_info["origin_x"] = origin_x
        grid_info["origin_y"] = origin_y
        grid_info["width"] = width
        grid_info["height"] = height

    if copy_result and inplace:
        LOG.debug("Copying result arrays back to provided inplace array")
        lon_orig[:] = lon_arr[:]
        lat_orig[:] = lat_arr[:]

    return points_in_grid, lon_orig, lat_orig
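
The dtype and writeability guard above is a common pattern when passing numpy buffers to C code that works in place: arrays that are already float64 and writeable are used directly, everything else is converted to a copy whose results are optionally written back. A minimal sketch of the same guard, independent of the _ll2cr extension (the helper name is made up for illustration):

import numpy

def ensure_float64_writeable(arr):
    # Return (buffer, copied): a float64, writeable buffer for arr, copying if needed.
    if arr.dtype == numpy.float64 and arr.flags.writeable:
        return arr, False
    return arr.astype(numpy.float64), True

lons = numpy.arange(6, dtype=numpy.float32)
buf, copied = ensure_float64_writeable(lons)
buf += 360.0              # in-place work that a C routine might perform
if copied:
    lons[:] = buf         # propagate the results back to the caller's array
assert lons.dtype == numpy.float32 and lons[0] == 360.0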

Example 7

Project: westpa Source File: weed_driver.py
    def prepare_new_iteration(self):
        n_iter = self.sim_manager.n_iter
        we_driver = self.sim_manager.we_driver
        
        if we_driver.target_states and self.do_reweight:
            log.warning('equilibrium reweighting requested but target states (sinks) present; reweighting disabled')
            return 

        if not self.do_reweight:
            # Reweighting not requested
            log.debug('equilibrium reweighting not enabled') 
            return

        with self.data_manager.lock:
            weed_global_group = self.data_manager.we_h5file.require_group('weed')
            last_reweighting = long(weed_global_group.attrs.get('last_reweighting', 0))

        if n_iter - last_reweighting < self.reweight_period:
            # Not time to reweight yet
            log.debug('not reweighting')
            return
        else:
            log.debug('reweighting')

        mapper = we_driver.bin_mapper
        bins = we_driver.next_iter_binning
        n_bins = len(bins)

        # Create storage for ourselves
        with self.data_manager.lock:
            iter_group = self.data_manager.get_iter_group(n_iter)
            try:
                del iter_group['weed']
            except KeyError:
                pass

            weed_iter_group = iter_group.create_group('weed')
            avg_populations_ds = weed_iter_group.create_dataset('avg_populations', shape=(n_bins,), dtype=numpy.float64)
            unc_populations_ds = weed_iter_group.create_dataset('unc_populations', shape=(n_bins,), dtype=numpy.float64)
            avg_flux_ds = weed_iter_group.create_dataset('avg_fluxes', shape=(n_bins,n_bins), dtype=numpy.float64)
            unc_flux_ds = weed_iter_group.create_dataset('unc_fluxes', shape=(n_bins,n_bins), dtype=numpy.float64)
            avg_rates_ds = weed_iter_group.create_dataset('avg_rates', shape=(n_bins,n_bins), dtype=numpy.float64)
            unc_rates_ds = weed_iter_group.create_dataset('unc_rates', shape=(n_bins,n_bins), dtype=numpy.float64)

        averager = self.get_rates(n_iter, mapper)

        with self.data_manager.flushing_lock():
            avg_populations_ds[...] = averager.average_populations
            unc_populations_ds[...] = averager.stderr_populations
            avg_flux_ds[...] = averager.average_flux
            unc_flux_ds[...] = averager.stderr_flux
            avg_rates_ds[...] = averager.average_rate
            unc_rates_ds[...] = averager.stderr_rate

            binprobs = numpy.fromiter(imap(operator.attrgetter('weight'),bins), dtype=numpy.float64, count=n_bins)
            orig_binprobs = binprobs.copy()

        westpa.rc.pstatus('Calculating equilibrium reweighting using window size of {:d}'.format(self.eff_windowsize))
        westpa.rc.pstatus('\nBin probabilities prior to reweighting:\n{!s}'.format(binprobs))
        westpa.rc.pflush()

        probAdjustEquil(binprobs, averager.average_rate, averager.stderr_rate)
        
        # Check to see if reweighting has set non-zero bins to zero probability (should never happen)
        assert (~((orig_binprobs > 0) & (binprobs == 0))).all(), 'populated bin reweighted to zero probability'
        
        # Check to see if reweighting has set zero bins to nonzero probability (may happen)
        z2nz_mask = (orig_binprobs == 0) & (binprobs > 0) 
        if (z2nz_mask).any():
            westpa.rc.pstatus('Reweighting would assign nonzero probability to an empty bin; not reweighting this iteration.')
            westpa.rc.pstatus('Empty bins assigned nonzero probability: {!s}.'
                                .format(numpy.array_str(numpy.arange(n_bins)[z2nz_mask])))
        else:
            westpa.rc.pstatus('\nBin populations after reweighting:\n{!s}'.format(binprobs))
            for (bin, newprob) in izip(bins, binprobs):
                bin.reweight(newprob)

            weed_global_group.attrs['last_reweighting'] = n_iter

        assert (abs(1 - numpy.fromiter(imap(operator.attrgetter('weight'),bins), dtype=numpy.float64, count=n_bins).sum())
                < EPS * numpy.fromiter(imap(len,bins), dtype=numpy.int, count=n_bins).sum())

        westpa.rc.pflush()

Example 8

Project: golismero Source File: em.py
    def cluster_vectorspace(self, vectors, trace=False):
        assert len(vectors) > 0

        # set the parameters to initial values
        dimensions = len(vectors[0])
        means = self._means
        priors = self._priors
        if not priors:
            priors = self._priors = numpy.ones(self._num_clusters,
                                        numpy.float64) / self._num_clusters
        covariances = self._covariance_matrices
        if not covariances:
            covariances = self._covariance_matrices = \
                [ numpy.identity(dimensions, numpy.float64)
                  for i in range(self._num_clusters) ]

        # do the E and M steps until the likelihood plateaus
        lastl = self._loglikelihood(vectors, priors, means, covariances)
        converged = False

        while not converged:
            if trace: print 'iteration; loglikelihood', lastl
            # E-step, calculate hidden variables, h[i,j]
            h = numpy.zeros((len(vectors), self._num_clusters),
                numpy.float64)
            for i in range(len(vectors)):
                for j in range(self._num_clusters):
                    h[i,j] = priors[j] * self._gaussian(means[j],
                                               covariances[j], vectors[i])
                h[i,:] /= sum(h[i,:])

            # M-step, update parameters - cvm, p, mean
            for j in range(self._num_clusters):
                covariance_before = covariances[j]
                new_covariance = numpy.zeros((dimensions, dimensions),
                            numpy.float64)
                new_mean = numpy.zeros(dimensions, numpy.float64)
                sum_hj = 0.0
                for i in range(len(vectors)):
                    delta = vectors[i] - means[j]
                    new_covariance += h[i,j] * \
                        numpy.multiply.outer(delta, delta)
                    sum_hj += h[i,j]
                    new_mean += h[i,j] * vectors[i]
                covariances[j] = new_covariance / sum_hj
                means[j] = new_mean / sum_hj
                priors[j] = sum_hj / len(vectors)

                # bias term to stop covariance matrix being singular
                covariances[j] += self._bias * \
                    numpy.identity(dimensions, numpy.float64)

            # calculate likelihood - FIXME: may be broken
            l = self._loglikelihood(vectors, priors, means, covariances)

            # check for convergence
            if abs(lastl - l) < self._conv_threshold:
                converged = True
            lastl = l

Example 9

Project: RMG-Py Source File: network.py
    def calculateMicrocanonicalRates(self):
        """
        Calculate and return arrays containing the microcanonical rate
        coefficients :math:`k(E)` for the isomerization, dissociation, and
        association path reactions in the network.
        """

        T = self.T
        Elist = self.Elist
        Jlist = self.Jlist
        densStates = self.densStates
        Ngrains = len(Elist)
        Nisom = len(self.isomers)
        Nreac = len(self.reactants)
        Nprod = len(self.products)
        NJ = 1 if self.activeJRotor else len(Jlist)

        self.Kij = numpy.zeros([Nisom,Nisom,Ngrains,NJ], numpy.float64)
        self.Gnj = numpy.zeros([Nreac+Nprod,Nisom,Ngrains,NJ], numpy.float64)
        self.Fim = numpy.zeros([Nisom,Nreac,Ngrains,NJ], numpy.float64)

        isomers = [isomer.species[0] for isomer in self.isomers]
        reactants = [reactant.species for reactant in self.reactants]
        products = [product.species for product in self.products]

        for rxn in self.pathReactions:
            if rxn.reactants[0] in isomers and rxn.products[0] in isomers:
                # Isomerization
                reac = isomers.index(rxn.reactants[0])
                prod = isomers.index(rxn.products[0])
            elif rxn.reactants[0] in isomers and rxn.products in reactants:
                # Dissociation (reversible)
                reac = isomers.index(rxn.reactants[0])
                prod = reactants.index(rxn.products) + Nisom
            elif rxn.reactants[0] in isomers and rxn.products in products:
                # Dissociation (irreversible)
                reac = isomers.index(rxn.reactants[0])
                prod = products.index(rxn.products) + Nisom + Nreac
            elif rxn.reactants in reactants and rxn.products[0] in isomers:
                # Association (reversible)
                reac = reactants.index(rxn.reactants) + Nisom
                prod = isomers.index(rxn.products[0])
            elif rxn.reactants in products and rxn.products[0] in isomers:
                # Association (irreversible)
                reac = products.index(rxn.reactants) + Nisom + Nreac
                prod = isomers.index(rxn.products[0])
            else:
                raise NetworkError('Unexpected type of path reaction "{0}"'.format(rxn))
        
            # Compute the microcanonical rate coefficient k(E)
            reacDensStates = densStates[reac,:,:]
            prodDensStates = densStates[prod,:,:]
            kf, kr = rxn.calculateMicrocanonicalRateCoefficient(self.Elist, self.Jlist, reacDensStates, prodDensStates, T)
                        
            # Check for NaN (just to be safe)
            if numpy.isnan(kf).any() or numpy.isnan(kr).any():
                raise NetworkError('One or more k(E) values is NaN for path reaction "{0}".'.format(rxn))

            # Determine the expected value of the rate coefficient k(T)
            if rxn.canTST():
                # RRKM theory was used to compute k(E), so use TST to compute k(T)
                kf_expected = rxn.calculateTSTRateCoefficient(T)
            else:
                # ILT was used to compute k(E), so use high-P kinetics to compute k(T)
                kf_expected = rxn.kinetics.getRateCoefficient(T)
            
            # Determine the expected value of the equilibrium constant (Kc)
            Keq_expected = self.eqRatios[prod] / self.eqRatios[reac] 

            # Determine the actual values of k(T) and Keq
            C0 = 1e5 / (constants.R * T)
            kf0 = 0.0; kr0 = 0.0; Qreac = 0.0; Qprod = 0.0
            for s in range(NJ):
                kf0 += numpy.sum(kf[:,s] * reacDensStates[:,s] * (2*Jlist[s]+1) * numpy.exp(-Elist / constants.R / T)) 
                kr0 += numpy.sum(kr[:,s] * prodDensStates[:,s] * (2*Jlist[s]+1) * numpy.exp(-Elist / constants.R / T)) 
                Qreac += numpy.sum(reacDensStates[:,s] * (2*Jlist[s]+1) * numpy.exp(-Elist / constants.R / T)) 
                Qprod += numpy.sum(prodDensStates[:,s] * (2*Jlist[s]+1) * numpy.exp(-Elist / constants.R / T)) 
            kr0 *= C0 ** (len(rxn.products) - len(rxn.reactants))
            Qprod *= C0 ** (len(rxn.products) - len(rxn.reactants))
            kf_actual = kf0 / Qreac if Qreac > 0 else 0
            kr_actual = kr0 / Qprod if Qprod > 0 else 0
            Keq_actual = kf_actual / kr_actual if kr_actual > 0 else 0

            error = False; warning = False
            k_ratio = 1.0
            Keq_ratio = 1.0
            # Check that the forward rate coefficient is correct
            if kf_actual > 0:
                k_ratio = kf_expected / kf_actual
                # Rescale kf and kr so that we get kf_expected
                kf *= k_ratio
                kr *= k_ratio
                # Decide if the disagreement warrants a warning or error
                if 0.8 < k_ratio < 1.25:
                    # The difference is probably just due to numerical error
                    pass
                elif 0.5 < k_ratio < 2.0:
                    # Might be numerical error, but is pretty large, so warn
                    warning = True
                else:
                    # Disagreement is too large, so raise exception
                    error = True
                    
            # Check that the equilibrium constant is correct
            if Keq_actual > 0:
                Keq_ratio = Keq_expected / Keq_actual
                # Rescale kr so that we get Keq_expected
                kr /= Keq_ratio
                # In RMG jobs this never represents an error because we are
                # missing or using approximate degrees of freedom anyway
                if self.rmgmode:
                    pass
                # Decide if the disagreement warrants a warning or error
                elif 0.8 < Keq_ratio < 1.25:
                    # The difference is probably just due to numerical error
                    pass
                elif 0.5 < Keq_ratio < 2.0:
                    # Might be numerical error, but is pretty large, so warn
                    warning = True
                else:
                    # Disagreement is too large, so raise exception
                    error = True
                               
            if rxn.reactants[0] in isomers and rxn.products[0] in isomers:
                # Isomerization
                self.Kij[prod,reac,:,:] = kf
                self.Kij[reac,prod,:,:] = kr
            elif rxn.reactants[0] in isomers and rxn.products in reactants:
                # Dissociation (reversible)
                self.Gnj[prod-Nisom,reac,:,:] = kf
                self.Fim[reac,prod-Nisom,:,:] = kr
            elif rxn.reactants[0] in isomers and rxn.products in products:
                # Dissociation (irreversible)
                self.Gnj[prod-Nisom,reac,:,:] = kf
            elif rxn.reactants in reactants and rxn.products[0] in isomers:
                # Association (reversible)
                self.Fim[prod,reac-Nisom,:,:] = kf 
                self.Gnj[reac-Nisom,prod,:,:] = kr
            elif rxn.reactants in products and rxn.products[0] in isomers:
                # Association (irreversible)
                self.Gnj[reac-Nisom,prod,:,:] = kr
            else:
                raise NetworkError('Unexpected type of path reaction "{0}"'.format(rxn))

            # If the k(E) values are invalid (in that they give the wrong 
            # kf(T) or kr(T) when integrated), then raise an exception
            if error or warning:
                logging.warning('For path reaction {0!s}:'.format(rxn))
                logging.warning('    Expected kf({0:g} K) = {1:g}'.format(T, kf_expected))
                logging.warning('      Actual kf({0:g} K) = {1:g}'.format(T, kf_actual))
                logging.warning('    Expected Keq({0:g} K) = {1:g}'.format(T, Keq_expected))
                logging.warning('      Actual Keq({0:g} K) = {1:g}'.format(T, Keq_actual))
                if error:
                    raise InvalidMicrocanonicalRateError('Invalid k(E) values computed for path reaction "{0}".'.format(rxn), k_ratio, Keq_ratio)
                else:
                    logging.warning('Significant corrections to k(E) to be consistent with high-pressure limit for path reaction "{0}".'.format(rxn))

#        import pylab
#        for prod in range(Nisom):
#            for reac in range(prod):
#                pylab.semilogy(self.Elist*0.001, self.Kij[prod,reac,:])
#        for prod in range(Nreac+Nprod):
#            for reac in range(Nisom):
#                pylab.semilogy(self.Elist*0.001, self.Gnj[prod,reac,:])
#        pylab.show()

        return self.Kij, self.Gnj, self.Fim

Example 10

Project: pyscf Source File: numpy_helper.py
def zdot(a, b, alpha=1, c=None, beta=0):
    '''Matrix-matrix multiplication for double complex arrays using Gauss's
    complex multiplication algorithm
    '''
    atype = a.dtype
    btype = b.dtype

    if atype == numpy.float64 and btype == numpy.float64:
        if c is None or c.dtype == numpy.float64:
            return ddot(a, b, alpha, c, beta)
        else:
            cr = numpy.asarray(c.real, order='C')
            c.real = ddot(a, b, alpha, cr, beta)
            return c

    if atype == numpy.float64 and btype == numpy.complex128:
        br = numpy.asarray(b.real, order='C')
        bi = numpy.asarray(b.imag, order='C')
        cr = ddot(a, br, alpha)
        ci = ddot(a, bi, alpha)
        ab = cr + ci*1j

    elif atype == numpy.complex128 and btype == numpy.float64:
        ar = numpy.asarray(a.real, order='C')
        ai = numpy.asarray(a.imag, order='C')
        cr = ddot(ar, b, alpha)
        ci = ddot(ai, b, alpha)
        ab = cr + ci*1j

    elif atype == numpy.complex128 and btype == numpy.complex128:
        k1 = ddot(a.real+a.imag, b.real.copy(), alpha)
        k2 = ddot(a.real.copy(), b.imag-b.real, alpha)
        k3 = ddot(a.imag.copy(), b.real+b.imag, alpha)
        ab = k1-k3 + (k1+k2)*1j

    else:
        ab = numpy.dot(a, b) * alpha

    if c is None:
        c = ab
    else:
        if beta == 0:
            c[:] = 0
        else:
            c *= beta
        c += ab
    return c
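
The complex-complex branch above is Gauss's trick: a complex product computed with three real matrix multiplications instead of four. Writing a = X + iY and b = U + iV, the code forms k1 = (X + Y)U, k2 = X(V - U) and k3 = Y(U + V), and returns (k1 - k3) + i(k1 + k2), which equals (XU - YV) + i(XV + YU). A quick numerical check of the identity, with plain numpy.dot in place of ddot:

import numpy

a = numpy.random.standard_normal((4, 4)) + 1j * numpy.random.standard_normal((4, 4))
b = numpy.random.standard_normal((4, 4)) + 1j * numpy.random.standard_normal((4, 4))

# Three real products instead of four.
k1 = numpy.dot(a.real + a.imag, b.real)
k2 = numpy.dot(a.real, b.imag - b.real)
k3 = numpy.dot(a.imag, b.real + b.imag)
ab = (k1 - k3) + 1j * (k1 + k2)

assert numpy.allclose(ab, numpy.dot(a, b))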

Example 11

Project: pymbar Source File: harmonic-oscillators.py
def generate_pmf_data(ndim=1, nbinsperdim=15, nsamples = 1000, K0=20.0, Ku = 100.0, gridscale=0.2, xrange = [[-3,3]]):

  x0 = numpy.zeros([ndim], numpy.float64) # center of base potential
  numbrellas = 1
  nperdim = numpy.zeros([ndim],int)
  for d in range(ndim):
    nperdim[d] = xrange[d][1] - xrange[d][0] + 1
    numbrellas *= nperdim[d]

  print "There are a total of %d umbrellas." % numbrellas

  # Enumerate umbrella centers, and compute the analytical free energy of that umbrella
  print "Constructing umbrellas..."
  ksum = (Ku+K0)/beta
  kprod = (Ku*K0)/(beta*beta)
  f_k_analytical = numpy.zeros(numbrellas, numpy.float64);
  xu_i = numpy.zeros([numbrellas, ndim], numpy.float64) # xu_i[i,:] is the center of umbrella i

  dp = numpy.zeros(ndim,int)
  dp[0] = 1
  for d in range(1,ndim):
    dp[d] = nperdim[d]*dp[d-1]

  umbrella_zero = 0
  for i in range(numbrellas):
    center = []
    for d in range(ndim):
      val = gridscale*((i/dp[d]) % nperdim[d] + xrange[d][0])
      center.append(val)
    center = numpy.array(center)
    xu_i[i,:] = center
    mu2 = numpy.dot(center,center)
    f_k_analytical[i] = numpy.log((ndim*numpy.pi/ksum)**(3/2) *numpy.exp(-kprod*mu2/(2*ksum)))
    if numpy.all(center==0.0):  # assumes that we have one state that is at the zero.
      umbrella_zero = i
    i += 1
    f_k_analytical -= f_k_analytical[umbrella_zero]

  print "Generating %d samples for each of %d umbrellas..." % (nsamples, numbrellas)
  x_n = numpy.zeros([numbrellas * nsamples, ndim], numpy.float64)

  for i in range(numbrellas):
    for dim in range(ndim):
      # Compute mu and sigma for this dimension for sampling from V0(x) + Vu(x).
      # Product of Gaussians: N(x ; a, A) N(x ; b, B) = N(a ; b , A+B) x N(x ; c, C) where
      # C = 1/(1/A + 1/B)
      # c = C(a/A+b/B)
      # A = 1/K0, B = 1/Ku
      sigma = 1.0 / (K0 + Ku)
      mu = sigma * (x0[dim]*K0 + xu_i[i,dim]*Ku)
      # Generate normal deviates for this dimension.
      x_n[i*nsamples:(i+1)*nsamples,dim] = numpy.random.normal(mu, numpy.sqrt(sigma), [nsamples])

  u_kn = numpy.zeros([numbrellas, nsamples*numbrellas], numpy.float64)
  # Compute reduced potential due to V0.
  u_n = beta*(K0/2)*numpy.sum((x_n[:,:] - x0)**2, axis=1)
  for k in range(numbrellas):
    uu = beta*(Ku/2)*numpy.sum((x_n[:,:] - xu_i[k,:])**2, axis=1) # reduced potential due to umbrella k
    u_kn[k,:] = u_n + uu

  return u_kn, u_n, x_n, f_k_analytical
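
The sampling step relies on the product-of-Gaussians identity quoted in the comments: with variances A = 1/K0 and B = 1/Ku, the product of the two densities is, up to a constant factor, a Gaussian with variance C = 1/(1/A + 1/B) = 1/(K0 + Ku) and mean c = C*(a*K0 + b*Ku). A quick numerical check of that identity (K0 and Ku match the function defaults; the centers a and b are arbitrary):

import numpy

def gaussian(x, mean, var):
    return numpy.exp(-0.5 * (x - mean) ** 2 / var) / numpy.sqrt(2 * numpy.pi * var)

K0, Ku = 20.0, 100.0          # inverse variances (spring constants)
a, b = 0.0, 0.4               # centers of the two Gaussians
C = 1.0 / (K0 + Ku)           # combined variance
c = C * (a * K0 + b * Ku)     # combined mean

x = numpy.linspace(-1.0, 1.0, 201)
product = gaussian(x, a, 1.0 / K0) * gaussian(x, b, 1.0 / Ku)
combined = gaussian(x, c, C)

# The two curves agree up to the constant normalization factor N(a; b, A + B).
ratio = product / combined
assert numpy.allclose(ratio, ratio[0])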

Example 12

Project: hedge Source File: diff_shared_fld.py
    @memoize_method
    def get_kernel(self, diff_op, elgroup, for_benchmark=False):
        from cgen import \
                Pointer, POD, Value, ArrayOf, Const, \
                Module, FunctionDeclaration, FunctionBody, Block, \
                Comment, Line, Define, Include, \
                Initializer, If, For, Statement, Assign

        from pycuda.tools import dtype_to_ctype
        from cgen.cuda import CudaShared, CudaGlobal

        discr = self.discr
        d = discr.dimensions
        dims = range(d)
        plan = self.plan
        given = plan.given

        elgroup, = discr.element_groups
        float_type = given.float_type

        f_decl = CudaGlobal(FunctionDeclaration(Value("void", "apply_diff_mat_smem"),
            [Pointer(POD(float_type, "debugbuf")), Pointer(POD(float_type, "field")), ]
            + [Pointer(POD(float_type, "drst%d_global" % i)) for i in dims]
            ))

        par = plan.parallelism

        cmod = Module([
                Include("pycuda-helpers.hpp"),
                ])

        if float_type == numpy.float64:
            cmod.append(Value("texture<fp_tex_double, 1, cudaReadModeElementType>",
                    "diff_rst_mat_tex"))
        elif float_type == numpy.float32:
            rst_channels = given.devdata.make_valid_tex_channel_count(d)
            cmod.append(Value("texture<float%d, 1, cudaReadModeElementType>"
                    % rst_channels, "diff_rst_mat_tex"))
        else:
            raise ValueError("unsupported float type: %s" % float_type)

        # only preimage size variation is supported here
        assert plan.image_dofs_per_el == given.dofs_per_el()
        assert plan.aligned_image_dofs_per_microblock == given.microblock.aligned_floats

        # FIXME: aligned_image_dofs_per_microblock must be divisible
        # by this, therefore hardcoding for now.
        chunk_size = 16

        cmod.extend([
                Line(),
                Define("DIMENSIONS", discr.dimensions),

                Define("IMAGE_DOFS_PER_EL", plan.image_dofs_per_el),
                Define("PREIMAGE_DOFS_PER_EL", plan.preimage_dofs_per_el),
                Define("ALIGNED_IMAGE_DOFS_PER_MB", plan.aligned_image_dofs_per_microblock),
                Define("ALIGNED_PREIMAGE_DOFS_PER_MB", plan.aligned_preimage_dofs_per_microblock),
                Define("ELS_PER_MB", given.microblock.elements),
                Define("IMAGE_DOFS_PER_MB", "(IMAGE_DOFS_PER_EL*ELS_PER_MB)"),
                Line(),
                Define("CHUNK_SIZE", chunk_size),
                Define("CHUNK_DOF", "threadIdx.x"),
                Define("PAR_MB_NR", "threadIdx.y"),
                Define("CHUNK_NR", "threadIdx.z"),
                Define("IMAGE_MB_DOF", "(CHUNK_NR*CHUNK_SIZE+CHUNK_DOF)"),
                Define("IMAGE_EL_DOF", "(IMAGE_MB_DOF - mb_el*IMAGE_DOFS_PER_EL)"),
                Line(),
                Define("MACROBLOCK_NR", "blockIdx.x"),
                Line(),
                Define("PAR_MB_COUNT", par.parallel),
                Define("INLINE_MB_COUNT", par.inline),
                Define("SEQ_MB_COUNT", par.serial),
                Line(),
                Define("GLOBAL_MB_NR_BASE",
                    "(MACROBLOCK_NR*PAR_MB_COUNT*INLINE_MB_COUNT*SEQ_MB_COUNT)"),
                Define("GLOBAL_MB_NR",
                    "(GLOBAL_MB_NR_BASE"
                    "+ (seq_mb_number*PAR_MB_COUNT + PAR_MB_NR)*INLINE_MB_COUNT)"),
                Define("GLOBAL_MB_IMAGE_DOF_BASE", "(GLOBAL_MB_NR*ALIGNED_IMAGE_DOFS_PER_MB)"),
                Define("GLOBAL_MB_PREIMAGE_DOF_BASE", "(GLOBAL_MB_NR*ALIGNED_PREIMAGE_DOFS_PER_MB)"),
                Line(),
                CudaShared(
                    ArrayOf(
                        ArrayOf(
                            ArrayOf(
                                POD(float_type, "smem_field"),
                                "PAR_MB_COUNT"),
                            "INLINE_MB_COUNT"),
                        "ALIGNED_PREIMAGE_DOFS_PER_MB")),
                Line(),
                ])

        S = Statement
        f_body = Block([
            Initializer(Const(POD(numpy.uint16, "mb_el")),
                "IMAGE_MB_DOF / IMAGE_DOFS_PER_EL"),
            Line(),
            ])

        # ---------------------------------------------------------------------
        def get_load_code():
            mb_img_dofs = plan.aligned_image_dofs_per_microblock
            mb_preimg_dofs = plan.aligned_preimage_dofs_per_microblock
            preimg_dofs_over_dofs = (mb_preimg_dofs+mb_img_dofs-1) // mb_img_dofs

            load_code = []
            store_code = []

            var_num = 0
            for load_block in range(preimg_dofs_over_dofs):
                for inl in range(par.inline):
                    # load and store are split for better pipelining
                    # compiler can't figure that out because of branch

                    var = "tmp%d" % var_num
                    var_num += 1
                    load_code.append(POD(float_type, var))

                    block_addr = "%d * ALIGNED_IMAGE_DOFS_PER_MB + IMAGE_MB_DOF" % load_block
                    load_instr = Assign(var,
                        "field[GLOBAL_MB_PREIMAGE_DOF_BASE"
                        " + %d*ALIGNED_PREIMAGE_DOFS_PER_MB"
                        " + %s]" % (inl, block_addr))
                    store_instr = Assign(
                            "smem_field[PAR_MB_NR][%d][%s]" % (inl, block_addr),
                            var
                            )
                    if (load_block+1)*mb_img_dofs >= mb_preimg_dofs:
                        cond = "%s < ALIGNED_PREIMAGE_DOFS_PER_MB" % block_addr
                        load_instr = If(cond, load_instr)
                        store_instr = If(cond, store_instr)

                    load_code.append(load_instr)
                    store_code.append(store_instr)
            return Block(load_code + [Line()] + store_code)

        def get_scalar_diff_code():
            code = []
            for inl in range(par.inline):
                for axis in dims:
                    code.append(
                        Initializer(POD(float_type, "d%drst%d" % (inl, axis)), 0))

            code.append(Line())

            tex_channels = ["x", "y", "z", "w"]

            store_code = Block()
            for inl in range(par.inline):
                for rst_axis in dims:
                    store_code.append(Assign(
                        "drst%d_global[GLOBAL_MB_IMAGE_DOF_BASE + "
                        "%d*ALIGNED_IMAGE_DOFS_PER_MB + IMAGE_MB_DOF]"
                        % (rst_axis, inl),
                        "d%drst%d" % (inl, rst_axis)
                        ))

            from hedge.backends.cuda.tools import unroll
            code.extend([
                Comment("everybody needs to be done with the old data"),
                S("__syncthreads()"),
                Line(),
                get_load_code(),
                Line(),
                Comment("all the new data must be loaded"),
                S("__syncthreads()"),
                Line(),
                ])

            if float_type == numpy.float32:
                code.append(Value("float%d" % rst_channels, "dmat_entries"))

            code.extend([
                POD(float_type, "field_value%d" % inl)
                for inl in range(par.inline)
                ]+[Line()])

            def unroll_body(j):
                result = [
                    Assign("field_value%d" % inl,
                        "smem_field[PAR_MB_NR][%d][mb_el*PREIMAGE_DOFS_PER_EL+%s]" % (inl, j))
                    for inl in range(par.inline)
                    ]

                if float_type == numpy.float32:
                    result.append(Assign("dmat_entries",
                        "tex1Dfetch(diff_rst_mat_tex, IMAGE_EL_DOF + %s*IMAGE_DOFS_PER_EL)" % j))
                    result.extend(
                        S("d%drst%d += dmat_entries.%s * field_value%d"
                            % (inl, axis, tex_channels[axis], inl))
                        for inl in range(par.inline)
                        for axis in dims)
                elif float_type == numpy.float64:
                    result.extend(
                        S("d%(inl)drst%(axis)d += "
                            "fp_tex1Dfetch(diff_rst_mat_tex, %(axis)d "
                            "+ DIMENSIONS*(IMAGE_EL_DOF + %(j)d*IMAGE_DOFS_PER_EL))"
                            "* field_value%(inl)d" % {
                            "inl": inl,
                            "axis": axis,
                            "j": j
                            })
                        for inl in range(par.inline)
                        for axis in dims)
                else:
                    assert False

                return result

            code.append(If("IMAGE_MB_DOF < IMAGE_DOFS_PER_MB", Block(unroll(unroll_body,
                    total_number=plan.preimage_dofs_per_el)
                    +[store_code])))

            return code

        f_body.extend([
            For("unsigned short seq_mb_number = 0",
                "seq_mb_number < SEQ_MB_COUNT",
                "++seq_mb_number",
                Block(get_scalar_diff_code())
                )
            ])

        # finish off ----------------------------------------------------------
        cmod.append(FunctionBody(f_decl, f_body))

        if not for_benchmark and "cuda_dump_kernels" in discr.debug:
            from hedge.tools import open_unique_debug_file
            open_unique_debug_file("diff", ".cu").write(str(cmod))

        mod = SourceModule(cmod,
                keep="cuda_keep_kernels" in discr.debug,
                #options=["--maxrregcount=16"]
                )

        func = mod.get_function("apply_diff_mat_smem")

        if "cuda_diff" in discr.debug:
            print "diff: lmem=%d smem=%d regs=%d" % (
                    func.local_size_bytes,
                    func.shared_size_bytes,
                    func.registers)

        diff_rst_mat_texref = mod.get_texref("diff_rst_mat_tex")
        gpu_diffmats = self.gpu_diffmats(diff_op, elgroup)

        if given.float_type == numpy.float32:
            gpu_diffmats.bind_to_texref_ext(diff_rst_mat_texref, rst_channels)
        elif given.float_type == numpy.float64:
            gpu_diffmats.bind_to_texref_ext(diff_rst_mat_texref,
                    allow_double_hack=True)
        else:
            assert False

        assert given.microblock.aligned_floats % chunk_size == 0
        block = (
                chunk_size,
                plan.parallelism.parallel,
                given.microblock.aligned_floats//chunk_size)

        func.prepare(
                ["PP"] + discr.dimensions*["P"],
                texrefs=[diff_rst_mat_texref])

        return block, func
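
The tail of this example dispatches on `float_type`: float32 goes through a packed `float4` texture fetch, while float64 falls back to `fp_tex1Dfetch` and a texture bound with `allow_double_hack=True`. Below is a minimal, standalone sketch (not hedge code) of the same dtype-keyed dispatch idea, here just mapping the numpy dtype to the C scalar name used in generated source; `c_type_of` is a hypothetical helper.

import numpy

# Hypothetical helper: pick the C scalar type used in generated CUDA code
# based on the numpy float type the discretization was configured with.
def c_type_of(float_type):
    try:
        return {numpy.float32: "float", numpy.float64: "double"}[float_type]
    except KeyError:
        raise TypeError("unsupported float_type: %r" % float_type)

for ft in (numpy.float32, numpy.float64):
    print("%s -> %s (itemsize %d)"
          % (ft.__name__, c_type_of(ft), numpy.dtype(ft).itemsize))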

Example 13

Project: scipy Source File: _linprog.py
def _solve_simplex(T, n, basis, maxiter=1000, phase=2, callback=None,
                   tol=1.0E-12, nit0=0, bland=False):
    """
    Solve a linear programming problem in "standard maximization form" using
    the Simplex Method.

    Minimize :math:`f = c^T x`

    subject to

    .. math::

        Ax = b
        x_i >= 0
        b_j >= 0

    Parameters
    ----------
    T : array_like
        A 2-D array representing the simplex T corresponding to the
        maximization problem.  It should have the form:

        [[A[0, 0], A[0, 1], ..., A[0, n_total], b[0]],
         [A[1, 0], A[1, 1], ..., A[1, n_total], b[1]],
         .
         .
         .
         [A[m, 0], A[m, 1], ..., A[m, n_total], b[m]],
         [c[0],   c[1], ...,   c[n_total],    0]]

        for a Phase 2 problem, or the form:

        [[A[0, 0], A[0, 1], ..., A[0, n_total], b[0]],
         [A[1, 0], A[1, 1], ..., A[1, n_total], b[1]],
         .
         .
         .
         [A[m, 0], A[m, 1], ..., A[m, n_total], b[m]],
         [c[0],   c[1], ...,   c[n_total],   0],
         [c'[0],  c'[1], ...,  c'[n_total],  0]]

         for a Phase 1 problem (a Problem in which a basic feasible solution is
         sought prior to maximizing the actual objective).  T is modified in
         place by _solve_simplex.
    n : int
        The number of true variables in the problem.
    basis : array
        An array of the indices of the basic variables, such that basis[i]
        contains the column corresponding to the basic variable for row i.
        Basis is modified in place by _solve_simplex
    maxiter : int
        The maximum number of iterations to perform before aborting the
        optimization.
    phase : int
        The phase of the optimization being executed.  In phase 1 a basic
        feasible solution is sought and the T has an additional row representing
        an alternate objective function.
    callback : callable, optional
        If a callback function is provided, it will be called within each
        iteration of the simplex algorithm. The callback must have the
        signature `callback(xk, **kwargs)` where xk is the current solution
        vector and kwargs is a dictionary containing the following::
        "T" : The current Simplex algorithm T
        "nit" : The current iteration.
        "pivot" : The pivot (row, column) used for the next iteration.
        "phase" : Whether the algorithm is in Phase 1 or Phase 2.
        "basis" : The indices of the columns of the basic variables.
    tol : float
        The tolerance which determines when a solution is "close enough" to
        zero in Phase 1 to be considered a basic feasible solution or close
        enough to positive to serve as an optimal solution.
    nit0 : int
        The initial iteration number used to keep an accurate iteration total
        in a two-phase problem.
    bland : bool
        If True, choose pivots using Bland's rule [3].  In problems which
        fail to converge due to cycling, using Bland's rule can provide
        convergence at the expense of a less optimal path about the simplex.

    Returns
    -------
    res : OptimizeResult
        The optimization result represented as a ``OptimizeResult`` object.
        Important attributes are: ``x`` the solution array, ``success`` a
        Boolean flag indicating if the optimizer exited successfully and
        ``message`` which describes the cause of the termination. Possible
        values for the ``status`` attribute are:
         0 : Optimization terminated successfully
         1 : Iteration limit reached
         2 : Problem appears to be infeasible
         3 : Problem appears to be unbounded

        See `OptimizeResult` for a description of other attributes.
    """
    nit = nit0
    complete = False

    if phase == 1:
        m = T.shape[0]-2
    elif phase == 2:
        m = T.shape[0]-1
    else:
        raise ValueError("Argument 'phase' to _solve_simplex must be 1 or 2")

    if phase == 2:
        # Check if any artificial variables are still in the basis.
        # If yes, check if any coefficients from this row and a column
        # corresponding to one of the non-artificial variable is non-zero.
        # If found, pivot at this term. If not, start phase 2.
        # Do this for all artificial variables in the basis.
        # Ref: "An Introduction to Linear Programming and Game Theory"
        # by Paul R. Thie, Gerard E. Keough, 3rd Ed,
        # Chapter 3.7 Redundant Systems (pag 102)
        for pivrow in [row for row in range(basis.size)
                       if basis[row] > T.shape[1] - 2]:
            non_zero_row = [col for col in range(T.shape[1] - 1)
                            if T[pivrow, col] != 0]
            if len(non_zero_row) > 0:
                pivcol = non_zero_row[0]
                # variable represented by pivcol enters
                # variable in basis[pivrow] leaves
                basis[pivrow] = pivcol
                pivval = T[pivrow][pivcol]
                T[pivrow, :] = T[pivrow, :] / pivval
                for irow in range(T.shape[0]):
                    if irow != pivrow:
                        T[irow, :] = T[irow, :] - T[pivrow, :]*T[irow, pivcol]
                nit += 1

    if len(basis[:m]) == 0:
        solution = np.zeros(T.shape[1] - 1, dtype=np.float64)
    else:
        solution = np.zeros(max(T.shape[1] - 1, max(basis[:m]) + 1),
                            dtype=np.float64)

    while not complete:
        # Find the pivot column
        pivcol_found, pivcol = _pivot_col(T, tol, bland)
        if not pivcol_found:
            pivcol = np.nan
            pivrow = np.nan
            status = 0
            complete = True
        else:
            # Find the pivot row
            pivrow_found, pivrow = _pivot_row(T, pivcol, phase, tol)
            if not pivrow_found:
                status = 3
                complete = True

        if callback is not None:
            solution[:] = 0
            solution[basis[:m]] = T[:m, -1]
            callback(solution[:n], **{"tableau": T,
                                      "phase":phase,
                                      "nit":nit,
                                      "pivot":(pivrow, pivcol),
                                      "basis":basis,
                                      "complete": complete and phase == 2})

        if not complete:
            if nit >= maxiter:
                # Iteration limit exceeded
                status = 1
                complete = True
            else:
                # variable represented by pivcol enters
                # variable in basis[pivrow] leaves
                basis[pivrow] = pivcol
                pivval = T[pivrow][pivcol]
                T[pivrow, :] = T[pivrow, :] / pivval
                for irow in range(T.shape[0]):
                    if irow != pivrow:
                        T[irow, :] = T[irow, :] - T[pivrow, :]*T[irow, pivcol]
                nit += 1

    return nit, status
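
`_solve_simplex` is a private helper; the float64 solution vector it fills is normally reached through the public wrapper. A minimal sketch (assuming a SciPy version that still provides `method='simplex'`) solving a tiny LP with float64 data:

import numpy as np
from scipy.optimize import linprog

# maximize x + 2y  <=>  minimize -(x + 2y), subject to x + y <= 4, x, y >= 0
c = np.array([-1.0, -2.0], dtype=np.float64)
A_ub = np.array([[1.0, 1.0]], dtype=np.float64)
b_ub = np.array([4.0], dtype=np.float64)

res = linprog(c, A_ub=A_ub, b_ub=b_ub, method='simplex')
print(res.x, res.fun)   # expected optimum near x = (0, 4), f = -8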

Example 14

Project: scipy Source File: cobyla.py
def _minimize_cobyla(fun, x0, args=(), constraints=(),
                     rhobeg=1.0, tol=1e-4, iprint=1, maxiter=1000,
                     disp=False, catol=2e-4, **unknown_options):
    """
    Minimize a scalar function of one or more variables using the
    Constrained Optimization BY Linear Approximation (COBYLA) algorithm.

    Options
    -------
    rhobeg : float
        Reasonable initial changes to the variables.
    tol : float
        Final accuracy in the optimization (not precisely guaranteed).
        This is a lower bound on the size of the trust region.
    disp : bool
        Set to True to print convergence messages. If False,
        `verbosity` is ignored and set to 0.
    maxiter : int
        Maximum number of function evaluations.
    catol : float
        Tolerance (absolute) for constraint violations

    """
    _check_unknown_options(unknown_options)
    maxfun = maxiter
    rhoend = tol
    if not disp:
        iprint = 0

    # check constraints
    if isinstance(constraints, dict):
        constraints = (constraints, )

    for ic, con in enumerate(constraints):
        # check type
        try:
            ctype = con['type'].lower()
        except KeyError:
            raise KeyError('Constraint %d has no type defined.' % ic)
        except TypeError:
            raise TypeError('Constraints must be defined using a '
                            'dictionary.')
        except AttributeError:
            raise TypeError("Constraint's type must be a string.")
        else:
            if ctype != 'ineq':
                raise ValueError("Constraints of type '%s' not handled by "
                                 "COBYLA." % con['type'])

        # check function
        if 'fun' not in con:
            raise KeyError('Constraint %d has no function defined.' % ic)

        # check extra arguments
        if 'args' not in con:
            con['args'] = ()

    # m is the total number of constraint values
    # it takes into account that some constraints may be vector-valued
    cons_lengths = []
    for c in constraints:
        f = c['fun'](x0, *c['args'])
        try:
            cons_length = len(f)
        except TypeError:
            cons_length = 1
        cons_lengths.append(cons_length)
    m = sum(cons_lengths)

    def calcfc(x, con):
        f = fun(x, *args)
        i = 0
        for size, c in izip(cons_lengths, constraints):
            con[i: i + size] = c['fun'](x, *c['args'])
            i += size
        return f

    info = np.zeros(4, np.float64)
    xopt, info = _cobyla.minimize(calcfc, m=m, x=np.copy(x0), rhobeg=rhobeg,
                                  rhoend=rhoend, iprint=iprint, maxfun=maxfun,
                                  dinfo=info)

    if info[3] > catol:
        # Check constraint violation
        info[0] = 4

    return OptimizeResult(x=xopt,
                          status=int(info[0]),
                          success=info[0] == 1,
                          message={1: 'Optimization terminated successfully.',
                                   2: 'Maximum number of function evaluations has '
                                      'been exceeded.',
                                   3: 'Rounding errors are becoming damaging in '
                                      'COBYLA subroutine.',
                                   4: 'Did not converge to a solution satisfying '
                                      'the constraints. See `maxcv` for magnitude '
                                      'of violation.'
                                   }.get(info[0], 'Unknown exit status.'),
                          nfev=int(info[1]),
                          fun=info[2],
                          maxcv=info[3])
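
The float64-specific detail here is the `info` status array allocated as `np.zeros(4, np.float64)` before being handed to the Fortran routine. A minimal sketch (not SciPy-internal code) of driving the same solver through the public `scipy.optimize.minimize` interface with float64 inputs:

import numpy as np
from scipy.optimize import minimize

# Minimize (x - 1)^2 + (y - 2.5)^2 subject to x + y >= 1.
fun = lambda v: (v[0] - 1.0) ** 2 + (v[1] - 2.5) ** 2
cons = ({'type': 'ineq', 'fun': lambda v: v[0] + v[1] - 1.0},)

x0 = np.array([2.0, 0.0], dtype=np.float64)
res = minimize(fun, x0, method='COBYLA', constraints=cons, tol=1e-6)
print(res.x, res.status, res.message)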

Example 15

Project: mpop Source File: geo_image.py
    def geotiff_save(self, filename, compression=6,
                     tags=None, gdal_options=None,
                     blocksize=0, geotransform=None,
                     spatialref=None, floating_point=False,
                     writer_options=None):
        """Save the image to the given *filename* in geotiff_ format, with the
        *compression* level in [0, 9]. 0 means not compressed. The *tags*
        argument is a dict of tags to include in the image (as metadata).  By
        default it uses the 'area' instance to generate geotransform and
        spatialref information, this can be overwritten by the arguments
        *geotransform* and *spatialref*. *floating_point* allows the saving of
        'L' mode images in floating point format if set to True.
        When argument *writer_options* is not None and entry 'fill_value_subst'
        is included, its numeric value will be used to substitute image data
        that would be equal to the fill_value (used to replace masked data).

        .. _geotiff: http://trac.osgeo.org/geotiff/
        """
        from osgeo import gdal, osr

        raster = gdal.GetDriverByName("GTiff")

        tags = tags or {}
        writer_options = writer_options or {}

        if floating_point:
            if self.mode != "L":
                raise ValueError("Image must be in 'L' mode for floating point"
                                 " geotif saving")
            if self.fill_value is None:
                logger.warning("Image with floats cannot be transparent, "
                               "so setting fill_value to 0")
                self.fill_value = 0
            channels = [self.channels[0].astype(np.float64)]
            fill_value = self.fill_value or [0]
            gformat = gdal.GDT_Float64
            opacity = 0
        else:
            nbits = int(tags.get("NBITS", "8"))
            if nbits > 16:
                dtype = np.uint32
                gformat = gdal.GDT_UInt32
            elif nbits > 8:
                dtype = np.uint16
                gformat = gdal.GDT_UInt16
            else:
                dtype = np.uint8
                gformat = gdal.GDT_Byte
            opacity = np.iinfo(dtype).max
            channels, fill_value = self._finalize(dtype)

            fill_value_subst = writer_options.get(
                write_opts.WR_OPT_FILL_VALUE_SUBST, None)
            if fill_value is not None and fill_value_subst is not None:
                for i, chan in enumerate(channels):
                    np.place(chan, chan == fill_value[i], int(fill_value_subst))

        logger.debug("Saving to GeoTiff.")

        if tags is not None:
            self.tags.update(tags)
        if gdal_options is not None:
            self.gdal_options.update(gdal_options)

        g_opts = ["=".join(i) for i in self.gdal_options.items()]

        if compression != 0:
            g_opts.append("COMPRESS=DEFLATE")
            g_opts.append("ZLEVEL=" + str(compression))

        if blocksize != 0:
            g_opts.append("TILED=YES")
            g_opts.append("BLOCKXSIZE=" + str(blocksize))
            g_opts.append("BLOCKYSIZE=" + str(blocksize))

        if(self.mode == "L"):
            ensure_dir(filename)
            if fill_value is not None:
                dst_ds = raster.Create(filename,
                                       self.width,
                                       self.height,
                                       1,
                                       gformat,
                                       g_opts)
            else:
                g_opts.append("ALPHA=YES")
                dst_ds = raster.Create(filename,
                                       self.width,
                                       self.height,
                                       2,
                                       gformat,
                                       g_opts)
            self._gdal_write_channels(dst_ds, channels,
                                      opacity, fill_value)
        elif(self.mode == "LA"):
            ensure_dir(filename)
            g_opts.append("ALPHA=YES")
            dst_ds = raster.Create(filename,
                                   self.width,
                                   self.height,
                                   2,
                                   gformat,
                                   g_opts)
            self._gdal_write_channels(dst_ds,
                                      channels[:-1], channels[1],
                                      fill_value)
        elif(self.mode == "RGB"):
            ensure_dir(filename)
            if fill_value is not None:
                dst_ds = raster.Create(filename,
                                       self.width,
                                       self.height,
                                       3,
                                       gformat,
                                       g_opts)
            else:
                g_opts.append("ALPHA=YES")
                dst_ds = raster.Create(filename,
                                       self.width,
                                       self.height,
                                       4,
                                       gformat,
                                       g_opts)

            self._gdal_write_channels(dst_ds, channels,
                                      opacity, fill_value)

        elif(self.mode == "RGBA"):
            ensure_dir(filename)
            g_opts.append("ALPHA=YES")
            dst_ds = raster.Create(filename,
                                   self.width,
                                   self.height,
                                   4,
                                   gformat,
                                   g_opts)

            self._gdal_write_channels(dst_ds,
                                      channels[:-1], channels[3],
                                      fill_value)
        else:
            raise NotImplementedError("Saving to GeoTIFF using image mode"
                                      " %s is not implemented." % self.mode)

        # Create raster GeoTransform based on upper left corner and pixel
        # resolution ... if not overwritten by argument geotransform.

        if geotransform:
            dst_ds.SetGeoTransform(geotransform)
            if spatialref:
                if not isinstance(spatialref, str):
                    spatialref = spatialref.ExportToWkt()
                dst_ds.SetProjection(spatialref)
        else:
            from pyresample import utils
            from mpop.projector import get_area_def
            try:
                area = get_area_def(self.area)
            except (utils.AreaNotFound, AttributeError):
                area = self.area

            try:
                adfgeotransform = [area.area_extent[0], area.pixel_size_x, 0,
                                   area.area_extent[3], 0, -area.pixel_size_y]
                dst_ds.SetGeoTransform(adfgeotransform)
                srs = osr.SpatialReference()

                srs.ImportFromProj4(area.proj4_string)
                srs.SetProjCS(area.proj_id)
                try:
                    srs.SetWellKnownGeogCS(area.proj_dict['ellps'])
                except KeyError:
                    pass
                try:
                    # Check for epsg code.
                    srs.ImportFromEPSG(int(area.proj_dict['init'].
                                           lower().split('epsg:')[1]))
                except (KeyError, IndexError):
                    pass
                srs = srs.ExportToWkt()
                dst_ds.SetProjection(srs)
            except AttributeError:
                logger.exception("Could not load geographic data, invalid area")

        self.tags.update({'TIFFTAG_DATETIME':
                          self.time_slot.strftime("%Y:%m:%d %H:%M:%S")})

        dst_ds.SetMetadata(self.tags, '')

        # Close the dataset

        dst_ds = None
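
When *floating_point* is set, the channel is cast to `np.float64` and written with `gdal.GDT_Float64`. A minimal standalone sketch of just that step, assuming the GDAL Python bindings are installed (the file name is arbitrary):

import numpy as np
from osgeo import gdal

data = np.random.rand(64, 64).astype(np.float64)   # toy "L"-mode channel

raster = gdal.GetDriverByName("GTiff")
# width, height, 1 band, 64-bit float samples
dst_ds = raster.Create("float64_example.tif", data.shape[1], data.shape[0],
                       1, gdal.GDT_Float64, ["COMPRESS=DEFLATE"])
dst_ds.GetRasterBand(1).WriteArray(data)
dst_ds = None   # closing the dataset flushes it to disk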

Example 16

Project: cartopy Source File: test_img_nest.py
def requires_wmts_data(function):
    """
    A decorator which ensures that the WMTS data is available for
    use in testing.

    """
    aerial = cimgt.MapQuestOpenAerial()

    # get web tiles up to 3 zoom levels deep
    tiles = [(0, 0, 0)]
    for tile in aerial.subtiles((0, 0, 0)):
        tiles.append(tile)
    for tile in tiles[1:]:
        for sub_tile in aerial.subtiles(tile):
            tiles.append(sub_tile)

    fname_template = os.path.join(_TEST_DATA_DIR, 'z_{}', 'x_{}_y_{}.png')

    if not os.path.isdir(_TEST_DATA_DIR):
        os.makedirs(_TEST_DATA_DIR)

    data_version_fname = os.path.join(_TEST_DATA_DIR, 'version.txt')

    test_data_version = None
    try:
        with open(data_version_fname, 'r') as fh:
            test_data_version = int(fh.read().strip())
    except IOError:
        pass
    finally:
        if test_data_version != _TEST_DATA_VERSION:
            warnings.warn('WMTS test data is out of date, regenerating at '
                          '{}.'.format(_TEST_DATA_DIR))
            shutil.rmtree(_TEST_DATA_DIR)
            os.makedirs(_TEST_DATA_DIR)
            with open(data_version_fname, 'w') as fh:
                fh.write(str(_TEST_DATA_VERSION))

    # Download the tiles.
    for tile in tiles:
        x, y, z = tile
        fname = fname_template.format(z, x, y)
        if not os.path.exists(fname):
            if not os.path.isdir(os.path.dirname(fname)):
                os.makedirs(os.path.dirname(fname))

            img, extent, _ = aerial.get_image(tile)
            nx, ny = 256, 256
            x_rng = extent[1] - extent[0]
            y_rng = extent[3] - extent[2]

            pix_size_x = x_rng / nx
            pix_size_y = y_rng / ny

            upper_left_center = (extent[0] + pix_size_x / 2,
                                 extent[2] + pix_size_y / 2)

            pgw_fname = fname[:-4] + '.pgw'
            pgw_keys = {'x_pix_size': np.float64(pix_size_x),
                        'y_rotation': 0,
                        'x_rotation': 0,
                        'y_pix_size': np.float64(pix_size_y),
                        'x_center': np.float64(upper_left_center[0]),
                        'y_center': np.float64(upper_left_center[1]),
                        }
            _save_world(pgw_fname, pgw_keys)
            img.save(fname)

    global _TEST_DATA_AVAILABLE
    _TEST_DATA_AVAILABLE = True

    return function
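
The decorator stores the tile geometry as `np.float64` values in a dict that `_save_world` (cartopy test-suite code) writes to a `.pgw` world file. A standalone sketch of writing such a six-line world file by hand; the key names mirror the ones above, and the numbers are made up:

import numpy as np

pgw_keys = {'x_pix_size': np.float64(152.87),
            'x_rotation': 0, 'y_rotation': 0,
            'y_pix_size': np.float64(-152.87),
            'x_center': np.float64(-20037431.8),
            'y_center': np.float64(20037431.8)}

# ESRI world files are six lines: pixel sizes, rotations, upper-left center.
order = ['x_pix_size', 'y_rotation', 'x_rotation',
         'y_pix_size', 'x_center', 'y_center']
with open('x_0_y_0.pgw', 'w') as fh:
    fh.write('\n'.join(str(np.float64(pgw_keys[k])) for k in order) + '\n')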

Example 17

Project: sfepy Source File: time_poisson_interactive.py
def main():
    from sfepy import data_dir

    parser = ArgumentParser(description=__doc__,
                            formatter_class=RawDescriptionHelpFormatter)
    parser.add_argument('--version', action='version', version='%(prog)s')
    parser.add_argument('--diffusivity', metavar='float', type=float,
                        action='store', dest='diffusivity',
                        default=1e-5, help=helps['diffusivity'])
    parser.add_argument('--ic-max', metavar='float', type=float,
                        action='store', dest='ic_max',
                        default=2.0, help=helps['ic_max'])
    parser.add_argument('--order', metavar='int', type=int,
                        action='store', dest='order',
                        default=2, help=helps['order'])
    parser.add_argument('-r', '--refine', metavar='int', type=int,
                        action='store', dest='refine',
                        default=0, help=helps['refine'])
    parser.add_argument('-p', '--probe',
                        action="store_true", dest='probe',
                        default=False, help=helps['probe'])
    parser.add_argument('-s', '--show',
                        action="store_true", dest='show',
                        default=False, help=helps['show'])
    options = parser.parse_args()

    assert_((0 < options.order),
            'temperature approximation order must be at least 1!')

    output('using values:')
    output('  diffusivity:', options.diffusivity)
    output('  max. IC value:', options.ic_max)
    output('uniform mesh refinement level:', options.refine)

    mesh = Mesh.from_file(data_dir + '/meshes/3d/cylinder.mesh')
    domain = FEDomain('domain', mesh)

    if options.refine > 0:
        for ii in range(options.refine):
            output('refine %d...' % ii)
            domain = domain.refine()
            output('... %d nodes %d elements'
                   % (domain.shape.n_nod, domain.shape.n_el))

    omega = domain.create_region('Omega', 'all')
    left = domain.create_region('Left',
                                'vertices in x < 0.00001', 'facet')
    right = domain.create_region('Right',
                                 'vertices in x > 0.099999', 'facet')

    field = Field.from_args('fu', nm.float64, 'scalar', omega,
                            approx_order=options.order)

    T = FieldVariable('T', 'unknown', field, history=1)
    s = FieldVariable('s', 'test', field, primary_var_name='T')

    m = Material('m', diffusivity=options.diffusivity * nm.eye(3))

    integral = Integral('i', order=2*options.order)

    t1 = Term.new('dw_diffusion(m.diffusivity, s, T)',
                  integral, omega, m=m, s=s, T=T)
    t2 = Term.new('dw_volume_dot(s, dT/dt)',
                  integral, omega, s=s, T=T)
    eq = Equation('balance', t1 + t2)
    eqs = Equations([eq])

    # Boundary conditions.
    ebc1 = EssentialBC('T1', left, {'T.0' : 2.0})
    ebc2 = EssentialBC('T2', right, {'T.0' : -2.0})

    # Initial conditions.
    def get_ic(coors, ic):
        x, y, z = coors.T
        return 2 - 40.0 * x + options.ic_max * nm.sin(4 * nm.pi * x / 0.1)
    ic_fun = Function('ic_fun', get_ic)
    ic = InitialCondition('ic', omega, {'T.0' : ic_fun})

    ls = ScipyDirect({})

    nls_status = IndexedStruct()
    nls = Newton({'is_linear' : True}, lin_solver=ls, status=nls_status)

    pb = Problem('heat', equations=eqs, nls=nls, ls=ls)
    pb.set_bcs(ebcs=Conditions([ebc1, ebc2]))
    pb.set_ics(Conditions([ic]))

    tss = SimpleTimeSteppingSolver({'t0' : 0.0, 't1' : 100.0, 'n_step' : 11},
                                   problem=pb)
    tss.init_time()

    if options.probe:
        # Prepare probe data.
        probes, labels = gen_lines(pb)

        ev = pb.evaluate
        order = 2 * (options.order - 1)

        gfield = Field.from_args('gu', nm.float64, 'vector', omega,
                                approx_order=options.order - 1)
        dvel = FieldVariable('dvel', 'parameter', gfield,
                             primary_var_name='(set-to-None)')
        cfield = Field.from_args('gu', nm.float64, 'scalar', omega,
                                approx_order=options.order - 1)
        component = FieldVariable('component', 'parameter', cfield,
                                  primary_var_name='(set-to-None)')

        nls_options = {'eps_a' : 1e-16, 'i_max' : 1}

        if options.show:
            plt.ion()

    # Solve the problem using the time stepping solver.
    suffix = tss.ts.suffix
    for step, time, state in tss():
        if options.probe:
            # Probe the solution.
            dvel_qp = ev('ev_diffusion_velocity.%d.Omega(m.diffusivity, T)'
                         % order, copy_materials=False, mode='qp')
            project_by_component(dvel, dvel_qp, component, order,
                                 nls_options=nls_options)

            all_results = []
            for ii, probe in enumerate(probes):
                fig, results = probe_results(ii, T, dvel, probe, labels[ii])

                all_results.append(results)

            plt.tight_layout()
            fig.savefig('time_poisson_interactive_probe_%s.png'
                        % (suffix % step), bbox_inches='tight')

            if options.show:
                plt.draw()

            for ii, results in enumerate(all_results):
                output('probe %d (%s):' % (ii, probes[ii].name))
                output.level += 2
                for key, res in ordered_iteritems(results):
                    output(key + ':')
                    val = res[1]
                    output('  min: %+.2e, mean: %+.2e, max: %+.2e'
                           % (val.min(), val.mean(), val.max()))
                output.level -= 2
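
`Field.from_args('fu', nm.float64, ...)` tells sfepy to build the temperature field with double-precision DOFs; the initial-condition callback itself is plain numpy. A standalone sketch of `get_ic` evaluated on a float64 coordinate array (the `ic_max` value is chosen arbitrarily here):

import numpy as nm

ic_max = 2.0

def get_ic(coors, ic=None):
    """Initial temperature: linear ramp plus a sine perturbation."""
    x = coors[:, 0]
    return 2 - 40.0 * x + ic_max * nm.sin(4 * nm.pi * x / 0.1)

coors = nm.zeros((5, 3), dtype=nm.float64)
coors[:, 0] = nm.linspace(0.0, 0.1, 5)
print(get_ic(coors))          # float64 initial condition values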

Example 18

Project: pyorbital Source File: orbital.py
    def get_next_passes(self, utc_time, length, lon, lat, alt, tol=0.001, horizon=0):
        """Calculate passes for the next hours for a given start time and a
        given observer.

        Original by Martin.

        utc_time: Observation time (datetime object)
        length: Number of hours to find passes (int)
        lon: Longitude of observer position on ground (float)
        lat: Latitude of observer position on ground (float)
        alt: Altitude above sea-level (geoid) of observer position on ground (float)
        tol: precision of the result in seconds
        horizon: the elevation of horizon to compute risetime and falltime.

        Return: [(rise-time, fall-time, max-elevation-time), ...]
        """

        def elevation(minutes):
            """elevation
            """
            return self.get_observer_look(utc_time +
                                          timedelta(
                                              minutes=np.float64(minutes)),
                                          lon, lat, alt)[1] - horizon

        def elevation_inv(minutes):
            """inverse of elevation
            """
            return -elevation(minutes)

        def get_root_secant(fun, start, end, tol=0.01):
            """Secant method
            """
            x_0 = end
            x_1 = start
            fx_0 = fun(end)
            fx_1 = fun(start)
            if abs(fx_0) < abs(fx_1):
                fx_0, fx_1 = fx_1, fx_0
                x_0, x_1 = x_1, x_0
            while abs(x_0 - x_1) > tol:
                x_n = x_1 - fx_1 * ((x_1 - x_0) / (fx_1 - fx_0))
                x_0, x_1 = x_1, x_n
                fx_0, fx_1 = fx_1, fun(x_n)
            return x_1

        def get_max_parab(fun, start, end, tol=0.01):
            """Successive parabolic interpolation
            """

            a = start
            c = end
            b = (a + c) / 2.0
            x = b

            f_a = fun(a)
            f_b = fun(b)
            f_c = fun(c)

            while abs(c - a) > tol:
                x = b - 0.5 * (((b - a) ** 2 * (f_b - f_c)
                                - (b - c) ** 2 * (f_b - f_a)) /
                               ((b - a) * (f_b - f_c) - (b - c) * (f_b - f_a)))
                f_x = fun(x)
                if x > b:
                    a, b, c = b, x, c
                    f_a, f_b, f_c = f_b, f_x, f_c
                else:
                    a, b, c = a, x, b
                    f_a, f_b, f_c = f_a, f_x, f_b

            return x

        times = utc_time + np.array([timedelta(minutes=minutes)
                                     for minutes in range(length * 60)])
        elev = self.get_observer_look(times, lon, lat, alt)[1] - horizon
        zcs = np.where(np.diff(np.sign(elev)))[0]

        res = []
        risetime = None
        falltime = None
        for guess in zcs:
            horizon_mins = get_root_secant(
                elevation, guess, guess + 1.0, tol=tol / 60.0)
            horizon_time = utc_time + timedelta(minutes=horizon_mins)
            if elev[guess] < 0:
                risetime = horizon_time
                risemins = horizon_mins
                falltime = None
            else:
                falltime = horizon_time
                fallmins = horizon_mins
                if risetime:
                    middle = (risemins + fallmins) / 2.0
                    highest = utc_time + \
                        timedelta(minutes=get_max_parab(
                            elevation_inv,
                            middle - 0.1, middle + 0.1,
                            tol=tol / 60.0
                        ))
                    res += [(risetime, falltime, highest)]
                risetime = None
        return res
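
The pass calculation wraps offsets in `timedelta(minutes=np.float64(minutes))` and refines horizon crossings with a secant iteration. A self-contained sketch of that same secant root finder applied to a toy elevation-like curve (pure numpy, not tied to pyorbital):

import numpy as np

def get_root_secant(fun, start, end, tol=0.01):
    """Secant method, mirroring the helper above."""
    x_0, x_1 = np.float64(end), np.float64(start)
    fx_0, fx_1 = fun(x_0), fun(x_1)
    if abs(fx_0) < abs(fx_1):
        fx_0, fx_1 = fx_1, fx_0
        x_0, x_1 = x_1, x_0
    while abs(x_0 - x_1) > tol:
        x_n = x_1 - fx_1 * ((x_1 - x_0) / (fx_1 - fx_0))
        x_0, x_1 = x_1, x_n
        fx_0, fx_1 = fx_1, fun(x_n)
    return x_1

# Toy "elevation" curve: crosses the horizon at t = 10 minutes.
elevation = lambda minutes: np.sin((minutes - 10.0) / 60.0)
print(get_root_secant(elevation, 9.0, 11.0, tol=1e-6))   # ~10.0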

Example 19

Project: tpot Source File: base.py
Function: fit
    def fit(self, features, classes):
        """Fits a machine learning pipeline that maximizes classification score
        on the provided data

        Uses genetic programming to optimize a machine learning pipeline that
        maximizes classification score on the provided features and classes.
        Performs an internal stratified training/testing cross-validation split
        to avoid overfitting on the provided data.

        Parameters
        ----------
        features: array-like {n_samples, n_features}
            Feature matrix
        classes: array-like {n_samples}
            List of class labels for prediction

        Returns
        -------
        None

        """
        features = features.astype(np.float64)

        # Set the seed for the GP run
        if self.random_state:
            random.seed(self.random_state)
            np.random.seed(self.random_state)

        self._start_datetime = datetime.now()

        self._toolbox.register('evaluate', self._evaluate_individual, features=features, classes=classes)
        pop = self._toolbox.population(n=self.population_size)

        def pareto_eq(ind1, ind2):
            """Determines whether two individuals are equal on the Pareto front

            Parameters
            ----------
            ind1: DEAP individual from the GP population
                First individual to compare
            ind2: DEAP individual from the GP population
                Second individual to compare

            Returns
            ----------
            individuals_equal: bool
                Boolean indicating whether the two individuals are equal on
                the Pareto front

            """
            return np.all(ind1.fitness.values == ind2.fitness.values)

        self._hof = tools.ParetoFront(similar=pareto_eq)

        # Start the progress bar
        if self.max_time_mins:
            total_evals = self.population_size
        else:
            total_evals = self.population_size * (self.generations + 1)

        self._pbar = tqdm(total=total_evals, unit='pipeline', leave=False,
                          disable=not (self.verbosity >= 2), desc='Optimization Progress')

        try:
            pop, _ = algorithms.eaSimple(
                population=pop, toolbox=self._toolbox, cxpb=self.crossover_rate,
                mutpb=self.mutation_rate, ngen=self.generations,
                halloffame=self._hof, verbose=False)

        # Allow for certain exceptions to signal a premature fit() cancellation
        except (KeyboardInterrupt, SystemExit):
            if self.verbosity > 0:
                print('') # just for better interface
                print('GP closed prematurely - will use current best pipeline')
        finally:
            # Close the progress bar
            # Standard truthiness checks won't work for tqdm
            if not isinstance(self._pbar, type(None)):
                self._pbar.close()

            # Reset gp_generation counter to restore initial state
            self._gp_generation = 0

            # Store the pipeline with the highest internal testing score
            if self._hof:
                top_score = -float('inf')
                for pipeline, pipeline_scores in zip(self._hof.items, reversed(self._hof.keys)):
                    if pipeline_scores.wvalues[1] > top_score:
                        top_score = pipeline_scores.wvalues[1]
                        self._optimized_pipeline = pipeline

                if not self._optimized_pipeline:
                    raise ValueError('There was an error in the TPOT optimization '
                                     'process. This could be because the data was '
                                     'not formatted properly, or because data for '
                                     'a regression problem was provided to the '
                                     'TPOTClassifier object. Please make sure you '
                                     'passed the data to TPOT correctly.')

                self._fitted_pipeline = self._toolbox.compile(expr=self._optimized_pipeline)

                with warnings.catch_warnings():
                    warnings.simplefilter('ignore')
                    self._fitted_pipeline.fit(features, classes)

            if self.verbosity in [1, 2] and self._optimized_pipeline:
                # Add an extra line of spacing if the progress bar was used
                if self.verbosity >= 2:
                    print('')
                print('Best pipeline: {}'.format(self._optimized_pipeline))

            # Store and fit the entire Pareto front if sciencing
            elif self.verbosity >= 3 and self._hof:
                self._hof_fitted_pipelines = {}

                for pipeline in self._hof.items:
                    self._hof_fitted_pipelines[str(pipeline)] = self._toolbox.compile(expr=pipeline)

                    with warnings.catch_warnings():
                        warnings.simplefilter('ignore')
                        self._hof_fitted_pipelines[str(pipeline)].fit(features, classes)
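
The only numpy.float64-specific step in this `fit` is `features = features.astype(np.float64)`, which normalizes whatever array-like the caller passed before it reaches the scikit-learn pipelines. A minimal sketch of the same cast ahead of fitting an estimator (scikit-learn assumed to be installed):

import numpy as np
from sklearn.linear_model import LogisticRegression

# Integer features, e.g. straight out of a CSV of counts.
features = np.array([[0, 1], [1, 1], [2, 0], [3, 0]])
classes = np.array([0, 0, 1, 1])

features = features.astype(np.float64)    # same cast TPOT applies in fit()
clf = LogisticRegression().fit(features, classes)
print(features.dtype, clf.predict(features))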

Example 20

Project: sfepy Source File: shell10x_cantilever_interactive.py
def main():
    parser = ArgumentParser(description=__doc__.rstrip(),
                            formatter_class=RawDescriptionHelpFormatter)
    parser.add_argument('output_dir', help=helps['output_dir'])
    parser.add_argument('-d', '--dims', metavar='l,w,t',
                        action='store', dest='dims',
                        default='0.2,0.01,0.001', help=helps['dims'])
    parser.add_argument('-n', '--nx', metavar='start,stop,step',
                        action='store', dest='nx',
                        default='2,103,10', help=helps['nx'])
    parser.add_argument('-t', '--transform', choices=['none', 'bend', 'twist'],
                        action='store', dest='transform',
                        default='none', help=helps['transform'])
    parser.add_argument('--young', metavar='float', type=float,
                        action='store', dest='young',
                        default=210e9, help=helps['young'])
    parser.add_argument('--poisson', metavar='float', type=float,
                        action='store', dest='poisson',
                        default=0.3, help=helps['poisson'])
    parser.add_argument('--force', metavar='float', type=float,
                        action='store', dest='force',
                        default=-1.0, help=helps['force'])
    parser.add_argument('-p', '--plot',
                        action="store_true", dest='plot',
                        default=False, help=helps['plot'])
    parser.add_argument('--u-scaling', metavar='float', type=float,
                        action='store', dest='scaling',
                        default=1.0, help=helps['scaling'])
    parser.add_argument('-s', '--show',
                        action="store_true", dest='show',
                        default=False, help=helps['show'])
    parser.add_argument('--silent',
                        action='store_true', dest='silent',
                        default=False, help=helps['silent'])
    options = parser.parse_args()

    dims = nm.array([float(ii) for ii in options.dims.split(',')],
                    dtype=nm.float64)
    nxs = tuple([int(ii) for ii in options.nx.split(',')])
    young = options.young
    poisson = options.poisson
    force = options.force

    output_dir = options.output_dir

    odir = lambda filename: os.path.join(output_dir, filename)

    filename = odir('output_log.txt')
    ensure_path(filename)
    output.set_output(filename=filename, combined=options.silent == False)

    output('output directory:', output_dir)
    output('using values:')
    output("  dimensions:", dims)
    output("  nx range:", nxs)
    output("  Young's modulus:", options.young)
    output("  Poisson's ratio:", options.poisson)
    output('  force:', options.force)
    output('  transform:', options.transform)

    if options.transform == 'none':
        options.transform = None

    u_exact = get_analytical_displacement(dims, young, force,
                                          transform=options.transform)

    if options.transform is None:
        ilog = 2
        labels = ['u_3']

    elif options.transform == 'bend':
        ilog = 0
        labels = ['u_1']

    elif options.transform == 'twist':
        ilog = [0, 1, 2]
        labels = ['u_1', 'u_2', 'u_3']

    label = ', '.join(labels)

    log = []
    for nx in range(*nxs):
        shape = (nx, 2)

        pb, state, u, gamma2 = solve_problem(shape, dims, young, poisson, force,
                                             transform=options.transform)

        dofs = u.get_state_in_region(gamma2)
        output('DOFs along the loaded edge:')
        output('\n%s' % dofs)

        log.append([nx - 1] + nm.array(dofs[0, ilog], ndmin=1).tolist())

    pb.save_state(odir('shell10x_cantilever.vtk'), state)

    log = nm.array(log)

    output('max. %s displacement w.r.t. number of cells:' % label)
    output('\n%s' % log)
    output('analytical value:', u_exact)

    if options.plot:
        import matplotlib.pyplot as plt

        plt.rcParams.update({
            'lines.linewidth' : 3,
            'font.size' : 16,
        })

        fig, ax1 = plt.subplots()
        fig.suptitle('max. $%s$ displacement' % label)

        for ic in range(log.shape[1] - 1):
            ax1.plot(log[:, 0], log[:, ic + 1], label=r'$%s$' % labels[ic])
        ax1.set_xlabel('# of cells')
        ax1.set_ylabel(r'$%s$' % label)
        ax1.grid(which='both')

        lines1, labels1 = ax1.get_legend_handles_labels()

        if u_exact is not None:
            ax1.hlines(u_exact, log[0, 0], log[-1, 0],
                       'r', 'dotted', label=r'$%s^{analytical}$' % label)

            ax2 = ax1.twinx()
            # Assume single log column.
            ax2.semilogy(log[:, 0], nm.abs(log[:, 1] - u_exact), 'g',
                         label=r'$|%s - %s^{analytical}|$' % (label, label))
            ax2.set_ylabel(r'$|%s - %s^{analytical}|$' % (label, label))

            lines2, labels2 = ax2.get_legend_handles_labels()

        else:
            lines2, labels2 = [], []

        ax1.legend(lines1 + lines2, labels1 + labels2, loc='best')

        plt.tight_layout()
        ax1.set_xlim([log[0, 0] - 2, log[-1, 0] + 2])

        suffix = {None: 'straight',
                  'bend' : 'bent', 'twist' : 'twisted'}[options.transform]
        fig.savefig(odir('shell10x_cantilever_convergence_%s.png' % suffix))

        plt.show()

    if options.show:
        from sfepy.postprocess.viewer import Viewer
        from sfepy.postprocess.domain_specific import DomainSpecificPlot

        ds = {'u_disp' :
                  DomainSpecificPlot('plot_displacements',
                                     ['rel_scaling=%f' % options.scaling])}
        view = Viewer(odir('shell10x_cantilever.vtk'))
        view(domain_specific=ds, is_scalar_bar=True, is_wireframe=True,
             opacity={'wireframe' : 0.5})
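
float64 enters this script through the command-line parsing: the `--dims` string is split and turned into an `nm.float64` array. A standalone sketch of just that conversion, with the default option value copied from the parser definition above:

import numpy as nm

dims_opt = '0.2,0.01,0.001'          # value of the --dims option
dims = nm.array([float(ii) for ii in dims_opt.split(',')],
                dtype=nm.float64)
print(dims, dims.dtype)              # [2.e-01 1.e-02 1.e-03] float64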

Example 21

Project: sfepy Source File: coefs_phononic.py
def detect_band_gaps(mass, freq_info, opts, gap_kind='normal', mtx_b=None):
    """
    Detect band gaps given solution to eigenproblem (eigs,
    eig_vectors). Only valid resonance frequencies (i.e. those for which
    corresponding eigenmomenta are above a given threshold) are taken into
    account.

    Notes
    -----
    - make freq_eps relative to ]f0, f1[ size?
    """
    output('eigensolver:', opts.eigensolver)

    fm = freq_info.freq_range_margins
    min_freq, max_freq = fm[0], fm[-1]
    output('freq. range with margins: [%8.3f, %8.3f]'
           % (min_freq, max_freq))

    df = opts.freq_step * (max_freq - min_freq)

    fz_callback = get_callback(mass.evaluate, opts.eigensolver,
                               mtx_b=mtx_b, mode='find_zero')
    trace_callback = get_callback(mass.evaluate, opts.eigensolver,
                                  mtx_b=mtx_b, mode='trace')

    n_col = 1 + (mtx_b is not None)
    logs = [[] for ii in range(n_col + 1)]
    gaps = []

    for ii in range(freq_info.freq_range.shape[0] + 1):

        f0, f1 = fm[[ii, ii+1]]
        output('interval: ]%.8f, %.8f[...' % (f0, f1))

        log_freqs = get_log_freqs(f0, f1, df, opts.freq_eps, 100, 1000)

        output('n_logged: %d' % log_freqs.shape[0])

        log_mevp = [[] for ii in range(n_col)]
        for f in log_freqs:
            for ii, data in enumerate(trace_callback(f)):
                log_mevp[ii].append(data)

        # Get log for the first and last f in log_freqs.
        lf0 = log_freqs[0]
        lf1 = log_freqs[-1]

        log0, log1 = log_mevp[0][0], log_mevp[0][-1]
        min_eig0 = log0[0]
        max_eig1 = log1[-1]
        if gap_kind == 'liquid':
            mevp = nm.array(log_mevp, dtype=nm.float64).squeeze()
            si = nm.where(mevp[:,0] < 0.0)[0]
            li = nm.where(mevp[:,-1] < 0.0)[0]
            wi = nm.setdiff1d(si, li)

            if si.shape[0] == 0: # No gaps.
                gap = ([2, lf0, log0[0]], [2, lf0, log0[-1]])
                gaps.append(gap)

            elif li.shape[0] == mevp.shape[0]: # Full interval strong gap.
                gap = ([1, lf1, log1[0]], [1, lf1, log1[-1]])
                gaps.append(gap)

            else:
                subgaps = []
                for chunk in split_chunks(li): # Strong gaps.
                    i0, i1 = chunk[0], chunk[-1]
                    fmin, fmax = log_freqs[i0], log_freqs[i1]
                    gap = ([1, fmin, mevp[i0,-1]], [1, fmax, mevp[i1,-1]])
                    subgaps.append(gap)

                for chunk in split_chunks(wi): # Weak gaps.
                    i0, i1 = chunk[0], chunk[-1]
                    fmin, fmax = log_freqs[i0], log_freqs[i1]
                    gap = ([0, fmin, mevp[i0,-1]], [2, fmax, mevp[i1,-1]])
                    subgaps.append(gap)
                gaps.append(subgaps)

        else:
            if min_eig0 > 0.0: # No gaps.
                gap = ([2, lf0, log0[0]], [2, lf0, log0[-1]])

            elif max_eig1 < 0.0: # Full interval strong gap.
                gap = ([1, lf1, log1[0]], [1, lf1, log1[-1]])

            else:
                llog_freqs = list(log_freqs)

                # Insert fmin, fmax into log.
                output('finding zero of the largest eig...')
                smax, fmax, vmax = find_zero(lf0, lf1, fz_callback,
                                             opts.freq_eps, opts.zero_eps, 1)
                im = nm.searchsorted(log_freqs, fmax)
                llog_freqs.insert(im, fmax)
                for ii, data in enumerate(trace_callback(fmax)):
                    log_mevp[ii].insert(im, data)

                output('...done')
                if smax in [0, 2]:
                    output('finding zero of the smallest eig...')
                    # having fmax instead of f0 does not work if freq_eps is
                    # large.
                    smin, fmin, vmin = find_zero(lf0, lf1, fz_callback,
                                                 opts.freq_eps, opts.zero_eps, 0)
                    im = nm.searchsorted(log_freqs, fmin)
                    # +1 due to fmax already inserted before.
                    llog_freqs.insert(im+1, fmin)
                    for ii, data in enumerate(trace_callback(fmin)):
                        log_mevp[ii].insert(im+1, data)

                    output('...done')

                elif smax == 1:
                    smin = 1 # both are negative everywhere.
                    fmin, vmin = fmax, vmax

                gap = ([smin, fmin, vmin], [smax, fmax, vmax])

                log_freqs = nm.array(llog_freqs)

            output(gap[0])
            output(gap[1])

            gaps.append(gap)

        logs[0].append(log_freqs)
        for ii, data in enumerate(log_mevp):
            logs[ii+1].append(nm.array(data, dtype = nm.float64))

        output('...done')

    kinds = describe_gaps(gaps)

    slogs = Struct(freqs=logs[0], eigs=logs[1])
    if n_col == 2:
        slogs.eig_vectors = logs[2]

    return slogs, gaps, kinds
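
The liquid-gap branch converts the logged eigenvalues into an `nm.float64` array and classifies intervals by the sign of its first and last columns. A toy sketch of that sign-based bookkeeping on synthetic data (pure numpy, not sfepy):

import numpy as nm

# Rows: logged frequencies; columns: smallest and largest tracked eigenvalue.
mevp = nm.array([[ 0.5,  1.0],
                 [-0.2,  0.8],
                 [-0.4, -0.1],
                 [-0.1,  0.3],
                 [ 0.2,  0.6]], dtype=nm.float64)

si = nm.where(mevp[:, 0] < 0.0)[0]    # smallest eigenvalue negative
li = nm.where(mevp[:, -1] < 0.0)[0]   # largest eigenvalue negative ("strong")
wi = nm.setdiff1d(si, li)             # only the smallest negative ("weak")
print(si, li, wi)                     # [1 2 3] [2] [1 3]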

Example 22

Project: zipline Source File: period.py
    def calculate_metrics(self):
        self.benchmark_period_returns = \
            cum_returns(self.benchmark_returns).iloc[-1]

        self.algorithm_period_returns = \
            cum_returns(self.algorithm_returns).iloc[-1]

        if not self.algorithm_returns.index.equals(
            self.benchmark_returns.index
        ):
            message = "Mismatch between benchmark_returns ({bm_count}) and \
            algorithm_returns ({algo_count}) in range {start} : {end}"
            message = message.format(
                bm_count=len(self.benchmark_returns),
                algo_count=len(self.algorithm_returns),
                start=self._start_session,
                end=self._end_session
            )
            raise Exception(message)

        self.num_trading_days = len(self.benchmark_returns)

        self.mean_algorithm_returns = (
            self.algorithm_returns.cumsum() /
            np.arange(1, self.num_trading_days + 1, dtype=np.float64)
        )

        self.benchmark_volatility = annual_volatility(self.benchmark_returns)
        self.algorithm_volatility = annual_volatility(self.algorithm_returns)

        self.treasury_period_return = choose_treasury(
            self.treasury_curves,
            self._start_session,
            self._end_session,
            self.trading_calendar,
        )
        self.sharpe = sharpe_ratio(
            self.algorithm_returns,
        )
        # The consumer currently expects a 0.0 value for sharpe in period,
        # this differs from cumulative which was np.nan.
        # When factoring out the sharpe_ratio, the different return types
        # were collapsed into `np.nan`.
        # TODO: Either fix consumer to accept `np.nan` or make the
        # `sharpe_ratio` return type configurable.
        # In the meantime, convert nan values to 0.0
        if pd.isnull(self.sharpe):
            self.sharpe = 0.0
        self.downside_risk = downside_risk(
            self.algorithm_returns.values
        )
        self.sortino = sortino_ratio(
            self.algorithm_returns.values,
            _downside_risk=self.downside_risk,
        )
        self.information = information_ratio(
            self.algorithm_returns.values,
            self.benchmark_returns.values,
        )
        self.alpha, self.beta = alpha_beta_aligned(
            self.algorithm_returns.values,
            self.benchmark_returns.values,
        )
        self.excess_return = self.algorithm_period_returns - \
            self.treasury_period_return
        self.max_drawdown = max_drawdown(self.algorithm_returns.values)
        self.max_leverage = self.calculate_max_leverage()
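
The running mean of algorithm returns divides a cumulative sum by `np.arange(1, n + 1, dtype=np.float64)`. A minimal pandas/numpy sketch of that computation on made-up daily returns:

import numpy as np
import pandas as pd

algorithm_returns = pd.Series([0.01, -0.02, 0.005, 0.03],
                              dtype=np.float64)
num_trading_days = len(algorithm_returns)

mean_algorithm_returns = (algorithm_returns.cumsum() /
                          np.arange(1, num_trading_days + 1,
                                    dtype=np.float64))
print(mean_algorithm_returns.values)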

Example 23

Project: lfd Source File: precompute.py
def batch_get_sol_params(x_nd, K_nn, bend_coefs, rot_coef):
    n, d = x_nd.shape

    x_gpu = gpuarray.to_gpu(x_nd)

    H_arr_gpu = []
    for b in bend_coefs:
        cur_offset = np.zeros((1 + d + n, 1 + d + n), np.float64)
        cur_offset[d+1:, d+1:] = b * K_nn
        cur_offset[1:d+1, 1:d+1] = np.diag(rot_coef)
        H_arr_gpu.append(gpuarray.to_gpu(cur_offset))
    H_ptr_gpu = get_gpu_ptrs(H_arr_gpu)

    A = np.r_[np.zeros((d+1,d+1)), np.c_[np.ones((n,1)), x_nd]].T
    n_cnts = A.shape[0]
    _u,_s,_vh = np.linalg.svd(A.T)
    N = _u[:,n_cnts:]
    F = np.zeros((n + d + 1, d), np.float64)
    F[1:d+1, :d] += np.diag(rot_coef)
    
    Q = np.c_[np.ones((n,1)), x_nd, K_nn].astype(np.float64)
    F = F.astype(np.float64)
    N = N.astype(np.float64)

    Q_gpu     = gpuarray.to_gpu(Q)
    Q_arr_gpu = [Q_gpu for _ in range(len(bend_coefs))]
    Q_ptr_gpu = get_gpu_ptrs(Q_arr_gpu)

    F_gpu     = gpuarray.to_gpu(F)
    F_arr_gpu = [F_gpu for _ in range(len(bend_coefs))]
    F_ptr_gpu = get_gpu_ptrs(F_arr_gpu)

    N_gpu = gpuarray.to_gpu(N)
    N_arr_gpu = [N_gpu for _ in range(len(bend_coefs))]
    N_ptr_gpu = get_gpu_ptrs(N_arr_gpu)
    
    dot_batch_nocheck(Q_arr_gpu, Q_arr_gpu, H_arr_gpu,
                      Q_ptr_gpu, Q_ptr_gpu, H_ptr_gpu,
                      transa = 'T')
    # N'HN
    NHN_arr_gpu, NHN_ptr_gpu = m_dot_batch((N_arr_gpu, N_ptr_gpu, 'T'),
                                           (H_arr_gpu, H_ptr_gpu, 'N'),
                                           (N_arr_gpu, N_ptr_gpu, 'N'))
    iH_arr = []
    for NHN in NHN_arr_gpu:
        iH_arr.append(scipy.linalg.inv(NHN.get()).copy())
    iH_arr_gpu = [gpuarray.to_gpu_async(iH) for iH in iH_arr]
    iH_ptr_gpu = get_gpu_ptrs(iH_arr_gpu)

    proj_mats   = m_dot_batch((N_arr_gpu,  N_ptr_gpu,   'N'),
                              (iH_arr_gpu, iH_ptr_gpu, 'N'),
                              (N_arr_gpu,  N_ptr_gpu,   'T'),
                              (Q_arr_gpu,  Q_ptr_gpu,   'T'))

    offset_mats = m_dot_batch((N_arr_gpu,  N_ptr_gpu,   'N'),
                              (iH_arr_gpu, iH_ptr_gpu, 'N'),
                              (N_arr_gpu,  N_ptr_gpu,   'T'),
                              (F_arr_gpu,  F_ptr_gpu,   'N'))

    return proj_mats, offset_mats

Example 24

Project: lfd Source File: precompute.py
def test_batch_get_sol_params(f, bend_coefs, rot_coef, atol=1e-7, index=0):
    seg_info = f.items()[index][1]
    inv_group =  seg_info['inv']
    ds_key = 'DS_SIZE_{}'.format(DS_SIZE)
    x_nd = inv_group[ds_key]['scaled_cloud_xyz'][:]
    K_nn = inv_group[ds_key]['scaled_K_nn'][:]

    n, d = x_nd.shape

    x_gpu = gpuarray.to_gpu(x_nd)

    H_arr_gpu = []
    for b in bend_coefs:
        cur_offset = np.zeros((1 + d + n, 1 + d + n), np.float64)
        cur_offset[d+1:, d+1:] = b * K_nn
        cur_offset[1:d+1, 1:d+1] = np.diag(rot_coef)
        H_arr_gpu.append(gpuarray.to_gpu(cur_offset))
    H_ptr_gpu = get_gpu_ptrs(H_arr_gpu)

    A = np.r_[np.zeros((d+1,d+1)), np.c_[np.ones((n,1)), x_nd]].T
    n_cnts = A.shape[0]
    _u,_s,_vh = np.linalg.svd(A.T)
    N = _u[:,n_cnts:]
    F = np.zeros((n + d + 1, d), np.float64)
    F[1:d+1, :d] += np.diag(rot_coef)
    
    Q = np.c_[np.ones((n,1)), x_nd, K_nn].astype(np.float64)
    F = F.astype(np.float64)
    N = N.astype(np.float64)

    Q_gpu     = gpuarray.to_gpu(Q)
    Q_arr_gpu = [Q_gpu for _ in range(len(bend_coefs))]
    Q_ptr_gpu = get_gpu_ptrs(Q_arr_gpu)

    F_gpu     = gpuarray.to_gpu(F)
    F_arr_gpu = [F_gpu for _ in range(len(bend_coefs))]
    F_ptr_gpu = get_gpu_ptrs(F_arr_gpu)

    N_gpu = gpuarray.to_gpu(N)
    N_arr_gpu = [N_gpu for _ in range(len(bend_coefs))]
    N_ptr_gpu = get_gpu_ptrs(N_arr_gpu)
    
    dot_batch_nocheck(Q_arr_gpu, Q_arr_gpu, H_arr_gpu,
                      Q_ptr_gpu, Q_ptr_gpu, H_ptr_gpu,
                      transa = 'T')
    QTQ = Q.T.dot(Q)
    H_list = []
    for i, bend_coef in enumerate(bend_coefs):
        H = QTQ.copy()  # copy so each bend_coef gets an independent H
        H[d+1:,d+1:] += bend_coef * K_nn
        rot_coefs = np.ones(d) * rot_coef if np.isscalar(rot_coef) else rot_coef
        H[1:d+1, 1:d+1] += np.diag(rot_coefs)
        # ipdb.set_trace()
        H_list.append(H)

    # N'HN
    NHN_arr_gpu, NHN_ptr_gpu = m_dot_batch((N_arr_gpu, N_ptr_gpu, 'T'),
                                           (H_arr_gpu, H_ptr_gpu, 'N'),
                                           (N_arr_gpu, N_ptr_gpu, 'N'))

    NHN_list = [N.T.dot(H.dot(N)) for H in H_list]
    for i, NHN in enumerate(NHN_list):
        assert(np.allclose(NHN, NHN_arr_gpu[i].get(), atol=atol))

    iH_arr = []
    for NHN in NHN_arr_gpu:
        iH_arr.append(scipy.linalg.inv(NHN.get()).copy())

    h_inv_list = [scipy.linalg.inv(NHN) for NHN in NHN_list]
    assert(np.allclose(iH_arr, h_inv_list, atol=atol))

    iH_arr_gpu = [gpuarray.to_gpu_async(iH) for iH in iH_arr]
    iH_ptr_gpu = get_gpu_ptrs(iH_arr_gpu)

    proj_mats   = m_dot_batch((N_arr_gpu,  N_ptr_gpu,   'N'),
                              (iH_arr_gpu, iH_ptr_gpu, 'N'),
                              (N_arr_gpu,  N_ptr_gpu,   'T'),
                              (Q_arr_gpu,  Q_ptr_gpu,   'T'))
    
    proj_mats_list = [N.dot(h_inv.dot(N.T.dot(Q.T))) for h_inv in h_inv_list]
    assert(np.allclose(proj_mats_list, proj_mats[0][index].get(), atol=atol))

    offset_mats = m_dot_batch((N_arr_gpu,  N_ptr_gpu,   'N'),
                              (iH_arr_gpu, iH_ptr_gpu, 'N'),
                              (N_arr_gpu,  N_ptr_gpu,   'T'),
                              (F_arr_gpu,  F_ptr_gpu,   'N'))
    offset_mats_list = [N.dot(h_inv.dot(N.T.dot(F))) for h_inv in h_inv_list]
    assert(np.allclose(offset_mats_list, offset_mats[0][index].get(), atol=atol))
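
For reference, the CPU-side formulas scattered through the test above can be collected into a single NumPy-only helper. This is a sketch under the same conventions as the listing (n x d points x_nd, TPS kernel K_nn, scalar or per-axis rot_coef); it is not part of the lfd source:

import numpy as np
import scipy.linalg


def cpu_sol_params(x_nd, K_nn, bend_coef, rot_coef):
    """NumPy restatement of the reference computation used in the test."""
    n, d = x_nd.shape
    rot_coefs = np.ones(d) * rot_coef if np.isscalar(rot_coef) else rot_coef

    # Q = [1 | x | K], constraint matrix A, and its null-space basis N
    Q = np.c_[np.ones((n, 1)), x_nd, K_nn].astype(np.float64)
    A = np.r_[np.zeros((d + 1, d + 1)), np.c_[np.ones((n, 1)), x_nd]].T
    _u, _s, _vh = np.linalg.svd(A.T)
    N = _u[:, A.shape[0]:]

    # H = Q'Q + bend_coef * K (nonlinear block) + diag(rot_coefs) (affine block)
    H = Q.T.dot(Q)
    H[d + 1:, d + 1:] += bend_coef * K_nn
    H[1:d + 1, 1:d + 1] += np.diag(rot_coefs)

    F = np.zeros((n + d + 1, d), np.float64)
    F[1:d + 1, :d] += np.diag(rot_coefs)

    h_inv = scipy.linalg.inv(N.T.dot(H).dot(N))
    proj_mat = N.dot(h_inv).dot(N.T).dot(Q.T)
    offset_mat = N.dot(h_inv).dot(N.T).dot(F)
    return proj_mat, offset_mat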

Example 25

Project: pyspace Source File: correlation_features.py
    def _execute(self, x):
        """ Calculates statistical features """
        # determine what has to be computed and initialize data structures
        data = x.get_data()
        if self.central_moment_order == 0:
            self.central_moment_order = 1  # only for feature_size computation
        feature_size = self.raw_moment_order * len(x.channel_names) + \
            (self.central_moment_order-1) * len(x.channel_names) + \
            self.std * len(x.channel_names) + \
            self.quadratic_mean * len(x.channel_names) + \
            self.median * len(x.channel_names) + \
            self.minimum * len(x.channel_names) + \
            self.maximum * len(x.channel_names)
        if not self.artifact_threshold is None:
            feature_size += 1
        statistical_features = numpy.zeros((feature_size, ), numpy.float64)
        # initialize the actual feature_index
        feature_index = 0

        # for every channel...
        # TODO: Try to use the methods on the whole data array
        for index in range(len(x.channel_names)):  
            current_channel = data[:, index]
            # in these cases it is worthwhile to compute and store these values
            # because we will need them again
            if self.raw_moment_order > 0 or self.central_moment_order > 1 or \
                    self.std or self.quadratic_mean:
                average = numpy.mean(current_channel)
            if self.raw_moment_order > 1 or self.quadratic_mean:
                # self.raw_moment(current_channel, 2) 
                second_raw_moment = scipy.stats.ss(current_channel) 
                # print second_raw_moment
            if self.raw_moment_order > 0:  # raw_moment_of_1_th_order needed?
                # it's the mean, so don't compute it again
                statistical_features[feature_index] = average
                feature_index += 1  # update feature_index
                if self.raw_moment_order > 1:  # raw_moment_2nd_order needed?
                    # we have already computed it
                    statistical_features[feature_index] = second_raw_moment
                    feature_index += 1
                    # for the other orders of raw_moments
                    for order in range(3, self.raw_moment_order+1): 
                        statistical_features[feature_index] = \
                            self.raw_moment(current_channel, order)
                        feature_index += 1
            # central_moment
            for order in range(2, self.central_moment_order+1): 
                statistical_features[feature_index] = \
                    scipy.stats.moment(current_channel, order)
                feature_index += 1
            # standard_deviation
            if self.std:
                statistical_features[feature_index] = \
                    numpy.std(current_channel)
                feature_index += 1
            # quadratic_mean    
            if self.quadratic_mean: 
                # we stored relevant results before
                statistical_features[feature_index] = second_raw_moment ** 0.5
                feature_index += 1
            # median
            if self.median:
                statistical_features[feature_index] = \
                    numpy.median(current_channel)
                feature_index += 1
            # minimum
            if self.minimum:
                statistical_features[feature_index] = \
                    numpy.amin(current_channel)
                feature_index += 1
            # maximum
            if self.maximum:
                statistical_features[feature_index] = \
                    numpy.amax(current_channel)
                feature_index += 1
                
            if self.artifact_threshold is not None:
                statistical_features[-1] += self.amplitudes_above(
                    current_channel, self.artifact_threshold)
        # if feature_names have to be determined
        if self.feature_names is None:
            # initialize data structure
            self.feature_names = []
            for name in x.channel_names:
                # raw_moment
                for order in range(1,  self.raw_moment_order+1):
                    self.feature_names.append(
                        "RAW_MOMENT_%d_%s" % (order, name))
                # central_moment
                for order in range(2, self.central_moment_order+1):
                    self.feature_names.append(
                        "CENTRAL_MOMENT_%d_%s" % (order, name))
                # standard_deviation
                if self.std:
                    self.feature_names.append("STD_%s" % (name))
                # quadratic_mean
                if self.quadratic_mean: 
                    self.feature_names.append("QUAD_MEAN_%s" %(name))
                # median
                if self.median: 
                    self.feature_names.append("MEDIAN_%s" % (name))
                # minimum
                if self.minimum: 
                    self.feature_names.append("MIN_%s" % (name))
                # maximum
                if self.maximum:
                    self.feature_names.append("MAX_%s" % (name))
            if not self.artifact_threshold is None:
                self.feature_names.append(
                    "AMP_ABOVE_%.2f" % (self.artifact_threshold))
                    
        feature_vector = FeatureVector(numpy.atleast_2d(statistical_features),
                                       self.feature_names)
        return feature_vector
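
The per-channel statistics assembled above can be reproduced with plain NumPy/SciPy. A minimal sketch for a single channel (the random data and the fixed set of statistics are illustrative, not part of the pySPACE node):

import numpy as np
import scipy.stats

channel = np.random.randn(128).astype(np.float64)   # one channel of samples

mean = np.mean(channel)                        # raw moment of order 1
second_raw = np.sum(channel ** 2)              # sum of squares, as scipy.stats.ss
central_2 = scipy.stats.moment(channel, 2)     # central moment of order 2
statistical_features = np.array(
    [mean, second_raw, central_2,
     np.std(channel),                          # standard deviation
     second_raw ** 0.5,                        # "quadratic mean" as used above
     np.median(channel), channel.min(), channel.max()],
    dtype=np.float64)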

Example 26

Project: image-analogies Source File: patch_matcher.py
def congrid(a, newdims, method='linear', centre=False, minusone=False):
    '''Arbitrary resampling of source array to new dimension sizes.
    Currently only supports maintaining the same number of dimensions.
    To use 1-D arrays, first promote them to shape (x,1).

    Uses the same parameters and creates the same co-ordinate lookup points
    as IDL's congrid routine, which apparently originally came from a VAX/VMS
    routine of the same name.

    method:
    neighbour - closest value from original data
    nearest and linear - uses n x 1-D interpolations using
                         scipy.interpolate.interp1d
    (see Numerical Recipes for validity of use of n 1-D interpolations)
    spline - uses ndimage.map_coordinates

    centre:
    True - interpolation points are at the centres of the bins
    False - points are at the front edge of the bin

    minusone:
    For example- inarray.shape = (i,j) & new dimensions = (x,y)
    False - inarray is resampled by factors of (i/x) * (j/y)
    True - inarray is resampled by (i-1)/(x-1) * (j-1)/(y-1)
    This prevents extrapolation one element beyond bounds of input array.
    '''
    if not a.dtype in [np.float64, np.float32]:
        a = np.cast[float](a)

    m1 = np.cast[int](minusone)
    ofs = np.cast[int](centre) * 0.5
    old = np.array( a.shape )
    ndims = len( a.shape )
    if len( newdims ) != ndims:
        print "[congrid] dimensions error. " \
              "This routine currently only support " \
              "rebinning to the same number of dimensions."
        return None
    newdims = np.asarray( newdims, dtype=float )
    dimlist = []

    if method == 'neighbour':
        for i in range( ndims ):
            base = np.indices(newdims)[i]
            dimlist.append( (old[i] - m1) / (newdims[i] - m1) \
                            * (base + ofs) - ofs )
        cd = np.array( dimlist ).round().astype(int)
        newa = a[list( cd )]
        return newa

    elif method in ['nearest','linear']:
        # calculate new dims
        for i in range( ndims ):
            base = np.arange( newdims[i] )
            dimlist.append( (old[i] - m1) / (newdims[i] - m1) \
                            * (base + ofs) - ofs )
        # specify old dims
        olddims = [np.arange(i, dtype = np.float) for i in list( a.shape )]

        # first interpolation - for ndims = any
        mint = scipy.interpolate.interp1d( olddims[-1], a, kind=method )
        newa = mint( dimlist[-1] )

        trorder = [ndims - 1] + range( ndims - 1 )
        for i in range( ndims - 2, -1, -1 ):
            newa = newa.transpose( trorder )

            mint = scipy.interpolate.interp1d( olddims[i], newa, kind=method )
            newa = mint( dimlist[i] )

        if ndims > 1:
            # need one more transpose to return to original dimensions
            newa = newa.transpose( trorder )

        return newa
    elif method in ['spline']:
        oslices = [ slice(0,j) for j in old ]
        oldcoords = np.ogrid[oslices]
        nslices = [ slice(0,j) for j in list(newdims) ]
        newcoords = np.mgrid[nslices]

        newcoords_dims = range(np.rank(newcoords))
        #make first index last
        newcoords_dims.append(newcoords_dims.pop(0))
        newcoords_tr = newcoords.transpose(newcoords_dims)
        # makes a view that affects newcoords

        newcoords_tr += ofs

        deltas = (np.asarray(old) - m1) / (newdims - m1)
        newcoords_tr *= deltas

        newcoords_tr -= ofs

        newa = scipy.ndimage.map_coordinates(a, newcoords)
        return newa
    else:
        print "Congrid error: Unrecognized interpolation type.\n", \
              "Currently only \'neighbour\', \'nearest\',\'linear\',", \
              "and \'spline\' are supported."
        return None
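
The lookup-point construction described in the docstring can be illustrated without the full routine. A minimal, self-contained sketch of the 1-D linear case with centre=False and minusone=False, written against current NumPy/SciPy (the signal and the target length are arbitrary):

import numpy as np
import scipy.interpolate

a = np.arange(12, dtype=np.float64)      # old 1-D signal of length 12
old_len, new_len = a.shape[0], 5

# front-edge lookup points: (old / new) * index
coords = (old_len / float(new_len)) * np.arange(new_len)
interp = scipy.interpolate.interp1d(np.arange(old_len), a, kind='linear')
resampled = interp(coords)               # length-5 linear resampling of a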

Example 27

Project: root_numpy Source File: _factory.py
def add_classification_events(factory, events, labels, signal_label=None,
                              weights=None, test=False):
    """Add classification events to a TMVA::Factory from NumPy arrays.

    Parameters
    ----------
    factory : TMVA::Factory
        A TMVA::Factory instance with variables already booked in exactly the
        same order as the columns in ``events``.
    events : numpy array of shape [n_events, n_variables]
        A two-dimensional NumPy array containing the rows of events and columns
        of variables. The order of the columns must match the order in which
        you called ``AddVariable()`` for each variable.
    labels : numpy array of shape [n_events]
        The class labels (signal or background) corresponding to each event in
        ``events``.
    signal_label : float or int, optional (default=None)
        The value in ``labels`` for signal events, if ``labels`` contains only
        two classes. If None, the highest value in ``labels`` is used.
    weights : numpy array of shape [n_events], optional
        Event weights.
    test : bool, optional (default=False)
        If True, then the events will be added as test events, otherwise they
        are added as training events by default.

    Notes
    -----
    * A TMVA::Factory requires you to add both training and test events even if
      you don't intend to call ``TestAllMethods()``.

    * When using MethodCuts, the first event added must be a signal event,
      otherwise TMVA will fail with ``<FATAL> Interval : maximum lower than
      minimum``. To place a signal event first::

        # Get index of first signal event
        first_signal = np.nonzero(labels == signal_label)[0][0]
        # Swap this with first event
        events[0], events[first_signal] = events[first_signal].copy(), events[0].copy()
        labels[0], labels[first_signal] = labels[first_signal], labels[0]
        weights[0], weights[first_signal] = weights[first_signal], weights[0]

    """
    if not isinstance(factory, TMVA.Factory):
        raise TypeError("factory must be a TMVA.Factory instance")
    events = np.ascontiguousarray(events, dtype=np.float64)
    if events.ndim == 1:
        # convert to 2D
        events = events[:, np.newaxis]
    elif events.ndim != 2:
        raise ValueError(
            "events must be a two-dimensional array "
            "with one event per row")
    class_labels, class_idx = np.unique(labels, return_inverse=True)
    if class_idx.shape[0] != events.shape[0]:
        raise ValueError("numbers of events and labels do not match")
    if weights is not None:
        weights = np.asarray(weights, dtype=np.float64)
        if weights.shape[0] != events.shape[0]:
            raise ValueError("numbers of events and weights do not match")
        if weights.ndim != 1:
            raise ValueError("weights must be one-dimensional")
    n_classes = class_labels.shape[0]
    if n_classes > 2:
        # multiclass classification
        _libtmvanumpy.factory_add_events_multiclass(
            ROOT.AsCObject(factory), events, class_idx,
            weights, test)
    elif n_classes == 2:
        # binary classification
        if signal_label is None:
            signal_label = class_labels[1]
        signal_label = np.where(class_labels == signal_label)[0][0]
        _libtmvanumpy.factory_add_events_twoclass(
            ROOT.AsCObject(factory), events, class_idx,
            signal_label, weights, test)
    else:
        raise ValueError("labels must contain at least two classes")

Example 28

Project: scikit-learn Source File: validation.py
Function: check_array
def check_array(array, accept_sparse=None, dtype="numeric", order=None,
                copy=False, force_all_finite=True, ensure_2d=True,
                allow_nd=False, ensure_min_samples=1, ensure_min_features=1,
                warn_on_dtype=False, estimator=None):
    """Input validation on an array, list, sparse matrix or similar.

    By default, the input is converted to an at least 2D numpy array.
    If the dtype of the array is object, attempt converting to float,
    raising on failure.

    Parameters
    ----------
    array : object
        Input object to check / convert.

    accept_sparse : string, list of string or None (default=None)
        String[s] representing allowed sparse matrix formats, such as 'csc',
        'csr', etc.  None means that sparse matrix input will raise an error.
        If the input is sparse but not in the allowed format, it will be
        converted to the first listed format.

    dtype : string, type, list of types or None (default="numeric")
        Data type of result. If None, the dtype of the input is preserved.
        If "numeric", dtype is preserved unless array.dtype is object.
        If dtype is a list of types, conversion to the first type is performed
        only if the dtype of the input is not in the list.

    order : 'F', 'C' or None (default=None)
        Whether an array will be forced to be fortran or c-style.
        When order is None (default), then if copy=False, nothing is ensured
        about the memory layout of the output array; otherwise (copy=True)
        the memory layout of the returned array is kept as close as possible
        to the original array.

    copy : boolean (default=False)
        Whether a forced copy will be triggered. If copy=False, a copy might
        be triggered by a conversion.

    force_all_finite : boolean (default=True)
        Whether to raise an error on np.inf and np.nan in X.

    ensure_2d : boolean (default=True)
        Whether to make X at least 2d.

    allow_nd : boolean (default=False)
        Whether to allow X.ndim > 2.

    ensure_min_samples : int (default=1)
        Make sure that the array has a minimum number of samples in its first
        axis (rows for a 2D array). Setting to 0 disables this check.

    ensure_min_features : int (default=1)
        Make sure that the 2D array has some minimum number of features
        (columns). The default value of 1 rejects empty datasets.
        This check is only enforced when the input data has effectively 2
        dimensions or is originally 1D and ``ensure_2d`` is True. Setting to 0
        disables this check.

    warn_on_dtype : boolean (default=False)
        Raise DataConversionWarning if the dtype of the input data structure
        does not match the requested dtype, causing a memory copy.

    estimator : str or estimator instance (default=None)
        If passed, include the name of the estimator in warning messages.

    Returns
    -------
    X_converted : object
        The converted and validated X.
    """
    if isinstance(accept_sparse, str):
        accept_sparse = [accept_sparse]

    # store whether originally we wanted numeric dtype
    dtype_numeric = dtype == "numeric"

    dtype_orig = getattr(array, "dtype", None)
    if not hasattr(dtype_orig, 'kind'):
        # not a data type (e.g. a column named dtype in a pandas DataFrame)
        dtype_orig = None

    if dtype_numeric:
        if dtype_orig is not None and dtype_orig.kind == "O":
            # if input is object, convert to float.
            dtype = np.float64
        else:
            dtype = None

    if isinstance(dtype, (list, tuple)):
        if dtype_orig is not None and dtype_orig in dtype:
            # no dtype conversion required
            dtype = None
        else:
            # dtype conversion required. Let's select the first element of the
            # list of accepted types.
            dtype = dtype[0]

    if estimator is not None:
        if isinstance(estimator, six.string_types):
            estimator_name = estimator
        else:
            estimator_name = estimator.__class__.__name__
    else:
        estimator_name = "Estimator"
    context = " by %s" % estimator_name if estimator is not None else ""

    if sp.issparse(array):
        array = _ensure_sparse_format(array, accept_sparse, dtype, copy,
                                      force_all_finite)
    else:
        array = np.array(array, dtype=dtype, order=order, copy=copy)

        if ensure_2d:
            if array.ndim == 1:
                if ensure_min_samples >= 2:
                    raise ValueError("%s expects at least 2 samples provided "
                                     "in a 2 dimensional array-like input"
                                     % estimator_name)
                warnings.warn(
                    "Passing 1d arrays as data is deprecated in 0.17 and will "
                    "raise ValueError in 0.19. Reshape your data either using "
                    "X.reshape(-1, 1) if your data has a single feature or "
                    "X.reshape(1, -1) if it contains a single sample.",
                    DeprecationWarning)
            array = np.atleast_2d(array)
            # To ensure that array flags are maintained
            array = np.array(array, dtype=dtype, order=order, copy=copy)

        # make sure we actually converted to numeric:
        if dtype_numeric and array.dtype.kind == "O":
            array = array.astype(np.float64)
        if not allow_nd and array.ndim >= 3:
            raise ValueError("Found array with dim %d. %s expected <= 2."
                             % (array.ndim, estimator_name))
        if force_all_finite:
            _assert_all_finite(array)

    shape_repr = _shape_repr(array.shape)
    if ensure_min_samples > 0:
        n_samples = _num_samples(array)
        if n_samples < ensure_min_samples:
            raise ValueError("Found array with %d sample(s) (shape=%s) while a"
                             " minimum of %d is required%s."
                             % (n_samples, shape_repr, ensure_min_samples,
                                context))

    if ensure_min_features > 0 and array.ndim == 2:
        n_features = array.shape[1]
        if n_features < ensure_min_features:
            raise ValueError("Found array with %d feature(s) (shape=%s) while"
                             " a minimum of %d is required%s."
                             % (n_features, shape_repr, ensure_min_features,
                                context))

    if warn_on_dtype and dtype_orig is not None and array.dtype != dtype_orig:
        msg = ("Data with input dtype %s was converted to %s%s."
               % (dtype_orig, array.dtype, context))
        warnings.warn(msg, _DataConversionWarning)
    return array
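
A short usage sketch for check_array, assuming scikit-learn of roughly the vintage shown above (the inputs are arbitrary):

import numpy as np
from sklearn.utils.validation import check_array

X = check_array([[1, 2], [3, 4]], dtype=np.float64)   # 2-D float64 ndarray

try:
    check_array([[1.0, np.nan]])       # non-finite values raise by default
except ValueError as exc:
    print(exc)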

Example 29

Project: scikit-image Source File: texture.py
def draw_multiblock_lbp(img, r, c, width, height,
                        lbp_code=0,
                        color_greater_block=[1, 1, 1],
                        color_less_block=[0, 0.69, 0.96],
                        alpha=0.5
                        ):
    """Multi-block local binary pattern visualization.

    Blocks with higher sums are colored with alpha-blended white rectangles,
    whereas blocks with lower sums are colored alpha-blended cyan. Colors
    and the `alpha` parameter can be changed.

    Parameters
    ----------
    img : ndarray of float or uint
        Image on which to visualize the pattern.
    r : int
        Row-coordinate of top left corner of a rectangle containing feature.
    c : int
        Column-coordinate of top left corner of a rectangle containing feature.
    width : int
        Width of one of 9 equal rectangles that will be used to compute
        a feature.
    height : int
        Height of one of 9 equal rectangles that will be used to compute
        a feature.
    lbp_code : int
        The descriptor of feature to visualize. If not provided, the
        descriptor with 0 value will be used.
    color_greater_block : list of 3 floats
        Floats specifying the color for the block that has greater
        intensity value. They should be in the range [0, 1].
        Corresponding values define (R, G, B) values. Default value
        is white [1, 1, 1].
    color_less_block : list of 3 floats
        Floats specifying the color for the block that has less intensity
        value. They should be in the range [0, 1]. Corresponding values define
        (R, G, B) values. Default value is cyan [0, 0.69, 0.96].
    alpha : float
        Value in the range [0, 1] that specifies opacity of the visualization.
        1 - opaque overlay, 0 - fully transparent overlay.

    Returns
    -------
    output : ndarray of float
        Image with MB-LBP visualization.

    References
    ----------
    .. [1] Face Detection Based on Multi-Block LBP
           Representation. Lun Zhang, Rufeng Chu, Shiming Xiang, Shengcai Liao,
           Stan Z. Li
           http://www.cbsr.ia.ac.cn/users/scliao/papers/Zhang-ICB07-MBLBP.pdf
    """

    # Default colors for regions.
    # White is for the blocks that are brighter.
    # Cyan is for the blocks that have less intensity.
    color_greater_block = np.asarray(color_greater_block, dtype=np.float64)
    color_less_block = np.asarray(color_less_block, dtype=np.float64)

    # Copy array to avoid the changes to the original one.
    output = np.copy(img)

    # As the visualization uses RGB color we need 3 bands.
    if len(img.shape) < 3:
        output = gray2rgb(img)

    # Colors are specified in floats.
    output = img_as_float(output)

    # Offsets of neighbour rectangles relative to central one.
    # It has order starting from top left and going clockwise.
    neighbour_rect_offsets = ((-1, -1), (-1, 0), (-1, 1),
                              (0, 1), (1, 1), (1, 0),
                              (1, -1), (0, -1))

    # Pre-multiply the offsets with width and height.
    neighbour_rect_offsets = np.array(neighbour_rect_offsets)
    neighbour_rect_offsets[:, 0] *= height
    neighbour_rect_offsets[:, 1] *= width

    # Top-left coordinates of central rectangle.
    central_rect_r = r + height
    central_rect_c = c + width

    for element_num, offset in enumerate(neighbour_rect_offsets):

        offset_r, offset_c = offset

        curr_r = central_rect_r + offset_r
        curr_c = central_rect_c + offset_c

        has_greater_value = lbp_code & (1 << (7-element_num))

        # Mix-in the visualization colors.
        if has_greater_value:
            new_value = ((1-alpha) *
                         output[curr_r:curr_r+height, curr_c:curr_c+width] +
                         alpha * color_greater_block)
            output[curr_r:curr_r+height, curr_c:curr_c+width] = new_value
        else:
            new_value = ((1-alpha) *
                         output[curr_r:curr_r+height, curr_c:curr_c+width] +
                         alpha * color_less_block)
            output[curr_r:curr_r+height, curr_c:curr_c+width] = new_value

    return output
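
A minimal usage sketch for the visualization above, assuming scikit-image with skimage.feature.draw_multiblock_lbp available (the image, position, block size, and LBP code are arbitrary):

from skimage import data
from skimage.feature import draw_multiblock_lbp

img = data.coins()                                   # a grayscale test image
overlay = draw_multiblock_lbp(img, r=10, c=10, width=15, height=15,
                              lbp_code=0b10101010, alpha=0.6)
# overlay is a float RGB image with the 9 blocks tinted white or cyan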

Example 30

Project: msCentipede Source File: load_data.py
    def get_read_counts(self, locations, width=200):
        """Get the number of sequencing reads mapped to
        each base along a window centered at each of
        several motif instances.

        Arguments:
            locations : list
            each entry of the list is a list that specifies 
            information for a motif instance

            width : int
            length of the genomic window around the motif
            instance.

        """

        counts = []

        if self._protocol=='DNase_seq':

            for location in locations:

                chrom = location[0]
                strand = location[3]
                if strand=='+':
                    center = location[1]
                else:
                    center = location[2]
                left = max([1,center-width/2])
                right = min([center+width/2, self._ref_lengths[chrom]])

                # fetch all reads overlapping this genomic location
                sam_iter = self._handle.fetch(reference=chrom, start=left, end=right)
                forward = np.zeros((width,), dtype=np.float64)
                reverse = np.zeros((width,), dtype=np.float64)

                for read in sam_iter:

                    # skip read if unmapped
                    if read.is_unmapped:
                        continue

                    # skip read, if mapping quality is low
                    if read.mapq < MIN_MAP_QUAL:
                        continue

                    start = read.pos
                    end = start + read.alen - 1

                    # skip read, if site of cleavage is outside window
                    if (read.is_reverse and end >= right) or (not read.is_reverse and start < left):
                        continue

                    if read.is_reverse:
                        reverse[end-left] += 1
                    else:
                        forward[start-left] += 1

                # flip fwd and rev strand read counts, 
                # if the motif is on the opposite strand.
                if strand=='+':
                    count = [forward, reverse]
                else:
                    count = [reverse[::-1], forward[::-1]]

                count = np.hstack(count)

                # cap the read count at any location
                count[count>MAX_VAL] = MAX_VAL
                counts.append(count.astype(np.float64))

        else:

            for l,location in enumerate(locations):

                chrom = location[0]
                strand = location[3]
                if strand=='+':
                    center = location[1]
                else:
                    center = location[2]
                left = max([1,center-width/2])
                right = min([center+width/2, self._ref_lengths[chrom]])

                # fetch all reads overlapping this genomic location
                sam_iter = [read for read in self._handle.fetch(reference=chrom, start=left, end=right)]
                count = np.zeros((width,), dtype=np.float64)

                for read in sam_iter:

                    # discard anomalies
                    if not (read.is_read1 or read.is_read2):
                        continue

                    # require that (both) paired-reads are uniquely mapped
                    if read.is_unmapped or read.mate_is_unmapped:
                        continue

                    # paired reads mapped to same strand...
                    if read.is_reverse == read.mate_is_reverse:
                        continue

                    # read has poor mapping quality
                    if read.mapq < MIN_MAP_QUAL:
                        continue

                    # discard fragments that are too long or too short
                    isize = np.abs(read.isize)
                    if isize < MIN_READ_LEN or isize > MAX_READ_LEN:
                        continue

                    # pysam pos starts at 0, not 1
                    # site of adapter insertions are 9bp apart
                    # an offset of +4 / -5 gives approximate site of transposition
                    if read.is_reverse:
                        if read.pos<=read.mpos:
                            tpos = read.pos + 4
                        else:
                            tpos = read.mpos + isize - 5
                        # add read, if site of transposition is within window
                        if left<=tpos<right:
                            count[tpos-left] += 1
                    else:
                        if read.pos>=read.mpos:
                            tpos = read.mpos + isize - 5
                        else:
                            tpos = read.pos + 4
                        # add read, if site of transposition is within window
                        if left<=tpos<right:
                            count[tpos-left] += 1

                # flip read counts,
                # if the motif is on the opposite strand.
                if strand=='-':
                    count = count[::-1]

                counts.append(count)

        counts = np.array(counts).astype(np.float64)

        # cap the read count at all locations
        counts[counts>MAX_VAL] = MAX_VAL

        return counts
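
The core of the method above is binning read positions into a fixed-width window and capping the counts. A simplified, pysam-free sketch of that step (the window start, read positions, and cap are hypothetical):

import numpy as np

width = 200
left = 1000                                        # window start
starts = np.array([1003, 1010, 1010, 1150, 1500])  # hypothetical 5' cut sites

counts = np.zeros(width, dtype=np.float64)
inside = (starts >= left) & (starts < left + width)
np.add.at(counts, starts[inside] - left, 1)        # one count per site; duplicates add up
counts[counts > 10] = 10                           # cap, analogous to MAX_VAL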

Example 31

Project: scikit-image Source File: _marching_cubes_classic.py
def marching_cubes_classic(volume, level=None, spacing=(1., 1., 1.),
                           gradient_direction='descent'):
    """
    Classic marching cubes algorithm to find surfaces in 3d volumetric data.

    Note that the ``marching_cubes()`` algorithm is recommended over
    this algorithm, because it's faster and produces better results.
    
    Parameters
    ----------
    volume : (M, N, P) array of doubles
        Input data volume to find isosurfaces. Will be cast to `np.float64`.
    level : float
        Contour value to search for isosurfaces in `volume`. If not
        given or None, the average of the min and max of vol is used.
    spacing : length-3 tuple of floats
        Voxel spacing in spatial dimensions corresponding to numpy array
        indexing dimensions (M, N, P) as in `volume`.
    gradient_direction : string
        Controls if the mesh was generated from an isosurface with gradient
        descent toward objects of interest (the default), or the opposite.
        The two options are:
        * descent : Object was greater than exterior
        * ascent : Exterior was greater than object

    Returns
    -------
    verts : (V, 3) array
        Spatial coordinates for V unique mesh vertices. Coordinate order
        matches input `volume` (M, N, P).
    faces : (F, 3) array
        Define triangular faces via referencing vertex indices from ``verts``.
        This algorithm specifically outputs triangles, so each face has
        exactly three indices.

    Notes
    -----
    The marching cubes algorithm is implemented as described in [1]_.
    A simple explanation is available here::

      http://www.essi.fr/~lingrand/MarchingCubes/algo.html

    There are several known ambiguous cases in the marching cubes algorithm.
    Using point labeling as in [1]_, Figure 4, as shown::

           v8 ------ v7
          / |       / |        y
         /  |      /  |        ^  z
       v4 ------ v3   |        | /
        |  v5 ----|- v6        |/          (note: NOT right handed!)
        |  /      |  /          ----> x
        | /       | /
       v1 ------ v2

    Most notably, if v4, v8, v2, and v6 are all >= `level` (or any
    generalization of this case) two parallel planes are generated by this
    algorithm, separating v4 and v8 from v2 and v6. An equally valid
    interpretation would be a single connected thin surface enclosing all
    four points. This is the best known ambiguity, though there are others.

    This algorithm does not attempt to resolve such ambiguities; it is a naive
    implementation of marching cubes as in [1]_, but may be a good beginning
    for work with more recent techniques (Dual Marching Cubes, Extended
    Marching Cubes, Cubic Marching Squares, etc.).

    Because of interactions between neighboring cubes, the isosurface(s)
    generated by this algorithm are NOT guaranteed to be closed, particularly
    for complicated contours. Furthermore, this algorithm does not guarantee
    a single contour will be returned. Indeed, ALL isosurfaces which cross
    `level` will be found, regardless of connectivity.

    The output is a triangular mesh consisting of a set of unique vertices and
    connecting triangles. The order of these vertices and triangles in the
    output list is determined by the position of the smallest ``x,y,z`` (in
    lexicographical order) coordinate in the contour.  This is a side-effect
    of how the input array is traversed, but can be relied upon.

    The generated mesh guarantees coherent orientation as of version 0.12.

    To quantify the area of an isosurface generated by this algorithm, pass
    outputs directly into `skimage.measure.mesh_surface_area`.

    References
    ----------
    .. [1] Lorensen, William and Harvey E. Cline. Marching Cubes: A High
           Resolution 3D Surface Construction Algorithm. Computer Graphics
           (SIGGRAPH 87 Proceedings) 21(4) July 1987, p. 163-170).
           DOI: 10.1145/37401.37422
    
    See Also
    --------
    skimage.measure.marching_cubes
    skimage.measure.mesh_surface_area
    """
    # Check inputs and ensure `volume` is C-contiguous for memoryviews
    if volume.ndim != 3:
        raise ValueError("Input volume must have 3 dimensions.")
    if level is None:
        level = 0.5 * (volume.min() + volume.max())
    else:
        level = float(level)
        if level < volume.min() or level > volume.max():
            raise ValueError("Surface level must be within volume data range.")
    if len(spacing) != 3:
        raise ValueError("`spacing` must consist of three floats.")

    volume = np.array(volume, dtype=np.float64, order="C")

    # Extract raw triangles using marching cubes in Cython
    #   Returns a list of length-3 lists, each sub-list containing three
    #   tuples. The tuples hold (x, y, z) coordinates for triangle vertices.
    # Note: this algorithm is fast, but returns degenerate "triangles" which
    #   have repeated vertices - and equivalent vertices are redundantly
    #   placed in every triangle they connect with.
    raw_faces = _marching_cubes_classic_cy.iterate_and_store_3d(volume,
                                                                float(level))

    # Find and collect unique vertices, storing triangle verts as indices.
    # Returns a true mesh with no degenerate faces.
    verts, faces = _marching_cubes_classic_cy.unpack_unique_verts(raw_faces)

    verts = np.asarray(verts)
    faces = np.asarray(faces)

    # Fancy indexing to define two vector arrays from triangle vertices
    faces = _correct_mesh_orientation(volume, verts[faces], faces, spacing,
                                      gradient_direction)

    # Adjust for non-isotropic spacing in `verts` at time of return
    return verts * np.r_[spacing], faces
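
A brief usage sketch, assuming a scikit-image version that still ships marching_cubes_classic (it was later deprecated in favour of marching_cubes); the synthetic ellipsoid volume is arbitrary:

import numpy as np
from skimage import measure
from skimage.draw import ellipsoid

volume = ellipsoid(10, 12, 16, levelset=True).astype(np.float64)
verts, faces = measure.marching_cubes_classic(volume, level=0.0)
area = measure.mesh_surface_area(verts, faces)     # surface area of the mesh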

Example 32

Project: sfepy Source File: poisson_parallel_interactive.py
def solve_problem(mesh_filename, options, comm):
    order = options.order

    rank, size = comm.Get_rank(), comm.Get_size()

    output('rank', rank, 'of', size)

    mesh = Mesh.from_file(mesh_filename)

    if rank == 0:
        cell_tasks = pl.partition_mesh(mesh, size, use_metis=options.metis,
                                       verbose=True)

    else:
        cell_tasks = None

    domain = FEDomain('domain', mesh)
    omega = domain.create_region('Omega', 'all')
    field = Field.from_args('fu', nm.float64, 1, omega, approx_order=order)

    output('distributing field %s...' % field.name)
    tt = time.clock()

    distribute = pl.distribute_fields_dofs
    lfds, gfds = distribute([field], cell_tasks,
                            is_overlap=True,
                            save_inter_regions=options.save_inter_regions,
                            output_dir=options.output_dir,
                            comm=comm, verbose=True)
    lfd = lfds[0]

    output('...done in', time.clock() - tt)

    if rank == 0:
        dof_maps = gfds[0].dof_maps
        id_map = gfds[0].id_map

        if options.verify:
            verify_save_dof_maps(field, cell_tasks,
                                 dof_maps, id_map, options, verbose=True)

        if options.plot:
            ppd.plot_partitioning([None, None], field, cell_tasks, gfds[0],
                                  options.output_dir, size)

    output('creating local problem...')
    tt = time.clock()

    omega_gi = Region.from_cells(lfd.cells, field.domain)
    omega_gi.finalize()
    omega_gi.update_shape()

    pb = create_local_problem(omega_gi, order)

    output('...done in', time.clock() - tt)

    variables = pb.get_variables()
    eqs = pb.equations

    u_i = variables['u_i']
    field_i = u_i.field

    if options.plot:
        ppd.plot_local_dofs([None, None], field, field_i, omega_gi,
                            options.output_dir, rank)

    output('allocating global system...')
    tt = time.clock()

    sizes, drange = pl.get_sizes(lfd.petsc_dofs_range, field.n_nod, 1)
    output('sizes:', sizes)
    output('drange:', drange)

    pdofs = pl.get_local_ordering(field_i, lfd.petsc_dofs_conn)

    output('pdofs:', pdofs)

    pmtx, psol, prhs = pl.create_petsc_system(pb.mtx_a, sizes, pdofs, drange,
                                              is_overlap=True, comm=comm,
                                              verbose=True)

    output('...done in', time.clock() - tt)

    output('evaluating local problem...')
    tt = time.clock()

    state = State(variables)
    state.fill(0.0)
    state.apply_ebc()

    rhs_i = eqs.eval_residuals(state())
    # This must be after pl.create_petsc_system() call!
    mtx_i = eqs.eval_tangent_matrices(state(), pb.mtx_a)

    output('...done in', time.clock() - tt)

    output('assembling global system...')
    tt = time.clock()

    pl.apply_ebc_to_matrix(mtx_i, u_i.eq_map.eq_ebc)
    pl.assemble_rhs_to_petsc(prhs, rhs_i, pdofs, drange, is_overlap=True,
                             comm=comm, verbose=True)
    pl.assemble_mtx_to_petsc(pmtx, mtx_i, pdofs, drange, is_overlap=True,
                             comm=comm, verbose=True)

    output('...done in', time.clock() - tt)

    output('creating solver...')
    tt = time.clock()

    conf = Struct(method='cg', precond='gamg', sub_precond=None,
                  i_max=10000, eps_a=1e-50, eps_r=1e-5, eps_d=1e4, verbose=True)
    status = {}
    ls = PETScKrylovSolver(conf, comm=comm, mtx=pmtx, status=status)

    output('...done in', time.clock() - tt)

    output('solving...')
    tt = time.clock()

    psol = ls(prhs, psol, conf)

    psol_i = pl.create_local_petsc_vector(pdofs)
    gather, scatter = pl.create_gather_scatter(pdofs, psol_i, psol, comm=comm)

    scatter(psol_i, psol)

    sol0_i = state() - psol_i[...]
    psol_i[...] = sol0_i

    gather(psol, psol_i)

    output('...done in', time.clock() - tt)

    output('saving solution...')
    tt = time.clock()

    u_i.set_data(sol0_i)
    out = u_i.create_output()

    filename = os.path.join(options.output_dir, 'sol_%02d.h5' % comm.rank)
    pb.domain.mesh.write(filename, io='auto', out=out)

    gather_to_zero = pl.create_gather_to_zero(psol)

    psol_full = gather_to_zero(psol)

    if comm.rank == 0:
        sol = psol_full[...].copy()[id_map]

        u = FieldVariable('u', 'parameter', field,
                          primary_var_name='(set-to-None)')

        filename = os.path.join(options.output_dir, 'sol.h5')
        if (order == 1) or (options.linearization == 'strip'):
            out = u.create_output(sol)
            mesh.write(filename, io='auto', out=out)

        else:
            out = u.create_output(sol, linearization=Struct(kind='adaptive',
                                                            min_level=0,
                                                            max_level=order,
                                                            eps=1e-3))

            out['u'].mesh.write(filename, io='auto', out=out)

    output('...done in', time.clock() - tt)

    if options.show:
        plt.show()

Example 33

Project: RMG-Py Source File: fluxdiagram.py
def generateFluxDiagram(reactionModel, times, concentrations, reactionRates, outputDirectory, centralSpecies=None, speciesDirectory=None, settings=None):
    """
    For a given `reactionModel` and simulation results stored as arrays of
    `times`, species `concentrations`, and `reactionRates`, generate a series
    of flux diagrams as frames of an animation, then stitch them together into
    a movie. The individual frames and the final movie are saved on disk at
    `outputDirectory`.
    """
    global maximumNodeCount, maximumEdgeCount, timeStep, concentrationTolerance, speciesRateTolerance
    # Allow user defined settings for flux diagram generation if given
    if settings:
        maximumNodeCount = settings['maximumNodeCount']       
        maximumEdgeCount = settings['maximumEdgeCount']  
        timeStep = settings['timeStep']
        concentrationTolerance = settings['concentrationTolerance']   
        speciesRateTolerance = settings['speciesRateTolerance']
    
    # Get the species and reactions corresponding to the provided concentrations and reaction rates
    speciesList = reactionModel.core.species[:]
    numSpecies = len(speciesList)
    reactionList = reactionModel.core.reactions[:]
    numReactions = len(reactionList)
    
    #search index of central species:
    if centralSpecies is not None:
        for i, species in enumerate(speciesList):
            if species.label == centralSpecies:
                centralSpeciesIndex = i
                break 
    
    # Compute the rates between each pair of species (big matrix warning!)
    speciesRates = numpy.zeros((len(times),numSpecies,numSpecies), numpy.float64)
    for index, reaction in enumerate(reactionList):
        rate = reactionRates[:,index]
        if not reaction.pairs: reaction.generatePairs()
        for reactant, product in reaction.pairs:
            reactantIndex = speciesList.index(reactant)
            productIndex = speciesList.index(product)
            speciesRates[:,reactantIndex,productIndex] += rate
            speciesRates[:,productIndex,reactantIndex] -= rate
    
    # Determine the maximum concentration for each species and the maximum overall concentration
    maxConcentrations = numpy.max(numpy.abs(concentrations), axis=0)
    maxConcentration = numpy.max(maxConcentrations)
    
    # Determine the maximum rate for each species-species pair and the maximum overall species-species rate
    maxSpeciesRates = numpy.max(numpy.abs(speciesRates), axis=0)
    maxSpeciesRate = numpy.max(maxSpeciesRates)
    speciesIndex = maxSpeciesRates.reshape((numSpecies*numSpecies)).argsort()
    
    # Determine the nodes and edges to keep
    nodes = []; edges = []
    if centralSpecies is None:
        for i in range(numSpecies*numSpecies):
            productIndex, reactantIndex = divmod(speciesIndex[-i-1], numSpecies)
            if reactantIndex > productIndex:
                # Both reactant -> product and product -> reactant are in this list,
                # so only keep one of them
                continue
            if maxSpeciesRates[reactantIndex, productIndex] == 0:
                break
            if reactantIndex not in nodes and len(nodes) < maximumNodeCount: nodes.append(reactantIndex)
            if productIndex not in nodes and len(nodes) < maximumNodeCount: nodes.append(productIndex)
            if len(nodes) > maximumNodeCount: 
                break
            edges.append([reactantIndex, productIndex])
            if len(edges) >= maximumEdgeCount:
                break
    else:
        nodes.append(centralSpeciesIndex)
        for index, reaction in enumerate(reactionList):
            for reactant, product in reaction.pairs:
                reactantIndex = speciesList.index(reactant)
                productIndex = speciesList.index(product)
                if maxSpeciesRates[reactantIndex, productIndex] == 0:
                    break
                if len(nodes) > maximumNodeCount or len(edges) >= maximumEdgeCount: 
                    break
                if reactantIndex == centralSpeciesIndex: 
                    if productIndex not in nodes:
                        nodes.append(productIndex)
                        edges.append([reactantIndex, productIndex])
                if productIndex == centralSpeciesIndex: 
                    if reactantIndex not in nodes:
                        nodes.append(reactantIndex)
                        edges.append([reactantIndex, productIndex])
    # Create the master graph
    # First we're going to generate the coordinates for all of the nodes; for
    # this we use the thickest pen widths for all nodes and edges 
    graph = pydot.Dot('flux_diagram', graph_type='digraph', overlap="false")
    graph.set_rankdir('LR')
    graph.set_fontname('sans')
    graph.set_fontsize('10')
    
    # Add a node for each species
    for index in nodes:
        species = speciesList[index]
        node = pydot.Node(name=str(species))
        node.set_penwidth(maximumNodePenWidth)
        graph.add_node(node)
        # Try to use an image instead of the label
        speciesIndex = str(species) + '.png'
        imagePath = ''
        if not speciesDirectory or not os.path.exists(speciesDirectory): 
            continue
        for root, dirs, files in os.walk(speciesDirectory):
            for f in files:
                if f.endswith(speciesIndex):
                    imagePath = os.path.join(root, f)
                    break
        if os.path.exists(imagePath):
            node.set_image(imagePath)
            node.set_label(" ")
    # Add an edge for each species-species rate
    for reactantIndex, productIndex in edges:
        if reactantIndex in nodes and productIndex in nodes:
            reactant = speciesList[reactantIndex]
            product = speciesList[productIndex]
            edge = pydot.Edge(str(reactant), str(product))
            edge.set_penwidth(maximumEdgePenWidth)
            graph.add_edge(edge) 
    
    # Generate the coordinates for all of the nodes using the specified program
    graph = pydot.graph_from_dot_data(graph.create_dot(prog=program))[0]
    
    # Now iterate over the time points, setting the pen widths appropriately
    # This should preserve the coordinates of the nodes from frame to frame
    frameNumber = 1
    for t in range(len(times)):
        # Update the nodes
        slope = -maximumNodePenWidth / math.log10(concentrationTolerance)
        for index in nodes:
            species = speciesList[index]         
            if re.search(r'^[a-zA-Z0-9_]*$',str(species)) is not None:
                species_string = str(species)
            else:
                # species name contains special characters                
                species_string = '"{0}"'.format(str(species))
                
            node = graph.get_node(species_string)[0]
            concentration = concentrations[t,index] / maxConcentration
            if concentration < concentrationTolerance:
                penwidth = 0.0
            else:
                penwidth = round(slope * math.log10(concentration) + maximumNodePenWidth,3)
            node.set_penwidth(penwidth)
        # Update the edges
        slope = -maximumEdgePenWidth / math.log10(speciesRateTolerance)
        for index in range(len(edges)):
            reactantIndex, productIndex = edges[index]
            if reactantIndex in nodes and productIndex in nodes:
                reactant = speciesList[reactantIndex]
                product = speciesList[productIndex]
                
                if re.search(r'^[a-zA-Z0-9_]*$',str(reactant)) is not None:
                    reactant_string = str(reactant)
                else:
                    reactant_string = '"{0}"'.format(str(reactant))
                    
                if re.search(r'^[a-zA-Z0-9_]*$',str(product)) is not None:
                    product_string = str(product)
                else:
                    product_string = '"{0}"'.format(str(product))
                    
                edge = graph.get_edge(reactant_string, product_string)[0]
                # Determine direction of arrow based on sign of rate
                speciesRate = speciesRates[t,reactantIndex,productIndex] / maxSpeciesRate
                if speciesRate < 0:
                    edge.set_dir("back")
                    speciesRate = -speciesRate
                else:
                    edge.set_dir("forward")
                # Set the edge pen width
                if speciesRate < speciesRateTolerance:
                    penwidth = 0.0
                    edge.set_dir("none")
                else:
                    penwidth = round(slope * math.log10(speciesRate) + maximumEdgePenWidth,3)
                edge.set_penwidth(penwidth)
        # Save the graph at this time to a dot file and a PNG image
        if times[t] == 0:
            label = 't = 0 s'
        else:
            label = 't = 10^{0:.1f} s'.format(math.log10(times[t]))
        graph.set_label(label)
        if t == 0:
            repeat = framesPerSecond * initialPadding
        elif t == len(times) - 1:
            repeat = framesPerSecond * finalPadding
        else:
            repeat = 1
        for r in range(repeat):
            graph.write_dot(os.path.join(outputDirectory, 'flux_diagram_{0:04d}.dot'.format(frameNumber)))
            graph.write_png(os.path.join(outputDirectory, 'flux_diagram_{0:04d}.png'.format(frameNumber)))
            frameNumber += 1
    
    # Use ffmpeg to stitch the PNG images together into a movie
    import subprocess
    
    command = ['ffmpeg',
               '-framerate', '{0:d}'.format(framesPerSecond), # Duration of each image
               '-i', 'flux_diagram_%04d.png',                 # Input file format
               '-c:v', 'mpeg4',                               # Encoder
               '-r', '30',                                    # Video framerate
               '-pix_fmt', 'yuv420p',                         # Pixel format
               'flux_diagram.avi']                            # Output filename
    
    subprocess.check_call(command, cwd=outputDirectory)
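
The pen widths above follow a log-scale mapping: slope = -maxPenWidth / log10(tolerance), so a value at the tolerance maps to width 0 and the peak value maps to the full width. A small numeric check of that mapping (the width and tolerance values are hypothetical, since the module-level constants are not shown in this listing):

import math

maximumNodePenWidth = 10.0
concentrationTolerance = 1e-6
slope = -maximumNodePenWidth / math.log10(concentrationTolerance)

for c in (1.0, 1e-3, 1e-6):
    print(round(slope * math.log10(c) + maximumNodePenWidth, 3))
# prints 10.0, 5.0 and 0.0: full width at the peak, zero width at the tolerance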

Example 34

Project: sfepy Source File: its2D_interactive.py
def main():
    from sfepy import data_dir

    parser = ArgumentParser(description=__doc__,
                            formatter_class=RawDescriptionHelpFormatter)
    parser.add_argument('--version', action='version', version='%(prog)s')
    parser.add_argument('--young', metavar='float', type=float,
                      action='store', dest='young',
                      default=2000.0, help=helps['young'])
    parser.add_argument('--poisson', metavar='float', type=float,
                      action='store', dest='poisson',
                      default=0.4, help=helps['poisson'])
    parser.add_argument('--load', metavar='float', type=float,
                      action='store', dest='load',
                      default=-1000.0, help=helps['load'])
    parser.add_argument('--order', metavar='int', type=int,
                      action='store', dest='order',
                      default=1, help=helps['order'])
    parser.add_argument('-r', '--refine', metavar='int', type=int,
                      action='store', dest='refine',
                      default=0, help=helps['refine'])
    parser.add_argument('-s', '--show',
                      action="store_true", dest='show',
                      default=False, help=helps['show'])
    parser.add_argument('-p', '--probe',
                      action="store_true", dest='probe',
                      default=False, help=helps['probe'])
    options = parser.parse_args()

    assert_((0.0 < options.poisson < 0.5),
            "Poisson's ratio must be in ]0, 0.5[!")
    assert_((0 < options.order),
            'displacement approximation order must be at least 1!')

    output('using values:')
    output("  Young's modulus:", options.young)
    output("  Poisson's ratio:", options.poisson)
    output('  vertical load:', options.load)
    output('uniform mesh refinement level:', options.refine)

    # Build the problem definition.
    mesh = Mesh.from_file(data_dir + '/meshes/2d/its2D.mesh')
    domain = FEDomain('domain', mesh)

    if options.refine > 0:
        for ii in range(options.refine):
            output('refine %d...' % ii)
            domain = domain.refine()
            output('... %d nodes %d elements'
                   % (domain.shape.n_nod, domain.shape.n_el))

    omega = domain.create_region('Omega', 'all')
    left = domain.create_region('Left',
                                'vertices in x < 0.001', 'facet')
    bottom = domain.create_region('Bottom',
                                  'vertices in y < 0.001', 'facet')
    top = domain.create_region('Top', 'vertex 2', 'vertex')

    field = Field.from_args('fu', nm.float64, 'vector', omega,
                            approx_order=options.order)

    u = FieldVariable('u', 'unknown', field)
    v = FieldVariable('v', 'test', field, primary_var_name='u')

    D = stiffness_from_youngpoisson(2, options.young, options.poisson)

    asphalt = Material('Asphalt', D=D)
    load = Material('Load', values={'.val' : [0.0, options.load]})

    integral = Integral('i', order=2*options.order)
    integral0 = Integral('i', order=0)

    t1 = Term.new('dw_lin_elastic(Asphalt.D, v, u)',
                  integral, omega, Asphalt=asphalt, v=v, u=u)
    t2 = Term.new('dw_point_load(Load.val, v)',
                  integral0, top, Load=load, v=v)
    eq = Equation('balance', t1 - t2)
    eqs = Equations([eq])

    xsym = EssentialBC('XSym', bottom, {'u.1' : 0.0})
    ysym = EssentialBC('YSym', left, {'u.0' : 0.0})

    ls = ScipyDirect({})

    nls_status = IndexedStruct()
    nls = Newton({}, lin_solver=ls, status=nls_status)

    pb = Problem('elasticity', equations=eqs, nls=nls, ls=ls)

    pb.time_update(ebcs=Conditions([xsym, ysym]))

    # Solve the problem.
    state = pb.solve()
    output(nls_status)

    # Postprocess the solution.
    out = state.create_output_dict()
    out = stress_strain(out, pb, state, extend=True)
    pb.save_state('its2D_interactive.vtk', out=out)

    gdata = geometry_data['2_3']
    nc = len(gdata.coors)

    integral_vn = Integral('ivn', coors=gdata.coors,
                          weights=[gdata.volume / nc] * nc)

    nodal_stress(out, pb, state, integrals=Integrals([integral_vn]))

    if options.probe:
        # Probe the solution.
        probes, labels = gen_lines(pb)

        sfield = Field.from_args('sym_tensor', nm.float64, 3, omega,
                                approx_order=options.order - 1)
        stress = FieldVariable('stress', 'parameter', sfield,
                               primary_var_name='(set-to-None)')
        strain = FieldVariable('strain', 'parameter', sfield,
                               primary_var_name='(set-to-None)')

        cfield = Field.from_args('component', nm.float64, 1, omega,
                                 approx_order=options.order - 1)
        component = FieldVariable('component', 'parameter', cfield,
                                  primary_var_name='(set-to-None)')

        ev = pb.evaluate
        order = 2 * (options.order - 1)
        strain_qp = ev('ev_cauchy_strain.%d.Omega(u)' % order, mode='qp')
        stress_qp = ev('ev_cauchy_stress.%d.Omega(Asphalt.D, u)' % order,
                       mode='qp', copy_materials=False)

        project_by_component(strain, strain_qp, component, order)
        project_by_component(stress, stress_qp, component, order)

        all_results = []
        for ii, probe in enumerate(probes):
            fig, results = probe_results(u, strain, stress, probe, labels[ii])

            fig.savefig('its2D_interactive_probe_%d.png' % ii)
            all_results.append(results)

        for ii, results in enumerate(all_results):
            output('probe %d:' % ii)
            output.level += 2
            for key, res in ordered_iteritems(results):
                output(key + ':')
                val = res[1]
                output('  min: %+.2e, mean: %+.2e, max: %+.2e'
                       % (val.min(), val.mean(), val.max()))
            output.level -= 2

    if options.show:
        # Show the solution. If the approximation order is greater than 1, the
        # extra DOFs are simply thrown away.
        from sfepy.postprocess.viewer import Viewer

        view = Viewer('its2D_interactive.vtk')
        view(vector_mode='warp_norm', rel_scaling=1,
             is_scalar_bar=True, is_wireframe=True)
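
A quick check of the numpy.float64 pattern in this example: `Field.from_args` takes the dtype as its second argument, and the `D` matrix produced by `stiffness_from_youngpoisson` is an ordinary float64 array. A minimal sketch, assuming sfepy is installed and that the function lives in `sfepy.mechanics.matcoefs` (the import path is an assumption; the example above imports it elsewhere):

import numpy as nm
from sfepy.mechanics.matcoefs import stiffness_from_youngpoisson  # path assumed

# Same material parameters as the --young/--poisson defaults above.
D = stiffness_from_youngpoisson(2, 2000.0, 0.4)
assert D.dtype == nm.float64
print(D.shape)   # (3, 3) -- 2D elasticity in Voigt notation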

Example 35

Project: auto-sklearn Source File: create_datasets.py
def create_predict_spearman_rank_with_cv(cv_metafeatures, cv_experiments,
                                         iterator):
    X = []
    Y = []
    Y_names = []
    # Calculate the pairwise ranks between datasets
    dataset_names = [name for name in cv_metafeatures]
    cross_product = []
    folds_product = []

    if iterator == "combination":
        for cross in itertools.combinations_with_replacement(dataset_names, r=2):
            cross_product.append(cross)
        for folds in itertools.combinations_with_replacement(range(10), r=2):
            folds_product.append(folds)
    elif iterator == "permutation":
        for cross in itertools.permutations(dataset_names, r=2):
            cross_product.append(cross)
        for folds in itertools.permutations(range(10), r=2):
            folds_product.append(folds)
    else:
        raise NotImplementedError()

    logging.info("Create spearman rank dataset with CV data %s", iterator)
    logging.info("Using %d datasets", len(dataset_names))
    logging.info("This will results in %d training points",
        (len(cross_product) * len(folds_product)))
    logging.info("Length of dataset crossproduct %s", len(cross_product))
    logging.info("Length of folds crossproduct %s", len(folds_product))

    # Create inputs and targets
    for i, cross in enumerate(cross_product):
        print("%d/%d: %s" % (i, len(cross_product), cross),)
        for folds in folds_product:
            name = "%s-%d_%s-%d" % (cross[0], folds[0], cross[1], folds[1])
            mf_1 = cv_metafeatures[cross[0]][folds[0]]
            mf_2 = cv_metafeatures[cross[1]][folds[1]]
            assert mf_1.dtype == np.float64
            assert mf_2.dtype == np.float64
            x = np.hstack((mf_1, mf_2))
            columns = cv_metafeatures[cross[0]][folds[0]].index.values
            index = np.hstack(("0_" + columns, "1_" + columns))
            x = pd.Series(data=x, name=name, index=index)
            X.append(x)

            experiments_1 = cv_experiments[cross[0]][folds[0]]
            experiments_2 = cv_experiments[cross[1]][folds[1]]
            assert len(experiments_1) == len(experiments_2)

            responses_1 = np.zeros((len(experiments_1)), dtype=np.float64)
            responses_2 = np.zeros((len(experiments_1)), dtype=np.float64)
            for idx, zipped in enumerate(zip(experiments_1, experiments_2)):
                # Test if the order of the params is the same
                exp_1, exp_2 = zipped
                assert exp_1.params == exp_2.params
                responses_1[idx] = exp_1.result
                responses_2[idx] = exp_2.result

            rho, p = scipy.stats.spearmanr(responses_1, responses_2)
            # A nan is produced if all values of one of the response lists
            # are equal. This results in a division by zero. Because there is
            # no correlation if all values are the same, rho is replaced by
            # zero...
            # It would probably be better to assign random ranks for equal
            # values, but scipy doesn't support this...
            if not np.isfinite(rho):
                rho = 0
            Y.append(rho)
            Y_names.append(name)

    X = pd.DataFrame(X)
    Y = pd.Series(Y, index=Y_names)
    logging.info("CV_Metafeatures %s", cv_metafeatures.shape)
    logging.info("X.shape %s", X.shape)
    logging.info("Y.shape %s", Y.shape)
    # train sklearn regressor (tree) with 10fold CV
    indices = range(len(X))
    np_rs = np.random.RandomState(42)
    np_rs.shuffle(indices)
    X = X.iloc[indices]
    Y = Y.iloc[indices]
    return X, Y
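
The tail of the inner loop above reduces to a small float64 idiom: fill two float64 response vectors, call `scipy.stats.spearmanr`, and fall back to zero when the result is not finite. A standalone sketch with made-up response values:

import numpy as np
import scipy.stats

responses_1 = np.array([0.81, 0.78, 0.90, 0.85], dtype=np.float64)
responses_2 = np.full(4, 0.5, dtype=np.float64)   # constant responses -> spearmanr yields nan

rho, p = scipy.stats.spearmanr(responses_1, responses_2)
if not np.isfinite(rho):
    rho = 0   # no correlation signal when one side is constant
print(rho)    # 0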

Example 36

Project: sfepy Source File: biot_parallel_interactive.py
def solve_problem(mesh_filename, options, comm):
    order_u = options.order_u
    order_p = options.order_p

    rank, size = comm.Get_rank(), comm.Get_size()

    output('rank', rank, 'of', size)

    mesh = Mesh.from_file(mesh_filename)

    if rank == 0:
        cell_tasks = pl.partition_mesh(mesh, size, use_metis=options.metis,
                                       verbose=True)

    else:
        cell_tasks = None

    domain = FEDomain('domain', mesh)
    omega = domain.create_region('Omega', 'all')
    field1 = Field.from_args('fu', nm.float64, mesh.dim, omega,
                             approx_order=order_u)
    field2 = Field.from_args('fp', nm.float64, 1, omega,
                             approx_order=order_p)
    fields = [field1, field2]

    output('distributing fields...')
    tt = time.clock()

    distribute = pl.distribute_fields_dofs
    lfds, gfds = distribute(fields, cell_tasks,
                            is_overlap=True,
                            use_expand_dofs=True,
                            save_inter_regions=options.save_inter_regions,
                            output_dir=options.output_dir,
                            comm=comm, verbose=True)

    output('...done in', time.clock() - tt)

    output('creating local problem...')
    tt = time.clock()

    cells = lfds[0].cells

    omega_gi = Region.from_cells(cells, domain)
    omega_gi.finalize()
    omega_gi.update_shape()

    pb = create_local_problem(omega_gi, [order_u, order_p])

    variables = pb.get_variables()

    state = State(variables)
    state.fill(0.0)
    state.apply_ebc()

    output('...done in', time.clock() - tt)


    output('allocating global system...')
    tt = time.clock()

    sizes, drange, pdofs = pl.setup_composite_dofs(lfds, fields, variables,
                                                   verbose=True)
    pmtx, psol, prhs = pl.create_petsc_system(pb.mtx_a, sizes, pdofs, drange,
                                              is_overlap=True, comm=comm,
                                              verbose=True)

    output('...done in', time.clock() - tt)

    output('creating solver...')
    tt = time.clock()

    conf = Struct(method='bcgsl', precond='jacobi', sub_precond=None,
                  i_max=10000, eps_a=1e-50, eps_r=1e-6, eps_d=1e4,
                  verbose=True)
    status = {}
    ls = PETScKrylovSolver(conf, comm=comm, mtx=pmtx, status=status)

    field_ranges = {}
    for ii, variable in enumerate(variables.iter_state(ordered=True)):
        field_ranges[variable.name] = lfds[ii].petsc_dofs_range

    ls.set_field_split(field_ranges, comm=comm)

    ev = PETScParallelEvaluator(pb, pdofs, drange, True,
                                psol, comm, verbose=True)

    nls_status = {}
    conf = Struct(method='newtonls',
                  i_max=5, eps_a=0, eps_r=1e-5, eps_s=0.0,
                  verbose=True)
    nls = PETScNonlinearSolver(conf, pmtx=pmtx, prhs=prhs, comm=comm,
                               fun=ev.eval_residual,
                               fun_grad=ev.eval_tangent_matrix,
                               lin_solver=ls, status=nls_status)

    output('...done in', time.clock() - tt)

    output('solving...')
    tt = time.clock()

    state = pb.create_state()
    state.apply_ebc()

    ev.psol_i[...] = state()
    ev.gather(psol, ev.psol_i)

    psol = nls(psol)

    ev.scatter(ev.psol_i, psol)
    sol0_i = ev.psol_i[...]

    output('...done in', time.clock() - tt)

    output('saving solution...')
    tt = time.clock()

    state.set_full(sol0_i)
    out = state.create_output_dict()

    filename = os.path.join(options.output_dir, 'sol_%02d.h5' % comm.rank)
    pb.domain.mesh.write(filename, io='auto', out=out)

    gather_to_zero = pl.create_gather_to_zero(psol)

    psol_full = gather_to_zero(psol)

    if comm.rank == 0:
        sol = psol_full[...].copy()

        u = FieldVariable('u', 'parameter', field1,
                          primary_var_name='(set-to-None)')
        remap = gfds[0].id_map
        ug = sol[remap]

        p = FieldVariable('p', 'parameter', field2,
                          primary_var_name='(set-to-None)')
        remap = gfds[1].id_map
        pg = sol[remap]

        if (((order_u == 1) and (order_p == 1))
            or (options.linearization == 'strip')):
            out = u.create_output(ug)
            out.update(p.create_output(pg))
            filename = os.path.join(options.output_dir, 'sol.h5')
            mesh.write(filename, io='auto', out=out)

        else:
            out = u.create_output(ug, linearization=Struct(kind='adaptive',
                                                           min_level=0,
                                                           max_level=order_u,
                                                           eps=1e-3))

            filename = os.path.join(options.output_dir, 'sol_u.h5')
            out['u'].mesh.write(filename, io='auto', out=out)

            out = p.create_output(pg, linearization=Struct(kind='adaptive',
                                                           min_level=0,
                                                           max_level=order_p,
                                                           eps=1e-3))

            filename = os.path.join(options.output_dir, 'sol_p.h5')
            out['p'].mesh.write(filename, io='auto', out=out)

    output('...done in', time.clock() - tt)

Example 37

Project: tpot Source File: driver.py
Function: main
def main():
    """Main function that is called when TPOT is run on the command line"""
    parser = argparse.ArgumentParser(description='A Python tool that '
        'automatically creates and optimizes machine learning pipelines using '
        'genetic programming.', add_help=False)

    parser.add_argument('INPUT_FILE', type=str, help='Data file to optimize the '
        'pipeline on; ensure that the class label column is labeled as "class".')

    parser.add_argument('-h', '--help', action='help', help='Show this help message and exit.')

    parser.add_argument('-is', action='store', dest='INPUT_SEPARATOR', default='\t',
        type=str, help='Character used to separate columns in the input file.')

    parser.add_argument('-target', action='store', dest='TARGET_NAME', default='class',
        type=str, help='Name of the target column in the input file.')

    parser.add_argument('-mode', action='store', dest='TPOT_MODE',
        choices=['classification', 'regression'], default='classification', type=str,
        help='Whether TPOT is being used for a classification or regression problem.')

    parser.add_argument('-o', action='store', dest='OUTPUT_FILE', default='',
        type=str, help='File to export the final optimized pipeline.')

    parser.add_argument('-g', action='store', dest='GENERATIONS', default=100,
        type=positive_integer, help='Number of generations to run pipeline '
        'optimization over.\nGenerally, TPOT will work better when '
        'you give it more generations (and therefore time) to optimize over. '
        'TPOT will evaluate GENERATIONS x POPULATION_SIZE number of pipelines in total.')

    parser.add_argument('-p', action='store', dest='POPULATION_SIZE', default=100,
        type=positive_integer, help='Number of individuals in the GP population.\n'
        'Generally, TPOT will work better when you give it more individuals '
        '(and therefore time) to optimize over. TPOT will evaluate '
        'GENERATIONS x POPULATION_SIZE number of pipelines in total.')

    parser.add_argument('-mr', action='store', dest='MUTATION_RATE', default=0.9,
        type=float_range, help='GP mutation rate in the range [0.0, 1.0]. We '
        'recommend using the default parameter unless you '
        'understand how the mutation rate affects GP algorithms.')

    parser.add_argument('-xr', action='store', dest='CROSSOVER_RATE', default=0.05,
        type=float_range, help='GP crossover rate in the range [0.0, 1.0]. We '
        'recommend using the default parameter unless you '
        'understand how the crossover rate affects GP algorithms.')

    parser.add_argument('-cv', action='store', dest='NUM_CV_FOLDS', default=3,
        type=int, help='The number of folds to evaluate each pipeline over in '
        'k-fold cross-validation during the TPOT pipeline optimization process.')

    parser.add_argument('-scoring', action='store', dest='SCORING_FN', default=None,
        type=str, help='Function used to evaluate the quality of a given pipeline for '
        'the problem. By default, balanced accuracy is used for classification and mean '
        'squared error is used for regression. '
        'TPOT assumes that any function with "error" or "loss" in the name is meant to '
        'be minimized, whereas any other functions will be maximized. '
        'Offers the same options as cross_val_score: '
        '"accuracy", "adjusted_rand_score", "average_precision", "f1", "f1_macro", '
        '"f1_micro", "f1_samples", "f1_weighted", "log_loss", "mean_absolute_error", '
        '"mean_squared_error", "median_absolute_error", "precision", "precision_macro", '
        '"precision_micro", "precision_samples", "precision_weighted", "r2", "recall", '
        '"recall_macro", "recall_micro", "recall_samples", "recall_weighted", "roc_auc"')

    parser.add_argument('-maxtime', action='store', dest='MAX_TIME_MINS', default=None,
        type=int, help='How many minutes TPOT has to optimize the pipeline. This '
        'setting will override the GENERATIONS parameter '
        'and allow TPOT to run until it runs out of time.')

    parser.add_argument('-maxeval', action='store', dest='MAX_EVAL_MINS', default=5,
        type=float, help='How many minutes TPOT has to evaluate a single pipeline. '
        'Setting this parameter to higher values will allow TPOT to explore more complex '
        'pipelines but will also allow TPOT to run longer.')

    parser.add_argument('-s', action='store', dest='RANDOM_STATE', default=None,
        type=int, help='Random number generator seed for reproducibility. Set '
        'this seed if you want your TPOT run to be reproducible with the same '
        'seed and data set in the future.')

    parser.add_argument('-v', action='store', dest='VERBOSITY', default=1,
        choices=[0, 1, 2, 3], type=int, help='How much information TPOT '
        'communicates while it is running: 0 = none, 1 = minimal, 2 = high, 3 = all.')

    parser.add_argument('--no-update-check', action='store_true',
        dest='DISABLE_UPDATE_CHECK', default=False,
        help='Flag indicating whether the TPOT version checker should be disabled.')

    parser.add_argument('--version', action='version',
        version='TPOT {version}'.format(version=__version__),
        help='Show TPOT\'s version number and exit.')

    args = parser.parse_args()

    if args.VERBOSITY >= 2:
        print('\nTPOT settings:')
        for arg in sorted(args.__dict__):
            arg_val = args.__dict__[arg]
            if arg == 'DISABLE_UPDATE_CHECK':
                continue
            elif arg == 'SCORING_FN' and args.__dict__[arg] is None:
                if args.TPOT_MODE == 'classification':
                    arg_val = 'balanced_accuracy'
                else:
                    arg_val = 'mean_squared_error'
            print('{}\t=\t{}'.format(arg, arg_val))
        print('')

    input_data = np.recfromcsv(args.INPUT_FILE, delimiter=args.INPUT_SEPARATOR, dtype=np.float64, case_sensitive=True)
    if args.TARGET_NAME not in input_data.dtype.names:
        raise ValueError('The provided data file does not seem to have a target column. '
                         'Please make sure to specify the target column using the -target parameter.')

    features = np.delete(input_data.view(np.float64).reshape(input_data.size, -1),
                         input_data.dtype.names.index(args.TARGET_NAME), axis=1)

    training_features, testing_features, training_classes, testing_classes = \
        train_test_split(features, input_data[args.TARGET_NAME], random_state=args.RANDOM_STATE)

    if args.TPOT_MODE == 'classification':
        tpot_type = TPOTClassifier
    else:
        tpot_type = TPOTRegressor

    tpot = tpot_type(generations=args.GENERATIONS, population_size=args.POPULATION_SIZE,
                mutation_rate=args.MUTATION_RATE, crossover_rate=args.CROSSOVER_RATE,
                num_cv_folds=args.NUM_CV_FOLDS, scoring=args.SCORING_FN,
                max_time_mins=args.MAX_TIME_MINS, max_eval_time_mins=args.MAX_EVAL_MINS,
                random_state=args.RANDOM_STATE, verbosity=args.VERBOSITY,
                disable_update_check=args.DISABLE_UPDATE_CHECK)

    tpot.fit(training_features, training_classes)

    if args.VERBOSITY in [1, 2] and tpot._optimized_pipeline:
        training_score = max([tpot._hof.keys[x].wvalues[1] for x in range(len(tpot._hof.keys))])
        print('\nTraining score: {}'.format(abs(training_score)))
        print('Holdout score: {}'.format(tpot.score(testing_features, testing_classes)))

    elif args.VERBOSITY >= 3 and tpot._hof:
        print('Final Pareto front testing scores:')

        for pipeline, pipeline_scores in zip(tpot._hof.items, reversed(tpot._hof.keys)):
            tpot._fitted_pipeline = tpot._hof_fitted_pipelines[str(pipeline)]
            print('{}\t{}\t{}'.format(int(abs(pipeline_scores.wvalues[0])),
                                      tpot.score(testing_features, testing_classes),
                                      pipeline))

    if args.OUTPUT_FILE != '':
        tpot.export(args.OUTPUT_FILE)
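
The float64 handling above is concentrated in two lines: reading the file with `dtype=np.float64` and the `view(np.float64).reshape(...)` trick that flattens the record array into a plain feature matrix. A minimal sketch of the same idiom on an in-memory file; it uses `np.genfromtxt(..., names=True)` instead of the (deprecated) `np.recfromcsv`, and the column names are made up:

import io
import numpy as np

csv = io.StringIO("a\tb\tclass\n1.0\t2.0\t0\n3.0\t4.0\t1\n")
data = np.genfromtxt(csv, delimiter='\t', names=True, dtype=np.float64)

# Flatten the homogeneous float64 record array and drop the target column.
features = np.delete(data.view(np.float64).reshape(data.size, -1),
                     data.dtype.names.index('class'), axis=1)
target = data['class']
print(features.dtype, features.shape)   # float64 (2, 2)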

Example 38

Project: scikit-learn Source File: pls_.py
Function: fit
    def fit(self, X, Y):
        """Fit model to data.

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number of samples and
            n_features is the number of predictors.

        Y : array-like of response, shape = [n_samples, n_targets]
            Target vectors, where n_samples is the number of samples and
            n_targets is the number of response variables.
        """

        # copy since this will contain the residuals (deflated) matrices
        check_consistent_length(X, Y)
        X = check_array(X, dtype=np.float64, copy=self.copy)
        Y = check_array(Y, dtype=np.float64, copy=self.copy, ensure_2d=False)
        if Y.ndim == 1:
            Y = Y.reshape(-1, 1)

        n = X.shape[0]
        p = X.shape[1]
        q = Y.shape[1]

        if self.n_components < 1 or self.n_components > p:
            raise ValueError('Invalid number of components: %d' %
                             self.n_components)
        if self.algorithm not in ("svd", "nipals"):
            raise ValueError("Got algorithm %s when only 'svd' "
                             "and 'nipals' are known" % self.algorithm)
        if self.algorithm == "svd" and self.mode == "B":
            raise ValueError('Incompatible configuration: mode B is not '
                             'implemented with svd algorithm')
        if self.deflation_mode not in ["canonical", "regression"]:
            raise ValueError('The deflation mode is unknown')
        # Scale (in place)
        X, Y, self.x_mean_, self.y_mean_, self.x_std_, self.y_std_ = (
            _center_scale_xy(X, Y, self.scale))
        # Residuals (deflated) matrices
        Xk = X
        Yk = Y
        # Results matrices
        self.x_scores_ = np.zeros((n, self.n_components))
        self.y_scores_ = np.zeros((n, self.n_components))
        self.x_weights_ = np.zeros((p, self.n_components))
        self.y_weights_ = np.zeros((q, self.n_components))
        self.x_loadings_ = np.zeros((p, self.n_components))
        self.y_loadings_ = np.zeros((q, self.n_components))
        self.n_iter_ = []

        # NIPALS algo: outer loop, over components
        for k in range(self.n_components):
            if np.all(np.dot(Yk.T, Yk) < np.finfo(np.double).eps):
                # Yk constant
                warnings.warn('Y residual constant at iteration %s' % k)
                break
            # 1) weights estimation (inner loop)
            # -----------------------------------
            if self.algorithm == "nipals":
                x_weights, y_weights, n_iter_ = \
                    _nipals_twoblocks_inner_loop(
                        X=Xk, Y=Yk, mode=self.mode, max_iter=self.max_iter,
                        tol=self.tol, norm_y_weights=self.norm_y_weights)
                self.n_iter_.append(n_iter_)
            elif self.algorithm == "svd":
                x_weights, y_weights = _svd_cross_product(X=Xk, Y=Yk)
            # Forces sign stability of x_weights and y_weights
            # Sign indeterminacy issue from svd if algorithm == "svd"
            # and from platform dependent computation if algorithm == 'nipals'
            x_weights, y_weights = svd_flip(x_weights, y_weights.T)
            y_weights = y_weights.T
            # compute scores
            x_scores = np.dot(Xk, x_weights)
            if self.norm_y_weights:
                y_ss = 1
            else:
                y_ss = np.dot(y_weights.T, y_weights)
            y_scores = np.dot(Yk, y_weights) / y_ss
            # test for null variance
            if np.dot(x_scores.T, x_scores) < np.finfo(np.double).eps:
                warnings.warn('X scores are null at iteration %s' % k)
                break
            # 2) Deflation (in place)
            # ----------------------
            # Possible memory footprint reduction may be done here: in order
            # to avoid the allocation of a data chunk for the rank-one
            # approximations matrix which is then subtracted from Xk, we
            # suggest performing a column-wise deflation.
            #
            # - regress Xk's on x_score
            x_loadings = np.dot(Xk.T, x_scores) / np.dot(x_scores.T, x_scores)
            # - subtract rank-one approximations to obtain remainder matrix
            Xk -= np.dot(x_scores, x_loadings.T)
            if self.deflation_mode == "canonical":
                # - regress Yk's on y_score, then subtract rank-one approx.
                y_loadings = (np.dot(Yk.T, y_scores)
                              / np.dot(y_scores.T, y_scores))
                Yk -= np.dot(y_scores, y_loadings.T)
            if self.deflation_mode == "regression":
                # - regress Yk's on x_score, then subtract rank-one approx.
                y_loadings = (np.dot(Yk.T, x_scores)
                              / np.dot(x_scores.T, x_scores))
                Yk -= np.dot(x_scores, y_loadings.T)
            # 3) Store weights, scores and loadings # Notation:
            self.x_scores_[:, k] = x_scores.ravel()  # T
            self.y_scores_[:, k] = y_scores.ravel()  # U
            self.x_weights_[:, k] = x_weights.ravel()  # W
            self.y_weights_[:, k] = y_weights.ravel()  # C
            self.x_loadings_[:, k] = x_loadings.ravel()  # P
            self.y_loadings_[:, k] = y_loadings.ravel()  # Q
        # Such that: X = TP' + Err and Y = UQ' + Err

        # 4) rotations from input space to transformed space (scores)
        # T = X W(P'W)^-1 = XW* (W* : p x k matrix)
        # U = Y C(Q'C)^-1 = YC* (C* : q x k matrix)
        self.x_rotations_ = np.dot(
            self.x_weights_,
            linalg.pinv2(np.dot(self.x_loadings_.T, self.x_weights_),
                         **pinv2_args))
        if Y.shape[1] > 1:
            self.y_rotations_ = np.dot(
                self.y_weights_,
                linalg.pinv2(np.dot(self.y_loadings_.T, self.y_weights_),
                             **pinv2_args))
        else:
            self.y_rotations_ = np.ones(1)

        if True or self.deflation_mode == "regression":
            # FIXME what's with the if?
            # Estimate regression coefficient
            # Regress Y on T
            # Y = TQ' + Err,
            # Then express in function of X
            # Y = X W(P'W)^-1Q' + Err = XB + Err
            # => B = W*Q' (p x q)
            self.coef_ = np.dot(self.x_rotations_, self.y_loadings_.T)
            self.coef_ = (1. / self.x_std_.reshape((p, 1)) * self.coef_ *
                          self.y_std_)
        return self
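
The only place numpy.float64 appears explicitly in this `fit` is the pair of `check_array(..., dtype=np.float64)` calls, which cast integer or float32 input up to float64 (copying if needed) before the NIPALS loop touches the data. A minimal sketch of that validation step on its own, assuming scikit-learn is installed:

import numpy as np
from sklearn.utils import check_array

X_int = np.arange(6).reshape(3, 2)                # int64 input
X = check_array(X_int, dtype=np.float64, copy=True)
print(X.dtype, X_int.dtype)                       # float64 int64 -- the original is untouched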

Example 39

Project: pyspace Source File: feature_vector.py
    def get_data(self, run_nr, split_nr, train_test): # feature_file, storage_format):
        """ Loads the data from the feature file of the current input collection 
        depending on the storage_format.
        Separates the actual vectors from the names and returns both as lists.
        
        The method expects the following
        
        **Parameters**
        
            :feature_file:      the file of feature vectors to be loaded
            :storage_format:    One of the first components in
                                ['arff', 'real'], ['csv', 'real'],
                                or ['csvUnnamed', 'real'].
                                Format in which the feature_file was saved.
                                This information needs to be present in the
                                meta data.

        For documentation of arff and pickle files, see the class description
        (docstring). Pickle format files do not need any special loading
        because they are already in the expected format.
        
        **CSV**
        
        If no collection meta data is available for the input data, 
        the 'metadata.yaml' file can be generated with
        :mod:`pySPACE.run.node_chain_scripts.md_creator`.
        
        If you created the csv file with pySPACE, you automatically have the
        standard *csv* format with the feature names in the first row
        and the labels in the last column.
        
        If you have a csv table without headings,
        you have the *csvUnnamed* format,
        and the labels can be found in the 'label_column' column
        specified in your spec file.
        
        .. note:: The main loading concept is copied from the time series
          collection; a check is needed to see whether this code can be
          moved to the upper class.
        """
        ## init ##
        classes_names = self.meta_data["classes_names"]
        s_format = self.meta_data["storage_format"]
        if type(s_format) == list:
            s_format = s_format[0]

        # todo: automatic loading of csv?
        delimiter = self.meta_data.get("delimiter", ",")
        if not len(delimiter) == 1:
            self._log("Wrong delimiter ('%s') given. Using default ','." %
                      delimiter, level=logging.CRITICAL)
            delimiter = ","

        # Do lazy loading of the fv objects.
        if isinstance(self.data[(run_nr, split_nr, train_test)], basestring):
            self._log("Lazy loading of %s feature vectors from input "
                      "collection for run %s, split %s." % (train_test, run_nr, 
                                                            split_nr))
            if s_format == "pickle":
                # Load the data from a pickled file
                file = open(self.data[(run_nr, split_nr, train_test)], "r")
                self.data[(run_nr, split_nr, train_test)] = cPickle.load(file)
                file.close()
                sample = self.data[(run_nr, split_nr, train_test)][0][0]
                self.update_meta_data({"feature_names":sample.feature_names,
                                       "len_line":len(sample.feature_names)})
            elif s_format == "arff":
                names = []
                data = []
                # load file
                f = open(self.data[(run_nr, split_nr, train_test)])
                data_set = f.readlines()
                f.close()
                # Read the arff file completely ##
                for line in data_set:
                    if "@attribute class" in line \
                            or "@relation" in line \
                            or "@data" in line:
                        pass
                    elif "@attribute" in line:
                        name_line = line.split()
                        names.append(name_line[1])
                    else: 
                        data.append(line.split(delimiter))
                # the label is expected to be at the end 
                # of each line in the data.
                for line in data:
                    vector = line[0:-1]
                    label = line[-1].rstrip("\n\r ")  # --> label is string
                    if not label in classes_names:
                        classes_names.append(label)
                    sample = FeatureVector(numpy.atleast_2d([vector]).astype(
                        numpy.float64), feature_names=names)
                    self.add_sample(sample=sample, label=label, 
                                    train=train_test, split=split_nr, 
                                    run=run_nr)
                self.update_meta_data({"feature_names": sample.feature_names,
                       "len_line": len(sample.feature_names),
                       "classes_names": classes_names})
            elif "csv" in s_format: # csv or csv unnamed
                # load file
                f = open(self.data[(run_nr, split_nr, train_test)])
                data_set = f.readlines()
                f.close()
                # getting rid of all unwanted rows
                if "ignored_rows" in self.meta_data:
                    ignored_rows = self.meta_data["ignored_rows"]
                    if not type(ignored_rows) == list:
                        warnings.warn("Wrong format: Ignored rows included!")
                        ignored_rows = []
                    ignored_rows.sort()
                    remove_list = []
                    for i in ignored_rows:
                        remove_list.append(data_set[int(i)-1])
                    for j in remove_list:
                        data_set.remove(j)
                # get len_line and delete heading
                feature_names = self.meta_data["feature_names"]
                if s_format == "csv":
                    names = data_set[0].rstrip(",\n").split(delimiter)
                    data_set.pop(0)
                line = data_set[0].split(delimiter)
                line[-1] = line[-1].rstrip("\n\r")
                if line[-1] == '':
                    line.pop(-1)
                len_line = len(line)

                # get and prepare label column numbers (len_line needed)
                try:
                    label_column = self.meta_data["label_column"]
                except KeyError:
                    label_column = -1
                # map column numbers to indices by subtracting 1
                if type(label_column) == int:
                    label_columns = [label_column - 1]
                elif type(label_column) == list:
                    label_columns = [int(l)-1 for l in label_column]
                for i in range(len(label_columns)):
                    # map to positive value and undo previous offset
                    if label_columns[i] < 0:
                        label_columns[i] = label_columns[i] + len_line+1
                    if label_columns[i] < 0:
                        label_columns[i] = -1 + len_line
                # very important sorting for index shifts
                label_columns.sort()
                # calculate unwanted columns
                # note: These indices begin with 1.
                # They are internally shifted when used.
                if self.meta_data.has_key("ignored_columns"):
                    ignored_columns = self.meta_data["ignored_columns"]
                    if not type(ignored_columns) == list:
                        warnings.warn("Wrong format: Ignored columns included!")
                        ignored_columns = []
                    new_ignored_columns = []
                    for i in ignored_columns:
                        i = int(i)
                        if i < 0:
                            i += len_line
                        for label_column in label_columns:
                            if i > label_column:
                                i -= 1
                        new_ignored_columns.append(i)
                else:
                    new_ignored_columns = []
                new_ignored_columns.sort()
                # get all relevant feature_names
                if feature_names is None:
                    if s_format == "csv":
                        # delete blanks and inverted commas 
                        for i in range(len(names)):
                            names[i] = names[i].strip(' "')
                            names[i] = names[i].strip(" '")
                        feature_names = names
                    else: #s_format=="csv_unnamed"
                        feature_names = ["feature_%s" % i for i in
                                         range(len_line)]
                    # switch label names to the end
                    i = 0 # reduce index, after previous labels were deleted
                    for label_column in label_columns:
                        try:
                            feature_names.append(feature_names[label_column-i])
                            del feature_names[label_column-i]
                            i += 1
                        except IndexError:
                            feature_names.append("")
                    # create new feature names
                    feature_names = [item for index, item in enumerate(
                        feature_names) if not index+1 in new_ignored_columns]
                    for _ in label_columns:
                        feature_names.pop(-1)

                # read the data line by line
                for line in data_set:
                    if not delimiter in line:
                        warnings.warn("Line without delimiter:\n%s" % str(line))
                        continue
                    line = line.split(delimiter)
                    line[-1] = line[-1].rstrip("\n\r")
                    if line[-1] == '':
                        line.pop(-1)
                    label = []
                    i = 0
                    for label_column in label_columns:
                        label.append(line.pop(label_column-i))
                        i += 1
                    if type(label) == str:
                        label = label.strip(' "')
                        label = label.strip(" '")
                    if label not in classes_names:
                        classes_names.append(label)
                    # create new line without the ignored columns
                    vector = [item for index,item in enumerate(line) if not
                              index+1 in new_ignored_columns]
                    sample = FeatureVector(numpy.atleast_2d([vector]).astype(
                        numpy.float64), feature_names=feature_names)
                    if len(label) == 1:
                        label = label[-1]
                    self.add_sample(sample=sample, label=label, 
                                    train=train_test, split=split_nr, 
                                    run=run_nr)
                self.update_meta_data({"feature_names": sample.feature_names,
                       "num_features": len(sample.feature_names),
                       "classes_names": classes_names})

        return self.data[(run_nr, split_nr, train_test)]
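
Both the arff and the csv branches above end in the same cast: a list of string tokens is wrapped with `numpy.atleast_2d` and converted via `.astype(numpy.float64)` before being handed to `FeatureVector`. A standalone sketch of just that cast (the line content is made up and `FeatureVector` itself is left out):

import numpy

line = "0.5,1.25,-3.0,Target\n".split(",")
vector = line[0:-1]                       # feature values, still strings
label = line[-1].rstrip("\n\r ")

data = numpy.atleast_2d([vector]).astype(numpy.float64)
print(data.dtype, data.shape)             # float64 (1, 3)
print(label)                              # Target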

Example 40

Project: msCentipede Source File: mscentipedepy.py
Function: optimizer
def optimizer(xo, function, gradient, hessian, kwargs):
    """Calls the appropriate nonlinear convex optimization solver 
    in the package `cvxopt` to find optimal values for the relevant
    parameters, given subroutines that evaluate a function, 
    its gradient, and hessian, this subroutine 

    Arguments
        function : function object
        evaluates the function at the specified parameter values

        gradient : function object
        evaluates the gradient of the function

        hessian : function object
        evaluates the hessian of the function

    """

    def F(x=None, z=None):
        """A subroutine that the cvxopt package can call to get 
        values of the function, gradient and hessian during
        optimization.
        """

        if x is None:
            return 0, cvx.matrix(x_init)

        xx = np.array(x).ravel().astype(np.float64)

        # compute likelihood function
        f = function(xx, kwargs)
        if np.isnan(f) or np.isinf(f):
            f = np.array([np.finfo('float32').max]).astype('float')
        else:
            f = np.array([f]).astype('float')
        
        # compute gradient
        Df = gradient(xx, kwargs)
        if np.isnan(Df).any() or np.isinf(Df).any():
            Df = -1 * np.finfo('float32').max * np.ones((1,xx.size), dtype=float)
        else:
            Df = Df.reshape(1,xx.size)
        if z is None:
            return cvx.matrix(f), cvx.matrix(Df)

        # compute hessian
        hess = hessian(xx, kwargs)
        Hf = z[0] * hess
        return cvx.matrix(f), cvx.matrix(Df), cvx.matrix(Hf)

    # warm start for the optimization
    V = xo.size
    x_init = xo.reshape(V,1)

    # call the optimization subroutine in cvxopt
    if kwargs.has_key('G'):
        # call a constrained nonlinear solver
        solution = solvers.cp(F, G=cvx.matrix(kwargs['G']), h=cvx.matrix(kwargs['h']))
    else:
        # call an unconstrained nonlinear solver
        solution = solvers.cp(F)

    x_final = np.array(solution['x']).ravel()

    return x_final
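
Inside `F`, the cvxopt matrix `x` is flattened back into a numpy.float64 vector with `np.array(x).ravel().astype(np.float64)` before the user-supplied callbacks run. A sketch of how this wrapper might be driven for a simple quadratic; the callbacks below are hypothetical, and the actual call is commented out because it needs cvxopt (and, as written, Python 2's `dict.has_key`):

import numpy as np

def function(x, kwargs):
    return float(np.sum((x - 1.0) ** 2))      # f(x) = ||x - 1||^2

def gradient(x, kwargs):
    return 2.0 * (x - 1.0)

def hessian(x, kwargs):
    return 2.0 * np.eye(x.size)

xo = np.zeros(3, dtype=np.float64)
# x_final = optimizer(xo, function, gradient, hessian, {})   # expected to approach np.ones(3)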

Example 41

Project: scikit-learn Source File: sag.py
Function: sag_solver
def sag_solver(X, y, sample_weight=None, loss='log', alpha=1.,
               max_iter=1000, tol=0.001, verbose=0, random_state=None,
               check_input=True, max_squared_sum=None,
               warm_start_mem=None):
    """SAG solver for Ridge and LogisticRegression

    SAG stands for Stochastic Average Gradient: the gradient of the loss is
    estimated one sample at a time and the model is updated along the way with
    a constant learning rate.

    IMPORTANT NOTE: 'sag' solver converges faster on columns that are on the
    same scale. You can normalize the data by using
    sklearn.preprocessing.StandardScaler on your data before passing it to the
    fit method.

    This implementation works with data represented as dense numpy arrays or
    sparse scipy arrays of floating point values for the features. It will
    fit the data according to squared loss or log loss.

    The regularizer is a penalty added to the loss function that shrinks model
    parameters towards the zero vector using the squared euclidean norm L2.

    .. versionadded:: 0.17

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape (n_samples, n_features)
        Training data

    y : numpy array, shape (n_samples,)
        Target values. With loss='multinomial', y must be label encoded
        (see preprocessing.LabelEncoder).

    sample_weight : array-like, shape (n_samples,), optional
        Weights applied to individual samples (1. for unweighted).

    loss : 'log' | 'squared' | 'multinomial'
        Loss function that will be optimized:
        -'log' is the binary logistic loss, as used in LogisticRegression.
        -'squared' is the squared loss, as used in Ridge.
        -'multinomial' is the multinomial logistic loss, as used in
         LogisticRegression.

        .. versionadded:: 0.18
           *loss='multinomial'*

    alpha : float, optional
        Constant that multiplies the regularization term. Defaults to 1.

    max_iter: int, optional
        The max number of passes over the training data if the stopping
        criteria is not reached. Defaults to 1000.

    tol: double, optional
        The stopping criteria for the weights. The iterations will stop when
        max(change in weights) / max(weights) < tol. Defaults to .001

    verbose: integer, optional
        The verbosity level.

    random_state : int seed, RandomState instance, or None (default)
        The seed of the pseudo random number generator to use when
        shuffling the data.

    check_input : bool, default True
        If False, the input arrays X and y will not be checked.

    max_squared_sum : float, default None
        Maximum squared sum of X over samples. If None, it will be computed,
        going through all the samples. The value should be precomputed
        to speed up cross validation.

    warm_start_mem: dict, optional
        The initialization parameters used for warm starting. Warm starting is
        currently used in LogisticRegression but not in Ridge.
        It contains:
            - 'coef': the weight vector, with the intercept in last line
                if the intercept is fitted.
            - 'gradient_memory': the scalar gradient for all seen samples.
            - 'sum_gradient': the sum of gradient over all seen samples,
                for each feature.
            - 'intercept_sum_gradient': the sum of gradient over all seen
                samples, for the intercept.
            - 'seen': array of boolean describing the seen samples.
            - 'num_seen': the number of seen samples.

    Returns
    -------
    coef_ : array, shape (n_features)
        Weight vector.

    n_iter_ : int
        The number of full pass on all samples.

    warm_start_mem : dict
        Contains a 'coef' key with the fitted result, and possibly the
        fitted intercept at the end of the array. Contains also other keys
        used for warm starting.

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn import linear_model
    >>> n_samples, n_features = 10, 5
    >>> np.random.seed(0)
    >>> X = np.random.randn(n_samples, n_features)
    >>> y = np.random.randn(n_samples)
    >>> clf = linear_model.Ridge(solver='sag')
    >>> clf.fit(X, y)
    ... #doctest: +NORMALIZE_WHITESPACE
    Ridge(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=None,
          normalize=False, random_state=None, solver='sag', tol=0.001)

    >>> X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])
    >>> y = np.array([1, 1, 2, 2])
    >>> clf = linear_model.LogisticRegression(solver='sag')
    >>> clf.fit(X, y)
    ... #doctest: +NORMALIZE_WHITESPACE
    LogisticRegression(C=1.0, class_weight=None, dual=False,
        fit_intercept=True, intercept_scaling=1, max_iter=100,
        multi_class='ovr', n_jobs=1, penalty='l2', random_state=None,
        solver='sag', tol=0.0001, verbose=0, warm_start=False)

    References
    ----------
    Schmidt, M., Roux, N. L., & Bach, F. (2013).
    Minimizing finite sums with the stochastic average gradient
    https://hal.inria.fr/hal-00860051/document

    See also
    --------
    Ridge, SGDRegressor, ElasticNet, Lasso, SVR, and
    LogisticRegression, SGDClassifier, LinearSVC, Perceptron
    """
    if warm_start_mem is None:
        warm_start_mem = {}
    # Ridge default max_iter is None
    if max_iter is None:
        max_iter = 1000

    if check_input:
        X = check_array(X, dtype=np.float64, accept_sparse='csr', order='C')
        y = check_array(y, dtype=np.float64, ensure_2d=False, order='C')

    n_samples, n_features = X.shape[0], X.shape[1]
    # As in SGD, the alpha is scaled by n_samples.
    alpha_scaled = float(alpha) / n_samples

    # if loss == 'multinomial', y should be label encoded.
    n_classes = int(y.max()) + 1 if loss == 'multinomial' else 1

    # initialization
    if sample_weight is None:
        sample_weight = np.ones(n_samples, dtype=np.float64, order='C')

    if 'coef' in warm_start_mem.keys():
        coef_init = warm_start_mem['coef']
    else:
        # assume fit_intercept is False
        coef_init = np.zeros((n_features, n_classes), dtype=np.float64,
                             order='C')

    # coef_init contains possibly the intercept_init at the end.
    # Note that Ridge centers the data before fitting, so fit_intercept=False.
    fit_intercept = coef_init.shape[0] == (n_features + 1)
    if fit_intercept:
        intercept_init = coef_init[-1, :]
        coef_init = coef_init[:-1, :]
    else:
        intercept_init = np.zeros(n_classes, dtype=np.float64)

    if 'intercept_sum_gradient' in warm_start_mem.keys():
        intercept_sum_gradient = warm_start_mem['intercept_sum_gradient']
    else:
        intercept_sum_gradient = np.zeros(n_classes, dtype=np.float64)

    if 'gradient_memory' in warm_start_mem.keys():
        gradient_memory_init = warm_start_mem['gradient_memory']
    else:
        gradient_memory_init = np.zeros((n_samples, n_classes),
                                        dtype=np.float64, order='C')
    if 'sum_gradient' in warm_start_mem.keys():
        sum_gradient_init = warm_start_mem['sum_gradient']
    else:
        sum_gradient_init = np.zeros((n_features, n_classes),
                                     dtype=np.float64, order='C')

    if 'seen' in warm_start_mem.keys():
        seen_init = warm_start_mem['seen']
    else:
        seen_init = np.zeros(n_samples, dtype=np.int32, order='C')

    if 'num_seen' in warm_start_mem.keys():
        num_seen_init = warm_start_mem['num_seen']
    else:
        num_seen_init = 0

    dataset, intercept_decay = make_dataset(X, y, sample_weight, random_state)

    if max_squared_sum is None:
        max_squared_sum = row_norms(X, squared=True).max()
    step_size = get_auto_step_size(max_squared_sum, alpha_scaled, loss,
                                   fit_intercept)

    if step_size * alpha_scaled == 1:
        raise ZeroDivisionError("Current sag implementation does not handle "
                                "the case step_size * alpha_scaled == 1")

    num_seen, n_iter_ = sag(dataset, coef_init,
                            intercept_init, n_samples,
                            n_features, n_classes, tol,
                            max_iter,
                            loss,
                            step_size, alpha_scaled,
                            sum_gradient_init,
                            gradient_memory_init,
                            seen_init,
                            num_seen_init,
                            fit_intercept,
                            intercept_sum_gradient,
                            intercept_decay,
                            verbose)
    if n_iter_ == max_iter:
        warnings.warn("The max_iter was reached which means "
                      "the coef_ did not converge", ConvergenceWarning)

    if fit_intercept:
        coef_init = np.vstack((coef_init, intercept_init))

    warm_start_mem = {'coef': coef_init, 'sum_gradient': sum_gradient_init,
                      'intercept_sum_gradient': intercept_sum_gradient,
                      'gradient_memory': gradient_memory_init,
                      'seen': seen_init, 'num_seen': num_seen}

    if loss == 'multinomial':
        coef_ = coef_init.T
    else:
        coef_ = coef_init[:, 0]

    return coef_, n_iter_, warm_start_mem

Example 42

Project: scikit-learn Source File: locally_linear.py
def locally_linear_embedding(
        X, n_neighbors, n_components, reg=1e-3, eigen_solver='auto', tol=1e-6,
        max_iter=100, method='standard', hessian_tol=1E-4, modified_tol=1E-12,
        random_state=None, n_jobs=1):
    """Perform a Locally Linear Embedding analysis on the data.

    Read more in the :ref:`User Guide <locally_linear_embedding>`.

    Parameters
    ----------
    X : {array-like, sparse matrix, BallTree, KDTree, NearestNeighbors}
        Sample data, shape = (n_samples, n_features), in the form of a
        numpy array, sparse array, precomputed tree, or NearestNeighbors
        object.

    n_neighbors : integer
        number of neighbors to consider for each point.

    n_components : integer
        number of coordinates for the manifold.

    reg : float
        regularization constant, multiplies the trace of the local covariance
        matrix of the distances.

    eigen_solver : string, {'auto', 'arpack', 'dense'}
        auto : algorithm will attempt to choose the best method for input data

        arpack : use arnoldi iteration in shift-invert mode.
                    For this method, M may be a dense matrix, sparse matrix,
                    or general linear operator.
                    Warning: ARPACK can be unstable for some problems.  It is
                    best to try several random seeds in order to check results.

        dense  : use standard dense matrix operations for the eigenvalue
                    decomposition.  For this method, M must be an array
                    or matrix type.  This method should be avoided for
                    large problems.

    tol : float, optional
        Tolerance for 'arpack' method
        Not used if eigen_solver=='dense'.

    max_iter : integer
        maximum number of iterations for the arpack solver.

    method : {'standard', 'hessian', 'modified', 'ltsa'}
        standard : use the standard locally linear embedding algorithm.
                   see reference [1]_
        hessian  : use the Hessian eigenmap method.  This method requires
                   n_neighbors > n_components * (1 + (n_components + 1) / 2).
                   see reference [2]_
        modified : use the modified locally linear embedding algorithm.
                   see reference [3]_
        ltsa     : use local tangent space alignment algorithm
                   see reference [4]_

    hessian_tol : float, optional
        Tolerance for Hessian eigenmapping method.
        Only used if method == 'hessian'

    modified_tol : float, optional
        Tolerance for modified LLE method.
        Only used if method == 'modified'

    random_state: numpy.RandomState or int, optional
        The generator or seed used to determine the starting vector for arpack
        iterations.  Defaults to numpy.random.

    n_jobs : int, optional (default = 1)
        The number of parallel jobs to run for neighbors search.
        If ``-1``, then the number of jobs is set to the number of CPU cores.

    Returns
    -------
    Y : array-like, shape [n_samples, n_components]
        Embedding vectors.

    squared_error : float
        Reconstruction error for the embedding vectors. Equivalent to
        ``norm(Y - W Y, 'fro')**2``, where W are the reconstruction weights.

    References
    ----------

    .. [1] `Roweis, S. & Saul, L. Nonlinear dimensionality reduction
        by locally linear embedding.  Science 290:2323 (2000).`
    .. [2] `Donoho, D. & Grimes, C. Hessian eigenmaps: Locally
        linear embedding techniques for high-dimensional data.
        Proc Natl Acad Sci U S A.  100:5591 (2003).`
    .. [3] `Zhang, Z. & Wang, J. MLLE: Modified Locally Linear
        Embedding Using Multiple Weights.`
        http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.70.382
    .. [4] `Zhang, Z. & Zha, H. Principal manifolds and nonlinear
        dimensionality reduction via tangent space alignment.
        Journal of Shanghai Univ.  8:406 (2004)`
    """
    if eigen_solver not in ('auto', 'arpack', 'dense'):
        raise ValueError("unrecognized eigen_solver '%s'" % eigen_solver)

    if method not in ('standard', 'hessian', 'modified', 'ltsa'):
        raise ValueError("unrecognized method '%s'" % method)

    nbrs = NearestNeighbors(n_neighbors=n_neighbors + 1, n_jobs=n_jobs)
    nbrs.fit(X)
    X = nbrs._fit_X

    N, d_in = X.shape

    if n_components > d_in:
        raise ValueError("output dimension must be less than or equal "
                         "to input dimension")
    if n_neighbors >= N:
        raise ValueError("n_neighbors must be less than number of points")

    if n_neighbors <= 0:
        raise ValueError("n_neighbors must be positive")

    M_sparse = (eigen_solver != 'dense')

    if method == 'standard':
        W = barycenter_kneighbors_graph(
            nbrs, n_neighbors=n_neighbors, reg=reg, n_jobs=n_jobs)

        # we'll compute M = (I-W)'(I-W)
        # depending on the solver, we'll do this differently
        if M_sparse:
            M = eye(*W.shape, format=W.format) - W
            M = (M.T * M).tocsr()
        else:
            M = (W.T * W - W.T - W).toarray()
            M.flat[::M.shape[0] + 1] += 1  # M = W'W - W' - W + I = (I - W)'(I - W)

    elif method == 'hessian':
        dp = n_components * (n_components + 1) // 2

        if n_neighbors <= n_components + dp:
            raise ValueError("for method='hessian', n_neighbors must be "
                             "greater than "
                             "[n_components * (n_components + 3) / 2]")

        neighbors = nbrs.kneighbors(X, n_neighbors=n_neighbors + 1,
                                    return_distance=False)
        neighbors = neighbors[:, 1:]

        Yi = np.empty((n_neighbors, 1 + n_components + dp), dtype=np.float64)
        Yi[:, 0] = 1

        M = np.zeros((N, N), dtype=np.float64)

        use_svd = (n_neighbors > d_in)

        for i in range(N):
            Gi = X[neighbors[i]]
            Gi -= Gi.mean(0)

            # build Hessian estimator
            if use_svd:
                U = svd(Gi, full_matrices=0)[0]
            else:
                Ci = np.dot(Gi, Gi.T)
                U = eigh(Ci)[1][:, ::-1]

            Yi[:, 1:1 + n_components] = U[:, :n_components]

            j = 1 + n_components
            for k in range(n_components):
                Yi[:, j:j + n_components - k] = (U[:, k:k + 1] *
                                                 U[:, k:n_components])
                j += n_components - k

            Q, R = qr(Yi)

            w = Q[:, n_components + 1:]
            S = w.sum(0)

            S[np.where(abs(S) < hessian_tol)] = 1
            w /= S

            nbrs_x, nbrs_y = np.meshgrid(neighbors[i], neighbors[i])
            M[nbrs_x, nbrs_y] += np.dot(w, w.T)

        if M_sparse:
            M = csr_matrix(M)

    elif method == 'modified':
        if n_neighbors < n_components:
            raise ValueError("modified LLE requires "
                             "n_neighbors >= n_components")

        neighbors = nbrs.kneighbors(X, n_neighbors=n_neighbors + 1,
                                    return_distance=False)
        neighbors = neighbors[:, 1:]

        # find the eigenvectors and eigenvalues of each local covariance
        # matrix. We want V[i] to be a [n_neighbors x n_neighbors] matrix,
        # where the columns are eigenvectors
        V = np.zeros((N, n_neighbors, n_neighbors))
        nev = min(d_in, n_neighbors)
        evals = np.zeros([N, nev])

        # choose the most efficient way to find the eigenvectors
        use_svd = (n_neighbors > d_in)

        if use_svd:
            for i in range(N):
                X_nbrs = X[neighbors[i]] - X[i]
                V[i], evals[i], _ = svd(X_nbrs,
                                        full_matrices=True)
            evals **= 2
        else:
            for i in range(N):
                X_nbrs = X[neighbors[i]] - X[i]
                C_nbrs = np.dot(X_nbrs, X_nbrs.T)
                evi, vi = eigh(C_nbrs)
                evals[i] = evi[::-1]
                V[i] = vi[:, ::-1]

        # find regularized weights: this is like normal LLE.
        # because we've already computed the SVD of each covariance matrix,
        # it's faster to use this rather than np.linalg.solve
        reg = 1E-3 * evals.sum(1)

        tmp = np.dot(V.transpose(0, 2, 1), np.ones(n_neighbors))
        tmp[:, :nev] /= evals + reg[:, None]
        tmp[:, nev:] /= reg[:, None]

        w_reg = np.zeros((N, n_neighbors))
        for i in range(N):
            w_reg[i] = np.dot(V[i], tmp[i])
        w_reg /= w_reg.sum(1)[:, None]

        # calculate eta: the median of the ratio of small to large eigenvalues
        # across the points.  This is used to determine s_i, below
        rho = evals[:, n_components:].sum(1) / evals[:, :n_components].sum(1)
        eta = np.median(rho)

        # find s_i, the size of the "almost null space" for each point:
        # this is the size of the largest set of eigenvalues
        # such that Sum[v; v in set]/Sum[v; v not in set] < eta
        s_range = np.zeros(N, dtype=int)
        evals_cumsum = stable_cumsum(evals, 1)
        eta_range = evals_cumsum[:, -1:] / evals_cumsum[:, :-1] - 1
        for i in range(N):
            s_range[i] = np.searchsorted(eta_range[i, ::-1], eta)
        s_range += n_neighbors - nev  # number of zero eigenvalues

        # Now calculate M.
        # This is the [N x N] matrix whose null space is the desired embedding
        M = np.zeros((N, N), dtype=np.float64)
        for i in range(N):
            s_i = s_range[i]

            # select bottom s_i eigenvectors and calculate alpha
            Vi = V[i, :, n_neighbors - s_i:]
            alpha_i = np.linalg.norm(Vi.sum(0)) / np.sqrt(s_i)

            # compute Householder matrix which satisfies
            #  Hi*Vi.T*ones(n_neighbors) = alpha_i*ones(s)
            # using prescription from paper
            h = alpha_i * np.ones(s_i) - np.dot(Vi.T, np.ones(n_neighbors))

            norm_h = np.linalg.norm(h)
            if norm_h < modified_tol:
                h *= 0
            else:
                h /= norm_h

            # Householder matrix is
            #  >> Hi = np.identity(s_i) - 2*np.outer(h,h)
            # Then the weight matrix is
            #  >> Wi = np.dot(Vi,Hi) + (1-alpha_i) * w_reg[i,:,None]
            # We do this much more efficiently:
            Wi = (Vi - 2 * np.outer(np.dot(Vi, h), h) +
                  (1 - alpha_i) * w_reg[i, :, None])

            # Update M as follows:
            # >> W_hat = np.zeros( (N,s_i) )
            # >> W_hat[neighbors[i],:] = Wi
            # >> W_hat[i] -= 1
            # >> M += np.dot(W_hat,W_hat.T)
            # We can do this much more efficiently:
            nbrs_x, nbrs_y = np.meshgrid(neighbors[i], neighbors[i])
            M[nbrs_x, nbrs_y] += np.dot(Wi, Wi.T)
            Wi_sum1 = Wi.sum(1)
            M[i, neighbors[i]] -= Wi_sum1
            M[neighbors[i], i] -= Wi_sum1
            M[i, i] += s_i

        if M_sparse:
            M = csr_matrix(M)

    elif method == 'ltsa':
        neighbors = nbrs.kneighbors(X, n_neighbors=n_neighbors + 1,
                                    return_distance=False)
        neighbors = neighbors[:, 1:]

        M = np.zeros((N, N))

        use_svd = (n_neighbors > d_in)

        for i in range(N):
            Xi = X[neighbors[i]]
            Xi -= Xi.mean(0)

            # compute n_components largest eigenvalues of Xi * Xi^T
            if use_svd:
                v = svd(Xi, full_matrices=True)[0]
            else:
                Ci = np.dot(Xi, Xi.T)
                v = eigh(Ci)[1][:, ::-1]

            Gi = np.zeros((n_neighbors, n_components + 1))
            Gi[:, 1:] = v[:, :n_components]
            Gi[:, 0] = 1. / np.sqrt(n_neighbors)

            GiGiT = np.dot(Gi, Gi.T)

            nbrs_x, nbrs_y = np.meshgrid(neighbors[i], neighbors[i])
            M[nbrs_x, nbrs_y] -= GiGiT
            M[neighbors[i], neighbors[i]] += 1

    return null_space(M, n_components, k_skip=1, eigen_solver=eigen_solver,
                      tol=tol, max_iter=max_iter, random_state=random_state)
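
For context, a minimal usage sketch of this routine through scikit-learn's public sklearn.manifold.locally_linear_embedding function; the toy data and parameter values here are illustrative assumptions, not part of the original example:

import numpy as np
from sklearn.datasets import make_swiss_roll
from sklearn.manifold import locally_linear_embedding

# Toy manifold data, cast to float64 to match the dtype used throughout the code above.
X, _ = make_swiss_roll(n_samples=500, random_state=0)
X = X.astype(np.float64)

# Standard LLE: Y has shape (n_samples, n_components); err is the squared
# Frobenius reconstruction error described in the docstring.
Y, err = locally_linear_embedding(X, n_neighbors=12, n_components=2,
                                  method='standard', random_state=0)
print(Y.shape, err)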

Example 43

Project: RMG-Py Source File: database.py
    def getForwardReactionForFamilyEntry(self, entry, family, thermoDatabase):
        """
        For a given `entry` for a reaction of the given reaction `family` (the
        string label of the family), return the reaction with kinetics and
        degeneracy for the "forward" direction as defined by the reaction 
        family. For families that are their own reverse, the direction the
        kinetics is given in will be preserved. If the entry contains 
        functional groups for the reactants, assume that it is given in the 
        forward direction and do nothing. Returns the reaction in the direction
        consistent with the reaction family template, and the matching template.
        Note that the returned reaction will have its kinetics and degeneracy
        set appropriately.
        
        In order to reverse reactions that are given opposite to the direction
        in which the family is defined, we need to compute the thermodynamics
        of the reactants and products. For this reason you must also pass
        the `thermoDatabase` to use to generate the thermo data.
        """
        
        def matchSpeciesToMolecules(species, molecules):
            if len(species) == len(molecules) == 1:
                return species[0].isIsomorphic(molecules[0])
            elif len(species) == len(molecules) == 2:
                if species[0].isIsomorphic(molecules[0]) and species[1].isIsomorphic(molecules[1]):
                    return True
                elif species[0].isIsomorphic(molecules[1]) and species[1].isIsomorphic(molecules[0]):
                    return True
            return False

        reaction = None; template = None

        # Get the indicated reaction family
        try:
            groups = self.families[family].groups
        except KeyError:
            raise ValueError('Invalid value "{0}" for family parameter.'.format(family))

        if all([(isinstance(reactant, Group) or isinstance(reactant, LogicNode)) for reactant in entry.item.reactants]):
            # The entry is a rate rule, containing functional groups only
            # By convention, these are always given in the forward direction and
            # have kinetics defined on a per-site basis
            reaction = Reaction(
                reactants = entry.item.reactants[:],
                products = [],
                kinetics = entry.data,
                degeneracy = 1,
            )
            template = [groups.entries[label] for label in entry.label.split(';')]

        elif (all([isinstance(reactant, Molecule) for reactant in entry.item.reactants]) and
            all([isinstance(product, Molecule) for product in entry.item.products])):
            # The entry is a real reaction, containing molecules
            # These could be defined for either the forward or reverse direction
            # and could have a reaction-path degeneracy

            reaction = Reaction(reactants=[], products=[])
            for molecule in entry.item.reactants:
                reactant = Species(molecule=[molecule])
                reactant.generateResonanceIsomers()
                reactant.thermo = thermoDatabase.getThermoData(reactant)
                reaction.reactants.append(reactant)
            for molecule in entry.item.products:
                product = Species(molecule=[molecule])
                product.generateResonanceIsomers()
                product.thermo = thermoDatabase.getThermoData(product)
                reaction.products.append(product)

            # Generate all possible reactions involving the reactant species
            generatedReactions = self.generateReactionsFromFamilies([reactant.molecule for reactant in reaction.reactants], [], only_families=[family])

            # Remove from that set any reactions that don't produce the desired reactants and products
            forward = []; reverse = []
            for rxn in generatedReactions:
                if matchSpeciesToMolecules(reaction.reactants, rxn.reactants) and matchSpeciesToMolecules(reaction.products, rxn.products):
                    forward.append(rxn)
                if matchSpeciesToMolecules(reaction.reactants, rxn.products) and matchSpeciesToMolecules(reaction.products, rxn.reactants):
                    reverse.append(rxn)

            # We should now know whether the reaction is given in the forward or
            # reverse direction
            if len(forward) == 1 and len(reverse) == 0:
                # The reaction is in the forward direction, so use as-is
                reaction = forward[0]
                template = reaction.template
                # Don't forget to overwrite the estimated kinetics from the database with the kinetics for this entry
                reaction.kinetics = entry.data
            elif len(reverse) == 1 and len(forward) == 0:
                # The reaction is in the reverse direction
                # First fit Arrhenius kinetics in that direction
                Tdata = 1000.0 / numpy.arange(0.5, 3.301, 0.1, numpy.float64)
                kdata = numpy.zeros_like(Tdata)
                for i in range(Tdata.shape[0]):
                    kdata[i] = entry.data.getRateCoefficient(Tdata[i]) / reaction.getEquilibriumConstant(Tdata[i])
                kunits = 'm^3/(mol*s)' if len(reverse[0].reactants) == 2 else 's^-1'
                kinetics = Arrhenius().fitToData(Tdata, kdata, kunits, T0=1.0)
                kinetics.Tmin = entry.data.Tmin
                kinetics.Tmax = entry.data.Tmax
                kinetics.Pmin = entry.data.Pmin
                kinetics.Pmax = entry.data.Pmax
                # Now flip the direction
                reaction = reverse[0]
                reaction.kinetics = kinetics
                template = reaction.template
            elif len(reverse) > 0 and len(forward) > 0:
                print 'FAIL: Multiple reactions found for {0!r}.'.format(entry.label)
            elif len(reverse) == 0 and len(forward) == 0:
                print 'FAIL: No reactions found for "%s".' % (entry.label)
            else:
                print 'FAIL: Unable to estimate kinetics for {0!r}.'.format(entry.label)

        assert reaction is not None
        assert template is not None
        return reaction, template
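
The numpy.float64 usage of interest in this entry is the temperature grid built for the reverse-rate refit; that step can be reproduced in isolation with plain NumPy (a standalone sketch independent of RMG-Py, with no kinetics attached):

import numpy

# 1000/T grid from 0.5 to 3.3 in steps of 0.1, i.e. T from 2000 K down to ~303 K,
# created directly as float64 via the dtype argument of numpy.arange.
Tdata = 1000.0 / numpy.arange(0.5, 3.301, 0.1, numpy.float64)
kdata = numpy.zeros_like(Tdata)  # one rate coefficient slot per temperature
print(Tdata.dtype, round(Tdata.min(), 1), Tdata.max())  # float64 303.0 2000.0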

Example 44

Project: scikit-learn Source File: spectral_embedding_.py
Function: spectral_embedding
def spectral_embedding(adjacency, n_components=8, eigen_solver=None,
                       random_state=None, eigen_tol=0.0,
                       norm_laplacian=True, drop_first=True):
    """Project the sample on the first eigenvectors of the graph Laplacian.

    The adjacency matrix is used to compute a normalized graph Laplacian
    whose spectrum (especially the eigenvectors associated to the
    smallest eigenvalues) has an interpretation in terms of minimal
    number of cuts necessary to split the graph into comparably sized
    components.

    This embedding can also 'work' even if the ``adjacency`` variable is
    not strictly the adjacency matrix of a graph but more generally
    an affinity or similarity matrix between samples (for instance the
    heat kernel of a euclidean distance matrix or a k-NN matrix).

    However, care must be taken to always make the affinity matrix symmetric
    so that the eigenvector decomposition works as expected.

    Read more in the :ref:`User Guide <spectral_embedding>`.

    Parameters
    ----------
    adjacency : array-like or sparse matrix, shape: (n_samples, n_samples)
        The adjacency matrix of the graph to embed.

    n_components : integer, optional, default 8
        The dimension of the projection subspace.

    eigen_solver : {None, 'arpack', 'lobpcg', or 'amg'}, default None
        The eigenvalue decomposition strategy to use. AMG requires pyamg
        to be installed. It can be faster on very large, sparse problems,
        but may also lead to instabilities.

    random_state : int seed, RandomState instance, or None (default)
        A pseudo random number generator used for the initialization of the
        lobpcg eigenvectors decomposition when eigen_solver == 'amg'.
        By default, arpack is used.

    eigen_tol : float, optional, default=0.0
        Stopping criterion for eigendecomposition of the Laplacian matrix
        when using arpack eigen_solver.

    drop_first : bool, optional, default=True
        Whether to drop the first eigenvector. For spectral embedding, this
        should be True as the first eigenvector should be constant vector for
        connected graph, but for spectral clustering, this should be kept as
        False to retain the first eigenvector.

    norm_laplacian : bool, optional, default=True
        If True, then compute normalized Laplacian.

    Returns
    -------
    embedding : array, shape=(n_samples, n_components)
        The reduced samples.

    Notes
    -----
    Spectral embedding is most useful when the graph has one connected
    component. If the graph has many components, the first few eigenvectors
    will simply uncover the connected components of the graph.

    References
    ----------
    * https://en.wikipedia.org/wiki/LOBPCG

    * Toward the Optimal Preconditioned Eigensolver: Locally Optimal
      Block Preconditioned Conjugate Gradient Method
      Andrew V. Knyazev
      http://dx.doi.org/10.1137%2FS1064827500366124
    """
    adjacency = check_symmetric(adjacency)

    try:
        from pyamg import smoothed_aggregation_solver
    except ImportError:
        if eigen_solver == "amg":
            raise ValueError("The eigen_solver was set to 'amg', but pyamg is "
                             "not available.")

    if eigen_solver is None:
        eigen_solver = 'arpack'
    elif eigen_solver not in ('arpack', 'lobpcg', 'amg'):
        raise ValueError("Unknown value for eigen_solver: '%s'."
                         "Should be 'amg', 'arpack', or 'lobpcg'"
                         % eigen_solver)

    random_state = check_random_state(random_state)

    n_nodes = adjacency.shape[0]
    # Whether to drop the first eigenvector
    if drop_first:
        n_components = n_components + 1

    if not _graph_is_connected(adjacency):
        warnings.warn("Graph is not fully connected, spectral embedding"
                      " may not work as expected.")

    laplacian, dd = graph_laplacian(adjacency,
                                    normed=norm_laplacian, return_diag=True)
    if (eigen_solver == 'arpack' or eigen_solver != 'lobpcg' and
       (not sparse.isspmatrix(laplacian) or n_nodes < 5 * n_components)):
        # lobpcg used with eigen_solver='amg' has bugs for low number of nodes
        # for details see the source code in scipy:
        # https://github.com/scipy/scipy/blob/v0.11.0/scipy/sparse/linalg/eigen
        # /lobpcg/lobpcg.py#L237
        # or matlab:
        # http://www.mathworks.com/matlabcentral/fileexchange/48-lobpcg-m
        laplacian = _set_diag(laplacian, 1, norm_laplacian)

        # Here we'll use shift-invert mode for fast eigenvalues
        # (see http://docs.scipy.org/doc/scipy/reference/tutorial/arpack.html
        #  for a short explanation of what this means)
        # Because the normalized Laplacian has eigenvalues between 0 and 2,
        # I - L has eigenvalues between -1 and 1.  ARPACK is most efficient
        # when finding eigenvalues of largest magnitude (keyword which='LM')
        # and when these eigenvalues are very large compared to the rest.
        # For very large, very sparse graphs, I - L can have many, many
        # eigenvalues very near 1.0.  This leads to slow convergence.  So
        # instead, we'll use ARPACK's shift-invert mode, asking for the
        # eigenvalues near 1.0.  This effectively spreads-out the spectrum
        # near 1.0 and leads to much faster convergence: potentially an
        # orders-of-magnitude speedup over simply using keyword which='LA'
        # in standard mode.
        try:
            # We are computing the opposite of the laplacian inplace so as
            # to spare a memory allocation of a possibly very large array
            laplacian *= -1
            v0 = random_state.uniform(-1, 1, laplacian.shape[0])
            lambdas, diffusion_map = eigsh(laplacian, k=n_components,
                                           sigma=1.0, which='LM',
                                           tol=eigen_tol, v0=v0)
            embedding = diffusion_map.T[n_components::-1] * dd
        except RuntimeError:
            # When submatrices are exactly singular, an LU decomposition
            # in arpack fails. We fallback to lobpcg
            eigen_solver = "lobpcg"
            # Revert the laplacian to its opposite to have lobpcg work
            laplacian *= -1

    if eigen_solver == 'amg':
        # Use AMG to get a preconditioner and speed up the eigenvalue
        # problem.
        if not sparse.issparse(laplacian):
            warnings.warn("AMG works better for sparse matrices")
        # lobpcg needs double precision floats
        laplacian = check_array(laplacian, dtype=np.float64,
                                accept_sparse=True)
        laplacian = _set_diag(laplacian, 1, norm_laplacian)
        ml = smoothed_aggregation_solver(check_array(laplacian, 'csr'))
        M = ml.aspreconditioner()
        X = random_state.rand(laplacian.shape[0], n_components + 1)
        X[:, 0] = dd.ravel()
        lambdas, diffusion_map = lobpcg(laplacian, X, M=M, tol=1.e-12,
                                        largest=False)
        embedding = diffusion_map.T * dd
        if embedding.shape[0] == 1:
            raise ValueError

    elif eigen_solver == "lobpcg":
        # lobpcg needs double precision floats
        laplacian = check_array(laplacian, dtype=np.float64,
                                accept_sparse=True)
        if n_nodes < 5 * n_components + 1:
            # see note above under arpack why lobpcg has problems with small
            # number of nodes
            # lobpcg will fallback to eigh, so we short circuit it
            if sparse.isspmatrix(laplacian):
                laplacian = laplacian.toarray()
            lambdas, diffusion_map = eigh(laplacian)
            embedding = diffusion_map.T[:n_components] * dd
        else:
            laplacian = _set_diag(laplacian, 1, norm_laplacian)
            # We increase the number of eigenvectors requested, as lobpcg
            # doesn't behave well in low dimension
            X = random_state.rand(laplacian.shape[0], n_components + 1)
            X[:, 0] = dd.ravel()
            lambdas, diffusion_map = lobpcg(laplacian, X, tol=1e-15,
                                            largest=False, maxiter=2000)
            embedding = diffusion_map.T[:n_components] * dd
            if embedding.shape[0] == 1:
                raise ValueError

    embedding = _deterministic_vector_sign_flip(embedding)
    if drop_first:
        return embedding[1:n_components].T
    else:
        return embedding[:n_components].T
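
A minimal usage sketch of this function through scikit-learn's public export, feeding it a symmetrized k-NN affinity as the docstring suggests; the data and parameters here are illustrative assumptions:

import numpy as np
from sklearn.neighbors import kneighbors_graph
from sklearn.manifold import spectral_embedding

# Random float64 samples and a symmetrized k-NN connectivity graph as the affinity.
X = np.random.RandomState(0).rand(100, 3).astype(np.float64)
A = kneighbors_graph(X, n_neighbors=10, include_self=True)
A = 0.5 * (A + A.T)  # enforce symmetry so the eigendecomposition behaves

emb = spectral_embedding(A, n_components=2, random_state=0)
print(emb.shape)  # (100, 2)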

Example 45

Project: scikit-learn Source File: gmm.py
Function: fit
    def _fit(self, X, y=None, do_prediction=False):
        """Estimate model parameters with the EM algorithm.

        An initialization step is performed before entering the
        expectation-maximization (EM) algorithm. If you want to avoid
        this step, set the keyword argument init_params to the empty
        string '' when creating the GMM object. Likewise, if you would
        like just to do an initialization, set n_iter=0.

        Parameters
        ----------
        X : array_like, shape (n, n_features)
            List of n_features-dimensional data points. Each row
            corresponds to a single data point.

        Returns
        -------
        responsibilities : array, shape (n_samples, n_components)
            Posterior probabilities of each mixture component for each
            observation.
        """

        # initialization step
        X = check_array(X, dtype=np.float64, ensure_min_samples=2,
                        estimator=self)
        if X.shape[0] < self.n_components:
            raise ValueError(
                'GMM estimation with %s components, but got only %s samples' %
                (self.n_components, X.shape[0]))

        max_log_prob = -np.infty

        if self.verbose > 0:
            print('Expectation-maximization algorithm started.')

        for init in range(self.n_init):
            if self.verbose > 0:
                print('Initialization ' + str(init + 1))
                start_init_time = time()

            if 'm' in self.init_params or not hasattr(self, 'means_'):
                self.means_ = cluster.KMeans(
                    n_clusters=self.n_components,
                    random_state=self.random_state).fit(X).cluster_centers_
                if self.verbose > 1:
                    print('\tMeans have been initialized.')

            if 'w' in self.init_params or not hasattr(self, 'weights_'):
                self.weights_ = np.tile(1.0 / self.n_components,
                                        self.n_components)
                if self.verbose > 1:
                    print('\tWeights have been initialized.')

            if 'c' in self.init_params or not hasattr(self, 'covars_'):
                cv = np.cov(X.T) + self.min_covar * np.eye(X.shape[1])
                if not cv.shape:
                    cv.shape = (1, 1)
                self.covars_ = \
                    distribute_covar_matrix_to_match_covariance_type(
                        cv, self.covariance_type, self.n_components)
                if self.verbose > 1:
                    print('\tCovariance matrices have been initialized.')

            # EM algorithms
            current_log_likelihood = None
            # reset self.converged_ to False
            self.converged_ = False

            for i in range(self.n_iter):
                if self.verbose > 0:
                    print('\tEM iteration ' + str(i + 1))
                    start_iter_time = time()
                prev_log_likelihood = current_log_likelihood
                # Expectation step
                log_likelihoods, responsibilities = self.score_samples(X)
                current_log_likelihood = log_likelihoods.mean()

                # Check for convergence.
                if prev_log_likelihood is not None:
                    change = abs(current_log_likelihood - prev_log_likelihood)
                    if self.verbose > 1:
                        print('\t\tChange: ' + str(change))
                    if change < self.tol:
                        self.converged_ = True
                        if self.verbose > 0:
                            print('\t\tEM algorithm converged.')
                        break

                # Maximization step
                self._do_mstep(X, responsibilities, self.params,
                               self.min_covar)
                if self.verbose > 1:
                    print('\t\tEM iteration ' + str(i + 1) + ' took {0:.5f}s'.format(
                        time() - start_iter_time))

            # if the results are better, keep it
            if self.n_iter:
                if current_log_likelihood > max_log_prob:
                    max_log_prob = current_log_likelihood
                    best_params = {'weights': self.weights_,
                                   'means': self.means_,
                                   'covars': self.covars_}
                    if self.verbose > 1:
                        print('\tBetter parameters were found.')

            if self.verbose > 1:
                print('\tInitialization ' + str(init + 1) + ' took {0:.5f}s'.format(
                    time() - start_init_time))

        # check the existence of an init param that was not subject to
        # likelihood computation issue.
        if np.isneginf(max_log_prob) and self.n_iter:
            raise RuntimeError(
                "EM algorithm was never able to compute a valid likelihood " +
                "given initial parameters. Try different init parameters " +
                "(or increasing n_init) or check for degenerate data.")

        if self.n_iter:
            self.covars_ = best_params['covars']
            self.means_ = best_params['means']
            self.weights_ = best_params['weights']
        else:  # self.n_iter == 0 occurs when using GMM within HMM
            # Need to make sure that there are responsibilities to output
            # Output zeros because it was just a quick initialization
            responsibilities = np.zeros((X.shape[0], self.n_components))

        return responsibilities
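
The GMM class this method belongs to was later removed from scikit-learn in favour of GaussianMixture; a comparable sketch against that newer public API (an assumption about the installed version, not the code above):

import numpy as np
from sklearn.mixture import GaussianMixture

# Random float64 data; fit() validates and casts the input much like check_array above.
X = np.random.RandomState(0).randn(300, 2).astype(np.float64)

gmm = GaussianMixture(n_components=3, n_init=2, random_state=0)
gmm.fit(X)
resp = gmm.predict_proba(X)  # posterior responsibilities, the quantity _fit returns
print(resp.shape)            # (300, 3)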

Example 46

Project: ncpol2sdpa Source File: picos_utils.py
def convert_to_picos(sdp, duplicate_moment_matrix=False):
    """Convert an SDP relaxation to a PICOS problem such that the exported
    .dat-s file is extremely sparse and no penalty is imposed in terms of
    SDP variables or number of constraints. This conversion can be used for
    imposing extra constraints on the moment matrix, such as partial transpose.

    :param sdp: The SDP relaxation to convert.
    :type sdp: :class:`ncpol2sdpa.sdp`.
    :param duplicate_moment_matrix: Optional parameter to add an unconstrained
                                    moment matrix to the problem with the same
                                    structure as the moment matrix with the PSD
                                    constraint.
    :type duplicate_moment_matrix: bool.

    :returns: :class:`picos.Problem`.
    """
    import picos as pic
    import cvxopt as cvx
    P = pic.Problem(verbose=sdp.verbose)
    block_size = sdp.block_struct[0]
    if sdp.F.dtype == np.float64:
        X = P.add_variable('X', (block_size, block_size), vtype="symmetric")
        if duplicate_moment_matrix:
            Y = P.add_variable('Y', (block_size, block_size), vtype="symmetric")
    else:
        X = P.add_variable('X', (block_size, block_size), vtype="hermitian")
        if duplicate_moment_matrix:
            Y = P.add_variable('X', (block_size, block_size), vtype="hermitian")
    row_offset = 0
    theoretical_n_vars = sdp.block_struct[0]**2
    eq_block_start = sdp.constraint_starting_block+sdp._n_inequalities
    block_idx = 0
    while (block_idx < len(sdp.block_struct)):
        block_size = sdp.block_struct[block_idx]
        x, Ix, Jx = [], [], []
        c, Ic, Jc = [], [], []
        for i, row in enumerate(sdp.F.rows[row_offset:row_offset +
                                                     block_size**2]):
            for j, column in enumerate(row):
                if column > 0:
                    x.append(sdp.F.data[row_offset+i][j])
                    Ix.append(i)
                    Jx.append(column-1)
                    i0 = (i//block_size)+(i % block_size)*block_size
                    if i != i0:
                        x.append(sdp.F.data[row_offset+i][j])
                        Ix.append(i0)
                        Jx.append(column-1)

                else:
                    c.append(sdp.F.data[row_offset+i][j])
                    Ic.append(i%block_size)
                    Jc.append(i//block_size)
        permutation = cvx.spmatrix(x, Ix, Jx, (block_size**2,
                                               theoretical_n_vars))
        constant = cvx.spmatrix(c, Ic, Jc, (block_size, block_size))
        if duplicate_moment_matrix:
            constraint = X
        else:
            constraint = X.copy()
        for k in constraint.factors:
            constraint.factors[k] = permutation
        constraint._size = (block_size, block_size)
        if block_idx < eq_block_start:
            P.add_constraint(constant + constraint >> 0)
        else:
            P.add_constraint(constant + constraint == 0)
            row_offset += block_size**2
            block_idx += 1
        if duplicate_moment_matrix and \
                block_size == sdp.block_struct[0]:
            for k in Y.factors:
                Y.factors[k] = permutation
        row_offset += block_size**2
        block_idx += 1
    if len(np.nonzero(sdp.obj_facvar)[0]) > 0:
        x, Ix, Jx = [], [], []
        for k, val in enumerate(sdp.obj_facvar):
            if val != 0:
                x.append(val)
                Ix.append(0)
                Jx.append(k)
        permutation = cvx.spmatrix(x, Ix, Jx)
        objective = X.copy()
        for k in objective.factors:
            objective.factors[k] = permutation
        objective._size = (1, 1)
        P.set_objective('min', objective)
    return P
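
The float64 check near the top drives the choice between a symmetric and a hermitian PICOS variable; that branching can be illustrated in isolation with a LIL sparse matrix like sdp.F (a standalone illustration, not the ncpol2sdpa API):

import numpy as np
from scipy.sparse import lil_matrix

F_real = lil_matrix(np.eye(3))                         # float64 data by default
F_cplx = lil_matrix(np.eye(3, dtype=np.complex128))    # complex-valued relaxation

for F in (F_real, F_cplx):
    vtype = "symmetric" if F.dtype == np.float64 else "hermitian"
    print(F.dtype, "->", vtype)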

Example 47

Project: auto-sklearn Source File: create_datasets.py
def create_predict_spearman_rank(metafeatures, experiments, iterator):
    X = []
    Y = []
    Y_names = []
    # Calculate the pairwise ranks between datasets
    dataset_names = [name for name in metafeatures.index]
    cross_product = []

    if iterator == "combination":
        for cross in itertools.combinations_with_replacement(dataset_names, r=2):
            cross_product.append(cross)
    elif iterator == "permutation":
        for cross in itertools.permutations(dataset_names, r=2):
            cross_product.append(cross)
    else:
        raise NotImplementedError(iterator)
    logging.info("Create spearman rank dataset without CV data and %s",
                iterator)
    logging.info("Using %d datasets", len(dataset_names))
    logging.info("This will results in %d training points", len(cross_product))

    # Create inputs and targets
    for cross in cross_product:
        name = "%s_%s" % (cross[0], cross[1])

        mf_1 = metafeatures.loc[cross[0]]
        mf_2 = metafeatures.loc[cross[1]]
        assert mf_1.dtype == np.float64
        assert mf_2.dtype == np.float64
        x = np.hstack((mf_1, mf_2))
        columns = metafeatures.columns.values
        index = np.hstack(("0_" + columns, "1_" + columns))
        x = pd.Series(data=x, name=name, index=index)
        X.append(x)

        experiments_1 = experiments[cross[0]]
        experiments_2 = experiments[cross[1]]
        assert len(experiments_1) == len(experiments_2), name

        responses_1 = np.zeros((len(experiments_1)), dtype=np.float64)
        responses_2 = np.zeros((len(experiments_1)), dtype=np.float64)

        for idx, zipped in enumerate(zip(
                sorted(experiments_1, key=lambda t: str(t.configuration)),
                sorted(experiments_2, key=lambda t: str(t.configuration)))):
            # Test if the order of the params is the same
            exp_1, exp_2 = zipped
            print(exp_1.configuration, exp_2.configuration)
            assert exp_1.configuration == exp_2.configuration,\
                (experiments_1, experiments_2)
            responses_1[idx] = exp_1.result if np.isfinite(exp_1.result) else 1
            responses_2[idx] = exp_2.result if np.isfinite(exp_2.result) else 1

        rho, p = scipy.stats.spearmanr(responses_1, responses_2)
        #rho, p = scipy.stats.kendalltau(responses_1, responses_2)
        if not np.isfinite(rho):
            rho = 0

        Y.append(rho)
        Y_names.append(name)

    X = pd.DataFrame(X)
    Y = pd.Series(Y, index=Y_names)
    logging.info("Metafeatures %s", metafeatures.shape)
    logging.info("X.shape %s", X.shape)
    logging.info("Y.shape %s", Y.shape)
    assert X.shape == (len(cross_product), metafeatures.shape[1] * 2), \
        (X.shape, (len(cross_product), metafeatures.shape[1] * 2))
    assert Y.shape == (len(cross_product), )
    # train sklearn regressor (tree) with 10fold CV
    indices = list(range(len(X)))  # materialize so shuffle can permute in place
    np_rs = np.random.RandomState(42)
    np_rs.shuffle(indices)
    X = X.iloc[indices]
    Y = Y.iloc[indices]
    return X, Y
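
The core per-pair computation above is a Spearman rank correlation between two float64 response vectors; on its own it reduces to the following (toy data, illustrative only):

import numpy as np
import scipy.stats

rs = np.random.RandomState(0)
responses_1 = rs.rand(20).astype(np.float64)
responses_2 = (responses_1 + 0.1 * rs.rand(20)).astype(np.float64)

rho, p = scipy.stats.spearmanr(responses_1, responses_2)
rho = rho if np.isfinite(rho) else 0  # same guard as in the loop above
print(rho)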

Example 48

Project: RLScore Source File: test_rls.py
Function: testrls
    def testRLS(self):
        
        print
        print
        print
        print
        print("Testing the cross-validation routines of the RLS module.")
        print
        print
        floattype = np.float64
        
        m, n = 400, 100
        Xtrain = np.random.rand(m, n)
        K = np.dot(Xtrain, Xtrain.T)
        ylen = 2
        Y = np.zeros((m, ylen), dtype=floattype)
        Y = np.random.rand(m, ylen)
        
        hoindices = [45]
        hoindices2 = [45, 50]
        hoindices3 = [45, 50, 55]
        hocompl = list(set(range(m)) - set(hoindices))
        
        Kho = K[np.ix_(hocompl, hocompl)]
        Yho = Y[hocompl]
        
        kwargs = {}
        kwargs['Y'] = Y
        kwargs['X'] = K
        kwargs['kernel'] = 'PrecomputedKernel'
        dualrls = RLS(**kwargs)
        
        kwargs = {}
        kwargs["X"] = Xtrain
        kwargs["Y"] = Y
        kwargs["bias"] = 0.
        primalrls = RLS(**kwargs)
        
        kwargs = {}
        kwargs['Y'] = Yho
        kwargs['X'] = Kho
        kwargs['kernel'] = 'PrecomputedKernel'
        dualrls_naive = RLS(**kwargs)
        
        testkm = K[np.ix_(hocompl, hoindices)]
        trainX = Xtrain[hocompl]
        testX = Xtrain[hoindices]
        kwargs = {}
        kwargs['Y'] = Yho
        kwargs['X'] = trainX
        kwargs["bias"] = 0.
        primalrls_naive = RLS(**kwargs)
        
        loglambdas = range(-5, 5)
        for j in range(0, len(loglambdas)):
            regparam = 2. ** loglambdas[j]
            print
            print("Regparam 2^%1d" % loglambdas[j])
            
            dumbho = np.dot(testkm.T, np.dot(la.inv(Kho + regparam * np.eye(Kho.shape[0])), Yho))
            dumbho = np.squeeze(dumbho)
            print(str(dumbho) + ' Dumb HO (dual)')
            
            dualrls_naive.solve(regparam)
            predho1 = dualrls_naive.predictor.predict(testkm.T)
            print(str(predho1) + ' Naive HO (dual)')
            
            dualrls.solve(regparam)
            predho2 = dualrls.holdout(hoindices)
            print(str(predho2) + ' Fast HO (dual)')
            
            dualrls.solve(regparam)
            predho = dualrls.leave_one_out()[hoindices[0]]
            print(str(predho) + ' Fast LOO (dual)')
            
            primalrls_naive.solve(regparam)
            predho3 = primalrls_naive.predictor.predict(testX)
            print(str(predho3) + ' Naive HO (primal)')
            
            primalrls.solve(regparam)
            predho4 = primalrls.holdout(hoindices)
            print(str(predho4) + ' Fast HO (primal)')
            for predho in [predho1, predho2, predho3, predho4]:
                self.assertEqual(dumbho.shape, predho.shape)
                assert_allclose(dumbho, predho)
                #for row in range(predho.shape[0]):
                #    for col in range(predho.shape[1]):
                #        self.assertAlmostEqual(dumbho[row,col],predho[row,col])
            primalrls.solve(regparam)
            predho = primalrls.leave_one_out()[hoindices[0]]
            print(str(predho) + ' Fast LOO (primal)')
        print()
        hoindices = range(100, 300)
        hocompl = list(set(range(m)) - set(hoindices))
        
        Kho = K[np.ix_(hocompl, hocompl)]
        Yho = Y[hocompl]
        testkm = K[np.ix_(hocompl, hoindices)]
        
        dumbho = np.dot(testkm.T, np.dot(la.inv(Kho + regparam * np.eye(Kho.shape[0])), Yho))
        
        kwargs = {}
        kwargs['Y'] = Y
        kwargs['X'] = Xtrain
        dualrls.solve(regparam)
        predho2 = dualrls.holdout(hoindices2)
        print(str(predho2) + ' Fast HO')
        hopred = dualrls.leave_pair_out(np.array([hoindices2[0], 4, 6]), np.array([hoindices2[1], 5, 7]))
        print(str(hopred[0][0]) + '\n' + str(hopred[1][0]) + ' Fast LPO')
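
The 'Dumb HO' baseline in this test is plain dual ridge regression on the retained kernel rows; it can be reproduced without RLScore (a standalone sketch with made-up dimensions):

import numpy as np
import numpy.linalg as la

floattype = np.float64
rs = np.random.RandomState(0)
Xtrain = rs.rand(50, 10).astype(floattype)
Y = rs.rand(50, 2).astype(floattype)
K = np.dot(Xtrain, Xtrain.T)

hoindices = [45]
hocompl = sorted(set(range(50)) - set(hoindices))
Kho, Yho = K[np.ix_(hocompl, hocompl)], Y[hocompl]
testkm = K[np.ix_(hocompl, hoindices)]

regparam = 1.0
dumbho = np.dot(testkm.T, np.dot(la.inv(Kho + regparam * np.eye(Kho.shape[0])), Yho))
print(np.squeeze(dumbho))  # held-out prediction for index 45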

Example 49

Project: RMG-Py Source File: draw.py
    def draw(self, network, format, path=None):
        """
        Draw the potential energy surface for the given `network` as a Cairo
        surface of the given `format`. If `path` is given, the surface is
        saved to that location on disk.
        """
        try:
            import cairocffi as cairo
        except ImportError:
            try:
                import cairo
            except ImportError:
                logging.warning('Cairo not found; potential energy surface will not be drawn.')
                return

        self.network = network

        # The order of wells is as follows:
        #   - Reactant channels come first (to the left)
        #   - Isomers are in the middle
        #   - Product channels come last (to the right)
        # This is done because most people will read the PES from left to right
        wells = []
        wells.extend(network.reactants)
        wells.extend(network.isomers)
        wells.extend(network.products)

        # Generate the bounding rectangles for each configuration label
        labelRects = []
        for well in wells:
            labelRects.append(self.__getLabelSize(well, format=format))

        # Get energy range (use kJ/mol internally)
        E0min, E0max = self.__getEnergyRange()
        E0min *= 0.001; E0max *= 0.001
        
        # Drawing parameters
        padding = self.options['padding']
        wellWidth = self.options['wellWidth']
        wellSpacing = self.options['wellSpacing']
        Eslope = self.options['Eslope']
        TSwidth = self.options['TSwidth']
        
        E0_offset = self.options['E0offset'] * 0.001
        
        # Choose multiplier to convert energies to desired units (on figure only)
        Eunits = self.options['Eunits']
        try:
            Emult = {'J/mol': 1.0, 'kJ/mol': 0.001, 'cal/mol': 1.0/4.184, 'kcal/mol': 1.0/4184., 'cm^-1': 1.0/11.962}[Eunits]
        except KeyError:
            raise Exception('Invalid value "{0}" for Eunits parameter.'.format(Eunits))
            
        # Determine height required for drawing
        Eheight = self.__getTextSize('0.0', format=format)[3] + 6
        y_E0 = (E0max - 0.0) * Eslope + padding + Eheight
        height = (E0max - E0min) * Eslope + 2 * padding + Eheight + 6
        for i in range(len(wells)):
            if 0.001 * wells[i].E0 == E0min:
                height += labelRects[i][3]
                break
        
        # Determine naive position of each well (one per column)
        coordinates = numpy.zeros((len(wells), 2), numpy.float64)
        x = padding
        for i in range(len(wells)):
            well = wells[i]
            rect = labelRects[i]
            thisWellWidth = max(wellWidth, rect[2])
            E0 = 0.001 * well.E0
            y = y_E0 - E0 * Eslope
            coordinates[i] = [x + 0.5 * thisWellWidth, y]
            x += thisWellWidth + wellSpacing
        width = x + padding - wellSpacing
        
        # Determine the rectangles taken up by each well
        # We'll use this to merge columns safely so that wells don't overlap
        wellRects = []
        for i in range(len(wells)):
            l, t, w, h = labelRects[i]
            x, y = coordinates[i,:]
            if w < wellWidth: w = wellWidth
            t -= 6 + Eheight
            h += 6 + Eheight
            wellRects.append([l + x - 0.5 * w, t + y + 6, w, h])
        
        # Squish columns together from the left where possible until an isomer is encountered
        oldLeft = numpy.min(coordinates[:,0])
        Nleft = wells.index(network.isomers[0])-1
        columns = []
        for i in range(Nleft, -1, -1):
            top = wellRects[i][1]
            bottom = top + wellRects[i][3]
            for j in range(len(columns)):
                for c in columns[j]:
                    top0 = wellRects[c][1]
                    bottom0 = top0 + wellRects[c][3]
                    if (top >= top0 and top <= bottom0) or (top <= top0 and top0 <= bottom):
                        # Can't put it in this column
                        break
                else:
                    # Can put it in this column
                    columns[j].append(i)
                    break
            else:
                # Needs a new column
                columns.append([i])
        for column in columns:
            columnWidth = max([wellRects[c][2] for c in column])
            x = coordinates[column[0]+1,0] - 0.5 * wellRects[column[0]+1][2] - wellSpacing - 0.5 * columnWidth
            for c in column:
                delta = x - coordinates[c,0]
                wellRects[c][0] += delta
                coordinates[c,0] += delta
        newLeft = numpy.min(coordinates[:,0])
        coordinates[:,0] -= newLeft - oldLeft

        # Squish columns together from the right where possible until an isomer is encountered
        Nright = wells.index(network.isomers[-1])+1
        columns = []
        for i in range(Nright, len(wells)):
            top = wellRects[i][1]
            bottom = top + wellRects[i][3]
            for j in range(len(columns)):
                for c in columns[j]:
                    top0 = wellRects[c][1]
                    bottom0 = top0 + wellRects[c][3]
                    if (top >= top0 and top <= bottom0) or (top <= top0 and top0 <= bottom):
                        # Can't put it in this column
                        break
                else:
                    # Can put it in this column
                    columns[j].append(i)
                    break
            else:
                # Needs a new column
                columns.append([i])
        for column in columns:
            columnWidth = max([wellRects[c][2] for c in column])
            x = coordinates[column[0]-1,0] + 0.5 * wellRects[column[0]-1][2] + wellSpacing + 0.5 * columnWidth
            for c in column:
                delta = x - coordinates[c,0]
                wellRects[c][0] += delta
                coordinates[c,0] += delta

        width = max([rect[2]+rect[0] for rect in wellRects]) - min([rect[0] for rect in wellRects]) + 2 * padding
        
        # Draw to the final surface
        surface = createNewSurface(format=format, path=path, width=width, height=height)
        cr = cairo.Context(surface)
        
        # Some global settings
        cr.select_font_face("sans")
        cr.set_font_size(self.options['fontSizeNormal'])

        # Fill the background with white
        cr.set_source_rgba(1.0, 1.0, 1.0, 1.0)
        cr.paint()

#        # DEBUG: Draw well bounding rectangles
#        cr.save()
#        cr.set_line_width(1.0)
#        for rect in wellRects:
#            cr.rectangle(*rect)
#            cr.set_source_rgba(0.0, 0.0, 1.0, 0.5)
#            cr.stroke()  
#        cr.restore()

        # Draw path reactions
        for rxn in network.pathReactions:
            for reac in range(len(wells)):
                if wells[reac].species == rxn.reactants:
                    break
            else:
                raise Exception
            for prod in range(len(wells)):
                if wells[prod].species == rxn.products:
                    break
            else:
                raise Exception
            E0_reac = wells[reac].E0 * 0.001 - E0_offset
            E0_prod = wells[prod].E0 * 0.001 - E0_offset
            E0_TS = rxn.transitionState.conformer.E0.value_si * 0.001 - E0_offset
            if reac < prod:
                x1, y1 = coordinates[reac,:]
                x2, y2 = coordinates[prod,:]
            else:
                x1, y1 = coordinates[prod,:]
                x2, y2 = coordinates[reac,:]
            x1 += wellSpacing / 2.0; x2 -= wellSpacing / 2.0
            if abs(E0_TS - E0_reac) > 0.1 and abs(E0_TS - E0_prod) > 0.1:
                if len(rxn.reactants) == 2:
                    if reac < prod: x0 = x1 + wellSpacing * 0.5
                    else:           x0 = x2 - wellSpacing * 0.5
                elif len(rxn.products) == 2:
                    if reac < prod: x0 = x2 - wellSpacing * 0.5
                    else:           x0 = x1 + wellSpacing * 0.5
                else:
                    x0 = 0.5 * (x1 + x2)
                y0 = y_E0 - (E0_TS + E0_offset) * Eslope
                width1 = (x0 - x1)
                width2 = (x2 - x0)
                # Draw horizontal line for TS
                cr.set_source_rgba(0.0, 0.0, 0.0, 1.0)
                cr.set_line_width(2.0)
                cr.move_to(x0 - TSwidth/2.0, y0)
                cr.line_to(x0+TSwidth/2.0, y0)
                cr.stroke()
                # Add background and text for energy
                E0 = "{0:.1f}".format(E0_TS * 1000. * Emult)
                extents = cr.text_extents(E0)
                x = x0 - extents[2] / 2.0; y = y0 - 6.0
                cr.rectangle(x + extents[0] - 2.0, y + extents[1] - 2.0, extents[2] + 4.0, extents[3] + 4.0)
                cr.set_source_rgba(1.0, 1.0, 1.0, 0.75)
                cr.fill()
                cr.move_to(x, y)
                cr.set_source_rgba(0.0, 0.0, 0.0, 1.0)
                cr.show_text(E0)
                # Draw Bezier curve connecting reactants and products through TS
                cr.set_source_rgba(0.0, 0.0, 0.0, 0.5)
                cr.set_line_width(1.0)
                cr.move_to(x1, y1)
                cr.curve_to(x1 + width1/8.0, y1,   x0 - width1/8.0 - TSwidth/2.0, y0,   x0 - TSwidth/2.0, y0)
                cr.move_to(x0 + TSwidth/2.0, y0)
                cr.curve_to(x0 + width2/8.0 + TSwidth/2.0, y0,   x2 - width2/8.0, y2,   x2, y2)
                cr.stroke()
            else:
                width = (x2 - x1)
                # Draw Bezier curve connecting reactants and products through TS
                cr.set_source_rgba(0.0, 0.0, 0.0, 0.5)
                cr.set_line_width(1.0)
                cr.move_to(x1, y1)
                cr.curve_to(x1 + width/4.0, y1,   x2 - width/4.0, y2,   x2, y2)
                cr.stroke()

        # Draw wells (after path reactions so that they are on top)
        for i, well in enumerate(wells):
            x0, y0 = coordinates[i,:]
            # Draw horizontal line for well
            cr.set_line_width(4.0)
            cr.move_to(x0 - wellWidth/2.0, y0)
            cr.line_to(x0 + wellWidth/2.0, y0)
            cr.set_source_rgba(0.0, 0.0, 0.0, 1.0)
            cr.stroke()
            # Add background and text for energy
            E0 = well.E0 * 0.001 - E0_offset
            E0 = "{0:.1f}".format(E0 * 1000. * Emult)
            extents = cr.text_extents(E0)
            x = x0 - extents[2] / 2.0; y = y0 - 6.0
            cr.rectangle(x + extents[0] - 2.0, y + extents[1] - 2.0, extents[2] + 4.0, extents[3] + 4.0)
            cr.set_source_rgba(1.0, 1.0, 1.0, 0.75)
            cr.fill()
            cr.move_to(x, y)
            cr.set_source_rgba(0.0, 0.0, 0.0, 1.0)
            cr.show_text(E0)
            # Draw background and text for label
            x = x0 - 0.5 * labelRects[i][2]
            y = y0 + 6
            cr.rectangle(x, y, labelRects[i][2], labelRects[i][3])
            cr.set_source_rgba(1.0, 1.0, 1.0, 0.75)
            cr.fill()
            self.__drawLabel(well, cr, x, y, format=format)
        
        # Finish Cairo drawing
        if format == 'png':
            surface.write_to_png(path)
        else:
            surface.finish()
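
The only NumPy usage here is the float64 coordinate array for the wells; the naive column-layout step can be illustrated without Cairo (hypothetical energies and spacing values, not taken from any real network):

import numpy

wells_E0 = [0.0, 50.0, 20.0, 80.0]                    # kJ/mol, hypothetical wells
Eslope, padding, wellWidth, wellSpacing = 1.5, 16.0, 64.0, 64.0
y_E0 = max(wells_E0) * Eslope + padding               # y for E0 = 0 (Cairo's y axis grows downward)

coordinates = numpy.zeros((len(wells_E0), 2), numpy.float64)
x = padding
for i, E0 in enumerate(wells_E0):
    coordinates[i] = [x + 0.5 * wellWidth, y_E0 - E0 * Eslope]
    x += wellWidth + wellSpacing
print(coordinates)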

Example 50

Project: scikit-learn Source File: base.py
Function: fit_liblinear
def _fit_liblinear(X, y, C, fit_intercept, intercept_scaling, class_weight,
                   penalty, dual, verbose, max_iter, tol,
                   random_state=None, multi_class='ovr',
                   loss='logistic_regression', epsilon=0.1,
                   sample_weight=None):
    """Used by Logistic Regression (and CV) and LinearSVC.

    Preprocessing is done in this function before supplying it to liblinear.

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape (n_samples, n_features)
        Training vector, where n_samples is the number of samples and
        n_features is the number of features.

    y : array-like, shape (n_samples,)
        Target vector relative to X

    C : float
        Inverse of regularization strength; the lower the C, the stronger
        the penalization.

    fit_intercept : bool
        Whether or not to fit the intercept, that is, to add an intercept
        term to the decision function.

    intercept_scaling : float
        LibLinear internally penalizes the intercept and this term is subject
        to regularization just like the other terms of the feature vector.
        In order to avoid this, one should increase the intercept_scaling,
        such that the feature vector becomes [x, intercept_scaling].

    class_weight : {dict, 'balanced'}, optional
        Weights associated with classes in the form ``{class_label: weight}``.
        If not given, all classes are supposed to have weight one. For
        multi-output problems, a list of dicts can be provided in the same
        order as the columns of y.

        The "balanced" mode uses the values of y to automatically adjust
        weights inversely proportional to class frequencies in the input data
        as ``n_samples / (n_classes * np.bincount(y))``

    penalty : str, {'l1', 'l2'}
        The norm of the penalty used in regularization.

    dual : bool
        Dual or primal formulation.

    verbose : int
        Set verbose to any positive number for verbosity.

    max_iter : int
        Number of iterations.

    tol : float
        Stopping condition.

    random_state : int seed, RandomState instance, or None (default)
        The seed of the pseudo random number generator to use when
        shuffling the data.

    multi_class : str, {'ovr', 'crammer_singer'}
        `ovr` trains n_classes one-vs-rest classifiers, while `crammer_singer`
        optimizes a joint objective over all classes.
        While `crammer_singer` is interesting from a theoretical perspective
        as it is consistent, it is seldom used in practice, rarely leads to
        better accuracy and is more expensive to compute.
        If `crammer_singer` is chosen, the options loss, penalty and dual will
        be ignored.

    loss : str, {'logistic_regression', 'hinge', 'squared_hinge',
                 'epsilon_insensitive', 'squared_epsilon_insensitive'}
        The loss function used to fit the model.

    epsilon : float, optional (default=0.1)
        Epsilon parameter in the epsilon-insensitive loss function. Note
        that the value of this parameter depends on the scale of the target
        variable y. If unsure, set epsilon=0.

    sample_weight : array-like, optional
        Weights assigned to each sample.

    Returns
    -------
    coef_ : ndarray, shape (n_features, n_features + 1)
        The coefficient vector obtained by minimizing the objective function.

    intercept_ : float
        The intercept term added to the vector.

    n_iter_ : int
        Maximum number of iterations run across all classes.
    """
    if loss not in ['epsilon_insensitive', 'squared_epsilon_insensitive']:
        enc = LabelEncoder()
        y_ind = enc.fit_transform(y)
        classes_ = enc.classes_
        if len(classes_) < 2:
            raise ValueError("This solver needs samples of at least 2 classes"
                             " in the data, but the data contains only one"
                             " class: %r" % classes_[0])

        class_weight_ = compute_class_weight(class_weight, classes_, y)
    else:
        class_weight_ = np.empty(0, dtype=np.float64)
        y_ind = y
    liblinear.set_verbosity_wrap(verbose)
    rnd = check_random_state(random_state)
    if verbose:
        print('[LibLinear]', end='')

    # LinearSVC breaks when intercept_scaling is <= 0
    bias = -1.0
    if fit_intercept:
        if intercept_scaling <= 0:
            raise ValueError("Intercept scaling is %r but needs to be greater than 0."
                             " To disable fitting an intercept,"
                             " set fit_intercept=False." % intercept_scaling)
        else:
            bias = intercept_scaling

    libsvm.set_verbosity_wrap(verbose)
    libsvm_sparse.set_verbosity_wrap(verbose)
    liblinear.set_verbosity_wrap(verbose)

    # LibLinear wants targets as doubles, even for classification
    y_ind = np.asarray(y_ind, dtype=np.float64).ravel()
    if sample_weight is None:
        sample_weight = np.ones(X.shape[0])
    else:
        sample_weight = np.array(sample_weight, dtype=np.float64, order='C')
        check_consistent_length(sample_weight, X)

    solver_type = _get_liblinear_solver_type(multi_class, penalty, loss, dual)
    raw_coef_, n_iter_ = liblinear.train_wrap(
        X, y_ind, sp.isspmatrix(X), solver_type, tol, bias, C,
        class_weight_, max_iter, rnd.randint(np.iinfo('i').max),
        epsilon, sample_weight)
    # Regarding rnd.randint(..) in the above signature:
    # seed for srand in range [0..INT_MAX); due to limitations in Numpy
    # on 32-bit platforms, we can't get to the UINT_MAX limit that
    # srand supports
    n_iter_ = max(n_iter_)
    if n_iter_ >= max_iter and verbose > 0:
        warnings.warn("Liblinear failed to converge, increase "
                      "the number of iterations.", ConvergenceWarning)

    if fit_intercept:
        coef_ = raw_coef_[:, :-1]
        intercept_ = intercept_scaling * raw_coef_[:, -1]
    else:
        coef_ = raw_coef_
        intercept_ = 0.

    return coef_, intercept_, n_iter_
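
This helper is normally reached through the public estimators rather than called directly; a usage sketch with those wrappers (illustrative data, assuming the liblinear-backed solvers are selected as shown):

import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC

rs = np.random.RandomState(0)
X = rs.randn(200, 5).astype(np.float64)
y = (X[:, 0] + 0.1 * rs.randn(200) > 0).astype(int)

# Both of these route through the liblinear training wrapper internally.
clf = LogisticRegression(solver='liblinear', C=1.0).fit(X, y)
svc = LinearSVC(C=1.0, max_iter=1000).fit(X, y)
print(clf.coef_.shape, clf.intercept_.shape, svc.coef_.shape)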