Here are examples of the Python API numpy.float64, taken from open source projects. By voting up you can indicate which examples are most useful and appropriate.
168 Examples
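Before the examples, a minimal sketch of what numpy.float64 itself is: a 64-bit IEEE 754 scalar type that doubles as a dtype argument, which is how most of the examples below use it.

import numpy

x = numpy.float64(0.1)                     # a 64-bit float scalar
a = numpy.zeros(3, dtype=numpy.float64)    # as a dtype argument
b = numpy.arange(3).astype(numpy.float64)  # casting an existing array
print(x.dtype, a.dtype, b.dtype)           # float64 float64 float64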
Example 1 (5 votes)
Project: Yeppp Source File: sse_add_test.py
def test_add_V64fV64f_V64f(self):
a_tmp = self.a.astype(numpy.float64)
b_tmp = self.b.astype(numpy.float64)
c = numpy.empty([self.n]).astype(numpy.float64)
a_ptr = a_tmp.ctypes.data_as(ctypes.POINTER(ctypes.c_double))
b_ptr = b_tmp.ctypes.data_as(ctypes.POINTER(ctypes.c_double))
c_ptr = c.ctypes.data_as(ctypes.POINTER(ctypes.c_double))
func = sse.add.yepCore_Add.yepCore_Add_V64fV64f_V64f.load()
self.assertEqual(func(a_ptr, b_ptr, c_ptr, self.n), 0)
for i in range(self.n):
self.assertEqual(a_tmp[i] + b_tmp[i], c[i])
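The pattern worth extracting from this test: a float64 array's buffer can be handed to C code as a double*. A minimal standalone sketch of the same calls:

import ctypes
import numpy

a = numpy.arange(4, dtype=numpy.float64)
ptr = a.ctypes.data_as(ctypes.POINTER(ctypes.c_double))
print(ptr[0], ptr[3])  # 0.0 3.0 -- the pointer aliases a's buffer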
Example 2 (5 votes)
Project: chainer Source File: test_matmul.py
def setUp(self):
self.x1 = numpy.random.uniform(.5, 1, (m, k)).astype(numpy.float64)
self.x2 = numpy.random.uniform(.5, 1, (k, n)).astype(numpy.float64)
self.gy = numpy.random.uniform(-1, 1, (m, n)).astype(numpy.float64)
self.op = lambda x, y: F.matmul(x, y)
self.forward_answer = numpy.dot(self.x1, self.x2)
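The float64 casts here are typical of gradient tests: finite-difference checks lose accuracy quickly at single precision. A quick illustration (not from the chainer suite):

import numpy

print(numpy.float32(1.0) + numpy.float32(1e-8) == numpy.float32(1.0))  # True: increment lost
print(numpy.float64(1.0) + numpy.float64(1e-8) == numpy.float64(1.0))  # False: still resolved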
Example 3 (5 votes)
Project: scimath Source File: interpolate.py
def window_average(x, y, new_x, width=10.0):
bad_index = None
x = make_array_safe(x, numpy.float64)
y = make_array_safe(y, numpy.float64)
new_x = make_array_safe(new_x, numpy.float64)
width = float(width)
assert len(y.shape) < 3, "function only works with 1D or 2D arrays"
if len(y.shape) == 2:
new_y = numpy.zeros((y.shape[0], len(new_x)), numpy.float64)
for i in range(len(new_y)):
_interpolate.window_average_ddddd(x, y[i], new_x, new_y[i],
width)
else:
new_y = numpy.zeros(len(new_x), numpy.float64)
_interpolate.window_average_ddddd(x, y, new_x, new_y, width)
return new_y
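_interpolate.window_average_ddddd is scimath's compiled helper; assuming it averages all samples whose x lies within width/2 of each output point, a pure-NumPy sketch of the 1D case would look like this (function name is ours, not scimath's):

import numpy

def window_average_py(x, y, new_x, width=10.0):
    # Hypothetical pure-Python equivalent of the C helper above.
    x = numpy.asarray(x, dtype=numpy.float64)
    y = numpy.asarray(y, dtype=numpy.float64)
    new_y = numpy.empty(len(new_x), dtype=numpy.float64)
    for i, center in enumerate(new_x):
        mask = numpy.abs(x - center) <= width / 2.0
        new_y[i] = y[mask].mean() if mask.any() else numpy.nan
    return new_y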
Example 4 (3 votes)
def testSolve(self):
"""
Test the simple batch reactor with a simple kinetic model. Here we
choose a kinetic model consisting of the hydrogen abstraction reaction
CH4 + C2H5 <=> CH3 + C2H6.
"""
CH4 = Species(
molecule=[Molecule().fromSMILES("C")],
thermo=ThermoData(Tdata=([300,400,500,600,800,1000,1500],"K"), Cpdata=([ 8.615, 9.687,10.963,12.301,14.841,16.976,20.528],"cal/(mol*K)"), H298=(-17.714,"kcal/mol"), S298=(44.472,"cal/(mol*K)"))
)
CH3 = Species(
molecule=[Molecule().fromSMILES("[CH3]")],
thermo=ThermoData(Tdata=([300,400,500,600,800,1000,1500],"K"), Cpdata=([ 9.397,10.123,10.856,11.571,12.899,14.055,16.195],"cal/(mol*K)"), H298=( 9.357,"kcal/mol"), S298=(45.174,"cal/(mol*K)"))
)
C2H6 = Species(
molecule=[Molecule().fromSMILES("CC")],
thermo=ThermoData(Tdata=([300,400,500,600,800,1000,1500],"K"), Cpdata=([12.684,15.506,18.326,20.971,25.500,29.016,34.595],"cal/(mol*K)"), H298=(-19.521,"kcal/mol"), S298=(54.799,"cal/(mol*K)"))
)
C2H5 = Species(
molecule=[Molecule().fromSMILES("C[CH2]")],
thermo=ThermoData(Tdata=([300,400,500,600,800,1000,1500],"K"), Cpdata=([11.635,13.744,16.085,18.246,21.885,24.676,29.107],"cal/(mol*K)"), H298=( 29.496,"kcal/mol"), S298=(56.687,"cal/(mol*K)"))
)
rxn1 = Reaction(reactants=[C2H6,CH3], products=[C2H5,CH4], kinetics=Arrhenius(A=(686.375*6,'m^3/(mol*s)'), n=4.40721, Ea=(7.82799,'kcal/mol'), T0=(298.15,'K')))
coreSpecies = [CH4,CH3,C2H6,C2H5]
edgeSpecies = []
coreReactions = [rxn1]
edgeReactions = []
T = 1000; P = 1.0e5
rxnSystem = SimpleReactor(T, P, initialMoleFractions={C2H5: 0.1, CH3: 0.1, CH4: 0.4, C2H6: 0.4}, termination=[])
rxnSystem.initializeModel(coreSpecies, coreReactions, edgeSpecies, edgeReactions)
tlist = numpy.array([10**(i/10.0) for i in range(-130, -49)], numpy.float64)
# Integrate to get the solution at each time point
t = []; y = []; reactionRates = []; speciesRates = []
for t1 in tlist:
rxnSystem.advance(t1)
t.append(rxnSystem.t)
# You must make a copy of y because it is overwritten by DASSL at
# each call to advance()
y.append(rxnSystem.y.copy())
reactionRates.append(rxnSystem.coreReactionRates.copy())
speciesRates.append(rxnSystem.coreSpeciesRates.copy())
# Convert the solution vectors to numpy arrays
t = numpy.array(t, numpy.float64)
y = numpy.array(y, numpy.float64)
reactionRates = numpy.array(reactionRates, numpy.float64)
speciesRates = numpy.array(speciesRates, numpy.float64)
V = constants.R * rxnSystem.T.value_si * numpy.sum(y) / rxnSystem.P.value_si
# Check that we're computing the species fluxes correctly
for i in range(t.shape[0]):
self.assertAlmostEqual(reactionRates[i,0], speciesRates[i,0], delta=1e-6*reactionRates[i,0])
self.assertAlmostEqual(reactionRates[i,0], -speciesRates[i,1], delta=1e-6*reactionRates[i,0])
self.assertAlmostEqual(reactionRates[i,0], -speciesRates[i,2], delta=1e-6*reactionRates[i,0])
self.assertAlmostEqual(reactionRates[i,0], speciesRates[i,3], delta=1e-6*reactionRates[i,0])
# Check that we've reached equilibrium
self.assertAlmostEqual(reactionRates[-1,0], 0.0, delta=1e-2)
#######
# Unit test for the jacobian function:
# Solve a reaction system and check if the analytical jacobian matches the finite difference jacobian
H2 = Species(
molecule=[Molecule().fromSMILES("[H][H]")],
thermo=ThermoData(Tdata=([300,400,500,600,800,1000,1500],"K"), Cpdata=([6.89,6.97,6.99,7.01,7.08,7.22,7.72],"cal/(mol*K)"), H298=( 0,"kcal/mol"), S298=(31.23,"cal/(mol*K)"))
)
rxnList = []
rxnList.append(Reaction(reactants=[C2H6], products=[CH3,CH3], kinetics=Arrhenius(A=(686.375*6,'1/s'), n=4.40721, Ea=(7.82799,'kcal/mol'), T0=(298.15,'K'))))
rxnList.append(Reaction(reactants=[CH3,CH3], products=[C2H6], kinetics=Arrhenius(A=(686.375*6,'m^3/(mol*s)'), n=4.40721, Ea=(7.82799,'kcal/mol'), T0=(298.15,'K'))))
rxnList.append(Reaction(reactants=[C2H6,CH3], products=[C2H5,CH4], kinetics=Arrhenius(A=(46.375*6,'m^3/(mol*s)'), n=3.40721, Ea=(6.82799,'kcal/mol'), T0=(298.15,'K'))))
rxnList.append(Reaction(reactants=[C2H5,CH4], products=[C2H6,CH3], kinetics=Arrhenius(A=(46.375*6,'m^3/(mol*s)'), n=3.40721, Ea=(6.82799,'kcal/mol'), T0=(298.15,'K'))))
rxnList.append(Reaction(reactants=[C2H5,CH4], products=[CH3,CH3,CH3], kinetics=Arrhenius(A=(246.375*6,'m^3/(mol*s)'), n=1.40721, Ea=(3.82799,'kcal/mol'), T0=(298.15,'K'))))
rxnList.append(Reaction(reactants=[CH3,CH3,CH3], products=[C2H5,CH4], kinetics=Arrhenius(A=(246.375*6,'m^6/(mol^2*s)'), n=1.40721, Ea=(3.82799,'kcal/mol'), T0=(298.15,'K'))))
rxnList.append(Reaction(reactants=[C2H6,CH3,CH3], products=[C2H5,C2H5,H2], kinetics=Arrhenius(A=(146.375*6,'m^6/(mol^2*s)'), n=2.40721, Ea=(8.82799,'kcal/mol'), T0=(298.15,'K'))))
rxnList.append(Reaction(reactants=[C2H5,C2H5,H2], products=[C2H6,CH3,CH3], kinetics=Arrhenius(A=(146.375*6,'m^6/(mol^2*s)'), n=2.40721, Ea=(8.82799,'kcal/mol'), T0=(298.15,'K'))))
rxnList.append(Reaction(reactants=[C2H6,C2H6], products=[CH3,CH4,C2H5], kinetics=Arrhenius(A=(1246.375*6,'m^3/(mol*s)'), n=0.40721, Ea=(8.82799,'kcal/mol'), T0=(298.15,'K'))))
rxnList.append(Reaction(reactants=[CH3,CH4,C2H5], products=[C2H6,C2H6], kinetics=Arrhenius(A=(46.375*6,'m^6/(mol^2*s)'), n=0.10721, Ea=(8.82799,'kcal/mol'), T0=(298.15,'K'))))
for rxn in rxnList:
coreSpecies = [CH4,CH3,C2H6,C2H5,H2]
edgeSpecies = []
coreReactions = [rxn]
rxnSystem0 = SimpleReactor(T,P,initialMoleFractions={CH4:0.2,CH3:0.1,C2H6:0.35,C2H5:0.15, H2:0.2},termination=[])
rxnSystem0.initializeModel(coreSpecies, coreReactions, edgeSpecies, edgeReactions)
dydt0 = rxnSystem0.residual(0.0, rxnSystem0.y, numpy.zeros(rxnSystem0.y.shape))[0]
numCoreSpecies = len(coreSpecies)
dN = .000001*sum(rxnSystem0.y)
dN_array = dN*numpy.eye(numCoreSpecies)
dydt = []
for i in range(numCoreSpecies):
rxnSystem0.y[i] += dN
dydt.append(rxnSystem0.residual(0.0, rxnSystem0.y, numpy.zeros(rxnSystem0.y.shape))[0])
rxnSystem0.y[i] -= dN # reset y to original y0
# Let the solver compute the jacobian
solverJacobian = rxnSystem0.jacobian(0.0, rxnSystem0.y, dydt0, 0.0)
# Compute the jacobian using finite differences
jacobian = numpy.zeros((numCoreSpecies, numCoreSpecies))
for i in range(numCoreSpecies):
for j in range(numCoreSpecies):
jacobian[i,j] = (dydt[j][i]-dydt0[i])/dN
self.assertAlmostEqual(jacobian[i,j], solverJacobian[i,j], delta=abs(1e-4*jacobian[i,j]))
#print 'Solver jacobian'
#print solverJacobian
#print 'Numerical jacobian'
#print jacobian
###
# Unit test for the compute rate derivative
rxnList = []
rxnList.append(Reaction(reactants=[C2H6], products=[CH3,CH3], kinetics=Arrhenius(A=(686.375e6,'1/s'), n=4.40721, Ea=(7.82799,'kcal/mol'), T0=(298.15,'K'))))
rxnList.append(Reaction(reactants=[C2H6,CH3], products=[C2H5,CH4], kinetics=Arrhenius(A=(46.375*6,'m^3/(mol*s)'), n=3.40721, Ea=(6.82799,'kcal/mol'), T0=(298.15,'K'))))
rxnList.append(Reaction(reactants=[C2H6,CH3,CH3], products=[C2H5,C2H5,H2], kinetics=Arrhenius(A=(146.375*6,'m^6/(mol^2*s)'), n=2.40721, Ea=(8.82799,'kcal/mol'), T0=(298.15,'K'))))
coreSpecies = [CH4,CH3,C2H6,C2H5,H2]
edgeSpecies = []
coreReactions = rxnList
rxnSystem0 = SimpleReactor(T,P,initialMoleFractions={CH4:0.2,CH3:0.1,C2H6:0.35,C2H5:0.15, H2:0.2},termination=[])
rxnSystem0.initializeModel(coreSpecies, coreReactions, edgeSpecies, edgeReactions)
dfdt0 = rxnSystem0.residual(0.0, rxnSystem0.y, numpy.zeros(rxnSystem0.y.shape))[0]
solver_dfdk = rxnSystem0.computeRateDerivative()
#print 'Solver d(dy/dt)/dk'
#print solver_dfdk
integrationTime = 1e-8
rxnSystem0.termination.append(TerminationTime((integrationTime,'s')))
rxnSystem0.simulate(coreSpecies, coreReactions, [], [], 0, 1, 0)
y0 = rxnSystem0.y
dfdk = numpy.zeros((numCoreSpecies,len(rxnList))) # d(dy/dt)/dk
for i in range(len(rxnList)):
k0 = rxnList[i].getRateCoefficient(T,P)
rxnList[i].kinetics.A.value_si = rxnList[i].kinetics.A.value_si*(1+1e-3)
dk = rxnList[i].getRateCoefficient(T,P) - k0
rxnSystem = SimpleReactor(T,P,initialMoleFractions={CH4:0.2,CH3:0.1,C2H6:0.35,C2H5:0.15, H2:0.2},termination=[])
rxnSystem.initializeModel(coreSpecies, coreReactions, edgeSpecies, edgeReactions)
dfdt = rxnSystem.residual(0.0, rxnSystem.y, numpy.zeros(rxnSystem.y.shape))[0]
dfdk[:,i]=(dfdt-dfdt0)/dk
rxnSystem.termination.append(TerminationTime((integrationTime,'s')))
rxnSystem.simulate(coreSpecies, coreReactions, [], [], 0, 1, 0)
rxnList[i].kinetics.A.value_si = rxnList[i].kinetics.A.value_si/(1+1e-3) # reset A factor
for i in range(numCoreSpecies):
for j in range(len(rxnList)):
self.assertAlmostEqual(dfdk[i,j], solver_dfdk[i,j], delta=abs(1e-3*dfdk[i,j]))
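The Jacobian check above is an instance of a general recipe: perturb each component of y by a small dN and difference the residuals. A self-contained sketch for any f: R^n -> R^n (names here are illustrative, not from RMG-Py):

import numpy

def finite_difference_jacobian(f, y0, dN=1e-6):
    # jac[i, j] ~= d f_i / d y_j, by forward differences.
    y0 = numpy.asarray(y0, dtype=numpy.float64)
    f0 = numpy.asarray(f(y0), dtype=numpy.float64)
    jac = numpy.zeros((f0.size, y0.size), dtype=numpy.float64)
    for j in range(y0.size):
        y = y0.copy()
        y[j] += dN
        jac[:, j] = (numpy.asarray(f(y)) - f0) / dN
    return jac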
Example 5 (2 votes)
Project: hifive Source File: get_hic_interval.py
def run(args):
if not args.image is None and args.pdf and "pyx" not in sys.modules.keys():
parser.error("-p/--pdf requires the package 'pyx'")
hic = HiC(args.project, 'r', silent=args.silent)
if 'binned' in hic.fends['/'].attrs and hic.fends['/'].attrs['binned'] is not None:
binned = True
chr_indices = 'bin_indices'
fends = 'bins'
else:
binned = False
chr_indices = 'chr_indices'
fends = 'fends'
if args.stop == 0 or args.stop is None:
maxstop = hic.fends[fends]['stop'][hic.fends[chr_indices][hic.chr2int[args.chrom] + 1] - 1]
else:
maxstop = args.stop
if args.stop is None:
args.stop = maxstop
if args.start is None:
args.start = hic.fends[fends]['start'][hic.fends[chr_indices][hic.chr2int[args.chrom]]]
if not args.chrom2 is None:
if args.stop2 == 0 or args.stop2 is None:
maxstop2 = hic.fends[fends]['stop'][hic.fends[chr_indices][hic.chr2int[args.chrom2] + 1] - 1]
else:
maxstop2 = args.stop2
if args.stop2 is None:
args.stop2 = maxstop2
if args.start2 is None:
args.start2 = hic.fends[fends]['start'][hic.fends[chr_indices][hic.chr2int[args.chrom2]]]
else:
if args.maxdist is None:
args.maxdist = 0
if args.maxdist == 0 or args.maxdist >= (maxstop - args.start) / 2:
arraytype = 'upper'
else:
arraytype = 'compact'
kwargs = {}
for arg in args.keywords:
temp = arg.split("=")
if temp[1] in ["True", "TRUE", "true"]:
temp[1] = True
elif temp[1] in ["False", "FALSE", "false"]:
temp[1] = False
elif temp[1][0] == "(":
temp[1] = temp[1].strip('()').split(',')
for i in range(len(temp[1])):
temp[1][i] = int(temp[1][i])
temp[1] = tuple(temp[1])
elif temp[1][0] == "[":
temp[1] = temp[1].strip('[]').split(',')
for i in range(len(temp[1])):
temp[1][i] = int(temp[1][i])
else:
try:
temp[1] = int(temp[1])
except:
# strip off extra characters introduced by galaxy into color format
temp[1] = temp[1].replace('__pd__','')
kwargs[temp[0]] = temp[1]
if not args.chrom2 is None:
data, mapping1, mapping2 = hic.trans_heatmap(chrom1=args.chrom, chrom2=args.chrom2, binsize=args.binsize,
start1=args.start, stop1=args.stop, start2=args.start2,
stop2=args.stop2, datatype=args.datatype,
maxdistance=args.maxdist, returnmapping=True, skipfiltered=True,
dynamically_binned=args.dynamic,
expansion_binsize=args.expbinsize, minobservations=args.minobs,
searchdistance=args.search, removefailed=args.remove)
else:
data, mapping = hic.cis_heatmap(chrom=args.chrom, binsize=args.binsize, start=args.start,
stop=args.stop, datatype=args.datatype, arraytype=arraytype,
maxdistance=args.maxdist, returnmapping=True, skipfiltered=True,
dynamically_binned=args.dynamic, expansion_binsize=args.expbinsize,
minobservations=args.minobs, searchdistance=args.search,
removefailed=args.remove)
output = open(args.output, 'w')
if args.matrix:
if args.chrom2 is None:
diag = int(hic.binned is not None)
if arraytype == 'upper':
temp = numpy.zeros((mapping.shape[0], mapping.shape[0]), dtype=numpy.float64)
indices = numpy.triu_indices(mapping.shape[0], 1 - diag)
where = numpy.where(data[:, 1] > 0)[0]
temp[indices[0][where], indices[1][where]] = data[where, 0] / data[where, 1]
temp[indices[1][where], indices[0][where]] += data[where, 0] / data[where, 1]
else:
temp = numpy.zeros((mapping.shape[0], mapping.shape[0]), dtype=numpy.float64)
for i in range(mapping.shape[0] - 1 + diag):
where = numpy.where(data[i, :, 1] > 0)[0]
temp[i, where + i + 1 - diag] = data[i, where, 0] / data[i, where, 1]
indices = numpy.triu_indices(mapping.shape[0], 1 - diag)
temp[indices[1], indices[0]] += temp[indices]
else:
temp = numpy.zeros((data.shape[0], data.shape[1]), dtype=numpy.float64)
where = numpy.where(data[:, :, 1] > 0)
temp[where] = data[where[0], where[1], 0] / data[where[0], where[1], 1]
if args.datatype == 'raw':
for i in range(temp.shape[0]):
tempout = []
for j in range(temp.shape[1]):
tempout.append("%i" % temp[i, j])
print >> output, '\t'.join(tempout)
else:
for i in range(temp.shape[0]):
tempout = []
for j in range(temp.shape[1]):
tempout.append("%0.6f" % temp[i, j])
print >> output, '\t'.join(tempout)
else:
if args.chrom2 is None:
diag = binned
if arraytype == 'upper':
pos = 0
for i in range(mapping.shape[0] - 1 + diag):
for j in range(i + 1 - diag, mapping.shape[0]):
if data[pos, 0] > 0.0 and data[pos, 1] > 0.0:
print >> output, "chr%s\t%i\t%i\tchr%s\t%i\t%i\t%f" % (args.chrom, mapping[i, 0],
mapping[i, 1], args.chrom,
mapping[j, 0], mapping[j, 1],
numpy.log2(data[pos, 0] / data[pos, 1]))
pos += 1
else:
for i in range(mapping.shape[0] - 1 + diag):
for pos in range(min(mapping.shape[0] - i - 1 + diag, data.shape[1])):
j = i + pos + 1 - diag
if data[i, pos, 0] > 0.0 and data[i, pos, 1] > 0.0:
print >> output, "chr%s\t%i\t%i\tchr%s\t%i\t%i\t%f" % (args.chrom, mapping[i, 0],
mapping[i, 1], args.chrom,
mapping[j, 0], mapping[j, 1],
numpy.log2(data[i, pos, 0] /
data[i, pos, 1]))
else:
for i in range(mapping1.shape[0]):
for j in range(mapping2.shape[0]):
if data[i, j, 0] > 0.0 and data[i, j, 1] > 0.0:
print >> output, "chr%s\t%i\t%i\tchr%s\t%i\t%i\t%f" % (args.chrom,
mapping1[i, 0], mapping1[i, 1],
args.chrom2, mapping2[j, 0], mapping2[j, 1],
numpy.log2(data[i, j, 0] / data[i, j, 1]))
output.close()
if not args.image is None:
width = max(5.0, (args.stop - args.start) / 1000000.)
if args.datatype == 'enrichment':
symmetricscaling = True
else:
symmetricscaling = False
if 'symmetricscaling' in kwargs:
symmetricscaling = kwargs['symmetricscaling']
if not args.chrom2 is None:
img, minscore, maxscore = plot_full_array(data, returnscale=True, symmetricscaling=symmetricscaling,
silent=args.silent, **kwargs)
offset = 0.0
height = (width / data.shape[0]) * data.shape[1]
elif arraytype == 'compact':
if args.rotate:
img, minscore, maxscore = plot_diagonal_from_compact_array(data, returnscale=True,
symmetricscaling=symmetricscaling, silent=args.silent,
diagonal_included=diag, **kwargs)
offset = width / 2. / (data.shape[0] * 2 - 1 + diag)
height = width / (data.shape[0] * 2.0 - 2) * data.shape[1]
else:
img, minscore, maxscore = plot_compact_array(data, returnscale=True,
symmetricscaling=symmetricscaling, silent=args.silent,
diagonal_included=diag, **kwargs)
offset = 0.0
height = width
else:
if args.rotate:
img, minscore, maxscore = plot_diagonal_from_upper_array(data, returnscale=True,
symmetricscaling=symmetricscaling, silent=args.silent,
diagonal_included=diag, **kwargs)
offset = width / 2. / (mapping.shape[0] * 2 - 1 + diag)
height = width / 2.
else:
img, minscore, maxscore = plot_upper_array(data, returnscale=True,
symmetricscaling=symmetricscaling, silent=args.silent,
diagonal_included=diag, **kwargs)
offset = 0.0
height = width
if args.pdf:
c = canvas.canvas()
if args.chrom2 is None:
c1 = canvas.canvas([canvas.clip(path.rect(0, 0, width, height))])
c1.insert(bitmap.bitmap(-offset, -offset, img, width=width))
else:
c1 = canvas.canvas([canvas.clip(path.rect(0, 0, width, height))])
c1.insert(bitmap.bitmap(-offset, -offset, img, width=width))
c.insert(c1)
if args.ticks and args.binsize > 0:
if args.chrom2 is None:
c.stroke(path.line(0, 0, width, 0))
xmin = (mapping[0, 0] + mapping[0, 1]) / 2
xmax = (mapping[-1, 0] + mapping[-1, 1]) / 2
#order = int(floor(log10(xmax - xmin))) - 1
#step = int(floor((xmax - xmin) / (10.0 ** order))) * 10 ** order
order = int(floor(log10((xmax - xmin) / (width * 2.0))))
step = int(floor((xmax - xmin) / (width * 2.0) / (10.0 ** order))) * 10 ** order
values = numpy.arange(((xmin - 1) / step + 1) * step, (xmax / step) * step + 1, step)
ticks = (values - float(mapping[0, 0] + mapping[0, 1]) / 2) / (mapping[-1, 0] -
mapping[0, 0]) * width
for i in range(values.shape[0]):
c.stroke(path.line(ticks[i], 0, ticks[i], -0.25), [style.linewidth.Thin])
c.text(ticks[i], -0.3, "%0.2e" % values[i],
[text.valign.middle, text.halign.left, text.size(-2), trafo.rotate(-90)])
if not args.rotate:
c.stroke(path.line(width, 0, width, height))
for i in range(values.shape[0]):
c.stroke(path.line(width, height - ticks[i], width + 0.25, height - ticks[i]), [style.linewidth.Thin])
c.text(width + 0.3, height - ticks[i], "%0.2e" % values[i], [text.valign.middle, text.halign.left,
text.size(-2)])
else:
c.stroke(path.line(0, 0, width, 0))
xmin = (mapping1[0, 0] + mapping1[0, 1]) / 2
xmax = (mapping1[-1, 0] + mapping1[-1, 1]) / 2
order = int(floor(log10((xmax - xmin) / (width * 2.0))))
step = int(floor((xmax - xmin) / (width * 2.0) / (10.0 ** order))) * 10 ** order
values = numpy.arange(((xmin - 1) / step + 1) * step, (xmax / step) * step + 1, step)
ticks = (values - float(mapping1[0, 0] + mapping1[0, 1]) / 2) / (mapping1[-1, 0] -
mapping1[0, 0]) * width
for i in range(values.shape[0]):
c.stroke(path.line(ticks[i], 0, ticks[i], -0.25), [style.linewidth.Thin])
c.text(ticks[i], -0.3, "%0.2e" % values[i],
[text.valign.middle, text.halign.left, text.size(-2), trafo.rotate(-90)])
c.stroke(path.line(0, 0, width, 0))
xmin = (mapping2[0, 0] + mapping2[0, 1]) / 2
xmax = (mapping2[-1, 0] + mapping2[-1, 1]) / 2
order = int(floor(log10((xmax - xmin) / (width * 2.0))))
step = int(floor((xmax - xmin) / (width * 2.0) / (10.0 ** order))) * 10 ** order
values = numpy.arange(((xmin - 1) / step + 1) * step, (xmax / step) * step + 1, step)
ticks = (values - float(mapping2[0, 0] + mapping2[0, 1]) / 2) / (mapping2[-1, 0] -
mapping2[0, 0]) * height
for i in range(values.shape[0]):
c.stroke(path.line(width, height - ticks[i], width + 0.25, height - ticks[i]), [style.linewidth.Thin])
c.text(width + 0.3, height - ticks[i], "%0.2e" % values[i],
[text.valign.middle, text.halign.left, text.size(-2)])
if args.legend:
if 'min_color' in kwargs:
min_color = kwargs['min_color']
else:
min_color = "0000ff"
if 'mid_color' in kwargs:
mid_color = kwargs['mid_color']
else:
mid_color = "ffffff"
if 'max_color' in kwargs:
max_color = kwargs['max_color']
else:
max_color = "ff0000"
if 'logged' in kwargs:
logged = kwargs['logged']
else:
logged = True
c.insert(plot_key(min_score=minscore, max_score=maxscore, height=0.25, width=min(5., width),
orientation='top', num_ticks=5, min_color=min_color,
mid_color=mid_color, max_color=max_color,
log_display=False), [trafo.translate(width * 0.5 - min(2.5, width * 0.5), height + 0.25)])
if logged:
label = "Log2 "
else:
label = ""
if args.datatype == 'enrichment':
c.text(width * 0.5, height + 0.8, "%sEnrichment" % label, [text.halign.center, text.valign.bottom,
text.size(-2)])
elif args.datatype == 'raw':
c.text(width * 0.5, height + 0.8, "%sCounts" % label, [text.halign.center, text.valign.bottom,
text.size(-2)])
else:
c.text(width * 0.5, height + 0.8, "%sNormalized Counts" % label,
[text.halign.center, text.valign.bottom, text.size(-2)])
c.writePDFfile(args.image)
if len(args.image.split('.')) <= 1 or args.image.split('.')[-1] != 'pdf':
subprocess.call('mv %s.pdf %s' % (args.image, args.image), shell=True)
else:
img_format = args.image.split('.')[-1].upper()
if img_format not in ['PNG', 'TIF', 'JPG', 'JPEG']:
img_format = 'PNG'
img.save(args.image, img_format)
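The keyword-parsing loop near the top of run() is a reusable trick for typed key=value command-line arguments; pulled out on its own (function name is ours, not hifive's):

def parse_keyword(arg):
    key, value = arg.split("=", 1)
    if value in ("True", "TRUE", "true"):
        return key, True
    if value in ("False", "FALSE", "false"):
        return key, False
    if value.startswith("("):
        return key, tuple(int(v) for v in value.strip("()").split(","))
    if value.startswith("["):
        return key, [int(v) for v in value.strip("[]").split(",")]
    try:
        return key, int(value)
    except ValueError:
        # strip extra characters galaxy introduces into color strings
        return key, value.replace("__pd__", "")

print(parse_keyword("minobs=5"))          # ('minobs', 5)
print(parse_keyword("min_color=0000ff"))  # ('min_color', '0000ff')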
Example 6 (2 votes)
def ll2cr(lon_arr, lat_arr, grid_info, fill_in=numpy.nan, inplace=True):
"""Project longitude and latitude points to columns and rows in the specified grid in place
:param lon_arr: Numpy array of longitude floats
:param lat_arr: Numpy array of latitude floats
:param grid_info: dictionary of grid information (see below)
:param fill_in: Fill value for input longitude and latitude arrays and used for output
:returns: tuple(points_in_grid, cols_out, rows_out)
The provided grid info must have the following parameters (optional grids mean dynamic):
- proj4_definition
- cell_width
- cell_height
- width (optional/None)
- height (optional/None)
- origin_x (optional/None)
- origin_y (optional/None)
Steps taken in this function:
1. Convert (lon, lat) points to (X, Y) points in the projection space
2. If grid is missing some parameters (dynamic grid), then fill them in
3. Convert (X, Y) points to (column, row) points in the grid space
"""
p = grid_info["proj4_definition"]
cw = grid_info["cell_width"]
ch = grid_info["cell_height"]
w = grid_info.get("width", None)
h = grid_info.get("height", None)
ox = grid_info.get("origin_x", None)
oy = grid_info.get("origin_y", None)
is_static = None not in [w, h, ox, oy]
# The C code requires 64-bit floats, and in-place processing requires writeable arrays; otherwise we need to make a copy
is_double = lon_arr.dtype == numpy.float64 and lat_arr.dtype == numpy.float64
is_writeable = lon_arr.flags.writeable and lat_arr.flags.writeable
lon_orig = lon_arr
lat_orig = lat_arr
copy_result = False
# XXX: Check that automatic casting can't be done on the C side
if not is_double or not is_writeable:
LOG.debug("Copying longitude and latitude arrays because inplace processing could not be done")
copy_result = True
lon_arr = lon_arr.astype(numpy.float64)
lat_arr = lat_arr.astype(numpy.float64)
if is_static:
LOG.debug("Running static version of ll2cr...")
points_in_grid = _ll2cr.ll2cr_static(lon_arr, lat_arr, fill_in, p, cw, ch, w, h, ox, oy)
else:
LOG.debug("Running dynamic version of ll2cr...")
results = _ll2cr.ll2cr_dynamic(lon_arr, lat_arr, fill_in, p, cw, ch,
width=w, height=h, origin_x=ox, origin_y=oy)
points_in_grid, lon_arr, lat_arr, origin_x, origin_y, width, height = results
# edit the grid info dictionary in place
grid_info["origin_x"] = origin_x
grid_info["origin_y"] = origin_y
grid_info["width"] = width
grid_info["height"] = height
if copy_result and inplace:
LOG.debug("Copying result arrays back to provided inplace array")
lon_orig[:] = lon_arr[:]
lat_orig[:] = lat_arr[:]
return points_in_grid, lon_orig, lat_orig
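The dtype-and-writeability guard at the top of ll2cr is general-purpose. In isolation (helper name is ours):

import numpy

def as_writable_float64(arr):
    # In-place use is safe only if the array is already float64
    # and writeable; otherwise astype makes a float64 copy.
    if arr.dtype == numpy.float64 and arr.flags.writeable:
        return arr, False
    return arr.astype(numpy.float64), True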
Example 7 (2 votes)
Project: westpa Source File: weed_driver.py
def prepare_new_iteration(self):
n_iter = self.sim_manager.n_iter
we_driver = self.sim_manager.we_driver
if we_driver.target_states and self.do_reweight:
log.warning('equilibrium reweighting requested but target states (sinks) present; reweighting disabled')
return
if not self.do_reweight:
# Reweighting not requested
log.debug('equilibrium reweighting not enabled')
return
with self.data_manager.lock:
weed_global_group = self.data_manager.we_h5file.require_group('weed')
last_reweighting = long(weed_global_group.attrs.get('last_reweighting', 0))
if n_iter - last_reweighting < self.reweight_period:
# Not time to reweight yet
log.debug('not reweighting')
return
else:
log.debug('reweighting')
mapper = we_driver.bin_mapper
bins = we_driver.next_iter_binning
n_bins = len(bins)
# Create storage for ourselves
with self.data_manager.lock:
iter_group = self.data_manager.get_iter_group(n_iter)
try:
del iter_group['weed']
except KeyError:
pass
weed_iter_group = iter_group.create_group('weed')
avg_populations_ds = weed_iter_group.create_dataset('avg_populations', shape=(n_bins,), dtype=numpy.float64)
unc_populations_ds = weed_iter_group.create_dataset('unc_populations', shape=(n_bins,), dtype=numpy.float64)
avg_flux_ds = weed_iter_group.create_dataset('avg_fluxes', shape=(n_bins,n_bins), dtype=numpy.float64)
unc_flux_ds = weed_iter_group.create_dataset('unc_fluxes', shape=(n_bins,n_bins), dtype=numpy.float64)
avg_rates_ds = weed_iter_group.create_dataset('avg_rates', shape=(n_bins,n_bins), dtype=numpy.float64)
unc_rates_ds = weed_iter_group.create_dataset('unc_rates', shape=(n_bins,n_bins), dtype=numpy.float64)
averager = self.get_rates(n_iter, mapper)
with self.data_manager.flushing_lock():
avg_populations_ds[...] = averager.average_populations
unc_populations_ds[...] = averager.stderr_populations
avg_flux_ds[...] = averager.average_flux
unc_flux_ds[...] = averager.stderr_flux
avg_rates_ds[...] = averager.average_rate
unc_rates_ds[...] = averager.stderr_rate
binprobs = numpy.fromiter(imap(operator.attrgetter('weight'),bins), dtype=numpy.float64, count=n_bins)
orig_binprobs = binprobs.copy()
westpa.rc.pstatus('Calculating equilibrium reweighting using window size of {:d}'.format(self.eff_windowsize))
westpa.rc.pstatus('\nBin probabilities prior to reweighting:\n{!s}'.format(binprobs))
westpa.rc.pflush()
probAdjustEquil(binprobs, averager.average_rate, averager.stderr_rate)
# Check to see if reweighting has set non-zero bins to zero probability (should never happen)
assert (~((orig_binprobs > 0) & (binprobs == 0))).all(), 'populated bin reweighted to zero probability'
# Check to see if reweighting has set zero bins to nonzero probability (may happen)
z2nz_mask = (orig_binprobs == 0) & (binprobs > 0)
if (z2nz_mask).any():
westpa.rc.pstatus('Reweighting would assign nonzero probability to an empty bin; not reweighting this iteration.')
westpa.rc.pstatus('Empty bins assigned nonzero probability: {!s}.'
.format(numpy.array_str(numpy.arange(n_bins)[z2nz_mask])))
else:
westpa.rc.pstatus('\nBin populations after reweighting:\n{!s}'.format(binprobs))
for (bin, newprob) in izip(bins, binprobs):
bin.reweight(newprob)
weed_global_group.attrs['last_reweighting'] = n_iter
assert (abs(1 - numpy.fromiter(imap(operator.attrgetter('weight'),bins), dtype=numpy.float64, count=n_bins).sum())
< EPS * numpy.fromiter(imap(len,bins), dtype=numpy.int, count=n_bins).sum())
westpa.rc.pflush()
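numpy.fromiter, used twice above to collect bin weights, fills a float64 array directly from an iterator; passing count lets NumPy preallocate instead of growing:

import numpy

weights = numpy.fromiter((w * 0.5 for w in range(4)),
                         dtype=numpy.float64, count=4)
print(weights)  # [0.  0.5 1.  1.5]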
Example 8 (2 votes)
Project: golismero Source File: em.py
def cluster_vectorspace(self, vectors, trace=False):
assert len(vectors) > 0
# set the parameters to initial values
dimensions = len(vectors[0])
means = self._means
priors = self._priors
if not priors:
priors = self._priors = numpy.ones(self._num_clusters,
numpy.float64) / self._num_clusters
covariances = self._covariance_matrices
if not covariances:
covariances = self._covariance_matrices = \
[ numpy.identity(dimensions, numpy.float64)
for i in range(self._num_clusters) ]
# do the E and M steps until the likelihood plateaus
lastl = self._loglikelihood(vectors, priors, means, covariances)
converged = False
while not converged:
if trace: print 'iteration; loglikelihood', lastl
# E-step, calculate hidden variables, h[i,j]
h = numpy.zeros((len(vectors), self._num_clusters),
numpy.float64)
for i in range(len(vectors)):
for j in range(self._num_clusters):
h[i,j] = priors[j] * self._gaussian(means[j],
covariances[j], vectors[i])
h[i,:] /= sum(h[i,:])
# M-step, update parameters - cvm, p, mean
for j in range(self._num_clusters):
covariance_before = covariances[j]
new_covariance = numpy.zeros((dimensions, dimensions),
numpy.float64)
new_mean = numpy.zeros(dimensions, numpy.float64)
sum_hj = 0.0
for i in range(len(vectors)):
delta = vectors[i] - means[j]
new_covariance += h[i,j] * \
numpy.multiply.outer(delta, delta)
sum_hj += h[i,j]
new_mean += h[i,j] * vectors[i]
covariances[j] = new_covariance / sum_hj
means[j] = new_mean / sum_hj
priors[j] = sum_hj / len(vectors)
# bias term to stop covariance matrix being singular
covariances[j] += self._bias * \
numpy.identity(dimensions, numpy.float64)
# calculate likelihood - FIXME: may be broken
l = self._loglikelihood(vectors, priors, means, covariances)
# check for convergence
if abs(lastl - l) < self._conv_threshold:
converged = True
lastl = l
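self._gaussian is not shown in this snippet; assuming it evaluates the multivariate normal density N(x; mean, cov), a sketch of what it would compute is (function name is ours):

import numpy

def gaussian_density(mean, cov, x):
    # Hypothetical stand-in for self._gaussian above.
    d = len(mean)
    delta = numpy.asarray(x, numpy.float64) - mean
    inv_cov = numpy.linalg.inv(cov)
    norm = numpy.sqrt((2 * numpy.pi) ** d * numpy.linalg.det(cov))
    return numpy.exp(-0.5 * delta.dot(inv_cov).dot(delta)) / norm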
Example 9 (2 votes)
Project: RMG-Py Source File: network.py
def calculateMicrocanonicalRates(self):
"""
Calculate and return arrays containing the microcanonical rate
coefficients :math:`k(E)` for the isomerization, dissociation, and
association path reactions in the network.
"""
T = self.T
Elist = self.Elist
Jlist = self.Jlist
densStates = self.densStates
Ngrains = len(Elist)
Nisom = len(self.isomers)
Nreac = len(self.reactants)
Nprod = len(self.products)
NJ = 1 if self.activeJRotor else len(Jlist)
self.Kij = numpy.zeros([Nisom,Nisom,Ngrains,NJ], numpy.float64)
self.Gnj = numpy.zeros([Nreac+Nprod,Nisom,Ngrains,NJ], numpy.float64)
self.Fim = numpy.zeros([Nisom,Nreac,Ngrains,NJ], numpy.float64)
isomers = [isomer.species[0] for isomer in self.isomers]
reactants = [reactant.species for reactant in self.reactants]
products = [product.species for product in self.products]
for rxn in self.pathReactions:
if rxn.reactants[0] in isomers and rxn.products[0] in isomers:
# Isomerization
reac = isomers.index(rxn.reactants[0])
prod = isomers.index(rxn.products[0])
elif rxn.reactants[0] in isomers and rxn.products in reactants:
# Dissociation (reversible)
reac = isomers.index(rxn.reactants[0])
prod = reactants.index(rxn.products) + Nisom
elif rxn.reactants[0] in isomers and rxn.products in products:
# Dissociation (irreversible)
reac = isomers.index(rxn.reactants[0])
prod = products.index(rxn.products) + Nisom + Nreac
elif rxn.reactants in reactants and rxn.products[0] in isomers:
# Association (reversible)
reac = reactants.index(rxn.reactants) + Nisom
prod = isomers.index(rxn.products[0])
elif rxn.reactants in products and rxn.products[0] in isomers:
# Association (irreversible)
reac = products.index(rxn.reactants) + Nisom + Nreac
prod = isomers.index(rxn.products[0])
else:
raise NetworkError('Unexpected type of path reaction "{0}"'.format(rxn))
# Compute the microcanonical rate coefficient k(E)
reacDensStates = densStates[reac,:,:]
prodDensStates = densStates[prod,:,:]
kf, kr = rxn.calculateMicrocanonicalRateCoefficient(self.Elist, self.Jlist, reacDensStates, prodDensStates, T)
# Check for NaN (just to be safe)
if numpy.isnan(kf).any() or numpy.isnan(kr).any():
raise NetworkError('One or more k(E) values is NaN for path reaction "{0}".'.format(rxn))
# Determine the expected value of the rate coefficient k(T)
if rxn.canTST():
# RRKM theory was used to compute k(E), so use TST to compute k(T)
kf_expected = rxn.calculateTSTRateCoefficient(T)
else:
# ILT was used to compute k(E), so use high-P kinetics to compute k(T)
kf_expected = rxn.kinetics.getRateCoefficient(T)
# Determine the expected value of the equilibrium constant (Kc)
Keq_expected = self.eqRatios[prod] / self.eqRatios[reac]
# Determine the actual values of k(T) and Keq
C0 = 1e5 / (constants.R * T)
kf0 = 0.0; kr0 = 0.0; Qreac = 0.0; Qprod = 0.0
for s in range(NJ):
kf0 += numpy.sum(kf[:,s] * reacDensStates[:,s] * (2*Jlist[s]+1) * numpy.exp(-Elist / constants.R / T))
kr0 += numpy.sum(kr[:,s] * prodDensStates[:,s] * (2*Jlist[s]+1) * numpy.exp(-Elist / constants.R / T))
Qreac += numpy.sum(reacDensStates[:,s] * (2*Jlist[s]+1) * numpy.exp(-Elist / constants.R / T))
Qprod += numpy.sum(prodDensStates[:,s] * (2*Jlist[s]+1) * numpy.exp(-Elist / constants.R / T))
kr0 *= C0 ** (len(rxn.products) - len(rxn.reactants))
Qprod *= C0 ** (len(rxn.products) - len(rxn.reactants))
kf_actual = kf0 / Qreac if Qreac > 0 else 0
kr_actual = kr0 / Qprod if Qprod > 0 else 0
Keq_actual = kf_actual / kr_actual if kr_actual > 0 else 0
error = False; warning = False
k_ratio = 1.0
Keq_ratio = 1.0
# Check that the forward rate coefficient is correct
if kf_actual > 0:
k_ratio = kf_expected / kf_actual
# Rescale kf and kr so that we get kf_expected
kf *= k_ratio
kr *= k_ratio
# Decide if the disagreement warrants a warning or error
if 0.8 < k_ratio < 1.25:
# The difference is probably just due to numerical error
pass
elif 0.5 < k_ratio < 2.0:
# Might be numerical error, but is pretty large, so warn
warning = True
else:
# Disagreement is too large, so raise exception
error = True
# Check that the equilibrium constant is correct
if Keq_actual > 0:
Keq_ratio = Keq_expected / Keq_actual
# Rescale kr so that we get Keq_expected
kr /= Keq_ratio
# In RMG jobs this never represents an error because we are
# missing or using approximate degrees of freedom anyway
if self.rmgmode:
pass
# Decide if the disagreement warrants a warning or error
elif 0.8 < Keq_ratio < 1.25:
# The difference is probably just due to numerical error
pass
elif 0.5 < Keq_ratio < 2.0:
# Might be numerical error, but is pretty large, so warn
warning = True
else:
# Disagreement is too large, so raise exception
error = True
if rxn.reactants[0] in isomers and rxn.products[0] in isomers:
# Isomerization
self.Kij[prod,reac,:,:] = kf
self.Kij[reac,prod,:,:] = kr
elif rxn.reactants[0] in isomers and rxn.products in reactants:
# Dissociation (reversible)
self.Gnj[prod-Nisom,reac,:,:] = kf
self.Fim[reac,prod-Nisom,:,:] = kr
elif rxn.reactants[0] in isomers and rxn.products in products:
# Dissociation (irreversible)
self.Gnj[prod-Nisom,reac,:,:] = kf
elif rxn.reactants in reactants and rxn.products[0] in isomers:
# Association (reversible)
self.Fim[prod,reac-Nisom,:,:] = kf
self.Gnj[reac-Nisom,prod,:,:] = kr
elif rxn.reactants in products and rxn.products[0] in isomers:
# Association (irreversible)
self.Gnj[reac-Nisom,prod,:,:] = kr
else:
raise NetworkError('Unexpected type of path reaction "{0}"'.format(rxn))
# If the k(E) values are invalid (in that they give the wrong
# kf(T) or kr(T) when integrated), then raise an exception
if error or warning:
logging.warning('For path reaction {0!s}:'.format(rxn))
logging.warning(' Expected kf({0:g} K) = {1:g}'.format(T, kf_expected))
logging.warning(' Actual kf({0:g} K) = {1:g}'.format(T, kf_actual))
logging.warning(' Expected Keq({0:g} K) = {1:g}'.format(T, Keq_expected))
logging.warning(' Actual Keq({0:g} K) = {1:g}'.format(T, Keq_actual))
if error:
raise InvalidMicrocanonicalRateError('Invalid k(E) values computed for path reaction "{0}".'.format(rxn), k_ratio, Keq_ratio)
else:
logging.warning('Significant corrections to k(E) to be consistent with high-pressure limit for path reaction "{0}".'.format(rxn))
# import pylab
# for prod in range(Nisom):
# for reac in range(prod):
# pylab.semilogy(self.Elist*0.001, self.Kij[prod,reac,:])
# for prod in range(Nreac+Nprod):
# for reac in range(Nisom):
# pylab.semilogy(self.Elist*0.001, self.Gnj[prod,reac,:])
# pylab.show()
return self.Kij, self.Gnj, self.Fim
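The kf0/Qreac loop above is a discretized Boltzmann average: with $\rho$ the density of states and $2J+1$ the rotational degeneracy, it evaluates, in sketch form,

$$k(T) = \frac{\sum_{J}(2J+1)\sum_{E} k(E,J)\,\rho(E,J)\,e^{-E/RT}}{\sum_{J}(2J+1)\sum_{E}\rho(E,J)\,e^{-E/RT}},$$

which is what kf_actual = kf0 / Qreac computes before it is compared against the TST or high-pressure-limit value.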
Example 10 (2 votes)
Project: pyscf Source File: numpy_helper.py
def zdot(a, b, alpha=1, c=None, beta=0):
'''Matrix-matrix multiplication for double complex arrays using Gauss's
complex multiplication algorithm
'''
atype = a.dtype
btype = b.dtype
if atype == numpy.float64 and btype == numpy.float64:
if c is None or c.dtype == numpy.float64:
return ddot(a, b, alpha, c, beta)
else:
cr = numpy.asarray(c.real, order='C')
c.real = ddot(a, b, alpha, cr, beta)
return c
if atype == numpy.float64 and btype == numpy.complex128:
br = numpy.asarray(b.real, order='C')
bi = numpy.asarray(b.imag, order='C')
cr = ddot(a, br, alpha)
ci = ddot(a, bi, alpha)
ab = cr + ci*1j
elif atype == numpy.complex128 and btype == numpy.float64:
ar = numpy.asarray(a.real, order='C')
ai = numpy.asarray(a.imag, order='C')
cr = ddot(ar, b, alpha)
ci = ddot(ai, b, alpha)
ab = cr + ci*1j
elif atype == numpy.complex128 and btype == numpy.complex128:
k1 = ddot(a.real+a.imag, b.real.copy(), alpha)
k2 = ddot(a.real.copy(), b.imag-b.real, alpha)
k3 = ddot(a.imag.copy(), b.real+b.imag, alpha)
ab = k1-k3 + (k1+k2)*1j
else:
ab = numpy.dot(a, b) * alpha
if c is None:
c = ab
else:
if beta == 0:
c[:] = 0
else:
c *= beta
c += ab
return c
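Gauss's trick trades four real multiplications for three; a scalar sanity check of the k1/k2/k3 combination used above:

ar, ai, br, bi = 1.5, -2.0, 0.25, 3.0
k1 = (ar + ai) * br
k2 = ar * (bi - br)
k3 = ai * (br + bi)
print(complex(k1 - k3, k1 + k2))          # via three real multiplies
print(complex(ar, ai) * complex(br, bi))  # reference: same value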
Example 11 (2 votes)
Project: pymbar Source File: harmonic-oscillators.py
def generate_pmf_data(ndim=1, nbinsperdim=15, nsamples = 1000, K0=20.0, Ku = 100.0, gridscale=0.2, xrange = [[-3,3]]):
x0 = numpy.zeros([ndim], numpy.float64) # center of base potential
numbrellas = 1
nperdim = numpy.zeros([ndim],int)
for d in range(ndim):
nperdim[d] = xrange[d][1] - xrange[d][0] + 1
numbrellas *= nperdim[d]
print "There are a total of %d umbrellas." % numbrellas
# Enumerate umbrella centers, and compute the analytical free energy of that umbrella
print "Constructing umbrellas..."
ksum = (Ku+K0)/beta
kprod = (Ku*K0)/(beta*beta)
f_k_analytical = numpy.zeros(numbrellas, numpy.float64);
xu_i = numpy.zeros([numbrellas, ndim], numpy.float64) # xu_i[i,:] is the center of umbrella i
dp = numpy.zeros(ndim,int)
dp[0] = 1
for d in range(1,ndim):
dp[d] = nperdim[d]*dp[d-1]
umbrella_zero = 0
for i in range(numbrellas):
center = []
for d in range(ndim):
val = gridscale*((i/dp[d]) % nperdim[d] + xrange[d][0])
center.append(val)
center = numpy.array(center)
xu_i[i,:] = center
mu2 = numpy.dot(center,center)
f_k_analytical[i] = numpy.log((ndim*numpy.pi/ksum)**(3/2) *numpy.exp(-kprod*mu2/(2*ksum)))
if numpy.all(center==0.0): # assumes that we have one state that is at the zero.
umbrella_zero = i
i += 1
f_k_analytical -= f_k_analytical[umbrella_zero]
print "Generating %d samples for each of %d umbrellas..." % (nsamples, numbrellas)
x_n = numpy.zeros([numbrellas * nsamples, ndim], numpy.float64)
for i in range(numbrellas):
for dim in range(ndim):
# Compute mu and sigma for this dimension for sampling from V0(x) + Vu(x).
# Product of Gaussians: N(x ; a, A) N(x ; b, B) = N(a ; b , A+B) x N(x ; c, C) where
# C = 1/(1/A + 1/B)
# c = C(a/A+b/B)
# A = 1/K0, B = 1/Ku
sigma = 1.0 / (K0 + Ku)
mu = sigma * (x0[dim]*K0 + xu_i[i,dim]*Ku)
# Generate normal deviates for this dimension.
x_n[i*nsamples:(i+1)*nsamples,dim] = numpy.random.normal(mu, numpy.sqrt(sigma), [nsamples])
u_kn = numpy.zeros([numbrellas, nsamples*numbrellas], numpy.float64)
# Compute reduced potential due to V0.
u_n = beta*(K0/2)*numpy.sum((x_n[:,:] - x0)**2, axis=1)
for k in range(numbrellas):
uu = beta*(Ku/2)*numpy.sum((x_n[:,:] - xu_i[k,:])**2, axis=1) # reduced potential due to umbrella k
u_kn[k,:] = u_n + uu
return u_kn, u_n, x_n, f_k_analytical
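The product-of-Gaussians identity in the comment (C = 1/(1/A + 1/B), c = C(a/A + b/B)) reduces, with A = 1/K0 and B = 1/Ku, to exactly the sigma and mu used above; a quick numeric check:

K0, Ku = 20.0, 100.0
a, b = 0.0, 0.4                      # centers of the two wells
A, B = 1.0 / K0, 1.0 / Ku
C = 1.0 / (1.0 / A + 1.0 / B)
print(C, 1.0 / (K0 + Ku))                          # same variance (sigma)
print(C * (a / A + b / B), C * (a * K0 + b * Ku))  # same mean (mu)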
Example 12 (2 votes)
Project: hedge Source File: diff_shared_fld.py
@memoize_method
def get_kernel(self, diff_op, elgroup, for_benchmark=False):
from cgen import \
Pointer, POD, Value, ArrayOf, Const, \
Module, FunctionDeclaration, FunctionBody, Block, \
Comment, Line, Define, Include, \
Initializer, If, For, Statement, Assign
from pycuda.tools import dtype_to_ctype
from cgen.cuda import CudaShared, CudaGlobal
discr = self.discr
d = discr.dimensions
dims = range(d)
plan = self.plan
given = plan.given
elgroup, = discr.element_groups
float_type = given.float_type
f_decl = CudaGlobal(FunctionDeclaration(Value("void", "apply_diff_mat_smem"),
[Pointer(POD(float_type, "debugbuf")), Pointer(POD(float_type, "field")), ]
+ [Pointer(POD(float_type, "drst%d_global" % i)) for i in dims]
))
par = plan.parallelism
cmod = Module([
Include("pycuda-helpers.hpp"),
])
if float_type == numpy.float64:
cmod.append(Value("texture<fp_tex_double, 1, cudaReadModeElementType>",
"diff_rst_mat_tex"))
elif float_type == numpy.float32:
rst_channels = given.devdata.make_valid_tex_channel_count(d)
cmod.append(Value("texture<float%d, 1, cudaReadModeElementType>"
% rst_channels, "diff_rst_mat_tex"))
else:
raise ValueError("unsupported float type: %s" % float_type)
# only preimage size variation is supported here
assert plan.image_dofs_per_el == given.dofs_per_el()
assert plan.aligned_image_dofs_per_microblock == given.microblock.aligned_floats
# FIXME: aligned_image_dofs_per_microblock must be divisible
# by this, therefore hardcoding for now.
chunk_size = 16
cmod.extend([
Line(),
Define("DIMENSIONS", discr.dimensions),
Define("IMAGE_DOFS_PER_EL", plan.image_dofs_per_el),
Define("PREIMAGE_DOFS_PER_EL", plan.preimage_dofs_per_el),
Define("ALIGNED_IMAGE_DOFS_PER_MB", plan.aligned_image_dofs_per_microblock),
Define("ALIGNED_PREIMAGE_DOFS_PER_MB", plan.aligned_preimage_dofs_per_microblock),
Define("ELS_PER_MB", given.microblock.elements),
Define("IMAGE_DOFS_PER_MB", "(IMAGE_DOFS_PER_EL*ELS_PER_MB)"),
Line(),
Define("CHUNK_SIZE", chunk_size),
Define("CHUNK_DOF", "threadIdx.x"),
Define("PAR_MB_NR", "threadIdx.y"),
Define("CHUNK_NR", "threadIdx.z"),
Define("IMAGE_MB_DOF", "(CHUNK_NR*CHUNK_SIZE+CHUNK_DOF)"),
Define("IMAGE_EL_DOF", "(IMAGE_MB_DOF - mb_el*IMAGE_DOFS_PER_EL)"),
Line(),
Define("MACROBLOCK_NR", "blockIdx.x"),
Line(),
Define("PAR_MB_COUNT", par.parallel),
Define("INLINE_MB_COUNT", par.inline),
Define("SEQ_MB_COUNT", par.serial),
Line(),
Define("GLOBAL_MB_NR_BASE",
"(MACROBLOCK_NR*PAR_MB_COUNT*INLINE_MB_COUNT*SEQ_MB_COUNT)"),
Define("GLOBAL_MB_NR",
"(GLOBAL_MB_NR_BASE"
"+ (seq_mb_number*PAR_MB_COUNT + PAR_MB_NR)*INLINE_MB_COUNT)"),
Define("GLOBAL_MB_IMAGE_DOF_BASE", "(GLOBAL_MB_NR*ALIGNED_IMAGE_DOFS_PER_MB)"),
Define("GLOBAL_MB_PREIMAGE_DOF_BASE", "(GLOBAL_MB_NR*ALIGNED_PREIMAGE_DOFS_PER_MB)"),
Line(),
CudaShared(
ArrayOf(
ArrayOf(
ArrayOf(
POD(float_type, "smem_field"),
"PAR_MB_COUNT"),
"INLINE_MB_COUNT"),
"ALIGNED_PREIMAGE_DOFS_PER_MB")),
Line(),
])
S = Statement
f_body = Block([
Initializer(Const(POD(numpy.uint16, "mb_el")),
"IMAGE_MB_DOF / IMAGE_DOFS_PER_EL"),
Line(),
])
# ---------------------------------------------------------------------
def get_load_code():
mb_img_dofs = plan.aligned_image_dofs_per_microblock
mb_preimg_dofs = plan.aligned_preimage_dofs_per_microblock
preimg_dofs_over_dofs = (mb_preimg_dofs+mb_img_dofs-1) // mb_img_dofs
load_code = []
store_code = []
var_num = 0
for load_block in range(preimg_dofs_over_dofs):
for inl in range(par.inline):
# load and store are split for better pipelining
# compiler can't figure that out because of branch
var = "tmp%d" % var_num
var_num += 1
load_code.append(POD(float_type, var))
block_addr = "%d * ALIGNED_IMAGE_DOFS_PER_MB + IMAGE_MB_DOF" % load_block
load_instr = Assign(var,
"field[GLOBAL_MB_PREIMAGE_DOF_BASE"
" + %d*ALIGNED_PREIMAGE_DOFS_PER_MB"
" + %s]" % (inl, block_addr))
store_instr = Assign(
"smem_field[PAR_MB_NR][%d][%s]" % (inl, block_addr),
var
)
if (load_block+1)*mb_img_dofs >= mb_preimg_dofs:
cond = "%s < ALIGNED_PREIMAGE_DOFS_PER_MB" % block_addr
load_instr = If(cond, load_instr)
store_instr = If(cond, store_instr)
load_code.append(load_instr)
store_code.append(store_instr)
return Block(load_code + [Line()] + store_code)
def get_scalar_diff_code():
code = []
for inl in range(par.inline):
for axis in dims:
code.append(
Initializer(POD(float_type, "d%drst%d" % (inl, axis)), 0))
code.append(Line())
tex_channels = ["x", "y", "z", "w"]
store_code = Block()
for inl in range(par.inline):
for rst_axis in dims:
store_code.append(Assign(
"drst%d_global[GLOBAL_MB_IMAGE_DOF_BASE + "
"%d*ALIGNED_IMAGE_DOFS_PER_MB + IMAGE_MB_DOF]"
% (rst_axis, inl),
"d%drst%d" % (inl, rst_axis)
))
from hedge.backends.cuda.tools import unroll
code.extend([
Comment("everybody needs to be done with the old data"),
S("__syncthreads()"),
Line(),
get_load_code(),
Line(),
Comment("all the new data must be loaded"),
S("__syncthreads()"),
Line(),
])
if float_type == numpy.float32:
code.append(Value("float%d" % rst_channels, "dmat_entries"))
code.extend([
POD(float_type, "field_value%d" % inl)
for inl in range(par.inline)
]+[Line()])
def unroll_body(j):
result = [
Assign("field_value%d" % inl,
"smem_field[PAR_MB_NR][%d][mb_el*PREIMAGE_DOFS_PER_EL+%s]" % (inl, j))
for inl in range(par.inline)
]
if float_type == numpy.float32:
result.append(Assign("dmat_entries",
"tex1Dfetch(diff_rst_mat_tex, IMAGE_EL_DOF + %s*IMAGE_DOFS_PER_EL)" % j))
result.extend(
S("d%drst%d += dmat_entries.%s * field_value%d"
% (inl, axis, tex_channels[axis], inl))
for inl in range(par.inline)
for axis in dims)
elif float_type == numpy.float64:
result.extend(
S("d%(inl)drst%(axis)d += "
"fp_tex1Dfetch(diff_rst_mat_tex, %(axis)d "
"+ DIMENSIONS*(IMAGE_EL_DOF + %(j)d*IMAGE_DOFS_PER_EL))"
"* field_value%(inl)d" % {
"inl": inl,
"axis": axis,
"j": j
})
for inl in range(par.inline)
for axis in dims)
else:
assert False
return result
code.append(If("IMAGE_MB_DOF < IMAGE_DOFS_PER_MB", Block(unroll(unroll_body,
total_number=plan.preimage_dofs_per_el)
+[store_code])))
return code
f_body.extend([
For("unsigned short seq_mb_number = 0",
"seq_mb_number < SEQ_MB_COUNT",
"++seq_mb_number",
Block(get_scalar_diff_code())
)
])
# finish off ----------------------------------------------------------
cmod.append(FunctionBody(f_decl, f_body))
if not for_benchmark and "cuda_dump_kernels" in discr.debug:
from hedge.tools import open_unique_debug_file
open_unique_debug_file("diff", ".cu").write(str(cmod))
mod = SourceModule(cmod,
keep="cuda_keep_kernels" in discr.debug,
#options=["--maxrregcount=16"]
)
func = mod.get_function("apply_diff_mat_smem")
if "cuda_diff" in discr.debug:
print "diff: lmem=%d smem=%d regs=%d" % (
func.local_size_bytes,
func.shared_size_bytes,
func.registers)
diff_rst_mat_texref = mod.get_texref("diff_rst_mat_tex")
gpu_diffmats = self.gpu_diffmats(diff_op, elgroup)
if given.float_type == numpy.float32:
gpu_diffmats.bind_to_texref_ext(diff_rst_mat_texref, rst_channels)
elif given.float_type == numpy.float64:
gpu_diffmats.bind_to_texref_ext(diff_rst_mat_texref,
allow_double_hack=True)
else:
assert False
assert given.microblock.aligned_floats % chunk_size == 0
block = (
chunk_size,
plan.parallelism.parallel,
given.microblock.aligned_floats//chunk_size)
func.prepare(
["PP"] + discr.dimensions*["P"],
texrefs=[diff_rst_mat_texref])
return block, func
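All the cgen classes imported at the top compose into a syntax tree that stringifies to C source; a tiny sketch using only names the example itself imports:

import numpy
from cgen import (Block, FunctionBody, FunctionDeclaration,
                  POD, Pointer, Statement, Value)

decl = FunctionDeclaration(Value("void", "scale"),
                           [Pointer(POD(numpy.float64, "field"))])
body = Block([Statement("field[0] *= 2.0")])
print(FunctionBody(decl, body))  # emits: void scale(double *field) { ... }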
Example 13 (1 vote)
Project: scipy Source File: _linprog.py
def _solve_simplex(T, n, basis, maxiter=1000, phase=2, callback=None,
tol=1.0E-12, nit0=0, bland=False):
"""
Solve a linear programming problem in "standard maximization form" using
the Simplex Method.
Minimize :math:`f = c^T x`
subject to
.. math::
Ax = b
x_i >= 0
b_j >= 0
Parameters
----------
T : array_like
A 2-D array representing the simplex T corresponding to the
maximization problem. It should have the form:
[[A[0, 0], A[0, 1], ..., A[0, n_total], b[0]],
[A[1, 0], A[1, 1], ..., A[1, n_total], b[1]],
.
.
.
[A[m, 0], A[m, 1], ..., A[m, n_total], b[m]],
[c[0], c[1], ..., c[n_total], 0]]
for a Phase 2 problem, or the form:
[[A[0, 0], A[0, 1], ..., A[0, n_total], b[0]],
[A[1, 0], A[1, 1], ..., A[1, n_total], b[1]],
.
.
.
[A[m, 0], A[m, 1], ..., A[m, n_total], b[m]],
[c[0], c[1], ..., c[n_total], 0],
[c'[0], c'[1], ..., c'[n_total], 0]]
for a Phase 1 problem (a problem in which a basic feasible solution is
sought prior to maximizing the actual objective). T is modified in
place by _solve_simplex.
n : int
The number of true variables in the problem.
basis : array
An array of the indices of the basic variables, such that basis[i]
contains the column corresponding to the basic variable for row i.
Basis is modified in place by _solve_simplex
maxiter : int
The maximum number of iterations to perform before aborting the
optimization.
phase : int
The phase of the optimization being executed. In phase 1 a basic
feasible solution is sought and the T has an additional row representing
an alternate objective function.
callback : callable, optional
If a callback function is provided, it will be called within each
iteration of the simplex algorithm. The callback must have the
signature `callback(xk, **kwargs)` where xk is the current solution
vector and kwargs is a dictionary containing the following::
"T" : The current Simplex algorithm T
"nit" : The current iteration.
"pivot" : The pivot (row, column) used for the next iteration.
"phase" : Whether the algorithm is in Phase 1 or Phase 2.
"basis" : The indices of the columns of the basic variables.
tol : float
The tolerance which determines when a solution is "close enough" to
zero in Phase 1 to be considered a basic feasible solution or close
enough to positive to serve as an optimal solution.
nit0 : int
The initial iteration number used to keep an accurate iteration total
in a two-phase problem.
bland : bool
If True, choose pivots using Bland's rule [3]. In problems which
fail to converge due to cycling, using Bland's rule can provide
convergence at the expense of a less optimal path about the simplex.
Returns
-------
res : OptimizeResult
The optimization result represented as a ``OptimizeResult`` object.
Important attributes are: ``x`` the solution array, ``success`` a
Boolean flag indicating if the optimizer exited successfully and
``message`` which describes the cause of the termination. Possible
values for the ``status`` attribute are:
0 : Optimization terminated successfully
1 : Iteration limit reached
2 : Problem appears to be infeasible
3 : Problem appears to be unbounded
See `OptimizeResult` for a description of other attributes.
"""
nit = nit0
complete = False
if phase == 1:
m = T.shape[0]-2
elif phase == 2:
m = T.shape[0]-1
else:
raise ValueError("Argument 'phase' to _solve_simplex must be 1 or 2")
if phase == 2:
# Check if any artificial variables are still in the basis.
# If yes, check if any coefficients from this row and a column
# corresponding to one of the non-artificial variable is non-zero.
# If found, pivot at this term. If not, start phase 2.
# Do this for all artificial variables in the basis.
# Ref: "An Introduction to Linear Programming and Game Theory"
# by Paul R. Thie, Gerard E. Keough, 3rd Ed,
# Chapter 3.7 Redundant Systems (page 102)
for pivrow in [row for row in range(basis.size)
if basis[row] > T.shape[1] - 2]:
non_zero_row = [col for col in range(T.shape[1] - 1)
if T[pivrow, col] != 0]
if len(non_zero_row) > 0:
pivcol = non_zero_row[0]
# variable represented by pivcol enters
# variable in basis[pivrow] leaves
basis[pivrow] = pivcol
pivval = T[pivrow][pivcol]
T[pivrow, :] = T[pivrow, :] / pivval
for irow in range(T.shape[0]):
if irow != pivrow:
T[irow, :] = T[irow, :] - T[pivrow, :]*T[irow, pivcol]
nit += 1
if len(basis[:m]) == 0:
solution = np.zeros(T.shape[1] - 1, dtype=np.float64)
else:
solution = np.zeros(max(T.shape[1] - 1, max(basis[:m]) + 1),
dtype=np.float64)
while not complete:
# Find the pivot column
pivcol_found, pivcol = _pivot_col(T, tol, bland)
if not pivcol_found:
pivcol = np.nan
pivrow = np.nan
status = 0
complete = True
else:
# Find the pivot row
pivrow_found, pivrow = _pivot_row(T, pivcol, phase, tol)
if not pivrow_found:
status = 3
complete = True
if callback is not None:
solution[:] = 0
solution[basis[:m]] = T[:m, -1]
callback(solution[:n], **{"tableau": T,
"phase":phase,
"nit":nit,
"pivot":(pivrow, pivcol),
"basis":basis,
"complete": complete and phase == 2})
if not complete:
if nit >= maxiter:
# Iteration limit exceeded
status = 1
complete = True
else:
# variable represented by pivcol enters
# variable in basis[pivrow] leaves
basis[pivrow] = pivcol
pivval = T[pivrow][pivcol]
T[pivrow, :] = T[pivrow, :] / pivval
for irow in range(T.shape[0]):
if irow != pivrow:
T[irow, :] = T[irow, :] - T[pivrow, :]*T[irow, pivcol]
nit += 1
return nit, status
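Both pivoting passes above perform the same elementary row operation; factored out (helper name is ours, not scipy's):

import numpy as np

def apply_pivot(T, pivrow, pivcol):
    # Scale the pivot row so T[pivrow, pivcol] becomes 1, then
    # eliminate the pivot column from every other row.
    T[pivrow, :] = T[pivrow, :] / T[pivrow, pivcol]
    for irow in range(T.shape[0]):
        if irow != pivrow:
            T[irow, :] = T[irow, :] - T[pivrow, :] * T[irow, pivcol]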
Example 14 (0 votes)
Project: scipy Source File: cobyla.py
def _minimize_cobyla(fun, x0, args=(), constraints=(),
rhobeg=1.0, tol=1e-4, iprint=1, maxiter=1000,
disp=False, catol=2e-4, **unknown_options):
"""
Minimize a scalar function of one or more variables using the
Constrained Optimization BY Linear Approximation (COBYLA) algorithm.
Options
-------
rhobeg : float
Reasonable initial changes to the variables.
tol : float
Final accuracy in the optimization (not precisely guaranteed).
This is a lower bound on the size of the trust region.
disp : bool
Set to True to print convergence messages. If False,
`verbosity` is ignored and set to 0.
maxiter : int
Maximum number of function evaluations.
catol : float
Tolerance (absolute) for constraint violations
"""
_check_unknown_options(unknown_options)
maxfun = maxiter
rhoend = tol
if not disp:
iprint = 0
# check constraints
if isinstance(constraints, dict):
constraints = (constraints, )
for ic, con in enumerate(constraints):
# check type
try:
ctype = con['type'].lower()
except KeyError:
raise KeyError('Constraint %d has no type defined.' % ic)
except TypeError:
raise TypeError('Constraints must be defined using a '
'dictionary.')
except AttributeError:
raise TypeError("Constraint's type must be a string.")
else:
if ctype != 'ineq':
raise ValueError("Constraints of type '%s' not handled by "
"COBYLA." % con['type'])
# check function
if 'fun' not in con:
raise KeyError('Constraint %d has no function defined.' % ic)
# check extra arguments
if 'args' not in con:
con['args'] = ()
# m is the total number of constraint values
# it takes into account that some constraints may be vector-valued
cons_lengths = []
for c in constraints:
f = c['fun'](x0, *c['args'])
try:
cons_length = len(f)
except TypeError:
cons_length = 1
cons_lengths.append(cons_length)
m = sum(cons_lengths)
def calcfc(x, con):
f = fun(x, *args)
i = 0
for size, c in izip(cons_lengths, constraints):
con[i: i + size] = c['fun'](x, *c['args'])
i += size
return f
info = np.zeros(4, np.float64)
xopt, info = _cobyla.minimize(calcfc, m=m, x=np.copy(x0), rhobeg=rhobeg,
rhoend=rhoend, iprint=iprint, maxfun=maxfun,
dinfo=info)
if info[3] > catol:
# Check constraint violation
info[0] = 4
return OptimizeResult(x=xopt,
status=int(info[0]),
success=info[0] == 1,
message={1: 'Optimization terminated successfully.',
2: 'Maximum number of function evaluations has '
'been exceeded.',
3: 'Rounding errors are becoming damaging in '
'COBYLA subroutine.',
4: 'Did not converge to a solution satisfying '
'the constraints. See `maxcv` for magnitude '
'of violation.'
}.get(info[0], 'Unknown exit status.'),
nfev=int(info[1]),
fun=info[2],
maxcv=info[3])
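_minimize_cobyla is normally reached through the public scipy.optimize.minimize front end; a minimal usage sketch with one inequality constraint (x[0] + x[1] >= 1):

import numpy as np
from scipy.optimize import minimize

res = minimize(lambda x: np.sum(x ** 2), x0=np.array([2.0, 2.0]),
               method='COBYLA', tol=1e-4,
               constraints=[{'type': 'ineq',
                             'fun': lambda x: x[0] + x[1] - 1.0}])
print(res.x)  # approximately [0.5, 0.5]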
Example 15 (0 votes)
Project: mpop Source File: geo_image.py
def geotiff_save(self, filename, compression=6,
tags=None, gdal_options=None,
blocksize=0, geotransform=None,
spatialref=None, floating_point=False,
writer_options=None):
"""Save the image to the given *filename* in geotiff_ format, with the
*compression* level in [0, 9]. 0 means not compressed. The *tags*
argument is a dict of tags to include in the image (as metadata). By
default it uses the 'area' instance to generate geotransform and
spatialref information, this can be overwritten by the arguments
*geotransform* and *spatialref*. *floating_point* allows the saving of
'L' mode images in floating point format if set to True.
When argument *writer_options* is not None and entry 'fill_value_subst'
is included, its numeric value will be used to substitute image data
that would be equal to the fill_value (used to replace masked data).
.. _geotiff: http://trac.osgeo.org/geotiff/
"""
from osgeo import gdal, osr
raster = gdal.GetDriverByName("GTiff")
tags = tags or {}
writer_options = writer_options or {}
if floating_point:
if self.mode != "L":
raise ValueError("Image must be in 'L' mode for floating point"
" geotiff saving")
if self.fill_value is None:
logger.warning("Image with floats cannot be transparent, "
"so setting fill_value to 0")
self.fill_value = 0
channels = [self.channels[0].astype(np.float64)]
fill_value = self.fill_value or [0]
gformat = gdal.GDT_Float64
opacity = 0
else:
nbits = int(tags.get("NBITS", "8"))
if nbits > 16:
dtype = np.uint32
gformat = gdal.GDT_UInt32
elif nbits > 8:
dtype = np.uint16
gformat = gdal.GDT_UInt16
else:
dtype = np.uint8
gformat = gdal.GDT_Byte
opacity = np.iinfo(dtype).max
channels, fill_value = self._finalize(dtype)
fill_value_subst = writer_options.get(
write_opts.WR_OPT_FILL_VALUE_SUBST, None)
if fill_value is not None and fill_value_subst is not None:
for i, chan in enumerate(channels):
np.place(chan, chan == fill_value[i], int(fill_value_subst))
logger.debug("Saving to GeoTiff.")
if tags is not None:
self.tags.update(tags)
if gdal_options is not None:
self.gdal_options.update(gdal_options)
g_opts = ["=".join(i) for i in self.gdal_options.items()]
if compression != 0:
g_opts.append("COMPRESS=DEFLATE")
g_opts.append("ZLEVEL=" + str(compression))
if blocksize != 0:
g_opts.append("TILED=YES")
g_opts.append("BLOCKXSIZE=" + str(blocksize))
g_opts.append("BLOCKYSIZE=" + str(blocksize))
if(self.mode == "L"):
ensure_dir(filename)
if fill_value is not None:
dst_ds = raster.Create(filename,
self.width,
self.height,
1,
gformat,
g_opts)
else:
g_opts.append("ALPHA=YES")
dst_ds = raster.Create(filename,
self.width,
self.height,
2,
gformat,
g_opts)
self._gdal_write_channels(dst_ds, channels,
opacity, fill_value)
elif(self.mode == "LA"):
ensure_dir(filename)
g_opts.append("ALPHA=YES")
dst_ds = raster.Create(filename,
self.width,
self.height,
2,
gformat,
g_opts)
self._gdal_write_channels(dst_ds,
channels[:-1], channels[1],
fill_value)
elif(self.mode == "RGB"):
ensure_dir(filename)
if fill_value is not None:
dst_ds = raster.Create(filename,
self.width,
self.height,
3,
gformat,
g_opts)
else:
g_opts.append("ALPHA=YES")
dst_ds = raster.Create(filename,
self.width,
self.height,
4,
gformat,
g_opts)
self._gdal_write_channels(dst_ds, channels,
opacity, fill_value)
elif(self.mode == "RGBA"):
ensure_dir(filename)
g_opts.append("ALPHA=YES")
dst_ds = raster.Create(filename,
self.width,
self.height,
4,
gformat,
g_opts)
self._gdal_write_channels(dst_ds,
channels[:-1], channels[3],
fill_value)
else:
raise NotImplementedError("Saving to GeoTIFF using image mode"
" %s is not implemented." % self.mode)
# Create raster GeoTransform based on upper left corner and pixel
# resolution ... if not overwritten by argument geotransform.
if geotransform:
dst_ds.SetGeoTransform(geotransform)
if spatialref:
if not isinstance(spatialref, str):
spatialref = spatialref.ExportToWkt()
dst_ds.SetProjection(spatialref)
else:
from pyresample import utils
from mpop.projector import get_area_def
try:
area = get_area_def(self.area)
except (utils.AreaNotFound, AttributeError):
area = self.area
try:
adfgeotransform = [area.area_extent[0], area.pixel_size_x, 0,
area.area_extent[3], 0, -area.pixel_size_y]
dst_ds.SetGeoTransform(adfgeotransform)
srs = osr.SpatialReference()
srs.ImportFromProj4(area.proj4_string)
srs.SetProjCS(area.proj_id)
try:
srs.SetWellKnownGeogCS(area.proj_dict['ellps'])
except KeyError:
pass
try:
# Check for epsg code.
srs.ImportFromEPSG(int(area.proj_dict['init'].
lower().split('epsg:')[1]))
except (KeyError, IndexError):
pass
srs = srs.ExportToWkt()
dst_ds.SetProjection(srs)
except AttributeError:
logger.exception("Could not load geographic data, invalid area")
self.tags.update({'TIFFTAG_DATETIME':
self.time_slot.strftime("%Y:%m:%d %H:%M:%S")})
dst_ds.SetMetadata(self.tags, '')
# Close the dataset
dst_ds = None
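A hypothetical usage sketch for the method above; `img` is assumed to be an mpop GeoImage whose `area` attribute supplies the geotransform and spatial reference, and the tags and block size are illustrative:

# `img` is an assumed GeoImage instance with a valid `area`.
img.geotiff_save('overview.tif', compression=6, blocksize=256,
                 tags={'TIFFTAG_IMAGEDESCRIPTION': 'channel overview'})
# 'L' mode data can be written as Float64 instead of scaled integers:
img.geotiff_save('raw_values.tif', floating_point=True)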
0
Example 16
Project: cartopy Source File: test_img_nest.py
def requires_wmts_data(function):
"""
A decorator which ensures that the WMTS data is available for
use in testing.
"""
aerial = cimgt.MapQuestOpenAerial()
# get web tiles up to 3 zoom levels deep
tiles = [(0, 0, 0)]
for tile in aerial.subtiles((0, 0, 0)):
tiles.append(tile)
for tile in tiles[1:]:
for sub_tile in aerial.subtiles(tile):
tiles.append(sub_tile)
fname_template = os.path.join(_TEST_DATA_DIR, 'z_{}', 'x_{}_y_{}.png')
if not os.path.isdir(_TEST_DATA_DIR):
os.makedirs(_TEST_DATA_DIR)
data_version_fname = os.path.join(_TEST_DATA_DIR, 'version.txt')
test_data_version = None
try:
with open(data_version_fname, 'r') as fh:
test_data_version = int(fh.read().strip())
except IOError:
pass
finally:
if test_data_version != _TEST_DATA_VERSION:
warnings.warn('WMTS test data is out of date, regenerating at '
'{}.'.format(_TEST_DATA_DIR))
shutil.rmtree(_TEST_DATA_DIR)
os.makedirs(_TEST_DATA_DIR)
with open(data_version_fname, 'w') as fh:
fh.write(str(_TEST_DATA_VERSION))
# Download the tiles.
for tile in tiles:
x, y, z = tile
fname = fname_template.format(z, x, y)
if not os.path.exists(fname):
if not os.path.isdir(os.path.dirname(fname)):
os.makedirs(os.path.dirname(fname))
img, extent, _ = aerial.get_image(tile)
nx, ny = 256, 256
x_rng = extent[1] - extent[0]
y_rng = extent[3] - extent[2]
pix_size_x = x_rng / nx
pix_size_y = y_rng / ny
upper_left_center = (extent[0] + pix_size_x / 2,
extent[2] + pix_size_y / 2)
pgw_fname = fname[:-4] + '.pgw'
pgw_keys = {'x_pix_size': np.float64(pix_size_x),
'y_rotation': 0,
'x_rotation': 0,
'y_pix_size': np.float64(pix_size_y),
'x_center': np.float64(upper_left_center[0]),
'y_center': np.float64(upper_left_center[1]),
}
_save_world(pgw_fname, pgw_keys)
img.save(fname)
global _TEST_DATA_AVAILABLE
_TEST_DATA_AVAILABLE = True
return function
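The world-file (.pgw) values written above reduce to simple arithmetic on the tile extent. A self-contained sketch with a made-up extent (x_min, x_max, y_min, y_max) and a 256x256 tile:

import numpy as np

extent = (-180.0, 180.0, -90.0, 90.0)   # illustrative extent
nx = ny = 256
pix_size_x = (extent[1] - extent[0]) / nx
pix_size_y = (extent[3] - extent[2]) / ny
# World files reference the center of a corner pixel, hence the half-pixel shift.
corner_center = (extent[0] + pix_size_x / 2,
                 extent[2] + pix_size_y / 2)
print(np.float64(pix_size_x), np.float64(pix_size_y), corner_center)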
0
Example 17
Project: sfepy Source File: time_poisson_interactive.py
def main():
from sfepy import data_dir
parser = ArgumentParser(description=__doc__,
formatter_class=RawDescriptionHelpFormatter)
parser.add_argument('--version', action='version', version='%(prog)s')
parser.add_argument('--diffusivity', metavar='float', type=float,
action='store', dest='diffusivity',
default=1e-5, help=helps['diffusivity'])
parser.add_argument('--ic-max', metavar='float', type=float,
action='store', dest='ic_max',
default=2.0, help=helps['ic_max'])
parser.add_argument('--order', metavar='int', type=int,
action='store', dest='order',
default=2, help=helps['order'])
parser.add_argument('-r', '--refine', metavar='int', type=int,
action='store', dest='refine',
default=0, help=helps['refine'])
parser.add_argument('-p', '--probe',
action="store_true", dest='probe',
default=False, help=helps['probe'])
parser.add_argument('-s', '--show',
action="store_true", dest='show',
default=False, help=helps['show'])
options = parser.parse_args()
assert_((0 < options.order),
'temperature approximation order must be at least 1!')
output('using values:')
output(' diffusivity:', options.diffusivity)
output(' max. IC value:', options.ic_max)
output('uniform mesh refinement level:', options.refine)
mesh = Mesh.from_file(data_dir + '/meshes/3d/cylinder.mesh')
domain = FEDomain('domain', mesh)
if options.refine > 0:
for ii in range(options.refine):
output('refine %d...' % ii)
domain = domain.refine()
output('... %d nodes %d elements'
% (domain.shape.n_nod, domain.shape.n_el))
omega = domain.create_region('Omega', 'all')
left = domain.create_region('Left',
'vertices in x < 0.00001', 'facet')
right = domain.create_region('Right',
'vertices in x > 0.099999', 'facet')
field = Field.from_args('fu', nm.float64, 'scalar', omega,
approx_order=options.order)
T = FieldVariable('T', 'unknown', field, history=1)
s = FieldVariable('s', 'test', field, primary_var_name='T')
m = Material('m', diffusivity=options.diffusivity * nm.eye(3))
integral = Integral('i', order=2*options.order)
t1 = Term.new('dw_diffusion(m.diffusivity, s, T)',
integral, omega, m=m, s=s, T=T)
t2 = Term.new('dw_volume_dot(s, dT/dt)',
integral, omega, s=s, T=T)
eq = Equation('balance', t1 + t2)
eqs = Equations([eq])
# Boundary conditions.
ebc1 = EssentialBC('T1', left, {'T.0' : 2.0})
ebc2 = EssentialBC('T2', right, {'T.0' : -2.0})
# Initial conditions.
def get_ic(coors, ic):
x, y, z = coors.T
return 2 - 40.0 * x + options.ic_max * nm.sin(4 * nm.pi * x / 0.1)
ic_fun = Function('ic_fun', get_ic)
ic = InitialCondition('ic', omega, {'T.0' : ic_fun})
ls = ScipyDirect({})
nls_status = IndexedStruct()
nls = Newton({'is_linear' : True}, lin_solver=ls, status=nls_status)
pb = Problem('heat', equations=eqs, nls=nls, ls=ls)
pb.set_bcs(ebcs=Conditions([ebc1, ebc2]))
pb.set_ics(Conditions([ic]))
tss = SimpleTimeSteppingSolver({'t0' : 0.0, 't1' : 100.0, 'n_step' : 11},
problem=pb)
tss.init_time()
if options.probe:
# Prepare probe data.
probes, labels = gen_lines(pb)
ev = pb.evaluate
order = 2 * (options.order - 1)
gfield = Field.from_args('gu', nm.float64, 'vector', omega,
approx_order=options.order - 1)
dvel = FieldVariable('dvel', 'parameter', gfield,
primary_var_name='(set-to-None)')
cfield = Field.from_args('gu', nm.float64, 'scalar', omega,
approx_order=options.order - 1)
component = FieldVariable('component', 'parameter', cfield,
primary_var_name='(set-to-None)')
nls_options = {'eps_a' : 1e-16, 'i_max' : 1}
if options.show:
plt.ion()
# Solve the problem using the time stepping solver.
suffix = tss.ts.suffix
for step, time, state in tss():
if options.probe:
# Probe the solution.
dvel_qp = ev('ev_diffusion_velocity.%d.Omega(m.diffusivity, T)'
% order, copy_materials=False, mode='qp')
project_by_component(dvel, dvel_qp, component, order,
nls_options=nls_options)
all_results = []
for ii, probe in enumerate(probes):
fig, results = probe_results(ii, T, dvel, probe, labels[ii])
all_results.append(results)
plt.tight_layout()
fig.savefig('time_poisson_interactive_probe_%s.png'
% (suffix % step), bbox_inches='tight')
if options.show:
plt.draw()
for ii, results in enumerate(all_results):
output('probe %d (%s):' % (ii, probes[ii].name))
output.level += 2
for key, res in ordered_iteritems(results):
output(key + ':')
val = res[1]
output(' min: %+.2e, mean: %+.2e, max: %+.2e'
% (val.min(), val.mean(), val.max()))
output.level -= 2
0
Example 18
Project: pyorbital Source File: orbital.py
def get_next_passes(self, utc_time, length, lon, lat, alt, tol=0.001, horizon=0):
"""Calculate passes for the next hours for a given start time and a
given observer.
Original by Martin.
utc_time: Observation time (datetime object)
length: Number of hours to find passes (int)
lon: Longitude of observer position on ground (float)
lat: Latitude of observer position on ground (float)
alt: Altitude above sea-level (geoid) of observer position on ground (float)
tol: precision of the result in seconds
horizon: the elevation of horizon to compute risetime and falltime.
Return: [(rise-time, fall-time, max-elevation-time), ...]
"""
def elevation(minutes):
"""elevation
"""
return self.get_observer_look(utc_time +
timedelta(
minutes=np.float64(minutes)),
lon, lat, alt)[1] - horizon
def elevation_inv(minutes):
"""inverse of elevation
"""
return -elevation(minutes)
def get_root_secant(fun, start, end, tol=0.01):
"""Secant method
"""
x_0 = end
x_1 = start
fx_0 = fun(end)
fx_1 = fun(start)
if abs(fx_0) < abs(fx_1):
fx_0, fx_1 = fx_1, fx_0
x_0, x_1 = x_1, x_0
while abs(x_0 - x_1) > tol:
x_n = x_1 - fx_1 * ((x_1 - x_0) / (fx_1 - fx_0))
x_0, x_1 = x_1, x_n
fx_0, fx_1 = fx_1, fun(x_n)
return x_1
def get_max_parab(fun, start, end, tol=0.01):
"""Successive parabolic interpolation
"""
a = start
c = end
b = (a + c) / 2.0
x = b
f_a = fun(a)
f_b = fun(b)
f_c = fun(c)
while abs(c - a) > tol:
x = b - 0.5 * (((b - a) ** 2 * (f_b - f_c)
- (b - c) ** 2 * (f_b - f_a)) /
((b - a) * (f_b - f_c) - (b - c) * (f_b - f_a)))
f_x = fun(x)
if x > b:
a, b, c = b, x, c
f_a, f_b, f_c = f_b, f_x, f_c
else:
a, b, c = a, x, b
f_a, f_b, f_c = f_a, f_x, f_b
return x
times = utc_time + np.array([timedelta(minutes=minutes)
for minutes in range(length * 60)])
elev = self.get_observer_look(times, lon, lat, alt)[1] - horizon
zcs = np.where(np.diff(np.sign(elev)))[0]
res = []
risetime = None
falltime = None
for guess in zcs:
horizon_mins = get_root_secant(
elevation, guess, guess + 1.0, tol=tol / 60.0)
horizon_time = utc_time + timedelta(minutes=horizon_mins)
if elev[guess] < 0:
risetime = horizon_time
risemins = horizon_mins
falltime = None
else:
falltime = horizon_time
fallmins = horizon_mins
if risetime:
middle = (risemins + fallmins) / 2.0
highest = utc_time + \
timedelta(minutes=get_max_parab(
elevation_inv,
middle - 0.1, middle + 0.1,
tol=tol / 60.0
))
res += [(risetime, falltime, highest)]
risetime = None
return res
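The nested get_root_secant helper above is a plain secant iteration. A self-contained sketch of the same idea on a toy function with a known root at x = 2 (this simplified loop drops the |f| reordering of the original):

def secant(fun, x0, x1, tol=1e-6):
    """Secant root finder: iterate the chord through the last two points."""
    f0, f1 = fun(x0), fun(x1)
    while abs(x1 - x0) > tol:
        x_new = x1 - f1 * (x1 - x0) / (f1 - f0)
        x0, f0 = x1, f1
        x1, f1 = x_new, fun(x_new)
    return x1

print(secant(lambda x: x ** 2 - 4.0, 1.0, 3.0))  # ~2.0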
0
Example 19
def fit(self, features, classes):
"""Fits a machine learning pipeline that maximizes classification score
on the provided data
Uses genetic programming to optimize a machine learning pipeline that
maximizes classification score on the provided features and classes.
Performs an internal stratified training/testing cross-validaton split
to avoid overfitting on the provided data.
Parameters
----------
features: array-like {n_samples, n_features}
Feature matrix
classes: array-like {n_samples}
List of class labels for prediction
Returns
-------
None
"""
features = features.astype(np.float64)
# Set the seed for the GP run
if self.random_state:
random.seed(self.random_state)
np.random.seed(self.random_state)
self._start_datetime = datetime.now()
self._toolbox.register('evaluate', self._evaluate_individual, features=features, classes=classes)
pop = self._toolbox.population(n=self.population_size)
def pareto_eq(ind1, ind2):
"""Determines whether two individuals are equal on the Pareto front
Parameters
----------
ind1: DEAP individual from the GP population
First individual to compare
ind2: DEAP individual from the GP population
Second individual to compare
Returns
----------
individuals_equal: bool
Boolean indicating whether the two individuals are equal on
the Pareto front
"""
return np.all(ind1.fitness.values == ind2.fitness.values)
self._hof = tools.ParetoFront(similar=pareto_eq)
# Start the progress bar
if self.max_time_mins:
total_evals = self.population_size
else:
total_evals = self.population_size * (self.generations + 1)
self._pbar = tqdm(total=total_evals, unit='pipeline', leave=False,
disable=not (self.verbosity >= 2), desc='Optimization Progress')
try:
pop, _ = algorithms.eaSimple(
population=pop, toolbox=self._toolbox, cxpb=self.crossover_rate,
mutpb=self.mutation_rate, ngen=self.generations,
halloffame=self._hof, verbose=False)
# Allow for certain exceptions to signal a premature fit() cancellation
except (KeyboardInterrupt, SystemExit):
if self.verbosity > 0:
print('') # just for better interface
print('GP closed prematurely - will use current best pipeline')
finally:
# Close the progress bar
# Standard truthiness checks won't work for tqdm
if self._pbar is not None:
self._pbar.close()
# Reset gp_generation counter to restore initial state
self._gp_generation = 0
# Store the pipeline with the highest internal testing score
if self._hof:
top_score = -float('inf')
for pipeline, pipeline_scores in zip(self._hof.items, reversed(self._hof.keys)):
if pipeline_scores.wvalues[1] > top_score:
self._optimized_pipeline = pipeline
if not self._optimized_pipeline:
raise ValueError('There was an error in the TPOT optimization '
'process. This could be because the data was '
'not formatted properly, or because data for '
'a regression problem was provided to the '
'TPOTClassifier object. Please make sure you '
'passed the data to TPOT correctly.')
self._fitted_pipeline = self._toolbox.compile(expr=self._optimized_pipeline)
with warnings.catch_warnings():
warnings.simplefilter('ignore')
self._fitted_pipeline.fit(features, classes)
if self.verbosity in [1, 2] and self._optimized_pipeline:
# Add an extra line of spacing if the progress bar was used
if self.verbosity >= 2:
print('')
print('Best pipeline: {}'.format(self._optimized_pipeline))
# Store and fit the entire Pareto front if sciencing
elif self.verbosity >= 3 and self._hof:
self._hof_fitted_pipelines = {}
for pipeline in self._hof.items:
self._hof_fitted_pipelines[str(pipeline)] = self._toolbox.compile(expr=pipeline)
with warnings.catch_warnings():
warnings.simplefilter('ignore')
self._hof_fitted_pipelines[str(pipeline)].fit(features, classes)
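An assumed end-to-end usage sketch for the fit() method above, based on TPOT's public API of the same era; the dataset and parameter values are illustrative:

from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from tpot import TPOTClassifier

digits = load_digits()
X_train, X_test, y_train, y_test = train_test_split(
    digits.data, digits.target, train_size=0.75, random_state=42)

tpot = TPOTClassifier(generations=5, population_size=20,
                      random_state=42, verbosity=2)
tpot.fit(X_train, y_train)            # runs the GP loop shown above
print(tpot.score(X_test, y_test))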
0
Example 20
Project: sfepy Source File: shell10x_cantilever_interactive.py
def main():
parser = ArgumentParser(description=__doc__.rstrip(),
formatter_class=RawDescriptionHelpFormatter)
parser.add_argument('output_dir', help=helps['output_dir'])
parser.add_argument('-d', '--dims', metavar='l,w,t',
action='store', dest='dims',
default='0.2,0.01,0.001', help=helps['dims'])
parser.add_argument('-n', '--nx', metavar='start,stop,step',
action='store', dest='nx',
default='2,103,10', help=helps['nx'])
parser.add_argument('-t', '--transform', choices=['none', 'bend', 'twist'],
action='store', dest='transform',
default='none', help=helps['transform'])
parser.add_argument('--young', metavar='float', type=float,
action='store', dest='young',
default=210e9, help=helps['young'])
parser.add_argument('--poisson', metavar='float', type=float,
action='store', dest='poisson',
default=0.3, help=helps['poisson'])
parser.add_argument('--force', metavar='float', type=float,
action='store', dest='force',
default=-1.0, help=helps['force'])
parser.add_argument('-p', '--plot',
action="store_true", dest='plot',
default=False, help=helps['plot'])
parser.add_argument('--u-scaling', metavar='float', type=float,
action='store', dest='scaling',
default=1.0, help=helps['scaling'])
parser.add_argument('-s', '--show',
action="store_true", dest='show',
default=False, help=helps['show'])
parser.add_argument('--silent',
action='store_true', dest='silent',
default=False, help=helps['silent'])
options = parser.parse_args()
dims = nm.array([float(ii) for ii in options.dims.split(',')],
dtype=nm.float64)
nxs = tuple([int(ii) for ii in options.nx.split(',')])
young = options.young
poisson = options.poisson
force = options.force
output_dir = options.output_dir
odir = lambda filename: os.path.join(output_dir, filename)
filename = odir('output_log.txt')
ensure_path(filename)
output.set_output(filename=filename, combined=not options.silent)
output('output directory:', output_dir)
output('using values:')
output(" dimensions:", dims)
output(" nx range:", nxs)
output(" Young's modulus:", options.young)
output(" Poisson's ratio:", options.poisson)
output(' force:', options.force)
output(' transform:', options.transform)
if options.transform == 'none':
options.transform = None
u_exact = get_analytical_displacement(dims, young, force,
transform=options.transform)
if options.transform is None:
ilog = 2
labels = ['u_3']
elif options.transform == 'bend':
ilog = 0
labels = ['u_1']
elif options.transform == 'twist':
ilog = [0, 1, 2]
labels = ['u_1', 'u_2', 'u_3']
label = ', '.join(labels)
log = []
for nx in range(*nxs):
shape = (nx, 2)
pb, state, u, gamma2 = solve_problem(shape, dims, young, poisson, force,
transform=options.transform)
dofs = u.get_state_in_region(gamma2)
output('DOFs along the loaded edge:')
output('\n%s' % dofs)
log.append([nx - 1] + nm.array(dofs[0, ilog], ndmin=1).tolist())
pb.save_state(odir('shell10x_cantilever.vtk'), state)
log = nm.array(log)
output('max. %s displacement w.r.t. number of cells:' % label)
output('\n%s' % log)
output('analytical value:', u_exact)
if options.plot:
import matplotlib.pyplot as plt
plt.rcParams.update({
'lines.linewidth' : 3,
'font.size' : 16,
})
fig, ax1 = plt.subplots()
fig.suptitle('max. $%s$ displacement' % label)
for ic in range(log.shape[1] - 1):
ax1.plot(log[:, 0], log[:, ic + 1], label=r'$%s$' % labels[ic])
ax1.set_xlabel('# of cells')
ax1.set_ylabel(r'$%s$' % label)
ax1.grid(which='both')
lines1, labels1 = ax1.get_legend_handles_labels()
if u_exact is not None:
ax1.hlines(u_exact, log[0, 0], log[-1, 0],
'r', 'dotted', label=r'$%s^{analytical}$' % label)
ax2 = ax1.twinx()
# Assume single log column.
ax2.semilogy(log[:, 0], nm.abs(log[:, 1] - u_exact), 'g',
label=r'$|%s - %s^{analytical}|$' % (label, label))
ax2.set_ylabel(r'$|%s - %s^{analytical}|$' % (label, label))
lines2, labels2 = ax2.get_legend_handles_labels()
else:
lines2, labels2 = [], []
ax1.legend(lines1 + lines2, labels1 + labels2, loc='best')
plt.tight_layout()
ax1.set_xlim([log[0, 0] - 2, log[-1, 0] + 2])
suffix = {None: 'straight',
'bend' : 'bent', 'twist' : 'twisted'}[options.transform]
fig.savefig(odir('shell10x_cantilever_convergence_%s.png' % suffix))
plt.show()
if options.show:
from sfepy.postprocess.viewer import Viewer
from sfepy.postprocess.domain_specific import DomainSpecificPlot
ds = {'u_disp' :
DomainSpecificPlot('plot_displacements',
['rel_scaling=%f' % options.scaling])}
view = Viewer(odir('shell10x_cantilever.vtk'))
view(domain_specific=ds, is_scalar_bar=True, is_wireframe=True,
opacity={'wireframe' : 0.5})
0
Example 21
Project: sfepy Source File: coefs_phononic.py
def detect_band_gaps(mass, freq_info, opts, gap_kind='normal', mtx_b=None):
"""
Detect band gaps given the solution to an eigenproblem (eigs,
eig_vectors). Only valid resonance frequencies (i.e. those for which
the corresponding eigenmomenta are above a given threshold) are taken
into account.
Notes
-----
- make freq_eps relative to ]f0, f1[ size?
"""
output('eigensolver:', opts.eigensolver)
fm = freq_info.freq_range_margins
min_freq, max_freq = fm[0], fm[-1]
output('freq. range with margins: [%8.3f, %8.3f]'
% (min_freq, max_freq))
df = opts.freq_step * (max_freq - min_freq)
fz_callback = get_callback(mass.evaluate, opts.eigensolver,
mtx_b=mtx_b, mode='find_zero')
trace_callback = get_callback(mass.evaluate, opts.eigensolver,
mtx_b=mtx_b, mode='trace')
n_col = 1 + (mtx_b is not None)
logs = [[] for ii in range(n_col + 1)]
gaps = []
for ii in range(freq_info.freq_range.shape[0] + 1):
f0, f1 = fm[[ii, ii+1]]
output('interval: ]%.8f, %.8f[...' % (f0, f1))
log_freqs = get_log_freqs(f0, f1, df, opts.freq_eps, 100, 1000)
output('n_logged: %d' % log_freqs.shape[0])
log_mevp = [[] for ii in range(n_col)]
for f in log_freqs:
for ii, data in enumerate(trace_callback(f)):
log_mevp[ii].append(data)
# Get log for the first and last f in log_freqs.
lf0 = log_freqs[0]
lf1 = log_freqs[-1]
log0, log1 = log_mevp[0][0], log_mevp[0][-1]
min_eig0 = log0[0]
max_eig1 = log1[-1]
if gap_kind == 'liquid':
mevp = nm.array(log_mevp, dtype=nm.float64).squeeze()
si = nm.where(mevp[:,0] < 0.0)[0]
li = nm.where(mevp[:,-1] < 0.0)[0]
wi = nm.setdiff1d(si, li)
if si.shape[0] == 0: # No gaps.
gap = ([2, lf0, log0[0]], [2, lf0, log0[-1]])
gaps.append(gap)
elif li.shape[0] == mevp.shape[0]: # Full interval strong gap.
gap = ([1, lf1, log1[0]], [1, lf1, log1[-1]])
gaps.append(gap)
else:
subgaps = []
for chunk in split_chunks(li): # Strong gaps.
i0, i1 = chunk[0], chunk[-1]
fmin, fmax = log_freqs[i0], log_freqs[i1]
gap = ([1, fmin, mevp[i0,-1]], [1, fmax, mevp[i1,-1]])
subgaps.append(gap)
for chunk in split_chunks(wi): # Weak gaps.
i0, i1 = chunk[0], chunk[-1]
fmin, fmax = log_freqs[i0], log_freqs[i1]
gap = ([0, fmin, mevp[i0,-1]], [2, fmax, mevp[i1,-1]])
subgaps.append(gap)
gaps.append(subgaps)
else:
if min_eig0 > 0.0: # No gaps.
gap = ([2, lf0, log0[0]], [2, lf0, log0[-1]])
elif max_eig1 < 0.0: # Full interval strong gap.
gap = ([1, lf1, log1[0]], [1, lf1, log1[-1]])
else:
llog_freqs = list(log_freqs)
# Insert fmin, fmax into log.
output('finding zero of the largest eig...')
smax, fmax, vmax = find_zero(lf0, lf1, fz_callback,
opts.freq_eps, opts.zero_eps, 1)
im = nm.searchsorted(log_freqs, fmax)
llog_freqs.insert(im, fmax)
for ii, data in enumerate(trace_callback(fmax)):
log_mevp[ii].insert(im, data)
output('...done')
if smax in [0, 2]:
output('finding zero of the smallest eig...')
# having fmax instead of f0 does not work if freq_eps is
# large.
smin, fmin, vmin = find_zero(lf0, lf1, fz_callback,
opts.freq_eps, opts.zero_eps, 0)
im = nm.searchsorted(log_freqs, fmin)
# +1 due to fmax already inserted before.
llog_freqs.insert(im+1, fmin)
for ii, data in enumerate(trace_callback(fmin)):
log_mevp[ii].insert(im+1, data)
output('...done')
elif smax == 1:
smin = 1 # both are negative everywhere.
fmin, vmin = fmax, vmax
gap = ([smin, fmin, vmin], [smax, fmax, vmax])
log_freqs = nm.array(llog_freqs)
output(gap[0])
output(gap[1])
gaps.append(gap)
logs[0].append(log_freqs)
for ii, data in enumerate(log_mevp):
logs[ii+1].append(nm.array(data, dtype = nm.float64))
output('...done')
kinds = describe_gaps(gaps)
slogs = Struct(freqs=logs[0], eigs=logs[1])
if n_col == 2:
slogs.eig_vectors = logs[2]
return slogs, gaps, kinds
0
Example 22
Project: zipline Source File: period.py
def calculate_metrics(self):
self.benchmark_period_returns = \
cum_returns(self.benchmark_returns).iloc[-1]
self.algorithm_period_returns = \
cum_returns(self.algorithm_returns).iloc[-1]
if not self.algorithm_returns.index.equals(
self.benchmark_returns.index
):
message = "Mismatch between benchmark_returns ({bm_count}) and \
algorithm_returns ({algo_count}) in range {start} : {end}"
message = message.format(
bm_count=len(self.benchmark_returns),
algo_count=len(self.algorithm_returns),
start=self._start_session,
end=self._end_session
)
raise Exception(message)
self.num_trading_days = len(self.benchmark_returns)
self.mean_algorithm_returns = (
self.algorithm_returns.cumsum() /
np.arange(1, self.num_trading_days + 1, dtype=np.float64)
)
self.benchmark_volatility = annual_volatility(self.benchmark_returns)
self.algorithm_volatility = annual_volatility(self.algorithm_returns)
self.treasury_period_return = choose_treasury(
self.treasury_curves,
self._start_session,
self._end_session,
self.trading_calendar,
)
self.sharpe = sharpe_ratio(
self.algorithm_returns,
)
# The consumer currently expects a 0.0 value for sharpe in period,
# this differs from cumulative which was np.nan.
# When factoring out the sharpe_ratio, the different return types
# were collapsed into `np.nan`.
# TODO: Either fix consumer to accept `np.nan` or make the
# `sharpe_ratio` return type configurable.
# In the meantime, convert nan values to 0.0
if pd.isnull(self.sharpe):
self.sharpe = 0.0
self.downside_risk = downside_risk(
self.algorithm_returns.values
)
self.sortino = sortino_ratio(
self.algorithm_returns.values,
_downside_risk=self.downside_risk,
)
self.information = information_ratio(
self.algorithm_returns.values,
self.benchmark_returns.values,
)
self.alpha, self.beta = alpha_beta_aligned(
self.algorithm_returns.values,
self.benchmark_returns.values,
)
self.excess_return = self.algorithm_period_returns - \
self.treasury_period_return
self.max_drawdown = max_drawdown(self.algorithm_returns.values)
self.max_leverage = self.calculate_max_leverage()
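A sketch of the return arithmetic assumed above: cum_returns compounds simple returns, and the running mean divides the cumulative sum by the day count. The sample series is illustrative:

import numpy as np
import pandas as pd

returns = pd.Series([0.01, -0.02, 0.015, 0.003])
period_return = (1.0 + returns).prod() - 1.0           # compounded total
mean_returns = returns.cumsum() / np.arange(
    1, len(returns) + 1, dtype=np.float64)             # running mean per day
print(period_return, mean_returns.iloc[-1])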
0
Example 23
Project: lfd Source File: precompute.py
def batch_get_sol_params(x_nd, K_nn, bend_coefs, rot_coef):
n, d = x_nd.shape
x_gpu = gpuarray.to_gpu(x_nd)
H_arr_gpu = []
for b in bend_coefs:
cur_offset = np.zeros((1 + d + n, 1 + d + n), np.float64)
cur_offset[d+1:, d+1:] = b * K_nn
cur_offset[1:d+1, 1:d+1] = np.diag(rot_coef)
H_arr_gpu.append(gpuarray.to_gpu(cur_offset))
H_ptr_gpu = get_gpu_ptrs(H_arr_gpu)
A = np.r_[np.zeros((d+1,d+1)), np.c_[np.ones((n,1)), x_nd]].T
n_cnts = A.shape[0]
_u,_s,_vh = np.linalg.svd(A.T)
N = _u[:,n_cnts:]
F = np.zeros((n + d + 1, d), np.float64)
F[1:d+1, :d] += np.diag(rot_coef)
Q = np.c_[np.ones((n,1)), x_nd, K_nn].astype(np.float64)
F = F.astype(np.float64)
N = N.astype(np.float64)
Q_gpu = gpuarray.to_gpu(Q)
Q_arr_gpu = [Q_gpu for _ in range(len(bend_coefs))]
Q_ptr_gpu = get_gpu_ptrs(Q_arr_gpu)
F_gpu = gpuarray.to_gpu(F)
F_arr_gpu = [F_gpu for _ in range(len(bend_coefs))]
F_ptr_gpu = get_gpu_ptrs(F_arr_gpu)
N_gpu = gpuarray.to_gpu(N)
N_arr_gpu = [N_gpu for _ in range(len(bend_coefs))]
N_ptr_gpu = get_gpu_ptrs(N_arr_gpu)
dot_batch_nocheck(Q_arr_gpu, Q_arr_gpu, H_arr_gpu,
Q_ptr_gpu, Q_ptr_gpu, H_ptr_gpu,
transa = 'T')
# N'HN
NHN_arr_gpu, NHN_ptr_gpu = m_dot_batch((N_arr_gpu, N_ptr_gpu, 'T'),
(H_arr_gpu, H_ptr_gpu, 'N'),
(N_arr_gpu, N_ptr_gpu, 'N'))
iH_arr = []
for NHN in NHN_arr_gpu:
iH_arr.append(scipy.linalg.inv(NHN.get()).copy())
iH_arr_gpu = [gpuarray.to_gpu_async(iH) for iH in iH_arr]
iH_ptr_gpu = get_gpu_ptrs(iH_arr_gpu)
proj_mats = m_dot_batch((N_arr_gpu, N_ptr_gpu, 'N'),
(iH_arr_gpu, iH_ptr_gpu, 'N'),
(N_arr_gpu, N_ptr_gpu, 'T'),
(Q_arr_gpu, Q_ptr_gpu, 'T'))
offset_mats = m_dot_batch((N_arr_gpu, N_ptr_gpu, 'N'),
(iH_arr_gpu, iH_ptr_gpu, 'N'),
(N_arr_gpu, N_ptr_gpu, 'T'),
(F_arr_gpu, F_ptr_gpu, 'N'))
return proj_mats, offset_mats
0
Example 24
Project: lfd Source File: precompute.py
def test_batch_get_sol_params(f, bend_coefs, rot_coef, atol=1e-7, index=0):
seg_info = f.items()[index][1]
inv_group = seg_info['inv']
ds_key = 'DS_SIZE_{}'.format(DS_SIZE)
x_nd = inv_group[ds_key]['scaled_cloud_xyz'][:]
K_nn = inv_group[ds_key]['scaled_K_nn'][:]
n, d = x_nd.shape
x_gpu = gpuarray.to_gpu(x_nd)
H_arr_gpu = []
for b in bend_coefs:
cur_offset = np.zeros((1 + d + n, 1 + d + n), np.float64)
cur_offset[d+1:, d+1:] = b * K_nn
cur_offset[1:d+1, 1:d+1] = np.diag(rot_coef)
H_arr_gpu.append(gpuarray.to_gpu(cur_offset))
H_ptr_gpu = get_gpu_ptrs(H_arr_gpu)
A = np.r_[np.zeros((d+1,d+1)), np.c_[np.ones((n,1)), x_nd]].T
n_cnts = A.shape[0]
_u,_s,_vh = np.linalg.svd(A.T)
N = _u[:,n_cnts:]
F = np.zeros((n + d + 1, d), np.float64)
F[1:d+1, :d] += np.diag(rot_coef)
Q = np.c_[np.ones((n,1)), x_nd, K_nn].astype(np.float64)
F = F.astype(np.float64)
N = N.astype(np.float64)
Q_gpu = gpuarray.to_gpu(Q)
Q_arr_gpu = [Q_gpu for _ in range(len(bend_coefs))]
Q_ptr_gpu = get_gpu_ptrs(Q_arr_gpu)
F_gpu = gpuarray.to_gpu(F)
F_arr_gpu = [F_gpu for _ in range(len(bend_coefs))]
F_ptr_gpu = get_gpu_ptrs(F_arr_gpu)
N_gpu = gpuarray.to_gpu(N)
N_arr_gpu = [N_gpu for _ in range(len(bend_coefs))]
N_ptr_gpu = get_gpu_ptrs(N_arr_gpu)
dot_batch_nocheck(Q_arr_gpu, Q_arr_gpu, H_arr_gpu,
Q_ptr_gpu, Q_ptr_gpu, H_ptr_gpu,
transa = 'T')
QTQ = Q.T.dot(Q)
H_list = []
for i, bend_coef in enumerate(bend_coefs):
H = QTQ.copy()  # copy so QTQ is not mutated across bend_coefs
H[d+1:,d+1:] += bend_coef * K_nn
rot_coefs = np.ones(d) * rot_coef if np.isscalar(rot_coef) else rot_coef
H[1:d+1, 1:d+1] += np.diag(rot_coefs)
# ipdb.set_trace()
H_list.append(H)
# N'HN
NHN_arr_gpu, NHN_ptr_gpu = m_dot_batch((N_arr_gpu, N_ptr_gpu, 'T'),
(H_arr_gpu, H_ptr_gpu, 'N'),
(N_arr_gpu, N_ptr_gpu, 'N'))
NHN_list = [N.T.dot(H.dot(N)) for H in H_list]
for i, NHN in enumerate(NHN_list):
assert(np.allclose(NHN, NHN_arr_gpu[i].get(), atol=atol))
iH_arr = []
for NHN in NHN_arr_gpu:
iH_arr.append(scipy.linalg.inv(NHN.get()).copy())
h_inv_list = [scipy.linalg.inv(NHN) for NHN in NHN_list]
assert(np.allclose(iH_arr, h_inv_list, atol=atol))
iH_arr_gpu = [gpuarray.to_gpu_async(iH) for iH in iH_arr]
iH_ptr_gpu = get_gpu_ptrs(iH_arr_gpu)
proj_mats = m_dot_batch((N_arr_gpu, N_ptr_gpu, 'N'),
(iH_arr_gpu, iH_ptr_gpu, 'N'),
(N_arr_gpu, N_ptr_gpu, 'T'),
(Q_arr_gpu, Q_ptr_gpu, 'T'))
proj_mats_list = [N.dot(h_inv.dot(N.T.dot(Q.T))) for h_inv in h_inv_list]
assert(np.allclose(proj_mats_list, proj_mats[0][index].get(), atol=atol))
offset_mats = m_dot_batch((N_arr_gpu, N_ptr_gpu, 'N'),
(iH_arr_gpu, iH_ptr_gpu, 'N'),
(N_arr_gpu, N_ptr_gpu, 'T'),
(F_arr_gpu, F_ptr_gpu, 'N'))
offset_mats_list = [N.dot(h_inv.dot(N.T.dot(F))) for h_inv in h_inv_list]
assert(np.allclose(offset_mats_list, offset_mats[0][index].get(), atol=atol))
0
Example 25
Project: pyspace Source File: correlation_features.py
def _execute(self, x):
""" Calculates statistical features """
# determine what has to be computed and initialize data structures
data = x.get_data()
if self.central_moment_order == 0:
self.central_moment_order = 1 # only for feature_size computation
feature_size = self.raw_moment_order * len(x.channel_names) + \
(self.central_moment_order-1) * len(x.channel_names) + \
self.std * len(x.channel_names) + \
self.quadratic_mean * len(x.channel_names) + \
self.median * len(x.channel_names) + \
self.minimum * len(x.channel_names) + \
self.maximum * len(x.channel_names)
if self.artifact_threshold is not None:
feature_size += 1
statistical_features = numpy.zeros((feature_size, ), numpy.float64)
# initialize the actual feature_index
feature_index = 0
# for every channel...
# TODO: Try to use the methods on the whole data array
for index in range(len(x.channel_names)):
current_channel = data[:, index]
# in these cases it is auspicious to compute and store variables
# cause we will need them again
if self.raw_moment_order > 0 or self.central_moment_order > 1 or \
self.std or self.quadratic_mean:
average = numpy.mean(current_channel)
if self.raw_moment_order > 1 or self.quadratic_mean:
# self.raw_moment(current_channel, 2)
second_raw_moment = scipy.stats.ss(current_channel)
# print second_raw_moment
if self.raw_moment_order > 0: # raw_moment_of_1_th_order needed?
# it's the mean, so don't compute it again
statistical_features[feature_index] = average
feature_index += 1 # update feature_index
if self.raw_moment_order > 1: # raw_moment_2nd_order needed?
# we have already computed it
statistical_features[feature_index] = second_raw_moment
feature_index += 1
# for the other orders of raw_moments
for order in range(3, self.raw_moment_order+1):
statistical_features[feature_index] = \
self.raw_moment(current_channel, order)
feature_index += 1
# central_moment
for order in range(2, self.central_moment_order+1):
statistical_features[feature_index] = \
scipy.stats.moment(current_channel, order)
feature_index += 1
# standard_deviation
if self.std:
statistical_features[feature_index] = \
numpy.std(current_channel)
feature_index += 1
# quadratic_mean
if self.quadratic_mean:
# we stored relevant results before
statistical_features[feature_index] = second_raw_moment ** 0.5
feature_index += 1
# median
if self.median:
statistical_features[feature_index] = \
numpy.median(current_channel)
feature_index += 1
# minimum
if self.minimum:
statistical_features[feature_index] = \
numpy.amin(current_channel)
feature_index += 1
# maximum
if self.maximum:
statistical_features[feature_index] = \
numpy.amax(current_channel)
feature_index += 1
if self.artifact_threshold is not None:
statistical_features[-1] += self.amplitudes_above(
current_channel, self.artifact_threshold)
# if feature_names have to be determined
if self.feature_names is None:
# initialize data structure
self.feature_names = []
for name in x.channel_names:
# raw_moment
for order in range(1, self.raw_moment_order+1):
self.feature_names.append(
"RAW_MOMENT_%d_%s" % (order, name))
# central_moment
for order in range(2, self.central_moment_order+1):
self.feature_names.append(
"CENTRAL_MOMENT_%d_%s" % (order, name))
# standard_deviation
if self.std:
self.feature_names.append("STD_%s" % (name))
# quadratic_mean
if self.quadratic_mean:
self.feature_names.append("QUAD_MEAN_%s" %(name))
# median
if self.median:
self.feature_names.append("MEDIAN_%s" % (name))
# minimum
if self.minimum:
self.feature_names.append("MIN_%s" % (name))
# maximum
if self.maximum:
self.feature_names.append("MAX_%s" % (name))
if self.artifact_threshold is not None:
self.feature_names.append(
"AMP_ABOVE_%.2f" % (self.artifact_threshold))
feature_vector = FeatureVector(numpy.atleast_2d(statistical_features),
self.feature_names)
return feature_vector
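A minimal per-channel sketch of the statistics collected above, using plain NumPy/SciPy on a fake (samples x channels) array. Note that scipy.stats.ss (the sum of squares used above for the second raw moment) was removed in later SciPy releases; np.sum(x ** 2) is equivalent:

import numpy as np
import scipy.stats

data = np.random.randn(100, 2)                 # 100 samples, 2 channels
for index in range(data.shape[1]):
    channel = data[:, index]
    features = [
        np.mean(channel),                      # 1st raw moment
        np.sum(channel ** 2),                  # 2nd raw moment
        scipy.stats.moment(channel, 2),        # 2nd central moment
        np.std(channel),
        np.median(channel),
        np.amin(channel),
        np.amax(channel),
    ]
    print(np.asarray(features, dtype=np.float64))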
0
Example 26
Project: image-analogies Source File: patch_matcher.py
def congrid(a, newdims, method='linear', centre=False, minusone=False):
'''Arbitrary resampling of source array to new dimension sizes.
Currently only supports maintaining the same number of dimensions.
To use 1-D arrays, first promote them to shape (x,1).
Uses the same parameters and creates the same co-ordinate lookup points
as IDL's congrid routine, which apparently originally came from a VAX/VMS
routine of the same name.
method:
neighbour - closest value from original data
nearest and linear - uses n x 1-D interpolations using
scipy.interpolate.interp1d
(see Numerical Recipes for validity of use of n 1-D interpolations)
spline - uses ndimage.map_coordinates
centre:
True - interpolation points are at the centres of the bins
False - points are at the front edge of the bin
minusone:
For example- inarray.shape = (i,j) & new dimensions = (x,y)
False - inarray is resampled by factors of (i/x) * (j/y)
True - inarray is resampled by (i-1)/(x-1) * (j-1)/(y-1)
This prevents extrapolation one element beyond bounds of input array.
'''
if a.dtype not in [np.float64, np.float32]:
a = a.astype(np.float64)
m1 = int(minusone)
ofs = int(centre) * 0.5
old = np.array( a.shape )
ndims = len( a.shape )
if len( newdims ) != ndims:
print "[congrid] dimensions error. " \
"This routine currently only support " \
"rebinning to the same number of dimensions."
return None
newdims = np.asarray( newdims, dtype=float )
dimlist = []
if method == 'neighbour':
for i in range( ndims ):
base = np.indices(newdims)[i]
dimlist.append( (old[i] - m1) / (newdims[i] - m1) \
* (base + ofs) - ofs )
cd = np.array( dimlist ).round().astype(int)
newa = a[list( cd )]
return newa
elif method in ['nearest','linear']:
# calculate new dims
for i in range( ndims ):
base = np.arange( newdims[i] )
dimlist.append( (old[i] - m1) / (newdims[i] - m1) \
* (base + ofs) - ofs )
# specify old dims
olddims = [np.arange(i, dtype=np.float64) for i in list(a.shape)]
# first interpolation - for ndims = any
mint = scipy.interpolate.interp1d( olddims[-1], a, kind=method )
newa = mint( dimlist[-1] )
trorder = [ndims - 1] + list(range(ndims - 1))
for i in range( ndims - 2, -1, -1 ):
newa = newa.transpose( trorder )
mint = scipy.interpolate.interp1d( olddims[i], newa, kind=method )
newa = mint( dimlist[i] )
if ndims > 1:
# need one more transpose to return to original dimensions
newa = newa.transpose( trorder )
return newa
elif method in ['spline']:
oslices = [ slice(0,j) for j in old ]
oldcoords = np.ogrid[oslices]
nslices = [ slice(0,j) for j in list(newdims) ]
newcoords = np.mgrid[nslices]
newcoords_dims = list(range(newcoords.ndim))
# make first index last
newcoords_dims.append(newcoords_dims.pop(0))
newcoords_tr = newcoords.transpose(newcoords_dims)
# makes a view that affects newcoords
newcoords_tr += ofs
deltas = (np.asarray(old) - m1) / (newdims - m1)
newcoords_tr *= deltas
newcoords_tr -= ofs
newa = scipy.ndimage.map_coordinates(a, newcoords)
return newa
else:
print "Congrid error: Unrecognized interpolation type.\n", \
"Currently only \'neighbour\', \'nearest\',\'linear\',", \
"and \'spline\' are supported."
return None
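A usage sketch for congrid(), assuming the source module's own imports (numpy as np, scipy): upsample a 4x4 ramp to 8x8 with linear interpolation; minusone=True keeps the lookup points inside the input bounds so interp1d never extrapolates:

import numpy as np

a = np.arange(16, dtype=np.float64).reshape(4, 4)
b = congrid(a, (8, 8), method='linear', minusone=True)
print(b.shape)   # (8, 8)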
0
Example 27
Project: root_numpy Source File: _factory.py
def add_classification_events(factory, events, labels, signal_label=None,
weights=None, test=False):
"""Add classification events to a TMVA::Factory from NumPy arrays.
Parameters
----------
factory : TMVA::Factory
A TMVA::Factory instance with variables already booked in exactly the
same order as the columns in ``events``.
events : numpy array of shape [n_events, n_variables]
A two-dimensional NumPy array containing the rows of events and columns
of variables. The order of the columns must match the order in which
you called ``AddVariable()`` for each variable.
labels : numpy array of shape [n_events]
The class labels (signal or background) corresponding to each event in
``events``.
signal_label : float or int, optional (default=None)
The value in ``labels`` for signal events, if ``labels`` contains only
two classes. If None, the highest value in ``labels`` is used.
weights : numpy array of shape [n_events], optional
Event weights.
test : bool, optional (default=False)
If True, then the events will be added as test events, otherwise they
are added as training events by default.
Notes
-----
* A TMVA::Factory requires you to add both training and test events even if
you don't intend to call ``TestAllMethods()``.
* When using MethodCuts, the first event added must be a signal event,
otherwise TMVA will fail with ``<FATAL> Interval : maximum lower than
minimum``. To place a signal event first::
# Get index of first signal event
first_signal = np.nonzero(labels == signal_label)[0][0]
# Swap this with first event
events[0], events[first_signal] = events[first_signal].copy(), events[0].copy()
labels[0], labels[first_signal] = labels[first_signal], labels[0]
weights[0], weights[first_signal] = weights[first_signal], weights[0]
"""
if not isinstance(factory, TMVA.Factory):
raise TypeError("factory must be a TMVA.Factory instance")
events = np.ascontiguousarray(events, dtype=np.float64)
if events.ndim == 1:
# convert to 2D
events = events[:, np.newaxis]
elif events.ndim != 2:
raise ValueError(
"events must be a two-dimensional array "
"with one event per row")
class_labels, class_idx = np.unique(labels, return_inverse=True)
if class_idx.shape[0] != events.shape[0]:
raise ValueError("numbers of events and labels do not match")
if weights is not None:
weights = np.asarray(weights, dtype=np.float64)
if weights.shape[0] != events.shape[0]:
raise ValueError("numbers of events and weights do not match")
if weights.ndim != 1:
raise ValueError("weights must be one-dimensional")
n_classes = class_labels.shape[0]
if n_classes > 2:
# multiclass classification
_libtmvanumpy.factory_add_events_multiclass(
ROOT.AsCObject(factory), events, class_idx,
weights, test)
elif n_classes == 2:
# binary classification
if signal_label is None:
signal_label = class_labels[1]
signal_label = np.where(class_labels == signal_label)[0][0]
_libtmvanumpy.factory_add_events_twoclass(
ROOT.AsCObject(factory), events, class_idx,
signal_label, weights, test)
else:
raise ValueError("labels must contain at least two classes")
0
Example 28
def check_array(array, accept_sparse=None, dtype="numeric", order=None,
copy=False, force_all_finite=True, ensure_2d=True,
allow_nd=False, ensure_min_samples=1, ensure_min_features=1,
warn_on_dtype=False, estimator=None):
"""Input validation on an array, list, sparse matrix or similar.
By default, the input is converted to an at least 2D numpy array.
If the dtype of the array is object, attempt converting to float,
raising on failure.
Parameters
----------
array : object
Input object to check / convert.
accept_sparse : string, list of string or None (default=None)
String[s] representing allowed sparse matrix formats, such as 'csc',
'csr', etc. None means that sparse matrix input will raise an error.
If the input is sparse but not in the allowed format, it will be
converted to the first listed format.
dtype : string, type, list of types or None (default="numeric")
Data type of result. If None, the dtype of the input is preserved.
If "numeric", dtype is preserved unless array.dtype is object.
If dtype is a list of types, conversion on the first type is only
performed if the dtype of the input is not in the list.
order : 'F', 'C' or None (default=None)
Whether an array will be forced to be fortran or c-style.
When order is None (default), then if copy=False, nothing is ensured
about the memory layout of the output array; otherwise (copy=True)
the memory layout of the returned array is kept as close as possible
to the original array.
copy : boolean (default=False)
Whether a forced copy will be triggered. If copy=False, a copy might
be triggered by a conversion.
force_all_finite : boolean (default=True)
Whether to raise an error on np.inf and np.nan in X.
ensure_2d : boolean (default=True)
Whether to make X at least 2d.
allow_nd : boolean (default=False)
Whether to allow X.ndim > 2.
ensure_min_samples : int (default=1)
Make sure that the array has a minimum number of samples in its first
axis (rows for a 2D array). Setting to 0 disables this check.
ensure_min_features : int (default=1)
Make sure that the 2D array has some minimum number of features
(columns). The default value of 1 rejects empty datasets.
This check is only enforced when the input data has effectively 2
dimensions or is originally 1D and ``ensure_2d`` is True. Setting to 0
disables this check.
warn_on_dtype : boolean (default=False)
Raise DataConversionWarning if the dtype of the input data structure
does not match the requested dtype, causing a memory copy.
estimator : str or estimator instance (default=None)
If passed, include the name of the estimator in warning messages.
Returns
-------
X_converted : object
The converted and validated X.
"""
if isinstance(accept_sparse, str):
accept_sparse = [accept_sparse]
# store whether originally we wanted numeric dtype
dtype_numeric = dtype == "numeric"
dtype_orig = getattr(array, "dtype", None)
if not hasattr(dtype_orig, 'kind'):
# not a data type (e.g. a column named dtype in a pandas DataFrame)
dtype_orig = None
if dtype_numeric:
if dtype_orig is not None and dtype_orig.kind == "O":
# if input is object, convert to float.
dtype = np.float64
else:
dtype = None
if isinstance(dtype, (list, tuple)):
if dtype_orig is not None and dtype_orig in dtype:
# no dtype conversion required
dtype = None
else:
# dtype conversion required. Let's select the first element of the
# list of accepted types.
dtype = dtype[0]
if estimator is not None:
if isinstance(estimator, six.string_types):
estimator_name = estimator
else:
estimator_name = estimator.__class__.__name__
else:
estimator_name = "Estimator"
context = " by %s" % estimator_name if estimator is not None else ""
if sp.issparse(array):
array = _ensure_sparse_format(array, accept_sparse, dtype, copy,
force_all_finite)
else:
array = np.array(array, dtype=dtype, order=order, copy=copy)
if ensure_2d:
if array.ndim == 1:
if ensure_min_samples >= 2:
raise ValueError("%s expects at least 2 samples provided "
"in a 2 dimensional array-like input"
% estimator_name)
warnings.warn(
"Passing 1d arrays as data is deprecated in 0.17 and will "
"raise ValueError in 0.19. Reshape your data either using "
"X.reshape(-1, 1) if your data has a single feature or "
"X.reshape(1, -1) if it contains a single sample.",
DeprecationWarning)
array = np.atleast_2d(array)
# To ensure that array flags are maintained
array = np.array(array, dtype=dtype, order=order, copy=copy)
# make sure we actually converted to numeric:
if dtype_numeric and array.dtype.kind == "O":
array = array.astype(np.float64)
if not allow_nd and array.ndim >= 3:
raise ValueError("Found array with dim %d. %s expected <= 2."
% (array.ndim, estimator_name))
if force_all_finite:
_assert_all_finite(array)
shape_repr = _shape_repr(array.shape)
if ensure_min_samples > 0:
n_samples = _num_samples(array)
if n_samples < ensure_min_samples:
raise ValueError("Found array with %d sample(s) (shape=%s) while a"
" minimum of %d is required%s."
% (n_samples, shape_repr, ensure_min_samples,
context))
if ensure_min_features > 0 and array.ndim == 2:
n_features = array.shape[1]
if n_features < ensure_min_features:
raise ValueError("Found array with %d feature(s) (shape=%s) while"
" a minimum of %d is required%s."
% (n_features, shape_repr, ensure_min_features,
context))
if warn_on_dtype and dtype_orig is not None and array.dtype != dtype_orig:
msg = ("Data with input dtype %s was converted to %s%s."
% (dtype_orig, array.dtype, context))
warnings.warn(msg, _DataConversionWarning)
return array
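A quick sketch of the validation behavior using scikit-learn's public copy of this helper (the function above mirrors it):

import numpy as np
from sklearn.utils import check_array

X = check_array(np.arange(6).reshape(3, 2), dtype=np.float64)
print(X.dtype)                         # float64, validated 2D array
try:
    check_array(np.zeros((3, 0)))      # 0 features < ensure_min_features=1
except ValueError as exc:
    print(exc)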
0
Example 29
Project: scikit-image Source File: texture.py
def draw_multiblock_lbp(img, r, c, width, height,
lbp_code=0,
color_greater_block=[1, 1, 1],
color_less_block=[0, 0.69, 0.96],
alpha=0.5
):
"""Multi-block local binary pattern visualization.
Blocks with higher sums are colored with alpha-blended white rectangles,
whereas blocks with lower sums are colored alpha-blended cyan. Colors
and the `alpha` parameter can be changed.
Parameters
----------
img : ndarray of float or uint
Image on which to visualize the pattern.
r : int
Row-coordinate of top left corner of a rectangle containing feature.
c : int
Column-coordinate of top left corner of a rectangle containing feature.
width : int
Width of one of 9 equal rectangles that will be used to compute
a feature.
height : int
Height of one of 9 equal rectangles that will be used to compute
a feature.
lbp_code : int
The descriptor of feature to visualize. If not provided, the
descriptor with 0 value will be used.
color_greater_block : list of 3 floats
Floats specifying the color for the block that has greater
intensity value. They should be in the range [0, 1].
Corresponding values define (R, G, B) values. Default value
is white [1, 1, 1].
color_less_block : list of 3 floats
Floats specifying the color for the block that has less intensity
value. They should be in the range [0, 1]. Corresponding values define
(R, G, B) values. Default value is cyan [0, 0.69, 0.96].
alpha : float
Value in the range [0, 1] that specifies the opacity of the
visualization. 0 - fully transparent, 1 - fully opaque.
Returns
-------
output : ndarray of float
Image with MB-LBP visualization.
References
----------
.. [1] Face Detection Based on Multi-Block LBP
Representation. Lun Zhang, Rufeng Chu, Shiming Xiang, Shengcai Liao,
Stan Z. Li
http://www.cbsr.ia.ac.cn/users/scliao/papers/Zhang-ICB07-MBLBP.pdf
"""
# Default colors for regions.
# White is for the blocks that are brighter.
# Cyan is for the blocks that have less intensity.
color_greater_block = np.asarray(color_greater_block, dtype=np.float64)
color_less_block = np.asarray(color_less_block, dtype=np.float64)
# Copy array to avoid the changes to the original one.
output = np.copy(img)
# As the visualization uses RGB color we need 3 bands.
if len(img.shape) < 3:
output = gray2rgb(img)
# Colors are specified in floats.
output = img_as_float(output)
# Offsets of neighbour rectangles relative to central one.
# It has order starting from top left and going clockwise.
neighbour_rect_offsets = ((-1, -1), (-1, 0), (-1, 1),
(0, 1), (1, 1), (1, 0),
(1, -1), (0, -1))
# Pre-multiply the offsets with width and height.
neighbour_rect_offsets = np.array(neighbour_rect_offsets)
neighbour_rect_offsets[:, 0] *= height
neighbour_rect_offsets[:, 1] *= width
# Top-left coordinates of central rectangle.
central_rect_r = r + height
central_rect_c = c + width
for element_num, offset in enumerate(neighbour_rect_offsets):
offset_r, offset_c = offset
curr_r = central_rect_r + offset_r
curr_c = central_rect_c + offset_c
has_greater_value = lbp_code & (1 << (7-element_num))
# Mix-in the visualization colors.
if has_greater_value:
new_value = ((1-alpha) *
output[curr_r:curr_r+height, curr_c:curr_c+width] +
alpha * color_greater_block)
output[curr_r:curr_r+height, curr_c:curr_c+width] = new_value
else:
new_value = ((1-alpha) *
output[curr_r:curr_r+height, curr_c:curr_c+width] +
alpha * color_less_block)
output[curr_r:curr_r+height, curr_c:curr_c+width] = new_value
return output
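An assumed usage sketch pairing the visualization with skimage's multiblock_lbp, which computes the code from an integral image (the image and window placement are illustrative):

import numpy as np
from skimage.feature import multiblock_lbp, draw_multiblock_lbp
from skimage.transform import integral_image

img = np.random.rand(60, 60)
int_img = integral_image(img)
code = multiblock_lbp(int_img, r=10, c=10, width=8, height=8)
overlay = draw_multiblock_lbp(img, 10, 10, 8, 8, lbp_code=code)
print(overlay.shape)                   # RGB visualization, (60, 60, 3)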
0
Example 30
Project: msCentipede Source File: load_data.py
def get_read_counts(self, locations, width=200):
"""Get the number of sequencing reads mapped to
each base along a window centered at each of
several motif instances.
Arguments:
locations : list
each entry of the list is a list that specifies
information for a motif instance
width : int
length of the genomic window around the motif
instance.
"""
counts = []
if self._protocol=='DNase_seq':
for location in locations:
chrom = location[0]
strand = location[3]
if strand=='+':
center = location[1]
else:
center = location[2]
left = max([1, center - width // 2])
right = min([center + width // 2, self._ref_lengths[chrom]])
# fetch all reads overlapping this genomic location
sam_iter = self._handle.fetch(reference=chrom, start=left, end=right)
forward = np.zeros((width,), dtype=np.float64)
reverse = np.zeros((width,), dtype=np.float64)
for read in sam_iter:
# skip read if unmapped
if read.is_unmapped:
continue
# skip read, if mapping quality is low
if read.mapq < MIN_MAP_QUAL:
continue
start = read.pos
end = start + read.alen - 1
# skip read, if site of cleavage is outside window
if (read.is_reverse and end >= right) or (not read.is_reverse and start < left):
continue
if read.is_reverse:
reverse[end-left] += 1
else:
forward[start-left] += 1
# flip fwd and rev strand read counts,
# if the motif is on the opposite strand.
if strand=='+':
count = [forward, reverse]
else:
count = [reverse[::-1], forward[::-1]]
count = np.hstack(count)
# cap the read count at any location
count[count>MAX_VAL] = MAX_VAL
counts.append(count.astype(np.float64))
else:
for l,location in enumerate(locations):
chrom = location[0]
strand = location[3]
if strand=='+':
center = location[1]
else:
center = location[2]
left = max([1, center - width // 2])
right = min([center + width // 2, self._ref_lengths[chrom]])
# fetch all reads overlapping this genomic location
sam_iter = [read for read in self._handle.fetch(reference=chrom, start=left, end=right)]
count = np.zeros((width,), dtype=np.float64)
for read in sam_iter:
# discard anomalies
if not (read.is_read1 or read.is_read2):
continue
# require that (both) paired-reads are uniquely mapped
if read.is_unmapped or read.mate_is_unmapped:
continue
# paired reads mapped to same strand...
if read.is_reverse == read.mate_is_reverse:
continue
# read has poor mapping quality
if read.mapq < MIN_MAP_QUAL:
continue
# discard fragments that are too long or too short
isize = np.abs(read.isize)
if isize < MIN_READ_LEN or isize > MAX_READ_LEN:
continue
# pysam pos starts at 0, not 1
# site of adapter insertions are 9bp apart
# an offset of +4 / -5 gives approximate site of transposition
if read.is_reverse:
if read.pos<=read.mpos:
tpos = read.pos + 4
else:
tpos = read.mpos + isize - 5
# add read, if site of transposition is within window
if left<=tpos<right:
count[tpos-left] += 1
else:
if read.pos>=read.mpos:
tpos = read.mpos + isize - 5
else:
tpos = read.pos + 4
# add read, if site of transposition is within window
if left<=tpos<right:
count[tpos-left] += 1
# flip read counts,
# if the motif is on the opposite strand.
if strand=='-':
count = count[::-1]
counts.append(count)
counts = np.array(counts).astype(np.float64)
# cap the read count at all locations
counts[counts>MAX_VAL] = MAX_VAL
return counts
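A toy sketch of the strand handling and capping at the end of the method; MAX_VAL is assumed to be 10 here purely for illustration:

import numpy as np

MAX_VAL = 10
count = np.array([0, 3, 25, 1], dtype=np.float64)
strand = '-'
if strand == '-':
    count = count[::-1]        # flip counts for minus-strand motifs
count[count > MAX_VAL] = MAX_VAL
print(count)                   # [ 1. 10.  3.  0.]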
0
Example 31
Project: scikit-image Source File: _marching_cubes_classic.py
def marching_cubes_classic(volume, level=None, spacing=(1., 1., 1.),
gradient_direction='descent'):
"""
Classic marching cubes algorithm to find surfaces in 3d volumetric data.
Note that the ``marching_cubes()`` algorithm is recommended over
this algorithm, because it's faster and produces better results.
Parameters
----------
volume : (M, N, P) array of doubles
Input data volume to find isosurfaces. Will be cast to `np.float64`.
level : float
Contour value to search for isosurfaces in `volume`. If not
given or None, the average of the min and max of vol is used.
spacing : length-3 tuple of floats
Voxel spacing in spatial dimensions corresponding to numpy array
indexing dimensions (M, N, P) as in `volume`.
gradient_direction : string
Controls if the mesh was generated from an isosurface with gradient
descent toward objects of interest (the default), or the opposite.
The two options are:
* descent : Object was greater than exterior
* ascent : Exterior was greater than object
Returns
-------
verts : (V, 3) array
Spatial coordinates for V unique mesh vertices. Coordinate order
matches input `volume` (M, N, P).
faces : (F, 3) array
Define triangular faces via referencing vertex indices from ``verts``.
This algorithm specifically outputs triangles, so each face has
exactly three indices.
Notes
-----
The marching cubes algorithm is implemented as described in [1]_.
A simple explanation is available here::
http://www.essi.fr/~lingrand/MarchingCubes/algo.html
There are several known ambiguous cases in the marching cubes algorithm.
Using point labeling as in [1]_, Figure 4, as shown::
v8 ------ v7
/ | / | y
/ | / | ^ z
v4 ------ v3 | | /
| v5 ----|- v6 |/ (note: NOT right handed!)
| / | / ----> x
| / | /
v1 ------ v2
Most notably, if v4, v8, v2, and v6 are all >= `level` (or any
generalization of this case) two parallel planes are generated by this
algorithm, separating v4 and v8 from v2 and v6. An equally valid
interpretation would be a single connected thin surface enclosing all
four points. This is the best known ambiguity, though there are others.
This algorithm does not attempt to resolve such ambiguities; it is a naive
implementation of marching cubes as in [1]_, but may be a good beginning
for work with more recent techniques (Dual Marching Cubes, Extended
Marching Cubes, Cubic Marching Squares, etc.).
Because of interactions between neighboring cubes, the isosurface(s)
generated by this algorithm are NOT guaranteed to be closed, particularly
for complicated contours. Furthermore, this algorithm does not guarantee
a single contour will be returned. Indeed, ALL isosurfaces which cross
`level` will be found, regardless of connectivity.
The output is a triangular mesh consisting of a set of unique vertices and
connecting triangles. The order of these vertices and triangles in the
output list is determined by the position of the smallest ``x,y,z`` (in
lexicographical order) coordinate in the contour. This is a side-effect
of how the input array is traversed, but can be relied upon.
The generated mesh guarantees coherent orientation as of version 0.12.
To quantify the area of an isosurface generated by this algorithm, pass
outputs directly into `skimage.measure.mesh_surface_area`.
References
----------
.. [1] Lorensen, William and Harvey E. Cline. Marching Cubes: A High
Resolution 3D Surface Construction Algorithm. Computer Graphics
(SIGGRAPH 87 Proceedings) 21(4) July 1987, p. 163-170.
DOI: 10.1145/37401.37422
See Also
--------
skimage.measure.marching_cubes
skimage.measure.mesh_surface_area
"""
# Check inputs and ensure `volume` is C-contiguous for memoryviews
if volume.ndim != 3:
    raise ValueError("Input volume must have 3 dimensions.")
if level is None:
    level = 0.5 * (volume.min() + volume.max())
else:
    level = float(level)
    if level < volume.min() or level > volume.max():
        raise ValueError("Surface level must be within volume data range.")
if len(spacing) != 3:
    raise ValueError("`spacing` must consist of three floats.")
volume = np.array(volume, dtype=np.float64, order="C")

# Extract raw triangles using marching cubes in Cython
# Returns a list of length-3 lists, each sub-list containing three
# tuples. The tuples hold (x, y, z) coordinates for triangle vertices.
# Note: this algorithm is fast, but returns degenerate "triangles" which
# have repeated vertices - and equivalent vertices are redundantly
# placed in every triangle they connect with.
raw_faces = _marching_cubes_classic_cy.iterate_and_store_3d(volume,
                                                            float(level))

# Find and collect unique vertices, storing triangle verts as indices.
# Returns a true mesh with no degenerate faces.
verts, faces = _marching_cubes_classic_cy.unpack_unique_verts(raw_faces)
verts = np.asarray(verts)
faces = np.asarray(faces)

# Fancy indexing to define two vector arrays from triangle vertices
faces = _correct_mesh_orientation(volume, verts[faces], faces, spacing,
                                  gradient_direction)

# Adjust for non-isotropic spacing in `verts` at time of return
return verts * np.r_[spacing], faces
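The Notes above recommend pairing the mesh output with `skimage.measure.mesh_surface_area`. A minimal usage sketch, assuming a scikit-image release that still exposes both `marching_cubes_classic` and `mesh_surface_area` under `skimage.measure`, run on a synthetic sphere volume:

import numpy as np
from skimage import measure

# Synthetic volume: a solid sphere of ones inside a field of zeros.
grid = np.mgrid[-8:8, -8:8, -8:8]
volume = (np.sum(grid ** 2, axis=0) < 25).astype(np.float64)

# Extract the isosurface; level=None would default to the midpoint value.
verts, faces = measure.marching_cubes_classic(volume, level=0.5)

# Quantify the area of the resulting triangular mesh.
area = measure.mesh_surface_area(verts, faces)
print(verts.shape, faces.shape, area)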
0
Example 32
Project: sfepy Source File: poisson_parallel_interactive.py
def solve_problem(mesh_filename, options, comm):
order = options.order
rank, size = comm.Get_rank(), comm.Get_size()
output('rank', rank, 'of', size)
mesh = Mesh.from_file(mesh_filename)
if rank == 0:
cell_tasks = pl.partition_mesh(mesh, size, use_metis=options.metis,
verbose=True)
else:
cell_tasks = None
domain = FEDomain('domain', mesh)
omega = domain.create_region('Omega', 'all')
field = Field.from_args('fu', nm.float64, 1, omega, approx_order=order)
output('distributing field %s...' % field.name)
tt = time.clock()
distribute = pl.distribute_fields_dofs
lfds, gfds = distribute([field], cell_tasks,
is_overlap=True,
save_inter_regions=options.save_inter_regions,
output_dir=options.output_dir,
comm=comm, verbose=True)
lfd = lfds[0]
output('...done in', time.clock() - tt)
if rank == 0:
dof_maps = gfds[0].dof_maps
id_map = gfds[0].id_map
if options.verify:
verify_save_dof_maps(field, cell_tasks,
dof_maps, id_map, options, verbose=True)
if options.plot:
ppd.plot_partitioning([None, None], field, cell_tasks, gfds[0],
options.output_dir, size)
output('creating local problem...')
tt = time.clock()
omega_gi = Region.from_cells(lfd.cells, field.domain)
omega_gi.finalize()
omega_gi.update_shape()
pb = create_local_problem(omega_gi, order)
output('...done in', time.clock() - tt)
variables = pb.get_variables()
eqs = pb.equations
u_i = variables['u_i']
field_i = u_i.field
if options.plot:
ppd.plot_local_dofs([None, None], field, field_i, omega_gi,
options.output_dir, rank)
output('allocating global system...')
tt = time.clock()
sizes, drange = pl.get_sizes(lfd.petsc_dofs_range, field.n_nod, 1)
output('sizes:', sizes)
output('drange:', drange)
pdofs = pl.get_local_ordering(field_i, lfd.petsc_dofs_conn)
output('pdofs:', pdofs)
pmtx, psol, prhs = pl.create_petsc_system(pb.mtx_a, sizes, pdofs, drange,
is_overlap=True, comm=comm,
verbose=True)
output('...done in', time.clock() - tt)
output('evaluating local problem...')
tt = time.clock()
state = State(variables)
state.fill(0.0)
state.apply_ebc()
rhs_i = eqs.eval_residuals(state())
# This must be after pl.create_petsc_system() call!
mtx_i = eqs.eval_tangent_matrices(state(), pb.mtx_a)
output('...done in', time.clock() - tt)
output('assembling global system...')
tt = time.clock()
pl.apply_ebc_to_matrix(mtx_i, u_i.eq_map.eq_ebc)
pl.assemble_rhs_to_petsc(prhs, rhs_i, pdofs, drange, is_overlap=True,
comm=comm, verbose=True)
pl.assemble_mtx_to_petsc(pmtx, mtx_i, pdofs, drange, is_overlap=True,
comm=comm, verbose=True)
output('...done in', time.clock() - tt)
output('creating solver...')
tt = time.clock()
conf = Struct(method='cg', precond='gamg', sub_precond=None,
i_max=10000, eps_a=1e-50, eps_r=1e-5, eps_d=1e4, verbose=True)
status = {}
ls = PETScKrylovSolver(conf, comm=comm, mtx=pmtx, status=status)
output('...done in', time.clock() - tt)
output('solving...')
tt = time.clock()
psol = ls(prhs, psol, conf)
psol_i = pl.create_local_petsc_vector(pdofs)
gather, scatter = pl.create_gather_scatter(pdofs, psol_i, psol, comm=comm)
scatter(psol_i, psol)
sol0_i = state() - psol_i[...]
psol_i[...] = sol0_i
gather(psol, psol_i)
output('...done in', time.clock() - tt)
output('saving solution...')
tt = time.clock()
u_i.set_data(sol0_i)
out = u_i.create_output()
filename = os.path.join(options.output_dir, 'sol_%02d.h5' % comm.rank)
pb.domain.mesh.write(filename, io='auto', out=out)
gather_to_zero = pl.create_gather_to_zero(psol)
psol_full = gather_to_zero(psol)
if comm.rank == 0:
sol = psol_full[...].copy()[id_map]
u = FieldVariable('u', 'parameter', field,
primary_var_name='(set-to-None)')
filename = os.path.join(options.output_dir, 'sol.h5')
if (order == 1) or (options.linearization == 'strip'):
out = u.create_output(sol)
mesh.write(filename, io='auto', out=out)
else:
out = u.create_output(sol, linearization=Struct(kind='adaptive',
min_level=0,
max_level=order,
eps=1e-3))
out['u'].mesh.write(filename, io='auto', out=out)
output('...done in', time.clock() - tt)
if options.show:
plt.show()
0
Example 33
Project: RMG-Py Source File: fluxdiagram.py
def generateFluxDiagram(reactionModel, times, concentrations, reactionRates, outputDirectory, centralSpecies=None, speciesDirectory=None, settings=None):
"""
For a given `reactionModel` and simulation results stored as arrays of
`times`, species `concentrations`, and `reactionRates`, generate a series
of flux diagrams as frames of an animation, then stitch them together into
a movie. The individual frames and the final movie are saved on disk at
`outputDirectory`.
"""
global maximumNodeCount, maximumEdgeCount, timeStep, concentrationTolerance, speciesRateTolerance
# Allow user defined settings for flux diagram generation if given
if settings:
maximumNodeCount = settings['maximumNodeCount']
maximumEdgeCount = settings['maximumEdgeCount']
timeStep = settings['timeStep']
concentrationTolerance = settings['concentrationTolerance']
speciesRateTolerance = settings['speciesRateTolerance']
# Get the species and reactions corresponding to the provided concentrations and reaction rates
speciesList = reactionModel.core.species[:]
numSpecies = len(speciesList)
reactionList = reactionModel.core.reactions[:]
numReactions = len(reactionList)
# Search for the index of the central species:
if centralSpecies is not None:
for i, species in enumerate(speciesList):
if species.label == centralSpecies:
centralSpeciesIndex = i
break
# Compute the rates between each pair of species (big matrix warning!)
speciesRates = numpy.zeros((len(times),numSpecies,numSpecies), numpy.float64)
for index, reaction in enumerate(reactionList):
rate = reactionRates[:,index]
if not reaction.pairs: reaction.generatePairs()
for reactant, product in reaction.pairs:
reactantIndex = speciesList.index(reactant)
productIndex = speciesList.index(product)
speciesRates[:,reactantIndex,productIndex] += rate
speciesRates[:,productIndex,reactantIndex] -= rate
# Determine the maximum concentration for each species and the maximum overall concentration
maxConcentrations = numpy.max(numpy.abs(concentrations), axis=0)
maxConcentration = numpy.max(maxConcentrations)
# Determine the maximum rate for each species-species pair and the maximum overall species-species rate
maxSpeciesRates = numpy.max(numpy.abs(speciesRates), axis=0)
maxSpeciesRate = numpy.max(maxSpeciesRates)
speciesIndex = maxSpeciesRates.reshape((numSpecies*numSpecies)).argsort()
# Determine the nodes and edges to keep
nodes = []; edges = []
if centralSpecies is None:
for i in range(numSpecies*numSpecies):
productIndex, reactantIndex = divmod(speciesIndex[-i-1], numSpecies)
if reactantIndex > productIndex:
# Both reactant -> product and product -> reactant are in this list,
# so only keep one of them
continue
if maxSpeciesRates[reactantIndex, productIndex] == 0:
break
if reactantIndex not in nodes and len(nodes) < maximumNodeCount: nodes.append(reactantIndex)
if productIndex not in nodes and len(nodes) < maximumNodeCount: nodes.append(productIndex)
if len(nodes) > maximumNodeCount:
break
edges.append([reactantIndex, productIndex])
if len(edges) >= maximumEdgeCount:
break
else:
nodes.append(centralSpeciesIndex)
for index, reaction in enumerate(reactionList):
for reactant, product in reaction.pairs:
reactantIndex = speciesList.index(reactant)
productIndex = speciesList.index(product)
if maxSpeciesRates[reactantIndex, productIndex] == 0:
break
if len(nodes) > maximumNodeCount or len(edges) >= maximumEdgeCount:
break
if reactantIndex == centralSpeciesIndex:
if productIndex not in nodes:
nodes.append(productIndex)
edges.append([reactantIndex, productIndex])
if productIndex == centralSpeciesIndex:
if reactantIndex not in nodes:
nodes.append(reactantIndex)
edges.append([reactantIndex, productIndex])
# Create the master graph
# First we're going to generate the coordinates for all of the nodes; for
# this we use the thickest pen widths for all nodes and edges
graph = pydot.Dot('flux_diagram', graph_type='digraph', overlap="false")
graph.set_rankdir('LR')
graph.set_fontname('sans')
graph.set_fontsize('10')
# Add a node for each species
for index in nodes:
species = speciesList[index]
node = pydot.Node(name=str(species))
node.set_penwidth(maximumNodePenWidth)
graph.add_node(node)
# Try to use an image instead of the label
speciesIndex = str(species) + '.png'
imagePath = ''
if not speciesDirectory or not os.path.exists(speciesDirectory):
continue
for root, dirs, files in os.walk(speciesDirectory):
for f in files:
if f.endswith(speciesIndex):
imagePath = os.path.join(root, f)
break
if os.path.exists(imagePath):
node.set_image(imagePath)
node.set_label(" ")
# Add an edge for each species-species rate
for reactantIndex, productIndex in edges:
if reactantIndex in nodes and productIndex in nodes:
reactant = speciesList[reactantIndex]
product = speciesList[productIndex]
edge = pydot.Edge(str(reactant), str(product))
edge.set_penwidth(maximumEdgePenWidth)
graph.add_edge(edge)
# Generate the coordinates for all of the nodes using the specified program
graph = pydot.graph_from_dot_data(graph.create_dot(prog=program))[0]
# Now iterate over the time points, setting the pen widths appropriately
# This should preserve the coordinates of the nodes from frame to frame
frameNumber = 1
for t in range(len(times)):
# Update the nodes
slope = -maximumNodePenWidth / math.log10(concentrationTolerance)
for index in nodes:
species = speciesList[index]
if re.search(r'^[a-zA-Z0-9_]*$',str(species)) is not None:
species_string = str(species)
else:
# species name contains special characters
species_string = '"{0}"'.format(str(species))
node = graph.get_node(species_string)[0]
concentration = concentrations[t,index] / maxConcentration
if concentration < concentrationTolerance:
penwidth = 0.0
else:
penwidth = round(slope * math.log10(concentration) + maximumNodePenWidth,3)
node.set_penwidth(penwidth)
# Update the edges
slope = -maximumEdgePenWidth / math.log10(speciesRateTolerance)
for index in range(len(edges)):
reactantIndex, productIndex = edges[index]
if reactantIndex in nodes and productIndex in nodes:
reactant = speciesList[reactantIndex]
product = speciesList[productIndex]
if re.search(r'^[a-zA-Z0-9_]*$',str(reactant)) is not None:
reactant_string = str(reactant)
else:
reactant_string = '"{0}"'.format(str(reactant))
if re.search(r'^[a-zA-Z0-9_]*$',str(product)) is not None:
product_string = str(product)
else:
product_string = '"{0}"'.format(str(product))
edge = graph.get_edge(reactant_string, product_string)[0]
# Determine direction of arrow based on sign of rate
speciesRate = speciesRates[t,reactantIndex,productIndex] / maxSpeciesRate
if speciesRate < 0:
edge.set_dir("back")
speciesRate = -speciesRate
else:
edge.set_dir("forward")
# Set the edge pen width
if speciesRate < speciesRateTolerance:
penwidth = 0.0
edge.set_dir("none")
else:
penwidth = round(slope * math.log10(speciesRate) + maximumEdgePenWidth,3)
edge.set_penwidth(penwidth)
# Save the graph at this time to a dot file and a PNG image
if times[t] == 0:
label = 't = 0 s'
else:
label = 't = 10^{0:.1f} s'.format(math.log10(times[t]))
graph.set_label(label)
if t == 0:
repeat = framesPerSecond * initialPadding
elif t == len(times) - 1:
repeat = framesPerSecond * finalPadding
else:
repeat = 1
for r in range(repeat):
graph.write_dot(os.path.join(outputDirectory, 'flux_diagram_{0:04d}.dot'.format(frameNumber)))
graph.write_png(os.path.join(outputDirectory, 'flux_diagram_{0:04d}.png'.format(frameNumber)))
frameNumber += 1
# Use ffmpeg to stitch the PNG images together into a movie
import subprocess
command = ['ffmpeg',
'-framerate', '{0:d}'.format(framesPerSecond), # Duration of each image
'-i', 'flux_diagram_%04d.png', # Input file format
'-c:v', 'mpeg4', # Encoder
'-r', '30', # Video framerate
'-pix_fmt', 'yuv420p', # Pixel format
'flux_diagram.avi'] # Output filename
subprocess.check_call(command, cwd=outputDirectory)
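The key data structure above is the (time, species, species) array of net pair rates, stored antisymmetrically so that the flux from reactant to product is the negative of the reverse entry. A self-contained toy sketch of that accumulation pattern, with hypothetical rates for a single reaction between two species:

import numpy

numTimes, numSpecies = 4, 2
speciesRates = numpy.zeros((numTimes, numSpecies, numSpecies), numpy.float64)

# Hypothetical reaction converting species 0 into species 1, one rate
# value per time point.
rate = numpy.array([1.0, 0.5, 0.25, 0.125], numpy.float64)
reactantIndex, productIndex = 0, 1

# Net flux is stored antisymmetrically, exactly as in the loop above.
speciesRates[:, reactantIndex, productIndex] += rate
speciesRates[:, productIndex, reactantIndex] -= rate

# Maximum pair rate over time, as used for node/edge selection above.
maxSpeciesRates = numpy.max(numpy.abs(speciesRates), axis=0)
print(maxSpeciesRates)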
0
Example 34
Project: sfepy Source File: its2D_interactive.py
def main():
from sfepy import data_dir
parser = ArgumentParser(description=__doc__,
formatter_class=RawDescriptionHelpFormatter)
parser.add_argument('--version', action='version', version='%(prog)s')
parser.add_argument('--young', metavar='float', type=float,
action='store', dest='young',
default=2000.0, help=helps['young'])
parser.add_argument('--poisson', metavar='float', type=float,
action='store', dest='poisson',
default=0.4, help=helps['poisson'])
parser.add_argument('--load', metavar='float', type=float,
action='store', dest='load',
default=-1000.0, help=helps['load'])
parser.add_argument('--order', metavar='int', type=int,
action='store', dest='order',
default=1, help=helps['order'])
parser.add_argument('-r', '--refine', metavar='int', type=int,
action='store', dest='refine',
default=0, help=helps['refine'])
parser.add_argument('-s', '--show',
action="store_true", dest='show',
default=False, help=helps['show'])
parser.add_argument('-p', '--probe',
action="store_true", dest='probe',
default=False, help=helps['probe'])
options = parser.parse_args()
assert_((0.0 < options.poisson < 0.5),
"Poisson's ratio must be in ]0, 0.5[!")
assert_((0 < options.order),
'displacement approximation order must be at least 1!')
output('using values:')
output(" Young's modulus:", options.young)
output(" Poisson's ratio:", options.poisson)
output(' vertical load:', options.load)
output('uniform mesh refinement level:', options.refine)
# Build the problem definition.
mesh = Mesh.from_file(data_dir + '/meshes/2d/its2D.mesh')
domain = FEDomain('domain', mesh)
if options.refine > 0:
for ii in range(options.refine):
output('refine %d...' % ii)
domain = domain.refine()
output('... %d nodes %d elements'
% (domain.shape.n_nod, domain.shape.n_el))
omega = domain.create_region('Omega', 'all')
left = domain.create_region('Left',
'vertices in x < 0.001', 'facet')
bottom = domain.create_region('Bottom',
'vertices in y < 0.001', 'facet')
top = domain.create_region('Top', 'vertex 2', 'vertex')
field = Field.from_args('fu', nm.float64, 'vector', omega,
approx_order=options.order)
u = FieldVariable('u', 'unknown', field)
v = FieldVariable('v', 'test', field, primary_var_name='u')
D = stiffness_from_youngpoisson(2, options.young, options.poisson)
asphalt = Material('Asphalt', D=D)
load = Material('Load', values={'.val' : [0.0, options.load]})
integral = Integral('i', order=2*options.order)
integral0 = Integral('i', order=0)
t1 = Term.new('dw_lin_elastic(Asphalt.D, v, u)',
integral, omega, Asphalt=asphalt, v=v, u=u)
t2 = Term.new('dw_point_load(Load.val, v)',
integral0, top, Load=load, v=v)
eq = Equation('balance', t1 - t2)
eqs = Equations([eq])
xsym = EssentialBC('XSym', bottom, {'u.1' : 0.0})
ysym = EssentialBC('YSym', left, {'u.0' : 0.0})
ls = ScipyDirect({})
nls_status = IndexedStruct()
nls = Newton({}, lin_solver=ls, status=nls_status)
pb = Problem('elasticity', equations=eqs, nls=nls, ls=ls)
pb.time_update(ebcs=Conditions([xsym, ysym]))
# Solve the problem.
state = pb.solve()
output(nls_status)
# Postprocess the solution.
out = state.create_output_dict()
out = stress_strain(out, pb, state, extend=True)
pb.save_state('its2D_interactive.vtk', out=out)
gdata = geometry_data['2_3']
nc = len(gdata.coors)
integral_vn = Integral('ivn', coors=gdata.coors,
weights=[gdata.volume / nc] * nc)
nodal_stress(out, pb, state, integrals=Integrals([integral_vn]))
if options.probe:
# Probe the solution.
probes, labels = gen_lines(pb)
sfield = Field.from_args('sym_tensor', nm.float64, 3, omega,
approx_order=options.order - 1)
stress = FieldVariable('stress', 'parameter', sfield,
primary_var_name='(set-to-None)')
strain = FieldVariable('strain', 'parameter', sfield,
primary_var_name='(set-to-None)')
cfield = Field.from_args('component', nm.float64, 1, omega,
approx_order=options.order - 1)
component = FieldVariable('component', 'parameter', cfield,
primary_var_name='(set-to-None)')
ev = pb.evaluate
order = 2 * (options.order - 1)
strain_qp = ev('ev_cauchy_strain.%d.Omega(u)' % order, mode='qp')
stress_qp = ev('ev_cauchy_stress.%d.Omega(Asphalt.D, u)' % order,
mode='qp', copy_materials=False)
project_by_component(strain, strain_qp, component, order)
project_by_component(stress, stress_qp, component, order)
all_results = []
for ii, probe in enumerate(probes):
fig, results = probe_results(u, strain, stress, probe, labels[ii])
fig.savefig('its2D_interactive_probe_%d.png' % ii)
all_results.append(results)
for ii, results in enumerate(all_results):
output('probe %d:' % ii)
output.level += 2
for key, res in ordered_iteritems(results):
output(key + ':')
val = res[1]
output(' min: %+.2e, mean: %+.2e, max: %+.2e'
% (val.min(), val.mean(), val.max()))
output.level -= 2
if options.show:
# Show the solution. If the approximation order is greater than 1, the
# extra DOFs are simply thrown away.
from sfepy.postprocess.viewer import Viewer
view = Viewer('its2D_interactive.vtk')
view(vector_mode='warp_norm', rel_scaling=1,
is_scalar_bar=True, is_wireframe=True)
0
Example 35
Project: auto-sklearn Source File: create_datasets.py
def create_predict_spearman_rank_with_cv(cv_metafeatures, cv_experiments,
                                         iterator):
    X = []
    Y = []
    Y_names = []
    # Calculate the pairwise ranks between datasets
    dataset_names = [name for name in cv_metafeatures]
    cross_product = []
    folds_product = []
    if iterator == "combination":
        for cross in itertools.combinations_with_replacement(dataset_names,
                                                             r=2):
            cross_product.append(cross)
        for folds in itertools.combinations_with_replacement(range(10), r=2):
            folds_product.append(folds)
    elif iterator == "permutation":
        for cross in itertools.permutations(dataset_names, r=2):
            cross_product.append(cross)
        for folds in itertools.permutations(range(10), r=2):
            folds_product.append(folds)
    else:
        raise NotImplementedError()
    logging.info("Create spearman rank dataset with CV data %s", iterator)
    logging.info("Using %d datasets", len(dataset_names))
    logging.info("This will result in %d training points",
                 (len(cross_product) * len(folds_product)))
    logging.info("Length of dataset crossproduct %s", len(cross_product))
    logging.info("Length of folds crossproduct %s", len(folds_product))
    # Create inputs and targets
    for i, cross in enumerate(cross_product):
        print("%d/%d: %s" % (i, len(cross_product), cross),)
        for folds in folds_product:
            name = "%s-%d_%s-%d" % (cross[0], folds[0], cross[1], folds[1])
            mf_1 = cv_metafeatures[cross[0]][folds[0]]
            mf_2 = cv_metafeatures[cross[1]][folds[1]]
            assert mf_1.dtype == np.float64
            assert mf_2.dtype == np.float64
            x = np.hstack((mf_1, mf_2))
            columns = cv_metafeatures[cross[0]][folds[0]].index.values
            index = np.hstack(("0_" + columns, "1_" + columns))
            x = pd.Series(data=x, name=name, index=index)
            X.append(x)
            experiments_1 = cv_experiments[cross[0]][folds[0]]
            experiments_2 = cv_experiments[cross[1]][folds[1]]
            assert len(experiments_1) == len(experiments_2)
            responses_1 = np.zeros((len(experiments_1)), dtype=np.float64)
            responses_2 = np.zeros((len(experiments_1)), dtype=np.float64)
            for idx, zipped in enumerate(zip(experiments_1, experiments_2)):
                # Test if the order of the params is the same
                exp_1, exp_2 = zipped
                assert exp_1.params == exp_2.params
                responses_1[idx] = exp_1.result
                responses_2[idx] = exp_2.result
            rho, p = scipy.stats.spearmanr(responses_1, responses_2)
            # A nan is produced if all values of one of the response lists
            # are equal. This results in a division by zero. Because there is
            # no correlation if all values are the same, rho is replaced by
            # zero...
            # It would probably be better to assign random ranks for equal
            # values, but scipy doesn't support this...
            if not np.isfinite(rho):
                rho = 0
            Y.append(rho)
            Y_names.append(name)
    X = pd.DataFrame(X)
    Y = pd.Series(Y, index=Y_names)
    logging.info("CV_Metafeatures %s", cv_metafeatures.shape)
    logging.info("X.shape %s", X.shape)
    logging.info("Y.shape %s", Y.shape)
    # train sklearn regressor (tree) with 10fold CV
    indices = range(len(X))
    np_rs = np.random.RandomState(42)
    np_rs.shuffle(indices)
    X = X.iloc[indices]
    Y = Y.iloc[indices]
    return X, Y
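The non-finite `rho` guard in the inner loop is easy to reproduce in isolation: `scipy.stats.spearmanr` yields nan when one response list is constant. A minimal sketch of the same replacement:

import numpy as np
import scipy.stats

responses_1 = np.array([0.7, 0.7, 0.7], dtype=np.float64)  # constant list
responses_2 = np.array([0.1, 0.5, 0.9], dtype=np.float64)

rho, p = scipy.stats.spearmanr(responses_1, responses_2)  # rho is nan
if not np.isfinite(rho):
    rho = 0  # no correlation signal when all values are equal
print(rho)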
0
Example 36
Project: sfepy Source File: biot_parallel_interactive.py
def solve_problem(mesh_filename, options, comm):
order_u = options.order_u
order_p = options.order_p
rank, size = comm.Get_rank(), comm.Get_size()
output('rank', rank, 'of', size)
mesh = Mesh.from_file(mesh_filename)
if rank == 0:
cell_tasks = pl.partition_mesh(mesh, size, use_metis=options.metis,
verbose=True)
else:
cell_tasks = None
domain = FEDomain('domain', mesh)
omega = domain.create_region('Omega', 'all')
field1 = Field.from_args('fu', nm.float64, mesh.dim, omega,
approx_order=order_u)
field2 = Field.from_args('fp', nm.float64, 1, omega,
approx_order=order_p)
fields = [field1, field2]
output('distributing fields...')
tt = time.clock()
distribute = pl.distribute_fields_dofs
lfds, gfds = distribute(fields, cell_tasks,
is_overlap=True,
use_expand_dofs=True,
save_inter_regions=options.save_inter_regions,
output_dir=options.output_dir,
comm=comm, verbose=True)
output('...done in', time.clock() - tt)
output('creating local problem...')
tt = time.clock()
cells = lfds[0].cells
omega_gi = Region.from_cells(cells, domain)
omega_gi.finalize()
omega_gi.update_shape()
pb = create_local_problem(omega_gi, [order_u, order_p])
variables = pb.get_variables()
state = State(variables)
state.fill(0.0)
state.apply_ebc()
output('...done in', time.clock() - tt)
output('allocating global system...')
tt = time.clock()
sizes, drange, pdofs = pl.setup_composite_dofs(lfds, fields, variables,
verbose=True)
pmtx, psol, prhs = pl.create_petsc_system(pb.mtx_a, sizes, pdofs, drange,
is_overlap=True, comm=comm,
verbose=True)
output('...done in', time.clock() - tt)
output('creating solver...')
tt = time.clock()
conf = Struct(method='bcgsl', precond='jacobi', sub_precond=None,
i_max=10000, eps_a=1e-50, eps_r=1e-6, eps_d=1e4,
verbose=True)
status = {}
ls = PETScKrylovSolver(conf, comm=comm, mtx=pmtx, status=status)
field_ranges = {}
for ii, variable in enumerate(variables.iter_state(ordered=True)):
field_ranges[variable.name] = lfds[ii].petsc_dofs_range
ls.set_field_split(field_ranges, comm=comm)
ev = PETScParallelEvaluator(pb, pdofs, drange, True,
psol, comm, verbose=True)
nls_status = {}
conf = Struct(method='newtonls',
i_max=5, eps_a=0, eps_r=1e-5, eps_s=0.0,
verbose=True)
nls = PETScNonlinearSolver(conf, pmtx=pmtx, prhs=prhs, comm=comm,
fun=ev.eval_residual,
fun_grad=ev.eval_tangent_matrix,
lin_solver=ls, status=nls_status)
output('...done in', time.clock() - tt)
output('solving...')
tt = time.clock()
state = pb.create_state()
state.apply_ebc()
ev.psol_i[...] = state()
ev.gather(psol, ev.psol_i)
psol = nls(psol)
ev.scatter(ev.psol_i, psol)
sol0_i = ev.psol_i[...]
output('...done in', time.clock() - tt)
output('saving solution...')
tt = time.clock()
state.set_full(sol0_i)
out = state.create_output_dict()
filename = os.path.join(options.output_dir, 'sol_%02d.h5' % comm.rank)
pb.domain.mesh.write(filename, io='auto', out=out)
gather_to_zero = pl.create_gather_to_zero(psol)
psol_full = gather_to_zero(psol)
if comm.rank == 0:
sol = psol_full[...].copy()
u = FieldVariable('u', 'parameter', field1,
primary_var_name='(set-to-None)')
remap = gfds[0].id_map
ug = sol[remap]
p = FieldVariable('p', 'parameter', field2,
primary_var_name='(set-to-None)')
remap = gfds[1].id_map
pg = sol[remap]
if (((order_u == 1) and (order_p == 1))
or (options.linearization == 'strip')):
out = u.create_output(ug)
out.update(p.create_output(pg))
filename = os.path.join(options.output_dir, 'sol.h5')
mesh.write(filename, io='auto', out=out)
else:
out = u.create_output(ug, linearization=Struct(kind='adaptive',
min_level=0,
max_level=order_u,
eps=1e-3))
filename = os.path.join(options.output_dir, 'sol_u.h5')
out['u'].mesh.write(filename, io='auto', out=out)
out = p.create_output(pg, linearization=Struct(kind='adaptive',
min_level=0,
max_level=order_p,
eps=1e-3))
filename = os.path.join(options.output_dir, 'sol_p.h5')
out['p'].mesh.write(filename, io='auto', out=out)
output('...done in', time.clock() - tt)
0
Example 37
def main():
"""Main function that is called when TPOT is run on the command line"""
parser = argparse.ArgumentParser(description='A Python tool that '
'automatically creates and optimizes machine learning pipelines using '
'genetic programming.', add_help=False)
parser.add_argument('INPUT_FILE', type=str, help='Data file to optimize the '
'pipeline on; ensure that the class label column is labeled as "class".')
parser.add_argument('-h', '--help', action='help', help='Show this help message and exit.')
parser.add_argument('-is', action='store', dest='INPUT_SEPARATOR', default='\t',
type=str, help='Character used to separate columns in the input file.')
parser.add_argument('-target', action='store', dest='TARGET_NAME', default='class',
type=str, help='Name of the target column in the input file.')
parser.add_argument('-mode', action='store', dest='TPOT_MODE',
choices=['classification', 'regression'], default='classification', type=str,
help='Whether TPOT is being used for a classification or regression problem.')
parser.add_argument('-o', action='store', dest='OUTPUT_FILE', default='',
type=str, help='File to export the final optimized pipeline.')
parser.add_argument('-g', action='store', dest='GENERATIONS', default=100,
type=positive_integer, help='Number of generations to run pipeline '
'optimization over.\nGenerally, TPOT will work better when '
'you give it more generations (and therefore time) to optimize over. '
'TPOT will evaluate GENERATIONS x POPULATION_SIZE number of pipelines in total.')
parser.add_argument('-p', action='store', dest='POPULATION_SIZE', default=100,
type=positive_integer, help='Number of individuals in the GP population.\n'
'Generally, TPOT will work better when you give it more individuals '
'(and therefore time) to optimize over. TPOT will evaluate '
'GENERATIONS x POPULATION_SIZE number of pipelines in total.')
parser.add_argument('-mr', action='store', dest='MUTATION_RATE', default=0.9,
type=float_range, help='GP mutation rate in the range [0.0, 1.0]. We '
'recommend using the default parameter unless you '
'understand how the mutation rate affects GP algorithms.')
parser.add_argument('-xr', action='store', dest='CROSSOVER_RATE', default=0.05,
type=float_range, help='GP crossover rate in the range [0.0, 1.0]. We '
'recommend using the default parameter unless you '
'understand how the crossover rate affects GP algorithms.')
parser.add_argument('-cv', action='store', dest='NUM_CV_FOLDS', default=3,
type=int, help='The number of folds to evaluate each pipeline over in '
'k-fold cross-validation during the TPOT pipeline optimization process.')
parser.add_argument('-scoring', action='store', dest='SCORING_FN', default=None,
type=str, help='Function used to evaluate the quality of a given pipeline for '
'the problem. By default, balanced accuracy is used for classification and mean '
'squared error is used for regression. '
'TPOT assumes that any function with "error" or "loss" in the name is meant to '
'be minimized, whereas any other functions will be maximized. '
'Offers the same options as cross_val_score: '
'"accuracy", "adjusted_rand_score", "average_precision", "f1", "f1_macro", '
'"f1_micro", "f1_samples", "f1_weighted", "log_loss", "mean_absolute_error", '
'"mean_squared_error", "median_absolute_error", "precision", "precision_macro", '
'"precision_micro", "precision_samples", "precision_weighted", "r2", "recall", '
'"recall_macro", "recall_micro", "recall_samples", "recall_weighted", "roc_auc"')
parser.add_argument('-maxtime', action='store', dest='MAX_TIME_MINS', default=None,
type=int, help='How many minutes TPOT has to optimize the pipeline. This '
'setting will override the GENERATIONS parameter '
'and allow TPOT to run until it runs out of time.')
parser.add_argument('-maxeval', action='store', dest='MAX_EVAL_MINS', default=5,
type=float, help='How many minutes TPOT has to evaluate a single pipeline. '
'Setting this parameter to higher values will allow TPOT to explore more complex '
'pipelines but will also allow TPOT to run longer.')
parser.add_argument('-s', action='store', dest='RANDOM_STATE', default=None,
type=int, help='Random number generator seed for reproducibility. Set '
'this seed if you want your TPOT run to be reproducible with the same '
'seed and data set in the future.')
parser.add_argument('-v', action='store', dest='VERBOSITY', default=1,
choices=[0, 1, 2, 3], type=int, help='How much information TPOT '
'communicates while it is running: 0 = none, 1 = minimal, 2 = high, 3 = all.')
parser.add_argument('--no-update-check', action='store_true',
dest='DISABLE_UPDATE_CHECK', default=False,
help='Flag indicating whether the TPOT version checker should be disabled.')
parser.add_argument('--version', action='version',
version='TPOT {version}'.format(version=__version__),
help='Show TPOT\'s version number and exit.')
args = parser.parse_args()
if args.VERBOSITY >= 2:
print('\nTPOT settings:')
for arg in sorted(args.__dict__):
arg_val = args.__dict__[arg]
if arg == 'DISABLE_UPDATE_CHECK':
continue
elif arg == 'SCORING_FN' and args.__dict__[arg] is None:
if args.TPOT_MODE == 'classification':
arg_val = 'balanced_accuracy'
else:
arg_val = 'mean_squared_error'
print('{}\t=\t{}'.format(arg, arg_val))
print('')
input_data = np.recfromcsv(args.INPUT_FILE, delimiter=args.INPUT_SEPARATOR, dtype=np.float64, case_sensitive=True)
if args.TARGET_NAME not in input_data.dtype.names:
raise ValueError('The provided data file does not seem to have a target column. '
'Please make sure to specify the target column using the -target parameter.')
features = np.delete(input_data.view(np.float64).reshape(input_data.size, -1),
input_data.dtype.names.index(args.TARGET_NAME), axis=1)
training_features, testing_features, training_classes, testing_classes = \
train_test_split(features, input_data[args.TARGET_NAME], random_state=args.RANDOM_STATE)
if args.TPOT_MODE == 'classification':
tpot_type = TPOTClassifier
else:
tpot_type = TPOTRegressor
tpot = tpot_type(generations=args.GENERATIONS, population_size=args.POPULATION_SIZE,
mutation_rate=args.MUTATION_RATE, crossover_rate=args.CROSSOVER_RATE,
num_cv_folds=args.NUM_CV_FOLDS, scoring=args.SCORING_FN,
max_time_mins=args.MAX_TIME_MINS, max_eval_time_mins=args.MAX_EVAL_MINS,
random_state=args.RANDOM_STATE, verbosity=args.VERBOSITY,
disable_update_check=args.DISABLE_UPDATE_CHECK)
tpot.fit(training_features, training_classes)
if args.VERBOSITY in [1, 2] and tpot._optimized_pipeline:
training_score = max([tpot._hof.keys[x].wvalues[1] for x in range(len(tpot._hof.keys))])
print('\nTraining score: {}'.format(abs(training_score)))
print('Holdout score: {}'.format(tpot.score(testing_features, testing_classes)))
elif args.VERBOSITY >= 3 and tpot._hof:
print('Final Pareto front testing scores:')
for pipeline, pipeline_scores in zip(tpot._hof.items, reversed(tpot._hof.keys)):
tpot._fitted_pipeline = tpot._hof_fitted_pipelines[str(pipeline)]
print('{}\t{}\t{}'.format(int(abs(pipeline_scores.wvalues[0])),
tpot.score(testing_features, testing_classes),
pipeline))
if args.OUTPUT_FILE != '':
tpot.export(args.OUTPUT_FILE)
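The feature/target split above works because `np.recfromcsv` with `dtype=np.float64` yields a homogeneous record array that can be viewed as a plain matrix. A small sketch of the same pattern with an inline CSV (Python 3, tab-separated, hypothetical column names):

import io
import numpy as np

csv_text = u"a\tb\tclass\n1.0\t2.0\t0\n3.0\t4.0\t1\n"
input_data = np.recfromcsv(io.StringIO(csv_text), delimiter='\t',
                           dtype=np.float64, case_sensitive=True)

# View the record array as an (n_samples, n_columns) float64 matrix,
# then drop the target column to obtain the feature matrix.
matrix = input_data.view(np.float64).reshape(input_data.size, -1)
features = np.delete(matrix, input_data.dtype.names.index('class'), axis=1)
print(features)
print(input_data['class'])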
0
Example 38
def fit(self, X, Y):
"""Fit model to data.
Parameters
----------
X : array-like, shape = [n_samples, n_features]
Training vectors, where n_samples is the number of samples and
n_features is the number of predictors.
Y : array-like of response, shape = [n_samples, n_targets]
Target vectors, where n_samples is the number of samples and
n_targets is the number of response variables.
"""
# copy since this will contains the residuals (deflated) matrices
check_consistent_length(X, Y)
X = check_array(X, dtype=np.float64, copy=self.copy)
Y = check_array(Y, dtype=np.float64, copy=self.copy, ensure_2d=False)
if Y.ndim == 1:
Y = Y.reshape(-1, 1)
n = X.shape[0]
p = X.shape[1]
q = Y.shape[1]
if self.n_components < 1 or self.n_components > p:
raise ValueError('Invalid number of components: %d' %
self.n_components)
if self.algorithm not in ("svd", "nipals"):
raise ValueError("Got algorithm %s when only 'svd' "
"and 'nipals' are known" % self.algorithm)
if self.algorithm == "svd" and self.mode == "B":
raise ValueError('Incompatible configuration: mode B is not '
'implemented with svd algorithm')
if self.deflation_mode not in ["canonical", "regression"]:
raise ValueError('The deflation mode is unknown')
# Scale (in place)
X, Y, self.x_mean_, self.y_mean_, self.x_std_, self.y_std_ = (
_center_scale_xy(X, Y, self.scale))
# Residuals (deflated) matrices
Xk = X
Yk = Y
# Results matrices
self.x_scores_ = np.zeros((n, self.n_components))
self.y_scores_ = np.zeros((n, self.n_components))
self.x_weights_ = np.zeros((p, self.n_components))
self.y_weights_ = np.zeros((q, self.n_components))
self.x_loadings_ = np.zeros((p, self.n_components))
self.y_loadings_ = np.zeros((q, self.n_components))
self.n_iter_ = []
# NIPALS algo: outer loop, over components
for k in range(self.n_components):
if np.all(np.dot(Yk.T, Yk) < np.finfo(np.double).eps):
# Yk constant
warnings.warn('Y residual constant at iteration %s' % k)
break
# 1) weights estimation (inner loop)
# -----------------------------------
if self.algorithm == "nipals":
x_weights, y_weights, n_iter_ = \
_nipals_twoblocks_inner_loop(
X=Xk, Y=Yk, mode=self.mode, max_iter=self.max_iter,
tol=self.tol, norm_y_weights=self.norm_y_weights)
self.n_iter_.append(n_iter_)
elif self.algorithm == "svd":
x_weights, y_weights = _svd_cross_product(X=Xk, Y=Yk)
# Forces sign stability of x_weights and y_weights
# Sign indeterminacy issue from svd if algorithm == "svd"
# and from platform dependent computation if algorithm == 'nipals'
x_weights, y_weights = svd_flip(x_weights, y_weights.T)
y_weights = y_weights.T
# compute scores
x_scores = np.dot(Xk, x_weights)
if self.norm_y_weights:
y_ss = 1
else:
y_ss = np.dot(y_weights.T, y_weights)
y_scores = np.dot(Yk, y_weights) / y_ss
# test for null variance
if np.dot(x_scores.T, x_scores) < np.finfo(np.double).eps:
warnings.warn('X scores are null at iteration %s' % k)
break
# 2) Deflation (in place)
# ----------------------
# A possible memory footprint reduction may be done here: to avoid
# allocating a data chunk for the rank-one approximation matrix that
# is then subtracted from Xk, one could perform a column-wise
# deflation instead.
#
# - regress Xk's on x_score
x_loadings = np.dot(Xk.T, x_scores) / np.dot(x_scores.T, x_scores)
# - subtract rank-one approximations to obtain remainder matrix
Xk -= np.dot(x_scores, x_loadings.T)
if self.deflation_mode == "canonical":
# - regress Yk's on y_score, then subtract rank-one approx.
y_loadings = (np.dot(Yk.T, y_scores)
/ np.dot(y_scores.T, y_scores))
Yk -= np.dot(y_scores, y_loadings.T)
if self.deflation_mode == "regression":
# - regress Yk's on x_score, then subtract rank-one approx.
y_loadings = (np.dot(Yk.T, x_scores)
/ np.dot(x_scores.T, x_scores))
Yk -= np.dot(x_scores, y_loadings.T)
# 3) Store weights, scores and loadings # Notation:
self.x_scores_[:, k] = x_scores.ravel() # T
self.y_scores_[:, k] = y_scores.ravel() # U
self.x_weights_[:, k] = x_weights.ravel() # W
self.y_weights_[:, k] = y_weights.ravel() # C
self.x_loadings_[:, k] = x_loadings.ravel() # P
self.y_loadings_[:, k] = y_loadings.ravel() # Q
# Such that: X = TP' + Err and Y = UQ' + Err
# 4) rotations from input space to transformed space (scores)
# T = X W(P'W)^-1 = XW* (W* : p x k matrix)
# U = Y C(Q'C)^-1 = YC* (C* : q x k matrix)
self.x_rotations_ = np.dot(
self.x_weights_,
linalg.pinv2(np.dot(self.x_loadings_.T, self.x_weights_),
**pinv2_args))
if Y.shape[1] > 1:
self.y_rotations_ = np.dot(
self.y_weights_,
linalg.pinv2(np.dot(self.y_loadings_.T, self.y_weights_),
**pinv2_args))
else:
self.y_rotations_ = np.ones(1)
if True or self.deflation_mode == "regression":
# FIXME what's with the if?
# Estimate regression coefficient
# Regress Y on T
# Y = TQ' + Err,
# Then express in function of X
# Y = X W(P'W)^-1Q' + Err = XB + Err
# => B = W*Q' (p x q)
self.coef_ = np.dot(self.x_rotations_, self.y_loadings_.T)
self.coef_ = (1. / self.x_std_.reshape((p, 1)) * self.coef_ *
self.y_std_)
return self
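At the core of the loop above, deflation is a rank-one subtraction that leaves the residual orthogonal to the extracted scores. A minimal numpy illustration of one such step (scores taken from the leading right singular vector for simplicity, not from the NIPALS inner loop):

import numpy as np

rng = np.random.RandomState(0)
Xk = rng.randn(6, 3).astype(np.float64)

# Leading right singular vector as a stand-in for the fitted weights.
x_weights = np.linalg.svd(Xk, full_matrices=False)[2][0].reshape(-1, 1)
x_scores = np.dot(Xk, x_weights)

# Regress Xk on the scores, then subtract the rank-one approximation.
x_loadings = np.dot(Xk.T, x_scores) / np.dot(x_scores.T, x_scores)
Xk_deflated = Xk - np.dot(x_scores, x_loadings.T)

# The deflated residual is orthogonal to the extracted scores.
print(np.allclose(np.dot(Xk_deflated.T, x_scores), 0))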
0
Example 39
Project: pyspace Source File: feature_vector.py
def get_data(self, run_nr, split_nr, train_test): # feature_file, storage_format):
""" Loads the data from the feature file of the current input collection
depending on the storage_format.
Separates the actual vectors from the names and returns both as lists.
The method expects the following

**Parameters**

    :feature_file: the file of feature vectors to be loaded
    :storage_format: One of the first components in
        ['arff', 'real'], ['csv', 'real'], ['csvUnnamed', 'real'] or .
        Format in which the feature_file was saved. This information
        needs to be present in the meta data.

For documentation of arff and pickle files, refer to the class description
(docstring). Pickle format files do not need any special loading because
they already come in the expected format.
**CSV**
If no collection meta data is available for the input data,
the 'metadata.yaml' file can be generated with
:mod:`pySPACE.run.node_chain_scripts.md_creator`.
If you created the csv file with pySPACE, you automatically have the
standard *csv* format with the feature names in the first row
and the labels in the last column.
If you have a csv tabular without headings,
you have the *csvUnnamed* format,
and in your 'label_column' column, specified in your spec file,
the labels can be found.
.. note:: main loading concept copied from time series collection
check needed if code can be sent to upper class
"""
## init ##
classes_names = self.meta_data["classes_names"]
s_format = self.meta_data["storage_format"]
if type(s_format) == list:
s_format = s_format[0]
# TODO: automatic loading of csv?
delimiter = self.meta_data.get("delimiter", ",")
if not len(delimiter) == 1:
self._log("Wrong delimiter ('%s') given. Using default ','." %
delimiter, level=logging.CRITICAL)
delimiter = ","
# Do lazy loading of the fv objects.
if isinstance(self.data[(run_nr, split_nr, train_test)], basestring):
self._log("Lazy loading of %s feature vectors from input "
"collection for run %s, split %s." % (train_test, run_nr,
split_nr))
if s_format == "pickle":
# Load the data from a pickled file
file = open(self.data[(run_nr, split_nr, train_test)], "r")
self.data[(run_nr, split_nr, train_test)] = cPickle.load(file)
file.close()
sample = self.data[(run_nr, split_nr, train_test)][0][0]
self.update_meta_data({"feature_names":sample.feature_names,
"len_line":len(sample.feature_names)})
elif s_format == "arff":
names = []
data = []
# load file
f = open(self.data[(run_nr, split_nr, train_test)])
data_set = f.readlines()
f.close()
# Read the arff file completely ##
for line in data_set:
if "@attribute class" in line \
or "@relation" in line \
or "@data" in line:
pass
elif "@attribute" in line:
name_line = line.split()
names.append(name_line[1])
else:
data.append(line.split(delimiter))
# the label is expected to be at the end
# of each line in the data.
for line in data:
vector = line[0:-1]
label = line[-1].rstrip("\n\r ") # --> label is string
if not label in classes_names:
classes_names.append(label)
sample = FeatureVector(numpy.atleast_2d([vector]).astype(
numpy.float64), feature_names=names)
self.add_sample(sample=sample, label=label,
train=train_test, split=split_nr,
run=run_nr)
self.update_meta_data({"feature_names": sample.feature_names,
"len_line": len(sample.feature_names),
"classes_names": classes_names})
elif "csv" in s_format: # csv or csv unnamed
# load file
f = open(self.data[(run_nr, split_nr, train_test)])
data_set = f.readlines()
f.close()
# getting rid of all unwanted rows
if "ignored_rows" in self.meta_data:
ignored_rows = self.meta_data["ignored_rows"]
if not type(ignored_rows) == list:
warnings.warn("Wrong format: Ignored rows included!")
ignored_rows = []
ignored_rows.sort()
remove_list = []
for i in ignored_rows:
remove_list.append(data_set[int(i)-1])
for j in remove_list:
data_set.remove(j)
# get len_line and delete heading
feature_names = self.meta_data["feature_names"]
if s_format == "csv":
names = data_set[0].rstrip(",\n").split(delimiter)
data_set.pop(0)
line = data_set[0].split(delimiter)
line[-1] = line[-1].rstrip("\n\r")
if line[-1] == '':
line.pop(-1)
len_line = len(line)
# get and prepare label column numbers (len_line needed)
try:
label_column = self.meta_data["label_column"]
except KeyError:
label_column = -1
# map column numbers to indices by subtracting -1
if type(label_column) == int:
label_columns = [label_column - 1]
elif type(label_column) == list:
label_columns = [int(l)-1 for l in label_column]
for i in range(len(label_columns)):
# map to positive value and undo previous offset
if label_columns[i] < 0:
label_columns[i] = label_columns[i] + len_line+1
if label_columns[i] < 0:
label_columns[i] = -1 + len_line
# very important sorting for index shifts
label_columns.sort()
# calculate unwanted columns
# note: These indices begin with 1.
# They are internally shifted when used.
if self.meta_data.has_key("ignored_columns"):
ignored_columns = self.meta_data["ignored_columns"]
if not type(ignored_columns) == list:
warnings.warn("Wrong format: Ignored columns included!")
ignored_columns = []
new_ignored_columns = []
for i in ignored_columns:
i = int(i)
if i < 0:
i += len_line
for label_column in label_columns:
if i > label_column:
i -= 1
new_ignored_columns.append(i)
else:
new_ignored_columns = []
new_ignored_columns.sort()
# get all relevant feature_names
if feature_names is None:
if s_format == "csv":
# delete blanks and inverted commas
for i in range(len(names)):
names[i] = names[i].strip(' "')
names[i] = names[i].strip(" '")
feature_names = names
else: #s_format=="csv_unnamed"
feature_names = ["feature_%s" % i for i in
range(len_line)]
# switch label names to the end
i = 0 # reduce index, after previous labels were deleted
for label_column in label_columns:
try:
feature_names.append(feature_names[label_column-i])
del feature_names[label_column-i]
i += 1
except IndexError:
feature_names.append("")
# create new feature names
feature_names = [item for index, item in enumerate(
feature_names) if not index+1 in new_ignored_columns]
for _ in label_columns:
feature_names.pop(-1)
# read the data line by line
for line in data_set:
if not delimiter in line:
warnings.warn("Line without delimiter:\n%s" % str(line))
continue
line = line.split(delimiter)
line[-1] = line[-1].rstrip("\n\r")
if line[-1] == '':
line.pop(-1)
label = []
i = 0
for label_column in label_columns:
label.append(line.pop(label_column-i))
i += 1
if type(label) == str:
label = label.strip(' "')
label = label.strip(" '")
if label not in classes_names:
classes_names.append(label)
# create new line without the ignored columns
vector = [item for index,item in enumerate(line) if not
index+1 in new_ignored_columns]
sample = FeatureVector(numpy.atleast_2d([vector]).astype(
numpy.float64), feature_names=feature_names)
if len(label) == 1:
label = label[-1]
self.add_sample(sample=sample, label=label,
train=train_test, split=split_nr,
run=run_nr)
self.update_meta_data({"feature_names": sample.feature_names,
"num_features": len(sample.feature_names),
"classes_names": classes_names})
return self.data[(run_nr, split_nr, train_test)]
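Stripped of the pySPACE wrapper, the cast that this loader applies to every parsed row is simply `numpy.atleast_2d(...).astype(numpy.float64)`. In isolation:

import numpy

# One parsed CSV row, still as strings, e.g. the result of line.split(',')
vector = ['1.5', '2.25', '-0.5']

# Promote to a 2-D float64 row vector, as done for each FeatureVector above.
sample = numpy.atleast_2d([vector]).astype(numpy.float64)
print(sample.shape, sample.dtype)  # (1, 3) float64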
0
Example 40
def optimizer(xo, function, gradient, hessian, kwargs):
    """Calls the appropriate nonlinear convex optimization solver in the
    package `cvxopt` to find optimal values for the relevant parameters,
    given subroutines that evaluate the function, its gradient, and its
    hessian.

    Arguments
        function : function object
            evaluates the function at the specified parameter values
        gradient : function object
            evaluates the gradient of the function
        hessian : function object
            evaluates the hessian of the function
    """
    def F(x=None, z=None):
        """A subroutine that the cvxopt package can call to get
        values of the function, gradient and hessian during
        optimization.
        """
        if x is None:
            return 0, cvx.matrix(x_init)
        xx = np.array(x).ravel().astype(np.float64)
        # compute likelihood function
        f = function(xx, kwargs)
        if np.isnan(f) or np.isinf(f):
            f = np.array([np.finfo('float32').max]).astype('float')
        else:
            f = np.array([f]).astype('float')
        # compute gradient
        Df = gradient(xx, kwargs)
        if np.isnan(Df).any() or np.isinf(Df).any():
            Df = -1 * np.finfo('float32').max * np.ones((1, xx.size),
                                                        dtype=float)
        else:
            Df = Df.reshape(1, xx.size)
        if z is None:
            return cvx.matrix(f), cvx.matrix(Df)
        # compute hessian
        hess = hessian(xx, kwargs)
        Hf = z[0] * hess
        return cvx.matrix(f), cvx.matrix(Df), cvx.matrix(Hf)

    # warm start for the optimization
    V = xo.size
    x_init = xo.reshape(V, 1)
    # call the optimization subroutine in cvxopt
    if 'G' in kwargs:
        # call a constrained nonlinear solver
        solution = solvers.cp(F, G=cvx.matrix(kwargs['G']),
                              h=cvx.matrix(kwargs['h']))
    else:
        # call an unconstrained nonlinear solver
        solution = solvers.cp(F)
    x_final = np.array(solution['x']).ravel()
    return x_final
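The `F(x=None, z=None)` convention used above is cvxopt's standard callback protocol: called with no arguments it reports the nonlinear constraint count and starting point, with `x` it returns the objective and gradient, and with `x, z` it additionally returns the z-weighted Hessian. A toy one-dimensional sketch of the same protocol, assuming cvxopt is installed:

import numpy as np
import cvxopt as cvx
from cvxopt import solvers

def F(x=None, z=None):
    # No arguments: (number of nonlinear constraints, starting point).
    if x is None:
        return 0, cvx.matrix(0.0, (1, 1))
    f = cvx.matrix((x[0] - 1.0) ** 2)               # objective value
    Df = cvx.matrix(2.0 * (x[0] - 1.0), (1, 1))     # gradient, shape (1, n)
    if z is None:
        return f, Df
    H = cvx.matrix(2.0 * z[0], (1, 1))              # z-weighted Hessian
    return f, Df, H

solvers.options['show_progress'] = False
solution = solvers.cp(F)                 # unconstrained solve
print(np.array(solution['x']).ravel())   # approximately [1.0]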
0
Example 41
def sag_solver(X, y, sample_weight=None, loss='log', alpha=1.,
max_iter=1000, tol=0.001, verbose=0, random_state=None,
check_input=True, max_squared_sum=None,
warm_start_mem=None):
"""SAG solver for Ridge and LogisticRegression
SAG stands for Stochastic Average Gradient: the gradient of the loss is
estimated one sample at a time and the model is updated along the way with
a constant learning rate.
IMPORTANT NOTE: 'sag' solver converges faster on columns that are on the
same scale. You can normalize the data by using
sklearn.preprocessing.StandardScaler on your data before passing it to the
fit method.
This implementation works with data represented as dense numpy arrays or
sparse scipy arrays of floating point values for the features. It will
fit the data according to squared loss or log loss.
The regularizer is a penalty added to the loss function that shrinks model
parameters towards the zero vector using the squared euclidean norm L2.
.. versionadded:: 0.17
Parameters
----------
X : {array-like, sparse matrix}, shape (n_samples, n_features)
Training data
y : numpy array, shape (n_samples,)
Target values. With loss='multinomial', y must be label encoded
(see preprocessing.LabelEncoder).
sample_weight : array-like, shape (n_samples,), optional
Weights applied to individual samples (1. for unweighted).
loss : 'log' | 'squared' | 'multinomial'
Loss function that will be optimized:
-'log' is the binary logistic loss, as used in LogisticRegression.
-'squared' is the squared loss, as used in Ridge.
-'multinomial' is the multinomial logistic loss, as used in
LogisticRegression.
.. versionadded:: 0.18
*loss='multinomial'*
alpha : float, optional
Constant that multiplies the regularization term. Defaults to 1.
max_iter: int, optional
The max number of passes over the training data if the stopping
criteria is not reached. Defaults to 1000.
tol: double, optional
The stopping criteria for the weights. The iterations will stop when
max(change in weights) / max(weights) < tol. Defaults to .001
verbose: integer, optional
The verbosity level.
random_state : int seed, RandomState instance, or None (default)
The seed of the pseudo random number generator to use when
shuffling the data.
check_input : bool, default True
If False, the input arrays X and y will not be checked.
max_squared_sum : float, default None
Maximum squared sum of X over samples. If None, it will be computed,
going through all the samples. The value should be precomputed
to speed up cross validation.
warm_start_mem: dict, optional
The initialization parameters used for warm starting. Warm starting is
currently used in LogisticRegression but not in Ridge.
It contains:
- 'coef': the weight vector, with the intercept in last line
if the intercept is fitted.
- 'gradient_memory': the scalar gradient for all seen samples.
- 'sum_gradient': the sum of gradient over all seen samples,
for each feature.
- 'intercept_sum_gradient': the sum of gradient over all seen
samples, for the intercept.
- 'seen': array of boolean describing the seen samples.
- 'num_seen': the number of seen samples.
Returns
-------
coef_ : array, shape (n_features)
Weight vector.
n_iter_ : int
The number of full pass on all samples.
warm_start_mem : dict
Contains a 'coef' key with the fitted result, and possibly the
fitted intercept at the end of the array. Contains also other keys
used for warm starting.
Examples
--------
>>> import numpy as np
>>> from sklearn import linear_model
>>> n_samples, n_features = 10, 5
>>> np.random.seed(0)
>>> X = np.random.randn(n_samples, n_features)
>>> y = np.random.randn(n_samples)
>>> clf = linear_model.Ridge(solver='sag')
>>> clf.fit(X, y)
... #doctest: +NORMALIZE_WHITESPACE
Ridge(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=None,
normalize=False, random_state=None, solver='sag', tol=0.001)
>>> X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])
>>> y = np.array([1, 1, 2, 2])
>>> clf = linear_model.LogisticRegression(solver='sag')
>>> clf.fit(X, y)
... #doctest: +NORMALIZE_WHITESPACE
LogisticRegression(C=1.0, class_weight=None, dual=False,
fit_intercept=True, intercept_scaling=1, max_iter=100,
multi_class='ovr', n_jobs=1, penalty='l2', random_state=None,
solver='sag', tol=0.0001, verbose=0, warm_start=False)
References
----------
Schmidt, M., Roux, N. L., & Bach, F. (2013).
Minimizing finite sums with the stochastic average gradient
https://hal.inria.fr/hal-00860051/document
See also
--------
Ridge, SGDRegressor, ElasticNet, Lasso, SVR, and
LogisticRegression, SGDClassifier, LinearSVC, Perceptron
"""
if warm_start_mem is None:
warm_start_mem = {}
# Ridge default max_iter is None
if max_iter is None:
max_iter = 1000
if check_input:
X = check_array(X, dtype=np.float64, accept_sparse='csr', order='C')
y = check_array(y, dtype=np.float64, ensure_2d=False, order='C')
n_samples, n_features = X.shape[0], X.shape[1]
# As in SGD, the alpha is scaled by n_samples.
alpha_scaled = float(alpha) / n_samples
# if loss == 'multinomial', y should be label encoded.
n_classes = int(y.max()) + 1 if loss == 'multinomial' else 1
# initialization
if sample_weight is None:
sample_weight = np.ones(n_samples, dtype=np.float64, order='C')
if 'coef' in warm_start_mem.keys():
coef_init = warm_start_mem['coef']
else:
# assume fit_intercept is False
coef_init = np.zeros((n_features, n_classes), dtype=np.float64,
order='C')
# coef_init contains possibly the intercept_init at the end.
# Note that Ridge centers the data before fitting, so fit_intercept=False.
fit_intercept = coef_init.shape[0] == (n_features + 1)
if fit_intercept:
intercept_init = coef_init[-1, :]
coef_init = coef_init[:-1, :]
else:
intercept_init = np.zeros(n_classes, dtype=np.float64)
if 'intercept_sum_gradient' in warm_start_mem.keys():
intercept_sum_gradient = warm_start_mem['intercept_sum_gradient']
else:
intercept_sum_gradient = np.zeros(n_classes, dtype=np.float64)
if 'gradient_memory' in warm_start_mem.keys():
gradient_memory_init = warm_start_mem['gradient_memory']
else:
gradient_memory_init = np.zeros((n_samples, n_classes),
dtype=np.float64, order='C')
if 'sum_gradient' in warm_start_mem.keys():
sum_gradient_init = warm_start_mem['sum_gradient']
else:
sum_gradient_init = np.zeros((n_features, n_classes),
dtype=np.float64, order='C')
if 'seen' in warm_start_mem.keys():
seen_init = warm_start_mem['seen']
else:
seen_init = np.zeros(n_samples, dtype=np.int32, order='C')
if 'num_seen' in warm_start_mem.keys():
num_seen_init = warm_start_mem['num_seen']
else:
num_seen_init = 0
dataset, intercept_decay = make_dataset(X, y, sample_weight, random_state)
if max_squared_sum is None:
max_squared_sum = row_norms(X, squared=True).max()
step_size = get_auto_step_size(max_squared_sum, alpha_scaled, loss,
fit_intercept)
if step_size * alpha_scaled == 1:
raise ZeroDivisionError("Current sag implementation does not handle "
"the case step_size * alpha_scaled == 1")
num_seen, n_iter_ = sag(dataset, coef_init,
intercept_init, n_samples,
n_features, n_classes, tol,
max_iter,
loss,
step_size, alpha_scaled,
sum_gradient_init,
gradient_memory_init,
seen_init,
num_seen_init,
fit_intercept,
intercept_sum_gradient,
intercept_decay,
verbose)
if n_iter_ == max_iter:
warnings.warn("The max_iter was reached which means "
"the coef_ did not converge", ConvergenceWarning)
if fit_intercept:
coef_init = np.vstack((coef_init, intercept_init))
warm_start_mem = {'coef': coef_init, 'sum_gradient': sum_gradient_init,
'intercept_sum_gradient': intercept_sum_gradient,
'gradient_memory': gradient_memory_init,
'seen': seen_init, 'num_seen': num_seen}
if loss == 'multinomial':
coef_ = coef_init.T
else:
coef_ = coef_init[:, 0]
return coef_, n_iter_, warm_start_mem
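The returned `warm_start_mem` dict can be fed straight back in to resume optimization. A sketch of that round trip, assuming the private import path `sklearn.linear_model.sag` used by scikit-learn releases of this era:

import numpy as np
from sklearn.linear_model.sag import sag_solver  # private module path

rng = np.random.RandomState(0)
X = rng.randn(50, 5).astype(np.float64)
y = np.dot(X, rng.randn(5))

# First pass with squared loss (the Ridge-style problem), cold start.
coef, n_iter, mem = sag_solver(X, y, loss='squared', alpha=1., tol=1e-3)

# Second pass resumes from the returned warm_start_mem dict.
coef2, n_iter2, mem2 = sag_solver(X, y, loss='squared', alpha=1., tol=1e-3,
                                  warm_start_mem=mem)
print(n_iter, n_iter2)  # the warm-started run should need fewer passes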
0
Example 42
Project: scikit-learn Source File: locally_linear.py
def locally_linear_embedding(
X, n_neighbors, n_components, reg=1e-3, eigen_solver='auto', tol=1e-6,
max_iter=100, method='standard', hessian_tol=1E-4, modified_tol=1E-12,
random_state=None, n_jobs=1):
"""Perform a Locally Linear Embedding analysis on the data.
Read more in the :ref:`User Guide <locally_linear_embedding>`.
Parameters
----------
X : {array-like, sparse matrix, BallTree, KDTree, NearestNeighbors}
Sample data, shape = (n_samples, n_features), in the form of a
numpy array, sparse array, precomputed tree, or NearestNeighbors
object.
n_neighbors : integer
number of neighbors to consider for each point.
n_components : integer
number of coordinates for the manifold.
reg : float
regularization constant, multiplies the trace of the local covariance
matrix of the distances.
eigen_solver : string, {'auto', 'arpack', 'dense'}
auto : algorithm will attempt to choose the best method for input data
arpack : use arnoldi iteration in shift-invert mode.
For this method, M may be a dense matrix, sparse matrix,
or general linear operator.
Warning: ARPACK can be unstable for some problems. It is
best to try several random seeds in order to check results.
dense : use standard dense matrix operations for the eigenvalue
decomposition. For this method, M must be an array
or matrix type. This method should be avoided for
large problems.
tol : float, optional
Tolerance for 'arpack' method
Not used if eigen_solver=='dense'.
max_iter : integer
maximum number of iterations for the arpack solver.
method : {'standard', 'hessian', 'modified', 'ltsa'}
standard : use the standard locally linear embedding algorithm.
see reference [1]_
hessian : use the Hessian eigenmap method. This method requires
n_neighbors > n_components * (n_components + 3) / 2.
see reference [2]_
modified : use the modified locally linear embedding algorithm.
see reference [3]_
ltsa : use local tangent space alignment algorithm
see reference [4]_
hessian_tol : float, optional
Tolerance for Hessian eigenmapping method.
Only used if method == 'hessian'
modified_tol : float, optional
Tolerance for modified LLE method.
Only used if method == 'modified'
random_state: numpy.RandomState or int, optional
The generator or seed used to determine the starting vector for arpack
iterations. Defaults to numpy.random.
n_jobs : int, optional (default = 1)
The number of parallel jobs to run for neighbors search.
If ``-1``, then the number of jobs is set to the number of CPU cores.
Returns
-------
Y : array-like, shape [n_samples, n_components]
Embedding vectors.
squared_error : float
Reconstruction error for the embedding vectors. Equivalent to
``norm(Y - W Y, 'fro')**2``, where W are the reconstruction weights.
References
----------
.. [1] `Roweis, S. & Saul, L. Nonlinear dimensionality reduction
by locally linear embedding. Science 290:2323 (2000).`
.. [2] `Donoho, D. & Grimes, C. Hessian eigenmaps: Locally
linear embedding techniques for high-dimensional data.
Proc Natl Acad Sci U S A. 100:5591 (2003).`
.. [3] `Zhang, Z. & Wang, J. MLLE: Modified Locally Linear
Embedding Using Multiple Weights.`
http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.70.382
.. [4] `Zhang, Z. & Zha, H. Principal manifolds and nonlinear
dimensionality reduction via tangent space alignment.
Journal of Shanghai Univ. 8:406 (2004)`
"""
if eigen_solver not in ('auto', 'arpack', 'dense'):
raise ValueError("unrecognized eigen_solver '%s'" % eigen_solver)
if method not in ('standard', 'hessian', 'modified', 'ltsa'):
raise ValueError("unrecognized method '%s'" % method)
nbrs = NearestNeighbors(n_neighbors=n_neighbors + 1, n_jobs=n_jobs)
nbrs.fit(X)
X = nbrs._fit_X
N, d_in = X.shape
if n_components > d_in:
raise ValueError("output dimension must be less than or equal "
"to input dimension")
if n_neighbors >= N:
raise ValueError("n_neighbors must be less than number of points")
if n_neighbors <= 0:
raise ValueError("n_neighbors must be positive")
M_sparse = (eigen_solver != 'dense')
if method == 'standard':
W = barycenter_kneighbors_graph(
nbrs, n_neighbors=n_neighbors, reg=reg, n_jobs=n_jobs)
# we'll compute M = (I-W)'(I-W)
# depending on the solver, we'll do this differently
if M_sparse:
M = eye(*W.shape, format=W.format) - W
M = (M.T * M).tocsr()
else:
M = (W.T * W - W.T - W).toarray()
M.flat[::M.shape[0] + 1] += 1 # M = W'W - W' - W + I = (I - W)'(I - W)
elif method == 'hessian':
dp = n_components * (n_components + 1) // 2
if n_neighbors <= n_components + dp:
raise ValueError("for method='hessian', n_neighbors must be "
"greater than "
"[n_components * (n_components + 3) / 2]")
neighbors = nbrs.kneighbors(X, n_neighbors=n_neighbors + 1,
return_distance=False)
neighbors = neighbors[:, 1:]
Yi = np.empty((n_neighbors, 1 + n_components + dp), dtype=np.float64)
Yi[:, 0] = 1
M = np.zeros((N, N), dtype=np.float64)
use_svd = (n_neighbors > d_in)
for i in range(N):
Gi = X[neighbors[i]]
Gi -= Gi.mean(0)
# build Hessian estimator
if use_svd:
U = svd(Gi, full_matrices=0)[0]
else:
Ci = np.dot(Gi, Gi.T)
U = eigh(Ci)[1][:, ::-1]
Yi[:, 1:1 + n_components] = U[:, :n_components]
j = 1 + n_components
for k in range(n_components):
Yi[:, j:j + n_components - k] = (U[:, k:k + 1] *
U[:, k:n_components])
j += n_components - k
Q, R = qr(Yi)
w = Q[:, n_components + 1:]
S = w.sum(0)
S[np.where(abs(S) < hessian_tol)] = 1
w /= S
nbrs_x, nbrs_y = np.meshgrid(neighbors[i], neighbors[i])
M[nbrs_x, nbrs_y] += np.dot(w, w.T)
if M_sparse:
M = csr_matrix(M)
elif method == 'modified':
if n_neighbors < n_components:
raise ValueError("modified LLE requires "
"n_neighbors >= n_components")
neighbors = nbrs.kneighbors(X, n_neighbors=n_neighbors + 1,
return_distance=False)
neighbors = neighbors[:, 1:]
# find the eigenvectors and eigenvalues of each local covariance
# matrix. We want V[i] to be a [n_neighbors x n_neighbors] matrix,
# where the columns are eigenvectors
V = np.zeros((N, n_neighbors, n_neighbors))
nev = min(d_in, n_neighbors)
evals = np.zeros([N, nev])
# choose the most efficient way to find the eigenvectors
use_svd = (n_neighbors > d_in)
if use_svd:
for i in range(N):
X_nbrs = X[neighbors[i]] - X[i]
V[i], evals[i], _ = svd(X_nbrs,
full_matrices=True)
evals **= 2
else:
for i in range(N):
X_nbrs = X[neighbors[i]] - X[i]
C_nbrs = np.dot(X_nbrs, X_nbrs.T)
evi, vi = eigh(C_nbrs)
evals[i] = evi[::-1]
V[i] = vi[:, ::-1]
# find regularized weights: this is like normal LLE.
# because we've already computed the SVD of each covariance matrix,
# it's faster to use this rather than np.linalg.solve
reg = 1E-3 * evals.sum(1)
tmp = np.dot(V.transpose(0, 2, 1), np.ones(n_neighbors))
tmp[:, :nev] /= evals + reg[:, None]
tmp[:, nev:] /= reg[:, None]
w_reg = np.zeros((N, n_neighbors))
for i in range(N):
w_reg[i] = np.dot(V[i], tmp[i])
w_reg /= w_reg.sum(1)[:, None]
# calculate eta: the median of the ratio of small to large eigenvalues
# across the points. This is used to determine s_i, below
rho = evals[:, n_components:].sum(1) / evals[:, :n_components].sum(1)
eta = np.median(rho)
# find s_i, the size of the "almost null space" for each point:
# this is the size of the largest set of eigenvalues
# such that Sum[v; v in set]/Sum[v; v not in set] < eta
s_range = np.zeros(N, dtype=int)
evals_cumsum = stable_cumsum(evals, 1)
eta_range = evals_cumsum[:, -1:] / evals_cumsum[:, :-1] - 1
for i in range(N):
s_range[i] = np.searchsorted(eta_range[i, ::-1], eta)
s_range += n_neighbors - nev # number of zero eigenvalues
# Now calculate M.
# This is the [N x N] matrix whose null space is the desired embedding
M = np.zeros((N, N), dtype=np.float64)
for i in range(N):
s_i = s_range[i]
# select bottom s_i eigenvectors and calculate alpha
Vi = V[i, :, n_neighbors - s_i:]
alpha_i = np.linalg.norm(Vi.sum(0)) / np.sqrt(s_i)
# compute Householder matrix which satisfies
# Hi*Vi.T*ones(n_neighbors) = alpha_i*ones(s)
# using prescription from paper
h = alpha_i * np.ones(s_i) - np.dot(Vi.T, np.ones(n_neighbors))
norm_h = np.linalg.norm(h)
if norm_h < modified_tol:
h *= 0
else:
h /= norm_h
# Householder matrix is
# >> Hi = np.identity(s_i) - 2*np.outer(h,h)
# Then the weight matrix is
# >> Wi = np.dot(Vi,Hi) + (1-alpha_i) * w_reg[i,:,None]
# We do this much more efficiently:
Wi = (Vi - 2 * np.outer(np.dot(Vi, h), h) +
(1 - alpha_i) * w_reg[i, :, None])
# Update M as follows:
# >> W_hat = np.zeros( (N,s_i) )
# >> W_hat[neighbors[i],:] = Wi
# >> W_hat[i] -= 1
# >> M += np.dot(W_hat,W_hat.T)
# We can do this much more efficiently:
nbrs_x, nbrs_y = np.meshgrid(neighbors[i], neighbors[i])
M[nbrs_x, nbrs_y] += np.dot(Wi, Wi.T)
Wi_sum1 = Wi.sum(1)
M[i, neighbors[i]] -= Wi_sum1
M[neighbors[i], i] -= Wi_sum1
M[i, i] += s_i
if M_sparse:
M = csr_matrix(M)
elif method == 'ltsa':
neighbors = nbrs.kneighbors(X, n_neighbors=n_neighbors + 1,
return_distance=False)
neighbors = neighbors[:, 1:]
M = np.zeros((N, N))
use_svd = (n_neighbors > d_in)
for i in range(N):
Xi = X[neighbors[i]]
Xi -= Xi.mean(0)
# compute n_components largest eigenvalues of Xi * Xi^T
if use_svd:
v = svd(Xi, full_matrices=True)[0]
else:
Ci = np.dot(Xi, Xi.T)
v = eigh(Ci)[1][:, ::-1]
Gi = np.zeros((n_neighbors, n_components + 1))
Gi[:, 1:] = v[:, :n_components]
Gi[:, 0] = 1. / np.sqrt(n_neighbors)
GiGiT = np.dot(Gi, Gi.T)
nbrs_x, nbrs_y = np.meshgrid(neighbors[i], neighbors[i])
M[nbrs_x, nbrs_y] -= GiGiT
M[neighbors[i], neighbors[i]] += 1
return null_space(M, n_components, k_skip=1, eigen_solver=eigen_solver,
tol=tol, max_iter=max_iter, random_state=random_state)
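A minimal usage sketch of the embedding above, going through the public sklearn.manifold.locally_linear_embedding wrapper rather than this private helper; the swiss-roll data and the parameter values are illustrative assumptions, not taken from the example.

import numpy as np
from sklearn.datasets import make_swiss_roll
from sklearn.manifold import locally_linear_embedding

# Illustrative data: a 3-D swiss roll to be unrolled into 2 coordinates
X, _ = make_swiss_roll(n_samples=500, random_state=0)
X = X.astype(np.float64)

Y, squared_error = locally_linear_embedding(
    X, n_neighbors=12, n_components=2, method='standard', random_state=0)
print(Y.shape, squared_error)  # (500, 2) and the reconstruction error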
0
Example 43
Project: RMG-Py Source File: database.py
def getForwardReactionForFamilyEntry(self, entry, family, thermoDatabase):
"""
For a given `entry` for a reaction of the given reaction `family` (the
string label of the family), return the reaction with kinetics and
degeneracy for the "forward" direction as defined by the reaction
family. For families that are their own reverse, the direction the
kinetics is given in will be preserved. If the entry contains
functional groups for the reactants, assume that it is given in the
forward direction and do nothing. Returns the reaction in the direction
consistent with the reaction family template, and the matching template.
Note that the returned reaction will have its kinetics and degeneracy
set appropriately.
In order to reverse the reactions that are given in the reverse of the
direction the family is defined, we need to compute the thermodynamics
of the reactants and products. For this reason you must also pass
the `thermoDatabase` to use to generate the thermo data.
"""
def matchSpeciesToMolecules(species, molecules):
if len(species) == len(molecules) == 1:
return species[0].isIsomorphic(molecules[0])
elif len(species) == len(molecules) == 2:
if species[0].isIsomorphic(molecules[0]) and species[1].isIsomorphic(molecules[1]):
return True
elif species[0].isIsomorphic(molecules[1]) and species[1].isIsomorphic(molecules[0]):
return True
return False
reaction = None; template = None
# Get the indicated reaction family
try:
groups = self.families[family].groups
except KeyError:
raise ValueError('Invalid value "{0}" for family parameter.'.format(family))
if all([(isinstance(reactant, Group) or isinstance(reactant, LogicNode)) for reactant in entry.item.reactants]):
# The entry is a rate rule, containing functional groups only
# By convention, these are always given in the forward direction and
# have kinetics defined on a per-site basis
reaction = Reaction(
reactants = entry.item.reactants[:],
products = [],
kinetics = entry.data,
degeneracy = 1,
)
template = [groups.entries[label] for label in entry.label.split(';')]
elif (all([isinstance(reactant, Molecule) for reactant in entry.item.reactants]) and
all([isinstance(product, Molecule) for product in entry.item.products])):
# The entry is a real reaction, containing molecules
# These could be defined for either the forward or reverse direction
# and could have a reaction-path degeneracy
reaction = Reaction(reactants=[], products=[])
for molecule in entry.item.reactants:
reactant = Species(molecule=[molecule])
reactant.generateResonanceIsomers()
reactant.thermo = thermoDatabase.getThermoData(reactant)
reaction.reactants.append(reactant)
for molecule in entry.item.products:
product = Species(molecule=[molecule])
product.generateResonanceIsomers()
product.thermo = thermoDatabase.getThermoData(product)
reaction.products.append(product)
# Generate all possible reactions involving the reactant species
generatedReactions = self.generateReactionsFromFamilies([reactant.molecule for reactant in reaction.reactants], [], only_families=[family])
# Remove from that set any reactions that don't produce the desired reactants and products
forward = []; reverse = []
for rxn in generatedReactions:
if matchSpeciesToMolecules(reaction.reactants, rxn.reactants) and matchSpeciesToMolecules(reaction.products, rxn.products):
forward.append(rxn)
if matchSpeciesToMolecules(reaction.reactants, rxn.products) and matchSpeciesToMolecules(reaction.products, rxn.reactants):
reverse.append(rxn)
# We should now know whether the reaction is given in the forward or
# reverse direction
if len(forward) == 1 and len(reverse) == 0:
# The reaction is in the forward direction, so use as-is
reaction = forward[0]
template = reaction.template
# Don't forget to overwrite the estimated kinetics from the database with the kinetics for this entry
reaction.kinetics = entry.data
elif len(reverse) == 1 and len(forward) == 0:
# The reaction is in the reverse direction
# First fit Arrhenius kinetics in that direction
Tdata = 1000.0 / numpy.arange(0.5, 3.301, 0.1, numpy.float64)
kdata = numpy.zeros_like(Tdata)
for i in range(Tdata.shape[0]):
kdata[i] = entry.data.getRateCoefficient(Tdata[i]) / reaction.getEquilibriumConstant(Tdata[i])
kunits = 'm^3/(mol*s)' if len(reverse[0].reactants) == 2 else 's^-1'
kinetics = Arrhenius().fitToData(Tdata, kdata, kunits, T0=1.0)
kinetics.Tmin = entry.data.Tmin
kinetics.Tmax = entry.data.Tmax
kinetics.Pmin = entry.data.Pmin
kinetics.Pmax = entry.data.Pmax
# Now flip the direction
reaction = reverse[0]
reaction.kinetics = kinetics
template = reaction.template
elif len(reverse) > 0 and len(forward) > 0:
print('FAIL: Multiple reactions found for {0!r}.'.format(entry.label))
elif len(reverse) == 0 and len(forward) == 0:
print('FAIL: No reactions found for {0!r}.'.format(entry.label))
else:
print('FAIL: Unable to estimate kinetics for {0!r}.'.format(entry.label))
assert reaction is not None
assert template is not None
return reaction, template
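The reverse-direction branch above computes k_rev(T) = k_fwd(T) / K_eq(T) on a 1000/T grid and refits modified-Arrhenius parameters. A standalone numpy sketch of that step, with a hypothetical forward rate law and equilibrium constant standing in for entry.data.getRateCoefficient and reaction.getEquilibriumConstant:

import numpy as np

R = 8.314  # gas constant, J/(mol*K)
T0 = 1.0   # reference temperature in the modified Arrhenius form

# Hypothetical kinetics and thermodynamics, for illustration only
def k_forward(T):
    return 1e8 * (T / T0)**1.5 * np.exp(-50000.0 / (R * T))

def K_eq(T):
    return np.exp(10000.0 / (R * T) - 2.0)

Tdata = 1000.0 / np.arange(0.5, 3.301, 0.1)  # same grid as the snippet
kdata = k_forward(Tdata) / K_eq(Tdata)       # reverse rate coefficients

# Fit ln k = ln A + n*ln(T/T0) - Ea/(R*T) by linear least squares,
# roughly what Arrhenius().fitToData does internally
design = np.column_stack([np.ones_like(Tdata),
                          np.log(Tdata / T0),
                          -1.0 / (R * Tdata)])
lnA, n, Ea = np.linalg.lstsq(design, np.log(kdata), rcond=None)[0]
print(np.exp(lnA), n, Ea)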
0
Example 44
def spectral_embedding(adjacency, n_components=8, eigen_solver=None,
random_state=None, eigen_tol=0.0,
norm_laplacian=True, drop_first=True):
"""Project the sample on the first eigenvectors of the graph Laplacian.
The adjacency matrix is used to compute a normalized graph Laplacian
whose spectrum (especially the eigenvectors associated to the
smallest eigenvalues) has an interpretation in terms of minimal
number of cuts necessary to split the graph into comparably sized
components.
This embedding can also 'work' even if the ``adjacency`` variable is
not strictly the adjacency matrix of a graph but more generally
an affinity or similarity matrix between samples (for instance the
heat kernel of a euclidean distance matrix or a k-NN matrix).
However, care must be taken to always make the affinity matrix symmetric
so that the eigenvector decomposition works as expected.
Read more in the :ref:`User Guide <spectral_embedding>`.
Parameters
----------
adjacency : array-like or sparse matrix, shape: (n_samples, n_samples)
The adjacency matrix of the graph to embed.
n_components : integer, optional, default 8
The dimension of the projection subspace.
eigen_solver : {None, 'arpack', 'lobpcg', or 'amg'}, default None
The eigenvalue decomposition strategy to use. AMG requires pyamg
to be installed. It can be faster on very large, sparse problems,
but may also lead to instabilities.
random_state : int seed, RandomState instance, or None (default)
A pseudo random number generator used for the initialization of the
lobpcg eigenvectors decomposition when eigen_solver == 'amg'.
By default, arpack is used.
eigen_tol : float, optional, default=0.0
Stopping criterion for eigendecomposition of the Laplacian matrix
when using arpack eigen_solver.
drop_first : bool, optional, default=True
Whether to drop the first eigenvector. For spectral embedding, this
should be True as the first eigenvector should be constant vector for
connected graph, but for spectral clustering, this should be kept as
False to retain the first eigenvector.
norm_laplacian : bool, optional, default=True
If True, then compute normalized Laplacian.
Returns
-------
embedding : array, shape=(n_samples, n_components)
The reduced samples.
Notes
-----
Spectral embedding is most useful when the graph has one connected
component. If the graph has many components, the first few eigenvectors
will simply uncover the connected components of the graph.
References
----------
* https://en.wikipedia.org/wiki/LOBPCG
* Toward the Optimal Preconditioned Eigensolver: Locally Optimal
Block Preconditioned Conjugate Gradient Method
Andrew V. Knyazev
http://dx.doi.org/10.1137%2FS1064827500366124
"""
adjacency = check_symmetric(adjacency)
try:
from pyamg import smoothed_aggregation_solver
except ImportError:
if eigen_solver == "amg":
raise ValueError("The eigen_solver was set to 'amg', but pyamg is "
"not available.")
if eigen_solver is None:
eigen_solver = 'arpack'
elif eigen_solver not in ('arpack', 'lobpcg', 'amg'):
raise ValueError("Unknown value for eigen_solver: '%s'. "
"Should be 'amg', 'arpack', or 'lobpcg'"
% eigen_solver)
random_state = check_random_state(random_state)
n_nodes = adjacency.shape[0]
# Whether to drop the first eigenvector
if drop_first:
n_components = n_components + 1
if not _graph_is_connected(adjacency):
warnings.warn("Graph is not fully connected, spectral embedding"
" may not work as expected.")
laplacian, dd = graph_laplacian(adjacency,
normed=norm_laplacian, return_diag=True)
if (eigen_solver == 'arpack' or eigen_solver != 'lobpcg' and
(not sparse.isspmatrix(laplacian) or n_nodes < 5 * n_components)):
# lobpcg used with eigen_solver='amg' has bugs for low number of nodes
# for details see the source code in scipy:
# https://github.com/scipy/scipy/blob/v0.11.0/scipy/sparse/linalg/eigen
# /lobpcg/lobpcg.py#L237
# or matlab:
# http://www.mathworks.com/matlabcentral/fileexchange/48-lobpcg-m
laplacian = _set_diag(laplacian, 1, norm_laplacian)
# Here we'll use shift-invert mode for fast eigenvalues
# (see http://docs.scipy.org/doc/scipy/reference/tutorial/arpack.html
# for a short explanation of what this means)
# Because the normalized Laplacian has eigenvalues between 0 and 2,
# I - L has eigenvalues between -1 and 1. ARPACK is most efficient
# when finding eigenvalues of largest magnitude (keyword which='LM')
# and when these eigenvalues are very large compared to the rest.
# For very large, very sparse graphs, I - L can have many, many
# eigenvalues very near 1.0. This leads to slow convergence. So
# instead, we'll use ARPACK's shift-invert mode, asking for the
# eigenvalues near 1.0. This effectively spreads-out the spectrum
# near 1.0 and leads to much faster convergence: potentially an
# orders-of-magnitude speedup over simply using keyword which='LA'
# in standard mode.
try:
# We are computing the opposite of the laplacian inplace so as
# to spare a memory allocation of a possibly very large array
laplacian *= -1
v0 = random_state.uniform(-1, 1, laplacian.shape[0])
lambdas, diffusion_map = eigsh(laplacian, k=n_components,
sigma=1.0, which='LM',
tol=eigen_tol, v0=v0)
embedding = diffusion_map.T[n_components::-1] * dd
except RuntimeError:
# When submatrices are exactly singular, an LU decomposition
# in arpack fails. We fallback to lobpcg
eigen_solver = "lobpcg"
# Revert the laplacian to its opposite to have lobpcg work
laplacian *= -1
if eigen_solver == 'amg':
# Use AMG to get a preconditioner and speed up the eigenvalue
# problem.
if not sparse.issparse(laplacian):
warnings.warn("AMG works better for sparse matrices")
# lobpcg needs double precision floats
laplacian = check_array(laplacian, dtype=np.float64,
accept_sparse=True)
laplacian = _set_diag(laplacian, 1, norm_laplacian)
ml = smoothed_aggregation_solver(check_array(laplacian, 'csr'))
M = ml.aspreconditioner()
X = random_state.rand(laplacian.shape[0], n_components + 1)
X[:, 0] = dd.ravel()
lambdas, diffusion_map = lobpcg(laplacian, X, M=M, tol=1.e-12,
largest=False)
embedding = diffusion_map.T * dd
if embedding.shape[0] == 1:
raise ValueError
elif eigen_solver == "lobpcg":
# lobpcg needs double precision floats
laplacian = check_array(laplacian, dtype=np.float64,
accept_sparse=True)
if n_nodes < 5 * n_components + 1:
# see note above under arpack why lobpcg has problems with small
# number of nodes
# lobpcg will fallback to eigh, so we short circuit it
if sparse.isspmatrix(laplacian):
laplacian = laplacian.toarray()
lambdas, diffusion_map = eigh(laplacian)
embedding = diffusion_map.T[:n_components] * dd
else:
laplacian = _set_diag(laplacian, 1, norm_laplacian)
# We increase the number of eigenvectors requested, as lobpcg
# doesn't behave well in low dimension
X = random_state.rand(laplacian.shape[0], n_components + 1)
X[:, 0] = dd.ravel()
lambdas, diffusion_map = lobpcg(laplacian, X, tol=1e-15,
largest=False, maxiter=2000)
embedding = diffusion_map.T[:n_components] * dd
if embedding.shape[0] == 1:
raise ValueError
embedding = _deterministic_vector_sign_flip(embedding)
if drop_first:
return embedding[1:n_components].T
else:
return embedding[:n_components].T
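A minimal usage sketch, assuming the public sklearn.manifold.spectral_embedding function with the signature documented above; the blob data and neighbor count are illustrative.

import numpy as np
from sklearn.datasets import make_blobs
from sklearn.manifold import spectral_embedding
from sklearn.neighbors import kneighbors_graph

X, _ = make_blobs(n_samples=200, centers=3, random_state=0)

# Build a k-NN affinity matrix and symmetrize it, as the docstring advises
affinity = kneighbors_graph(X, n_neighbors=10, include_self=True)
affinity = 0.5 * (affinity + affinity.T)

embedding = spectral_embedding(affinity, n_components=2, random_state=0)
print(embedding.shape)  # (200, 2)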
0
Example 45
def _fit(self, X, y=None, do_prediction=False):
"""Estimate model parameters with the EM algorithm.
An initialization step is performed before entering the
expectation-maximization (EM) algorithm. If you want to avoid
this step, set the keyword argument init_params to the empty
string '' when creating the GMM object. Likewise, if you would
like just to do an initialization, set n_iter=0.
Parameters
----------
X : array_like, shape (n, n_features)
List of n_features-dimensional data points. Each row
corresponds to a single data point.
Returns
-------
responsibilities : array, shape (n_samples, n_components)
Posterior probabilities of each mixture component for each
observation.
"""
# initialization step
X = check_array(X, dtype=np.float64, ensure_min_samples=2,
estimator=self)
if X.shape[0] < self.n_components:
raise ValueError(
'GMM estimation with %s components, but got only %s samples' %
(self.n_components, X.shape[0]))
max_log_prob = -np.infty
if self.verbose > 0:
print('Expectation-maximization algorithm started.')
for init in range(self.n_init):
if self.verbose > 0:
print('Initialization ' + str(init + 1))
start_init_time = time()
if 'm' in self.init_params or not hasattr(self, 'means_'):
self.means_ = cluster.KMeans(
n_clusters=self.n_components,
random_state=self.random_state).fit(X).cluster_centers_
if self.verbose > 1:
print('\tMeans have been initialized.')
if 'w' in self.init_params or not hasattr(self, 'weights_'):
self.weights_ = np.tile(1.0 / self.n_components,
self.n_components)
if self.verbose > 1:
print('\tWeights have been initialized.')
if 'c' in self.init_params or not hasattr(self, 'covars_'):
cv = np.cov(X.T) + self.min_covar * np.eye(X.shape[1])
if not cv.shape:
cv.shape = (1, 1)
self.covars_ = \
distribute_covar_matrix_to_match_covariance_type(
cv, self.covariance_type, self.n_components)
if self.verbose > 1:
print('\tCovariance matrices have been initialized.')
# EM algorithms
current_log_likelihood = None
# reset self.converged_ to False
self.converged_ = False
for i in range(self.n_iter):
if self.verbose > 0:
print('\tEM iteration ' + str(i + 1))
start_iter_time = time()
prev_log_likelihood = current_log_likelihood
# Expectation step
log_likelihoods, responsibilities = self.score_samples(X)
current_log_likelihood = log_likelihoods.mean()
# Check for convergence.
if prev_log_likelihood is not None:
change = abs(current_log_likelihood - prev_log_likelihood)
if self.verbose > 1:
print('\t\tChange: ' + str(change))
if change < self.tol:
self.converged_ = True
if self.verbose > 0:
print('\t\tEM algorithm converged.')
break
# Maximization step
self._do_mstep(X, responsibilities, self.params,
self.min_covar)
if self.verbose > 1:
print('\t\tEM iteration ' + str(i + 1) + ' took {0:.5f}s'.format(
time() - start_iter_time))
# if the results are better, keep it
if self.n_iter:
if current_log_likelihood > max_log_prob:
max_log_prob = current_log_likelihood
best_params = {'weights': self.weights_,
'means': self.means_,
'covars': self.covars_}
if self.verbose > 1:
print('\tBetter parameters were found.')
if self.verbose > 1:
print('\tInitialization ' + str(init + 1) + ' took {0:.5f}s'.format(
time() - start_init_time))
# check the existence of an init param that was not subject to
# likelihood computation issue.
if np.isneginf(max_log_prob) and self.n_iter:
raise RuntimeError(
"EM algorithm was never able to compute a valid likelihood " +
"given initial parameters. Try different init parameters " +
"(or increasing n_init) or check for degenerate data.")
if self.n_iter:
self.covars_ = best_params['covars']
self.means_ = best_params['means']
self.weights_ = best_params['weights']
else: # self.n_iter == 0 occurs when using GMM within HMM
# Need to make sure that there are responsibilities to output
# Output zeros because it was just a quick initialization
responsibilities = np.zeros((X.shape[0], self.n_components))
return responsibilities
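The snippet above is the legacy sklearn.mixture.GMM fitting loop. A minimal sketch of the same EM workflow through the modern GaussianMixture estimator, assuming a recent scikit-learn; the toy data are illustrative.

import numpy as np
from sklearn.mixture import GaussianMixture

rng = np.random.RandomState(0)
# Two well-separated Gaussian clusters in float64
X = np.vstack([rng.normal(0.0, 1.0, (100, 2)),
               rng.normal(5.0, 1.0, (100, 2))]).astype(np.float64)

gmm = GaussianMixture(n_components=2, n_init=3, tol=1e-3, random_state=0)
gmm.fit(X)
responsibilities = gmm.predict_proba(X)  # shape (200, 2), like the return above
print(gmm.converged_, responsibilities.shape)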
0
Example 46
Project: ncpol2sdpa Source File: picos_utils.py
def convert_to_picos(sdp, duplicate_moment_matrix=False):
"""Convert an SDP relaxation to a PICOS problem such that the exported
.dat-s file is extremely sparse; no penalty is imposed in terms of
SDP variables or number of constraints. This conversion can be used for
imposing extra constraints on the moment matrix, such as partial transpose.
:param sdp: The SDP relaxation to convert.
:type sdp: :class:`ncpol2sdpa.sdp`.
:param duplicate_moment_matrix: Optional parameter to add an unconstrained
moment matrix to the problem with the same
structure as the moment matrix with the PSD
constraint.
:type duplicate_moment_matrix: bool.
:returns: :class:`picos.Problem`.
"""
import picos as pic
import cvxopt as cvx
P = pic.Problem(verbose=sdp.verbose)
block_size = sdp.block_struct[0]
if sdp.F.dtype == np.float64:
X = P.add_variable('X', (block_size, block_size), vtype="symmetric")
if duplicate_moment_matrix:
Y = P.add_variable('Y', (block_size, block_size), vtype="symmetric")
else:
X = P.add_variable('X', (block_size, block_size), vtype="hermitian")
if duplicate_moment_matrix:
Y = P.add_variable('Y', (block_size, block_size), vtype="hermitian")
row_offset = 0
theoretical_n_vars = sdp.block_struct[0]**2
eq_block_start = sdp.constraint_starting_block+sdp._n_inequalities
block_idx = 0
while (block_idx < len(sdp.block_struct)):
block_size = sdp.block_struct[block_idx]
x, Ix, Jx = [], [], []
c, Ic, Jc = [], [], []
for i, row in enumerate(sdp.F.rows[row_offset:row_offset +
block_size**2]):
for j, column in enumerate(row):
if column > 0:
x.append(sdp.F.data[row_offset+i][j])
Ix.append(i)
Jx.append(column-1)
i0 = (i//block_size)+(i % block_size)*block_size
if i != i0:
x.append(sdp.F.data[row_offset+i][j])
Ix.append(i0)
Jx.append(column-1)
else:
c.append(sdp.F.data[row_offset+i][j])
Ic.append(i%block_size)
Jc.append(i//block_size)
permutation = cvx.spmatrix(x, Ix, Jx, (block_size**2,
theoretical_n_vars))
constant = cvx.spmatrix(c, Ic, Jc, (block_size, block_size))
if duplicate_moment_matrix:
constraint = X
else:
constraint = X.copy()
for k in constraint.factors:
constraint.factors[k] = permutation
constraint._size = (block_size, block_size)
if block_idx < eq_block_start:
P.add_constraint(constant + constraint >> 0)
else:
P.add_constraint(constant + constraint == 0)
row_offset += block_size**2
block_idx += 1
if duplicate_moment_matrix and \
block_size == sdp.block_struct[0]:
for k in Y.factors:
Y.factors[k] = permutation
row_offset += block_size**2
block_idx += 1
if len(np.nonzero(sdp.obj_facvar)[0]) > 0:
x, Ix, Jx = [], [], []
for k, val in enumerate(sdp.obj_facvar):
if val != 0:
x.append(val)
Ix.append(0)
Jx.append(k)
permutation = cvx.spmatrix(x, Ix, Jx)
objective = X.copy()
for k in objective.factors:
objective.factors[k] = permutation
objective._size = (1, 1)
P.set_objective('min', objective)
return P
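The converter assembles each block from triplet (value, row, column) lists and hands them to cvxopt. A minimal sketch of the cvx.spmatrix construction in isolation, with made-up triplet data:

import cvxopt as cvx

# Made-up COO triplets: values x at rows Ix, columns Jx of a 3x3 block
x = [1.0, 2.5, -1.0]
Ix = [0, 1, 2]
Jx = [0, 0, 2]

S = cvx.spmatrix(x, Ix, Jx, (3, 3))  # sparse block, three stored entries
D = cvx.matrix(S)                    # densify to inspect
print(D)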
0
Example 47
Project: auto-sklearn Source File: create_datasets.py
def create_predict_spearman_rank(metafeatures, experiments, iterator):
X = []
Y = []
Y_names = []
# Calculate the pairwise ranks between datasets
dataset_names = [name for name in metafeatures.index]
cross_product = []
if iterator == "combination":
for cross in itertools.combinations_with_replacement(dataset_names, r=2):
cross_product.append(cross)
elif iterator == "permutation":
for cross in itertools.permutations(dataset_names, r=2):
cross_product.append(cross)
else:
raise NotImplementedError(iterator)
logging.info("Create spearman rank dataset without CV data and %s",
iterator)
logging.info("Using %d datasets", len(dataset_names))
logging.info("This will results in %d training points", len(cross_product))
# Create inputs and targets
for cross in cross_product:
name = "%s_%s" % (cross[0], cross[1])
mf_1 = metafeatures.loc[cross[0]]
mf_2 = metafeatures.loc[cross[1]]
assert mf_1.dtype == np.float64
assert mf_2.dtype == np.float64
x = np.hstack((mf_1, mf_2))
columns = metafeatures.columns.values
index = np.hstack(("0_" + columns, "1_" + columns))
x = pd.Series(data=x, name=name, index=index)
X.append(x)
experiments_1 = experiments[cross[0]]
experiments_2 = experiments[cross[1]]
assert len(experiments_1) == len(experiments_2), name
responses_1 = np.zeros((len(experiments_1)), dtype=np.float64)
responses_2 = np.zeros((len(experiments_1)), dtype=np.float64)
for idx, zipped in enumerate(zip(
sorted(experiments_1, key=lambda t: str(t.configuration)),
sorted(experiments_2, key=lambda t: str(t.configuration)))):
# Test if the order of the params is the same
exp_1, exp_2 = zipped
print(exp_1.configuration, exp_2.configuration)
assert exp_1.configuration == exp_2.configuration,\
(experiments_1, experiments_2)
responses_1[idx] = exp_1.result if np.isfinite(exp_1.result) else 1
responses_2[idx] = exp_2.result if np.isfinite(exp_2.result) else 1
rho, p = scipy.stats.spearmanr(responses_1, responses_2)
#rho, p = scipy.stats.kendalltau(responses_1, responses_2)
if not np.isfinite(rho):
rho = 0
Y.append(rho)
Y_names.append(name)
X = pd.DataFrame(X)
Y = pd.Series(Y, index=Y_names)
logging.info("Metafeatures %s", metafeatures.shape)
logging.info("X.shape %s", X.shape)
logging.info("Y.shape %s", Y.shape)
assert X.shape == (len(cross_product), metafeatures.shape[1] * 2), \
(X.shape, (len(cross_product), metafeatures.shape[1] * 2))
assert Y.shape == (len(cross_product), )
# train sklearn regressor (tree) with 10fold CV
indices = np.arange(len(X))
np_rs = np.random.RandomState(42)
np_rs.shuffle(indices)
X = X.iloc[indices]
Y = Y.iloc[indices]
return X, Y
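The target for each dataset pair above is the Spearman rank correlation between matched experiment results. A standalone sketch of that computation, including the same finiteness guard, on made-up response vectors:

import numpy as np
import scipy.stats

rng = np.random.RandomState(0)
responses_1 = rng.rand(20)
responses_2 = responses_1 + 0.1 * rng.rand(20)  # closely related rankings

rho, p = scipy.stats.spearmanr(responses_1, responses_2)
if not np.isfinite(rho):  # constant inputs yield nan, as guarded above
    rho = 0.0
print(rho, p)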
0
Example 48
def testRLS(self):
print()
print()
print()
print()
print("Testing the cross-validation routines of the RLS module.")
print()
print()
floattype = np.float64
m, n = 400, 100
Xtrain = np.random.rand(m, n)
K = np.dot(Xtrain, Xtrain.T)
ylen = 2
Y = np.zeros((m, ylen), dtype=floattype)
Y = np.random.rand(m, ylen)
hoindices = [45]
hoindices2 = [45, 50]
hoindices3 = [45, 50, 55]
hocompl = list(set(range(m)) - set(hoindices))
Kho = K[np.ix_(hocompl, hocompl)]
Yho = Y[hocompl]
kwargs = {}
kwargs['Y'] = Y
kwargs['X'] = K
kwargs['kernel'] = 'PrecomputedKernel'
dualrls = RLS(**kwargs)
kwargs = {}
kwargs["X"] = Xtrain
kwargs["Y"] = Y
kwargs["bias"] = 0.
primalrls = RLS(**kwargs)
kwargs = {}
kwargs['Y'] = Yho
kwargs['X'] = Kho
kwargs['kernel'] = 'PrecomputedKernel'
dualrls_naive = RLS(**kwargs)
testkm = K[np.ix_(hocompl, hoindices)]
trainX = Xtrain[hocompl]
testX = Xtrain[hoindices]
kwargs = {}
kwargs['Y'] = Yho
kwargs['X'] = trainX
kwargs["bias"] = 0.
primalrls_naive = RLS(**kwargs)
loglambdas = range(-5, 5)
for j in range(0, len(loglambdas)):
regparam = 2. ** loglambdas[j]
print()
print("Regparam 2^%1d" % loglambdas[j])
dumbho = np.dot(testkm.T, np.dot(la.inv(Kho + regparam * np.eye(Kho.shape[0])), Yho))
dumbho = np.squeeze(dumbho)
print(str(dumbho) + ' Dumb HO (dual)')
dualrls_naive.solve(regparam)
predho1 = dualrls_naive.predictor.predict(testkm.T)
print(str(predho1) + ' Naive HO (dual)')
dualrls.solve(regparam)
predho2 = dualrls.holdout(hoindices)
print(str(predho2) + ' Fast HO (dual)')
dualrls.solve(regparam)
predho = dualrls.leave_one_out()[hoindices[0]]
print(str(predho) + ' Fast LOO (dual)')
primalrls_naive.solve(regparam)
predho3 = primalrls_naive.predictor.predict(testX)
print(str(predho3) + ' Naive HO (primal)')
primalrls.solve(regparam)
predho4 = primalrls.holdout(hoindices)
print(str(predho4) + ' Fast HO (primal)')
for predho in [predho1, predho2, predho3, predho4]:
self.assertEqual(dumbho.shape, predho.shape)
assert_allclose(dumbho, predho)
#for row in range(predho.shape[0]):
# for col in range(predho.shape[1]):
# self.assertAlmostEqual(dumbho[row,col],predho[row,col])
primalrls.solve(regparam)
predho = primalrls.leave_one_out()[hoindices[0]]
print(str(predho) + ' Fast LOO (primal)')
print()
hoindices = range(100, 300)
hocompl = list(set(range(m)) - set(hoindices))
Kho = K[np.ix_(hocompl, hocompl)]
Yho = Y[hocompl]
testkm = K[np.ix_(hocompl, hoindices)]
dumbho = np.dot(testkm.T, np.dot(la.inv(Kho + regparam * np.eye(Kho.shape[0])), Yho))
kwargs = {}
kwargs['Y'] = Y
kwargs['X'] = Xtrain
dualrls.solve(regparam)
predho2 = dualrls.holdout(hoindices2)
print(str(predho2) + ' Fast HO')
hopred = dualrls.leave_pair_out(np.array([hoindices2[0], 4, 6]), np.array([hoindices2[1], 5, 7]))
print(str(hopred[0][0]) + '\n' + str(hopred[1][0]) + ' Fast LPO')
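Every fast hold-out and leave-one-out routine above is checked against the explicit dual ridge prediction K_test^T (K_train + regparam*I)^{-1} Y_train. A standalone numpy sketch of that reference formula on random data:

import numpy as np
import numpy.linalg as la

rng = np.random.RandomState(0)
m, n = 50, 10
X = rng.rand(m, n)
Y = rng.rand(m, 2)
K = X.dot(X.T)  # precomputed linear kernel
regparam = 1.0

hoindices = [5, 7]
hocompl = [i for i in range(m) if i not in hoindices]
Kho, Yho = K[np.ix_(hocompl, hocompl)], Y[hocompl]
testkm = K[np.ix_(hocompl, hoindices)]

# "Dumb" hold-out prediction used as the reference in the test above
dumbho = np.dot(testkm.T,
                np.dot(la.inv(Kho + regparam * np.eye(len(hocompl))), Yho))
print(dumbho.shape)  # (2, 2): two hold-out points, two outputs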
0
Example 49
Project: RMG-Py Source File: draw.py
def draw(self, network, format, path=None):
"""
Draw the potential energy surface for the given `network` as a Cairo
surface of the given `format`. If `path` is given, the surface is
saved to that location on disk.
"""
try:
import cairocffi as cairo
except ImportError:
try:
import cairo
except ImportError:
logging.warning('Cairo not found; potential energy surface will not be drawn.')
return
self.network = network
# The order of wells is as follows:
# - Reactant channels come first (to the left)
# - Isomers are in the middle
# - Product channels come last (to the right)
# This is done because most people will read the PES from left to right
wells = []
wells.extend(network.reactants)
wells.extend(network.isomers)
wells.extend(network.products)
# Generate the bounding rectangles for each configuration label
labelRects = []
for well in wells:
labelRects.append(self.__getLabelSize(well, format=format))
# Get energy range (use kJ/mol internally)
E0min, E0max = self.__getEnergyRange()
E0min *= 0.001; E0max *= 0.001
# Drawing parameters
padding = self.options['padding']
wellWidth = self.options['wellWidth']
wellSpacing = self.options['wellSpacing']
Eslope = self.options['Eslope']
TSwidth = self.options['TSwidth']
E0_offset = self.options['E0offset'] * 0.001
# Choose multiplier to convert energies to desired units (on figure only)
Eunits = self.options['Eunits']
try:
Emult = {'J/mol': 1.0, 'kJ/mol': 0.001, 'cal/mol': 1.0/4.184, 'kcal/mol': 1.0/4184., 'cm^-1': 1.0/11.962}[Eunits]
except KeyError:
raise Exception('Invalid value "{0}" for Eunits parameter.'.format(Eunits))
# Determine height required for drawing
Eheight = self.__getTextSize('0.0', format=format)[3] + 6
y_E0 = (E0max - 0.0) * Eslope + padding + Eheight
height = (E0max - E0min) * Eslope + 2 * padding + Eheight + 6
for i in range(len(wells)):
if 0.001 * wells[i].E0 == E0min:
height += labelRects[i][3]
break
# Determine naive position of each well (one per column)
coordinates = numpy.zeros((len(wells), 2), numpy.float64)
x = padding
for i in range(len(wells)):
well = wells[i]
rect = labelRects[i]
thisWellWidth = max(wellWidth, rect[2])
E0 = 0.001 * well.E0
y = y_E0 - E0 * Eslope
coordinates[i] = [x + 0.5 * thisWellWidth, y]
x += thisWellWidth + wellSpacing
width = x + padding - wellSpacing
# Determine the rectangles taken up by each well
# We'll use this to merge columns safely so that wells don't overlap
wellRects = []
for i in range(len(wells)):
l, t, w, h = labelRects[i]
x, y = coordinates[i,:]
if w < wellWidth: w = wellWidth
t -= 6 + Eheight
h += 6 + Eheight
wellRects.append([l + x - 0.5 * w, t + y + 6, w, h])
# Squish columns together from the left where possible until an isomer is encountered
oldLeft = numpy.min(coordinates[:,0])
Nleft = wells.index(network.isomers[0])-1
columns = []
for i in range(Nleft, -1, -1):
top = wellRects[i][1]
bottom = top + wellRects[i][3]
for j in range(len(columns)):
for c in columns[j]:
top0 = wellRects[c][1]
bottom0 = top0 + wellRects[c][3]
if (top >= top0 and top <= bottom0) or (top <= top0 and top0 <= bottom):
# Can't put it in this column
break
else:
# Can put it in this column
columns[j].append(i)
break
else:
# Needs a new column
columns.append([i])
for column in columns:
columnWidth = max([wellRects[c][2] for c in column])
x = coordinates[column[0]+1,0] - 0.5 * wellRects[column[0]+1][2] - wellSpacing - 0.5 * columnWidth
for c in column:
delta = x - coordinates[c,0]
wellRects[c][0] += delta
coordinates[c,0] += delta
newLeft = numpy.min(coordinates[:,0])
coordinates[:,0] -= newLeft - oldLeft
# Squish columns together from the right where possible until an isomer is encountered
Nright = wells.index(network.isomers[-1])+1
columns = []
for i in range(Nright, len(wells)):
top = wellRects[i][1]
bottom = top + wellRects[i][3]
for j in range(len(columns)):
for c in columns[j]:
top0 = wellRects[c][1]
bottom0 = top0 + wellRects[c][3]
if (top >= top0 and top <= bottom0) or (top <= top0 and top0 <= bottom):
# Can't put it in this column
break
else:
# Can put it in this column
columns[j].append(i)
break
else:
# Needs a new column
columns.append([i])
for column in columns:
columnWidth = max([wellRects[c][2] for c in column])
x = coordinates[column[0]-1,0] + 0.5 * wellRects[column[0]-1][2] + wellSpacing + 0.5 * columnWidth
for c in column:
delta = x - coordinates[c,0]
wellRects[c][0] += delta
coordinates[c,0] += delta
width = max([rect[2]+rect[0] for rect in wellRects]) - min([rect[0] for rect in wellRects]) + 2 * padding
# Draw to the final surface
surface = createNewSurface(format=format, path=path, width=width, height=height)
cr = cairo.Context(surface)
# Some global settings
cr.select_font_face("sans")
cr.set_font_size(self.options['fontSizeNormal'])
# Fill the background with white
cr.set_source_rgba(1.0, 1.0, 1.0, 1.0)
cr.paint()
# # DEBUG: Draw well bounding rectangles
# cr.save()
# cr.set_line_width(1.0)
# for rect in wellRects:
# cr.rectangle(*rect)
# cr.set_source_rgba(0.0, 0.0, 1.0, 0.5)
# cr.stroke()
# cr.restore()
# Draw path reactions
for rxn in network.pathReactions:
for reac in range(len(wells)):
if wells[reac].species == rxn.reactants:
break
else:
raise Exception
for prod in range(len(wells)):
if wells[prod].species == rxn.products:
break
else:
raise Exception
E0_reac = wells[reac].E0 * 0.001 - E0_offset
E0_prod = wells[prod].E0 * 0.001 - E0_offset
E0_TS = rxn.transitionState.conformer.E0.value_si * 0.001 - E0_offset
if reac < prod:
x1, y1 = coordinates[reac,:]
x2, y2 = coordinates[prod,:]
else:
x1, y1 = coordinates[prod,:]
x2, y2 = coordinates[reac,:]
x1 += wellSpacing / 2.0; x2 -= wellSpacing / 2.0
if abs(E0_TS - E0_reac) > 0.1 and abs(E0_TS - E0_prod) > 0.1:
if len(rxn.reactants) == 2:
if reac < prod: x0 = x1 + wellSpacing * 0.5
else: x0 = x2 - wellSpacing * 0.5
elif len(rxn.products) == 2:
if reac < prod: x0 = x2 - wellSpacing * 0.5
else: x0 = x1 + wellSpacing * 0.5
else:
x0 = 0.5 * (x1 + x2)
y0 = y_E0 - (E0_TS + E0_offset) * Eslope
width1 = (x0 - x1)
width2 = (x2 - x0)
# Draw horizontal line for TS
cr.set_source_rgba(0.0, 0.0, 0.0, 1.0)
cr.set_line_width(2.0)
cr.move_to(x0 - TSwidth/2.0, y0)
cr.line_to(x0+TSwidth/2.0, y0)
cr.stroke()
# Add background and text for energy
E0 = "{0:.1f}".format(E0_TS * 1000. * Emult)
extents = cr.text_extents(E0)
x = x0 - extents[2] / 2.0; y = y0 - 6.0
cr.rectangle(x + extents[0] - 2.0, y + extents[1] - 2.0, extents[2] + 4.0, extents[3] + 4.0)
cr.set_source_rgba(1.0, 1.0, 1.0, 0.75)
cr.fill()
cr.move_to(x, y)
cr.set_source_rgba(0.0, 0.0, 0.0, 1.0)
cr.show_text(E0)
# Draw Bezier curve connecting reactants and products through TS
cr.set_source_rgba(0.0, 0.0, 0.0, 0.5)
cr.set_line_width(1.0)
cr.move_to(x1, y1)
cr.curve_to(x1 + width1/8.0, y1, x0 - width1/8.0 - TSwidth/2.0, y0, x0 - TSwidth/2.0, y0)
cr.move_to(x0 + TSwidth/2.0, y0)
cr.curve_to(x0 + width2/8.0 + TSwidth/2.0, y0, x2 - width2/8.0, y2, x2, y2)
cr.stroke()
else:
width = (x2 - x1)
# Draw Bezier curve connecting reactants and products through TS
cr.set_source_rgba(0.0, 0.0, 0.0, 0.5)
cr.set_line_width(1.0)
cr.move_to(x1, y1)
cr.curve_to(x1 + width/4.0, y1, x2 - width/4.0, y2, x2, y2)
cr.stroke()
# Draw wells (after path reactions so that they are on top)
for i, well in enumerate(wells):
x0, y0 = coordinates[i,:]
# Draw horizontal line for well
cr.set_line_width(4.0)
cr.move_to(x0 - wellWidth/2.0, y0)
cr.line_to(x0 + wellWidth/2.0, y0)
cr.set_source_rgba(0.0, 0.0, 0.0, 1.0)
cr.stroke()
# Add background and text for energy
E0 = well.E0 * 0.001 - E0_offset
E0 = "{0:.1f}".format(E0 * 1000. * Emult)
extents = cr.text_extents(E0)
x = x0 - extents[2] / 2.0; y = y0 - 6.0
cr.rectangle(x + extents[0] - 2.0, y + extents[1] - 2.0, extents[2] + 4.0, extents[3] + 4.0)
cr.set_source_rgba(1.0, 1.0, 1.0, 0.75)
cr.fill()
cr.move_to(x, y)
cr.set_source_rgba(0.0, 0.0, 0.0, 1.0)
cr.show_text(E0)
# Draw background and text for label
x = x0 - 0.5 * labelRects[i][2]
y = y0 + 6
cr.rectangle(x, y, labelRects[i][2], labelRects[i][3])
cr.set_source_rgba(1.0, 1.0, 1.0, 0.75)
cr.fill()
self.__drawLabel(well, cr, x, y, format=format)
# Finish Cairo drawing
if format == 'png':
surface.write_to_png(path)
else:
surface.finish()
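A minimal sketch of the Cairo pattern the drawer relies on, a horizontal well line with an energy label above it; cairocffi is assumed, and pycairo exposes the same calls. The coordinates and label are made up.

import cairocffi as cairo

surface = cairo.ImageSurface(cairo.FORMAT_ARGB32, 200, 100)
cr = cairo.Context(surface)
cr.set_source_rgba(1.0, 1.0, 1.0, 1.0)  # white background
cr.paint()
cr.set_source_rgba(0.0, 0.0, 0.0, 1.0)
cr.set_line_width(4.0)
cr.move_to(50, 60)                       # horizontal line marking a well
cr.line_to(150, 60)
cr.stroke()
cr.select_font_face("sans")
cr.set_font_size(10)
cr.move_to(85, 50)
cr.show_text("-17.7")                    # energy label above the line
surface.write_to_png("well.png")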
0
Example 50
def _fit_liblinear(X, y, C, fit_intercept, intercept_scaling, class_weight,
penalty, dual, verbose, max_iter, tol,
random_state=None, multi_class='ovr',
loss='logistic_regression', epsilon=0.1,
sample_weight=None):
"""Used by Logistic Regression (and CV) and LinearSVC.
Preprocessing is done in this function before supplying it to liblinear.
Parameters
----------
X : {array-like, sparse matrix}, shape (n_samples, n_features)
Training vector, where n_samples is the number of samples and
n_features is the number of features.
y : array-like, shape (n_samples,)
Target vector relative to X
C : float
Inverse of regularization strength; the lower the C, the
stronger the penalization.
fit_intercept : bool
Whether or not to fit the intercept, that is, to add an intercept
term to the decision function.
intercept_scaling : float
LibLinear internally penalizes the intercept and this term is subject
to regularization just like the other terms of the feature vector.
In order to avoid this, one should increase intercept_scaling,
such that the feature vector becomes [x, intercept_scaling].
class_weight : {dict, 'balanced'}, optional
Weights associated with classes in the form ``{class_label: weight}``.
If not given, all classes are supposed to have weight one. For
multi-output problems, a list of dicts can be provided in the same
order as the columns of y.
The "balanced" mode uses the values of y to automatically adjust
weights inversely proportional to class frequencies in the input data
as ``n_samples / (n_classes * np.bincount(y))``
penalty : str, {'l1', 'l2'}
The norm of the penalty used in regularization.
dual : bool
Dual or primal formulation.
verbose : int
Set verbose to any positive number for verbosity.
max_iter : int
Number of iterations.
tol : float
Stopping condition.
random_state : int seed, RandomState instance, or None (default)
The seed of the pseudo random number generator to use when
shuffling the data.
multi_class : str, {'ovr', 'crammer_singer'}
`ovr` trains n_classes one-vs-rest classifiers, while `crammer_singer`
optimizes a joint objective over all classes.
While `crammer_singer` is interesting from a theoretical perspective
as it is consistent, it is seldom used in practice, rarely leads to
better accuracy, and is more expensive to compute.
If `crammer_singer` is chosen, the options loss, penalty and dual will
be ignored.
loss : str, {'logistic_regression', 'hinge', 'squared_hinge',
'epsilon_insensitive', 'squared_epsilon_insensitive'}
The loss function used to fit the model.
epsilon : float, optional (default=0.1)
Epsilon parameter in the epsilon-insensitive loss function. Note
that the value of this parameter depends on the scale of the target
variable y. If unsure, set epsilon=0.
sample_weight : array-like, optional
Weights assigned to each sample.
Returns
-------
coef_ : ndarray, shape (n_features, n_features + 1)
The coefficient vector obtained by minimizing the objective function.
intercept_ : float
The intercept term added to the vector.
n_iter_ : int
Maximum number of iterations run across all classes.
"""
if loss not in ['epsilon_insensitive', 'squared_epsilon_insensitive']:
enc = LabelEncoder()
y_ind = enc.fit_transform(y)
classes_ = enc.classes_
if len(classes_) < 2:
raise ValueError("This solver needs samples of at least 2 classes"
" in the data, but the data contains only one"
" class: %r" % classes_[0])
class_weight_ = compute_class_weight(class_weight, classes_, y)
else:
class_weight_ = np.empty(0, dtype=np.float64)
y_ind = y
liblinear.set_verbosity_wrap(verbose)
rnd = check_random_state(random_state)
if verbose:
print('[LibLinear]', end='')
# LinearSVC breaks when intercept_scaling is <= 0
bias = -1.0
if fit_intercept:
if intercept_scaling <= 0:
raise ValueError("Intercept scaling is %r but needs to be greater than 0."
" To disable fitting an intercept,"
" set fit_intercept=False." % intercept_scaling)
else:
bias = intercept_scaling
libsvm.set_verbosity_wrap(verbose)
libsvm_sparse.set_verbosity_wrap(verbose)
liblinear.set_verbosity_wrap(verbose)
# LibLinear wants targets as doubles, even for classification
y_ind = np.asarray(y_ind, dtype=np.float64).ravel()
if sample_weight is None:
sample_weight = np.ones(X.shape[0])
else:
sample_weight = np.array(sample_weight, dtype=np.float64, order='C')
check_consistent_length(sample_weight, X)
solver_type = _get_liblinear_solver_type(multi_class, penalty, loss, dual)
raw_coef_, n_iter_ = liblinear.train_wrap(
X, y_ind, sp.isspmatrix(X), solver_type, tol, bias, C,
class_weight_, max_iter, rnd.randint(np.iinfo('i').max),
epsilon, sample_weight)
# Regarding rnd.randint(..) in the above signature:
# seed for srand in range [0..INT_MAX); due to limitations in Numpy
# on 32-bit platforms, we can't get to the UINT_MAX limit that
# srand supports
n_iter_ = max(n_iter_)
if n_iter_ >= max_iter and verbose > 0:
warnings.warn("Liblinear failed to converge, increase "
"the number of iterations.", ConvergenceWarning)
if fit_intercept:
coef_ = raw_coef_[:, :-1]
intercept_ = intercept_scaling * raw_coef_[:, -1]
else:
coef_ = raw_coef_
intercept_ = 0.
return coef_, intercept_, n_iter_
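_fit_liblinear is the private workhorse behind scikit-learn's liblinear-based estimators. A minimal sketch showing how its parameters surface through the public LinearSVC interface; the toy data and settings are illustrative.

import numpy as np
from sklearn.datasets import make_classification
from sklearn.svm import LinearSVC

X, y = make_classification(n_samples=200, n_features=20, random_state=0)

# C, penalty, loss, dual, tol and max_iter map onto the signature above
clf = LinearSVC(C=1.0, penalty='l2', loss='squared_hinge', dual=True,
                tol=1e-4, max_iter=1000)
clf.fit(X, y)
print(clf.coef_.shape, clf.intercept_, clf.n_iter_)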