Here are examples of the Python API numpy.argsort, taken from open source projects.
174 Examples
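Before the project examples, a minimal sketch of what numpy.argsort does: it returns the indices that would sort an array, so indexing the array with that result yields the sorted values. The data below is purely illustrative.

import numpy as np

a = np.array([3.0, 1.0, 2.0])
order = np.argsort(a)   # indices that would sort a -> array([1, 2, 0])
print(a[order])         # [1. 2. 3.], the same values np.sort(a) would give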
Example 101
Project: GPflow Source File: FITCvsVFE.py
def snelsonDemo():
from matplotlib import pyplot as plt
from IPython import embed
xtrain,ytrain,xtest,ytest = getTrainingTestData()
#run exact inference on training data.
exact_model = getRegressionModel(xtrain,ytrain)
exact_model.optimize(maxiter= 2000000, tol=tol)
figA, axes = plt.subplots(1,1)
inds = np.argsort( xtrain.flatten() )
axes.plot( xtrain[inds,:], ytrain[inds,:], 'ro' )
plotPredictions( axes, exact_model, 'g', None )
figB, axes = plt.subplots(3,2)
#run sparse model on training data initialized from exact optimal solution.
VFEmodel, VFEcb = trainSparseModel(xtrain,ytrain,exact_model,False,xtest,ytest)
FITCmodel, FITCcb = trainSparseModel(xtrain,ytrain,exact_model,True,xtest,ytest)
print "Exact model parameters \n"
printModelParameters( exact_model )
print "Sparse model parameters for VFE optimization \n"
printModelParameters( VFEmodel )
print "Sparse model parameters for FITC optimization \n"
printModelParameters( FITCmodel )
VFEiters = FITCcb.n_iters
VFElog_likelihoods = stretch( len(VFEiters), VFEcb.log_likelihoods )
VFEhold_out_likelihood = stretch( len(VFEiters), VFEcb.hold_out_likelihood )
plotComparisonFigure( xtrain, VFEmodel, exact_model, axes[0,0], axes[1,0], axes[2,0], VFEiters, VFElog_likelihoods.tolist(), VFEhold_out_likelihood.tolist(), "VFE" )
plotComparisonFigure( xtrain, FITCmodel, exact_model, axes[0,1], axes[1,1], axes[2,1],FITCcb.n_iters, FITCcb.log_likelihoods, FITCcb.hold_out_likelihood , "FITC" )
axes[0,0].set_title('VFE', loc='center', fontdict = {'fontsize': 22} )
axes[0,1].set_title('FITC', loc='center', fontdict = {'fontsize': 22} )
embed()
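The argsort call in this example is the usual trick for plotting scattered 1-D data as an ordered curve: compute the ordering of x once and reuse the same index array to reorder y. A self-contained sketch with synthetic data (no GPflow model involved):

import numpy as np
import matplotlib.pyplot as plt

x = np.random.rand(50, 1)                        # unsorted inputs
y = np.sin(6 * x) + 0.1 * np.random.randn(50, 1)
inds = np.argsort(x.flatten())                   # ordering of the flattened x values
plt.plot(x[inds, :], y[inds, :], 'ro')           # both arrays reordered consistently
plt.show()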
Example 102
def remove_duplicates(data):
"""
NAME:
remove_duplicates
PURPOSE:
remove duplicates from an array
INPUT:
data - array
OUTPUT:
array w/ duplicates removed
HISTORY:
2014-06-23 - Written - Bovy (IAS)
"""
if not _ESUTIL_LOADED:
raise ImportError("apogee.tools.read.remove_duplicates function requires the esutil module for catalog matching")
tdata= copy.copy(data)
#Match the data against itself
if _ESUTIL_VERSION[1] >= 5 and _ESUTIL_VERSION >= 3:
h= esutil.htm.Matcher(10,data['RA'],data['DEC'])
m1,m2,d12 = h.match(data['RA'],data['DEC'],
2./3600.,maxmatch=0) #all matches
else:
h=esutil.htm.HTM()
htmrev2,minid,maxid = h.match_prepare(data['RA'],data['DEC'])
m1,m2,d12 = h.match(data['RA'],data['DEC'],
data['RA'],data['DEC'],
2./3600.,maxmatch=0, #all matches
htmrev2=htmrev2,minid=minid,maxid=maxid)
sindx= numpy.argsort(m1)
sm1= m1[sindx]
dup= sm1[1:] == sm1[:-1]
for d in tqdm.tqdm(sm1[:-1][dup]):
#Find the matches for just this duplicate
if _ESUTIL_VERSION[1] >= 5 and _ESUTIL_VERSION >= 3:
nm1,nm2,nd12= h.match(data['RA'][d],data['DEC'][d],
2./3600.,maxmatch=0) #all matches
else:
nm1,nm2,nd12= h.match(data['RA'][d],data['DEC'][d],
data['RA'],data['DEC'],
2./3600.,maxmatch=0, #all matches
htmrev2=htmrev2,minid=minid,maxid=maxid)
#If some matches are commissioning data or have bad ak, rm from consideration
comindx= numpy.array(['apogee.n.c'.encode('utf-8') in s for s in data['APSTAR_ID'][nm2]])
comindx+= numpy.array(['apogee.s.c'.encode('utf-8') in s for s in data['APSTAR_ID'][nm2]])
goodak= (True-numpy.isnan(data['AK_TARG'][nm2]))\
*(data['AK_TARG'][nm2] > -50.)
hisnr= numpy.argmax(data['SNR'][nm2]*(True-comindx)*goodak) #effect. make com zero SNR
if numpy.amax(data['SNR'][nm2]*(True-comindx)*goodak) == 0.: #all commissioning or bad ak, treat all equally
hisnr= numpy.argmax(data['SNR'][nm2])
tindx= numpy.ones(len(nm2),dtype='bool')
tindx[hisnr]= False
tdata['RA'][nm2[tindx]]= -9999
return tdata[tdata['RA'] != -9999]
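The duplicate detection above hinges on one argsort idiom: sort the match indices, then compare each element with its predecessor; equal neighbours flag duplicated entries. A standalone sketch of just that step, with a made-up index array:

import numpy as np

m1 = np.array([4, 1, 4, 2, 1, 1])   # hypothetical match indices
sm1 = m1[np.argsort(m1)]            # [1 1 1 2 4 4]
dup = sm1[1:] == sm1[:-1]           # True wherever a value repeats its predecessor
print(sm1[:-1][dup])                # [1 1 4] -> the values 1 and 4 occur more than once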
Example 103
Project: chaco Source File: ticks.py
def auto_interval ( data_low, data_high, max_ticks=9 ):
""" Calculates the tick interval for a range.
The boundaries for the data to be plotted on the axis are::
data_bounds = (data_low,data_high)
The function chooses the number of tick marks, which can be between
3 and max_ticks marks (including end points), and chooses tick intervals at
1, 2, 2.5, 5, 10, 20, ...
Returns
-------
interval : float
tick mark interval for axis
"""
range = float( data_high ) - float( data_low )
# We'll choose from between 2 and 8 tick marks.
# Preference is given to more ticks:
# Note reverse order and see kludge below...
divisions = arange( max_ticks-1, 2.0, -1.0 ) # for max_ticks=9, ( 7, 6, ..., 3 )
# Calculate the intervals for the divisions:
candidate_intervals = range / divisions
# Get magnitudes and mantissas for each candidate:
magnitudes = 10.0 ** floor( log10( candidate_intervals ) )
mantissas = candidate_intervals / magnitudes
# List of "pleasing" intervals between ticks on graph.
# Only intervals for the first magnitude are listed; higher magnitudes are inferred:
magic_intervals = array( ( 1.0, 2.0, 2.5, 5.0, 10.0 ) )
# Calculate the absolute differences between the candidates
# (with magnitude removed) and the magic intervals:
differences = abs( magic_intervals[:,newaxis] - mantissas )
# Find the division and magic interval combo that produce the
# smallest differences:
# KLUDGE: 'argsort' doesn't preserve the order of equal values,
# so we subtract a small, index dependent amount from each difference
# to force correct ordering.
sh = shape( differences )
small = 2.2e-16 * arange( sh[1] ) * arange( sh[0] )[:,newaxis]
small = small[::-1,::-1] #reverse the order
differences = differences - small
# ? Numeric should allow keyword "axis" ? comment out for now
#best_mantissa = minimum.reduce(differences,axis=0)
#best_magic = minimum.reduce(differences,axis=-1)
best_mantissa = minimum.reduce( differences, 0 )
best_magic = minimum.reduce( differences, -1 )
magic_index = argsort( best_magic )[0]
mantissa_index = argsort( best_mantissa )[0]
# The best interval is the magic_interval multiplied by the magnitude
# of the best mantissa:
interval = magic_intervals[ magic_index ]
magnitude = magnitudes[ mantissa_index ]
result = interval * magnitude
if result == 0.0:
result = finfo(float).eps
return result
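Note that argsort(best_magic)[0] just picks the index of the smallest element; in the absence of exact ties, np.argmin gives the same index without sorting the whole array. A quick sketch of the equivalence (toy values):

import numpy as np

best_magic = np.array([0.7, 0.05, 0.3, 0.2])
print(np.argsort(best_magic)[0])   # 1
print(np.argmin(best_magic))       # 1, same answer without a full sort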
Example 104
def benchmark(clf):
global train_duration, test_duration
print('_' * 80)
print("Training: ")
print(clf)
t0 = time()
if isinstance(clf, (GensimFastText, FastText)):
clf.fit(train_text, y_train)
train_time = time() - t0
else:
clf.fit(X_train, y_train)
train_time = train_duration + (time() - t0)
print("train time: %0.3fs" % train_time)
t0 = time()
if isinstance(clf, (GensimFastText, FastText)):
pred = clf.predict(test_text)
test_time = time() - t0
# fix unknown predictions
pred = [most_freq if p is None else p for p in pred]
else:
pred = clf.predict(X_test)
test_time = test_duration + (time() - t0)
print("test time: %0.3fs" % test_time)
score = metrics.f1_score(y_test, pred, average='macro')
print("macro F1: %0.3f" % score)
if hasattr(clf, 'coef_'):
print("dimensionality: %d" % clf.coef_.shape[1])
print("density: %f" % density(clf.coef_))
if opts.print_top10 and feature_names is not None:
print("top 10 keywords per class:")
for i, category in enumerate(categories):
top10 = np.argsort(clf.coef_[i])[-10:]
print(trim("%s: %s"
% (category, " ".join(feature_names[top10]))))
print()
if opts.print_report:
print("classification report:")
print(metrics.classification_report(y_test, pred,
target_names=categories))
if opts.print_cm:
print("confusion matrix:")
print(metrics.confusion_matrix(y_test, pred))
print()
clf_descr = str(clf).split('(')[0]
return clf_descr, score, train_time, test_time
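The np.argsort(clf.coef_[i])[-10:] line is the standard top-k pattern: argsort ascending, then take the last k indices, which belong to the k largest coefficients. A sketch with invented weights and feature names:

import numpy as np

coef = np.array([0.1, 2.0, -0.5, 1.3, 0.7])
feature_names = np.array(['a', 'b', 'c', 'd', 'e'])
top3 = np.argsort(coef)[-3:]        # indices of the 3 largest weights, ascending
print(feature_names[top3])          # ['e' 'd' 'b']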
Example 105
Project: polara Source File: evaluation.py
def get_ranking_scores(matched_predictions, feedback_data, switch_positive, alternative=True):
users_num, topk, holdout = matched_predictions.shape
ideal_scores_idx = np.argsort(feedback_data, axis=1)[:, ::-1] #returns column index only
ideal_scores_idx = np.ravel_multi_index((np.arange(feedback_data.shape[0])[:, None], ideal_scores_idx), dims=feedback_data.shape)
where = np.ma.where if np.ma.is_masked(feedback_data) else np.where
is_positive = feedback_data >= switch_positive
positive_feedback = where(is_positive, feedback_data, 0)
negative_feedback = where(~is_positive, -feedback_data, 0)
relevance_scores_pos = (matched_predictions * positive_feedback[:, None, :]).sum(axis=2)
relevance_scores_neg = (matched_predictions * negative_feedback[:, None, :]).sum(axis=2)
ideal_scores_pos = positive_feedback.ravel()[ideal_scores_idx]
ideal_scores_neg = negative_feedback.ravel()[ideal_scores_idx]
discount_num = max(holdout, topk)
if alternative:
discount = np.log2(np.arange(2, discount_num+2))
relevance_scores_pos = 2**relevance_scores_pos - 1
relevance_scores_neg = 2**relevance_scores_neg - 1
ideal_scores_pos = 2**ideal_scores_pos - 1
ideal_scores_neg = 2**ideal_scores_neg - 1
else:
discount = np.hstack([1, np.log(np.arange(2, discount_num+1))])
dcg = (relevance_scores_pos / discount[:topk]).sum(axis=1)
dcl = (relevance_scores_neg / -discount[:topk]).sum(axis=1)
idcg = (ideal_scores_pos / discount[:holdout]).sum(axis=1)
idcl = (ideal_scores_neg / -discount[:holdout]).sum(axis=1)
ndcg = unmask(np.nansum(dcg / idcg) / users_num)
ndcl = unmask(np.nansum(dcl / idcl) / users_num)
ranking_score = namedtuple('Ranking', ['nDCG', 'nDCL'])._make([ndcg, ndcl])
return ranking_score
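Two details are worth isolating from this function: np.argsort(feedback_data, axis=1)[:, ::-1] sorts every row in descending order but returns only column indices, and np.ravel_multi_index turns those per-row columns into flat indices so the values can be gathered with a single ravel()[...]. A minimal sketch of that row-wise gather on a toy matrix:

import numpy as np

feedback = np.array([[3., 1., 2.],
                     [0., 5., 4.]])
cols = np.argsort(feedback, axis=1)[:, ::-1]      # per-row column order, best first
flat = np.ravel_multi_index(
    (np.arange(feedback.shape[0])[:, None], cols), dims=feedback.shape)
print(feedback.ravel()[flat])    # [[3. 2. 1.]
                                 #  [5. 4. 0.]]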
Example 106
Project: tractor Source File: ipes.py
def ipe_err_plots(X1, Y1, X2, Y2, tt, xlim, ps, semilogx=True):
siglim = -6,6
plt.clf()
plt.hist(Y1, 50, range=siglim, histtype='step', color='r')
plt.hist(Y2, 50, range=siglim, histtype='step', color='b')
plt.xlim(siglim)
plt.xlabel('Inter-ipe difference / error')
plt.title(tt)
ps.savefig()
plt.clf()
plt.subplots_adjust(hspace=0.25, top=0.95)
plt.subplot(2,1,1)
red = (1., 0.2, 0.2)
if semilogx:
plt.semilogx(X1, Y1, '.', color=red, alpha=0.5)
else:
plt.plot(X1, Y1, '.', color=red, alpha=0.5)
plt.title('SDSS Photo')
plt.subplot(2,1,2)
blue = (0.3,0.3,1.0)
if semilogx:
plt.semilogx(X2, Y2, '.', color=blue, alpha=0.5)
else:
plt.plot(X2, Y2, '.', color=blue, alpha=0.5)
plt.title('Tractor')
for i,x,y in [(1,X1,Y1), (2,X2,Y2)]:
plt.subplot(2,1,i)
I = np.argsort(x)
S = np.linspace(0, len(x), 11).astype(int)
xm,ym,ys = [],[],[]
ysigs = []
for ilo,ihi in zip(S[:-1], S[1:]):
J = I[ilo:ihi]
xm.append(np.mean(x[J]))
ym.append(np.median(y[J]))
ysigs.append([np.percentile(y[J], scipy.stats.norm.cdf(s)*100)
for s in [-2,-1,1,2]])
ysigs = np.array(ysigs)
xm = np.array(xm)
y1,y2,y3,y4 = ysigs.T
W = 1.03
plt.plot(np.vstack([xm/W,xm*W,xm*W,xm/W,xm/W]), np.vstack([y2,y2,y3,y3,y2]), 'k-', lw=1, alpha=0.75)
plt.plot(np.vstack([xm,xm]), np.vstack([y1,y2]), 'k-', lw=1)
plt.plot(np.vstack([xm,xm]), np.vstack([y3,y4]), 'k-', lw=1)
W = 1.02
plt.plot(np.vstack([xm/W,xm*W]), np.vstack([ym,ym]), 'k-', lw=2)
plt.plot(np.vstack([xm/W,xm*W]), np.vstack([y2,y2]), 'k-', lw=2)
plt.plot(np.vstack([xm/W,xm*W]), np.vstack([y3,y3]), 'k-', lw=2)
plt.plot(xm, y1, 'ko', mec='k', mfc='k', ms=3)
plt.plot(xm, y4, 'ko', mec='k', mfc='k', ms=3)
plt.xlim(xlim)
plt.ylim(siglim)
for s in np.sqrt(2.) * np.arange(-2, 2+1):
plt.axhline(s, color='k', alpha=0.25 + (s==0)*0.25)
plt.ylabel('Inter-ipe difference / error')
plt.xlabel(tt)
ps.savefig()
plt.clf()
plt.subplots_adjust(hspace=0.25, top=0.95)
plt.subplot(2,1,1)
if semilogx:
plt.semilogx(X1, Y1, '.', color=red, alpha=0.5)
else:
plt.plot(X1, Y1, '.', color=red, alpha=0.5)
plt.title('SDSS Photo')
plt.subplot(2,1,2)
if semilogx:
plt.semilogx(X2, Y2, '.', color=blue, alpha=0.5)
else:
plt.plot(X2, Y2, '.', color=blue, alpha=0.5)
plt.title('Tractor')
for i,x,y in [(1,X1,Y1), (2,X2,Y2)]:
plt.subplot(2,1,i)
I = np.argsort(x)
S = np.linspace(0, len(x), 11).astype(int)
xm,ym = [],[]
ysigs = []
for ilo,ihi in zip(S[:-1], S[1:]):
J = I[ilo:ihi]
xm.append(np.mean(x[J]))
ym.append(np.median(y[J]))
ysigs.append([np.percentile(y[J], scipy.stats.norm.cdf(s)*100)
for s in [-2,-1,1,2]])
ysigs = np.array(ysigs)
xm = np.array(xm)
y1,y2,y3,y4 = ysigs.T
plt.plot(xm, ym, 'ko', ms=5)
plt.plot(xm, y2, 'ks', mec='k', mfc='none', mew=1.5, ms=5)
plt.plot(xm, y3, 'ks', mec='k', mfc='none', mew=1.5, ms=5)
plt.plot(xm, y1, 'kv', mec='k', mfc='none', mew=1.5, ms=5)
plt.plot(xm, y4, 'k^', mec='k', mfc='none', mew=1.5, ms=5)
plt.xlim(xlim)
plt.ylim(siglim)
for s in np.sqrt(2.) * np.arange(-2, 2+1):
plt.axhline(s, color='k', alpha=0.25 + (s==0)*0.25)
plt.ylabel('Inter-ipe difference / error')
plt.xlabel(tt)
ps.savefig()
Example 107
Project: kaggle-dr Source File: align_util.py
def has_notch(self, radius_additive=OUTER_RADIUS_FULL_SIZE_PHOTO):
"""
Checks if there is an identification tab in the top right of the
image. Tries to achieve few false positives.
TODO: check for short substring of 1's in thresholded image
to indicate peak, will help discard false positives
"""
perim_info = self.scan_perimeter_intensity(self.i_img_center, self.j_img_center, self.I, radius_additive)
top_perim_info = perim_info[:3,:]
# re-sort top_perim_info by degrees
top_perim_info = top_perim_info[np.argsort(top_perim_info[:,0]),:]
top_degrees = top_perim_info[:,0]
top_ij = np.array(top_perim_info[:,1:3], dtype=int)
top_intensities = top_perim_info[:,3]
# check that at least 2 points are adjacent
diffs = np.diff(top_degrees)
if not (diffs.min() == 1):
return 0
back_two = diffs.tolist().index(1)
adjacents = slice(1,3) if back_two else slice(0,2)
# check that at least 2 top points are on the right side of img
shifted_top_degrees = (top_degrees + 90) % 360
if not (sum(shifted_top_degrees < 180) >= 2):
return 0
# check that level of adjacents is high enough
adjacent_ijs = top_ij[adjacents,:]
adjacent_mean_intensities = self.I[adjacent_ijs.T[0], adjacent_ijs.T[1],:].mean(axis=1)
if not sum(adjacent_mean_intensities > 15) > 0:
return 0
return 1
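Re-sorting a 2-D array by one of its columns, as done for top_perim_info above, is a one-liner: argsort the column, then index the rows with the result. A toy sketch (values invented):

import numpy as np

perim = np.array([[270., 10., 12., 0.8],
                  [ 90., 11., 13., 0.9],
                  [180., 12., 14., 0.7]])
perim_sorted = perim[np.argsort(perim[:, 0]), :]   # rows reordered by the first column
print(perim_sorted[:, 0])                          # [ 90. 180. 270.]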
Example 108
def sortrows(a, i=0, index_out=False, recurse=True):
""" Sorts array "a" by columns i
Parameters
------------
a : np.ndarray
array to be sorted
i : int (optional)
column to be sorted by, taken as 0 by default
index_out : bool (optional)
return the index I such that a(I) = sortrows(a,i). Default = False
recurse : bool (optional)
recursively sort by each of the columns. i.e.
once column i is sort, we sort the smallest column number
etc. True by default.
Returns
--------
a : np.ndarray
The array 'a' sorted in ascending order by column i
I : np.ndarray (optional)
The index such that a[I, :] = sortrows(a, i). Only return if
index_out = True
Examples
---------
>>> a = array([[1,2],[3,1],[2,3]])
>>> b = sortrows(a,0)
>>> b
array([[1, 2],
[2, 3],
[3, 1]])
>>> c, I = sortrows(a,1,True)
>>> c
array([[3, 1],
[1, 2],
[2, 3]])
>>> I
array([1, 0, 2])
>>> a[I,:] - c
array([[0, 0],
[0, 0],
[0, 0]])
"""
I = np.argsort(a[:, i])
a = a[I, :]
# We recursively call sortrows to make sure it is sorted best by every
# column
if recurse & (len(a[0]) > i + 1):
for b in np.unique(a[:, i]):
ids = a[:, i] == b
colids = range(i) + range(i+1, len(a[0]))
a[np.ix_(ids, colids)], I2 = sortrows(a[np.ix_(ids, colids)],
0, True, True)
I[ids] = I[np.nonzero(ids)[0][I2]]
if index_out:
return a, I
else:
return a
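For the common case of sorting rows by several columns at once, np.lexsort achieves the same effect as the recursion above in a single call; its last key is the primary sort key. A brief sketch:

import numpy as np

a = np.array([[1, 2],
              [3, 1],
              [2, 3],
              [1, 1]])
order = np.lexsort((a[:, 1], a[:, 0]))   # sort by column 0, break ties with column 1
print(a[order])
# [[1 1]
#  [1 2]
#  [2 3]
#  [3 1]]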
Example 109
def compute_correlograms(spiketimes,
clusters,
clusters_to_update=None,
ncorrbins=None,
corrbin=None,
sample_rate=None,
):
if ncorrbins is None:
ncorrbins = NCORRBINS_DEFAULT
if corrbin is None:
corrbin = CORRBIN_DEFAULT
# Sort spiketimes for the computation of the CCG.
ind = np.argsort(spiketimes)
spiketimes = spiketimes[ind]
clusters = clusters[ind]
window_size = corrbin * ncorrbins
# unique clusters
clusters_unique = np.unique(clusters)
# clusters to update
if clusters_to_update is None:
clusters_to_update = clusters_unique
# Select requested clusters.
ind = np.in1d(clusters, clusters_to_update)
spiketimes = spiketimes[ind]
clusters = clusters[ind]
assert spiketimes.shape == clusters.shape
assert np.all(np.in1d(clusters, clusters_to_update))
assert sample_rate > 0.
assert 0 < corrbin < window_size
C = correlograms(spiketimes,
clusters,
cluster_ids=clusters_to_update,
sample_rate=sample_rate,
bin_size=corrbin,
window_size=window_size,
)
dic = {(c0, c1): C[i, j, :]
for i, c0 in enumerate(clusters_to_update)
for j, c1 in enumerate(clusters_to_update)}
return dic
Example 110
Project: MagicCube Source File: simple_cube.py
def draw_cube(self, rot=None, zloc=None):
"""Draw a cube on the axes.
The first time this is called, it will create a set of polygons
representing the cube faces. On subsequent calls, it will update
these polygon faces with a given rotation and observer location.
Parameters
----------
rot : Quaternion object
The quaternion representing the rotation
zloc : float
The location of the observer on the z-axis (adjusts perspective)
"""
if rot is None:
rot = self.current_rot
if zloc is None:
zloc = self.current_zloc
self.current_rot = rot
self.current_zloc = zloc
if self._cube_poly is None:
self._cube_poly = [plt.Polygon(self.faces[i, :, :2],
facecolor=self.stickercolors[i],
alpha=0.9)
for i in range(6)]
[self.add_patch(self._cube_poly[i]) for i in range(6)]
faces = self.project_points(self.faces, rot, zloc)
zorder = np.argsort(np.argsort(faces[:, :4, 2].sum(1)))
[self._cube_poly[i].set_zorder(10 * zorder[i]) for i in range(6)]
[self._cube_poly[i].set_xy(faces[i, :, :2]) for i in range(6)]
self.figure.canvas.draw()
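The nested call np.argsort(np.argsort(...)) is the classic rank trick: the inner argsort gives the ordering, the outer one converts it into each element's rank, which is exactly what a z-order needs. A small sketch with made-up face depths:

import numpy as np

depth = np.array([0.3, 0.9, 0.1, 0.5])
rank = np.argsort(np.argsort(depth))   # rank of each face by depth
print(rank)                            # [1 3 0 2]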
Example 111
Project: mtpy Source File: niblettbostick.py
def interpolate_strike_angles(angles, in_periods):
"""
expect 2 arrays
1. sort ascending by periods
2. loop over angles to find 'nan' values (i.e. 1D layers)
3. determine linear interpolation between bounding 2D strike angles
4. if 1D on top or bottom, set to 0 degrees
"""
new_angles = copy.copy(angles)
#sort in ascending order:
orig_sorting = np.argsort(in_periods)
back_sorting = np.argsort(orig_sorting)
angles = angles[orig_sorting]
periods = in_periods[orig_sorting]
in_line = 0
while in_line < len(angles):
curr_per = periods[in_line]
curr_ang = angles[in_line]
if np.isnan(curr_ang):
if in_line in [0, len(angles)-1]:
new_angles[in_line] = 0.
in_line += 1
continue
#otherwise:
#use value before current one:
ang1 = new_angles[in_line - 1]
per1 = periods[in_line - 1]
#check for next non-nan:
ang2 = None
jj = in_line
print in_line
while jj < len(angles):
jj += 1
if not np.isnan(angles[jj]):
per2 = periods[jj]
ang2 = angles[jj]
break
#catch case of all nan:
if ang2 is None:
ang2 = 0.
delta_per = per2-per1
delta_ang = ang2-ang1
per_step = periods[in_line] - per1
new_angles[in_line] = ang1 + delta_ang/delta_per * per_step
print jj
in_line += 1
#asserting correct order (same as input) of the angles:
return new_angles[back_sorting]
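The orig_sorting / back_sorting pair above is the standard way to process data in sorted order and then restore the caller's ordering: the argsort of a permutation is its inverse. A compact sketch with toy values:

import numpy as np

periods = np.array([10., 1., 100.])
values = np.array([3., 1., 7.])
orig_sorting = np.argsort(periods)
back_sorting = np.argsort(orig_sorting)   # inverse permutation
processed = values[orig_sorting] * 2      # work in ascending-period order
print(processed[back_sorting])            # [ 6.  2. 14.], back in the input order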
Example 112
Project: C-PAC Source File: python_ncut_lib.py
def ncut( W, nbEigenValues ):
# parameters
offset=.5
maxiterations=100
eigsErrorTolerence=1e-6
truncMin=1e-6
eps=2.2204e-16
m=shape(W)[1]
# make sure that W is symmetric, this is a computationally expensive operation, only use for debugging
#if (W-W.transpose()).sum() != 0:
# print "W should be symmetric!"
# exit(0)
# degrees and regularization
# S Yu Understanding Popout through Repulsion CVPR 2001
# Allows negative values as well as improves invertability
# of d for small numbers
# i bet that this is what improves the stability of the eigen
d=abs(W).sum(0)
dr=0.5*(d-W.sum(0))
d=d+offset*2
dr=dr+offset
# calculation of the normalized LaPlacian
W=W+spdiags(dr,[0],m,m,"csc")
Dinvsqrt=spdiags((1.0/sqrt(d+eps)),[0],m,m,"csc")
P=Dinvsqrt*(W*Dinvsqrt);
# perform the eigen decomposition
eigen_val,eigen_vec=eigsh(P,nbEigenValues,maxiter=maxiterations,tol=eigsErrorTolerence,which='LA')
# sort the eigen_vals so that the first
# is the largest
i=argsort(-eigen_val)
eigen_val=eigen_val[i]
eigen_vec=eigen_vec[:,i]
# normalize the returned eigenvectors
eigen_vec=Dinvsqrt*matrix(eigen_vec)
norm_ones=norm(ones((m,1)))
for i in range(0,shape(eigen_vec)[1]):
eigen_vec[:,i]=(eigen_vec[:,i] / norm(eigen_vec[:,i]))*norm_ones
if eigen_vec[0,i] != 0:
eigen_vec[:,i] = -1 * eigen_vec[:,i] * sign( eigen_vec[0,i] )
return(eigen_val, eigen_vec)
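Negating the array before argsort, as in argsort(-eigen_val), is a convenient way to get a descending order while keeping indices that can also reorder the eigenvectors. A small sketch:

import numpy as np

eigen_val = np.array([0.2, 0.9, 0.5])
eigen_vec = np.eye(3)
i = np.argsort(-eigen_val)     # descending: largest eigenvalue first
print(eigen_val[i])            # [0.9 0.5 0.2]
print(eigen_vec[:, i])         # columns reordered the same way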
Example 113
def main():
# faint
#RA,DEC,N = 17.5600, 1.1053, 1
# bright neighbour
#RA,DEC,N = 358.5436, 0.7213, 2
# known; too bright
#RA,DEC,N = 52.64687, -0.42678, 3
# known; z = 19.5; parallax -0.003+-0.142;
# dRA/dt +0.084+-0.010; dDec/dt 0.011+-0.009 [arcsec/yr]
RA,DEC,N = 342.47285, 0.73458, 4
zmag,dradt,ddecdt = 19.5, 0.084, 0.011
# TAI = 4412910179.40 / Number of seconds since Nov 17 1858
# -> deg/year
dradt /= 3600.
ddecdt /= 3600.
pfn = 'faint%i.pickle' % N
if os.path.exists(pfn):
print('Reading pickle', pfn)
X = unpickle_from_file(pfn)
else:
#X = getdata(358.5436, 0.7213)
X = getdata(RA, DEC)
pickle_to_file(X, pfn)
(ims,skies,tais,rcfs) = X
print('Got', len(rcfs), 'fields')
omit = [94, 5759]
uruns = set()
I = []
for i,(r,c,f) in enumerate(rcfs):
if r in uruns:
continue
if r in omit:
print('Omitting run', r)
continue
uruns.add(r)
I.append(i)
print('Cut to', len(uruns), 'unique runs')
rcfs = [rcfs[i] for i in I]
ims = [ims[i] for i in I]
skies = [skies[i] for i in I]
tais = [tais[i] for i in I]
I = np.argsort(tais)
rcfs = [rcfs[i] for i in I]
ims = [ims[i] for i in I]
skies = [skies[i] for i in I]
tais = [tais[i] for i in I]
tai0 = np.mean(tais)
#tractor = Tractor([])
#for i,im in enumerate(ims):
# tractor.addImage(im)
zrs = [np.array([-1.,+5.]) * std + sky for sky,std in skies]
# figsize
fs1 = (4,4)
plotpos0 = [0.01, 0.01, 0.98, 0.92]
imsum = None
slo,shi = None,None
for i,im in enumerate(ims):
zr = zrs[i]
ima = dict(interpolation='nearest', origin='lower',
vmin=zr[0], vmax=zr[1], cmap='gray')
data = im.getImage()
if imsum is None:
imsum = data
slo,shi = zr[0],zr[1]
else:
imsum += data
slo += zr[0]
shi += zr[1]
plt.figure(figsize=fs1)
plt.clf()
plt.gca().set_position(plotpos0)
plt.imshow(data, **ima)
#plt.title('Data %s' % im.name)
plt.xticks([],[])
plt.yticks([],[])
mysavefig('faint-data%02i' % i)
# in yrs
dt = (tais[i] - tai0) / (86400. * 365.25)
tractor = Tractor([])
tractor.addImage(im)
src = PointSource(RaDecPos(RA + dradt * dt, DEC + ddecdt * dt),
Mags(z=zmag))
tractor.addSource(src)
mod = tractor.getModelImage(im)
plt.clf()
plt.gca().set_position(plotpos0)
plt.imshow(mod, **ima)
plt.xticks([],[])
plt.yticks([],[])
mysavefig('faint-mod%02i' % i)
plt.figure(figsize=(8,4))
plt.clf()
plt.subplots_adjust(left=0.01, bottom=0.01, right=0.99, top=0.99,
wspace=0.01, hspace=0)
plt.subplot(1,2,1)
plt.imshow(data, **ima)
plt.xticks([],[])
plt.yticks([],[])
plt.subplot(1,2,2)
plt.imshow(mod, **ima)
plt.xticks([],[])
plt.yticks([],[])
mysavefig('faint-datamod%02i' % i)
plt.clf()
plt.gca().set_position(plotpos0)
plt.imshow(imsum, interpolation='nearest', origin='lower',
vmin=slo, vmax=shi, cmap='gray')
plt.title('Data sum')
plt.xticks([],[])
plt.yticks([],[])
mysavefig('faint-data-sum')
shi = slo + (shi - slo) / np.sqrt(len(ims))
plt.clf()
plt.gca().set_position(plotpos0)
plt.imshow(imsum, interpolation='nearest', origin='lower',
vmin=slo, vmax=shi, cmap='gray')
plt.title('Data sum')
plt.xticks([],[])
plt.yticks([],[])
mysavefig('faint-data-sum2')
plt.clf()
plt.gca().set_position(plotpos0)
plt.imshow(imsum, interpolation='nearest', origin='lower',
cmap='gray')
plt.title('Data sum')
plt.xticks([],[])
plt.yticks([],[])
mysavefig('faint-data-sum3')
Example 114
Project: CMT Source File: CMT.py
def process_frame(self, im_gray):
tracked_keypoints, _ = util.track(self.im_prev, im_gray, self.active_keypoints)
(center, scale_estimate, rotation_estimate, tracked_keypoints) = self.estimate(tracked_keypoints)
# Detect keypoints, compute descriptors
keypoints_cv = self.detector.detect(im_gray)
keypoints_cv, features = self.descriptor.compute(im_gray, keypoints_cv)
# Create list of active keypoints
active_keypoints = zeros((0, 3))
# Get the best two matches for each feature
matches_all = self.matcher.knnMatch(features, self.features_database, 2)
# Get all matches for selected features
if not any(isnan(center)):
selected_matches_all = self.matcher.knnMatch(features, self.selected_features, len(self.selected_features))
# For each keypoint and its descriptor
if len(keypoints_cv) > 0:
transformed_springs = scale_estimate * util.rotate(self.springs, -rotation_estimate)
for i in range(len(keypoints_cv)):
# Retrieve keypoint location
location = np.array(keypoints_cv[i].pt)
# First: Match over whole image
# Compute distances to all descriptors
matches = matches_all[i]
distances = np.array([m.distance for m in matches])
# Convert distances to confidences, do not weight
combined = 1 - distances / self.DESC_LENGTH
classes = self.database_classes
# Get best and second best index
bestInd = matches[0].trainIdx
secondBestInd = matches[1].trainIdx
# Compute distance ratio according to Lowe
ratio = (1 - combined[0]) / (1 - combined[1])
# Extract class of best match
keypoint_class = classes[bestInd]
# If distance ratio is ok and absolute distance is ok and keypoint class is not background
if ratio < self.THR_RATIO and combined[0] > self.THR_CONF and keypoint_class != 0:
# Add keypoint to active keypoints
new_kpt = append(location, keypoint_class)
active_keypoints = append(active_keypoints, array([new_kpt]), axis=0)
# In a second step, try to match difficult keypoints
# If structural constraints are applicable
if not any(isnan(center)):
# Compute distances to initial descriptors
matches = selected_matches_all[i]
distances = np.array([m.distance for m in matches])
# Re-order the distances based on indexing
idxs = np.argsort(np.array([m.trainIdx for m in matches]))
distances = distances[idxs]
# Convert distances to confidences
confidences = 1 - distances / self.DESC_LENGTH
# Compute the keypoint location relative to the object center
relative_location = location - center
# Compute the distances to all springs
displacements = util.L2norm(transformed_springs - relative_location)
# For each spring, calculate weight
weight = displacements < self.THR_OUTLIER # Could be smooth function
combined = weight * confidences
classes = self.selected_classes
# Sort in descending order
sorted_conf = argsort(combined)[::-1] # reverse
# Get best and second best index
bestInd = sorted_conf[0]
secondBestInd = sorted_conf[1]
# Compute distance ratio according to Lowe
ratio = (1 - combined[bestInd]) / (1 - combined[secondBestInd])
# Extract class of best match
keypoint_class = classes[bestInd]
# If distance ratio is ok and absolute distance is ok and keypoint class is not background
if ratio < self.THR_RATIO and combined[bestInd] > self.THR_CONF and keypoint_class != 0:
# Add keypoint to active keypoints
new_kpt = append(location, keypoint_class)
# Check whether same class already exists
if active_keypoints.size > 0:
same_class = np.nonzero(active_keypoints[:, 2] == keypoint_class)
active_keypoints = np.delete(active_keypoints, same_class, axis=0)
active_keypoints = append(active_keypoints, array([new_kpt]), axis=0)
# If some keypoints have been tracked
if tracked_keypoints.size > 0:
# Extract the keypoint classes
tracked_classes = tracked_keypoints[:, 2]
# If there already are some active keypoints
if active_keypoints.size > 0:
# Add all tracked keypoints that have not been matched
associated_classes = active_keypoints[:, 2]
missing = ~np.in1d(tracked_classes, associated_classes)
active_keypoints = append(active_keypoints, tracked_keypoints[missing, :], axis=0)
# Else use all tracked keypoints
else:
active_keypoints = tracked_keypoints
# Update object state estimate
_ = active_keypoints
self.center = center
self.scale_estimate = scale_estimate
self.rotation_estimate = rotation_estimate
self.tracked_keypoints = tracked_keypoints
self.active_keypoints = active_keypoints
self.im_prev = im_gray
self.keypoints_cv = keypoints_cv
_ = time.time()
self.tl = (nan, nan)
self.tr = (nan, nan)
self.br = (nan, nan)
self.bl = (nan, nan)
self.bb = array([nan, nan, nan, nan])
self.has_result = False
if not any(isnan(self.center)) and self.active_keypoints.shape[0] > self.num_initial_keypoints / 10:
self.has_result = True
tl = util.array_to_int_tuple(center + scale_estimate * util.rotate(self.center_to_tl[None, :], rotation_estimate).squeeze())
tr = util.array_to_int_tuple(center + scale_estimate * util.rotate(self.center_to_tr[None, :], rotation_estimate).squeeze())
br = util.array_to_int_tuple(center + scale_estimate * util.rotate(self.center_to_br[None, :], rotation_estimate).squeeze())
bl = util.array_to_int_tuple(center + scale_estimate * util.rotate(self.center_to_bl[None, :], rotation_estimate).squeeze())
min_x = min((tl[0], tr[0], br[0], bl[0]))
min_y = min((tl[1], tr[1], br[1], bl[1]))
max_x = max((tl[0], tr[0], br[0], bl[0]))
max_y = max((tl[1], tr[1], br[1], bl[1]))
self.tl = tl
self.tr = tr
self.bl = bl
self.br = br
self.bb = np.array([min_x, min_y, max_x - min_x, max_y - min_y])
Example 115
Project: brut Source File: util.py
def rfp_curve(yp, Y, **kwargs):
""" Plot the false positive rate as a function of recall """
import matplotlib.pyplot as plt
npos = Y.sum()
nneg = Y.size - npos
ind = np.argsort(yp)[::-1]
y = Y[ind]
yp = yp[ind]
recall = (1. * np.cumsum(y == 1)) / npos
false_pos = (1. * np.cumsum(y == 0)) / nneg
r = 1.0 * ((yp > 0) & (y == 1)).sum() / npos
fp = 1.0 * ((yp > 0) & (y == 0)).sum() / nneg
l, = plt.plot(recall, false_pos, **kwargs)
plt.plot([r], [fp], 'o', c=l.get_color())
plt.xlabel('Recall')
plt.ylabel('False Positive')
plt.title("R=%0.3f, FP=%0.4f" % (r, fp))
ax = plt.gca()
ax.grid(which='major', axis='x',
linewidth=0.75, linestyle='-', color='0.75')
ax.grid(which='minor', axis='x',
linewidth=0.25, linestyle='-', color='0.75')
ax.grid(which='major', axis='y',
linewidth=0.75, linestyle='-', color='0.75')
ax.grid(which='minor', axis='y',
linewidth=0.25, linestyle='-', color='0.75')
return recall, false_pos
Example 116
Project: hyperopt Source File: plotting.py
def main_plot_vars(trials, bandit=None, do_show=True, fontsize=10,
colorize_best=None,
columns=5,
):
# -- import here because file-level import is too early
import matplotlib.pyplot as plt
idxs, vals = miscs_to_idxs_vals(trials.miscs)
losses = trials.losses()
finite_losses = [y for y in losses if y not in (None, float('inf'))]
asrt = np.argsort(finite_losses)
if colorize_best != None:
colorize_thresh = finite_losses[asrt[colorize_best + 1]]
else:
# -- set to lower than best (disabled)
colorize_thresh = finite_losses[asrt[0]] - 1
loss_min = min(finite_losses)
loss_max = max(finite_losses)
print 'finite loss range', loss_min, loss_max, colorize_thresh
loss_by_tid = dict(zip(trials.tids, losses))
def color_fn(lossval):
if lossval is None:
return (1, 1, 1)
else:
t = 4 * (lossval - loss_min) / (loss_max - loss_min + .0001)
if t < 1:
return t, 0, 0
if t < 2:
return 2-t, t-1, 0
if t < 3:
return 0, 3-t, t-2
return 0, 0, 4-t
def color_fn_bw(lossval):
if lossval in (None, float('inf')):
return (1, 1, 1)
else:
t = (lossval - loss_min) / (loss_max - loss_min + .0001)
if lossval < colorize_thresh:
return (0., 1. - t, 0.) # -- red best black worst
else:
return (t, t, t) # -- white=worst, black=best
all_labels = list(idxs.keys())
titles = ['%s (%s)' % (label, bandit.params[label].name)
for label in all_labels]
order = np.argsort(titles)
C = columns
R = int(np.ceil(len(all_labels) / float(C)))
for plotnum, varnum in enumerate(order):
#print varnum, titles[varnum]
label = all_labels[varnum]
plt.subplot(R, C, plotnum + 1)
#print '-' * 80
#print 'Node', label
# hide x ticks
ticks_num, ticks_txt = plt.xticks()
plt.xticks(ticks_num, ['' for i in xrange(len(ticks_num))])
dist_name = bandit.params[label].name
x = idxs[label]
if 'log' in dist_name:
y = np.log(vals[label])
else:
y = vals[label]
plt.title(titles[varnum], fontsize=fontsize)
c = map(color_fn_bw, [loss_by_tid[ii] for ii in idxs[label]])
if len(y):
plt.scatter(x, y, c=c)
if 'log' in dist_name:
nums, texts = plt.yticks()
plt.yticks(nums, ['%.2e' % np.exp(t) for t in nums])
if do_show:
plt.show()
Example 117
def map_index(self, screen_pt, threshold=2.0, outside_returns_none=True,
index_only=True):
""" Maps a screen space point to an index into the plot's index array.
Parameters
----------
screen_pt : tuple of x-array, y-array
Two arrays (or values) of screen-space coordinates.
threshold : float
Optional screen-space distance allowed between *screen_pt* and the
plot; if non-zero, then a *screen_pt* within this distance is
mapped to the nearest plot index. (This feature is useful for sparse
data.)
outside_returns_none : Boolean
If True, then if *screen_pt* is outside the range of the data, the
method returns None. If False, it returns the nearest end index in
such a case.
index_only : Boolean
This is included for compatibility with the base class, but is
ignored, as it is always true for 1D plots.
Returns
-------
index : int
An index into the index array. If the input point cannot be mapped
to an index, then None is returned.
If *screen_pt* corresponds to multiple indices, then only the first
index is returned.
"""
data_pt = self.map_data(screen_pt)
if ((data_pt < self.index_mapper.range.low) or \
(data_pt > self.index_mapper.range.high)) and \
outside_returns_none:
return None
if self._cached_data_pts_sorted is None:
self._cached_data_argsort = argsort(self._cached_data)
self._cached_data_pts_sorted = self._cached_data[self._cached_data_argsort]
# XXX better to just use argmin(abs(data - data_pt))?
data = self._cached_data_pts_sorted
try:
ndx = reverse_map_1d(data, data_pt, "ascending")
except IndexError:
if outside_returns_none:
return None
else:
if data_pt < data[0]:
return 0
else:
return len(data) - 1
orig_ndx = self._cached_data_argsort[ndx]
if threshold == 0.0:
return orig_ndx
screen_points = self._cached_screen_pts
x = screen_points[orig_ndx]
if self.orientation == 'v':
x0 = screen_pt[1]
else:
x0 = screen_pt[0]
if abs(x - x0) <= threshold:
return orig_ndx
else:
return None
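The cached _cached_data_argsort above illustrates a useful pattern: search in a sorted copy of the data, then translate the hit back to an index into the original array through the stored argsort. A sketch of the same idea outside the class, using np.searchsorted in place of reverse_map_1d:

import numpy as np

data = np.array([5.0, 1.0, 3.0, 2.0])
order = np.argsort(data)
data_sorted = data[order]                 # [1. 2. 3. 5.]
ndx = np.searchsorted(data_sorted, 3.0)   # position in the sorted copy
orig_ndx = order[ndx]                     # index into the original, unsorted array
print(orig_ndx, data[orig_ndx])           # 2 3.0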
Example 118
Project: vsm Source File: structarr.py
def enum_sort(arr, indices=[], field_name='i', filter_nan=False):
"""
Takes a 1-dimensional array and returns a sorted array with matching
indices from the original array.
:param arr: A structured 1-dimensional array.
:type arr: array
:param indices: List of indices. If `indices` is empty, then `indices`
is set to a range of indices for the length of `arr`.
Default is an empty list.
:type indices: list, optional
:param field_name: Name for indices in the structured array dtype.
Default is 'i'.
:type field_name: string, optional
:param filter_nan: If `True`, Not a Number values are filtered.
Default is `False`.
:type filter_nan: boolean, optional
:returns: A sorted structured array.
**Examples**
>>> arr = np.array([7,3,1,8,2])
>>> enum_sort(arr)
array([(3, 8), (0, 7), (1, 3), (4, 2), (2, 1)],
dtype=[('i', '<i8'), ('value', '<i8')])
"""
idx = np.argsort(arr)
if len(indices) == 0:
indices = np.arange(arr.shape[0], dtype=np.int)
else:
indices = np.array(indices)
dt = [(field_name, indices.dtype), ('value', arr.dtype)]
new_arr = np.empty(shape=arr.shape, dtype=dt)
new_arr[field_name] = indices[idx]
new_arr['value'] = arr[idx]
if filter_nan:
new_arr = new_arr[np.isfinite(new_arr['value'])]
return new_arr[::-1]
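As a quick check of the behaviour documented above: argsort orders ascending, and the trailing [::-1] flips the result so the largest value comes first. A sketch reproducing the docstring's toy array without the structured dtype:

import numpy as np

arr = np.array([7, 3, 1, 8, 2])
idx = np.argsort(arr)                                            # ascending: [2 4 1 0 3]
pairs = [(int(i), int(v)) for i, v in zip(idx, arr[idx])][::-1]  # (index, value), largest first
print(pairs)   # [(3, 8), (0, 7), (1, 3), (4, 2), (2, 1)], matching the docstring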
Example 119
Project: bolt Source File: array.py
def transpose(self, *axes):
"""
Return an array with the axes transposed.
This operation will incur a swap unless the
desired permutation can be obtained
only by transposing the keys or the values.
Parameters
----------
axes : None, tuple of ints, or n ints
If None, will reverse axis order.
"""
if len(axes) == 0:
p = arange(self.ndim-1, -1, -1)
else:
p = asarray(argpack(axes))
istransposeable(p, range(self.ndim))
split = self.split
# compute the keys/value axes that need to be swapped
new_keys, new_values = p[:split], p[split:]
swapping_keys = sort(new_values[new_values < split])
swapping_values = sort(new_keys[new_keys >= split])
stationary_keys = sort(new_keys[new_keys < split])
stationary_values = sort(new_values[new_values >= split])
# compute the permutation that the swap causes
p_swap = r_[stationary_keys, swapping_values, swapping_keys, stationary_values]
# compute the extra permutation (p_x) on top of this that
# needs to happen to get the full permutation desired
p_swap_inv = argsort(p_swap)
p_x = p_swap_inv[p]
p_keys, p_values = p_x[:split], p_x[split:]-split
# perform the swap and then the within key/value permutations
arr = self.swap(swapping_keys, swapping_values-split)
arr = arr.keys.transpose(tuple(p_keys.tolist()))
arr = arr.values.transpose(tuple(p_values.tolist()))
return arr
Example 120
Project: CLAtoolkit Source File: utils.py
def nmf(platform, no_topics, course_code, start_date=None, end_date=None):
docuements,ids = get_allcontent_byplatform(platform, course_code, start_date=start_date, end_date=end_date)
if len(docuements)<5:
d3_dataset = ""
topic_output = "Not enough text to run Topic Modeling."
else:
min_df = 2
tfidf = TfidfVectorizer(stop_words=ENGLISH_STOP_WORDS, lowercase=True, strip_accents="unicode", use_idf=True, norm="l2", min_df = min_df, ngram_range=(1,4))
A = tfidf.fit_transform(docuements)
num_terms = len(tfidf.vocabulary_)
terms = [""] * num_terms
for term in tfidf.vocabulary_.keys():
terms[ tfidf.vocabulary_[term] ] = term
model = decomposition.NMF(init="nndsvd", n_components=no_topics, max_iter=1000)
W = model.fit_transform(A)
H = model.components_
nmf_topic_terms = {}
nmf_topic_docs = {}
nmf_topic_doc_ids = {}
for topic_index in range( H.shape[0] ):
top_indices = np.argsort( H[topic_index,:] )[::-1][0:10]
term_ranking = [terms[i] for i in top_indices]
nmf_topic_terms[topic_index] = term_ranking
#tmp = "Topic %d: %s" % ( topic_index, ", ".join( term_ranking ) )
for topic_index in range( W.shape[1] ):
top_indices = np.argsort( W[:,topic_index] )[::-1][0:10]
doc_ranking = [docuements[i] for i in top_indices]
id_ranking = [ids[i] for i in top_indices]
nmf_topic_docs[topic_index] = doc_ranking
nmf_topic_doc_ids[topic_index] = id_ranking
topic_output = ""
for topic in nmf_topic_terms:
topic_output = topic_output + "<h2>Topic %d</h2>" % (topic + 1)
topic_output = topic_output + "<p>"
for term in nmf_topic_terms[topic]:
topic_output = topic_output + '<a onClick="searchandhighlight(\'%s\')" class="btn btn-default btn-xs">%s</a> ' % (term, term)
topic_output = topic_output + "<p>"
topic_output = topic_output + "<ul>"
for doc in nmf_topic_docs[topic]:
topic_output = topic_output + "<li>%s</li>" % (doc)
topic_output = topic_output + "</ul>"
#print nmf_topic_doc_ids
# find the % sentiment classification of each topic
classification_dict = None
classifier = 'VaderSentiment'
'''
if classifier == "VaderSentiment":
classification_dict = {'Positive':0, 'Neutral':0, 'Negative':0}
elif classifier == "NaiveBayes_t1.model":
classification_dict = {'Triggering':0, 'Exploration':0, 'Integration':0, 'Resolution':0, 'Other':0}
'''
piebubblechart = {}
feature_matrix = np.zeros(shape=(no_topics,3))
for topic in nmf_topic_terms:
vals = ""
radius = 0
topiclabel = "Topic %d" % (topic + 1)
classification_dict = {'Positive':0, 'Neutral':0, 'Negative':0}
kwargs = {'classifier':classifier, 'xapistatement__course_code': course_code, 'xapistatement__id__in':nmf_topic_doc_ids[topic]}
#print Classification.objects.values('classification').filter(**kwargs).order_by().annotate(Count('classification')).query
counts = Classification.objects.values('classification').filter(**kwargs).order_by().annotate(Count('classification'))
for count in counts:
#print count
classification_dict[count['classification']] = count['classification__count']
vals = "%d,%d,%d" % (classification_dict['Positive'],classification_dict['Negative'],classification_dict['Neutral'])
print vals
radius = classification_dict['Positive'] + classification_dict['Negative'] + classification_dict['Neutral']
feature_matrix[topic,0] = classification_dict['Positive']
feature_matrix[topic,1] = classification_dict['Negative']
feature_matrix[topic,2] = classification_dict['Neutral']
piebubblechart[topic] = {'label':topiclabel, 'vals':vals, 'radius':radius}
# Perform Affinity Propogation
af = AffinityPropagation(preference=-50).fit(feature_matrix)
cluster_centers_indices = af.cluster_centers_indices_
aflabels = af.labels_
n_clusters_ = len(cluster_centers_indices)
#print 'n_clusters_', n_clusters_, aflabels
#print feature_matrix
# generate piebubblechart dataset for d3.js
#print piebubblechart
d3_dataset = ""
for topic in piebubblechart:
#print topic
# output format - {label: "Topic 1", vals: [10, 20], cluster: 1, radius: 30},
d3_dataset = d3_dataset + '{label: "%s", vals: [%s], cluster: %d, radius: %d},' % (piebubblechart[topic]['label'], piebubblechart[topic]['vals'], aflabels[topic], piebubblechart[topic]['radius'])
return topic_output, d3_dataset
Example 121
def from_tsv(tsv_path, output_path, phenotype_name, phenotype_metadata_path, gzip, warning_callback=None,
error_callback=None, progress_callback=None):
def get_kmer_length(tsv_path):
with open(tsv_path, "r") as f:
f.next()
kmer_len = len(f.next().split("\t")[0])
return kmer_len
def get_kmer_count(tsv):
# XXX: This can break if the file format changes!
# Assumes that each line has the format kmer_seq\tV\tV\t...V\n with one binary V per genome
total_size = getsize(tsv)
with open(tsv, "r") as f:
header_size = len(f.next())
line_size = len(f.next())
content_size = float(total_size) - header_size
if content_size % line_size != 0:
raise Exception()
return int(content_size / line_size)
# Execution callback functions
if warning_callback is None:
warning_callback = lambda w: logging.warning(w)
if error_callback is None:
def normal_raise(exception):
raise exception
error_callback = normal_raise
if progress_callback is None:
progress_callback = lambda t, p: None
if (phenotype_name is None and phenotype_metadata_path is not None) or (
phenotype_name is not None and phenotype_metadata_path is None):
raise ValueError("If a phenotype is specified, it must have a name and a metadata file.")
kmer_len = get_kmer_length(tsv_path)
kmer_count = get_kmer_count(tsv_path)
kmer_dtype = 'S' + str(kmer_len)
kmer_by_matrix_column_dtype = _minimum_uint_size(kmer_count)
compression = "gzip" if gzip > 0 else None
compression_opts = gzip if gzip > 0 else None
tsv_block_size = min(kmer_count, BLOCK_SIZE)
h5py_file = _create_hdf5_file_no_chunk_caching(output_path)
h5py_file.attrs["created"] = time()
h5py_file.attrs["uuid"] = str(uuid1())
h5py_file.attrs["genome_source_type"] = "tsv"
h5py_file.attrs["genomic_data"] = tsv_path
h5py_file.attrs["phenotype_name"] = phenotype_name if phenotype_name is not None else "NA"
h5py_file.attrs[
"phenotype_metadata_source"] = phenotype_metadata_path if phenotype_metadata_path is not None else "NA"
h5py_file.attrs["compression"] = "gzip (level %d)" % gzip
# Read list of genome identifiers
reader = pd.read_table(tsv_path, sep='\t', index_col=0, iterator=True, engine="c")
genome_ids = reader.get_chunk(1).columns.values
del reader
logging.debug("The k-mer matrix contains %d genomes." % len(genome_ids))
if len(set(genome_ids)) < len(genome_ids):
error_callback(Exception("The genomic data contains genomes with the same identifier."))
# Extract/write the metadata
if phenotype_name is not None:
genome_ids, labels = _parse_metadata(phenotype_metadata_path, genome_ids, warning_callback, error_callback)
# Sort the genomes by label for better performance
logging.debug("Sorting genomes by metadata label for optimal performance.")
sorter = np.argsort(labels)
genome_ids = genome_ids[sorter]
labels = labels[sorter]
logging.debug("Creating the phenotype metadata dataset.")
phenotype = h5py_file.create_dataset("phenotype", data=labels, dtype=PHENOTYPE_LABEL_DTYPE)
phenotype.attrs["name"] = phenotype_name
del phenotype, labels
# Write genome ids
logging.debug("Creating the genome identifier dataset.")
h5py_file.create_dataset("genome_identifiers",
data=genome_ids,
compression=compression,
compression_opts=compression_opts)
# Initialize kmers (kmer_list) dataset
logging.debug("Creating the kmer sequence dataset.")
kmers = h5py_file.create_dataset("kmer_sequences",
shape=(kmer_count,),
dtype=kmer_dtype,
compression=compression,
compression_opts=compression_opts)
# Initialize kmer_matrix dataset
logging.debug("Creating the kmer matrix dataset.")
kmer_matrix = h5py_file.create_dataset("kmer_matrix",
shape=(
int(ceil(1.0 * len(genome_ids) / KMER_MATRIX_PACKING_SIZE)), kmer_count),
dtype=KMER_MATRIX_DTYPE,
compression=compression,
compression_opts=compression_opts,
chunks=(1, tsv_block_size))
# Initialize kmer_by_matrix_column dataset
logging.debug("Creating the kmer sequence/matrix column mapping dataset.")
kmer_by_matrix_column = h5py_file.create_dataset("kmer_by_matrix_column",
shape=(kmer_count,),
dtype=kmer_by_matrix_column_dtype,
compression=compression,
compression_opts=compression_opts)
logging.debug("Transferring the data from TSV to HDF5.")
tsv_reader = pd.read_table(tsv_path, index_col='kmers', sep='\t', chunksize=tsv_block_size)
n_blocks = int(ceil(1.0 * kmer_count / tsv_block_size))
n_copied_blocks = 0.
for i, chunk in enumerate(tsv_reader):
logging.debug("Block %d/%d." % (i + 1, n_blocks))
progress_callback("Creating", n_copied_blocks / n_blocks)
logging.debug("Reading data from TSV file.")
kmers_data = chunk.index.values.astype(kmer_dtype)
read_block_size = kmers_data.shape[0]
block_start = i * tsv_block_size
block_stop = block_start + read_block_size
n_copied_blocks += 0.5
progress_callback("Creating", n_copied_blocks / n_blocks)
if block_start > kmer_count:
break
logging.debug("Writing data to HDF5.")
kmers[block_start:block_stop] = kmers_data
attribute_classification_sorted_by_strains = chunk[genome_ids]
attribute_classification_data = attribute_classification_sorted_by_strains.T.values.astype(np.uint8)
logging.debug("Packing the data.")
kmer_matrix[:, block_start:block_stop] = _pack_binary_bytes_to_ints(attribute_classification_data,
pack_size=KMER_MATRIX_PACKING_SIZE)
n_copied_blocks += 0.5
progress_callback("Creating", n_copied_blocks / n_blocks)
# Write kmer_by_matrix_column
kmer_by_matrix_column[block_start:block_stop] = np.arange(block_start, block_stop, dtype=kmer_by_matrix_column_dtype)
logging.debug("Garbage collection.")
gc.collect() # Clear the memory objects created during the iteration, or else the memory will keep growing.
h5py_file.close()
logging.debug("Dataset creation completed.")
Example 122
def from_contigs(contig_list_path, output_path, kmer_size, filter_singleton, phenotype_name, phenotype_metadata_path,
gzip, temp_dir, nb_cores, verbose, progress, warning_callback=None,
error_callback=None):
compression = "gzip" if gzip > 0 else None
compression_opts = gzip if gzip > 0 else None
# Execution callback functions
if warning_callback is None:
warning_callback = lambda w: logging.warning(w)
if error_callback is None:
def normal_raise(exception):
raise exception
error_callback = normal_raise
# Make sure that the tmp data is unique to the current process
temp_dir = join(temp_dir, str(getpid()))
if not exists(temp_dir):
mkdir(temp_dir)
if (phenotype_name is None and phenotype_metadata_path is not None) or (
phenotype_name is not None and phenotype_metadata_path is None):
error_callback(ValueError("If a phenotype is specified, it must have a name and a metadata file."))
# Read list of genome identifiers
contig_file_by_genome_id = dict(l.split() for l in open(contig_list_path, "r"))
logging.debug("The k-mer matrix contains %d genomes." % len(contig_file_by_genome_id))
if len(set(contig_file_by_genome_id.keys())) < len(contig_file_by_genome_id.keys()):
error_callback(Exception("The genomic data contains genomes with the same identifier."))
h5py_file = _create_hdf5_file_no_chunk_caching(output_path)
h5py_file.attrs["created"] = time()
h5py_file.attrs["uuid"] = str(uuid1())
h5py_file.attrs["genome_source_type"] = "contigs"
h5py_file.attrs["genomic_data"] = contig_list_path
h5py_file.attrs["phenotype_name"] = phenotype_name if phenotype_name is not None else "NA"
h5py_file.attrs[
"phenotype_metadata_source"] = phenotype_metadata_path if phenotype_metadata_path is not None else "NA"
h5py_file.attrs["filter"] = filter_singleton
h5py_file.attrs["compression"] = "gzip (level %d)" % gzip
# Extract/write the metadata
if phenotype_name is not None:
genome_ids, labels = _parse_metadata(phenotype_metadata_path, contig_file_by_genome_id.keys(), warning_callback,
error_callback)
# Sort the genomes by label for better performance
logging.debug("Sorting genomes by metadata label for optimal performance.")
sorter = np.argsort(labels)
genome_ids = genome_ids[sorter]
labels = labels[sorter]
logging.debug("Creating the phenotype metadata dataset.")
phenotype = h5py_file.create_dataset("phenotype", data=labels, dtype=PHENOTYPE_LABEL_DTYPE)
phenotype.attrs["name"] = phenotype_name
del phenotype, labels
# Write genome ids
logging.debug("Creating the genome identifier dataset.")
h5py_file.create_dataset("genome_identifiers",
data=genome_ids,
compression=compression,
compression_opts=compression_opts)
h5py_file.close()
logging.debug("Initializing DSK.")
# Preparing input file for multidsk
files_sorted = ["%s\n" % contig_file_by_genome_id[id] for id in genome_ids]
open(join(temp_dir, "list_contigs_files"), "w").writelines(files_sorted)
# Calling multidsk
contigs_count_kmers(file_path=join(temp_dir, "list_contigs_files"),
out_dir=temp_dir,
kmer_size=kmer_size,
abundance_min=1,
out_compress=gzip,
nb_cores=nb_cores,
verbose=int(verbose),
progress=progress)
logging.debug("K-mers counting completed.")
# Preparing input file for dsk2kover
list_contigs = [join(temp_dir, basename(splitext(file)[0]) + ".h5") for file in files_sorted]
file_dsk_output = open(join(temp_dir, "list_h5"), "w")
for line in list_contigs:
file_dsk_output.write(line + "\n")
file_dsk_output.close()
# Calling dsk2kover
logging.debug("Initializing DSK2Kover.")
contigs_pack_kmers(file_path=join(temp_dir, "list_h5"),
out_path=output_path,
filter_singleton=filter_singleton,
kmer_length=kmer_size,
compression=gzip,
chunk_size=BLOCK_SIZE,
nb_genomes=len(genome_ids),
progress=progress)
logging.debug("Removing temporary files.")
rmtree(temp_dir)
# progress_callback("dsk2kover", 1)
logging.debug("Dataset creation completed.")
Example 123
Project: eulerian-magnification Source File: base.py
def show_frequencies(vid_data, fps, bounds=None):
"""Graph the average value of the video as well as the frequency strength"""
averages = []
if bounds:
for x in range(1, vid_data.shape[0] - 1):
averages.append(vid_data[x, bounds[2]:bounds[3], bounds[0]:bounds[1], :].sum())
else:
for x in range(1, vid_data.shape[0] - 1):
averages.append(vid_data[x, :, :, :].sum())
averages = averages - min(averages)
charts_x = 1
charts_y = 2
pyplot.figure(figsize=(20, 10))
pyplot.subplots_adjust(hspace=.7)
pyplot.subplot(charts_y, charts_x, 1)
pyplot.title("Pixel Average")
pyplot.xlabel("Time")
pyplot.ylabel("Brightness")
pyplot.plot(averages)
freqs = scipy.fftpack.fftfreq(len(averages), d=1.0 / fps)
fft = abs(scipy.fftpack.fft(averages))
idx = np.argsort(freqs)
pyplot.subplot(charts_y, charts_x, 2)
pyplot.title("FFT")
pyplot.xlabel("Freq (Hz)")
freqs = freqs[idx]
fft = fft[idx]
freqs = freqs[len(freqs) / 2 + 1:]
fft = fft[len(fft) / 2 + 1:]
pyplot.plot(freqs, abs(fft))
pyplot.show()
Example 124
Project: EDeN Source File: obabel.py
def find_nearest_neighbors(mol, distances, current_idx, **kwargs):
# Most common elements in our galaxy with atomic number:
# 1 Hydrogen
# 2 Helium
# 8 Oxygen
# 6 Carbon
# 10 Neon
# 26 Iron
# 7 Nitrogen
# 14 Silicon
# 12 Magnesium
# 16 Sulfur
atom_types = kwargs.get('atom_types', [1, 2, 8, 6, 10, 26, 7, 14, 12, 16])
similarity_fn = kwargs.get('similarity_fn', lambda x: 1. / (x + 1))
k = kwargs.get('k', 3)
threshold = kwargs.get('threshold', 0)
sorted_indices = np.argsort(distances[current_idx - 1, ])
# Obs: nearest_atoms will contain the atom index, which starts in 0
nearest_atoms = []
for atomic_no in atom_types:
# Don't remove current atom from list:
atom_idx = [atom.idx for atom in mol if atom.atomicnum == atomic_no]
# Indices cannot be compared directly with idx
if len(atom_idx) >= k:
nearest_atoms.append(
[id for id in sorted_indices if id + 1 in atom_idx][:k])
else:
nearest_atoms.append(
[id for id in sorted_indices if id + 1 in atom_idx] +
[None] * (k - len(atom_idx)))
# The following expression flattens the list
nearest_atoms = [x for sublist in nearest_atoms for x in sublist]
# Replace idx for distances, assign an arbitrarily large distance for None
nearest_atoms = [distances[current_idx - 1, i]
if i is not None else 1e10 for i in nearest_atoms]
# If a threshold value is entered, filter the list of distances
if threshold > 0:
nearest_atoms = [x if x <= threshold else 1e10 for x in nearest_atoms]
# Finally apply the similarity function to the resulting list and return
nearest_atoms = [similarity_fn(x) if similarity_fn(
x) > np.spacing(1e10) else 0 for x in nearest_atoms]
return nearest_atoms
Example 125
Project: deepchem Source File: conformers.py
def prune_conformers(self, mol):
"""
Prune conformers from a molecule using an RMSD threshold, starting
with the lowest energy conformer.
Parameters
----------
mol : RDKit Mol
Molecule.
Returns
-------
A new RDKit Mol containing the chosen conformers, sorted by
increasing energy.
"""
if self.rmsd_threshold < 0 or mol.GetNumConformers() <= 1:
return mol
energies = self.get_conformer_energies(mol)
rmsd = self.get_conformer_rmsd(mol)
sort = np.argsort(energies) # sort by increasing energy
keep = [] # always keep lowest-energy conformer
discard = []
for i in sort:
# always keep lowest-energy conformer
if len(keep) == 0:
keep.append(i)
continue
# discard conformers after max_conformers is reached
if len(keep) >= self.max_conformers:
discard.append(i)
continue
# get RMSD to selected conformers
this_rmsd = rmsd[i][np.asarray(keep, dtype=int)]
# discard conformers within the RMSD threshold
if np.all(this_rmsd >= self.rmsd_threshold):
keep.append(i)
else:
discard.append(i)
# create a new molecule to hold the chosen conformers
# this ensures proper conformer IDs and energy-based ordering
new = Chem.Mol(mol)
new.RemoveAllConformers()
conf_ids = [conf.GetId() for conf in mol.GetConformers()]
for i in keep:
conf = mol.GetConformer(conf_ids[i])
new.AddConformer(conf, assignId=True)
return new
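The greedy selection above simply iterates over np.argsort(energies) so that conformers are visited from lowest to highest energy. A stripped-down sketch of that pattern, with the RMSD test omitted and a stand-in limit of two kept items:

import numpy as np

energies = np.array([5.2, 1.1, 3.3, 0.9])
keep = []
for i in np.argsort(energies):   # lowest energy first
    if len(keep) < 2:            # stand-in for max_conformers
        keep.append(int(i))
print(keep)                      # [3, 1], the two lowest-energy conformer indices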
Example 126
Project: spark-ml-streaming Source File: kmeans.py
def run(self, lgn=None):
viz = None
closestpoint = lambda centers, p: argmin(cdist(centers, array([p])))
centers = self.centers
modeltime = 0
model = []
# loop over batches
for i in range(1, self.nbatches):
print('generating batch %g' % i)
# drift means the points will slowly drift by adding noise to the position
if self.update == 'drift':
centers += random.randn(centers.shape[0], centers.shape[1]) * 0.15
# jump means every 15 batches the points will shift
if self.update == 'jump':
if i % self.interval == 0:
if self.transition:
centers = asarray(self.transition)
else:
base = random.rand(self.ncenters * self.ndims) * 1 + 2
delta = asarray([-d if random.rand() > 0.5 else d for d in base])\
.reshape(self.ncenters, self.ndims)
centers = centers + delta
# generate the points by sampling from the clusters and write to disk
npoints = self.npoints
pts, labels = make_blobs(npoints, self.ndims, centers, cluster_std=self.std)
self.writepoints(pts, i)
time.sleep(1)
# get the latest model (after waiting)
model, modeltime = loadrecent(self.dataout + '/*-model.txt', modeltime, model)
# plot an update (if we got a valid model)
if len(model) == self.ncenters:
clrs = labels
order = argsort(labels)
clrs = clrs[order]
pts = pts[order]
s = ones(self.npoints) * 10
if self.ndims == 1:
pts = vstack((pts, model[:,None]))
else:
pts = vstack((pts, model))
clrs = hstack((clrs, ones(self.ncenters) * 5))
s = hstack((s, ones(self.ncenters) * 10))
# wait a few iterations before plotting
if (lgn is not None) & (i > 5):
# scatter plot for two dimensions
if self.ndims == 2:
if viz is None:
viz = lgn.scatterstreaming(pts[:, 0], pts[:, 1], label=clrs, size=s)
else:
viz.append(pts[:, 0], pts[:, 1], label=clrs, size=s)
# line plot for one dimension
elif self.ndims == 1:
if viz is None:
viz = lgn.linestreaming(pts, label=clrs, size=s/2)
else:
viz.append(pts, label=clrs, size=s/2)
else:
raise Exception('Plotting only supported with 1 or 2 dimensions')
Example 127
Project: pyksc Source File: col_to_cluster.py
def stacked_bars(labels, data_plots, out_fpath, label_translation, ref=True):
x_locations = [1, 2, 3, 4, 6, 7, 8, 9, 11, 12, 13, 14, 16, 17, 18, 19]
data_class = {}
data_label = {}
for data, _, class_num in data_plots:
best_idx = np.argsort(data)[::-1][:4]
best_cls = np.array(data)[best_idx]
best_lbl = np.array(labels)[best_idx]
data_class[label_translation[class_num]] = best_cls
data_label[label_translation[class_num]] = best_lbl
bar_data = []
bar_labels = []
for cls in sorted(data_class):
bar_data.extend(data_class[cls])
bar_labels.extend(data_label[cls])
colors = ['b', 'g', 'm', 'r', 'y', 'c', '#A617A1', '#2B5700', 'w',
'#FF7300', 'k'] * 3
colored={}
if ref:
to_use = set(REFERRER_ABBRV.values())
else:
to_use = set(CATEG_ABBRV.values())
for i, l in enumerate(to_use):
colored[l] = colors[i]
for x, y, l in zip(x_locations, bar_data, bar_labels):
c = colored[l]
plt.bar(left=x, height=y, color=c, width=1, alpha=0.5)
plt.text(x + .75, y, l, va='bottom', ha='center', rotation=45)
plt.xlim(xmin=0, xmax=21)
plt.xlabel('Cluster')
if ref:
plt.ylim(ymin=0, ymax=.31)
plt.ylabel('Fraction of Views in Cluster')
else:
plt.ylim(ymin=0, ymax=.4)
plt.ylabel('Fraction of Videos in Cluster')
plt.xticks([3, 8, 13, 18], ['$C0$', '$C1$', '$C2$', '$C3$'])
plt.savefig(out_fpath)
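The np.argsort(data)[::-1][:4] idiom used in stacked_bars is the standard way to pull the top-k entries of one array and reuse the same order on a parallel array of labels. A tiny illustration with made-up referrer fractions (the labels below are invented, not the project's REFERRER_ABBRV values):

import numpy as np

views = np.array([0.05, 0.30, 0.10, 0.25, 0.02])
labels = np.array(['search', 'internal', 'social', 'external', 'other'])

best_idx = np.argsort(views)[::-1][:4]   # indices of the 4 largest values, descending
print(views[best_idx])                   # [0.3  0.25 0.1  0.05]
print(labels[best_idx])                  # ['internal' 'external' 'social' 'search']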
0
Example 128
def stage0(**kwargs):
ps = PlotSequence('cfht')
decals = CfhtDecals()
B = decals.get_bricks()
print('Bricks:')
B.about()
ra,dec = 190.0, 11.0
#bands = 'ugri'
bands = 'gri'
B.cut(np.argsort(degrees_between(ra, dec, B.ra, B.dec)))
print('Nearest bricks:', B.ra[:5], B.dec[:5], B.brickid[:5])
brick = B[0]
pixscale = 0.186
#W,H = 1024,1024
#W,H = 2048,2048
#W,H = 3600,3600
W,H = 4800,4800
targetwcs = wcs_for_brick(brick, pixscale=pixscale, W=W, H=H)
ccdfn = 'cfht-ccds.fits'
if os.path.exists(ccdfn):
T = fits_table(ccdfn)
else:
T = get_ccd_list()
T.writeto(ccdfn)
print(len(T), 'CCDs')
T.cut(ccds_touching_wcs(targetwcs, T))
print(len(T), 'CCDs touching brick')
T.cut(np.array([b in bands for b in T.filter]))
print(len(T), 'in bands', bands)
ims = []
for t in T:
im = CfhtImage(t)
# magzp = hdr['PHOT_C'] + 2.5 * np.log10(hdr['EXPTIME'])
# fwhm = t.seeing / (pixscale * 3600)
# print '-> FWHM', fwhm, 'pix'
im.seeing = t.seeing
im.pixscale = t.pixscale
print('seeing', t.seeing)
print('pixscale', im.pixscale*3600, 'arcsec/pix')
im.run_calibs(t.ra, t.dec, im.pixscale, W=t.width, H=t.height)
ims.append(im)
# Read images, clip to ROI
targetrd = np.array([targetwcs.pixelxy2radec(x,y) for x,y in
[(1,1),(W,1),(W,H),(1,H),(1,1)]])
keepims = []
tims = []
for im in ims:
print()
print('Reading expnum', im.expnum, 'name', im.extname, 'band', im.band, 'exptime', im.exptime)
band = im.band
wcs = im.read_wcs()
imh,imw = wcs.imageh,wcs.imagew
imgpoly = [(1,1),(1,imh),(imw,imh),(imw,1)]
ok,tx,ty = wcs.radec2pixelxy(targetrd[:-1,0], targetrd[:-1,1])
tpoly = list(zip(tx,ty))
clip = clip_polygon(imgpoly, tpoly)
clip = np.array(clip)
#print 'Clip', clip
if len(clip) == 0:
continue
x0,y0 = np.floor(clip.min(axis=0)).astype(int)
x1,y1 = np.ceil (clip.max(axis=0)).astype(int)
slc = slice(y0,y1+1), slice(x0,x1+1)
## FIXME -- it seems I got lucky and the cross product is
## negative == clockwise, as required by clip_polygon. One
## could check this and reverse the polygon vertex order.
# dx0,dy0 = tx[1]-tx[0], ty[1]-ty[0]
# dx1,dy1 = tx[2]-tx[1], ty[2]-ty[1]
# cross = dx0*dy1 - dx1*dy0
# print 'Cross:', cross
print('Image slice: x [%i,%i], y [%i,%i]' % (x0,x1, y0,y1))
print('Reading image from', im.imgfn, 'HDU', im.hdu)
img,imghdr = im.read_image(header=True, slice=slc)
goodpix = (img != 0)
print('Number of pixels == 0:', np.sum(img == 0))
print('Number of pixels != 0:', np.sum(goodpix))
if np.sum(goodpix) == 0:
continue
# print 'Image shape', img.shape
print('Image range', img.min(), img.max())
print('Goodpix image range:', (img[goodpix]).min(), (img[goodpix]).max())
if img[goodpix].min() == img[goodpix].max():
print('No dynamic range in image')
continue
# print 'Reading invvar from', im.wtfn, 'HDU', im.hdu
# invvar = im.read_invvar(slice=slc)
# # print 'Invvar shape', invvar.shape
# # print 'Invvar range:', invvar.min(), invvar.max()
# invvar[goodpix == 0] = 0.
# if np.all(invvar == 0.):
# print 'Skipping zero-invvar image'
# continue
# assert(np.all(np.isfinite(img)))
# assert(np.all(np.isfinite(invvar)))
# assert(not(np.all(invvar == 0.)))
# # Estimate per-pixel noise via Blanton's 5-pixel MAD
# slice1 = (slice(0,-5,10),slice(0,-5,10))
# slice2 = (slice(5,None,10),slice(5,None,10))
# # print 'sliced shapes:', img[slice1].shape, img[slice2].shape
# # print 'good shape:', (goodpix[slice1] * goodpix[slice2]).shape
# # print 'good values:', np.unique(goodpix[slice1] * goodpix[slice2])
# # print 'sliced[good] shapes:', (img[slice1] - img[slice2])[goodpix[slice1] * goodpix[slice2]].shape
# mad = np.median(np.abs(img[slice1] - img[slice2])[goodpix[slice1] * goodpix[slice2]].ravel())
# sig1 = 1.4826 * mad / np.sqrt(2.)
# print 'MAD sig1:', sig1
# # invvar was 1 or 0
# invvar *= (1./(sig1**2))
# medsky = np.median(img[goodpix])
# Read full image for sig1 and sky estimate
fullimg = im.read_image()
fullgood = (fullimg != 0)
# Estimate per-pixel noise via Blanton's 5-pixel MAD
slice1 = (slice(0,-5,10),slice(0,-5,10))
slice2 = (slice(5,None,10),slice(5,None,10))
mad = np.median(np.abs(fullimg[slice1] - fullimg[slice2])[fullgood[slice1] * fullgood[slice2]].ravel())
sig1 = 1.4826 * mad / np.sqrt(2.)
print('MAD sig1:', sig1)
medsky = np.median(fullimg[fullgood])
invvar = np.zeros_like(img)
invvar[goodpix] = 1./sig1**2
# Median-smooth sky subtraction
plt.clf()
dimshow(np.round((img-medsky) / sig1), vmin=-3, vmax=5)
plt.title('Scalar median: %s' % im.name)
ps.savefig()
# medsky = np.zeros_like(img)
# # astrometry.util.util
# median_smooth(img, np.logical_not(goodpix), 256, medsky)
fullmed = np.zeros_like(fullimg)
median_smooth(fullimg - medsky, np.logical_not(fullgood), 256, fullmed)
fullmed += medsky
medimg = fullmed[slc]
plt.clf()
dimshow(np.round((img - medimg) / sig1), vmin=-3, vmax=5)
plt.title('Median filtered: %s' % im.name)
ps.savefig()
#print 'Subtracting median:', medsky
#img -= medsky
img -= medimg
primhdr = im.read_image_primary_header()
magzp = decals.get_zeropoint_for(im)
print('magzp', magzp)
zpscale = NanoMaggies.zeropointToScale(magzp)
print('zpscale', zpscale)
# Scale images to Nanomaggies
img /= zpscale
sig1 /= zpscale
invvar *= zpscale**2
orig_zpscale = zpscale
zpscale = 1.
assert(np.sum(invvar > 0) > 0)
print('After scaling:')
print('sig1', sig1)
print('invvar range', invvar.min(), invvar.max())
print('image range', img.min(), img.max())
assert(np.all(np.isfinite(img)))
assert(np.all(np.isfinite(invvar)))
assert(np.isfinite(sig1))
plt.clf()
lo,hi = -5*sig1, 10*sig1
n,b,p = plt.hist(img[goodpix].ravel(), 100, range=(lo,hi), histtype='step', color='k')
xx = np.linspace(lo, hi, 200)
plt.plot(xx, max(n)*np.exp(-xx**2 / (2.*sig1**2)), 'r-')
plt.xlim(lo,hi)
plt.title('Pixel histogram: %s' % im.name)
ps.savefig()
twcs = ConstantFitsWcs(wcs)
if x0 or y0:
twcs.setX0Y0(x0,y0)
info = im.get_image_info()
fullh,fullw = info['dims']
# read fit PsfEx model
psfex = PsfEx.fromFits(im.psffitfn)
print('Read', psfex)
# HACK -- highly approximate PSF here!
#psf_fwhm = imghdr['FWHM']
#psf_fwhm = im.seeing
psf_fwhm = im.seeing / (im.pixscale * 3600)
print('PSF FWHM', psf_fwhm, 'pixels')
psf_sigma = psf_fwhm / 2.35
psf = NCircularGaussianPSF([psf_sigma],[1.])
print('img type', img.dtype)
tim = Image(img, invvar=invvar, wcs=twcs, psf=psf,
photocal=LinearPhotoCal(zpscale, band=band),
sky=ConstantSky(0.), name=im.name + ' ' + band)
tim.zr = [-3. * sig1, 10. * sig1]
tim.sig1 = sig1
tim.band = band
tim.psf_fwhm = psf_fwhm
tim.psf_sigma = psf_sigma
tim.sip_wcs = wcs
tim.x0,tim.y0 = int(x0),int(y0)
tim.psfex = psfex
tim.imobj = im
mn,mx = tim.zr
tim.ima = dict(interpolation='nearest', origin='lower', cmap='gray',
vmin=mn, vmax=mx)
tims.append(tim)
keepims.append(im)
ims = keepims
print('Computing resampling...')
# save resampling params
for tim in tims:
wcs = tim.sip_wcs
subh,subw = tim.shape
subwcs = wcs.get_subimage(tim.x0, tim.y0, subw, subh)
tim.subwcs = subwcs
try:
Yo,Xo,Yi,Xi,rims = resample_with_wcs(targetwcs, subwcs, [], 2)
except OverlapError:
print('No overlap')
continue
if len(Yo) == 0:
continue
tim.resamp = (Yo,Xo,Yi,Xi)
print('Creating coadds...')
# Produce per-band coadds, for plots
coimgs = []
cons = []
for ib,band in enumerate(bands):
coimg = np.zeros((H,W), np.float32)
con = np.zeros((H,W), np.uint8)
for tim in tims:
if tim.band != band:
continue
(Yo,Xo,Yi,Xi) = tim.resamp
if len(Yo) == 0:
continue
nn = (tim.getInvvar()[Yi,Xi] > 0)
coimg[Yo,Xo] += tim.getImage ()[Yi,Xi] * nn
con [Yo,Xo] += nn
# print
# print 'tim', tim.name
# print 'number of resampled pix:', len(Yo)
# reim = np.zeros_like(coimg)
# ren = np.zeros_like(coimg)
# reim[Yo,Xo] = tim.getImage()[Yi,Xi] * nn
# ren[Yo,Xo] = nn
# print 'number of resampled pix with positive invvar:', ren.sum()
# plt.clf()
# plt.subplot(2,2,1)
# mn,mx = [np.percentile(reim[ren>0], p) for p in [25,95]]
# print 'Percentiles:', mn,mx
# dimshow(reim, vmin=mn, vmax=mx)
# plt.colorbar()
# plt.subplot(2,2,2)
# dimshow(con)
# plt.colorbar()
# plt.subplot(2,2,3)
# dimshow(reim, vmin=tim.zr[0], vmax=tim.zr[1])
# plt.colorbar()
# plt.subplot(2,2,4)
# plt.hist(reim.ravel(), 100, histtype='step', color='b')
# plt.hist(tim.getImage().ravel(), 100, histtype='step', color='r')
# plt.suptitle('%s: %s' % (band, tim.name))
# ps.savefig()
coimg /= np.maximum(con,1)
coimgs.append(coimg)
cons .append(con)
plt.clf()
dimshow(get_rgb(coimgs, bands))
ps.savefig()
plt.clf()
for i,b in enumerate(bands):
plt.subplot(2,2,i+1)
dimshow(cons[i], ticks=False)
plt.title('%s band' % b)
plt.colorbar()
plt.suptitle('Number of exposures')
ps.savefig()
print('Grabbing SDSS sources...')
bandlist = [b for b in bands]
cat,T = get_sdss_sources(bandlist, targetwcs)
# record coordinates in target brick image
ok,T.tx,T.ty = targetwcs.radec2pixelxy(T.ra, T.dec)
T.tx -= 1
T.ty -= 1
T.itx = np.clip(np.round(T.tx).astype(int), 0, W-1)
T.ity = np.clip(np.round(T.ty).astype(int), 0, H-1)
plt.clf()
dimshow(get_rgb(coimgs, bands))
ax = plt.axis()
plt.plot(T.tx, T.ty, 'o', mec=green, mfc='none', ms=10, mew=1.5)
plt.axis(ax)
plt.title('SDSS sources')
ps.savefig()
print('Detmaps...')
# Render the detection maps
detmaps = dict([(b, np.zeros((H,W), np.float32)) for b in bands])
detivs = dict([(b, np.zeros((H,W), np.float32)) for b in bands])
for tim in tims:
iv = tim.getInvvar()
psfnorm = 1./(2. * np.sqrt(np.pi) * tim.psf_sigma)
detim = tim.getImage().copy()
detim[iv == 0] = 0.
detim = gaussian_filter(detim, tim.psf_sigma) / psfnorm**2
detsig1 = tim.sig1 / psfnorm
subh,subw = tim.shape
detiv = np.zeros((subh,subw), np.float32) + (1. / detsig1**2)
detiv[iv == 0] = 0.
(Yo,Xo,Yi,Xi) = tim.resamp
detmaps[tim.band][Yo,Xo] += detiv[Yi,Xi] * detim[Yi,Xi]
detivs [tim.band][Yo,Xo] += detiv[Yi,Xi]
rtn = dict()
for k in ['T', 'coimgs', 'cons', 'detmaps', 'detivs',
'targetrd', 'pixscale', 'targetwcs', 'W','H',
'bands', 'tims', 'ps', 'brick', 'cat']:
rtn[k] = locals()[k]
return rtn
0
Example 129
Project: mtpy Source File: bayesian1d.py
def generate_input_file(edifilename, outputdir=None):
eo = EDI.Edi()
eo.readfile(edifilename)
filebase = op.splitext(op.split(edifilename)[-1])[0]
outfilename1 = '{0}_bayesian1d_z.in'.format(filebase)
outfilename2 = '{0}_bayesian1d_zvar.in'.format(filebase)
outdir = op.split(edifilename)[0]
if outputdir is not None:
try:
if not op.isdir(outputdir):
os.makedirs(outputdir)
outdir = outputdir
except:
pass
outfn1 = op.join(outdir,outfilename1)
outfn2 = op.join(outdir,outfilename2)
outfn1 = MTfh.make_unique_filename(outfn1)
outfn2 = MTfh.make_unique_filename(outfn2)
freqs = eo.freq
z_array = eo.Z.z
zerr_array = eo.Z.zerr
if len(freqs) != len(z_array):
raise MTex.MTpyError_edi_file('ERROR in Edi file {0} - number of '\
'freqs different from length of Z array'.format(eo.filename))
sorting = np.argsort(freqs)
outstring1 = ''
outstring2 = ''
for idx in sorting:
z = z_array[idx]
zerr = zerr_array[idx]
f = freqs[idx]
outstring1 += '{0}\t'.format(f)
outstring2 += '{0}\t'.format(f)
for i in np.arange(2):
for j in np.arange(2):
if np.imag(z[i%2,(j+1)/2]) < 0 :
z_string = '{0}-{1}i'.format(np.real(z[i%2,(j+1)/2]),
np.abs(np.imag(z[i%2,(j+1)/2])))
else:
z_string = '{0}+{1}i'.format(np.real(z[i%2,(j+1)/2]),
np.imag(z[i%2,(j+1)/2]))
zerr_string = '{0}'.format(zerr[i%2,(j+1)/2])
outstring1 += '{0}\t'.format(z_string)
outstring2 += '{0}\t'.format(zerr_string)
outstring1 = outstring1.rstrip() + '\n'
outstring2 = outstring2.rstrip() + '\n'
Fout1 = open(outfn1,'w')
Fout2 = open(outfn2,'w')
Fout1.write(outstring1.expandtabs(4))
Fout2.write(outstring2.expandtabs(4))
Fout1.close()
Fout2.close()
return outfn1,outfn2
0
Example 130
Project: yatsm Source File: test_cache.py
@xfail
def test_update_cache_file_add_obs(cachefile, example_cache,
example_timeseries):
""" Grab a subset of test data and see if we get more data back """
stack_images = example_timeseries['images']
stack_image_IDs = example_timeseries['image_IDs']
# Presort and subset for comparison
sort_idx = np.argsort(example_cache['image_IDs'])
test_Y = example_cache['Y'][:, sort_idx, :]
test_IDs = example_cache['image_IDs'][sort_idx]
size_1 = 100
size_2 = 200
sort_idx = np.argsort(stack_image_IDs)[:size_2]
stack_images = stack_images[sort_idx]
stack_IDs = stack_image_IDs[sort_idx]
# Create reduced dataset to add to
np.savez_compressed('test.npz',
Y=test_Y[:, :size_1, :],
image_IDs=test_IDs[:size_1])
# Write update and read back
cache.update_cache_file(stack_images, stack_IDs,
'test.npz', 'test_new.npz',
0, io.gdal_reader)
updated = np.load('test_new.npz')
# Test and clean update
np.testing.assert_equal(test_Y[:, :size_2, :], updated['Y'])
np.testing.assert_equal(test_IDs[:size_2], updated['image_IDs'])
os.remove('test.npz')
os.remove('test_new.npz')
0
Example 131
def estimate(self, keypoints):
center = array((nan, nan))
scale_estimate = nan
med_rot = nan
# At least 2 keypoints are needed for scale
if keypoints.size > 1:
# Extract the keypoint classes
keypoint_classes = keypoints[:, 2].squeeze().astype(np.int)
# Retain singular dimension
if keypoint_classes.size == 1:
keypoint_classes = keypoint_classes[None]
# Sort
ind_sort = argsort(keypoint_classes)
keypoints = keypoints[ind_sort]
keypoint_classes = keypoint_classes[ind_sort]
# Get all combinations of keypoints
all_combs = array([val for val in itertools.product(range(keypoints.shape[0]), repeat=2)])
# But exclude comparison with itself
all_combs = all_combs[all_combs[:, 0] != all_combs[:, 1], :]
# Measure distance between allcombs[0] and allcombs[1]
ind1 = all_combs[:, 0]
ind2 = all_combs[:, 1]
class_ind1 = keypoint_classes[ind1] - 1
class_ind2 = keypoint_classes[ind2] - 1
duplicate_classes = class_ind1 == class_ind2
if not all(duplicate_classes):
ind1 = ind1[~duplicate_classes]
ind2 = ind2[~duplicate_classes]
class_ind1 = class_ind1[~duplicate_classes]
class_ind2 = class_ind2[~duplicate_classes]
pts_allcombs0 = keypoints[ind1, :2]
pts_allcombs1 = keypoints[ind2, :2]
# This distance might be 0 for some combinations,
# as it can happen that there is more than one keypoint at a single location
dists = util.L2norm(pts_allcombs0 - pts_allcombs1)
original_dists = self.squareform[class_ind1, class_ind2]
scalechange = dists / original_dists
# Compute angles
angles = np.empty((pts_allcombs0.shape[0]))
v = pts_allcombs1 - pts_allcombs0
angles = np.arctan2(v[:, 1], v[:, 0])
original_angles = self.angles[class_ind1, class_ind2]
angle_diffs = angles - original_angles
# Fix long way angles
long_way_angles = np.abs(angle_diffs) > math.pi
angle_diffs[long_way_angles] = angle_diffs[long_way_angles] - np.sign(angle_diffs[long_way_angles]) * 2 * math.pi
scale_estimate = median(scalechange)
if not self.estimate_scale:
scale_estimate = 1;
med_rot = median(angle_diffs)
if not self.estimate_rotation:
med_rot = 0;
keypoint_class = keypoints[:, 2].astype(np.int)
votes = keypoints[:, :2] - scale_estimate * (util.rotate(self.springs[keypoint_class - 1], med_rot))
# Remember all votes including outliers
self.votes = votes
# Compute pairwise distance between votes
pdist = scipy.spatial.distance.pdist(votes)
# Compute linkage between pairwise distances
linkage = scipy.cluster.hierarchy.linkage(pdist)
# Perform hierarchical distance-based clustering
T = scipy.cluster.hierarchy.fcluster(linkage, self.THR_OUTLIER, criterion='distance')
# Count votes for each cluster
cnt = np.bincount(T) # Dummy 0 label remains
# Get largest class
Cmax = argmax(cnt)
# Identify inliers (=members of largest class)
inliers = T == Cmax
# inliers = med_dists < THR_OUTLIER
# Remember outliers
self.outliers = keypoints[~inliers, :]
# Stop tracking outliers
keypoints = keypoints[inliers, :]
# Remove outlier votes
votes = votes[inliers, :]
# Compute object center
center = np.mean(votes, axis=0)
return (center, scale_estimate, med_rot, keypoints)
0
Example 132
Project: tractor Source File: testblob2.py
def oldjunk():
cutToPrimary = True
if False:
stars = [
# David's nearby pairs of F stars
(3900., 0., 118.37066, 52.527073),
(3705., 0., 130.17654, 52.750081),
# High stellar density
#(0., 0., 270.0, 0.003),
]
# Dustin's stars
# (4472.001, 0.02514649, 246.47016, 19.066909),
# (5196.53, 0.02490235, 240.09403, 37.404078),
# (6179.05, 0.6324392, 310.47791, 57.523221),
# (6021.875, 0.7000019, 150.52443, -0.478836),
# (7757.096, 0.06507664, 305.11144, -12.957655),
# (8088.685, 0.2436366, 253.11475, 11.60716),
# (8395.096, 0.7563477, 188.34439, 63.442057),
# (9201.74, 178, 93.971719, 0.56302169),
# ]
T = fits_table('stars2.fits')
print('Read stars:')
T.about()
stars.extend(zip(T.teff, T.teff_sigma, T.ra, T.dec))
# reformat
stars = [(ra,dec,[('T_EFF',teff,'Effective temperature'),
('DT_EFF',dteff,'Effective temperate error')],
cutToPrimary) for teff,dteff,ra,dec in stars]
elif False:
sdss = DR9(basedir=tempdir)
sdss.useLocalTree()
# near M87 / Virgo cluster
run,camcol,field = 3836,2,258
pofn = sdss.retrieve('photoObj', run, camcol, field)
T = fits_table(pofn, columns=[
'parent', 'objid', 'ra', 'dec', 'fracdev', 'objc_type', 'modelflux',
'theta_dev', 'theta_deverr', 'ab_dev', 'ab_deverr', 'phi_dev_deg',
'theta_exp', 'theta_experr', 'ab_exp', 'ab_experr', 'phi_exp_deg',
'resolve_status', 'nchild', 'flags', 'objc_flags',
'run','camcol','field','id'])
print(len(T), 'objects')
T.cut(T.objc_type == 3)
print(len(T), 'galaxies')
T.cut(T.nchild == 0)
print(len(T), 'children')
T.cut(np.argsort(-T.modelflux[:,2]))
# keep only one child in each blend family
parents = set()
keepi = []
for i in range(len(T)):
if T.parent[i] in parents:
continue
keepi.append(i)
parents.add(T.parent[i])
T.cut(np.array(keepi))
print(len(T), 'unique blend families')
T = T[:25]
stars = [(ra,dec,[],cutToPrimary) for ra,dec in zip(T.ra, T.dec)]
else:
# stars = [ (0.,0., 131.59054, 0.66408610),
# (0.,0., 147.34576, 0.51657783 ),
# ]
for args in stars:
ra,dec = args[:2]
try:
fns = oneblob(*args)
except:
import traceback
traceback.print_exc()
continue
if plots:
stamp_pattern = 'stamp-%%s-%.4f-%.4f.fits' % (ra, dec)
bands = 'ugriz'
fns = ['cat'] + [stamp_pattern % band for band in bands]
for j,fn in enumerate(fns[1:]):
print('Filename', fn)
F = fitsio.FITS(fn)
n = len(F) / 2
print('n ext:', n)
cols = int(np.ceil(np.sqrt(n)))
rows = int(np.ceil(n / float(cols)))
plt.clf()
for i,ext in enumerate(range(0, len(F), 2)):
plt.subplot(rows, cols, i+1)
hdr = F[ext].read_header()
dimshow(F[ext].read(), ticks=False)
plt.title('RCF %i/%i/%i' % (hdr['RUN'], hdr['CAMCOL'], hdr['FIELD']))
plt.suptitle('%s band' % bands[j])
plt.savefig(fn.replace('.fits','.png'))
F.close()
del F
0
Example 133
def __init__(self, variable, quantile, name=None):
"""Initializes quantile estimator.
Args:
variable: A string representing the variable to _calculate the quantile.
quantile: The quantile to be _calculated (range is [0,1]).
name: A string for the column name of results.
"""
def _calculate(data, weights):
"""Calculates the quantile for a weighted array.
Args:
data: A pandas dataframe
weights: A numpy array of weights.
Returns:
The weighted quantile of data[variable].
"""
values = data[variable].values
indices = np.argsort(values)
if quantile > 0.5:
# Because we're iterating through the indices we should
# choose the presumably quicker side to start.
local_quantile = 1 - quantile
previous_ii = indices[-1]
indices = reversed(indices)
else:
local_quantile = quantile
previous_ii = indices[0]
threshold = weights.sum() * local_quantile
previous_accumulated_weight = 0
# We follow the usual convention for the median: let
# lower=floor(threshold) and upper = ceil(threshold). If
# lower==upper we return values[lower] otherwise we return the
# average of values[lower] and values[upper].
for ii in indices:
if weights[ii] > 0:
if previous_accumulated_weight > threshold:
# The previous element passed the threshold itself and thus
# values[lower]==values[upper].
return values[previous_ii]
elif previous_accumulated_weight == threshold:
return (values[ii] + values[previous_ii]) / 2
else:
previous_ii = ii
previous_accumulated_weight += weights[ii]
if name is None:
name = "quantile({}, {:.2f})".format(variable, quantile)
super(Quantile, self).__init__(name, _calculate, "scalar")
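The boundary convention described in the comments (average the two bracketing values when the accumulated weight lands exactly on the threshold) can be cross-checked with a compact argsort/cumsum version. This is a sketch under the same assumptions (nonnegative weights, scalar quantile in [0, 1]) rather than a drop-in replacement for the class above:

import numpy as np

def weighted_quantile(values, weights, q):
    # Weighted quantile via argsort + cumulative weights.
    order = np.argsort(values)
    values = np.asarray(values, dtype=float)[order]
    weights = np.asarray(weights, dtype=float)[order]
    cum = np.cumsum(weights)
    threshold = q * weights.sum()
    i = int(np.searchsorted(cum, threshold))
    # threshold exactly on a boundary: average the two bracketing values
    if i + 1 < len(values) and np.isclose(cum[i], threshold):
        return 0.5 * (values[i] + values[i + 1])
    return values[i]

print(weighted_quantile([3, 1, 2, 4], [1, 1, 1, 1], 0.5))  # 2.5, the usual median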
0
Example 134
def __call__(self, mat, threshold = None, sort_results = True):
'''
Find all local maxima that are greater than threshold, up to a total of
max_length.
To save time, the arrays that hold the maxes and vals are created
once and reused for each call. This means that local maximum detection
is not thread safe. If using this class with threads create an instance
for each thread.
@param mat: 2d Real Matrix input.
@param threshold: Minimum value of local maxima.
@param sort_results: set to False to save time and return an unordered list.
@returns: maxes,vals
'''
maxes = self.maxes
vals = self.vals
r,c = mat.shape
max_length = self.max_length
if threshold != None:
count = weave.inline(
'''
int count = 0;
for( int i = 1; i < r-1 ; i++){
for(int j = 1; j < c-1 ; j++){
// Check if the current location meets the threshold
if (mat(i,j) > threshold &&
mat(i,j) > mat(i,j-1) &&
mat(i,j) > mat(i,j+1) &&
mat(i,j) > mat(i-1,j-1) &&
mat(i,j) > mat(i-1,j) &&
mat(i,j) > mat(i-1,j+1) &&
mat(i,j) > mat(i+1,j-1) &&
mat(i,j) > mat(i+1,j) &&
mat(i,j) > mat(i+1,j+1)){
// This is a local max
maxes(count,0) = i;
maxes(count,1) = j;
vals(count) = mat(i,j);
count += 1;
if(count == max_length){
i = r;
j = c;
}
}
}
}
return_val = count;
''',
arg_names=['mat','maxes','vals','max_length','threshold','r','c'],
type_converters=weave.converters.blitz,
)
else:
count = weave.inline(
'''
int count = 0;
for( int i = 1; i < r-1 ; i++){
for(int j = 1; j < c-1 ; j++){
// Check if the current location meets the threshold
if (mat(i,j) > mat(i,j-1) &&
mat(i,j) > mat(i,j+1) &&
mat(i,j) > mat(i-1,j-1) &&
mat(i,j) > mat(i-1,j) &&
mat(i,j) > mat(i-1,j+1) &&
mat(i,j) > mat(i+1,j-1) &&
mat(i,j) > mat(i+1,j) &&
mat(i,j) > mat(i+1,j+1)){
// This is a local max
maxes(count,0) = i;
maxes(count,1) = j;
vals(count) = mat(i,j);
count += 1;
if(count == max_length){
i = r;
j = c;
}
}
}
}
return_val = count;
''',
arg_names=['mat','maxes','vals','max_length','r','c'],
type_converters=weave.converters.blitz,
)
if sort_results == False:
return maxes[:count,:].copy(),vals[:count].copy()
order = np.argsort(vals[:count])[::-1]
maxes = maxes[order]
vals = vals[order]
#print vals
#print maxes
return maxes,vals
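scipy.weave has long since been removed from SciPy, so the inline C above no longer runs on current installations. Below is a rough pure-NumPy sketch of the same strict 8-neighbour test, with the maxima ordered by value via argsort; it is a standalone illustration, not a drop-in replacement for the class above.

import numpy as np

def local_maxima(mat, threshold=None):
    # Strict 8-neighbour local maxima of the interior, sorted by decreasing value.
    c = mat[1:-1, 1:-1]
    mask = ((c > mat[:-2, :-2]) & (c > mat[:-2, 1:-1]) & (c > mat[:-2, 2:]) &
            (c > mat[1:-1, :-2]) & (c > mat[1:-1, 2:]) &
            (c > mat[2:, :-2]) & (c > mat[2:, 1:-1]) & (c > mat[2:, 2:]))
    if threshold is not None:
        mask &= (c > threshold)
    rows, cols = np.nonzero(mask)
    rows, cols = rows + 1, cols + 1          # offset for the trimmed border
    vals = mat[rows, cols]
    order = np.argsort(vals)[::-1]           # largest maxima first
    return np.column_stack((rows[order], cols[order])), vals[order]

maxes, vals = local_maxima(np.random.rand(20, 20), threshold=0.8)
print(maxes.shape, vals[:3])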
0
Example 135
Project: hyperion Source File: filter.py
def to_hdf5_group(self, group, name):
self.check_all_set()
# Get spectral coordinate in Hz and transmission in fractional terms
nu = self.spectral_coord.to(u.Hz, equivalencies=u.spectral()).value
tr = self.transmission.to(u.one).value
# Sort in order of increasing Hz
order = np.argsort(nu)
nu = nu[order]
tr = tr[order]
# Get other parameters for the normalization
nu0 = self.central_spectral_coord.to(u.Hz, equivalencies=u.spectral()).value
alpha = self.alpha
beta = self._beta
# Here we normalize the filter before passing it to Hyperion
tr_norm = (tr / nu ** (1 + beta)
/ nu0 ** alpha
/ integrate(nu, tr / nu ** (1. + alpha + beta)))
# Now multiply by nu so that Hyperion returns nu * Fnu
tr_norm *= nu
dset = group.create_dataset(name, data=np.array(list(zip(nu, tr, tr_norm)),
dtype=[('nu', float),
('tr', float),
('tn', float)]))
dset.attrs['name'] = np.string_(self.name)
dset.attrs['alpha'] = self.alpha
dset.attrs['beta'] = self._beta
dset.attrs['nu0'] = self.central_spectral_coord.to(u.Hz, equivalencies=u.spectral()).value
0
Example 136
def main():
import optparse
parser = optparse.OptionParser('%prog: [opts] <profile>...')
parser.add_option('--merge', action='store_true', help='Merge input files into one big profile')
opt,args = parser.parse_args()
if len(args) == 0:
parser.print_help()
sys.exit(0)
if opt.merge:
p = pstats.Stats(args[0])
#p.add(*args[1:])
for fn in args[1:]:
p.add(fn)
P = [p]
else:
P = [pstats.Stats(fn) for fn in args]
for p in P:
print()
print('Cumulative time')
print()
#p = pstats.Stats(fn)
p = p.strip_dirs()
p.sort_stats('cumulative').print_stats(100)
print()
print('Callees ordered by cumulative time:')
print()
p.print_callees(40)
print()
print('Time')
print()
#p = pstats.Stats(fn)
p.sort_stats('time').print_stats(40)
p.print_callees()
width,lst = p.get_print_list([40])
#print 'lst', lst
if lst:
p.calc_callees()
name_size = width
arrow = '->'
print('lst length:', len(lst))
for func in lst:
#print 'func', func
if func in p.all_callees:
p.print_call_heading(width, "called...")
print(pstats.func_std_string(func).ljust(name_size) + arrow, end=' ')
print()
#p.print_call_line(width, func, p.all_callees[func])
cc = p.all_callees[func]
#print 'Callees:', cc
TT = []
CT = []
for func,value in cc.items():
#print 'func,value', func, value
if isinstance(value, tuple):
nc, ccx, tt, ct = value
TT.append(tt)
CT.append(ct)
#print ' ', func, ct, tt
else:
print('NON-TUPLE', value)
I = np.argsort(CT)
FV = list(cc.items())
for i in reversed(I[-40:]):
func,value = FV[i]
name = pstats.func_std_string(func)
if isinstance(value, tuple):
nc, ccx, tt, ct = value
if nc != ccx:
substats = '%d/%d' % (nc, ccx)
else:
substats = '%d' % (nc,)
substats = '%-20s %s %s %s' % (substats, pstats.f8(tt), pstats.f8(ct), name)
print(' ' + substats)
else:
p.print_call_line(width, func, {})
print()
print()
0
Example 137
Project: hyperopt-nnet Source File: pylearn_pca.py
def pca_from_cov(cov, lower=0, max_components=None, max_energy_fraction=None):
"""Return (eigvals, eigvecs) of data with covariance `cov`.
The returned eigvals will be a np ndarray vector.
The returned eigvecs will be a np ndarray matrix whose *cols* are the eigenvectors.
This is recommended for retrieving many components from high-dimensional data.
:param cov: data covariance matrix
:type cov: a np ndarray
:returns: (eigvals, eigvecs) of decomposition
"""
w, v = scipy.linalg.eigh(a=cov, lower=lower)
# definition of eigh
# a * v[:,i] = w[i] * vr[:,i]
# v.H * v = identity
# total variance (vartot) can be computed at this point:
vartot = w.sum()
# sort the eigenvals and vecs by decreasing magnitude
a = np.argsort(w)[::-1]
w = w[a]
v = v[:,a]
if max_components != None:
w = w[:max_components]
v = v[:, :max_components]
if max_energy_fraction != None:
if not (0.0 <= max_energy_fraction <= 1.0):
raise ValueError('illegal value for max_energy_fraction', max_energy_fraction)
if max_energy_fraction < 1.0:
energy = 0
i = 0
while (energy < max_energy_fraction * vartot) and (i < len(w)):
energy += w[i]
i += 1
w = w[:(i-1)]
v = v[:,:(i-1)]
return w,v
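A quick usage sketch for pca_from_cov: estimate a covariance matrix from centred data, take the top components, and project. The data and shapes here are illustrative, not part of the hyperopt-nnet code.

import numpy as np
# assumes pca_from_cov is available as defined above

X = np.random.randn(500, 10)
Xc = X - X.mean(axis=0)
cov = np.cov(Xc, rowvar=False)

eigvals, eigvecs = pca_from_cov(cov, max_components=3)
projected = Xc.dot(eigvecs)   # project onto the top-3 principal axes
print(eigvals.shape, eigvecs.shape, projected.shape)   # (3,) (10, 3) (500, 3)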
0
Example 138
Project: DeBaCl Source File: utils.py
def knn_graph(X, k, method='brute_force', leaf_size=30):
"""
Compute the symmetric k-nearest neighbor graph for a set of points. Assume
a Euclidean distance metric.
Parameters
----------
X : numpy array | list [numpy arrays]
Data points, with each row as an observation.
k : int
The number of points to consider as neighbors of any given observation.
method : {'brute_force', 'kd_tree', 'ball_tree'}, optional
Computing method.
- 'brute-force': computes the (Euclidean) distance between all O(n^2)
pairs of rows in 'X', then for every point finds the k-nearest. It is
limited to tens of thousands of observations (depending on available
RAM).
- 'kd-tree': partitions the data into axis-aligned rectangles to avoid
computing all O(n^2) pairwise distances. Much faster than
'brute-force', but only works for data in fewer than about 20
dimensions. Requires the scikit-learn library.
- 'ball-tree': partitions the data into balls and uses the metric
property of euclidean distance to avoid computing all O(n^2)
distances. Typically much faster than 'brute-force', and works with
up to a few hundred dimensions. Requires the scikit-learn library.
leaf_size : int, optional
For the 'kd-tree' and 'ball-tree' methods, the number of observations
in the leaf nodes. Leaves are not split further, so distance
computations within leaf nodes are done by brute force. 'leaf_size' is
ignored for the 'brute-force' method.
Returns
-------
neighbors : numpy array
Each row contains the nearest neighbors of the corresponding row in
'X', indicated by row indices.
radii : list[float]
For each row of 'X' the distance to its k'th nearest neighbor
(including itself).
See Also
--------
epsilon_graph
Examples
--------
>>> X = numpy.random.rand(100, 2)
>>> knn, radii = debacl.utils.knn_graph(X, k=8, method='kd_tree')
"""
n, p = X.shape
if method == 'kd_tree':
if _HAS_SKLEARN:
kdtree = _sknbr.KDTree(X, leaf_size=leaf_size, metric='euclidean')
distances, neighbors = kdtree.query(X, k=k, return_distance=True,
sort_results=True)
radii = distances[:, -1]
else:
raise ImportError("The scikit-learn library could not be loaded." +
" It is required for the 'kd-tree' method.")
elif method == 'ball_tree':
if _HAS_SKLEARN:
btree = _sknbr.BallTree(X, leaf_size=leaf_size, metric='euclidean')
distances, neighbors = btree.query(X, k=k, return_distance=True,
sort_results=True)
radii = distances[:, -1]
else:
raise ImportError("The scikit-learn library could not be loaded." +
" It is required for the 'ball-tree' method.")
else: # assume brute-force
if not _HAS_SCIPY:
raise ImportError("The 'scipy' module could not be loaded. " +
"It is required for the 'brute_force' method " +
"for building a knn similarity graph.")
d = _spd.pdist(X, metric='euclidean')
D = _spd.squareform(d)
rank = _np.argsort(D, axis=1)
neighbors = rank[:, 0:k]
k_nbr = neighbors[:, -1]
radii = D[_np.arange(n), k_nbr]
return neighbors, radii
0
Example 139
Project: hyphae_ani Source File: hyphae_mergetest2.py
def step(self):
self.itt += 1
num = self.num
try:
k = self.DQ.pop()
except IndexError:
## no more live nodes.
return False, False
self.C[k] += 1
if self.C[k]>CK_MAX:
## node is dead
## this is inefficient. but it does not matter for small canvases
#self.ctx.set_source_rgb(*CONTRASTC)
#self.circle(self.X[k],self.Y[k],ONE*4)
return True, False
#r = RAD + random()*ONE*R_RAND_SIZE
r = self.R[k]*RAD_SCALE if self.D[k]>-1 else self.R[k]
b = num if self.D[k]>-1 else self.B[k]
if r<ONE:
## node dies
self.ctx.set_source_rgb(*CONTRASTC)
self.circle(self.X[k],self.Y[k],ONE*4)
self.C[k] = CK_MAX+1
return True, False
ge = self.GE[k]+1 if self.D[k]>-1 else self.GE[k]
angle = normal()*SEARCH_ANGLE_MAX
the = self.THE[k] + (1.-1./((ge+1)**SEARCH_ANGLE_EXP))*angle
x = self.X[k] + sin(the)*r
y = self.Y[k] + cos(the)*r
## stop nodes at edge of canvas
if x>X_MAX or x<X_MIN or y>Y_MAX or y<Y_MIN:
## node is outside canvas
return True, False
## stop nodes at edge of circle
## remember to set initial node inside circle.
#circle_rad = sqrt(square(x-0.5)+square(y-0.5))
#if circle_rad>CIRCLE_RADIUS:
### node is outside circle
#return True,False
try:
inds = near_zone_inds(x,y,self.Z,k)
except IndexError:
## node is outside zonemapped area
self.DQ.appendleft(k)
return True, False
good = True
if len(inds)>0:
dd = square(self.X[inds]-x) + square(self.Y[inds]-y)
sqrt(dd,dd)
mask = dd*2 >= self.R[inds]+r
good = mask.all()
if good:
self.X[num] = x
self.Y[num] = y
self.R[num] = r
self.THE[num] = the
self.P[num] = k
self.GE[num] = ge
self.B[num] = b
## set first descendant if node has no descendants
if self.D[k]<0:
self.D[k] = num
z = get_z(x,y)
self.Z[z].append(num)
#self.ctx.set_line_width(ONE*2)
#self.ctx.set_source_rgb(*FRONT)
#self.line(self.X[k],self.Y[k],x,y)
#self.ctx.set_source_rgb(*CONTRASTB)
#self.circle(x,y,ONE*4)
#self.ctx.set_line_width(ONE)
#self.ctx.set_source_rgb(*CONTRASTA)
#self.circle_stroke(x,y,r*0.5)
self.ctx.set_source_rgb(*FRONT)
self.circles(self.X[k],self.Y[k],x,y,r*0.2)
#self.ctx.set_source_rgb(*CONTRASTB)
#self.circle_stroke(x,y,r*0.5)
self.DQ.appendleft(num)
self.DQ.appendleft(k)
self.num += 1
## node was added
return True, True
if not good and len(inds)>0:
## can we merge two branches?
if mask.sum()>1:
ms = np.argsort(dd)
mks = ms[:2]
mk = inds[mks[0]]
#if 2*dd[mks[0]]<dd[mks[1]] and self.P[mk]!=k and self.C[mk]<CK_MAX:
if 2*dd[mks[0]]<dd[mks[1]] and self.P[mk]!=k and self.C[mk]<CK_MAX:
dx = self.X[mk] - self.X[k]
dy = self.Y[mk] - self.Y[k]
dd = sqrt(dx*dx+dy*dy)
beta = arctan2(dy,dx)
x = self.X[k] + cos(beta)*dd*0.5
y = self.Y[k] + sin(beta)*dd*0.5
self.X[num] = x
self.Y[num] = y
self.R[num] = r
self.THE[num] = beta
self.P[num] = k
self.GE[num] = ge
self.C[num] = CK_MAX+1
## set first descendant if node has no descendants
if self.D[k]<0:
self.D[k] = num
z = get_z(x,y)
self.Z[z].append(num)
#self.DQ.appendleft(num)
self.DQ.appendleft(k)
self.num += 1
self.ctx.set_source_rgb(1,0,0)
self.circle(x,y,r*0.1)
#self.ctx.set_source_rgb(*CONTRASTB)
#self.circles(self.X[k],self.Y[k],self.X[mk],self.Y[mk],r*0.2)
##self.C[k] = CK_MAX+1
#self.C[mks[0]] = CK_MAX+1
return True, True
## failed to place node
self.DQ.appendleft(k)
return True, False
0
Example 140
Project: chaco Source File: multi_line_plot.py
def _gather_points(self):
"""
Collects the data points that are within the bounds of the plot and
caches them.
"""
if self._cache_valid:
return
if not self.index or not self.value:
return
index = self.index.get_data()
varray = self._trace_data
if varray.size == 0:
self._cached_data_pts = []
self._cache_valid = True
return
coordinates = self.yindex.get_data()
if self.fast_clip:
coord_min = float(coordinates[0])
coord_max = coordinates[-1]
slice_min = max(0,ceil((varray.shape[0]-1)*(self.value_range.low - coord_min)/(coord_max - coord_min)))
slice_max = min(varray.shape[0], 1+floor((varray.shape[0]-1)*(self.value_range.high - coord_min)/(coord_max - coord_min)))
varray = varray[slice_min:slice_max]
# FIXME: The y coordinates must also be sliced to match varray.
# Check to see if the data is completely outside the view region.
outside = False
# Check x coordinates.
low, high = self.index.get_bounds()
if low > self.index_range.high or high < self.index_range.low:
outside = True
# Check y coordinates. Use varray because it is based on the yindex,
# but has been shifted up or down depending on the values.
ylow, yhigh = varray.min(), varray.max()
if ylow > self.value_range.high or yhigh < self.value_range.low:
outside = True
if outside:
self._cached_data_pts = []
self._cache_valid = True
return
if len(index) == 0 or varray.shape[0] == 0 or varray.shape[1] == 0 \
or len(index) != varray.shape[1]:
self._cached_data_pts = []
self._cache_valid = True
return
size_diff = varray.shape[1] - len(index)
if size_diff > 0:
warnings.warn('Chaco.LinePlot: value.shape[1] %d - len(index) %d = %d\n' \
% (varray.shape[1], len(index), size_diff))
index_max = len(index)
varray = varray[:,:index_max]
else:
index_max = varray.shape[1]
index = index[:index_max]
# Split the index and value raw data into non-NaN chunks.
# nan_mask is a boolean M by N array.
nan_mask = invert(isnan(varray)) & invert(isnan(index))
blocks_list = []
for nm in nan_mask:
blocks = [b for b in arg_find_runs(nm, "flat") if nm[b[0]] != 0]
blocks_list.append(blocks)
line_points = []
for k, blocks in enumerate(blocks_list):
points = []
for block in blocks:
start, end = block
block_index = index[start:end]
block_value = varray[k, start:end]
index_mask = self.index_mapper.range.mask_data(block_index)
runs = [r for r in arg_find_runs(index_mask, "flat") \
if index_mask[r[0]] != 0]
# Check to see if our data view region is between two points in the
# index data. If so, then we have to reverse map our current view
# into the appropriate index and draw the bracketing points.
if runs == []:
data_pt = self.map_data((self.x_mapper.low_pos, self.y_mapper.low_pos))
if self.index.sort_order == "none":
indices = argsort(index)
sorted_index = take(index, indices)
sorted_value = take(varray[k], indices)
sort = 1
else:
sorted_index = index
sorted_value = varray[k]
if self.index.sort_order == "ascending":
sort = 1
else:
sort = -1
ndx = bin_search(sorted_index, data_pt, sort)
if ndx == -1:
# bin_search can return -1 if data_pt is outside the bounds
# of the source data
continue
z = transpose(array((sorted_index[ndx:ndx+2],
sorted_value[ndx:ndx+2])))
points.append(z)
else:
# Expand the width of every group of points so we draw the lines
# up to their next point, outside the plot area
data_end = len(index_mask)
for run in runs:
start, end = run
if start != 0:
start -= 1
if end != data_end:
end += 1
run_data = transpose(array((block_index[start:end],
block_value[start:end])))
points.append(run_data)
line_points.append(points)
self._cached_data_pts = line_points
self._cache_valid = True
return
0
Example 141
Project: DeepSurv Source File: deep_surv.py
def train(self,
train_data, valid_data= None,
n_epochs = 500,
validation_frequency = 10,
patience = 1000, improvement_threshold = 0.99999, patience_increase = 2,
verbose = True,
update_fn = lasagne.updates.nesterov_momentum,
**kwargs):
"""
Trains a DeepSurv network on the provided training data and evaluates
it on the validation data.
Parameters:
train_data: dictionary with the following keys:
'x' : (n,d) array of observations (dtype = float32).
't' : (n) array of observed time events (dtype = float32).
'e' : (n) array of observed time indicators (dtype = int32).
valid_data: optional. A dictionary with the following keys:
'x' : (n,d) array of observations.
't' : (n) array of observed time events.
'e' : (n) array of observed time indicators.
standardize: True or False. Set the offset and scale of the
standardization layer to the mean and standard deviation of the
training data.
n_epochs: integer for the maximum number of epochs the network will
train for.
validation_frequency: how often the network computes the validation
metrics. Decreasing validation_frequency increases training speed.
patience: minimum number of epochs to train for. Once patience is
reached, looks at validation improvement to increase patience or
early stop.
improvement_threshold: percentage of improvement needed to increase
patience.
patience_increase: multiplier to patience if threshold is reached.
verbose: True or False. Print additional messages to stdout.
update_fn: lasagne update function for training.
Default: lasagne.updates.nesterov_momentum
**kwargs: additional parameters to provide _get_train_valid_fn.
Parameters used to provide configurations to update_fn.
Returns:
metrics: a dictionary of training metrics that include:
'train': a list of loss values for each training epoch
'train_ci': a list of C-indices for each training epoch
'best_params': a list of numpy arrays containing the parameters
when the network had the best validation loss
'best_params_idx': the epoch at which best_params was found
If valid_data is provided, the metrics also contain:
'valid': a list of validation loss values for each validation frequency
'valid_ci': a list of validation C-indices for each validation frequency
'best_validation_loss': the best validation loss found during training
'best_valid_ci': the max validation C-index found during training
"""
if verbose:
print '[INFO] Training CoxMLP'
train_loss = []
train_ci = []
x_train, e_train, t_train = train_data['x'], train_data['e'], train_data['t']
# Sort Training Data for Accurate Likelihood
sort_idx = numpy.argsort(t_train)[::-1]
x_train = x_train[sort_idx]
e_train = e_train[sort_idx]
t_train = t_train[sort_idx]
# Set Standardization layer offset and scale to training data mean and std
if self.standardize:
self.offset = x_train.mean(axis = 0)
self.scale = x_train.std(axis = 0)
if valid_data:
valid_loss = []
valid_ci = []
x_valid, e_valid, t_valid = valid_data['x'], valid_data['e'], valid_data['t']
# Sort Validation Data
sort_idx = numpy.argsort(t_valid)[::-1]
x_valid = x_valid[sort_idx]
e_valid = e_valid[sort_idx]
t_valid = t_valid[sort_idx]
# Initialize Metrics
best_validation_loss = numpy.inf
best_params = None
best_params_idx = -1
# Initialize Training Parameters
lr = theano.shared(numpy.array(self.learning_rate,
dtype = numpy.float32))
momentum = numpy.array(0, dtype= numpy.float32)
train_fn, valid_fn = self._get_train_valid_fn(
L1_reg=self.L1_reg, L2_reg=self.L2_reg,
learning_rate=lr,
momentum = momentum,
update_fn = update_fn, **kwargs
)
start = time.time()
for epoch in xrange(n_epochs):
# Power-Learning Rate Decay
lr = self.learning_rate / (1 + epoch * self.lr_decay)
if self.momentum and epoch >= 10:
momentum = self.momentum
loss = train_fn(x_train, e_train)
train_loss.append(loss)
ci_train = self.get_concordance_index(
x_train,
t_train,
e_train,
)
train_ci.append(ci_train)
if valid_data and (epoch % validation_frequency == 0):
validation_loss = valid_fn(x_valid, e_valid)
valid_loss.append(validation_loss)
ci_valid = self.get_concordance_index(
x_valid,
t_valid,
e_valid
)
valid_ci.append(ci_valid)
if validation_loss < best_validation_loss:
# improve patience if loss improves enough
if validation_loss < best_validation_loss * improvement_threshold:
patience = max(patience, epoch * patience_increase)
best_params = [param.copy().eval() for param in self.params]
best_params_idx = epoch
best_validation_loss = validation_loss
if patience <= epoch:
break
if verbose:
print('Finished Training with %d iterations in %0.2fs' % (
epoch + 1, time.time() - start
))
metrics = {
'train': train_loss,
'best_params': best_params,
'best_params_idx' : best_params_idx,
'train_ci' : train_ci
}
if valid_data:
metrics.update({
'valid' : valid_loss,
'valid_ci': valid_ci,
'best_valid_ci': max(valid_ci),
'best_validation_loss':best_validation_loss
})
return metrics
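The descending time sort used twice in train() (numpy.argsort(t)[::-1]) keeps the three parallel arrays aligned so that the partial-likelihood code sees subjects ordered from longest to shortest survival. A small standalone illustration with made-up data:

import numpy as np

t = np.array([5.0, 1.0, 3.0, 9.0], dtype=np.float32)    # observed times
e = np.array([1, 0, 1, 1], dtype=np.int32)               # event indicators
x = np.arange(8, dtype=np.float32).reshape(4, 2)          # covariates

sort_idx = np.argsort(t)[::-1]            # longest survival time first
t, e, x = t[sort_idx], e[sort_idx], x[sort_idx]
print(t)   # [9. 5. 3. 1.]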
0
Example 142
def plot(self, ax, data, _aes):
(data, _aes) = self._update_data(data, _aes)
variables = _aes.data
x = data[variables['x']]
y = data[variables['y']]
params = {'alpha': 0.2}
se = self.params.get('se', True)
method = self.params.get('method', 'lm')
level = self.params.get('level', 0.95)
window = self.params.get('window', None)
span = self.params.get('span', 2/3.)
if method == "lm":
x, y, y1, y2 = smoothers.lm(x, y, 1-level)
elif method == "ma":
x, y, y1, y2 = smoothers.mavg(x, y, window=window)
else:
x, y, y1, y2 = smoothers.lowess(x, y, span=span)
smoothed_data = pd.DataFrame(dict(x=x, y=y, y1=y1, y2=y2))
smoothed_data = smoothed_data.sort('x')
params = self._get_plot_args(data, _aes)
if 'alpha' not in params:
params['alpha'] = 0.2
order = np.argsort(x)
if self.params.get('se', True)==True:
if is_date(smoothed_data.x.iloc[0]):
dtype = smoothed_data.x.iloc[0].__class__
x = np.array([i.toordinal() for i in smoothed_data.x])
ax.fill_between(x, smoothed_data.y1, smoothed_data.y2, **params)
new_ticks = [dtype(i) for i in ax.get_xticks()]
ax.set_xticklabels(new_ticks)
else:
ax.fill_between(smoothed_data.x, smoothed_data.y1, smoothed_data.y2, **params)
if self.params.get('fit', True)==True:
del params['alpha']
ax.plot(smoothed_data.x, smoothed_data.y, **params)
0
Example 143
Project: imutils Source File: object_detection.py
def non_max_suppression(boxes, probs=None, overlapThresh=0.3):
# if there are no boxes, return an empty list
if len(boxes) == 0:
return []
# if the bounding boxes are integers, convert them to floats -- this
# is important since we'll be doing a bunch of divisions
if boxes.dtype.kind == "i":
boxes = boxes.astype("float")
# initialize the list of picked indexes
pick = []
# grab the coordinates of the bounding boxes
x1 = boxes[:, 0]
y1 = boxes[:, 1]
x2 = boxes[:, 2]
y2 = boxes[:, 3]
# compute the area of the bounding boxes and grab the indexes to sort
# (in the case that no probabilities are provided, simply sort on the
# bottom-left y-coordinate)
area = (x2 - x1 + 1) * (y2 - y1 + 1)
idxs = y2
# if probabilities are provided, sort on them instead
if probs is not None:
idxs = probs
# sort the indexes
idxs = np.argsort(idxs)
# keep looping while some indexes still remain in the indexes list
while len(idxs) > 0:
# grab the last index in the indexes list and add the index value
# to the list of picked indexes
last = len(idxs) - 1
i = idxs[last]
pick.append(i)
# find the largest (x, y) coordinates for the start of the bounding
# box and the smallest (x, y) coordinates for the end of the bounding
# box
xx1 = np.maximum(x1[i], x1[idxs[:last]])
yy1 = np.maximum(y1[i], y1[idxs[:last]])
xx2 = np.minimum(x2[i], x2[idxs[:last]])
yy2 = np.minimum(y2[i], y2[idxs[:last]])
# compute the width and height of the bounding box
w = np.maximum(0, xx2 - xx1 + 1)
h = np.maximum(0, yy2 - yy1 + 1)
# compute the ratio of overlap
overlap = (w * h) / area[idxs[:last]]
# delete all indexes from the index list that have overlap greater
# than the provided overlap threshold
idxs = np.delete(idxs, np.concatenate(([last],
np.where(overlap > overlapThresh)[0])))
# return only the bounding boxes that were picked
return boxes[pick].astype("int")
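A usage sketch of non_max_suppression with a few hand-made boxes; the coordinates and scores are invented purely for illustration:

import numpy as np
# assumes non_max_suppression is available as defined above

boxes = np.array([[10, 10, 60, 60],
                  [12, 12, 62, 62],        # heavily overlaps the first box
                  [100, 100, 150, 150]])
probs = np.array([0.9, 0.6, 0.8])

picked = non_max_suppression(boxes, probs=probs, overlapThresh=0.3)
print(picked)
# keeps the high-scoring box of the overlapping pair plus the distant box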
0
Example 144
Project: pygsp Source File: graph.py
def compute_fourier_basis(self, smallest_first=True, force_recompute=False, **kwargs):
r"""
Compute the Fourier basis of the graph.
Parameters
----------
smallest_first: bool
Define the order of the eigenvalues.
Default is smallest first (True).
force_recompute: bool
Force to recompute the Fourier basis if already existing.
Note
----
'G.compute_fourier_basis()' computes a full eigendecomposition of
the graph Laplacian G.L:
.. L = U Lambda U*
.. math:: {\cal L} = U \Lambda U^*
where $\Lambda$ is a diagonal matrix of the Laplacian eigenvalues.
*G.e* is a column vector of length *G.N* containing the Laplacian
eigenvalues. The largest eigenvalue is stored in *G.lmax*.
The eigenvectors are stored as column vectors of *G.U* in the same
order as the eigenvalues. Finally, the coherence of the
Fourier basis is in *G.mu*.
Example
-------
>>> from pygsp import graphs
>>> N = 50
>>> G = graphs.Sensor(N)
>>> G.compute_fourier_basis()
References
----------
cite `chung1997spectral`
"""
if hasattr(self, 'e') or hasattr(self, 'U'):
if force_recompute:
self.logger.warning("This graph already has a Fourier basis. Recomputing.")
else:
self.logger.error("This graph already has a Fourier basis. Stopping.")
return
if self.N > 3000:
self.logger.warning("Performing full eigendecomposition of a large "
"matrix may take some time.")
if not hasattr(self, 'L'):
raise AttributeError("Graph Laplacian is missing")
eigenvectors, eigenvalues, _ = sp.linalg.svd(self.L.todense())
inds = np.argsort(eigenvalues)
if not smallest_first:
inds = inds[::-1]
self.e = np.sort(eigenvalues)
self.lmax = np.max(self.e)
self.U = eigenvectors[:, inds]
self.mu = np.max(np.abs(self.U))
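For a symmetric graph Laplacian the same ascending ordering can be obtained with numpy.linalg.eigh plus argsort; below is a minimal sketch on a toy path-graph Laplacian (not the pygsp API):

import numpy as np

# path graph on 4 nodes: L = D - A
A = np.array([[0, 1, 0, 0],
              [1, 0, 1, 0],
              [0, 1, 0, 1],
              [0, 0, 1, 0]], dtype=float)
L = np.diag(A.sum(axis=1)) - A

eigenvalues, eigenvectors = np.linalg.eigh(L)   # eigh already returns ascending order
inds = np.argsort(eigenvalues)                  # explicit sort, mirroring the code above
e = eigenvalues[inds]
U = eigenvectors[:, inds]
print(e)           # smallest eigenvalue of a connected graph Laplacian is ~0
print(e.max())     # lmax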
0
Example 145
Project: refinery Source File: PrintTopics.py
def main():
parser = argparse.ArgumentParser()
parser.add_argument('dataName', type=str,
help='name of python script that produces data to analyze.')
parser.add_argument('allocModelName', type=str,
help='name of allocation model. {MixModel, DPMixModel}')
parser.add_argument('obsModelName', type=str,
help='name of observation model. {Gauss, ZMGauss}')
parser.add_argument('algName', type=str,
help='name of learning algorithms to consider, {EM, VB, moVB, soVB}.')
parser.add_argument('--jobname', type=str, default='defaultjob',
help='name of experiment whose results should be plotted')
parser.add_argument('--topW', type=int, default=10,
help='the number of top words printed for a given topic')
parser.add_argument('--taskids', type=str, default=None,
help="int ids for tasks (individual runs) of the given job to plot." + \
'Ex: "1" or "3" or "1,2,3" or "1-6"')
parser.add_argument('--savefilename', type=str, default=None,
help="absolute path to directory to save figure")
parser.add_argument('--iterid', type=int, default=None)
args = parser.parse_args()
rootpath = os.path.join(os.environ['BNPYOUTDIR'], args.dataName, \
args.allocModelName, args.obsModelName)
jobpath = os.path.join(rootpath, args.algName, args.jobname)
if not os.path.exists(jobpath):
raise ValueError("No such path: %s" % (jobpath))
taskids = PlotELBO.parse_task_ids(jobpath, args.taskids)
Data = loadData(jobpath)
if args.savefilename is not None and len(taskids) > 0:
try:
args.savefilename % ('1')
except TypeError:
raise ValueError("Missing or bad format string in savefilename %s" %
(args.savefilename)
)
for taskid in taskids:
taskpath = os.path.join(jobpath, taskid)
if args.iterid is None:
prefix = "Best"
else:
prefix = "Iter%05d" % (args.iterid)
hmodel = bnpy.ioutil.ModelReader.load_model(taskpath, prefix)
# Print top words across all topics
learnedK = hmodel.allocModel.K
savefid = taskpath + "/top_words.txt"
fid = open(savefid,'w+')
for k in xrange(learnedK):
lamvec = hmodel.obsModel.comp[k].lamvec
elamvec = lamvec / lamvec.sum()
topW_ind = np.argsort(elamvec)[-args.topW:]
for w in xrange(args.topW):
word = str(Data.vocab_dict[topW_ind[w]])
fid.write( word + ", " )
fid.write("...\n")
fid.close()
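When only the top-W entries are needed, np.argpartition avoids the full sort that np.argsort performs, which can matter for a large vocabulary. A small sketch (vocabulary and weights invented):

import numpy as np

vocab = np.array(['the', 'model', 'topic', 'word', 'data', 'prior'])
lamvec = np.array([0.05, 0.30, 0.25, 0.10, 0.20, 0.10])
topW = 3

# full sort, as in the example above
top_sorted = np.argsort(lamvec)[-topW:][::-1]
# partial sort: O(n) selection, then order only the W winners
top_part = np.argpartition(lamvec, -topW)[-topW:]
top_part = top_part[np.argsort(lamvec[top_part])[::-1]]

print(vocab[top_sorted])   # ['model' 'topic' 'data']
print(vocab[top_part])     # same three words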
0
Example 146
Project: lopq Source File: search.py
def multisequence(x, centroids):
"""
Implementation of multi-sequence algorithm for traversing a multi-index.
The algorithm is described in http://download.yandex.ru/company/cvpr2012.pdf.
:param ndarray x:
a query vector
:param list centroids:
a list of ndarrays containing cluster centroids for each subvector
:yields int d:
the cell distance approximation used to order cells
:yields tuple cell:
the cell indices
"""
# Infer parameters
splits = len(centroids)
V = centroids[0].shape[0]
# Compute distances to each coarse cluster and sort
cluster_dists = []
sorted_inds = []
for cx, split in iterate_splits(x, splits):
dists = ((cx - centroids[split]) ** 2).sum(axis=1)
inds = np.argsort(dists)
cluster_dists.append(dists)
sorted_inds.append(inds)
# Some helper functions used below
def cell_for_inds(inds):
return tuple([sorted_inds[s][i] for s, i in enumerate(inds)])
def dist_for_cell(cell):
return sum([cluster_dists[s][i] for s, i in enumerate(cell)])
def inds_in_range(inds):
for i in inds:
if i >= V:
return False
return True
# Initialize priority queue
h = []
traversed = set()
start_inds = tuple(0 for _ in xrange(splits))
start_dist = dist_for_cell(cell_for_inds(start_inds))
heapq.heappush(h, (start_dist, start_inds))
# Traverse cells
while len(h):
d, inds = heapq.heappop(h)
yield d, cell_for_inds(inds)
traversed.add(inds)
# Add neighboring cells to queue
if inds[1] == 0 or (inds[0] + 1, inds[1] - 1) in traversed:
c = (inds[0] + 1, inds[1])
if inds_in_range(c):
dist = dist_for_cell(cell_for_inds(c))
heapq.heappush(h, (dist, c))
if inds[0] == 0 or (inds[0] - 1, inds[1] + 1) in traversed:
c = (inds[0], inds[1] + 1)
if inds_in_range(c):
dist = dist_for_cell(cell_for_inds(c))
heapq.heappush(h, (dist, c))
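The ordering the generator produces can be sanity-checked on a tiny problem by brute force: compute the per-split distances, sum them for every cell, and argsort the totals. This toy check reproduces the cell ranking without the lazy heap traversal; the query and centroids below are random and purely illustrative.

import itertools
import numpy as np

rng = np.random.default_rng(0)
x = rng.normal(size=4)
# two subvectors of length 2, each with 3 coarse centroids
centroids = [rng.normal(size=(3, 2)), rng.normal(size=(3, 2))]

split_dists = [((x[2 * s:2 * s + 2] - c) ** 2).sum(axis=1)
               for s, c in enumerate(centroids)]

cells = list(itertools.product(range(3), range(3)))
bounds = np.array([split_dists[0][i] + split_dists[1][j] for i, j in cells])
for k in np.argsort(bounds):          # cells in order of increasing distance bound
    print(cells[k], bounds[k])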
0
Example 147
Project: qiime Source File: plot_semivariogram.py
def fit_semivariogram(xxx_todo_changeme, xxx_todo_changeme1, model, ranges):
""" Creates semivariogram values from two distance matrices.
:Parameters:
xxx_todo_changeme : tuple of (x_samples, x_distmtx)
sample labels and distance matrix for x
xxx_todo_changeme1 : tuple of (y_samples, y_distmtx)
sample labels and distance matrix for y
model: string
model to fit
ranges: list
the list of ranges to bin the data
:Returns:
x_vals: array
Values for x
y_vals: array
Values for y
x_fit: array
Values for x used in the fit
y_fit: array
Values for y fitted from model
func_text: string
Text description of the fitted model
"""
(x_samples, x_distmtx) = xxx_todo_changeme
(y_samples, y_distmtx) = xxx_todo_changeme1
if x_samples != y_samples:
lbl_x = list(argsort(x_samples))
if lbl_x != range(len(lbl_x)):
tmp = x_distmtx[:, lbl_x]
x_distmtx = tmp[lbl_x, :]
lbl_y = list(argsort(y_samples))
if lbl_y != range(len(lbl_y)):
tmp = y_distmtx[:, lbl_y]
y_distmtx = tmp[lbl_y, :]
# get upper triangle from matrix in a 1d array
x_tmp_vals = x_distmtx.compress(tri(len(x_distmtx)).ravel() == 0)
y_tmp_vals = y_distmtx.compress(tri(len(y_distmtx)).ravel() == 0)
# sorting lists and transforming to arrays
x_vals, y_vals = [], []
for i in argsort(x_tmp_vals):
x_vals.append(x_tmp_vals[i])
y_vals.append(y_tmp_vals[i])
x_vals = asarray(x_vals)
y_vals = asarray(y_vals)
# fitting model
fit_func = FitModel(x_vals, y_vals, model)
y_fit, params, func_text = fit_func()
x_fit = x_vals
# section for bins
if ranges != []:
# creating bins in x
min = 0
x_bins = []
for r in ranges[:-1]:
x_bins.extend(arange(min, r[1], r[0]))
min = r[1]
x_bins.extend(arange(min, max(x_vals), ranges[-1][0]))
x_bins[-1] = max(x_vals)
x_vals, hist = hist_bins(x_bins, x_vals)
# avg per bin, y values
y_tmp = []
for i, val in enumerate(hist):
if i == 0:
low = val
continue
high = low + val
y_tmp.append(mean(y_vals[low:high]))
low = high
y_vals = asarray(y_tmp)
# removing nans
x_vals = x_vals[~isnan(y_vals)]
y_vals = y_vals[~isnan(y_vals)]
return x_vals, y_vals, x_fit, y_fit, func_text
0
Example 148
Project: lifelines Source File: __init__.py
def _concordance_index(event_times, predicted_event_times, event_observed):
"""Find the concordance index in n * log(n) time.
Assumes the data has been verified by lifelines.utils.concordance_index first.
"""
# Here's how this works.
#
# It would be pretty easy to do if we had no censored data and no ties. There, the basic idea
# would be to iterate over the cases in order of their true event time (from least to greatest),
# while keeping track of a pool of *predicted* event times for all cases previously seen (= all
# cases that we know should be ranked lower than the case we're looking at currently).
#
# If the pool has O(log n) insert and O(log n) RANK (i.e., "how many things in the pool have
# value less than x"), then the following algorithm is n log n:
#
# Sort the times and predictions by time, increasing
# n_pairs, n_correct := 0
# pool := {}
# for each prediction p:
# n_pairs += len(pool)
# n_correct += rank(pool, p)
# add p to pool
#
# There are three complications: tied ground truth values, tied predictions, and censored
# observations.
#
# - To handle tied true event times, we modify the inner loop to work in *batches* of observations
# p_1, ..., p_n whose true event times are tied, and then add them all to the pool
# simultaneously at the end.
#
# - To handle tied predictions, which should each count for 0.5, we switch to
# n_correct += min_rank(pool, p)
# n_tied += count(pool, p)
#
# - To handle censored observations, we handle each batch of tied, censored observations just
# after the batch of observations that died at the same time (since those censored observations
# are comparable all the observations that died at the same time or previously). However, we do
# NOT add them to the pool at the end, because they are NOT comparable with any observations
# that leave the study afterward--whether or not those observations get censored.
died_mask = event_observed.astype(bool)
# TODO: is event_times already sorted? That would be nice...
died_truth = event_times[died_mask]
ix = np.argsort(died_truth)
died_truth = died_truth[ix]
died_pred = predicted_event_times[died_mask][ix]
censored_truth = event_times[~died_mask]
ix = np.argsort(censored_truth)
censored_truth = censored_truth[ix]
censored_pred = predicted_event_times[~died_mask][ix]
censored_ix = 0
died_ix = 0
times_to_compare = _BTree(np.unique(died_pred))
num_pairs = 0
num_correct = 0
num_tied = 0
def handle_pairs(truth, pred, first_ix):
"""
Handle all pairs that exited at the same time as truth[first_ix].
Returns:
(pairs, correct, tied, next_ix)
new_pairs: The number of new comparisons performed
new_correct: The number of comparisons correctly predicted
next_ix: The next index that needs to be handled
"""
next_ix = first_ix
while next_ix < len(truth) and truth[next_ix] == truth[first_ix]:
next_ix += 1
pairs = len(times_to_compare) * (next_ix - first_ix)
correct = 0
tied = 0
for i in range(first_ix, next_ix):
rank, count = times_to_compare.rank(pred[i])
correct += rank
tied += count
return (pairs, correct, tied, next_ix)
# we iterate through cases sorted by exit time:
# - First, all cases that died at time t0. We add these to the sortedlist of died times.
# - Then, all cases that were censored at time t0. We DON'T add these since they are NOT
# comparable to subsequent elements.
while True:
has_more_censored = censored_ix < len(censored_truth)
has_more_died = died_ix < len(died_truth)
# Should we look at some censored indices next, or died indices?
if has_more_censored and (not has_more_died
or died_truth[died_ix] > censored_truth[censored_ix]):
pairs, correct, tied, next_ix = handle_pairs(censored_truth, censored_pred, censored_ix)
censored_ix = next_ix
elif has_more_died and (not has_more_censored
or died_truth[died_ix] <= censored_truth[censored_ix]):
pairs, correct, tied, next_ix = handle_pairs(died_truth, died_pred, died_ix)
for pred in died_pred[died_ix:next_ix]:
times_to_compare.insert(pred)
died_ix = next_ix
else:
assert not (has_more_died or has_more_censored)
break
num_pairs += pairs
num_correct += correct
num_tied += tied
if num_pairs == 0:
raise ZeroDivisionError("No admissable pairs in the dataset.")
return (num_correct + num_tied / 2) / num_pairs
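A naive O(n^2) reference implementation is handy for checking the n log n version above on small inputs. This is a generic sketch of the concordance definition (ties in event times are ignored for brevity), not lifelines code:

import numpy as np

def concordance_naive(event_times, predicted, event_observed):
    # A pair (i, j) with t_i < t_j is admissible only if subject i actually
    # had the event; it is correct if the predicted times agree in order.
    num_pairs = num_correct = num_tied = 0
    n = len(event_times)
    for i in range(n):
        for j in range(n):
            if event_times[i] < event_times[j] and event_observed[i]:
                num_pairs += 1
                if predicted[i] < predicted[j]:
                    num_correct += 1
                elif predicted[i] == predicted[j]:
                    num_tied += 1
    if num_pairs == 0:
        raise ZeroDivisionError("No admissible pairs in the dataset.")
    return (num_correct + num_tied / 2) / num_pairs

t = np.array([1.0, 2.0, 3.0, 4.0])
p = np.array([1.0, 2.0, 3.0, 4.0])    # perfectly concordant predictions
e = np.array([1, 1, 0, 1])
print(concordance_naive(t, p, e))      # 1.0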
0
Example 149
Project: PyGraphArt Source File: graph.py
def tsp_hamilton_cycle(self, start):
sorted_edges = np.argsort(self.costs)
constraints = self.costs.copy()
constraints.fill(None)
self.upper_bound = 0
self.final_x = None
edges = np.array([i for i in self.edges])
opt_c = np.Infinity
if self.oriented:
print 'tsp only on unoriented graph'
return
problems = np.array([], dtype=object)
problems = np.append(problems, None)
problems[0] = constraints
while len(problems) != 0:
constraints = problems[0]  # pop the next subproblem's constraints
problems = np.delete(problems, 0)  # np.delete returns a new array; it must be reassigned
one_tree = np.array([], dtype=int)
chosen = np.zeros(len(edges), dtype=bool)
for i in sorted_edges:
if constraints[i] is not None:
if constraints[i] == 1:
chosen[i] = True
if len(one_tree) > 0:
one_tree = np.vstack((one_tree, [edges[i]]))
else:
one_tree = [edges[i]]
# visited_nodes = np.unique(
# np.append(visited_nodes, edges[i]))
for i in sorted_edges:
todo1 = todo2 = False
if len(one_tree) > 0:
bincount = np.bincount(np.hstack(one_tree))
if len(bincount) > edges[i][0]:
if bincount[edges[i][0]] <= 1:
todo1 = True
else:
todo1 = True
if len(bincount) > edges[i][1]:
if bincount[edges[i][1]] <= 1:
todo2 = True
else:
todo2 = True
else:
todo1 = todo2 = True
cycle = False
if len(one_tree) > 0:
cycle = self.is_cyclic(
self.edges_to_fs(np.vstack((one_tree, [edges[i]]))))
if (constraints[i] is None) and todo1 and todo2 and not cycle:
chosen[i] = True
if len(one_tree) > 0:
one_tree = np.vstack((one_tree, [edges[i]]))
else:
one_tree = [edges[i]]
# visited_nodes = np.unique(
# np.append(visited_nodes, edges[i]))
if len(edges[chosen]) == self.n-1:
break
for i in sorted_edges[~chosen]:
if not np.any(one_tree == i):
if len(one_tree) > 0:
one_tree = np.vstack((one_tree, [edges[i]]))
else:
one_tree = [edges[i]]
chosen[i] = True
break
lowerbound = np.sum(self.costs[chosen])
if lowerbound > opt_c:
continue
#
if np.all(np.bincount(np.hstack(one_tree)) == 2):
# np.max(np.bincount(np.hstack(one_tree))) == 2:
if lowerbound < opt_c:
opt_c = lowerbound
continue
node = np.argmax(np.bincount(np.hstack(one_tree)))
eds = edges[np.bitwise_or(constraints == 1, constraints == 0)]
if len(eds) > 0:
if np.sum(np.hstack(eds) == node) == 2:
continue
min_cost = np.inf
branch_i = None
for i in sorted_edges:
if node in edges[i] and self.costs[i] < min_cost:
branch_i = i
min_cost = self.costs[i]  # keep the cheapest edge incident to the over-covered node
constraints[branch_i] = 0
cycle = self.is_cyclic(self.edges_to_fs(edges[constraints == 1]))
if not cycle:
problems = np.append(problems, None)
problems[-1] = constraints.copy()  # branch with this edge excluded
constraints[branch_i] = 1
cycle = self.is_cyclic(self.edges_to_fs(edges[constraints == 1]))
if not cycle:
problems = np.append(problems, None)
problems[-1] = constraints.copy()  # branch with this edge forced
return edges[constraints == 1]
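For readers unfamiliar with the pattern, the heart of the example above is that np.argsort(self.costs) yields edge indices in ascending cost order, and each edge is then added greedily unless it would close a cycle. The following is a minimal sketch of that idea on a made-up 4-node graph, using a small union-find in place of PyGraphArt's is_cyclic/edges_to_fs helpers; it builds a plain minimum spanning tree rather than the 1-tree/branch-and-bound search of the original.
import numpy as np

edges = np.array([[0, 1], [1, 2], [0, 2], [2, 3], [1, 3]])
costs = np.array([4.0, 1.0, 3.0, 2.0, 5.0])

parent = list(range(4))  # union-find over the 4 nodes

def find(x):
    while parent[x] != x:
        parent[x] = parent[parent[x]]  # path compression
        x = parent[x]
    return x

chosen = np.zeros(len(edges), dtype=bool)
for i in np.argsort(costs):                # visit edge indices in ascending cost order
    a, b = find(edges[i][0]), find(edges[i][1])
    if a != b:                             # the edge connects two components: no cycle
        parent[a] = b
        chosen[i] = True

print(edges[chosen])                       # spanning-tree edges
print(costs[chosen].sum())                 # total cost (6.0 for this toy graph)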
0
Example 150
Project: tribeflow Source File: tmat-toyplot.py
def main(model):
store = pd.HDFStore(model)
from_ = store['from_'][0][0]
to = store['to'][0][0]
assert from_ == 0
trace_fpath = store['trace_fpath'][0][0]
Theta_zh = store['Theta_zh'].values
Psi_oz = store['Psi_sz'].values
count_z = store['count_z'].values[:, 0]
Psi_oz = Psi_oz / Psi_oz.sum(axis=0)
Psi_zo = (Psi_oz * count_z).T
Psi_zo = Psi_zo / Psi_zo.sum(axis=0)
obj2id = dict(store['source2id'].values)
hyper2id = dict(store['hyper2id'].values)
id2obj = dict((v, k) for k, v in obj2id.items())
ZtZ = Psi_zo.dot(Psi_oz)
ZtZ = ZtZ / ZtZ.sum(axis=0)
L = ZtZ
#ZtZ[ZtZ < (ZtZ.mean())] = 0
L[ZtZ >= 1.0 / (len(ZtZ))] = 1
L[L != 1] = 0
colormap = toyplot.color.brewer.map("Purples", domain_min=0, domain_max=1, reverse=True)
print(colormap)
canvas = toyplot.matrix((L.T, colormap), label="P[z' | z]", \
colorshow=False, tlabel="To z'", llabel="From")[0]
#canvas.axes(ylabel='From z', xlabel='To z\'')
toyplot.pdf.render(canvas, 'tmat.pdf')
model = SpectralCoclustering(n_clusters=3)
model.fit(L)
fit_data = L[np.argsort(model.row_labels_)]
fit_data = fit_data[:, np.argsort(model.column_labels_)]
canvas = toyplot.matrix((fit_data, colormap), label="P[z' | z]", \
colorshow=False)[0]
toyplot.pdf.render(canvas, 'tmat-cluster.pdf')
#AtA = Psi_oz.dot(Psi_zo)
#np.fill_diagonal(AtA, 0)
#AtA = AtA / AtA.sum(axis=0)
store.close()
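The last few lines of the example reorder the rows and columns of L by their SpectralCoclustering labels, so that each co-cluster becomes a contiguous block in the rendered matrix. Below is a minimal, self-contained sketch of that argsort-based reordering on synthetic data from sklearn.datasets.make_biclusters; the matrix shape, noise level, and number of clusters are illustrative choices, not values from tribeflow.
import numpy as np
from sklearn.cluster import SpectralCoclustering
from sklearn.datasets import make_biclusters

# synthetic matrix with 3 planted biclusters (shape and noise are arbitrary)
data, _, _ = make_biclusters(shape=(30, 30), n_clusters=3, noise=0.1, random_state=0)

model = SpectralCoclustering(n_clusters=3, random_state=0)
model.fit(data)

reordered = data[np.argsort(model.row_labels_)]             # group rows by cluster label
reordered = reordered[:, np.argsort(model.column_labels_)]  # then group columns
print(reordered.shape)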