numpy.argsort

Here are examples of the Python API numpy.argsort taken from open source projects. By voting up examples you can indicate which are most useful and appropriate.

174 Examples
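
As a quick reference before the project examples, here is a minimal sketch of the call itself: numpy.argsort returns the indices that would sort an array, and those indices can then be used to reorder the array, or any other array aligned with it.

import numpy as np

x = np.array([3.0, 1.0, 2.0])
order = np.argsort(x)     # indices that would sort x: array([1, 2, 0])
x_sorted = x[order]       # array([1., 2., 3.])

# The same indices reorder a second, paired array consistently.
y = np.array([30, 10, 20])
y_sorted = y[order]       # array([10, 20, 30])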

Example 101

Project: GPflow Source File: FITCvsVFE.py
def snelsonDemo():
    from matplotlib import pyplot as plt
    from IPython import embed
    xtrain,ytrain,xtest,ytest = getTrainingTestData()

    #run exact inference on training data.
    exact_model = getRegressionModel(xtrain,ytrain)
    exact_model.optimize(maxiter= 2000000, tol=tol)

    figA, axes = plt.subplots(1,1)
    inds = np.argsort( xtrain.flatten() )
    axes.plot( xtrain[inds,:], ytrain[inds,:], 'ro' )
    plotPredictions( axes, exact_model, 'g', None )

    figB, axes = plt.subplots(3,2)
   
    #run sparse model on training data initialized from exact optimal solution.
    VFEmodel, VFEcb = trainSparseModel(xtrain,ytrain,exact_model,False,xtest,ytest)
    FITCmodel, FITCcb = trainSparseModel(xtrain,ytrain,exact_model,True,xtest,ytest)

    print "Exact model parameters \n"
    printModelParameters( exact_model )
    print "Sparse model parameters for VFE optimization \n"
    printModelParameters( VFEmodel )
    print "Sparse model parameters for FITC optimization \n"
    printModelParameters( FITCmodel )
    
    VFEiters = FITCcb.n_iters
    VFElog_likelihoods = stretch( len(VFEiters), VFEcb.log_likelihoods )
    VFEhold_out_likelihood = stretch(  len(VFEiters), VFEcb.hold_out_likelihood )
    
    plotComparisonFigure( xtrain, VFEmodel, exact_model, axes[0,0], axes[1,0], axes[2,0], VFEiters, VFElog_likelihoods.tolist(), VFEhold_out_likelihood.tolist(), "VFE" )
    plotComparisonFigure( xtrain, FITCmodel, exact_model, axes[0,1], axes[1,1], axes[2,1],FITCcb.n_iters, FITCcb.log_likelihoods, FITCcb.hold_out_likelihood , "FITC" )

    axes[0,0].set_title('VFE', loc='center', fontdict = {'fontsize': 22} )
    axes[0,1].set_title('FITC', loc='center', fontdict = {'fontsize': 22} )
        
    embed()

Example 102

Project: apogee Source File: read.py
Function: remove_duplicates
def remove_duplicates(data):
    """
    NAME:
       remove_duplicates
    PURPOSE:
       remove duplicates from an array
    INPUT:
       data - array
    OUTPUT:
       array w/ duplicates removed
    HISTORY:
       2014-06-23 - Written - Bovy (IAS)
    """
    if not _ESUTIL_LOADED:
        raise ImportError("apogee.tools.read.remove_duplicates function requires the esutil module for catalog matching")
    tdata= copy.copy(data)
    #Match the data against itself
    if _ESUTIL_VERSION[1] >= 5 and _ESUTIL_VERSION >= 3:
        h= esutil.htm.Matcher(10,data['RA'],data['DEC'])
        m1,m2,d12 = h.match(data['RA'],data['DEC'],
                            2./3600.,maxmatch=0) #all matches
    else:
        h=esutil.htm.HTM()
        htmrev2,minid,maxid = h.match_prepare(data['RA'],data['DEC'])
        m1,m2,d12 = h.match(data['RA'],data['DEC'],
                            data['RA'],data['DEC'],
                            2./3600.,maxmatch=0, #all matches
                            htmrev2=htmrev2,minid=minid,maxid=maxid)
    sindx= numpy.argsort(m1)
    sm1= m1[sindx]
    dup= sm1[1:] == sm1[:-1]
    for d in tqdm.tqdm(sm1[:-1][dup]):
        #Find the matches for just this duplicate
        if _ESUTIL_VERSION[1] >= 5 and _ESUTIL_VERSION >= 3:
            nm1,nm2,nd12= h.match(data['RA'][d],data['DEC'][d],
                                  2./3600.,maxmatch=0) #all matches
        else:
            nm1,nm2,nd12= h.match(data['RA'][d],data['DEC'][d],
                                  data['RA'],data['DEC'],
                                  2./3600.,maxmatch=0, #all matches
                                  htmrev2=htmrev2,minid=minid,maxid=maxid)
        #If some matches are commissioning data or have bad ak, rm from consideration
        comindx= numpy.array(['apogee.n.c'.encode('utf-8') in s for s in data['APSTAR_ID'][nm2]])
        comindx+= numpy.array(['apogee.s.c'.encode('utf-8') in s for s in data['APSTAR_ID'][nm2]])
        goodak= (True-numpy.isnan(data['AK_TARG'][nm2]))\
            *(data['AK_TARG'][nm2] > -50.)
        hisnr= numpy.argmax(data['SNR'][nm2]*(True-comindx)*goodak) #effect. make com zero SNR
        if numpy.amax(data['SNR'][nm2]*(True-comindx)*goodak) == 0.: #all commissioning or bad ak, treat all equally
            hisnr= numpy.argmax(data['SNR'][nm2])
        tindx= numpy.ones(len(nm2),dtype='bool')
        tindx[hisnr]= False
        tdata['RA'][nm2[tindx]]= -9999
    return tdata[tdata['RA'] != -9999]
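
The duplicate handling above hinges on a small argsort idiom: sorting the match indices so that equal values become adjacent, then comparing neighbours. A minimal sketch with made-up match indices standing in for m1:

import numpy as np

m1 = np.array([4, 1, 4, 2, 1, 4])      # hypothetical self-match indices with repeats
sindx = np.argsort(m1)                  # bring equal indices next to each other
sm1 = m1[sindx]                         # array([1, 1, 2, 4, 4, 4])
dup = sm1[1:] == sm1[:-1]               # True where an entry repeats its predecessor
duplicated = np.unique(sm1[:-1][dup])   # array([1, 4]) -- the duplicated indices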

Example 103

Project: chaco Source File: ticks.py
def auto_interval ( data_low, data_high, max_ticks=9 ):
    """ Calculates the tick interval for a range.

        The boundaries for the data to be plotted on the axis are::

            data_bounds = (data_low,data_high)

        The function chooses the number of tick marks, which can be between
        3 and max_ticks marks (including end points), and chooses tick intervals at
        1, 2, 2.5, 5, 10, 20, ...

        Returns
        -------
        interval : float
            tick mark interval for axis
    """
    range = float( data_high ) - float( data_low )

    # We'll choose the number of divisions from between 3 and max_ticks-1.
    # Preference is given to more ticks:
    #   Note reverse order and see kludge below...
    divisions = arange( max_ticks-1, 2.0, -1.0 ) # for max_ticks=9: ( 8., 7., ..., 3. )

    # Calculate the intervals for the divisions:
    candidate_intervals = range / divisions

    # Get magnitudes and mantissas for each candidate:
    magnitudes = 10.0 ** floor( log10( candidate_intervals ) )
    mantissas  = candidate_intervals / magnitudes

    # List of "pleasing" intervals between ticks on graph.
    # Only the first magnitude is listed; higher magnitudes are inferred:
    magic_intervals = array( ( 1.0, 2.0, 2.5, 5.0, 10.0 ) )

    # Calculate the absolute differences between the candidates
    # (with magnitude removed) and the magic intervals:
    differences = abs( magic_intervals[:,newaxis] - mantissas )

    # Find the division and magic interval combo that produce the
    # smallest differences:

    # KLUDGE: 'argsort' doesn't preserve the order of equal values,
    # so we subtract a small, index dependent amount from each difference
    # to force correct ordering.
    sh    = shape( differences )
    small = 2.2e-16 * arange( sh[1] ) * arange( sh[0] )[:,newaxis]
    small = small[::-1,::-1] #reverse the order
    differences = differences - small

    # ? Numeric should allow keyword "axis" ? comment out for now
    #best_mantissa = minimum.reduce(differences,axis=0)
    #best_magic = minimum.reduce(differences,axis=-1)
    best_mantissa  = minimum.reduce( differences,  0 )
    best_magic     = minimum.reduce( differences, -1 )
    magic_index    = argsort( best_magic )[0]
    mantissa_index = argsort( best_mantissa )[0]

    # The best interval is the magic_interval multiplied by the magnitude
    # of the best mantissa:
    interval  = magic_intervals[ magic_index ]
    magnitude = magnitudes[ mantissa_index ]
    result    = interval * magnitude
    if result == 0.0:
        result = finfo(float).eps
    return result
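
Here argsort is used only to locate the smallest difference; argsort(x)[0] picks the index of the minimum, which (ignoring how ties are ordered) is what np.argmin computes directly. A minimal sketch:

import numpy as np

best_magic = np.array([0.4, 0.1, 0.7])
idx = np.argsort(best_magic)[0]   # index of the smallest value: 1
idx = np.argmin(best_magic)       # same result, without sorting everything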

Example 104

Project: ShallowLearn Source File: document_classification_20newsgroups.py
Function: benchmark
def benchmark(clf):
    global train_duration, test_duration
    print('_' * 80)
    print("Training: ")
    print(clf)
    t0 = time()
    if isinstance(clf, (GensimFastText, FastText)):
        clf.fit(train_text, y_train)
        train_time = time() - t0
    else:
        clf.fit(X_train, y_train)
        train_time = train_duration + (time() - t0)
    print("train time: %0.3fs" % train_time)

    t0 = time()
    if isinstance(clf, (GensimFastText, FastText)):
        pred = clf.predict(test_text)
        test_time = time() - t0
        # fix unknown predictions
        pred = [most_freq if p is None else p for p in pred]
    else:
        pred = clf.predict(X_test)
        test_time = test_duration + (time() - t0)
    print("test time:  %0.3fs" % test_time)

    score = metrics.f1_score(y_test, pred, average='macro')
    print("macro F1:   %0.3f" % score)

    if hasattr(clf, 'coef_'):
        print("dimensionality: %d" % clf.coef_.shape[1])
        print("density: %f" % density(clf.coef_))

        if opts.print_top10 and feature_names is not None:
            print("top 10 keywords per class:")
            for i, category in enumerate(categories):
                top10 = np.argsort(clf.coef_[i])[-10:]
                print(trim("%s: %s"
                      % (category, " ".join(feature_names[top10]))))
        print()

    if opts.print_report:
        print("classification report:")
        print(metrics.classification_report(y_test, pred,
                                            target_names=categories))

    if opts.print_cm:
        print("confusion matrix:")
        print(metrics.confusion_matrix(y_test, pred))

    print()
    clf_descr = str(clf).split('(')[0]
    return clf_descr, score, train_time, test_time
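
The top-10 keyword selection uses a common pattern: np.argsort(weights)[-k:] gives the indices of the k largest entries, in ascending order of weight. A minimal sketch with hypothetical coefficients:

import numpy as np

coef = np.array([0.3, -1.2, 2.5, 0.0, 1.7])
top2 = np.argsort(coef)[-2:]    # indices of the two largest weights: array([4, 2])
top2 = top2[::-1]               # largest first: array([2, 4])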

Example 105

Project: polara Source File: evaluation.py
def get_ranking_scores(matched_predictions, feedback_data, switch_positive, alternative=True):
    users_num, topk, holdout = matched_predictions.shape
    ideal_scores_idx = np.argsort(feedback_data, axis=1)[:, ::-1] #returns column index only
    ideal_scores_idx = np.ravel_multi_index((np.arange(feedback_data.shape[0])[:, None], ideal_scores_idx), dims=feedback_data.shape)

    where = np.ma.where if np.ma.is_masked(feedback_data) else np.where
    is_positive = feedback_data >= switch_positive
    positive_feedback = where(is_positive, feedback_data, 0)
    negative_feedback = where(~is_positive, -feedback_data, 0)

    relevance_scores_pos = (matched_predictions * positive_feedback[:, None, :]).sum(axis=2)
    relevance_scores_neg = (matched_predictions * negative_feedback[:, None, :]).sum(axis=2)
    ideal_scores_pos = positive_feedback.ravel()[ideal_scores_idx]
    ideal_scores_neg = negative_feedback.ravel()[ideal_scores_idx]

    discount_num = max(holdout, topk)
    if alternative:
        discount = np.log2(np.arange(2, discount_num+2))
        relevance_scores_pos = 2**relevance_scores_pos - 1
        relevance_scores_neg = 2**relevance_scores_neg - 1
        ideal_scores_pos = 2**ideal_scores_pos - 1
        ideal_scores_neg = 2**ideal_scores_neg - 1
    else:
        discount = np.hstack([1, np.log(np.arange(2, discount_num+1))])

    dcg = (relevance_scores_pos / discount[:topk]).sum(axis=1)
    dcl = (relevance_scores_neg / -discount[:topk]).sum(axis=1)
    idcg = (ideal_scores_pos / discount[:holdout]).sum(axis=1)
    idcl = (ideal_scores_neg / -discount[:holdout]).sum(axis=1)
    ndcg = unmask(np.nansum(dcg / idcg) / users_num)
    ndcl = unmask(np.nansum(dcl / idcl) / users_num)
    ranking_score = namedtuple('Ranking', ['nDCG', 'nDCL'])._make([ndcg, ndcl])
    return ranking_score
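
The ideal ranking above comes from a row-wise argsort: with axis=1 each row is sorted independently, and reversing the columns turns the result into a descending order per user. A minimal sketch with a small hypothetical feedback matrix:

import numpy as np

feedback = np.array([[3, 1, 2],
                     [0, 5, 4]])
ideal_idx = np.argsort(feedback, axis=1)[:, ::-1]   # per-row column indices, best first
# array([[0, 2, 1],
#        [1, 2, 0]])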

Example 106

Project: tractor Source File: ipes.py
def ipe_err_plots(X1, Y1, X2, Y2, tt, xlim, ps, semilogx=True):
	siglim = -6,6
	plt.clf()
	plt.hist(Y1, 50, range=siglim, histtype='step', color='r')
	plt.hist(Y2, 50, range=siglim, histtype='step', color='b')
	plt.xlim(siglim)
	plt.xlabel('Inter-ipe difference / error')
	plt.title(tt)
	ps.savefig()

	plt.clf()
	plt.subplots_adjust(hspace=0.25, top=0.95)
	plt.subplot(2,1,1)
	red = (1., 0.2, 0.2)
	if semilogx:
		plt.semilogx(X1, Y1, '.', color=red, alpha=0.5)
	else:
		plt.plot(X1, Y1, '.', color=red, alpha=0.5)
	plt.title('SDSS Photo')
	plt.subplot(2,1,2)
	blue = (0.3,0.3,1.0)
	if semilogx:
		plt.semilogx(X2, Y2, '.', color=blue, alpha=0.5)
	else:
		plt.plot(X2, Y2, '.', color=blue, alpha=0.5)
	plt.title('Tractor')

	for i,x,y in [(1,X1,Y1), (2,X2,Y2)]:
		plt.subplot(2,1,i)
		I = np.argsort(x)
		S = np.linspace(0, len(x), 11).astype(int)
		xm,ym,ys = [],[],[]
		ysigs = []
		for ilo,ihi in zip(S[:-1], S[1:]):
			J = I[ilo:ihi]
			xm.append(np.mean(x[J]))
			ym.append(np.median(y[J]))
			ysigs.append([np.percentile(y[J], scipy.stats.norm.cdf(s)*100)
						  for s in [-2,-1,1,2]])
		ysigs = np.array(ysigs)
		xm = np.array(xm)
		y1,y2,y3,y4 = ysigs.T

		W = 1.03
		plt.plot(np.vstack([xm/W,xm*W,xm*W,xm/W,xm/W]), np.vstack([y2,y2,y3,y3,y2]), 'k-', lw=1, alpha=0.75)
		plt.plot(np.vstack([xm,xm]), np.vstack([y1,y2]), 'k-', lw=1)
		plt.plot(np.vstack([xm,xm]), np.vstack([y3,y4]), 'k-', lw=1)

		W = 1.02
		plt.plot(np.vstack([xm/W,xm*W]), np.vstack([ym,ym]), 'k-', lw=2)
		plt.plot(np.vstack([xm/W,xm*W]), np.vstack([y2,y2]), 'k-', lw=2)
		plt.plot(np.vstack([xm/W,xm*W]), np.vstack([y3,y3]), 'k-', lw=2)
		plt.plot(xm, y1, 'ko', mec='k', mfc='k', ms=3)
		plt.plot(xm, y4, 'ko', mec='k', mfc='k', ms=3)

		plt.xlim(xlim)
		plt.ylim(siglim)
		for s in np.sqrt(2.) * np.arange(-2, 2+1):
			plt.axhline(s, color='k', alpha=0.25 + (s==0)*0.25)
	plt.ylabel('Inter-ipe difference / error')
	plt.xlabel(tt)
	ps.savefig()


	plt.clf()
	plt.subplots_adjust(hspace=0.25, top=0.95)
	plt.subplot(2,1,1)
	if semilogx:
		plt.semilogx(X1, Y1, '.', color=red, alpha=0.5)
	else:
		plt.plot(X1, Y1, '.', color=red, alpha=0.5)
	plt.title('SDSS Photo')
	plt.subplot(2,1,2)
	if semilogx:
		plt.semilogx(X2, Y2, '.', color=blue, alpha=0.5)
	else:
		plt.plot(X2, Y2, '.', color=blue, alpha=0.5)
	plt.title('Tractor')
	for i,x,y in [(1,X1,Y1), (2,X2,Y2)]:
		plt.subplot(2,1,i)
		I = np.argsort(x)
		S = np.linspace(0, len(x), 11).astype(int)
		xm,ym = [],[]
		ysigs = []
		for ilo,ihi in zip(S[:-1], S[1:]):
			J = I[ilo:ihi]
			xm.append(np.mean(x[J]))
			ym.append(np.median(y[J]))
			ysigs.append([np.percentile(y[J], scipy.stats.norm.cdf(s)*100)
						  for s in [-2,-1,1,2]])
		ysigs = np.array(ysigs)
		xm = np.array(xm)
		y1,y2,y3,y4 = ysigs.T
		plt.plot(xm, ym, 'ko', ms=5)
		plt.plot(xm, y2, 'ks', mec='k', mfc='none', mew=1.5, ms=5)
		plt.plot(xm, y3, 'ks', mec='k', mfc='none', mew=1.5, ms=5)
		plt.plot(xm, y1, 'kv', mec='k', mfc='none', mew=1.5, ms=5)
		plt.plot(xm, y4, 'k^', mec='k', mfc='none', mew=1.5, ms=5)
		plt.xlim(xlim)
		plt.ylim(siglim)
		for s in np.sqrt(2.) * np.arange(-2, 2+1):
			plt.axhline(s, color='k', alpha=0.25 + (s==0)*0.25)
	plt.ylabel('Inter-ipe difference / error')
	plt.xlabel(tt)
	ps.savefig()

Example 107

Project: kaggle-dr Source File: align_util.py
    def has_notch(self, radius_additive=OUTER_RADIUS_FULL_SIZE_PHOTO):
        """
        Checks if there is an identification tab in the top right of the
        image. Tries to achieve few false positives.

        TODO: check for short substring of 1's in thresholded image
        to indicate peak, will help discard false positives
        """
        perim_info = self.scan_perimeter_intensity(self.i_img_center, self.j_img_center, self.I, radius_additive)
        top_perim_info = perim_info[:3,:]
        # re-sort top_perim_info by degrees
        top_perim_info = top_perim_info[np.argsort(top_perim_info[:,0]),:]
        top_degrees = top_perim_info[:,0]
        top_ij = np.array(top_perim_info[:,1:3], dtype=int)
        top_intensities = top_perim_info[:,3]

        # check that at least 2 points are adjacent
        diffs = np.diff(top_degrees)
        if not (diffs.min() == 1):
            return 0
        back_two = diffs.tolist().index(1)
        adjacents = slice(1,3) if back_two else slice(0,2)
        # check that at least 2 top points are on the right side of img
        shifted_top_degrees = (top_degrees + 90) % 360
        if not (sum(shifted_top_degrees < 180) >= 2):
            return 0
        # check that level of adjacents is high enough
        adjacent_ijs = top_ij[adjacents,:]
        adjacent_mean_intensities = self.I[adjacent_ijs.T[0], adjacent_ijs.T[1],:].mean(axis=1)
        if not sum(adjacent_mean_intensities > 15) > 0:
            return 0
        return 1
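
The re-sort of top_perim_info shows another frequent pattern: argsort on a single column used to reorder whole rows of a 2-D array. A minimal sketch with made-up (degrees, i, j, intensity) records:

import numpy as np

perim = np.array([[270., 5., 7., 0.9],
                  [  0., 3., 4., 0.8],
                  [ 90., 1., 2., 0.7]])
perim = perim[np.argsort(perim[:, 0]), :]   # rows reordered by the first column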

Example 108

Project: pydem Source File: utils.py
Function: sort_rows
def sortrows(a, i=0, index_out=False, recurse=True):
    """ Sorts array "a" by columns i

    Parameters
    ------------
    a : np.ndarray
        array to be sorted
    i : int (optional)
        column to be sorted by, taken as 0 by default
    index_out : bool (optional)
        return the index I such that a[I, :] = sortrows(a, i). Default = False
    recurse : bool (optional)
        recursively sort by each of the columns. i.e.
        once column i is sorted, we sort by the next smallest column
        number, etc. True by default.

    Returns
    --------
    a : np.ndarray
        The array 'a' sorted in ascending order by column i
    I : np.ndarray (optional)
        The index such that a[I, :] = sortrows(a, i). Only returned if
        index_out = True

    Examples
    ---------
    >>> a = array([[1,2],[3,1],[2,3]])
    >>> b = sortrows(a,0)
    >>> b
    array([[1, 2],
           [2, 3],
           [3, 1]])
    >>> c, I = sortrows(a,1,True)

    >>> c
    array([[3, 1],
           [1, 2],
           [2, 3]])
    >>> I
    array([1, 0, 2])
    >>> a[I,:] - c
    array([[0, 0],
           [0, 0],
           [0, 0]])
    """
    I = np.argsort(a[:, i])
    a = a[I, :]
    # We recursively call sortrows to make sure it is sorted best by every
    # column
    if recurse & (len(a[0]) > i + 1):
        for b in np.unique(a[:, i]):
            ids = a[:, i] == b
            colids = range(i) + range(i+1, len(a[0]))
            a[np.ix_(ids, colids)], I2 = sortrows(a[np.ix_(ids, colids)],
                                                  0, True, True)
            I[ids] = I[np.nonzero(ids)[0][I2]]

    if index_out:
        return a, I
    else:
        return a
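
For a multi-column sort like this one, numpy also offers np.lexsort, which sorts by several keys at once; a minimal sketch of the same effect (not the pydem implementation, which recurses column by column):

import numpy as np

a = np.array([[1, 2],
              [3, 1],
              [1, 1],
              [2, 3]])
# Sort by column 0, breaking ties with column 1.
# np.lexsort treats its *last* key as the primary key.
order = np.lexsort((a[:, 1], a[:, 0]))
b = a[order, :]
# array([[1, 1],
#        [1, 2],
#        [2, 3],
#        [3, 1]])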

Example 109

Project: klustaviewa Source File: correlograms.py
Function: compute_correlograms
def compute_correlograms(spiketimes,
                         clusters,
                         clusters_to_update=None,
                         ncorrbins=None,
                         corrbin=None,
                         sample_rate=None,
                         ):

    if ncorrbins is None:
        ncorrbins = NCORRBINS_DEFAULT
    if corrbin is None:
        corrbin = CORRBIN_DEFAULT

    # Sort spiketimes for the computation of the CCG.
    ind = np.argsort(spiketimes)
    spiketimes = spiketimes[ind]
    clusters = clusters[ind]

    window_size = corrbin * ncorrbins

    # unique clusters
    clusters_unique = np.unique(clusters)

    # clusters to update
    if clusters_to_update is None:
        clusters_to_update = clusters_unique

    # Select requested clusters.
    ind = np.in1d(clusters, clusters_to_update)
    spiketimes = spiketimes[ind]
    clusters = clusters[ind]

    assert spiketimes.shape == clusters.shape
    assert np.all(np.in1d(clusters, clusters_to_update))
    assert sample_rate > 0.
    assert 0 < corrbin < window_size

    C = correlograms(spiketimes,
                     clusters,
                     cluster_ids=clusters_to_update,
                     sample_rate=sample_rate,
                     bin_size=corrbin,
                     window_size=window_size,
                     )
    dic = {(c0, c1): C[i, j, :]
           for i, c0 in enumerate(clusters_to_update)
           for j, c1 in enumerate(clusters_to_update)}
    return dic

Example 110

Project: MagicCube Source File: simple_cube.py
    def draw_cube(self, rot=None, zloc=None):
        """Draw a cube on the axes.

        The first time this is called, it will create a set of polygons
        representing the cube faces.  On subsequent calls, it will update
        these polygons with the given rotation and observer location.

        Parameters
        ----------
        rot : Quaternion object
            The quaternion representing the rotation
        zloc : float
            The location of the observer on the z-axis (adjusts perspective)
        """
        if rot is None:
            rot = self.current_rot
        if zloc is None:
            zloc = self.current_zloc

        self.current_rot = rot
        self.current_zloc = zloc

        if self._cube_poly is None:
            self._cube_poly = [plt.Polygon(self.faces[i, :, :2],
                                           facecolor=self.stickercolors[i],
                                           alpha=0.9)
                               for i in range(6)]
            [self.add_patch(self._cube_poly[i]) for i in range(6)]

        faces = self.project_points(self.faces, rot, zloc)
        zorder = np.argsort(np.argsort(faces[:, :4, 2].sum(1)))

        [self._cube_poly[i].set_zorder(10 * zorder[i]) for i in range(6)]
        [self._cube_poly[i].set_xy(faces[i, :, :2]) for i in range(6)]

        self.figure.canvas.draw()
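
The zorder line uses a compact idiom: applying argsort twice converts values into ranks, so the face with the largest summed z coordinate ends up with the highest z-order. A minimal sketch:

import numpy as np

depth = np.array([0.3, 1.2, 0.7])
rank = np.argsort(np.argsort(depth))
# rank[i] is the position of depth[i] in sorted order: array([0, 2, 1])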

Example 111

Project: mtpy Source File: niblettbostick.py
def interpolate_strike_angles(angles, in_periods):

    """
    expect 2 arrays

    1. sort ascending by periods
    2. loop over angles to find 'nan' values (i.e. 1D layers)
    3. determine linear interpolation between bounding 2D strike angles
    4. if 1D on top or bottom, set to 0 degrees
    """

    new_angles = copy.copy(angles)

    #sort in ascending order: 
    orig_sorting = np.argsort(in_periods)
    back_sorting = np.argsort(orig_sorting)
    angles = angles[orig_sorting]
    periods = in_periods[orig_sorting]

    in_line = 0 
    while in_line < len(angles):
        curr_per = periods[in_line]
        curr_ang = angles[in_line]
        if np.isnan(curr_ang):
            if in_line in [0, len(angles)-1]:
                new_angles[in_line] = 0.
                in_line += 1
                continue
		
            #otherwise:
            #use value before current one:
            ang1 = new_angles[in_line - 1]
            per1 = periods[in_line - 1]
            #check for next non-nan:
            ang2 = None

            jj = in_line
            print in_line
            while jj < len(angles):
                jj += 1 
                if not np.isnan(angles[jj]):
                    per2 = periods[jj]
                    ang2 = angles[jj]
                    break
            #catch case of all nan:
            if ang2 is None:
                ang2 = 0.  
                
            delta_per = per2-per1
            delta_ang = ang2-ang1
            per_step = periods[in_line] - per1
            new_angles[in_line] = ang1 + delta_ang/delta_per * per_step
            print jj
												
        in_line += 1

    #asserting correct order (same as input) of the angles:
    return new_angles[back_sorting]
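
The orig_sorting / back_sorting pair is the standard way to work in sorted order and then restore the original order: the argsort of a sort permutation is its inverse. A minimal sketch:

import numpy as np

periods = np.array([10.0, 1.0, 100.0])
order = np.argsort(periods)       # array([1, 0, 2])
back = np.argsort(order)          # inverse permutation: array([1, 0, 2])
restored = periods[order][back]   # sorted, then put back in the original order
assert np.array_equal(restored, periods)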

Example 112

Project: C-PAC Source File: python_ncut_lib.py
def ncut( W, nbEigenValues ):
	# parameters
	offset=.5
	maxiterations=100
	eigsErrorTolerence=1e-6
	truncMin=1e-6
	eps=2.2204e-16

	m=shape(W)[1]

	# make sure that W is symmetric, this is a computationally expensive operation, only use for debugging
	#if (W-W.transpose()).sum() != 0:
	#	print "W should be symmetric!"
	#	exit(0)

	# degrees and regularization
 	# S Yu Understanding Popout through Repulsion CVPR 2001
	# Allows negative values as well as improves invertability
	# of d for small numbers
	# i bet that this is what improves the stability of the eigen
	d=abs(W).sum(0)
	dr=0.5*(d-W.sum(0))
	d=d+offset*2
	dr=dr+offset

	# calculation of the normalized LaPlacian
	W=W+spdiags(dr,[0],m,m,"csc")
	Dinvsqrt=spdiags((1.0/sqrt(d+eps)),[0],m,m,"csc")
	P=Dinvsqrt*(W*Dinvsqrt);
	
	# perform the eigen decomposition
	eigen_val,eigen_vec=eigsh(P,nbEigenValues,maxiter=maxiterations,tol=eigsErrorTolerence,which='LA')
	
	# sort the eigen_vals so that the first
	# is the largest
	i=argsort(-eigen_val)
	eigen_val=eigen_val[i]
	eigen_vec=eigen_vec[:,i]

	# normalize the returned eigenvectors
	eigen_vec=Dinvsqrt*matrix(eigen_vec)
	norm_ones=norm(ones((m,1)))
	for i in range(0,shape(eigen_vec)[1]):
		eigen_vec[:,i]=(eigen_vec[:,i] / norm(eigen_vec[:,i]))*norm_ones
		if eigen_vec[0,i] != 0:
			eigen_vec[:,i] = -1 * eigen_vec[:,i] * sign( eigen_vec[0,i] )

	return(eigen_val, eigen_vec)
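
Sorting the eigenvalues with argsort(-eigen_val) is a common way to get a descending order without reversing afterwards; the same index array then reorders the eigenvector columns. A minimal sketch:

import numpy as np

eigen_val = np.array([0.2, 0.9, 0.5])
i = np.argsort(-eigen_val)     # array([1, 2, 0]), largest eigenvalue first
eigen_val = eigen_val[i]       # array([0.9, 0.5, 0.2])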

Example 113

Project: tractor Source File: faint.py
Function: main
def main():
	# faint
	#RA,DEC,N = 17.5600, 1.1053, 1
	# bright neighbour
	#RA,DEC,N = 358.5436, 0.7213, 2
	# known; too bright
	#RA,DEC,N = 52.64687, -0.42678, 3
	# known; z = 19.5; parallax -0.003+-0.142;
	#   dRA/dt +0.084+-0.010; dDec/dt 0.011+-0.009 [arcsec/yr]
	RA,DEC,N = 342.47285, 0.73458, 4
	zmag,dradt,ddecdt = 19.5, 0.084, 0.011



	# TAI = 4412910179.40 / Number of seconds since Nov 17 1858

	# -> deg/year
	dradt /= 3600.
	ddecdt /= 3600.

	pfn = 'faint%i.pickle' % N
	if os.path.exists(pfn):
		print('Reading pickle', pfn)
		X = unpickle_from_file(pfn)
	else:
		#X = getdata(358.5436, 0.7213)
		X = getdata(RA, DEC)
		pickle_to_file(X, pfn)
	(ims,skies,tais,rcfs) = X
	print('Got', len(rcfs), 'fields')

	omit = [94, 5759]

	uruns = set()
	I = []
	for i,(r,c,f) in enumerate(rcfs):
		if r in uruns:
			continue
		if r in omit:
			print('Omitting run', r)
			continue
		uruns.add(r)
		I.append(i)
	print('Cut to', len(uruns), 'unique runs')
	rcfs = [rcfs[i] for i in I]
	ims = [ims[i] for i in I]
	skies = [skies[i] for i in I]
	tais = [tais[i] for i in I]

	I = np.argsort(tais)
	rcfs = [rcfs[i] for i in I]
	ims = [ims[i] for i in I]
	skies = [skies[i] for i in I]
	tais = [tais[i] for i in I]

	tai0 = np.mean(tais)

	#tractor = Tractor([])
	#for i,im in enumerate(ims):
	#	tractor.addImage(im)

	zrs = [np.array([-1.,+5.]) * std + sky for sky,std in skies]

	# figsize
	fs1 = (4,4)
	plotpos0 = [0.01, 0.01, 0.98, 0.92]

	imsum = None
	slo,shi = None,None

	for i,im in enumerate(ims):

		zr = zrs[i]
		ima = dict(interpolation='nearest', origin='lower',
				   vmin=zr[0], vmax=zr[1], cmap='gray')

		data = im.getImage()

		if imsum is None:
			imsum = data
			slo,shi = zr[0],zr[1]
		else:
			imsum += data
			slo += zr[0]
			shi += zr[1]
	
		plt.figure(figsize=fs1)
		plt.clf()
		plt.gca().set_position(plotpos0)
		plt.imshow(data, **ima)
		#plt.title('Data %s' % im.name)
		plt.xticks([],[])
		plt.yticks([],[])
		mysavefig('faint-data%02i' % i)

		# in yrs
		dt = (tais[i] - tai0) / (86400. * 365.25)

		tractor = Tractor([])
		tractor.addImage(im)
		src = PointSource(RaDecPos(RA + dradt * dt, DEC + ddecdt * dt),
						  Mags(z=zmag))
		tractor.addSource(src)
		mod = tractor.getModelImage(im)
		
		plt.clf()
		plt.gca().set_position(plotpos0)
		plt.imshow(mod, **ima)
		plt.xticks([],[])
		plt.yticks([],[])
		mysavefig('faint-mod%02i' % i)

		plt.figure(figsize=(8,4))
		plt.clf()
		plt.subplots_adjust(left=0.01, bottom=0.01, right=0.99, top=0.99,
							wspace=0.01, hspace=0)
		plt.subplot(1,2,1)
		plt.imshow(data, **ima)
		plt.xticks([],[])
		plt.yticks([],[])
		plt.subplot(1,2,2)
		plt.imshow(mod, **ima)
		plt.xticks([],[])
		plt.yticks([],[])
		mysavefig('faint-datamod%02i' % i)

	plt.clf()
	plt.gca().set_position(plotpos0)
	plt.imshow(imsum, interpolation='nearest', origin='lower',
			   vmin=slo, vmax=shi, cmap='gray')
	plt.title('Data sum')
	plt.xticks([],[])
	plt.yticks([],[])
	mysavefig('faint-data-sum')

	shi = slo + (shi - slo) / np.sqrt(len(ims))
	plt.clf()
	plt.gca().set_position(plotpos0)
	plt.imshow(imsum, interpolation='nearest', origin='lower',
			   vmin=slo, vmax=shi, cmap='gray')
	plt.title('Data sum')
	plt.xticks([],[])
	plt.yticks([],[])
	mysavefig('faint-data-sum2')

	plt.clf()
	plt.gca().set_position(plotpos0)
	plt.imshow(imsum, interpolation='nearest', origin='lower',
			   cmap='gray')
	plt.title('Data sum')
	plt.xticks([],[])
	plt.yticks([],[])
	mysavefig('faint-data-sum3')

Example 114

Project: CMT Source File: CMT.py
	def process_frame(self, im_gray):

		tracked_keypoints, _ = util.track(self.im_prev, im_gray, self.active_keypoints)
		(center, scale_estimate, rotation_estimate, tracked_keypoints) = self.estimate(tracked_keypoints)

		# Detect keypoints, compute descriptors
		keypoints_cv = self.detector.detect(im_gray) 
		keypoints_cv, features = self.descriptor.compute(im_gray, keypoints_cv)

		# Create list of active keypoints
		active_keypoints = zeros((0, 3)) 

		# Get the best two matches for each feature
		matches_all = self.matcher.knnMatch(features, self.features_database, 2)
		# Get all matches for selected features
		if not any(isnan(center)):
			selected_matches_all = self.matcher.knnMatch(features, self.selected_features, len(self.selected_features))


		# For each keypoint and its descriptor
		if len(keypoints_cv) > 0:
			transformed_springs = scale_estimate * util.rotate(self.springs, -rotation_estimate)
			for i in range(len(keypoints_cv)):

				# Retrieve keypoint location
				location = np.array(keypoints_cv[i].pt)

				# First: Match over whole image
				# Compute distances to all descriptors
				matches = matches_all[i]
				distances = np.array([m.distance for m in matches])

				# Convert distances to confidences, do not weight
				combined = 1 - distances / self.DESC_LENGTH

				classes = self.database_classes

				# Get best and second best index
				bestInd = matches[0].trainIdx
				secondBestInd = matches[1].trainIdx

				# Compute distance ratio according to Lowe
				ratio = (1 - combined[0]) / (1 - combined[1])

				# Extract class of best match
				keypoint_class = classes[bestInd]

				# If distance ratio is ok and absolute distance is ok and keypoint class is not background
				if ratio < self.THR_RATIO and combined[0] > self.THR_CONF and keypoint_class != 0:

					# Add keypoint to active keypoints
					new_kpt = append(location, keypoint_class)
					active_keypoints = append(active_keypoints, array([new_kpt]), axis=0)

				# In a second step, try to match difficult keypoints
				# If structural constraints are applicable
				if not any(isnan(center)):

					# Compute distances to initial descriptors
					matches = selected_matches_all[i]				
					distances = np.array([m.distance for m in matches])
					# Re-order the distances based on indexing
					idxs = np.argsort(np.array([m.trainIdx for m in matches]))
					distances = distances[idxs]					

					# Convert distances to confidences
					confidences = 1 - distances / self.DESC_LENGTH

					# Compute the keypoint location relative to the object center
					relative_location = location - center

					# Compute the distances to all springs
					displacements = util.L2norm(transformed_springs - relative_location)

					# For each spring, calculate weight
					weight = displacements < self.THR_OUTLIER  # Could be smooth function

					combined = weight * confidences

					classes = self.selected_classes

					# Sort in descending order
					sorted_conf = argsort(combined)[::-1]  # reverse

					# Get best and second best index
					bestInd = sorted_conf[0]
					secondBestInd = sorted_conf[1]

					# Compute distance ratio according to Lowe
					ratio = (1 - combined[bestInd]) / (1 - combined[secondBestInd])

					# Extract class of best match
					keypoint_class = classes[bestInd]

					# If distance ratio is ok and absolute distance is ok and keypoint class is not background
					if ratio < self.THR_RATIO and combined[bestInd] > self.THR_CONF and keypoint_class != 0:

						# Add keypoint to active keypoints
						new_kpt = append(location, keypoint_class)

						# Check whether same class already exists
						if active_keypoints.size > 0:
							same_class = np.nonzero(active_keypoints[:, 2] == keypoint_class)
							active_keypoints = np.delete(active_keypoints, same_class, axis=0)

						active_keypoints = append(active_keypoints, array([new_kpt]), axis=0)

		# If some keypoints have been tracked
		if tracked_keypoints.size > 0:

			# Extract the keypoint classes
			tracked_classes = tracked_keypoints[:, 2]

			# If there already are some active keypoints
			if active_keypoints.size > 0:

				# Add all tracked keypoints that have not been matched
				associated_classes = active_keypoints[:, 2]
				missing = ~np.in1d(tracked_classes, associated_classes)
				active_keypoints = append(active_keypoints, tracked_keypoints[missing, :], axis=0)

			# Else use all tracked keypoints
			else:
				active_keypoints = tracked_keypoints

		# Update object state estimate
		_ = active_keypoints
		self.center = center
		self.scale_estimate = scale_estimate
		self.rotation_estimate = rotation_estimate
		self.tracked_keypoints = tracked_keypoints
		self.active_keypoints = active_keypoints
		self.im_prev = im_gray
		self.keypoints_cv = keypoints_cv
		_ = time.time()

		self.tl = (nan, nan)
		self.tr = (nan, nan)
		self.br = (nan, nan)
		self.bl = (nan, nan)

		self.bb = array([nan, nan, nan, nan])

		self.has_result = False
		if not any(isnan(self.center)) and self.active_keypoints.shape[0] > self.num_initial_keypoints / 10:
			self.has_result = True

			tl = util.array_to_int_tuple(center + scale_estimate * util.rotate(self.center_to_tl[None, :], rotation_estimate).squeeze())
			tr = util.array_to_int_tuple(center + scale_estimate * util.rotate(self.center_to_tr[None, :], rotation_estimate).squeeze())
			br = util.array_to_int_tuple(center + scale_estimate * util.rotate(self.center_to_br[None, :], rotation_estimate).squeeze())
			bl = util.array_to_int_tuple(center + scale_estimate * util.rotate(self.center_to_bl[None, :], rotation_estimate).squeeze())

			min_x = min((tl[0], tr[0], br[0], bl[0]))
			min_y = min((tl[1], tr[1], br[1], bl[1]))
			max_x = max((tl[0], tr[0], br[0], bl[0]))
			max_y = max((tl[1], tr[1], br[1], bl[1]))

			self.tl = tl
			self.tr = tr
			self.bl = bl
			self.br = br

			self.bb = np.array([min_x, min_y, max_x - min_x, max_y - min_y])

Example 115

Project: brut Source File: util.py
def rfp_curve(yp, Y, **kwargs):
    """ Plot the false positive rate as a function of recall """
    import matplotlib.pyplot as plt

    npos = Y.sum()
    nneg = Y.size - npos
    ind = np.argsort(yp)[::-1]
    y = Y[ind]
    yp = yp[ind]

    recall = (1. * np.cumsum(y == 1)) / npos
    false_pos = (1. * np.cumsum(y == 0)) / nneg

    r = 1.0 * ((yp > 0) & (y == 1)).sum() / npos
    fp = 1.0 * ((yp > 0) & (y == 0)).sum() / nneg

    l, = plt.plot(recall, false_pos, **kwargs)
    plt.plot([r], [fp], 'o', c=l.get_color())
    plt.xlabel('Recall')
    plt.ylabel('False Positive')
    plt.title("R=%0.3f, FP=%0.4f" % (r, fp))
    ax = plt.gca()

    ax.grid(which='major', axis='x',
            linewidth=0.75, linestyle='-', color='0.75')
    ax.grid(which='minor', axis='x',
            linewidth=0.25, linestyle='-', color='0.75')
    ax.grid(which='major', axis='y',
            linewidth=0.75, linestyle='-', color='0.75')
    ax.grid(which='minor', axis='y',
            linewidth=0.25, linestyle='-', color='0.75')

    return recall, false_pos

Example 116

Project: hyperopt Source File: plotting.py
def main_plot_vars(trials, bandit=None, do_show=True, fontsize=10,
        colorize_best=None,
        columns=5,
        ):
    # -- import here because file-level import is too early
    import matplotlib.pyplot as plt

    idxs, vals = miscs_to_idxs_vals(trials.miscs)
    losses = trials.losses()
    finite_losses = [y for y in losses if y not in (None, float('inf'))]
    asrt = np.argsort(finite_losses)
    if colorize_best != None:
        colorize_thresh = finite_losses[asrt[colorize_best + 1]]
    else:
        # -- set to lower than best (disabled)
        colorize_thresh = finite_losses[asrt[0]] - 1

    loss_min = min(finite_losses)
    loss_max = max(finite_losses)
    print 'finite loss range', loss_min, loss_max, colorize_thresh

    loss_by_tid = dict(zip(trials.tids, losses))

    def color_fn(lossval):
        if lossval is None:
            return (1, 1, 1)
        else:
            t = 4 * (lossval - loss_min) / (loss_max - loss_min + .0001)
            if t < 1:
                return t, 0, 0
            if t < 2:
                return 2-t, t-1, 0
            if t < 3:
                return 0, 3-t, t-2
            return 0, 0, 4-t

    def color_fn_bw(lossval):
        if lossval in (None, float('inf')):
            return (1, 1, 1)
        else:
            t = (lossval - loss_min) / (loss_max - loss_min + .0001)
            if lossval < colorize_thresh:
                return (0., 1. - t, 0.)  # -- green best, black worst
            else:
                return (t, t, t)    # -- white=worst, black=best

    all_labels = list(idxs.keys())
    titles = ['%s (%s)' % (label, bandit.params[label].name)
            for label in all_labels]
    order = np.argsort(titles)

    C = columns
    R = int(np.ceil(len(all_labels) / float(C)))

    for plotnum, varnum in enumerate(order):
        #print varnum, titles[varnum]
        label = all_labels[varnum]
        plt.subplot(R, C, plotnum + 1)
        #print '-' * 80
        #print 'Node', label

        # hide x ticks
        ticks_num, ticks_txt = plt.xticks()
        plt.xticks(ticks_num, ['' for i in xrange(len(ticks_num))])

        dist_name = bandit.params[label].name
        x = idxs[label]
        if 'log' in dist_name:
            y = np.log(vals[label])
        else:
            y = vals[label]
        plt.title(titles[varnum], fontsize=fontsize)
        c = map(color_fn_bw, [loss_by_tid[ii] for ii in idxs[label]])
        if len(y):
            plt.scatter(x, y, c=c)
        if 'log' in dist_name:
            nums, texts = plt.yticks()
            plt.yticks(nums, ['%.2e' % np.exp(t) for t in nums])

    if do_show:
        plt.show()

Example 117

Project: chaco Source File: base_1d_plot.py
Function: map_index
    def map_index(self, screen_pt, threshold=2.0, outside_returns_none=True,
                  index_only=True):
        """ Maps a screen space point to an index into the plot's index array.

        Parameters
        ----------

        screen_pt : tuple of x-array, y-array
            Two arrays (or values) of screen space coordinates.
        threshold : float
            Optional screen-space distance allowed between *screen_pt* and the
            plot; if non-zero, then a *screen_pt* within this distance is
            mapped to the nearest plot index. (This feature is useful for sparse
            data.)
        outside_returns_none : Boolean
            If True, then if *screen_pt* is outside the range of the data, the
            method returns None. If False, it returns the nearest end index in
            such a case.
        index_only : Boolean
            This is included for compatibility with the base class, but is
            ignored, as it is always true for 1D plots.

        Returns
        -------

        index : int
            An index into the index array. If the input point cannot be mapped
            to an index, then None is returned.

            If *screen_pt* corresponds to multiple indices, then only the first
            index is returned.

        """
        data_pt = self.map_data(screen_pt)

        if ((data_pt < self.index_mapper.range.low) or \
                (data_pt > self.index_mapper.range.high)) and \
                outside_returns_none:
            return None

        if self._cached_data_pts_sorted is None:
            self._cached_data_argsort = argsort(self._cached_data)
            self._cached_data_pts_sorted = self._cached_data[self._cached_data_argsort]

        # XXX better to just use argmin(abs(data - data_pt))?

        data = self._cached_data_pts_sorted
        try:
            ndx = reverse_map_1d(data, data_pt, "ascending")
        except IndexError:
            if outside_returns_none:
                return None
            else:
                if data_pt < data[0]:
                    return 0
                else:
                    return len(data) - 1

        orig_ndx = self._cached_data_argsort[ndx]

        if threshold == 0.0:
            return orig_ndx

        screen_points = self._cached_screen_pts
        x = screen_points[orig_ndx]
        if self.orientation == 'v':
            x0 = screen_pt[1]
        else:
            x0 = screen_pt[0]

        if abs(x - x0) <= threshold:
            return orig_ndx
        else:
            return None
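
The cached argsort here is what lets map_index translate a position found in the sorted copy back to an index into the original, unsorted data. A minimal sketch of the same idea using np.searchsorted in place of Chaco's reverse_map_1d:

import numpy as np

data = np.array([5.0, 1.0, 9.0, 3.0])
order = np.argsort(data)
data_sorted = data[order]

pt = 4.2                                         # hypothetical query point
pos = np.searchsorted(data_sorted, pt)
pos = int(np.clip(pos, 1, len(data_sorted) - 1))
if pt - data_sorted[pos - 1] <= data_sorted[pos] - pt:
    pos -= 1                                     # pick the closer neighbour
orig_ndx = order[pos]                            # index into the original data (here 0, i.e. 5.0)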

Example 118

Project: vsm Source File: structarr.py
def enum_sort(arr, indices=[], field_name='i', filter_nan=False):
    """
    Takes a 1-dimensional array and returns a sorted array with matching
    indices from the original array.

    :param arr: A structured 1-dimensional array.
    :type arr: array
    
    :param indices: List of indices. If `indices` is empty, then `indices`
        is set to a range of indices for the length of `arr`. 
        Default is an empty list.
    :type indices: list, optional
    
    :param field_name: Name for indices in the structured array dtype. 
        Default is 'i'.
    :type field_name: string, optional
    
    :param filter_nan: If `True`, Not a Number values are filtered. 
        Default is `False`.
    :type filter_nan: boolean, optional

    :returns: A sorted structured array.

    **Examples**

    >>> arr = np.array([7,3,1,8,2])
    >>> enum_sort(arr)
    array([(3, 8), (0, 7), (1, 3), (4, 2), (2, 1)], 
          dtype=[('i', '<i8'), ('value', '<i8')])
    """
    idx = np.argsort(arr)
    if len(indices) == 0:
        indices = np.arange(arr.shape[0], dtype=np.int)
    else:
        indices = np.array(indices)
	
    dt = [(field_name, indices.dtype), ('value', arr.dtype)]

    new_arr = np.empty(shape=arr.shape, dtype=dt)
    new_arr[field_name] = indices[idx]
    new_arr['value'] = arr[idx]

    if filter_nan:
        new_arr = new_arr[np.isfinite(new_arr['value'])]
        
    return new_arr[::-1]

Example 119

Project: bolt Source File: array.py
    def transpose(self, *axes):
        """
        Return an array with the axes transposed.

        This operation will incur a swap unless the
        desired permutation can be obtained
        only by transposing the keys or the values.

        Parameters
        ----------
        axes : None, tuple of ints, or n ints
            If None, will reverse axis order.
        """
        if len(axes) == 0:
            p = arange(self.ndim-1, -1, -1)
        else:
            p = asarray(argpack(axes))

        istransposeable(p, range(self.ndim))

        split = self.split

        # compute the keys/value axes that need to be swapped
        new_keys, new_values = p[:split], p[split:]
        swapping_keys = sort(new_values[new_values < split])
        swapping_values = sort(new_keys[new_keys >= split])
        stationary_keys = sort(new_keys[new_keys < split])
        stationary_values = sort(new_values[new_values >= split])

        # compute the permutation that the swap causes
        p_swap = r_[stationary_keys, swapping_values, swapping_keys, stationary_values]

        # compute the extra permutation (p_x)  on top of this that
        # needs to happen to get the full permutation desired
        p_swap_inv = argsort(p_swap)
        p_x = p_swap_inv[p]
        p_keys, p_values = p_x[:split], p_x[split:]-split

        # perform the swap and then the within-key/value permutations
        arr = self.swap(swapping_keys, swapping_values-split)
        arr = arr.keys.transpose(tuple(p_keys.tolist()))
        arr = arr.values.transpose(tuple(p_values.tolist()))

        return arr

Example 120

Project: CLAtoolkit Source File: utils.py
def nmf(platform, no_topics, course_code, start_date=None, end_date=None):
    docuements,ids = get_allcontent_byplatform(platform, course_code, start_date=start_date, end_date=end_date)
    if len(docuements)<5:
        d3_dataset = ""
        topic_output = "Not enough text to run Topic Modeling."
    else:
        min_df = 2
        tfidf = TfidfVectorizer(stop_words=ENGLISH_STOP_WORDS, lowercase=True, strip_accents="unicode", use_idf=True, norm="l2", min_df = min_df, ngram_range=(1,4))
        A = tfidf.fit_transform(docuements)

        num_terms = len(tfidf.vocabulary_)
        terms = [""] * num_terms
        for term in tfidf.vocabulary_.keys():
            terms[ tfidf.vocabulary_[term] ] = term

        model = decomposition.NMF(init="nndsvd", n_components=no_topics, max_iter=1000)
        W = model.fit_transform(A)
        H = model.components_

        nmf_topic_terms = {}
        nmf_topic_docs = {}
        nmf_topic_doc_ids = {}

        for topic_index in range( H.shape[0] ):
            top_indices = np.argsort( H[topic_index,:] )[::-1][0:10]
            term_ranking = [terms[i] for i in top_indices]
            nmf_topic_terms[topic_index] = term_ranking
            #tmp = "Topic %d: %s" % ( topic_index, ", ".join( term_ranking ) )

        for topic_index in range( W.shape[1] ):
            top_indices = np.argsort( W[:,topic_index] )[::-1][0:10]
            doc_ranking = [docuements[i] for i in top_indices]
            id_ranking = [ids[i] for i in top_indices]
            nmf_topic_docs[topic_index] = doc_ranking
            nmf_topic_doc_ids[topic_index] = id_ranking

        topic_output = ""
        for topic in nmf_topic_terms:
            topic_output = topic_output + "<h2>Topic %d</h2>" % (topic + 1)
            topic_output = topic_output + "<p>"
            for term in nmf_topic_terms[topic]:
                topic_output = topic_output + '<a onClick="searchandhighlight(\'%s\')" class="btn btn-default btn-xs">%s</a> ' % (term, term)
            topic_output = topic_output + "<p>"
            topic_output = topic_output + "<ul>"
            for doc in nmf_topic_docs[topic]:
                topic_output = topic_output + "<li>%s</li>" % (doc)
            topic_output = topic_output + "</ul>"

        #print nmf_topic_doc_ids
        # find the % sentiment classification of each topic
        classification_dict = None
        classifier = 'VaderSentiment'
        '''
        if classifier == "VaderSentiment":
            classification_dict = {'Positive':0, 'Neutral':0, 'Negative':0}
        elif classifier == "NaiveBayes_t1.model":
            classification_dict = {'Triggering':0, 'Exploration':0, 'Integration':0, 'Resolution':0, 'Other':0}
        '''
        piebubblechart = {}
        feature_matrix = np.zeros(shape=(no_topics,3))

        for topic in nmf_topic_terms:
            vals = ""
            radius = 0
            topiclabel = "Topic %d" % (topic + 1)

            classification_dict = {'Positive':0, 'Neutral':0, 'Negative':0}
            kwargs = {'classifier':classifier, 'xapistatement__course_code': course_code, 'xapistatement__id__in':nmf_topic_doc_ids[topic]}
            #print Classification.objects.values('classification').filter(**kwargs).order_by().annotate(Count('classification')).query
            counts = Classification.objects.values('classification').filter(**kwargs).order_by().annotate(Count('classification'))
            for count in counts:
                #print count
                classification_dict[count['classification']] = count['classification__count']
            vals = "%d,%d,%d" % (classification_dict['Positive'],classification_dict['Negative'],classification_dict['Neutral'])
            print vals
            radius = classification_dict['Positive'] + classification_dict['Negative'] + classification_dict['Neutral']
            feature_matrix[topic,0] = classification_dict['Positive']
            feature_matrix[topic,1] = classification_dict['Negative']
            feature_matrix[topic,2] = classification_dict['Neutral']
            piebubblechart[topic] = {'label':topiclabel, 'vals':vals, 'radius':radius}

        # Perform Affinity Propagation
        af = AffinityPropagation(preference=-50).fit(feature_matrix)
        cluster_centers_indices = af.cluster_centers_indices_
        aflabels = af.labels_

        n_clusters_ = len(cluster_centers_indices)
        #print 'n_clusters_', n_clusters_, aflabels
        #print feature_matrix

        # generate piebubblechart dataset for d3.js
        #print piebubblechart
        d3_dataset = ""
        for topic in piebubblechart:
            #print topic
            # output format - {label: "Topic 1", vals: [10, 20], cluster: 1, radius: 30},
            d3_dataset = d3_dataset + '{label: "%s", vals: [%s], cluster: %d, radius: %d},' % (piebubblechart[topic]['label'], piebubblechart[topic]['vals'], aflabels[topic], piebubblechart[topic]['radius'])

    return topic_output, d3_dataset

Example 121

Project: kover Source File: create.py
Function: from_tsv
def from_tsv(tsv_path, output_path, phenotype_name, phenotype_metadata_path, gzip, warning_callback=None,
             error_callback=None, progress_callback=None):
    def get_kmer_length(tsv_path):
        with open(tsv_path, "r") as f:
            f.next()
            kmer_len = len(f.next().split("\t")[0])
        return kmer_len

    def get_kmer_count(tsv):
        # XXX: This can break if the file format changes!
        #      Assumes that each line has the format kmer_seq\tV\tV\t...V\n with one binary V per genome
        total_size = getsize(tsv)
        with open(tsv, "r") as f:
            header_size = len(f.next())
            line_size = len(f.next())
        content_size = float(total_size) - header_size
        if content_size % line_size != 0:
            raise Exception()
        return int(content_size / line_size)

    # Execution callback functions
    if warning_callback is None:
        warning_callback = lambda w: logging.warning(w)
    if error_callback is None:
        def normal_raise(exception):
            raise exception

        error_callback = normal_raise
    if progress_callback is None:
        progress_callback = lambda t, p: None

    if (phenotype_name is None and phenotype_metadata_path is not None) or (
                    phenotype_name is not None and phenotype_metadata_path is None):
        raise ValueError("If a phenotype is specified, it must have a name and a metadata file.")

    kmer_len = get_kmer_length(tsv_path)
    kmer_count = get_kmer_count(tsv_path)

    kmer_dtype = 'S' + str(kmer_len)
    kmer_by_matrix_column_dtype = _minimum_uint_size(kmer_count)
    compression = "gzip" if gzip > 0 else None
    compression_opts = gzip if gzip > 0 else None
    tsv_block_size = min(kmer_count, BLOCK_SIZE)

    h5py_file = _create_hdf5_file_no_chunk_caching(output_path)
    h5py_file.attrs["created"] = time()
    h5py_file.attrs["uuid"] = str(uuid1())
    h5py_file.attrs["genome_source_type"] = "tsv"
    h5py_file.attrs["genomic_data"] = tsv_path
    h5py_file.attrs["phenotype_name"] = phenotype_name if phenotype_name is not None else "NA"
    h5py_file.attrs[
        "phenotype_metadata_source"] = phenotype_metadata_path if phenotype_metadata_path is not None else "NA"
    h5py_file.attrs["compression"] = "gzip (level %d)" % gzip

    # Read list of genome identifiers
    reader = pd.read_table(tsv_path, sep='\t', index_col=0, iterator=True, engine="c")
    genome_ids = reader.get_chunk(1).columns.values
    del reader
    logging.debug("The k-mer matrix contains %d genomes." % len(genome_ids))
    if len(set(genome_ids)) < len(genome_ids):
        error_callback(Exception("The genomic data contains genomes with the same identifier."))

    # Extract/write the metadata
    if phenotype_name is not None:
        genome_ids, labels = _parse_metadata(phenotype_metadata_path, genome_ids, warning_callback, error_callback)
        # Sort the genomes by label for better performance
        logging.debug("Sorting genomes by metadata label for optimal performance.")
        sorter = np.argsort(labels)
        genome_ids = genome_ids[sorter]
        labels = labels[sorter]
        logging.debug("Creating the phenotype metadata dataset.")
        phenotype = h5py_file.create_dataset("phenotype", data=labels, dtype=PHENOTYPE_LABEL_DTYPE)
        phenotype.attrs["name"] = phenotype_name
        del phenotype, labels

    # Write genome ids
    logging.debug("Creating the genome identifier dataset.")
    h5py_file.create_dataset("genome_identifiers",
                             data=genome_ids,
                             compression=compression,
                             compression_opts=compression_opts)

    # Initialize kmers (kmer_list) dataset
    logging.debug("Creating the kmer sequence dataset.")
    kmers = h5py_file.create_dataset("kmer_sequences",
                                     shape=(kmer_count,),
                                     dtype=kmer_dtype,
                                     compression=compression,
                                     compression_opts=compression_opts)

    # Initialize kmer_matrix dataset
    logging.debug("Creating the kmer matrix dataset.")
    kmer_matrix = h5py_file.create_dataset("kmer_matrix",
                                           shape=(
                                               int(ceil(1.0 * len(genome_ids) / KMER_MATRIX_PACKING_SIZE)), kmer_count),
                                           dtype=KMER_MATRIX_DTYPE,
                                           compression=compression,
                                           compression_opts=compression_opts,
                                           chunks=(1, tsv_block_size))

    # Initialize kmer_by_matrix_column dataset
    logging.debug("Creating the kmer sequence/matrix column mapping dataset.")
    kmer_by_matrix_column = h5py_file.create_dataset("kmer_by_matrix_column",
                                                     shape=(kmer_count,),
                                                     dtype=kmer_by_matrix_column_dtype,
                                                     compression=compression,
                                                     compression_opts=compression_opts)

    logging.debug("Transferring the data from TSV to HDF5.")
    tsv_reader = pd.read_table(tsv_path, index_col='kmers', sep='\t', chunksize=tsv_block_size)
    n_blocks = int(ceil(1.0 * kmer_count / tsv_block_size))
    n_copied_blocks = 0.
    for i, chunk in enumerate(tsv_reader):
        logging.debug("Block %d/%d." % (i + 1, n_blocks))
        progress_callback("Creating", n_copied_blocks / n_blocks)
        logging.debug("Reading data from TSV file.")
        kmers_data = chunk.index.values.astype(kmer_dtype)
        read_block_size = kmers_data.shape[0]
        block_start = i * tsv_block_size
        block_stop = block_start + read_block_size
        n_copied_blocks += 0.5
        progress_callback("Creating", n_copied_blocks / n_blocks)

        if block_start > kmer_count:
            break

        logging.debug("Writing data to HDF5.")
        kmers[block_start:block_stop] = kmers_data
        attribute_classification_sorted_by_strains = chunk[genome_ids]
        attribute_classification_data = attribute_classification_sorted_by_strains.T.values.astype(np.uint8)
        logging.debug("Packing the data.")
        kmer_matrix[:, block_start:block_stop] = _pack_binary_bytes_to_ints(attribute_classification_data,
                                                                            pack_size=KMER_MATRIX_PACKING_SIZE)
        n_copied_blocks += 0.5
        progress_callback("Creating", n_copied_blocks / n_blocks)

        # Write kmer_by_matrix_column
        kmer_by_matrix_column[block_start:block_stop] = np.arange(block_start, block_stop, dtype=kmer_by_matrix_column_dtype)
        logging.debug("Garbage collection.")
        gc.collect()  # Clear the memory objects created during the iteration, or else the memory will keep growing.

    h5py_file.close()

    logging.debug("Dataset creation completed.")

Example 122

Project: kover Source File: create.py
Function: from_contigs
def from_contigs(contig_list_path, output_path, kmer_size, filter_singleton, phenotype_name, phenotype_metadata_path,
                 gzip, temp_dir, nb_cores, verbose, progress, warning_callback=None,
                 error_callback=None):
    compression = "gzip" if gzip > 0 else None
    compression_opts = gzip if gzip > 0 else None

    # Execution callback functions
    if warning_callback is None:
        warning_callback = lambda w: logging.warning(w)
    if error_callback is None:
        def normal_raise(exception):
            raise exception

        error_callback = normal_raise

    # Make sure that the tmp data is unique to the current process
    temp_dir = join(temp_dir, str(getpid()))
    if not exists(temp_dir):
        mkdir(temp_dir)

    if (phenotype_name is None and phenotype_metadata_path is not None) or (
                    phenotype_name is not None and phenotype_metadata_path is None):
        error_callback(ValueError("If a phenotype is specified, it must have a name and a metadata file."))

    # Read list of genome identifiers
    contig_file_by_genome_id = dict(l.split() for l in open(contig_list_path, "r"))

    logging.debug("The k-mer matrix contains %d genomes." % len(contig_file_by_genome_id))
    if len(set(contig_file_by_genome_id.keys())) < len(contig_file_by_genome_id.keys()):
        error_callback(Exception("The genomic data contains genomes with the same identifier."))

    h5py_file = _create_hdf5_file_no_chunk_caching(output_path)
    h5py_file.attrs["created"] = time()
    h5py_file.attrs["uuid"] = str(uuid1())
    h5py_file.attrs["genome_source_type"] = "contigs"
    h5py_file.attrs["genomic_data"] = contig_list_path
    h5py_file.attrs["phenotype_name"] = phenotype_name if phenotype_name is not None else "NA"
    h5py_file.attrs[
        "phenotype_metadata_source"] = phenotype_metadata_path if phenotype_metadata_path is not None else "NA"
    h5py_file.attrs["filter"] = filter_singleton
    h5py_file.attrs["compression"] = "gzip (level %d)" % gzip

    # Extract/write the metadata
    if phenotype_name is not None:
        genome_ids, labels = _parse_metadata(phenotype_metadata_path, contig_file_by_genome_id.keys(), warning_callback,
                                             error_callback)
        # Sort the genomes by label for optimal performance
        logging.debug("Sorting genomes by metadata label for optimal performance.")
        sorter = np.argsort(labels)
        genome_ids = genome_ids[sorter]
        labels = labels[sorter]
        logging.debug("Creating the phenotype metadata dataset.")
        phenotype = h5py_file.create_dataset("phenotype", data=labels, dtype=PHENOTYPE_LABEL_DTYPE)
        phenotype.attrs["name"] = phenotype_name
        del phenotype, labels

    # Write genome ids
    logging.debug("Creating the genome identifier dataset.")
    h5py_file.create_dataset("genome_identifiers",
                             data=genome_ids,
                             compression=compression,
                             compression_opts=compression_opts)
    h5py_file.close()

    logging.debug("Initializing DSK.")

    # Preparing input file for multidsk
    files_sorted = ["%s\n" % contig_file_by_genome_id[id] for id in genome_ids]
    open(join(temp_dir, "list_contigs_files"), "w").writelines(files_sorted)

    # Calling multidsk
    contigs_count_kmers(file_path=join(temp_dir, "list_contigs_files"),
                        out_dir=temp_dir,
                        kmer_size=kmer_size,
                        abundance_min=1,
                        out_compress=gzip,
                        nb_cores=nb_cores,
                        verbose=int(verbose),
                        progress=progress)
    logging.debug("K-mers counting completed.")

    # Preparing input file for dsk2kover
    list_contigs = [join(temp_dir, basename(splitext(file)[0]) + ".h5") for file in files_sorted]
    file_dsk_output = open(join(temp_dir, "list_h5"), "w")
    for line in list_contigs:
        file_dsk_output.write(line + "\n")
    file_dsk_output.close()

    # Calling dsk2kover
    logging.debug("Initializing DSK2Kover.")
    contigs_pack_kmers(file_path=join(temp_dir, "list_h5"),
                       out_path=output_path,
                       filter_singleton=filter_singleton,
                       kmer_length=kmer_size,
                       compression=gzip,
                       chunk_size=BLOCK_SIZE,
                       nb_genomes=len(genome_ids),
                       progress=progress)

    logging.debug("Removing temporary files.")
    rmtree(temp_dir)

    # progress_callback("dsk2kover", 1)
    logging.debug("Dataset creation completed.")

Example 123

Project: eulerian-magnification Source File: base.py
def show_frequencies(vid_data, fps, bounds=None):
    """Graph the average value of the video as well as the frequency strength"""
    averages = []

    if bounds:
        for x in range(1, vid_data.shape[0] - 1):
            averages.append(vid_data[x, bounds[2]:bounds[3], bounds[0]:bounds[1], :].sum())
    else:
        for x in range(1, vid_data.shape[0] - 1):
            averages.append(vid_data[x, :, :, :].sum())

    averages = averages - min(averages)

    charts_x = 1
    charts_y = 2
    pyplot.figure(figsize=(20, 10))
    pyplot.subplots_adjust(hspace=.7)

    pyplot.subplot(charts_y, charts_x, 1)
    pyplot.title("Pixel Average")
    pyplot.xlabel("Time")
    pyplot.ylabel("Brightness")
    pyplot.plot(averages)

    freqs = scipy.fftpack.fftfreq(len(averages), d=1.0 / fps)
    fft = abs(scipy.fftpack.fft(averages))
    idx = np.argsort(freqs)

    pyplot.subplot(charts_y, charts_x, 2)
    pyplot.title("FFT")
    pyplot.xlabel("Freq (Hz)")
    freqs = freqs[idx]
    fft = fft[idx]

    freqs = freqs[len(freqs) / 2 + 1:]
    fft = fft[len(fft) / 2 + 1:]
    pyplot.plot(freqs, abs(fft))

    pyplot.show()
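
scipy.fftpack.fftfreq (like numpy.fft.fftfreq) returns frequencies in an interleaved order, so the example argsorts them before plotting. A small self-contained sketch of that step on a toy signal (not from the project):

import numpy as np

fs = 64                                                  # samples per second
signal = np.sin(2 * np.pi * 3.0 * np.arange(fs) / fs)    # a 3 Hz tone, one second long
fft = np.abs(np.fft.fft(signal))
freqs = np.fft.fftfreq(len(signal), d=1.0 / fs)

idx = np.argsort(freqs)              # fftfreq output is not monotonic
freqs, fft = freqs[idx], fft[idx]    # now increasing, ready for plotting

print(freqs[np.argmax(fft)])         # -3.0: the spectrum of a real signal is symmetric,
                                     # so the peak magnitude appears at -3 Hz and +3 Hz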

Example 124

Project: EDeN Source File: obabel.py
def find_nearest_neighbors(mol, distances, current_idx, **kwargs):

    # Most common elements in our galaxy with atomic number:
    # 1 Hydrogen
    # 2 Helium
    # 8 Oxygen
    # 6 Carbon
    # 10 Neon
    # 26 Iron
    # 7 Nitrogen
    # 14 Silicon
    # 12 Magnesium
    # 16 Sulfur
    atom_types = kwargs.get('atom_types', [1, 2, 8, 6, 10, 26, 7, 14, 12, 16])
    similarity_fn = kwargs.get('similarity_fn', lambda x: 1. / (x + 1))
    k = kwargs.get('k', 3)
    threshold = kwargs.get('threshold', 0)
    sorted_indices = np.argsort(distances[current_idx - 1, ])

    # Note: nearest_atoms will contain the atom index, which starts at 0
    nearest_atoms = []

    for atomic_no in atom_types:
        # Don't remove current atom from list:
        atom_idx = [atom.idx for atom in mol if atom.atomicnum == atomic_no]
        # Indices cannot be compared directly with idx
        if len(atom_idx) >= k:
            nearest_atoms.append(
                [id for id in sorted_indices if id + 1 in atom_idx][:k])
        else:
            nearest_atoms.append(
                [id for id in sorted_indices if id + 1 in atom_idx] +
                [None] * (k - len(atom_idx)))

    # The following expression flattens the list
    nearest_atoms = [x for sublist in nearest_atoms for x in sublist]
    # Replace idx for distances, assign an arbitrarily large distance for None
    nearest_atoms = [distances[current_idx - 1, i]
                     if i is not None else 1e10 for i in nearest_atoms]
    # If a threshold value is entered, filter the list of distances
    if threshold > 0:
        nearest_atoms = [x if x <= threshold else 1e10 for x in nearest_atoms]
    # Finally apply the similarity function to the resulting list and return
    nearest_atoms = [similarity_fn(x) if similarity_fn(
        x) > np.spacing(1e10) else 0 for x in nearest_atoms]

    return nearest_atoms
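
The example ranks all atoms by their distance to the current atom with a single argsort over one row of the distance matrix. A minimal sketch of that pattern with a made-up coordinate set:

import numpy as np

rng = np.random.default_rng(0)
coords = rng.random((6, 3))                               # six points in 3-D
distances = np.linalg.norm(coords[:, None, :] - coords[None, :, :], axis=-1)

current_idx = 2
order = np.argsort(distances[current_idx])                # ascending distance from point 2
nearest = order[1:4]                                      # order[0] is the point itself
print(nearest, distances[current_idx, nearest])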

Example 125

Project: deepchem Source File: conformers.py
  def prune_conformers(self, mol):
    """
    Prune conformers from a molecule using an RMSD threshold, starting
    with the lowest energy conformer.

    Parameters
    ----------
    mol : RDKit Mol
        Molecule.

    Returns
    -------
    A new RDKit Mol containing the chosen conformers, sorted by
    increasing energy.
    """
    if self.rmsd_threshold < 0 or mol.GetNumConformers() <= 1:
      return mol
    energies = self.get_conformer_energies(mol)
    rmsd = self.get_conformer_rmsd(mol)

    sort = np.argsort(energies)  # sort by increasing energy
    keep = []  # always keep lowest-energy conformer
    discard = []
    for i in sort:
      # always keep lowest-energy conformer
      if len(keep) == 0:
        keep.append(i)
        continue

      # discard conformers after max_conformers is reached
      if len(keep) >= self.max_conformers:
        discard.append(i)
        continue

      # get RMSD to selected conformers
      this_rmsd = rmsd[i][np.asarray(keep, dtype=int)]

      # discard conformers within the RMSD threshold
      if np.all(this_rmsd >= self.rmsd_threshold):
        keep.append(i)
      else:
        discard.append(i)

    # create a new molecule to hold the chosen conformers
    # this ensures proper conformer IDs and energy-based ordering
    new = Chem.Mol(mol)
    new.RemoveAllConformers()
    conf_ids = [conf.GetId() for conf in mol.GetConformers()]
    for i in keep:
      conf = mol.GetConformer(conf_ids[i])
      new.AddConformer(conf, assignId=True)
    return new
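
prune_conformers visits conformers in order of increasing energy (np.argsort(energies)) and keeps one only if it is far enough, by RMSD, from everything already kept. A stripped-down sketch of that greedy loop with random stand-in energies and a toy symmetric "RMSD" matrix:

import numpy as np

rng = np.random.default_rng(1)
energies = rng.random(8)
rmsd = rng.random((8, 8))
rmsd = (rmsd + rmsd.T) / 2.0                    # toy symmetric pairwise "RMSD" matrix
rmsd_threshold, max_keep = 0.3, 4

keep = []
for i in np.argsort(energies):                  # lowest energy first
    if not keep:
        keep.append(i)                          # always keep the lowest-energy conformer
    elif len(keep) < max_keep and np.all(rmsd[i, keep] >= rmsd_threshold):
        keep.append(i)                          # keep only if far from everything kept so far
print(keep)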

Example 126

Project: spark-ml-streaming Source File: kmeans.py
    def run(self, lgn=None):

        viz = None

        closestpoint = lambda centers, p: argmin(cdist(centers, array([p])))

        centers = self.centers
        modeltime = 0
        model = []

        # loop over batches
        for i in range(1, self.nbatches):

            print('generating batch %g' % i)

            # drift means the points will slowly drift by adding noise to the position
            if self.update == 'drift':
                centers += random.randn(centers.shape[0], centers.shape[1]) * 0.15

            # jump means every 15 batches the points will shift
            if self.update == 'jump':
                if i % self.interval == 0:
                    if self.transition:
                        centers = asarray(self.transition)
                    else:
                        base = random.rand(self.ncenters * self.ndims) * 1 + 2
                        delta = asarray([-d if random.rand() > 0.5 else d for d in base])\
                            .reshape(self.ncenters, self.ndims)
                        centers = centers + delta

            # generate the points by sampling from the clusters and write to disk
            npoints = self.npoints
            pts, labels = make_blobs(npoints, self.ndims, centers, cluster_std=self.std)
            self.writepoints(pts, i)
            time.sleep(1)

            # get the latest model (after waiting)
            model, modeltime = loadrecent(self.dataout + '/*-model.txt', modeltime, model)

            # plot an update (if we got a valid model)
            if len(model) == self.ncenters:

                clrs = labels
                order = argsort(labels)
                clrs = clrs[order]
                pts = pts[order]
                s = ones(self.npoints) * 10

                if self.ndims == 1:
                    pts = vstack((pts, model[:,None]))
                else:
                    pts = vstack((pts, model))
                clrs = hstack((clrs, ones(self.ncenters) * 5))
                s = hstack((s, ones(self.ncenters) * 10))

                # wait a few iterations before plotting
                if (lgn is not None) & (i > 5):

                    # scatter plot for two dimensions
                    if self.ndims == 2:
                        if viz is None:
                            viz = lgn.scatterstreaming(pts[:, 0], pts[:, 1], label=clrs, size=s)
                        else:
                            viz.append(pts[:, 0], pts[:, 1], label=clrs, size=s)

                    # line plot for one dimension
                    elif self.ndims == 1:
                        if viz is None:
                            viz = lgn.linestreaming(pts, label=clrs, size=s/2)
                        else:
                            viz.append(pts, label=clrs, size=s/2)

                    else:
                        raise Exception('Plotting only supported with 1 or 2 dimensions')
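
Before plotting, the example argsorts the cluster labels and applies the same ordering to the points so that each cluster's points are contiguous. A minimal sketch of that grouping step with made-up data:

import numpy as np

rng = np.random.default_rng(2)
pts = rng.random((10, 2))
labels = rng.integers(0, 3, size=10)

order = np.argsort(labels)        # one permutation, applied to labels and points alike
labels = labels[order]
pts = pts[order]
print(labels)                     # points are now grouped by cluster label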

Example 127

Project: pyksc Source File: col_to_cluster.py
def stacked_bars(labels, data_plots, out_fpath, label_translation, ref=True):
    
    x_locations = [1, 2, 3, 4, 6, 7, 8, 9, 11, 12, 13, 14, 16, 17, 18, 19]

    data_class = {}
    data_label = {}
    for data, _, class_num in data_plots:
        
        best_idx = np.argsort(data)[::-1][:4]
        best_cls = np.array(data)[best_idx] 
        best_lbl = np.array(labels)[best_idx]

        data_class[label_translation[class_num]] = best_cls
        data_label[label_translation[class_num]] = best_lbl

    bar_data   = []
    bar_labels = []
    for cls in sorted(data_class):
        bar_data.extend(data_class[cls])
        bar_labels.extend(data_label[cls])
    
    colors = ['b', 'g', 'm', 'r', 'y', 'c', '#A617A1', '#2B5700', 'w', 
              '#FF7300', 'k'] * 3

    colored={}
    if ref:
        to_use = set(REFERRER_ABBRV.values())
    else:
        to_use = set(CATEG_ABBRV.values())

    for i, l in enumerate(to_use):
        colored[l] = colors[i]

    for x, y, l in zip(x_locations, bar_data, bar_labels):
      
        c = colored[l]
        plt.bar(left=x, height=y, color=c, width=1, alpha=0.5)
        plt.text(x + .75, y, l, va='bottom', ha='center', rotation=45)
    
    plt.xlim(xmin=0, xmax=21)
    plt.xlabel('Cluster')
    if ref:
        plt.ylim(ymin=0, ymax=.31)
        plt.ylabel('Fraction of Views in Cluster')
    else:
        plt.ylim(ymin=0, ymax=.4)
        plt.ylabel('Fraction of Videos in Cluster')

    plt.xticks([3, 8, 13, 18],  ['$C0$', '$C1$', '$C2$', '$C3$'])
    plt.savefig(out_fpath)
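
np.argsort(data)[::-1][:4] picks the indices of the four largest values and reuses them to index the labels. A small self-contained illustration with hypothetical labels and fractions:

import numpy as np

labels = np.array(["search", "social", "email", "direct", "ads"])
data = np.array([0.10, 0.35, 0.05, 0.30, 0.20])

best_idx = np.argsort(data)[::-1][:4]   # indices of the 4 largest values, descending
print(labels[best_idx])                 # ['social' 'direct' 'ads' 'search']
print(data[best_idx])                   # [0.35 0.3  0.2  0.1 ]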

Example 128

Project: tractor Source File: cfht.py
Function: stage0
def stage0(**kwargs):
    ps = PlotSequence('cfht')

    decals = CfhtDecals()
    B = decals.get_bricks()
    print('Bricks:')
    B.about()

    ra,dec = 190.0, 11.0

    #bands = 'ugri'
    bands = 'gri'
    
    B.cut(np.argsort(degrees_between(ra, dec, B.ra, B.dec)))
    print('Nearest bricks:', B.ra[:5], B.dec[:5], B.brickid[:5])

    brick = B[0]
    pixscale = 0.186
    #W,H = 1024,1024
    #W,H = 2048,2048
    #W,H = 3600,3600
    W,H = 4800,4800

    targetwcs = wcs_for_brick(brick, pixscale=pixscale, W=W, H=H)
    ccdfn = 'cfht-ccds.fits'
    if os.path.exists(ccdfn):
        T = fits_table(ccdfn)
    else:
        T = get_ccd_list()
        T.writeto(ccdfn)
    print(len(T), 'CCDs')
    T.cut(ccds_touching_wcs(targetwcs, T))
    print(len(T), 'CCDs touching brick')

    T.cut(np.array([b in bands for b in T.filter]))
    print(len(T), 'in bands', bands)

    ims = []
    for t in T:
        im = CfhtImage(t)
        # magzp = hdr['PHOT_C'] + 2.5 * np.log10(hdr['EXPTIME'])
        # fwhm = t.seeing / (pixscale * 3600)
        # print '-> FWHM', fwhm, 'pix'
        im.seeing = t.seeing
        im.pixscale = t.pixscale
        print('seeing', t.seeing)
        print('pixscale', im.pixscale*3600, 'arcsec/pix')
        im.run_calibs(t.ra, t.dec, im.pixscale, W=t.width, H=t.height)
        ims.append(im)


    # Read images, clip to ROI
    targetrd = np.array([targetwcs.pixelxy2radec(x,y) for x,y in
                         [(1,1),(W,1),(W,H),(1,H),(1,1)]])
    keepims = []
    tims = []
    for im in ims:
        print()
        print('Reading expnum', im.expnum, 'name', im.extname, 'band', im.band, 'exptime', im.exptime)
        band = im.band
        wcs = im.read_wcs()
        imh,imw = wcs.imageh,wcs.imagew
        imgpoly = [(1,1),(1,imh),(imw,imh),(imw,1)]
        ok,tx,ty = wcs.radec2pixelxy(targetrd[:-1,0], targetrd[:-1,1])
        tpoly = zip(tx,ty)
        clip = clip_polygon(imgpoly, tpoly)
        clip = np.array(clip)
        #print 'Clip', clip
        if len(clip) == 0:
            continue
        x0,y0 = np.floor(clip.min(axis=0)).astype(int)
        x1,y1 = np.ceil (clip.max(axis=0)).astype(int)
        slc = slice(y0,y1+1), slice(x0,x1+1)

        ## FIXME -- it seems I got lucky and the cross product is
        ## negative == clockwise, as required by clip_polygon. One
        ## could check this and reverse the polygon vertex order.
        # dx0,dy0 = tx[1]-tx[0], ty[1]-ty[0]
        # dx1,dy1 = tx[2]-tx[1], ty[2]-ty[1]
        # cross = dx0*dy1 - dx1*dy0
        # print 'Cross:', cross

        print('Image slice: x [%i,%i], y [%i,%i]' % (x0,x1, y0,y1))
        print('Reading image from', im.imgfn, 'HDU', im.hdu)
        img,imghdr = im.read_image(header=True, slice=slc)
        goodpix = (img != 0)
        print('Number of pixels == 0:', np.sum(img == 0))
        print('Number of pixels != 0:', np.sum(goodpix))
        if np.sum(goodpix) == 0:
            continue
        # print 'Image shape', img.shape
        print('Image range', img.min(), img.max())
        print('Goodpix image range:', (img[goodpix]).min(), (img[goodpix]).max())
        if img[goodpix].min() == img[goodpix].max():
            print('No dynamic range in image')
            continue
        # print 'Reading invvar from', im.wtfn, 'HDU', im.hdu
        # invvar = im.read_invvar(slice=slc)
        # # print 'Invvar shape', invvar.shape
        # # print 'Invvar range:', invvar.min(), invvar.max()
        # invvar[goodpix == 0] = 0.
        # if np.all(invvar == 0.):
        #     print 'Skipping zero-invvar image'
        #     continue
        # assert(np.all(np.isfinite(img)))
        # assert(np.all(np.isfinite(invvar)))
        # assert(not(np.all(invvar == 0.)))
        # # Estimate per-pixel noise via Blanton's 5-pixel MAD
        # slice1 = (slice(0,-5,10),slice(0,-5,10))
        # slice2 = (slice(5,None,10),slice(5,None,10))
        # # print 'sliced shapes:', img[slice1].shape, img[slice2].shape
        # # print 'good shape:', (goodpix[slice1] * goodpix[slice2]).shape
        # # print 'good values:', np.unique(goodpix[slice1] * goodpix[slice2])
        # # print 'sliced[good] shapes:', (img[slice1] -  img[slice2])[goodpix[slice1] * goodpix[slice2]].shape
        # mad = np.median(np.abs(img[slice1] - img[slice2])[goodpix[slice1] * goodpix[slice2]].ravel())
        # sig1 = 1.4826 * mad / np.sqrt(2.)
        # print 'MAD sig1:', sig1
        # # invvar was 1 or 0
        # invvar *= (1./(sig1**2))
        # medsky = np.median(img[goodpix])

        # Read full image for sig1 and sky estimate
        fullimg = im.read_image()
        fullgood = (fullimg != 0)
        # Estimate per-pixel noise via Blanton's 5-pixel MAD
        slice1 = (slice(0,-5,10),slice(0,-5,10))
        slice2 = (slice(5,None,10),slice(5,None,10))
        mad = np.median(np.abs(fullimg[slice1] - fullimg[slice2])[fullgood[slice1] * fullgood[slice2]].ravel())
        sig1 = 1.4826 * mad / np.sqrt(2.)
        print('MAD sig1:', sig1)
        medsky = np.median(fullimg[fullgood])
        invvar = np.zeros_like(img)
        invvar[goodpix] = 1./sig1**2

        # Median-smooth sky subtraction
        plt.clf()
        dimshow(np.round((img-medsky) / sig1), vmin=-3, vmax=5)
        plt.title('Scalar median: %s' % im.name)
        ps.savefig()

        # medsky = np.zeros_like(img)
        # # astrometry.util.util
        # median_smooth(img, np.logical_not(goodpix), 256, medsky)
        fullmed = np.zeros_like(fullimg)
        median_smooth(fullimg - medsky, np.logical_not(fullgood), 256, fullmed)
        fullmed += medsky
        medimg = fullmed[slc]
        
        plt.clf()
        dimshow(np.round((img - medimg) / sig1), vmin=-3, vmax=5)
        plt.title('Median filtered: %s' % im.name)
        ps.savefig()
        
        #print 'Subtracting median:', medsky
        #img -= medsky
        img -= medimg
        
        primhdr = im.read_image_primary_header()

        magzp = decals.get_zeropoint_for(im)
        print('magzp', magzp)
        zpscale = NanoMaggies.zeropointToScale(magzp)
        print('zpscale', zpscale)

        # Scale images to Nanomaggies
        img /= zpscale
        sig1 /= zpscale
        invvar *= zpscale**2
        orig_zpscale = zpscale

        zpscale = 1.
        assert(np.sum(invvar > 0) > 0)
        print('After scaling:')
        print('sig1', sig1)
        print('invvar range', invvar.min(), invvar.max())
        print('image range', img.min(), img.max())

        assert(np.all(np.isfinite(img)))
        assert(np.all(np.isfinite(invvar)))
        assert(np.isfinite(sig1))

        plt.clf()
        lo,hi = -5*sig1, 10*sig1
        n,b,p = plt.hist(img[goodpix].ravel(), 100, range=(lo,hi), histtype='step', color='k')
        xx = np.linspace(lo, hi, 200)
        plt.plot(xx, max(n)*np.exp(-xx**2 / (2.*sig1**2)), 'r-')
        plt.xlim(lo,hi)
        plt.title('Pixel histogram: %s' % im.name)
        ps.savefig()

        twcs = ConstantFitsWcs(wcs)
        if x0 or y0:
            twcs.setX0Y0(x0,y0)

        info = im.get_image_info()
        fullh,fullw = info['dims']

        # read fit PsfEx model
        psfex = PsfEx.fromFits(im.psffitfn)
        print('Read', psfex)

        # HACK -- highly approximate PSF here!
        #psf_fwhm = imghdr['FWHM']
        #psf_fwhm = im.seeing

        psf_fwhm = im.seeing / (im.pixscale * 3600)
        print('PSF FWHM', psf_fwhm, 'pixels')
        psf_sigma = psf_fwhm / 2.35
        psf = NCircularGaussianPSF([psf_sigma],[1.])

        print('img type', img.dtype)
        
        tim = Image(img, invvar=invvar, wcs=twcs, psf=psf,
                    photocal=LinearPhotoCal(zpscale, band=band),
                    sky=ConstantSky(0.), name=im.name + ' ' + band)
        tim.zr = [-3. * sig1, 10. * sig1]
        tim.sig1 = sig1
        tim.band = band
        tim.psf_fwhm = psf_fwhm
        tim.psf_sigma = psf_sigma
        tim.sip_wcs = wcs
        tim.x0,tim.y0 = int(x0),int(y0)
        tim.psfex = psfex
        tim.imobj = im
        mn,mx = tim.zr
        tim.ima = dict(interpolation='nearest', origin='lower', cmap='gray',
                       vmin=mn, vmax=mx)
        tims.append(tim)
        keepims.append(im)

    ims = keepims

    print('Computing resampling...')
    # save resampling params
    for tim in tims:
        wcs = tim.sip_wcs
        subh,subw = tim.shape
        subwcs = wcs.get_subimage(tim.x0, tim.y0, subw, subh)
        tim.subwcs = subwcs
        try:
            Yo,Xo,Yi,Xi,rims = resample_with_wcs(targetwcs, subwcs, [], 2)
        except OverlapError:
            print('No overlap')
            continue
        if len(Yo) == 0:
            continue
        tim.resamp = (Yo,Xo,Yi,Xi)

    print('Creating coadds...')
    # Produce per-band coadds, for plots
    coimgs = []
    cons = []
    for ib,band in enumerate(bands):
        coimg = np.zeros((H,W), np.float32)
        con   = np.zeros((H,W), np.uint8)
        for tim in tims:
            if tim.band != band:
                continue
            (Yo,Xo,Yi,Xi) = tim.resamp
            if len(Yo) == 0:
                continue
            nn = (tim.getInvvar()[Yi,Xi] > 0)
            coimg[Yo,Xo] += tim.getImage ()[Yi,Xi] * nn
            con  [Yo,Xo] += nn

            # print
            # print 'tim', tim.name
            # print 'number of resampled pix:', len(Yo)
            # reim = np.zeros_like(coimg)
            # ren  = np.zeros_like(coimg)
            # reim[Yo,Xo] = tim.getImage()[Yi,Xi] * nn
            # ren[Yo,Xo] = nn
            # print 'number of resampled pix with positive invvar:', ren.sum()
            # plt.clf()
            # plt.subplot(2,2,1)
            # mn,mx = [np.percentile(reim[ren>0], p) for p in [25,95]]
            # print 'Percentiles:', mn,mx
            # dimshow(reim, vmin=mn, vmax=mx)
            # plt.colorbar()
            # plt.subplot(2,2,2)
            # dimshow(con)
            # plt.colorbar()
            # plt.subplot(2,2,3)
            # dimshow(reim, vmin=tim.zr[0], vmax=tim.zr[1])
            # plt.colorbar()
            # plt.subplot(2,2,4)
            # plt.hist(reim.ravel(), 100, histtype='step', color='b')
            # plt.hist(tim.getImage().ravel(), 100, histtype='step', color='r')
            # plt.suptitle('%s: %s' % (band, tim.name))
            # ps.savefig()

        coimg /= np.maximum(con,1)
        coimgs.append(coimg)
        cons  .append(con)

    plt.clf()
    dimshow(get_rgb(coimgs, bands))
    ps.savefig()

    plt.clf()
    for i,b in enumerate(bands):
        plt.subplot(2,2,i+1)
        dimshow(cons[i], ticks=False)
        plt.title('%s band' % b)
        plt.colorbar()
    plt.suptitle('Number of exposures')
    ps.savefig()

    print('Grabbing SDSS sources...')
    bandlist = [b for b in bands]
    cat,T = get_sdss_sources(bandlist, targetwcs)
    # record coordinates in target brick image
    ok,T.tx,T.ty = targetwcs.radec2pixelxy(T.ra, T.dec)
    T.tx -= 1
    T.ty -= 1
    T.itx = np.clip(np.round(T.tx).astype(int), 0, W-1)
    T.ity = np.clip(np.round(T.ty).astype(int), 0, H-1)

    plt.clf()
    dimshow(get_rgb(coimgs, bands))
    ax = plt.axis()
    plt.plot(T.tx, T.ty, 'o', mec=green, mfc='none', ms=10, mew=1.5)
    plt.axis(ax)
    plt.title('SDSS sources')
    ps.savefig()

    print('Detmaps...')
    # Render the detection maps
    detmaps = dict([(b, np.zeros((H,W), np.float32)) for b in bands])
    detivs  = dict([(b, np.zeros((H,W), np.float32)) for b in bands])
    for tim in tims:
        iv = tim.getInvvar()
        psfnorm = 1./(2. * np.sqrt(np.pi) * tim.psf_sigma)
        detim = tim.getImage().copy()
        detim[iv == 0] = 0.
        detim = gaussian_filter(detim, tim.psf_sigma) / psfnorm**2
        detsig1 = tim.sig1 / psfnorm
        subh,subw = tim.shape
        detiv = np.zeros((subh,subw), np.float32) + (1. / detsig1**2)
        detiv[iv == 0] = 0.
        (Yo,Xo,Yi,Xi) = tim.resamp
        detmaps[tim.band][Yo,Xo] += detiv[Yi,Xi] * detim[Yi,Xi]
        detivs [tim.band][Yo,Xo] += detiv[Yi,Xi]

    rtn = dict()
    for k in ['T', 'coimgs', 'cons', 'detmaps', 'detivs',
              'targetrd', 'pixscale', 'targetwcs', 'W','H',
              'bands', 'tims', 'ps', 'brick', 'cat']:
        rtn[k] = locals()[k]
    return rtn
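
B.cut(np.argsort(degrees_between(ra, dec, B.ra, B.dec))) reorders the whole brick table by angular distance to the target. A minimal sketch of sorting a structured array by a distance computed from two of its columns (small-angle separation, made-up bricks):

import numpy as np

bricks = np.array([(0, 189.5, 10.8), (1, 190.1, 11.0), (2, 191.0, 12.0)],
                  dtype=[("brickid", int), ("ra", float), ("dec", float)])
ra, dec = 190.0, 11.0

# Small-angle approximation to the angular separation, just for the sketch.
sep = np.hypot((bricks["ra"] - ra) * np.cos(np.radians(dec)), bricks["dec"] - dec)
bricks = bricks[np.argsort(sep)]          # whole table reordered, nearest brick first
print(bricks["brickid"])                  # [1 0 2]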

Example 129

Project: mtpy Source File: bayesian1d.py
def generate_input_file(edifilename, outputdir=None):

	eo = EDI.Edi()
	eo.readfile(edifilename)
	filebase = op.splitext(op.split(edifilename)[-1])[0]

	outfilename1 = '{0}_bayesian1d_z.in'.format(filebase)
	outfilename2 = '{0}_bayesian1d_zvar.in'.format(filebase)
	outdir = op.split(edifilename)[0]

	if outputdir is not None:
		try:
			if not op.isdir(outputdir):
				os.makedirs(outputdir)
				outdir = outputdir
		except:
			pass

	outfn1 = op.join(outdir,outfilename1)
	outfn2 = op.join(outdir,outfilename2)

	outfn1 = MTfh.make_unique_filename(outfn1)
	outfn2 = MTfh.make_unique_filename(outfn2)


	freqs = eo.freq

	z_array = eo.Z.z
	zerr_array = eo.Z.zerr

	if len(freqs) != len(z_array):
		raise MTex.MTpyError_edi_file('ERROR in Edi file {0} - number of '\
			'freqs different from length of Z array'.format(eo.filename))


	sorting = np.argsort(freqs)

	outstring1 = ''
	outstring2 = ''

	for idx in sorting:
		z = z_array[idx]
		zerr = zerr_array[idx]
		f = freqs[idx]
		outstring1 += '{0}\t'.format(f)
		outstring2 += '{0}\t'.format(f)
		for i in np.arange(2):
			for j in np.arange(2):
				if np.imag(z[i%2,(j+1)/2]) < 0 :
					z_string = '{0}-{1}i'.format(np.real(z[i%2,(j+1)/2]),
													 np.abs(np.imag(z[i%2,(j+1)/2])))
				else:
					z_string = '{0}+{1}i'.format(np.real(z[i%2,(j+1)/2]),
													 np.imag(z[i%2,(j+1)/2]))

				zerr_string = '{0}'.format(zerr[i%2,(j+1)/2])


				outstring1 += '{0}\t'.format(z_string)
				outstring2 += '{0}\t'.format(zerr_string)

		outstring1 = outstring1.rstrip() + '\n'
		outstring2 = outstring2.rstrip() + '\n'

	Fout1 = open(outfn1,'w')
	Fout2 = open(outfn2,'w')
	Fout1.write(outstring1.expandtabs(4))
	Fout2.write(outstring2.expandtabs(4))
	Fout1.close()
	Fout2.close()

	return outfn1,outfn2
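
The example writes the impedance rows out in order of increasing frequency by iterating over np.argsort(freqs) rather than reordering the arrays themselves. A tiny sketch of that pattern with made-up values:

import numpy as np

freqs = np.array([10.0, 0.1, 100.0, 1.0])
z = np.arange(16, dtype=complex).reshape(4, 2, 2)   # one 2x2 impedance tensor per frequency

for idx in np.argsort(freqs):                       # iterate in order of increasing frequency
    print(freqs[idx], z[idx].ravel())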

Example 130

Project: yatsm Source File: test_cache.py
@xfail
def test_update_cache_file_add_obs(cachefile, example_cache,
                                   example_timeseries):
    """ Grab a subset of test data and see if we get more data back """
    stack_images = example_timeseries['images']
    stack_image_IDs = example_timeseries['image_IDs']

    # Presort and subset for comparison
    sort_idx = np.argsort(example_cache['image_IDs'])
    test_Y = example_cache['Y'][:, sort_idx, :]
    test_IDs = example_cache['image_IDs'][sort_idx]

    size_1 = 100
    size_2 = 200

    sort_idx = np.argsort(stack_image_IDs)[:size_2]
    stack_images = stack_images[sort_idx]
    stack_IDs = stack_image_IDs[sort_idx]

    # Create reduced dataset to add to
    np.savez_compressed('test.npz',
                        Y=test_Y[:, :size_1, :],
                        image_IDs=test_IDs[:size_1])

    # Write update and read back
    cache.update_cache_file(stack_images, stack_IDs,
                            'test.npz', 'test_new.npz',
                            0, io.gdal_reader)
    updated = np.load('test_new.npz')

    # Test and clean update
    np.testing.assert_equal(test_Y[:, :size_2, :], updated['Y'])
    np.testing.assert_equal(test_IDs[:size_2], updated['image_IDs'])

    os.remove('test.npz')
    os.remove('test_new.npz')
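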

Example 131

Project: CMT Source File: CMT.py
Function: estimate
	def estimate(self, keypoints):

		center = array((nan, nan))
		scale_estimate = nan
		med_rot = nan

		# At least 2 keypoints are needed for scale
		if keypoints.size > 1:

			# Extract the keypoint classes
			keypoint_classes = keypoints[:, 2].squeeze().astype(np.int) 

			# Retain singular dimension
			if keypoint_classes.size == 1:
				keypoint_classes = keypoint_classes[None]

			# Sort
			ind_sort = argsort(keypoint_classes)
			keypoints = keypoints[ind_sort]
			keypoint_classes = keypoint_classes[ind_sort]

			# Get all combinations of keypoints
			all_combs = array([val for val in itertools.product(range(keypoints.shape[0]), repeat=2)])	

			# But exclude comparison with itself
			all_combs = all_combs[all_combs[:, 0] != all_combs[:, 1], :]

			# Measure distance between allcombs[0] and allcombs[1]
			ind1 = all_combs[:, 0] 
			ind2 = all_combs[:, 1]

			class_ind1 = keypoint_classes[ind1] - 1
			class_ind2 = keypoint_classes[ind2] - 1

			duplicate_classes = class_ind1 == class_ind2

			if not all(duplicate_classes):
				ind1 = ind1[~duplicate_classes]
				ind2 = ind2[~duplicate_classes]

				class_ind1 = class_ind1[~duplicate_classes]
				class_ind2 = class_ind2[~duplicate_classes]

				pts_allcombs0 = keypoints[ind1, :2]
				pts_allcombs1 = keypoints[ind2, :2]

				# This distance might be 0 for some combinations,
				# as it can happen that there is more than one keypoint at a single location
				dists = util.L2norm(pts_allcombs0 - pts_allcombs1)

				original_dists = self.squareform[class_ind1, class_ind2]

				scalechange = dists / original_dists

				# Compute angles
				angles = np.empty((pts_allcombs0.shape[0]))

				v = pts_allcombs1 - pts_allcombs0
				angles = np.arctan2(v[:, 1], v[:, 0])
				
				original_angles = self.angles[class_ind1, class_ind2]

				angle_diffs = angles - original_angles

				# Fix long way angles
				long_way_angles = np.abs(angle_diffs) > math.pi

				angle_diffs[long_way_angles] = angle_diffs[long_way_angles] - np.sign(angle_diffs[long_way_angles]) * 2 * math.pi

				scale_estimate = median(scalechange)
				if not self.estimate_scale:
					scale_estimate = 1;

				med_rot = median(angle_diffs)
				if not self.estimate_rotation:
					med_rot = 0;

				keypoint_class = keypoints[:, 2].astype(np.int)
				votes = keypoints[:, :2] - scale_estimate * (util.rotate(self.springs[keypoint_class - 1], med_rot))

				# Remember all votes including outliers
				self.votes = votes

				# Compute pairwise distance between votes
				pdist = scipy.spatial.distance.pdist(votes)

				# Compute linkage between pairwise distances
				linkage = scipy.cluster.hierarchy.linkage(pdist)

				# Perform hierarchical distance-based clustering
				T = scipy.cluster.hierarchy.fcluster(linkage, self.THR_OUTLIER, criterion='distance')

				# Count votes for each cluster
				cnt = np.bincount(T)  # Dummy 0 label remains
				
				# Get largest class
				Cmax = argmax(cnt)

				# Identify inliers (=members of largest class)
				inliers = T == Cmax
				# inliers = med_dists < THR_OUTLIER

				# Remember outliers
				self.outliers = keypoints[~inliers, :]

				# Stop tracking outliers
				keypoints = keypoints[inliers, :]

				# Remove outlier votes
				votes = votes[inliers, :]

				# Compute object center
				center = np.mean(votes, axis=0)

		return (center, scale_estimate, med_rot, keypoints)
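
ind_sort = argsort(keypoint_classes) sorts the keypoint rows by their class column so that later class-based indexing is consistent. A minimal sketch, with a made-up keypoint array whose third column is the class:

import numpy as np

# columns: x, y, class
keypoints = np.array([[10.0, 5.0, 3],
                      [ 2.0, 7.0, 1],
                      [ 4.0, 1.0, 2]])

classes = keypoints[:, 2].astype(int)
ind_sort = np.argsort(classes)          # permutation that orders the class column
keypoints = keypoints[ind_sort]         # rows follow the same permutation
print(keypoints[:, 2])                  # [1. 2. 3.]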

Example 132

Project: tractor Source File: testblob2.py
def oldjunk():
    cutToPrimary = True
    if False:
        stars = [
            # David's nearby pairs of F stars
            (3900., 0., 118.37066, 52.527073),
            (3705., 0., 130.17654, 52.750081),
            # High stellar density
            #(0., 0., 270.0, 0.003),
            ]
        # Dustin's stars
        # (4472.001,	0.02514649,	246.47016,	19.066909),
        # (5196.53,   0.02490235, 240.09403,  37.404078),
        # (6179.05,   0.6324392,  310.47791,  57.523221),
        # (6021.875, 0.7000019, 150.52443, -0.478836),
        # (7757.096, 0.06507664, 305.11144, -12.957655),
        # (8088.685, 0.2436366, 253.11475, 11.60716),
        # (8395.096, 0.7563477, 188.34439, 63.442057),
        # (9201.74,  178, 93.971719, 0.56302169),
        # ]
    
        T = fits_table('stars2.fits')
        print('Read stars:')
        T.about()
        stars.extend(zip(T.teff, T.teff_sigma, T.ra, T.dec))

        # reformat
        stars = [(ra,dec,[('T_EFF',teff,'Effective temperature'),
                          ('DT_EFF',dteff,'Effective temperate error')],
                  cutToPrimary) for teff,dteff,ra,dec in stars]
        
    elif False:

        sdss = DR9(basedir=tempdir)
        sdss.useLocalTree()
        # near M87 / Virgo cluster
        run,camcol,field = 3836,2,258
        pofn = sdss.retrieve('photoObj', run, camcol, field)
        T = fits_table(pofn, columns=[
            'parent', 'objid', 'ra', 'dec', 'fracdev', 'objc_type', 'modelflux',
            'theta_dev', 'theta_deverr', 'ab_dev', 'ab_deverr', 'phi_dev_deg',
            'theta_exp', 'theta_experr', 'ab_exp', 'ab_experr', 'phi_exp_deg',
            'resolve_status', 'nchild', 'flags', 'objc_flags',
            'run','camcol','field','id'])
        print(len(T), 'objects')
        T.cut(T.objc_type == 3)
        print(len(T), 'galaxies')
        T.cut(T.nchild == 0)
        print(len(T), 'children')
        T.cut(np.argsort(-T.modelflux[:,2]))

        # keep only one child in each blend family
        parents = set()
        keepi = []
        for i in range(len(T)):
            if T.parent[i] in parents:
                continue
            keepi.append(i)
            parents.add(T.parent[i])
        T.cut(np.array(keepi))
        print(len(T), 'unique blend families')
        T = T[:25]

        stars = [(ra,dec,[],cutToPrimary) for ra,dec in zip(T.ra, T.dec)]

    else:
        # stars = [ (0.,0., 131.59054,  0.66408610),
        #           (0.,0., 147.34576,  0.51657783 ),
        #           ]
        
        for args in stars:
            ra,dec = args[:2]
            try:
                fns = oneblob(*args)
            except:
                import traceback
                traceback.print_exc()
                continue
            if plots:
                stamp_pattern = 'stamp-%%s-%.4f-%.4f.fits' % (ra, dec)
                bands = 'ugriz'
                fns = ['cat'] + [stamp_pattern % band for band in bands]
                for j,fn in enumerate(fns[1:]):
                    print('Filename', fn)
                    F = fitsio.FITS(fn)
                    n = len(F) / 2
                    print('n ext:', n)
                    cols = int(np.ceil(np.sqrt(n)))
                    rows = int(np.ceil(n / float(cols)))
                    plt.clf()
                    for i,ext in enumerate(range(0, len(F), 2)):
                        plt.subplot(rows, cols, i+1)
                        hdr = F[ext].read_header()
                        dimshow(F[ext].read(), ticks=False)
                        plt.title('RCF %i/%i/%i' % (hdr['RUN'], hdr['CAMCOL'], hdr['FIELD']))
                    plt.suptitle('%s band' % bands[j])
                    plt.savefig(fn.replace('.fits','.png'))
                    F.close()
                    del F
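
T.cut(np.argsort(-T.modelflux[:, 2])) sorts the table by descending flux by negating the key. A short illustration of the two common ways to get a descending argsort (made-up fluxes):

import numpy as np

modelflux = np.array([3.0, 10.0, 1.0, 7.0])

order = np.argsort(-modelflux)              # negate the key to sort in descending order
print(order)                                # [1 3 0 2]
print(np.argsort(modelflux)[::-1])          # [1 3 0 2]  (equivalent here; the two can
                                            #  order tied values differently)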

Example 133

Project: meterstick Source File: metrics.py
Function: init
  def __init__(self, variable, quantile, name=None):
    """Initializes quantile estimator.

    Args:
      variable: A string representing the variable to _calculate the quantile.
      quantile: The quantile to be _calculated (range is [0,1]).
      name: A string for the column name of results.
    """

    def _calculate(data, weights):
      """Calculates the quantile for a weighted array.

      Args:
        data: A pandas dataframe
        weights: A numpy array of weights.

      Returns:
        The weighted quantile of data[variable].
      """

      values = data[variable].values

      indices = np.argsort(values)

      if quantile > 0.5:
        # Because we're iterating through the indices we should
        # choose the presumably quicker side to start.
        local_quantile = 1 - quantile
        previous_ii = indices[-1]
        indices = reversed(indices)
      else:
        local_quantile = quantile
        previous_ii = indices[0]

      threshold = weights.sum() * local_quantile
      previous_accumulated_weight = 0

      # We follow the usual convention for the median: let
      # lower=floor(threshold) and upper = ceil(threshold). If
      # lower==upper we return values[lower] otherwise we return the
      # average of values[lower] and values[upper].

      for ii in indices:
        if weights[ii] > 0:
          if previous_accumulated_weight > threshold:
            # The previous element passed the threshold itself and thus
            # values[lower]==values[upper].
            return values[previous_ii]
          elif previous_accumulated_weight == threshold:
            return (values[ii] + values[previous_ii]) / 2
          else:
            previous_ii = ii
            previous_accumulated_weight += weights[ii]

    if name is None:
      name = "quantile({}, {:.2f})".format(variable, quantile)

    super(Quantile, self).__init__(name, _calculate, "scalar")
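
The Quantile metric walks the values in argsort order, accumulating weights until the requested quantile is reached. Below is a much simpler sketch of the same idea using a cumulative sum and no interpolation; it does not reproduce the class's exact tie-handling convention, and the helper name weighted_quantile is made up:

import numpy as np

def weighted_quantile(values, weights, q):
    # Walk the values in sorted order until the accumulated weight
    # reaches a fraction q of the total weight (no interpolation).
    order = np.argsort(values)
    cum = np.cumsum(weights[order])
    return values[order][np.searchsorted(cum, q * cum[-1])]

values = np.array([5.0, 1.0, 3.0, 2.0, 4.0])
weights = np.ones_like(values)
print(weighted_quantile(values, weights, 0.5))   # 3.0 with equal weights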

Example 134

Project: pyvision Source File: fast_util.py
Function: call
    def __call__(self, mat, threshold = None, sort_results = True):
        '''
        Find all local maxima that are greater than threshold, up to a total of
        max_length.

        To save time, the arrays that hold the maxes and vals are created
        once and reused for each call.  This means that local maximum detection
        is not thread safe. If using this class with threads, create an instance
        for each thread.

        @param mat: 2d Real Matrix input.
        @param threshold: Minimum value of local maxima.
        @param sort_results: set to False to save time and return an unordered list.
        
        @returns: maxes,vals
        '''
        maxes = self.maxes
        vals = self.vals
        r,c = mat.shape
        max_length = self.max_length
        
        if threshold != None:
            count = weave.inline(
                '''  
                int count = 0;
                
                for( int i = 1; i < r-1 ; i++){
                    for(int j = 1; j < c-1 ; j++){
                        // Check if the current location meets the threshold
                        
                        if (mat(i,j) > threshold    &&
                            mat(i,j) > mat(i,j-1)   &&
                            mat(i,j) > mat(i,j+1)   &&
                            mat(i,j) > mat(i-1,j-1) &&
                            mat(i,j) > mat(i-1,j)   &&
                            mat(i,j) > mat(i-1,j+1) &&
                            mat(i,j) > mat(i+1,j-1) &&
                            mat(i,j) > mat(i+1,j)   &&
                            mat(i,j) > mat(i+1,j+1)){
                        
                            // This is a local max
                            maxes(count,0) = i;
                            maxes(count,1) = j;
                            vals(count) = mat(i,j);
                            count += 1;
                            
                            if(count == max_length){
                                i = r;
                                j = c;
                            }
                        }   
                    }
                }
    
                return_val = count;
                ''',
                arg_names=['mat','maxes','vals','max_length','threshold','r','c'],
                type_converters=weave.converters.blitz,
            )
        else:
            count = weave.inline(
                '''  
                int count = 0;
                
                for( int i = 1; i < r-1 ; i++){
                    for(int j = 1; j < c-1 ; j++){
                        // Check if the current location meets the threshold
                        
                        if (mat(i,j) > mat(i,j-1)   &&
                            mat(i,j) > mat(i,j+1)   &&
                            mat(i,j) > mat(i-1,j-1) &&
                            mat(i,j) > mat(i-1,j)   &&
                            mat(i,j) > mat(i-1,j+1) &&
                            mat(i,j) > mat(i+1,j-1) &&
                            mat(i,j) > mat(i+1,j)   &&
                            mat(i,j) > mat(i+1,j+1)){
                        
                            // This is a local max
                            maxes(count,0) = i;
                            maxes(count,1) = j;
                            vals(count) = mat(i,j);
                            count += 1;
                            
                            if(count == max_length){
                                i = r;
                                j = c;
                            }
                        }   
                    }
                }
    
                return_val = count;
                ''',
                arg_names=['mat','maxes','vals','max_length','r','c'],
                type_converters=weave.converters.blitz,
            )
        
        if sort_results == False:
            return maxes[:count,:].copy(),vals[:count].copy()
        
        order = np.argsort(vals[:count])[::-1]
        maxes = maxes[order]
        vals = vals[order]
        
        #print vals
        #print maxes
        
        return maxes,vals
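
After the inline C loop fills maxes and vals, np.argsort(vals[:count])[::-1] orders the detections from strongest to weakest. A small sketch of that step, plus np.argpartition as an alternative when only the k strongest are needed (made-up values):

import numpy as np

vals = np.array([0.2, 0.9, 0.1, 0.7, 0.5])
maxes = np.array([[3, 4], [1, 2], [9, 9], [5, 6], [7, 8]])   # one (row, col) per value

order = np.argsort(vals)[::-1]          # strongest response first
print(vals[order])                      # [0.9 0.7 0.5 0.2 0.1]
print(maxes[order][0])                  # [1 2], location of the strongest maximum

# If only the k strongest are needed, np.argpartition avoids a full sort:
k = 2
top_k = np.argpartition(vals, -k)[-k:]  # indices of the two largest, unordered
print(np.sort(vals[top_k]))             # [0.7 0.9]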

Example 135

Project: hyperion Source File: filter.py
    def to_hdf5_group(self, group, name):

        self.check_all_set()

        # Get spectral coordinate in Hz and transmision in fractional terms
        nu = self.spectral_coord.to(u.Hz, equivalencies=u.spectral()).value
        tr = self.transmission.to(u.one).value

        # Sort in order of increasing Hz
        order = np.argsort(nu)
        nu = nu[order]
        tr = tr[order]

        # Get other parameters for the normalization
        nu0 = self.central_spectral_coord.to(u.Hz, equivalencies=u.spectral()).value
        alpha = self.alpha
        beta = self._beta

        # Here we normalize the filter before passing it to Hyperion
        tr_norm = (tr / nu ** (1 + beta)
                   / nu0 ** alpha
                   / integrate(nu, tr / nu ** (1. + alpha + beta)))

        # Now multiply by nu so that Hyperion returns nu * Fnu
        tr_norm *= nu

        dset = group.create_dataset(name, data=np.array(list(zip(nu, tr, tr_norm)),
                                                        dtype=[('nu', float),
                                                               ('tr', float),
                                                               ('tn', float)]))

        dset.attrs['name'] = np.string_(self.name)

        dset.attrs['alpha'] = self.alpha
        dset.attrs['beta'] = self._beta
        dset.attrs['nu0'] = self.central_spectral_coord.to(u.Hz, equivalencies=u.spectral()).value

Example 136

Project: tractor Source File: profile-stats.py
Function: main
def main():
    import optparse
    parser = optparse.OptionParser('%prog: [opts] <profile>...')
    parser.add_option('--merge', action='store_true', help='Merge input files into one big profile')
    opt,args = parser.parse_args()
    if len(args) == 0:
        parser.print_help()
        sys.exit(0)


    if opt.merge:
        p = pstats.Stats(args[0])
        #p.add(*args[1:])
        for fn in args[1:]:
            p.add(fn)
        P = [p]
    else:
        P = [pstats.Stats(fn) for fn in args]
    
    for p in P:
        print()
        print('Cumulative time')
        print()
        #p = pstats.Stats(fn)
        p = p.strip_dirs()
        p.sort_stats('cumulative').print_stats(100)

        print()
        print('Callees ordered by cumulative time:')
        print()
        p.print_callees(40)

        print()
        print('Time')
        print()

        #p = pstats.Stats(fn)
        p.sort_stats('time').print_stats(40)
        p.print_callees()


        width,lst = p.get_print_list([40])
        #print 'lst', lst
        if lst:
            p.calc_callees()
            name_size = width
            arrow = '->'
            print('lst length:', len(lst))
            for func in lst:
                #print 'func', func
                if func in p.all_callees:
                    p.print_call_heading(width, "called...")
                    print(pstats.func_std_string(func).ljust(name_size) + arrow, end=' ')
                    print()
                    #p.print_call_line(width, func, p.all_callees[func])
                    cc = p.all_callees[func]
                    #print 'Callees:', cc
                    TT = []
                    CT = []
                    for func,value in cc.items():
                        #print 'func,value', func, value
                        if isinstance(value, tuple):
                            nc, ccx, tt, ct = value
                            TT.append(tt)
                            CT.append(ct)
                            #print '  ', func, ct, tt
                        else:
                            print('NON-TUPLE', value)

                    I = np.argsort(CT)
                    FV = list(cc.items())
                    for i in reversed(I[-40:]):
                        func,value = FV[i]
                        name = pstats.func_std_string(func)
                        if isinstance(value, tuple):
                            nc, ccx, tt, ct = value
                            if nc != ccx:
                                substats = '%d/%d' % (nc, ccx)
                            else:
                                substats = '%d' % (nc,)
                            substats = '%-20s %s %s  %s' % (substats, pstats.f8(tt), pstats.f8(ct), name)
                        print('   ' + substats)

                else:
                    p.print_call_line(width, func, {})
                print()
                print()

Example 137

Project: hyperopt-nnet Source File: pylearn_pca.py
def pca_from_cov(cov, lower=0, max_components=None, max_energy_fraction=None):
    """Return (eigvals, eigvecs) of data with covariance `cov`.

    The returned eigvals will be a np ndarray vector.
    The returned eigvecs will be a np ndarray matrix whose *cols* are the eigenvectors.

    This is recommended for retrieving many components from high-dimensional data.

    :param cov: data covariance matrix
    :type cov: a np ndarray 

    :returns: (eigvals, eigvecs) of decomposition
    """

    w, v = scipy.linalg.eigh(a=cov, lower=lower)
    # definition of eigh
    #  a * v[:,i] = w[i] * vr[:,i]
    #  v.H * v = identity


    # total variance (vartot) can be computed at this point:
    vartot = w.sum()

    # sort the eigenvals and vecs by decreasing magnitude
    a = np.argsort(w)[::-1]
    w = w[a]
    v = v[:,a]

    if max_components != None:
        w = w[:max_components]
        v = v[:, :max_components]

    if max_energy_fraction != None:
        if not (0.0 <= max_energy_fraction <= 1.0):
            raise ValueError('illegal value for max_energy_fraction', max_energy_fraction)
        if max_energy_fraction < 1.0:
            energy = 0
            i = 0
            while (energy < max_energy_fraction * vartot) and (i < len(w)):
                energy += w[i]
                i += 1
            w = w[:(i-1)]
            v = v[:,:(i-1)]
    return w,v
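
scipy.linalg.eigh returns eigenvalues in ascending order, so the example argsorts them (reversed) and applies the same permutation to the eigenvector columns. A minimal sketch of that reordering using numpy.linalg.eigh, which also returns ascending eigenvalues, on a made-up covariance matrix:

import numpy as np

rng = np.random.default_rng(3)
X = rng.random((50, 4))
cov = np.cov(X, rowvar=False)

w, v = np.linalg.eigh(cov)          # eigenvalues are returned in ascending order
a = np.argsort(w)[::-1]             # permutation for decreasing eigenvalue
w = w[a]
v = v[:, a]                         # eigenvector columns follow the same permutation
print(w)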

Example 138

Project: DeBaCl Source File: utils.py
def knn_graph(X, k, method='brute_force', leaf_size=30):
    """
    Compute the symmetric k-nearest neighbor graph for a set of points. Assume
    a Euclidean distance metric.

    Parameters
    ----------
    X : numpy array | list [numpy arrays]
        Data points, with each row as an observation.

    k : int
        The number of points to consider as neighbors of any given observation.

    method : {'brute-force', 'kd-tree', 'ball-tree'}, optional
        Computing method.

        - 'brute-force': computes the (Euclidean) distance between all O(n^2)
          pairs of rows in 'X', then for every point finds the k-nearest. It is
          limited to tens of thousands of observations (depending on available
          RAM).

        - 'kd-tree': partitions the data into axis-aligned rectangles to avoid
          computing all O(n^2) pairwise distances. Much faster than
          'brute-force', but only works for data in fewer than about 20
          dimensions. Requires the scikit-learn library.

        - 'ball-tree': partitions the data into balls and uses the metric
          property of euclidean distance to avoid computing all O(n^2)
          distances. Typically much faster than 'brute-force', and works with
          up to a few hundred dimensions. Requires the scikit-learn library.

    leaf_size : int, optional
        For the 'kd-tree' and 'ball-tree' methods, the number of observations
        in the leaf nodes. Leaves are not split further, so distance
        computations within leaf nodes are done by brute force. 'leaf_size' is
        ignored for the 'brute-force' method.

    Returns
    -------
    neighbors : numpy array
        Each row contains the nearest neighbors of the corresponding row in
        'X', indicated by row indices.

    radii : list[float]
        For each row of 'X' the distance to its k'th nearest neighbor
        (including itself).

    See Also
    --------
    epsilon_graph

    Examples
    --------
    >>> X = numpy.random.rand(100, 2)
    >>> knn, radii = debacl.utils.knn_graph(X, k=8, method='kd-tree')
    """

    n, p = X.shape

    if method == 'kd_tree':
        if _HAS_SKLEARN:
            kdtree = _sknbr.KDTree(X, leaf_size=leaf_size, metric='euclidean')
            distances, neighbors = kdtree.query(X, k=k, return_distance=True,
                                                sort_results=True)
            radii = distances[:, -1]
        else:
            raise ImportError("The scikit-learn library could not be loaded." +
                              " It is required for the 'kd-tree' method.")

    elif method == 'ball_tree':
        if _HAS_SKLEARN:
            btree = _sknbr.BallTree(X, leaf_size=leaf_size, metric='euclidean')
            distances, neighbors = btree.query(X, k=k, return_distance=True,
                                               sort_results=True)
            radii = distances[:, -1]
        else:
            raise ImportError("The scikit-learn library could not be loaded." +
                              " It is required for the 'ball-tree' method.")

    else:  # assume brute-force
        if not _HAS_SCIPY:
            raise ImportError("The 'scipy' module could not be loaded. " +
                              "It is required for the 'brute_force' method " +
                              "for building a knn similarity graph.")

        d = _spd.pdist(X, metric='euclidean')
        D = _spd.squareform(d)
        rank = _np.argsort(D, axis=1)
        neighbors = rank[:, 0:k]

        k_nbr = neighbors[:, -1]
        radii = D[_np.arange(n), k_nbr]

    return neighbors, radii
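
The brute-force branch builds the full pairwise distance matrix and argsorts each row to find the k nearest neighbours. A compact, self-contained version of just that branch (random points, Euclidean metric):

import numpy as np
from scipy.spatial.distance import pdist, squareform

rng = np.random.default_rng(4)
X = rng.random((100, 2))
k = 8

D = squareform(pdist(X, metric="euclidean"))
rank = np.argsort(D, axis=1)             # row i: column indices sorted by distance to point i
neighbors = rank[:, :k]                  # the k nearest (column 0 is the point itself)
radii = D[np.arange(len(X)), neighbors[:, -1]]   # distance to the k-th neighbour
print(neighbors.shape, radii.shape)      # (100, 8) (100,)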

Example 139

Project: hyphae_ani Source File: hyphae_mergetest2.py
  def step(self):

    self.itt += 1
    num = self.num

    try:

      k = self.DQ.pop()

    except IndexError:

      ## no more live nodes.
      return False, False

    self.C[k] += 1

    if self.C[k]>CK_MAX:

      ## node is dead

      ## this is inefficient. but it does not matter for small canvases
      #self.ctx.set_source_rgb(*CONTRASTC)
      #self.circle(self.X[k],self.Y[k],ONE*4)

      return True, False

    #r = RAD + random()*ONE*R_RAND_SIZE
    r = self.R[k]*RAD_SCALE if self.D[k]>-1 else self.R[k]
    b = num if self.D[k]>-1 else self.B[k]

    if r<ONE:

      ## node dies

      self.ctx.set_source_rgb(*CONTRASTC)
      self.circle(self.X[k],self.Y[k],ONE*4)

      self.C[k] = CK_MAX+1
      return True, False

    ge = self.GE[k]+1 if self.D[k]>-1 else self.GE[k]

    angle = normal()*SEARCH_ANGLE_MAX
    the = self.THE[k] + (1.-1./((ge+1)**SEARCH_ANGLE_EXP))*angle

    x = self.X[k] + sin(the)*r
    y = self.Y[k] + cos(the)*r

    ## stop nodes at edge of canvas
    if x>X_MAX or x<X_MIN or y>Y_MAX or y<Y_MIN:

      ## node is outside canvas
      return True, False

    ## stop nodes at edge of circle
    ## remember to set initial node inside circle.
    #circle_rad = sqrt(square(x-0.5)+square(y-0.5))
    #if circle_rad>CIRCLE_RADIUS:

      ### node is outside circle
      #return True,False
    
    try:

      inds = near_zone_inds(x,y,self.Z,k)
    except IndexError:

      ## node is outside zonemapped area
      self.DQ.appendleft(k)
      return True, False

    good = True
    if len(inds)>0:
      dd = square(self.X[inds]-x) + square(self.Y[inds]-y)

      sqrt(dd,dd)
      mask = dd*2 >= self.R[inds]+r
      good = mask.all()
      
    if good: 
      self.X[num] = x
      self.Y[num] = y
      self.R[num] = r
      self.THE[num] = the
      self.P[num] = k
      self.GE[num] = ge
      self.B[num] = b

      ## set first descendant if node has no descendants
      if self.D[k]<0:
        self.D[k] = num

      z = get_z(x,y) 

      self.Z[z].append(num)

      #self.ctx.set_line_width(ONE*2)
      #self.ctx.set_source_rgb(*FRONT)
      #self.line(self.X[k],self.Y[k],x,y)

      #self.ctx.set_source_rgb(*CONTRASTB)
      #self.circle(x,y,ONE*4)

      #self.ctx.set_line_width(ONE)
      #self.ctx.set_source_rgb(*CONTRASTA)
      #self.circle_stroke(x,y,r*0.5)

      self.ctx.set_source_rgb(*FRONT)
      self.circles(self.X[k],self.Y[k],x,y,r*0.2)

      #self.ctx.set_source_rgb(*CONTRASTB)
      #self.circle_stroke(x,y,r*0.5)

      self.DQ.appendleft(num)
      self.DQ.appendleft(k)

      self.num += 1

      ## node was added
      return True, True

    if not good and len(inds)>0:

      ## can we merge two branches?

      if mask.sum()>1:

        ms = np.argsort(dd)
        mks = ms[:2]
        mk = inds[mks[0]]

        #if 2*dd[mks[0]]<dd[mks[1]] and self.P[mk]!=k and self.C[mk]<CK_MAX:
        if 2*dd[mks[0]]<dd[mks[1]] and self.P[mk]!=k and self.C[mk]<CK_MAX:

          dx = self.X[mk] - self.X[k]
          dy = self.Y[mk] - self.Y[k]
          dd = sqrt(dx*dx+dy*dy)
          beta = arctan2(dy,dx)

          x = self.X[k] + cos(beta)*dd*0.5
          y = self.Y[k] + sin(beta)*dd*0.5

          self.X[num] = x
          self.Y[num] = y
          self.R[num] = r
          self.THE[num] = beta
          self.P[num] = k
          self.GE[num] = ge

          self.C[num] = CK_MAX+1

          ## set first descendant if node has no descendants
          if self.D[k]<0:
            self.D[k] = num

          z = get_z(x,y) 
          self.Z[z].append(num)

          #self.DQ.appendleft(num)
          self.DQ.appendleft(k)

          self.num += 1

          self.ctx.set_source_rgb(1,0,0)
          self.circle(x,y,r*0.1)

          #self.ctx.set_source_rgb(*CONTRASTB)
          #self.circles(self.X[k],self.Y[k],self.X[mk],self.Y[mk],r*0.2)

          ##self.C[k] = CK_MAX+1
          #self.C[mks[0]] = CK_MAX+1
          
          return True, True

    ## failed to place node
    self.DQ.appendleft(k)
    return True, False

Example 140

Project: chaco Source File: multi_line_plot.py
    def _gather_points(self):
        """
        Collects the data points that are within the bounds of the plot and
        caches them.
        """

        if self._cache_valid:
            return

        if not self.index or not self.value:
            return

        index = self.index.get_data()
        varray = self._trace_data

        if varray.size == 0:
            self._cached_data_pts = []
            self._cached_valid = True
            return

        coordinates = self.yindex.get_data()

        if self.fast_clip:
            coord_min = float(coordinates[0])
            coord_max = coordinates[-1]
            slice_min = max(0,ceil((varray.shape[0]-1)*(self.value_range.low - coord_min)/(coord_max - coord_min)))
            slice_max = min(varray.shape[0], 1+floor((varray.shape[0]-1)*(self.value_range.high - coord_min)/(coord_max - coord_min)))
            varray = varray[slice_min:slice_max]
            # FIXME: The y coordinates must also be sliced to match varray.

        # Check to see if the data is completely outside the view region.
        outside = False
        # Check x coordinates.
        low, high = self.index.get_bounds()
        if low > self.index_range.high or high < self.index_range.low:
            outside = True

        # Check y coordinates. Use varray because it is based on the yindex,
        # but has been shifted up or down depending on the values.
        ylow, yhigh = varray.min(), varray.max()
        if ylow > self.value_range.high or yhigh < self.value_range.low:
            outside = True

        if outside:
            self._cached_data_pts = []
            self._cache_valid = True
            return

        if len(index) == 0 or varray.shape[0] == 0 or varray.shape[1] == 0 \
                or len(index) != varray.shape[1]:
            self._cached_data_pts = []
            self._cache_valid = True
            return

        size_diff = varray.shape[1] - len(index)
        if size_diff > 0:
            warnings.warn('Chaco.LinePlot: value.shape[1] %d - len(index) %d = %d\n' \
                          % (varray.shape[1], len(index), size_diff))
            index_max = len(index)
            varray = varray[:,:index_max]
        else:
            index_max = varray.shape[1]
            index = index[:index_max]

        # Split the index and value raw data into non-NaN chunks.
        # nan_mask is a boolean M by N array.
        nan_mask = invert(isnan(varray)) & invert(isnan(index))
        blocks_list = []
        for nm in nan_mask:
            blocks = [b for b in arg_find_runs(nm, "flat") if nm[b[0]] != 0]
            blocks_list.append(blocks)

        line_points = []
        for k, blocks in enumerate(blocks_list):
            points = []
            for block in blocks:
                start, end = block
                block_index = index[start:end]
                block_value = varray[k, start:end]
                index_mask = self.index_mapper.range.mask_data(block_index)

                runs = [r for r in arg_find_runs(index_mask, "flat") \
                        if index_mask[r[0]] != 0]

                # Check to see if our data view region is between two points in the
                # index data.  If so, then we have to reverse map our current view
                # into the appropriate index and draw the bracketing points.
                if runs == []:
                    data_pt = self.map_data((self.x_mapper.low_pos, self.y_mapper.low_pos))
                    if self.index.sort_order == "none":
                        indices = argsort(index)
                        sorted_index = take(index, indices)
                        sorted_value = take(varray[k], indices)
                        sort = 1
                    else:
                        sorted_index = index
                        sorted_value = varray[k]
                        if self.index.sort_order == "ascending":
                            sort = 1
                        else:
                            sort = -1
                    ndx = bin_search(sorted_index, data_pt, sort)
                    if ndx == -1:
                        # bin_search can return -1 if data_pt is outside the bounds
                        # of the source data
                        continue


                    z = transpose(array((sorted_index[ndx:ndx+2],
                                                   sorted_value[ndx:ndx+2])))
                    points.append(z)

                else:
                    # Expand the width of every group of points so we draw the lines
                    # up to their next point, outside the plot area
                    data_end = len(index_mask)
                    for run in runs:
                        start, end = run
                        if start != 0:
                            start -= 1
                        if end != data_end:
                            end += 1

                        run_data = transpose(array((block_index[start:end],
                                                    block_value[start:end])))
                        points.append(run_data)
            line_points.append(points)

        self._cached_data_pts = line_points
        self._cache_valid = True
        return
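
When the index data is unsorted, the code above falls back on argsort plus take to put index and value into a common order before bin_search. A minimal version of that pairing pattern, with made-up arrays:

import numpy as np

index = np.array([3.0, 1.0, 2.0])        # unsorted x data (illustrative)
value = np.array([30.0, 10.0, 20.0])     # y data aligned with index

indices = np.argsort(index)              # permutation that sorts the index
sorted_index = np.take(index, indices)   # equivalent to index[indices]
sorted_value = np.take(value, indices)   # apply the same permutation so pairs stay aligned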

Example 141

Project: DeepSurv Source File: deep_surv.py
    def train(self,
    train_data, valid_data= None,
    n_epochs = 500,
    validation_frequency = 10,
    patience = 1000, improvement_threshold = 0.99999, patience_increase = 2,
    verbose = True,
    update_fn = lasagne.updates.nesterov_momentum,
    **kwargs):
        """
        Trains a DeepSurv network on the provided training data and evaluates
            it on the validation data.

        Parameters:
            train_data: dictionary with the following keys:
                'x' : (n,d) array of observations (dtype = float32).
                't' : (n) array of observed time events (dtype = float32).
                'e' : (n) array of observed time indicators (dtype = int32).
            valid_data: optional. A dictionary with the following keys:
                'x' : (n,d) array of observations.
                't' : (n) array of observed time events.
                'e' : (n) array of observed time indicators.
            standardize: True or False. Set the offset and scale of
                standardization layer to the mean and standard deviation of the
                training data.
            n_epochs: integer for the maximum number of epochs the network will
                train for.
            validation_frequency: how often the network computes the validation
                metrics. Decreasing validation_frequency increases training speed.
            patience: minimum number of epochs to train for. Once patience is
                reached, looks at validation improvement to increase patience or
                early stop.
            improvement_threshold: percentage of improvement needed to increase
                patience.
            patience_increase: multiplier to patience if threshold is reached.
            verbose: True or False. Print additional messages to stdout.
            update_fn: lasagne update function for training.
                Default: lasagne.updates.nesterov_momentum
            **kwargs: additional parameters to provide _get_train_valid_fn.
                Parameters used to provide configurations to update_fn.

        Returns:
            metrics: a dictionary of training metrics that include:
                'train': a list of loss values for each training epoch
                'train_ci': a list of C-indices for each training epoch
                'best_params': a list of numpy arrays containing the parameters
                    when the network had the best validation loss
                'best_params_idx': the epoch at which best_params was found
            If valid_data is provided, the metrics also contain:
                'valid': a list of validation loss values for each validation frequency
                'valid_ci': a list of validation C-indices for each validation frequency
                'best_validation_loss': the best validation loss found during training
                'best_valid_ci': the max validation C-index found during training
        """
        if verbose:
            print '[INFO] Training CoxMLP'

        train_loss = []
        train_ci = []
        x_train, e_train, t_train = train_data['x'], train_data['e'], train_data['t']

        # Sort Training Data for Accurate Likelihood
        sort_idx = numpy.argsort(t_train)[::-1]
        x_train = x_train[sort_idx]
        e_train = e_train[sort_idx]
        t_train = t_train[sort_idx]

        # Set Standardization layer offset and scale to training data mean and std
        if self.standardize:
            self.offset = x_train.mean(axis = 0)
            self.scale = x_train.std(axis = 0)

        if valid_data:
            valid_loss = []
            valid_ci = []
            x_valid, e_valid, t_valid = valid_data['x'], valid_data['e'], valid_data['t']

            # Sort Validation Data
            sort_idx = numpy.argsort(t_valid)[::-1]
            x_valid = x_valid[sort_idx]
            e_valid = e_valid[sort_idx]
            t_valid = t_valid[sort_idx]

        # Initialize Metrics
        best_validation_loss = numpy.inf
        best_params = None
        best_params_idx = -1

        # Initialize Training Parameters
        lr = theano.shared(numpy.array(self.learning_rate,
                                    dtype = numpy.float32))
        momentum = numpy.array(0, dtype= numpy.float32)

        train_fn, valid_fn = self._get_train_valid_fn(
            L1_reg=self.L1_reg, L2_reg=self.L2_reg,
            learning_rate=lr,
            momentum = momentum,
            update_fn = update_fn, **kwargs
        )

        start = time.time()
        for epoch in xrange(n_epochs):
            # Power-Learning Rate Decay
            lr = self.learning_rate / (1 + epoch * self.lr_decay)

            if self.momentum and epoch >= 10:
                momentum = self.momentum

            loss = train_fn(x_train, e_train)
            train_loss.append(loss)

            ci_train = self.get_concordance_index(
                x_train,
                t_train,
                e_train,
            )
            train_ci.append(ci_train)

            if valid_data and (epoch % validation_frequency == 0):
                validation_loss = valid_fn(x_valid, e_valid)
                valid_loss.append(validation_loss)

                ci_valid = self.get_concordance_index(
                    x_valid,
                    t_valid,
                    e_valid
                )
                valid_ci.append(ci_valid)

                if validation_loss < best_validation_loss:
                    # improve patience if loss improves enough
                    if validation_loss < best_validation_loss * improvement_threshold:
                        patience = max(patience, epoch * patience_increase)

                    best_params = [param.copy().eval() for param in self.params]
                    best_params_idx = epoch
                    best_validation_loss = validation_loss

            if patience <= epoch:
                break

        if verbose:
            print('Finished Training with %d iterations in %0.2fs' % (
                epoch + 1, time.time() - start
            ))

        metrics = {
            'train': train_loss,
            'best_params': best_params,
            'best_params_idx' : best_params_idx,
            'train_ci' : train_ci
        }
        if valid_data:
            metrics.update({
                'valid' : valid_loss,
                'valid_ci': valid_ci,
                'best_valid_ci': max(valid_ci),
                'best_validation_loss':best_validation_loss
            })

        return metrics
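
The routine's key argsort step puts every training (and validation) array into descending event-time order before the likelihood is computed, applying one permutation to all of them. Reduced to a few made-up arrays, the pattern is:

import numpy as np

t = np.array([2.0, 5.0, 1.0], dtype=np.float32)    # observed times (illustrative)
x = np.arange(6, dtype=np.float32).reshape(3, 2)   # covariates, one row per subject
e = np.array([1, 0, 1], dtype=np.int32)            # event indicators

sort_idx = np.argsort(t)[::-1]                     # indices that put t in descending order
x, e, t = x[sort_idx], e[sort_idx], t[sort_idx]    # reorder every array with the same permutation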

Example 142

Project: ggplot Source File: stat_smooth.py
Function: plot
    def plot(self, ax, data, _aes):
        (data, _aes) = self._update_data(data, _aes)
        variables = _aes.data
        x = data[variables['x']]
        y = data[variables['y']]

        params = {'alpha': 0.2}

        se = self.params.get('se', True)
        method = self.params.get('method', 'lm')
        level = self.params.get('level', 0.95)
        window = self.params.get('window', None)
        span = self.params.get('span', 2/3.)

        if method == "lm":
            x, y, y1, y2 = smoothers.lm(x, y, 1-level)
        elif method == "ma":
            x, y, y1, y2 = smoothers.mavg(x, y, window=window)
        else:
            x, y, y1, y2 = smoothers.lowess(x, y, span=span)

        smoothed_data = pd.DataFrame(dict(x=x, y=y, y1=y1, y2=y2))
        smoothed_data = smoothed_data.sort('x')

        params = self._get_plot_args(data, _aes)
        if 'alpha' not in params:
            params['alpha'] = 0.2

        order = np.argsort(x)
        if self.params.get('se', True)==True:
            if is_date(smoothed_data.x.iloc[0]):
                dtype = smoothed_data.x.iloc[0].__class__
                x = np.array([i.toordinal() for i in smoothed_data.x])
                ax.fill_between(x, smoothed_data.y1, smoothed_data.y2, **params)
                new_ticks = [dtype(i) for i in ax.get_xticks()]
                ax.set_xticklabels(new_ticks)
            else:
                ax.fill_between(smoothed_data.x, smoothed_data.y1, smoothed_data.y2, **params)
        if self.params.get('fit', True)==True:
            del params['alpha']
            ax.plot(smoothed_data.x, smoothed_data.y, **params)
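
The snippet computes order = np.argsort(x) as the left-to-right drawing order, though the actual reordering there happens through the DataFrame sort on 'x'. The underlying idea in a few lines (made-up data; the matplotlib call is left as a comment):

import numpy as np

x = np.array([3.0, 1.0, 2.0])
y = x ** 2
order = np.argsort(x)                   # left-to-right drawing order
x_sorted, y_sorted = x[order], y[order]
# ax.plot(x_sorted, y_sorted)           # with a matplotlib Axes object named ax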

Example 143

Project: imutils Source File: object_detection.py
def non_max_suppression(boxes, probs=None, overlapThresh=0.3):
	# if there are no boxes, return an empty list
	if len(boxes) == 0:
		return []

	# if the bounding boxes are integers, convert them to floats -- this
	# is important since we'll be doing a bunch of divisions
	if boxes.dtype.kind == "i":
		boxes = boxes.astype("float")

	# initialize the list of picked indexes
	pick = []

	# grab the coordinates of the bounding boxes
	x1 = boxes[:, 0]
	y1 = boxes[:, 1]
	x2 = boxes[:, 2]
	y2 = boxes[:, 3]

	# compute the area of the bounding boxes and grab the indexes to sort
	# (in the case that no probabilities are provided, simply sort on the
	# bottom-right y-coordinate)
	area = (x2 - x1 + 1) * (y2 - y1 + 1)
	idxs = y2

	# if probabilities are provided, sort on them instead
	if probs is not None:
		idxs = probs

	# sort the indexes
	idxs = np.argsort(idxs)

	# keep looping while some indexes still remain in the indexes list
	while len(idxs) > 0:
		# grab the last index in the indexes list and add the index value
		# to the list of picked indexes
		last = len(idxs) - 1
		i = idxs[last]
		pick.append(i)

		# find the largest (x, y) coordinates for the start of the bounding
		# box and the smallest (x, y) coordinates for the end of the bounding
		# box
		xx1 = np.maximum(x1[i], x1[idxs[:last]])
		yy1 = np.maximum(y1[i], y1[idxs[:last]])
		xx2 = np.minimum(x2[i], x2[idxs[:last]])
		yy2 = np.minimum(y2[i], y2[idxs[:last]])

		# compute the width and height of the bounding box
		w = np.maximum(0, xx2 - xx1 + 1)
		h = np.maximum(0, yy2 - yy1 + 1)

		# compute the ratio of overlap
		overlap = (w * h) / area[idxs[:last]]

		# delete all indexes from the index list that have overlap greater
		# than the provided overlap threshold
		idxs = np.delete(idxs, np.concatenate(([last],
			np.where(overlap > overlapThresh)[0])))

	# return only the bounding boxes that were picked
	return boxes[pick].astype("int")
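
The suppression loop relies on one property of argsort: it sorts ascending, so the highest-scoring box sits at the end of idxs and can be peeled off on each iteration. In isolation, with made-up scores:

import numpy as np

probs = np.array([0.30, 0.90, 0.55])   # detection scores (illustrative)
idxs = np.argsort(probs)               # ascending order of score
best = idxs[-1]                        # index of the highest-scoring box
rest = idxs[:-1]                       # boxes the winner is compared against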

Example 144

Project: pygsp Source File: graph.py
    def compute_fourier_basis(self, smallest_first=True, force_recompute=False, **kwargs):
        r"""
        Compute the Fourier basis of the graph.

        Parameters
        ----------
        smallest_first: bool
            Define the order of the eigenvalues.
            Default is smallest first (True).
        force_recompute: bool
            Force to recompute the Fourier basis if already existing.

        Note
        ----
        'G.compute_fourier_basis()' computes a full eigendecomposition of
        the graph Laplacian G.L:

        .. L = U Lambda U*

        .. math:: {\cal L} = U \Lambda U^*

        where $\Lambda$ is a diagonal matrix of the Laplacian eigenvalues.

        *G.e* is a column vector of length *G.N* containing the Laplacian
        eigenvalues. The largest eigenvalue is stored in *G.lmax*.
        The eigenvectors are stored as column vectors of *G.U* in the same
        order as the eigenvalues. Finally, the coherence of the
        Fourier basis is in *G.mu*.

        Example
        -------
        >>> from pygsp import graphs
        >>> N = 50
        >>> G = graphs.Sensor(N)
        >>> G.compute_fourier_basis()

        References
        ----------
        cite `chung1997spectral`

        """
        if hasattr(self, 'e') or hasattr(self, 'U'):
            if force_recompute:
                self.logger.warning("This graph already has a Fourier basis. Recomputing.")
            else:
                self.logger.error("This graph already has a Fourier basis. Stopping.")
                return

        if self.N > 3000:
            self.logger.warning("Performing full eigendecomposition of a large "
                           "matrix may take some time.")

        if not hasattr(self, 'L'):
            raise AttributeError("Graph Laplacian is missing")

        eigenvectors, eigenvalues, _ = sp.linalg.svd(self.L.todense())

        inds = np.argsort(eigenvalues)
        if not smallest_first:
            inds = inds[::-1]

        self.e = eigenvalues[inds]  # keep the eigenvalue order consistent with the columns of U
        self.lmax = np.max(self.e)
        self.U = eigenvectors[:, inds]
        self.mu = np.max(np.abs(self.U))
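
The argsort step above reorders the eigenvalues and applies the same permutation to the eigenvector columns so they stay paired. A self-contained sketch on a tiny Laplacian (numpy.linalg.eigh already returns ascending eigenvalues, so the explicit argsort is a no-op here, but it keeps values and vectors aligned for any solver, such as the SVD used in the example):

import numpy as np

L = np.array([[ 2., -1., -1.],
              [-1.,  1.,  0.],
              [-1.,  0.,  1.]])            # a small graph Laplacian (illustrative)
eigenvalues, eigenvectors = np.linalg.eigh(L)
inds = np.argsort(eigenvalues)             # smallest eigenvalue first
e = eigenvalues[inds]
U = eigenvectors[:, inds]                  # reorder eigenvector columns to match
lmax = e[-1]                               # largest eigenvalue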

Example 145

Project: refinery Source File: PrintTopics.py
def main():
  parser = argparse.ArgumentParser()  
  parser.add_argument('dataName', type=str,
        help='name of python script that produces data to analyze.')
  parser.add_argument('allocModelName', type=str,
        help='name of allocation model. {MixModel, DPMixModel}')
  parser.add_argument('obsModelName', type=str,
        help='name of observation model. {Gauss, ZMGauss}')
  parser.add_argument('algName', type=str,
        help='name of learning algorithms to consider, {EM, VB, moVB, soVB}.')
  parser.add_argument('--jobname', type=str, default='defaultjob',
        help='name of experiment whose results should be plotted')
  parser.add_argument('--topW', type=int, default=10,
        help='the number of top words printed for a given topic')        
  parser.add_argument('--taskids', type=str, default=None,
        help="int ids for tasks (individual runs) of the given job to plot." + \
              'Ex: "1" or "3" or "1,2,3" or "1-6"')
  parser.add_argument('--savefilename', type=str, default=None,
        help="absolute path to directory to save figure")
  parser.add_argument('--iterid', type=int, default=None)
  args = parser.parse_args()


  rootpath = os.path.join(os.environ['BNPYOUTDIR'], args.dataName, \
                              args.allocModelName, args.obsModelName)
  jobpath = os.path.join(rootpath, args.algName, args.jobname)

  if not os.path.exists(jobpath):
    raise ValueError("No such path: %s" % (jobpath))
  
  taskids = PlotELBO.parse_task_ids(jobpath, args.taskids)

  Data = loadData(jobpath)

  if args.savefilename is not None and len(taskids) > 0:
    try:
      args.savefilename % ('1')
    except TypeError:
      raise ValueError("Missing or bad format string in savefilename %s" %  
                        (args.savefilename)
                      )
     
  for taskid in taskids:
    taskpath = os.path.join(jobpath, taskid)
    if args.iterid is None:
      prefix = "Best"
    else:
      prefix = "Iter%05d" % (args.iterid)
    hmodel = bnpy.ioutil.ModelReader.load_model(taskpath, prefix)
    # Print top words across all topics
    learnedK = hmodel.allocModel.K
    savefid = taskpath + "/top_words.txt"
    fid = open(savefid,'w+')
    for k in xrange(learnedK):
        lamvec = hmodel.obsModel.comp[k].lamvec
        elamvec = lamvec / lamvec.sum()
        topW_ind = np.argsort(elamvec)[-args.topW:]
        for w in xrange(args.topW):
            word = str(Data.vocab_dict[topW_ind[w]])
            fid.write( word + ", " )
        fid.write("...\n")
    fid.close()
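
The top-word selection is the usual "top-k via argsort" trick: the last topW positions of an ascending argsort are the indices of the largest weights. Stripped down to hypothetical numbers:

import numpy as np

weights = np.array([0.10, 0.40, 0.05, 0.25, 0.20])   # word weights for one topic (illustrative)
topW = 3
top_ind = np.argsort(weights)[-topW:]   # indices of the topW largest weights, ascending
top_ind = top_ind[::-1]                 # optional: largest weight first
print(top_ind)                          # -> [1 3 4]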

Example 146

Project: lopq Source File: search.py
def multisequence(x, centroids):
    """
    Implementation of multi-sequence algorithm for traversing a multi-index.

    The algorithm is described in http://download.yandex.ru/company/cvpr2012.pdf.

    :param ndarray x:
        a query vector
    :param list centroids:
        a list of ndarrays containing cluster centroids for each subvector

    :yields int d:
        the cell distance approximation used to order cells
    :yields tuple cell:
        the cell indices
    """

    # Infer parameters
    splits = len(centroids)
    V = centroids[0].shape[0]

    # Compute distances to each coarse cluster and sort
    cluster_dists = []
    sorted_inds = []
    for cx, split in iterate_splits(x, splits):

        dists = ((cx - centroids[split]) ** 2).sum(axis=1)
        inds = np.argsort(dists)

        cluster_dists.append(dists)
        sorted_inds.append(inds)

    # Some helper functions used below
    def cell_for_inds(inds):
        return tuple([sorted_inds[s][i] for s, i in enumerate(inds)])

    def dist_for_cell(cell):
        return sum([cluster_dists[s][i] for s, i in enumerate(cell)])

    def inds_in_range(inds):
        for i in inds:
            if i >= V:
                return False
        return True

    # Initialize priority queue
    h = []
    traversed = set()
    start_inds = tuple(0 for _ in xrange(splits))
    start_dist = dist_for_cell(cell_for_inds(start_inds))
    heapq.heappush(h, (start_dist, start_inds))

    # Traverse cells
    while len(h):
        d, inds = heapq.heappop(h)
        yield d, cell_for_inds(inds)
        traversed.add(inds)

        # Add neighboring cells to queue
        if inds[1] == 0 or (inds[0] + 1, inds[1] - 1) in traversed:
            c = (inds[0] + 1, inds[1])
            if inds_in_range(c):
                dist = dist_for_cell(cell_for_inds(c))
                heapq.heappush(h, (dist, c))

        if inds[0] == 0 or (inds[0] - 1, inds[1] + 1) in traversed:
            c = (inds[0], inds[1] + 1)
            if inds_in_range(c):
                dist = dist_for_cell(cell_for_inds(c))
                heapq.heappush(h, (dist, c))
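
For each subvector the algorithm computes squared distances to the coarse centroids and argsorts them, so cell (0, 0) always names the pair of nearest centroids. That per-split step on its own, with invented data:

import numpy as np

cx = np.array([0.2, 0.7])                              # one query subvector (illustrative)
centroids = np.array([[0.0, 0.0], [0.5, 0.5], [1.0, 1.0]])
dists = ((cx - centroids) ** 2).sum(axis=1)            # squared distance to each coarse centroid
inds = np.argsort(dists)                               # nearest centroid first
nearest = inds[0]                                      # index of the closest coarse centroid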

Example 147

Project: qiime Source File: plot_semivariogram.py
def fit_semivariogram(xxx_todo_changeme, xxx_todo_changeme1, model, ranges):
    """ Creates semivariogram values from two distance matrices.
    :Parameters:
       xxx_todo_changeme : tuple of (x_samples, x_distmtx)
           sample ids and distance matrix for x
       xxx_todo_changeme1 : tuple of (y_samples, y_distmtx)
           sample ids and distance matrix for y
       model: string
           model to fit
       ranges: list
           the list of ranges to bin the data

    :Returns:
       x_vals: array
           Values for x
       y_vals: array
           Values for y
       x_fit: array
           Values for x used in the model fit
       y_fit: array
           Values for y fitted from model
       func_text: string
           text describing the fitted model
    """
    (x_samples, x_distmtx) = xxx_todo_changeme
    (y_samples, y_distmtx) = xxx_todo_changeme1
    if x_samples != y_samples:
        lbl_x = list(argsort(x_samples))
        if lbl_x != range(len(lbl_x)):
            tmp = x_distmtx[:, lbl_x]
            x_distmtx = tmp[lbl_x, :]

        lbl_y = list(argsort(y_samples))
        if lbl_y != range(len(lbl_y)):
            tmp = y_distmtx[:, lbl_y]
            y_distmtx = tmp[lbl_y, :]

    # get upper triangle from matrix in a 1d array
    x_tmp_vals = x_distmtx.compress(tri(len(x_distmtx)).ravel() == 0)
    y_tmp_vals = y_distmtx.compress(tri(len(y_distmtx)).ravel() == 0)

    # sorting lists and transforming to arrays
    x_vals, y_vals = [], []
    for i in argsort(x_tmp_vals):
        x_vals.append(x_tmp_vals[i])
        y_vals.append(y_tmp_vals[i])
    x_vals = asarray(x_vals)
    y_vals = asarray(y_vals)

    # fitting model
    fit_func = FitModel(x_vals, y_vals, model)
    y_fit, params, func_text = fit_func()
    x_fit = x_vals

    # section for bins
    if ranges != []:
        # creating bins in x
        min = 0
        x_bins = []
        for r in ranges[:-1]:
            x_bins.extend(arange(min, r[1], r[0]))
            min = r[1]
        x_bins.extend(arange(min, max(x_vals), ranges[-1][0]))
        x_bins[-1] = max(x_vals)

        x_vals, hist = hist_bins(x_bins, x_vals)

        # avg per bin, y values
        y_tmp = []
        for i, val in enumerate(hist):
            if i == 0:
                low = val
                continue
            high = low + val

            y_tmp.append(mean(y_vals[low:high]))

            low = high
        y_vals = asarray(y_tmp)

        # removing nans
        x_vals = x_vals[~isnan(y_vals)]
        y_vals = y_vals[~isnan(y_vals)]

    return x_vals, y_vals, x_fit, y_fit, func_text
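
Two argsort uses appear above: putting the rows and columns of a distance matrix into label order, and sorting the flattened x values while carrying the paired y values along. The matrix-reordering half, shown on a tiny made-up example:

import numpy as np

samples = ['s3', 's1', 's2']              # row/column labels (illustrative)
distmtx = np.arange(9.0).reshape(3, 3)    # distance matrix in the same label order

lbl = np.argsort(samples)                 # order that sorts the labels: [1, 2, 0]
distmtx = distmtx[:, lbl][lbl, :]         # reorder columns, then rows, with one permutation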

Example 148

Project: lifelines Source File: __init__.py
def _concordance_index(event_times, predicted_event_times, event_observed):
    """Find the concordance index in n * log(n) time.

    Assumes the data has been verified by lifelines.utils.concordance_index first.
    """
    # Here's how this works.
    #
    # It would be pretty easy to do if we had no censored data and no ties. There, the basic idea
    # would be to iterate over the cases in order of their true event time (from least to greatest),
    # while keeping track of a pool of *predicted* event times for all cases previously seen (= all
    # cases that we know should be ranked lower than the case we're looking at currently).
    #
    # If the pool has O(log n) insert and O(log n) RANK (i.e., "how many things in the pool have
    # value less than x"), then the following algorithm is n log n:
    #
    # Sort the times and predictions by time, increasing
    # n_pairs, n_correct := 0
    # pool := {}
    # for each prediction p:
    #     n_pairs += len(pool)
    #     n_correct += rank(pool, p)
    #     add p to pool
    #
    # There are three complications: tied ground truth values, tied predictions, and censored
    # observations.
    #
    # - To handle tied true event times, we modify the inner loop to work in *batches* of observations
    # p_1, ..., p_n whose true event times are tied, and then add them all to the pool
    # simultaneously at the end.
    #
    # - To handle tied predictions, which should each count for 0.5, we switch to
    #     n_correct += min_rank(pool, p)
    #     n_tied += count(pool, p)
    #
    # - To handle censored observations, we handle each batch of tied, censored observations just
    # after the batch of observations that died at the same time (since those censored observations
    # are comparable to all the observations that died at the same time or previously). However, we do
    # NOT add them to the pool at the end, because they are NOT comparable with any observations
    # that leave the study afterward--whether or not those observations get censored.

    died_mask = event_observed.astype(bool)
    # TODO: is event_times already sorted? That would be nice...
    died_truth = event_times[died_mask]
    ix = np.argsort(died_truth)
    died_truth = died_truth[ix]
    died_pred = predicted_event_times[died_mask][ix]

    censored_truth = event_times[~died_mask]
    ix = np.argsort(censored_truth)
    censored_truth = censored_truth[ix]
    censored_pred = predicted_event_times[~died_mask][ix]

    censored_ix = 0
    died_ix = 0
    times_to_compare = _BTree(np.unique(died_pred))
    num_pairs = 0
    num_correct = 0
    num_tied = 0

    def handle_pairs(truth, pred, first_ix):
        """
        Handle all pairs that exited at the same time as truth[first_ix].

        Returns:
          (pairs, correct, tied, next_ix)
          pairs: The number of new comparisons performed
          correct: The number of comparisons correctly predicted
          tied: The number of comparisons whose predictions were tied
          next_ix: The next index that needs to be handled
        """
        next_ix = first_ix
        while next_ix < len(truth) and truth[next_ix] == truth[first_ix]:
            next_ix += 1
        pairs = len(times_to_compare) * (next_ix - first_ix)
        correct = 0
        tied = 0
        for i in range(first_ix, next_ix):
            rank, count = times_to_compare.rank(pred[i])
            correct += rank
            tied += count

        return (pairs, correct, tied, next_ix)

    # we iterate through cases sorted by exit time:
    # - First, all cases that died at time t0. We add these to the sortedlist of died times.
    # - Then, all cases that were censored at time t0. We DON'T add these since they are NOT
    #   comparable to subsequent elements.
    while True:
        has_more_censored = censored_ix < len(censored_truth)
        has_more_died = died_ix < len(died_truth)
        # Should we look at some censored indices next, or died indices?
        if has_more_censored and (not has_more_died
                                  or died_truth[died_ix] > censored_truth[censored_ix]):
            pairs, correct, tied, next_ix = handle_pairs(censored_truth, censored_pred, censored_ix)
            censored_ix = next_ix
        elif has_more_died and (not has_more_censored
                                or died_truth[died_ix] <= censored_truth[censored_ix]):
            pairs, correct, tied, next_ix = handle_pairs(died_truth, died_pred, died_ix)
            for pred in died_pred[died_ix:next_ix]:
                times_to_compare.insert(pred)
            died_ix = next_ix
        else:
            assert not (has_more_died or has_more_censored)
            break

        num_pairs += pairs
        num_correct += correct
        num_tied += tied

    if num_pairs == 0:
        raise ZeroDivisionError("No admissable pairs in the dataset.")

    return (num_correct + num_tied / 2) / num_pairs
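
Both argsort calls above do the same job: sort the (censored or uncensored) event times and apply the identical permutation to the predictions, so each prediction stays attached to its time. In isolation, with invented values:

import numpy as np

event_times = np.array([5.0, 2.0, 8.0])
predictions = np.array([4.0, 3.0, 9.0])
observed = np.array([True, False, True])   # True where the event was observed

died_truth = event_times[observed]
ix = np.argsort(died_truth)                # iterate deaths in increasing time order
died_truth = died_truth[ix]
died_pred = predictions[observed][ix]      # predictions stay paired with their times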

Example 149

Project: PyGraphArt Source File: graph.py
    def tsp_hamilton_cycle(self, start):
        sorted_edges = np.argsort(self.costs)
        constraints = self.costs.copy()
        constraints.fill(None)
        self.upper_bound = 0
        self.final_x = None
        edges = np.array([i for i in self.edges])
        opt_c = np.Infinity
        if self.oriented:
            print 'tsp only on unoriented graph'
            return
        problems = np.array([], dtype=np.object)
        problems = np.append(problems, None)
        problems[0] = constraints
        while len(problems) != 0:
            _ = problems[0]
            np.delete(problems, 0)
            one_tree = np.array([], dtype=np.int)
            chosen = np.empty(len(edges), dtype=np.bool)
            chosen.fill(False)
            for i in sorted_edges:
                if constraints[i] is not None:
                    if constraints[i] == 1:
                        chosen[i] = True
                        if len(one_tree) > 0:
                            one_tree = np.vstack((one_tree, [edges[i]]))
                        else:
                            one_tree = [edges[i]]

                        # visited_nodes = np.unique(
                        #   np.append(visited_nodes, edges[i]))

            for i in sorted_edges:
                todo1 = todo2 = False
                if len(one_tree) > 0:
                    bincount = np.bincount(np.hstack(one_tree))
                    if len(bincount) > edges[i][0]:
                        if bincount[edges[i][0]] <= 1:
                            todo1 = True
                    else:
                        todo1 = True
                    if len(bincount) > edges[i][1]:
                        if np.bincount(np.hstack(one_tree))[edges[i][1]] <= 1:
                            todo2 = True
                    else:
                        todo2 = True
                else:
                    todo1 = todo2 = True

                cycle = False
                if len(one_tree) > 0:
                    cycle = self.is_cyclic(
                        self.edges_to_fs(np.vstack((one_tree, [edges[i]]))))

                if (constraints[i] is None) and todo1 and todo2 and not cycle:
                    chosen[i] = True
                    if len(one_tree) > 0:
                        one_tree = np.vstack((one_tree, [edges[i]]))
                    else:
                        one_tree = [edges[i]]
                    # visited_nodes = np.unique(
                    #    np.append(visited_nodes, edges[i]))
                    if len(edges[chosen]) == self.n-1:
                        break
            for i in sorted_edges[~chosen]:
                if not np.any(one_tree == i):
                    if len(one_tree) > 0:
                        one_tree = np.vstack((one_tree, [edges[i]]))
                    else:
                        one_tree = [edges[i]]
                    chosen[i] = True
                    break
            lowerbound = np.sum(self.costs[chosen])
            if lowerbound > opt_c:
                continue
            #

            if np.all(np.bincount(np.hstack(one_tree)) == 2):
                # np.max(np.bincount(np.hstack(one_tree))) == 2:

                if lowerbound < opt_c:
                    opt_c = lowerbound
                    continue

            node = np.argmax(np.bincount(np.hstack(one_tree)))
            eds = self.edges[np.bitwise_or(constraints == 1, constraints == 0)]
            if len(eds) > 0:
                if np.sum(np.hstack(eds) == node) == 2:
                    continue
            min_cost = np.Infinity
            for i in sorted_edges:
                if node in edges[i] and self.costs[i] < min_cost:
                    branch_i = i

            constraints[branch_i] = 0
            cycle = self.is_cyclic(self.edges_to_fs(edges[constraints == 1]))
            if not cycle:
                problems = np.append(problems, None)
                problems[-1] = constraints

            constraints[branch_i] = 1
            cycle = self.is_cyclic(self.edges_to_fs(edges[constraints == 1]))
            if not cycle:
                problems = np.append(problems, None)
                problems[-1] = constraints

        return edges[constraints == 1]
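
The branch-and-bound above repeatedly walks the edges cheapest first, which is what argsorting the cost vector provides. The traversal order on its own, with a hypothetical cost vector and edge list:

import numpy as np

costs = np.array([4.0, 1.0, 3.0, 2.0])              # one cost per edge (illustrative)
edges = np.array([[0, 1], [1, 2], [2, 3], [3, 0]])

sorted_edges = np.argsort(costs)                    # edge indices from cheapest to most expensive
cheapest_first = [tuple(edges[i]) for i in sorted_edges]   # [(1, 2), (3, 0), (2, 3), (0, 1)]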

Example 150

Project: tribeflow Source File: tmat-toyplot.py
def main(model):
    store = pd.HDFStore(model)
    
    from_ = store['from_'][0][0]
    to = store['to'][0][0]
    assert from_ == 0
    
    trace_fpath = store['trace_fpath'][0][0]
    Theta_zh = store['Theta_zh'].values
    Psi_oz = store['Psi_sz'].values
    count_z = store['count_z'].values[:, 0]

    Psi_oz = Psi_oz / Psi_oz.sum(axis=0)
    Psi_zo = (Psi_oz * count_z).T
    Psi_zo = Psi_zo / Psi_zo.sum(axis=0)
    obj2id = dict(store['source2id'].values)
    hyper2id = dict(store['hyper2id'].values)
    id2obj = dict((v, k) for k, v in obj2id.items())

    ZtZ = Psi_zo.dot(Psi_oz)
    ZtZ = ZtZ / ZtZ.sum(axis=0)
    L = ZtZ
    #ZtZ[ZtZ < (ZtZ.mean())] = 0
    L[ZtZ >= 1.0 / (len(ZtZ))] = 1
    L[L != 1] = 0

    colormap = toyplot.color.brewer.map("Purples", domain_min=0, domain_max=1, reverse=True)
    print(colormap)
    canvas = toyplot.matrix((L.T, colormap), label="P[z' | z]", \
            colorshow=False, tlabel="To z'", llabel="From")[0]
    #canvas.axes(ylabel='From z', xlabel='To z\'')
    toyplot.pdf.render(canvas, 'tmat.pdf')

    model = SpectralCoclustering(n_clusters=3)
    model.fit(L)
    fit_data = L[np.argsort(model.row_labels_)]
    fit_data = fit_data[:, np.argsort(model.column_labels_)]
    canvas = toyplot.matrix((fit_data, colormap), label="P[z' | z]", \
            colorshow=False)[0]
    toyplot.pdf.render(canvas, 'tmat-cluster.pdf')
    
    #AtA = Psi_oz.dot(Psi_zo)
    #np.fill_diagonal(AtA, 0)
    #AtA = AtA / AtA.sum(axis=0)

    store.close()
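
After co-clustering, the rows and columns of L are regrouped by argsorting the cluster labels, which pulls rows with the same label next to each other before rendering. A compact sketch of that reordering (matrix and labels are made up):

import numpy as np

L = np.arange(16.0).reshape(4, 4)        # a small matrix (illustrative)
row_labels = np.array([1, 0, 1, 0])      # cluster label per row
col_labels = np.array([0, 1, 0, 1])      # cluster label per column

fit = L[np.argsort(row_labels)]          # rows with the same label become adjacent
fit = fit[:, np.argsort(col_labels)]     # then the columns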