numpy.sum

Here are examples of the Python API numpy.sum taken from open source projects. By voting up, you can indicate which examples are most useful and appropriate.

200 Examples

Example 1

View license
def AMS(answers, predictions, sample_weight):
    """ Predictions are classes """
    assert len(answers) == len(predictions) == len(sample_weight)
    predictions = column_or_1d(predictions)
    total_s = numpy.sum(sample_weight[answers > 0.5])
    total_b = numpy.sum(sample_weight[answers < 0.5])
    s = numpy.sum(sample_weight[answers * predictions > 0.5])
    b = numpy.sum(sample_weight[(1 - answers) * predictions > 0.5])
    s *= real_s / total_s
    b *= real_b / total_b
    br = 10.
    radicand = 2 * ( (s+b+br) * numpy.log(1.0 + s/(b+br)) - s)
    if radicand < 0:
        raise ValueError('Radicand is negative')
    else:
        return numpy.sqrt(radicand)
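
A quick illustration of the masked weighted-sum pattern used above: indexing sample_weight with a boolean condition and summing adds only the selected weights (real_s and real_b come from elsewhere in the source module). The arrays below are made up for this sketch, not taken from the project.

import numpy

answers = numpy.array([1, 0, 1, 0, 1])          # true labels in {0, 1}
predictions = numpy.array([1, 1, 0, 0, 1])      # predicted classes in {0, 1}
sample_weight = numpy.array([0.5, 1.0, 2.0, 1.5, 0.25])

total_s = numpy.sum(sample_weight[answers > 0.5])            # 0.5 + 2.0 + 0.25 = 2.75
total_b = numpy.sum(sample_weight[answers < 0.5])            # 1.0 + 1.5 = 2.5
s = numpy.sum(sample_weight[answers * predictions > 0.5])    # weight of true positives = 0.75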

Example 2

Project: RMG-Py
Source File: simpleTest.py
View license
    def testColliderModel(self):
        """
        Test the solver's ability to simulate a model with collision efficiencies.
        """
        chemFile = os.path.join(os.path.dirname(__file__),'files','collider_model','chem.inp')
        dictionaryFile = os.path.join(os.path.dirname(__file__),'files','collider_model','species_dictionary.txt')
        speciesList, reactionList = loadChemkinFile(chemFile, dictionaryFile)

        
        smilesDict = {'H':'[H]','HO2':'[O]O','O2':'[O][O]','Ar':'[Ar]','N2':'N#N','CO2':'O=C=O','CH3':'[CH3]','CH4':'C'}
        speciesDict = {}
        for name, smiles in smilesDict.iteritems():
            mol = Molecule(SMILES=smiles)
            for species in speciesList:
                if species.isIsomorphic(mol):
                    speciesDict[name] = species
                    break
                
                
        T = 1000 # K
        P = 10 # Pa            
        initialMoleFractions = {speciesDict['O2']:0.5,
                        speciesDict['H']:0.5,
                        speciesDict['CO2']:1.0, 
                        speciesDict['Ar']:4.0}
        
        # Initialize the model
        rxnSystem = SimpleReactor(T,P,initialMoleFractions=initialMoleFractions,termination=None)
        rxnSystem.initializeModel(speciesList, reactionList, [], [])
        
        # Advance to time = 0.1 s
        rxnSystem.advance(0.1)
        # Compare simulated mole fractions with expected mole fractions from CHEMKIN
        simulatedMoleFracs = rxnSystem.y/numpy.sum(rxnSystem.y)
        expectedMoleFracs = numpy.array([0.6666667,0,0,0,0.1666667,0, 0.08333333,0.08333333,2.466066000000000E-10,0,0,0,0,0])
        for i in range(len(simulatedMoleFracs)):
            self.assertAlmostEqual(simulatedMoleFracs[i],expectedMoleFracs[i])
            
        # Advance to time = 5 s
        rxnSystem.advance(5)
        # Compare simulated mole fractions with expected mole fractions from CHEMKIN
        simulatedMoleFracs = rxnSystem.y/numpy.sum(rxnSystem.y)
        expectedMoleFracs = numpy.array([0.6666667,0,0,0, 0.1666667,0,0.08333332,0.08333332,1.233033000000000E-08,0,0,0,0,0])
        for i in range(len(simulatedMoleFracs)):
            self.assertAlmostEqual(simulatedMoleFracs[i],expectedMoleFracs[i])
        
        # Try a new set of conditions
        
        T = 850 # K
        P = 200 # Pa        
        initialMoleFractions = {speciesDict['O2']:0.5,
                        speciesDict['H']:1,
                        speciesDict['CO2']:1, 
                        speciesDict['N2']:4,
                        speciesDict['CH3']:1}
        
        # Initialize the model
        rxnSystem = SimpleReactor(T,P,initialMoleFractions=initialMoleFractions,termination=None)
        rxnSystem.initializeModel(speciesList, reactionList, [], [])
        
        # Advance to time = 5 s
        rxnSystem.advance(5)  

        # Compare simulated mole fractions with expected mole fractions from CHEMKIN
        simulatedMoleFracs = rxnSystem.y/numpy.sum(rxnSystem.y)
        expectedMoleFracs = numpy.array([0,0,0,0.5487241, 0.137181,0, 0.1083234, 0.0685777, 1.280687000000000E-05,  0,0,0,   0.1083362, 0.02884481])
        for i in range(len(simulatedMoleFracs)):
            self.assertAlmostEqual(simulatedMoleFracs[i],expectedMoleFracs[i])
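
The comparison above leans on a common normalization idiom: dividing a vector by numpy.sum of itself so the entries become fractions that sum to one. A minimal sketch with made-up numbers:

import numpy

y = numpy.array([4.0, 1.0, 3.0, 2.0])    # raw amounts, e.g. moles of each species
fractions = y / numpy.sum(y)             # array([0.4, 0.1, 0.3, 0.2])
assert numpy.isclose(numpy.sum(fractions), 1.0)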
            

Example 3

Project: pyscf
Source File: addons.py
View license
def mom_occ_(mf, occorb, setocc):
    '''Use maximum overlap method to determine occupation number for each orbital in every
    iteration. It can be applied to unrestricted HF/KS and restricted open-shell
    HF/KS.'''
    if isinstance(mf, pyscf.scf.uhf.UHF):
        coef_occ_a = occorb[0][:, setocc[0]>0] 
        coef_occ_b = occorb[1][:, setocc[1]>0]
    elif isinstance(mf, pyscf.scf.rohf.ROHF):
        if mf.mol.spin != int(numpy.sum(setocc[0]) - numpy.sum(setocc[1])) :
            raise ValueError('Wrong occupation setting for restricted open-shell calculation.') 
        coef_occ_a = occorb[:, setocc[0]>0]
        coef_occ_b = occorb[:, setocc[1]>0]
    else:
        raise AssertionError('Can not support this class of instance.')
    def get_occ(mo_energy=None, mo_coeff=None): 
        if mo_energy is None: mo_energy = mf.mo_energy
        if mo_coeff is None: mo_coeff = mf.mo_coeff
        if isinstance(mf, pyscf.scf.rohf.ROHF): mo_coeff = numpy.array([mo_coeff, mo_coeff])
        mo_occ = numpy.zeros_like(setocc)
        nocc_a = int(numpy.sum(setocc[0]))
        nocc_b = int(numpy.sum(setocc[1]))
        s_a = reduce(numpy.dot, (coef_occ_a.T, mf.get_ovlp(), mo_coeff[0])) 
        s_b = reduce(numpy.dot, (coef_occ_b.T, mf.get_ovlp(), mo_coeff[1]))
        #choose a subset of mo_coeff, which maximizes <old|now>
        idx_a = numpy.argsort(numpy.einsum('ij,ij->j', s_a, s_a))
        idx_b = numpy.argsort(numpy.einsum('ij,ij->j', s_b, s_b))
        mo_occ[0][idx_a[-nocc_a:]] = 1.
        mo_occ[1][idx_b[-nocc_b:]] = 1.

        if mf.verbose >= logger.DEBUG: 
            logger.info(mf, ' New alpha occ pattern: %s', mo_occ[0]) 
            logger.info(mf, ' New beta occ pattern: %s', mo_occ[1]) 
        if mf.verbose >= logger.DEBUG1:
            if mo_energy.ndim == 2: 
                logger.info(mf, ' Current alpha mo_energy(sorted) = %s', mo_energy[0]) 
                logger.info(mf, ' Current beta mo_energy(sorted) = %s', mo_energy[1])
            elif mo_energy.ndim == 1:
                logger.info(mf, ' Current mo_energy(sorted) = %s', mo_energy)

        if (int(numpy.sum(mo_occ[0])) != nocc_a):
            logger.error(mf, 'mom alpha electron occupation numbers do not match: %d, %d',
                         nocc_a, int(numpy.sum(mo_occ[0])))
        if (int(numpy.sum(mo_occ[1])) != nocc_b):
            logger.error(mf, 'mom beta electron occupation numbers do not match: %d, %d',
                         nocc_b, int(numpy.sum(mo_occ[1])))

        #output 1-dimension occupation number for restricted open-shell
        if isinstance(mf, pyscf.scf.rohf.ROHF): mo_occ = mo_occ[0, :] + mo_occ[1, :]
        return mo_occ
    mf.get_occ = get_occ
    return mf
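
In the snippet above, int(numpy.sum(setocc[0])) converts a 0/1 occupation vector into an electron count, and the same trick is used to check the resulting mo_occ. A minimal sketch with a made-up occupation pattern:

import numpy

setocc_alpha = numpy.array([1., 1., 1., 0., 0.])    # hypothetical alpha occupations
nocc_a = int(numpy.sum(setocc_alpha))               # 3 occupied alpha orbitals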

Example 4

Project: hifive
Source File: fend.py
View license
    def plot_statistics(self):
        width, height = 800, 400
        c = Drawing(width + 5, height + 5)
        data = []
        where = numpy.where(numpy.sum(self.fend_sizes[:, :-1], axis=0))[0]
        for i in where:
            data.append(tuple(zip(self.fend_sizes[:, -1],
                        self.fend_sizes[:, i] / float(numpy.sum(self.fend_sizes[:, i])))))
        data.append(tuple(zip(self.fend_sizes[:, -1], numpy.sum(self.fend_sizes[:, :-1], axis=1) /
                    float(numpy.sum(self.fend_sizes[:, :-1])))))
        lp = LinePlot()
        lp.x = 40
        lp.y = 20
        lp.height = height - 20
        lp.width = width - 40
        lp.data = data
        lp.joinedLines = 1
        def findcolor(i, n):
            H = i * 6.0 / float(n)
            X = 1.0 - abs(H % 2 - 1.0)
            if H < 1.0:
                return (1.0, X, 0., 1.0)
            elif H < 2.0:
                return (X, 1.0, 0., 1.0)
            elif H < 3.0:
                return (0., 1.0, X, 1.0)
            elif H < 4.0:
                return (0., X, 1.0, 1.0)
            elif H < 5.0:
                return (X, 0., 1.0, 1.0)
            else:
                return (1.0, 0., X, 1.0)

        def axisstring(i):
            return "%0.1E" % numpy.exp(i)

        lp.xValueAxis.labelTextFormat = axisstring
        for i in range(where.shape[0]):
            lp.lines[i].strokeColor = Color(*findcolor(i, where.shape[0]))
        c.add(lp)
        #c.drawString(220, 0, "Length of fend (bp)")
        #c.drawString(0, 110, "Percent of fends")
        return c
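
plot_statistics above mixes whole-array sums with axis sums: numpy.sum(a, axis=0) collapses rows and gives one total per column, while axis=1 gives one total per row. A minimal sketch with a made-up 2x3 array:

import numpy

a = numpy.array([[1, 2, 3],
                 [4, 5, 6]])
numpy.sum(a)            # 21, sum of everything
numpy.sum(a, axis=0)    # array([5, 7, 9]), column totals
numpy.sum(a, axis=1)    # array([ 6, 15]), row totals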

Example 5

Project: oq-hazardlib
Source File: complex_fault.py
View license
def _float_ruptures(rupture_area, rupture_length, cell_area, cell_length):
    """
    Get all possible unique rupture placements on the fault surface.

    :param rupture_area:
        The area of the rupture to float on the fault surface, in squared km.
    :param rupture_length:
        The target length (spatial extension along fault trace) of the rupture,
        in km.
    :param cell_area:
        2d numpy array representing area of mesh cells in squared km.
    :param cell_length:
        2d numpy array of the shape as ``cell_area`` representing cells'
        length in km.
    :returns:
        A list of slice objects. Number of items in the list is equal to number
        of possible locations of the requested rupture on the fault surface.
        Each slice can be used to get a portion of the whole fault surface mesh
        that would represent the location of the rupture.
    """
    nrows, ncols = cell_length.shape

    if rupture_area >= numpy.sum(cell_area):
        # requested rupture area exceeds the total surface area.
        # return the single slice that doesn't cut anything out.
        return [slice(None)]

    rupture_slices = []

    dead_ends = set()
    for row in range(nrows):
        for col in range(ncols):
            if col in dead_ends:
                continue
            # find the lengths of all possible subsurfaces containing
            # only the current row and from the current column till
            # the last one.
            lengths_acc = numpy.add.accumulate(cell_length[row, col:])
            # find the "best match" number of columns, the one that gives
            # the least difference between actual and requested rupture
            # length (note that we only consider top row here, mainly
            # for simplicity: it's not yet clear how many rows we will
            # end up with).
            rup_cols = numpy.argmin(numpy.abs(lengths_acc - rupture_length))
            last_col = rup_cols + col + 1
            if last_col == ncols and lengths_acc[rup_cols] < rupture_length:
                # rupture doesn't fit along length (the requested rupture
                # length is greater than the length of the part of current
                # row that starts from the current column).
                if col != 0:
                    # if we are not in the first column, it means that we
                    # hit the right border, so we need to go to the next
                    # row.
                    break

            # now try to find the optimum (the one providing the closest
            # to requested area) number of rows.
            areas_acc = numpy.sum(cell_area[row:, col:last_col], axis=1)
            areas_acc = numpy.add.accumulate(areas_acc, axis=0)
            rup_rows = numpy.argmin(numpy.abs(areas_acc - rupture_area))
            last_row = rup_rows + row + 1
            if last_row == nrows and areas_acc[rup_rows] < rupture_area:
                # rupture doesn't fit along width.
                # we can try to extend it along length but only if we are
                # at the first row
                if row == 0:
                    if last_col == ncols:
                        # there is no place to extend, exiting
                        return rupture_slices
                    else:
                        # try to extend along length
                        areas_acc = numpy.sum(cell_area[:, col:], axis=0)
                        areas_acc = numpy.add.accumulate(areas_acc, axis=0)
                        rup_cols = numpy.argmin(numpy.abs(areas_acc
                                                          - rupture_area))
                        last_col = rup_cols + col + 1
                        if last_col == ncols \
                                and areas_acc[rup_cols] < rupture_area:
                            # still doesn't fit, return
                            return rupture_slices
                else:
                    # row is not the first and the required area exceeds
                    # available area starting from target row and column.
                    # mark the column as "dead end" so we don't create
                    # one more rupture from the same column on all
                    # subsequent rows.
                    dead_ends.add(col)

            # here we add 1 to last row and column numbers because we want
            # to return slices for cutting the mesh of vertices, not the cell
            # data (like cell_area or cell_length).
            rupture_slices.append((slice(row, last_row + 1),
                                   slice(col, last_col + 1)))
    return rupture_slices
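
The area search above pairs an axis sum with a cumulative sum: numpy.sum(cell_area[row:, col:last_col], axis=1) gives the area of each remaining row inside the column window, and numpy.add.accumulate turns those per-row areas into running totals so argmin can pick the best-fitting depth. A minimal sketch with made-up cell areas and a target area of 4.0:

import numpy

cell_area = numpy.array([[1.0, 2.0, 1.5],
                         [0.5, 1.0, 1.0]])
row_areas = numpy.sum(cell_area[:, 0:2], axis=1)   # array([3. , 1.5]), area per row in the window
running = numpy.add.accumulate(row_areas)          # array([3. , 4.5]), cumulative area going down
best = numpy.argmin(numpy.abs(running - 4.0))      # 1: two rows come closest to the target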

Example 6

Project: inversehaar
Source File: inversehaar.py
View license
    def __init__(self, cascade, docloud_context):
        """Make a model from a :class:`.Cascade`."""
        super(CascadeModel, self).__init__("Inverse haar cascade",
                                           docloud_context=docloud_context)

        cell_vars = [self.continuous_var(
                        name=cascade.grid.cell_names[i],
                        lb=0., ub=MAX_PIXEL_VALUE)
                        for i in range(cascade.grid.num_cells)]
        feature_vars = {idx: self.binary_var(name="feature_{}".format(idx))
                        for idx in range(len(cascade.features))}

        for stage in cascade.stages:
            # Add constraints for the feature vars.
            #
            # If the classifier's pass value is greater than its fail value,
            # then add a constraint equivalent to the following:
            #   
            #   feature var set => corresponding feature is present in image
            #
            # Conversely, if the classifier's pass value is less than its fail
            # value, add a constraint equivalent to:
            #
            #   corresponding feature is present in image => feature var set
            for classifier in stage.weak_classifiers:
                feature_vec = numpy.sum(
                             cascade.grid.rect_to_cell_vec(r) * r.weight
                             for r in cascade.features[classifier.feature_idx])
                feature_vec /= (cascade.width * cascade.height)
                thr = classifier.threshold
                feature_var = feature_vars[classifier.feature_idx]
                feature_val = sum(cell_vars[i] * feature_vec[i]
                                  for i in numpy.argwhere(
                                                  feature_vec != 0.).flatten())
                if classifier.pass_val >= classifier.fail_val:
                    big_num = 0.1 + thr - numpy.sum(numpy.min(
                             [feature_vec, numpy.zeros(feature_vec.shape)],
                             axis=0))
                    self.add_constraint(feature_val - feature_var * big_num >=
                                                                 thr - big_num)
                else:
                    big_num = 0.1 + numpy.sum(numpy.max(
                             [feature_vec, numpy.zeros(feature_vec.shape)],
                             axis=0)) - thr
                    self.add_constraint(feature_val - feature_var * big_num <=
                                                                           thr)

            # Enforce that the sum of features present in this stage exceeds
            # the stage threshold.
            fail_val_total = sum(c.fail_val for c in stage.weak_classifiers)
            adjusted_stage_threshold = stage.threshold
            self.add_constraint(sum((c.pass_val - c.fail_val) *
                                                    feature_vars[c.feature_idx] 
                         for c in stage.weak_classifiers) >=
                                     adjusted_stage_threshold - fail_val_total)

        self.cascade = cascade
        self.cell_vars = cell_vars
        self.feature_vars = feature_vars
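
One caution about the numpy.sum call above: it is handed a generator expression, which newer NumPy releases deprecate (the call falls back to Python-style summation and emits a warning). A minimal equivalent sketch, assuming each weighted rectangle vector is a 1-D array of the same length, stacks them and sums along axis=0; the arrays here are invented for illustration:

import numpy

vecs = [numpy.array([1.0, 0.0, 2.0]), numpy.array([0.5, 1.0, 0.0])]   # made-up per-rectangle cell vectors
weights = [2.0, -1.0]

feature_vec = numpy.sum(
    numpy.stack([v * w for v, w in zip(vecs, weights)]), axis=0)      # array([ 1.5, -1. ,  4. ])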

Example 7

Project: yap
Source File: yap_preprocess.py
View license
def write_basecount_matrix(file_basecount_dict):
    '''
    Prepare output filename information and write base count matrix 
    results to a file
    '''
    def write_final_matrix(
            file_basecount_matrix,
            basecount_metrics_filename,
            basecount_file_basename,
            ):
        '''Writes basecount matrix to file'''
        file_w_handler = open(basecount_metrics_filename, 'a+')
        len_matrix = len(file_basecount_matrix)
        len_row = len(file_basecount_matrix[0])
        file_w_handler.write("# A C T G N \n")
        for i in range(len_matrix):
            row_sum = numpy.sum(file_basecount_matrix[i])
            if row_sum != 0:
                file_w_handler.write(str(i) + " ")
                for j in range(len_row):
                    file_w_handler.write(
                        str(file_basecount_matrix[i][j]) + " ")
                file_w_handler.write("\n")
        file_w_handler.close()
        #create plot 
        os.system("gplot.sh " + basecount_metrics_filename +
                  " " + basecount_file_basename + '.eps')
    #For each sample, prepare basecount output filename information 
    for filename_key in file_basecount_dict.iterkeys():
        path_name, file_name = os.path.split(filename_key)
        file_name, extension = os.path.splitext(file_name)
        barcode_basecount_dict = file_basecount_dict[filename_key]
        for barcode in barcode_basecount_dict.iterkeys():
            barcode_value = yap_tools.rename_barcode(barcode)
            barcode_dir_path = wd.workflow_output_path + "/" + file_name + "/" + barcode
            preprocess_output_dir = barcode_dir_path + "/" + "preprocess_output"
            file_basecount_matrix1 = barcode_basecount_dict[barcode][0]
            file_basecount_matrix2 = barcode_basecount_dict[barcode][1]
            if barcode_value != '':
                basecount_file_basename1 = preprocess_output_dir + '/' + \
                    file_name + "_" + barcode_value + "_basecountmetrics_1"
                basecount_metrics_filename1 = preprocess_output_dir + '/' + \
                    file_name + "_" + barcode_value + \
                    "_basecountmetrics_1" + '.txt'
                basecount_file_basename2 = preprocess_output_dir + '/' + \
                    file_name + "_" + barcode_value + "_basecountmetrics_2"
                basecount_metrics_filename2 = preprocess_output_dir + '/' + \
                    file_name + "_" + barcode_value + \
                    "_basecountmetrics_2" + '.txt'
            else:
                basecount_file_basename1 = preprocess_output_dir + \
                    '/' + file_name + "_basecountmetrics_1"
                basecount_metrics_filename1 = preprocess_output_dir + \
                    '/' + file_name + "_basecountmetrics_1" + '.txt'
                basecount_file_basename2 = preprocess_output_dir + \
                    '/' + file_name + "_basecountmetrics_2"
                basecount_metrics_filename2 = preprocess_output_dir + \
                    '/' + file_name + "_basecountmetrics_2" + '.txt'

            if (numpy.sum(file_basecount_matrix1) > 0):
                print  "For filename = ", file_name, "barcode = ", barcode, " ...producing the basecount metrics results for first paired file", "\n"
                #pass output filename informaton to write function 
                write_final_matrix(
                    file_basecount_matrix1,
                    basecount_metrics_filename1,
                    basecount_file_basename1,
                    )
            else:
                print "No data for finename = ", file_name, " barcode = ", barcode, " ...skipping the basecount metrics output for first paired file", "\n"
            if (numpy.sum(file_basecount_matrix2) > 0):

                print "For filename = ", file_name, "barcode = ", barcode, " ...producing the basecount metrics results second paired file", "\n"
                #pass output filename informaton to write function 
                write_final_matrix(
                    file_basecount_matrix2,
                    basecount_metrics_filename2,
                    basecount_file_basename2,
                    )
            else:
                print "No data for finename = ", file_name, " barcode = ", barcode, " ...skipping the basecount metrics output for second paired file", "\n"

Example 8

Project: nupic
Source File: anomaly_likelihood_test.py
View license
  def testUpdateAnomalyLikelihoods(self):
    """
    A slightly more complex test. This calls estimateAnomalyLikelihoods
    to estimate the distribution on fake data, followed by several calls
    to updateAnomalyLikelihoods.
    """

    #------------------------------------------
    # Step 1. Generate an initial estimate using fake distribution of anomaly
    # scores.
    data1 = _generateSampleData(mean=0.2)[0:1000]
    _, _, estimatorParams = (
      an.estimateAnomalyLikelihoods(data1, averagingWindow=5)
    )

    #------------------------------------------
    # Step 2. Generate some new data with a higher average anomaly
    # score. Use the estimator from step 1 to compute likelihoods. Now we
    # should see a lot more anomalies.
    data2 = _generateSampleData(mean=0.6)[0:300]
    likelihoods2, avgRecordList2, estimatorParams2 = (
      an.updateAnomalyLikelihoods(data2, estimatorParams)
    )
    self.assertEqual(len(likelihoods2), len(data2))
    self.assertEqual(len(avgRecordList2), len(data2))
    self.assertTrue(an.isValidEstimatorParams(estimatorParams))

    # The new running total should be different
    self.assertNotEqual(estimatorParams2["movingAverage"]["total"],
                        estimatorParams["movingAverage"]["total"])

    # We should have many more samples where likelihood is < 0.01, but not all
    self.assertGreaterEqual(numpy.sum(likelihoods2 < 0.01), 25)
    self.assertLessEqual(numpy.sum(likelihoods2 < 0.01), 250)

    #------------------------------------------
    # Step 3. Generate some new data with the expected average anomaly score. We
    # should see fewer anomalies than in Step 2.
    data3 = _generateSampleData(mean=0.2)[0:1000]
    likelihoods3, avgRecordList3, estimatorParams3 = (
      an.updateAnomalyLikelihoods(data3, estimatorParams2)
    )

    self.assertEqual(len(likelihoods3), len(data3))
    self.assertEqual(len(avgRecordList3), len(data3))
    self.assertTrue(an.isValidEstimatorParams(estimatorParams3))

    # The new running total should be different
    self.assertNotEqual(estimatorParams3["movingAverage"]["total"],
                        estimatorParams["movingAverage"]["total"])
    self.assertNotEqual(estimatorParams3["movingAverage"]["total"],
                        estimatorParams2["movingAverage"]["total"])

    # We should have a small number of samples where likelihood is < 0.01, but
    # at least one
    self.assertGreaterEqual(numpy.sum(likelihoods3 < 0.01), 1)
    self.assertLessEqual(numpy.sum(likelihoods3 < 0.01), 100)

    #------------------------------------------
    # Step 4. Validate that sending data incrementally is the same as sending
    # in one batch
    allData = data1
    allData.extend(data2)
    allData.extend(data3)

    # Compute moving average of all the data and check it's the same
    _, historicalValuesAll, totalAll = (
      an._anomalyScoreMovingAverage(allData, windowSize=5)
    )
    self.assertEqual(sum(historicalValuesAll),
                     sum(estimatorParams3["movingAverage"]["historicalValues"]))
    self.assertEqual(totalAll,
                     estimatorParams3["movingAverage"]["total"])

Example 9

Project: nupic
Source File: sparse_binary_matrix_test.py
View license
  def testResize(self):
    # 1. Resize to 0,0 (equivalent to clear)

    m = _RGEN.randint(4,10)
    n = _RGEN.randint(6,10)
    x = _RGEN.randint(0,2,(m,n))
    x[m/2] = 0
    a = self.Matrix.__class__(x)

    a.resize(0,0)

    if a.capacity() != 0:
      error('resize to 0,0 /capacity')

    if a.nRows() != 0:
      error('resize to 0,0 /nRows')

    if a.nCols() != 0:
      error('resize to 0,0 /nCols')

    if a.nNonZeros() != 0:
      error('resize to 0,0 /nNonZeros')

    # 2. Resize to larger size

    m = _RGEN.randint(4,10)
    n = _RGEN.randint(6,10)
    x = _RGEN.randint(0,2,(m,n))
    x[m/2] = 0
    a = self.Matrix.__class__(x)

    # 2.1 More rows only

    old_nrows = a.nRows()
    old_ncols = a.nCols()
    old_nnzr = a.nNonZeros()
    a.resize(2*a.nRows(),a.nCols())

    if a.nRows() != 2*old_nrows or a.nCols() != old_ncols:
      error('resize to more rows, 1')

    if a.nNonZeros() != old_nnzr:
      error('resize to more rows, 2')

    # 2.2 More cols only

    old_nrows = a.nRows()
    a.resize(a.nRows(), 2*a.nCols())

    if a.nRows() != old_nrows or a.nCols() != 2*old_ncols:
      error('resize to more cols, 1')

    if a.nNonZeros() != old_nnzr:
      error('resize to more cols, 2')

    # 2.3 More rows and cols

    m = _RGEN.randint(4,10)
    n = _RGEN.randint(6,10)
    x = _RGEN.randint(0,2,(m,n))
    x[m/2] = 0
    a = self.Matrix.__class__(x)
    old_nrows = a.nRows()
    old_ncols = a.nCols()
    old_nnzr = a.nNonZeros()
    a.resize(2*a.nRows(),2*a.nCols())

    if a.nRows() != 2*old_nrows or a.nCols() != 2*old_ncols:
      error('resize to more rows and cols, 1')

    if a.nNonZeros() != old_nnzr:
      error('resize to more rows and cols, 2')

    # 3. Resize to smaller size
    m = _RGEN.randint(10,20)
    n = _RGEN.randint(10,20)
    x = _RGEN.randint(0,2,(m,n))
    x[m/2] = 0
    a = self.Matrix.__class__(x)

    # 3.1 Less rows only

    old_nrows = a.nRows()
    old_ncols = a.nCols()
    old_nnzr = a.nNonZeros()
    a.resize(a.nRows()/2,a.nCols())

    if a.nRows() != old_nrows/2 or a.nCols() != old_ncols:
      error('resize to less rows, 1')

    if a.nNonZeros() != numpy.sum(x[:old_nrows/2]):
      error('resize to less rows, 2')

    # 3.2 Less cols only

    old_nrows = a.nRows()
    a.resize(a.nRows(), a.nCols()/2)

    if a.nRows() != old_nrows or a.nCols() != old_ncols/2:
      error('resize to less cols, 1')

    if a.nNonZeros() != numpy.sum(x[:a.nRows(),:old_ncols/2]):
      error('resize to less cols, 2')

    # 3.3 Less rows and cols

    m = _RGEN.randint(10,20)
    n = _RGEN.randint(10,20)
    x = _RGEN.randint(0,2,(m,n))
    x[m/2] = 0
    a = self.Matrix.__class__(x)
    old_nrows = a.nRows()
    old_ncols = a.nCols()
    old_nnzr = a.nNonZeros()
    a.resize(a.nRows()/2,a.nCols()/2)

    if a.nRows() != old_nrows/2 or a.nCols() != old_ncols/2:
      error('resize to less rows and cols, 1')

    if a.nNonZeros() != numpy.sum(x[:old_nrows/2,:old_ncols/2]):
      error('resize to less rows and cols, 2')

Example 10

Project: dl4mt-c2c
Source File: char_base.py
View license
def init_params(options):
    params = OrderedDict()

    print "new char_base initialise..."
    print "source dictionary size: %d" % options['n_words_src']
    # embedding
    params['Wemb'] = norm_weight(options['n_words_src'], options['dim_word_src'])
    params['Wemb_dec'] = norm_weight(options['n_words'], options['dim_word'])

    params = get_layer('multi_scale_conv_encoder')[0](options, params, prefix='multi_scale_conv_enc1', dim=options['dim_word_src'], width=options['conv_width'], nkernels=options['conv_nkernels'])

    for ii in xrange(options['highway']):
        params = get_layer('hw')[0](options, params, prefix="hw_network{}".format(ii+1), dim=numpy.sum(options['conv_nkernels']))

    params = get_layer('gru')[0](options, params,
                                 prefix='encoder',
                                 nin=numpy.sum(options['conv_nkernels']),
                                 dim=options['enc_dim'])
    params = get_layer('gru')[0](options, params,
                                 prefix='encoderr',
                                 nin=numpy.sum(options['conv_nkernels']),
                                 dim=options['enc_dim'])
    ctxdim = 2 * options['enc_dim']

    params = get_layer('ff')[0](options, params,
                                prefix='ff_init_state_char',
                                nin=ctxdim,
                                nout=options['dec_dim'])
    params = get_layer('ff')[0](options, params,
                                prefix='ff_init_state_word',
                                nin=ctxdim,
                                nout=options['dec_dim'])

    print "target dictionary size: %d" % options['n_words']
    # decoder
    params = get_layer('two_layer_gru_decoder')[0](options, params,
                                                   prefix='decoder',
                                                   nin=options['dim_word'],
                                                   dim_char=options['dec_dim'],
                                                   dim_word=options['dec_dim'],
                                                   dimctx=ctxdim)

    # readout
    params = get_layer('fff')[0](options, params, prefix='ff_logit_rnn',
                                 nin1=options['dec_dim'], nin2=options['dec_dim'],
                                 nout=options['dim_word'], ortho=False)
    params = get_layer('ff')[0](options, params, prefix='ff_logit_prev',
                                nin=options['dim_word'],
                                nout=options['dim_word'],
                                ortho=False)
    params = get_layer('ff')[0](options, params, prefix='ff_logit_ctx',
                                nin=ctxdim,
                                nout=options['dim_word'],
                                ortho=False)
    params = get_layer('ff')[0](options, params, prefix='ff_logit',
                                nin=options['dim_word'],
                                nout=options['n_words'])

    return params
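
Note that numpy.sum(options['conv_nkernels']) above sums a plain Python list; numpy.sum accepts any array_like and returns a NumPy scalar, which is then used as a layer dimension. A minimal sketch with hypothetical kernel counts:

import numpy

conv_nkernels = [200, 200, 250, 250]          # hypothetical kernels per convolution width
hidden_dim = int(numpy.sum(conv_nkernels))    # 900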

Example 11

Project: pele
Source File: rmsfit.py
View license
def findrotation_kearsley(x1, x2, align_com=True):
    """Return the rotation matrix which aligns XB with XA
    
    Return the matrix which
    aligns structure XB to be as similar as possible to structure XA.
    To be precise, rotate XB, so as to minimize the distance |XA - XB|.

    Rotations will be done around the origin, not the center of mass

    Rotational alignment follows the prescription of
    Kearsley, Acta Cryst. A, 45, 208-210, 1989
    http://dx.doi.org/10.1107/S0108767388010128
    """
    if x1.size != x2.size:
        raise ValueError("dimension of arrays does not match")
    
    # reshape the arrays
    x1 = x1.reshape([-1,3]).copy()
    x2 = x2.reshape([-1,3]).copy()
    # determine number of atoms
    natoms = x1.shape[0]
    
    # set both com to zero
    if align_com:
        com1 = np.sum(x1,axis=0) / float(natoms)
        com2 = np.sum(x2,axis=0) / float(natoms)
        x1 -= com1
        x2 -= com2

    x1 = x1.ravel() 
    x2 = x2.ravel()
    
    # TODO: this is very dirty!
    #########################################
    # Create matrix QMAT
    #########################################

    QMAT = np.zeros([4,4], np.float64)
    for J1 in xrange(natoms):
        J2 = 3* J1 -1
        XM = x1[J2+1] - x2[J2+1]
        YM = x1[J2+2] - x2[J2+2]
        ZM = x1[J2+3] - x2[J2+3]
        XP = x1[J2+1] + x2[J2+1]
        YP = x1[J2+2] + x2[J2+2]
        ZP = x1[J2+3] + x2[J2+3]
        QMAT[0,0] = QMAT[0,0] + XM**2 + YM**2 + ZM**2
        QMAT[0,1] = QMAT[0,1] - YP*ZM + YM*ZP
        QMAT[0,2] = QMAT[0,2] - XM*ZP + XP*ZM
        QMAT[0,3] = QMAT[0,3] - XP*YM + XM*YP
        QMAT[1,1] = QMAT[1,1] + YP**2 + ZP**2 + XM**2
        QMAT[1,2] = QMAT[1,2] + XM*YM - XP*YP
        QMAT[1,3] = QMAT[1,3] + XM*ZM - XP*ZP
        QMAT[2,2] = QMAT[2,2] + XP**2 + ZP**2 + YM**2
        QMAT[2,3] = QMAT[2,3] + YM*ZM - YP*ZP
        QMAT[3,3] = QMAT[3,3] + XP**2 + YP**2 + ZM**2

    QMAT[1,0] = QMAT[0,1]
    QMAT[2,0] = QMAT[0,2]
    QMAT[2,1] = QMAT[1,2]
    QMAT[3,0] = QMAT[0,3]
    QMAT[3,1] = QMAT[1,3]
    QMAT[3,2] = QMAT[2,3]

    ###########################################
    """
    Find eigenvalues and eigenvectors of QMAT.  The eigenvector corresponding
    to the smallest eigenvalue is the quaternion which rotates XB into best
    alignment with XA.  The smallest eigenvalue is the squared distance between
    the resulting structures.
    """
    ###########################################
    (eigs, vecs) = np.linalg.eig(QMAT)

    imin = np.argmin(eigs)
    eigmin = eigs[imin] # the minimum eigenvalue
    Q2 = vecs[:,imin]  # the eigenvector corresponding to the minimum eigenvalue
    if eigmin < 0.:
        if abs(eigmin) < 1e-6:
            eigmin = 0.
        else:
            print 'minDist> WARNING minimum eigenvalue is ',eigmin,' change to absolute value'
            eigmin = -eigmin

    dist = np.sqrt(eigmin) # this is the minimized distance between the two structures

    Q2 = np.real_if_close(Q2, 1e-10)
    if np.iscomplexobj(Q2):
        raise ValueError("Q2 is complex")
    return dist, rotations.q2mx(Q2)
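
The centering step above is a per-column sum: np.sum(x1, axis=0) / natoms averages the coordinates over atoms, i.e. an unweighted center of mass (x1.mean(axis=0) would be equivalent). A minimal sketch with made-up coordinates:

import numpy as np

x1 = np.array([[0.0, 0.0, 0.0],
               [2.0, 4.0, 6.0]])
com1 = np.sum(x1, axis=0) / float(x1.shape[0])   # array([1., 2., 3.])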

Example 12

View license
    def __init__(self, variables, cardinality, values):
        """
        Initialize a Joint Probability Distribution class.

        Defined above, we have the following mapping from variable
        assignments to the index of the row vector in the value field:

        +-----+-----+-----+-------------------------+
        |  x1 |  x2 |  x3 |    P(x1, x2, x3)        |
        +-----+-----+-----+-------------------------+
        | x1_0| x2_0| x3_0|    P(x1_0, x2_0, x3_0)  |
        +-----+-----+-----+-------------------------+
        | x1_1| x2_0| x3_0|    P(x1_1, x2_0, x3_0)  |
        +-----+-----+-----+-------------------------+
        | x1_0| x2_1| x3_0|    P(x1_0, x2_1, x3_0)  |
        +-----+-----+-----+-------------------------+
        | x1_1| x2_1| x3_0|    P(x1_1, x2_1, x3_0)  |
        +-----+-----+-----+-------------------------+
        | x1_0| x2_0| x3_1|    P(x1_0, x2_0, x3_1)  |
        +-----+-----+-----+-------------------------+
        | x1_1| x2_0| x3_1|    P(x1_1, x2_0, x3_1)  |
        +-----+-----+-----+-------------------------+
        | x1_0| x2_1| x3_1|    P(x1_0, x2_1, x3_1)  |
        +-----+-----+-----+-------------------------+
        | x1_1| x2_1| x3_1|    P(x1_1, x2_1, x3_1)  |
        +-----+-----+-----+-------------------------+

        Parameters
        ----------
        variables: list
            List of scope of Joint Probability Distribution.
        cardinality: list, array_like
            List of cardinality of each variable
        values: list, array_like
            List or array of values of factor.
            A Joint Probability Distribution's values are stored in a row
            vector in the value using an ordering such that the left-most
            variables as defined in the variable field cycle through their
            values the fastest.

        Examples
        --------
        >>> import numpy as np
        >>> from pgmpy.factors.discrete import JointProbabilityDistribution
        >>> prob = JointProbabilityDistribution(['x1', 'x2', 'x3'], [2, 2, 2], np.ones(8)/8)
        >>> print(prob)
        x1    x2    x3      P(x1,x2,x3)
        ----  ----  ----  -------------
        x1_0  x2_0  x3_0         0.1250
        x1_0  x2_0  x3_1         0.1250
        x1_0  x2_1  x3_0         0.1250
        x1_0  x2_1  x3_1         0.1250
        x1_1  x2_0  x3_0         0.1250
        x1_1  x2_0  x3_1         0.1250
        x1_1  x2_1  x3_0         0.1250
        x1_1  x2_1  x3_1         0.1250
       """
        if np.isclose(np.sum(values), 1):
            super(JointProbabilityDistribution, self).__init__(variables, cardinality, values)
        else:
            raise ValueError("The probability values doesn't sum to 1.")

Example 13

View license
def plot_all(df, groups, subject=None, figsize=(11.69, 5),
             strip_nsubj=10, title='Summary report'):
    import matplotlib.gridspec as gridspec
    colnames = [v for gnames in groups for v in gnames]
    lengs = [len(el) for el in groups]
    ncols = np.sum(lengs)

    fig = plt.figure(figsize=figsize)
    gs = gridspec.GridSpec(1, len(groups), width_ratios=lengs)

    subjects = sorted(pd.unique(df.subject.ravel()))
    nsubj = len(subjects)
    subid = subject
    if subid is not None:
        try:
            subid = int(subid)
        except ValueError:
            pass

    axes = []
    for i, snames in enumerate(groups):
        axes.append(plt.subplot(gs[i]))

        if nsubj > strip_nsubj:
            sns.violinplot(data=df[snames], ax=axes[-1])
        else:
            stdf = df.copy()
            if subid is not None:
                stdf = stdf.loc[stdf['subject'] != subid]
            sns.stripplot(data=stdf[snames], ax=axes[-1], jitter=0.25)

        axes[-1].set_xticklabels(
            [el.get_text() for el in axes[-1].get_xticklabels()],
            rotation='vertical')
        plt.ticklabel_format(style='sci', axis='y', scilimits=(-1, 1))
        # df[snames].plot(kind='box', ax=axes[-1])

        # If we know the subject, place a star for each scan
        if subject is not None:
            subdf = df.loc[df['subject'] == subid]
            scans = sorted(pd.unique(subdf.scan.ravel()))
            nstars = len(scans)
            for j, s in enumerate(snames):
                vals = []
                for k, scid in enumerate(scans):
                    val = subdf.loc[df.scan == scid, [s]].iloc[0, 0]
                    vals.append(val)

                if len(vals) != nstars:
                    continue

                pos = [j]
                if nstars > 1:
                    pos = np.linspace(j-0.3, j+0.3, num=nstars)

                axes[-1].plot(
                    pos, vals, ms=9, mew=.8, linestyle='None',
                    color='w', marker='*', markeredgecolor='k',
                    zorder=10)

    fig.suptitle(title)
    plt.tight_layout(pad=0.4, w_pad=0.5, h_pad=1.0)
    plt.subplots_adjust(top=0.85)
    return fig

Example 14

View license
def pac_metric(solution, prediction, task=BINARY_CLASSIFICATION):
    """
    Probabilistic Accuracy based on log_loss metric.

    We assume the solution is in {0, 1} and prediction in [0, 1].
    Otherwise, run normalize_array.
    :param solution:
    :param prediction:
    :param task:
    :return:
    """
    if task == BINARY_CLASSIFICATION:
        if len(solution.shape) == 1:
            # Solution won't be touched - no copy
            solution = solution.reshape((-1, 1))
        elif len(solution.shape) == 2:
            if solution.shape[1] > 1:
                raise ValueError('Solution array must only contain one class '
                                 'label, but contains %d' % solution.shape[1])
        else:
            raise ValueError('Solution.shape %s' % solution.shape)
        solution = solution.copy()

        if len(prediction.shape) == 2:
            if prediction.shape[1] > 2:
                raise ValueError('A prediction array with probability values '
                                 'for %d classes is not a binary '
                                 'classification problem' % prediction.shape[1])
            # Keep only the positive-class probabilities as a column vector - no copy
            prediction = prediction[:, 1].reshape((-1, 1))
        else:
            raise ValueError('Invalid prediction shape %s' % prediction.shape)

    elif task == MULTICLASS_CLASSIFICATION:
        if len(solution.shape) == 1:
            solution = create_multiclass_solution(solution, prediction)
        elif len(solution.shape) == 2:
            if solution.shape[1] > 1:
                raise ValueError('Solution array must only contain one class '
                                 'label, but contains %d' % solution.shape[1])
            else:
                solution = create_multiclass_solution(solution.reshape((-1, 1)),
                                                      prediction)
        else:
            raise ValueError('Solution.shape %s' % solution.shape)
    elif task == MULTILABEL_CLASSIFICATION:
        solution = solution.copy()
    else:
        raise NotImplementedError('pac_metric does not support task type %s'
                                  % task)
    solution, prediction = normalize_array(solution, prediction.copy())

    [sample_num, label_num] = solution.shape
    if label_num == 1:
        task = BINARY_CLASSIFICATION
    eps = 1e-7
    # Compute the base log loss (using the prior probabilities)
    pos_num = 1. * np.sum(solution, axis=0, dtype=float)  # float conversion!
    frac_pos = pos_num / sample_num  # prior proba of positive class
    the_base_log_loss = prior_log_loss(frac_pos, task)
    the_log_loss = log_loss(solution, prediction, task)

    # Exponentiate to turn into an accuracy-like score.
    # In the multi-label case, we need to average AFTER taking the exp
    # because it is an NL operation
    pac = np.mean(np.exp(-the_log_loss))
    base_pac = np.mean(np.exp(-the_base_log_loss))
    # Normalize: 0 for random, 1 for perfect
    score = (pac - base_pac) / sp.maximum(eps, (1 - base_pac))
    return score
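
pac_metric above also passes dtype to the reduction: np.sum(solution, axis=0, dtype=float) sums each column (the number of positive labels per class) with a float accumulator even when solution holds integers. A minimal sketch with a made-up solution matrix:

import numpy as np

solution = np.array([[1, 0],
                     [1, 1],
                     [0, 1]])
pos_num = np.sum(solution, axis=0, dtype=float)   # array([2., 2.]), positives per label
frac_pos = pos_num / solution.shape[0]            # array([0.66666667, 0.66666667])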

Example 15

Project: auto-sklearn
Source File: base.py
View license
    @classmethod
    def _get_hyperparameter_search_space(cls, cs, dataset_properties, exclude,
                                         include, pipeline):
        if include is None:
            include = {}

        keys = [pair[0] for pair in pipeline]
        for key in include:
            if key not in keys:
                raise ValueError('Invalid key in include: %s; should be one '
                                 'of %s' % (key, keys))

        if exclude is None:
            exclude = {}

        keys = [pair[0] for pair in pipeline]
        for key in exclude:
            if key not in keys:
                raise ValueError('Invalid key in exclude: %s; should be one '
                                 'of %s' % (key, keys))

        if 'sparse' not in dataset_properties:
            # This dataset is probably dense
            dataset_properties['sparse'] = False
        if 'signed' not in dataset_properties:
            # This dataset probably contains unsigned data
            dataset_properties['signed'] = False

        matches = autosklearn.pipeline.create_searchspace_util.get_match_array(
            pipeline, dataset_properties, include=include, exclude=exclude)

        # Now we have only legal combinations at this step of the pipeline
        # Simple sanity checks
        assert np.sum(matches) != 0, "No valid pipeline found."

        assert np.sum(matches) <= np.size(matches), \
            "'matches' is not binary; %s <= %d, %s" % \
            (str(np.sum(matches)), np.size(matches), str(matches.shape))

        # Iterate each dimension of the matches array (each step of the
        # pipeline) to see if we can add a hyperparameter for that step
        for node_idx, n_ in enumerate(pipeline):
            node_name, node = n_
            is_choice = hasattr(node, "get_available_components")

            # if the node isn't a choice we can add it immediately because it
            #  must be active (if it weren't, np.sum(matches) would be zero)
            if not is_choice:
                cs.add_configuration_space(node_name,
                    node.get_hyperparameter_search_space(dataset_properties))
            # If the node is a choice, we have to figure out which of its
            #  choices are actually legal choices
            else:
                choices_list = autosklearn.pipeline.create_searchspace_util.\
                    find_active_choices(matches, node, node_idx,
                                        dataset_properties,
                                        include.get(node_name),
                                        exclude.get(node_name))
                cs.add_configuration_space(node_name,
                    node.get_hyperparameter_search_space(
                        dataset_properties, include=choices_list))
        # And now add forbidden parameter configurations
        # According to matches
        if np.sum(matches) < np.size(matches):
            cs = autosklearn.pipeline.create_searchspace_util.add_forbidden(
                conf_space=cs, pipeline=pipeline, matches=matches,
                dataset_properties=dataset_properties, include=include,
                exclude=exclude)

        return cs
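
The two assertions above use numpy.sum over a 0/1 match array; a minimal stand-alone version of the same sanity check (the array below is invented for illustration) is:

import numpy as np

matches = np.array([[1, 0],
                    [1, 1],
                    [0, 1]])  # 1 marks a legal preprocessor/classifier pair

assert np.sum(matches) != 0, "No valid pipeline found."
assert np.sum(matches) <= np.size(matches), "'matches' is not binary"
# Forbidden clauses are only needed when some combinations are illegal:
print(np.sum(matches) < np.size(matches))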

Example 20

View license
    def test_get_match_array_sparse_and_dense(self):
        # preproc is empty
        preprocessors = OrderedDict()
        preprocessors['pca'] = PCA
        classifiers = OrderedDict()
        classifiers['lda'] = LDA
        # Sparse + dense
        class Preprocessors(object):
            @classmethod
            def get_available_components(self, *args, **kwargs):
                return preprocessors

        class Classifiers(object):
            @classmethod
            def get_available_components(self, *args, **kwargs):
                return classifiers

        # PCA + LDA: no valid combination on sparse data, a single one on dense data
        m = autosklearn.pipeline.create_searchspace_util.get_match_array(
            pipeline=((0, PCA), (1, LDA)), dataset_properties={'sparse': True})
        self.assertEqual(numpy.sum(m), 0)

        m = autosklearn.pipeline.create_searchspace_util.get_match_array(
            pipeline=((0, PCA), (1, LDA)), dataset_properties={'sparse': False})
        self.assertEqual(m, [[1]])

        # Sparse
        preprocessors['tSVD'] = TruncatedSVD
        m = autosklearn.pipeline.create_searchspace_util.get_match_array(
            pipeline=((0, Preprocessors), (1, LDA)),
            dataset_properties={'sparse': True})
        self.assertEqual(m[0], [0])  # pca
        self.assertEqual(m[1], [1])  # svd

        m = autosklearn.pipeline.create_searchspace_util.get_match_array(
            pipeline=((0, Preprocessors), (1, LDA)),
            dataset_properties={'sparse': False})
        self.assertEqual(m[0], [1])  # pca
        self.assertEqual(m[1], [0])  # svd

        preprocessors['none'] = NoPreprocessing
        m = autosklearn.pipeline.create_searchspace_util.get_match_array(
            pipeline=((0, Preprocessors), (1, LDA)),
            dataset_properties={'sparse': True})
        self.assertEqual(m[0, :], [0])  # pca
        self.assertEqual(m[1, :], [1])  # tsvd
        self.assertEqual(m[2, :], [0])  # none

        m = autosklearn.pipeline.create_searchspace_util.get_match_array(
            pipeline=((0, Preprocessors), (1, LDA)),
            dataset_properties={'sparse': False})
        self.assertEqual(m[0, :], [1])  # pca
        self.assertEqual(m[1, :], [0])  # tsvd
        self.assertEqual(m[2, :], [1])  # none

        classifiers['libsvm'] = LibLinear_SVC
        m = autosklearn.pipeline.create_searchspace_util.get_match_array(
            pipeline=((0, Preprocessors), (1, Classifiers)),
            dataset_properties={'sparse': False})
        self.assertListEqual(list(m[0, :]), [1, 1])  # pca
        self.assertListEqual(list(m[1, :]), [0, 0])  # tsvd
        self.assertListEqual(list(m[2, :]), [1, 1])  # none

        m = autosklearn.pipeline.create_searchspace_util.get_match_array(
            pipeline=((0, Preprocessors), (1, Classifiers)),
            dataset_properties={'sparse': True})
        self.assertListEqual(list(m[0, :]), [0, 0])  # pca
        self.assertListEqual(list(m[1, :]), [1, 1])  # tsvd
        self.assertListEqual(list(m[2, :]), [0, 1])  # none

        # Do fancy 3d stuff
        preprocessors['random_trees'] = RandomTreesEmbedding
        m = autosklearn.pipeline.create_searchspace_util.get_match_array(
            pipeline=((0, Preprocessors), (1, Preprocessors), (2, Classifiers)),
            dataset_properties={'sparse': False})
        # PCA followed by truncated SVD is forbidden
        self.assertEqual(list(m[0].flatten()), [1, 1, 0, 0, 1, 1, 0, 1])
        # Truncated SVD is forbidden
        self.assertEqual(list(m[1].flatten()), [0, 0, 0, 0, 0, 0, 0, 0])
        # Truncated SVD is forbidden after no_preprocessing
        self.assertEqual(list(m[2].flatten()), [1, 1, 0, 0, 1, 1, 0, 1])
        # PCA is forbidden, truncatedSVD allowed after random trees embedding
        # lda only allowed after truncatedSVD
        self.assertEqual(list(m[3].flatten()), [0, 0, 1, 1, 0, 1, 0, 1])

Example 21

Project: pycalphad
Source File: lower_convex_hull.py
View license
def lower_convex_hull(global_grid, result_array, verbose=False):
    """
    Find the simplices on the lower convex hull satisfying the specified
    conditions in the result array.

    Parameters
    ----------
    global_grid : Dataset
        A sample of the energy surface of the system.
    result_array : Dataset
        This object will be modified!
        Coordinates correspond to conditions axes.
    verbose : bool
        Display details to stdout. Useful for debugging.

    Returns
    -------
    None. Results are written to result_array.

    Notes
    -----
    This routine will not check if any simplex is degenerate.
    Degenerate simplices will manifest with duplicate or NaN indices.

    Examples
    --------
    None yet.
    """
    indep_conds = sorted([x for x in sorted(result_array.coords.keys()) if x in ['T', 'P']])
    comp_conds = sorted([x for x in sorted(result_array.coords.keys()) if x.startswith('X_')])
    pot_conds = sorted([x for x in sorted(result_array.coords.keys()) if x.startswith('MU_')])

    # Determine starting combinations of chemical potentials and compositions
    # TODO: Check Gibbs phase rule compliance

    if len(pot_conds) > 0:
        raise NotImplementedError('Chemical potential conditions are not yet supported')

    # FIRST CASE: Only composition conditions specified
    # We only need to compute the dependent composition value directly
    # Initialize trial points as lowest energy point in the system
    if (len(comp_conds) > 0) and (len(pot_conds) == 0):
        comp_values = cartesian([result_array.coords[cond] for cond in comp_conds])
        # Insert dependent composition value
        # TODO: Handle W(comp) as well as X(comp) here
        specified_components = set([x[2:] for x in comp_conds])
        dependent_component = set(result_array.coords['component'].values) - specified_components
        dependent_component = list(dependent_component)
        if len(dependent_component) != 1:
            raise ValueError('Number of dependent components is different from one')
        insert_idx = sorted(result_array.coords['component'].values).index(dependent_component[0])
        comp_values = np.concatenate((comp_values[..., :insert_idx],
                                      1 - np.sum(comp_values, keepdims=True, axis=-1),
                                      comp_values[..., insert_idx:]),
                                     axis=-1)
        # Prevent compositions near an edge from going negative
        comp_values[np.nonzero(comp_values < MIN_SITE_FRACTION)] = MIN_SITE_FRACTION*10
        # TODO: Assumes N=1
        comp_values /= comp_values.sum(axis=-1, keepdims=True)
        #print(comp_values)

    # SECOND CASE: Only chemical potential conditions specified
    # TODO: Implementation of chemical potential

    # THIRD CASE: Mixture of composition and chemical potential conditions
    # TODO: Implementation of mixed conditions

    # factored out via profiling
    result_array_GM_values = result_array.GM.values
    result_array_points_values = result_array.points.values
    result_array_MU_values = result_array.MU.values
    result_array_NP_values = result_array.NP.values
    result_array_X_values = result_array.X.values
    result_array_Y_values = result_array.Y.values
    result_array_Phase_values = result_array.Phase.values
    global_grid_GM_values = global_grid.GM.values
    global_grid_X_values = global_grid.X.values

    it = np.nditer(result_array_GM_values, flags=['multi_index'])
    comp_coord_shape = tuple(len(result_array.coords[cond]) for cond in comp_conds)
    while not it.finished:
        indep_idx = it.multi_index[:len(indep_conds)]
        comp_idx = np.ravel_multi_index(it.multi_index[len(indep_conds):], comp_coord_shape)
        idx_global_grid_X_values = global_grid_X_values[indep_idx]
        idx_global_grid_GM_values = global_grid_GM_values[indep_idx]
        idx_comp_values = comp_values[comp_idx]
        idx_result_array_MU_values = result_array_MU_values[it.multi_index]
        idx_result_array_NP_values = result_array_NP_values[it.multi_index]
        idx_result_array_GM_values = result_array_GM_values[it.multi_index]
        idx_result_array_points_values = result_array_points_values[it.multi_index]
        result_array_GM_values[it.multi_index] = \
            hyperplane(idx_global_grid_X_values, idx_global_grid_GM_values,
                       idx_comp_values, idx_result_array_MU_values,
                       idx_result_array_NP_values, idx_result_array_points_values)
        # Copy phase values out
        points = result_array_points_values[it.multi_index]
        result_array_Phase_values[it.multi_index] = global_grid.Phase.values[indep_idx].take(points, axis=0)
        result_array_X_values[it.multi_index] = global_grid.X.values[indep_idx].take(points, axis=0)
        result_array_Y_values[it.multi_index] = global_grid.Y.values[indep_idx].take(points, axis=0)
        # Special case: Sometimes fictitious points slip into the result
        # This can happen when we calculate stoichiometric phases by themselves
        if '_FAKE_' in result_array_Phase_values[it.multi_index]:
            # Chemical potentials are meaningless in this case
            idx_result_array_MU_values[...] = 0
            new_energy = 0.
            molesum = 0.
            for idx in range(len(result_array_Phase_values[it.multi_index])):
                midx = it.multi_index + (idx,)
                if result_array_Phase_values[midx] == '_FAKE_':
                    result_array_Phase_values[midx] = ''
                    result_array_X_values[midx] = np.nan
                    result_array_Y_values[midx] = np.nan
                    idx_result_array_NP_values[idx] = np.nan
                else:
                    new_energy += idx_result_array_NP_values[idx] * global_grid.GM.values[np.index_exp[indep_idx + (points[idx],)]]
                    molesum += idx_result_array_NP_values[idx]
            result_array_GM_values[it.multi_index] = new_energy / molesum
        it.iternext()
    del result_array['points']
    return result_array
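
In the composition-only case above, the dependent component is obtained with 1 - np.sum(...) along the last axis, followed by clipping and renormalization; a toy version with made-up numbers:

import numpy as np

MIN_SITE_FRACTION = 1e-12
# Two specified mole fractions per condition; the third component is dependent.
comp_values = np.array([[0.2, 0.3],
                        [0.5, 0.5]])
dependent = 1 - np.sum(comp_values, keepdims=True, axis=-1)
comp_values = np.concatenate((comp_values, dependent), axis=-1)
# Clip near-zero entries and renormalize so each row sums to one again.
comp_values[comp_values < MIN_SITE_FRACTION] = MIN_SITE_FRACTION * 10
comp_values /= comp_values.sum(axis=-1, keepdims=True)
print(comp_values)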

Example 22

Project: pycollada
Source File: polylist.py
View license
    def __init__(self, sources, material, index, vcounts, xmlnode=None):
        """A Polylist should not be created manually. Instead, call the
        :meth:`collada.geometry.Geometry.createPolylist` method after
        creating a geometry instance.
        """

        if len(sources) == 0: raise DaeIncompleteError('A polylist set needs at least one input for vertex positions')
        if not 'VERTEX' in sources: raise DaeIncompleteError('Polylist requires vertex input')

        #find max offset
        max_offset = max([ max([input[0] for input in input_type_array])
                          for input_type_array in sources.values() if len(input_type_array) > 0])

        self.material = material
        self.index = index
        self.indices = self.index
        self.nindices = max_offset + 1
        self.vcounts = vcounts
        self.sources = sources
        self.index.shape = (-1, self.nindices)
        self.npolygons = len(self.vcounts)
        self.nvertices = numpy.sum(self.vcounts) if len(self.index) > 0 else 0
        self.polyends = numpy.cumsum(self.vcounts)
        self.polystarts = self.polyends - self.vcounts
        self.polyindex = numpy.dstack((self.polystarts, self.polyends))[0]

        if len(self.index) > 0:
            self._vertex = sources['VERTEX'][0][4].data
            self._vertex_index = self.index[:,sources['VERTEX'][0][0]]
            self.maxvertexindex = numpy.max( self._vertex_index )
            checkSource(sources['VERTEX'][0][4], ('X', 'Y', 'Z'), self.maxvertexindex)
        else:
            self._vertex = None
            self._vertex_index = None
            self.maxvertexindex = -1

        if 'NORMAL' in sources and len(sources['NORMAL']) > 0 and len(self.index) > 0:
            self._normal = sources['NORMAL'][0][4].data
            self._normal_index = self.index[:,sources['NORMAL'][0][0]]
            self.maxnormalindex = numpy.max( self._normal_index )
            checkSource(sources['NORMAL'][0][4], ('X', 'Y', 'Z'), self.maxnormalindex)
        else:
            self._normal = None
            self._normal_index = None
            self.maxnormalindex = -1

        if 'TEXCOORD' in sources and len(sources['TEXCOORD']) > 0 \
                and len(self.index) > 0:
            self._texcoordset = tuple([texinput[4].data
                for texinput in sources['TEXCOORD']])
            self._texcoord_indexset = tuple([ self.index[:,sources['TEXCOORD'][i][0]]
                for i in xrange(len(sources['TEXCOORD'])) ])
            self.maxtexcoordsetindex = [numpy.max(each)
                for each in self._texcoord_indexset]
            for i, texinput in enumerate(sources['TEXCOORD']):
                checkSource(texinput[4], ('S', 'T'), self.maxtexcoordsetindex[i])
        else:
            self._texcoordset = tuple()
            self._texcoord_indexset = tuple()
            self.maxtexcoordsetindex = -1

        if xmlnode is not None:
            self.xmlnode = xmlnode
            """ElementTree representation of the line set."""
        else:
            txtindices = ' '.join(map(str, self.indices.flatten().tolist()))
            acclen = len(self.indices)

            self.xmlnode = E.polylist(count=str(self.npolygons),
                    material=self.material)

            all_inputs = []
            for semantic_list in self.sources.values():
                all_inputs.extend(semantic_list)
            for offset, semantic, sourceid, set, src in all_inputs:
                inpnode = E.input(offset=str(offset), semantic=semantic,
                        source=sourceid)
                if set is not None:
                    inpnode.set('set', str(set))
                self.xmlnode.append(inpnode)

            vcountnode = E.vcount(' '.join(map(str, self.vcounts)))
            self.xmlnode.append(vcountnode)
            self.xmlnode.append(E.p(txtindices))

Example 23

Project: numexpr
Source File: test_numexpr.py
View license
    def test_reductions(self):
        # Check that they compile OK.
        assert_equal(disassemble(
            NumExpr("sum(x**2+2, axis=None)", [('x', double)])),
                     [(b'mul_ddd', b't3', b'r1[x]', b'r1[x]'),
                      (b'add_ddd', b't3', b't3', b'c2[2.0]'),
                      (b'sum_ddn', b'r0', b't3', None)])
        assert_equal(disassemble(
            NumExpr("sum(x**2+2, axis=1)", [('x', double)])),
                     [(b'mul_ddd', b't3', b'r1[x]', b'r1[x]'),
                      (b'add_ddd', b't3', b't3', b'c2[2.0]'),
                      (b'sum_ddn', b'r0', b't3', 1)])
        assert_equal(disassemble(
            NumExpr("prod(x**2+2, axis=2)", [('x', double)])),
                     [(b'mul_ddd', b't3', b'r1[x]', b'r1[x]'),
                      (b'add_ddd', b't3', b't3', b'c2[2.0]'),
                      (b'prod_ddn', b'r0', b't3', 2)])
        # Check that full reductions work.
        x = zeros(100000) + .01  # checks issue #41
        assert_allclose(evaluate("sum(x+2,axis=None)"), sum(x + 2, axis=None))
        assert_allclose(evaluate("sum(x+2,axis=0)"), sum(x + 2, axis=0))
        assert_allclose(evaluate("prod(x,axis=0)"), prod(x, axis=0))
        assert_allclose(evaluate("min(x)"), np.min(x))
        assert_allclose(evaluate("max(x,axis=0)"), np.max(x, axis=0))

        x = arange(10.0)
        assert_allclose(evaluate("sum(x**2+2,axis=0)"), sum(x ** 2 + 2, axis=0))
        assert_allclose(evaluate("prod(x**2+2,axis=0)"), prod(x ** 2 + 2, axis=0))
        assert_allclose(evaluate("min(x**2+2,axis=0)"), np.min(x ** 2 + 2, axis=0))
        assert_allclose(evaluate("max(x**2+2,axis=0)"), np.max(x ** 2 + 2, axis=0))

        x = arange(100.0)
        assert_allclose(evaluate("sum(x**2+2,axis=0)"), sum(x ** 2 + 2, axis=0))
        assert_allclose(evaluate("prod(x-1,axis=0)"), prod(x - 1, axis=0))
        assert_allclose(evaluate("min(x-1,axis=0)"), np.min(x - 1, axis=0))
        assert_allclose(evaluate("max(x-1,axis=0)"), np.max(x - 1, axis=0))
        x = linspace(0.1, 1.0, 2000)
        assert_allclose(evaluate("sum(x**2+2,axis=0)"), sum(x ** 2 + 2, axis=0))
        assert_allclose(evaluate("prod(x-1,axis=0)"), prod(x - 1, axis=0))
        assert_allclose(evaluate("min(x-1,axis=0)"), np.min(x - 1, axis=0))
        assert_allclose(evaluate("max(x-1,axis=0)"), np.max(x - 1, axis=0))

        # Check that reductions along an axis work
        y = arange(9.0).reshape(3, 3)
        assert_allclose(evaluate("sum(y**2, axis=1)"), sum(y ** 2, axis=1))
        assert_allclose(evaluate("sum(y**2, axis=0)"), sum(y ** 2, axis=0))
        assert_allclose(evaluate("sum(y**2, axis=None)"), sum(y ** 2, axis=None))
        assert_allclose(evaluate("prod(y**2, axis=1)"), prod(y ** 2, axis=1))
        assert_allclose(evaluate("prod(y**2, axis=0)"), prod(y ** 2, axis=0))
        assert_allclose(evaluate("prod(y**2, axis=None)"), prod(y ** 2, axis=None))
        assert_allclose(evaluate("min(y**2, axis=1)"), np.min(y ** 2, axis=1))
        assert_allclose(evaluate("min(y**2, axis=0)"), np.min(y ** 2, axis=0))
        assert_allclose(evaluate("min(y**2, axis=None)"), np.min(y ** 2, axis=None))
        assert_allclose(evaluate("max(y**2, axis=1)"), np.max(y ** 2, axis=1))
        assert_allclose(evaluate("max(y**2, axis=0)"), np.max(y ** 2, axis=0))
        assert_allclose(evaluate("max(y**2, axis=None)"), np.max(y ** 2, axis=None))
        # Check integers
        x = arange(10.)
        x = x.astype(int)
        assert_allclose(evaluate("sum(x**2+2,axis=0)"), sum(x ** 2 + 2, axis=0))
        assert_allclose(evaluate("prod(x**2+2,axis=0)"), prod(x ** 2 + 2, axis=0))
        assert_allclose(evaluate("min(x**2+2,axis=0)"), np.min(x ** 2 + 2, axis=0))
        assert_allclose(evaluate("max(x**2+2,axis=0)"), np.max(x ** 2 + 2, axis=0))
        # Check longs
        x = x.astype(long)
        assert_allclose(evaluate("sum(x**2+2,axis=0)"), sum(x ** 2 + 2, axis=0))
        assert_allclose(evaluate("prod(x**2+2,axis=0)"), prod(x ** 2 + 2, axis=0))
        assert_allclose(evaluate("min(x**2+2,axis=0)"), np.min(x ** 2 + 2, axis=0))
        assert_allclose(evaluate("max(x**2+2,axis=0)"), np.max(x ** 2 + 2, axis=0))
        # Check complex
        x = x + .1j
        assert_allclose(evaluate("sum(x**2+2,axis=0)"), sum(x ** 2 + 2, axis=0))
        assert_allclose(evaluate("prod(x-1,axis=0)"), prod(x - 1, axis=0))

Example 24

Project: pymanopt
Source File: test_tensorflow.py
View license
    def setUp(self):
        n1 = self.n1 = 3
        n2 = self.n2 = 4
        n3 = self.n3 = 5
        n4 = self.n4 = 6
        n5 = self.n5 = 7
        n6 = self.n6 = 8

        x = tf.Variable(tf.zeros([n1]))
        y = tf.Variable(tf.zeros([n2, n3]))
        z = tf.Variable(tf.zeros([n4, n5, n6]))
        f = (tf.exp(tf.reduce_sum(x**2)) + tf.exp(tf.reduce_sum(y**2)) +
             tf.exp(tf.reduce_sum(z**2)))

        self.cost = f
        self.arg = [x, y, z]

        self.y = y = (rnd.randn(n1).astype(float32) * 1e-3,
                      rnd.randn(n2, n3).astype(float32) * 1e-3,
                      rnd.randn(n4, n5, n6).astype(float32) * 1e-3)
        self.a = a = (rnd.randn(n1).astype(float32) * 1e-3,
                      rnd.randn(n2, n3).astype(float32) * 1e-3,
                      rnd.randn(n4, n5, n6).astype(float32) * 1e-3)

        self.correct_cost = (np.exp(np.sum(y[0]**2)) +
                             np.exp(np.sum(y[1]**2)) +
                             np.exp(np.sum(y[2]**2)))

        # CALCULATE CORRECT GRAD
        g1 = 2 * y[0] * np.exp(np.sum(y[0] ** 2))
        g2 = 2 * y[1] * np.exp(np.sum(y[1] ** 2))
        g3 = 2 * y[2] * np.exp(np.sum(y[2] ** 2))

        self.correct_grad = (g1, g2, g3)

        # CALCULATE CORRECT HESS
        # 1. VECTOR
        Ymat = np.matrix(y[0])
        Amat = np.matrix(a[0])

        diag = np.eye(n1)

        H = np.exp(np.sum(y[0] ** 2)) * (4 * Ymat.T.dot(Ymat) + 2 * diag)

        # Then 'left multiply' H by A
        h1 = np.array(Amat.dot(H)).flatten()

        # 2. MATRIX
        # First form hessian tensor H (4th order)
        Y1 = y[1].reshape(n2, n3, 1, 1)
        Y2 = y[1].reshape(1, 1, n2, n3)

        # Create an m x n x m x n array with diag[i,j,k,l] == 1 iff
        # (i == k and j == l), this is a 'diagonal' tensor.
        diag = np.eye(n2 * n3).reshape(n2, n3, n2, n3)

        H = np.exp(np.sum(y[1] ** 2)) * (4 * Y1 * Y2 + 2 * diag)

        # Then 'right multiply' H by A
        Atensor = a[1].reshape(1, 1, n2, n3)

        h2 = np.sum(H * Atensor, axis=(2, 3))

        # 3. Tensor3
        # First form hessian tensor H (6th order)
        Y1 = y[2].reshape(n4, n5, n6, 1, 1, 1)
        Y2 = y[2].reshape(1, 1, 1, n4, n5, n6)

        # Create an n1 x n2 x n3 x n1 x n2 x n3 diagonal tensor
        diag = np.eye(n4 * n5 * n6).reshape(n4, n5, n6, n4, n5, n6)

        H = np.exp(np.sum(y[2] ** 2)) * (4 * Y1 * Y2 + 2 * diag)

        # Then 'right multiply' H by A
        Atensor = a[2].reshape(1, 1, 1, n4, n5, n6)

        h3 = np.sum(H * Atensor, axis=(3, 4, 5))

        self.correct_hess = (h1, h2, h3)
        self.backend = TensorflowBackend()
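
The "right multiply" of the fourth-order Hessian tensor by A used above is a contraction written as an element-wise product followed by numpy.sum over the trailing axes; in isolation (shapes chosen arbitrarily):

import numpy as np

n2, n3 = 4, 5
H = np.random.randn(n2, n3, n2, n3)           # fourth-order Hessian tensor
A = np.random.randn(n2, n3)
h = np.sum(H * A.reshape(1, 1, n2, n3), axis=(2, 3))
# The same contraction written with einsum:
assert np.allclose(h, np.einsum('ijkl,kl->ij', H, A))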

Example 25

Project: pymanopt
Source File: test_theano.py
View license
    def setUp(self):
        x = T.vector()
        y = T.matrix()
        z = T.tensor3()
        f = T.exp(T.sum(x**2)) + T.exp(T.sum(y**2)) + T.exp(T.sum(z**2))

        self.cost = f
        self.arg = [x, y, z]

        n1 = self.n1 = 3
        n2 = self.n2 = 4
        n3 = self.n3 = 5
        n4 = self.n4 = 6
        n5 = self.n5 = 7
        n6 = self.n6 = 8

        self.y = y = (rnd.randn(n1), rnd.randn(n2, n3), rnd.randn(n4, n5, n6))
        self.a = a = (rnd.randn(n1), rnd.randn(n2, n3), rnd.randn(n4, n5, n6))

        self.correct_cost = (np.exp(np.sum(y[0]**2)) +
                             np.exp(np.sum(y[1]**2)) +
                             np.exp(np.sum(y[2]**2)))

        # CALCULATE CORRECT GRAD
        g1 = 2 * y[0] * np.exp(np.sum(y[0] ** 2))
        g2 = 2 * y[1] * np.exp(np.sum(y[1] ** 2))
        g3 = 2 * y[2] * np.exp(np.sum(y[2] ** 2))

        self.correct_grad = (g1, g2, g3)

        # CALCULATE CORRECT HESS
        # 1. VECTOR
        Ymat = np.matrix(y[0])
        Amat = np.matrix(a[0])

        diag = np.eye(n1)

        H = np.exp(np.sum(y[0] ** 2)) * (4 * Ymat.T.dot(Ymat) + 2 * diag)

        # Then 'left multiply' H by A
        h1 = np.array(Amat.dot(H)).flatten()

        # 2. MATRIX
        # First form hessian tensor H (4th order)
        Y1 = y[1].reshape(n2, n3, 1, 1)
        Y2 = y[1].reshape(1, 1, n2, n3)

        # Create an m x n x m x n array with diag[i,j,k,l] == 1 iff
        # (i == k and j == l), this is a 'diagonal' tensor.
        diag = np.eye(n2 * n3).reshape(n2, n3, n2, n3)

        H = np.exp(np.sum(y[1] ** 2)) * (4 * Y1 * Y2 + 2 * diag)

        # Then 'right multiply' H by A
        Atensor = a[1].reshape(1, 1, n2, n3)

        h2 = np.sum(H * Atensor, axis=(2, 3))

        # 3. Tensor3
        # First form hessian tensor H (6th order)
        Y1 = y[2].reshape(n4, n5, n6, 1, 1, 1)
        Y2 = y[2].reshape(1, 1, 1, n4, n5, n6)

        # Create an n1 x n2 x n3 x n1 x n2 x n3 diagonal tensor
        diag = np.eye(n4 * n5 * n6).reshape(n4, n5, n6, n4, n5, n6)

        H = np.exp(np.sum(y[2] ** 2)) * (4 * Y1 * Y2 + 2 * diag)

        # Then 'right multiply' H by A
        Atensor = a[2].reshape(1, 1, 1, n4, n5, n6)

        h3 = np.sum(H * Atensor, axis=(3, 4, 5))

        self.correct_hess = (h1, h2, h3)
        self.backend = TheanoBackend()

Example 26

Project: VASPy
Source File: electro.py
View license
    def plotsum(self, xcol, ycols, fill=True,
                show_dbc=True, show_fermi=True):
        '''
        Plot the sum of multiple data columns.

        Parameters
        ----------
        xcol: int
            column number of data for x values
        ycols: tuple of int
            column numbers of data for y values
            (start, stop[, step])
        Example:
        >>> a.plotsum(0, (1, 3))
        >>> a.plotsum(0, (5, 10, 2))
        '''
        x = self.data[:, xcol]
        if len(ycols) == 2:
            start, stop = ycols
            step = 1
        else:
            start, stop, step = ycols
        ys = self.data[:, start:stop:step]
        y = np.sum(ys, axis=1)
        ymax = np.max(y)
        #plot
        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.plot(x, y, linewidth=5, color='#104E8B')
        #plot fermi energy auxiliary line
        if show_fermi:
            #Fermi vertical line
            xfermi = np.array([0.0]*50)
            yfermi = np.linspace(0, int(ymax+1), 50)
            ax.plot(xfermi, yfermi, linestyle='dashed',
                    color='#4A708B', linewidth=3)
        # fill area from minus infinity to 0
        if fill:
            minus_x = np.array([i for i in x if i <= 0])
            minus_y = y[: len(minus_x)]
            ax.fill_between(minus_x, minus_y, facecolor='#B9D3EE',
                            interpolate=True)
        # show d band center line
        if show_dbc:
            dbc = self.get_dband_center()
            x_dbc = np.array([dbc]*50)
            y_dbc = np.linspace(0, int(ymax+1), 50)
            ax.plot(x_dbc, y_dbc, linestyle='dashed',
                    color='#C67171', linewidth=3)

        ax.set_xlabel(r'$\bf{E - E_F(eV)}$', fontdict={'fontsize': 20})
        ax.set_ylabel(r'$\bf{pDOS(arb. unit)}$', fontdict={'fontsize': 20})
        fig.show()

        return
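
The core of plotsum is a column slice followed by numpy.sum along axis 1; with fabricated data:

import numpy as np

data = np.random.rand(100, 6)       # column 0: energy, columns 1-5: pDOS channels
x = data[:, 0]
y = np.sum(data[:, 1:4], axis=1)    # sum pDOS columns 1, 2 and 3
print(x.shape, y.shape, np.max(y))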

Example 27

Project: category2vec
Source File: sent2vec.py
View license
    def train(self, sentences, total_words=None, word_count=0, sent_count=0, chunksize=100):
        """
        Update the model's neural weights from a sequence of sentences (can be a once-only generator stream).
        Each sentence must be a list of unicode strings.

        """
        logger.info("training model with %i workers on %i sentences and %i features, "
                    "using 'skipgram'=%s 'hierarchical softmax'=%s 'subsample'=%s and 'negative sampling'=%s" %
                    (self.workers, self.sents_len, self.layer1_size, self.sg, self.hs, self.sample, self.negative))

        if not self.vocab:
            raise RuntimeError("you must first build vocabulary before training the model")

        start, next_report = time.time(), [1.0]
        word_count = [word_count]
        sent_count = [sent_count]
        total_words = total_words or sum(v.count * v.sample_probability for v in itervalues(self.vocab))
        total_sents = self.total_sents #it's now different from self.sents_len
        jobs = Queue(maxsize=2 * self.workers)  # buffer ahead only a limited number of jobs.. this is the reason we can't simply use ThreadPool :(
        lock = threading.Lock()  # for shared state (=number of words trained so far, log reports...)

        def worker_train():
            """Train the model, lifting lists of sentences from the jobs queue."""
            work = matutils.zeros_aligned(self.layer1_size + 8, dtype=REAL)  # each thread must have its own work memory
            neu1 = matutils.zeros_aligned(self.layer1_size + 8, dtype=REAL)

            while True:
                job = jobs.get()
                if job is None:  # data finished, exit
                    break
                # update the learning rate before every job
                if self.update_mode == 0:
                    alpha = max(self.min_alpha, self.alpha * (1 - 1.0 * word_count[0] / total_words))
                else:
                    alpha = self.alpha
                job_words = sum(train_sent_vec(self, self.sents[sent_no], sentence, alpha, work, neu1, self.sents_grad[sent_no])
                                for sent_no, sentence in job)
                with lock:
                    word_count[0] += job_words
                    sent_count[0] += chunksize
                    elapsed = time.time() - start
                    if elapsed >= next_report[0]:
                        logger.info("PROGRESS: at %.2f%% sents, alpha %.05f, %.0f words/s" %
                                    (100.0 * sent_count[0] / total_sents, alpha, word_count[0] / elapsed if elapsed else 0.0))
                        next_report[0] = elapsed + 1.0  # don't flood the log, wait at least a second between progress reports

        workers = [threading.Thread(target=worker_train) for _ in xrange(self.workers)]
        for thread in workers:
            thread.daemon = True  # make interrupting the process with ctrl+c easier
            thread.start()

        def prepare_sentences():
            for sent_tuple in sentences:
                sentence = sent_tuple[0]
                sent_id  = sent_tuple[1]
                sent_no = self.sent_no_hash[sent_id]
                sampled = [self.vocab.get(word, None) for word in sentence
                           if word in self.vocab and (self.vocab[word].sample_probability >= 1.0 or self.vocab[word].sample_probability >= random.random_sample())]
                yield (sent_no, sampled)

        # convert input strings to Vocab objects (eliding OOV/downsampled words), and start filling the jobs queue
        for job_no, job in enumerate(utils.grouper(prepare_sentences(), chunksize)):
            logger.debug("putting job #%i in the queue, qsize=%i" % (job_no, jobs.qsize()))
            jobs.put(job)
        logger.info("reached the end of input; waiting to finish %i outstanding jobs" % jobs.qsize())
        for _ in xrange(self.workers):
            jobs.put(None)  # give the workers heads up that they can finish -- no more work!

        for thread in workers:
            thread.join()

        elapsed = time.time() - start
        logger.info("training on %i words took %.1fs, %.0f words/s" %
                    (word_count[0], elapsed, word_count[0] / elapsed if elapsed else 0.0))

        return word_count[0]

Example 28

Project: category2vec
Source File: word2vec.py
View license
    def train(self, sentences, total_words=None, word_count=0, chunksize=100):
        """
        Update the model's neural weights from a sequence of sentences (can be a once-only generator stream).
        Each sentence must be a list of unicode strings.

        """
        if FAST_VERSION < 0:
            import warnings
            warnings.warn("Cython compilation failed, training will be slow. Do you have Cython installed? `pip install cython`")
        logger.info("training model with %i workers on %i vocabulary and %i features, "
            "using 'skipgram'=%s 'hierarchical softmax'=%s 'subsample'=%s and 'negative sampling'=%s and optimization=%i" %
                    (self.workers, len(self.vocab), self.layer1_size, self.sg, self.hs, self.sample, self.negative,FAST_VERSION))

        if not self.vocab:
            raise RuntimeError("you must first build vocabulary before training the model")

        start, next_report = time.time(), [1.0]
        word_count = [word_count]
        total_words = total_words or int(sum(v.count * v.sample_probability for v in itervalues(self.vocab)))
        jobs = Queue(maxsize=2 * self.workers)  # buffer ahead only a limited number of jobs.. this is the reason we can't simply use ThreadPool :(
        lock = threading.Lock()  # for shared state (=number of words trained so far, log reports...)

        def worker_train():
            """Train the model, lifting lists of sentences from the jobs queue."""
            work = zeros(self.layer1_size, dtype=REAL)  # each thread must have its own work memory
            neu1 = matutils.zeros_aligned(self.layer1_size, dtype=REAL)

            while True:
                job = jobs.get()
                if job is None:  # data finished, exit
                    break
                # update the learning rate before every job
                alpha = max(self.min_alpha, self.alpha * (1 - 1.0 * word_count[0] / total_words))
                # how many words did we train on? out-of-vocabulary (unknown) words do not count
                if self.sg:
                    job_words = sum(train_sentence_sg(self, sentence, alpha, work) for sentence in job)
                else:
                    job_words = sum(train_sentence_cbow(self, sentence, alpha, work, neu1) for sentence in job)
                with lock:
                    word_count[0] += job_words
                    elapsed = time.time() - start
                    if elapsed >= next_report[0]:
                        logger.info("PROGRESS: at %.2f%% words, alpha %.05f, %.0f words/s" %
                            (100.0 * word_count[0] / total_words, alpha, word_count[0] / elapsed if elapsed else 0.0))
                        next_report[0] = elapsed + 1.0  # don't flood the log, wait at least a second between progress reports

        workers = [threading.Thread(target=worker_train) for _ in xrange(self.workers)]
        for thread in workers:
            thread.daemon = True  # make interrupting the process with ctrl+c easier
            thread.start()

        def prepare_sentences():
            for sentence in sentences:
                # avoid calling random_sample() where prob >= 1, to speed things up a little:
                sampled = [self.vocab[word] for word in sentence
                    if word in self.vocab and (self.vocab[word].sample_probability >= 1.0 or self.vocab[word].sample_probability >= random.random_sample())]
                yield sampled

        # convert input strings to Vocab objects (eliding OOV/downsampled words), and start filling the jobs queue
        for job_no, job in enumerate(utils.grouper(prepare_sentences(), chunksize)):
            logger.debug("putting job #%i in the queue, qsize=%i" % (job_no, jobs.qsize()))
            jobs.put(job)
        logger.info("reached the end of input; waiting to finish %i outstanding jobs" % jobs.qsize())
        for _ in xrange(self.workers):
            jobs.put(None)  # give the workers heads up that they can finish -- no more work!

        for thread in workers:
            thread.join()

        elapsed = time.time() - start
        logger.info("training on %i words took %.1fs, %.0f words/s" %
            (word_count[0], elapsed, word_count[0] / elapsed if elapsed else 0.0))

        return word_count[0]

Example 29

Project: cortex
Source File: __init__.py
View license
def make_datasets(C, split=[0.7, 0.2, 0.1], idx=None,
                  train_batch_size=None,
                  valid_batch_size=None,
                  test_batch_size=None,
                  **dataset_args):
    '''Constructs train/valid/test datasets with idx or split.

    If idx is None, use split ratios to create indices.

    Arguments:
        C (Dataset).
        split (Optional[list]): Split ratios over total.
        idx (Optional[list]): Indices for train/valid/test datasets.
        train_batch_size (Optional[int])
        valid_batch_size (Optional[int])
        test_batch_size (Optional[int])

    Returns:
        Dataset: train dataset
        Dataset: valid dataset
        Dataset: test dataset
        list: Indices used if the split was created from ratios.

    '''
    if idx is None:
        assert split is not None
        if round(np.sum(split), 5) != 1. or len(split) != 3:
            raise ValueError(split)
        dummy = C(batch_size=1, **dataset_args)
        N = dummy.X.shape[0]
        idx = range(N)
        random.shuffle(idx)
        split_idx = []
        accum = 0
        for s in split:
            s_i = int(s * N + accum)
            split_idx.append(s_i)
            accum += s_i

        train_idx = idx[:split_idx[0]]
        valid_idx = idx[split_idx[0]:split_idx[1]]
        test_idx = idx[split_idx[1]:]
        idx = [train_idx, valid_idx, test_idx]
    if train_batch_size is not None and len(idx[0]) > 0:
        train = C(idx=idx[0], batch_size=train_batch_size, mode='train',
                  **dataset_args)
    else:
        train = None
    if valid_batch_size is not None and len(idx[1]) > 0:
        valid = C(idx=idx[1], batch_size=valid_batch_size, mode='valid',
                  **dataset_args)
    else:
        valid = None
    if test_batch_size is not None and len(idx[2]) > 0:
        test = C(idx=idx[2], batch_size=test_batch_size, mode='test',
                 **dataset_args)
    else:
        test = None

    return train, valid, test, idx
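
The split validation above rounds numpy.sum of the ratios to tolerate floating-point error; the same check in isolation:

import numpy as np

def check_split(split):
    if round(np.sum(split), 5) != 1.0 or len(split) != 3:
        raise ValueError(split)

check_split([0.7, 0.2, 0.1])   # passes even though 0.7 + 0.2 + 0.1 != 1.0 exactly in floating point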

Example 30

Project: pandashells
Source File: lomb_scargle_lib.py
View license
def lomb_scargle(df, time_col, val_col, interp_exponent=0, freq_order=False):
    """
    :type df: pandas.DataFrame
    :param df: An input dataframe

    :type time_col: str
    :param time_col: The column of the dataframe holding the timestamps

    :type val_col: str
    :param val_col: The column of the dataframe holding the observations

    :type interp_exponent: int
    :param interp_exponent: Interpolate the spectrum by this power of two

    :type freq_order: bool
    :param freq_order: If set to True spectrum is returned in frequency order
                       instead of period order (default=False)

    :rtype: Pandas DataFrame
    :returns: A dataframe with columns: period, freq, power, amp
    """
    # do imports here to avoid loading plot libraries when this
    # module is loaded in __init__.py
    # which then doesn't allow for doing matplotlib.use() later
    import gatspy

    # only care about timestamped values
    df = df[[time_col, val_col]].dropna()

    # standardize column names, remove mean from values, and sort by time
    warnings.filterwarnings('ignore')  # TODO: Update this filter when pandas removes support for sort_index
    df = df.rename(columns={time_col: 't', val_col: 'y'}).sort_index(by=['t'])
    warnings.resetwarnings()

    df['y'] = df['y'] - df.y.mean()

    #  compute total energy in the time series
    E_in = np.sum((df.y * df.y))

    # appropriately zero-pad the timeseries before taking spectrum
    pre_pad_length = len(df)
    t_pad, y_pad = _compute_pad(df.t.values, interp_exponent=interp_exponent)
    if len(t_pad) > 0:
        df = df.append(
            pd.DataFrame({'t': t_pad, 'y': y_pad}), ignore_index=True)

    # fit the Lomb-Scargle model to the time series
    model = gatspy.periodic.LombScargleFast()
    model.fit(df.t.values, df.y.values, 1)

    # compute params for getting results out of the Lomb-Scargle fit
    f0, df, N = _compute_params(df.t.values)
    f = f0 + df * np.arange(N)
    p = 1. / f

    # retrieve the Lomb-Scargle fit and normalize for power / amplitude
    yf = model.score_frequency_grid(f0, df, N)
    yf_power = 2 * yf * E_in * len(yf) / float(pre_pad_length) ** 2
    yf_amp = np.sqrt(yf_power)

    # generate the output dataframe
    df = pd.DataFrame(
        {'freq': f, 'period': p, 'power': yf_power, 'amp': yf_amp}
    )[['period', 'freq', 'power', 'amp']]

    # order by period if desired
    if not freq_order:
        warnings.filterwarnings('ignore')  # TODO: Update this filter when pandas removes support for sort_index
        df = df.sort_index(by='period')
        warnings.resetwarnings()

    return df
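
The total-energy normalization is the only numpy.sum call in this example; stripped of the gatspy fitting, it amounts to:

import numpy as np

y = np.sin(np.linspace(0, 20, 512))
y = y - y.mean()         # remove the mean first, as above
E_in = np.sum(y * y)     # total energy of the time series
print(E_in)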

Example 31

Project: discomll
Source File: naivebayes.py
View license
def reduce_fit(interface, state, label, inp):
    """
    Function separates aggregation of continuous and discrete features.
    For continuous features it aggregates partially calculated means and variances and returns them. For discrete
    features it aggregates pairs and returns them. Pairs with label occurrences are used to calculate prior probabilities
    """
    from disco.util import kvgroup  # function for grouping values by key
    import numpy as np

    out = interface.output(0)  # all outputted pairs have the same output label

    # model of naive Bayes stores label names, sum of all label occurrences and pairs
    # (feature index, feature values) for discrete features which are needed to optimize predict phase.
    fit_model = {"y_labels": [], "y_sum": 0, "iv": set()}
    combiner = {}  # combiner maintains correct order of means and variances.
    means, variances = [], []
    k_prev = ""

    for key, value in kvgroup(inp):  # input pairs are sorted and grouped by key
        k_split = key.split(state["delimiter"])  # pair is split

        if len(k_split) == 3:  # discrete features
            # store pair (feature index, feature value)
            fit_model["iv"].add(tuple(k_split[1:]))
            # aggregate and output occurrences of a pair
            out.add(tuple(k_split), sum(value))

        elif len(k_split) == 2:  # continuous features

            # if label is different than previous.
            # This enables calculation of all variances and means for every feature for current label.
            if k_split[0] != k_prev and k_prev != "":
                mean, var = zip(*[combiner[key] for key in sorted(combiner.keys())])
                means.append(mean)
                variances.append(var)

            # number of elements, partial mean, partial variance.
            n_a = mean_a = var_a = 0
            # code aggregates partially calculated means and variances
            for n_b, mean_b, var_b in value:
                n_ab = n_a + n_b
                var_a = ((n_a * var_a + n_b * var_b) / float(n_ab)) + (
                    n_a * n_b * ((mean_b - mean_a) / float(n_ab)) ** 2)
                mean_a = (n_a * mean_a + n_b * mean_b) / float(n_ab)
                n_a = n_ab

            # maintains correct order of statistics for every feature
            combiner[int(k_split[1])] = (mean_a, var_a + 1e-9)
            k_prev = k_split[0]

        else:  # aggregates label occurrences
            fit_model[key] = np.sum(value)
            fit_model["y_sum"] += fit_model[key]  # sum of all label occurrences
            fit_model["y_labels"].append(key)

    # if statistics for continuous features were not output in last iteration
    if len(means) > 0:
        mean, var = zip(*[combiner[key] for key in sorted(combiner.keys())])
        out.add("mean", np.array(means + [mean], dtype=np.float32))
        variances = np.array(variances + [var], dtype=np.float32)
        out.add("var", variances)
        out.add("var_log", np.log(np.pi * variances))

    # calculation of prior probabilities
    prior = [fit_model[y_label] / float(fit_model["y_sum"]) for y_label in fit_model["y_labels"]]
    out.add("prior", np.array(prior, dtype=np.float32))
    out.add("prior_log", np.log(prior))
    out.add("iv", list(fit_model["iv"]))
    out.add("y_labels", fit_model["y_labels"])

Example 32

View license
def active_contour(image, snake, alpha=0.01, beta=0.1,
                   w_line=0, w_edge=1, gamma=0.01,
                   bc='periodic', max_px_move=1.0,
                   max_iterations=2500, convergence=0.1):
    """Active contour model.

    Active contours by fitting snakes to features of images. Supports single
    and multichannel 2D images. Snakes can be periodic (for segmentation) or
    have fixed and/or free ends.

    Parameters
    ----------
    image : (N, M) or (N, M, 3) ndarray
        Input image.
    snake : (N, 2) ndarray
        Initialisation coordinates of snake. For periodic snakes, it should
        not include duplicate endpoints.
    alpha : float, optional
        Snake length shape parameter. Higher values make the snake contract
        faster.
    beta : float, optional
        Snake smoothness shape parameter. Higher values make the snake smoother.
    w_line : float, optional
        Controls attraction to brightness. Use negative values to attract to
        dark regions.
    w_edge : float, optional
        Controls attraction to edges. Use negative values to repel snake from
        edges.
    gamma : float, optional
        Explicit time stepping parameter.
    bc : {'periodic', 'free', 'fixed'}, optional
        Boundary conditions for worm. 'periodic' attaches the two ends of the
        snake, 'fixed' holds the end-points in place, and 'free' allows free
        movement of the ends. 'fixed' and 'free' can be combined by passing
        'fixed-free', 'free-fixed'. Passing 'fixed-fixed' or 'free-free'
        yields same behaviour as 'fixed' and 'free', respectively.
    max_px_move : float, optional
        Maximum pixel distance to move per iteration.
    max_iterations : int, optional
        Maximum iterations to optimize snake shape.
    convergence : float, optional
        Convergence criteria.

    Returns
    -------
    snake : (N, 2) ndarray
        Optimised snake, same shape as input parameter.

    References
    ----------
    .. [1]  Kass, M.; Witkin, A.; Terzopoulos, D. "Snakes: Active contour
            models". International Journal of Computer Vision 1 (4): 321
            (1988).

    Examples
    --------
    >>> from skimage.draw import circle_perimeter
    >>> from skimage.filters import gaussian

    Create and smooth image:

    >>> img = np.zeros((100, 100))
    >>> rr, cc = circle_perimeter(35, 45, 25)
    >>> img[rr, cc] = 1
    >>> img = gaussian(img, 2)

    Initialize spline:

    >>> s = np.linspace(0, 2*np.pi,100)
    >>> init = 50*np.array([np.cos(s), np.sin(s)]).T+50

    Fit spline to image:

    >>> snake = active_contour(img, init, w_edge=0, w_line=1) #doctest: +SKIP
    >>> dist = np.sqrt((45-snake[:, 0])**2 +(35-snake[:, 1])**2) #doctest: +SKIP
    >>> int(np.mean(dist)) #doctest: +SKIP
    25

    """
    split_version = scipy.__version__.split('.')
    if not(split_version[-1].isdigit()):
        split_version.pop()
    scipy_version = list(map(int, split_version))
    new_scipy = scipy_version[0] > 0 or \
                (scipy_version[0] == 0 and scipy_version[1] >= 14)
    if not new_scipy:
        raise NotImplementedError('You are using an old version of scipy. '
                      'Active contours is implemented for scipy versions '
                      '0.14.0 and above.')

    max_iterations = int(max_iterations)
    if max_iterations <= 0:
        raise ValueError("max_iterations should be >0.")
    convergence_order = 10
    valid_bcs = ['periodic', 'free', 'fixed', 'free-fixed',
                 'fixed-free', 'fixed-fixed', 'free-free']
    if bc not in valid_bcs:
        raise ValueError("Invalid boundary condition.\n" +
                         "Should be one of: "+", ".join(valid_bcs)+'.')
    img = img_as_float(image)
    RGB = img.ndim == 3

    # Find edges using sobel:
    if w_edge != 0:
        if RGB:
            edge = [sobel(img[:, :, 0]), sobel(img[:, :, 1]),
                    sobel(img[:, :, 2])]
        else:
            edge = [sobel(img)]
        for i in range(3 if RGB else 1):
            edge[i][0, :] = edge[i][1, :]
            edge[i][-1, :] = edge[i][-2, :]
            edge[i][:, 0] = edge[i][:, 1]
            edge[i][:, -1] = edge[i][:, -2]
    else:
        edge = [0]

    # Superimpose intensity and edge images:
    if RGB:
        img = w_line*np.sum(img, axis=2) \
            + w_edge*sum(edge)
    else:
        img = w_line*img + w_edge*edge[0]

    # Interpolate for smoothness:
    if new_scipy:
        intp = RectBivariateSpline(np.arange(img.shape[1]),
                                   np.arange(img.shape[0]),
                                   img.T, kx=2, ky=2, s=0)
    else:
        intp = np.vectorize(interp2d(np.arange(img.shape[1]),
                        np.arange(img.shape[0]), img, kind='cubic',
                        copy=False, bounds_error=False, fill_value=0))

    x, y = snake[:, 0].astype(np.float), snake[:, 1].astype(np.float)
    xsave = np.empty((convergence_order, len(x)))
    ysave = np.empty((convergence_order, len(x)))

    # Build snake shape matrix for Euler equation
    n = len(x)
    a = np.roll(np.eye(n), -1, axis=0) + \
        np.roll(np.eye(n), -1, axis=1) - \
        2*np.eye(n)  # second order derivative, central difference
    b = np.roll(np.eye(n), -2, axis=0) + \
        np.roll(np.eye(n), -2, axis=1) - \
        4*np.roll(np.eye(n), -1, axis=0) - \
        4*np.roll(np.eye(n), -1, axis=1) + \
        6*np.eye(n)  # fourth order derivative, central difference
    A = -alpha*a + beta*b

    # Impose boundary conditions different from periodic:
    sfixed = False
    if bc.startswith('fixed'):
        A[0, :] = 0
        A[1, :] = 0
        A[1, :3] = [1, -2, 1]
        sfixed = True
    efixed = False
    if bc.endswith('fixed'):
        A[-1, :] = 0
        A[-2, :] = 0
        A[-2, -3:] = [1, -2, 1]
        efixed = True
    sfree = False
    if bc.startswith('free'):
        A[0, :] = 0
        A[0, :3] = [1, -2, 1]
        A[1, :] = 0
        A[1, :4] = [-1, 3, -3, 1]
        sfree = True
    efree = False
    if bc.endswith('free'):
        A[-1, :] = 0
        A[-1, -3:] = [1, -2, 1]
        A[-2, :] = 0
        A[-2, -4:] = [-1, 3, -3, 1]
        efree = True

    # Only one inversion is needed for implicit spline energy minimization:
    inv = scipy.linalg.inv(A+gamma*np.eye(n))

    # Explicit time stepping for image energy minimization:
    for i in range(max_iterations):
        if new_scipy:
            fx = intp(x, y, dx=1, grid=False)
            fy = intp(x, y, dy=1, grid=False)
        else:
            fx = intp(x, y, dx=1)
            fy = intp(x, y, dy=1)
        if sfixed:
            fx[0] = 0
            fy[0] = 0
        if efixed:
            fx[-1] = 0
            fy[-1] = 0
        if sfree:
            fx[0] *= 2
            fy[0] *= 2
        if efree:
            fx[-1] *= 2
            fy[-1] *= 2
        xn = np.dot(inv, gamma*x + fx)
        yn = np.dot(inv, gamma*y + fy)

        # Movements are capped to max_px_move per iteration:
        dx = max_px_move*np.tanh(xn-x)
        dy = max_px_move*np.tanh(yn-y)
        if sfixed:
            dx[0] = 0
            dy[0] = 0
        if efixed:
            dx[-1] = 0
            dy[-1] = 0
        x += dx
        y += dy

        # Convergence criteria needs to compare to a number of previous
        # configurations since oscillations can occur.
        j = i % (convergence_order+1)
        if j < convergence_order:
            xsave[j, :] = x
            ysave[j, :] = y
        else:
            dist = np.min(np.max(np.abs(xsave-x[None, :]) +
                   np.abs(ysave-y[None, :]), 1))
            if dist < convergence:
                break

    return np.array([x, y]).T
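
For multichannel input, the image energy mixes intensity and edge terms, with numpy.sum collapsing the color axis; in isolation:

import numpy as np

img = np.random.rand(64, 64, 3)                     # RGB image
edge = [np.random.rand(64, 64) for _ in range(3)]   # per-channel edge maps
w_line, w_edge = 0.5, 1.0
energy = w_line * np.sum(img, axis=2) + w_edge * sum(edge)
print(energy.shape)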

Example 33

Project: scikit-learn
Source File: calibration.py
View license
    def fit(self, X, y, sample_weight=None):
        """Fit the calibrated model

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training data.

        y : array-like, shape (n_samples,)
            Target values.

        sample_weight : array-like, shape = [n_samples] or None
            Sample weights. If None, then samples are equally weighted.

        Returns
        -------
        self : object
            Returns an instance of self.
        """
        X, y = check_X_y(X, y, accept_sparse=['csc', 'csr', 'coo'],
                         force_all_finite=False)
        X, y = indexable(X, y)
        le = LabelBinarizer().fit(y)
        self.classes_ = le.classes_

        # Check that each cross-validation fold can have at least one
        # example per class
        n_folds = self.cv if isinstance(self.cv, int) \
            else self.cv.n_folds if hasattr(self.cv, "n_folds") else None
        if n_folds and \
                np.any([np.sum(y == class_) < n_folds for class_ in
                        self.classes_]):
            raise ValueError("Requesting %d-fold cross-validation but provided"
                             " less than %d examples for at least one class."
                             % (n_folds, n_folds))

        self.calibrated_classifiers_ = []
        if self.base_estimator is None:
            # we want all classifiers that don't expose a random_state
            # to be deterministic (and we don't want to expose this one).
            base_estimator = LinearSVC(random_state=0)
        else:
            base_estimator = self.base_estimator

        if self.cv == "prefit":
            calibrated_classifier = _CalibratedClassifier(
                base_estimator, method=self.method)
            if sample_weight is not None:
                calibrated_classifier.fit(X, y, sample_weight)
            else:
                calibrated_classifier.fit(X, y)
            self.calibrated_classifiers_.append(calibrated_classifier)
        else:
            cv = check_cv(self.cv, y, classifier=True)
            fit_parameters = signature(base_estimator.fit).parameters
            estimator_name = type(base_estimator).__name__
            if (sample_weight is not None
                    and "sample_weight" not in fit_parameters):
                warnings.warn("%s does not support sample_weight. Samples"
                              " weights are only used for the calibration"
                              " itself." % estimator_name)
                base_estimator_sample_weight = None
            else:
                base_estimator_sample_weight = sample_weight
            for train, test in cv.split(X, y):
                this_estimator = clone(base_estimator)
                if base_estimator_sample_weight is not None:
                    this_estimator.fit(
                        X[train], y[train],
                        sample_weight=base_estimator_sample_weight[train])
                else:
                    this_estimator.fit(X[train], y[train])

                calibrated_classifier = _CalibratedClassifier(
                    this_estimator, method=self.method,
                    classes=self.classes_)
                if sample_weight is not None:
                    calibrated_classifier.fit(X[test], y[test],
                                              sample_weight[test])
                else:
                    calibrated_classifier.fit(X[test], y[test])
                self.calibrated_classifiers_.append(calibrated_classifier)

        return self
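
The cross-validation guard above counts examples per class with numpy.sum over a boolean mask; stand-alone:

import numpy as np

y = np.array([0, 0, 1, 1, 1, 2])
n_folds = 3
too_few = np.any([np.sum(y == c) < n_folds for c in np.unique(y)])
print(too_few)   # True: classes 0 and 2 have fewer than 3 examples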

Example 34

Project: scikit-learn
Source File: test_text.py
View license
def test_vectorizer():
    # raw documents as an iterator
    train_data = iter(ALL_FOOD_DOCS[:-1])
    test_data = [ALL_FOOD_DOCS[-1]]
    n_train = len(ALL_FOOD_DOCS) - 1

    # test without vocabulary
    v1 = CountVectorizer(max_df=0.5)
    counts_train = v1.fit_transform(train_data)
    if hasattr(counts_train, 'tocsr'):
        counts_train = counts_train.tocsr()
    assert_equal(counts_train[0, v1.vocabulary_["pizza"]], 2)

    # build a vectorizer v2 with the same vocabulary as the one fitted by v1
    v2 = CountVectorizer(vocabulary=v1.vocabulary_)

    # check that the two vectorizers give the same output on the test sample
    for v in (v1, v2):
        counts_test = v.transform(test_data)
        if hasattr(counts_test, 'tocsr'):
            counts_test = counts_test.tocsr()

        vocabulary = v.vocabulary_
        assert_equal(counts_test[0, vocabulary["salad"]], 1)
        assert_equal(counts_test[0, vocabulary["tomato"]], 1)
        assert_equal(counts_test[0, vocabulary["water"]], 1)

        # stop word from the fixed list
        assert_false("the" in vocabulary)

        # stop word found automatically by the vectorizer DF thresholding;
        # words that are highly frequent across the complete corpus are
        # likely to be uninformative (either real stop words or extraction
        # artifacts)
        assert_false("copyright" in vocabulary)

        # not present in the sample
        assert_equal(counts_test[0, vocabulary["coke"]], 0)
        assert_equal(counts_test[0, vocabulary["burger"]], 0)
        assert_equal(counts_test[0, vocabulary["beer"]], 0)
        assert_equal(counts_test[0, vocabulary["pizza"]], 0)

    # test tf-idf
    t1 = TfidfTransformer(norm='l1')
    tfidf = t1.fit(counts_train).transform(counts_train).toarray()
    assert_equal(len(t1.idf_), len(v1.vocabulary_))
    assert_equal(tfidf.shape, (n_train, len(v1.vocabulary_)))

    # test tf-idf with new data
    tfidf_test = t1.transform(counts_test).toarray()
    assert_equal(tfidf_test.shape, (len(test_data), len(v1.vocabulary_)))

    # test tf alone
    t2 = TfidfTransformer(norm='l1', use_idf=False)
    tf = t2.fit(counts_train).transform(counts_train).toarray()
    assert_equal(t2.idf_, None)

    # test idf transform with unlearned idf vector
    t3 = TfidfTransformer(use_idf=True)
    assert_raises(ValueError, t3.transform, counts_train)

    # test idf transform with incompatible n_features
    X = [[1, 1, 5],
         [1, 1, 0]]
    t3.fit(X)
    X_incompt = [[1, 3],
                 [1, 3]]
    assert_raises(ValueError, t3.transform, X_incompt)

    # L1-normalized term frequencies sum to one
    assert_array_almost_equal(np.sum(tf, axis=1), [1.0] * n_train)

    # test the direct tfidf vectorizer
    # (equivalent to term count vectorizer + tfidf transformer)
    train_data = iter(ALL_FOOD_DOCS[:-1])
    tv = TfidfVectorizer(norm='l1')

    tv.max_df = v1.max_df
    tfidf2 = tv.fit_transform(train_data).toarray()
    assert_false(tv.fixed_vocabulary_)
    assert_array_almost_equal(tfidf, tfidf2)

    # test the direct tfidf vectorizer with new data
    tfidf_test2 = tv.transform(test_data).toarray()
    assert_array_almost_equal(tfidf_test, tfidf_test2)

    # test transform on unfitted vectorizer with empty vocabulary
    v3 = CountVectorizer(vocabulary=None)
    assert_raises(ValueError, v3.transform, train_data)

    # ascii preprocessor?
    v3.set_params(strip_accents='ascii', lowercase=False)
    assert_equal(v3.build_preprocessor(), strip_accents_ascii)

    # error on bad strip_accents param
    v3.set_params(strip_accents='_gabbledegook_', preprocessor=None)
    assert_raises(ValueError, v3.build_preprocessor)

    # error with bad analyzer type
    v3.set_params(analyzer='_invalid_analyzer_type_')
    assert_raises(ValueError, v3.build_analyzer)
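
As a standalone check of the L1-normalisation assertion above (a toy three-document corpus, not the ALL_FOOD_DOCS fixture), each row of the term-frequency matrix sums to one:

import numpy as np
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer

docs = ["the pizza pizza beer", "the salad and water", "sparkling water water"]
counts = CountVectorizer().fit_transform(docs)
tf = TfidfTransformer(norm='l1', use_idf=False).fit_transform(counts).toarray()

print(np.sum(tf, axis=1))  # -> [1. 1. 1.]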

Example 35

Project: scikit-learn
Source File: huber.py
View license
def _huber_loss_and_gradient(w, X, y, epsilon, alpha, sample_weight=None):
    """Returns the Huber loss and the gradient.

    Parameters
    ----------
    w : ndarray, shape (n_features + 1,) or (n_features + 2,)
        Feature vector.
        w[:n_features] gives the coefficients
        w[-1] gives the scale factor and if the intercept is fit w[-2]
        gives the intercept factor.

    X : ndarray, shape (n_samples, n_features)
        Input data.

    y : ndarray, shape (n_samples,)
        Target vector.

    epsilon : float
        Robustness of the Huber estimator.

    alpha : float
        Regularization parameter.

    sample_weight : ndarray, shape (n_samples,), optional
        Weight assigned to each sample.

    Returns
    -------
    loss: float
        Huber loss.

    gradient: ndarray, shape (len(w))
        Returns the derivative of the Huber loss with respect to each
        coefficient, intercept and the scale as a vector.
    """
    X_is_sparse = sparse.issparse(X)
    _, n_features = X.shape
    fit_intercept = (n_features + 2 == w.shape[0])
    if fit_intercept:
        intercept = w[-2]
    sigma = w[-1]
    w = w[:n_features]
    n_samples = np.sum(sample_weight)

    # Calculate the values where |y - X'w - c| / sigma > epsilon
    # The values above this threshold are outliers.
    linear_loss = y - safe_sparse_dot(X, w)
    if fit_intercept:
        linear_loss -= intercept
    abs_linear_loss = np.abs(linear_loss)
    outliers_mask = abs_linear_loss > epsilon * sigma

    # Calculate the linear loss due to the outliers.
    # This is equal to (2 * epsilon * |y - X'w - c| / sigma - epsilon**2) * sigma
    outliers = abs_linear_loss[outliers_mask]
    num_outliers = np.count_nonzero(outliers_mask)
    n_non_outliers = X.shape[0] - num_outliers

    # n_sw_outliers includes the weight given to the outliers, while
    # num_outliers is just the number of outliers.
    outliers_sw = sample_weight[outliers_mask]
    n_sw_outliers = np.sum(outliers_sw)
    outlier_loss = (2. * epsilon * np.sum(outliers_sw * outliers) -
                    sigma * n_sw_outliers * epsilon ** 2)

    # Calculate the quadratic loss due to the non-outliers.
    # This is equal to |(y - X'w - c)**2 / sigma**2| * sigma
    non_outliers = linear_loss[~outliers_mask]
    weighted_non_outliers = sample_weight[~outliers_mask] * non_outliers
    weighted_loss = np.dot(weighted_non_outliers.T, non_outliers)
    squared_loss = weighted_loss / sigma

    if fit_intercept:
        grad = np.zeros(n_features + 2)
    else:
        grad = np.zeros(n_features + 1)

    # Gradient due to the squared loss.
    X_non_outliers = -axis0_safe_slice(X, ~outliers_mask, n_non_outliers)
    grad[:n_features] = (
        2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

    # Gradient due to the linear loss.
    signed_outliers = np.ones_like(outliers)
    signed_outliers_mask = linear_loss[outliers_mask] < 0
    signed_outliers[signed_outliers_mask] = -1.0
    X_outliers = axis0_safe_slice(X, outliers_mask, num_outliers)
    sw_outliers = sample_weight[outliers_mask] * signed_outliers
    grad[:n_features] -= 2. * epsilon * (
        safe_sparse_dot(sw_outliers, X_outliers))

    # Gradient due to the penalty.
    grad[:n_features] += alpha * 2. * w

    # Gradient due to sigma.
    grad[-1] = n_samples
    grad[-1] -= n_sw_outliers * epsilon ** 2
    grad[-1] -= squared_loss / sigma

    # Gradient due to the intercept.
    if fit_intercept:
        grad[-2] = -2. * np.sum(weighted_non_outliers) / sigma
        grad[-2] -= 2. * epsilon * np.sum(sw_outliers)

    loss = n_samples * sigma + squared_loss + outlier_loss
    loss += alpha * np.dot(w, w)
    return loss, grad
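
A toy illustration (made-up residuals and weights, not the scikit-learn estimator itself) of the np.sum reductions used above, namely the weighted sample count and the weighted outlier loss:

import numpy as np

residual = np.array([0.1, -0.2, 3.0, -4.5])    # toy values of y - X'w
sample_weight = np.array([1.0, 2.0, 1.0, 0.5])
epsilon, sigma = 1.35, 1.0

n_samples = np.sum(sample_weight)              # effective (weighted) sample count
outliers_mask = np.abs(residual) > epsilon * sigma
outliers_sw = sample_weight[outliers_mask]
n_sw_outliers = np.sum(outliers_sw)            # weight carried by the outliers
outlier_loss = (2. * epsilon * np.sum(outliers_sw * np.abs(residual)[outliers_mask])
                - sigma * n_sw_outliers * epsilon ** 2)
print(n_samples, n_sw_outliers, outlier_loss)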

Example 36

Project: imbalanced-learn
Source File: adasyn.py
View license
    def _sample(self, X, y):
        """Resample the dataset.

        Parameters
        ----------
        X : ndarray, shape (n_samples, n_features)
            Matrix containing the data which have to be sampled.

        y : ndarray, shape (n_samples, )
            Corresponding label for each sample in X.

        Returns
        -------
        X_resampled : ndarray, shape (n_samples_new, n_features)
            The array containing the resampled data.

        y_resampled : ndarray, shape (n_samples_new)
            The corresponding label of `X_resampled`

        """
        random_state = check_random_state(self.random_state)

        # Keep the samples from the majority class
        X_resampled = X.copy()
        y_resampled = y.copy()

        # Define the number of samples to create.
        # We handle only the two-class problem for the moment.
        if self.ratio == 'auto':
            num_samples = (self.stats_c_[self.maj_c_] -
                           self.stats_c_[self.min_c_])
        else:
            num_samples = int((self.ratio * self.stats_c_[self.maj_c_]) -
                              self.stats_c_[self.min_c_])

        # Start by separating minority class features and target values.
        X_min = X[y == self.min_c_]

        # Print if verbose is true
        self.logger.debug('Finding the %s nearest neighbours ...',
                          self.nn_.n_neighbors - 1)

        # Look for k-th nearest neighbours, excluding, of course, the
        # point itself.
        self.nn_.fit(X)

        # Get the distance to the NN
        _, ind_nn = self.nn_.kneighbors(X_min)

        # Compute the ratio of majority samples next to minority samples
        ratio_nn = (np.sum(y[ind_nn[:, 1:]] == self.maj_c_, axis=1) /
                    (self.nn_.n_neighbors - 1))
        # Check that we found at least some neighbours belonging to the
        # majority class
        if not np.sum(ratio_nn):
            raise RuntimeError('No neighbours belong to the majority'
                               ' class. This case would induce a NaN through'
                               ' a division by zero. ADASYN is not suited for'
                               ' this specific dataset. Use SMOTE.')
        # Normalize the ratio
        ratio_nn /= np.sum(ratio_nn)

        # Compute the number of sample to be generated
        num_samples_nn = np.round(ratio_nn * num_samples).astype(int)

        # For each minority samples
        for x_i, x_i_nn, num_sample_i in zip(X_min, ind_nn, num_samples_nn):

            # Pick-up the neighbors wanted
            nn_zs = random_state.randint(1, high=self.nn_.n_neighbors,
                                         size=num_sample_i)

            # Create a new sample
            for nn_z in nn_zs:
                step = random_state.uniform()
                x_gen = x_i + step * (x_i - X[x_i_nn[nn_z], :])
                X_resampled = np.vstack((X_resampled, x_gen))
                y_resampled = np.hstack((y_resampled, self.min_c_))

        self.logger.info('Over-sampling performed: %s', Counter(
            y_resampled))

        return X_resampled, y_resampled
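
The per-sample generation counts above come from a simple np.sum normalisation; a toy version (hypothetical neighbour ratios and deficit, not the ADASYN estimator):

import numpy as np

ratio_nn = np.array([0.0, 0.2, 0.6, 1.0])    # toy fraction of majority neighbours
num_samples = 10                             # hypothetical number of samples to create

if not np.sum(ratio_nn):
    raise RuntimeError('no majority neighbours found')
ratio_nn = ratio_nn / np.sum(ratio_nn)       # normalise into a distribution
num_samples_nn = np.round(ratio_nn * num_samples).astype(int)
print(num_samples_nn, num_samples_nn.sum())  # e.g. [0 1 3 6] 10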

Example 37

Project: imbalanced-learn
Source File: nearmiss.py
View license
    def _selection_dist_based(self, X, y, dist_vec, num_samples, key,
                              sel_strategy='nearest'):
        """Select the appropriate samples depending of the strategy selected.

        Parameters
        ----------
        X : ndarray, shape (n_samples, n_features)
            Original samples.

        y : ndarray, shape (n_samples, )
            Associated label to X.

        dist_vec : ndarray, shape (n_samples, )
            The distance matrix to the nearest neighbour.

        num_samples: int
            The desired number of samples to select.

        key : str or int,
            The target class.

        sel_strategy : str, optional (default='nearest')
            Strategy to select the samples. Either 'nearest' or 'farthest'

        Returns
        -------
        X_sel : ndarray, shape (num_samples, n_features)
            Selected samples.

        y_sel : ndarray, shape (num_samples, )
            The associated label.

        idx_sel : ndarray, shape (num_samples, )
            The list of the indices of the selected samples.

        """

        # Compute the distance considering the farthest neighbour
        dist_avg_vec = np.sum(dist_vec[:, -self.nn_.n_neighbors:], axis=1)

        self.logger.debug('The size of the distance matrix is %s',
                          dist_vec.shape)
        self.logger.debug('The size of the samples that can be selected is %s',
                          X[y == key].shape)

        if dist_vec.shape[0] != X[y == key].shape[0]:
            raise RuntimeError('The samples to be selected do not correspond'
                               ' to the distance matrix given. Ensure that'
                               ' both `X[y == key]` and `dist_vec` are'
                               ' related.')

        # Sort the list of distance and get the index
        if sel_strategy == 'nearest':
            sort_way = False
        elif sel_strategy == 'farthest':
            sort_way = True
        else:
            raise NotImplementedError

        sorted_idx = sorted(range(len(dist_avg_vec)),
                            key=dist_avg_vec.__getitem__,
                            reverse=sort_way)

        # Throw a warning to tell the user that we did not have enough samples
        # to select and that we just select everything
        if len(sorted_idx) < num_samples:
            warnings.warn('The number of samples to be selected is larger'
                          ' than the number of samples available. The'
                          ' balancing ratio cannot be ensured and all samples'
                          ' will be returned.')

        # Select the desired number of samples
        sel_idx = sorted_idx[:num_samples]

        return (X[y == key][sel_idx], y[y == key][sel_idx],
                np.flatnonzero(y == key)[sel_idx])
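
A toy version of the selection step above (made-up distance matrix, 'nearest' strategy): np.sum over the neighbour axis gives one aggregate distance per sample, which is then sorted.

import numpy as np

dist_vec = np.array([[0.1, 0.4, 0.9],   # toy distances to 3 neighbours
                     [0.2, 0.3, 0.5],
                     [0.7, 0.8, 1.2]])
n_neighbors, num_samples = 3, 2

dist_avg_vec = np.sum(dist_vec[:, -n_neighbors:], axis=1)
sorted_idx = sorted(range(len(dist_avg_vec)), key=dist_avg_vec.__getitem__)
print(dist_avg_vec, sorted_idx[:num_samples])  # [1.4 1.  2.7] [1, 0]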

Example 38

Project: scipy
Source File: common.py
View license
def solve_lsq_trust_region(n, m, uf, s, V, Delta, initial_alpha=None,
                           rtol=0.01, max_iter=10):
    """Solve a trust-region problem arising in least-squares minimization.
    
    This function implements a method described by J. J. More [1]_ and used
    in MINPACK, but it relies on a single SVD of Jacobian instead of series
    of Cholesky decompositions. Before running this function, compute:
    ``U, s, VT = svd(J, full_matrices=False)``.
    
    Parameters
    ----------
    n : int
        Number of variables.
    m : int
        Number of residuals.
    uf : ndarray
        Computed as U.T.dot(f).
    s : ndarray
        Singular values of J.
    V : ndarray
        Transpose of VT.
    Delta : float
        Radius of a trust region.
    initial_alpha : float, optional
        Initial guess for alpha, which might be available from a previous
        iteration. If None, determined automatically.
    rtol : float, optional
        Stopping tolerance for the root-finding procedure. Namely, the
        solution ``p`` will satisfy ``abs(norm(p) - Delta) < rtol * Delta``.
    max_iter : int, optional
        Maximum allowed number of iterations for the root-finding procedure.
    
    Returns
    -------
    p : ndarray, shape (n,)
        Found solution of a trust-region problem.
    alpha : float
        Positive value such that (J.T*J + alpha*I)*p = -J.T*f.
        Sometimes called Levenberg-Marquardt parameter.
    n_iter : int
        Number of iterations made by root-finding procedure. Zero means
        that Gauss-Newton step was selected as the solution.
    
    References
    ----------
    .. [1] More, J. J., "The Levenberg-Marquardt Algorithm: Implementation
           and Theory," Numerical Analysis, ed. G. A. Watson, Lecture Notes
           in Mathematics 630, Springer Verlag, pp. 105-116, 1977.
    """
    def phi_and_derivative(alpha, suf, s, Delta):
        """Function of which to find zero.
        
        It is defined as "norm of regularized (by alpha) least-squares
        solution minus `Delta`". Refer to [1]_.
        """
        denom = s**2 + alpha
        p_norm = norm(suf / denom)
        phi = p_norm - Delta
        phi_prime = -np.sum(suf ** 2 / denom**3) / p_norm
        return phi, phi_prime

    suf = s * uf

    # Check if J has full rank and try Gauss-Newton step.
    if m >= n:
        threshold = EPS * m * s[0]
        full_rank = s[-1] > threshold
    else:
        full_rank = False

    if full_rank:
        p = -V.dot(uf / s)
        if norm(p) <= Delta:
            return p, 0.0, 0

    alpha_upper = norm(suf) / Delta

    if full_rank:
        phi, phi_prime = phi_and_derivative(0.0, suf, s, Delta)
        alpha_lower = -phi / phi_prime
    else:
        alpha_lower = 0.0

    if initial_alpha is None or not full_rank and initial_alpha == 0:
        alpha = max(0.001 * alpha_upper, (alpha_lower * alpha_upper)**0.5)
    else:
        alpha = initial_alpha

    for it in range(max_iter):
        if alpha < alpha_lower or alpha > alpha_upper:
            alpha = max(0.001 * alpha_upper, (alpha_lower * alpha_upper)**0.5)

        phi, phi_prime = phi_and_derivative(alpha, suf, s, Delta)

        if phi < 0:
            alpha_upper = alpha

        ratio = phi / phi_prime
        alpha_lower = max(alpha_lower, alpha - ratio)
        alpha -= (phi + Delta) * ratio / Delta

        if np.abs(phi) < rtol * Delta:
            break

    p = -V.dot(suf / (s**2 + alpha))

    # Make the norm of p equal to Delta; p is changed only slightly by this.
    # It is done to prevent p from lying outside the trust region (which can
    # cause problems later).
    p *= Delta / norm(p)

    return p, alpha, it + 1
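
A toy evaluation of the inner secular function above (made-up singular values and right-hand side, not part of SciPy's public API): phi measures how far the regularised step is from the trust-region boundary, and its derivative uses an np.sum reduction.

import numpy as np
from numpy.linalg import norm

s = np.array([3.0, 1.0, 0.5])      # toy singular values of J
suf = np.array([1.0, -2.0, 0.3])   # toy s * U.T.dot(f)
Delta, alpha = 0.5, 0.1

denom = s ** 2 + alpha
p_norm = norm(suf / denom)
phi = p_norm - Delta                                  # root gives ||p|| = Delta
phi_prime = -np.sum(suf ** 2 / denom ** 3) / p_norm   # always negative, so phi decreases in alpha
print(phi, phi_prime)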

Example 39

Project: scipy
Source File: dogbox.py
View license
def dogbox(fun, jac, x0, f0, J0, lb, ub, ftol, xtol, gtol, max_nfev, x_scale,
           loss_function, tr_solver, tr_options, verbose):
    f = f0
    f_true = f.copy()
    nfev = 1

    J = J0
    njev = 1

    if loss_function is not None:
        rho = loss_function(f)
        cost = 0.5 * np.sum(rho[0])
        J, f = scale_for_robust_loss_function(J, f, rho)
    else:
        cost = 0.5 * np.dot(f, f)

    g = compute_grad(J, f)

    jac_scale = isinstance(x_scale, string_types) and x_scale == 'jac'
    if jac_scale:
        scale, scale_inv = compute_jac_scale(J)
    else:
        scale, scale_inv = x_scale, 1 / x_scale

    Delta = norm(x0 * scale_inv, ord=np.inf)
    if Delta == 0:
        Delta = 1.0

    on_bound = np.zeros_like(x0, dtype=int)
    on_bound[np.equal(x0, lb)] = -1
    on_bound[np.equal(x0, ub)] = 1

    x = x0
    step = np.empty_like(x0)

    if max_nfev is None:
        max_nfev = x0.size * 100

    termination_status = None
    iteration = 0
    step_norm = None
    actual_reduction = None

    if verbose == 2:
        print_header_nonlinear()

    while True:
        active_set = on_bound * g < 0
        free_set = ~active_set

        g_free = g[free_set]
        g_full = g.copy()
        g[active_set] = 0

        g_norm = norm(g, ord=np.inf)
        if g_norm < gtol:
            termination_status = 1

        if verbose == 2:
            print_iteration_nonlinear(iteration, nfev, cost, actual_reduction,
                                      step_norm, g_norm)

        if termination_status is not None or nfev == max_nfev:
            break

        x_free = x[free_set]
        lb_free = lb[free_set]
        ub_free = ub[free_set]
        scale_free = scale[free_set]

        # Compute (Gauss-)Newton and build quadratic model for Cauchy step.
        if tr_solver == 'exact':
            J_free = J[:, free_set]
            newton_step = lstsq(J_free, -f)[0]

            # Coefficients for the quadratic model along the anti-gradient.
            a, b = build_quadratic_1d(J_free, g_free, -g_free)
        elif tr_solver == 'lsmr':
            Jop = aslinearoperator(J)

            # We compute the lsmr step in scaled variables and then transform
            # back to the original variables. If lsmr gave the exact lsq
            # solution this would be equivalent to not doing any
            # transformations, but in practice it works better this way.

            # We pass active_set to make computations as if we selected
            # the free subset of J columns, but without actually doing any
            # slicing, which is expensive for sparse matrices and impossible
            # for LinearOperator.

            lsmr_op = lsmr_operator(Jop, scale, active_set)
            newton_step = -lsmr(lsmr_op, f, **tr_options)[0][free_set]
            newton_step *= scale_free

            # Components of g for active variables were zeroed, so this call
            # is correct and equivalent to using J_free and g_free.
            a, b = build_quadratic_1d(Jop, g, -g)

        actual_reduction = -1.0
        while actual_reduction <= 0 and nfev < max_nfev:
            tr_bounds = Delta * scale_free

            step_free, on_bound_free, tr_hit = dogleg_step(
                x_free, newton_step, g_free, a, b, tr_bounds, lb_free, ub_free)

            step.fill(0.0)
            step[free_set] = step_free

            if tr_solver == 'exact':
                predicted_reduction = -evaluate_quadratic(J_free, g_free,
                                                          step_free)
            elif tr_solver == 'lsmr':
                predicted_reduction = -evaluate_quadratic(Jop, g, step)

            x_new = x + step
            f_new = fun(x_new)
            nfev += 1

            step_h_norm = norm(step * scale_inv, ord=np.inf)

            if not np.all(np.isfinite(f_new)):
                Delta = 0.25 * step_h_norm
                continue

            # Usual trust-region step quality estimation.
            if loss_function is not None:
                cost_new = loss_function(f_new, cost_only=True)
            else:
                cost_new = 0.5 * np.dot(f_new, f_new)
            actual_reduction = cost - cost_new

            Delta, ratio = update_tr_radius(
                Delta, actual_reduction, predicted_reduction,
                step_h_norm, tr_hit
            )

            step_norm = norm(step)
            termination_status = check_termination(
                actual_reduction, cost, step_norm, norm(x), ratio, ftol, xtol)

            if termination_status is not None:
                break

        if actual_reduction > 0:
            on_bound[free_set] = on_bound_free

            x = x_new
            # Set variables exactly at the boundary.
            mask = on_bound == -1
            x[mask] = lb[mask]
            mask = on_bound == 1
            x[mask] = ub[mask]

            f = f_new
            f_true = f.copy()

            cost = cost_new

            J = jac(x, f)
            njev += 1

            if loss_function is not None:
                rho = loss_function(f)
                J, f = scale_for_robust_loss_function(J, f, rho)

            g = compute_grad(J, f)

            if jac_scale:
                scale, scale_inv = compute_jac_scale(J, scale_inv)
        else:
            step_norm = 0
            actual_reduction = 0

        iteration += 1

    if termination_status is None:
        termination_status = 0

    return OptimizeResult(
        x=x, cost=cost, fun=f_true, jac=J, grad=g_full, optimality=g_norm,
        active_mask=on_bound, nfev=nfev, njev=njev, status=termination_status)
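
The dogbox driver above is internal; it is normally reached through the public scipy.optimize.least_squares interface with method='dogbox'. A minimal sketch with a toy residual function (values are illustrative): with the default linear loss the reported cost equals half the np.sum of the squared residuals.

import numpy as np
from scipy.optimize import least_squares

def residuals(x):
    # toy nonlinear residuals with a minimum near (1, 1)
    return np.array([x[0] - 1.0, 10.0 * (x[1] - x[0] ** 2)])

res = least_squares(residuals, x0=[2.0, 2.0], bounds=([0, 0], [3, 3]),
                    method='dogbox')
print(res.x)
print(np.isclose(res.cost, 0.5 * np.sum(res.fun ** 2)))  # True for the linear loss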

Example 40

Project: scipy
Source File: trf.py
View license
def trf_bounds(fun, jac, x0, f0, J0, lb, ub, ftol, xtol, gtol, max_nfev,
               x_scale, loss_function, tr_solver, tr_options, verbose):
    x = x0.copy()

    f = f0
    f_true = f.copy()
    nfev = 1

    J = J0
    njev = 1
    m, n = J.shape

    if loss_function is not None:
        rho = loss_function(f)
        cost = 0.5 * np.sum(rho[0])
        J, f = scale_for_robust_loss_function(J, f, rho)
    else:
        cost = 0.5 * np.dot(f, f)

    g = compute_grad(J, f)

    jac_scale = isinstance(x_scale, string_types) and x_scale == 'jac'
    if jac_scale:
        scale, scale_inv = compute_jac_scale(J)
    else:
        scale, scale_inv = x_scale, 1 / x_scale

    v, dv = CL_scaling_vector(x, g, lb, ub)
    v[dv != 0] *= scale_inv[dv != 0]
    Delta = norm(x0 * scale_inv / v**0.5)
    if Delta == 0:
        Delta = 1.0

    g_norm = norm(g * v, ord=np.inf)

    f_augmented = np.zeros((m + n))
    if tr_solver == 'exact':
        J_augmented = np.empty((m + n, n))
    elif tr_solver == 'lsmr':
        reg_term = 0.0
        regularize = tr_options.pop('regularize', True)

    if max_nfev is None:
        max_nfev = x0.size * 100

    alpha = 0.0  # "Levenberg-Marquardt" parameter

    termination_status = None
    iteration = 0
    step_norm = None
    actual_reduction = None

    if verbose == 2:
        print_header_nonlinear()

    while True:
        v, dv = CL_scaling_vector(x, g, lb, ub)

        g_norm = norm(g * v, ord=np.inf)
        if g_norm < gtol:
            termination_status = 1

        if verbose == 2:
            print_iteration_nonlinear(iteration, nfev, cost, actual_reduction,
                                      step_norm, g_norm)

        if termination_status is not None or nfev == max_nfev:
            break

        # Now compute variables in "hat" space. Here we also account for
        # scaling introduced by `x_scale` parameter. This part is a bit tricky,
        # you have to write down the formulas and see how the trust-region
        # problem is formulated when the two types of scaling are applied.
        # The idea is that first we apply `x_scale` and then apply Coleman-Li
        # approach in the new variables.

        # v is recomputed in the variables after applying `x_scale`; note that
        # components which were identically 1 are not affected.
        v[dv != 0] *= scale_inv[dv != 0]

        # Here we apply two types of scaling.
        d = v**0.5 * scale

        # C = diag(g * scale) Jv
        diag_h = g * dv * scale

        # After all this is done, we continue normally.

        # "hat" gradient.
        g_h = d * g

        f_augmented[:m] = f
        if tr_solver == 'exact':
            J_augmented[:m] = J * d
            J_h = J_augmented[:m]  # Memory view.
            J_augmented[m:] = np.diag(diag_h**0.5)
            U, s, V = svd(J_augmented, full_matrices=False)
            V = V.T
            uf = U.T.dot(f_augmented)
        elif tr_solver == 'lsmr':
            J_h = right_multiplied_operator(J, d)

            if regularize:
                a, b = build_quadratic_1d(J_h, g_h, -g_h, diag=diag_h)
                to_tr = Delta / norm(g_h)
                ag_value = minimize_quadratic_1d(a, b, 0, to_tr)[1]
                reg_term = -ag_value / Delta**2

            lsmr_op = regularized_lsq_operator(J_h, (diag_h + reg_term)**0.5)
            gn_h = lsmr(lsmr_op, f_augmented, **tr_options)[0]
            S = np.vstack((g_h, gn_h)).T
            S, _ = qr(S, mode='economic')
            JS = J_h.dot(S)  # LinearOperator does dot too.
            B_S = np.dot(JS.T, JS) + np.dot(S.T * diag_h, S)
            g_S = S.T.dot(g_h)

        # theta controls the step-back ratio from the bounds.
        theta = max(0.995, 1 - g_norm)

        actual_reduction = -1
        while actual_reduction <= 0 and nfev < max_nfev:
            if tr_solver == 'exact':
                p_h, alpha, n_iter = solve_lsq_trust_region(
                    n, m, uf, s, V, Delta, initial_alpha=alpha)
            elif tr_solver == 'lsmr':
                p_S, _ = solve_trust_region_2d(B_S, g_S, Delta)
                p_h = S.dot(p_S)

            p = d * p_h  # Trust-region solution in the original space.
            step, step_h, predicted_reduction = select_step(
                x, J_h, diag_h, g_h, p, p_h, d, Delta, lb, ub, theta)

            x_new = make_strictly_feasible(x + step, lb, ub, rstep=0)
            f_new = fun(x_new)
            nfev += 1

            step_h_norm = norm(step_h)

            if not np.all(np.isfinite(f_new)):
                Delta = 0.25 * step_h_norm
                continue

            # Usual trust-region step quality estimation.
            if loss_function is not None:
                cost_new = loss_function(f_new, cost_only=True)
            else:
                cost_new = 0.5 * np.dot(f_new, f_new)
            actual_reduction = cost - cost_new
            # Correction term is specific to the algorithm,
            # vanishes in unbounded case.
            correction = 0.5 * np.dot(step_h * diag_h, step_h)

            Delta_new, ratio = update_tr_radius(
                Delta, actual_reduction - correction, predicted_reduction,
                step_h_norm, step_h_norm > 0.95 * Delta
            )
            alpha *= Delta / Delta_new
            Delta = Delta_new

            step_norm = norm(step)
            termination_status = check_termination(
                actual_reduction, cost, step_norm, norm(x), ratio, ftol, xtol)

            if termination_status is not None:
                break

        if actual_reduction > 0:
            x = x_new

            f = f_new
            f_true = f.copy()

            cost = cost_new

            J = jac(x, f)
            njev += 1

            if loss_function is not None:
                rho = loss_function(f)
                J, f = scale_for_robust_loss_function(J, f, rho)

            g = compute_grad(J, f)

            if jac_scale:
                scale, scale_inv = compute_jac_scale(J, scale_inv)
        else:
            step_norm = 0
            actual_reduction = 0

        iteration += 1

    if termination_status is None:
        termination_status = 0

    active_mask = find_active_constraints(x, lb, ub, rtol=xtol)
    return OptimizeResult(
        x=x, cost=cost, fun=f_true, jac=J, grad=g, optimality=g_norm,
        active_mask=active_mask, nfev=nfev, njev=njev,
        status=termination_status)
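
Similarly, trf_bounds is the branch used by scipy.optimize.least_squares(method='trf') when finite bounds are supplied. A minimal sketch (toy residuals chosen so one bound becomes active; numbers are illustrative):

import numpy as np
from scipy.optimize import least_squares

def residuals(x):
    # toy residuals whose unconstrained minimum (-2, 0.5) violates the lower bound
    return np.array([x[0] + 2.0, x[1] - 0.5])

res = least_squares(residuals, x0=[1.0, 1.0], bounds=([0.0, 0.0], [5.0, 5.0]),
                    method='trf')
print(res.x)            # x[0] is driven to its lower bound
print(res.active_mask)  # -1 flags a variable sitting at its lower bound
print(np.isclose(res.cost, 0.5 * np.sum(res.fun ** 2)))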

Example 41

Project: scipy
Source File: trf.py
View license
def trf_no_bounds(fun, jac, x0, f0, J0, ftol, xtol, gtol, max_nfev,
                  x_scale, loss_function, tr_solver, tr_options, verbose):
    x = x0.copy()

    f = f0
    f_true = f.copy()
    nfev = 1

    J = J0
    njev = 1
    m, n = J.shape

    if loss_function is not None:
        rho = loss_function(f)
        cost = 0.5 * np.sum(rho[0])
        J, f = scale_for_robust_loss_function(J, f, rho)
    else:
        cost = 0.5 * np.dot(f, f)

    g = compute_grad(J, f)

    jac_scale = isinstance(x_scale, string_types) and x_scale == 'jac'
    if jac_scale:
        scale, scale_inv = compute_jac_scale(J)
    else:
        scale, scale_inv = x_scale, 1 / x_scale

    Delta = norm(x0 * scale_inv)
    if Delta == 0:
        Delta = 1.0

    if tr_solver == 'lsmr':
        reg_term = 0
        damp = tr_options.pop('damp', 0.0)
        regularize = tr_options.pop('regularize', True)

    if max_nfev is None:
        max_nfev = x0.size * 100

    alpha = 0.0  # "Levenberg-Marquardt" parameter

    termination_status = None
    iteration = 0
    step_norm = None
    actual_reduction = None

    if verbose == 2:
        print_header_nonlinear()

    while True:
        g_norm = norm(g, ord=np.inf)
        if g_norm < gtol:
            termination_status = 1

        if verbose == 2:
            print_iteration_nonlinear(iteration, nfev, cost, actual_reduction,
                                      step_norm, g_norm)

        if termination_status is not None or nfev == max_nfev:
            break

        d = scale
        g_h = d * g

        if tr_solver == 'exact':
            J_h = J * d
            U, s, V = svd(J_h, full_matrices=False)
            V = V.T
            uf = U.T.dot(f)
        elif tr_solver == 'lsmr':
            J_h = right_multiplied_operator(J, d)

            if regularize:
                a, b = build_quadratic_1d(J_h, g_h, -g_h)
                to_tr = Delta / norm(g_h)
                ag_value = minimize_quadratic_1d(a, b, 0, to_tr)[1]
                reg_term = -ag_value / Delta**2

            damp_full = (damp**2 + reg_term)**0.5
            gn_h = lsmr(J_h, f, damp=damp_full, **tr_options)[0]
            S = np.vstack((g_h, gn_h)).T
            S, _ = qr(S, mode='economic')
            JS = J_h.dot(S)
            B_S = np.dot(JS.T, JS)
            g_S = S.T.dot(g_h)

        actual_reduction = -1
        while actual_reduction <= 0 and nfev < max_nfev:
            if tr_solver == 'exact':
                step_h, alpha, n_iter = solve_lsq_trust_region(
                    n, m, uf, s, V, Delta, initial_alpha=alpha)
            elif tr_solver == 'lsmr':
                p_S, _ = solve_trust_region_2d(B_S, g_S, Delta)
                step_h = S.dot(p_S)

            predicted_reduction = -evaluate_quadratic(J_h, g_h, step_h)
            step = d * step_h
            x_new = x + step
            f_new = fun(x_new)
            nfev += 1

            step_h_norm = norm(step_h)

            if not np.all(np.isfinite(f_new)):
                Delta = 0.25 * step_h_norm
                continue

            # Usual trust-region step quality estimation.
            if loss_function is not None:
                cost_new = loss_function(f_new, cost_only=True)
            else:
                cost_new = 0.5 * np.dot(f_new, f_new)
            actual_reduction = cost - cost_new

            Delta_new, ratio = update_tr_radius(
                Delta, actual_reduction, predicted_reduction,
                step_h_norm, step_h_norm > 0.95 * Delta)
            alpha *= Delta / Delta_new
            Delta = Delta_new

            step_norm = norm(step)
            termination_status = check_termination(
                actual_reduction, cost, step_norm, norm(x), ratio, ftol, xtol)

            if termination_status is not None:
                break

        if actual_reduction > 0:
            x = x_new

            f = f_new
            f_true = f.copy()

            cost = cost_new

            J = jac(x, f)
            njev += 1

            if loss_function is not None:
                rho = loss_function(f)
                J, f = scale_for_robust_loss_function(J, f, rho)

            g = compute_grad(J, f)

            if jac_scale:
                scale, scale_inv = compute_jac_scale(J, scale_inv)
        else:
            step_norm = 0
            actual_reduction = 0

        iteration += 1

    if termination_status is None:
        termination_status = 0

    active_mask = np.zeros_like(x)
    return OptimizeResult(
        x=x, cost=cost, fun=f_true, jac=J, grad=g, optimality=g_norm,
        active_mask=active_mask, nfev=nfev, njev=njev,
        status=termination_status)

Example 42

Project: scot
Source File: varbase.py
View license
def test_whiteness(data, h, p=0, repeats=100, get_q=False, n_jobs=1,
                   verbose=0, random_state=None):
    """Test if signals are white (serially uncorrelated up to a lag of h).

    This function calculates the Li-McLeod Portmanteau test statistic Q to test
    against the null hypothesis H0 (the residuals are white) [1]_.
    Surrogate data for H0 is created by sampling from random permutations of
    the residuals.

    Usually, the returned p-value is compared against a pre-defined type I
    error level of alpha=0.05 or alpha=0.01. If p<=alpha, the hypothesis of
    white residuals is rejected, which indicates that the VAR model does not
    adequately describe the data.

    Parameters
    ----------
    data : array, shape (trials, channels, samples) or (channels, samples)
        Epoched or continuous data set.
    h : int
        Maximum lag that is included in the test statistic.
    p : int, optional
        Model order (if `data` are the residuals resulting from fitting a VAR
        model).
    repeats : int, optional
        Number of samples to create under the null hypothesis.
    get_q : bool, optional
        Return Q statistic along with *p*-value
    n_jobs : int | None, optional
        Number of jobs to run in parallel. If set to None, joblib is not used
        at all. See `joblib.Parallel` for details.
    verbose : int
        Verbosity level passed to joblib.

    Returns
    -------
    pr : float
        Probability of observing a more extreme value of Q under the assumption
        that H0 is true.
    q0 : list of float, optional (`get_q`)
        Individual surrogate estimates that were used for estimating the
        distribution of Q under H0.
    q : float, optional (`get_q`)
        Value of the Q statistic of the residuals.

    Notes
    -----
    According to [2]_, h must satisfy h = O(n^0.5), where n is the length (time
    samples) of the residuals.

    References
    ----------
    .. [1] H. Lütkepohl, "New Introduction to Multiple Time Series Analysis",
           2005, Springer, Berlin, Germany.
    .. [2] J.R.M. Hosking, "The Multivariate Portmanteau Statistic", 1980, J.
           Am. Statist. Assoc.
    """
    res = data[:, :, p:]
    t, m, n = res.shape
    nt = (n - p) * t

    q0 = _calc_q_h0(repeats, res, h, nt, n_jobs, verbose,
                    random_state=random_state)[:, 2, -1]
    q = _calc_q_statistic(res, h, nt)[2, -1]

    # probability of observing a result more extreme than q
    # under the null-hypothesis
    pr = np.sum(q0 >= q) / repeats

    if get_q:
        return pr, q0, q
    else:
        return pr
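
The pr line above is the standard empirical p-value for a surrogate/permutation test; a toy sketch with synthetic surrogate statistics (hypothetical numbers, not the scot pipeline):

import numpy as np

rng = np.random.RandomState(0)
q0 = rng.chisquare(df=10, size=1000)   # toy surrogate statistics under H0
q = 18.3                               # toy observed statistic

pr = np.sum(q0 >= q) / len(q0)         # fraction of surrogates at least as extreme
print(pr)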

Example 43

Project: sfepy
Source File: lcbc_operators.py
View license
    def make_global_operator(self, adi, new_only=False):
        """
        Assemble all LCBC operators into a single matrix.

        Parameters
        ----------
        adi : DofInfo
            The active DOF information.
        new_only : bool
            If True, the operator columns will contain only new DOFs.

        Returns
        -------
        mtx_lc : csr_matrix
            The global LCBC operator in the form of a CSR matrix.
        rhs_lc : array
            The right-hand side for non-homogeneous LCBCs.
        lcdi : DofInfo
            The global active LCBC-constrained DOF information.
        """
        self.finalize()

        if len(self) == 0: return (None,) * 3

        n_dof = self.variables.adi.ptr[-1]
        n_constrained = nm.sum([val for val in six.itervalues(self.n_master)])
        n_dof_free = nm.sum([val for val in six.itervalues(self.n_free)])
        n_dof_new = nm.sum([val for val in six.itervalues(self.n_new)])
        n_dof_active = nm.sum([val for val in six.itervalues(self.n_active)])

        output('dofs: total %d, free %d, constrained %d, new %d'\
               % (n_dof, n_dof_free, n_constrained, n_dof_new))
        output(' -> active %d' % (n_dof_active))

        adi = self.variables.adi
        lcdi, ndi, fdi = self.lcdi, self.ndi, self.fdi

        rows = []
        cols = []
        data = []

        lcbc_mask = nm.ones(n_dof, dtype=nm.bool)
        is_homogeneous = True
        for ii, op in enumerate(self):
            rvar_name = op.var_names[0]
            roff = adi.indx[rvar_name].start

            irs = roff + op.ameq
            lcbc_mask[irs] = False

            if op.get('rhs', None) is not None:
                is_homogeneous = False

        if not is_homogeneous:
            vec_lc = nm.zeros(n_dof, dtype=nm.float64)

        else:
            vec_lc = None

        for ii, op in enumerate(self):
            rvar_name = op.var_names[0]
            roff = adi.indx[rvar_name].start

            irs = roff + op.ameq

            cvar_name = op.var_names[1]
            if cvar_name is None:
                if new_only:
                    coff = ndi.indx[rvar_name].start

                else:
                    coff = lcdi.indx[rvar_name].start + fdi.n_dof[rvar_name]

                iis, icols = self.ics[rvar_name]
                ici = nm.searchsorted(iis, ii)
                ics = nm.arange(coff + icols[ici], coff + icols[ici+1])
                if isinstance(op.mtx, sp.spmatrix):
                    lr, lc, lv = sp.find(op.mtx)
                    rows.append(irs[lr])
                    cols.append(ics[lc])
                    data.append(lv)

                else:
                    _irs, _ics = nm.meshgrid(irs, ics)
                    rows.append(_irs.ravel())
                    cols.append(_ics.ravel())
                    data.append(op.mtx.T.ravel())

            else:
                coff = lcdi.indx[cvar_name].start

                lr, lc, lv = sp.find(op.mtx)
                ii1 = nm.where(lcbc_mask[adi.indx[cvar_name]])[0]
                ii2 = nm.searchsorted(ii1, lc)

                rows.append(roff + lr)
                cols.append(coff + ii2)
                data.append(lv)

            if vec_lc is not None:
                vec_lc[irs] += op.get('rhs', 0)

        rows = nm.concatenate(rows)
        cols = nm.concatenate(cols)
        data = nm.concatenate(data)

        if new_only:
            mtx_lc = sp.coo_matrix((data, (rows, cols)),
                                   shape=(n_dof, n_dof_new))

        else:
            mtx_lc = sp.coo_matrix((data, (rows, cols)),
                                   shape=(n_dof, n_dof_active))

            ir = nm.where(lcbc_mask)[0]
            ic = nm.empty((n_dof_free,), dtype=nm.int32)
            for var_name in adi.var_names:
                ii = nm.arange(fdi.n_dof[var_name], dtype=nm.int32)
                ic[fdi.indx[var_name]] = lcdi.indx[var_name].start + ii

            mtx_lc2 = sp.coo_matrix((nm.ones((ir.shape[0],)), (ir, ic)),
                                    shape=(n_dof, n_dof_active),
                                    dtype=nm.float64)

            mtx_lc = mtx_lc + mtx_lc2

        mtx_lc = mtx_lc.tocsr()

        return mtx_lc, vec_lc, lcdi
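
The four nm.sum calls above simply total per-variable counts stored in dictionaries; a toy equivalent (hypothetical DOF counts):

import numpy as np

n_free = {'u': 120, 'p': 40}                  # hypothetical per-variable DOF counts
n_dof_free = np.sum([val for val in n_free.values()])
print(n_dof_free)                             # 160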

Example 44

Project: sfepy
Source File: geometry.py
View license
def get_simplex_circumcentres(coors, force_inside_eps=None):
    """
    Compute the circumcentres of `n_s` simplices in 1D, 2D and 3D.

    Parameters
    ----------
    coors : array
        The coordinates of the simplices with `n_v` vertices given in an
        array of shape `(n_s, n_v, dim)`, where `dim` is the space
        dimension and `2 <= n_v <= (dim + 1)`.
    force_inside_eps : float, optional
        If not None, move the circumcentres that are outside of their
        simplices or closer to their boundary than `force_inside_eps` so
        that they are inside the simplices at the distance given by
        `force_inside_eps`. It is ignored for edges.

    Returns
    -------
    centres : array
        The circumcentre coordinates as an array of shape `(n_s, dim)`.
    """
    n_s, n_v, dim = coors.shape

    assert_(2 <= n_v <= (dim + 1))
    assert_(1 <= dim <= 3)

    if n_v == 2: # Edges.
        centres = 0.5 * nm.sum(coors, axis=1)

    else:
        if n_v == 3: # Triangles.
            a2 = norm(coors[:,1,:] - coors[:,2,:], squared=True)
            b2 = norm(coors[:,0,:] - coors[:,2,:], squared=True)
            c2 = norm(coors[:,0,:] - coors[:,1,:], squared=True)

            bar_coors = nm.c_[a2 * (-a2 + b2 + c2),
                              b2 * (a2 - b2 + c2),
                              c2 * (a2 + b2 - c2)]

        elif n_v == 4: # Tetrahedrons.
            a2 = norm(coors[:,2,:] - coors[:,1,:], squared=True)
            b2 = norm(coors[:,2,:] - coors[:,0,:], squared=True)
            c2 = norm(coors[:,1,:] - coors[:,0,:], squared=True)
            d2 = norm(coors[:,3,:] - coors[:,0,:], squared=True)
            e2 = norm(coors[:,3,:] - coors[:,1,:], squared=True)
            f2 = norm(coors[:,3,:] - coors[:,2,:], squared=True)
            bar_coors = nm.c_[(d2 * a2 * (f2 + e2 - a2)
                               + b2 * e2 * (a2 + f2 - e2)
                               + c2 * f2 * (e2 + a2 - f2)
                               - 2 * a2 * e2 * f2),
                              (e2 * b2 * (f2 + d2 - b2)
                               +  c2 * f2 * (d2 + b2 - f2)
                               +  a2 * d2 * (b2 + f2 - d2)
                               - 2 * b2 * d2 * f2),
                              (f2 * c2 * (e2 + d2 - c2)
                               +  b2 * e2 * (d2 + c2 - e2)
                               +  a2 * d2 * (c2 + e2 - d2)
                               - 2 * c2 * e2 * d2),
                              (d2 * a2 * (b2 + c2 - a2)
                               +  e2 * b2 * (c2 + a2 - b2)
                               +  f2 * c2 * (a2 + b2 - c2)
                               - 2 * a2 * b2 * c2)]

        else:
            raise ValueError('unsupported simplex! (%d vertices)' % n_v)

        bar_coors /= nm.sum(bar_coors, axis=1)[:,None]
        if force_inside_eps is not None:
            bc = 1.0 / n_v
            limit = 0.9 * bc
            bar_centre = nm.array([bc] * n_v, dtype=nm.float64)

            eps = float(force_inside_eps)
            if eps > limit:
                output('force_inside_eps is too big, adjusting! (%e -> %e)'
                       % (eps, limit))
                eps = limit

            # Flag is True where the barycentre is closer to the simplex
            # boundary than eps, or outside of the simplex.
            mb = nm.min(bar_coors, axis=1)
            flag = nm.where(mb < eps)[0]

            # Move the bar_coors[flag] towards bar_centre so that it is
            # inside at the eps distance.
            mb = mb[flag]
            alpha = ((eps - mb) / (bar_centre[0] - mb))[:,None]

            bar_coors[flag] = (1.0 - alpha) * bar_coors[flag] \
                              + alpha * bar_centre[None,:]

        centres = transform_bar_to_space_coors(bar_coors, coors)

    return centres
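
The barycentric weights above are normalised row-wise by an axis-1 sum; a toy illustration (made-up weights):

import numpy as np

bar_coors = np.array([[2.0, 1.0, 1.0],    # toy unnormalised barycentric weights
                      [0.5, 0.5, 3.0]])
bar_coors = bar_coors / np.sum(bar_coors, axis=1)[:, None]
print(np.sum(bar_coors, axis=1))          # each row now sums to 1 -> [1. 1.]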

Example 45

Project: sfepy
Source File: utils.py
View license
def dot_sequences(mtx, vec, mode='AB'):
    """
    Computes dot product for each pair of items in the two sequences.

    Equivalent to

    >>> out = nm.empty((vec.shape[0], mtx.shape[1], vec.shape[2]),
    >>>                dtype=vec.dtype)
    >>> for ir in range(mtx.shape[0]):
    >>>     out[ir] = nm.dot(mtx[ir], vec[ir])

    Parameters
    ----------
    mtx : array
        The array of matrices with shape `(n_item, m, n)`.
    vec : array
        The array of vectors with shape `(n_item, a)` or matrices with shape
        `(n_item, a, b)`.
    mode : one of 'AB', 'ATB', 'ABT', 'ATBT'

        The mode of the dot product - the corresponding axes are dotted
        together:

        'AB'   : `a = n`
        'ATB'  : `a = m`
        'ABT'  : `b = n` (*)
        'ATBT' : `b = m` (*)

        (*) The 'BT' part is ignored for the vector second argument.

    Returns
    -------
    out : array
       The resulting array.

    Notes
    -----
    Uses `numpy.core.umath_tests.matrix_multiply()` if available, which is much
    faster than the default implementation.

    The default implementation uses `numpy.sum()` and element-wise
    multiplication. For r-D arrays `(n_1, ..., n_r, ?, ?)` the arrays
    are first reshaped to `(n_1 * ... * n_r, ?, ?)`, then the dot is
    performed, and finally the shape is restored to `(n_1, ..., n_r, ?, ?)`.
    """
    if matrix_multiply is not None:
        if vec.ndim == mtx.ndim:
            squeeze = False

        else:
            squeeze = True
            vec = vec[..., None]

        if 'BT' in mode:
            ax = list(range(vec.ndim))
            vec = vec.transpose((ax[:-2]) + [ax[-1], ax[-2]])

        if 'AT' in mode:
            ax = list(range(mtx.ndim))
            mtx = mtx.transpose((ax[:-2]) + [ax[-1], ax[-2]])

        out = matrix_multiply(mtx, vec)
        if squeeze:
            out = out[..., 0]

    else:
        if (vec.ndim == 2) and (mtx.ndim == 3):
            if mode in ('AB', 'ABT'):
                out = nm.sum(mtx * vec[:, None, :], axis=2)

            else:
                out = nm.sum(mtx * vec[:, :, None], axis=1)

        elif (vec.ndim == 3) and (mtx.ndim == 3):

            if mode == 'AB':
                out = nm.empty((vec.shape[0], mtx.shape[1], vec.shape[2]),
                               dtype=vec.dtype)

                for ic in range(vec.shape[2]):
                    out[:, :, ic] = dot_sequences(mtx, vec[:, :, ic], mode=mode)

            elif mode == 'ABT':
                out = nm.empty((vec.shape[0], mtx.shape[1], vec.shape[1]),
                               dtype=vec.dtype)

                for ic in range(vec.shape[1]):
                    out[:, :, ic] = dot_sequences(mtx, vec[:, ic, :], mode=mode)


            elif mode == 'ATB':
                out = nm.empty((vec.shape[0], mtx.shape[2], vec.shape[2]),
                               dtype=vec.dtype)

                for ic in range(vec.shape[2]):
                    out[:, :, ic] = dot_sequences(mtx, vec[:, :, ic], mode=mode)

            elif mode == 'ATBT':
                out = nm.empty((vec.shape[0], mtx.shape[2], vec.shape[1]),
                               dtype=vec.dtype)

                for ic in range(vec.shape[1]):
                    out[:, :, ic] = dot_sequences(mtx, vec[:, ic, :], mode=mode)

            else:
                raise ValueError('unknown dot mode! (%s)' % mode)

        elif (vec.ndim >= 4) and (mtx.ndim >= 4) and (vec.ndim == mtx.ndim):
            mtx_seq = nm.reshape(mtx,
                                 (nm.prod(mtx.shape[0:-2], dtype=int),)
                                 + mtx.shape[-2:])

            vec_seq = nm.reshape(vec,
                                 (nm.prod(vec.shape[0:-2], dtype=int),)
                                 + vec.shape[-2:])

            out_seq = dot_sequences(mtx_seq, vec_seq, mode=mode)
            out = nm.reshape(out_seq, mtx.shape[0:-2] + out_seq.shape[-2:])

        else:
            raise ValueError('unsupported operand shape')

    return out
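
The np.sum fallback above computes a batched matrix-vector product; on modern NumPy the same result comes from np.matmul, which can serve as a cross-check (toy random data):

import numpy as np

rng = np.random.RandomState(0)
mtx = rng.rand(4, 3, 2)    # (n_item, m, n)
vec = rng.rand(4, 2)       # (n_item, n)

out_sum = np.sum(mtx * vec[:, None, :], axis=2)    # 'AB' mode via broadcasting
out_matmul = np.matmul(mtx, vec[:, :, None])[..., 0]
print(np.allclose(out_sum, out_matmul))            # True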

Example 46

Project: sfepy
Source File: optimize.py
View license
    def __call__(self, x0, conf=None, obj_fun=None, obj_fun_grad=None,
                 status=None, obj_args=None):
        conf = get_default(conf, self.conf)
        obj_fun = get_default(obj_fun, self.obj_fun)
        obj_fun_grad = get_default(obj_fun_grad, self.obj_fun_grad)
        status = get_default(status, self.status)
        obj_args = get_default(obj_args, self.obj_args)

        if conf.output:
            globals()['output'] = conf.output

        output('entering optimization loop...')

        nc_of, tt_of, fn_of = wrap_function(obj_fun, obj_args)
        nc_ofg, tt_ofg, fn_ofg = wrap_function(obj_fun_grad, obj_args)

        time_stats = {'of' : tt_of, 'ofg': tt_ofg, 'check' : []}

        ofg = None

        it = 0
        xit = x0.copy()
        while 1:

            of = fn_of(xit)

            if it == 0:
                of0 = ofit0 = of_prev = of
                of_prev_prev = of + 5000.0

            if ofg is None:
                ofg = fn_ofg(xit)

            if conf.check:
                tt = time.clock()
                check_gradient(xit, ofg, fn_of, conf.delta, conf.check)
                time_stats['check'].append(time.clock() - tt)

            ofg_norm = nla.norm(ofg, conf.norm)

            ret = conv_test(conf, it, of, ofit0, ofg_norm)
            if ret >= 0:
                break
            ofit0 = of

            ##
            # Backtrack (on errors).
            alpha = conf.ls0
            can_ls = True
            while 1:
                xit2 = xit - alpha * ofg
                aux = fn_of(xit2)

                if self.log is not None:
                    self.log(of, ofg_norm, alpha, it)

                if aux is None:
                    alpha *= conf.ls_red_warp
                    can_ls = False
                    output('warp: reducing step (%f)' % alpha)
                elif conf.ls and conf.ls_method == 'backtracking':
                    if aux < of * conf.ls_on: break
                    alpha *= conf.ls_red
                    output('backtracking: reducing step (%f)' % alpha)
                else:
                    of_prev_prev = of_prev
                    of_prev = aux
                    break

                if alpha < conf.ls_min:
                    if aux is None:
                        raise RuntimeError('giving up...')
                    output('linesearch failed, continuing anyway')
                    break

            # These values are modified by the line search, even if it fails
            of_prev_bak = of_prev
            of_prev_prev_bak = of_prev_prev

            if conf.ls and can_ls and conf.ls_method == 'full':
                output('full linesearch...')
                alpha, fc, gc, of_prev, of_prev_prev, ofg1 = \
                    linesearch.line_search(fn_of,fn_ofg,xit,
                                           -ofg,ofg,of_prev,of_prev_prev,
                                           c2=0.4)
                if alpha is None:  # line search failed -- use different one.
                    alpha, fc, gc, of_prev, of_prev_prev, ofg1 = \
                        sopt.line_search(fn_of,fn_ofg,xit,
                                         -ofg,ofg,of_prev_bak,
                                         of_prev_prev_bak)
                    if alpha is None or alpha == 0:
                        # This line search also failed to find a better
                        # solution.
                        ret = 3
                        break
                output(' -> alpha: %.8e' % alpha)
            else:
                if conf.ls_method == 'full':
                    output('full linesearch off (%s and %s)'
                           % (conf.ls, can_ls))
                ofg1 = None

            if self.log is not None:
                self.log.plot_vlines(color='g', linewidth=0.5)

            xit = xit - alpha * ofg
            if ofg1 is None:
                ofg = None
            else:
                ofg = ofg1.copy()

            for key, val in six.iteritems(time_stats):
                if len(val):
                    output('%10s: %7.2f [s]' % (key, val[-1]))

            it = it + 1

        output('status:               %d' % ret)
        output('initial value:        %.8e' % of0)
        output('current value:        %.8e' % of)
        output('iterations:           %d' % it)
        output('function evaluations: %d in %.2f [s]'
               % (nc_of[0], nm.sum(time_stats['of'])))
        output('gradient evaluations: %d in %.2f [s]'
               % (nc_ofg[0], nm.sum(time_stats['ofg'])))

        if self.log is not None:
            self.log(of, ofg_norm, alpha, it)

            if conf.log.plot is not None:
                self.log(save_figure=conf.log.plot, finished=True)

            else:
                self.log(finished=True)

        if status is not None:
            status['log'] = self.log
            status['status'] = ret
            status['of0'] = of0
            status['of'] = of
            status['it'] = it
            status['nc_of'] = nc_of[0]
            status['nc_ofg'] = nc_ofg[0]
            status['time_stats'] = time_stats

        return xit

Example 47

Project: PyParticles
Source File: draw_vector_field.py
View license
    def _draw_field( self , key ):
        
        sz = self.__X.shape[0]
        
        transf = tr.Transformations()
        transf.set_points_tuple_size(6)
        
        self.__fields[key]["fun"]( self.__V , self.__X )
        
        # Vector in spherical coordinates
        self.__Vs[:,0] = np.sqrt( np.sum( self.__V**2 , 1 ) )
        self.__Vs[:,1] = np.arccos( self.__V[:,2] / self.__Vs[:,0]  ) 
        self.__Vs[:,2] = np.arctan( np.divide( self.__V[:,1] , self.__V[:,0] ) )
        
        err_nan = np.isnan( self.__Vs[:,2] )
        self.__Vs[err_nan,2] = np.sign( self.__V[err_nan,2] ) * np.pi / 2.0
        
        unit = self.__fields[key]["unit"]
        
        for i in range(sz):
            
            x = self.__X[i,0] / self.__density
            y = self.__X[i,1] / self.__density
            z = self.__X[i,2] / self.__density
            
            transf.push_matrix()
            transf.translation( x , y , z )
            
            transf.rotZ( self.__Vs[i,2] )
            transf.rotY( -( np.pi/2.0 - self.__Vs[i,1] ) )
            
            le = self.__Vs[i,0] / unit
            
            transf.append_point( [ 0 , 0 , 0 ] )
            transf.append_point( [ le , 0 , 0 ] )
            
            transf.append_point( [ le , 0 , 0 ] )
            transf.append_point( [ 0.8*le , 0.1*le , 0 ] )
            
            transf.append_point( [ le , 0 , 0 ] )
            transf.append_point( [ 0.8*le , -0.1*le , 0 ] )
            
            transf.pop_matrix()
        
        color = np.zeros((4))
        
        glBegin(GL_LINES)
            
        for  pts in transf :
            self.__fields[key]["color_fun"]( color , pts[0] )
            
            glColor4f( color[0] , color[1] , color[2] , color[3] )
            
            glVertex3fv( pts[0] )
            glVertex3fv( pts[1] )
            
            glVertex3fv( pts[2] )
            glVertex3fv( pts[3] )
            
            glVertex3fv( pts[4] )
            glVertex3fv( pts[5] )
            
        glEnd()
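
The spherical conversion above uses np.sum for the vector magnitudes; a standalone sketch with toy vectors (np.arctan2 is used here instead of the arctan-plus-NaN fix-up in the original):

import numpy as np

V = np.array([[1.0, 1.0, 0.0],            # toy field vectors
              [0.0, 0.0, 2.0],
              [0.0, -3.0, 0.0]])

r = np.sqrt(np.sum(V ** 2, axis=1))       # magnitudes
theta = np.arccos(V[:, 2] / r)            # polar angle
phi = np.arctan2(V[:, 1], V[:, 0])        # azimuth; arctan2 avoids the 0/0 NaN case
print(r, theta, phi)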

Example 48

Project: pyhawkes
Source File: harness.py
View license
def fit_network_hawkes_vb(S, S_test, dt, dt_max, output_path,
                          model_args={}, standard_model=None,
                          N_samples=100, time_limit=8*60*60):

    T,K = S.shape

    # Check for existing VB results
    if os.path.exists(output_path):
        with gzip.open(output_path, 'r') as f:
            print "Loading VB results from ", output_path
            results = cPickle.load(f)
    else:
        print "Fitting the data with a network Hawkes model using batch VB"

        test_model = DiscreteTimeNetworkHawkesModelGammaMixtureSBM(K=K, dt=dt, dt_max=dt_max,
                                                                             **model_args)
        test_model.add_data(S)

        # Initialize with the standard model parameters
        if standard_model is not None:
            test_model.initialize_with_standard_model(standard_model)

        # Precompute F_test
        F_test = test_model.basis.convolve_with_basis(S_test)

        # Batch variational inference
        samples = []
        lps = [test_model.log_probability()]
        hlls = [test_model.heldout_log_likelihood(S_test)]
        times = [0]
        for itr in progprint_xrange(N_samples):
            # Update the model
            tic = time.time()
            test_model.meanfield_coordinate_descent_step()
            times.append(time.time() - tic)

            # Resample from variational posterior to compute log prob and hlls
            test_model.resample_from_mf()
            # samples.append(test_model.copy_sample())
            samples.append(copy.deepcopy(test_model.get_parameters()))


            # Compute log probability and heldout log likelihood
            # lps.append(test_model.log_probability())
            hlls.append(test_model.heldout_log_likelihood(S_test, F=F_test))

            # Save this sample
            # with open(output_path + ".svi.itr%04d.pkl" % itr, 'w') as f:
            #     cPickle.dump(samples[-1], f, protocol=-1)

            # Check if time limit has been exceeded
            if np.sum(times) > time_limit:
                break

        # Get cumulative timestamps
        timestamps = np.cumsum(times)
        lps = np.array(lps)
        hlls = np.array(hlls)

        # Make results object
        results = Results(samples, timestamps, lps, hlls)

        # Save the VB results
        with gzip.open(output_path, 'w') as f:
            print "Saving VB samples to ", output_path
            cPickle.dump(results, f, protocol=-1)

    return results
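
Here np.sum(times) is the wall-clock budget check: times holds one duration per iteration, their sum is the total inference time so far, and np.cumsum(times) later converts the same list into per-iteration timestamps. A self-contained sketch of the pattern, with a sleep standing in for the mean-field update:

import time
import numpy as np

time_limit = 2.0              # hypothetical budget, in seconds
times = [0]

for itr in range(1000):
    tic = time.time()
    time.sleep(0.01)          # stand-in for one coordinate-descent step
    times.append(time.time() - tic)

    if np.sum(times) > time_limit:       # total work time so far
        break

timestamps = np.cumsum(times)            # elapsed time after each iteration
print('stopped after %d iterations, %.2f s' % (itr + 1, timestamps[-1]))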

Example 49

Project: spectralDNS
Source File: OrrSommerfeld.py
View license
def update(context):
    
    c = context
    params = config.params
    solver = config.solver

    if (params.tstep % params.plot_step == 0 or
        params.tstep % params.compute_energy == 0):        
        U = solver.get_velocity(**context)
    
    # Use GL for postprocessing
    global im1, im2, im3, OS, e0
    if plt is not None:
        if im1 is None and solver.rank == 0 and params.plot_step > 0:
            plt.figure()
            im1 = plt.contourf(c.X[1,:,:,0], c.X[0,:,:,0], c.U[0,:,:,0], 100)
            plt.colorbar(im1)
            plt.draw()

            plt.figure()
            im2 = plt.contourf(c.X[1,:,:,0], c.X[0,:,:,0], c.U[1,:,:,0] - (1-c.X[0,:,:,0]**2), 100)
            plt.colorbar(im2)
            plt.draw()

            plt.figure()
            im3 = plt.quiver(c.X[1, :,:,0], c.X[0,:,:,0], c.U[1,:,:,0]-(1-c.X[0,:,:,0]**2), c.U[0,:,:,0])
            plt.draw()
            
            plt.pause(1e-6)
    
        if params.tstep % params.plot_step == 0 and solver.rank == 0 and params.plot_step > 0:
            im1.ax.clear()
            im1.ax.contourf(c.X[1, :,:,0], c.X[0, :,:,0], U[0, :, :, 0], 100) 
            im1.autoscale()
            im2.ax.clear()
            im2.ax.contourf(c.X[1, :,:,0], c.X[0, :,:,0], U[1, :, :, 0]-(1-c.X[0,:,:,0]**2), 100)         
            im2.autoscale()
            im3.set_UVC(U[1,:,:,0]-(1-c.X[0,:,:,0]**2), U[0,:,:,0])
            plt.pause(1e-6)

    if params.tstep % params.compute_energy == 0:
        U_tmp = c.work[(U, 0)]
        U = solver.get_velocity(**context)
        pert = (U[1] - (1-c.X[0]**2))**2 + U[0]**2
        e1 = 0.5*c.FST.dx(pert, c.ST.quad)
        exact = exp(2*imag(OS.eigval)*(params.t))
        initOS(OS, U_tmp, c.X, t=params.t)
        pert = (U[0] - U_tmp[0])**2 + (U[1]-U_tmp[1])**2
        e2 = 0.5*c.FST.dx(pert, c.ST.quad)
        
        #ST.quad = 'GL'
        #kw['SB'].quad = 'GL'
        #X[:] = FST.get_local_mesh(ST)
        #initOS(OS, U_tmp, F_tmp, X, t=0)
        #e00 = 0.5*energy(U_tmp[0]**2+(U_tmp[1]-(1-X[0]**2))**2, params.N, comm, rank, params.L)        
        #pert = (U[1] - (1-X[0]**2))**2 + U[0]**2
        #e11 = 0.5*energy(pert, params.N, comm, rank, params.L, X[0,:,0,0])
        #initOS(OS, U_tmp, F_tmp, X, t=params.t)
        #pert = (U[0] - U_tmp[0])**2 + (U[1]-U_tmp[1])**2
        #e22 = 0.5*energy(pert, params.N, comm, rank, params.L, X[0,:,0,0])
        #ST.quad = 'GC'
        #kw['SB'].quad = 'GC'
        #X[:] = FST.get_local_mesh(ST)
        
        if solver.rank == 0:
            #print("Time %2.5f Norms %2.16e %2.16e %2.16e %2.16e" %(params.t, e1/e0, exact, e1/e0-exact, sqrt(e2)))
            print("Time %2.5f Norms %2.16e %2.16e %2.16e %2.16e" %(params.t, e1/e0, exact, e1/e0-exact, sqrt(sum(pert))))

Example 50

Project: permute
Source File: stratified.py
View license
def stratified_two_sample(
        group,
        condition,
        response,
        stat='mean',
        alternative="greater",
        reps=10**5,
        keep_dist=False,
        seed=None):
    r"""
    One-sided or two-sided, two-sample permutation test for equality of
    two means, with p-value estimated by simulated random sampling with
    reps replications.

    Tests the hypothesis that x (the responses under one condition) and
    y (the responses under the other condition) are a random partition
    of the pooled responses, against the alternative that x comes from a
    population with mean

    (a) greater than that of the population from which y comes,
        if alternative = 'greater'
    (b) less than that of the population from which y comes,
        if alternative = 'less'
    (c) different from that of the population from which y comes,
        if alternative = 'two-sided'

    Permutations are carried out within the given groups.  Under the null
    hypothesis, observations within each group are exchangeable.

    If ``keep_dist``, return the distribution of values of the test statistic
    in addition to the estimated p-value and the test statistic; otherwise,
    return only the p-value and the test statistic.

    Parameters
    ----------
    group : array-like
        Group memberships
    condition : array-like
        Treatment conditions, of the same length as group
    response : array-like
        Responses, of the same length as group
    stat : {'mean', 't'}
        The test statistic.

        (a) If stat == 'mean', the test statistic is (mean(x) - mean(y))
            (equivalently, sum(x), since those are monotonically related),
            omitting NaNs, which therefore can be used to code non-responders
        (b) If stat == 't', the test statistic is the two-sample t-statistic--
            but the p-value is still estimated by the randomization,
            approximating the permutation distribution.
            The t-statistic is computed using scipy.stats.ttest_ind
        (c) If stat == 'mean_within_strata', the test statistic is the
            difference in means within each stratum, added across strata.
        (d) If stat is a function (a callable object), the test statistic is
            that function.  The function should take a permutation of the
            pooled data and compute the test function from it. For instance,
            if the test statistic is the Kolmogorov-Smirnov distance between
            the empirical distributions of the two samples,
            $max_t |F_x(t) - F_y(t)|$, the test statistic could be written:

            f = lambda u: np.max( \
                [abs(sum(u[:len(x)]<=v)/len(x)-sum(u[len(x):]<=v)/len(y))
                for v in u]\
                )
    alternative : {'greater', 'less', 'two-sided'}
        The alternative hypothesis to test
    reps : int
        Number of permutations
    keep_dist : bool
        flag for whether to store and return the array of values
        of the test statistic
    seed : {None, int, RandomState instance}
        If None, the pseudorandom number generator is the RandomState
        instance used by `np.random`;
        If int, seed is the seed used by the random number generator;
        If RandomState instance, seed is the pseudorandom number generator.

    Returns
    -------
    float
        the estimated p-value
    float
        the test statistic
    list
        The distribution of test statistics.
        These values are only returned if `keep_dist` == True
    """

    prng = get_prng(seed)

    ordering = condition.argsort()
    response = response[ordering]
    condition = condition[ordering]
    group = group[ordering]
    ntreat = np.sum(condition == condition[0])

    groups = np.unique(group)
    conditions = np.unique(condition)
    # If stat is callable, use it as the test function. Otherwise, look in the
    # dictionary
    stats = {
        'mean': lambda u: np.nanmean(u[:ntreat]) - np.nanmean(u[ntreat:]),
        't': lambda u: ttest_ind(
            u[:ntreat][~np.isnan(u[:ntreat])],
            u[ntreat:][~np.isnan(u[ntreat:])],
            equal_var=True)[0],
        'mean_within_strata': lambda u: stratified_permutationtest_mean(group,
                                                                        condition,
                                                                        u,
                                                                        groups,
                                                                        conditions)
    }
    if callable(stat):
        tst_fun = stat
    else:
        tst_fun = stats[stat]

    thePvalue = {
        'greater': lambda p: p,
        'less': lambda p: 1 - p,
        'two-sided': lambda p: 2 * np.min([p, 1 - p])
    }
    observed_tst = tst_fun(response)

    if keep_dist:
        dist = np.empty(reps)
        for i in range(reps):
            dist[i] = tst_fun(permute_within_groups(
                response, group, seed=prng))
        hits = np.sum(dist >= observed_tst)
        return thePvalue[alternative](hits / reps), observed_tst, dist
    else:
        hits = np.sum([(tst_fun(permute_within_groups(
            response, group, seed=prng)) >= observed_tst)
            for i in range(reps)])
        return thePvalue[alternative](hits / reps), observed_tst
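
In both branches, np.sum counts how many permuted statistics are at least as large as the observed one: summing a boolean array (or a generator of booleans) counts its True entries, and dividing by reps gives the simulated p-value fed to thePvalue. A minimal, self-contained sketch of that counting idiom for a plain (unstratified) two-sample test; the variable names are illustrative, not the permute API:

import numpy as np

prng = np.random.RandomState(42)
x = prng.normal(0.5, 1.0, size=30)       # hypothetical treatment responses
y = prng.normal(0.0, 1.0, size=40)       # hypothetical control responses

pooled = np.concatenate([x, y])
observed = np.mean(x) - np.mean(y)

reps = 10000
dist = np.empty(reps)
for i in range(reps):
    perm = prng.permutation(pooled)
    dist[i] = np.mean(perm[:len(x)]) - np.mean(perm[len(x):])

hits = np.sum(dist >= observed)          # permutations at least as extreme as observed
print('one-sided p-value: %.4f' % (hits / reps))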