numpy.r_

Here are examples of the Python API numpy.r_ collected from open source projects.
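
numpy.r_ is an indexer object rather than a function: indexing it with comma-separated values concatenates them along the first axis, and slice notation inside the brackets expands to a range. A minimal sketch of the two usage patterns that dominate the examples below:

    import numpy as np

    # Comma-separated literals are concatenated into a 1-D array.
    np.r_[0., 0, 0, 1, 1, 1]   # array([0., 0., 0., 1., 1., 1.])

    # Slice notation expands like np.arange, which is handy for index arrays.
    np.r_[0:10]                # array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
    np.r_[10:500]              # indices 10 and on, for a reader with 500 iids

The examples use both forms: literal concatenation to build parameter vectors, and the slice form to build integer index arrays that split a data reader into train and test subsets.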

200 Examples

Example 151

Project: pybasicbayes
Source File: test_negbin.py
    @property
    def hyperparameter_settings(self):
        return (dict(r_discrete_distn=np.r_[0.,0,0,1,1,1],alpha_0=5,beta_0=5),)
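
Note the leading 0. in np.r_[0.,0,0,1,1,1]: one float literal promotes the whole result to float64, so the discrete distribution over r is built as floating point rather than integer. A quick check:

    import numpy as np

    np.r_[0., 0, 0, 1, 1, 1].dtype   # dtype('float64')
    np.r_[0, 0, 0, 1, 1, 1].dtype    # platform default integer dtype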

Example 153

Project: FaST-LMM
Source File: test_fastlmm_predictor.py
    def test_api(self):
        train_idx = np.r_[10:self.snpreader_whole.iid_count] # iids 10 and on
        test_idx  = np.r_[0:10] # the first 10 iids

        #####################################################
        # Train and standardize cov and then apply to test
        #####################################################

        cov_train, unit_trained = self.covariate_whole[train_idx,:].read().standardize(Unit(),return_trained=True)
        cov_test = self.covariate_whole[test_idx,:].read().standardize(unit_trained)

        #####################################################
        # standardize whole kernel from snps (both ways) and then pull out the 3 parts
        #####################################################
        
        whole_kernel = SnpKernel(self.covariate_whole,Unit()).read().standardize(DiagKtoN())
        train_kernel = whole_kernel[train_idx].read(order='A',view_ok=True)
        test_kernel = whole_kernel[train_idx,test_idx].read(order='A',view_ok=True)
        test_test_kernel = whole_kernel[test_idx,test_idx].read(order='A',view_ok=True)

        #####################################################
        # create train_train, train_test, and test_test based on just the training snps (both standardizations)
        #####################################################

        K_train = SnpKernel(self.snpreader_whole[train_idx,:],Unit(),block_size=100)
        train_train_kernel, snp_trained, kernel_trained = K_train._read_with_standardizing(to_kerneldata=True, kernel_standardizer=DiagKtoN(), return_trained=True)

        K_whole_test = _SnpWholeTest(train=self.snpreader_whole[train_idx,:],test=self.snpreader_whole[test_idx,:],standardizer=snp_trained,block_size=100)
        train_idx2 = K_whole_test.iid0_to_index(self.snpreader_whole.iid[train_idx]) #The new reader may have the iids in a different order than the original reader
        train_test_kernel = K_whole_test[train_idx2,:].read().standardize(kernel_trained)

        test_idx2 = K_whole_test.iid0_to_index(self.snpreader_whole.iid[test_idx])
        test_test_kernel = K_whole_test[test_idx2,:].read().standardize(kernel_trained)

        #####################################################
        # How does predict look with whole_test as input?
        #####################################################

        # a. - standardize whole up front
        whole_kernel = SnpKernel(self.snpreader_whole,Unit(),block_size=100).read().standardize()
        train_kernel = whole_kernel[train_idx].read(order='A',view_ok=True)
        whole_test_kernel = whole_kernel[:,test_idx].read(order='A',view_ok=True)
        fastlmm1 = FastLMM(snp_standardizer=SS_Identity(), kernel_standardizer=KS_Identity())
        fastlmm1.fit(K0_train=train_kernel, X=self.covariate_whole, y=self.pheno_whole) #iid intersection means we won't really be using whole covar or pheno
        predicted_pheno, covar = fastlmm1.predict(K0_whole_test=whole_test_kernel, X=self.covariate_whole)
        output_file = self.file_name("whole")
        Dat.write(output_file,predicted_pheno)
        self.compare_files(predicted_pheno,"whole")

        # b -- just files
        fastlmm2 = FastLMM()
        fastlmm2.fit(K0_train=self.snpreader_whole[train_idx,:], X=self.covariate_whole, y=self.pheno_whole[train_idx,:]) #iid intersection means we won't really be using whole covar
        predicted_pheno, covar = fastlmm2.predict(K0_whole_test=self.snpreader_whole[test_idx,:], X=self.covariate_whole)
        self.compare_files(predicted_pheno,"one")

Example 154

Project: FaST-LMM
Source File: test_fastlmm_predictor.py
    def test_lr_as_lmm(self):
            do_plot = False
            #later why does this test case generate two intersect info messages instead of just one?
            import pylab
            logging.info("TestLmmTrain test_lr_as_lmm")

            ###############################################################
            # Create a linear data set with just a little noise
            ###############################################################

            train_idx = np.r_[10:self.snpreader_whole.iid_count] # iids 10 and on
            test_idx  = np.r_[0:10] # the first 10 iids

            #make covar just numbers 0,1,...
            covar = self.covariate_whole.read()
            covar.val = np.array([[float(num)] for num in xrange(covar.iid_count)])
            covar._name = 'np.array([[float(num)] for num in xrange(covar.iid_count)])'
            covariate_train = covar[train_idx,:].read()
            covariate_test = covar[test_idx,:].read()


            #make pheno  # pheno = 2*covar+100+normal(0,1)*10
            pheno = self.pheno_whole.read()
            np.random.seed(0)
            pheno.val = covar.val * 2.0 + 100 + np.random.normal(size=covar.val.shape)*10

            pheno_train = pheno[train_idx,:].read()
            pheno_test = pheno[test_idx,:].read()

            if do_plot:
                #Plot training x and y, testing x and y
                pylab.plot(covariate_train.val, pheno_train.val,".",covariate_test.val, pheno_test.val,".")
                pylab.suptitle("Plot training x and y, testing x and y")
                pylab.show()

            ###############################################################
            # Show that linear regression does a good job predicting
            ###############################################################

            Xtrain = np.c_[covariate_train.val,np.ones((covariate_train.iid_count,1))]
            Xtest = np.c_[covariate_test.val,np.ones((covariate_test.iid_count,1))]
            lsqSol = np.linalg.lstsq(Xtrain, pheno_train.val[:,0])
            bs=lsqSol[0] #weights
            r2=lsqSol[1] #squared residuals
            D=lsqSol[2]  #rank of design matrix
            N=pheno_train.iid_count
            REML = False
            if not REML:
                sigma2 = float(r2/N)
                nLL =  N*0.5*np.log(2*np.pi*sigma2) + N*0.5
            else:
                sigma2 = float(r2 / (N-D))
                nLL = N*0.5*np.log(2*np.pi*sigma2) + 0.5/sigma2*r2;
                nLL -= 0.5*D*np.log(2*np.pi*sigma2);#REML term

            predicted = Xtest.dot(bs)
            yerr = [np.sqrt(sigma2)] * len(predicted)
            if do_plot:
                pylab.plot(covariate_test.val, pheno_test.val,"g.",covariate_test.val, predicted,"r.")
                pylab.xlim([-1, 10])
                pylab.errorbar(covariate_test.val, predicted,yerr,linestyle='None')
                pylab.suptitle("real linear regression: actual to prediction")
                pylab.show()

            ###############################################################
            # Use LMM as LR and apply test on train
            ###############################################################
            for force_full_rank in [True, False]:
                #Learn model, save, load
                fastlmmx = FastLMM(GB_goal=2,force_full_rank=force_full_rank).fit(K0_train=covariate_train, X=None, y=pheno_train)
                
                
                filename = self.tempout_dir + "/model_lr_as_lmm.flm.p"
                pstutil.create_directory_if_necessary(filename)
                joblib.dump(fastlmmx, filename) 
                fastlmm = joblib.load(filename)


                do_test_on_train = True
                if do_test_on_train:
                    #Predict with model (test on train)
                    predicted_pheno, covar = fastlmm.predict(K0_whole_test=covariate_train, X=None) #test on train
                    output_file = self.file_name("lr_as_lmma_")
                    Dat.write(output_file,predicted_pheno)
                    covar2 = SnpData(iid=covar.row,sid=covar.col[:,1],val=covar.val) #kludge to write kernel to text format
                    output_file = self.file_name("lr_as_lmma.cov_")
                    Dat.write(output_file,covar2)

                    yerr = np.sqrt(np.diag(covar.val))
                    predicted = predicted_pheno.val
                    if do_plot:
                        pylab.plot(covariate_train.val, pheno_train.val,"g.",covariate_train.val, predicted,"r.")
                        pylab.xlim([0, 50])
                        pylab.ylim([100, 200])
                        pylab.errorbar(covariate_train.val, predicted,yerr,linestyle='None')
                        pylab.suptitle("test on train: train X to true target (green) and prediction (red)")
                        pylab.show()

                    self.compare_files(predicted_pheno,"lr_as_lmma_")
                    self.compare_files(covar2,"lr_as_lmma.cov_")

                ###############################################################
                # Use LMM as LR and apply test on test
                ###############################################################

                #Predict with model (test on test)
                predicted_pheno, covar  = fastlmm.predict(K0_whole_test=covariate_test, X=None) #test on train
                output_file = self.file_name("lr_as_lmmb_")
                Dat.write(output_file,predicted_pheno)
                covar2 = SnpData(iid=covar.row,sid=covar.col[:,1],val=covar.val) #kludge to write kernel to text format
                output_file = self.file_name("lr_as_lmmb.cov_")
                Dat.write(output_file,covar2)

                yerr = np.sqrt(np.diag(covar.val))
                predicted = predicted_pheno.val
                if do_plot:
                    pylab.plot(covariate_test.val, pheno_test.val,"g.",covariate_test.val, predicted,"r.")
                    pylab.xlim([-1, 10])
                    pylab.errorbar(covariate_test.val, predicted,yerr,linestyle='None')
                    pylab.suptitle("test on test: test X to true target (green) and prediction (red)")
                    pylab.show()
                    ## Plot y and predicted y (test on train)
                    #pylab.plot(pheno_test.val,predicted_pheno.val,".")
                    #pylab.suptitle(name+": test on test: true target to prediction")
                    #pylab.show()

                self.compare_files(predicted_pheno,"lr_as_lmmb_")
                self.compare_files(covar2,"lr_as_lmmb.cov_")
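
Alongside np.r_, the test above uses the column-wise sibling np.c_ to build a design matrix for ordinary least squares: np.c_[covariate_train.val, np.ones((n, 1))] appends an intercept column to the covariate values. A minimal sketch with made-up data:

    import numpy as np

    x = np.r_[0.0:5.0]                  # covariate values 0..4
    X = np.c_[x, np.ones((len(x), 1))]  # shape (5, 2): covariate plus intercept column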

Example 155

Project: FaST-LMM
Source File: test_fastlmm_predictor.py
    def test_lr2(self):
        do_plot = False

        import pylab
        logging.info("TestLmmTrain test_lr2")

        train_idx = np.r_[10:self.snpreader_whole.iid_count] # iids 10 and on
        test_idx  = np.r_[0:10] # the first 10 iids

        #make covar just numbers 0,1,...
        covar = self.covariate_whole.read()
        covar.val = np.array([[float(num)] for num in xrange(covar.iid_count)])
        covariate_train = covar[train_idx,:].read()
        covariate_test = covar[test_idx,:].read()
        K0_whole_test = KernelIdentity(covar.iid,covariate_test.iid)

        #make pheno  # pheno = 2*covar+100+normal(0,1)*10
        pheno = self.pheno_whole.read()
        np.random.seed(0)
        pheno.val = covar.val * 2.0 + 100 + np.random.normal(size=covar.val.shape)*10

        pheno_train = pheno[train_idx,:].read()
        pheno_test = pheno[test_idx,:].read()

        if do_plot:
            #Plot training x and y, testing x and y
            pylab.plot(covariate_train.val, pheno_train.val,".",covariate_test.val, pheno_test.val,".")
            pylab.suptitle("Plot training x and y, testing x and y")
            pylab.show()

        Xtrain = np.c_[covariate_train.val,np.ones((covariate_train.iid_count,1))]
        Xtest = np.c_[covariate_test.val,np.ones((covariate_test.iid_count,1))]
        lsqSol = np.linalg.lstsq(Xtrain, pheno_train.val[:,0])
        bs=lsqSol[0] #weights
        r2=lsqSol[1] #squared residuals
        D=lsqSol[2]  #rank of design matrix
        N=pheno_train.iid_count
        REML = False
        if not REML:
            sigma2 = float(r2/N)
            nLL =  N*0.5*np.log(2*np.pi*sigma2) + N*0.5
        else:
            sigma2 = float(r2 / (N-D))
            nLL = N*0.5*np.log(2*np.pi*sigma2) + 0.5/sigma2*r2;
            nLL -= 0.5*D*np.log(2*np.pi*sigma2);#REML term

        predicted = Xtest.dot(bs)
        yerr = [np.sqrt(sigma2)] * len(predicted)
        if do_plot:
            pylab.plot(covariate_test.val, pheno_test.val,"g.",covariate_test.val, predicted,"r.")
            pylab.xlim([-1, 10])
            pylab.errorbar(covariate_test.val, predicted,yerr,linestyle='None')
            pylab.suptitle("real linear regression: actual to prediction")
            pylab.show()

        #These should all give the same result
        first_name = None
        for name,K0_train,K0_whole_test in [("Identity Kernel",
                                            KernelIdentity(self.snpreader_whole.iid[train_idx]),
                                            KernelIdentity(self.snpreader_whole.iid,test=self.snpreader_whole.iid[test_idx])),
                                      #!!!later("sid_count=0", self.snpreader_whole[train_idx,[]],self.snpreader_whole[test_idx,[]])
                                      ]:
            logging.info(name)
            first_name = first_name or name
            #Learn model, save, load
            fastlmmx = FastLMM(GB_goal=2).fit(K0_train=K0_train, X=covariate_train, y=pheno_train)
                
                
            filename = self.tempout_dir + "/model_lr2.flm.p"
            joblib.dump(fastlmmx, filename) 
            fastlmm = joblib.load(filename)


            do_test_on_train = True
            if do_test_on_train:
                #Predict with model (test on train)
                predicted_pheno, covar = fastlmm.predict(K0_whole_test=K0_train, X=covariate_train) #test on train
                output_file = self.file_name("lr2a_"+name)
                Dat.write(output_file,predicted_pheno)
                covar2 = SnpData(iid=covar.row,sid=covar.col[:,1],val=covar.val) #kludge to write kernel to text format
                output_file = self.file_name("lr2a.cov_"+name)
                Dat.write(output_file,covar2)

                yerr = np.sqrt(np.diag(covar.val))
                predicted = predicted_pheno.val
                if do_plot:
                    pylab.plot(covariate_train.val, pheno_train.val,"g.",covariate_train.val, predicted,"r.")
                    pylab.xlim([0, 50])
                    pylab.ylim([100, 200])
                    pylab.errorbar(covariate_train.val, predicted,yerr,linestyle='None')
                    pylab.suptitle(name+": test on train: train X to true target (green) and prediction (red)")
                    pylab.show()

                self.compare_files(predicted_pheno,"lr2a_"+first_name)
                self.compare_files(covar2,"lr2a.cov_"+first_name)

            #Predict with model (test on test)
            predicted_pheno, covar  = fastlmm.predict(K0_whole_test=K0_whole_test, X=covariate_test) #test on train
            output_file = self.file_name("lr2b_"+name)
            Dat.write(output_file,predicted_pheno)
            covar2 = SnpData(iid=covar.row,sid=covar.col[:,1],val=covar.val) #kludge to write kernel to text format
            output_file = self.file_name("lr2b.cov_"+name)
            Dat.write(output_file,covar2)

            yerr = np.sqrt(np.diag(covar.val))
            predicted = predicted_pheno.val
            if do_plot:
                pylab.plot(covariate_test.val, pheno_test.val,"g.",covariate_test.val, predicted,"r.")
                pylab.xlim([-1, 10])
                pylab.errorbar(covariate_test.val, predicted,yerr,linestyle='None')
                pylab.suptitle(name+": test on test: test X to true target (green) and prediction (red)")
                pylab.show()
                ## Plot y and predicted y (test on train)
                #pylab.plot(pheno_test.val,predicted_pheno.val,".")
                #pylab.suptitle(name+": test on test: true target to prediction")
                #pylab.show()

            self.compare_files(predicted_pheno,"lr2b_"+first_name)
            self.compare_files(covar2,"lr2b.cov_"+first_name)
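
The least-squares block repeated in these tests profiles out the noise variance: np.linalg.lstsq returns the weights, the residual sum of squares, and the rank of the design matrix, and with the maximum-likelihood estimate sigma2 = RSS/N the Gaussian negative log-likelihood collapses to N/2 * log(2*pi*sigma2) + N/2, which is the non-REML branch above. A self-contained sketch on synthetic data (rcond=None assumes a recent NumPy):

    import numpy as np

    np.random.seed(0)
    x = np.r_[0.0:50.0]                           # covariate 0..49
    y = 2.0 * x + 100 + np.random.normal(size=50) * 10
    X = np.c_[x, np.ones((50, 1))]                # design matrix with intercept

    bs, r2, D, _ = np.linalg.lstsq(X, y, rcond=None)
    N = len(y)
    sigma2 = r2[0] / N                            # ML estimate of the noise variance
    nLL = N * 0.5 * np.log(2 * np.pi * sigma2) + N * 0.5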

Example 156

Project: FaST-LMM
Source File: test_fastlmm_predictor.py
    def test_twoK(self):
        logging.info("TestLmmTrain test_twoK")

        train_idx = np.r_[10:self.snpreader_whole.iid_count] # iids 10 and on
        test_idx  = np.r_[0:10] # the first 10 iids

        G0_train = self.snpreader_whole[train_idx,:]
        covariate_train = self.covariate_whole[train_idx,:]
        pheno_train = self.pheno_whole[train_idx,:]

        fastlmm1 = FastLMM(GB_goal=2).fit(K0_train=G0_train, K1_train=G0_train, X=covariate_train, y=pheno_train)
        filename = self.tempout_dir + "/model_one.flm.p"
        pstutil.create_directory_if_necessary(filename)
        joblib.dump(fastlmm1, filename) 
        fastlmm2 = joblib.load(filename)

                
        # predict on test set
        G0_test = self.snpreader_whole[test_idx,:]
        covariate_test = self.covariate_whole[test_idx,:]

        predicted_pheno, covar = fastlmm2.predict(K0_whole_test=G0_test, K1_whole_test=G0_test, X=covariate_test)

        output_file = self.file_name("one")
        Dat.write(output_file,predicted_pheno)

        pheno_actual = self.pheno_whole[test_idx,:].read().val[:,0]

        #pylab.plot(pheno_actual, predicted_pheno.val,".")
        #pylab.show()


        self.compare_files(predicted_pheno,"one")

Example 157

Project: FaST-LMM
Source File: test_fastlmm_predictor.py
    def test_lr(self):
        import matplotlib.pyplot as plt
        import pylab


        logging.info("TestLmmTrain test_lr")

        train_idx = np.r_[10:self.snpreader_whole.iid_count] # iids 10 and on
        test_idx  = np.r_[0:10] # the first 10 iids

        G0_train = self.snpreader_whole[train_idx,:]
        covariate_train3 = self.covariate_whole[train_idx,:].read()
        covariate_train3.val = np.array([[float(num)] for num in xrange(covariate_train3.iid_count)])
        pheno_train3 = self.pheno_whole[train_idx,:].read()
        np.random.seed(0)
        pheno_train3.val = covariate_train3.val * 2.0 + 100 + np.random.normal(size=covariate_train3.val.shape) # y = 2*x+100+normal(0,1)

        ##Plot training x and y
        #pylab.plot(covariate_train3.val, pheno_train3.val,".")
        #pylab.show()

        for force_full_rank,force_low_rank in [(True,False),(False,True)]:
            #Learn model, save, load
            fastlmm3x = FastLMM(force_full_rank=force_full_rank,force_low_rank=force_low_rank,GB_goal=2).fit(K0_train=G0_train, X=covariate_train3, y=pheno_train3)
            filename = self.tempout_dir + "/model_lr.flm.p"
            pstutil.create_directory_if_necessary(filename)
            joblib.dump(fastlmm3x, filename) 
            fastlmm3 = joblib.load(filename)


            #Predict with model (test on train)
            predicted_pheno, covar = fastlmm3.predict(K0_whole_test=G0_train, X=covariate_train3) #test on train
            output_file = self.file_name("lr")
            Dat.write(output_file,predicted_pheno)

            ## Plot training x and y, and training x with predicted y
            #do_plot = True 
            #if do_plot:
            #    pylab.plot(covariate_train3.val, pheno_train3.val,covariate_train3.val,predicted_pheno.val,".")
            #    pylab.show()

            #    # Plot y and predicted y (test on train)
            #    pheno_actual = pheno_train3.val[:,0]
            #    pylab.plot(pheno_actual,predicted_pheno.val,".")
            #    pylab.show()


            self.compare_files(predicted_pheno,"lr")

Example 158

Project: FaST-LMM
Source File: test_fastlmm_predictor.py
    def test_lmm(self):
        do_plot = False
        iid_count = 500
        seed = 0


        import pylab
        logging.info("TestLmmTrain test_lmm")

        iid = [["cid{0}P{1}".format(iid_index,iid_index//250)]*2 for iid_index in xrange(iid_count)]
        train_idx = np.r_[10:iid_count] # iids 10 and on
        test_idx  = np.r_[0:10] # the first 10 iids


        #Every person is 100% related to everyone in one of 5 families
        K0a = KernelData(iid=iid,val=np.empty([iid_count,iid_count]),name="related by distance")
        for iid_index0 in xrange(iid_count):
            for iid_index1 in xrange(iid_count):
                K0a.val[iid_index0,iid_index1] = 1 if iid_index0 % 5 == iid_index1 % 5 else 0
                if iid_index1 < iid_index0:
                    assert K0a.val[iid_index0,iid_index1] == K0a.val[iid_index1,iid_index0]

        #every person lives on a line from 0 to 1
        # They are related to every other person as a function of distance on the line
        np.random.seed(seed)
        home = np.random.random([iid_count])
        K0b = KernelData(iid=iid,val=np.empty([iid_count,iid_count]),name="related by distance")
        for iid_index in xrange(iid_count):
            K0b.val[iid_index,:] = 1 - np.abs(home-home[iid_index])**.1

        #make covar just numbers 0,1,...
        covar = SnpData(iid=iid,sid=["x"],val=np.array([[float(num)] for num in xrange(iid_count)]))
        covariate_train = covar[train_idx,:].read()
        covariate_test = covar[test_idx,:].read()

        for name, h2, K0 in [("clones", 1, K0a),("line_world",.75,K0b)]:

            sigma2x = 100
            varg = sigma2x * h2
            vare = sigma2x * (1-h2)

            #######################################################################
            #make pheno  # pheno = 2*covar+100+normal(0,1)*2.5+normal(0,K)*7.5
            #######################################################################
            #random.multivariate_normal is sensitive to mkl_num_thread, so we control it.
            if 'MKL_NUM_THREADS' in os.environ:
                mkl_num_thread = os.environ['MKL_NUM_THREADS']
            else:
                mkl_num_thread = None
            os.environ['MKL_NUM_THREADS'] = '1'
            np.random.seed(seed)
            p1 = covar.val * 2.0 + 100
            p2 = np.random.normal(size=covar.val.shape)*np.sqrt(vare)
            p3 = (np.random.multivariate_normal(np.zeros(iid_count),K0.val)*np.sqrt(varg)).reshape(-1,1)
            if mkl_num_thread is not None:
                os.environ['MKL_NUM_THREADS'] = mkl_num_thread
            else:
                del os.environ['MKL_NUM_THREADS']
            pheno = SnpData(iid=iid,sid=["pheno0"],val= p1 + p2 + p3)

            pheno_train = pheno[train_idx,:].read()
            pheno_test = pheno[test_idx,:].read()

            if do_plot:
                #Plot training x and y, testing x and y
                pylab.plot(covariate_train.val, pheno_train.val,".",covariate_test.val, pheno_test.val,".")
                pylab.suptitle(name + ": Plot training x and y, testing x and y")
                pylab.show()

            Xtrain = np.c_[covariate_train.val,np.ones((covariate_train.iid_count,1))]
            Xtest = np.c_[covariate_test.val,np.ones((covariate_test.iid_count,1))]
            lsqSol = np.linalg.lstsq(Xtrain, pheno_train.val[:,0])
            bs=lsqSol[0] #weights
            r2=lsqSol[1] #squared residuals
            D=lsqSol[2]  #rank of design matrix
            N=pheno_train.iid_count
            REML = False
            if not REML:
                sigma2 = float(r2/N)
                nLL =  N*0.5*np.log(2*np.pi*sigma2) + N*0.5
            else:
                sigma2 = float(r2 / (N-D))
                nLL = N*0.5*np.log(2*np.pi*sigma2) + 0.5/sigma2*r2;
                nLL -= 0.5*D*np.log(2*np.pi*sigma2);#REML term

            predicted = Xtest.dot(bs)
            yerr = [np.sqrt(sigma2)] * len(predicted)
            if do_plot:
                pylab.plot(covariate_test.val, pheno_test.val,"g.",covariate_test.val, predicted,"r.")
                pylab.xlim([-1, 10])
                pylab.errorbar(covariate_test.val, predicted,yerr,linestyle='None')
                pylab.suptitle(name + ": real linear regression: actual to prediction")
                pylab.show()

            for factor in [1,100,.02]:
                K0 = K0.read()
                K0.val *= factor

                K0_train = K0[train_idx]
                K0_whole_test = K0[:,test_idx]

                #Learn model, save, load
                fastlmmx = FastLMM(GB_goal=2).fit(K0_train=K0_train, X=covariate_train, y=pheno_train)
                v2 = np.var(p2)
                v3 = np.var(p3)
                logging.debug("Original h2 of {0}. Generated h2 of {1}. Learned h2 of {2}".format(h2, v3/(v2+v3), fastlmmx.h2raw))
                
                
                filename = self.tempout_dir + "/model_lmm.flm.p"
                pstutil.create_directory_if_necessary(filename)
                joblib.dump(fastlmmx, filename) 
                fastlmm = joblib.load(filename)


                do_test_on_train = True
                if do_test_on_train:
                    #Predict with model (test on train)
                    predicted_pheno, covar_pheno = fastlmm.predict(K0_whole_test=K0_train, X=covariate_train) #test on train
                    output_file = self.file_name("lmma_"+name)
                    Dat.write(output_file,predicted_pheno)
                    covar2 = SnpData(iid=covar_pheno.row,sid=covar_pheno.col[:,1],val=covar_pheno.val) #kludge to write kernel to text format
                    output_file = self.file_name("lmma.cov_"+name)
                    Dat.write(output_file,covar2)

                    yerr = np.sqrt(np.diag(covar_pheno.val))
                    predicted = predicted_pheno.val
                    if do_plot:
                        pylab.plot(covariate_train.val, pheno_train.val,"g.",covariate_train.val, predicted,"r.")
                        pylab.xlim([0, 50])
                        pylab.ylim([100, 200])
                        pylab.errorbar(covariate_train.val, predicted,yerr,linestyle='None')
                        pylab.suptitle(name+": test on train: train X to true target (green) and prediction (red)")
                        pylab.show()

                    self.compare_files(predicted_pheno,"lmma_"+name)
                    self.compare_files(covar2,"lmma.cov_"+name)

                    predicted_pheno0, covar_pheno0 = fastlmm.predict(K0_whole_test=K0_train[:,0], X=covariate_train[0,:]) #test on train #0
                    assert np.abs(predicted_pheno0.val[0,0] - predicted_pheno.val[0,0]) < 1e-6, "Expect a single case to get the same prediction as a set of cases"
                    assert np.abs(covar_pheno0.val[0,0] - covar_pheno.val[0,0]) < 1e-6, "Expect a single case to get the same prediction as a set of cases"


                #Predict with model (test on test)
                predicted_phenoB, covar_phenoB  = fastlmm.predict(K0_whole_test=K0_whole_test, X=covariate_test) #test on test
                output_file = self.file_name("lmmb_"+name)
                Dat.write(output_file,predicted_phenoB)
                covar2 = SnpData(iid=covar_phenoB.row,sid=covar_phenoB.col[:,1],val=covar_phenoB.val) #kludge to write kernel to text format
                output_file = self.file_name("lmmb.cov_"+name)
                Dat.write(output_file,covar2)

                yerr = np.sqrt(np.diag(covar_phenoB.val))
                predicted = predicted_phenoB.val
                if do_plot:
                    pylab.plot(covariate_test.val, pheno_test.val,"g.",covariate_test.val, predicted,"r.")
                    pylab.xlim([-1, 10])
                    pylab.errorbar(covariate_test.val, predicted,yerr,linestyle='None')
                    pylab.suptitle(name+": test on test: test X to true target (green) and prediction (red)")
                    pylab.show()

                self.compare_files(predicted_phenoB,"lmmb_"+name)
                self.compare_files(covar2,"lmmb.cov_"+name)

                predicted_phenoB0, covar_phenoB0  = fastlmm.predict(K0_whole_test=K0_whole_test[:,0], X=covariate_test[0,:]) #test on a single test case
                assert np.abs(predicted_phenoB0.val[0,0] - predicted_phenoB.val[0,0]) < 1e-6, "Expect a single case to get the same prediction as a set of cases"
                assert np.abs(covar_phenoB0.val[0,0] - covar_phenoB.val[0,0]) < 1e-6, "Expect a single case to get the same prediction as a set of cases"

                #Predict with model test on some train and some test
                some_idx = range(covar.iid_count)
                some_idx.remove(train_idx[0])
                some_idx.remove(test_idx[0])
                covariate_some = covar[some_idx,:]
                K0_whole_some = K0[:,some_idx]
                predicted_phenoC, covar_phenoC  = fastlmm.predict(K0_whole_test=K0_whole_some, X=covariate_some)
                for idxC, iidC in enumerate(predicted_phenoC.iid):
                    meanC = predicted_phenoC.val[idxC]
                    varC = covar_phenoC.val[idxC,idxC]
                    if iidC in predicted_pheno.iid:
                        predicted_pheno_ref = predicted_pheno
                        covar_pheno_ref = covar_pheno
                    else:
                        assert iidC in predicted_phenoB.iid
                        predicted_pheno_ref = predicted_phenoB
                        covar_pheno_ref = covar_phenoB
                    idx_ref = predicted_pheno_ref.iid_to_index([iidC])[0]
                    mean_ref = predicted_pheno_ref.val[idx_ref]
                    var_ref = covar_pheno_ref.val[idx_ref,idx_ref]
                    assert np.abs(meanC - mean_ref) < 1e-6
                    assert np.abs(varC - var_ref) < 1e-6
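
The two kernels above are filled in with Python loops; the same matrices can be built with broadcasting. A vectorized sketch under the same definitions (block kernel: 1 where iid_index0 % 5 == iid_index1 % 5; distance kernel: 1 - |home_i - home_j|**0.1):

    import numpy as np

    iid_count = 500
    idx = np.arange(iid_count)

    # "clones": 1 where both indices fall in the same residue class mod 5
    K0a_val = (idx[:, None] % 5 == idx[None, :] % 5).astype(float)

    # "related by distance": similarity decays with distance on the line
    np.random.seed(0)
    home = np.random.random(iid_count)
    K0b_val = 1 - np.abs(home[:, None] - home[None, :]) ** 0.1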

Example 159

Project: FaST-LMM
Source File: test_fastlmm_predictor.py
    def test_snps(self):
        logging.info("TestLmmTrain test_snps")

        train_idx = np.r_[10:self.snpreader_whole.iid_count] # iids 10 and on
        test_idx  = np.r_[0:10] # the first 10 iids

        # Show it using the snps
        G0_train = self.snpreader_whole[train_idx,:]
        covariate_train3 = self.covariate_whole[train_idx,:].read()
        pheno_train3 = self.pheno_whole[train_idx,:].read()
        pheno_train3.val = G0_train[:,0:1].read().val*2

        #pylab.plot(G0_train[:,0:1].read().val[:,0], pheno_train3.val[:,0],".")
        #pylab.show()

        #Learn model, save, load
        fastlmm3x = FastLMM(GB_goal=2).fit(K0_train=G0_train, X=covariate_train3, y=pheno_train3)
        filename = self.tempout_dir + "/model_snps.flm.p"
        pstutil.create_directory_if_necessary(filename)
        joblib.dump(fastlmm3x, filename) 
        fastlmm3 = joblib.load(filename)


        #Predict with model (test on train)
        predicted_pheno, covar = fastlmm3.predict(K0_whole_test=G0_train, X=covariate_train3) #test on train
        output_file = self.file_name("snps")
        Dat.write(output_file,predicted_pheno)

        ### Plot training x and y, and training x with predicted y
        #pylab.plot(G0_train[:,0:1].read().val[:,0], pheno_train3.val,".",G0_train[:,0:1].read().val[:,0],predicted_pheno.val,".")
        #pylab.show()

        ### Plot y and predicted y (test on train)
        #pheno_actual = pheno_train3.val[:,0]
        #pylab.plot(pheno_actual,predicted_pheno.val,".")
        #pylab.show()


        self.compare_files(predicted_pheno,"snps")

Example 161

Project: FaST-LMM
Source File: test_fastlmm_predictor.py
    def test_kernel(self):
        logging.info("TestLmmTrain test_kernel")

        train_idx = np.r_[10:self.snpreader_whole.iid_count] # iids 10 and on
        test_idx  = np.r_[0:10] # the first 10 iids

        # Show it using the snps
        K0_train = self.snpreader_whole[train_idx,:].read_kernel(Unit())
        covariate_train3 = self.covariate_whole[train_idx,:].read()
        pheno_train3 = self.pheno_whole[train_idx,:].read()
        pheno_train3.val = self.snpreader_whole[train_idx,0:1].read().val*2
        assert np.array_equal(K0_train.iid,covariate_train3.iid), "Expect iids to be the same (so that early and late Unit standardization will give the same result)"
        assert np.array_equal(K0_train.iid,pheno_train3.iid), "Expect iids to be the same (so that early and late Unit standardization will give the same result)"

        #pylab.plot(G0_train[:,0:1].read().val[:,0], pheno_train3.val[:,0],".")
        #pylab.show()

        #Learn model, save, load
        fastlmm3x = FastLMM(GB_goal=2).fit(K0_train=K0_train, X=covariate_train3, y=pheno_train3)
        filename = self.tempout_dir + "/model_snps.flm.p"
        pstutil.create_directory_if_necessary(filename)
        joblib.dump(fastlmm3x, filename) 
        fastlmm3 = joblib.load(filename)


        #Predict with model (test on train)
        predicted_pheno, covar = fastlmm3.predict(K0_whole_test=K0_train, X=covariate_train3) #test on train
        output_file = self.file_name("kernel")
        Dat.write(output_file,predicted_pheno)

        #### Plot training x and y, and training x with predicted y
        #pylab.plot(self.snpreader_whole[train_idx,0:1].read().val[:,0], pheno_train3.val,".",self.snpreader_whole[train_idx,0:1].read().val[:,0],predicted_pheno.val,".")
        #pylab.show()

        #### Plot y and predicted y (test on train)
        #pheno_actual = pheno_train3.val[:,0]
        #pylab.plot(pheno_actual,predicted_pheno.val,".")
        #pylab.show()


        self.compare_files(predicted_pheno,"snps") #"kernel" and "snps" test cases should give the same results

Example 162

Project: FaST-LMM
Source File: test_fastlmm_predictor.py
View license
    def test_one(self):
        logging.info("TestLmmTrain test_one")

        train_idx = np.r_[10:self.snpreader_whole.iid_count] # iids 10 and on
        test_idx  = np.r_[0:10] # the first 10 iids

        G0_train = self.snpreader_whole[train_idx,:]
        covariate_train = self.covariate_whole[train_idx,:]
        pheno_train = self.pheno_whole[train_idx,:]

        fastlmm1 = FastLMM(GB_goal=2).fit(K0_train=G0_train, X=covariate_train, y=pheno_train)
        filename = self.tempout_dir + "/model_one.flm.p"
        pstutil.create_directory_if_necessary(filename)
        joblib.dump(fastlmm1, filename) 
        fastlmm2 = joblib.load(filename)
                
        # predict on test set
        G0_test = self.snpreader_whole[test_idx,:]
        covariate_test = self.covariate_whole[test_idx,:]

        predicted_pheno, covar = fastlmm2.predict(K0_whole_test=G0_test, X=covariate_test)

        output_file = self.file_name("one")
        Dat.write(output_file,predicted_pheno)

        pheno_actual = self.pheno_whole[test_idx,:].read().val[:,0]

        #pylab.plot(pheno_actual, predicted_pheno.val,".")
        #pylab.show()


        self.compare_files(predicted_pheno,"one")

Example 163

Project: FaST-LMM
Source File: test_fastlmm_predictor.py
    def test_kernel_one(self):
        logging.info("TestLmmTrain test_kernel_one")

        train_idx = np.r_[10:self.snpreader_whole.iid_count] # iids 10 and on
        test_idx  = np.r_[0:10] # the first 10 iids

        K0_train = SnpKernel(self.snpreader_whole[train_idx,:],standardizer=Unit())
        covariate_train = self.covariate_whole[train_idx,:]
        pheno_train = self.pheno_whole[train_idx,:]
        assert np.array_equal(K0_train.iid,covariate_train.iid), "Expect iids to be the same (so that early and late Unit standardization will give the same result)"
        assert np.array_equal(K0_train.iid,pheno_train.iid), "Expect iids to be the same (so that early and late Unit standardization will give the same result)"

        fastlmm1 = FastLMM(GB_goal=2).fit(K0_train=K0_train, X=covariate_train, y=pheno_train)
        filename = self.tempout_dir + "/model_kernel_one.flm.p"
        pstutil.create_directory_if_necessary(filename)
        joblib.dump(fastlmm1, filename) 
        fastlmm2 = joblib.load(filename)

                
        # predict on test set
        G0_test = self.snpreader_whole[test_idx,:]
        covariate_test = self.covariate_whole[test_idx,:]

        predicted_pheno, covar = fastlmm2.predict(K0_whole_test=G0_test, X=covariate_test)

        output_file = self.file_name("kernel_one")
        Dat.write(output_file,predicted_pheno)

        pheno_actual = self.pheno_whole[test_idx,:].read().val[:,0]

        #pylab.plot(pheno_actual, predicted_pheno.val,".")
        #pylab.show()


        self.compare_files(predicted_pheno,"one") #Expect same results as SNPs "one"

Example 166

Project: FaST-LMM
Source File: test_linear_regression.py
    def test_lr_real(self):
        do_plot = False

        import pylab
        logging.info("TestLinRegTrain test_lr_real")

        train_idx = np.r_[10:self.snpreader_whole.iid_count] # iids 10 and on
        test_idx  = np.r_[0:10] # the first 10 iids

        #make covar just numbers 0,1,...
        covar = self.covariate_whole.read()
        covar.val = np.array([[float(num)] for num in xrange(covar.iid_count)])
        covariate_train = covar[train_idx,:].read()
        covariate_test = covar[test_idx,:].read()
        K0_test_test = KernelIdentity(covariate_test.iid)

        #make pheno  # pheno = 2*covar+100+normal(0,1)*10
        pheno = self.pheno_whole.read()
        np.random.seed(0)
        pheno.val = covar.val * 2.0 + 100 + np.random.normal(size=covar.val.shape)*10

        pheno_train = pheno[train_idx,:].read()
        pheno_test = pheno[test_idx,:].read()

        if do_plot:
            #Plot training x and y, testing x and y
            pylab.plot(covariate_train.val, pheno_train.val,".",covariate_test.val, pheno_test.val,".")
            pylab.suptitle("Plot training x and y, testing x and y")
            pylab.show()

        Xtrain = np.c_[covariate_train.val,np.ones((covariate_train.iid_count,1))]
        Xtest = np.c_[covariate_test.val,np.ones((covariate_test.iid_count,1))]
        lsqSol = np.linalg.lstsq(Xtrain, pheno_train.val[:,0])
        bs=lsqSol[0] #weights
        r2=lsqSol[1] #squared residuals
        D=lsqSol[2]  #rank of design matrix
        N=pheno_train.iid_count
        REML = False
        if not REML:
            sigma2 = float(r2/N)
            nLL =  N*0.5*np.log(2*np.pi*sigma2) + N*0.5
        else:
            sigma2 = float(r2 / (N-D))
            nLL = N*0.5*np.log(2*np.pi*sigma2) + 0.5/sigma2*r2;
            nLL -= 0.5*D*np.log(2*np.pi*sigma2);#REML term

        predicted = Xtest.dot(bs)
        yerr = [np.sqrt(sigma2)] * len(predicted)
        if do_plot:
            pylab.plot(covariate_test.val, pheno_test.val,"g.",covariate_test.val, predicted,"r.")
            pylab.xlim([-1, 10])
            pylab.errorbar(covariate_test.val, predicted,yerr,linestyle='None')
            pylab.suptitle("real linear regression: actual to prediction")
            pylab.show()

        #These should all give the same result
        first_name = None
        for name,K0_train,K0_whole_test in [("Identity Kernel",None,None)]:

            first_name = first_name or name
            #Learn model, save, load
            modelx = LinearRegression().fit(K0_train=K0_train, X=covariate_train, y=pheno_train)
                
                
            filename = self.tempout_dir + "/model_lr_real.flm.p"
            pstutil.create_directory_if_necessary(filename)
            joblib.dump(modelx, filename) 
            model = joblib.load(filename)

            do_test_on_train = True
            if do_test_on_train:
                #Predict with model (test on train)
                predicted_pheno, covar = model.predict(K0_whole_test=K0_train, X=covariate_train) #test on train
                output_file = self.file_name("lr_reala_"+name)
                Dat.write(output_file,predicted_pheno)
                covar2 = SnpData(iid=covar.row,sid=covar.col[:,1],val=covar.val) #kludge to write kernel to text format
                output_file = self.file_name("lr_reala.cov_"+name)
                Dat.write(output_file,covar2)

                yerr = np.sqrt(np.diag(covar.val))
                predicted = predicted_pheno.val
                if do_plot:
                    pylab.plot(covariate_train.val, pheno_train.val,"g.",covariate_train.val, predicted,"r.")
                    pylab.xlim([0, 50])
                    pylab.ylim([100, 200])
                    pylab.errorbar(covariate_train.val, predicted,yerr,linestyle='None')
                    pylab.suptitle(name+": test on train: train X to true target (green) and prediction (red)")
                    pylab.show()

                self.compare_files(predicted_pheno,"lr2a_"+first_name)
                self.compare_files(covar2,"lr2a.cov_"+first_name)

            #Predict with model (test on test)
            predicted_pheno, covar  = model.predict(K0_whole_test=K0_whole_test, X=covariate_test) #test on test
            output_file = self.file_name("lr_realb_"+name)
            Dat.write(output_file,predicted_pheno)
            covar2 = SnpData(iid=covar.row,sid=covar.col[:,1],val=covar.val) #kludge to write kernel to text format
            output_file = self.file_name("lr_realb.cov_"+name)
            Dat.write(output_file,covar2)

            yerr = np.sqrt(np.diag(covar.val))
            predicted = predicted_pheno.val
            if do_plot:
                pylab.plot(covariate_test.val, pheno_test.val,"g.",covariate_test.val, predicted,"r.")
                pylab.xlim([-1, 10])
                pylab.errorbar(covariate_test.val, predicted,yerr,linestyle='None')
                pylab.suptitle(name+": test on test: test X to true target (green) and prediction (red)")
                pylab.show()
                ## Plot y and predicted y (test on train)
                #pylab.plot(pheno_test.val,predicted_pheno.val,".")
                #pylab.suptitle(name+": test on test: true target to prediction")
                #pylab.show()

            self.compare_files(predicted_pheno,"lr2b_"+first_name)
            self.compare_files(covar2,"lr2b.cov_"+first_name)
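
The splits above lean on np.r_'s slice syntax: np.r_[a:b] materializes the same integers as np.arange(a, b), which makes it a compact way to build index arrays, and the sibling np.c_ stacks columns (here, the intercept column). A minimal self-contained sketch with a made-up iid_count:

import numpy as np

iid_count = 50                           # made-up sample count
train_idx = np.r_[10:iid_count]          # integers 10, 11, ..., 49
test_idx = np.r_[0:10]                   # integers 0, 1, ..., 9
assert np.array_equal(np.sort(np.r_[train_idx, test_idx]), np.arange(iid_count))

# np.c_ is the column-wise counterpart, e.g. appending an intercept column
x = np.arange(5.0).reshape(-1, 1)
X = np.c_[x, np.ones((5, 1))]            # shape (5, 2)
assert X.shape == (5, 2)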

Example 167

Project: FaST-LMM
Source File: test_fastlmm_predictor.py
View license
    def test_lr(self):
        import matplotlib.pyplot as plt
        import pylab


        logging.info("TestLmmTrain test_lr")

        train_idx = np.r_[10:self.snpreader_whole.iid_count] # iids 10 and on
        test_idx  = np.r_[0:10] # the first 10 iids

        G0_train = self.snpreader_whole[train_idx,:]
        covariate_train3 = self.covariate_whole[train_idx,:].read()
        covariate_train3.val = np.array([[float(num)] for num in xrange(covariate_train3.iid_count)])
        pheno_train3 = self.pheno_whole[train_idx,:].read()
        np.random.seed(0)
        pheno_train3.val = covariate_train3.val * 2.0 + 100 + np.random.normal(size=covariate_train3.val.shape) # y = 2*x+100+normal(0,1)

        ##Plot training x and y
        #pylab.plot(covariate_train3.val, pheno_train3.val,".")
        #pylab.show()

        for force_full_rank,force_low_rank in [(True,False),(False,True)]:
            #Learn model, save, load
            fastlmm3x = FastLMM(force_full_rank=force_full_rank,force_low_rank=force_low_rank,GB_goal=2).fit(K0_train=G0_train, X=covariate_train3, y=pheno_train3)
            filename = self.tempout_dir + "/model_lr.flm.p"
            pstutil.create_directory_if_necessary(filename)
            joblib.dump(fastlmm3x, filename) 
            fastlmm3 = joblib.load(filename)


            #Predict with model (test on train)
            predicted_pheno, covar = fastlmm3.predict(K0_whole_test=G0_train, X=covariate_train3) #test on train
            output_file = self.file_name("lr")
            Dat.write(output_file,predicted_pheno)

            ## Plot training x and y, and training x with predicted y
            #do_plot = True 
            #if do_plot:
            #    pylab.plot(covariate_train3.val, pheno_train3.val,covariate_train3.val,predicted_pheno.val,".")
            #    pylab.show()

            #    # Plot y and predicted y (test on train)
            #    pheno_actual = pheno_train3.val[:,0]
            #    pylab.plot(pheno_actual,predicted_pheno.val,".")
            #    pylab.show()


            self.compare_files(predicted_pheno,"lr")

Example 168

Project: FaST-LMM
Source File: test_fastlmm_predictor.py
View license
    def test_lmm(self):
        do_plot = False
        iid_count = 500
        seed = 0


        import pylab
        logging.info("TestLmmTrain test_lmm")

        iid = [["cid{0}P{1}".format(iid_index,iid_index//250)]*2 for iid_index in xrange(iid_count)]
        train_idx = np.r_[10:iid_count] # iids 10 and on
        test_idx  = np.r_[0:10] # the first 10 iids


        #Every person is 100% related to everyone in one of 5 families
        K0a = KernelData(iid=iid,val=np.empty([iid_count,iid_count]),name="clones")
        for iid_index0 in xrange(iid_count):
            for iid_index1 in xrange(iid_count):
                K0a.val[iid_index0,iid_index1] = 1 if iid_index0 % 5 == iid_index1 % 5 else 0
                if iid_index1 < iid_index0:
                    assert K0a.val[iid_index0,iid_index1] == K0a.val[iid_index1,iid_index0]

        #every person lives on a line from 0 to 1
        # They are related to every other person as a function of distance on the line
        np.random.seed(seed)
        home = np.random.random([iid_count])
        K0b = KernelData(iid=iid,val=np.empty([iid_count,iid_count]),name="related by distance")
        for iid_index in xrange(iid_count):
            K0b.val[iid_index,:] = 1 - np.abs(home-home[iid_index])**.1

        #make covar just numbers 0,1,...
        covar = SnpData(iid=iid,sid=["x"],val=np.array([[float(num)] for num in xrange(iid_count)]))
        covariate_train = covar[train_idx,:].read()
        covariate_test = covar[test_idx,:].read()

        for name, h2, K0 in [("clones", 1, K0a),("line_world",.75,K0b)]:

            sigma2x = 100
            varg = sigma2x * h2
            vare = sigma2x * (1-h2)

            #######################################################################
            #make pheno  # pheno = 2*covar + 100 + normal(0,1)*sqrt(vare) + normal(0,K)*sqrt(varg)
            #######################################################################
            #random.multivariate_normal is sensitive to mkl_num_thread, so we control it.
            if 'MKL_NUM_THREADS' in os.environ:
                mkl_num_thread = os.environ['MKL_NUM_THREADS']
            else:
                mkl_num_thread = None
            os.environ['MKL_NUM_THREADS'] = '1'
            np.random.seed(seed)
            p1 = covar.val * 2.0 + 100
            p2 = np.random.normal(size=covar.val.shape)*np.sqrt(vare)
            p3 = (np.random.multivariate_normal(np.zeros(iid_count),K0.val)*np.sqrt(varg)).reshape(-1,1)
            if mkl_num_thread is not None:
                os.environ['MKL_NUM_THREADS'] = mkl_num_thread
            else:
                del os.environ['MKL_NUM_THREADS']
            pheno = SnpData(iid=iid,sid=["pheno0"],val= p1 + p2 + p3)

            pheno_train = pheno[train_idx,:].read()
            pheno_test = pheno[test_idx,:].read()

            if do_plot:
                #Plot training x and y, testing x and y
                pylab.plot(covariate_train.val, pheno_train.val,".",covariate_test.val, pheno_test.val,".")
                pylab.suptitle(name + ": Plot training x and y, testing x and y")
                pylab.show()

            Xtrain = np.c_[covariate_train.val,np.ones((covariate_train.iid_count,1))]
            Xtest = np.c_[covariate_test.val,np.ones((covariate_test.iid_count,1))]
            lsqSol = np.linalg.lstsq(Xtrain, pheno_train.val[:,0])
            bs=lsqSol[0] #weights
            r2=lsqSol[1] #squared residuals
            D=lsqSol[2]  #rank of design matrix
            N=pheno_train.iid_count
            REML = False
            if not REML:
                sigma2 = float(r2/N)
                nLL =  N*0.5*np.log(2*np.pi*sigma2) + N*0.5
            else:
                sigma2 = float(r2 / (N-D))
                nLL = N*0.5*np.log(2*np.pi*sigma2) + 0.5/sigma2*r2;
                nLL -= 0.5*D*np.log(2*np.pi*sigma2);#REML term

            predicted = Xtest.dot(bs)
            yerr = [np.sqrt(sigma2)] * len(predicted)
            if do_plot:
                pylab.plot(covariate_test.val, pheno_test.val,"g.",covariate_test.val, predicted,"r.")
                pylab.xlim([-1, 10])
                pylab.errorbar(covariate_test.val, predicted,yerr,linestyle='None')
                pylab.suptitle(name + ": real linear regression: actual to prediction")
                pylab.show()

            for factor in [1,100,.02]:
                K0 = K0.read()
                K0.val *= factor

                K0_train = K0[train_idx]
                K0_whole_test = K0[:,test_idx]

                #Learn model, save, load
                fastlmmx = FastLMM(GB_goal=2).fit(K0_train=K0_train, X=covariate_train, y=pheno_train)
                v2 = np.var(p2)
                v3 = np.var(p3)
                logging.debug("Original h2 of {0}. Generated h2 of {1}. Learned h2 of {2}".format(h2, v3/(v2+v3), fastlmmx.h2raw))
                
                
                filename = self.tempout_dir + "/model_lmm.flm.p"
                pstutil.create_directory_if_necessary(filename)
                joblib.dump(fastlmmx, filename) 
                fastlmm = joblib.load(filename)


                do_test_on_train = True
                if do_test_on_train:
                    #Predict with model (test on train)
                    predicted_pheno, covar_pheno = fastlmm.predict(K0_whole_test=K0_train, X=covariate_train) #test on train
                    output_file = self.file_name("lmma_"+name)
                    Dat.write(output_file,predicted_pheno)
                    covar2 = SnpData(iid=covar_pheno.row,sid=covar_pheno.col[:,1],val=covar_pheno.val) #kludge to write kernel to text format
                    output_file = self.file_name("lmma.cov_"+name)
                    Dat.write(output_file,covar2)

                    yerr = np.sqrt(np.diag(covar_pheno.val))
                    predicted = predicted_pheno.val
                    if do_plot:
                        pylab.plot(covariate_train.val, pheno_train.val,"g.",covariate_train.val, predicted,"r.")
                        pylab.xlim([0, 50])
                        pylab.ylim([100, 200])
                        pylab.errorbar(covariate_train.val, predicted,yerr,linestyle='None')
                        pylab.suptitle(name+": test on train: train X to true target (green) and prediction (red)")
                        pylab.show()

                    self.compare_files(predicted_pheno,"lmma_"+name)
                    self.compare_files(covar2,"lmma.cov_"+name)

                    predicted_pheno0, covar_pheno0 = fastlmm.predict(K0_whole_test=K0_train[:,0], X=covariate_train[0,:]) #test on train #0
                    assert np.abs(predicted_pheno0.val[0,0] - predicted_pheno.val[0,0]) < 1e-6, "Expect a single case to get the same prediction as a set of cases"
                    assert np.abs(covar_pheno0.val[0,0] - covar_pheno.val[0,0]) < 1e-6, "Expect a single case to get the same prediction as a set of cases"


                #Predict with model (test on test)
                predicted_phenoB, covar_phenoB  = fastlmm.predict(K0_whole_test=K0_whole_test, X=covariate_test) #test on test
                output_file = self.file_name("lmmb_"+name)
                Dat.write(output_file,predicted_phenoB)
                covar2 = SnpData(iid=covar_phenoB.row,sid=covar_phenoB.col[:,1],val=covar_phenoB.val) #kludge to write kernel to text format
                output_file = self.file_name("lmmb.cov_"+name)
                Dat.write(output_file,covar2)

                yerr = np.sqrt(np.diag(covar_phenoB.val))
                predicted = predicted_phenoB.val
                if do_plot:
                    pylab.plot(covariate_test.val, pheno_test.val,"g.",covariate_test.val, predicted,"r.")
                    pylab.xlim([-1, 10])
                    pylab.errorbar(covariate_test.val, predicted,yerr,linestyle='None')
                    pylab.suptitle(name+": test on test: test X to true target (green) and prediction (red)")
                    pylab.show()

                self.compare_files(predicted_phenoB,"lmmb_"+name)
                self.compare_files(covar2,"lmmb.cov_"+name)

                predicted_phenoB0, covar_phenoB0  = fastlmm.predict(K0_whole_test=K0_whole_test[:,0], X=covariate_test[0,:]) #test on a single test case
                assert np.abs(predicted_phenoB0.val[0,0] - predicted_phenoB.val[0,0]) < 1e-6, "Expect a single case to get the same prediction as a set of cases"
                assert np.abs(covar_phenoB0.val[0,0] - covar_phenoB.val[0,0]) < 1e-6, "Expect a single case to get the same prediction as a set of cases"

                #Predict with model test on some train and some test
                some_idx = range(covar.iid_count)
                some_idx.remove(train_idx[0])
                some_idx.remove(test_idx[0])
                covariate_some = covar[some_idx,:]
                K0_whole_some = K0[:,some_idx]
                predicted_phenoC, covar_phenoC  = fastlmm.predict(K0_whole_test=K0_whole_some, X=covariate_some)
                for idxC, iidC in enumerate(predicted_phenoC.iid):
                    meanC = predicted_phenoC.val[idxC]
                    varC = covar_phenoC.val[idxC,idxC]
                    if iidC in predicted_pheno.iid:
                        predicted_pheno_ref = predicted_pheno
                        covar_pheno_ref = covar_pheno
                    else:
                        assert iidC in predicted_phenoB.iid
                        predicted_pheno_ref = predicted_phenoB
                        covar_pheno_ref = covar_phenoB
                    idx_ref = predicted_pheno_ref.iid_to_index([iidC])[0]
                    mean_ref = predicted_pheno_ref.val[idx_ref]
                    var_ref = covar_pheno_ref.val[idx_ref,idx_ref]
                    assert np.abs(meanC - mean_ref) < 1e-6
                    assert np.abs(varC - var_ref) < 1e-6

Example 169

Project: FaST-LMM
Source File: test_fastlmm_predictor.py
View license
    def test_kernel(self):
        logging.info("TestLmmTrain test_kernel")

        train_idx = np.r_[10:self.snpreader_whole.iid_count] # iids 10 and on
        test_idx  = np.r_[0:10] # the first 10 iids

        # Show it using the snps
        K0_train = self.snpreader_whole[train_idx,:].read_kernel(Unit())
        covariate_train3 = self.covariate_whole[train_idx,:].read()
        pheno_train3 = self.pheno_whole[train_idx,:].read()
        pheno_train3.val = self.snpreader_whole[train_idx,0:1].read().val*2
        assert np.array_equal(K0_train.iid,covariate_train3.iid), "Expect iids to be the same (so that early and late Unit standardization will give the same result)"
        assert np.array_equal(K0_train.iid,pheno_train3.iid), "Expect iids to be the same (so that early and late Unit standardization will give the same result)"

        #pylab.plot(G0_train[:,0:1].read().val[:,0], pheno_train3.val[:,0],".")
        #pylab.show()

        #Learn model, save, load
        fastlmm3x = FastLMM(GB_goal=2).fit(K0_train=K0_train, X=covariate_train3, y=pheno_train3)
        filename = self.tempout_dir + "/model_snps.flm.p"
        pstutil.create_directory_if_necessary(filename)
        joblib.dump(fastlmm3x, filename) 
        fastlmm3 = joblib.load(filename)


        #Predict with model (test on train)
        predicted_pheno, covar = fastlmm3.predict(K0_whole_test=K0_train, X=covariate_train3) #test on train
        output_file = self.file_name("kernel")
        Dat.write(output_file,predicted_pheno)

        #### Plot training x and y, and training x with predicted y
        #pylab.plot(self.snpreader_whole[train_idx,0:1].read().val[:,0], pheno_train3.val,".",self.snpreader_whole[train_idx,0:1].read().val[:,0],predicted_pheno.val,".")
        #pylab.show()

        #### Plot y and predicted y (test on train)
        #pheno_actual = pheno_train3.val[:,0]
        #pylab.plot(pheno_actual,predicted_pheno.val,".")
        #pylab.show()


        self.compare_files(predicted_pheno,"snps") #"kernel" and "snps" test cases should give the same results

Example 171

Project: mne-python
Source File: search_light.py
View license
def _gl_transform(estimators, X, method):
    """Transform the dataset.

    This will apply each estimator to all slices of the data.

    Parameters
    ----------
    estimators : list of estimators
        The fitted estimators; each one is applied to every data slice.
    X : array, shape (n_samples, nd_features, n_slices)
        The input samples. The feature dimension can be multidimensional,
        e.g. X.shape = (n_samples, n_features_1, n_features_2, n_estimators).
    method : str
        Name of the estimator method to call on each slice.

    Returns
    -------
    y_pred : array, shape (n_samples, n_estimators, n_slices, ...)
        The transformed values generated by each estimator.
    """
    n_sample, n_iter = X.shape[0], X.shape[-1]
    for ii, est in enumerate(estimators):
        # stack generalized data for faster prediction
        X_stack = X.transpose(np.r_[0, X.ndim - 1, range(1, X.ndim - 1)])
        X_stack = X_stack.reshape(np.r_[n_sample * n_iter, X_stack.shape[2:]])
        transform = getattr(est, method)
        _y_pred = transform(X_stack)
        # unstack generalizations
        if _y_pred.ndim == 2:
            _y_pred = np.reshape(_y_pred, [n_sample, n_iter, _y_pred.shape[1]])
        else:
            shape = np.r_[n_sample, n_iter, _y_pred.shape[1:]].astype(int)
            _y_pred = np.reshape(_y_pred, shape)
        # Initialize array of predictions on the first transform iteration
        if ii == 0:
            y_pred = _gl_init_pred(_y_pred, X, len(estimators))
        y_pred[:, ii, ...] = _y_pred
    return y_pred
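
_gl_transform also uses np.r_ to splice scalars and a range into one integer array, first as a transpose order and then as a target shape. A minimal sketch of the same moves on a made-up 4-D array:

import numpy as np

X = np.zeros((4, 3, 2, 5))                       # (n_samples, f1, f2, n_slices)
order = np.r_[0, X.ndim - 1, range(1, X.ndim - 1)]
assert list(order) == [0, 3, 1, 2]               # samples, slices, then features

X_stack = X.transpose(order)
X_stack = X_stack.reshape(np.r_[4 * 5, X_stack.shape[2:]])
assert X_stack.shape == (20, 3, 2)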

Example 173

Project: mne-python
Source File: _compute_forward.py
View license
def _bem_specify_coils(bem, coils, coord_frame, mults, n_jobs):
    """Set up for computing the solution at a set of MEG coils.

    Parameters
    ----------
    bem : dict
        BEM information
    coils : list of dict, len(n_MEG_sensors)
        MEG sensor information dicts
    coord_frame : int
        Class constant identifying coordinate frame
    mults : ndarray, shape (1, n_BEM_vertices)
        Multiplier for every vertex in BEM
    n_jobs : int
        Number of jobs to run in parallel

    Returns
    -------
    sol: ndarray, shape (n_MEG_sensors, n_BEM_vertices)
        MEG solution
    """
    # Make sure MEG coils are in MRI coordinate frame to match BEM coords
    coils, coord_frame = _check_coil_frame(coils, coord_frame, bem)

    # leaving this in in case we want to easily add in the future
    # if method != 'simple':  # in ['ferguson', 'urankar']:
    #     raise NotImplementedError

    # Compute the weighting factors to obtain the magnetic field in the linear
    # potential approximation

    # Process each of the surfaces
    rmags, cosmags, ws, bins = _concatenate_coils(coils)
    lens = np.cumsum(np.r_[0, [len(s['rr']) for s in bem['surfs']]])
    sol = np.zeros((bins[-1] + 1, bem['solution'].shape[1]))

    lims = np.concatenate([np.arange(0, sol.shape[0], 100), [sol.shape[0]]])
    # Compute coeffs for each surface, one at a time
    for o1, o2, surf, mult in zip(lens[:-1], lens[1:],
                                  bem['surfs'], bem['field_mult']):
        coeff = _lin_field_coeff(surf, mult, rmags, cosmags, ws, bins, n_jobs)
        # put through the bem (in chunks to save memory)
        for start, stop in zip(lims[:-1], lims[1:]):
            sol[start:stop] += np.dot(coeff[start:stop],
                                      bem['solution'][o1:o2])
    sol *= mults
    return sol
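
The lens line is a standard offsets idiom: prepending 0 with np.r_ before cumsum turns per-surface lengths into boundaries, so zip(lens[:-1], lens[1:]) yields one (start, stop) pair per surface. Sketch with made-up lengths:

import numpy as np

lengths = [3, 5, 2]                       # made-up vertex counts per surface
lens = np.cumsum(np.r_[0, lengths])       # array([ 0,  3,  8, 10])
pairs = list(zip(lens[:-1], lens[1:]))
assert pairs == [(0, 3), (3, 8), (8, 10)]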

Example 175

Project: mne-python
Source File: raw.py
View license
    @verbose
    def __init__(self, fname, allow_maxshield=False, preload=False,
                 add_eeg_ref=False, verbose=None):  # noqa: D102
        fnames = [op.realpath(fname)]
        del fname
        split_fnames = []

        raws = []
        for ii, fname in enumerate(fnames):
            do_check_fname = fname not in split_fnames
            raw, next_fname = self._read_raw_file(fname, allow_maxshield,
                                                  preload, do_check_fname)
            raws.append(raw)
            if next_fname is not None:
                if not op.exists(next_fname):
                    warn('Split raw file detected but next file %s does not '
                         'exist.' % next_fname)
                    continue
                # process this file next
                fnames.insert(ii + 1, next_fname)
                split_fnames.append(next_fname)

        _check_raw_compatibility(raws)

        super(Raw, self).__init__(
            copy.deepcopy(raws[0].info), False,
            [r.first_samp for r in raws], [r.last_samp for r in raws],
            [r.filename for r in raws], [r._raw_extras for r in raws],
            raws[0].orig_format, None, verbose=verbose)
        from ...epochs import _dep_eeg_ref
        add_eeg_ref = _dep_eeg_ref(add_eeg_ref)

        # combine information from each raw file to construct self
        if add_eeg_ref and _needs_eeg_average_ref_proj(self.info):
            eeg_ref = make_eeg_average_ref_proj(self.info, activate=False)
            self.add_proj(eeg_ref)

        # combine annotations
        self.annotations = raws[0].annotations
        if any([r.annotations for r in raws[1:]]):
            first_samps = list()
            last_samps = list()
            for r in raws:
                first_samps = np.r_[first_samps, r.first_samp]
                last_samps = np.r_[last_samps, r.last_samp]
                self.annotations = _combine_annotations((self.annotations,
                                                         r.annotations),
                                                        last_samps,
                                                        first_samps,
                                                        r.info['sfreq'])
        if preload:
            self._preload_data(preload)
        else:
            self.preload = False
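
In the annotations loop, np.r_[first_samps, r.first_samp] acts as concatenate-and-append, growing a 1-D array one scalar at a time. Each call copies the whole array, so for long loops a plain list with one final conversion is the cheaper equivalent; a sketch of both, with made-up values:

import numpy as np

samps = [10, 20, 30]                   # stand-ins for r.first_samp values

acc = np.array([])                     # the pattern from the example
for s in samps:
    acc = np.r_[acc, s]                # copies acc on every iteration

acc2 = np.asarray(samps, dtype=float)  # single conversion at the end
assert np.array_equal(acc, acc2)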

Example 177

Project: mne-python
Source File: _3d.py
View license
def plot_sparse_source_estimates(src, stcs, colors=None, linewidth=2,
                                 fontsize=18, bgcolor=(.05, 0, .1),
                                 opacity=0.2, brain_color=(0.7,) * 3,
                                 show=True, high_resolution=False,
                                 fig_name=None, fig_number=None, labels=None,
                                 modes=('cone', 'sphere'),
                                 scale_factors=(1, 0.6),
                                 verbose=None, **kwargs):
    """Plot source estimates obtained with sparse solver.

    Active dipoles are represented in a "Glass" brain.
    If the same source is active in multiple source estimates, it is
    displayed with a sphere; otherwise, with a cone in 3D.

    Parameters
    ----------
    src : dict
        The source space.
    stcs : instance of SourceEstimate or list of instances of SourceEstimate
        The source estimates (up to 3).
    colors : list
        List of colors
    linewidth : int
        Line width in 2D plot.
    fontsize : int
        Font size.
    bgcolor : tuple of length 3
        Background color in 3D.
    opacity : float in [0, 1]
        Opacity of brain mesh.
    brain_color : tuple of length 3
        Brain color.
    show : bool
        Show figures if True.
    high_resolution : bool
        If True, plot on the original (non-downsampled) cortical mesh.
    fig_name :
        Mayavi figure name.
    fig_number :
        Matplotlib figure number.
    labels : ndarray or list of ndarrays
        Labels to show sources in clusters. Sources with the same
        label and the waveforms within each cluster are presented in
        the same color. labels should be a list of ndarrays when
        stcs is a list, i.e. one label array for each stc.
    modes : list
        Should be a list, with each entry being ``'cone'`` or ``'sphere'``
        to specify how the dipoles should be shown.
    scale_factors : list
        List of floating point scale factors for the markers.
    verbose : bool, str, int, or None
        If not None, override default verbose level (see :func:`mne.verbose`
        and :ref:`Logging documentation <tut_logging>` for more).
    **kwargs : kwargs
        Keyword arguments to pass to mlab.triangular_mesh.
    """
    known_modes = ['cone', 'sphere']
    if not isinstance(modes, (list, tuple)) or \
            not all(mode in known_modes for mode in modes):
        raise ValueError('mode must be a list containing only '
                         '"cone" or "sphere"')
    if not isinstance(stcs, list):
        stcs = [stcs]
    if labels is not None and not isinstance(labels, list):
        labels = [labels]

    if colors is None:
        colors = COLORS

    linestyles = ['-', '--', ':']

    # Show 3D
    lh_points = src[0]['rr']
    rh_points = src[1]['rr']
    points = np.r_[lh_points, rh_points]

    lh_normals = src[0]['nn']
    rh_normals = src[1]['nn']
    normals = np.r_[lh_normals, rh_normals]

    if high_resolution:
        use_lh_faces = src[0]['tris']
        use_rh_faces = src[1]['tris']
    else:
        use_lh_faces = src[0]['use_tris']
        use_rh_faces = src[1]['use_tris']

    use_faces = np.r_[use_lh_faces, lh_points.shape[0] + use_rh_faces]

    points *= 170

    vertnos = [np.r_[stc.lh_vertno, lh_points.shape[0] + stc.rh_vertno]
               for stc in stcs]
    unique_vertnos = np.unique(np.concatenate(vertnos).ravel())

    from mayavi import mlab
    from matplotlib.colors import ColorConverter
    color_converter = ColorConverter()

    f = mlab.figure(figure=fig_name, bgcolor=bgcolor, size=(600, 600))
    mlab.clf()
    if mlab.options.backend != 'test':
        f.scene.disable_render = True
    with warnings.catch_warnings(record=True):  # traits warnings
        surface = mlab.triangular_mesh(points[:, 0], points[:, 1],
                                       points[:, 2], use_faces,
                                       color=brain_color,
                                       opacity=opacity, **kwargs)

    import matplotlib.pyplot as plt
    # Show time courses
    plt.figure(fig_number)
    plt.clf()

    colors = cycle(colors)

    logger.info("Total number of active sources: %d" % len(unique_vertnos))

    if labels is not None:
        colors = [advance_iterator(colors) for _ in
                  range(np.unique(np.concatenate(labels).ravel()).size)]

    for idx, v in enumerate(unique_vertnos):
        # get indices of stcs it belongs to
        ind = [k for k, vertno in enumerate(vertnos) if v in vertno]
        is_common = len(ind) > 1

        if labels is None:
            c = advance_iterator(colors)
        else:
            # if vertex is in different stcs than take label from first one
            c = colors[labels[ind[0]][vertnos[ind[0]] == v]]

        mode = modes[1] if is_common else modes[0]
        scale_factor = scale_factors[1] if is_common else scale_factors[0]

        if (isinstance(scale_factor, (np.ndarray, list, tuple)) and
                len(unique_vertnos) == len(scale_factor)):
            scale_factor = scale_factor[idx]

        x, y, z = points[v]
        nx, ny, nz = normals[v]
        with warnings.catch_warnings(record=True):  # traits
            mlab.quiver3d(x, y, z, nx, ny, nz, color=color_converter.to_rgb(c),
                          mode=mode, scale_factor=scale_factor)

        for k in ind:
            vertno = vertnos[k]
            mask = (vertno == v)
            assert np.sum(mask) == 1
            linestyle = linestyles[k]
            plt.plot(1e3 * stcs[k].times, 1e9 * stcs[k].data[mask].ravel(),
                     c=c, linewidth=linewidth, linestyle=linestyle)

    plt.xlabel('Time (ms)', fontsize=18)
    plt.ylabel('Source amplitude (nAm)', fontsize=18)

    if fig_name is not None:
        plt.title(fig_name)
    plt_show(show)

    surface.actor.property.backface_culling = True
    surface.actor.property.shading = True

    return surface
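
For 2-D inputs, np.r_ concatenates along the first axis, so np.r_[lh_points, rh_points] behaves like np.vstack; the lh_points.shape[0] offset then reindexes right-hemisphere faces into the stacked array. Minimal sketch with made-up coordinates:

import numpy as np

lh = np.zeros((4, 3))                  # 4 left-hemisphere points
rh = np.ones((2, 3))                   # 2 right-hemisphere points
points = np.r_[lh, rh]                 # row-wise concat, shape (6, 3)
assert np.array_equal(points, np.vstack([lh, rh]))

rh_faces = np.array([[0, 1, 0]])       # indices into rh
faces = lh.shape[0] + rh_faces         # now valid indices into points
assert faces.max() < len(points)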

Example 179

View license
    def test_index_resolvers_come_after_columns_with_the_same_name(self):
        n = 1  # noqa
        a = np.r_[20:101:20]

        df = DataFrame({'index': a, 'b': np.random.randn(a.size)})
        df.index.name = 'index'
        result = df.query('index > 5', engine=self.engine, parser=self.parser)
        expected = df[df['index'] > 5]
        assert_frame_equal(result, expected)

        df = DataFrame({'index': a,
                        'b': np.random.randn(a.size)})
        result = df.query('ilevel_0 > 5', engine=self.engine,
                          parser=self.parser)
        expected = df.loc[df.index[df.index > 5]]
        assert_frame_equal(result, expected)

        df = DataFrame({'a': a, 'b': np.random.randn(a.size)})
        df.index.name = 'a'
        result = df.query('a > 5', engine=self.engine, parser=self.parser)
        expected = df[df.a > 5]
        assert_frame_equal(result, expected)

        result = df.query('index > 5', engine=self.engine, parser=self.parser)
        expected = df.loc[df.index[df.index > 5]]
        assert_frame_equal(result, expected)
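
np.r_[20:101:20] uses the full start:stop:step slice form, so a is array([20, 40, 60, 80, 100]), the same as np.arange(20, 101, 20). An imaginary step flips it to linspace semantics with an inclusive stop. Sketch:

import numpy as np

a = np.r_[20:101:20]
assert np.array_equal(a, np.arange(20, 101, 20))   # [ 20  40  60  80 100]

b = np.r_[0:10:5j]                                 # 5j -> 5 points, stop included
assert np.allclose(b, np.linspace(0, 10, 5))       # [ 0.   2.5  5.   7.5 10. ]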

Example 181

View license
    def _entropy(self, n, p):
        k = np.r_[0:n + 1]
        vals = self._pmf(k, n, p)
        return np.sum(entr(vals), axis=0)

Example 182

View license
    def _entropy(self, M, n, N):
        k = np.r_[N - (M - n):min(n, N) + 1]
        vals = self.pmf(k, M, n, N)
        return np.sum(entr(vals), axis=0)
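
In both _entropy methods, np.r_ just enumerates the integer support on which the pmf is evaluated and summed. A sketch of the same computation for a binomial, using the public scipy API (entr(x) is -x*log(x) with entr(0) = 0):

import numpy as np
from scipy.special import entr
from scipy.stats import binom

n, p = 10, 0.3
k = np.r_[0:n + 1]                      # support 0, 1, ..., n
vals = binom.pmf(k, n, p)
h = np.sum(entr(vals))                  # Shannon entropy in nats
assert np.isclose(h, binom.entropy(n, p))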

Example 185

View license
def precision_recall_curve(y_true, probas_pred, pos_label=None,
                           sample_weight=None):
    """Compute precision-recall pairs for different probability thresholds

    Note: this implementation is restricted to the binary classification task.

    The precision is the ratio ``tp / (tp + fp)`` where ``tp`` is the number of
    true positives and ``fp`` the number of false positives. The precision is
    intuitively the ability of the classifier not to label as positive a sample
    that is negative.

    The recall is the ratio ``tp / (tp + fn)`` where ``tp`` is the number of
    true positives and ``fn`` the number of false negatives. The recall is
    intuitively the ability of the classifier to find all the positive samples.

    The last precision and recall values are 1. and 0. respectively and do not
    have a corresponding threshold.  This ensures that the graph starts on the
    x axis.

    Read more in the :ref:`User Guide <precision_recall_f_measure_metrics>`.

    Parameters
    ----------
    y_true : array, shape = [n_samples]
        True targets of binary classification in range {-1, 1} or {0, 1}.

    probas_pred : array, shape = [n_samples]
        Estimated probabilities or decision function.

    pos_label : int, optional (default=None)
        The label of the positive class

    sample_weight : array-like of shape = [n_samples], optional
        Sample weights.

    Returns
    -------
    precision : array, shape = [n_thresholds + 1]
        Precision values such that element i is the precision of
        predictions with score >= thresholds[i] and the last element is 1.

    recall : array, shape = [n_thresholds + 1]
        Decreasing recall values such that element i is the recall of
        predictions with score >= thresholds[i] and the last element is 0.

    thresholds : array, shape = [n_thresholds <= len(np.unique(probas_pred))]
        Increasing thresholds on the decision function used to compute
        precision and recall.

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.metrics import precision_recall_curve
    >>> y_true = np.array([0, 0, 1, 1])
    >>> y_scores = np.array([0.1, 0.4, 0.35, 0.8])
    >>> precision, recall, thresholds = precision_recall_curve(
    ...     y_true, y_scores)
    >>> precision  # doctest: +ELLIPSIS
    array([ 0.66...,  0.5       ,  1.        ,  1.        ])
    >>> recall
    array([ 1. ,  0.5,  0.5,  0. ])
    >>> thresholds
    array([ 0.35,  0.4 ,  0.8 ])

    """
    fps, tps, thresholds = _binary_clf_curve(y_true, probas_pred,
                                             pos_label=pos_label,
                                             sample_weight=sample_weight)

    precision = tps / (tps + fps)
    recall = tps / tps[-1]

    # stop when full recall attained
    # and reverse the outputs so recall is decreasing
    last_ind = tps.searchsorted(tps[-1])
    sl = slice(last_ind, None, -1)
    return np.r_[precision[sl], 1], np.r_[recall[sl], 0], thresholds[sl]
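
The return line shows np.r_ as reverse-and-append: precision[sl] walks the arrays backwards and np.r_[..., 1] tacks the (precision, recall) = (1, 0) anchor onto the ends. Sketch of just that step with made-up values:

import numpy as np

precision = np.array([0.5, 0.66])
recall = np.array([1.0, 0.5])
sl = slice(None, None, -1)                 # reverse the whole array
assert np.array_equal(np.r_[precision[sl], 1], [0.66, 0.5, 1.0])
assert np.array_equal(np.r_[recall[sl], 0], [0.5, 1.0, 0.0])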

Example 186

View license
def test_oneclass_decision_function():
    # Test OneClassSVM decision function
    clf = svm.OneClassSVM()
    rnd = check_random_state(2)

    # Generate train data
    X = 0.3 * rnd.randn(100, 2)
    X_train = np.r_[X + 2, X - 2]

    # Generate some regular novel observations
    X = 0.3 * rnd.randn(20, 2)
    X_test = np.r_[X + 2, X - 2]
    # Generate some abnormal novel observations
    X_outliers = rnd.uniform(low=-4, high=4, size=(20, 2))

    # fit the model
    clf = svm.OneClassSVM(nu=0.1, kernel="rbf", gamma=0.1)
    clf.fit(X_train)

    # predict things
    y_pred_test = clf.predict(X_test)
    assert_greater(np.mean(y_pred_test == 1), .9)
    y_pred_outliers = clf.predict(X_outliers)
    assert_greater(np.mean(y_pred_outliers == -1), .9)
    dec_func_test = clf.decision_function(X_test)
    assert_array_equal((dec_func_test > 0).ravel(), y_pred_test == 1)
    dec_func_outliers = clf.decision_function(X_outliers)
    assert_array_equal((dec_func_outliers > 0).ravel(), y_pred_outliers == 1)
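
np.r_[X + 2, X - 2] stacks two shifted copies of the same (n, 2) draw along axis 0, giving two Gaussian blobs centred at (2, 2) and (-2, -2) in one call. Sketch:

import numpy as np

rng = np.random.RandomState(2)
X = 0.3 * rng.randn(100, 2)
X_train = np.r_[X + 2, X - 2]              # shape (200, 2): two clusters
assert X_train.shape == (200, 2)
assert np.allclose(X_train[:100].mean(axis=0), [2, 2], atol=0.2)
assert np.allclose(X_train[100:].mean(axis=0), [-2, -2], atol=0.2)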

Example 187

View license
def precision_recall_curve(y_true, probas_pred, pos_label=None,
                           sample_weight=None):
    """Compute precision-recall pairs for different probability thresholds

    Note: this implementation is restricted to the binary classification task.

    The precision is the ratio ``tp / (tp + fp)`` where ``tp`` is the number of
    true positives and ``fp`` the number of false positives. The precision is
    intuitively the ability of the classifier not to label as positive a sample
    that is negative.

    The recall is the ratio ``tp / (tp + fn)`` where ``tp`` is the number of
    true positives and ``fn`` the number of false negatives. The recall is
    intuitively the ability of the classifier to find all the positive samples.

    The last precision and recall values are 1. and 0. respectively and do not
    have a corresponding threshold.  This ensures that the graph starts on the
    x axis.

    Read more in the :ref:`User Guide <precision_recall_f_measure_metrics>`.

    Parameters
    ----------
    y_true : array, shape = [n_samples]
        True targets of binary classification in range {-1, 1} or {0, 1}.

    probas_pred : array, shape = [n_samples]
        Estimated probabilities or decision function.

    pos_label : int, optional (default=None)
        The label of the positive class

    sample_weight : array-like of shape = [n_samples], optional
        Sample weights.

    Returns
    -------
    precision : array, shape = [n_thresholds + 1]
        Precision values such that element i is the precision of
        predictions with score >= thresholds[i] and the last element is 1.

    recall : array, shape = [n_thresholds + 1]
        Decreasing recall values such that element i is the recall of
        predictions with score >= thresholds[i] and the last element is 0.

    thresholds : array, shape = [n_thresholds <= len(np.unique(probas_pred))]
        Increasing thresholds on the decision function used to compute
        precision and recall.

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.metrics import precision_recall_curve
    >>> y_true = np.array([0, 0, 1, 1])
    >>> y_scores = np.array([0.1, 0.4, 0.35, 0.8])
    >>> precision, recall, thresholds = precision_recall_curve(
    ...     y_true, y_scores)
    >>> precision  # doctest: +ELLIPSIS
    array([ 0.66...,  0.5       ,  1.        ,  1.        ])
    >>> recall
    array([ 1. ,  0.5,  0.5,  0. ])
    >>> thresholds
    array([ 0.35,  0.4 ,  0.8 ])

    """
    fps, tps, thresholds = _binary_clf_curve(y_true, probas_pred,
                                             pos_label=pos_label,
                                             sample_weight=sample_weight)

    precision = tps / (tps + fps)
    recall = tps / tps[-1]

    # stop when full recall attained
    # and reverse the outputs so recall is decreasing
    last_ind = tps.searchsorted(tps[-1])
    sl = slice(last_ind, None, -1)
    return np.r_[precision[sl], 1], np.r_[recall[sl], 0], thresholds[sl]

Example 188

View license
def test_oneclass_decision_function():
    # Test OneClassSVM decision function
    clf = svm.OneClassSVM()
    rnd = check_random_state(2)

    # Generate train data
    X = 0.3 * rnd.randn(100, 2)
    X_train = np.r_[X + 2, X - 2]

    # Generate some regular novel observations
    X = 0.3 * rnd.randn(20, 2)
    X_test = np.r_[X + 2, X - 2]
    # Generate some abnormal novel observations
    X_outliers = rnd.uniform(low=-4, high=4, size=(20, 2))

    # fit the model
    clf = svm.OneClassSVM(nu=0.1, kernel="rbf", gamma=0.1)
    clf.fit(X_train)

    # predict things
    y_pred_test = clf.predict(X_test)
    assert_greater(np.mean(y_pred_test == 1), .9)
    y_pred_outliers = clf.predict(X_outliers)
    assert_greater(np.mean(y_pred_outliers == -1), .9)
    dec_func_test = clf.decision_function(X_test)
    assert_array_equal((dec_func_test > 0).ravel(), y_pred_test == 1)
    dec_func_outliers = clf.decision_function(X_outliers)
    assert_array_equal((dec_func_outliers > 0).ravel(), y_pred_outliers == 1)
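With 2-D inputs, ``np.r_`` concatenates along the first axis, so ``np.r_[X + 2, X - 2]`` stacks two shifted copies of the same points into a single training set of two blobs. A minimal sketch (toy sizes, not the test's):

import numpy as np

rng = np.random.RandomState(0)
X = 0.3 * rng.randn(4, 2)

# Row-wise concatenation: 4 rows centred near (2, 2), 4 near (-2, -2)
X_train = np.r_[X + 2, X - 2]
print(X_train.shape)  # (8, 2)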

Example 189

Project: py-findpeaks
Source File: peakdetect.py
View license
def _smooth(x, window_len=11, window='hanning'):
    """
    smooth the data using a window of the requested size.

    This method is based on the convolution of a scaled window on the signal.
    The signal is prepared by introducing reflected copies of the signal
    (with the window size) at both ends so that transient parts are minimized
    at the beginning and end of the output signal.

    input:
        x: the input signal
        window_len: the dimension of the smoothing window; should be an odd
            integer
        window: the type of window from 'flat', 'hanning', 'hamming',
            'bartlett', 'blackman'
            flat window will produce a moving average smoothing.
    output:
        the smoothed signal

    example:
    t = np.arange(-2, 2, 0.1)
    x = np.sin(t) + np.random.randn(len(t)) * 0.1
    y = _smooth(x)

    see also:

    numpy.hanning, numpy.hamming, numpy.bartlett, numpy.blackman,
    numpy.convolve, scipy.signal.lfilter

    TODO: the window parameter could be the window itself if a list instead of
    a string
    """
    if x.ndim != 1:
        raise ValueError("smooth only accepts 1 dimension arrays.")

    if x.size < window_len:
        raise ValueError("Input vector needs to be bigger than window size.")

    if window_len < 3:
        return x

    if window not in ('flat', 'hanning', 'hamming', 'bartlett', 'blackman'):
        raise ValueError(
            "Window is not one of 'flat', 'hanning', 'hamming', "
            "'bartlett', 'blackman'")

    # Pad the signal with reflected copies of itself at both ends
    s = np.r_[x[window_len-1:0:-1], x, x[-1:-window_len:-1]]
    if window == 'flat':  # moving average
        w = np.ones(window_len, 'd')
    else:
        w = getattr(np, window)(window_len)

    # Note: the result is longer than x by window_len - 1 samples
    y = np.convolve(w / w.sum(), s, mode='valid')
    return y
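The ``np.r_`` line above is the heart of the padding: it glues a reversed copy of the head, the signal itself, and a reversed copy of the tail into one array. A small sketch of the reflection:

import numpy as np

x = np.array([1, 2, 3, 4, 5])
window_len = 3

head = x[window_len - 1:0:-1]   # [3, 2] -- reflected start
tail = x[-1:-window_len:-1]     # [5, 4] -- reflected end
print(np.r_[head, x, tail])     # [3 2 1 2 3 4 5 5 4]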

Example 190

Project: py-findpeaks
Source File: peakdetect.py
View license
def _smooth(x, window_len=11, window='hanning'):
    """
    smooth the data using a window of the requested size.

    This method is based on the convolution of a scaled window on the signal.
    The signal is prepared by introducing reflected copies of the signal
    (with the window size) at both ends so that transient parts are minimized
    at the beginning and end of the output signal.

    input:
        x: the input signal
        window_len: the dimension of the smoothing window; should be an odd
            integer
        window: the type of window from 'flat', 'hanning', 'hamming',
            'bartlett', 'blackman'
            flat window will produce a moving average smoothing.
    output:
        the smoothed signal

    example:
    t = np.arange(-2, 2, 0.1)
    x = np.sin(t) + np.random.randn(len(t)) * 0.1
    y = _smooth(x)

    see also:

    numpy.hanning, numpy.hamming, numpy.bartlett, numpy.blackman,
    numpy.convolve, scipy.signal.lfilter

    TODO: the window parameter could be the window itself if a list instead of
    a string
    """
    if x.ndim != 1:
        raise ValueError("smooth only accepts 1 dimension arrays.")

    if x.size < window_len:
        raise ValueError("Input vector needs to be bigger than window size.")

    if window_len < 3:
        return x

    if window not in ('flat', 'hanning', 'hamming', 'bartlett', 'blackman'):
        raise ValueError(
            "Window is not one of 'flat', 'hanning', 'hamming', "
            "'bartlett', 'blackman'")

    # Pad the signal with reflected copies of itself at both ends
    s = np.r_[x[window_len-1:0:-1], x, x[-1:-window_len:-1]]
    if window == 'flat':  # moving average
        w = np.ones(window_len, 'd')
    else:
        w = getattr(np, window)(window_len)

    # Note: the result is longer than x by window_len - 1 samples
    y = np.convolve(w / w.sum(), s, mode='valid')
    return y

Example 191

Project: nipy
Source File: maps_3d.py
View license
def affine_img_src(data, affine, scale=1, name='AffineImage',
                reverse_x=False):
    """ Make a Mayavi source defined by a 3D array and an affine, for
        wich the voxel of the 3D array are mapped by the affine.
        
        Parameters
        -----------
        data: 3D ndarray
            The data array
        affine: (4 x 4) ndarray
            The (4 x 4) affine matrix relating voxels to world
            coordinates.
        scale: float, optional
            An optional additional scaling factor.
        name: string, optional
            The name of the Mayavi source created.
        reverse_x: boolean, optional
            Reverse the x (lateral) axis. Useful to compare with
            images in radiologic convention.

        Notes
        ------
        The affine should be diagonal.
    """
    # Late import to avoid triggering wx imports before needed.
    try:
        from mayavi.sources.api import ArraySource
    except ImportError:
        # Try out old install of Mayavi, with namespace packages
        from enthought.mayavi.sources.api import ArraySource
    center = np.r_[0, 0, 0, 1]
    spacing = np.diag(affine)[:3].copy()
    origin = np.dot(affine, center)[:3]
    if reverse_x:
        # Radiologic convention
        spacing[0] *= -1
        origin[0] *= -1
    src = ArraySource(scalar_data=np.asarray(data, dtype=float),
                           name=name,
                           spacing=scale*spacing,
                           origin=scale*origin)
    return src 
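``np.r_[0, 0, 0, 1]`` builds the homogeneous coordinate of the voxel-space origin, which the affine then maps into world space. A minimal sketch with a hypothetical diagonal affine:

import numpy as np

# 2 mm isotropic voxels, origin shifted to (10, 20, 30) -- made-up values
affine = np.array([[2., 0., 0., 10.],
                   [0., 2., 0., 20.],
                   [0., 0., 2., 30.],
                   [0., 0., 0., 1.]])

center = np.r_[0, 0, 0, 1]
print(np.dot(affine, center)[:3])  # [10. 20. 30.] -- world origin
print(np.diag(affine)[:3])         # [2. 2. 2.]    -- voxel spacing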

Example 192

Project: nipy
Source File: maps_3d.py
View license
def affine_img_src(data, affine, scale=1, name='AffineImage',
                reverse_x=False):
    """ Make a Mayavi source defined by a 3D array and an affine, for
        wich the voxel of the 3D array are mapped by the affine.
        
        Parameters
        -----------
        data: 3D ndarray
            The data array
        affine: (4 x 4) ndarray
            The (4 x 4) affine matrix relating voxels to world
            coordinates.
        scale: float, optional
            An optional additional scaling factor.
        name: string, optional
            The name of the Mayavi source created.
        reverse_x: boolean, optional
            Reverse the x (lateral) axis. Useful to compare with
            images in radiologic convention.

        Notes
        ------
        The affine should be diagonal.
    """
    # Late import to avoid triggering wx imports before needed.
    try:
        from mayavi.sources.api import ArraySource
    except ImportError:
        # Try out old install of Mayavi, with namespace packages
        from enthought.mayavi.sources.api import ArraySource
    center = np.r_[0, 0, 0, 1]
    spacing = np.diag(affine)[:3].copy()
    origin = np.dot(affine, center)[:3]
    if reverse_x:
        # Radiologic convention
        spacing[0] *= -1
        origin[0] *= -1
    src = ArraySource(scalar_data=np.asarray(data, dtype=float),
                           name=name,
                           spacing=scale*spacing,
                           origin=scale*origin)
    return src 

Example 193

Project: nitime
Source File: autoregressive.py
View license
def AR_psd(ak, sigma_v, n_freqs=1024, sides='onesided'):
    r"""
    Compute the PSD of an AR process, based on the process coefficients and
    covariance

    n_freqs : int
        The number of spacings on the frequency grid from [-PI,PI).
        If sides=='onesided', n_freqs/2+1 frequencies are computed from [0,PI]

    sides : str (optional)
        Indicates whether to return a one-sided or two-sided PSD

    Returns
    -------
    (w, ar_psd)
    w : Array of normalized frequences from [-.5, .5) or [0,.5]
    ar_psd : A PSD estimate computed by sigma_v / |1-a(f)|**2 , where
             a(f) = DTFT(ak)


    """
    # compute the psd as |H(f)|**2, where H(f) is the transfer function
    # for this model s[n] = a1*s[n-1] + a2*s[n-2] + ... aP*s[n-P] + v[n]
    # Taken as a IIR system with unit-variance white noise input e[n]
    # and output s[n],
    # b0*e[n] = w0*s[n] + w1*s[n-1] + w2*s[n-2] + ... + wP*s[n-P],
    # where b0 = sqrt(VAR{v[n]}), w0 = 1, and wk = -ak for k>0
    # the transfer function here is H(f) = DTFT(w)
    # leading to Sxx(f)/Exx(f) = |H(f)|**2 = VAR{v[n]} / |W(f)|**2
    w, hw = freq_response(sigma_v ** 0.5, a=np.r_[1, -ak],
                          n_freqs=n_freqs, sides=sides)
    ar_psd = (hw * hw.conj()).real
    return (w, 2 * ar_psd) if sides == 'onesided' else (w, ar_psd)
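``np.r_[1, -ak]`` mixes a scalar and an array, prepending the implicit leading 1 to the negated AR coefficients to form the denominator polynomial w. A minimal sketch that also evaluates |W(f)|**2 directly (hypothetical coefficients; nitime's ``freq_response`` does this internally):

import numpy as np

ak = np.array([0.9, -0.2])   # hypothetical AR(2) coefficients
w_coef = np.r_[1, -ak]       # [ 1. , -0.9,  0.2]

# DTFT of w on a small grid over [0, PI]
freqs = np.linspace(0, np.pi, 5)
Wf = np.exp(-1j * np.outer(freqs, np.arange(len(w_coef)))).dot(w_coef)
psd = 1.0 / np.abs(Wf) ** 2  # sigma_v taken as 1
print(psd.shape)             # (5,)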

Example 194

Project: nitime
Source File: autoregressive.py
View license
def transfer_function_xy(a, n_freqs=1024):
    r"""Helper routine to compute the transfer function H(w) based
    on sequence of coefficient matrices A(i). The z transforms
    follow from this definition:

    X[t] + sum_{k=1}^P a[k]X[t-k] = Err[t]

    Parameters
    ----------

    a : ndarray, shape (P, 2, 2)
      sequence of coef matrices describing an mAR process
    n_freqs : int, optional
      number of frequencies to compute in range [0,PI]

    Returns
    -------

    Hw : ndarray
      The transfer function from innovations process vector to
      mAR process X

    """
    # these concatenations follow from the observation that A(0) is
    # implicitly the identity matrix
    ai = np.r_[1, a[:, 0, 0]]
    bi = np.r_[0, a[:, 0, 1]]
    ci = np.r_[0, a[:, 1, 0]]
    di = np.r_[1, a[:, 1, 1]]

    # compute A(w) such that A(w)X(w) = Err(w)
    w, aw = freq_response(ai, n_freqs=n_freqs)
    _, bw = freq_response(bi, n_freqs=n_freqs)
    _, cw = freq_response(ci, n_freqs=n_freqs)
    _, dw = freq_response(di, n_freqs=n_freqs)

    #A = np.array([ [1-aw, -bw], [-cw, 1-dw] ])
    A = np.array([[aw, bw], [cw, dw]])
    # compute the transfer function from Err to X. Since Err(w) is 1(w),
    # the transfer function H(w) = A^(-1)(w)
    # (use 2x2 matrix shortcut)
    detA = (A[0, 0] * A[1, 1] - A[0, 1] * A[1, 0])
    Hw = np.array([[dw, -bw], [-cw, aw]])
    Hw /= detA
    return w, Hw
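The four ``np.r_`` calls pad each coefficient sequence with its A(0) entry (1 on the diagonal, 0 off it) before taking the frequency response, and the closing lines invert A(w) with the closed-form 2x2 formula instead of ``np.linalg.inv``. A quick numeric check of that shortcut on a made-up complex matrix:

import numpy as np

A = np.array([[1.2 + 0.3j, 0.1],
              [0.2, 0.9 - 0.1j]])

detA = A[0, 0] * A[1, 1] - A[0, 1] * A[1, 0]
H = np.array([[A[1, 1], -A[0, 1]],
              [-A[1, 0], A[0, 0]]]) / detA
print(np.allclose(H, np.linalg.inv(A)))  # True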

Example 195

Project: nitime
Source File: test_spectral.py
View license
def test_mtm_cross_spectrum():
    """

    Test the multi-taper cross-spectral estimation. Based on the example in
    doc/examples/multi_taper_coh.py

    """
    NW = 4
    K = 2 * NW - 1

    N = 2 ** 10
    n_reps = 10
    n_freqs = N

    tapers, eigs = tsa.dpss_windows(N, NW, 2 * NW - 1)

    est_psd = []
    for k in range(n_reps):
        data, nz, alpha = utils.ar_generator(N=N)
        fgrid, hz = tsa.freq_response(1.0, a=np.r_[1, -alpha], n_freqs=n_freqs)
        # 'one-sided', so multiply by 2:
        psd = 2 * (hz * hz.conj()).real

        tdata = tapers * data

        tspectra = fftpack.fft(tdata)

        L = N // 2 + 1
        sides = 'onesided'
        w, _ = utils.adaptive_weights(tspectra, eigs, sides=sides)

        sxx = tsa.mtm_cross_spectrum(tspectra, tspectra, w, sides=sides)
        est_psd.append(sxx)

    fxx = np.mean(est_psd, 0)

    psd_ratio = np.mean(fxx / psd)

    # This is a rather lenient test, making sure that the average ratio is 1 to
    # within an order of magnitude. That is, that they are equal on average:
    npt.assert_array_almost_equal(psd_ratio, 1, decimal=1)

    # Test raising of error in case the inputs don't make sense:
    npt.assert_raises(ValueError,
                      tsa.mtm_cross_spectrum,
                      tspectra, np.r_[tspectra, tspectra],
                      (w, w))

Example 196

Project: nitime
Source File: test_spectral.py
View license
@dec.slow
def test_multi_taper_psd_csd():
    """

    Test the multi taper psd and csd estimation functions.
    Based on the example in
    doc/examples/multi_taper_spectral_estimation.py

    """

    N = 2 ** 10
    n_reps = 10

    psd = []
    est_psd = []
    est_csd = []
    for jk in [True, False]:
        for k in range(n_reps):
            for adaptive in [True, False]:
                ar_seq, nz, alpha = utils.ar_generator(N=N, drop_transients=10)
                ar_seq -= ar_seq.mean()
                fgrid, hz = tsa.freq_response(1.0, a=np.r_[1, -alpha],
                                              n_freqs=N)
                psd.append(2 * (hz * hz.conj()).real)
                f, psd_mt, nu = tsa.multi_taper_psd(ar_seq, adaptive=adaptive,
                                                    jackknife=jk)
                est_psd.append(psd_mt)
                f, csd_mt = tsa.multi_taper_csd(np.vstack([ar_seq, ar_seq]),
                                               adaptive=adaptive)
                # Symmetrical in this case, so take one element out:
                est_csd.append(csd_mt[0][1])

        fxx = np.mean(psd, axis=0)
        fxx_est1 = np.mean(est_psd, axis=0)
        fxx_est2 = np.mean(est_csd, axis=0)

        # Tests the psd:
        psd_ratio1 = np.mean(fxx_est1 / fxx)
        npt.assert_array_almost_equal(psd_ratio1, 1, decimal=-1)
        # Tests the csd:
        psd_ratio2 = np.mean(fxx_est2 / fxx)
        npt.assert_array_almost_equal(psd_ratio2, 1, decimal=-1)

Example 197

Project: nupic
Source File: contour.py
View license
    def calc_label_rot_and_inline( self, slc, ind, lw, lc=None, spacing=5 ):
        """
        This function calculates the appropriate label rotation given
        the linecontour coordinates in screen units, the index of the
        label location and the label width.

        It will also break contour and calculate inlining if *lc* is
        not empty (lc defaults to the empty list if None).  *spacing*
        is the space around the label in pixels to leave empty.

        Do both of these tasks at once to avoid calling mlab.path_length
        multiple times, which is relatively costly.

        The method used here involves calculating the path length
        along the contour in pixel coordinates and then looking
        approximately label width / 2 away from central point to
        determine rotation and then to break contour if desired.
        """

        if lc is None: lc = []
        # Half the label width
        hlw = lw/2.0

        # Check if closed and, if so, rotate contour so label is at edge
        closed = mlab.is_closed_polygon(slc)
        if closed:
            slc = np.r_[ slc[ind:-1], slc[:ind+1] ]

            if len(lc): # Rotate lc also if not empty
                lc = np.r_[ lc[ind:-1], lc[:ind+1] ]

            ind = 0

        # Path length in pixel space
        pl = mlab.path_length(slc)
        pl = pl-pl[ind]

        # Use linear interpolation to get points around label
        xi = np.array( [ -hlw, hlw ] )
        if closed: # Look at end also for closed contours
            dp = np.array([pl[-1],0])
        else:
            dp = np.zeros_like(xi)

        ll = mlab.less_simple_linear_interpolation( pl, slc, dp+xi,
                                                     extrap=True )

        # get vector in pixel space coordinates from one point to other
        dd = np.diff( ll, axis=0 ).ravel()

        # Get angle of vector - must be calculated in pixel space for
        # text rotation to work correctly
        if np.all(dd==0): # Must deal with case of zero length label
            rotation = 0.0
        else:
            rotation = np.arctan2(dd[1], dd[0]) * 180.0 / np.pi

        # Fix angle so text is never upside-down
        if rotation > 90:
            rotation = rotation - 180.0
        if rotation < -90:
            rotation = 180.0 + rotation

        # Break contour if desired
        nlc = []
        if len(lc):
            # Expand range by spacing
            xi = dp + xi + np.array([-spacing,spacing])

            # Get indices near points of interest
            I = mlab.less_simple_linear_interpolation(
                pl, np.arange(len(pl)), xi, extrap=False )

            # If those indices aren't beyond contour edge, find x,y
            if (not np.isnan(I[0])) and int(I[0]) != I[0]:
                xy1 = mlab.less_simple_linear_interpolation(
                    pl, lc, [ xi[0] ] )

            if (not np.isnan(I[1])) and int(I[1]) != I[1]:
                xy2 = mlab.less_simple_linear_interpolation(
                    pl, lc, [ xi[1] ] )

            # Make integer (kept as floats so the NaN checks below still
            # work; cast to int at the point of use)
            I = [ np.floor(I[0]), np.ceil(I[1]) ]

            # Actually break contours
            if closed:
                # This will remove contour if shorter than label
                if np.all(~np.isnan(I)):
                    nlc.append( np.r_[ xy2, lc[int(I[1]):int(I[0])+1], xy1 ] )
            else:
                # These will remove pieces of contour if they have length zero
                if not np.isnan(I[0]):
                    nlc.append( np.r_[ lc[:int(I[0])+1], xy1 ] )
                if not np.isnan(I[1]):
                    nlc.append( np.r_[ xy2, lc[int(I[1]):] ] )

            # The current implementation removes contours completely
            # covered by labels.  Uncomment line below to keep
            # original contour if this is the preferred behavior.
            #if not len(nlc): nlc = [ lc ]

        return (rotation,nlc)
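Two distinct ``np.r_`` idioms appear above: rotating a closed contour so the label index comes first, and splicing interpolated break points back into the vertex array. A minimal sketch of the rotation, assuming (as ``is_closed_polygon`` checks) that the first and last vertices coincide:

import numpy as np

# Closed square: last vertex repeats the first
slc = np.array([[0, 0], [1, 0], [1, 1], [0, 1], [0, 0]])
ind = 2

# Drop the duplicate endpoint, start at vertex `ind`,
# and re-close the polygon via slc[:ind+1]
print(np.r_[slc[ind:-1], slc[:ind + 1]])
# [[1 1] [0 1] [0 0] [1 0] [1 1]]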

Example 198

Project: nupic
Source File: contour.py
View license
    def labels(self, inline, inline_spacing):
        trans = self.ax.transData # A bit of shorthand

        for icon, lev, fsize, cvalue in zip(
            self.labelIndiceList, self.labelLevelList, self.labelFontSizeList,
            self.labelCValueList ):

            con = self.collections[icon]
            lw = self.get_label_width(lev, self.labelFmt, fsize)
            additions = []
            paths = con.get_paths()
            for segNum, linepath in enumerate(paths):
                lc = linepath.vertices # Line contour
                slc0 = trans.transform(lc) # Line contour in screen coords

                # For closed polygons, add extra point to avoid division by
                # zero in print_label and locate_label.  Other than these
                # functions, this is not necessary and should probably be
                # eventually removed.
                if mlab.is_closed_polygon( lc ):
                    slc = np.r_[ slc0, slc0[1:2,:] ]
                else:
                    slc = slc0

                if self.print_label(slc,lw): # Check if long enough for a label
                    x,y,ind  = self.locate_label(slc, lw)

                    if inline: lcarg = lc
                    else: lcarg = None
                    rotation,new=self.calc_label_rot_and_inline(
                        slc0, ind, lw, lcarg,
                        inline_spacing )

                    # Actually add the label
                    self.add_label(x,y,rotation,lev,cvalue)

                    # If inline, add new contours
                    if inline:
                        for n in new:
                            # Add path if not empty or single point
                            if len(n)>1: additions.append( path.Path(n) )
                else: # If not adding label, keep old path
                    additions.append(linepath)

            # After looping over all segments on a contour, remove old
            # paths and add new ones if inlining
            if inline:
                del paths[:]
                paths.extend(additions)
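``np.r_[slc0, slc0[1:2, :]]`` appends the second vertex as an extra row; the ``1:2`` slice keeps it 2-D, since ``np.r_`` cannot concatenate a 1-D row onto a 2-D array. A minimal sketch:

import numpy as np

slc0 = np.array([[0., 0.], [1., 0.], [0., 1.], [0., 0.]])

slc = np.r_[slc0, slc0[1:2, :]]   # appends row [1., 0.]
print(slc.shape)                  # (5, 2)
# np.r_[slc0, slc0[1]] would raise: cannot mix 1-D and 2-D inputs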

Example 199

Project: nupic
Source File: contour.py
View license
    def calc_label_rot_and_inline( self, slc, ind, lw, lc=None, spacing=5 ):
        """
        This function calculates the appropriate label rotation given
        the linecontour coordinates in screen units, the index of the
        label location and the label width.

        It will also break contour and calculate inlining if *lc* is
        not empty (lc defaults to the empty list if None).  *spacing*
        is the space around the label in pixels to leave empty.

        Do both of these tasks at once to avoid calling mlab.path_length
        multiple times, which is relatively costly.

        The method used here involves calculating the path length
        along the contour in pixel coordinates and then looking
        approximately label width / 2 away from central point to
        determine rotation and then to break contour if desired.
        """

        if lc is None: lc = []
        # Half the label width
        hlw = lw/2.0

        # Check if closed and, if so, rotate contour so label is at edge
        closed = mlab.is_closed_polygon(slc)
        if closed:
            slc = np.r_[ slc[ind:-1], slc[:ind+1] ]

            if len(lc): # Rotate lc also if not empty
                lc = np.r_[ lc[ind:-1], lc[:ind+1] ]

            ind = 0

        # Path length in pixel space
        pl = mlab.path_length(slc)
        pl = pl-pl[ind]

        # Use linear interpolation to get points around label
        xi = np.array( [ -hlw, hlw ] )
        if closed: # Look at end also for closed contours
            dp = np.array([pl[-1],0])
        else:
            dp = np.zeros_like(xi)

        ll = mlab.less_simple_linear_interpolation( pl, slc, dp+xi,
                                                     extrap=True )

        # get vector in pixel space coordinates from one point to other
        dd = np.diff( ll, axis=0 ).ravel()

        # Get angle of vector - must be calculated in pixel space for
        # text rotation to work correctly
        if np.all(dd==0): # Must deal with case of zero length label
            rotation = 0.0
        else:
            rotation = np.arctan2(dd[1], dd[0]) * 180.0 / np.pi

        # Fix angle so text is never upside-down
        if rotation > 90:
            rotation = rotation - 180.0
        if rotation < -90:
            rotation = 180.0 + rotation

        # Break contour if desired
        nlc = []
        if len(lc):
            # Expand range by spacing
            xi = dp + xi + np.array([-spacing,spacing])

            # Get indices near points of interest
            I = mlab.less_simple_linear_interpolation(
                pl, np.arange(len(pl)), xi, extrap=False )

            # If those indices aren't beyond contour edge, find x,y
            if (not np.isnan(I[0])) and int(I[0]) != I[0]:
                xy1 = mlab.less_simple_linear_interpolation(
                    pl, lc, [ xi[0] ] )

            if (not np.isnan(I[1])) and int(I[1]) != I[1]:
                xy2 = mlab.less_simple_linear_interpolation(
                    pl, lc, [ xi[1] ] )

            # Make integer (kept as floats so the NaN checks below still
            # work; cast to int at the point of use)
            I = [ np.floor(I[0]), np.ceil(I[1]) ]

            # Actually break contours
            if closed:
                # This will remove contour if shorter than label
                if np.all(~np.isnan(I)):
                    nlc.append( np.r_[ xy2, lc[int(I[1]):int(I[0])+1], xy1 ] )
            else:
                # These will remove pieces of contour if they have length zero
                if not np.isnan(I[0]):
                    nlc.append( np.r_[ lc[:int(I[0])+1], xy1 ] )
                if not np.isnan(I[1]):
                    nlc.append( np.r_[ xy2, lc[int(I[1]):] ] )

            # The current implementation removes contours completely
            # covered by labels.  Uncomment line below to keep
            # original contour if this is the preferred behavior.
            #if not len(nlc): nlc = [ lc ]

        return (rotation,nlc)

Example 200

Project: nupic
Source File: contour.py
View license
    def labels(self, inline, inline_spacing):
        trans = self.ax.transData # A bit of shorthand

        for icon, lev, fsize, cvalue in zip(
            self.labelIndiceList, self.labelLevelList, self.labelFontSizeList,
            self.labelCValueList ):

            con = self.collections[icon]
            lw = self.get_label_width(lev, self.labelFmt, fsize)
            additions = []
            paths = con.get_paths()
            for segNum, linepath in enumerate(paths):
                lc = linepath.vertices # Line contour
                slc0 = trans.transform(lc) # Line contour in screen coords

                # For closed polygons, add extra point to avoid division by
                # zero in print_label and locate_label.  Other than these
                # functions, this is not necessary and should probably be
                # eventually removed.
                if mlab.is_closed_polygon( lc ):
                    slc = np.r_[ slc0, slc0[1:2,:] ]
                else:
                    slc = slc0

                if self.print_label(slc,lw): # Check if long enough for a label
                    x,y,ind  = self.locate_label(slc, lw)

                    if inline: lcarg = lc
                    else: lcarg = None
                    rotation,new=self.calc_label_rot_and_inline(
                        slc0, ind, lw, lcarg,
                        inline_spacing )

                    # Actually add the label
                    self.add_label(x,y,rotation,lev,cvalue)

                    # If inline, add new contours
                    if inline:
                        for n in new:
                            # Add path if not empty or single point
                            if len(n)>1: additions.append( path.Path(n) )
                else: # If not adding label, keep old path
                    additions.append(linepath)

            # After looping over all segments on a contour, remove old
            # paths and add new ones if inlining
            if inline:
                del paths[:]
                paths.extend(additions)