update v0.99.2

nghiavtr · Aug 21, 2018 · a70a553 · a70a553
1 parent 1bc295b
commit a70a553
Show file tree

Hide file tree

Showing 5 changed files with 119 additions and 83 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -10,3 +10,5 @@ Suggests: knitr
 License: GPL-3
 LazyData: true
 RoxygenNote: 6.0.1
+NeedsCompilation: no
+Packaged: 2018-08-21 04:05:55 UTC; nghiavtr
diff --git a/NEWS b/NEWS
@@ -1,5 +1,5 @@
-Date: 15 Aug 2018
-- Use t-test for weird isoform data, for example all samples of the control group are unexpressed, to avoid NA results in BPglm. Great thanks to Tian Mou for the contribution!
+Date: 21 Aug 2018
+- Fix issues for weird isoform data, for example all samples of the control group are unexpressed, to avoid NA results in BPglm. Great thanks to Tian Mou for the contribution!
 - Use RMarkdown instead of Sweave for vignette document.
 Date: 24 Feb 2017
 - Use quasi-Possion as the family function if the glm fitting with BPfam is not converged

diff --git a/R/BPglm.R b/R/BPglm.R
@@ -84,72 +84,89 @@ BPglm <- function (data, controlIds, design, coef=2, keepFit=FALSE,minExp=1e-4,
         i.converged=NA
         i.bpconverged=NA
         oo4=NA
+        par=NA
 
         x=data[i,]
-        control.x=x[controlIds]
-        bpstat=control.x
-        if (!is.null(extreme.quant)) bpstat=control.x[control.x <= quantile(control.x,prob=extreme.quant)]         
-        bpstat[bpstat<minExp]=0
+        #check if data is weird
+        gvar=tapply(x,design[,coef],var)
+        lowvar=sapply(gvar, function(z) sum(z<0.1*gvar))
+        isNearZeroVariance=sum(lowvar>0)>0
 
-        par=getInitParam(bpstat,para.num=4)
-        if (sum(bpstat>0) > 0.05*length(bpstat)){
-            ##### beta-Poisson estimation for control group
-            oo4=estimateBP(bpstat,para.num=4,tbreak.num=tbreak.num,break.thres=break.thres)
+        if (!isNearZeroVariance){
+            control.x=x[controlIds]
+            bpstat=control.x
+            if (!is.null(extreme.quant)) bpstat=control.x[control.x <= quantile(control.x,prob=extreme.quant)]         
+            bpstat[bpstat<minExp]=0
 
-            if (estIntPar){
-                bpstat2=bpstat[bpstat>0]
-                mypar=getInitParam(bpstat2,para.num=4)
-                oo.tmp=estimateBP(bpstat2,para.num=4,tbreak.num=tbreak.num,break.thres=break.thres,useExt=useExt,param0=mypar)                
-                if (!is.na(oo.tmp$PVAL)) mypar=oo.tmp$par
-                oo4e=estimateBP(bpstat,para.num=4,tbreak.num=tbreak.num,break.thres=break.thres,useExt=useExt,param0=mypar)
-                # select the better model
-                if (is.na(oo4$PVAL)) oo4=oo4e
-                else {
-                    if (!is.na(oo4e$PVAL) && oo4e$PVAL > oo4$PVAL && oo4e$X2>=0) oo4=oo4e              
-                }
+            par=getInitParam(bpstat,para.num=4)
+            if (sum(bpstat>0) > 0.05*length(bpstat)){
+                ##### beta-Poisson estimation for control group
+                oo4=estimateBP(bpstat,para.num=4,tbreak.num=tbreak.num,break.thres=break.thres)
 
+                if (estIntPar){
+                    bpstat2=bpstat[bpstat>0]
+                    mypar=getInitParam(bpstat2,para.num=4)
+                    oo.tmp=estimateBP(bpstat2,para.num=4,tbreak.num=tbreak.num,break.thres=break.thres,useExt=useExt,param0=mypar)                
+                    if (!is.na(oo.tmp$PVAL)) mypar=oo.tmp$par
+                    oo4e=estimateBP(bpstat,para.num=4,tbreak.num=tbreak.num,break.thres=break.thres,useExt=useExt,param0=mypar)
+                    # select the better model
+                    if (is.na(oo4$PVAL)) oo4=oo4e
+                    else {
+                        if (!is.na(oo4e$PVAL) && oo4e$PVAL > oo4$PVAL && oo4e$X2>=0) oo4=oo4e              
+                    }
+
+                }
+                par=oo4$par
             }
-            par=oo4$par
-        }
 
-        ##### start GLM
-        fdat=data.frame(x=x,design[,-1])
-        colnames(fdat)=c("x",colnames(design)[-1])
+            ##### start GLM
+            fdat=data.frame(x=x,design[,-1])
+            colnames(fdat)=c("x",colnames(design)[-1])
 
-        try({
-            alp=par[1];bet=par[2];lam1=par[3];lam2=par[4]
-            fam0=do.call("BPfam", list(alp=alp, bet=bet, lam1=lam1, lam2=lam2, link = "log"))
+            try({
+                alp=par[1];bet=par[2];lam1=par[3];lam2=par[4]
+                fam0=do.call("BPfam", list(alp=alp, bet=bet, lam1=lam1, lam2=lam2, link = "log"))
 
-            fit=glm(x~.,data=fdat,family=fam0)
-            i.pval=summary(fit)$coefficients[coef,4]
-            i.tval=summary(fit)$coefficients[coef,3]
-            i.bpconverged=fit$converged
-            ##### if the fitting is not converged, use quassipoisson
-            i.converged=i.bpconverged
-            if (!i.converged){
-                fit=glm(x~.,data=fdat,family=quasipoisson)
+                fit=glm(x~.,data=fdat,family=fam0)
                 i.pval=summary(fit)$coefficients[coef,4]
                 i.tval=summary(fit)$coefficients[coef,3]
-                i.converged=fit$converged
-            }            
-        }, silent=TRUE) # keep silent if errors occur
+                i.bpconverged=fit$converged
+                ##### if the fitting is not converged, use quassipoisson
+                i.converged=i.bpconverged
+                if (!i.converged){
+                    fit=glm(x~.,data=fdat,family=quasipoisson)
+                    i.pval=summary(fit)$coefficients[coef,4]
+                    i.tval=summary(fit)$coefficients[coef,3]
+                    i.converged=fit$converged
+                }            
+            }, silent=TRUE) # keep silent if errors occur
+        }
 
-        # Modification: if the fitting is not converged or all samples of one group are all unexpressed, use t.test via lm
-        x1=x; x1[x<minExp]=0
-        tcount=tapply(x1,design[,coef],sum)
-        if (sum(tcount==0)>0) i.pval=NA
+        # Modification: if the fitting is not converged or all samples of one group are all unexpressed, use t.test with unequal variance or Analysis of Variance
+        if (isNearZeroVariance) i.pval=NA
         if(is.na(i.pval)) {
             design_group=design
             colnames(design_group)[coef]="group"
             fdat_norm=data.frame(x=log2(x+1))
             fdat_norm=cbind(fdat_norm,design_group)
             fdat_norm=fdat_norm[,-2] #remove intercept
-            fit=lm(group~.,data=fdat_norm)
+            #fit=lm(group~.,data=fdat_norm)
+            #i.pval=summary(fit)$coefficients[2,4]
+            #i.tval=summary(fit)$coefficients[2,3]
+            #i.converged=NA #no information
+            gNum=length(unique(fdat_norm$group))
+            if (gNum==2){ #use t-test
+                gtest=t.test(x ~ group,data=fdat_norm)
+                i.pval=gtest$p.value
+                i.tval=gtest$statistic
+                i.converged=NA #no information
+            }else{ #use aov
+                gtest=aov(x ~ group,data=fdat_norm)
+                i.pval=summary(gtest)[[1]][["Pr(>F)"]][1]
+                i.tval=summary(gtest)[[1]][["F value"]][1]
+                i.converged=NA #no information
+            }
             #i.pval = t.test(x ~ group)$p.value
-            i.pval=summary(fit)$coefficients[2,4]
-            i.tval=summary(fit)$coefficients[2,3]
-            i.converged=NA #no information
-            #t.test(x ~ group,data=fdat_norm)$p.value
         }
         # End modification
 
@@ -167,16 +184,23 @@ BPglm <- function (data, controlIds, design, coef=2, keepFit=FALSE,minExp=1e-4,
     } # end of foreach
 
     }else{
-        fitRes=ind=TVAL=PVAL=NULL;
-        for (i in 1:nrow(data)){
-            x=data[i,]
-            fit=NA
-            i.pval=NA
-            i.tval=NA
-            i.converged=NA
-            i.bpconverged=NA
-            oo4=NA
+    fitRes=ind=TVAL=PVAL=NULL;
+    for (i in 1:nrow(data)){            
+        fit=NA
+        i.pval=NA
+        i.tval=NA
+        i.converged=NA
+        i.bpconverged=NA
+        oo4=NA
+        par=NA
 
+        x=data[i,]
+        #check if data is weird
+        gvar=tapply(x,design[,coef],var)
+        lowvar=sapply(gvar, function(z) sum(z<0.1*gvar))
+        isNearZeroVariance=sum(lowvar>0)>0
+
+        if (!isNearZeroVariance){
             control.x=x[controlIds]
             bpstat=control.x
             if (!is.null(extreme.quant)) bpstat=control.x[control.x <= quantile(control.x,prob=extreme.quant)]         
@@ -221,37 +245,47 @@ BPglm <- function (data, controlIds, design, coef=2, keepFit=FALSE,minExp=1e-4,
                     i.converged=fit$converged
                 }
             }, silent=TRUE) # keep silent if errors occur
+        }
 
-        # Modification: if the fitting is not converged or all samples of one group are all unexpressed, use t.test via lm
-        x1=x; x1[x<minExp]=0
-        tcount=tapply(x1,design[,coef],sum)
-        if (sum(tcount==0)>0) i.pval=NA
+        # Modification: if the fitting is not converged or all samples of one group are all unexpressed, use t.test with unequal variance or Analysis of Variance
+        if (isNearZeroVariance) i.pval=NA
         if(is.na(i.pval)) {
             design_group=design
             colnames(design_group)[coef]="group"
             fdat_norm=data.frame(x=log2(x+1))
             fdat_norm=cbind(fdat_norm,design_group)
             fdat_norm=fdat_norm[,-2] #remove intercept
-            fit=lm(group~.,data=fdat_norm)
+            #fit=lm(group~.,data=fdat_norm)
+            #i.pval=summary(fit)$coefficients[2,4]
+            #i.tval=summary(fit)$coefficients[2,3]
+            #i.converged=NA #no information
+            gNum=length(unique(fdat_norm$group))
+            if (gNum==2){ #use t-test
+                gtest=t.test(x ~ group,data=fdat_norm)
+                i.pval=gtest$p.value
+                i.tval=gtest$statistic
+                i.converged=NA #no information
+            }else{ #use aov
+                gtest=aov(x ~ group,data=fdat_norm)
+                i.pval=summary(gtest)[[1]][["Pr(>F)"]][1]
+                i.tval=summary(gtest)[[1]][["F value"]][1]
+                i.converged=NA #no information
+            }
             #i.pval = t.test(x ~ group)$p.value
-            i.pval=summary(fit)$coefficients[2,4]
-            i.tval=summary(fit)$coefficients[2,3]
-            i.converged=NA #no information
-            #t.test(x ~ group,data=fdat_norm)$p.value
         }
-        # End modification
-            res=list();            
-            res[["PVAL"]]=i.pval
-            res[["TVAL"]]=i.tval
-            res[["CONVERGED"]]=i.converged
-            res[["BPCONVERGED"]]=i.bpconverged
-            res[["ind"]]=i
-            if(keepFit) res[["fit"]]=fit else res[["fit"]]=NA
-            res[["par"]]=par;
-            res[["oo4"]]=oo4;
+        # End modification    
+    res=list();            
+    res[["PVAL"]]=i.pval
+    res[["TVAL"]]=i.tval
+    res[["CONVERGED"]]=i.converged
+    res[["BPCONVERGED"]]=i.bpconverged
+    res[["ind"]]=i
+    if(keepFit) res[["fit"]]=fit else res[["fit"]]=NA
+    res[["par"]]=par;
+    res[["oo4"]]=oo4;
 
-            fitRes=c(fitRes,res)            
-        }
+    fitRes=c(fitRes,res)            
+    }
     }
 
     ind=unlist(fitRes[which(names(fitRes)=="ind")])

diff --git a/inst/doc/BPSC.html b/inst/doc/BPSC.html
diff --git a/man/BPSC-package.Rd b/man/BPSC-package.Rd
@@ -13,7 +13,7 @@ Beta-Poisson model for single-cell RNA-seq data analyses
 Package: \tab BPSC\cr
 Type: \tab Package\cr
 Version: \tab 0.99.2\cr
-Date: \tab 2018-08-20\cr
+Date: \tab 2018-08-21\cr
 License: \tab GPL-3\cr
 LazyLoad: \tab yes\cr
 }