Load packages

You will need the RSkittleBrewer package for this vignette to run. Installation instructions are available here:

You will also need R version 3.1.0 or greater and Bioconductor 3.0 or greater. The zebrafishRNASeq package might need to be installed from source. These analyses are based on the devel version of sva (version 3.11.2 or greater).

library(zebrafishRNASeq)
library(RSkittleBrewer)
library(genefilter)
library(RUVSeq)
library(edgeR)
library(sva)
library(ffpe)
## Warning: replacing previous import by 'graphics::image' when loading
## 'methylumi'
library(RColorBrewer)
library(corrplot)
library(limma)
trop = RSkittleBrewer('tropical')

Load and process the zebrafish data

The first comparison will be on the zebrafish data like those in the RUVSeq vignette. For the vignette code see this link http://bioconductor.org/packages/devel/bioc/vignettes/RUVSeq/inst/doc/RUVSeq.R.

data(zfGenes)
filter = apply(zfGenes, 1, function(x) length(x[x>5])>=2)
filtered = zfGenes[filter,]
genes = rownames(filtered)[grep("^ENS", rownames(filtered))]
controls = grepl("^ERCC", rownames(filtered))
spikes =  rownames(filtered)[grep("^ERCC", rownames(filtered))]
group = as.factor(rep(c("Ctl", "Trt"), each=3))
set = newSeqExpressionSet(as.matrix(filtered),
                           phenoData = data.frame(group, row.names=colnames(filtered)))
dat0 = counts(set)

Estimate latent factors with different methods

## Set null and alternative models (ignore batch)
mod1 = model.matrix(~group)
mod0 = cbind(mod1[,1])

## Estimate batch with svaseq (unsupervised)
batch_unsup_sva = svaseq(dat0,mod1,mod0,n.sv=1)$sv
## Number of significant surrogate variables is:  1 
## Iteration (out of 5 ):1  2  3  4  5
## Estimate batch with svaseq (supervised)
batch_sup_sva = svaseq(dat0,mod1,mod0,controls=controls,n.sv=1)$sv
## sva warning: controls provided so supervised sva is being performed.
## Number of significant surrogate variables is:  1
## Estimate batch with pca
ldat0 = log(dat0 + 1)
batch_pca = svd(ldat0 - rowMeans(ldat0))$v[,1]

## Estimate batch with ruv (controls known)
batch_ruv_cp <- RUVg(dat0, cIdx= spikes, k=1)$W

## Estimate batch with ruv (residuals)
## this procedure follows the RUVSeq vignette
## http://www.bioconductor.org/packages/devel/bioc/vignettes/RUVSeq/inst/doc/RUVSeq.pdf

x <- as.factor(group)
design <- model.matrix(~x)
y <- DGEList(counts=dat0, group=x)
y <- calcNormFactors(y, method="upperquartile")
y <- estimateGLMCommonDisp(y, design)
y <- estimateGLMTagwiseDisp(y, design)
fit <- glmFit(y, design)
res <- residuals(fit, type="deviance")
seqUQ <- betweenLaneNormalization(dat0, which="upper")
controls = rep(TRUE,dim(dat0)[1])
batch_ruv_res = RUVr(seqUQ,controls,k=1,res)$W

## Estimate batch with ruv empirical controls
## this procedure follows the RUVSeq vignette
## http://www.bioconductor.org/packages/devel/bioc/vignettes/RUVSeq/inst/doc/RUVSeq.pdf

y <- DGEList(counts=dat0, group=x)
y <- calcNormFactors(y, method="upperquartile")
y <- estimateGLMCommonDisp(y, design)
y <- estimateGLMTagwiseDisp(y, design)

fit <- glmFit(y, design)
lrt <- glmLRT(fit, coef=2)

controls=rank(lrt$table$LR) <= 400
batch_ruv_emp <- RUVg(dat0, controls, k=1)$W

Plot batch estimates

## Plot the results
plot(batch_unsup_sva,pch=19,col=trop[1],main="unsupervised sva")

plot of chunk plotbatch

plot(batch_sup_sva,pch=19,col=trop[1],main="supervised sva")

plot of chunk plotbatch

plot(batch_pca,pch=19,col=trop[2],main="pca")

plot of chunk plotbatch

plot(batch_ruv_cp,pch=19,col=trop[3],main="control probes ruv")

plot of chunk plotbatch

plot(batch_ruv_res,pch=19,col=trop[3],main="residual ruv")

plot of chunk plotbatch

plot(batch_ruv_emp,pch=19,col=trop[3],main="empirical controls ruv")

plot of chunk plotbatch

Plot absolute correlation between batch estimates

batchEstimates = cbind(group,batch_unsup_sva,batch_sup_sva,
                       batch_pca,batch_ruv_cp,batch_ruv_res,batch_ruv_emp)
colnames(batchEstimates) = c("group","usva","ssva","pca","ruvcp","ruvres","ruvemp")

corr = abs(cor(batchEstimates))
cols = colorRampPalette(c(trop[2],"white",trop[1]))
par(mar=c(5,5,5,5))
corrplot(corr,method="ellipse",type="lower",col=cols(100),tl.pos="d")

plot of chunk corbatch

Compare results with different methods to supervised svaseq

dge <- DGEList(counts=dat0)
dge <- calcNormFactors(dge)
catplots = tstats = vector("list",7)
adj = c( "+ batch_sup_sva","+ batch_unsup_sva",
         "+ batch_ruv_cp", "+ batch_ruv_res",
         "+ batch_ruv_emp","+ batch_pca","")


for(i in 1:7){
  design = model.matrix(as.formula(paste0("~ group",adj[i])))
  v <- voom(dge,design,plot=FALSE)
  fit <- lmFit(v,design)
  fit <- eBayes(fit)
  tstats[[i]] = abs(fit$t[,2])
  names(tstats[[i]]) = as.character(1:dim(dat0)[1])
  catplots[[i]] = CATplot(-rank(tstats[[i]]),-rank(tstats[[1]]),maxrank=400,make.plot=F)
}

plot(catplots[[2]],ylim=c(0,1),col=trop[1],lwd=3,type="l",ylab="Concordance Between Supervised svaseq and other methods",xlab="Rank")
lines(catplots[[3]],col=trop[2],lwd=3)
lines(catplots[[4]],col=trop[2],lwd=3,lty=2)
lines(catplots[[5]],col=trop[2],lwd=3,lty=3)
lines(catplots[[6]],col=trop[3],lwd=3)
lines(catplots[[7]],col=trop[4],lwd=3)


legend(200,0.5,legend=c("Unsup. svaseq","RUV CP","RUV Res.", "RUV Emp.","PCA", "No adjustment"),col=trop[c(1,2,2,2,3,4)],lty=c(1,1,2,3,1,1),lwd=3)

plot of chunk comparede

Compare results with different methods to unsupervised svaseq

dge <- DGEList(counts=dat0)
dge <- calcNormFactors(dge)
catplots = tstats = vector("list",7)
adj = c( "+ batch_unsup_sva","+ batch_sup_sva",
         "+ batch_ruv_cp", "+ batch_ruv_res",
         "+ batch_ruv_emp","+ batch_pca","")


for(i in 1:7){
  design = model.matrix(as.formula(paste0("~ group",adj[i])))
  v <- voom(dge,design,plot=FALSE)
  fit <- lmFit(v,design)
  fit <- eBayes(fit)
  tstats[[i]] = abs(fit$t[,2])
  names(tstats[[i]]) = as.character(1:dim(dat0)[1])
  catplots[[i]] = CATplot(-rank(tstats[[i]]),-rank(tstats[[1]]),maxrank=400,make.plot=F)
}

plot(catplots[[2]],ylim=c(0,1),col=trop[1],lwd=3,type="l",ylab="Concordance Between Unsupervised svaseq and other methods",xlab="Rank")
lines(catplots[[3]],col=trop[2],lwd=3,lty=2)
lines(catplots[[4]],col=trop[2],lwd=3,lty=1)
lines(catplots[[5]],col=trop[2],lwd=3,lty=3)
lines(catplots[[6]],col=trop[3],lwd=3)
lines(catplots[[7]],col=trop[4],lwd=3)


legend(200,0.5,legend=c("Sup. svaseq","RUV CP","RUV Res.", "RUV Emp.","PCA", "No adjustment"),col=trop[c(1,2,2,2,3,4)],lty=c(1,2,1,3,1,1),lwd=3)

plot of chunk comparede2

Session Info

sessionInfo()
## R version 3.1.0 (2014-04-10)
## Platform: x86_64-apple-darwin10.8.0 (64-bit)
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] parallel  stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] zebrafishRNASeq_0.99.2   sva_3.11.1              
##  [3] RUVSeq_0.99.2            RSkittleBrewer_1.1      
##  [5] RColorBrewer_1.0-5       mgcv_1.7-29             
##  [7] nlme_3.1-117             genefilter_1.47.5       
##  [9] ffpe_1.9.0               TTR_0.22-0              
## [11] xts_0.9-7                zoo_1.7-11              
## [13] edgeR_3.7.2              limma_3.21.6            
## [15] EDASeq_1.99.1            ShortRead_1.23.11       
## [17] GenomicAlignments_1.1.11 Rsamtools_1.17.22       
## [19] GenomicRanges_1.17.17    GenomeInfoDb_1.1.6      
## [21] corrplot_0.73            corpcor_1.6.6           
## [23] Biostrings_2.33.10       XVector_0.5.6           
## [25] IRanges_1.99.15          S4Vectors_0.0.8         
## [27] BiocParallel_0.7.2       Biobase_2.25.0          
## [29] BiocGenerics_0.11.2      knitr_1.6               
## 
## loaded via a namespace (and not attached):
##  [1] affy_1.43.2            affyio_1.33.0          annotate_1.43.4       
##  [4] AnnotationDbi_1.27.7   aroma.light_2.1.0      base64_1.1            
##  [7] BatchJobs_1.2          BBmisc_1.6             beanplot_1.1          
## [10] BiocInstaller_1.15.5   biomaRt_2.21.0         bitops_1.0-6          
## [13] brew_1.0-6             BSgenome_1.33.8        bumphunter_1.5.3      
## [16] codetools_0.2-8        colorspace_1.2-4       DBI_0.2-7             
## [19] DESeq_1.17.0           digest_0.6.4           doRNG_1.6             
## [22] evaluate_0.5.5         fail_1.2               foreach_1.4.2         
## [25] formatR_0.10           geneplotter_1.43.0     GenomicFeatures_1.17.9
## [28] grid_3.1.0             htmltools_0.2.4        hwriter_1.3           
## [31] illuminaio_0.7.0       iterators_1.0.7        KernSmooth_2.23-12    
## [34] lattice_0.20-29        latticeExtra_0.6-26    locfit_1.5-9.1        
## [37] lumi_2.17.0            MASS_7.3-33            Matrix_1.1-3          
## [40] matrixStats_0.8.14     mclust_4.3             methylumi_2.11.1      
## [43] minfi_1.11.7           multtest_2.21.0        nleqslv_2.1.1         
## [46] nor1mix_1.1-4          pkgmaker_0.22          plyr_1.8.1            
## [49] preprocessCore_1.27.0  R.methodsS3_1.6.1      R.oo_1.18.0           
## [52] Rcpp_0.11.1            RCurl_1.95-4.1         registry_0.2          
## [55] reshape_0.8.5          rmarkdown_0.2.46       rngtools_1.2.4        
## [58] RSQLite_0.11.4         rtracklayer_1.25.9     sendmailR_1.1-2       
## [61] sfsmisc_1.0-25         siggenes_1.39.0        splines_3.1.0         
## [64] stats4_3.1.0           stringr_0.6.2          survival_2.37-7       
## [67] tools_3.1.0            XML_3.98-1.1           xtable_1.7-3          
## [70] zlibbioc_1.11.1