You will need the RSkittleBrewer package for this vignette to run. Installation instructions are available here:
You will also need R version 3.1.0 or greater and Bioconductor 3.0 or greater. The zebrafishRNASeq package might need to be installed from source. These analyses are based on the devel version of sva (version 3.11.2 or greater).
library(zebrafishRNASeq)
library(RSkittleBrewer)
library(genefilter)
library(RUVSeq)
library(edgeR)
library(sva)
library(ffpe)
## Warning: replacing previous import by 'graphics::image' when loading
## 'methylumi'
library(RColorBrewer)
library(corrplot)
library(limma)
trop = RSkittleBrewer('tropical')
The first comparison will be on the zebrafish data like those in the RUVSeq vignette. For the vignette code see this link http://bioconductor.org/packages/devel/bioc/vignettes/RUVSeq/inst/doc/RUVSeq.R.
data(zfGenes)
filter = apply(zfGenes, 1, function(x) length(x[x>5])>=2)
filtered = zfGenes[filter,]
genes = rownames(filtered)[grep("^ENS", rownames(filtered))]
controls = grepl("^ERCC", rownames(filtered))
spikes = rownames(filtered)[grep("^ERCC", rownames(filtered))]
group = as.factor(rep(c("Ctl", "Trt"), each=3))
set = newSeqExpressionSet(as.matrix(filtered),
phenoData = data.frame(group, row.names=colnames(filtered)))
dat0 = counts(set)
## Set null and alternative models (ignore batch)
mod1 = model.matrix(~group)
mod0 = cbind(mod1[,1])
## Estimate batch with svaseq (unsupervised)
batch_unsup_sva = svaseq(dat0,mod1,mod0,n.sv=1)$sv
## Number of significant surrogate variables is: 1
## Iteration (out of 5 ):1 2 3 4 5
## Estimate batch with svaseq (supervised)
batch_sup_sva = svaseq(dat0,mod1,mod0,controls=controls,n.sv=1)$sv
## sva warning: controls provided so supervised sva is being performed.
## Number of significant surrogate variables is: 1
## Estimate batch with pca
ldat0 = log(dat0 + 1)
batch_pca = svd(ldat0 - rowMeans(ldat0))$v[,1]
## Estimate batch with ruv (controls known)
batch_ruv_cp <- RUVg(dat0, cIdx= spikes, k=1)$W
## Estimate batch with ruv (residuals)
## this procedure follows the RUVSeq vignette
## http://www.bioconductor.org/packages/devel/bioc/vignettes/RUVSeq/inst/doc/RUVSeq.pdf
x <- as.factor(group)
design <- model.matrix(~x)
y <- DGEList(counts=dat0, group=x)
y <- calcNormFactors(y, method="upperquartile")
y <- estimateGLMCommonDisp(y, design)
y <- estimateGLMTagwiseDisp(y, design)
fit <- glmFit(y, design)
res <- residuals(fit, type="deviance")
seqUQ <- betweenLaneNormalization(dat0, which="upper")
controls = rep(TRUE,dim(dat0)[1])
batch_ruv_res = RUVr(seqUQ,controls,k=1,res)$W
## Estimate batch with ruv empirical controls
## this procedure follows the RUVSeq vignette
## http://www.bioconductor.org/packages/devel/bioc/vignettes/RUVSeq/inst/doc/RUVSeq.pdf
y <- DGEList(counts=dat0, group=x)
y <- calcNormFactors(y, method="upperquartile")
y <- estimateGLMCommonDisp(y, design)
y <- estimateGLMTagwiseDisp(y, design)
fit <- glmFit(y, design)
lrt <- glmLRT(fit, coef=2)
controls=rank(lrt$table$LR) <= 400
batch_ruv_emp <- RUVg(dat0, controls, k=1)$W
## Plot the results
plot(batch_unsup_sva,pch=19,col=trop[1],main="unsupervised sva")
plot(batch_sup_sva,pch=19,col=trop[1],main="supervised sva")
plot(batch_pca,pch=19,col=trop[2],main="pca")
plot(batch_ruv_cp,pch=19,col=trop[3],main="control probes ruv")
plot(batch_ruv_res,pch=19,col=trop[3],main="residual ruv")
plot(batch_ruv_emp,pch=19,col=trop[3],main="empirical controls ruv")
batchEstimates = cbind(group,batch_unsup_sva,batch_sup_sva,
batch_pca,batch_ruv_cp,batch_ruv_res,batch_ruv_emp)
colnames(batchEstimates) = c("group","usva","ssva","pca","ruvcp","ruvres","ruvemp")
corr = abs(cor(batchEstimates))
cols = colorRampPalette(c(trop[2],"white",trop[1]))
par(mar=c(5,5,5,5))
corrplot(corr,method="ellipse",type="lower",col=cols(100),tl.pos="d")
dge <- DGEList(counts=dat0)
dge <- calcNormFactors(dge)
catplots = tstats = vector("list",7)
adj = c( "+ batch_sup_sva","+ batch_unsup_sva",
"+ batch_ruv_cp", "+ batch_ruv_res",
"+ batch_ruv_emp","+ batch_pca","")
for(i in 1:7){
design = model.matrix(as.formula(paste0("~ group",adj[i])))
v <- voom(dge,design,plot=FALSE)
fit <- lmFit(v,design)
fit <- eBayes(fit)
tstats[[i]] = abs(fit$t[,2])
names(tstats[[i]]) = as.character(1:dim(dat0)[1])
catplots[[i]] = CATplot(-rank(tstats[[i]]),-rank(tstats[[1]]),maxrank=400,make.plot=F)
}
plot(catplots[[2]],ylim=c(0,1),col=trop[1],lwd=3,type="l",ylab="Concordance Between Supervised svaseq and other methods",xlab="Rank")
lines(catplots[[3]],col=trop[2],lwd=3)
lines(catplots[[4]],col=trop[2],lwd=3,lty=2)
lines(catplots[[5]],col=trop[2],lwd=3,lty=3)
lines(catplots[[6]],col=trop[3],lwd=3)
lines(catplots[[7]],col=trop[4],lwd=3)
legend(200,0.5,legend=c("Unsup. svaseq","RUV CP","RUV Res.", "RUV Emp.","PCA", "No adjustment"),col=trop[c(1,2,2,2,3,4)],lty=c(1,1,2,3,1,1),lwd=3)
dge <- DGEList(counts=dat0)
dge <- calcNormFactors(dge)
catplots = tstats = vector("list",7)
adj = c( "+ batch_unsup_sva","+ batch_sup_sva",
"+ batch_ruv_cp", "+ batch_ruv_res",
"+ batch_ruv_emp","+ batch_pca","")
for(i in 1:7){
design = model.matrix(as.formula(paste0("~ group",adj[i])))
v <- voom(dge,design,plot=FALSE)
fit <- lmFit(v,design)
fit <- eBayes(fit)
tstats[[i]] = abs(fit$t[,2])
names(tstats[[i]]) = as.character(1:dim(dat0)[1])
catplots[[i]] = CATplot(-rank(tstats[[i]]),-rank(tstats[[1]]),maxrank=400,make.plot=F)
}
plot(catplots[[2]],ylim=c(0,1),col=trop[1],lwd=3,type="l",ylab="Concordance Between Unsupervised svaseq and other methods",xlab="Rank")
lines(catplots[[3]],col=trop[2],lwd=3,lty=2)
lines(catplots[[4]],col=trop[2],lwd=3,lty=1)
lines(catplots[[5]],col=trop[2],lwd=3,lty=3)
lines(catplots[[6]],col=trop[3],lwd=3)
lines(catplots[[7]],col=trop[4],lwd=3)
legend(200,0.5,legend=c("Sup. svaseq","RUV CP","RUV Res.", "RUV Emp.","PCA", "No adjustment"),col=trop[c(1,2,2,2,3,4)],lty=c(1,2,1,3,1,1),lwd=3)
sessionInfo()
## R version 3.1.0 (2014-04-10)
## Platform: x86_64-apple-darwin10.8.0 (64-bit)
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] parallel stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] zebrafishRNASeq_0.99.2 sva_3.11.1
## [3] RUVSeq_0.99.2 RSkittleBrewer_1.1
## [5] RColorBrewer_1.0-5 mgcv_1.7-29
## [7] nlme_3.1-117 genefilter_1.47.5
## [9] ffpe_1.9.0 TTR_0.22-0
## [11] xts_0.9-7 zoo_1.7-11
## [13] edgeR_3.7.2 limma_3.21.6
## [15] EDASeq_1.99.1 ShortRead_1.23.11
## [17] GenomicAlignments_1.1.11 Rsamtools_1.17.22
## [19] GenomicRanges_1.17.17 GenomeInfoDb_1.1.6
## [21] corrplot_0.73 corpcor_1.6.6
## [23] Biostrings_2.33.10 XVector_0.5.6
## [25] IRanges_1.99.15 S4Vectors_0.0.8
## [27] BiocParallel_0.7.2 Biobase_2.25.0
## [29] BiocGenerics_0.11.2 knitr_1.6
##
## loaded via a namespace (and not attached):
## [1] affy_1.43.2 affyio_1.33.0 annotate_1.43.4
## [4] AnnotationDbi_1.27.7 aroma.light_2.1.0 base64_1.1
## [7] BatchJobs_1.2 BBmisc_1.6 beanplot_1.1
## [10] BiocInstaller_1.15.5 biomaRt_2.21.0 bitops_1.0-6
## [13] brew_1.0-6 BSgenome_1.33.8 bumphunter_1.5.3
## [16] codetools_0.2-8 colorspace_1.2-4 DBI_0.2-7
## [19] DESeq_1.17.0 digest_0.6.4 doRNG_1.6
## [22] evaluate_0.5.5 fail_1.2 foreach_1.4.2
## [25] formatR_0.10 geneplotter_1.43.0 GenomicFeatures_1.17.9
## [28] grid_3.1.0 htmltools_0.2.4 hwriter_1.3
## [31] illuminaio_0.7.0 iterators_1.0.7 KernSmooth_2.23-12
## [34] lattice_0.20-29 latticeExtra_0.6-26 locfit_1.5-9.1
## [37] lumi_2.17.0 MASS_7.3-33 Matrix_1.1-3
## [40] matrixStats_0.8.14 mclust_4.3 methylumi_2.11.1
## [43] minfi_1.11.7 multtest_2.21.0 nleqslv_2.1.1
## [46] nor1mix_1.1-4 pkgmaker_0.22 plyr_1.8.1
## [49] preprocessCore_1.27.0 R.methodsS3_1.6.1 R.oo_1.18.0
## [52] Rcpp_0.11.1 RCurl_1.95-4.1 registry_0.2
## [55] reshape_0.8.5 rmarkdown_0.2.46 rngtools_1.2.4
## [58] RSQLite_0.11.4 rtracklayer_1.25.9 sendmailR_1.1-2
## [61] sfsmisc_1.0-25 siggenes_1.39.0 splines_3.1.0
## [64] stats4_3.1.0 stringr_0.6.2 survival_2.37-7
## [67] tools_3.1.0 XML_3.98-1.1 xtable_1.7-3
## [70] zlibbioc_1.11.1