Load packages

You will need the RSkittleBrewer, polyester, and ballgown packages for this vignette to run. Installation instructions are available here:

You will also need R version 3.1.0 or greater and Bioconductor 3.0 or greater. The zebrafishRNASeq package might need to be installed from source. These analyses are based on the devel version of sva (version 3.11.2 or greater).

library(zebrafishRNASeq)
library(RSkittleBrewer)
library(genefilter)
library(polyester)
library(RUVSeq)
library(edgeR)
library(sva)
library(ffpe)
library(RColorBrewer)
library(corrplot)
library(limma)
trop = RSkittleBrewer('tropical')
set.seed(3532333)

Load and filter zebrafish data

Here we are going to load the zebrafish data to get an overall feeling for the count data and relationship between mean and variance. We first filter data according to the RUVSeq vignette: http://bioconductor.org/packages/devel/bioc/vignettes/RUVSeq/inst/doc/RUVSeq.R.

data(zfGenes)
filter <- apply(zfGenes, 1, function(x) length(x[x>5])>=2)
counts <- zfGenes[filter,]

Look at mean variance relationship

plot(rowMeans(log(counts+1)),rowVars(log(counts+1)),pch=19,col=trop[1])

plot of chunk unnamed-chunk-1

Estimate zero inflated negative binomial parameters from the zebrafish data

## Estimate the zero inflated negative binomial parameters
params = get_params(counts)

plot(rowMeans(log(counts+1)),rowVars(log(counts+1)),pch=19,col=trop[1],main="zebrafish data w/fit")
lines(params$fit,col=trop[2])

plot of chunk nbparams

Generate an equal sized simulated data set and compare

## Create some data from the model

dat0 = create_read_numbers(params$mu,params$fit,
                           params$p0,m=dim(counts)[1],n=dim(counts)[2],seed=53535)
## Generating data from baseline model.
par(mfrow=c(1,2))
plot(rowMeans(log(dat0+1)),rowVars(log(dat0+1)),pch=19,col=trop[2],main="simulated data",xlim=c(0,15),xlab="Mean(Count)", ylab="Var(Count)")
plot(rowMeans(log(counts+1)),rowVars(log(counts+1)),pch=19,col=trop[1],main="zebrafish data",xlim=c(0,15),xlab="Mean(Count)", ylab="Var(Count)")

plot of chunk simdata

Make sure there is no differential expression using voom

group = rep(c(0,1),each=3)
design = model.matrix(~group)
dge <- DGEList(counts=dat0)
dge <- calcNormFactors(dge)
v <- voom(dge,design,plot=TRUE)