Dependencies

This document depends on the following packages:

  library(devtools)
  library(Biobase)
  library(limma)
  library(edge)

To install these packages you can use the code (or if you are compiling the document, remove the eval=FALSE from the chunk.)

install.packages(c("devtools"))
source("http://www.bioconductor.org/biocLite.R")
biocLite(c("Biobase","limma","jdstorey/edge"))

Download the data

Here we are going to use some data from the paper Detection of redundant fusion transcripts as biomarkers or disease-specific therapeutic targets in breast cancer. that uses data from different normal human tissues (called the Illumina BodyMap data).

con =url("http://bowtie-bio.sourceforge.net/recount/ExpressionSets/bottomly_eset.RData")
load(file=con)
close(con)
bot = bottomly.eset
pdata=pData(bot)
edata=as.matrix(exprs(bot))
fdata = fData(bot)
ls()
## [1] "bot"           "bottomly.eset" "con"           "edata"        
## [5] "fdata"         "pdata"         "tropical"

Transform the data

Here we will transform the data and remove lowly expressed genes.

edata = log2(as.matrix(edata) + 1)
edata = edata[rowMeans(edata) > 10, ]

Fit many regression models at once.

mod = model.matrix(~ pdata$strain)
fit = lm.fit(mod,t(edata))
names(fit)
## [1] "coefficients"  "residuals"     "effects"       "rank"         
## [5] "fitted.values" "assign"        "qr"            "df.residual"

Compare to output of lm

fit$coefficients[,1]
##        (Intercept) pdata$strainDBA/2J 
##         10.4116634          0.3478919
tidy(lm(as.numeric(edata[1, ]) ~ pdata$strain))
##                 term   estimate std.error statistic      p.value
## 1        (Intercept) 10.4116634 0.1516267 68.666422 3.097880e-24
## 2 pdata$strainDBA/2J  0.3478919 0.2095024  1.660563 1.132148e-01

Look at the coefficients across genes

par(mfrow=c(1,2))
hist(fit$coefficients[1,],breaks=100,col=2,xlab="Intercept")
hist(fit$coefficients[2,],breaks=100,col=2,xlab="Strain")
abline(v=0,lwd=3,col=1)

Look at the residuals for a couple of genes

par(mfrow=c(1,2))
plot(fit$residuals[,1],col=2)
plot(fit$residuals[,2],col=2)

Fit many regressions with an adjustment

mod_adj = model.matrix(~ pdata$strain + as.factor(pdata$lane.number))
fit_adj = lm.fit(mod_adj,t(edata))
fit_adj$coefficients[,1]
##                   (Intercept)            pdata$strainDBA/2J 
##                   10.31359781                    0.28934825 
## as.factor(pdata$lane.number)2 as.factor(pdata$lane.number)3 
##                    0.05451431                    0.02502244 
## as.factor(pdata$lane.number)5 as.factor(pdata$lane.number)6 
##                    0.07200502                    0.38038016 
## as.factor(pdata$lane.number)7 as.factor(pdata$lane.number)8 
##                    0.21815863                    0.15103858

Fit many regressions with the limma package

fit_limma = lmFit(edata,mod_adj)
names(fit_limma)
##  [1] "coefficients"     "rank"             "assign"          
##  [4] "qr"               "df.residual"      "sigma"           
##  [7] "cov.coefficients" "stdev.unscaled"   "pivot"           
## [10] "Amean"            "method"           "design"
fit_limma$coefficients[1,]
##                   (Intercept)            pdata$strainDBA/2J 
##                   10.31359781                    0.28934825 
## as.factor(pdata$lane.number)2 as.factor(pdata$lane.number)3 
##                    0.05451431                    0.02502244 
## as.factor(pdata$lane.number)5 as.factor(pdata$lane.number)6 
##                    0.07200502                    0.38038016 
## as.factor(pdata$lane.number)7 as.factor(pdata$lane.number)8 
##                    0.21815863                    0.15103858
fit_adj$coefficients[,1]
##                   (Intercept)            pdata$strainDBA/2J 
##                   10.31359781                    0.28934825 
## as.factor(pdata$lane.number)2 as.factor(pdata$lane.number)3 
##                    0.05451431                    0.02502244 
## as.factor(pdata$lane.number)5 as.factor(pdata$lane.number)6 
##                    0.07200502                    0.38038016 
## as.factor(pdata$lane.number)7 as.factor(pdata$lane.number)8 
##                    0.21815863                    0.15103858

Fit many regressions with the edge package

edge_study = build_study(data=edata,grp=pdata$strain,adj.var=as.factor(pdata$lane.number))
fit_edge = fit_models(edge_study)
summary(fit_edge)
## 
## deFit Summary 
##  
## fit.full: 
##                    SRX033480 SRX033488 SRX033481 SRX033489 SRX033482
## ENSMUSG00000000131      10.3      10.3      10.4      10.4      10.3
## ENSMUSG00000000149      10.6      10.6      10.8      10.8      10.7
## ENSMUSG00000000374      10.7      10.7      10.7      10.7      10.7
##                    SRX033490 SRX033483 SRX033476 SRX033478 SRX033479
## ENSMUSG00000000131      10.3      10.4      10.7      10.5      10.5
## ENSMUSG00000000149      10.7      10.7      11.2      10.9      10.9
## ENSMUSG00000000374      10.7      10.7      11.0      10.8      10.8
##                    SRX033472 SRX033473 SRX033474 SRX033475 SRX033491
## ENSMUSG00000000131      10.6      10.7      10.6      10.7      10.7
## ENSMUSG00000000149      10.6      10.7      10.6      10.6      10.6
## ENSMUSG00000000374      10.9      10.9      10.9      10.9      10.9
##                    SRX033484 SRX033492 SRX033485 SRX033493 SRX033486
## ENSMUSG00000000131      11.0      11.0      10.8      10.8      10.8
## ENSMUSG00000000149      11.1      11.1      10.9      10.9      10.8
## ENSMUSG00000000374      11.2      11.2      11.0      11.0      10.9
##                    SRX033494
## ENSMUSG00000000131      10.8
## ENSMUSG00000000149      10.8
## ENSMUSG00000000374      10.9
## 
## fit.null: 
##                    SRX033480 SRX033488 SRX033481 SRX033489 SRX033482
## ENSMUSG00000000131      10.4      10.4      10.5      10.5      10.4
## ENSMUSG00000000149      10.6      10.6      10.8      10.8      10.7
## ENSMUSG00000000374      10.8      10.8      10.8      10.8      10.7
##                    SRX033490 SRX033483 SRX033476 SRX033478 SRX033479
## ENSMUSG00000000131      10.4      10.6      10.9      10.7      10.7
## ENSMUSG00000000149      10.7      10.6      11.1      10.9      10.8
## ENSMUSG00000000374      10.7      10.8      11.1      10.9      10.9
##                    SRX033472 SRX033473 SRX033474 SRX033475 SRX033491
## ENSMUSG00000000131      10.4      10.5      10.4      10.6      10.6
## ENSMUSG00000000149      10.6      10.8      10.7      10.6      10.6
## ENSMUSG00000000374      10.8      10.8      10.7      10.8      10.8
##                    SRX033484 SRX033492 SRX033485 SRX033493 SRX033486
## ENSMUSG00000000131      10.9      10.9      10.7      10.7      10.7
## ENSMUSG00000000149      11.1      11.1      10.9      10.9      10.8
## ENSMUSG00000000374      11.1      11.1      10.9      10.9      10.9
##                    SRX033494
## ENSMUSG00000000131      10.7
## ENSMUSG00000000149      10.8
## ENSMUSG00000000374      10.9
## 
## res.full: 
##                    SRX033480 SRX033488 SRX033481 SRX033489 SRX033482
## ENSMUSG00000000131    -0.567     0.616    -0.806     0.654    -0.498
## ENSMUSG00000000149    -0.793     0.602    -0.986     0.551    -0.523
## ENSMUSG00000000374    -0.491     0.620    -0.697     0.646    -0.427
##                    SRX033490 SRX033483 SRX033476 SRX033478 SRX033479
## ENSMUSG00000000131     0.748    -0.231    0.0237   0.00934    0.0511
## ENSMUSG00000000149     0.636    -0.362    0.5347   0.20909    0.1327
## ENSMUSG00000000374     0.756    -0.232   -0.0977  -0.09356    0.0165
##                    SRX033472 SRX033473 SRX033474 SRX033475 SRX033491
## ENSMUSG00000000131   -0.0493    0.1523    -0.250    -0.345     0.576
## ENSMUSG00000000149    0.1911    0.4355    -0.112    -0.347     0.709
## ENSMUSG00000000374   -0.1285    0.0506    -0.329    -0.455     0.687
##                    SRX033484 SRX033492 SRX033485 SRX033493 SRX033486
## ENSMUSG00000000131    -0.408    0.3841    -0.470     0.461    -0.284
## ENSMUSG00000000149    -0.516   -0.0182    -0.601     0.392    -0.615
## ENSMUSG00000000374    -0.325    0.4227    -0.401     0.495    -0.328
##                    SRX033494
## ENSMUSG00000000131     0.233
## ENSMUSG00000000149     0.482
## ENSMUSG00000000374     0.312
## 
## res.null: 
##                    SRX033480 SRX033488 SRX033481 SRX033489 SRX033482
## ENSMUSG00000000131    -0.664     0.520    -0.902     0.557    -0.594
## ENSMUSG00000000149    -0.764     0.631    -0.957     0.580    -0.494
## ENSMUSG00000000374    -0.558     0.554    -0.763     0.580    -0.493
##                    SRX033490 SRX033483 SRX033476 SRX033478 SRX033479
## ENSMUSG00000000131     0.651    -0.424    -0.169    -0.184    -0.142
## ENSMUSG00000000149     0.665    -0.303     0.593     0.268     0.191
## ENSMUSG00000000374     0.689    -0.365    -0.231    -0.226    -0.116
##                    SRX033472 SRX033473 SRX033474 SRX033475 SRX033491
## ENSMUSG00000000131    0.1436     0.345   -0.0568    -0.249     0.672
## ENSMUSG00000000149    0.1324     0.377   -0.1709    -0.376     0.680
## ENSMUSG00000000374    0.0044     0.183   -0.1960    -0.389     0.754
##                    SRX033484 SRX033492 SRX033485 SRX033493 SRX033486
## ENSMUSG00000000131    -0.311    0.4805    -0.374     0.557    -0.188
## ENSMUSG00000000149    -0.546   -0.0476    -0.630     0.363    -0.645
## ENSMUSG00000000374    -0.259    0.4892    -0.335     0.561    -0.262
##                    SRX033494
## ENSMUSG00000000131     0.330
## ENSMUSG00000000149     0.453
## ENSMUSG00000000374     0.378
## 
## beta.coef: 
##                    [,1]   [,2]    [,3]    [,4]  [,5]  [,6]   [,7]    [,8]
## ENSMUSG00000000131 10.3 0.0545  0.0250  0.0720 0.380 0.218 0.1510  0.2893
## ENSMUSG00000000149 10.6 0.1455  0.0784  0.0596 0.531 0.304 0.2083 -0.0881
## ENSMUSG00000000374 10.7 0.0322 -0.0246 -0.0164 0.273 0.121 0.0576  0.1993
## 
## stat.type: 
## [1] "lrt"
fit_edge@beta.coef[1,]
## [1] 10.31359781  0.05451431  0.02502244  0.07200502  0.38038016  0.21815863
## [7]  0.15103858  0.28934825
fit_limma$coefficients[1,]
##                   (Intercept)            pdata$strainDBA/2J 
##                   10.31359781                    0.28934825 
## as.factor(pdata$lane.number)2 as.factor(pdata$lane.number)3 
##                    0.05451431                    0.02502244 
## as.factor(pdata$lane.number)5 as.factor(pdata$lane.number)6 
##                    0.07200502                    0.38038016 
## as.factor(pdata$lane.number)7 as.factor(pdata$lane.number)8 
##                    0.21815863                    0.15103858

More information

You can find a lot more information on this model fitting strategy in:

Session information

Here is the session information

devtools::session_info()
##  setting  value                       
##  version  R version 3.2.1 (2015-06-18)
##  system   x86_64, darwin10.8.0        
##  ui       RStudio (0.99.447)          
##  language (EN)                        
##  collate  en_US.UTF-8                 
##  tz       America/New_York            
## 
##  package           * version     date      
##  acepack             1.3-3.3     2014-11-24
##  annotate            1.46.1      2015-07-11
##  AnnotationDbi     * 1.30.1      2015-04-26
##  assertthat          0.1         2013-12-06
##  BiasedUrn         * 1.06.1      2013-12-29
##  Biobase           * 2.28.0      2015-04-17
##  BiocGenerics      * 0.14.0      2015-04-17
##  BiocInstaller     * 1.18.4      2015-07-22
##  BiocParallel        1.2.20      2015-08-07
##  biomaRt             2.24.0      2015-04-17
##  Biostrings          2.36.3      2015-08-12
##  bitops              1.0-6       2013-08-17
##  bladderbatch      * 1.6.0       2015-08-26
##  broom             * 0.3.7       2015-05-06
##  caTools             1.17.1      2014-09-10
##  cluster             2.0.3       2015-07-21
##  colorspace          1.2-6       2015-03-11
##  corpcor             1.6.8       2015-07-08
##  curl                0.9.2       2015-08-08
##  DBI               * 0.3.1       2014-09-24
##  dendextend        * 1.1.0       2015-07-31
##  DESeq2            * 1.8.1       2015-05-02
##  devtools          * 1.8.0       2015-05-09
##  digest              0.6.8       2014-12-31
##  dplyr             * 0.4.3       2015-09-01
##  edge              * 2.1.0       2015-09-06
##  evaluate            0.7.2       2015-08-13
##  foreign             0.8-66      2015-08-19
##  formatR             1.2         2015-04-21
##  Formula           * 1.2-1       2015-04-07
##  futile.logger       1.4.1       2015-04-20
##  futile.options      1.0.0       2010-04-06
##  gdata               2.17.0      2015-07-04
##  genefilter        * 1.50.0      2015-04-17
##  geneLenDataBase   * 1.4.0       2015-09-06
##  geneplotter         1.46.0      2015-04-17
##  GenomeInfoDb      * 1.4.2       2015-08-15
##  GenomicAlignments   1.4.1       2015-04-24
##  GenomicFeatures     1.20.2      2015-08-14
##  GenomicRanges     * 1.20.5      2015-06-09
##  genstats          * 0.1.02      2015-09-05
##  ggplot2           * 1.0.1       2015-03-17
##  git2r               0.11.0      2015-08-12
##  GO.db               3.1.2       2015-09-06
##  goseq             * 1.20.0      2015-04-17
##  gplots            * 2.17.0      2015-05-02
##  gridExtra           2.0.0       2015-07-14
##  gtable              0.1.2       2012-12-05
##  gtools              3.5.0       2015-05-29
##  highr               0.5         2015-04-21
##  HistData          * 0.7-5       2014-04-26
##  Hmisc             * 3.16-0      2015-04-30
##  htmltools           0.2.6       2014-09-08
##  httr                1.0.0       2015-06-25
##  IRanges           * 2.2.7       2015-08-09
##  KernSmooth          2.23-15     2015-06-29
##  knitr             * 1.11        2015-08-14
##  lambda.r            1.1.7       2015-03-20
##  lattice           * 0.20-33     2015-07-14
##  latticeExtra        0.6-26      2013-08-15
##  lazyeval            0.1.10      2015-01-02
##  limma             * 3.24.15     2015-08-06
##  lme4                1.1-9       2015-08-20
##  locfit              1.5-9.1     2013-04-20
##  magrittr            1.5         2014-11-22
##  MASS              * 7.3-43      2015-07-16
##  Matrix            * 1.2-2       2015-07-08
##  MatrixEQTL        * 2.1.1       2015-02-03
##  memoise             0.2.1       2014-04-22
##  mgcv              * 1.8-7       2015-07-23
##  minqa               1.2.4       2014-10-09
##  mnormt              1.5-3       2015-05-25
##  munsell             0.4.2       2013-07-11
##  nlme              * 3.1-122     2015-08-19
##  nloptr              1.0.4       2014-08-04
##  nnet                7.3-10      2015-06-29
##  org.Hs.eg.db      * 3.1.2       2015-07-17
##  plyr                1.8.3       2015-06-12
##  preprocessCore    * 1.30.0      2015-04-17
##  proto               0.3-10      2012-12-22
##  psych               1.5.6       2015-07-08
##  qvalue            * 2.0.0       2015-04-17
##  R6                  2.1.1       2015-08-19
##  RColorBrewer        1.1-2       2014-12-07
##  Rcpp              * 0.12.0      2015-07-25
##  RcppArmadillo     * 0.5.400.2.0 2015-08-17
##  RCurl               1.95-4.7    2015-06-30
##  reshape2            1.4.1       2014-12-06
##  rmarkdown           0.7         2015-06-13
##  rpart               4.1-10      2015-06-29
##  Rsamtools           1.20.4      2015-06-01
##  RSkittleBrewer    * 1.1         2015-09-05
##  RSQLite           * 1.0.0       2014-10-25
##  rstudioapi          0.3.1       2015-04-07
##  rtracklayer         1.28.9      2015-08-19
##  rversions           1.0.2       2015-07-13
##  S4Vectors         * 0.6.5       2015-09-01
##  scales              0.3.0       2015-08-25
##  snm                 1.16.0      2015-04-17
##  snpStats          * 1.18.0      2015-04-17
##  stringi             0.5-5       2015-06-29
##  stringr             1.0.0       2015-04-30
##  survival          * 2.38-3      2015-07-02
##  sva               * 3.14.0      2015-04-17
##  tidyr               0.2.0       2014-12-05
##  UsingR            * 2.0-5       2015-08-06
##  whisker             0.3-2       2013-04-28
##  XML                 3.98-1.3    2015-06-30
##  xml2                0.1.2       2015-09-01
##  xtable              1.7-4       2014-09-12
##  XVector             0.8.0       2015-04-17
##  yaml                2.1.13      2014-06-12
##  zlibbioc            1.14.0      2015-04-17
##  source                                      
##  CRAN (R 3.2.0)                              
##  Bioconductor                                
##  Bioconductor                                
##  CRAN (R 3.2.0)                              
##  CRAN (R 3.2.0)                              
##  Bioconductor                                
##  Bioconductor                                
##  Bioconductor                                
##  Bioconductor                                
##  Bioconductor                                
##  Bioconductor                                
##  CRAN (R 3.2.0)                              
##  Bioconductor                                
##  CRAN (R 3.2.0)                              
##  CRAN (R 3.2.0)                              
##  CRAN (R 3.2.1)                              
##  CRAN (R 3.2.0)                              
##  CRAN (R 3.2.1)                              
##  CRAN (R 3.2.1)                              
##  CRAN (R 3.2.0)                              
##  CRAN (R 3.2.1)                              
##  Bioconductor                                
##  CRAN (R 3.2.0)                              
##  CRAN (R 3.2.0)                              
##  CRAN (R 3.2.2)                              
##  Github (jdstorey/edge@a1947b5)              
##  CRAN (R 3.2.2)                              
##  CRAN (R 3.2.1)                              
##  CRAN (R 3.2.0)                              
##  CRAN (R 3.2.0)                              
##  CRAN (R 3.2.0)                              
##  CRAN (R 3.2.0)                              
##  CRAN (R 3.2.1)                              
##  Bioconductor                                
##  Bioconductor                                
##  Bioconductor                                
##  Bioconductor                                
##  Bioconductor                                
##  Bioconductor                                
##  Bioconductor                                
##  local                                       
##  CRAN (R 3.2.0)                              
##  CRAN (R 3.2.2)                              
##  Bioconductor                                
##  Bioconductor                                
##  CRAN (R 3.2.0)                              
##  CRAN (R 3.2.1)                              
##  CRAN (R 3.2.0)                              
##  CRAN (R 3.2.1)                              
##  CRAN (R 3.2.0)                              
##  CRAN (R 3.2.0)                              
##  CRAN (R 3.2.0)                              
##  CRAN (R 3.2.0)                              
##  CRAN (R 3.2.1)                              
##  Bioconductor                                
##  CRAN (R 3.2.1)                              
##  CRAN (R 3.2.2)                              
##  CRAN (R 3.2.0)                              
##  CRAN (R 3.2.1)                              
##  CRAN (R 3.2.0)                              
##  CRAN (R 3.2.0)                              
##  Bioconductor                                
##  CRAN (R 3.2.2)                              
##  CRAN (R 3.2.0)                              
##  CRAN (R 3.2.0)                              
##  CRAN (R 3.2.1)                              
##  CRAN (R 3.2.1)                              
##  CRAN (R 3.2.0)                              
##  CRAN (R 3.2.0)                              
##  CRAN (R 3.2.1)                              
##  CRAN (R 3.2.0)                              
##  CRAN (R 3.2.1)                              
##  CRAN (R 3.2.0)                              
##  CRAN (R 3.2.1)                              
##  CRAN (R 3.2.0)                              
##  CRAN (R 3.2.1)                              
##  Bioconductor                                
##  CRAN (R 3.2.1)                              
##  Bioconductor                                
##  CRAN (R 3.2.0)                              
##  CRAN (R 3.2.1)                              
##  Bioconductor                                
##  CRAN (R 3.2.1)                              
##  CRAN (R 3.2.0)                              
##  CRAN (R 3.2.1)                              
##  CRAN (R 3.2.1)                              
##  CRAN (R 3.2.1)                              
##  CRAN (R 3.2.0)                              
##  CRAN (R 3.2.1)                              
##  CRAN (R 3.2.1)                              
##  Bioconductor                                
##  Github (alyssafrazee/RSkittleBrewer@0a96a20)
##  CRAN (R 3.2.0)                              
##  CRAN (R 3.2.0)                              
##  Bioconductor                                
##  CRAN (R 3.2.1)                              
##  Bioconductor                                
##  CRAN (R 3.2.2)                              
##  Bioconductor                                
##  Bioconductor                                
##  CRAN (R 3.2.1)                              
##  CRAN (R 3.2.0)                              
##  CRAN (R 3.2.1)                              
##  Bioconductor                                
##  CRAN (R 3.2.0)                              
##  CRAN (R 3.2.2)                              
##  CRAN (R 3.2.0)                              
##  CRAN (R 3.2.1)                              
##  CRAN (R 3.2.2)                              
##  CRAN (R 3.2.0)                              
##  Bioconductor                                
##  CRAN (R 3.2.0)                              
##  Bioconductor

It is also useful to compile the time the document was processed. This document was processed on: 2015-09-06.