Dependencies

This document depends on the following packages:

  library(devtools)
  library(Biobase)
  library(dendextend)

To install these packages, you can use the code below (or, if you are compiling the document, remove `eval=FALSE` from the chunk).

# CRAN packages used by this document.
install.packages(c("devtools", "dendextend"))

# Biobase is distributed through Bioconductor. The legacy biocLite.R
# installer script has been retired, so install through BiocManager,
# bootstrapping BiocManager itself from CRAN when it is not yet available.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}
BiocManager::install("Biobase")

General principles

Load some data

We will use this expression set to look at how we use plots and tables to check for different characteristics

# Open an explicit URL connection to the serialized expression set,
# load its contents into the workspace, then release the connection.
con <- url("http://bowtie-bio.sourceforge.net/recount/ExpressionSets/bodymap_eset.RData")
load(file = con)
close(con)

# Short alias for the loaded object, plus its component tables.
bm <- bodymap.eset
pdata <- pData(bm)                  # phenotype data
fdata <- fData(bm)                  # feature data
edata <- as.data.frame(exprs(bm))  # expression values as a data frame

# Show which objects are now defined in the workspace.
ls()
## [1] "bm"           "bodymap.eset" "con"          "edata"       
## [5] "fdata"        "pdata"        "tropical"

Calculate distances between samples

First we remove lowly expressed genes and log transform the data, then calculate the Euclidean distance between samples

# Keep only rows whose mean value exceeds 5000, then move to the
# log2 scale (the +1 keeps zero values finite).
edata <- log2(edata[rowMeans(edata) > 5000, ] + 1)

# dist() measures distances between rows, so transpose first in order to
# compare the columns of edata with each other. Euclidean is the default
# metric; it is spelled out here for clarity.
dist1 <- dist(t(edata), method = "euclidean")

# Visualize the distance matrix as a heatmap with a 9-step palette;
# Rowv/Colv = NA suppresses reordering and the dendrograms.
colramp <- colorRampPalette(c(3,"white",2))(9)
heatmap(as.matrix(dist1), col = colramp, Rowv = NA, Colv = NA)

Now cluster the samples

Here we use the distance we previously calculated to perform a hierarchical clustering and plot the dendrogram:

# Hierarchical clustering from the precomputed distances ("complete"
# linkage is hclust's default), followed by the dendrogram plot.
hclust1 <- hclust(dist1, method = "complete")
plot(hclust1)

We can also force all of the leaves to terminate at the same spot

# A negative hang makes every label hang down from height 0,
# so all leaves end at the same level.
plot(hclust1, hang = -1)

We can also color the dendrogram either into a fixed number of groups

# Convert the clustering to a dendrogram object so it can be decorated.
dend <- as.dendrogram(hclust1)

# Color the leaf labels according to a cut of the tree into k = 4 groups,
# one color per group. The dendrogram built above is passed on (rather than
# the raw hclust object) so the conversion is actually used.
dend <- color_labels(dend, k = 4, col = 1:4)
plot(dend)