## ----setup, echo=FALSE, results="hide", message=FALSE-------------------- knitr::opts_chunk$set(tidy=FALSE, cache=TRUE, dev="png", message=FALSE, error=FALSE, warning=TRUE) ## Libraries require("Biobase") require("NMF") require("cvxclustr") ## ----installation, eval=FALSE-------------------------------------------- # source("http://bioconductor.org/biocLite.R") # biocLite("DASC") ## ---- message=FALSE, results='hide', eval=TRUE--------------------------- library(DASC) data("esGolub") samples <- c(20,21,28,30) dat <- exprs(esGolub)[1:100,samples] pdat <- pData(esGolub)[samples,] ## use nrun = 50 or more for better convergence of results res <- DASC(edata = dat, pdata = pdat, factor = pdat$Cell, method = 'ama', type = 3, lambda = 1, rank = 2:3, nrun = 5, annotation="esGolub Dataset") #consensusmap(res) #plot(res) ## ---- message=FALSE, eval=TRUE------------------------------------------- ## libraries set.seed(99999) library(DESeq2) library(ggplot2) library(pcaExplorer) ## dataset rawCounts <- stanfordData$rawCounts metadata <- stanfordData$metadata ## ---- message=FALSE, eval=TRUE------------------------------------------- ## Using a smaller dataset idx <- which(metadata$tissue %in% c("adipose", "adrenal", "sigmoid")) rawCounts <- rawCounts[,idx] metadata <- metadata[idx,] ## ---- message=FALSE, eval=TRUE------------------------------------------- head(rawCounts) head(metadata) ## ---- message=FALSE, eval=TRUE------------------------------------------- ## Normalizing the dataset using DESeq2 dds <- DESeqDataSetFromMatrix(rawCounts, metadata, design = ~ species+tissue) dds <- estimateSizeFactors(dds) dat <- counts(dds, normalized = TRUE) lognormalizedCounts <- log2(dat + 1) ## ---- message=FALSE, eval=TRUE------------------------------------------- ## PCA plot using rld.dds <- rlog(dds) pcaplot(rld.dds, intgroup=c("tissue","species"), ntop=1000, pcX=1, pcY=2) ## ---- message=FALSE, eval=TRUE------------------------------------------- res <- DASC(edata = dat, pdata = metadata, factor = metadata$tissue, method = 'ama', type = 3, lambda = 1, rank = 2:3, nrun = 10, annotation = 'Stanford Dataset') ## ---- message=FALSE, eval=TRUE------------------------------------------- ## Consensus plot consensusmap(res) ## ---- message=FALSE, eval=TRUE------------------------------------------- ## Residual plot plot(res) ## ---- message=FALSE, eval=TRUE------------------------------------------- ## Batches -- dataset has 6 batches sample.clust <- data.frame(sample.name = colnames(lognormalizedCounts), clust = as.vector(predict(res$fit$`2`)), batch = metadata$seqBatch) ggplot(data = sample.clust, aes(x=c(1:6), y=clust, color=factor(clust))) + geom_point(size = 4) + xlab("Sample Number") + ylab("Cluster Number") ## ---- message=FALSE------------------------------------------------------ sessionInfo()