## ----style, echo = FALSE, results = 'asis'--------------------------------- BiocStyle::markdown() ## ----setup, echo=FALSE, message=FALSE-------------------------------------- library(knitr) opts_chunk$set(comment=NA, fig.align="center", warning=FALSE) ## Libraries require(Biobase) require(NMF) require(cvxclustr) ## ----installation, eval=FALSE---------------------------------------------- # source("http://bioconductor.org/biocLite.R") # biocLite("DASC") ## ---- message=FALSE, eval=TRUE--------------------------------------------- library(DASC) data("esGolub") samples <- c(20,21,28,30) dat <- exprs(esGolub)[1:100,samples] pdat <- pData(esGolub)[samples,] ## use nrun = 50 or more for better convergence of results res <- DASC(edata = dat, pdata = pdat, factor = pdat$Cell, method = 'ama', type = 3, lambda = 1, rank = 2:3, nrun = 5, annotation='esGolub Dataset') ## ---- message=FALSE, eval=TRUE--------------------------------------------- ## libraries set.seed(99999) library(DESeq2) library(ggplot2) library(pcaExplorer) ## dataset rawCounts <- stanfordData$rawCounts metadata <- stanfordData$metadata ## ---- message=FALSE, eval=TRUE--------------------------------------------- ## Using a smaller dataset idx <- which(metadata$tissue %in% c("adipose", "adrenal", "sigmoid")) rawCounts <- rawCounts[,idx] metadata <- metadata[idx,] ## ---- message=FALSE, eval=TRUE--------------------------------------------- head(rawCounts) head(metadata) ## ---- message=FALSE, eval=TRUE--------------------------------------------- ## Normalizing the dataset using DESeq2 dds <- DESeqDataSetFromMatrix(rawCounts, metadata, design = ~ species+tissue) dds <- estimateSizeFactors(dds) dat <- counts(dds, normalized = TRUE) lognormalizedCounts <- log2(dat + 1) ## ---- message=FALSE, eval=TRUE--------------------------------------------- ## PCA plot using rld.dds <- rlog(dds) pcaplot(rld.dds, intgroup=c("tissue","species"), ntop=1000, pcX=1, pcY=2) ## ---- message=FALSE, eval=TRUE--------------------------------------------- res <- DASC(edata = dat, pdata = metadata, factor = metadata$tissue, method = 'ama', type = 3, lambda = 1, rank = 2:3, nrun = 10, annotation = 'Stanford Dataset') ## ---- message=FALSE, eval=TRUE--------------------------------------------- ## Consensus plot consensusmap(res) ## ---- message=FALSE, eval=TRUE--------------------------------------------- ## Residual plot plot(res) ## ---- message=FALSE, eval=TRUE--------------------------------------------- ## Batches -- dataset has 6 batches sample.clust <- data.frame(sample.name = colnames(lognormalizedCounts), clust = as.vector(predict(res$fit$`2`)), batch = metadata$seqBatch) ggplot(data = sample.clust, aes(x=c(1:6), y=clust, color=factor(clust))) + geom_point(size = 4) + xlab("Sample Number") + ylab("Cluster Number") ## ---- message=FALSE, eval=FALSE-------------------------------------------- # # ## not running this part of the code for building package # ## Using entire dataset # rawCounts <- stanfordData$rawCounts # metadata <- stanfordData$metadata # dds <- DESeqDataSetFromMatrix(rawCounts, metadata, design = ~ species+tissue) # dds <- estimateSizeFactors(dds) # dat <- counts(dds, normalized = TRUE) # lognormalizedCounts <- log2(dat + 1) # # ## PCA Plot # rld.dds <- rlog(dds) # pcaplot(rld.dds, intgroup=c("tissue","species"), ntop = nrow(rld.dds), # pcX = 1, pcY = 2) # # ## Running DASC # res <- DASC(edata = dat, pdata = metadata, factor = metadata$tissue, # method = 'ama', type = 3, lambda = 1, rank = 2:10, # nrun = 100, annotation = 'Stanford Dataset') # # ## Consensus plot # consensusmap(res) # # ## Residual plot # plot(res) # # ## Clustering samples based on batches # sample.clust <- data.frame(sample.name = colnames(lognormalizedCounts), # clust = as.vector(predict(res$fit$`10`)), # batch = metadata$seqBatch) # ggplot(data = sample.clust, aes(x=c(1:26), y=clust, color=factor(clust))) + # geom_point(size = 4) + xlab("Sample Number") + ylab("Cluster Number") ## ---- out.width = "800px", echo=FALSE-------------------------------------- knitr::include_graphics("PCA_Plot_Fig1.png") knitr::include_graphics("Consensus_Plot_Fig2.png") knitr::include_graphics("NMF_Rank_Fig3.png") knitr::include_graphics("ClusterNumber_Fig4.png") ## ---- message=FALSE-------------------------------------------------------- sessionInfo()