## ---- echo=FALSE, results="hide", message=FALSE-------------------------- require(knitr) opts_chunk$set(error=FALSE, message=FALSE, warning=FALSE) ## ----style, echo=FALSE, results='asis'----------------------------------- BiocStyle::markdown() ## ----setup, echo=FALSE, message=FALSE------------------------------------ library(scran) register(SerialParam()) set.seed(100) ## ------------------------------------------------------------------------ ngenes <- 10000 ncells <- 200 mu <- 2^runif(ngenes, 3, 10) gene.counts <- matrix(rnbinom(ngenes*ncells, mu=mu, size=2), nrow=ngenes) ## ------------------------------------------------------------------------ library(org.Mm.eg.db) all.ensembl <- unique(toTable(org.Mm.egENSEMBL)$ensembl_id) rownames(gene.counts) <- sample(all.ensembl, ngenes) ## ------------------------------------------------------------------------ nspikes <- 100 ncells <- 200 mu <- 2^runif(nspikes, 3, 10) spike.counts <- matrix(rnbinom(nspikes*ncells, mu=mu, size=2), nrow=nspikes) rownames(spike.counts) <- paste0("ERCC-", seq_len(nspikes)) all.counts <- rbind(gene.counts, spike.counts) ## ------------------------------------------------------------------------ library(scran) sce <- newSCESet(countData=data.frame(all.counts)) sce <- calculateQCMetrics(sce, feature_controls=list( MySpikes=rep(c(FALSE, TRUE), c(ngenes, nspikes)) )) isSpike(sce) <- "MySpikes" ## ------------------------------------------------------------------------ sce <- computeSumFactors(sce) summary(sizeFactors(sce)) ## ------------------------------------------------------------------------ larger.sce <- newSCESet(countData=data.frame(cbind(all.counts, all.counts, all.counts))) clusters <- quickCluster(larger.sce) larger.sce <- computeSumFactors(larger.sce, cluster=clusters) ## ------------------------------------------------------------------------ sce2 <- computeSpikeFactors(sce) summary(sizeFactors(sce2)) ## ------------------------------------------------------------------------ sce <- computeSpikeFactors(sce, general.use=FALSE) ## ------------------------------------------------------------------------ sce <- normalize(sce) ## ------------------------------------------------------------------------ mm.pairs <- readRDS(system.file("exdata", "mouse_cycle_markers.rds", package="scran")) ## ------------------------------------------------------------------------ assigned <- cyclone(sce, pairs=mm.pairs) head(assigned$scores) ## ------------------------------------------------------------------------ phase <- rep("S", ncol(sce)) phase[assigned$scores$G1 > 0.5] <- "G1" phase[assigned$scores$G2M > 0.5] <- "G2M" phase[assigned$scores$G1 > 0.5 & assigned$scores$G2M > 0.5] <- "unknown" table(phase) ## ------------------------------------------------------------------------ fit <- trendVar(sce) ## ------------------------------------------------------------------------ decomp <- decomposeVar(sce, fit) top.hvgs <- order(decomp$bio, decreasing=TRUE) head(decomp[top.hvgs,]) ## ------------------------------------------------------------------------ plot(decomp$mean, decomp$total, xlab="Mean log-expression", ylab="Variance") o <- order(decomp$mean) lines(decomp$mean[o], decomp$tech[o], col="red", lwd=2) points(fit$mean, fit$var, col="red", pch=16) ## ------------------------------------------------------------------------ alt.fit <- trendVar(sce, use.spikes=FALSE) alt.decomp <- decomposeVar(sce, alt.fit) ## ------------------------------------------------------------------------ batch <- rep(c("1", "2"), each=100) design <- model.matrix(~batch) alt.fit2 <- trendVar(sce, design=design) alt.decomp2 <- decomposeVar(sce, alt.fit) ## ---- eval=FALSE--------------------------------------------------------- ## set.seed(102251) ## ncells <- 200 ## niters <- 20 ## var.GLM <- var.cv2 <- var.log <- list() ## means <- list() ## ## for (it in seq_len(10)) { ## cur.mean <- it*10 ## means[[it]] <- cur.mean ## ## counter1 <- integer(ncells) ## counter1[1] <- cur.mean*ncells ## counter2 <- rnbinom(ncells, mu=cur.mean, size=0.2) # Arguably, all of these are systematically more variable than above. ## counter3 <- rnbinom(ncells, mu=cur.mean, size=0.5) ## counter4 <- rnbinom(ncells, mu=cur.mean, size=1) ## ## # With GLMs. ## require(edgeR) ## y <- DGEList(rbind(counter1, counter2, counter3, counter4), lib.size=rep(1e6, ncells)) ## y <- estimateDisp(y, cbind(rep(1, ncells)), prior.df=0) ## var.GLM[[it]] <- y$tagwise.dispersion ## ## # With CV2. ## var.cv2[[it]] <- apply(y$counts, 1, var)/rowMeans(y$counts)^2 ## ## # With variances of log-values. ## var.log[[it]] <- apply(cpm(y, log=TRUE), 1, var) ## } ## ## means <- unlist(means) ## var.GLM <- do.call(cbind, var.GLM) ## var.cv2 <- do.call(cbind, var.cv2) ## var.log <- do.call(cbind, var.log) ## means <- matrix(means, nrow=nrow(var.GLM), ncol=ncol(var.GLM), byrow=TRUE) ## colors <- c("black", "red", "blue", "orange") ## ## par(mfrow=c(1,3)) ## plot(means, var.GLM, col=colors, pch=16, main="BCV") ## for (x in seq_len(nrow(means))) { ## lines(means[x,], var.GLM[x,], col=colors[x]) ## } ## plot(means, var.cv2, col=colors, pch=16, main="CV2") ## for (x in seq_len(nrow(means))) { ## lines(means[x,], var.cv2[x,], col=colors[x]) ## } ## plot(means, var.log, col=colors, pch=16, main="Variance of log") ## for (x in seq_len(nrow(means))) { ## lines(means[x,], var.log[x,], col=colors[x]) ## } ## ------------------------------------------------------------------------ null.dist <- correlateNull(ncol(sce)) cor.pairs <- correlatePairs(sce[top.hvgs[1:200],], null.dist=null.dist) head(cor.pairs) ## ------------------------------------------------------------------------ null.dist2 <- correlateNull(design=design, iter=1e5) # fewer iterations, to speed it up. cor.pairs2 <- correlatePairs(sce[top.hvgs[1:200],], null.dist=null.dist2, design=design) ## ---- eval=FALSE--------------------------------------------------------- ## set.seed(1023423) ## ncells <- 100 ## null.dist <- correlateNull(ncells) ## all.p <- list() ## for (it in 1:10000) { ## x1 <- rpois(ncells, lambda=10) ## x2 <- rpois(ncells, lambda=20) ## rho2 <- cor(rank(x1, ties.method="random"), rank(x2, ties.method="random"), method="spearman") ## all.p[[it]] <- sum(null.dist >= rho2)/length(null.dist) ## } ## sum(unlist(all.p) <= 0.01)/10000 ## sum(unlist(all.p) <= 0.05)/10000 ## ------------------------------------------------------------------------ set.seed(12120) design <- model.matrix(~factor(rep(1:5, 2))) y <- matrix(rnorm(1000, mean=rep(1:5, 5), sd=2), ncol=10, byrow=TRUE) null <- correlateNull(ncol(y)) out <- correlatePairs(y, design=design, null=null) plot(log10(sort(out$p.value)/1:nrow(out)*nrow(out))) # wrong null <- correlateNull(design=design, residuals=TRUE) out <- correlatePairs(y, design=design, null=null, residuals=TRUE) plot(log10(sort(out$p.value)/1:nrow(out)*nrow(out))) # right ## ------------------------------------------------------------------------ y <- convertTo(sce, type="edgeR") ## ------------------------------------------------------------------------ sessionInfo()