## ----echo=FALSE, results="hide", message=FALSE-------------------------------- require(knitr) opts_chunk$set(error=FALSE, message=FALSE, warning=FALSE) library(BiocStyle) self <- Biocpkg("scrapper") ## ----------------------------------------------------------------------------- library(scRNAseq) sce.z <- ZeiselBrainData() sce.z ## ----------------------------------------------------------------------------- # Finding the mitochondrial genes for QC purposes. is.mito <- grepl("^mt-", rownames(sce.z)) # We can set the number of threads higher in real applications, but # we want to avoid stressing out Bioconductor's build system. nthreads <- 2 library(scrapper) res <- analyze( sce.z, rna.subsets=list(mito=is.mito), num.threads=nthreads ) ## ----------------------------------------------------------------------------- tabulate(res$clusters) plot(res$tsne[,1], res$tsne[,2], pch=res$clusters) ## ----------------------------------------------------------------------------- # Top markers for each cluster, say, based on the median AUC: top.markers <- lapply(res$rna.markers$auc, function(x) { head(rownames(x)[order(x$median, decreasing=TRUE)], 10) }) head(top.markers) # Aggregating all marker statistics for the first cluster # (delta.mean is the same as the log-fold change in this case). df1 <- reportGroupMarkerStatistics(res$rna.markers, 1) df1 <- df1[order(df1$auc.median, decreasing=TRUE),] head(df1[,c("mean", "detected", "auc.median", "delta.mean.median")]) ## ----------------------------------------------------------------------------- sce <- convertAnalyzeResults(res) sce ## ----------------------------------------------------------------------------- counts <- assay(sce.z) rna.qc.metrics <- computeRnaQcMetrics(counts, subsets=list(mt=is.mito), num.threads=nthreads) rna.qc.thresholds <- suggestRnaQcThresholds(rna.qc.metrics) rna.qc.filter <- filterRnaQcMetrics(rna.qc.thresholds, rna.qc.metrics) filtered <- counts[,rna.qc.filter,drop=FALSE] ## ----------------------------------------------------------------------------- size.factors <- centerSizeFactors(rna.qc.metrics$sum[rna.qc.filter]) normalized <- normalizeCounts(filtered, size.factors) ## ----------------------------------------------------------------------------- gene.var <- modelGeneVariances(normalized, num.threads=nthreads) top.hvgs <- chooseHighlyVariableGenes(gene.var$statistics$residuals) pca <- runPca(normalized[top.hvgs,], num.threads=nthreads) ## ----------------------------------------------------------------------------- umap.out <- runUmap(pca$components, num.threads=nthreads) tsne.out <- runTsne(pca$components, num.threads=nthreads) snn.graph <- buildSnnGraph(pca$components, num.threads=nthreads) clust.out <- clusterGraph(snn.graph) ## ----------------------------------------------------------------------------- markers <- scoreMarkers(normalized, groups=clust.out$membership, num.threads=nthreads) ## ----------------------------------------------------------------------------- sce.t <- TasicBrainData() common <- intersect(rownames(sce.z), rownames(sce.t)) combined <- cbind(assay(sce.t)[common,], assay(sce.z)[common,]) block <- rep(c("tasic", "zeisel"), c(ncol(sce.t), ncol(sce.z))) ## ----------------------------------------------------------------------------- # No mitochondrial genes in the combined set, so we'll just skip it. blocked_res <- analyze(combined, block=block, num.threads=nthreads) # Visually check whether the datasets were suitably merged together. # Note that these two datasets don't have the same cell types, so # complete overlap should not be expected. plot(blocked_res$tsne[,1], blocked_res$tsne[,2], pch=16, col=factor(blocked_res$block)) ## ----------------------------------------------------------------------------- library(scrapper) rna.qc.metrics <- computeRnaQcMetrics(combined, subsets=list(), num.threads=nthreads) rna.qc.thresholds <- suggestRnaQcThresholds(rna.qc.metrics, block=block) rna.qc.filter <- filterRnaQcMetrics(rna.qc.thresholds, rna.qc.metrics, block=block) filtered <- combined[,rna.qc.filter,drop=FALSE] filtered.block <- block[rna.qc.filter] size.factors <- centerSizeFactors(rna.qc.metrics$sum[rna.qc.filter], block=filtered.block) normalized <- normalizeCounts(filtered, size.factors) gene.var <- modelGeneVariances(normalized, num.threads=nthreads, block=filtered.block) top.hvgs <- chooseHighlyVariableGenes(gene.var$statistics$residuals) pca <- runPca(normalized[top.hvgs,], num.threads=nthreads, block=filtered.block) # Now we do a MNN correction to get rid of the batch effect. corrected <- correctMnn(pca$components, block=filtered.block, num.threads=nthreads) umap.out <- runUmap(corrected$corrected, num.threads=nthreads) tsne.out <- runTsne(corrected$corrected, num.threads=nthreads) snn.graph <- buildSnnGraph(corrected$corrected, num.threads=nthreads) clust.out <- clusterGraph(snn.graph) markers <- scoreMarkers(normalized, groups=clust.out$membership, block=filtered.block, num.threads=nthreads) ## ----------------------------------------------------------------------------- aggregates <- aggregateAcrossCells(filtered, list(cluster=clust.out$membership, dataset=filtered.block)) ## ----------------------------------------------------------------------------- sce.s <- StoeckiusHashingData(mode=c("human", "adt1", "adt2")) sce.s <- sce.s[,1:5000] sce.s ## ----------------------------------------------------------------------------- rna.counts <- assay(sce.s) adt.counts <- rbind(assay(altExp(sce.s, "adt1")), assay(altExp(sce.s, "adt2"))) ## ----------------------------------------------------------------------------- is.mito <- grepl("^MT-", rownames(rna.counts)) is.igg <- grepl("^IgG", rownames(adt.counts)) multi_res <- analyze( rna.counts, adt.x=adt.counts, rna.subsets=list(mito=is.mito), adt.subsets=list(igg=is.igg), num.threads=nthreads ) ## ----------------------------------------------------------------------------- # QC in both modalities, only keeping the cells that pass in both. library(scrapper) rna.qc.metrics <- computeRnaQcMetrics(rna.counts, subsets=list(MT=is.mito), num.threads=nthreads) rna.qc.thresholds <- suggestRnaQcThresholds(rna.qc.metrics) rna.qc.filter <- filterRnaQcMetrics(rna.qc.thresholds, rna.qc.metrics) adt.qc.metrics <- computeAdtQcMetrics(adt.counts, subsets=list(IgG=is.igg), num.threads=nthreads) adt.qc.thresholds <- suggestAdtQcThresholds(adt.qc.metrics) adt.qc.filter <- filterAdtQcMetrics(adt.qc.thresholds, adt.qc.metrics) keep <- rna.qc.filter & adt.qc.filter rna.filtered <- rna.counts[,keep,drop=FALSE] adt.filtered <- adt.counts[,keep,drop=FALSE] rna.size.factors <- centerSizeFactors(rna.qc.metrics$sum[keep]) rna.normalized <- normalizeCounts(rna.filtered, rna.size.factors) adt.size.factors <- computeClrm1Factors(adt.filtered, num.threads=nthreads) adt.size.factors <- centerSizeFactors(adt.size.factors) adt.normalized <- normalizeCounts(adt.filtered, adt.size.factors) gene.var <- modelGeneVariances(rna.normalized, num.threads=nthreads) top.hvgs <- chooseHighlyVariableGenes(gene.var$statistics$residuals) rna.pca <- runPca(rna.normalized[top.hvgs,], num.threads=nthreads) # Combining the RNA-derived PCs with ADT expression. Here, there's very few ADT # tags so there's no need for further dimensionality reduction. combined <- scaleByNeighbors(list(rna.pca$components, as.matrix(adt.normalized)), num.threads=nthreads) umap.out <- runUmap(combined$combined, num.threads=nthreads) tsne.out <- runTsne(combined$combined, num.threads=nthreads) snn.graph <- buildSnnGraph(combined$combined, num.threads=nthreads) clust.out <- clusterGraph(snn.graph) rna.markers <- scoreMarkers(rna.normalized, groups=clust.out$membership, num.threads=nthreads) adt.markers <- scoreMarkers(adt.normalized, groups=clust.out$membership, num.threads=nthreads) ## ----------------------------------------------------------------------------- sessionInfo()