## ----setup, include=FALSE----------------------------------------------------- knitr::opts_chunk$set(echo = TRUE, fig.align = "center") ## ----packages, message=FALSE, warning=FALSE----------------------------------- # Packages library("scran") library("SingleR") library("ggplot2") library("scRNAseq") library("Coralysis") library("SingleCellExperiment") ## ----data preprocessing, message=FALSE, warning=FALSE------------------------- ### Objects used and QC steps from SingleR book: # https://bioconductor.org/books/release/SingleRBook/sc-mode.html ## Reference: muraro-pancreas-2016 # Import data ref <- MuraroPancreasData() # Remove unnecessary data & labels altExps(ref) <- NULL ref <- ref[, !is.na(ref$label) & ref$label != "unclear"] # Normalise ref <- logNormCounts(ref) counts(ref) <- NULL ## Query: grun-pancreas-2016 # Import data query <- GrunPancreasData() # Remove unnecessary data & labels query <- addPerCellQC(query) qc <- quickPerCellQC(colData(query), percent_subsets = "altexps_ERCC_percent", batch = query$donor, subset = query$donor %in% c("D17", "D7", "D2") ) query <- query[, !qc$discard] altExps(query) <- NULL # Normalise query <- logNormCounts(query) counts(query) <- NULL # Feature selection with 'scran' package for reference nhvg <- 500 set.seed(123) top.hvg <- getTopHVGs(ref, n = nhvg) # Subset object ref <- ref[top.hvg, ] # Filter out genes for query: # not required, only done to reduce query size object query <- query[top.hvg, ] ## ----train reference, message=TRUE, warning=FALSE----------------------------- # Train the reference set.seed(123) ref <- RunParallelDivisiveICP( object = ref, L = 10, k = 8, build.train.params = list(nhvg = 500), # default 2000 threads = 2 ) # runs without 'batch.label' # Compute reference PCA ref <- RunPCA(ref, return.model = TRUE, pca.method = "stats") # Compute reference UMAP set.seed(123) ref <- RunUMAP(ref, return.model = TRUE, umap.method = "uwot", dims = 1:15, n_neighbors = 30, min_dist = 0.3 ) ## ----viz reference, fig.width=6, fig.height=7--------------------------------- # Vizualize reference ref.celltype.plot <- PlotDimRed( object = ref, color.by = "label", dimred = "UMAP", point.size = 0.01, legend.nrow = 3, seed.color = 17 ) + ggtitle("Reference: ground-truth labels") ref.celltype.plot ## ----reference-mapping-------------------------------------------------------- # Reference-mapping set.seed(1024) query <- ReferenceMapping( ref = ref, query = query, ref.label = "label", project.umap = TRUE, dimred.name.prefix = "ref", k.nn = 5 ) ## ----prediction accuracy scores----------------------------------------------- # Compare against SingleR preds.singler <- SingleR(test = query, ref = ref, labels = ref$label, de.method = "wilcox") colData(query) <- cbind(colData(query), preds.singler) # Confusion matrix preds_x_truth <- table(query$coral_labels, query$labels) # Accuracy acc <- sum(diag(preds_x_truth)) / sum(preds_x_truth) # Print confusion matrix preds_x_truth ## ----prediction viz, fig.width=7.5, fig.height=7.25--------------------------- # Plot query and reference UMAP side-by-side # with ground-truth & predicted cell labels use.color <- c("acinar" = "#FF7F00", "alpha" = "#CCCCCC", "beta" = "#FDCDAC", "delta" = "#E6F5C9", "duct" = "#E31A1C", "endothelial" = "#377EB8", "epsilon" = "#7FC97F", "mesenchymal" = "#F0027F", "pp" = "#B3CDE3") query.singler.plot <- PlotDimRed( object = query, color.by = "labels", dimred = "refUMAP", point.size = 0.25, point.stroke = 0.25, legend.nrow = 3, use.color = use.color ) + ggtitle("Query: SingleR predictions") query.predicted.plot <- PlotDimRed( object = query, color.by = "coral_labels", dimred = "refUMAP", point.size = 0.25, point.stroke = 0.25, legend.nrow = 3, use.color = use.color ) + ggtitle("Query: Coralysis predictions") query.confidence.plot <- PlotExpression( object = query, color.by = "coral_probability", dimred = "refUMAP", point.size = 0.25, point.stroke = 0.1, color.scale = "viridis" ) + ggtitle("Query: Coralysis confidence") cowplot::plot_grid(ref.celltype.plot, query.singler.plot, query.predicted.plot, query.confidence.plot, ncol = 2, align = "vh" ) ## ----rsession----------------------------------------------------------------- # R session sessionInfo()