## ----setup, include=FALSE----------------------------------------------------- knitr::opts_chunk$set(echo = TRUE, fig.align = "center") ## ----installation, eval=FALSE------------------------------------------------- # # Installation # if (!require("BiocManager", quietly = TRUE)) # install.packages("BiocManager") # # BiocManager::install("Coralysis") ## ----packages, message=FALSE, warning=FALSE----------------------------------- # Import packages library("scRNAseq") library("Coralysis") library("SingleCellExperiment") ## ----data--------------------------------------------------------------------- # Import data zeisel <- ZeiselBrainData() romanov <- RomanovBrainData() ## ----preprocess--------------------------------------------------------------- # Normalize zeisel <- scater::logNormCounts(zeisel) counts(zeisel) <- NULL altExps(zeisel) <- list() romanov <- scater::logNormCounts(romanov) counts(romanov) <- NULL # Intersected genes genes <- intersect(row.names(zeisel), row.names(romanov)) zeisel <- zeisel[genes, ] romanov <- romanov[genes, ] # Remove colData colData(zeisel) <- colData(zeisel)[, "level1class", drop = FALSE] colnames(colData(zeisel)) <- "cell_type" colData(romanov) <- colData(romanov)[, "level1 class", drop = FALSE] colnames(colData(romanov)) <- "cell_type" # Batch zeisel[["batch"]] <- "Zeisel et al." romanov[["batch"]] <- "Romanov et al." # Merge cells sce <- cbind(zeisel, romanov) rm(list = c("zeisel", "romanov")) # HVG hvg <- scran::getTopHVGs(sce, n = 2000) sce <- sce[hvg, ] ## ----prepare data------------------------------------------------------------- # Prepare data: # checks 'logcounts' format & removes non-expressed genes sce <- PrepareData(object = sce) ## ----integration-------------------------------------------------------------- # Multi-level integration set.seed(4591) # reproducibility of downsampling - 'icp.batch.size' sce <- RunParallelDivisiveICP( object = sce, batch.label = "batch", icp.batch.size = 1000, L = 10, threads = 2, RNGseed = 123 ) ## ----integrated emb----------------------------------------------------------- # Integrated embedding set.seed(125) sce <- RunPCA(object = sce) ## ----umap--------------------------------------------------------------------- # UMAP set.seed(1204) sce <- RunUMAP(object = sce, min_dist = 0.5, n_neighbors = 15) ## ----viz categorical vars, fig.width=10, fig.height=5.5----------------------- # Visualize categorical variables integrated emb. vars <- c("batch", "cell_type") plots <- lapply(X = vars, FUN = function(x) { PlotDimRed( object = sce, color.by = x, point.size = 0.25, point.stroke = 0.5, legend.nrow = 4 ) }) cowplot::plot_grid(plotlist = plots, ncol = 2, align = "vh") # join plots together ## ----viz categorical vars by batch, fig.width=10, fig.height=5.5-------------- plots[[2]] + ggplot2::facet_grid(~batch) ## ----viz gene exp markers, fig.width=8.5, fig.height=6------------------------ # Visualization of gene expression markers markers <- c("Acta2", "Myh11", "Tagln", "Cdh5", "Pecam1") plot.markers <- lapply(markers, function(x) { PlotExpression(sce, color.by = x, point.size = 0.5, point.stroke = 0.5) + ggplot2::facet_grid(~batch) }) cowplot::plot_grid(plotlist = plot.markers, ncol = 2) ## ----clustering, fig.width=5, fig.height=5.5---------------------------------- # Graph-based clustering set.seed(123) sce$cluster <- scran::clusterCells(sce, use.dimred = "PCA", BLUSPARAM = bluster::SNNGraphParam(k = 30, cluster.fun = "louvain") ) # Plot the clustering result plots[[3]] <- PlotDimRed( object = sce, color.by = "cluster", point.size = 0.25, point.stroke = 0.5, legend.nrow = 4 ) plots[[3]] ## ----viz clustering by batch, fig.width=8, fig.height=10.5-------------------- cowplot::plot_grid( plots[[2]] + ggplot2::facet_grid(~batch), plots[[3]] + ggplot2::facet_grid(~batch), ncol = 1 ) ## ----gene coefficients, message=FALSE, warning=FALSE-------------------------- # Get label gene coefficients by majority voting clts.gene.coeff <- MajorityVotingFeatures(object = sce, label = "cluster") # Print summary clts.gene.coeff$summary ## ----viz top coefficients----------------------------------------------------- ## Visualize top coefficients for cluster 10 and 12 clts <- c("10", "12") plot.coeff.markers <- list() for (i in clts) { top.markers <- clts.gene.coeff$feature_coeff[[i]] top.markers <- top.markers[order(top.markers[,2], decreasing = TRUE), ] top.markers <- top.markers[1:6, "feature"] plot.coeff.markers[[i]] <- lapply(top.markers, function(x) { PlotExpression(sce, color.by = x, point.size = 0.3, point.stroke = 0.5) + ggplot2::facet_grid(~batch) }) } ## ----viz - cluster 10, fig.width=9.5, fig.height=3.5-------------------------- # Cluster 10 cowplot::plot_grid(plotlist = plot.coeff.markers[[1]], ncol = 3) ## ----viz - cluster 12, fig.width=9.5, fig.height=3.5-------------------------- # Cluster 12 cowplot::plot_grid(plotlist = plot.coeff.markers[[2]], ncol = 3) ## ----rsession----------------------------------------------------------------- # R session sessionInfo()