## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----setup, include=FALSE-----------------------------------------------------
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")

have_bioc <- requireNamespace("scRNAseq", quietly = TRUE) &&
  requireNamespace("SummarizedExperiment", quietly = TRUE)

if (!have_bioc) {
  message("Skipping entire vignette: please install scRNAseq & SummarizedExperiment from Bioconductor")
  knitr::opts_chunk$set(eval = FALSE)
} else {
  library(scStability)
  library(scRNAseq)
  library(SummarizedExperiment)
}

## -----------------------------------------------------------------------------
sce <- scRNAseq::ZeiselBrainData(location = FALSE)
counts_matrix <- SummarizedExperiment::assay(sce, "counts")

## -----------------------------------------------------------------------------
library(Seurat)

seurat_obj <- Seurat::CreateSeuratObject(counts = counts_matrix)

# Subset to the first 200 cells to keep the example fast
cell_names <- colnames(seurat_obj@assays$RNA)[1:200]
seurat_obj <- subset(seurat_obj, cells = cell_names)

seurat_obj <- Seurat::NormalizeData(seurat_obj)
seurat_obj <- Seurat::FindVariableFeatures(seurat_obj)
seurat_obj <- Seurat::ScaleData(seurat_obj, verbose = FALSE)

# A PCA is needed as input to the functions, so we create one with Seurat::RunPCA().
# Use as few principal components as is reasonable to speed up computation; the number
# of informative PCs can be checked with a scree plot (e.g. Seurat::ElbowPlot()).
seurat_obj <- Seurat::RunPCA(seurat_obj, npcs = 10)

# Also extract the PCA matrix for use in the standalone embedding functions
input_pca <- Seurat::Embeddings(seurat_obj, reduction = "pca")

## -----------------------------------------------------------------------------
# Run the wrapper function, creating and comparing 15 embeddings and cluster assignments
stability_results <- scStability::scStability(seurat_obj, n_runs = 15, dr_method = 'umap',
                                               clust_method = 'louvain', n_cores = 8)

# As the function runs, summary statistics are printed, along with a density plot of the
# mean Kendall's tau per embedding and a violin plot of the cluster assignment NMI.

# To see the mean embedding with the mean cluster assignment, print the plot that was created
print(stability_results$plot)

# Statistics for each stage can be extracted as follows:
embedding_statistics <- stability_results$emb_stats
cluster_statistics <- stability_results$clust_stats

# Look at the mean Kendall's tau (correlation between embeddings) for each embedding
print(embedding_statistics$mean_per_embedding)

# Also look at the overall mean of the per-embedding mean Kendall's tau values
print(embedding_statistics$mean)

# Look at the mean NMI (a similarity measure analogous to correlation) for each cluster assignment
print(cluster_statistics$per_index_means)

# The mean NMI over all cluster assignments can also be computed
print(mean(cluster_statistics$per_index_means))

## -----------------------------------------------------------------------------
# Create a list of embeddings using the input PCA
emb_list <- scStability::createEmb(input_pca, n_runs = 15, method = 'umap', n_cores = 8)

# Compare the generated embeddings and look at the output statistics. A density plot of
# the mean Kendall's tau for each embedding will also be generated.
embedding_stats <- scStability::compareEmb(emb_list, n_cores = 8)
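# Note: assuming compareEmb() returns a list with the same fields as the wrapper's
# emb_stats component above (mean_per_embedding and mean; not confirmed here), the
# per-embedding and overall Kendall's tau could be inspected in the same way:
print(embedding_stats$mean_per_embedding)
print(embedding_stats$mean)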
# Create and compare cluster assignments. A violin plot showing the distribution of
# mean NMI values across cluster assignments will also be generated.
cluster_stats <- scStability::clustStable(n_runs = 15, seurat_obj = seurat_obj,
                                          method = 'louvain', n_cores = 8)
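# Note: assuming clustStable() returns a list with the same fields as the wrapper's
# clust_stats component above (per_index_means; not confirmed here), the per-assignment
# and overall NMI could be inspected as in the wrapper example:
print(cluster_stats$per_index_means)
print(mean(cluster_stats$per_index_means))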