## ----setup, include = FALSE, fig.show='hide'---------------------------------- knitr::knit_hooks$set(pngquant = knitr::hook_pngquant) knitr::opts_chunk$set( collapse = TRUE, comment = "#>", dev = "ragg_png", dpi = 72, fig.retina = 2, fig.align = "center", out.width = "100%", pngquant = "--speed=1 --quality=1-5" ) ## ----message=FALSE, fig.show='hide'------------------------------------------- # Load library library(scDiagnostics) # Load datasets data("reference_data") data("query_data") # Set seed for reproducibility set.seed(0) ## ----fig.height=5, fig.width=10, fig.show='hide'------------------------------ # Perform anomaly detection anomaly_output <- detectAnomaly(reference_data = reference_data, query_data = query_data, ref_cell_type_col = "expert_annotation", query_cell_type_col = "SingleR_annotation", pc_subset = 1:5, n_tree = 500, anomaly_treshold = 0.6) # Plot the output for the "CD4" cell type plot(anomaly_output, cell_type = "CD4", pc_subset = 1:5, data_type = "query") ## ----fig.height=5, fig.width=10, fig.show='hide'------------------------------ # Perform anomaly detection anomaly_output <- detectAnomaly(reference_data = reference_data, query_data = query_data, ref_cell_type_col = "expert_annotation", query_cell_type_col = "SingleR_annotation", pc_subset = 1:5, n_tree = 500, anomaly_treshold = 0.6) # Plot the global anomaly scores plot(anomaly_output, cell_type = NULL, # Plot all cell types pc_subset = 1:5, data_type = "query") ## ----fig.height=5, fig.width=10, fig.show='hide'------------------------------ # Detect anomalies and select top anomalies for further analysis anomaly_output <- detectAnomaly(reference_data = reference_data, query_data = query_data, ref_cell_type_col = "expert_annotation", query_cell_type_col = "SingleR_annotation", pc_subset = 1:10, n_tree = 500, anomaly_treshold = 0.5) # Select the top 6 anomalies based on the anomaly scores top6_anomalies <- names(sort(anomaly_output$Combined$reference_anomaly_scores, decreasing = TRUE)[1:6]) # Calculate cosine similarity between the top anomalies and top 25 PCs cosine_similarities <- calculateCellSimilarityPCA(reference_data, cell_names = top6_anomalies, pc_subset = 1:25, n_top_vars = 50) # Plot the cosine similarities across PCs round(cosine_similarities[, paste0("PC", 15:25)], 2) ## ----fig.height=5, fig.width=10, fig.show='hide'------------------------------ # Compute cell distances distance_data <- calculateCellDistances( query_data = query_data, reference_data = reference_data, query_cell_type_col = "SingleR_annotation", ref_cell_type_col = "expert_annotation", pc_subset = 1:10 ) ## ----fig.height=5, fig.width=10, fig.show='hide'------------------------------ # Identify top 6 anomalies for CD4 cells cd4_anomalies <- detectAnomaly( reference_data = reference_data, query_data = query_data, query_cell_type_col = "SingleR_annotation", ref_cell_type_col = "expert_annotation", pc_subset = 1:10, n_tree = 500, anomaly_treshold = 0.5) # Top 6 CD4 anomalies cd4_top6_anomalies <- names(sort(cd4_anomalies$CD4$query_anomaly_scores, decreasing = TRUE)[1:6]) # Plot distances for CD4 and CD8 cells plot(distance_data, ref_cell_type = "CD4", cell_names = cd4_top6_anomalies) ## ----fig.height=5, fig.width=10, fig.show='hide'------------------------------ # Plot distances for CD8 cells plot(distance_data, ref_cell_type = "CD8", cell_names = cd4_top6_anomalies) ## ----fig.height=5, fig.width=10, fig.show='hide'------------------------------ # Compute similarity measures for top 6 CD4 anomalies overlap_measures <- calculateCellDistancesSimilarity( query_data = query_data, reference_data = reference_data, cell_names = cd4_top6_anomalies, query_cell_type_col = "SingleR_annotation", ref_cell_type_col = "expert_annotation", pc_subset = 1:10) overlap_measures ## ----SessionInfo, echo=FALSE, message=FALSE, warning=FALSE, comment=NA, fig.show='hide'---- options(width = 80) sessionInfo()