params <- list(test = FALSE) ## ----setup, include=FALSE, message=FALSE-------------------------------------- knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE) library(BiocStyle) ## ----eval=FALSE--------------------------------------------------------------- # if (!require("BiocManager", quietly = TRUE)) { # install.packages("BiocManager") # } # # BiocManager::install("spicyWorkflow") ## ----load libraries, echo=FALSE, results="hide", warning=FALSE---------------- suppressPackageStartupMessages({ library(cytomapper) library(dplyr) library(ggplot2) library(simpleSeg) library(FuseSOM) library(ggpubr) library(scater) library(spicyR) library(ClassifyR) library(lisaClust) library(Statial) library(tidySingleCellExperiment) library(SpatialExperiment) library(SpatialDatasets) }) ## ----eval=FALSE--------------------------------------------------------------- # library(cytomapper) # library(dplyr) # library(ggplot2) # library(simpleSeg) # library(FuseSOM) # library(ggpubr) # library(scater) # library(spicyR) # library(ClassifyR) # library(lisaClust) # library(Statial) # library(tidySingleCellExperiment) # library(SpatialExperiment) # library(SpatialDatasets) ## ----set parameters----------------------------------------------------------- use_mc <- TRUE if (use_mc) { nCores <- max(parallel::detectCores()/2, 1) } else { nCores <- 2 } BPPARAM <- simpleSeg:::generateBPParam(nCores) theme_set(theme_classic()) ## ----load images-------------------------------------------------------------- pathToImages <- SpatialDatasets::Ferguson_Images() tmp <- tempfile() unzip(pathToImages, exdir = tmp) # Store images in a CytoImageList on_disk as h5 files to save memory. images <- cytomapper::loadImages( tmp, single_channel = TRUE, on_disk = TRUE, h5FilesPath = HDF5Array::getHDF5DumpDir(), BPPARAM = BPPARAM ) mcols(images) <- S4Vectors::DataFrame(imageID = names(images)) ## ----------------------------------------------------------------------------- cn <- channelNames(images) # Read in channel names head(cn) cn <- sub(".*_", "", cn) # Remove preceding letters cn <- sub(".ome", "", cn) # Remove the .ome head(cn) channelNames(images) <- cn # Reassign channel names ## ----------------------------------------------------------------------------- head(names(images)) nam <- sapply(strsplit(names(images), "_"), `[`, 3) head(nam) names(images) <- nam # Reassigning image names mcols(images)[["imageID"]] <- nam # Reassigning image names ## ----------------------------------------------------------------------------- masks <- simpleSeg(images, nucleus = c("HH3"), pca = TRUE, cellBody = "dilate", discSize = 3, sizeSelection = 20, transform = "sqrt", tissue = c("panCK", "CD45", "HH3"), cores = nCores ) ## ----visualise segmentation--------------------------------------------------- EBImage::display(colorLabels(masks[[1]])) ## ----------------------------------------------------------------------------- plotPixels(image = images["F3"], mask = masks["F3"], img_id = "imageID", colour_by = c("HH3"), display = "single", colour = list(HH3 = c("black","blue")), legend = NULL, bcg = list( HH3 = c(1, 1, 2) )) ## ----------------------------------------------------------------------------- plotPixels(image = images["F3"], mask = masks["F3"], img_id = "imageID", colour_by = c("HH3", "CD31", "FX111A"), display = "single", colour = list(HH3 = c("black","blue"), CD31 = c("black", "red"), FX111A = c("black", "green") ), legend = NULL, bcg = list( HH3 = c(1, 1, 2), CD31 = c(0, 1, 2), FX111A = c(0, 1, 1.5) )) ## ----------------------------------------------------------------------------- # Summarise the expression of each marker in each cell cells <- cytomapper::measureObjects(masks, images, img_id = "imageID", return_as = "spe", BPPARAM = BPPARAM) spatialCoordsNames(cells) <- c("x", "y") ## ----------------------------------------------------------------------------- clinical <- read.csv( system.file( "extdata/clinicalData_TMA1_2021_AF.csv", package = "spicyWorkflow" ) ) rownames(clinical) <- clinical$imageID clinical <- clinical[names(images), ] ## ----------------------------------------------------------------------------- colData(cells) <- cbind(colData(cells), clinical[cells$imageID, ]) ## ----eval=FALSE--------------------------------------------------------------- # save(cells, file = "spe_Ferguson_2022.rda") ## ----eval=FALSE--------------------------------------------------------------- # load(system.file("extdata/cells.rda", package = "spicyWorkflow")) ## ----fig.width=5, fig.height=5------------------------------------------------ # Plot densities of CD3 for each image. cells |> join_features(features = rownames(cells), shape = "wide", assay = "counts") |> ggplot(aes(x = CD3, colour = imageID)) + geom_density() + theme(legend.position = "none") ## ----------------------------------------------------------------------------- # Usually we specify a subset of the original markers which are informative to separating out distinct cell types for the UMAP and clustering. ct_markers <- c("podoplanin", "CD13", "CD31", "panCK", "CD3", "CD4", "CD8a", "CD20", "CD68", "CD16", "CD14", "HLADR", "CD66a") # ct_markers <- c("podoplanin", "CD13", "CD31", # "panCK", "CD3", "CD4", "CD8a", # "CD20", "CD68", "CD14", "CD16", # "CD66a") set.seed(51773) # Perform dimension reduction using UMAP. cells <- scater::runUMAP( cells, subset_row = ct_markers, exprs_values = "counts" ) # Select a subset of images to plot. someImages <- unique(cells$imageID)[c(1, 5, 10, 20, 30, 40)] # UMAP by imageID. scater::plotReducedDim( cells[, cells$imageID %in% someImages], dimred = "UMAP", colour_by = "imageID" ) ## ----fig.width=5, fig.height=5------------------------------------------------ # Leave out the nuclei markers from our normalisation process. useMarkers <- rownames(cells)[!rownames(cells) %in% c("DNA1", "DNA2", "HH3")] # Transform and normalise the marker expression of each cell type. cells <- normalizeCells(cells, markers = useMarkers, transformation = NULL, method = c("trim99", "mean", "PC1"), assayIn = "counts", cores = nCores ) # Plot densities of CD3 for each image cells |> join_features(features = rownames(cells), shape = "wide", assay = "norm") |> ggplot(aes(x = CD3, colour = imageID)) + geom_density() + theme(legend.position = "none") ## ----------------------------------------------------------------------------- set.seed(51773) # Perform dimension reduction using UMAP. cells <- scater::runUMAP( cells, subset_row = ct_markers, exprs_values = "norm", name = "normUMAP" ) someImages <- unique(cells$imageID)[c(1, 5, 10, 20, 30, 40)] # UMAP by imageID. scater::plotReducedDim( cells[, cells$imageID %in% someImages], dimred = "normUMAP", colour_by = "imageID" ) ## ----FuseSOM------------------------------------------------------------------ # Set seed. set.seed(51773) # Generate SOM and cluster cells into 10 groups cells <- runFuseSOM( cells, markers = ct_markers, assay = "norm", numClusters = 10 ) ## ----------------------------------------------------------------------------- cells <- estimateNumCluster(cells, kSeq = 2:30) optiPlot(cells, method = "gap") ## ----------------------------------------------------------------------------- # Visualise marker expression in each cluster. scater::plotGroupedHeatmap( cells, features = ct_markers, group = "clusters", exprs_values = "norm", center = TRUE, scale = TRUE, zlim = c(-3, 3), cluster_rows = FALSE, block = "clusters" ) ## ----------------------------------------------------------------------------- cells <- cells |> mutate(cellType = case_when( clusters == "cluster_1" ~ "GC", # Granulocytes clusters == "cluster_2" ~ "MC", # Myeloid cells clusters == "cluster_3" ~ "SC", # Squamous cells clusters == "cluster_4" ~ "EP", # Epithelial cells clusters == "cluster_5" ~ "SC", # Squamous cells clusters == "cluster_6" ~ "TC_CD4", # CD4 T cells clusters == "cluster_7" ~ "BC", # B cells clusters == "cluster_8" ~ "EC", # Endothelial cells clusters == "cluster_9" ~ "TC_CD8", # CD8 T cells clusters == "cluster_10" ~ "DC" # Dendritic cells )) ## ----------------------------------------------------------------------------- reducedDim(cells, "spatialCoords") <- spatialCoords(cells) cells |> filter(imageID == "F3") |> plotReducedDim("spatialCoords", colour_by = "cellType") ## ----------------------------------------------------------------------------- # Check cell type frequencies. cells$cellType |> table() |> sort() ## ----------------------------------------------------------------------------- # UMAP by cell type scater::plotReducedDim( cells[, cells$imageID %in% someImages], dimred = "normUMAP", colour_by = "cellType" ) ## ----------------------------------------------------------------------------- # Perform simple student's t-tests on the columns of the proportion matrix. testProp <- colTest(cells, condition = "group", feature = "cellType", type = "ttest") head(testProp) ## ----------------------------------------------------------------------------- prop <- getProp(cells, feature = "cellType") prop[1:5, 1:5] ## ----------------------------------------------------------------------------- clusterToUse <- rownames(testProp)[1] prop |> select(all_of(clusterToUse)) |> tibble::rownames_to_column("imageID") |> left_join(clinical, by = "imageID") |> ggplot(aes(x = group, y = .data[[clusterToUse]], fill = group)) + geom_boxplot() ## ----eval=FALSE--------------------------------------------------------------- # load(system.file("extdata/computed_cells.rda", package = "spicyWorkflow")) ## ----------------------------------------------------------------------------- spicyTest <- spicy(cells, condition = "group", cellTypeCol = "cellType", imageIDCol = "imageID", Rs = 1:10*10, sigma = 50, BPPARAM = BPPARAM) topPairs(spicyTest, n = 10) ## ----------------------------------------------------------------------------- # Visualise which relationships are changing the most. signifPlot( spicyTest, breaks = c(-1.5, 1.5, 0.5) ) ## ----------------------------------------------------------------------------- spicyBoxPlot(spicyTest, from = "SC", to = "GC") ## ----------------------------------------------------------------------------- spicyBoxPlot(spicyTest, rank = 1) ## ----------------------------------------------------------------------------- set.seed(51773) # Cluster cells into spatial regions with similar composition. cells <- lisaClust( cells, k = 4, sigma = 50, cellType = "cellType", BPPARAM = BPPARAM ) ## ----fig.height=5, fig.width=5------------------------------------------------ # Visualise the enrichment of each cell type in each region regionMap(cells, cellType = "cellType", limit = c(0.2, 2)) ## ----message=FALSE, warning=FALSE--------------------------------------------- cells |> filter(imageID == "F3") |> plotReducedDim("spatialCoords", colour_by = "region") ## ----------------------------------------------------------------------------- # Use hatching to visualise regions and cell types. hatchingPlot( cells, useImages = "F3", cellType = "cellType", nbp = 300 ) ## ----------------------------------------------------------------------------- # Test if the proportion of each region is associated # with progression status. testRegion <- colTest( cells, feature = "region", condition = "group", type = "ttest" ) testRegion ## ----------------------------------------------------------------------------- cells$m.cx <- spatialCoords(cells)[,"x"] cells$m.cy <- spatialCoords(cells)[,"y"] cells <- getDistances(cells, maxDist = 200, nCores = nCores, cellType = "cellType", spatialCoords = c("m.cx", "m.cy") ) ## ----------------------------------------------------------------------------- p <- plotStateChanges( cells = cells, cellType = "cellType", spatialCoords = c("m.cx", "m.cy"), type = "distances", image = "F3", from = "SC", to = "EP", marker = "podoplanin", size = 1, shape = 19, interactive = FALSE, plotModelFit = FALSE, method = "lm" ) p ## ----------------------------------------------------------------------------- state_dist <- calcStateChanges( cells = cells, cellType = "cellType", type = "distances", assay = 2, nCores = nCores, minCells = 100 ) head(state_dist[state_dist$imageID == "F3",], n = 10) ## ----------------------------------------------------------------------------- # Preparing outcome vector outcome <- cells$group[!duplicated(cells$imageID)] names(outcome) <- cells$imageID[!duplicated(cells$imageID)] # Preparing features for Statial distMat <- prepMatrix(state_dist) distMat <- distMat[names(outcome), ] # Remove some very small values distMat <- distMat[, colMeans(abs(distMat) > 0.0001) > .8] survivalResults <- colTest(distMat, outcome, type = "ttest") head(survivalResults) ## ----------------------------------------------------------------------------- fergusonTree <- treekoR::getClusterTree(t(assay(cells, "norm")), cells$cellType, hierarchy_method="hopach") parent1 <- c("TC_CD8", "TC_CD4", "DC") parent2 <- c("BC", "GC") parent3 <- c(parent1, parent2) parent4 <- c("MC", "EP", "SC") parent5 <- c(parent4, "EC") all = c(parent1, parent2, parent3, parent4, parent5) treeDf = Statial::parentCombinations(all, parent1, parent2, parent3, parent4, parent5) fergusonTree$clust_tree |> plot() ## ----------------------------------------------------------------------------- kontext <- Kontextual( cells = cells, cellType = "cellType", spatialCoords = c("m.cx", "m.cy"), parentDf = treeDf, r = 50, cores = nCores ) ## ----------------------------------------------------------------------------- # Converting Kontextual result into data matrix kontextMat <- prepMatrix(kontext) # Replace NAs with 0 kontextMat[is.na(kontextMat)] <- 0 survivalResults <- spicyR::colTest(kontextMat, outcome, type = "ttest") head(survivalResults) ## ----------------------------------------------------------------------------- # Create list to store data.frames data <- list() # Add proportions of each cell type in each image data[["Proportions"]] <- getProp(cells, "cellType") # Add pair-wise associations spicyMat <- bind(spicyTest) spicyMat[is.na(spicyMat)] <- 0 spicyMat <- spicyMat |> select(!condition) |> tibble::column_to_rownames("imageID") data[["SpicyR"]] <- spicyMat # Add proportions of each region in each image # to the list of dataframes. data[["LisaClust"]] <- getProp(cells, "region") # Add SpatioMark features data[["SpatioMark"]] <- distMat # Add Kontextual features data[["Kontextual"]] <- kontextMat ## ----------------------------------------------------------------------------- # Set seed set.seed(51773) # Perform cross-validation of a random forest model # with 100 repeats of 5-fold cross-validation. cv <- crossValidate( measurements = data, outcome = outcome, classifier = "randomForest", nFolds = 5, nRepeats = 50, nCores = nCores ) ## ----------------------------------------------------------------------------- # Calculate AUC for each cross-validation repeat and plot. performancePlot( cv, metric = "AUC", characteristicsList = list(x = "Assay Name"), orderingList = list("Assay Name" = c("Proportions", "SpicyR", "LisaClust", "Kontextual", "SpatioMark")) ) ## ----------------------------------------------------------------------------- samplesMetricMap(cv) ## ----------------------------------------------------------------------------- sessionInfo()