params <- list(test = FALSE)

## ----setup, include=FALSE, message=FALSE--------------------------------------
knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE)
library(BiocStyle)

## ---- eval=FALSE--------------------------------------------------------------
# if (!require("BiocManager", quietly = TRUE)) {
#   install.packages("BiocManager")
# }
#
# BiocManager::install("spicyWorkflow")

## ----load libraries, echo=FALSE, results="hide", warning=FALSE----------------
suppressPackageStartupMessages({
  library(cytomapper)
  library(dplyr)
  library(ggplot2)
  library(simpleSeg)
  library(FuseSOM)
  library(ggpubr)
  library(scater)
  library(spicyR)
  library(ClassifyR)
  library(lisaClust)
})

## ---- eval=FALSE--------------------------------------------------------------
# library(cytomapper)
# library(dplyr)
# library(ggplot2)
# library(simpleSeg)
# library(FuseSOM)
# library(ggpubr)
# library(scater)
# library(spicyR)
# library(ClassifyR)
# library(lisaClust)

## ----set parameters-----------------------------------------------------------
# When TRUE, size the worker pool from the number of detected cores;
# otherwise use a fixed number of workers.
use_mc <- FALSE

if (use_mc) {
  # parallel::detectCores() may return NA when the core count cannot be
  # determined. na.rm = TRUE makes max() ignore that NA and fall back to a
  # single worker instead of propagating NA into the parallel backend.
  nCores <- max(parallel::detectCores() - 1, 1, na.rm = TRUE)
} else {
  nCores <- 2
}

# Parallel backend object reused by the calls below that accept BPPARAM.
BPPARAM <- simpleSeg:::generateBPParam(nCores)

# Use the classic ggplot2 theme for every plot in this script.
theme_set(theme_classic())

## ----load images--------------------------------------------------------------
pathToImages <- system.file("extdata/images", package = "spicyWorkflow")

# Store images in a CytoImageList on_disk as h5 files to save memory.
images <- cytomapper::loadImages(
  pathToImages,
  on_disk = TRUE,
  h5FilesPath = HDF5Array::getHDF5DumpDir(),
  single_channel = TRUE,
  BPPARAM = BPPARAM
)
gc()

## ----load clincal data--------------------------------------------------------
# Read the clinical table that ships with the spicyWorkflow package.
clinical <- read.csv(
  system.file(
    "extdata/1-s2.0-S0092867421014860-mmc1.csv",
    package = "spicyWorkflow"
  )
)

# Build an imageID for every row from its point number, patient ID and
# TMAD_Patient value.
clinical <- mutate(
  clinical,
  imageID = paste0("Point", PointNumber, "_pt", Patient_ID, "_", TMAD_Patient)
)

# Rows whose Tissue_Type contains "normal" get a "_Normal" suffix.
image_idx <- grep("normal", clinical$Tissue_Type)
clinical$imageID[image_idx] <- paste0(clinical$imageID[image_idx], "_Normal")

# Index the clinical table by imageID so it can be looked up by image name.
rownames(clinical) <- clinical$imageID

# Clinical columns to carry along with the images.
clinicalVariables <- c(
  "imageID",
  "Patient_ID",
  "Status",
  "Age",
  "SUBTYPE",
  "PAM50",
  "Treatment",
  "DCIS_grade",
  "Necrosis"
)

## ----add clinical data--------------------------------------------------------
# Attach the selected clinical columns to mcols of images, matched by name.
mcols(images) <- clinical[names(images), clinicalVariables]

## ----segment------------------------------------------------------------------
# Generate segmentation masks.
masks <- simpleSeg(
  images,
  nucleus = "HH3",
  pca = TRUE,
  cellBody = "dilate",
  discSize = 2,
  sizeSelection = 40,
  transform = "sqrt",
  cores = nCores
)

## ----visualise segmentation---------------------------------------------------
# Visualise segmentation performance one way: colour the labels of the first
# mask and display them.
masks[[1]] |>
  colorLabels() |>
  EBImage::display()

## -----------------------------------------------------------------------------
# Visualise segmentation performance another way.
cytomapper::plotPixels(
  image = images[1],
  mask = masks[1],
  img_id = "imageID",
  colour_by = c("PanKRT", "GLUT1", "HH3", "CD3", "CD20"),
  display = "single",
  legend = NULL,
  colour = list(
    HH3 = c("black", "blue"),
    CD3 = c("black", "purple"),
    CD20 = c("black", "green"),
    GLUT1 = c("black", "red"),
    PanKRT = c("black", "yellow")
  ),
  bcg = list(
    HH3 = c(0, 1, 1.5),
    CD3 = c(0, 1, 1.5),
    CD20 = c(0, 1, 1.5),
    GLUT1 = c(0, 1, 1.5),
    PanKRT = c(0, 1, 1.5)
  )
)

## -----------------------------------------------------------------------------
# Summarise the expression of each marker in each cell.
cells <- cytomapper::measureObjects(
  masks,
  images,
  img_id = "imageID",
  BPPARAM = BPPARAM
)

## ---- fig.width=5, fig.height=5-----------------------------------------------
# Combine the per-cell metadata with the "counts" assay (one column per
# marker) in a single data frame.
df <- cbind(colData(cells), t(assay(cells, "counts"))) |>
  as.data.frame()

# Plot densities of CK7 for each image.
ggplot(df, aes(x = CK7, colour = imageID)) +
  geom_density() +
  theme(legend.position = "none")

## ---- fig.width=5, fig.height=5-----------------------------------------------
# Transform and normalise the marker expression of each cell.
# Reads the "counts" assay with an asinh transformation and the "trim99",
# "minMax" and "PC1" normalisation methods.
cells <- normalizeCells(
  cells,
  assayIn = "counts",
  transformation = "asinh",
  method = c("trim99", "minMax", "PC1"),
  cores = nCores
)

# Extract normalised marker information from the "norm" assay.
norm_df <- cbind(colData(cells), t(assay(cells, "norm"))) |>
  as.data.frame()

# Plot densities of normalised CK7 for each image.
ggplot(norm_df, aes(x = CK7, colour = imageID)) +
  geom_density() +
  theme(legend.position = "none")

## ----FuseSOM------------------------------------------------------------------
# The markers used in the original publication to gate cell types.
useMarkers <- c(
  "PanKRT", "ECAD", "CK7", "VIM", "FAP", "CD31", "CK5", "SMA",
  "CD45", "CD4", "CD3", "CD8", "CD20", "CD68", "CD14", "CD11c",
  "HLADRDPDQ", "MPO", "Tryptase"
)

# Set seed.
set.seed(51773)

# Generate SOM and cluster cells into 24 groups (numClusters = 24).
cells <- runFuseSOM(
  cells,
  markers = useMarkers,
  assay = "norm",
  numClusters = 24
)

## -----------------------------------------------------------------------------
# Visualise marker expression in each cluster.
scater::plotGroupedHeatmap(
  cells,
  features = useMarkers,
  group = "clusters",
  exprs_values = "norm",
  cluster_rows = FALSE,
  center = TRUE,
  scale = TRUE,
  zlim = c(-3, 3)
)

## -----------------------------------------------------------------------------
# Generate metrics for estimating the number of clusters.
# runFuseSOM() has already been run above, so generateSOM() is not called
# again here.
cells <- estimateNumCluster(cells, kSeq = 2:30)
optiPlot(cells, method = "gap")

## -----------------------------------------------------------------------------
# Check cluster frequencies, smallest cluster first.
sort(table(colData(cells)$clusters))

## -----------------------------------------------------------------------------
set.seed(51773)

# Perform dimension reduction using UMAP on the normalised markers.
cells <- scater::runUMAP(
  cells,
  exprs_values = "norm",
  subset_row = useMarkers
)

# Select a subset of images to plot.
someImages <- unique(colData(cells)$imageID)[c(1, 10, 20, 40, 50, 60)]

# UMAP by cell type cluster, restricted to the selected images.
scater::plotReducedDim(
  cells[, colData(cells)$imageID %in% someImages],
  dimred = "UMAP",
  colour_by = "clusters"
)

## -----------------------------------------------------------------------------
# Flag cells whose Status is either "nonprogressor" or "progressor".
cellsToUse <- cells$Status %in% c("nonprogressor", "progressor")

# Perform simple Wilcoxon rank sum tests on the columns of the proportion matrix.
testProp <- colTest(
  cells[, cellsToUse],
  feature = "clusters",
  condition = "Status"
)
testProp

## -----------------------------------------------------------------------------
# Images whose clinical Status is "nonprogressor" or "progressor".
imagesToUse <- rownames(clinical)[
  clinical[, "Status"] %in% c("nonprogressor", "progressor")
]

# Per-image cluster proportions.
prop <- getProp(cells, feature = "clusters")

# Plot the cluster in the first row of the test results against Status.
clusterToUse <- rownames(testProp)[1]
boxplot(prop[imagesToUse, clusterToUse] ~ clinical[imagesToUse, "Status"])

## -----------------------------------------------------------------------------
# Test for changes in pair-wise spatial relationships between cell types.
spicyTest <- spicy(
  cells[, cellsToUse],
  imageID = "imageID",
  cellType = "clusters",
  spatialCoords = c("m.cx", "m.cy"),
  condition = "Status",
  Rs = c(20, 50, 100),
  sigma = 50,
  BPPARAM = BPPARAM
)

topPairs(spicyTest, n = 10)

## -----------------------------------------------------------------------------
# Visualise which relationships are changing the most.
signifPlot(spicyTest, breaks = c(-1.5, 3, 0.5))

## -----------------------------------------------------------------------------
set.seed(51773)

# Cluster cells into spatial regions with similar composition (k = 5).
cells <- lisaClust(
  cells,
  k = 5,
  cellType = "clusters",
  spatialCoords = c("m.cx", "m.cy"),
  Rs = c(20, 50, 100),
  sigma = 50,
  BPPARAM = BPPARAM
)

## ---- fig.height=5, fig.width=5-----------------------------------------------
# Visualise the enrichment of each cell type in each region.
regionMap(cells, cellType = "clusters", limit = c(0.2, 5))

## -----------------------------------------------------------------------------
# Extract cell information and keep a single image.
df <- filter(
  as.data.frame(colData(cells)),
  imageID == "Point2206_pt1116_31620"
)

# Colour cells by their region.
ggplot(df, aes(x = m.cx, y = m.cy, colour = region)) +
  geom_point()

## ----eval = FALSE-------------------------------------------------------------
# # Use hatching to visualise regions and cell types.
# hatchingPlot(
#   cells,
#   useImages = "Point2206_pt1116_31620",
#   cellType = "clusters",
#   spatialCoords = c("m.cx", "m.cy")
# )

## -----------------------------------------------------------------------------
# Use hatching to visualise regions and cell types.
# Relabel the hatching of the regions.
hatchingPlot(
  cells,
  useImages = "Point2206_pt1116_31620",
  cellType = "clusters",
  spatialCoords = c("m.cx", "m.cy"),
  window = "square",
  nbp = 300,
  line.spacing = 41
) +
  scale_region_manual(
    values = c(
      region_1 = 2,
      region_2 = 1,
      region_3 = 5,
      region_4 = 4,
      region_5 = 3
    )
  ) +
  guides(colour = guide_legend(ncol = 2))

## -----------------------------------------------------------------------------
# Test if the proportion of each region is associated
# with progression status.
testRegion <- colTest(
  cells[, cellsToUse],
  condition = "Status",
  feature = "region"
)

testRegion

## ----message=FALSE, warning=FALSE---------------------------------------------
# Collect the feature tables in one named list:
#   props   - proportions of each cell type in each image
#   dist    - pair-wise associations, converted to a data frame
#   regions - proportions of each region in each image
data <- list(
  props = getProp(cells, "clusters"),
  dist = as.data.frame(
    getPairwise(
      cells,
      cellType = "clusters",
      spatialCoords = c("m.cx", "m.cy"),
      Rs = c(20, 50, 100),
      sigma = 50,
      BPPARAM = BPPARAM
    )
  ),
  regions = getProp(cells, "region")
)

# Keep only the rows of each table named in imagesToUse.
measurements <- lapply(data, \(x) x[imagesToUse, ])

# Set seed
set.seed(51773)

# Perform cross-validation of a classifier (classifier = "GLM")
# with 100 repeats of 5-fold cross-validation.
cv <- crossValidate(
  measurements = measurements,
  outcome = clinical[imagesToUse, "Status"],
  classifier = "GLM",
  nRepeats = 100,
  nFolds = 5,
  nCores = nCores
)

## -----------------------------------------------------------------------------
# Plot the AUC metric from the cross-validation results, with "Assay Name"
# mapped to the x axis.
performancePlot(
  cv,
  metric = "AUC",
  characteristicsList = list(x = "Assay Name")
)

## -----------------------------------------------------------------------------
# Record the R version and attached packages.
sessionInfo()