## ----setup, include = FALSE--------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", crop = NULL, message = FALSE ) ## ----eval = FALSE------------------------------------------------------------- # if(!requireNamespace("BiocManager", quietly = TRUE)) # install.packages("BiocManager") # BiocManager::install("ProteinGymR") ## ----message = FALSE---------------------------------------------------------- library(ProteinGymR) library(tidyr) library(dplyr) library(stringr) library(ggplot2) library(ComplexHeatmap) library(AnnotationHub) ## ----import dms--------------------------------------------------------------- dms_data <- dms_substitutions() ## ----view studies------------------------------------------------------------- head(names(dms_data)) ## ----view assay--------------------------------------------------------------- head(dms_data[[1]]) ## ----queryEH------------------------------------------------------------------ eh <- ExperimentHub::ExperimentHub() AnnotationHub::query(eh, "ProteinGymR") dms_metadata <- eh[["EH9607"]] names(dms_metadata) ## ----ACE2--------------------------------------------------------------------- ACE2 <- dms_data[["ACE2_HUMAN_Chan_2020"]] ## ----heatmap, warning = FALSE, fig.wide = TRUE-------------------------------- ACE2 <- ACE2 |> dplyr::mutate( ref = str_sub(ACE2$mutant, 1, 1), pos = as.integer( gsub(".*?([0-9]+).*", "\\1", ACE2$mutant) ), alt = str_sub(ACE2$mutant, -1) ) ACE2 <- ACE2 |> select("ref", "pos", "alt", "DMS_score") head(ACE2) ## Reshape the data to wide format ACE2_wide <- ACE2 |> select(-ref) |> pivot_wider(names_from = alt, values_from = DMS_score) |> arrange(pos) ## Subset to first 100 position ACE2_wide <- ACE2_wide |> filter(pos <= 100) head(ACE2_wide) ## Convert to matrix pos <- ACE2_wide$pos alt <- colnames(ACE2_wide) alt <- alt[-c(1)] heatmap_matrix <- ACE2_wide |> select(2:length(ACE2_wide)) |> as.matrix() ## Set amino acid position as rownames of matrix rownames(heatmap_matrix) <- pos ## Transpose so position is x-axis heatmap_matrix <- t(heatmap_matrix) ## Reorder rows based on physiochemical properties phyiochem_order <- "DEKRHNQSTPGAVILMCFYW" phyiochem_order <- unlist(strsplit(phyiochem_order, split = "")) reordered_matrix <- heatmap_matrix[match(phyiochem_order, rownames(heatmap_matrix)), ] ## Create the heatmap ComplexHeatmap::Heatmap(reordered_matrix, name = "DMS Score", cluster_rows = FALSE, cluster_columns = FALSE, show_row_names = TRUE, show_column_names = TRUE) ## ----available_models--------------------------------------------------------- available_models() ## ----warning=FALSE, fig.wide = TRUE------------------------------------------- benchmark_models(metric = "AUC", models = c("GEMME", "CARP_600K", "ESM_1b", "EVmutation", "ProtGPT2")) ## ----sesh info---------------------------------------------------------------- sessionInfo()