## ----setup, include=FALSE----------------------------------------------------- knitr::opts_chunk$set( echo = TRUE, message = FALSE, warning = FALSE, results = 'markup', dev = 'png', # raster images to reduce HTML size dpi = 72, fig.retina = 1 ) ## ----eval=FALSE--------------------------------------------------------------- # # Install from Bioconductor # if (!requireNamespace("BiocManager", quietly = TRUE)) # install.packages("BiocManager") # BiocManager::install("markeR") ## ----echo=FALSE--------------------------------------------------------------- library(markeR) ## ----example-gene-sets-vector, echo=FALSE------------------------------------- # Gene set without direction gene_set1 <- c("GeneA", "GeneB", "GeneC", "GeneD") # Gene set with direction gene_set2 <- data.frame( gene = c("GeneX", "GeneY", "GeneZ"), direction = c(1, -1, 1), stringsAsFactors = FALSE ) # Combine both into a named list gene_sets <- list( Set1 = gene_set1, Set2 = gene_set2 ) ## ----show-gene-set, echo=TRUE------------------------------------------------- # Example gene_sets ## ----loadsig------------------------------------------------------------------ # Load example gene sets data(genesets_example) ## ----loaddata----------------------------------------------------------------- # Load example expression data data(counts_example) counts_example[1:5, 1:5] ## ----load_metadata------------------------------------------------------------ # Load example metadata data(metadata_example) head(metadata_example) ## ----fig.width=8, fig.height=4, warning=FALSE, message=FALSE------------------ # Quantification approach: scores # Method: log2-median PlotScores(data = counts_example, metadata = metadata_example, gene_sets = genesets_example, Variable="Condition", method ="logmedian", nrow = 1, pointSize=4, title="Marthandan et al. 2016", widthTitle = 24, labsize=12, titlesize = 12) ## ----warning=FALSE, message=FALSE--------------------------------------------- Overall_Scores <- PlotScores(data = counts_example, metadata = metadata_example, gene_sets=genesets_example, Variable="Condition", method ="all", ncol = NULL, nrow = 1, widthTitle=30, limits = c(0,3.5), title="Marthandan et al. 2016", titlesize = 10, ColorValues = list(heatmap=c("#F9F4AE", "#B44141"), volcano=signature_colors <- c( HernandezSegura = "#A07395", REACTOME_CELLULAR_SENESCENCE = "#6B8E9E", LiteratureMarkers = "#CA7E45" ) ), mode="simple", widthlegend=30, sig_threshold=0.05, cohen_threshold=0.6, pointSize=6, colorPalette="Paired") ## ----Overall_Scores_heatmap, fig.width=10, fig.height=2, warning=FALSE, message=FALSE---- Overall_Scores$heatmap ## ----Overall_Scores_volcano, fig.width=6, fig.height=3, warning=FALSE, message=FALSE---- Overall_Scores$volcano ## ----roc_scores, fig.width=10, fig.height=3, warning=FALSE, message=FALSE----- ROC_Scores(data = counts_example, metadata = metadata_example, gene_sets=genesets_example, method = "all", variable ="Condition", colors = c(logmedian = "#3E5587", ssGSEA = "#B65285", ranking = "#B68C52"), grid = TRUE, spacing_annotation=0.3, ncol=NULL, nrow=1, mode = "simple", widthTitle = 28, titlesize = 10, title="Marthandan et al. 2016") ## ----FDRSim, fig.width=12, fig.height=3,warning=FALSE, message=FALSE---------- set.seed("123456") FPR_Simulation(data = counts_example, metadata = metadata_example, original_signatures = genesets_example, gene_list = row.names(counts_example), number_of_sims = 100, title = "Marthandan et al. 2016", widthTitle = 30, Variable = "Condition", titlesize = 12, pointSize = 5, labsize = 10, mode = "simple", ColorValues=NULL, ncol=NULL, nrow=1 ) ## ----DEGs, fig.width=10, fig.height=3, warning=FALSE, message=FALSE----------- # Build design matrix from variables (Condition) and apply a contrast. # The design matrix is constructed automatically using the variable "Condition". DEGs <- calculateDE(data = counts_example, metadata = metadata_example, variables = "Condition", contrasts = c("Senescent - Proliferative")) DEGs$`Senescent-Proliferative`[1:5,] ## ----DEGsvolcano, fig.width=10, fig.height=3,warning=FALSE, message=FALSE----- # Change order: gene sets in columns, contrast in rows plotVolcano(DEGs, genes = genesets_example, N = NULL, x = "logFC", y = "-log10(adj.P.Val)", pointSize = 2, color = "#6489B4", highlightcolor = "#05254A", highlightcolor_upreg = "#038C65", highlightcolor_downreg = "#8C0303",nointerestcolor = "#B7B7B7", threshold_y = NULL, threshold_x = NULL, xlab = NULL, ylab = NULL, ncol = NULL, nrow = NULL, title = "Marthandan et al. 2016", labsize = 10, widthlabs = 24, invert = TRUE) ## ----GSEA, warning=FALSE, message=FALSE--------------------------------------- GSEAresults <- runGSEA(DEGList = DEGs, gene_sets = genesets_example, stat = NULL, ContrastCorrection = FALSE) GSEAresults ## ----GSEA_plotenrichment, fig.width=10, fig.height=3, warning=FALSE, message=FALSE---- plotGSEAenrichment(GSEA_results=GSEAresults, DEGList=DEGs, gene_sets=genesets_example, widthTitle=40, grid = TRUE, titlesize = 10, nrow=1, ncol=3) ## ----GSEA_lollypop, fig.width=5, fig.height=4, out.width="60%", warning=FALSE, message=FALSE---- plotNESlollipop(GSEA_results=GSEAresults, saturation_value=NULL, nonsignif_color = "#F4F4F4", signif_color = "red", sig_threshold = 0.05, grid = FALSE, nrow = NULL, ncol = NULL, widthlabels=13, title=NULL, titlesize=12) ## ----------------------------------------------------------------------------- HernandezSegura_GeneSet <- list(HernandezSegura=genesets_example$HernandezSegura) ## ----load_metadata_discovery-------------------------------------------------- data(metadata_example) set.seed("123456") metadata_example$Researcher <- sample(c("John","Ana","Francisca"),39, replace = TRUE) metadata_example$DaysToSequencing <- sample(c(1:20),39, replace = TRUE) head(metadata_example) ## ----GSEA_varassoc, fig.width=6, fig.height=6,warning=FALSE, message=FALSE , out.width="70%"---- VariableAssociation( data = counts_example, metadata = metadata_example, method = "GSEA", cols = c("Condition","Researcher","DaysToSequencing"), mode = "simple", gene_set = HernandezSegura_GeneSet, saturation_value = NULL, nonsignif_color = "white", signif_color = "red", sig_threshold = 0.05, widthlabels = 30, labsize = 10, titlesize = 14, pointSize = 5 ) $plot ## ----variableassoc_score_sen, fig.width=7, fig.height=7,warning=FALSE, message=FALSE , out.width="70%"---- VariableAssociation(data = counts_example, metadata = metadata_example, method = "logmedian", cols = c("Condition","Researcher","DaysToSequencing"), gene_set = HernandezSegura_GeneSet, mode="simple", nonsignif_color = "white", signif_color = "red", saturation_value=NULL,sig_threshold = 0.05, widthlabels=30, labsize=10, titlesize=14, pointSize=5, discrete_colors=NULL, continuous_color = "#8C6D03", color_palette = "Set2")$Overall ## ----fig.width=6, fig.height=8------------------------------------------------ # Example data signature1 <- c("TP53", "BRCA1", "MYC", "EGFR", "CDK2") signature2 <- c("ATXN2", "FUS", "MTOR", "CASP3") signature_list <- list( "User_Apoptosis" = c("TP53", "CASP3", "BAX"), "User_CellCycle" = c("CDK2", "CDK4", "CCNB1", "MYC"), "User_DNARepair" = c("BRCA1", "RAD51", "ATM"), "User_MTOR" = c("MTOR", "AKT1", "RPS6KB1") ) geneset_similarity( signatures = list(Sig1 = signature1, Sig2 = signature2), other_user_signatures = signature_list, collection = "C2", subcollection = "CP:KEGG_LEGACY", metric = "odds_ratio", # Define gene universe (e.g., genes from HPA or your dataset) universe = unique(c( signature1, signature2, unlist(signature_list), msigdbr::msigdbr(species = "Homo sapiens", category = "C2")$gene_symbol )), or_threshold = 100, width_text=50, pval_threshold = 0.05 )$plot ## ----------------------------------------------------------------------------- sessionInfo()