## R code extracted from the enrichViewNet vignette.
## Each "## ----<label>, <options>----" line marks the start of one vignette
## chunk; the statements below it are the code of that chunk.
## The script loads saved enrichment results, runs one g:Profiler enrichment
## analysis (network access required), builds gene-term networks with
## createNetwork() and draws enrichment maps with the createEnrichMap*()
## functions.

## ----style, echo = FALSE, results = 'hide', warning=FALSE, message=FALSE------
BiocStyle::markdown()

suppressPackageStartupMessages({
    library(knitr)
    library(enrichViewNet)
    library(gprofiler2)
    library(ggplot2)
})

## Set it globally
options(ggrepel.max.overlaps = Inf)

set.seed(1214)


## ----graphDemo01, echo = FALSE, fig.align="center", fig.cap="A network where significant GO terms and genes are presented as nodes while edges connect each gene to its associated term(s).", out.width = '90%'----
knitr::include_graphics("demo01.jpeg")


## ----graphDemo02, echo=FALSE, fig.align="center", fig.cap="An enrichment map using significant Kegg terms where edges are connecting terms with overlapping genes.", out.width = '95%'----
knitr::include_graphics("demo_KEGG_emap_v03.jpg")


## ----installDemo01, eval=FALSE, warning=FALSE, message=FALSE------------------
# if (!requireNamespace("BiocManager", quietly = TRUE))
#     install.packages("BiocManager")
#
# BiocManager::install("enrichViewNet")


## ----graphWorkflow, echo=FALSE, fig.align="center", fig.cap="The enrichViewNet general workflow", out.width = '100%'----
knitr::include_graphics("Figure_enrichViewNet_workflow_v05.jpg")


## ----graphListToGraph01, echo = FALSE, fig.align="left", fig.cap="From an enrichment list (A) to a Cytoscape network (B).", out.width = '100%'----
knitr::include_graphics("FromListToGraph_v03.jpg")


## ----gprofiler, echo=TRUE, warning=FALSE, message=FALSE, collapse=FALSE, eval=TRUE----
## Required library
library(gprofiler2)

## The dataset of differentially expressed genes done between
## napabucasin treated and DMSO control parental (Froeling et al 2019)
## All genes tested are present
data("parentalNapaVsDMSODEG")

## Retain significant results
## (absolute log2 fold change greater than 1 and adjusted p-value below 0.05)
## which() drops the entries where the condition is NA
retained <- which(abs(parentalNapaVsDMSODEG$log2FoldChange) > 1 &
                        parentalNapaVsDMSODEG$padj < 0.05)
signRes <- parentalNapaVsDMSODEG[retained, ]

## Run one functional enrichment analysis using all significant genes
## The species is homo sapiens ("hsapiens")
## The g:SCS multiple testing correction method (Raudvere U et al 2019)
## The WikiPathways database is used
## Only the significant results are retained (significant=TRUE)
## The evidence codes are included in the results (evcodes=TRUE)
## A custom background including all the tested genes is used
gostres <- gprofiler2::gost(
    query=list(parental_napa_vs_DMSO=unique(signRes$EnsemblID)),
    organism="hsapiens",
    correction_method="g_SCS",
    sources=c("WP"),
    significant=TRUE,
    evcodes=TRUE,
    custom_bg=unique(parentalNapaVsDMSODEG$EnsemblID))


## ----gostResult, echo=TRUE, eval=TRUE-----------------------------------------
## The 'gostres' object is a list of 2 entries
## The 'result' entry contains the enrichment results
## The 'meta' entry contains the metadata information

## Some columns of interest in the results
gostres$result[1:4, c("query", "p_value", "term_size",
                        "query_size", "intersection_size", "term_id")]

## The term names can be longer than the ones shown
gostres$result[19:22, c("term_id", "source", "term_name")]


## ----cytoscapeLogo01, echo = FALSE, fig.align="center", fig.cap="Cytoscape software logo.", out.width = '55%'----
knitr::include_graphics("cy3sticker.png")


## ----runCreateNetwork, echo=TRUE, eval=TRUE, message=FALSE--------------------
## Load saved enrichment results between parental Napa vs DMSO
data("parentalNapaVsDMSOEnrichment")

## Create network for REACTOME significant terms
## The 'removeRoot=TRUE' parameter removes the root term from the network
## The network will either be created in Cytoscape (if the application is open)
## or a CX file will be created in the temporary directory
createNetwork(gostObject=parentalNapaVsDMSOEnrichment,
    source="REAC",
    removeRoot=TRUE,
    title="REACTOME_All",
    collection="parental_napa_vs_DMSO",
    fileName=file.path(tempdir(), "parentalNapaVsDMSOEnrichment.cx"))


## ----networkInCytoscape, echo=FALSE, fig.align="center", fig.cap="All reactome terms in a gene-term network loaded in Cytoscape.", out.width = '110%'----
knitr::include_graphics("cytoscape_reactome_all_parental_napa_vs_DMSO.png")


## ----runCreateNetworkSelected, echo=TRUE, eval=TRUE, message=FALSE------------
## Load saved enrichment results between parental Napa vs DMSO
data("parentalNapaVsDMSOEnrichment")

## List of terms of interest
reactomeSelected <- c("REAC:R-HSA-9031628", "REAC:R-HSA-198725",
                        "REAC:R-HSA-9614085", "REAC:R-HSA-9617828",
                        "REAC:R-HSA-9614657", "REAC:R-HSA-73857",
                        "REAC:R-HSA-74160", "REAC:R-HSA-381340")

## All enrichment results
results <- parentalNapaVsDMSOEnrichment$result

## Retain selected results
## (%in% never returns NA, so the logical mask can be used directly)
selectedRes <- results[results$term_id %in% reactomeSelected, ]

## Print the names of the selected terms
selectedRes[, "term_name"]


## ----runCreateNetworkSelected2, echo=TRUE, eval=TRUE, message=FALSE, fig.align="center", fig.cap="Enrichment map."----
## Create network for REACTOME selected terms
## The 'source="TERM_ID"' parameter enables the use of a personalized
## list of terms of interest
## The network will either be created in Cytoscape (if the application is open)
## or a CX file will be created in the temporary directory
createNetwork(gostObject=parentalNapaVsDMSOEnrichment,
    source="TERM_ID",
    termIDs=selectedRes$term_id,
    title="REACTOME_Selected",
    collection="parental_napa_vs_DMSO",
    fileName=file.path(tempdir(), "parentalNapaVsDMSO_REACTOME.cx"))


## ----networkInCytoscapeSelected, echo=FALSE, fig.align="center", fig.cap="Selected Reactome terms in a gene-term network loaded in Cytoscape.", out.width = '110%'----
knitr::include_graphics("cytoscape_with_selected_REACTOME_v01.png")


## ----networkFinalReactome, echo=FALSE, fig.align="center", fig.cap="Final Reactome network after customization inside Cytoscape.", out.width = '100%'----
knitr::include_graphics("REACTOME_Selected.jpeg")


## ----gprofiler2, echo=TRUE, warning=FALSE, message=FALSE, collapse=FALSE, eval=TRUE----
## Required library
library(gprofiler2)

## The dataset of differentially expressed genes done between
## napabucasin treated and DMSO control parental (Froeling et al 2019)
## All genes tested are present
data("parentalNapaVsDMSODEG")

## Retain significant results
## (absolute log2 fold change greater than 1 and adjusted p-value below 0.05)
## which() drops the entries where the condition is NA
retained <- which(abs(parentalNapaVsDMSODEG$log2FoldChange) > 1 &
                        parentalNapaVsDMSODEG$padj < 0.05)
signRes <- parentalNapaVsDMSODEG[retained, ]

## Run one functional enrichment analysis using all significant genes
## The species is homo sapiens ("hsapiens")
## The g:SCS multiple testing correction method (Raudvere U et al 2019)
## The WikiPathways database is used
## Only the significant results are retained (significant=TRUE)
## The evidence codes are included in the results (evcodes=TRUE)
## A custom background including all the tested genes is used
gostres <- gprofiler2::gost(
    query=list(parental_napa_vs_DMSO=unique(signRes$EnsemblID)),
    organism="hsapiens",
    correction_method="g_SCS",
    sources=c("WP"),
    significant=TRUE,
    evcodes=TRUE,
    custom_bg=unique(parentalNapaVsDMSODEG$EnsemblID))


## ----gostResult2, echo=TRUE, eval=TRUE----------------------------------------
## The 'gostres' object is a list of 2 entries
## The 'result' entry contains the enrichment results
## The 'meta' entry contains the metadata information

## Some columns of interest in the results
gostres$result[1:4, c("query", "p_value", "term_size",
                        "query_size", "intersection_size", "term_id")]

## The term names can be longer than the ones shown
gostres$result[19:22, c("term_id", "source", "term_name")]


## ----runCreateEmap01, echo=TRUE, eval=TRUE, fig.cap="A Kegg enrichment map where terms with overlapping significant genes cluster together.", fig.align="center"----
## Load saved enrichment results between parental Napa vs DMSO
data("parentalNapaVsDMSOEnrichment")

## Set seed to ensure reproducible results
set.seed(121)

## Create network for all Kegg terms
## All terms will be shown even if there is overlapping
createEnrichMap(gostObject=parentalNapaVsDMSOEnrichment,
    query="parental_napa_vs_DMSO",
    source="KEGG")


## ----runCreateEmapTerms, echo=TRUE, eval=TRUE, fig.cap="An enrichment map showing only the user selected terms.", fig.align="center"----
## Load saved enrichment results between parental Napa vs DMSO
data("parentalNapaVsDMSOEnrichment")

## The term IDs must correspond to the IDs present in the "term_id" column
head(parentalNapaVsDMSOEnrichment$result[, c("query", "term_id", "term_name")],
    n=3)

## List of selected terms from different sources
termID <- c("KEGG:04115", "WP:WP4963", "KEGG:04010",
            "REAC:R-HSA-5675221", "REAC:R-HSA-112409", "WP:WP382")

## Set seed to ensure reproducible results
set.seed(222)

## Create network for all selected terms
createEnrichMap(gostObject=parentalNapaVsDMSOEnrichment,
    query="parental_napa_vs_DMSO",
    source="TERM_ID",
    termIDs=termID)


## ----runCreateEmap02, echo=TRUE, eval=TRUE, message=FALSE, warning=FALSE, fig.cap="An enrichment map with a different seed.", fig.align="center"----
## Set seed to ensure reproducible results
set.seed(91)

## Create network for all Kegg terms
createEnrichMap(gostObject=parentalNapaVsDMSOEnrichment,
    query="parental_napa_vs_DMSO",
    source="KEGG")


## ----runCreateEmap03, echo=TRUE, eval=TRUE, message=FALSE, warning=FALSE, fig.cap="An enrichment map with personalized colors.", fig.align="center"----
## The ggplot2 library is required
library(ggplot2)

## Set seed to ensure reproducible results
set.seed(91)

## Create network for all Kegg terms
graphKegg <- createEnrichMap(gostObject=parentalNapaVsDMSOEnrichment,
    query="parental_napa_vs_DMSO",
    source="KEGG")

## Nodes with lowest p-values will be in orange and highest p-values in black
## The title of the legend is also modified
graphKegg +
    scale_color_continuous(name="P-value adjusted", low="orange", high="black")


## ----emapMulti01, echo=TRUE, warning=FALSE, message=FALSE, collapse=FALSE, eval=TRUE, fig.cap="An enrichment map containing Kegg enrichment results for 2 different experiments.", fig.align="center"----
## Set seed to ensure reproducible results
set.seed(2121)

## The dataset of functional enriched terms for two experiments:
## napabucasin treated and DMSO control parental and
## napabucasin treated and DMSO control expressing Rosa26 control vector
## (Froeling et al 2019)
data("parentalNapaVsDMSOEnrichment")
data("rosaNapaVsDMSOEnrichment")

## The gostObjectList is a list containing all
## the functional enrichment objects
gostObjectList <- list(parentalNapaVsDMSOEnrichment,
    rosaNapaVsDMSOEnrichment)

## The queryList is a list of query names retained for each of the enrichment
## objects (same order). Beware that an enrichment object can contain more than
## one query.
query_01 <- unique(parentalNapaVsDMSOEnrichment$result$query)[1]
query_02 <- unique(rosaNapaVsDMSOEnrichment$result$query)[1]
queryList <- list(query_01, query_02)

## Enrichment map where the groups are the KEGG results for the 2 different
## experiments
createEnrichMapMultiBasic(gostObjectList=gostObjectList,
    queryList=queryList,
    source="KEGG",
    removeRoot=TRUE)


## ----emapMultiSeed, echo=TRUE, eval=TRUE, message=FALSE, warning=FALSE, fig.cap="An enrichment map using the same data from the previous one but with a different seed.", fig.align="center"----
## Set seed to ensure reproducible results
set.seed(5)

## Enrichment map where the groups are the KEGG results for the 2 different
## experiments
createEnrichMapMultiBasic(gostObjectList=gostObjectList,
    queryList=queryList,
    source="KEGG",
    removeRoot=TRUE)


## ----emapMultiCustom, echo=TRUE, warning=FALSE, message=FALSE, collapse=FALSE, eval=TRUE, fig.cap="An enrichment map using KEGG terms from two enrichment analyses with personalized colors and legend."----
## Required library
library(ggplot2)

## Set seed to ensure reproducible results
set.seed(5)

## Enrichment map where the groups are the KEGG results for the 2 different
## experiments
## The legend title, colors and labels are personalized; the breaks are the
## query names, passed as a character vector (queryList itself is a list)
createEnrichMapMultiBasic(gostObjectList=gostObjectList,
    queryList=queryList,
    source="KEGG",
    removeRoot=TRUE) +
    scale_fill_manual(name="Groups",
        breaks = unlist(queryList),
        values = c("cyan4", "bisque3"),
        labels = c("parental", "rosa")) +
    theme(legend.title = element_text(face="bold"))


## ----emapMultiComplex01, echo=TRUE, warning=FALSE, message=FALSE, collapse=FALSE, eval=TRUE, fig.cap="An enrichment map containing Kegg and Reactome results from the rosa Napa vs DMSO analysis.", fig.align="center"----
## Set seed to ensure reproducible results
set.seed(3221)

## The dataset of functional enriched terms for one experiment:
## napabucasin treated and DMSO control expressing Rosa26 control vector
## (Froeling et al 2019)
data("rosaNapaVsDMSOEnrichment")

## The gostObjectList is a list containing all
## the functional enrichment objects
## In this case, the same enrichment object is used twice
gostObjectList <- list(rosaNapaVsDMSOEnrichment,
    rosaNapaVsDMSOEnrichment)

## Extract the query name from the enrichment object
query_01 <- unique(rosaNapaVsDMSOEnrichment$result$query)[1]

## The query information is a data frame containing the information required
## to extract the specific terms for each enrichment object.
## The number of rows must correspond to the number of enrichment objects.
## The query name must be present in the enrichment object.
## The source can be: "GO:BP" for Gene Ontology Biological Process,
## "GO:CC" for Gene Ontology Cellular Component, "GO:MF" for Gene Ontology
## Molecular Function, "KEGG" for Kegg, "REAC" for Reactome,
## "TF" for TRANSFAC, "MIRNA" for miRTarBase, "CORUM" for CORUM database,
## "HP" for Human phenotype ontology and "WP" for WikiPathways or
## "TERM_ID" when a list of terms is specified.
## The termIDs is an empty string except when the source is set to "TERM_ID".
## The group names are going to be used in the legend and should be unique to
## each group.
queryInfo <- data.frame(queryName=c(query_01, query_01),
    source=c("KEGG", "REAC"),
    removeRoot=c(TRUE, TRUE),
    termIDs=c("", ""),
    groupName=c("Kegg", "Reactome"),
    stringsAsFactors=FALSE)

## Enrichment map where the groups are the KEGG and Reactome results for the
## same experiment
createEnrichMapMultiComplex(gostObjectList=gostObjectList,
    queryInfo=queryInfo)


## ----emapMultiCustom2, echo=TRUE, warning=FALSE, message=FALSE, collapse=FALSE, eval=TRUE, fig.cap="An enrichment map using selected terms related to MAP kinases and interleukin in two different experiments."----
## Set seed to ensure reproducible results
set.seed(28)

## The datasets of functional enriched terms for the two experiments:
## napabucasin treated and DMSO control expressing Rosa26 control vector and
## napabucasin treated and DMSO control parental MiaPaCa2 cells
## (Froeling et al 2019)
data("rosaNapaVsDMSOEnrichment")
data("parentalNapaVsDMSOEnrichment")

## The gostObjectList is a list containing all
## the functional enrichment objects
## In this case, each enrichment object is used twice
## The order of the objects must respect the order on the queryInfo data frame
## In this case:
##    1. rosa dataset (for MAP kinases)
##    2. parental dataset (for MAP kinases)
##    3. rosa dataset (for interleukin)
##    4. parental dataset (for interleukin)
gostObjectList <- list(rosaNapaVsDMSOEnrichment,
    parentalNapaVsDMSOEnrichment,
    rosaNapaVsDMSOEnrichment,
    parentalNapaVsDMSOEnrichment)

## Extract the query name from the enrichment object
query_rosa <- unique(rosaNapaVsDMSOEnrichment$result$query)[1]
query_parental <- unique(parentalNapaVsDMSOEnrichment$result$query)[1]

## List of selected terms that will be shown in each group
## (one comma-separated string per group)
rosa_mapk <- "GO:0017017,GO:0033549,KEGG:04010,WP:WP382"
rosa_il <- "KEGG:04657,WP:WP4754"
parental_mapk <- paste0("GO:0017017,GO:0033549,KEGG:04010,",
    "REAC:R-HSA-5675221,REAC:R-HSA-112409,WP:WP382")
parental_il <- "WP:WP4754,WP:WP395"

## The query information is a data frame containing the information required
## to extract the specific terms for each enrichment object
## The number of rows must correspond to the number of enrichment objects
## The query name must be present in the enrichment object
## The source is set to "TERM_ID" so that the terms present in termIDs column
## will be used
## The group name will be used for the legend, the same name cannot be
## used twice
queryInfo <- data.frame(
    queryName=c(query_rosa, query_parental, query_rosa, query_parental),
    source=c("TERM_ID", "TERM_ID", "TERM_ID", "TERM_ID"),
    removeRoot=c(FALSE, FALSE, FALSE, FALSE),
    termIDs=c(rosa_mapk, parental_mapk, rosa_il, parental_il),
    groupName=c("rosa - MAP kinases", "parental - MAP kinases",
                "rosa - Interleukin", "parental - Interleukin"),
    stringsAsFactors=FALSE)

## Enrichment map where the groups are the selected MAP kinases and
## interleukin terms for each of the 2 experiments
createEnrichMapMultiComplex(gostObjectList=gostObjectList,
    queryInfo=queryInfo)


## ----sessionInfo, echo=FALSE--------------------------------------------------
sessionInfo()