# R code for a knitr document (one section per `## ----label----` chunk
# header). The script looks up the msigdb records on ExperimentHub, loads a
# gene set collection, appends KEGG gene sets, inspects and summarises the
# collection, and finally converts the hallmark gene sets into row indices of
# a placeholder expression matrix with limma::ids2indices().

## ---- include = FALSE---------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  warning = FALSE,
  message = FALSE,
  comment = "#>"
)

## ----install, eval=FALSE------------------------------------------------------
# if (!requireNamespace("BiocManager", quietly = TRUE))
#   install.packages("BiocManager")
#
# BiocManager::install("msigdb")

## ----load-packages, message=FALSE---------------------------------------------
library(msigdb)
library(ExperimentHub)
library(GSEABase)

## ----get-msigdb---------------------------------------------------------------
eh <- ExperimentHub()
query(eh, "msigdb")

## ----download-msigdb-sym-id---------------------------------------------------
# fetch a single record by its ExperimentHub identifier
eh[["EH5421"]]

## ----download-msigdb-sym-accessor---------------------------------------------
# with metadata = TRUE only the metadata are displayed
msigdb.v7.2.hs.SYM(metadata = TRUE)
# without arguments the data are loaded
msigdb.v7.2.hs.SYM()

## ----download-msigdb-sym-getMsigdb--------------------------------------------
# use the custom accessor instead of the version-specific function; no version
# argument is passed here, so whatever default the accessor defines is used
# NOTE: from here on the name `msigdb.v7.2.hs.SYM` refers to the loaded
# collection object, not only to the accessor function of the same name
msigdb.v7.2.hs.SYM <- getMsigdb("hs", "SYM")
msigdb.v7.2.hs.SYM

## ----append-kegg--------------------------------------------------------------
msigdb.v7.2.hs.SYM <- appendKEGG(msigdb.v7.2.hs.SYM)
msigdb.v7.2.hs.SYM

## ----process-gsc--------------------------------------------------------------
# number of gene sets in the collection
length(msigdb.v7.2.hs.SYM)

## ----access-gs----------------------------------------------------------------
gs <- msigdb.v7.2.hs.SYM[[1000]]
gs
# get genes in the signature
geneIds(gs)
# get collection type
collectionType(gs)
# get MSigDB category
bcCategory(collectionType(gs))
# get MSigDB subcategory
bcSubCategory(collectionType(gs))
# get description
description(gs)
# get details
details(gs)

## ----summarise-gsc------------------------------------------------------------
# collection type of every gene set, computed once and reused below
collection_types <- lapply(msigdb.v7.2.hs.SYM, collectionType)
# calculate the number of signatures in each category
# (vapply fails loudly if an accessor does not return a single string,
# instead of silently changing the result type as sapply would)
table(vapply(collection_types, bcCategory, character(1)))
# calculate the number of signatures in each subcategory
table(vapply(collection_types, bcSubCategory, character(1)))
# plot the distribution of sizes
hist(
  lengths(lapply(msigdb.v7.2.hs.SYM, geneIds)),
  main = "MSigDB signature size distribution",
  xlab = "Signature size"
)

## ----list-collections---------------------------------------------------------
listCollections(msigdb.v7.2.hs.SYM)
listSubCollections(msigdb.v7.2.hs.SYM)

## -----------------------------------------------------------------------------
# retrieve the hallmarks gene sets
subsetCollection(msigdb.v7.2.hs.SYM, "h")
# retrieve the biological processes category of gene ontology
subsetCollection(msigdb.v7.2.hs.SYM, "c5", "GO:BP")

## ----load-limma, message=FALSE------------------------------------------------
library(limma)

# create expression data: an all-zero placeholder matrix with one row per
# unique gene identifier in the collection and six sample columns
allg <- unique(unlist(geneIds(msigdb.v7.2.hs.SYM)))
emat <- matrix(
  0,
  nrow = length(allg),
  ncol = 6,
  dimnames = list(allg, paste0("sample", 1:6))
)
head(emat)

## ----subset-msigdb------------------------------------------------------------
# retrieve collections
hallmarks <- subsetCollection(msigdb.v7.2.hs.SYM, "h")
msigdb_ids <- geneIds(hallmarks)

# convert gene sets into a list of gene indices
fry_indices <- ids2indices(msigdb_ids, rownames(emat))
fry_indices[1:2]

## ----download-msig-sym-id-mouse-----------------------------------------------
# calling the accessor still works after this assignment because R skips
# non-function objects when resolving a name in call position
msigdb.v7.2.mm.SYM <- msigdb.v7.2.mm.SYM()
msigdb.v7.2.mm.SYM

## ----sessionInfo--------------------------------------------------------------
sessionInfo()