library(RITANdata)
library(RITAN)

# A feature of RITAN is the ability to account for false-discovery rates
# across resources. However, the relationships among genesets are not always
# obvious. That is, two resources may each have a term that is highly similar,
# if not identical, to a term in the other. Ideally, you would pick one of
# these terms to use prior to analysis. The function geneset_overlap() helps
# with this process.
# Show the fraction of genes common to multiple disease-gene relationships
o <- geneset_overlap(geneset_list$DisGeNet)

# Distribution of the overlap fractions (computed before the diagonal is
# masked, so self-overlap entries are still included in this density).
plot(density(c(o)), main = "", xlab = "Fraction Shared Genes", ylab = "Prob()")

diag(o) <- NA # ignore self-overlap

# Keep only rows/columns that take part in at least one pair with > 80% overlap.
# which() skips the NA diagonal; drop = FALSE keeps `mat` a matrix even when a
# single row or column is selected, so heatmap() always receives a matrix.
i <- which(o > 0.8, arr.ind = TRUE)
mat <- o[unique(i[, 1]), unique(i[, 2]), drop = FALSE]
heatmap(
  mat,
  col = rev(gray(seq(0, 1, length.out = 15))),
  cexRow = 0.7, cexCol = 0.7, margins = c(7, 7)
)

# Show the fraction of genes common between disease-gene relationships and GO-Slim
o <- geneset_overlap(geneset_list$DisGeNet, geneset_list$GO_slim_generic)

# Remove the root of each sub-ontology
root_terms <- c("biological_process", "molecular_function", "cellular_component")
o <- o[, !(colnames(o) %in% root_terms), drop = FALSE]

# Distribution of the overlap fractions between the two resources
plot(density(c(o)), main = "", xlab = "Fraction Shared Genes", ylab = "Prob()")

# Keep only rows/columns that take part in at least one pair with > 95% overlap.
# drop = FALSE keeps `mat` a matrix even when a single row or column is
# selected, so heatmap() always receives a matrix.
i <- which(o > 0.95, arr.ind = TRUE)
mat <- o[unique(i[, 1]), unique(i[, 2]), drop = FALSE]
heatmap(
  mat,
  col = rev(gray(seq(0, 1, length.out = 15))),
  cexRow = 0.7, cexCol = 0.7, margins = c(7, 7)
)

# Row names of `o` whose overlap with the "chromosome_organization" column
# exceeds 0.66
rownames(o)[o[, "chromosome_organization"] > 0.66]
## [1] "Coffin-Siris syndrome"
## [2] "Leukemia, Myeloid"
## [3] "Leukemia, Megakaryoblastic, of Down Syndrome"
## [4] "Colorectal Neoplasms, Hereditary Nonpolyposis"
## [5] "Malignant neoplasm breast"

# Or, perhaps the genes underlying diseases related to cell motility are
# themselves of interest as a new geneset.
# Row names of `o` whose overlap with the "cell_motility" column exceeds 0.66
d <- rownames(o)[o[, "cell_motility"] > 0.66]
str(d)
##  chr [1:20] "Kartagener Syndrome" "Thromboembolism" ...

# New geneset: genes listed under any of the selected DisGeNet entries that
# are also listed under the GO-Slim cell_motility entry.
new_geneset <- intersect(
  unique(unlist(geneset_list$DisGeNet[d])),
  unique(unlist(geneset_list$GO_slim_generic$cell_motility))
)
str(new_geneset)
##  chr [1:86] "CCDC40" "DNAH5" "DNAI1" "DRC1" "DYX1C1" "F2" "F7" "GAS6" ...