## ----imports, echo=FALSE,eval=TRUE, message=FALSE--------------------------
library(Onassis)
library(DT)
library(gplots)
library(org.Hs.eg.db)
## ----connectTodb, echo=TRUE,eval=FALSE-------------------------------------
# ## Running this function might take long time if the database has to be downloaded.
# geo_con <- connectToGEODB(system.file(getwd(), 'GEOmetadb.sqlite'))
#
# #Showing the experiment types available in GEO
# experiments <- experiment_types(geo_con)
#
# #Showing the organism types available in GEO
# species <- organism_types(geo_con)
#
# #Retrieving the metadata associated to experiment type "Methylation profiling by high througput sequencing"
# meth_metadata <- getGEOMetadata(geo_con, experiment_type='Methylation profiling by high throughput sequencing', organism = 'Homo sapiens')
#
# #Retrieving Human gene expression metadata, knowing the GEO platform identifier, e.g. the Affymetrix Human Genome U133 Plus 2.0 Array
# expression <- getGEOMetadata(geo_con, experiment_type='Expression profiling by array', gpl='GPL570')
## ----experimentTypesshow, echo=FALSE, eval=TRUE----------------------------
experiments <- readRDS(system.file('extdata', 'vignette_data', 'experiment_types.rds', package='Onassis'))
knitr::kable(experiments[1:10], rownames=FALSE,
caption = htmltools::tags$caption(
style = 'caption-side: top-left; text-align: left;',
'Table 1: ', htmltools::em('Experiments available in GEO')),
options=list(
pageLength =5,
autoWidth = TRUE,
scrollX='300px',
rownames=FALSE))
## ----speciesShow, echo=FALSE,eval=TRUE-------------------------------------
species <- readRDS(system.file('extdata', 'vignette_data', 'organisms.rds', package='Onassis'))
knitr::kable(species[1:10], rownames=FALSE,
caption = htmltools::tags$caption(
style = 'caption-side: top-left; text-align: left;',
'Table 1: ', htmltools::em('Species available in GEO')),
options=list(
pageLength =5,
autoWidth = TRUE,
scrollX='300px',
rownames=FALSE))
## ----loadgeoMetadata, echo=TRUE, eval=TRUE---------------------------------
meth_metadata <- readRDS(system.file('extdata', 'vignette_data', 'GEOmethylation.rds', package='Onassis'))
## ----printmeta, echo=FALSE,eval=TRUE---------------------------------------
methylation_tmp <- meth_metadata
methylation_tmp$experiment_summary <- sapply(methylation_tmp$experiment_summary, function(x) substr(x, 1, 50))
knitr::kable(methylation_tmp[1:10,], rownames=FALSE,
caption = htmltools::tags$caption(
style = 'caption-side: top-left; text-align: left;',
'Table 1: ', htmltools::em('Methylation profiling by high througput sequencing metadata from GEOmetadb.')),
options=list(
pageLength =5,
autoWidth = TRUE,
scrollX='300px',
rownames=FALSE))
# columnDefs = list(list(targets=10,
# render = JS(
# "function(data, type, row, meta) {",
# "return type === 'display' && data.length > 50 ?",
# "'' + data.substr(0, 50) + '...' : data;",
# "}")
# )))), callback = JS('table.page("next").draw(false);'))
## ----connectSRA, echo=TRUE,eval=FALSE--------------------------------------
# # Connection to the SRAmetadb and potential download of the sqlite file
# sra_con <- connectToSRADB()
#
# # Query for the ChIP-Seq experiments contained in GEO for human samples
# sra_chip_seq <- getSRAMetadata(sra_con, library_strategy='ChIP-Seq', library_source='GENOMIC', taxon_id=9606, center_name='GEO')
#
# # The following example allows to retrieve Bisulfite sequencing samples' metadata.
# bisulfite_seq <- getSRAMetadata(sra_con, library_strategy='Bisulfite-Seq', library_source='GENOMIC', taxon_id=9606, center_name='GEO' )
#
#
## ----readCHIP, echo=TRUE, eval=TRUE----------------------------------------
sra_chip_seq <- readRDS(system.file('extdata', 'vignette_data', 'GEO_human_chip.rds', package='Onassis'))
## ----printchromatinIP, echo=FALSE,eval=TRUE--------------------------------
knitr::kable(head(sra_chip_seq, 10), rownames=FALSE,
caption = htmltools::tags$caption(
style = 'caption-side: top-left; text-align: left;',
'Table: ', htmltools::em('ChIP-Seq metadata obtained from SRAdb.')),
options=list(
pageLength =5,
autoWidth = TRUE,
scrollX='300px',
rownames=FALSE))#,
#columnDefs = list(list(targets=9,
# render = JS(
# "function(data, type, row, meta) {",
# "return type === 'display' && data.length > 50 ?",
# "'' + data.substr(0, 50) + '...' : data;",
# "}")
#))), callback = JS('table.page("next").draw(false);'))
## ----createSampleAndTargetDict, echo=TRUE,eval=TRUE, message=FALSE---------
# If a Conceptmapper dictionary is already available the dictType CMDICT can be specified and the corresponding file loaded
sample_dict <- dictionary(inputFileOrDb=system.file('extdata', 'cmDict-sample.cs.xml', package = 'Onassis'), dictType = 'CMDICT')
#Creation of a dictionary from the file sample.cs.obo available in OnassisJavaLibs
obo <- system.file('extdata', 'sample.cs.obo', package='OnassisJavaLibs')
sample_dict <- dictionary(inputFileOrDb=obo, outputdir=getwd(), synonymType='ALL')
# Creation of a dictionary for human genes/proteins
require(org.Hs.eg.db)
targets <- dictionary(dictType='TARGET', inputFileOrDb = 'org.Hs.eg.db')
## ----settingOptions, echo=TRUE,eval=TRUE-----------------------------------
#Showing configuration permutations
opts <- CMoptions()
combinations <- listCombinations(opts)
#Setting the combination of parameters through the paramValueIndex value
myopts <- CMoptions(c("CONTIGUOUS_MATCH", "CASE_INSENSITIVE", 'BIOLEMMATIZER', 'PUBMED', 'ON', 'YES', 'ALL'))
## ----annotateDF, echo=TRUE, eval=TRUE--------------------------------------
chipseq_dict_annot <- annotate(sra_chip_seq[1:20,c('sample_accession', 'title', 'experiment_attribute', 'sample_attribute', 'description')], dictionary=sample_dict, options=myopts)
## ----showchipresults, echo=FALSE, eval=TRUE, message=FALSE-----------------
#methylation_brenda_annot <- readRDS(system.file('extdata', 'vignette_data', 'methylation_brenda_annot.rds', package='Onassis'))
#UPDATE con ChIP-seq
knitr::kable(head(chipseq_dict_annot, 20), rownames=FALSE,
caption = htmltools::tags$caption(
style = 'caption-side: top-left; text-align: left;',
'Table: ', htmltools::em('Annotations of the methylation profiling by high througput sequencing metadata obtained from GEO with BRENDA ontology concepts')),
options=list(
pageLength =10,
autoWidth = TRUE,
scrollX='300px',
rownames=FALSE))#,
#columnDefs = list(list(targets=1,
# render = JS(
# "function(data, type, row, meta) {",
# "return type === 'display' && data.length > 50 ?",
# "'' + data.substr(0, 50) + '...' : data;",
# "}")
#))), callback = JS('table.page("next").draw(false);'))
## ----annotateGenes, echo=TRUE, eval=TRUE, message=FALSE--------------------
#Finding the TARGET entities
target_entities <- annotate(inputFileorDf=sra_chip_seq[1:20,c('sample_accession', 'title', 'experiment_attribute', 'sample_attribute', 'description')], options = myopts, dictionary=targets)
## ----printKable, echo=FALSE, eval=TRUE-------------------------------------
knitr::kable(target_entities, rownames=FALSE,
caption = htmltools::tags$caption(
style = 'caption-side: top-left; text-align: left;',
'Table: ', htmltools::em('Annotations of ChIP-seq test metadata obtained from SRAdb and stored into files with the TARGETs (genes and histone variants)')),
options=list(
pageLength =10,
autoWidth = TRUE,
scrollX='100px',
rownames=FALSE,
columnDefs = list(list(targets= c(0,1,2,3,4),
render = JS(
"function(data, type, row, meta) {",
"return type === 'display' && data.length > 50 ?",
"'' + data.substr(0, 50) + '...' : data;",
"}")
))), callback = JS('table.page("next").draw(false);'))
## ----similarity, echo=TRUE, eval=TRUE, message=FALSE-----------------------
#Instantiating the Similarity
similarities <- showSimilarities()
## ----computing measures, echo=TRUE, eval=TRUE, message=FALSE---------------
found_terms <- unique(chipseq_dict_annot$term_url)
n <- length(found_terms)
ontologyfile <- obo
pairwise_results <- data.frame(term1 = character(0), term2= character(0), value = double(0L))
for(i in 1:(n-1)){
term1 <- as.character(found_terms[i])
j = i + 1
for(k in j:n){
term2 <- as.character(found_terms[k])
two_term_similarity <- similarity(ontologyfile, term1, term2 )
new_row <- cbind(term1, term2, two_term_similarity)
pairwise_results <- rbind(pairwise_results, new_row )
}
}
pairwise_results <- unique(pairwise_results)
pairwise_results <- merge(pairwise_results, chipseq_dict_annot[, c('term_url', 'term_name')], by.x='term2', by.y='term_url', all.x=TRUE)
colnames(pairwise_results)[length(colnames(pairwise_results))] <- 'term2_name'
pairwise_results <- merge(pairwise_results, chipseq_dict_annot[, c('term_url', 'term_name')], by.x='term1', by.y='term_url', all.x=TRUE)
colnames(pairwise_results)[length(colnames(pairwise_results))] <- 'term1_name'
pairwise_results <- unique(pairwise_results)
## ----showing_similarity1, echo=FALSE, eval=TRUE, message=FALSE-------------
knitr::kable(pairwise_results, rownames=FALSE,
caption = htmltools::tags$caption(
style = 'caption-side: top-left; text-align: left;',
'Table: ', htmltools::em('Pairwise similarities of cell line terms annotating the ChIP-seq metadata')),
options=list(
pageLength =10,
autoWidth = TRUE,
scrollX='100px',
rownames=FALSE))#,
#columnDefs = list(list(targets= 1,
# render = JS(
# "function(data, type, row, meta) {",
# "return type === 'display' && data.length > 50 ?",
# "'' + data.substr(0, 50) + '...' : data;",
# "}")
#))), callback = JS('table.page("next").draw(false);'))
## ----groupwise_measures, echo=TRUE, eval=TRUE, message=FALSE---------------
similarity(obo, found_terms[1:2], found_terms[3])
## ----samples_similarity, echo=TRUE, eval=TRUE, message=FALSE---------------
annotated_samples <- as.character(as.vector(unique(chipseq_dict_annot$sample_id)))
n <- length(annotated_samples)
samples_results <- data.frame(sample1 = character(0), sample2= character(0), value = double(0L))
samples_results <- matrix(0, nrow=n, ncol=n)
rownames(samples_results) <- colnames(samples_results) <- annotated_samples
for(i in 1:(n-1)){
sample1 <- as.character(annotated_samples[i])
j = i + 1
for(k in j:n){
sample2 <- as.character(annotated_samples[k])
two_samples_similarity <- similarity(ontologyfile, sample1, sample2, chipseq_dict_annot)
samples_results[i, k] <- samples_results[k, i] <- two_samples_similarity
}
}
diag(samples_results) <- 1
heatmap.2(samples_results, density.info = "none", trace="none", main='Semantic similarity of annotated samples', margins=c(5,5))