## ----imports, echo=FALSE,eval=TRUE, message=FALSE-------------------------- library(Onassis) library(DT) library(gplots) library(org.Hs.eg.db) ## ----connectTodb, echo=TRUE,eval=FALSE------------------------------------- # ## Running this function might take long time if the database has to be downloaded. # geo_con <- connectToGEODB(system.file(getwd(), 'GEOmetadb.sqlite')) # # #Showing the experiment types available in GEO # experiments <- experiment_types(geo_con) # # #Showing the organism types available in GEO # species <- organism_types(geo_con) # # #Retrieving the metadata associated to experiment type "Methylation profiling by high througput sequencing" # meth_metadata <- getGEOMetadata(geo_con, experiment_type='Methylation profiling by high throughput sequencing', organism = 'Homo sapiens') # # #Retrieving Human gene expression metadata, knowing the GEO platform identifier, e.g. the Affymetrix Human Genome U133 Plus 2.0 Array # expression <- getGEOMetadata(geo_con, experiment_type='Expression profiling by array', gpl='GPL570') ## ----experimentTypesshow, echo=FALSE, eval=TRUE---------------------------- experiments <- readRDS(system.file('extdata', 'vignette_data', 'experiment_types.rds', package='Onassis')) knitr::kable(experiments[1:10], rownames=FALSE, caption = htmltools::tags$caption( style = 'caption-side: top-left; text-align: left;', 'Table 1: ', htmltools::em('Experiments available in GEO')), options=list( pageLength =5, autoWidth = TRUE, scrollX='300px', rownames=FALSE)) ## ----speciesShow, echo=FALSE,eval=TRUE------------------------------------- species <- readRDS(system.file('extdata', 'vignette_data', 'organisms.rds', package='Onassis')) knitr::kable(species[1:10], rownames=FALSE, caption = htmltools::tags$caption( style = 'caption-side: top-left; text-align: left;', 'Table 1: ', htmltools::em('Species available in GEO')), options=list( pageLength =5, autoWidth = TRUE, scrollX='300px', rownames=FALSE)) ## ----loadgeoMetadata, echo=TRUE, eval=TRUE--------------------------------- meth_metadata <- readRDS(system.file('extdata', 'vignette_data', 'GEOmethylation.rds', package='Onassis')) ## ----printmeta, echo=FALSE,eval=TRUE--------------------------------------- methylation_tmp <- meth_metadata methylation_tmp$experiment_summary <- sapply(methylation_tmp$experiment_summary, function(x) substr(x, 1, 50)) knitr::kable(methylation_tmp[1:10,], rownames=FALSE, caption = htmltools::tags$caption( style = 'caption-side: top-left; text-align: left;', 'Table 1: ', htmltools::em('Methylation profiling by high througput sequencing metadata from GEOmetadb.')), options=list( pageLength =5, autoWidth = TRUE, scrollX='300px', rownames=FALSE)) # columnDefs = list(list(targets=10, # render = JS( # "function(data, type, row, meta) {", # "return type === 'display' && data.length > 50 ?", # "'' + data.substr(0, 50) + '...' : data;", # "}") # )))), callback = JS('table.page("next").draw(false);')) ## ----connectSRA, echo=TRUE,eval=FALSE-------------------------------------- # # Connection to the SRAmetadb and potential download of the sqlite file # sra_con <- connectToSRADB() # # # Query for the ChIP-Seq experiments contained in GEO for human samples # sra_chip_seq <- getSRAMetadata(sra_con, library_strategy='ChIP-Seq', library_source='GENOMIC', taxon_id=9606, center_name='GEO') # # # The following example allows to retrieve Bisulfite sequencing samples' metadata. # bisulfite_seq <- getSRAMetadata(sra_con, library_strategy='Bisulfite-Seq', library_source='GENOMIC', taxon_id=9606, center_name='GEO' ) # # ## ----readCHIP, echo=TRUE, eval=TRUE---------------------------------------- sra_chip_seq <- readRDS(system.file('extdata', 'vignette_data', 'GEO_human_chip.rds', package='Onassis')) ## ----printchromatinIP, echo=FALSE,eval=TRUE-------------------------------- knitr::kable(head(sra_chip_seq, 10), rownames=FALSE, caption = htmltools::tags$caption( style = 'caption-side: top-left; text-align: left;', 'Table: ', htmltools::em('ChIP-Seq metadata obtained from SRAdb.')), options=list( pageLength =5, autoWidth = TRUE, scrollX='300px', rownames=FALSE))#, #columnDefs = list(list(targets=9, # render = JS( # "function(data, type, row, meta) {", # "return type === 'display' && data.length > 50 ?", # "'' + data.substr(0, 50) + '...' : data;", # "}") #))), callback = JS('table.page("next").draw(false);')) ## ----createSampleAndTargetDict, echo=TRUE,eval=TRUE, message=FALSE--------- # If a Conceptmapper dictionary is already available the dictType CMDICT can be specified and the corresponding file loaded sample_dict <- dictionary(inputFileOrDb=system.file('extdata', 'cmDict-sample.cs.xml', package = 'Onassis'), dictType = 'CMDICT') #Creation of a dictionary from the file sample.cs.obo available in OnassisJavaLibs obo <- system.file('extdata', 'sample.cs.obo', package='OnassisJavaLibs') sample_dict <- dictionary(inputFileOrDb=obo, outputdir=getwd(), synonymType='ALL') # Creation of a dictionary for human genes/proteins require(org.Hs.eg.db) targets <- dictionary(dictType='TARGET', inputFileOrDb = 'org.Hs.eg.db') ## ----settingOptions, echo=TRUE,eval=TRUE----------------------------------- #Showing configuration permutations opts <- CMoptions() combinations <- listCombinations(opts) #Setting the combination of parameters through the paramValueIndex value myopts <- CMoptions(c("CONTIGUOUS_MATCH", "CASE_INSENSITIVE", 'BIOLEMMATIZER', 'PUBMED', 'ON', 'YES', 'ALL')) ## ----annotateDF, echo=TRUE, eval=TRUE-------------------------------------- chipseq_dict_annot <- annotate(sra_chip_seq[1:20,c('sample_accession', 'title', 'experiment_attribute', 'sample_attribute', 'description')], dictionary=sample_dict, options=myopts) ## ----showchipresults, echo=FALSE, eval=TRUE, message=FALSE----------------- #methylation_brenda_annot <- readRDS(system.file('extdata', 'vignette_data', 'methylation_brenda_annot.rds', package='Onassis')) #UPDATE con ChIP-seq knitr::kable(head(chipseq_dict_annot, 20), rownames=FALSE, caption = htmltools::tags$caption( style = 'caption-side: top-left; text-align: left;', 'Table: ', htmltools::em('Annotations of the methylation profiling by high througput sequencing metadata obtained from GEO with BRENDA ontology concepts')), options=list( pageLength =10, autoWidth = TRUE, scrollX='300px', rownames=FALSE))#, #columnDefs = list(list(targets=1, # render = JS( # "function(data, type, row, meta) {", # "return type === 'display' && data.length > 50 ?", # "'' + data.substr(0, 50) + '...' : data;", # "}") #))), callback = JS('table.page("next").draw(false);')) ## ----annotateGenes, echo=TRUE, eval=TRUE, message=FALSE-------------------- #Finding the TARGET entities target_entities <- annotate(inputFileorDf=sra_chip_seq[1:20,c('sample_accession', 'title', 'experiment_attribute', 'sample_attribute', 'description')], options = myopts, dictionary=targets) ## ----printKable, echo=FALSE, eval=TRUE------------------------------------- knitr::kable(target_entities, rownames=FALSE, caption = htmltools::tags$caption( style = 'caption-side: top-left; text-align: left;', 'Table: ', htmltools::em('Annotations of ChIP-seq test metadata obtained from SRAdb and stored into files with the TARGETs (genes and histone variants)')), options=list( pageLength =10, autoWidth = TRUE, scrollX='100px', rownames=FALSE, columnDefs = list(list(targets= c(0,1,2,3,4), render = JS( "function(data, type, row, meta) {", "return type === 'display' && data.length > 50 ?", "'' + data.substr(0, 50) + '...' : data;", "}") ))), callback = JS('table.page("next").draw(false);')) ## ----similarity, echo=TRUE, eval=TRUE, message=FALSE----------------------- #Instantiating the Similarity similarities <- showSimilarities() ## ----computing measures, echo=TRUE, eval=TRUE, message=FALSE--------------- found_terms <- unique(chipseq_dict_annot$term_url) n <- length(found_terms) ontologyfile <- obo pairwise_results <- data.frame(term1 = character(0), term2= character(0), value = double(0L)) for(i in 1:(n-1)){ term1 <- as.character(found_terms[i]) j = i + 1 for(k in j:n){ term2 <- as.character(found_terms[k]) two_term_similarity <- similarity(ontologyfile, term1, term2 ) new_row <- cbind(term1, term2, two_term_similarity) pairwise_results <- rbind(pairwise_results, new_row ) } } pairwise_results <- unique(pairwise_results) pairwise_results <- merge(pairwise_results, chipseq_dict_annot[, c('term_url', 'term_name')], by.x='term2', by.y='term_url', all.x=TRUE) colnames(pairwise_results)[length(colnames(pairwise_results))] <- 'term2_name' pairwise_results <- merge(pairwise_results, chipseq_dict_annot[, c('term_url', 'term_name')], by.x='term1', by.y='term_url', all.x=TRUE) colnames(pairwise_results)[length(colnames(pairwise_results))] <- 'term1_name' pairwise_results <- unique(pairwise_results) ## ----showing_similarity1, echo=FALSE, eval=TRUE, message=FALSE------------- knitr::kable(pairwise_results, rownames=FALSE, caption = htmltools::tags$caption( style = 'caption-side: top-left; text-align: left;', 'Table: ', htmltools::em('Pairwise similarities of cell line terms annotating the ChIP-seq metadata')), options=list( pageLength =10, autoWidth = TRUE, scrollX='100px', rownames=FALSE))#, #columnDefs = list(list(targets= 1, # render = JS( # "function(data, type, row, meta) {", # "return type === 'display' && data.length > 50 ?", # "'' + data.substr(0, 50) + '...' : data;", # "}") #))), callback = JS('table.page("next").draw(false);')) ## ----groupwise_measures, echo=TRUE, eval=TRUE, message=FALSE--------------- similarity(obo, found_terms[1:2], found_terms[3]) ## ----samples_similarity, echo=TRUE, eval=TRUE, message=FALSE--------------- annotated_samples <- as.character(as.vector(unique(chipseq_dict_annot$sample_id))) n <- length(annotated_samples) samples_results <- data.frame(sample1 = character(0), sample2= character(0), value = double(0L)) samples_results <- matrix(0, nrow=n, ncol=n) rownames(samples_results) <- colnames(samples_results) <- annotated_samples for(i in 1:(n-1)){ sample1 <- as.character(annotated_samples[i]) j = i + 1 for(k in j:n){ sample2 <- as.character(annotated_samples[k]) two_samples_similarity <- similarity(ontologyfile, sample1, sample2, chipseq_dict_annot) samples_results[i, k] <- samples_results[k, i] <- two_samples_similarity } } diag(samples_results) <- 1 heatmap.2(samples_results, density.info = "none", trace="none", main='Semantic similarity of annotated samples', margins=c(5,5))