Packages
library(org.Hs.eg.db)
columns(org.Hs.eg.db)
## [1] "ACCNUM" "ALIAS" "ENSEMBL" "ENSEMBLPROT"
## [5] "ENSEMBLTRANS" "ENTREZID" "ENZYME" "EVIDENCE"
## [9] "EVIDENCEALL" "GENENAME" "GO" "GOALL"
## [13] "IPI" "MAP" "OMIM" "ONTOLOGY"
## [17] "ONTOLOGYALL" "PATH" "PFAM" "PMID"
## [21] "PROSITE" "REFSEQ" "SYMBOL" "UCSCKG"
## [25] "UNIGENE" "UNIPROT"
mapIds(org.Hs.eg.db, c("BRCA1", "BRCA2"), "ENSEMBL", keytype="SYMBOL")
## 'select()' returned 1:1 mapping between keys and columns
## BRCA1 BRCA2
## "ENSG00000012048" "ENSG00000139618"
library(TxDb.Hsapiens.UCSC.hg38.knownGene)
exons(TxDb.Hsapiens.UCSC.hg38.knownGene)
## GRanges object with 581036 ranges and 1 metadata column:
## seqnames ranges strand | exon_id
## <Rle> <IRanges> <Rle> | <integer>
## [1] chr1 [29554, 30039] + | 1
## [2] chr1 [30267, 30667] + | 2
## [3] chr1 [30366, 30503] + | 3
## [4] chr1 [30564, 30667] + | 4
## [5] chr1 [30976, 31097] + | 5
## ... ... ... ... . ...
## [581032] chrUn_KI270750v1 [148668, 148843] + | 581032
## [581033] chrUn_KI270752v1 [ 144, 268] + | 581033
## [581034] chrUn_KI270752v1 [ 21813, 21944] + | 581034
## [581035] chrUn_KI270752v1 [ 3497, 3623] - | 581035
## [581036] chrUn_KI270752v1 [ 9943, 10067] - | 581036
## -------
## seqinfo: 455 sequences (1 circular) from hg38 genome
exonsBy(TxDb.Hsapiens.UCSC.hg38.knownGene, "tx")
## GRangesList object of length 197782:
## $1
## GRanges object with 3 ranges and 3 metadata columns:
## seqnames ranges strand | exon_id exon_name exon_rank
## <Rle> <IRanges> <Rle> | <integer> <character> <integer>
## [1] chr1 [29554, 30039] + | 1 <NA> 1
## [2] chr1 [30564, 30667] + | 4 <NA> 2
## [3] chr1 [30976, 31097] + | 5 <NA> 3
##
## $2
## GRanges object with 2 ranges and 3 metadata columns:
## seqnames ranges strand | exon_id exon_name exon_rank
## [1] chr1 [30267, 30667] + | 2 <NA> 1
## [2] chr1 [30976, 31109] + | 6 <NA> 2
##
## $3
## GRanges object with 1 range and 3 metadata columns:
## seqnames ranges strand | exon_id exon_name exon_rank
## [1] chr1 [30366, 30503] + | 3 <NA> 1
##
## ...
## <197779 more elements>
## -------
## seqinfo: 455 sequences (1 circular) from hg38 genome
- org.* packages, e.g., org.Hs.eg.db
- TxDb.* packages, e.g., TxDb.Hsapiens.UCSC.hg38.knownGene
- columns(), mapIds(), select()
- exons(), genes(), transcripts(), promoters(), …
- exonsBy(), …

Web resources
library(biomaRt)
## Discover and then select a mart
ensembl <- useMart("ensembl", dataset="hsapiens_gene_ensembl")
## Gene symbols associated with GO-annotated MAP kinase
## activity (GO id GO:0004707)
getBM(attributes = c('entrezgene','hgnc_symbol'),
filters = 'go',
values = 'GO:0004707',
mart = ensembl)
## entrezgene hgnc_symbol
## 1 1432 MAPK14
## 2 5596 MAPK4
## 3 225689 MAPK15
## 4 5603 MAPK13
## 5 5601 MAPK9
## 6 51701 NLK
## 7 5594 MAPK1
## 8 5599 MAPK8
## 9 5602 MAPK10
## 10 6300 MAPK12
## 11 5597 MAPK6
## 12 5600 MAPK11
## 13 5598 MAPK7
## 14 5595 MAPK3
‘Hubs’
library(AnnotationHub)
AnnotationHub()
## updating metadata:
## retrieving 1 resource
## snapshotDate(): 2017-04-25
## AnnotationHub with 40134 records
## # snapshotDate(): 2017-04-25
## # $dataprovider: BroadInstitute, Ensembl, UCSC, Haemcode, ftp://ftp.ncbi....
## # $species: Homo sapiens, Mus musculus, Bos taurus, Pan troglodytes, Dani...
## # $rdataclass: GRanges, BigWigFile, FaFile, TwoBitFile, ChainFile, OrgDb,...
## # additional mcols(): taxonomyid, genome, description,
## # coordinate_1_based, maintainer, rdatadateadded, preparerclass,
## # tags, rdatapath, sourceurl, sourcetype
## # retrieve records with, e.g., 'object[["AH2"]]'
##
## title
## AH2 | Ailuropoda_melanoleuca.ailMel1.69.dna.toplevel.fa
## AH3 | Ailuropoda_melanoleuca.ailMel1.69.dna_rm.toplevel.fa
## AH4 | Ailuropoda_melanoleuca.ailMel1.69.dna_sm.toplevel.fa
## AH5 | Ailuropoda_melanoleuca.ailMel1.69.ncrna.fa
## AH6 | Ailuropoda_melanoleuca.ailMel1.69.pep.all.fa
## ... ...
## AH56649 | org.Thermoplasmatales_archaeon_BRNA1.eg.sqlite
## AH56650 | org.Ignicoccus_hospitalis_KIN4|I.eg.sqlite
## AH56651 | org.Desulfurococcus_amylolyticus_DSM_16532.eg.sqlite
## AH56652 | org.Pandoravirus_dulcis.eg.sqlite
## AH56653 | org.Methanocaldococcus_infernus_ME.eg.sqlite
query(AnnotationHub(), "grasp") # see library(grasp2db)
## snapshotDate(): 2017-04-25
## AnnotationHub with 1 record
## # snapshotDate(): 2017-04-25
## # names(): AH21414
## # $dataprovider: NHLBI
## # $species: Homo sapiens
## # $rdataclass: SQLiteConnection
## # $rdatadateadded: 2015-01-08
## # $title: Bioconductor distribution of grasp2 v. 2.0.0.0
## # $description: Build 2.0.0.0 of the grasp2 data base, with 2,082 GWAS st...
## # $taxonomyid: 9606
## # $genome: hg19
## # $sourcetype: GRASP
## # $sourceurl: https://s3.amazonaws.com/NHLBI_public/GRASP/GraspFullDatase...
## # $sourcesize: NA
## # $tags: c("SNP", "Annotation", "GRASP2")
## # retrieve record with 'object[["AH21414"]]'
query(AnnotationHub(), c("release-88", "homo"))
## snapshotDate(): 2017-04-25
## AnnotationHub with 9 records
## # snapshotDate(): 2017-04-25
## # $dataprovider: Ensembl
## # $species: Homo sapiens
## # $rdataclass: TwoBitFile, GRanges
## # additional mcols(): taxonomyid, genome, description,
## # coordinate_1_based, maintainer, rdatadateadded, preparerclass,
## # tags, rdatapath, sourceurl, sourcetype
## # retrieve records with, e.g., 'object[["AH53536"]]'
##
## title
## AH53536 | Homo_sapiens.GRCh38.88.abinitio.gtf
## AH53537 | Homo_sapiens.GRCh38.88.chr.gtf
## AH53538 | Homo_sapiens.GRCh38.88.chr_patch_hapl_scaff.gtf
## AH53539 | Homo_sapiens.GRCh38.88.gtf
## AH54337 | Homo_sapiens.GRCh38.cdna.all.2bit
## AH54338 | Homo_sapiens.GRCh38.dna.primary_assembly.2bit
## AH54339 | Homo_sapiens.GRCh38.dna_rm.primary_assembly.2bit
## AH54340 | Homo_sapiens.GRCh38.dna_sm.primary_assembly.2bit
## AH54341 | Homo_sapiens.GRCh38.ncrna.2bit
library(ExperimentHub)
ExperimentHub()
## updating metadata:
## retrieving 1 resource
## snapshotDate(): 2016-10-01
## ExperimentHub with 201 records
## # snapshotDate(): 2016-10-01
## # $dataprovider: Department of Psychology, Abdul Haq Campus, Federal Urdu...
## # $species: Homo Sapiens, Homo sapiens, Mus musculus
## # $rdataclass: ExpressionSet, CellMapperList, GAlignmentPairs, Summarized...
## # additional mcols(): taxonomyid, genome, description,
## # coordinate_1_based, maintainer, rdatadateadded, preparerclass,
## # tags, rdatapath, sourceurl, sourcetype
## # retrieve records with, e.g., 'object[["EH1"]]'
##
## title
## EH1 | RNA-Sequencing and clinical data for 7706 tumor samples from ...
## EH164 | RNA-Sequencing and clinical data for 9246 tumor samples from ...
## EH165 | RNA-Sequencing and clinical data for 741 normal samples from ...
## EH166 | ERR188297
## EH167 | ERR188088
## ... ...
## EH359 | ZellerG_2014.marker_abundance.stool
## EH360 | ZellerG_2014.marker_presence.stool
## EH361 | ZellerG_2014.metaphlan_bugs_list.stool
## EH362 | ZellerG_2014.pathabundance_relab.stool
## EH363 | ZellerG_2014.pathcoverage.stool
query(ExperimentHub(), "TCGA")
## snapshotDate(): 2016-10-01
## ExperimentHub with 3 records
## # snapshotDate(): 2016-10-01
## # $dataprovider: GEO
## # $species: Homo sapiens
## # $rdataclass: SummarizedExperiment, ExpressionSet
## # additional mcols(): taxonomyid, genome, description,
## # coordinate_1_based, maintainer, rdatadateadded, preparerclass,
## # tags, rdatapath, sourceurl, sourcetype
## # retrieve records with, e.g., 'object[["EH1"]]'
##
## title
## EH1 | RNA-Sequencing and clinical data for 7706 tumor samples from ...
## EH164 | RNA-Sequencing and clinical data for 9246 tumor samples from ...
## EH165 | RNA-Sequencing and clinical data for 741 normal samples from ...
sessionInfo()
## R version 3.4.0 (2017-04-21)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 14.04.5 LTS
##
## Matrix products: default
## BLAS: /usr/lib/libblas/libblas.so.3.0
## LAPACK: /usr/lib/lapack/liblapack.so.3.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=de_DE.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=de_DE.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=de_DE.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=de_DE.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] parallel stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] SummarizedExperiment_1.6.3
## [2] DelayedArray_0.2.7
## [3] matrixStats_0.52.2
## [4] ExperimentHub_1.2.0
## [5] AnnotationHub_2.8.1
## [6] KEGGREST_1.16.0
## [7] biomaRt_2.32.0
## [8] TxDb.Hsapiens.UCSC.hg38.knownGene_3.4.0
## [9] GenomicFeatures_1.28.2
## [10] GenomicRanges_1.28.3
## [11] GenomeInfoDb_1.12.1
## [12] org.Hs.eg.db_3.4.1
## [13] AnnotationDbi_1.38.1
## [14] IRanges_2.10.2
## [15] S4Vectors_0.14.3
## [16] Biobase_2.36.2
## [17] BiocGenerics_0.22.0
## [18] BiocStyle_2.4.0
##
## loaded via a namespace (and not attached):
## [1] lattice_0.20-35 htmltools_0.3.6
## [3] rtracklayer_1.36.3 yaml_2.1.14
## [5] interactiveDisplayBase_1.14.0 XML_3.98-1.7
## [7] DBI_0.6-1 BiocParallel_1.10.1
## [9] GenomeInfoDbData_0.99.0 stringr_1.2.0
## [11] zlibbioc_1.22.0 Biostrings_2.44.1
## [13] codetools_0.2-15 memoise_1.1.0
## [15] evaluate_0.10 knitr_1.16
## [17] httpuv_1.3.3 BiocInstaller_1.26.0
## [19] curl_2.6 Rcpp_0.12.11
## [21] xtable_1.8-2 backports_1.1.0
## [23] XVector_0.16.0 mime_0.5
## [25] Rsamtools_1.28.0 png_0.1-7
## [27] digest_0.6.12 stringi_1.1.5
## [29] bookdown_0.4 shiny_1.0.3
## [31] rprojroot_1.2 grid_3.4.0
## [33] tools_3.4.0 bitops_1.0-6
## [35] magrittr_1.5 RCurl_1.95-4.8
## [37] RSQLite_1.1-2 Matrix_1.2-10
## [39] rmarkdown_1.5 httr_1.2.1
## [41] R6_2.2.1 GenomicAlignments_1.12.1
## [43] compiler_3.4.0
Research reported in this tutorial was supported by the National Human Genome Research Institute and the National Cancer Institute of the National Institutes of Health under award numbers U41HG004059 and U24CA180996.
This project has received funding from the European Research Council (ERC) under the European Union’s Horizon 2020 research and innovation programme (grant agreement number 633974).