## ----doeval, echo = FALSE, results = "hide"--------------------------------
## Globally switch off execution of code chunks
##
## NOTE(review): this file looks like code extracted from a vignette (one
## "## ----<label>, <options>----" marker per chunk). Chunks whose `eval`
## option is FALSE appear as commented-out code below. The two flags defined
## here are the only live statements in the file:
##   evalMe   - referenced as `eval = evalMe` by the setup chunks.
##   haveProt - referenced as `eval = haveProt` by every chunk that queries
##              protein annotations; the (disabled) `haveprot` chunk would
##              recompute it from the database.
evalMe <- FALSE
haveProt <- FALSE

## ----loadlib, message = FALSE, eval = evalMe-------------------------------
# library(ensembldb)
# library(EnsDb.Hsapiens.v75)
# edb <- EnsDb.Hsapiens.v75
# ## Evaluate whether we have protein annotation available
# hasProteinData(edb)

## ----listCols, message = FALSE, eval = evalMe------------------------------
# listTables(edb)

## ----haveprot, echo = FALSE, results = "hide", eval = evalMe---------------
# ## Use this to conditionally disable eval on following chunks
# haveProt <- hasProteinData(edb) & evalMe

## ----a_transcripts, eval = haveProt----------------------------------------
# ## Get also protein information for ZBTB16 transcripts
# txs <- transcripts(edb, filter = GenenameFilter("ZBTB16"),
#                    columns = c("protein_id", "uniprot_id", "tx_biotype"))
# txs

## ----a_transcripts_coding_noncoding, eval = haveProt-----------------------
# ## Subset to transcripts with tx_biotype other than protein_coding.
# txs[txs$tx_biotype != "protein_coding", c("uniprot_id", "tx_biotype",
#                                           "protein_id")]

## ----a_transcripts_coding, eval = haveProt---------------------------------
# ## List the protein IDs and uniprot IDs for the coding transcripts
# mcols(txs[txs$tx_biotype == "protein_coding",
#           c("tx_id", "protein_id", "uniprot_id")])

## ----a_transcripts_coding_up, eval = haveProt------------------------------
# ## List all uniprot mapping types in the database.
# listUniprotMappingTypes(edb)
#
# ## Get all protein_coding transcripts of ZBTB16 along with their protein_id
# ## and Uniprot IDs, restricting to protein_id to uniprot_id mappings based
# ## on "DIRECT" mapping methods.
# txs <- transcripts(edb, filter = list(GenenameFilter("ZBTB16"),
#                                       UniprotMappingTypeFilter("DIRECT")),
#                    columns = c("protein_id", "uniprot_id", "uniprot_db"))
# mcols(txs)

## ----a_genes_protdomid_filter, eval = haveProt-----------------------------
# ## Get all genes that encode a transcript encoding for a protein that contains
# ## a certain protein domain.
# gns <- genes(edb, filter = ProtDomIdFilter("PS50097"))
# length(gns)
#
# sort(gns$gene_name)

## ----a_2_annotationdbi, message = FALSE, eval = haveProt-------------------
# ## Show all columns that are provided by the database
# columns(edb)
#
# ## Show all key types/filters that are supported
# keytypes(edb)

## ----a_2_select, message = FALSE, eval = haveProt--------------------------
# select(edb, keys = "ZBTB16", keytype = "GENENAME",
#        columns = "UNIPROTID")

## ----a_2_select_nmd, message = FALSE, eval = haveProt----------------------
# ## Call select, this time providing a GenenameFilter.
# select(edb, keys = GenenameFilter("ZBTB16"),
#        columns = c("TXBIOTYPE", "UNIPROTID", "PROTEINID"))

## ----b_proteins, message = FALSE, eval = haveProt--------------------------
# ## Get all proteins and return them as an AAStringSet
# prts <- proteins(edb, filter = GenenameFilter("ZBTB16"),
#                  return.type = "AAStringSet")
# prts

## ----b_proteins_mcols, message = FALSE, eval = haveProt--------------------
# mcols(prts)

## ----b_proteins_prot_doms, message = FALSE, eval = haveProt----------------
# ## Get also protein domain annotations in addition to the protein annotations.
# pd <- proteins(edb, filter = GenenameFilter("ZBTB16"),
#                columns = c("tx_id", listColumns(edb, "protein_domain")),
#                return.type = "AAStringSet")
# pd

## ----b_proteins_prot_doms_2, message = FALSE, eval = haveProt--------------
# ## The number of protein domains per protein:
# table(names(pd))
#
# ## The mcols
# mcols(pd)