## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----'install bioc', eval=FALSE-----------------------------------------------
# if (!requireNamespace("BiocManager", quietly = TRUE)) {
#   install.packages("BiocManager")
# }
# BiocManager::install("HuBMAPR")

## ----'install dev', eval = FALSE----------------------------------------------
# remotes::install_github("christinehou11/HuBMAPR")

## ----'library', message=FALSE, warning=FALSE----------------------------------
library("dplyr")
library("tidyr")
library("ggplot2")
library("HuBMAPR")

## ----'datasets'---------------------------------------------------------------
datasets_df <- datasets()
datasets_df

## ----'plot', echo=FALSE, warning=FALSE, message=FALSE-------------------------
# Keep only datasets with a known organ and attach the per-organ row count,
# which is used below to order the bars. add_count() leaves the result
# ungrouped, unlike group_by() + mutate(n()).
datasets_sub <- datasets_df |>
  filter(!is.na(organ)) |>
  add_count(organ, name = "count") |>
  select(organ, dataset_type, count)

# One bar per organ, stacked by assay type. geom_bar() counts rows itself, so
# it replaces geom_histogram(stat = "count"), which emits a warning about
# ignored binning parameters.
plot1 <- ggplot(
  datasets_sub,
  aes(y = reorder(organ, count), fill = dataset_type)
) +
  geom_bar() +
  labs(x = NULL, y = NULL, fill = "Assay Type") +
  theme_minimal() +
  theme(
    panel.grid.major.y = element_blank(),
    panel.grid.minor = element_blank(),
    axis.text.y = element_text(size = 9),
    axis.text.x = element_text(size = 9),
    legend.position = "right",
    legend.title = element_text(size = 9),
    legend.text = element_text(size = 7),
    panel.background = element_rect(fill = "white", color = NA),
    plot.background = element_rect(fill = "white", color = NA)
  ) +
  guides(fill = guide_legend(ncol = 2))

plot1

## ----'cols'-------------------------------------------------------------------
# as = "tibble" (default)
datasets_col_tbl <- datasets_default_columns(as = "tibble")
datasets_col_tbl

# as = "character"
datasets_col_char <- datasets_default_columns(as = "character")
datasets_col_char

## ----'summary cols'-----------------------------------------------------------
# Default column names of every entity type, side by side.
default_cols <- list(
  dataset = datasets_default_columns(as = "character"),
  sample = samples_default_columns(as = "character"),
  donor = donors_default_columns(as = "character"),
  collection = collections_default_columns(as = "character"),
  publication = publications_default_columns(as = "character")
)

# The vectors differ in length; pad the shorter ones with NA up to the longest
# so they can be bound as columns. Computing the padding (instead of
# hard-coding the number of NAs) keeps this working if a column set changes.
n_rows <- max(lengths(default_cols))
tbl <- default_cols |>
  lapply(function(cols) c(cols, rep(NA, n_rows - length(cols)))) |>
  bind_cols()

tbl

## ----'organs'-----------------------------------------------------------------
organs <- organ()
organs

## ----'datasets filter'--------------------------------------------------------
# Example from datasets()
datasets_df |>
  filter(organ == "Small Intestine") |>
  count()

## ----'derived using left_join'------------------------------------------------
donors_df <- donors()

# First donor matching all demographic and timestamp criteria.
donor_sub <- donors_df |>
  filter(
    Sex == "Female",
    Age <= 76 & Age >= 55,
    Race == "White",
    `Body Mass Index` <= 25,
    last_modified_timestamp >= "2020-01-08" &
      last_modified_timestamp <= "2020-06-30"
  ) |>
  head(1)

# Datasets
# uuid / hubmap_id are renamed on the right-hand table so they do not clash
# with the donor's own identifier columns after the join.
donor_sub_dataset <- donor_sub |>
  left_join(
    datasets_df |>
      select(-c(group_name, last_modified_timestamp)) |>
      rename(dataset_uuid = uuid, dataset_hubmap_id = hubmap_id),
    by = c("hubmap_id" = "donor_hubmap_id")
  )

donor_sub_dataset

# Samples
samples_df <- samples()

donor_sub_sample <- donor_sub |>
  left_join(
    samples_df |>
      select(-c(group_name, last_modified_timestamp)) |>
      rename(sample_uuid = uuid, sample_hubmap_id = hubmap_id),
    by = c("hubmap_id" = "donor_hubmap_id")
  )

donor_sub_sample

## ----'*_detail()'-------------------------------------------------------------
dataset_uuid <- datasets_df |>
  filter(
    dataset_type == "Auto-fluorescence",
    organ == "Kidney (Right)"
  ) |>
  head(1) |>
  pull(uuid)

# Full Information
dataset_detail(dataset_uuid) |>
  glimpse()

# Specific Information
dataset_detail(uuid = dataset_uuid) |>
  select(contributors) |>
  unnest_longer(contributors) |>
  unnest_wider(everything())

## ----'metadata'---------------------------------------------------------------
dataset_metadata("993bb1d6fa02e2755fd69613bb9d6e08")

sample_metadata("8ecdbdc3e2d04898e2563d666658b6a9")

donor_metadata("b2c75c96558c18c9e13ba31629f541b6")

## ----'dataset derived'--------------------------------------------------------
# no derived/support dataset
dataset_uuid_1 <- "3acdb3ed962b2087fbe325514b098101"

dataset_derived(uuid = dataset_uuid_1)

# has derived/support dataset
dataset_uuid_2 <- "baf976734dd652208d13134bc5c4594b"

dataset_derived(uuid = dataset_uuid_2) |>
  glimpse()

## ----'derived using sample_derived'-------------------------------------------
sample_uuid <- samples_df |>
  filter(
    last_modified_timestamp >= "2023-01-01" &
      last_modified_timestamp <= "2023-10-01",
    organ == "Kidney (Left)"
  ) |>
  head(1) |>
  pull(uuid)

sample_uuid

# Derived Datasets
sample_derived(uuid = sample_uuid, entity_type = "Dataset")

# Derived Samples
sample_derived(uuid = sample_uuid, entity_type = "Sample")

## ----'provenance'-------------------------------------------------------------
# dataset provenance
# NOTE: dataset_uuid and sample_uuid are reassigned here; later chunks use
# these new values.
dataset_uuid <- "3e4c568d9ce8df9d73b8cddcf8d0fec3"

uuid_provenance(dataset_uuid)

# sample provenance
sample_uuid <- "35e16f13caab262f446836f63cf4ad42"

uuid_provenance(sample_uuid)

# donor provenance
donor_uuid <- "0abacde2443881351ff6e9930a706c83"

uuid_provenance(donor_uuid)

## ----'collection datasets'----------------------------------------------------
collections_df <- collections()

collection_uuid <- collections_df |>
  filter(last_modified_timestamp >= "2023-01-01") |>
  head(1) |>
  pull(uuid)

collection_data(collection_uuid)

## ----'publication data'-------------------------------------------------------
publications_df <- publications()

publication_uuid <- publications_df |>
  filter(publication_venue == "Nature") |>
  head(1) |>
  pull(uuid)

publication_data(publication_uuid, entity_type = "Dataset")

publication_data(publication_uuid, entity_type = "Sample")

## ----'information'------------------------------------------------------------
collection_information(uuid = collection_uuid)

publication_information(uuid = publication_uuid)

## ----'author'-----------------------------------------------------------------
# Dataset
dataset_contributors(uuid = dataset_uuid)

# Collection
collection_contacts(uuid = collection_uuid)

collection_contributors(uuid = collection_uuid)

# Publication
publication_authors(uuid = publication_uuid)

## ----'bulk data transfer', eval=FALSE-----------------------------------------
# uuid_globus <- "d1dcab2df80590d8cd8770948abaf976"
#
# bulk_data_transfer(uuid_globus)
#
# uuid_dbGAP_SRA <- "d926c41ac08f3c2ba5e61eec83e90b0c"
#
# bulk_data_transfer(uuid_dbGAP_SRA)
#
# uuid_not_avail <- "0eb5e457b4855ce28531bc97147196b6"
#
# bulk_data_transfer(uuid_not_avail)

## ----'sessionInfo', echo=FALSE------------------------------------------------
## Session info
options(width = 120)
sessionInfo()