## R code for the BiocFileCache vignette examples, one section per chunk.
## Each "## ----<label/options>----" line marks the start of a chunk.
## Chunks marked eval=FALSE are kept as "# "-prefixed lines: they are shown
## for illustration only and are not executed when this script is sourced.

## ----setup, echo=FALSE--------------------------------------------------------
knitr::opts_chunk$set(collapse=TRUE)

## ----eval = FALSE-------------------------------------------------------------
# if (!"BiocManager" %in% rownames(installed.packages()))
#     install.packages("BiocManager")
# BiocManager::install("BiocFileCache", dependencies=TRUE)

## ----results='hide', warning=FALSE, message=FALSE-----------------------------
library(BiocFileCache)

## -----------------------------------------------------------------------------
## use a throw-away cache location; ask = FALSE avoids an interactive prompt
path <- tempfile()
bfc <- BiocFileCache(path, ask = FALSE)

## ----url----------------------------------------------------------------------
## paste to avoid long line in vignette
url <- paste(
    "ftp://ftp.ensembl.org/pub/release-71/gtf",
    "homo_sapiens/Homo_sapiens.GRCh37.71.gtf.gz",
    sep="/")

## ----eval=FALSE---------------------------------------------------------------
# library(BiocFileCache)
# bfc <- BiocFileCache()
# path <- bfcrpath(bfc, url)

## ----eval=FALSE---------------------------------------------------------------
# gtf <- rtracklayer::import.gff(path)

## ----eval=FALSE---------------------------------------------------------------
# gtf <- rtracklayer::import.gff(bfcrpath(BiocFileCache(), url))

## ----eval=FALSE---------------------------------------------------------------
# library(BiocFileCache)
# bfc <- BiocFileCache("~/my-experiment/results")

## ----eval=FALSE---------------------------------------------------------------
# suppressPackageStartupMessages({
#     library(DESeq2)
#     library(airway)
# })
# data(airway)
# dds <- DESeqDataSet(airway, design = ~ cell + dex)
# result <- DESeq(dds)

## ----eval=FALSE---------------------------------------------------------------
# saveRDS(result, bfcnew(bfc, "airway / DESeq standard analysis"))

## ----eval=FALSE---------------------------------------------------------------
# result <- readRDS(bfcrpath(bfc, "airway / DESeq standard analysis"))

## ----eval=FALSE---------------------------------------------------------------
# suppressPackageStartupMessages({
#     library(BiocFileCache)
#     library(rtracklayer)
# })
#
# # load the cache
# path <- file.path(tempdir(), "tempCacheDir")
# bfc <- BiocFileCache(path)
#
# # the web resource of interest
# url <- "ftp://ftp.ensembl.org/pub/release-71/gtf/homo_sapiens/Homo_sapiens.GRCh37.71.gtf.gz"
#
# # check if url is being tracked
# res <- bfcquery(bfc, url, exact=TRUE)
#
# if (bfccount(res) == 0L) {
#
#     # if it is not in cache, add
#     ans <- bfcadd(bfc, rname="ensembl, homo sapien", fpath=url)
#
# } else {
#
#     # if it is in cache, get path to load
#     rid <- res$rid
#     ans <- bfcrpath(bfc, rid)
#
#     # check to see if the resource needs to be updated
#     check <- bfcneedsupdate(bfc, rid)
#     # check can be NA if it cannot be determined, choose how to handle
#     if (is.na(check)) check <- TRUE
#     if (check) {
#         ans <- bfcdownload(bfc, rid)
#     }
# }
#
# # ans is the path of the file to load
# ans
#
# # we know because we search for the url that the file is a .gtf.gz,
# # if we searched on other terms we can use 'bfcpath' to see the
# # original fpath to know the appropriate load/read/import method
# bfcpath(bfc, names(ans))
#
# temp <- GTFFile(ans)
# info <- import(temp)

## ----ensemblremote, eval=TRUE-------------------------------------------------
#
# A simpler test to see if something is in the cache
# and if not start tracking it is using `bfcrpath`
#

suppressPackageStartupMessages({
    library(BiocFileCache)
    library(rtracklayer)
})

# load the cache
path <- file.path(tempdir(), "tempCacheDir")
bfc <- BiocFileCache(path, ask=FALSE)

# the web resources of interest
url <- "ftp://ftp.ensembl.org/pub/release-71/gtf/homo_sapiens/Homo_sapiens.GRCh37.71.gtf.gz"
url2 <- "ftp://ftp.ensembl.org/pub/release-71/gtf/rattus_norvegicus/Rattus_norvegicus.Rnor_5.0.71.gtf.gz"

# if not in cache will download and create new entry
pathsToLoad <- bfcrpath(bfc, c(url, url2))

pathsToLoad

# now load files as see fit
info <- import(GTFFile(pathsToLoad[1]))
class(info)
summary(info)

## ----eval=FALSE---------------------------------------------------------------
# #
# # One could also imagine the following:
# #
#
# library(BiocFileCache)
#
# # load the cache
# bfc <- BiocFileCache()
#
# #
# # Do some work!
# #
#
# # add a location in the cache
# filepath <- bfcnew(bfc, "R workspace")
#
# save(list = ls(), file=filepath)
#
# # now the R workspace is being tracked in the cache

## ----eval=FALSE---------------------------------------------------------------
# .get_cache <-
#     function()
# {
#     cache <- tools::R_user_dir("MyNewPackage", which="cache")
#     BiocFileCache::BiocFileCache(cache)
# }

## ----eval=FALSE---------------------------------------------------------------
# download_data_file <-
#     function( verbose = FALSE )
# {
#     fileURL <- "http://a_path_to/someremotefile.tsv.gz"
#
#     bfc <- .get_cache()
#     rid <- bfcquery(bfc, "geneFileV2", "rname")$rid
#     if (!length(rid)) {
#         if( verbose )
#             message( "Downloading GENE file" )
#         rid <- names(bfcadd(bfc, "geneFileV2", fileURL ))
#     }
#     if (!isFALSE(bfcneedsupdate(bfc, rid)))
#         bfcdownload(bfc, rid)
#
#     bfcrpath(bfc, rids = rid)
# }

## ----preprocess---------------------------------------------------------------
url <-
    "http://bioconductor.org/packages/stats/bioc/BiocFileCache/BiocFileCache_stats.tab"

## how to process file before caching:
## read every line of `from`, write only the leading lines (head() default)
## to `to`, and return TRUE.
headFile <-
    function(from, to)
{
    dat <- readLines(from)
    writeLines(head(dat), to)
    TRUE
}

rid <- bfcquery(bfc, url, "fpath")$rid
if (!length(rid)) {
    ## not in cache, add but do not download
    rid <- names(bfcadd(bfc, url, download = FALSE))
}

update <- bfcneedsupdate(bfc, rid)  # TRUE if newly added or stale
## !isFALSE() also triggers the download when the status is NA (undetermined)
if (!isFALSE(update)) {
    ## download & process
    bfcdownload(bfc, rid, ask = FALSE, FUN = headFile)
}

rpath <- bfcrpath(bfc, rids=rid)    # path to processed result
readLines(rpath)                    # read processed result

## ----sessioninfo--------------------------------------------------------------
sessionInfo()