## ----setup, echo=FALSE--------------------------------------------------- knitr::opts_chunk$set(collapse=TRUE) ## ---- eval = FALSE------------------------------------------------------- # source("http://www.bioconductor.org/biocLite.R") # biocLite("BiocFileCache", dependencies = TRUE) ## ---- results='hide', warning=FALSE, message=FALSE----------------------- library(BiocFileCache) ## ------------------------------------------------------------------------ path <- file.path(tempdir(), "tempCacheDir") bfc <- BiocFileCache(path) ## ------------------------------------------------------------------------ bfccache(bfc) length(bfc) ## ------------------------------------------------------------------------ bfc ## ------------------------------------------------------------------------ bfcinfo(bfc) ## ------------------------------------------------------------------------ savepath <- bfcnew(bfc, "NewResource", ext="RData") savepath ## now we can use that path in any save function m = matrix(1:12, nrow=3) save(m, file=savepath) ## and that file will be tracked in the cache bfcinfo(bfc) ## ------------------------------------------------------------------------ fl1 <- tempfile(); file.create(fl1) add2 <- bfcadd(bfc, "Test_addCopy", fl1) # copy # returns filepath being tracked in cache add2 # the name is the unique rid in the cache rid2 <- names(add2) fl2 <- tempfile(); file.create(fl2) add3 <- bfcadd(bfc, "Test2_addMove", fl2, action="move") # move rid3 <- names(add3) fl3 <- tempfile(); file.create(fl3) add4 <- bfcadd(bfc, "Test3_addAsis", fl3, rtype="local", action="asis") # reference rid4 <- names(add4) file.exists(fl1) # TRUE - copied from original location file.exists(fl2) # FALSE - moved from original location file.exists(fl3) # TRUE - left asis, original location tracked ## ------------------------------------------------------------------------ url <- "http://httpbin.org/get" add5 <- bfcadd(bfc, "TestWeb", fpath=url) rid5 <- names(add5) url2<- "https://en.wikipedia.org/wiki/Bioconductor" add6 <- bfcadd(bfc, "TestWeb", fpath=url2) rid6 <- names(add6) # let's look at our BiocFileCache object now bfc bfcinfo(bfc) ## ------------------------------------------------------------------------ bfcquery(bfc, "Web") bfcquery(bfc, "copy") q1 <- bfcquery(bfc, "wiki") q1 class(q1) ## ------------------------------------------------------------------------ bfccount(q1) ## ------------------------------------------------------------------------ bfcsubWeb = bfc[paste0("BFC", 5:6)] bfcsubWeb bfcinfo(bfcsubWeb) ## ------------------------------------------------------------------------ bfc[["BFC2"]] bfcpath(bfc, "BFC2") bfcpath(bfc, "BFC5") bfcrpath(bfc, rids="BFC5") bfcrpath(bfc) bfcrpath(bfc, c("http://httpbin.org/get","Test3_addAsis")) ## ------------------------------------------------------------------------ bfcneedsupdate(bfc, "BFC5") bfcneedsupdate(bfc, "BFC6") bfcneedsupdate(bfc) ## ------------------------------------------------------------------------ fileBeingReplaced <- bfc[[rid3]] fileBeingReplaced # fl3 was created when we were adding resources fl3 bfc[[rid3]]<-fl3 bfc[[rid3]] ## ------------------------------------------------------------------------ bfcinfo(bfc, "BFC1") bfcupdate(bfc, "BFC1", rname="FirstEntry") bfcinfo(bfc, "BFC1") ## ------------------------------------------------------------------------ library(dplyr) bfcinfo(bfc, "BFC6") %>% select(rid, rpath, fpath) bfcupdate(bfc, "BFC6", fpath=url, rname="Duplicate") bfcinfo(bfc, "BFC6") %>% select(rid, rpath, fpath) ## ------------------------------------------------------------------------ rid <- "BFC5" test <- !identical(bfcneedsupdate(bfc, rid), FALSE) # 'TRUE' or 'NA' if (test) bfcdownload(bfc, rid) ## ------------------------------------------------------------------------ # let's remind ourselves of our object bfc bfcremove(bfc, "BFC6") bfcremove(bfc, "BFC1") # let's look at our BiocFileCache object now bfc ## ------------------------------------------------------------------------ # create a new entry that hasn't been used path <- bfcnew(bfc, "UseMe") rmMe <- names(path) # We also have a file not being tracked because we updated rpath bfcsync(bfc) # you can suppress the messages and just have a TRUE/FALSE bfcsync(bfc, FALSE) # # Let's do some cleaning to have a synced object # bfcremove(bfc, rmMe) unlink(fileBeingReplaced) bfcsync(bfc) ## ----eval=FALSE---------------------------------------------------------- # cleanbfc(bfc) ## ----eval=FALSE---------------------------------------------------------- # removebfc(bfc) ## ------------------------------------------------------------------------ ## paste to avoid long line in vignette url <- paste( "ftp://ftp.ensembl.org/pub/release-71/gtf", "homo_sapiens/Homo_sapiens.GRCh37.71.gtf.gz", sep="/") ## ---- eval=FALSE--------------------------------------------------------- # library(BiocFileCache) # bfc <- BiocFileCache() # path <- bfcrpath(bfc, url) ## ---- eval=FALSE--------------------------------------------------------- # gtf <- rtracklayer::import.gff(path) ## ---- eval=FALSE--------------------------------------------------------- # gtf <- rtracklayer::import.gff(bfcrpath(BiocFileCache(), url)) ## ---- eval=FALSE--------------------------------------------------------- # library(BiocFileCache) # bfc <- BiocFileCache("~/my-experiment/results") ## ---- eval=FALSE--------------------------------------------------------- # library(DESeq2) # library(airway) # data(airway) # dds <- DESeqDataData(airway, design = ~ cell + dex) # result <- DESeq(dds) ## ---- eval=FALSE--------------------------------------------------------- # saveRDS(result, bfcnew(bfc, "airway / DESeq standard analysis")) ## ---- eval=FALSE--------------------------------------------------------- # result <- readRDS(bfcrpath(bfc, "airway / DESeq standard analysis")) ## ----eval=FALSE---------------------------------------------------------- # library(BiocFileCache) # library(rtracklayer) # # # load the cache # path <- file.path(tempdir(), "tempCacheDir") # bfc <- BiocFileCache(path) # # # the web resource of interest # url <- "ftp://ftp.ensembl.org/pub/release-71/gtf/homo_sapiens/Homo_sapiens.GRCh37.71.gtf.gz" # # # check if url is being tracked # res <- bfcquery(bfc, url) # # if (bfccount(res) == 0L) { # # # if it is not in cache, add # ans <- bfcadd(bfc, rname="ensembl, homo sapien", fpath=url) # # } else { # # # if it is in cache, get path to load # rid = res %>% filter(fpath == url) %>% collect(Inf) %>% `[[`("rid") # ans <- bfcrpath(bfc, rid) # # # check to see if the resource needs to be updated # check <- bfcneedsupdate(bfc, rid) # # check can be NA if it cannot be determined, choose how to handle # if (is.na(check)) check <- TRUE # if (check){ # ans < - bfcdownload(bfc, rid) # } # } # # # # ans is the path of the file to load # ans # # # # we know because we search for the url that the file is a .gtf.gz, # # if we searched on other terms we can use 'bfcpath' to see the # # original fpath to know the appropriate load/read/import method # bfcpath(bfc, names(ans)) # # temp = GTFFile(ans) # info = import(temp) ## ----eval=TRUE----------------------------------------------------------- # # A simplier test to see if something is in the cache # and if not start tracking it is using `bfcrpath` # library(BiocFileCache) library(rtracklayer) # load the cache path <- file.path(tempdir(), "tempCacheDir") bfc <- BiocFileCache(path) # the web resources of interest url <- "ftp://ftp.ensembl.org/pub/release-71/gtf/homo_sapiens/Homo_sapiens.GRCh37.71.gtf.gz" url2 <- "ftp://ftp.ensembl.org/pub/release-71/gtf/rattus_norvegicus/Rattus_norvegicus.Rnor_5.0.71.gtf.gz" # if not in cache will download and create new entry pathsToLoad <- bfcrpath(bfc, c(url, url2)) pathsToLoad # now load files as see fit info = import(GTFFile(pathsToLoad[1])) class(info) summary(info) ## ----eval=FALSE---------------------------------------------------------- # # # # One could also imagine the following: # # # # library(BiocFileCache) # # # load the cache # bfc <- BiocFileCache() # # # # # Do some work! # # # # # add a location in the cache # filepath <- bfcnew(bfc, "R workspace") # # save(list = ls(), file=filepath) # # # now the R workspace is being tracked in the cache