## ----eval = FALSE------------------------------------------------------------- # if (!requireNamespace("devtools", quietly = TRUE)) # install.packages("devtools") # devtools::install_github("wejlab/MetaScope") ## ----eval = FALSE------------------------------------------------------------- # suppressPackageStartupMessages({ # library(MetaScope) # library(dplyr) # }) ## ----eval = FALSE------------------------------------------------------------- # NCBI_key <- "" # options("ENTREZ_KEY" = NCBI_key) ## ----eval = FALSE, message = FALSE-------------------------------------------- # # Get barcode, index, and read data locations # barcodePath <- # system.file("extdata", "barcodes.txt", package = "MetaScope") # # indexPath <- system.file("extdata", "virus_example_index.fastq", # package = "MetaScope") # readPath <- # system.file("extdata", "virus_example.fastq", package = "MetaScope") # # # Get barcode, index, and read data locations # demult <- # meta_demultiplex(barcodePath, # indexPath, # readPath, # rcBarcodes = FALSE, # hammingDist = 2, # location = tempfile()) # demult ## ----eval = FALSE------------------------------------------------------------- # # For RefSeq data, set reference = TRUE and representative = FALSE # download_refseq('bacteria', reference = TRUE, representative = FALSE, # out_dir = NULL, compress = TRUE, patho_out = FALSE, # caching = TRUE) # # # If the RefSeq assortment is too limited, set representative = TRUE # download_refseq('bacteria', reference = TRUE, representative = FALSE, # out_dir = NULL, compress = TRUE, patho_out = FALSE, # caching = TRUE) ## ----eval = FALSE------------------------------------------------------------- # # for (taxa in c('bacteria', 'viruses', 'fungi')) { # download_refseq(taxa, reference = TRUE, representative = FALSE, # out_dir = NULL, compress = TRUE, patho_out = FALSE, # caching = TRUE) # } ## ----eval = FALSE------------------------------------------------------------- # download_refseq("Homo sapiens", reference = TRUE, representative = FALSE, # out_dir = NULL, compress = TRUE, patho_out = FALSE, # caching = TRUE) ## ----eval = FALSE------------------------------------------------------------- # head(MetaScope:::taxonomy_table) ## ----eval = FALSE------------------------------------------------------------- # all_staph_strains <- MetaScope:::taxonomy_table |> # dplyr::filter(genus == "Staphylococcus") |> # dplyr::pull(strain) # # # Download representative genomes # sapply(all_staph_strains, # function(strain) download_refseq(strain, reference = TRUE, representative = TRUE, # out_dir = NULL, compress = TRUE, patho_out = FALSE, # caching = TRUE)) ## ----eval = FALSE------------------------------------------------------------- # nThreads <- 1 # # # Example - target genome fungi # mk_bowtie_index(ref_dir = "fungi", lib_dir = "all_indices", # lib_name = "fungi", threads = nThreads, overwrite = TRUE) ## ----eval = FALSE------------------------------------------------------------- # # If unpaired, see documentation # readPath1 <- "reads1.paired.fastq.gz" # readPath2 <- "reads2.paired.fastq.gz" # # # Change to your target library names # targets <- c("fungi", "bacteria", "viruses") # # alignment_directory <- "" # # threads <- 8 # # # Usually expTag is a dynamically changing sample name # expTag <- "sampleX" # # # Location of alignment indices # all_indices <- "" ## ----eval = FALSE------------------------------------------------------------- # data("bt2_params") # # Use 16S params instead if you have 16S data # data("bt2_16S_params") # # target_map <- align_target_bowtie(read1 = readPath1, # read2 = readPath2, # # Where are indices stored? # lib_dir = all_indices, # libs = targets, # align_dir = alignment_directory, # align_file = expTag, # overwrite = TRUE, # threads = threads, # # Use 16S params if you have 16S data # bowtie2_options = bt2_params, # quiet = FALSE) ## ----eval = FALSE------------------------------------------------------------- # data("align_details") # # target_map <- align_target(read1 = readPath1, # read2 = readPath2, # lib_dir = all_indices, # libs = targets, # threads = threads, # lib_dir = target_ref_temp, # align_file = # file.path(alignment_directory, expTag), # subread_options = align_details, # quiet = FALSE) ## ----eval = FALSE------------------------------------------------------------- # # Change to your filter library name(s) # filters <- c("homo_sapiens") # # threads <- 8 # # output <- paste(paste0(alignment_directory, expTag), "filtered", sep = ".") ## ----eval = FALSE------------------------------------------------------------- # data("bt2_params") # # # target_map is the bam file output by the MetaAlign module # final_map <- filter_host_bowtie(reads_bam = target_map, # lib_dir = all_indices, # libs = filters, # make_bam = FALSE, # output = output, # threads = threads, # bowtie2_options = bt2_params, # overwrite = TRUE, # quiet = FALSE, # bowtie2_options = bt2_params) ## ----eval = FALSE------------------------------------------------------------- # data("align_details") # # final_map <- filter_host( # reads_bam = target_map, # lib_dir = all_indices, # make_bam = FALSE, # libs = filters, # output = output, # threads = threads, # subread_options = align_details) ## ----eval = FALSE------------------------------------------------------------- # your_ENTREZ_KEY <- "" # outDir <- "" # # Sys.setenv(ENTREZ_KEY = your_ENTREZ_KEY) ## ----eval = FALSE------------------------------------------------------------- # # NOTE: If 16S sample, use MetaAlign output # input_file <- target_map # # metascope_id(input_file, # input_type = "bam", # aligner = "bowtie2", # NCBI_key = your_ENTREZ_KEY, # num_species_plot = 15, # quiet = FALSE, # out_dir = outDir) ## ----eval = FALSE------------------------------------------------------------- # # NOTE: If Metagenomics sample, use MetaFilter output # input_file <- final_map # # metascope_id(input_file, # input_type = "csv.gz", # aligner = "bowtie2", # NCBI_key = your_ENTREZ_KEY, # num_species_plot = 15, # quiet = FALSE, # out_dir = outDir) ## ----eval = FALSE------------------------------------------------------------- # # NOTE: If 16S sample, use MetaAlign output # input_file <- target_map # # metascope_id(input_file, # input_type = "bam", # aligner = "subread", # NCBI_key = your_ENTREZ_KEY, # num_species_plot = 15, # quiet = FALSE, # out_dir = outDir) ## ----eval = FALSE------------------------------------------------------------- # # NOTE: If Metagenomics sample, use MetaFilter output # input_file <- final_map # # metascope_id(input_file, # input_type = "csv.gz", # aligner = "subread", # NCBI_key = your_ENTREZ_KEY, # num_species_plot = 15, # quiet = FALSE, # out_dir = outDir) ## ----eval = FALSE------------------------------------------------------------- # tempfolder <- tempfile() # dir.create(tempfolder) # # # Create three different samples # samp_names <- c("X123", "X456", "X789") # all_files <- file.path(tempfolder, # paste0(samp_names, ".csv")) # # create_IDcsv <- function (out_file) { # final_taxids <- c("273036", "418127", "11234") # final_genomes <- c( # "Staphylococcus aureus RF122, complete sequence", # "Staphylococcus aureus subsp. aureus Mu3, complete sequence", # "Measles virus, complete genome") # best_hit <- sample(seq(100, 1050), 3) # proportion <- best_hit/sum(best_hit) |> round(2) # EMreads <- best_hit + round(runif(3), 1) # EMprop <- proportion + 0.003 # dplyr::tibble(TaxonomyID = final_taxids, # Genome = final_genomes, # read_count = best_hit, Proportion = proportion, # EMreads = EMreads, EMProportion = EMprop) |> # dplyr::arrange(dplyr::desc(.data$read_count)) |> # utils::write.csv(file = out_file, row.names = FALSE) # message("Done!") # return(out_file) # } # out_files <- vapply(all_files, create_IDcsv, FUN.VALUE = character(1)) # ## ----eval = FALSE------------------------------------------------------------- # annot_dat <- file.path(tempfolder, "annot.csv") # dplyr::tibble(Sample = samp_names, RSV = c("pos", "neg", "pos"), # month = c("March", "July", "Aug"), # yrsold = c(0.5, 0.6, 0.2)) |> # utils::write.csv(file = annot_dat, # row.names = FALSE) ## ----eval = FALSE------------------------------------------------------------- # outMAE <- convert_animalcules(meta_counts = out_files, # annot_path = annot_dat, # which_annot_col = "Sample", # end_string = ".metascope_id.csv", # qiime_biom_out = FALSE, # NCBI_key = NULL) # # unlink(tempfolder, recursive = TRUE) ## ----session info------------------------------------------------------------- sessionInfo()