## ----echo=FALSE, results="hide"-----------------------------------------------
# Global knitr chunk options for this document.
knitr::opts_chunk$set(error=FALSE, message=FALSE)

## ----echo=FALSE, results="hide"-----------------------------------------------
library(BiocStyle)

## -----------------------------------------------------------------------------
# Our pool of known variable sequences, one per barcode:
known <- c("AAAAAAAA", "CCCCCCCC", "GGGGGGGG", "TTTTTTTT")

# Mocking up some sequence data, where each read randomly contains
# one of the variable sequences, flanked by constant regions.
library(Biostrings)
chosen <- sample(known, 1000, replace=TRUE)
reads <- sprintf("GTAC%sCATG", chosen)
names(reads) <- sprintf("READ_%i", seq_along(reads))

# Writing to a FASTQ file. The output path is passed under its full argument
# name ('filepath') rather than relying on partial matching of 'file'.
single.fq <- tempfile(fileext=".fastq")
writeXStringSet(DNAStringSet(reads), filepath=single.fq, format="fastq")

## -----------------------------------------------------------------------------
# Counting the occurrences of each known barcode between the two flanks.
library(screenCounter)
out <- matrixOfSingleBarcodes(single.fq,
    flank5="GTAC", flank3="CATG", choices=known)
out
assay(out)

## -----------------------------------------------------------------------------
# Our pool of known variable sequences:
known1 <- c("AAAA", "CCCC", "GGGG", "TTTT")
known2 <- c("ATTA", "CGGC", "GCCG", "TAAT")

# Mocking up some sequence data, where each read randomly contains
# two of the variable sequences within a template structure.
library(Biostrings)
chosen1 <- sample(known1, 1000, replace=TRUE)
chosen2 <- sample(known2, 1000, replace=TRUE)
reads <- sprintf("GTAC%sCATG%sGTAC", chosen1, chosen2)
names(reads) <- sprintf("READ_%i", seq_along(reads))

# Writing to a FASTQ file.
# The output path is passed under its full argument name ('filepath') rather
# than relying on partial matching of 'file'.
combo.fq <- tempfile(fileext=".fastq")
writeXStringSet(DNAStringSet(reads), filepath=combo.fq, format="fastq")

## -----------------------------------------------------------------------------
# Counting combinations of barcodes; each run of N's in the template marks
# one variable region, with one pool of choices supplied per region.
out <- matrixOfComboBarcodes(combo.fq,
    template="GTACNNNNCATGNNNNGTAC",
    choices=list(first=known1, second=known2))
out

## -----------------------------------------------------------------------------
assay(out)

## -----------------------------------------------------------------------------
rowData(out)

## -----------------------------------------------------------------------------
# Creating an example dual barcode sequencing experiment.
known.pool1 <- c("AGAGAGAGA", "CTCTCTCTC", "GTGTGTGTG", "CACACACAC")
known.pool2 <- c("ATATATATA", "CGCGCGCGC", "GAGAGAGAG", "CTCTCTCTC")

# Mocking up the barcode sequences.
N <- 1000
read1 <- sprintf("CAGCTACGTACG%sCCAGCTCGATCG",
    sample(known.pool1, N, replace=TRUE))
names(read1) <- seq_len(N)

read2 <- sprintf("TGGGCAGCGACA%sACACGAGGGTAT",
    sample(known.pool2, N, replace=TRUE))
names(read2) <- seq_len(N)

# Writing them to FASTQ files.
# Both mates share one temporary prefix and differ only in their suffix.
tmp <- tempfile()
tmp1 <- paste0(tmp, "_1.fastq")
tmp2 <- paste0(tmp, "_2.fastq")
writeXStringSet(DNAStringSet(read1), filepath=tmp1, format="fastq")
writeXStringSet(DNAStringSet(read2), filepath=tmp2, format="fastq")

## -----------------------------------------------------------------------------
# Enumerating every pairing of the two pools, then retaining a random subset
# (roughly 90% of the rows) to serve as the set of valid combinations.
choices <- expand.grid(known.pool1, known.pool2)
choices <- DataFrame(barcode1=choices[,1], barcode2=choices[,2])
choices <- choices[sample(nrow(choices), nrow(choices)*0.9),]

## -----------------------------------------------------------------------------
# One template per mate; the pair of FASTQ paths is supplied as a single
# list element.
out <- matrixOfDualBarcodes(
    list(c(tmp1, tmp2)),
    choices=choices,
    template=c(
        "CAGCTACGTACGNNNNNNNNNCCAGCTCGATCG",
        "TGGGCAGCGACANNNNNNNNNACACGAGGGTAT"
    )
)
out

## -----------------------------------------------------------------------------
assay(out)

## -----------------------------------------------------------------------------
colData(out)

## -----------------------------------------------------------------------------
rowData(out)

## -----------------------------------------------------------------------------
# Creating an example dual barcode sequencing experiment.
known.pool1 <- c("AGAGAGAGA", "CTCTCTCTC", "GTGTGTGTG", "CACACACAC")
known.pool2 <- c("ATATATATA", "CGCGCGCGC", "GAGAGAGAG", "CTCTCTCTC")

# Mocking up the barcode sequences, with both variable regions placed
# on the same read.
N <- 1000
read <- sprintf(
    "CAGCTACGTACG%sCCAGCTCGATCG%sACACGAGGGTAT",
    sample(known.pool1, N, replace=TRUE),
    sample(known.pool2, N, replace=TRUE)
)
names(read) <- seq_len(N)

# Writing them to FASTQ files.
tmp <- tempfile(fileext=".fastq")
writeXStringSet(DNAStringSet(read), filepath=tmp, format="fastq")

## -----------------------------------------------------------------------------
# Enumerating every pairing of the two pools, then retaining a random subset
# (roughly 90% of the rows) to serve as the set of valid combinations.
choices <- expand.grid(known.pool1, known.pool2)
choices <- DataFrame(barcode1=choices[,1], barcode2=choices[,2])
choices <- choices[sample(nrow(choices), nrow(choices)*0.9),]

## -----------------------------------------------------------------------------
# A single template containing both variable regions on the same read.
out <- matrixOfDualBarcodesSingleEnd(tmp, choices=choices,
    template="CAGCTACGTACGNNNNNNNNNCCAGCTCGATCGNNNNNNNNNACACGAGGGTAT")
out

## -----------------------------------------------------------------------------
assay(out)

## -----------------------------------------------------------------------------
colData(out)

## -----------------------------------------------------------------------------
rowData(out)

## -----------------------------------------------------------------------------
# Mocking up a 8-bp random variable region.
N <- 1000
randomized <- vapply(seq_len(N), function(i) {
    paste(sample(c("A", "C", "G", "T"), 8, replace=TRUE), collapse="")
}, character(1))
barcodes <- sprintf("CCCAGT%sGGGATAC", randomized)
names(barcodes) <- sprintf("READ_%i", seq_along(barcodes))

# Writing to a FASTQ file. A dedicated variable is used here so that
# 'single.fq' keeps pointing at the single-barcode reads (GTAC/CATG flanks)
# that the parallelization example below is meant to count.
random.fq <- tempfile(fileext=".fastq")
writeXStringSet(DNAStringSet(barcodes), filepath=random.fq, format="fastq")

## -----------------------------------------------------------------------------
library(screenCounter)
out <- matrixOfRandomBarcodes(random.fq, template="CCCAGTNNNNNNNNGGGATAC")
out
head(assay(out))

## -----------------------------------------------------------------------------
# Pretend that these are different samples:
all.files <- c(single.fq, single.fq, single.fq)

# Parallel execution:
library(BiocParallel)
out <- matrixOfSingleBarcodes(all.files,
    flank5="GTAC", flank3="CATG", choices=known,
    BPPARAM=SnowParam(2))
out

## -----------------------------------------------------------------------------
sessionInfo()