## ---- optChunkDefault, include=FALSE------------------------------------- library(knitr) opts_chunk$set( # Set these first, to make them default message = FALSE, warning=FALSE, error=FALSE ) optChunkDefault <- opts_chunk$get() ## ----biocLite, eval=FALSE------------------------------------------------ # # Currently: # devtools::install_github("kevinrue/TVTB") # # When hosted on Bioconductor: # source("http://bioconductor.org/biocLite.R") # biocLite("TVTB") ## ----library------------------------------------------------------------- library(TVTB) ## ----TVTBparamCreate----------------------------------------------------- tparam <- TVTBparam(Genotypes( ref = "0|0", het = c("0|1", "1|0", "0|2", "2|0", "1|2", "2|1"), alt = c("1|1", "2|2")), ranges = GenomicRanges::GRangesList( SLC24A5 = GenomicRanges::GRanges( seqnames = "15", IRanges::IRanges( start = 48413170, end = 48434757) ) ) ) ## ----TVTBparamView------------------------------------------------------- tparam ## ---- reduceGRangesList-------------------------------------------------- svp <- as(tparam, "ScanVcfParam") svp ## ----importPhenotypes---------------------------------------------------- phenoFile <- system.file( "extdata", "integrated_samples.txt", package = "TVTB") phenotypes <- S4Vectors::DataFrame( read.table(file = phenoFile, header = TRUE, row.names = 1)) ## ----vcfFile------------------------------------------------------------- vcfFile <- system.file( "extdata", "chr15.phase3_integrated.vcf.gz", package = "TVTB") tabixVcf <- Rsamtools::TabixFile(file = vcfFile) ## ----ScanVcfParam-------------------------------------------------------- VariantAnnotation::vcfInfo(svp(tparam)) <- vep(tparam) VariantAnnotation::vcfGeno(svp(tparam)) <- "GT" svp(tparam) ## ----preprocessVariants, message=FALSE----------------------------------- # Import variants as a CollapsedVCF object vcf <- VariantAnnotation::readVcf( tabixVcf, param = tparam, colData = phenotypes) # Expand into a ExpandedVCF object (bi-allelic records) vcf <- VariantAnnotation::expand(x = vcf, row.names = TRUE) ## ----vcf, echo=FALSE----------------------------------------------------- vcf ## ----addOverallFrequencies----------------------------------------------- initialInfo <- colnames(info(vcf)) vcf <- addOverallFrequencies(vcf = vcf) setdiff(colnames(info(vcf)), initialInfo) ## ----addFrequenciesOverall----------------------------------------------- vcf <- addFrequencies(vcf = vcf, force = TRUE) ## ----addPhenoLevelFrequencies-------------------------------------------- initialInfo <- colnames(info(vcf)) vcf <- addPhenoLevelFrequencies( vcf = vcf, pheno = "super_pop", level = "AFR") setdiff(colnames(info(vcf)), initialInfo) ## ----addFrequenciesPhenoLevel-------------------------------------------- initialInfo <- colnames(info(vcf)) vcf <- addFrequencies( vcf = vcf, phenos = list( super_pop = c("EUR", "AFR"), pop = c("GBR", "FIN", "MSL")), force = TRUE) setdiff(colnames(info(vcf)), initialInfo) ## ----FilterRules--------------------------------------------------------- fr <- S4Vectors::FilterRules(list( mixed = function(x){ VariantAnnotation::fixed(x)[,"FILTER"] == "PASS" & VariantAnnotation::info(x)[,"MAF"] >= 0.05 } )) fr ## ----VcfFixedRules------------------------------------------------------- fixedR <- VcfFixedRules(list( pass = expression(FILTER == "PASS"), qual = expression(QUAL > 20) )) fixedR ## ----VcfInfoRules-------------------------------------------------------- infoR <- VcfInfoRules(exprs = list( common = expression(MAF > 0.1), # minor allele frequency present = expression(ALT + HET > 0) # count of non-REF homozygotes ), active = c(TRUE, FALSE)) infoR ## ----VcfVepRules--------------------------------------------------------- vepR <- VcfVepRules(exprs = list( missense = expression(Consequence %in% c("missense_variant")), CADD = expression(CADD_PHRED > 15) )) vepR ## ----VcfFilterRules------------------------------------------------------ vcfRules <- VcfFilterRules(fixedR, infoR, vepR) vcfRules ## ----deactivateCADD------------------------------------------------------ active(vcfRules)["CADD"] <- FALSE active(vcfRules) ## ------------------------------------------------------------------------ summary(eval(expr = infoR, envir = vcf)) summary(eval(expr = vcfRules, envir = vcf)) summary(evalSeparately(expr = vcfRules, envir = vcf)) ## ----sessionInfo, echo=FALSE--------------------------------------------- sessionInfo()