## ----results='hide', message=FALSE--------------------------------------- suppressPackageStartupMessages(require(maftools)) #read TCGA maf file for LAML laml.maf = system.file('extdata', 'tcga_laml.maf.gz', package = 'maftools') laml = read.maf(maf = laml.maf, removeSilent = TRUE, useAll = FALSE) ## ------------------------------------------------------------------------ #Typing laml shows basic summary of MAF file. laml #Shows sample summry. getSampleSummary(laml) #Shows frequently mutated genes. getGeneSummary(laml) #Shows all fields in MAF getFields(laml) #Writes maf summary to an output file with basename laml. write.mafSummary(maf = laml, basename = 'laml') ## ----fig.height=6, fig.width=8------------------------------------------- plotmafSummary(maf = laml, rmOutlier = TRUE, addStat = 'median', dashboard = TRUE) ## ---- fig.align='left',fig.height=6,fig.width=10, eval=T, fig.align='left'---- #We will draw oncoplots for top ten mutated genes. (Removing non-mutated samples from the plot for better visualization) oncoplot(maf = laml, top = 10, removeNonMutated = TRUE) ## ----results='hide', message=FALSE--------------------------------------- #read TCGA maf file for LAML laml.maf = system.file('extdata', 'tcga_laml.maf.gz', package = 'maftools') all.lesions <- system.file("extdata", "all_lesions.conf_99.txt", package = "maftools") amp.genes <- system.file("extdata", "amp_genes.conf_99.txt", package = "maftools") del.genes <- system.file("extdata", "del_genes.conf_99.txt", package = "maftools") laml.plus.gistic = read.maf(maf = laml.maf, removeSilent = TRUE, useAll = FALSE, gisticAllLesionsFile = all.lesions, gisticAmpGenesFile = amp.genes, gisticDelGenesFile = del.genes, isTCGA = TRUE) ## ---- fig.align='left',fig.height=6,fig.width=10, eval=T, fig.align='left'---- #We will draw oncoplots for top ten mutated genes. (Removing non-mutated samples from the plot for better visualization) oncoplot(maf = laml.plus.gistic, top = 10, removeNonMutated = TRUE, sortByMutation = TRUE) ## ---- fig.height=7,fig.width=10, eval=T, fig.align='left'---------------- #Read FAB classification of TCGA LAML barcodes. laml.fab.anno = system.file('extdata', 'tcga_laml_fab_annotation.txt', package = 'maftools') laml.fab.anno = read.delim(laml.fab.anno, sep = '\t') head(laml.fab.anno) #Changing colors (You can use any colors, here in this example we will use a color palette from RColorBrewer) col = RColorBrewer::brewer.pal(n = 8, name = 'Paired') names(col) = c('Frame_Shift_Del','Missense_Mutation', 'Nonsense_Mutation', 'Multi_Hit', 'Frame_Shift_Ins', 'In_Frame_Ins', 'Splice_Site', 'In_Frame_Del') #We will plot same top ten mutated genes with FAB classification as annotation and using above defined colors. oncoplot(maf = laml, top = 10, annotation = laml.fab.anno, removeNonMutated = TRUE, colors = col) ## ---- fig.height=2,fig.width=8,fig.align='center'------------------------ oncostrip(maf = laml, genes = c('DNMT3A','NPM1', 'RUNX1'), removeNonMutated = TRUE, showTumorSampleBarcodes = FALSE) ## ---- fig.align='default', fig.height=6, fig.width=8, eval = T----------- laml.titv = titv(maf = laml, plot = FALSE, useSyn = TRUE) #plot titv summary plotTiTv(res = laml.titv) ## ----fig.height=4.5,fig.width=8,fig.align='center'----------------------- #Lets plot lollipop plot for DNMT3A, which is one of the most frequent mutated gene in Leukemia. dnmt3a.lpop = lollipopPlot(maf = laml, gene = 'DNMT3A', AACol = 'Protein_Change', showMutationRate = TRUE, domainLabelSize = 2.5) ## ----fig.height=4,fig.width=8,fig.align='center'------------------------- #Lets plot mutations on KIT gene, without repel option. kit.lpop = lollipopPlot(maf = laml, gene = 'KIT', AACol = 'Protein_Change', labelPos = c(416, 418), refSeqID = 'NM_000222', domainLabelSize = 3) #Same plot with repel=TRUE kit.lpop = lollipopPlot(maf = laml, gene = 'KIT', AACol = 'Protein_Change', labelPos = c(416, 418), refSeqID = 'NM_000222', repel = TRUE, domainLabelSize = 3) ## ---- warning=FALSE, message=FALSE, fig.height=4.5,fig.width=8,fig.align='center'---- laml.dnmt3a = lollipopPlot(maf = laml, gene = 'DNMT3A', AACol = 'Protein_Change', refSeqID = 'NM_175629', labelPos = 882, collapsePosLabel = TRUE, cBioPortal = TRUE, domainLabelSize = 3) ## ---- fig.height=4,fig.width=8,fig.align='center'------------------------ tcga.ab.009.seg <- system.file("extdata", "TCGA.AB.3009.hg19.seg.txt", package = "maftools") plotCBSsegments(cbsFile = tcga.ab.009.seg, maf = laml, labelAll = TRUE) ## ---- results='hide', message=FALSE-------------------------------------- coad <- system.file("extdata", "coad.maf.gz", package = "maftools") coad = read.maf(maf = coad) ## ---- fig.height=5,fig.width=12,fig.align='center'----------------------- coad.rf = rainfallPlot(maf = coad, detectChangePoints = TRUE, fontSize = 12, pointSize = 0.6) ## ---- fig.align='left',fig.width=7, fig.height=5, eval=T----------------- geneCloud(input = laml, minMut = 3) ## ------------------------------------------------------------------------ all.lesions <- system.file("extdata", "all_lesions.conf_99.txt", package = "maftools") amp.genes <- system.file("extdata", "amp_genes.conf_99.txt", package = "maftools") del.genes <- system.file("extdata", "del_genes.conf_99.txt", package = "maftools") laml.gistic = readGistic(gisticAllLesionsFile = all.lesions, gisticAmpGenesFile = amp.genes, gisticDelGenesFile = del.genes, isTCGA = TRUE) ## ---- fig.align='left',fig.width=7, fig.height=5, eval=T----------------- gisticPlot(gistic = laml.gistic) ## ---- fig.align='left',fig.width=7, fig.height=5, eval=T----------------- plotGisticResults(gistic = laml.gistic) ## ------------------------------------------------------------------------ #We will run mutExclusive on top 10 mutated genes. laml.mut.excl = mutExclusive(maf = laml, top = 10) head(laml.mut.excl) ## ---- fig.height=1.5,fig.width=8,fig.align='center'---------------------- oncostrip(maf = laml, genes = c('NPM1', 'RUNX1'), sort = TRUE, removeNonMutated = TRUE) ## ---- fig.align='default', fig.width=7,fig.height=5, message=F,results='hide', eval=T---- laml.sig = oncodrive(maf = laml, AACol = 'Protein_Change', minMut = 5, pvalMethod = 'zscore') head(laml.sig) ## ---- fig.align='default', fig.width=7,fig.height=5, eval= T------------- plotOncodrive(res = laml.sig, fdrCutOff = 0.1, useFraction = TRUE) ## ---- fig.align='left',fig.width=7, fig.height=5, eval=T----------------- laml.pfam = pfamDomains(maf = laml, AACol = 'Protein_Change', top = 10) #Protein summary (Printing first 7 columns for display convenience) laml.pfam$proteinSummary[,1:7, with = FALSE] #Domain summary (Printing first 3 columns for display convenience) laml.pfam$domainSummary[,1:3, with = FALSE] ## ----results='hide', message=FALSE--------------------------------------- #Primary APL MAF primary.apl = system.file("extdata", "APL_primary.maf.gz", package = "maftools") primary.apl = read.maf(maf = primary.apl) #Relapse APL MAF relapse.apl = system.file("extdata", "APL_relapse.maf.gz", package = "maftools") relapse.apl = read.maf(maf = relapse.apl) ## ------------------------------------------------------------------------ #We will consider only genes which are mutated in at-least in 5 samples in one of the cohort, to avoid bias due to single mutated genes. pt.vs.rt <- mafCompare(m1 = primary.apl, m2 = relapse.apl, m1Name = 'PrimaryAPL', m2Name = 'RelapseAPL', minMut = 5) print(pt.vs.rt) ## ---- fig.width=6, fig.height=5, fig.align='center'---------------------- apl.pt.vs.rt.fp = forestPlot(mafCompareRes = pt.vs.rt, pVal = 0.05, show = 'stat', color = c('royalblue', 'maroon')) ## ---- fig.height=3,fig.width=11, eval=T, fig.align='left'---------------- genes = c("PML", "RARA", "RUNX1", "ARID1B", "FLT3") coOncoplot(m1 = primary.apl, m2 = relapse.apl, m1Name = 'PrimaryAPL', m2Name = 'RelapseAPL', genes = genes, removeNonMutated = TRUE) ## ---- echo = TRUE, fig.align='center', fig.height=5, fig.width=7, eval=T---- #We will run this for sample TCGA.AB.2972 tcga.ab.2972.het = inferHeterogeneity(maf = laml, tsb = 'TCGA.AB.2972', vafCol = 'i_TumorVAF_WU') print(tcga.ab.2972.het$clusterMeans) #Visualizing results plotClusters(clusters = tcga.ab.2972.het) ## ---- fig.align='center', fig.height=5, fig.width=7, eval=T-------------- seg = system.file('extdata', 'TCGA.AB.3009.hg19.seg.txt', package = 'maftools') tcga.ab.3009.het = inferHeterogeneity(maf = laml, tsb = 'TCGA.AB.3009', segFile = seg, vafCol = 'i_TumorVAF_WU') #Visualizing results. Highlighting those variants on copynumber altered variants. plotClusters(clusters = tcga.ab.3009.het, genes = 'CN_altered', showCNvars = TRUE) ## ---- results='hide', message=F, fig.align='center',fig.width=7, fig.height=6, eval = T---- #we will specify for random 4 patients. laml.math = math.score(maf = laml, vafCol = 'i_TumorVAF_WU', sampleName = c('TCGA.AB.3009', 'TCGA.AB.2849', 'TCGA.AB.3002', 'TCGA.AB.2972')) ## ---- eval=T------------------------------------------------------------- print(laml.math) ## ---- eval=F------------------------------------------------------------- ## #First we extract adjacent bases to the mutated locus and clssify them into 96 substitution classes. ## laml.tnm = trinucleotideMatrix(maf = laml, ref_genome = '/path/to/hg19.fa', ## prefix = 'chr', add = TRUE, ignoreChr = 'chr23', useSyn = TRUE) ## ---- fig.height=5, fig.width=5, eval=F, message=FALSE------------------- ## #Run main function with maximum 6 signatures. ## require('NMF') ## laml.sign = extractSignatures(mat = laml.tnm, nTry = 6, plotBestFitRes = FALSE) ## # Warning : Found zero mutations for conversions A[T>G]C ## # Comparing against experimentally validated 30 signatures.. (See http://cancer.sanger.ac.uk/cosmic/signatures for details.) ## # Found Signature_1 most similar to validated Signature_1. CoSine-Similarity: 0.778739082321156 ## # Found Signature_2 most similar to validated Signature_1. CoSine-Similarity: 0.782748375695661 ## ---- echo=F------------------------------------------------------------- laml.sign = structure(list(signatures = structure(c(1.08748973712201e-18, 0.00839574301030403, 1.08748973712201e-18, 1.08748973712201e-18, 0.00240385240062567, 0.0077139799108362, 1.08748973712201e-18, 1.08748973712201e-18, 0.0162307404236774, 1.08748973712201e-18, 1.08748973712201e-18, 0.00375682293486133, 0.0149882188409168, 0.0166193873319139, 1.08748973712201e-18, 0.0340641337293563, 0.0177133495392653, 1.08748973712201e-18, 1.08748973712201e-18, 0.010900522793394, 1.08748973712201e-18, 1.08748973712201e-18, 0.0204384802376138, 1.08748973712201e-18, 0.016350784190091, 0.00817539209504552, 0.00817539209504552, 0.00510268819463668, 4.64672034428952e-06, 1.08748973712201e-18, 0.00226222985404151, 0.0136256534917425, 0.0305962431704599, 1.08748973712201e-18, 0.0644710774331736, 0.00640610796992356, 0.0940170090930234, 0.0580805923991668, 0.0849820094561238, 1.08748973712201e-18, 0.0397128561572591, 0.0074963059243485, 0.0626642796173211, 0.0351236523951451, 0.0201728180626018, 0.014196017550947, 0.0643882909888946, 1.08748973712201e-18, 1.08748973712201e-18, 0.00455330562748865, 0.00408769604752276, 1.08748973712201e-18, 0.00272513069834851, 0.00650167581744194, 0.016350784190091, 1.08748973712201e-18, 1.08748973712201e-18, 0.0122630881425683, 1.08748973712201e-18, 0.00334016875229922, 0.00136256534917425, 1.08748973712201e-18, 0.00545026139669701, 0.00545026139669701, 4.42324620317284e-15, 1.08748973712201e-18, 0.00737413388604955, 0.0269789492628818, 0.00988463958213221, 0.0100520699314415, 1.08748973712201e-18, 0.00316631020385699, 1.08748973712201e-18, 0.010900522793394, 1.08748973712201e-18, 0.010900522793394, 0.00272513069834851, 1.08748973712201e-18, 0.00394072268462079, 0.00852966039575841, 7.59566702198884e-17, 0.0115340943434354, 1.08748973712201e-18, 0.00237636221356833, 1.08748973712201e-18, 1.08748973712201e-18, 0.0136256534917425, 0.00181567829546557, 0.00408769604752276, 1.08748973712201e-18, 1.08748973712201e-18, 0.00545026139669701, 0.00408769604752276, 0.00499540259887787, 0.00272513069834851, 0.00353514720450814, 0.0134931114940269, 0.0108267734159331, 0.00758987521539011, 0.00674655574701343, 0.0094753598393619, 0.0070321331529894, 0.0109631530888968, 0.0109631530888968, 0.00344757540744538, 0.00674655574701343, 0.00505991681026007, 0.0128545761394095, 9.04499029878464e-19, 0.00742363125585919, 0.0101198336205201, 9.04499029878464e-19, 9.04499029878464e-19, 0.00505991681026007, 0.00505991681026007, 9.04499029878464e-19, 0.0118064725572735, 0.00421659734188339, 9.04499029878464e-19, 0.00758987521539011, 9.04499029878464e-19, 9.04499029878464e-19, 9.04499029878464e-19, 0.00190175907624695, 0.00421372139194615, 0.00590323627863675, 0.00703305451506787, 9.04499029878464e-19, 0.0131095012433463, 0.0480692096974707, 0.0857521368222207, 0.0171181159082965, 9.04499029878464e-19, 0.0526012816612792, 0.103417003771369, 0.108788211420591, 0.00746704363785783, 0.0358397179449789, 0.051450983147391, 0.0119940343267555, 0.020404090976265, 0.0173566988936855, 0.0225544149118068, 0.0312028203299371, 0.00337327787350671, 0.00645838048724634, 9.04499029878464e-19, 0.00252995840513004, 9.04499029878464e-19, 0.00187921658868539, 9.04499029878464e-19, 0.00590323627863675, 0.00337327787350671, 9.04499029878464e-19, 0.00421659734188339, 0.002149298816947, 9.04499029878464e-19, 0.00168663893675336, 9.04499029878464e-19, 9.04499029878464e-19, 0.0227696256461676, 0.0118064725572735, 0.0123023875952219, 0.00691512349188281, 0.00147207029502386, 0.00642836104928948, 0.0160230698991569, 0.0132200565053105, 0.00758987521539011, 9.04499029878464e-19, 0.0109631530888968, 9.04499029878464e-19, 9.04499029878464e-19, 0.00337327787350671, 0.00430756215200655, 0.0115872086847547, 0.00337327787350667, 0.0086313879649405, 0.00168663893675336, 0.00105917939270956, 0.000843319468376679, 0.00758987521539011, 9.04499029878464e-19, 0.00309283703504524, 9.04499029878464e-19, 0.00252995840513004, 0.00421659734188339, 9.04499029878464e-19, 9.04499029878464e-19, 0.00112484084993519, 9.04499029878464e-19, 0.00287194214691947), .Dim = c(96L, 2L), .Dimnames = list(c("A[C>A]A", "A[C>A]C", "A[C>A]G", "A[C>A]T", "C[C>A]A", "C[C>A]C", "C[C>A]G", "C[C>A]T", "G[C>A]A", "G[C>A]C", "G[C>A]G", "G[C>A]T", "T[C>A]A", "T[C>A]C", "T[C>A]G", "T[C>A]T", "A[C>G]A", "A[C>G]C", "A[C>G]G", "A[C>G]T", "C[C>G]A", "C[C>G]C", "C[C>G]G", "C[C>G]T", "G[C>G]A", "G[C>G]C", "G[C>G]G", "G[C>G]T", "T[C>G]A", "T[C>G]C", "T[C>G]G", "T[C>G]T", "A[C>T]A", "A[C>T]C", "A[C>T]G", "A[C>T]T", "C[C>T]A", "C[C>T]C", "C[C>T]G", "C[C>T]T", "G[C>T]A", "G[C>T]C", "G[C>T]G", "G[C>T]T", "T[C>T]A", "T[C>T]C", "T[C>T]G", "T[C>T]T", "A[T>A]A", "A[T>A]C", "A[T>A]G", "A[T>A]T", "C[T>A]A", "C[T>A]C", "C[T>A]G", "C[T>A]T", "G[T>A]A", "G[T>A]C", "G[T>A]G", "G[T>A]T", "T[T>A]A", "T[T>A]C", "T[T>A]G", "T[T>A]T", "A[T>C]A", "A[T>C]C", "A[T>C]G", "A[T>C]T", "C[T>C]A", "C[T>C]C", "C[T>C]G", "C[T>C]T", "G[T>C]A", "G[T>C]C", "G[T>C]G", "G[T>C]T", "T[T>C]A", "T[T>C]C", "T[T>C]G", "T[T>C]T", "A[T>G]A", "A[T>G]C", "A[T>G]G", "A[T>G]T", "C[T>G]A", "C[T>G]C", "C[T>G]G", "C[T>G]T", "G[T>G]A", "G[T>G]C", "G[T>G]G", "G[T>G]T", "T[T>G]A", "T[T>G]C", "T[T>G]G", "T[T>G]T"), c("Signature_1", "Signature_2"))), coSineSimMat = structure(c(0.76789895507522, 0.757733629596811, 0.171803248681684, 0.199391522195904, 0.407671912943102, 0.372979035914154, 0.344078922420868, 0.319857408370786, 0.573357292983596, 0.562412460243176, 0.685700701802704, 0.686217358302521, 0.377725890462418, 0.386689478887272, 0.382312659188403, 0.407516946456442, 0.339149804914427, 0.305305965796845, 0.386629499233586, 0.15685755480318, 0.350678506033931, 0.562433289508901, 0.268840367435164, 0.322933955777266, 0.108666524311962, 0.0628339785033974, 0.49126593617209, 0.527932757462746, 0.47172512923794, 0.461711639647726, 0.362590079921887, 0.387794528034913, 0.154909499589746, 0.154613800740969, 0.303423806321064, 0.204479833568232, 0.570031076792535, 0.740784602225925, 0.445644443725404, 0.510768207280784, 0.248807908838572, 0.28784224944225, 0.140154287925718, 0.0725523826114571, 0.407829024906403, 0.610157444568381, 0.256945337078229, 0.227615984891259, 0.43572741633734, 0.391864627867027, 0.296855754287958, 0.345602091793204, 0.105681572106723, 0.0918011629446175, 0.0955192240249301, 0.0892005087879189, 0.35734783741945, 0.351111836488432, 0.570462074592721, 0.532907409369077), .Dim = c(2L, 30L), .Dimnames = list(c("Signature_1", "Signature_2"), c("Signature_1", "Signature_2", "Signature_3", "Signature_4", "Signature_5", "Signature_6", "Signature_7", "Signature_8", "Signature_9", "Signature_10", "Signature_11", "Signature_12", "Signature_13", "Signature_14", "Signature_15", "Signature_16", "Signature_17", "Signature_18", "Signature_19", "Signature_20", "Signature_21", "Signature_22", "Signature_23", "Signature_24", "Signature_25", "Signature_26", "Signature_27", "Signature_28", "Signature_29", "Signature_30" )))), .Names = c("signatures", "coSineSimMat")) ## ---- fig.width=7, fig.height=5, fig.align='center', eval = T------------ plotSignatures(laml.sign) ## ------------------------------------------------------------------------ require('corrplot') corrplot::corrplot(corr = laml.sign$coSineSimMat, col = RColorBrewer::brewer.pal(n = 9, name = 'Oranges'), is.corr = FALSE, tl.cex = 0.6, tl.col = 'black', cl.cex = 0.6) ## ------------------------------------------------------------------------ var.file = system.file('extdata', 'variants.tsv', package = 'maftools') #This is what input looks like var = read.delim(var.file, sep = '\t') head(var) ## ---- results='hide', eval=F, message=F---------------------------------- ## #Annotate ## var.maf = oncotate(maflite = var.file, header = TRUE) ## ---- eval = F----------------------------------------------------------- ## #Results from oncotate. First 20 columns. ## var.maf[1:10, 1:20, with = FALSE] ## ---- eval = F, engine="bash"-------------------------------------------- ## $perl table_annovar.pl variants.tsv ~/path/to/humandb/ -buildver hg19 ## -out variants --otherinfo -remove -protocol ensGene -operation g -nastring NA ## ---- eval=T------------------------------------------------------------- var.annovar = system.file("extdata", "variants.hg19_multianno.txt", package = "maftools") var.annovar.maf = annovarToMaf(annovar = var.annovar, Center = 'CSI-NUS', refBuild = 'hg19', tsbCol = 'Tumor_Sample_Barcode', table = 'ensGene') ## ------------------------------------------------------------------------ #Read sample ICGC data for ESCA esca.icgc <- system.file("extdata", "simple_somatic_mutation.open.ESCA-CN.sample.tsv.gz", package = "maftools") esca.maf <- icgcSimpleMutationToMAF(icgc = esca.icgc, addHugoSymbol = TRUE) #Printing first 16 columns for display convenience. print(esca.maf[1:5,1:16, with = FALSE]) ## ------------------------------------------------------------------------ ##Extract data for samples 'TCGA.AB.3009' and 'TCGA.AB.2933' (Printing just 5 rows for display convenience) subsetMaf(maf = laml, tsb = c('TCGA.AB.3009', 'TCGA.AB.2933'))[1:5] ##Same as above but return output as an MAF object subsetMaf(maf = laml, tsb = c('TCGA.AB.3009', 'TCGA.AB.2933'), mafObj = TRUE) ## ------------------------------------------------------------------------ ##Select all Splice_Site mutations from DNMT3A and NPM1 subsetMaf(maf = laml, genes = c('DNMT3A', 'NPM1'), query = "Variant_Classification == 'Splice_Site'") ##Same as above but include only 'i_transcript_name' column in the output subsetMaf(maf = laml, genes = c('DNMT3A', 'NPM1'), query = "Variant_Classification == 'Splice_Site'", fields = 'i_transcript_name') ## ------------------------------------------------------------------------ sessionInfo()