## ----setup, include = FALSE--------------------------
# Default knitr chunk options used when the vignette is rendered.
knitr::opts_chunk$set(
  cache = TRUE,
  collapse = TRUE,
  echo = TRUE,
  message = FALSE,
  warning = FALSE,
  fig.align = 'center',
  dev.args = list(fix_text_size = FALSE),
  comment = "#>"
)

# Session-wide options (console width, printed digits, partial-match
# warnings, tinytex verbosity).
options(
  htmltools.dir.version = FALSE,
  formatR.indent = 2,
  width = 55,
  digits = 4,
  warnPartialMatchAttr = FALSE,
  warnPartialMatchDollar = FALSE,
  tinytex.verbose = TRUE
)

# `library()` stops with an error when a package is missing, whereas
# `require()` only returns FALSE and lets the script fail later on.
library(data.table)
library(MPAC)

## ----WorkflowJPG, fig.cap='MPAC workflow', out.width='800px', echo=FALSE----
knitr::include_graphics('workflow.jpg')

## ----installFromGitHub, eval=FALSE-------------------
# devtools::install_github('pliu55/MPAC')

## ----installFromBioconductor, eval=FALSE-------------
# if (!requireNamespace("BiocManager", quietly = TRUE))
#     install.packages("BiocManager")
#
# # The following initializes usage of Bioc devel
# BiocManager::install(version='devel')
#
# BiocManager::install("MPAC")

## ----requiredPkg-------------------------------------
library(SummarizedExperiment)
library(MPAC)

## ----ppCnInp-----------------------------------------
# a matrix of CN focal data with rows as genes and columns as samples
cn_tumor_mat <-
  system.file('extdata/TcgaInp/focal_tumor.rds', package = 'MPAC') |>
  readRDS()

# to return a SummarizedExperiment object of CNA state for running PARADIGM
# activated, normal, or repressed state is represented by 1, 0, or -1
ppCnInp(cn_tumor_mat)

## ----ppRnaInp----------------------------------------
# a matrix of RNA-seq data with rows as genes and columns as tumor samples
rna_tumor_mat <-
  system.file('extdata/TcgaInp/log10fpkmP1_tumor.rds', package = 'MPAC') |>
  readRDS()

# a matrix of RNA-seq data with rows as genes and columns as normal samples
rna_norm_mat <-
  system.file('extdata/TcgaInp/log10fpkmP1_normal.rds', package = 'MPAC') |>
  readRDS()

# to return a SummarizedExperiment object of RNA state for running PARADIGM
# activated, normal, or repressed state is represented by 1, 0, or -1
ppRnaInp(rna_tumor_mat, rna_norm_mat, threads = 2)

## ----ppRealInp---------------------------------------
# to return a SummarizedExperiment object of CNA and RNA state
real_se <- ppRealInp(cn_tumor_mat, rna_tumor_mat, rna_norm_mat, threads = 2)

# CNA state is in assays(real_se)$CN_state
# RNA state is in assays(real_se)$RNA_state
real_se

## ----ppPermInp---------------------------------------
# to return a list with one element per permutation; each element is
# accessed below through `metadata()` and `assays()`
perml <- ppPermInp(real_se, n_perms = 3)

# three objects under the first level
length(perml)

# permutation index
metadata(perml[[1]])$i

# permuted CNA state matrix, same as the one from `ppCnInp()`
assays(perml[[1]])$CN_state |> _[1:4, 1:3]

# permuted RNA state matrix, same as the one from `ppRnaInp()`
assays(perml[[1]])$RNA_state |> _[1:4, 1:3]

## ----echo=FALSE--------------------------------------
# URLs for the PARADIGM paper and its executables. They are not referenced
# by any code in this script (presumably used by the vignette text).
prd_paper_url <- 'https://doi.org/10.1093/bioinformatics/btq182'
prd_exe_url <- paste0(
  'https://github.com/sng87/paradigm-scripts/tree/',
  'master/public/exe/'
)
prd_linux_url <- paste0(prd_exe_url, 'LINUX')
prd_macos_url <- paste0(prd_exe_url, 'MACOSX')

## ----runRealPrd--------------------------------------
# CNA and RNA state from `ppRealInp()`
real_se <-
  system.file('extdata/TcgaInp/inp_real.rds', package = 'MPAC') |>
  readRDS()

# Pathway file
fpth <- system.file('extdata/Pth/tiny_pth.txt', package = 'MPAC')

# folder to save all the output files
outdir <- tempdir()

# PARADIGM binary location. Replace the one below with a true location.
paradigm_bin <- '/path/to/PARADIGM'

### code below depends on external PARADIGM binary
runPrd(real_se, fpth, outdir, paradigm_bin, sampleids = c('TCGA-CV-7100'))

## ----runPermPrd--------------------------------------
# permuted input from `ppPermInp()`, loaded from a file shipped with MPAC
permll <-
  system.file('extdata/TcgaInp/inp_perm.rds', package = 'MPAC') |>
  readRDS()

# Pathway file
fpth <- system.file('extdata/Pth/tiny_pth.txt', package = 'MPAC')

# folder to save all the output files
outdir <- tempdir()

# PARADIGM binary location. Replace the one below with a true location.
paradigm_bin <- '/path/to/PARADIGM'  # placeholder, not a real binary

# (optional) sample IDs to run PARADIGM on
pat <- 'TCGA-CV-7100'

### code below depends on external PARADIGM binary
runPermPrd(permll, fpth, outdir, paradigm_bin, sampleids = c(pat))

## ----colRealIPL--------------------------------------
# the folder saving PARADIGM result on real data
# it should be the `outdir` folder from `runPrd()`
indir <- system.file('/extdata/runPrd/', package = 'MPAC')

# to return a data.table with columns as entities and IPLs for each sample
colRealIPL(indir) |> head()

## ----colPermIPL--------------------------------------
# the folder saving PARADIGM result on permuted data
# it should be the `outdir` folder from `runPermPrd()`
# NOTE(review): this example reuses the packaged 'runPrd' folder; presumably
# it also holds the permuted results -- confirm against the package data.
indir <- system.file('/extdata/runPrd/', package = 'MPAC')

# number of permuted dataset results to collect
n_perms <- 3

# return a data.table with columns as entities, permutation index, and IPLs for
# each sample
colPermIPL(indir, n_perms) |> head()

## ----fltByPerm---------------------------------------
# collected real IPLs. It is the output from `colRealIPL()`
realdt <-
  system.file('extdata/fltByPerm/real.rds', package = 'MPAC') |>
  readRDS()

# collected permutation IPLs. It is the output from `colPermIPL()`
permdt <-
  system.file('extdata/fltByPerm/perm.rds', package = 'MPAC') |>
  readRDS()

# to return a matrix of filtered IPLs with rows as pathway entities and columns
# as samples. Entities with IPLs observed by chance are set to NA.
fltByPerm(realdt, permdt) |> head()

## ----subNtw------------------------------------------
# a matrix generated by `fltByPerm()`
fltmat <-
  system.file('extdata/fltByPerm/flt_real.rds', package = 'MPAC') |>
  readRDS()

# a pathway file
fpth <- system.file('extdata/Pth/tiny_pth.txt', package = 'MPAC')

# a gene set file in MSigDB's GMT format. It should be the same file that will
# be used in the over-representation analysis below.
fgmt <- system.file('extdata/ovrGMT/fake.gmt', package = 'MPAC')

# to return a list of igraph objects representing the largest sub-pathway for
# each sample
subNtw(fltmat, fpth, fgmt, min_n_gmt_gns = 1)

## ----ovrGMT------------------------------------------
# a list of igraph objects from `subNtw()`
subntwl <-
  system.file('extdata/subNtw/subntwl.rds', package = 'MPAC') |>
  readRDS()

# a gene set file that has been used in `subNtw()`
fgmt <- system.file('extdata/ovrGMT/fake.gmt', package = 'MPAC')

# (optional) genes that have CN and RNA data in the input files for PARADIGM
omic_gns <-
  system.file('extdata/TcgaInp/inp_focal.rds', package = 'MPAC') |>
  readRDS() |>
  rownames()

# to return a matrix of over-representation adjusted p-values with rows as gene
# set and columns as samples
ovrGMT(subntwl, fgmt, omic_gns)

## ----clSamp------------------------------------------
# a matrix of gene set over-representation adjusted p-values from `ovrGMT()`
ovrmat <-
  system.file('extdata/clSamp/ovrmat.rds', package = 'MPAC') |>
  readRDS()

# to return a data.table of clustering result by 5 random runs:
#
#   - each row represents a clustering result
#   - the first column, `nreps`, indicates the number of occurrences of a
#     clustering result in the 5 random runs
#   - the other columns represent each sample's clustering membership
#
clSamp(ovrmat, n_random_runs = 5)

## ----conMtf------------------------------------------
# a list of igraph objects from `subNtw()`
subntwl <-
  system.file('extdata/conMtf/subntwl.rds', package = 'MPAC') |>
  readRDS()

# (optional) genes that have CN and RNA data in the input files for PARADIGM
omic_gns <-
  system.file('extdata/TcgaInp/inp_focal.rds', package = 'MPAC') |>
  readRDS() |>
  rownames()

# to return a list of igraph objects representing consensus motifs
conMtf(subntwl, omic_gns, min_mtf_n_nodes = 50)

## ----pltNeiStt---------------------------------------
# protein of focus
protein <- 'CD86'

# input pathway file
fpth <- system.file('extdata/Pth/tiny_pth.txt', package = 'MPAC')

# CNA and RNA state matrix from `ppRealInp()`
real_se <-
  system.file('extdata/pltNeiStt/inp_real.rds', package = 'MPAC') |>
  readRDS()

# filtered IPL matrix from `fltByPerm()`
fltmat <-
  system.file('extdata/pltNeiStt/fltmat.rds', package = 'MPAC') |>
  readRDS()

# to plot heatmap
pltNeiStt(real_se, fltmat, fpth, protein)

## ----sessionInfo, echo=FALSE-------------------------
# Record the R version and attached packages.
sessionInfo()