## ---- include = FALSE---------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----bioc, eval = FALSE-------------------------------------------------------
#  if (!require("BiocManager"))
#      install.packages("BiocManager")
#  
#  BiocManager::install("CNVgears")

## ----bioc dev, eval = FALSE---------------------------------------------------
#  if (!requireNamespace("BiocManager", quietly = TRUE))
#      install.packages("BiocManager")
#  
#  # The following initializes usage of Bioc devel
#  BiocManager::install(version='devel')
#  
#  BiocManager::install("CNVgears")

## ----install git hub, eval = FALSE--------------------------------------------
#  # not run
#  devtools::install_github("SinomeM/CNVgears")

## ----library------------------------------------------------------------------
library(CNVgears, quietly = TRUE)

## ---- message=TRUE, warning=TRUE----------------------------------------------
data.table::setDTthreads(percent=75)
data.table::setDTthreads(4)

## ---- include=FALSE-----------------------------------------------------------
data.table::setDTthreads(2)

## ----PED PFB------------------------------------------------------------------
# cohort data
cohort <- read_metadt(DT_path = system.file("extdata", "cohort.ped", 
                                            package = "CNVgears"),
                      sample_ID_col = "Individual ID", 
                      fam_ID_col = "Family ID",
                      sex_col = "Gender", role_col = "Role")
head(cohort)

# markers location
SNP_markers <- read_finalreport_snps(system.file("extdata", "SNP.pfb", 
                                                 package = "CNVgears"),
                                     mark_ID_col = "Name", chr_col = "Chr", 
                                     pos_col = "Position")
head(SNP_markers)

## ----results + raw, message=TRUE, warning=TRUE--------------------------------
# CNV calling results
penn <- read_results(DT_path = system.file("extdata", 
                                           "chrs_14_22_cnvs_penn.txt", 
                                           package = "CNVgears"), 
                     res_type = "file", DT_type = "TSV/CSV", 
                     chr_col = "chr", start_col = "posStart", 
                     end_col = "posEnd", CN_col = "CN", 
                     samp_ID_col = "Sample_ID", sample_list = cohort, 
                     markers = SNP_markers, method_ID = "P")
head(penn)

quanti <- read_results(DT_path = system.file("extdata", 
                                             "chrs_14_22_cnvs_quanti.txt", 
                                             package = "CNVgears"), 
                      res_type = "file", DT_type = "TSV/CSV", 
                      chr_col = "chr", start_col = "posStart", 
                      end_col = "posEnd", CN_col = "CN", 
                      samp_ID_col = "Sample_ID", sample_list = cohort, 
                      markers = SNP_markers, method_ID = "Q")

ipn <-  read_results(DT_path = system.file("extdata", 
                                           "chrs_14_22_cnvs_ipn.txt", 
                                           package = "CNVgears"), 
                     res_type = "file", DT_type = "TSV/CSV", 
                     chr_col = "chr", start_col = "posStart", 
                     end_col = "posEnd", CN_col = "CN", 
                     samp_ID_col = "Sample_ID", sample_list = cohort, 
                     markers = SNP_markers, method_ID = "I")

## ---- eval = FALSE------------------------------------------------------------
#  # raw data
#  # not run. This step needs the additional data
#  library("data.table", quietly = TRUE)
#  setDTthreads(4)
#  
#  read_finalreport_raw(DT_path = "PATH_TO_DOWNLOADED_DATA_DIRECTORY",
#                       pref = NA, suff = ".txt",
#                       rds_path = file.path("tmp_RDS"),
#                       markers = SNP_markers,
#                       sample_list = cohort[fam_ID == "1463", ])

## ---- eval= FALSE-------------------------------------------------------------
#  ## not run
#  library(cn.mops)
#  data(cn.mops)
#  resCNMOPS <- cn.mops(XRanges)
#  resCNMOPS <- calcIntegerCopyNumbers(resCNMOPS)
#  resCNMOPS_cnvs <- cnvs(resCNMOPS)
#  
#  sample_list <- read_metadt("path/to/PED_like_file")
#  intervals <- read_NGS_intervals("path/to/markers/file")
#  cnmops_calls <- cnmops_to_CNVresults(resCNMOPS_cnvs, sample_list, intervals)

## ----summary stats------------------------------------------------------------
sstatPenn <- summary(object = penn, sample_list = cohort,
                     markers = SNP_markers)

## ---- eval = FALSE------------------------------------------------------------
#  # not run
#  sstatPenn <- summary(object = penn, sample_list = cohort,
#                       markers = SNP_markers,
#                       plots_path = file.path("tmp","sstatPenn")) # plots are saved

## -----------------------------------------------------------------------------
head(sstatPenn)

## ----filtering----------------------------------------------------------------
penn_filtered <- cleaning_filter(results = penn, min_len = 10000, min_NP = 10)
quanti_filtered <- cleaning_filter(quanti)
ipn_filtered <- cleaning_filter(ipn)

## ----blacklists---------------------------------------------------------------
hg19telom <- telom_centrom(hg19_start_end_centromeres, centrom = FALSE)
hg19centrom <- telom_centrom(hg19_start_end_centromeres, telom = FALSE, 
                             region = 25000)

## ---- eval=FALSE--------------------------------------------------------------
#  ensembl37 <- biomaRt::useMart("ENSEMBL_MART_ENSEMBL",
#                                host = "grch37.ensembl.org",
#                                dataset="hsapiens_gene_ensembl")
#  hg19_immuno <- immuno_regions(n_genes = 10, mart = ensembl37)

## -----------------------------------------------------------------------------
head(hg19telom)

## ----inheritance, eval=FALSE--------------------------------------------------
#  # not run, needs the additional raw data
#  penn_inheritance <-
#    cnvs_inheritance(results = penn_filtered, sample_list =  cohort[fam_ID == "1463", ],
#                     markers = SNP_markers, raw_path = file.path("tmp_RDS"))
#  quanti_inheritance <-
#    cnvs_inheritance(results = quanti_filtered, sample_list =  cohort[fam_ID == "1463", ],
#                     markers = SNP_markers, raw_path = file.path("tmp_RDS"))
#  ipn_inheritance <-
#    cnvs_inheritance(results = ipn_filtered, sample_list =  cohort[fam_ID == "1463", ],
#                     markers = SNP_markers, raw_path = file.path("tmp_RDS"))
#  
#  penn_denovo <- penn_inheritance[inheritance == "denovo", ]
#  quanti_denovo <- quanti_inheritance[inheritance == "denovo", ]
#  ipn_denovo <- ipn_inheritance[inheritance == "denovo", ]

## ----inter merge, results="hide"----------------------------------------------
ipq_filtered <- 
  inter_res_merge(res_list = list(penn_filtered, quanti_filtered, ipn_filtered),
                  sample_list = cohort, chr_arms = hg19_chr_arms)

## ---- eval=FALSE--------------------------------------------------------------
#  # not run, it needs inheritance detection step
#  ipq_denovo <-
#    inter_res_merge(res_list = list(penn_denovo, quanti_denovo, ipn_denovo),
#                    sample_list = cohort, chr_arms = hg19_chr_arms)

## ----cnvrs, results="hide"----------------------------------------------------
cnvrs_chr22 <- cnvrs_create(cnvs = ipq_filtered[chr == 22,], 
                            chr_arms = hg19_chr_arms)

## -----------------------------------------------------------------------------
# define common CNVR as the one present in > 5% of the cohort 
# here the cohort is very small so it won't be a significative result
common_cnvrs_chr22 <- cnvrs_chr22[[1]][freq > nrow(cohort)*0.5, ]
rare_cvns_chr22 <- cnvrs_chr22[[2]][!cnvr %in% common_cnvrs_chr22$r_ID, ]

## ----denovo plot, eval=FALSE--------------------------------------------------
#  # not run, it needs inheritance detection step
#  lrr_trio_plot(cnv = ipq_denovo[1], raw_path =  file.path("tmp_RDS"),
#                sample_list = cohort, results = ipn_filtered)
#  lrr_trio_plot(cnv = ipq_denovo[2], raw_path =  file.path("tmp_RDS"),
#                sample_list = cohort, results = ipn_filtered)
#  lrr_trio_plot(cnv = ipq_denovo[3], raw_path =  file.path("tmp_RDS"),
#                sample_list = cohort, results = ipn_filtered)

## ----annotation, echo=c(-2,-4), eval = FALSE----------------------------------
#  # locus
#  ipq_denovo_locus <- genomic_locus(DT_in = ipq_filtered, keep_str_end = FALSE)
#  
#  # genes
#  ensembl37 <- biomaRt::useMart("ENSEMBL_MART_ENSEMBL",
#                                host = "grch37.ensembl.org",
#                                dataset="hsapiens_gene_ensembl")
#  
#  ipq_denovo_genes <- genic_load(DT_in = ipq_filtered, mart = ensembl37)
#  
#  rm(ensembl37)

## ---- eval=FALSE--------------------------------------------------------------
#  ## not run
#  
#  # Select a subset of columns using DT[, .(col1,col2,coln)]
#  tmp <- ipq_filtered[, .(sample_ID, chr, start, end, GT, meth_ID)]
#  head(tmp, 3)
#  
#  # Select a subset of rows (i.e. filter) using DT[condition, ]
#  tmp <- ipq_filtered[n_meth > 1, .(sample_ID, chr, start, end, GT, meth_ID)]
#  head(tmp, 3)
#  
#  # Export object as a TSV
#  fwrite(tmp, "example.tsv", sep = "\t")

## -----------------------------------------------------------------------------
tmp <- ipq_filtered[, .(chr, start, end, paste0(sample_ID, "-", GT))]
head(tmp)

## ---- eval=FALSE--------------------------------------------------------------
#  ## not run
#  fwrite(tmp, "example.bed", sep = "\t", col.names = FALSE)

## -----------------------------------------------------------------------------
ipq_filtered_GRanges <- CNVresults_to_GRanges(ipq_filtered)

## ---- eval=FALSE--------------------------------------------------------------
#  # only call with > 1 calling methods
#  tmp <- ipq_filtered[n_meth > 1, ]
#  
#  # de novo CNVs with a significant corrected p-value (note that the p-values are
#  # lost when combining more result-objects)
#  tmp <- penn_inheritance[adj_m_pval < 0.05 & adj_p_pval < 0.05, ]