## ----style, echo = FALSE, results = 'asis'------------------------------------ BiocStyle::markdown(css.files = c("custom.css")) ## ----setup, include=FALSE----------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "##", crop = FALSE ) ## ----install_chunk, eval=FALSE------------------------------------------------ # if (!require("BiocManager", quietly = TRUE)) install.packages("BiocManager") # # BiocManager::install("DuplexDiscovereR") # # library(DuplexDiscovereR) # ?DuplexDiscovereR ## ----install_chunk2, eval=FALSE----------------------------------------------- # if (!require("BiocManager", quietly = TRUE)) install.packages("BiocManager") # # library(devtools) # devtools::install_github("Egors01/DuplexDiscovereR") ## ----dataload,echo=T,eval=T,message=FALSE------------------------------------- library(DuplexDiscovereR) data(RNADuplexesSampleData) ## ----dataload2---------------------------------------------------------------- system.file("extdata", "scripts", "DD_data_generation.R", package = "DuplexDiscovereR") ## ----run_workflow, echo=T,eval=T,message=FALSE-------------------------------- genome_fasta <- NULL result <- DuplexDiscovereR::runDuplexDiscoverer( data = RNADuplexesRawChimSTAR, junctions_gr = SampleSpliceJncGR, anno_gr = SampleGeneAnnoGR, df_counts = RNADuplexesGeneCounts, sample_name = "example_run", lib_type = "SE", table_type = "STAR", fafile = genome_fasta, ) ## ----observe_results1,eval=TRUE----------------------------------------------- result ## ----observe_results2,eval=TRUE----------------------------------------------- gi_clusters <- dd_get_duplex_groups(result) head(gi_clusters, 2) ## ----observe_results3,eval=TRUE----------------------------------------------- gi_reads <- dd_get_chimeric_reads(result) head(gi_reads, 2) ## ----observe_results4a,eval=TRUE---------------------------------------------- df_reads <- dd_get_reads_classes(result) print(dim(df_reads)) print(dim(RNADuplexesRawChimSTAR)) ## ----observe_results4b,eval=TRUE---------------------------------------------- table(df_reads$read_type) ## ----observe_results5,eval=TRUE----------------------------------------------- table(gi_reads$junction_type) ## ----write_table, eval=TRUE--------------------------------------------------- clusters_dt <- makeDfFromGi(gi_clusters) write.table(clusters_dt, file = tempfile(pattern = "dgs_out", fileext = ".tab")) ## ----write_sam, eval=TRUE, message = FALSE------------------------------------ writeGiToSAMfile(gi_clusters, file_out = tempfile(pattern = "dgs_out", fileext = ".sam"), genome = "hg38") ## ----vis_1, eval=TRUE,message=FALSE------------------------------------------- library(Gviz) # define plotting region plotrange <- GRanges( seqnames = "chr22", ranges = IRanges( start = c(37877619), end = c(37878053) ), strand = "+" ) # make genes track anno_track <- Gviz::GeneRegionTrack(SampleGeneAnnoGR, name = "chr22 Genes", range = plotrange ) # construct DuplexTrack duplex_track <- DuplexTrack( gi = gi_clusters, gr_region = plotrange, labels.fontsize = 12, arcConstrain = 4, annotation.column1 = "gene_name.A", annotation.column2 = "gene_name.B" ) plotTracks(list(anno_track, duplex_track), sizes = c(1, 3), from = start(plotrange) - 100, to = end(plotrange) + 100 ) ## ----comp1, eval=TRUE--------------------------------------------------------- clust_reads <- gi_reads clust_reads <- clust_reads[!is.na(clust_reads$dg_id)] # Create separate pseudo-sample objects for each group set.seed(123) group_indices <- sample(rep(1:3, length.out = length(clust_reads))) group1 <- clust_reads[group_indices == 1] group2 <- clust_reads[group_indices == 2] group3 <- clust_reads[group_indices == 3] ## ----comp2, eval=TRUE--------------------------------------------------------- group1 <- collapse_duplex_groups(group1) group2 <- collapse_duplex_groups(group2) group3 <- collapse_duplex_groups(group3) ## ----comp3, eval=TRUE, message=FALSE------------------------------------------ a <- list("sample1" = group1, "sample2" = group2, "sample3" = group3) res_comp <- compareMultipleInteractions(a) names(res_comp) ## ----comp4, eval=TRUE--------------------------------------------------------- # first, check if UpSetR is installed if (!requireNamespace("UpSetR", quietly = TRUE)) { stop("Install 'UpSetR' to use this function.") } library(UpSetR) upset(res_comp$dt_upset, text.scale = 1.5) ## ----preproc ex1, message=FALSE----------------------------------------------- data("RNADuplexesSampleData") new_star <- runDuplexDiscoPreproc( data = RNADuplexesRawChimSTAR, table_type = "STAR", library_type = "SE", ) ## ----preproc ex2, message=FALSE----------------------------------------------- new_bedpe <- runDuplexDiscoPreproc( data = RNADuplexesRawBed, table_type = "bedpe", library_type = "SE", return_gi = TRUE ) ## ----preproc ex3, message=FALSE----------------------------------------------- # keep only readname in metadata mcols(RNADuplexSampleGI) <- mcols(RNADuplexSampleGI)["readname"] new_gi <- runDuplexDiscoPreproc(data = RNADuplexSampleGI, return_gi = TRUE) head(new_gi, 1) ## ----ex_classif1-------------------------------------------------------------- gi <- getChimericJunctionTypes(new_gi) gi <- getSpliceJunctionChimeras(gi, sj_gr = SampleSpliceJncGR) ## ----ex_classif2-------------------------------------------------------------- table(gi$junction_type) ## ----ex_cassif3--------------------------------------------------------------- table(gi$splicejnc) ## ----------------------------------------------------------------------------- keep <- which((gi$junction_type == "2arm") & (gi$splicejnc == 0)) gi <- gi[keep] ## ----clust1, message=FALSE---------------------------------------------------- gi <- clusterDuplexGroups(gi) ## ----ex_clust2---------------------------------------------------------------- table(is.na(gi$dg_id)) ## ----ex_clist3---------------------------------------------------------------- gi_dgs <- collapse_duplex_groups(gi) head(gi_dgs, 1) # number of DGs length(gi_dgs) ## ----dupl1-------------------------------------------------------------------- res_collapse <- collapseIdenticalReads(gi) # returns new object gi_unique <- res_collapse$gi_collapsed ## ----dupl2-------------------------------------------------------------------- head(res_collapse$stats_df, 1) ## ----dupl3, message=FALSE----------------------------------------------------- # cluster gi_unqiue gi_unique_dg <- collapse_duplex_groups(clusterDuplexGroups(gi_unique)) # check if the get the same amount of reads as in basic clustering sum(gi_unique_dg$n_reads) == sum(gi_dgs$n_reads) ## ----cl1---------------------------------------------------------------------- graph_df <- computeGISelfOverlaps(gi_unique) head(graph_df) ## ----cl2a--------------------------------------------------------------------- graph_df <- graph_df[graph_df$ratio.A > 0.5 & graph_df$ratio.B > 0.5, ] ## ----cl2b--------------------------------------------------------------------- rescale_vec <- function(x) { return((x - min(x)) / (max(x) - min(x))) } graph_df$weight_new <- graph_df$ratio.A + graph_df$ratio.B graph_df$weight_new <- rescale_vec(graph_df$weight_new) head(graph_df, 2) ## ----cl3, message=FALSE------------------------------------------------------- gi_clust_adj <- clusterDuplexGroups(gi_unique, graphdf = graph_df, weight_column = "weight_new") gi_clust_adj_dgs <- collapse_duplex_groups(gi_clust_adj) ## ----cl4---------------------------------------------------------------------- length(gi_clust_adj) ## ----session-info,cache = F,echo=T,message=T,warning=FALSE-------------------- sessionInfo()