## ----setup-libraries-and-theme, echo=FALSE, include=FALSE--------------------- library(knitr) library(dplyr) library(tidyr) library(tibble) library(purrr) library(magrittr) library(forcats) library(ggplot2) library(ggrepel) library(SummarizedExperiment) library(tidybulk) library(tidySummarizedExperiment) library(airway) # Define theme my_theme = theme_bw() + theme( panel.border = element_blank(), axis.line = element_line(), panel.grid.major = element_line(linewidth = 0.2), panel.grid.minor = element_line(linewidth = 0.1), text = element_text(size=12), legend.position="bottom", aspect.ratio=1, strip.background = element_blank(), axis.title.x = element_text(margin = margin(t = 10, r = 10, b = 10, l = 10)), axis.title.y = element_text(margin = margin(t = 10, r = 10, b = 10, l = 10)) ) ## ----load airway-------------------------------------------------------------- library(airway) data(airway) ## ----load tidySummarizedExperiment-------------------------------------------- library(tidySummarizedExperiment) ## ----add-gene-symbol---------------------------------------------------------- library(org.Hs.eg.db) library(AnnotationDbi) # Add gene symbol and entrez airway <- airway |> mutate(entrezid = mapIds(org.Hs.eg.db, keys = gene_name, keytype = "SYMBOL", column = "ENTREZID", multiVals = "first" )) detach("package:org.Hs.eg.db", unload = TRUE) detach("package:AnnotationDbi", unload = TRUE) ## ----data-overview------------------------------------------------------------ airway ## ----check-se-class----------------------------------------------------------- class(airway) ## ----convert-condition-to-factor---------------------------------------------- # Convert dex to factor for proper differential expression analysis airway = airway |> mutate(dex = as.factor(dex)) ## ----aggregate, message=FALSE, warning=FALSE, results='hide', class.source='yellow', eval=FALSE---- # rowData(airway)$gene_name = rownames(airway) # airway.aggr = airway |> aggregate_duplicates(.transcript = gene_name) ## ----aggregate long, eval=FALSE----------------------------------------------- # temp = data.frame( # symbol = dge_list$genes$symbol, # dge_list$counts # ) # dge_list.nr <- by(temp, temp$symbol, # function(df) # if(length(df[1,1])>0) # matrixStats:::colSums(as.matrix(df[,-1])) # ) # dge_list.nr <- do.call("rbind", dge_list.nr) # colnames(dge_list.nr) <- colnames(dge_list) ## ----normalise---------------------------------------------------------------- airway.norm = airway |> identify_abundant(factor_of_interest = dex) |> scale_abundance() ## ----normalise long, eval=FALSE----------------------------------------------- # library(edgeR) # # dgList <- DGEList(count_m=x,group=group) # keep <- filterByExpr(dgList) # dgList <- dgList[keep,,keep.lib.sizes=FALSE] # # ... additional processing steps ... # dgList <- calcNormFactors(dgList, method="TMM") # norm_counts.table <- cpm(dgList) ## ----include=FALSE------------------------------------------------------------ airway.norm |> dplyr::select(`counts`, counts_scaled, .abundant, everything()) ## ----plot_normalise, fig.width = 10, fig.height = 10-------------------------- airway.norm |> ggplot(aes(counts_scaled + 1, group=.sample, color=`dex`)) + geom_density() + scale_x_log10() + my_theme ## ----filter variable---------------------------------------------------------- airway.norm.variable = airway.norm |> keep_variable() ## ----filter variable long, eval=FALSE----------------------------------------- # library(edgeR) # # x = norm_counts.table # # s <- rowMeans((x-rowMeans(x))^2) # o <- order(s,decreasing=TRUE) # x <- x[o[1L:top],,drop=FALSE] # # norm_counts.table = norm_counts.table[rownames(x)] # # norm_counts.table$cell_type = tibble_counts[ # match( # tibble_counts$sample, # rownames(norm_counts.table) # ), # "cell" # ] ## ----mds---------------------------------------------------------------------- airway.norm.MDS = airway.norm |> reduce_dimensions(method="MDS", .dims = 2) ## ----eval = FALSE------------------------------------------------------------- # library(limma) # # count_m_log = log(count_m + 1) # cmds = limma::plotMDS(count_m_log, ndim = 3, plot = FALSE) # # cmds = cmds %$% # cmdscale.out |> # setNames(sprintf("Dim%s", 1:6)) # # cmds$cell_type = tibble_counts[ # match(tibble_counts$sample, rownames(cmds)), # "cell" # ] ## ----plot_mds, fig.width = 10, fig.height = 10, eval=FALSE-------------------- # airway.norm.MDS |> pivot_sample() |> dplyr::select(contains("Dim"), everything()) # # airway.norm.MDS |> # pivot_sample() |> # GGally::ggpairs(columns = 9:11, ggplot2::aes(colour=`dex`)) # # ## ----pca, message=FALSE, warning=FALSE, results='hide'------------------------ airway.norm.PCA = airway.norm |> reduce_dimensions(method="PCA", .dims = 2) ## ----eval=FALSE--------------------------------------------------------------- # count_m_log = log(count_m + 1) # pc = count_m_log |> prcomp(scale = TRUE) # variance = pc$sdev^2 # variance = (variance / sum(variance))[1:6] # pc$cell_type = counts[ # match(counts$sample, rownames(pc)), # "cell" # ] ## ----plot_pca, fig.width = 10, fig.height = 10, eval=FALSE-------------------- # # airway.norm.PCA |> pivot_sample() |> dplyr::select(contains("PC"), everything()) # # airway.norm.PCA |> # pivot_sample() |> # GGally::ggpairs(columns = 11:13, ggplot2::aes(colour=`dex`)) ## ----rotate------------------------------------------------------------------- airway.norm.MDS.rotated = airway.norm.MDS |> rotate_dimensions(`Dim1`, `Dim2`, rotation_degrees = 45) ## ----eval=FALSE--------------------------------------------------------------- # rotation = function(m, d) { # r = d * pi / 180 # ((bind_rows( # c(`1` = cos(r), `2` = -sin(r)), # c(`1` = sin(r), `2` = cos(r)) # ) |> as_matrix()) %*% m) # } # mds_r = pca |> rotation(rotation_degrees) # mds_r$cell_type = counts[ # match(counts$sample, rownames(mds_r)), # "cell" # ] ## ----plot_rotate_1, fig.width = 10, fig.height = 10--------------------------- airway.norm.MDS.rotated |> ggplot(aes(x=`Dim1`, y=`Dim2`, color=`dex` )) + geom_point() + my_theme ## ----plot_rotate_2, fig.width = 10, fig.height = 10--------------------------- airway.norm.MDS.rotated |> pivot_sample() |> ggplot(aes(x=`Dim1_rotated_45`, y=`Dim2_rotated_45`, color=`dex` )) + geom_point() + my_theme ## ----de, message=FALSE, warning=FALSE, results='hide'------------------------- airway.de = airway |> test_differential_expression( ~ dex, method = "edgeR_quasi_likelihood") |> pivot_transcript() airway.de ## ----eval=FALSE--------------------------------------------------------------- # library(edgeR) # # dgList <- DGEList(counts=counts_m,group=group) # keep <- filterByExpr(dgList) # dgList <- dgList[keep,,keep.lib.sizes=FALSE] # dgList <- calcNormFactors(dgList) # design <- model.matrix(~group) # dgList <- estimateDisp(dgList,design) # fit <- glmQLFit(dgList,design) # qlf <- glmQLFTest(fit,coef=2) # topTags(qlf, n=Inf) ## ----de contrast, message=FALSE, warning=FALSE, results='hide', eval=FALSE---- # airway.de = # airway |> # identify_abundant(factor_of_interest = dex) |> # test_differential_expression( # ~ 0 + dex, # .contrasts = c( "dexuntrt - dextrt"), # method = "edgeR_quasi_likelihood" # ) |> # pivot_transcript() ## ----adjust, message=FALSE, warning=FALSE, results='hide'--------------------- airway.norm.adj = airway.norm |> adjust_abundance( .factor_unwanted = cell, .factor_of_interest = dex, method="combat") ## ----eval=FALSE--------------------------------------------------------------- # library(sva) # # count_m_log = log(count_m + 1) # # design = # model.matrix( # object = ~ factor_of_interest + batch, # data = annotation # ) # # count_m_log.sva = # ComBat( # batch = design[,2], # mod = design, # ... # ) # # count_m_log.sva = ceiling(exp(count_m_log.sva) -1) # count_m_log.sva$cell_type = counts[ # match(counts$sample, rownames(count_m_log.sva)), # "cell" # ] # ## ----cibersort, eval=FALSE---------------------------------------------------- # # Requires gene symbols that match reference data # # airway.cibersort = # # airway |> # # deconvolve_cellularity( cores=1, prefix = "cibersort__") |> # # pivot_sample() # ## ----eval=FALSE--------------------------------------------------------------- # # source("CIBERSORT.R") # count_m |> write.table("mixture_file.txt") # results <- CIBERSORT( # "sig_matrix_file.txt", # "mixture_file.txt", # perm=100, QN=TRUE # ) # results$cell_type = tibble_counts[ # match(tibble_counts$sample, rownames(results)), # "cell" # ] # ## ----plot_cibersort, eval=FALSE----------------------------------------------- # # airway.cibersort |> # # pivot_longer( # # names_to= "Cell_type_inferred", # # values_to = "proportion", # # names_prefix ="cibersort__", # # cols=contains("cibersort__") # # ) |> # # ggplot(aes(x=Cell_type_inferred, y=proportion, fill=cell)) + # # geom_boxplot() + # # facet_wrap(~cell) + # # my_theme + # # theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5), aspect.ratio=1/5) # ## ----cluster------------------------------------------------------------------ airway.norm.cluster = airway.norm |> cluster_elements(method="kmeans", centers = 2) ## ----eval=FALSE--------------------------------------------------------------- # library(stats) # # count_m_log = log(count_m + 1) # count_m_log_centered = count_m_log - rowMeans(count_m_log) # # kmeans_result = kmeans(t(count_m_log_centered), centers = 2) # # cluster_labels = kmeans_result$cluster ## ----differential------------------------------------------------------------- airway.norm.de = airway.norm |> test_differential_abundance(~ dex, method="edgeR_quasi_likelihood") ## ----eval=FALSE--------------------------------------------------------------- # library(edgeR) # # count_m_log = log(count_m + 1) # # design = model.matrix(~ dex, data = annotation) # # dge = DGEList(counts = count_m) # dge = calcNormFactors(dge) # dge = estimateDisp(dge, design) # # fit = glmQLFit(dge, design) # qlf = glmQLFTest(fit, coef=2) # # results = topTags(qlf, n = Inf) ## ----session-info------------------------------------------------------------- sessionInfo()