More Examples of Making Complex Heatmaps ======================================== **Author**: Zuguang Gu ( z.gu@dkfz.de ) **Date**: `r Sys.Date()` ------------------------------------------------------------- ```{r global_settings, echo = FALSE, message = FALSE} library(markdown) library(knitr) knitr::opts_chunk$set( error = FALSE, tidy = FALSE, message = FALSE, fig.align = "center", fig.width = 5, fig.height = 5) options(markdown.HTML.stylesheet = "custom.css") options(width = 100) ``` In the supplementaries of [the ComplexHeatmap paper](http://bioinformatics.oxfordjournals.org/content/early/2016/05/20/bioinformatics.btw313.abstract), there are four comprehensive examples which are applied on real-world high-throughput datasets. [The examples can be found here.](http://jokergoo.github.io/supplementary/ComplexHeatmap-supplementary1-4/index.html) Also [my blog](http://jokergoo.github.io/blog.html) has some examples and tips for making better complex heatmaps. ### Add more information for gene expression matrix Heatmaps are very popular to visualize gene expression matrix. Rows in the matrix correspond to genes and more information on these genes can be attached after the expression heatmap. In following example, the big heatmap visualize relative expression for genes, then the next is the absolute expression. Also gene length and gene type (i.e. protein coding or lincRNA) are visualized. ```{r expression_example, fig.width = 10, fig.height = 8} library(ComplexHeatmap) library(circlize) expr = readRDS(paste0(system.file(package = "ComplexHeatmap"), "/extdata/gene_expression.rds")) mat = as.matrix(expr[, grep("cell", colnames(expr))]) base_mean = rowMeans(mat) mat_scaled = t(apply(mat, 1, scale)) type = gsub("s\\d+_", "", colnames(mat)) ha = HeatmapAnnotation(df = data.frame(type = type)) Heatmap(mat_scaled, name = "expression", km = 5, col = colorRamp2(c(-2, 0, 2), c("green", "white", "red")), top_annotation = ha, top_annotation_height = unit(4, "mm"), show_row_names = FALSE, show_column_names = FALSE) + Heatmap(base_mean, name = "base_mean", show_row_names = FALSE, width = unit(5, "mm")) + Heatmap(expr$length, name = "length", col = colorRamp2(c(0, 1000000), c("white", "orange")), heatmap_legend_param = list(at = c(0, 200000, 400000, 60000, 800000, 1000000), labels = c("0kb", "200kb", "400kb", "600kb", "800kb", "1mb")), width = unit(5, "mm")) + Heatmap(expr$type, name = "type", width = unit(5, "mm")) ``` ### Visualize genomic regions and other correspondance Following example visualizes correlation between methylation and expression, as well as other annotation information (data are randomly generated). In the heatmap, each row corresponds to a differentially methylated regions (DMRs). From left to right, heatmaps are: 1. methylation for each DMR (by rows) in samples. 2. direction of the methylation (one column heatmap), i.e. is methylation hyper in tumor or hypo? 3. expression for the genes that are associated with corresponding DMRs (e.g. closest gene). 4. significance for the correlation between methylation and expression (-log10(p-value)). 5. type of genes, i.e. is the gene a protein coding gene or a lincRNA? 6. annotation to gene models, i.e. is the DMR located in the intragenic region of the corresponding gene or the DMR is intergenic? 7. distance from the DMR to the TSS of the corresponding gene. 8. overlapping between DMRs and enhancers (Color shows how much the DMR is covered by the enhancers). ```{r, fig.width = 10, fig.height = 8, echo = FALSE, results = "hide"} library(circlize) library(RColorBrewer) lt = readRDS(paste0(system.file(package = "ComplexHeatmap"), "/extdata/meth.rds")) list2env(lt, envir = environment()) ha = HeatmapAnnotation(df = data.frame(type = c(rep("Tumor", 10), rep("Control", 10))), col = list(type = c("Tumor" = "red", "Control" = "blue"))) ha2 = HeatmapAnnotation(df = data.frame(type = c(rep("Tumor", 10), rep("Control", 10))), col = list(type = c("Tumor" = "red", "Control" = "blue")), show_legend = FALSE) # column order of the methylation matrix which will be assigned to the expressio matrix column_tree = hclust(dist(t(meth))) ht_list = Heatmap(meth, name = "methylation", col = colorRamp2(c(0, 0.5, 1), c("blue", "white", "red")), cluster_columns = column_tree, top_annotation = ha, column_names_gp = gpar(fontsize = 8), km = 5, column_title = "Methylation", column_title_gp = gpar(fontsize = 10), row_title_gp = gpar(fontsize = 10)) + Heatmap(direction, name = "direction", col = c("hyper" = "red", "hypo" = "blue"), column_names_gp = gpar(fontsize = 8)) + Heatmap(expr[, column_tree$order], name = "expression", col = colorRamp2(c(-2, 0, 2), c("green", "white", "red")), cluster_columns = FALSE, top_annotation = ha2, column_names_gp = gpar(fontsize = 8), column_title = "Expression", column_title_gp = gpar(fontsize = 10)) + Heatmap(cor_pvalue, name = "-log10(cor_p)", col = colorRamp2(c(0, 2, 4), c("white", "white", "red")), column_names_gp = gpar(fontsize = 8)) + Heatmap(gene_type, name = "gene type", col = brewer.pal(length(unique(gene_type)), "Set1"), column_names_gp = gpar(fontsize = 8)) + Heatmap(anno, name = "anno_gene", col = brewer.pal(length(unique(anno)), "Set2"), column_names_gp = gpar(fontsize = 8)) + Heatmap(dist, name = "dist_tss", col = colorRamp2(c(0, 10000), c("black", "white")), column_names_gp = gpar(fontsize = 8)) + Heatmap(enhancer, name = "anno_enhancer", col = colorRamp2(c(0, 1), c("white", "orange")), cluster_columns = FALSE, column_names_gp = gpar(fontsize = 8), column_title = "Enhancer", column_title_gp = gpar(fontsize = 10)) ht_global_opt(heatmap_legend_title_gp = gpar(fontsize = 8, fontface = "bold"), heatmap_legend_labels_gp = gpar(fontsize = 8)) draw(ht_list, newpage = FALSE, column_title = "Correspondence between methylation, expression and other genomic features", column_title_gp = gpar(fontsize = 12, fontface = "bold"), heatmap_legend_side = "bottom") invisible(ht_global_opt(RESET = TRUE)) ``` ## Combine pvclust and heatmap **pvclust** package provides a robust way to test the stability of the clustering by random sampling from original data. Here you can organize the heatmap by the clustering returned from `pvclust()`. ```{r} library(ComplexHeatmap) library(MASS) library(pvclust) data(Boston) boston.pv <- pvclust(Boston, nboot=100) plot(boston.pv) ``` Since by default `pvclust` clusters columns by 'correlation' method, we scale columns for `Boston` data set to see the relative trend. ```{r} Boston_scaled = apply(Boston, 2, scale) Heatmap(Boston_scaled, cluster_columns = boston.pv$hclust, heatmap_legend_param = list(title = "Boston")) ``` ## Make a same plot as heatmap() ```{r} set.seed(123) mat = matrix(rnorm(100), 10) heatmap(mat, col = topo.colors(50)) ``` Compare to the native `heatmap()`, `Heatmap()` can give more accurate interpolation for colors for continous values. ```{r} Heatmap(mat, col = topo.colors(50), color_space = "sRGB", row_dend_width = unit(2, "cm"), column_dend_height = unit(2, "cm"), row_dend_reorder = TRUE, column_dend_reorder = TRUE) ``` ## The measles vaccine heatmap Following code reproduces the heatmap introduced [here](https://biomickwatson.wordpress.com/2015/04/09/recreating-a-famous-visualisation/) and [here](https://benjaminlmoore.wordpress.com/2015/04/09/recreating-the-vaccination-heatmaps-in-r/). ```{r, fig.width = 10, fig.height = 8} mat = readRDS(paste0(system.file("extdata", package = "ComplexHeatmap"), "/measles.rds")) ha1 = HeatmapAnnotation(dist1 = anno_barplot(colSums(mat), bar_width = 1, gp = gpar(col = NA, fill = "#FFE200"), border = FALSE, axis = TRUE)) ha2 = rowAnnotation(dist2 = anno_barplot(rowSums(mat), bar_width = 1, gp = gpar(col = NA, fill = "#FFE200"), border = FALSE, which = "row", axis = TRUE), width = unit(1, "cm")) ha_column = HeatmapAnnotation(cn = function(index) { year = as.numeric(colnames(mat)) which_decade = which(year %% 10 == 0) grid.text(year[which_decade], which_decade/length(year), 1, just = c("center", "top")) }) Heatmap(mat, name = "cases", col = colorRamp2(c(0, 800, 1000, 127000), c("white", "cornflowerblue", "yellow", "red")), cluster_columns = FALSE, show_row_dend = FALSE, rect_gp = gpar(col= "white"), show_column_names = FALSE, row_names_side = "left", row_names_gp = gpar(fontsize = 10), column_title = 'Measles cases in US states 1930-2001\nVaccine introduced 1961', top_annotation = ha1, top_annotation_height = unit(1, "cm"), bottom_annotation = ha_column, bottom_annotation_height = grobHeight(textGrob("1900"))) + ha2 decorate_heatmap_body("cases", { i = which(colnames(mat) == "1961") x = i/ncol(mat) grid.lines(c(x, x), c(0, 1), gp = gpar(lwd = 2)) grid.text("Vaccine introduced", x, unit(1, "npc") + unit(5, "mm")) }) ``` ## What if my annotation name is too long? There is no space allocated for annotation name, but when the annotation name is too long, you can add paddings of the whole plot to give empty spaces for the annotation names. ```{r, fig.width = 7} ha = HeatmapAnnotation(df = data.frame(a_long_long_long_annotation_name = runif(10)), show_legend = FALSE) ht = Heatmap(matrix(rnorm(100), 10), name = "foo", top_annotation = ha) # because the default width for row cluster is 1cm padding = unit.c(unit(2, "mm"), grobWidth(textGrob("a_long_long_long_annotation_name")) - unit(1, "cm"), unit(c(2, 2), "mm")) draw(ht, padding = padding) decorate_annotation("a_long_long_long_annotation_name", { grid.text("a_long_long_long_annotation_name", 0, 0.5, just = "right") }) ``` ## Session info ```{r} sessionInfo() ```