## ----setup, include=FALSE----------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.width = 7, fig.height = 5 ) ## ----eval=FALSE--------------------------------------------------------------- # # Install from Bioconductor (once submitted) # if (!requireNamespace("BiocManager", quietly = TRUE)) { # install.packages("BiocManager") # } # BiocManager::install("DMRsegaldata") ## ----load-package------------------------------------------------------------- library(DMRsegaldata) ## ----beta-data---------------------------------------------------------------- # Load the beta values matrix library(ExperimentHub) eh <- ExperimentHub() beta <- eh[["EH10275"]] # Examine the structure dim(beta) class(beta) # Preview the first few rows and columns beta[1:5, 1:5] # Summary statistics summary(beta[1:100, 1]) ## ----pheno-data--------------------------------------------------------------- # Load phenotype data pheno <- eh[["EH10276"]] # View the structure str(pheno) head(pheno) # Summary of sample groups table(pheno$Sample_Group) # Summary of gender distribution table(pheno$Gender) table(pheno$Sample_Group, pheno$Gender) # Age distribution summary(pheno$Age) ## ----dmps-data---------------------------------------------------------------- # Load DMP results dmps <- eh[["EH10277"]] # Examine the structure head(dmps) dim(dmps) # Summary of significance levels summary(dmps$pval) summary(dmps$pval_adj) # Top 10 most significant DMPs head(dmps, 10) # Count of significant DMPs at different thresholds sum(dmps$pval_adj < 0.05) sum(dmps$pval_adj < 0.01) sum(dmps$pval_adj < 0.001) ## ----array-type--------------------------------------------------------------- # Load array type information array_type <- eh[["EH10278"]] array_type ## ----exploration-------------------------------------------------------------- # Check sample consistency between beta and pheno all(colnames(beta) == rownames(pheno)) # Calculate mean methylation per sample mean_methylation <- colMeans(beta, na.rm = TRUE) names(mean_methylation) <- colnames(beta) # Compare mean methylation between groups cancer_samples <- rownames(pheno)[pheno$Sample_Group == "cancer"] normal_samples <- rownames(pheno)[pheno$Sample_Group == "normal"] mean_cancer <- mean(mean_methylation[cancer_samples]) mean_normal <- mean(mean_methylation[normal_samples]) cat("Mean methylation in cancer samples:", round(mean_cancer, 4), "\n") cat("Mean methylation in normal samples:", round(mean_normal, 4), "\n") ## ----visualization, fig.cap="Distribution of mean methylation by sample group"---- # Create a data frame for plotting plot_data <- data.frame( Sample = colnames(beta), MeanMethylation = mean_methylation, Group = pheno[colnames(beta), "Sample_Group"], Age = pheno[colnames(beta), "Age"], Gender = pheno[colnames(beta), "Gender"] ) # Boxplot comparing groups boxplot(MeanMethylation ~ Group, data = plot_data, main = "Mean Methylation by Sample Group", xlab = "Sample Group", ylab = "Mean Beta Value", col = c("cancer" = "lightcoral", "normal" = "lightblue") ) # Add individual points stripchart(MeanMethylation ~ Group, data = plot_data, vertical = TRUE, method = "jitter", add = TRUE, pch = 19, col = "black" ) ## ----top-dmps, fig.cap="Methylation levels at top 3 DMPs"--------------------- # Get top 3 DMPs top_dmps <- head(rownames(dmps), 3) # Set up plotting area par(mfrow = c(1, 3)) # Plot each DMP for (cpg in top_dmps) { if (cpg %in% rownames(beta)) { cpg_values <- beta[cpg, ] boxplot(cpg_values ~ pheno$Sample_Group, main = cpg, xlab = "Group", ylab = "Beta Value", col = c("lightcoral", "lightblue") ) stripchart(cpg_values ~ pheno$Sample_Group, vertical = TRUE, method = "jitter", add = TRUE, pch = 19, col = "black" ) } } par(mfrow = c(1, 1)) ## ----volcano-plot, fig.cap="Volcano plot of differentially methylated positions"---- # Calculate effect sizes for DMPs # (mean difference between cancer and normal) # show only a subset dmps_subset <- dmps[sample(rownames(dmps), min(1000, nrow(dmps))), , drop = FALSE] cancer_beta <- beta[rownames(dmps_subset), cancer_samples] normal_beta <- beta[rownames(dmps_subset), normal_samples] cancer_beta_means <- rowMeans(cancer_beta, na.rm = TRUE) normal_beta_means <- rowMeans(normal_beta, na.rm = TRUE) dmps_subset$delta_beta <- cancer_beta_means - normal_beta_means # Create volcano plot plot(dmps_subset$delta_beta, -log10(dmps_subset$pval), xlab = "Delta Beta (Cancer - Normal)", ylab = "-log10(p-value)", main = "Volcano Plot of DMPs", pch = 19, col = ifelse(dmps_subset$pval_adj < 0.01 & abs(dmps_subset$delta_beta) > 0.2, "red", "grey50"), cex = 0.5 ) abline(h = -log10(0.05), lty = 2, col = "blue") abline(v = c(-0.2, 0.2), lty = 2, col = "blue") legend("topright", legend = c("FDR < 0.01 && abs(Delta Beta) > 0.2", "Not significant"), col = c("red", "grey50"), pch = 19 ) ## ----session-info------------------------------------------------------------- sessionInfo()