## ----setup, include = FALSE---------------------------------------------------
# Bioc markdown style
BiocStyle::markdown()

# Knitr options
knitr::opts_chunk$set(
  fig.width = 7,
  fig.height = 5,
  out.width = "70%",
  fig.align = "center",
  dpi = 300
)

## ----install-release, eval=FALSE----------------------------------------------
# if (!requireNamespace("BiocManager", quietly = TRUE)) {
#     install.packages("BiocManager")
# }
# 
# BiocManager::install("ReducedExperiment")

## ----install-devel, eval=FALSE------------------------------------------------
# BiocManager::install("ReducedExperiment", version = "devel")
# 
# devtools::install_github("jackgisby/ReducedExperiment")

## ----load-packages, message=FALSE, warning=FALSE, include=TRUE----------------
library(SummarizedExperiment)
library(ReducedExperiment)
library(ggplot2)

theme_set(theme_bw())

## ----se-vignette, eval=FALSE--------------------------------------------------
# vignette("SummarizedExperiment", package = "SummarizedExperiment")

## ----load-data----------------------------------------------------------------
wave1_se <- readRDS(system.file(
    "extdata", "wave1.rds",
    package = "ReducedExperiment"
))
wave2_se <- readRDS(system.file(
    "extdata", "wave2.rds",
    package = "ReducedExperiment"
))

wave1_se

## ----estimate-stability, eval=FALSE, message=FALSE, warning=FALSE, include=TRUE----
# stability_res <- estimateStability(
#     wave1_se,
#     n_runs = 100,
#     min_components = 10, max_components = 60, by = 2,
#     mean_stability_threshold = 0.85,
#     verbose = FALSE,
#     BPPARAM = BiocParallel::SerialParam(RNGseed = 1)
# )
# 
# stability_plot <- plotStability(stability_res)
# print(stability_plot$combined_plot)

## ----stability-image, echo=FALSE----------------------------------------------
img_path <- system.file("stability.png", package = "ReducedExperiment")
if (file.exists(img_path)) {
  knitr::include_graphics(img_path)
} else {
  knitr::include_graphics("../figures/stability.png")
}

## ----run-factor-analysis------------------------------------------------------
wave1_fe <- estimateFactors(
    wave1_se,
    nc = 35,
    use_stability = TRUE,
    n_runs = 30,
    stability_threshold = 0.25, 
    BPPARAM = BiocParallel::SerialParam(RNGseed = 1)
)

wave1_fe

## ----get-factor-reduced-------------------------------------------------------
# get reduced data for first 5 samples and factors
reduced(wave1_fe)[1:5, 1:5]

## ----get-factor-loadings------------------------------------------------------
# get loadings for first 5 genes and factors
loadings(wave1_fe)[1:5, 1:5]

## ----slice-fe-----------------------------------------------------------------
dim(wave1_fe[1:20, 1:10, 1:5])

## ----assess-threshold---------------------------------------------------------
WGCNA::disableWGCNAThreads()

wave1_fit_indices <- assessSoftThreshold(wave1_se)

wave1_estimated_power <- 
    wave1_fit_indices$Power[wave1_fit_indices$estimated_power]
print(paste0("Estimated power: ", wave1_estimated_power))

## ----plot-dendro--------------------------------------------------------------
wave1_me <- identifyModules(
    wave1_se,
    verbose = 0,
    power = wave1_estimated_power,
    minModuleSize = 10
)

plotDendro(wave1_me)

## ----print-me-----------------------------------------------------------------
wave1_me

## ----get-module-reduced-------------------------------------------------------
# get reduced data for first 5 samples and factors
reduced(wave1_me)[1:5, 1:5]

## ----get-module-assignments---------------------------------------------------
# get assignments for first 5 genes
assignments(wave1_me)[1:5]

## ----get-module-loadings------------------------------------------------------
# get loadings for first 5 genes
loadings(wave1_me)[1:5]

## ----show-coldata-------------------------------------------------------------
selected_cols <- c(
    "sample_id", 
    "individual_id", 
    "case_control", 
    "sex", 
    "calc_age"
)

colData(wave1_se)[, selected_cols]

## ----associate-components-----------------------------------------------------
f <- "~ case_control + sex + calc_age"

associations_me <- associateComponents(wave1_me, method = "lm", formula = f)
associations_fe <- associateComponents(wave1_fe, method = "lm", formula = f)

## ----associate-table----------------------------------------------------------
associations_fe$summaries[
    associations_fe$summaries$term == "case_controlPOSITIVE" &
        associations_fe$summaries$adj_pvalue < 0.05,
    c("term", "component", "estimate", "stderr", "adj_pvalue")
]

## ----plot-factor-associations-------------------------------------------------
ggplot(
    associations_fe$summaries[
        associations_fe$summaries$term == "case_controlPOSITIVE",
    ],
    aes(-log10(adj_pvalue),
        reorder(component, -log10(adj_pvalue)),
        fill = estimate
    )
) +
    geom_point(pch = 21, size = 3) +
    xlab("-log10(adjusted p-value)") +
    ylab("Factor name (ordered by adjusted p-value)") +
    geom_vline(xintercept = -log10(0.05)) +
    scale_fill_gradient2(low = "#3A3A98", high = "#832424")

## ----plot-module-associations-------------------------------------------------
ggplot(
    associations_me$summaries[
        associations_me$summaries$term == "case_controlPOSITIVE",
    ],
    aes(-log10(adj_pvalue),
        reorder(component, -log10(adj_pvalue)),
        fill = estimate
    )
) +
    geom_point(pch = 21, size = 3) +
    xlab("-log10(adjusted p-value)") +
    ylab("Module name (ordered by adjusted p-value)") +
    geom_vline(xintercept = -log10(0.05)) +
    scale_fill_gradient2(low = "#3A3A98", high = "#832424")

## ----module-centrality--------------------------------------------------------
module_centrality <- getCentrality(wave1_me)
head(module_centrality)

## ----factor-alignments--------------------------------------------------------
aligned_features <- getAlignedFeatures(wave1_fe, format = "data.frame")
head(aligned_features[, c("component", "feature", "value")])

## ----run-enrich, message=FALSE, warning=FALSE---------------------------------

TERM2GENE <- getMsigdbT2G()

factor_5_enrich <- runEnrich(
    wave1_fe[, , "factor_5"], 
    method = "overrepresentation", 
    as_dataframe = TRUE,
    TERM2GENE = TERM2GENE
)

module_1_enrich <- runEnrich(
    wave1_me[, , "module_1"], 
    method = "overrepresentation", 
    as_dataframe = TRUE,
    TERM2GENE = TERM2GENE
)

## ----example-factor-enrichment------------------------------------------------
rownames(factor_5_enrich) <- NULL
factor_5_enrich[, c("ID", "pvalue", "p.adjust", "geneID")]

## ----example-module-enrichment------------------------------------------------
ggplot(
    module_1_enrich[1:15, ],
    aes(-log10(p.adjust), reorder(substr(ID, 1, 45), -log10(p.adjust)))
) +
    geom_point(pch = 21, size = 3) +
    xlab("-log10(adjusted p-value)") +
    ylab("Pathway name (ordered by adjusted p-value)") +
    expand_limits(x = 0) +
    geom_vline(xintercept = -log10(0.05))

## ----project-data-------------------------------------------------------------
wave2_fe <- projectData(wave1_fe, wave2_se)
wave2_fe

## ----reprint-original---------------------------------------------------------
wave1_fe

## ----replicating-associations-------------------------------------------------
replication_fe <- associateComponents(
    wave2_fe, 
    method = "lmer", 
    formula = paste0(f, " + (1|individual_id)")
)

## ----plotting-replicated-associations-----------------------------------------
ggplot(
    replication_fe$summaries[
        replication_fe$summaries$term == "case_controlPOSITIVE",
    ],
    aes(
        -log10(adj_pvalue),
        reorder(component, -log10(adj_pvalue)),
        fill = estimate
    )
) +
    geom_point(pch = 21, size = 3) +
    xlab("-log10(adjusted p-value)") +
    ylab("Module name (ordered by adjusted p-value)") +
    geom_vline(xintercept = -log10(0.05)) +
    scale_fill_gradient2(low = "#3A3A98", high = "#832424") +
    expand_limits(x = 0)

## ----correlation-with-projection----------------------------------------------
original_estimates <- associations_fe$summaries$estimate[
    associations_fe$summaries$term == "case_controlPOSITIVE"
]
replication_estimates <- replication_fe$summaries$estimate[
    replication_fe$summaries$term == "case_controlPOSITIVE"
]

cor.test(original_estimates, replication_estimates)

## ----calc-eigengenes-new-data-------------------------------------------------
wave2_me <- calcEigengenes(wave1_me, wave2_se)

replication_me <- associateComponents(
    wave2_me, 
    method = "lmer", 
    formula = paste0(f, " + (1|individual_id)")
)

original_estimates <- associations_me$summaries$estimate[
    associations_me$summaries$term == "case_controlPOSITIVE"
]
replication_estimates <- replication_me$summaries$estimate[
    replication_me$summaries$term == "case_controlPOSITIVE"
]

cor.test(original_estimates, replication_estimates)

## ----module-preservation------------------------------------------------------
# Using a low number of permutations for the vignette
mod_pres_res <- modulePreservation(
    wave1_me, 
    wave2_se, 
    verbose = 0, 
    nPermutations = 5
)

plotModulePreservation(mod_pres_res)

## ----extension-vignette, eval=FALSE-------------------------------------------
# vignette("Extensions", package = "SummarizedExperiment")

## ----extend-class-------------------------------------------------------------
PCAExperiment <- setClass(
    "PCAExperiment",
    contains = "ReducedExperiment",
    slots = representation(
        rotation = "matrix",
        sdev = "numeric"
    )
)

## ----create-pca-experiment----------------------------------------------------
prcomp_res <- stats::prcomp(t(assay(wave1_se)), center = TRUE, scale. = TRUE)

my_pca_experiment <- PCAExperiment(
    wave1_se,
    reduced = prcomp_res$x,
    rotation = prcomp_res$rotation,
    sdev = prcomp_res$sdev,
    center = prcomp_res$center,
    scale = prcomp_res$scale
)

my_pca_experiment

## ----get-pca-reduced----------------------------------------------------------
reduced(my_pca_experiment)[1:5, 1:5]

## ----get-rotation-------------------------------------------------------------
setGeneric("rotation", function(x, ...) standardGeneric("rotation"))

setMethod("rotation", "PCAExperiment", function(x, withDimnames = TRUE) {
    return(x@rotation)
})

rotation(my_pca_experiment)[1:5, 1:5]

## ----set-rotation-------------------------------------------------------------
setGeneric("rotation<-", function(x, ..., value) standardGeneric("rotation<-"))

setReplaceMethod("rotation", "PCAExperiment", function(x, value) {
    x@rotation <- value
    validObject(x)
    return(x)
})

# Set the value at position [1, 1] to 10 and get
rotation(my_pca_experiment)[1, 1] <- 10
rotation(my_pca_experiment)[1:5, 1:5]

## ----make-fe-from-pca---------------------------------------------------------
# Construct a SummarizedExperiment (if you don't already have one)
expr_mat <- assay(wave1_se)
pheno_data <- colData(wave1_se)
se <- SummarizedExperiment(
    assays = list("normal" = expr_mat),
    colData = pheno_data
)

# Calculate PCA with prcomp
prcomp_res <- stats::prcomp(t(assay(se)), center = TRUE, scale. = TRUE)

# Create a FactorisedExperiment container
fe <- FactorisedExperiment(
    se,
    reduced = prcomp_res$x,
    loadings = prcomp_res$rotation,
    stability = prcomp_res$sdev,
    center = prcomp_res$center,
    scale = prcomp_res$scale
)

fe

## ----session-info, echo=FALSE-------------------------------------------------
sessionInfo()