## ----setup, include=TRUE------------------------------------------------------
# Global chunk defaults
knitr::opts_chunk$set(
  echo = TRUE, message = FALSE, warning = FALSE,
  fig.width = 9, fig.height = 6
)

# Libraries
suppressPackageStartupMessages({
  library(BayesianDisaggregation)
  library(dplyr)
  library(tidyr)
  library(ggplot2)
  library(readr)
  library(openxlsx)
})

# Logging verbosity from the package
log_enable("INFO")
set.seed(2024)

## ----L_from_P, include=TRUE----------------------------------------------------
# Example call (internals are in the package):
# L <- compute_L_from_P(P)

## ----spread-L, include=TRUE----------------------------------------------------
# Example call:
# LT <- spread_likelihood(L, T_periods = nrow(P), pattern = "recent")

## ----posteriors, include=TRUE--------------------------------------------------
# posterior_weighted(P, LT, lambda = 0.7)
# posterior_multiplicative(P, LT)
# posterior_dirichlet(P, LT, gamma = 0.1)
# posterior_adaptive(P, LT)

## ----metrics-fns, include=TRUE-------------------------------------------------
# coherence_score(P, W, L, mult = 3.0, const = 0.5)
# numerical_stability_exp(W, a = 1000, b = 10)
# temporal_stability(W, kappa = 50)
# stability_composite(W, a = 1000, b = 10, kappa = 50)

## ----interp-fn, include=TRUE---------------------------------------------------
# interpretability_score(P, W, use_q90 = TRUE)

## ----api, include=TRUE---------------------------------------------------------
# Example signature (see Section 8 for real data):
# bayesian_disaggregate(path_cpi, path_weights,
#                       method = c("weighted", "multiplicative", "dirichlet", "adaptive"),
#                       lambda = 0.7, gamma = 0.1,
#                       coh_mult = 3.0, coh_const = 0.5,
#                       stab_a = 1000, stab_b = 10, stab_kappa = 50,
#                       likelihood_pattern = "recent")

## ----demo, include=TRUE--------------------------------------------------------
# Synthetic prior matrix (rows on the simplex)
T <- 10; K <- 6
set.seed(123)
P <- matrix(rexp(T * K), nrow = T)
P <- P / rowSums(P)

# Likelihood vector from P (PCA/SVD; robust with fallback)
L <- compute_L_from_P(P)

# Spread over time with the "recent" pattern
LT <- spread_likelihood(L, T_periods = T, pattern = "recent")

# Try a couple of posteriors
W_weighted <- posterior_weighted(P, LT, lambda = 0.7)
W_adaptive <- posterior_adaptive(P, LT)

# Metrics for the adaptive posterior
coh  <- coherence_score(P, W_adaptive, L)
stab <- stability_composite(W_adaptive, a = 1000, b = 10, kappa = 50)
intr <- interpretability_score(P, W_adaptive)
eff  <- 0.65  # fixed placeholder efficiency score for this demo
comp <- 0.30 * coh + 0.25 * stab + 0.25 * intr + 0.20 * eff  # weighted composite of the four metrics

data.frame(coherence = coh, stability = stab, interpretability = intr,
           efficiency = eff, composite = comp) %>% round(4)

## ----real-pipeline, eval=FALSE-------------------------------------------------
# # === Create synthetic data for a CRAN-compliant demo ===
# demo_dir <- tempdir()
#
# # Create synthetic CPI data (mimicking the expected CPI file structure)
# set.seed(123)
# cpi_demo <- data.frame(
#   Year = 2000:2010,
#   CPI  = cumsum(c(100, rnorm(10, 0.5, 2)))  # random walk starting at 100
# )
# cpi_file <- file.path(demo_dir, "synthetic_cpi.xlsx")
# openxlsx::write.xlsx(cpi_demo, cpi_file)
#
# # Create a synthetic weights matrix (mimicking the VAB weights structure)
# set.seed(456)
# years   <- 2000:2010
# sectors <- c("Agriculture", "Manufacturing", "Services", "Construction", "Mining")
#
# weights_demo <- data.frame(Year = years)
# for (sector in sectors) {
#   weights_demo[[sector]] <- runif(length(years), 0.05, 0.35)
# }
# # Normalize rows to sum to 1 (simplex constraint)
# weights_demo[, -1] <- weights_demo[, -1] / rowSums(weights_demo[, -1])
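# # (Illustrative addition, not part of the original pipeline.) Quick sanity
# # check that the synthetic weights really lie on the simplex after the
# # normalization above; only `weights_demo` from the preceding lines is assumed.
# stopifnot(all(abs(rowSums(weights_demo[, -1]) - 1) < 1e-12))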
# weights_file <- file.path(demo_dir, "synthetic_weights.xlsx")
# openxlsx::write.xlsx(weights_demo, weights_file)
#
# # Use synthetic data paths
# path_cpi <- cpi_file
# path_w   <- weights_file
# out_dir  <- demo_dir
#
# cat("Using synthetic data for CRAN demo:\n")
# cat("CPI file:", path_cpi, "\n")
# cat("Weights file:", path_w, "\n")
# cat("Output directory:", out_dir, "\n")
#
# # --- Base run (robust defaults) ---
# base_res <- bayesian_disaggregate(
#   path_cpi           = path_cpi,
#   path_weights       = path_w,
#   method             = "adaptive",
#   lambda             = 0.7,   # recorded in metrics; not used by "adaptive"
#   gamma              = 0.1,
#   coh_mult           = 3.0,
#   coh_const          = 0.5,
#   stab_a             = 1000,
#   stab_b             = 10,
#   stab_kappa         = 60,
#   likelihood_pattern = "recent"
# )
# xlsx_base <- save_results(base_res, out_dir = file.path(out_dir, "base"))
# print(base_res$metrics)
#
# # --- Minimal grid search for demo (reduced size) ---
# n_cores <- 1  # Use single core for CRAN compliance
# grid_df <- expand.grid(
#   method             = c("weighted", "adaptive"),  # Reduced methods
#   lambda             = c(0.5, 0.7),                # Reduced options
#   gamma              = 0.1,                        # Single option
#   coh_mult           = 3.0,                        # Single option
#   coh_const          = 0.5,                        # Single option
#   stab_a             = 1000,
#   stab_b             = 10,
#   stab_kappa         = 60,                         # Single option
#   likelihood_pattern = "recent",                   # Single option
#   KEEP.OUT.ATTRS     = FALSE,
#   stringsAsFactors   = FALSE
# )
#
# grid_res <- run_grid_search(
#   path_cpi     = path_cpi,
#   path_weights = path_w,
#   grid_df      = grid_df,
#   n_cores      = n_cores
# )
# write.csv(grid_res, file.path(out_dir, "grid_results.csv"), row.names = FALSE)
#
# best_row <- grid_res %>% arrange(desc(composite)) %>% slice(1)
# print("Best configuration from grid search:")
# print(best_row)
#
# # --- Re-run the best configuration for clean export ---
# best_res <- bayesian_disaggregate(
#   path_cpi           = path_cpi,
#   path_weights       = path_w,
#   method             = best_row$method,
#   lambda             = if (!is.na(best_row$lambda)) best_row$lambda else 0.7,
#   gamma              = if (!is.na(best_row$gamma)) best_row$gamma else 0.1,
#   coh_mult           = best_row$coh_mult,
#   coh_const          = best_row$coh_const,
#   stab_a             = best_row$stab_a,
#   stab_b             = best_row$stab_b,
#   stab_kappa         = best_row$stab_kappa,
#   likelihood_pattern = best_row$likelihood_pattern
# )
# xlsx_best <- save_results(best_res, out_dir = file.path(out_dir, "best"))
#
# # --- One Excel with everything (including hyperparameters) ---
# sector_summary <- tibble(
#   Sector         = colnames(best_res$posterior)[-1],
#   prior_mean     = colMeans(as.matrix(best_res$prior[, -1])),
#   posterior_mean = colMeans(as.matrix(best_res$posterior[, -1]))
# )
#
# wb <- createWorkbook()
# addWorksheet(wb, "Hyperparameters"); writeData(wb, "Hyperparameters", best_row)
# addWorksheet(wb, "Metrics");         writeData(wb, "Metrics", best_res$metrics)
# addWorksheet(wb, "Prior_P");         writeData(wb, "Prior_P", best_res$prior)
# addWorksheet(wb, "Posterior_W");     writeData(wb, "Posterior_W", best_res$posterior)
# addWorksheet(wb, "Likelihood_t");    writeData(wb, "Likelihood_t", best_res$likelihood_t)
# addWorksheet(wb, "Likelihood_L");    writeData(wb, "Likelihood_L", best_res$likelihood)
# addWorksheet(wb, "Sector_Summary");  writeData(wb, "Sector_Summary", sector_summary)
#
# for (sh in c("Hyperparameters", "Metrics", "Prior_P", "Posterior_W",
#              "Likelihood_t", "Likelihood_L", "Sector_Summary")) {
#   freezePane(wb, sh, firstRow = TRUE)
#   addFilter(wb, sh, rows = 1, cols = 1:ncol(readWorkbook(wb, sh)))
#   setColWidths(wb, sh, cols = 1:200, widths = "auto")
# }
#
# # --- Add sectoral CPI (aggregate times posterior weights) ---
# W_post <- best_res$posterior   # Year + sectors
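# # (Illustrative addition, not part of the original pipeline.) The sectoral
# # split below multiplies the aggregate CPI by each posterior weight, so it
# # only reconstructs the total if every row of W_post sums to 1. A hedged
# # check before the join, assuming W_post has a Year column followed by
# # sector shares as described above.
# stopifnot(all(abs(rowSums(as.matrix(W_post[, -1])) - 1) < 1e-6))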
# cpi_df <- read_cpi(path_cpi)   # Year, CPI
# sector_cpi <- dplyr::left_join(W_post, cpi_df, by = "Year") %>%
#   dplyr::mutate(dplyr::across(-c(Year, CPI), ~ .x * CPI))
#
# # Quality check: sector sums vs CPI
# check_sum <- sector_cpi %>%
#   dplyr::mutate(row_sum = rowSums(dplyr::across(-c(Year, CPI))),
#                 diff    = CPI - row_sum)
# cat("Quality check (first 5 rows):\n")
# print(head(check_sum, 5))
#
# addWorksheet(wb, "Sector_CPI")
# writeData(wb, "Sector_CPI", sector_cpi)
# freezePane(wb, "Sector_CPI", firstRow = TRUE)
# addFilter(wb, "Sector_CPI", rows = 1, cols = 1:ncol(sector_cpi))
# setColWidths(wb, "Sector_CPI", cols = 1:200, widths = "auto")
#
# excel_onefile <- file.path(out_dir, "best", "Best_Full_Output_withSectorCPI.xlsx")
# saveWorkbook(wb, excel_onefile, overwrite = TRUE)
# cat("Full results saved to:", excel_onefile, "\n")
#
# # --- Quick plots (saved as PNGs) ---
# dir_plots <- file.path(out_dir, "best", "plots")
# if (!dir.exists(dir_plots)) dir.create(dir_plots, recursive = TRUE)
#
# W_long <- best_res$posterior %>%
#   pivot_longer(-Year, names_to = "Sector", values_to = "Weight")
# p_heat <- ggplot(W_long, aes(Year, Sector, fill = Weight)) +
#   geom_tile() + scale_fill_viridis_c() +
#   labs(title = "Posterior weights (W): heatmap", x = "Year", y = "Sector", fill = "Share") +
#   theme_minimal(base_size = 11) + theme(axis.text.y = element_text(size = 6))
# ggsave(file.path(dir_plots, "posterior_heatmap.png"), p_heat, width = 12, height = 9, dpi = 220)
#
# top_sectors <- best_res$posterior %>%
#   summarise(across(-Year, mean)) %>%
#   pivot_longer(everything(), names_to = "Sector", values_to = "MeanShare") %>%
#   arrange(desc(MeanShare)) %>% slice(1:3) %>% pull(Sector)  # Reduced to top 3 for demo
#
# p_lines <- best_res$posterior %>%
#   select(Year, all_of(top_sectors)) %>%
#   pivot_longer(-Year, names_to = "Sector", values_to = "Weight") %>%
#   ggplot(aes(Year, Weight, color = Sector)) +
#   geom_line(linewidth = 0.9) +
#   labs(title = "Top 3 sectors by average share (posterior W)", y = "Share", x = "Year") +
#   theme_minimal(base_size = 11)
# ggsave(file.path(dir_plots, "posterior_topSectors.png"), p_lines, width = 11, height = 6, dpi = 220)
#
# cat("Demo completed successfully. All files written to the temporary directory.\n")

## ----invariants, include=TRUE--------------------------------------------------
# Example: invariants on a fresh synthetic run
T <- 6; K <- 5
set.seed(7)
P <- matrix(rexp(T * K), nrow = T); P <- P / rowSums(P)
L  <- compute_L_from_P(P)
LT <- spread_likelihood(L, T, "recent")
W  <- posterior_multiplicative(P, LT)

# Invariants: prior, spread likelihood, and posterior rows all stay on the simplex
stopifnot(all(abs(rowSums(P) - 1)  < 1e-12))
stopifnot(all(abs(rowSums(LT) - 1) < 1e-12))
stopifnot(all(abs(rowSums(W) - 1)  < 1e-12))

c(
  coherence = coherence_score(P, W, L),
  stability = stability_composite(W),
  interpret = interpretability_score(P, W)
) %>% round(4)

## ----session, include=TRUE-----------------------------------------------------
sessionInfo()
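## ----appendix-posterior-comparison, include=TRUE--------------------------------
# (Added sketch, not part of the original vignette.) Side-by-side metrics for
# all four posterior rules on the synthetic P, L, and LT from the invariants
# chunk above. The lambda = 0.7 and gamma = 0.1 values simply mirror the
# examples earlier in this script; they are illustrative, not tuned settings.
W_all <- list(
  weighted       = posterior_weighted(P, LT, lambda = 0.7),
  multiplicative = posterior_multiplicative(P, LT),
  dirichlet      = posterior_dirichlet(P, LT, gamma = 0.1),
  adaptive       = posterior_adaptive(P, LT)
)
# One column per method, one row per metric
sapply(W_all, function(W) c(
  coherence = coherence_score(P, W, L),
  stability = stability_composite(W),
  interpret = interpretability_score(P, W)
)) %>% round(4)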