## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ## ----frame-------------------------------------------------------------------- library(sps) set.seed(123654) frame <- data.frame( revenue = round(rlnorm(1e3) * 1000), region = sample(1:3, 1e3, prob = c(0.2, 0.3, 0.5), replace = TRUE) ) head(frame) ## ----outcome------------------------------------------------------------------ sales <- round(frame$revenue * runif(1e3, 0.5, 2)) ## ----allocation--------------------------------------------------------------- allocation <- with(frame, prop_allocation(revenue, 100, region)) allocation ## ----sample------------------------------------------------------------------- sample <- with(frame, sps(revenue, allocation, region)) survey <- cbind(frame[sample, ], sales = sales[sample]) head(survey) ## ----weights------------------------------------------------------------------ survey$weight <- weights(sample) head(survey) ## ----estimate----------------------------------------------------------------- ht <- with(survey, sum(sales * weight)) ht ## ----bias--------------------------------------------------------------------- ht / sum(sales) - 1 ## ----variance----------------------------------------------------------------- repweights <- sps_repweights(weights(sample)) var <- attr(repweights, "tau")^2 * mean((colSums(survey$sales * repweights) - ht)^2) sqrt(var) / ht ## ----variance2---------------------------------------------------------------- sps_var <- function(y, w) { y <- y[w > 1] w <- w[w > 1] n <- length(y) Y <- sum(y * w) n / (n - 1) * sum((1 - 1 / w) * (w * y - Y / n)^2) } var <- with( survey, mapply(sps_var, split(sales, region), split(weight, region)) ) sqrt(sum(var)) / ht ## ----prns--------------------------------------------------------------------- frame$prn <- runif(1000) head(frame) ## ----prn samples-------------------------------------------------------------- pareto <- order_sampling(\(x) x / (1 - x)) sample <- with(frame, sps(revenue, allocation, region, prn)) parsample <- with(frame, pareto(revenue, allocation, region, (prn - 0.5) %% 1)) length(intersect(sample, parsample)) / 100 ## ----prn simualtion----------------------------------------------------------- replicate(1000, { s <- with(frame, pareto(revenue, allocation, region)) length(intersect(sample, s)) / 100 }) |> summary() ## ----top up------------------------------------------------------------------- sample <- with(frame, sps(revenue, allocation, region, prn)) sample_tu <- with(frame, sps(revenue, allocation + c(10, 0, 0), region, prn)) all(sample %in% sample_tu) ## ----critical----------------------------------------------------------------- Map(\(x) head(becomes_ta(x)), split(frame$revenue, frame$region)) ## ----no_tu-------------------------------------------------------------------- set.seed(13026) x <- rlnorm(10) u <- runif(10) becomes_ta(x) sample <- sps(x, 4, prn = u) sample %in% sps(x, 5, prn = u) ## ----yes_tu------------------------------------------------------------------- sample %in% sps(x, 6, prn = u) ## ----ht bias------------------------------------------------------------------ sampling_distribution <- replicate(1000, { sample <- with(frame, sps(revenue, allocation, region)) sum(sales[sample] * weights(sample)) }) summary(sampling_distribution / sum(sales) - 1) ## ----tille, fig.width=8, fig.height=5.33-------------------------------------- set.seed(123456) n <- 5e3 frame1 <- subset(frame, region == 1) pi_est <- tabulate( replicate(n, sps(frame1$revenue, allocation[1])), nbins = nrow(frame1) ) / n pi <- inclusion_prob(frame1$revenue, allocation[1]) dist <- (pi_est - pi) / sqrt(pi * (1 - pi) / n) plot( density(dist, na.rm = TRUE), ylim = c(0, 0.5), xlim = c(-4, 4), ylab = "", xlab = "", main = "Empirical distribution of inclusion probabilities" ) lines(seq(-4, 4, 0.1), dnorm(seq(-4, 4, 0.1)), lty = "dashed") legend("topright", c("empirical", "theoretical"), lty = c("solid", "dashed"))