## ----preliminaries, echo=FALSE, warning = FALSE, message = FALSE, results='hide'---- hasMosek <- requireNamespace("Rmosek", quietly = TRUE) if(hasMosek) hasMosek <- tryCatch(example(mosek)$value$response$code == 0, warning = function(c) 0, error = function(c) 0) if (!hasMosek) { knitr::opts_chunk$set(eval = FALSE) msg <- paste("This vignette requires Mosek, but this system", "does not have Mosek", "installed, so code examples will not be evaluated.") msg <- paste(strwrap(msg), collapse="\n") message(msg) } require("REBayes") knitr::render_sweave() options(prompt = "R> ", continue = "+ ", digits = 2, show.signif.stars = FALSE) cleanup <- FALSE ## ----N02setup, include=FALSE-------------------------------------------------- N02.cap = "Kiefer Wolfowitz Estimation of a Gaussian Location Mixture: The left panel is the (unknown) two component mixture density, the middle panel is the estimated NPMLE mixing density and the right panel is the estimated Bayes rule for predicting $\\hat \\mu = \\delta (x)$ based on seeing an observation $x$." set.seed(1729) ## ----N02, fig.height = 4, fig.width = 10, fig.cap = N02.cap------------------- # A simple Gaussian mixture model par(mfrow = c(1,3)) x <- seq(-5, 6, by = 0.05) plot(x, 0.9 * dnorm(x,0) + 0.1 * dnorm(x,2), type = "l", xlab = "x", ylab = expression(g(x)), main = "") y <- rep(c(0,2), times = c(900,100)) + rnorm(1000) z <- GLmix(y) plot(z, xlab = expression(mu), ylab = expression(f(mu)), main = "") plot(x, predict(z,x), type = "l", ylab = expression(delta(x))) ## ----N0Gsetup, include=FALSE-------------------------------------------------- N0G.cap = "Kiefer Wolfowitz Estimation of a Gaussian Location Mixture: The left panel is the (unknown) mixture density, the middle panel is the estimated NPMLE mixing density and the right panel is the estimated Bayes rule for predicting $\\hat \\mu = \\delta (x)$ based on seeing an observation $x$." set.seed(1726) ## ----N0G, fig.height = 4, fig.width = 10, fig.cap = N0G.cap------------------- # Another simple Gaussian mixture model par(mfrow = c(1,3)) x <- seq(-5, 7, by = 0.05) plot(x, 0.8 * dnorm(x,0) + 0.2 * dnorm(x,2,sqrt(2)), type = "l", xlab = "x", ylab = "g(x)", main = "") y <- c(rep(0,800), rnorm(200, 2)) + rnorm(1000) z <- GLmix(y) plot(z, xlab = expression(mu), ylab = expression(f(mu)), main = "") plot(x, predict(z,x), type = "l", ylab = expression(delta(x))) ## ----P01setup, include=FALSE-------------------------------------------------- P01.cap = "Histogram of Claims per Exposure for 72 occupation groups." ## ----P01, fig.height = 4, fig.width = 6, fig.cap = P01.cap-------------------- # Parametric Gamma vs Poisson mixture models for insurance claims data("Norberg") E <- Norberg$Exposure/344 X <- Norberg$Death hist(X/E, 90, freq = TRUE, xlab = "X/E", main = "", ylab = "Frequency") ## ----P02setup, include=FALSE-------------------------------------------------- P02.cap = "Estimated mixing distribution $G$ for $\\theta$ for the group insurance data. The left panel depicts to the Kiefer-Wolfowitz NPMLE estimator for $G$ with 1000 grid points. The right panel depicts the cube root of the mass associated with support points around 8. The smooth curve superimposed in the left panel corresponds to the parametric maximum likelihood estimate of the mixing density assuming $G$ follows a Gamma distribution." ## ----P02, fig.height = 4, fig.width = 6, cache = TRUE, fig.cap = P02.cap------ # Maximum likelihood estimation of the Gamma model logL<- function(par, x, e){ f <- choose(x + par[1] - 1, x) * (par[2]/(e + par[2]))^par[1] * (e/(e+par[2]))^x -sum(log(f)) } z <- optim(c(5,5), logL, x = X, e = E, hessian = TRUE) sez <- sqrt(diag(solve(z$hessian))) z <- z$par # Estimation of the Poisson mixture model f = Pmix(X, v = 1000, exposure = E, rtol = 1e-8) # Now plot the comparison par(mfrow=c(1,2)) plot(f$x,f$y/sum(f$y), type="l", xlab = expression(theta), ylab = expression(f(theta)), ylim = c(0,1)) lines(f$x, dgamma(f$x, shape = z[1], rate = z[2]), col = 2) plot(f$x,(f$y/sum(f$y))^(1/3), type="l", xlab = expression(theta), ylab = expression(f(theta)^{1/3}), ylim = c(0,1)) ## ----P03setup, include=FALSE-------------------------------------------------- P03.cap = "Comparison of the Parametric and the Nonparametric Empirical Bayes estimator of $\\theta_i$ for 72 occupation groups. As indicated by the 45 degree line there is good agreement between the parametric and nonparametric Bayes rules except for the two groups appearing in the upper right corner of the plot." ## ----P03, fig.height = 4, fig.width = 6, fig.cap = P03.cap-------------------- # Bayes rules for insurance application PBrule <- (X + z[1])/(E + z[2]) NPBrule <- f$dy plot(PBrule, NPBrule, cex = 0.5, xlab = "P-EBayes", ylab = "NP-EBayes") abline(c(0,1)) ## ----W01setup, include=FALSE-------------------------------------------------- W01.cap = "Raw and estimated mortality rates for Carey medflies by gender" ## ----W01, fig.height = 5, fig.width = 8, fig.cap = W01.cap-------------------- data("flies") attach(flies) # Weibull hazard function hweibull <- function(s,alpha,lambda, f){ Lambda<-outer((lambda*s)^(alpha),exp(f$x)) Surv <- exp(-Lambda) %*% f$y/sum(f$y) A <- matrix(0, length(s), length(f$x)) for (i in 1:length(s)){ for (j in 1:length(f$x)) A[i,j] <- dweibull(s[i],shape=alpha, scale = lambda^(-1) * (exp(f$x[j]))^(-1/alpha)) } g <- A %*% f$y g/(sum(g)*Surv) } ahat <- c(2.793, 2.909) # Gender specific Weibull shape parameters counts <- tapply(num,list(age,female),"sum") cols <- c("black","grey") labs <- c("Male","Female") # Plot raw and estimated hazard functions by gender for(g in 1:2){ gc <- counts[!is.na(counts[,g]),g] freq <- gc/sum(gc) day <- as.numeric(names(gc)) atrisk <- rev(cumsum(rev(gc))) h <- rev(diff(rev(c(atrisk,0))))/atrisk fW <- Weibullmix(day, m = 5000, alpha = ahat[g], weight = freq) hW <- hweibull(day, alpha = ahat[g], lambda = 1, fW) if(g == 1){ plot(day[1:100],hW[1:100],type="l", xlim = c(0,110), ylim = c(0,.20), xlab = "Day", ylab = "Hazard") points(day[1:100], h[1:100], cex = 0.7) } else{ lines(day[1:120],hW[1:120],col = cols[2]) points(day[1:100], h[1:100], cex = 0.7, col = cols[2]) } legend("topleft", labs, lty = rep(1,2), lwd = 1.5, col=cols) } ## ----W02setup, include=FALSE-------------------------------------------------- W02.cap = "Initial Cage Density Effect in the Weibull Mixture Model: Profile Log Likelihood (in 1000's) for the cage density effect with 0.95 (Wilks) confidence interval in blue." ## ----W02, fig.height = 4, fig.width = 6, cache = TRUE, warning = FALSE, fig.cap = W02.cap---- # Profile likelihood estimation of initial cage density effect counts <- tapply(num,list(age, begin),"sum") freq <- c(counts) day <- as.numeric(dimnames(counts)[[1]]) den <- as.numeric(dimnames(counts)[[2]]) day <- rep(day, 165) den <- rep(den, each = 136) s <- !is.na(freq) day <- day[s] den <- den[s] freq <- freq[s]/sum(freq[s]) beta <- -10:10/10 logL <- beta for(i in 1:length(beta)){ f <- Weibullmix(day, m = 500, alpha = 2.95, lambda = exp(beta[i]*den), weight = freq) logL[i] <- f$logLik } plot(beta, logL/1000, cex = 0.5, xlab = expression(beta), ylab = "Profile Likelihood") lines(beta, logL/1000) # Wilks interval for cage density effect fsp <- splinefun(beta, max(logL) - logL - qchisq(.95,1)/2) blo <- uniroot(fsp,c(-1,-.5))$root bhi <- uniroot(fsp,c(-.5, 0))$root polygon(c(blo,bhi,bhi,blo), c(-40,-40,-30,-30), col = "lightblue") ## ----W03setup, include=FALSE-------------------------------------------------- counts <- tapply(num,age,"sum") freq <- counts/sum(counts) day <- as.numeric(names(counts)) counts <- c("0.5" = 0, counts) atrisk <- rev(cumsum(rev(counts))) hazard <- rev(diff(rev(c(atrisk,0))))/atrisk W03.cap = "Parametric versus Nonparametric Estimates of Medfly Mortality Rates" ## ----W03, fig.height = 4, fig.width = 6, cache = TRUE, warning = FALSE, fig.cap = W03.cap---- # Parametric Gamma fraility vs nonparametric Weibull mixture model GammaFrailty <- function(pars, age, num, hazard = FALSE){ alpha <- pars[1] beta <- pars[2] delta <- pars[3] a <- (alpha/beta) * (age/beta)^(alpha - 1) A <- (age/beta)^alpha if(hazard) z <- a/(1 + delta * A) else z <- -sum(num * (log(a) - (1 + 1/delta)* log(1 + delta * A))) z } pars <- c(5, 20, 1) z <- optim(pars, GammaFrailty, age = age, num = num) fitG <- z$par fitW <- Weibullmix(day, m = 5000, alpha = 2.95, weights = freq) s <- 1:110 day <- day[s] hazard <- hazard[s] plot(day, hazard, cex = 0.5) lines(day, GammaFrailty(z$par,day,num, hazard = TRUE), lty = 2) hW <- hweibull(day, alpha = 2.95, lambda = 1, fitW) lines(day, hW, col = 2) legend("topright", c("NPMLE", "Gamma"), col = 2:1, lty = 1:2) ## ----W04setup, include=FALSE-------------------------------------------------- W04.cap = "Conditional Frailty at Various Ages: Note that the cube root of the frailties have been plotted to accentuate the smaller mass points" ## ----W04, fig.height = 8, fig.width = 8, fig.cap = W04.cap-------------------- # Conditional fraility at various ages Gfrailt <- function(age, fit){ alpha <- fit[1] beta <- fit[2] delta <- fit[3] A <- (age/beta)^alpha (1 + delta * A)^(-1/delta) } frailt <- function(v, t, alpha, fit){ fv = fit$y/sum(fit$y) g = sum(exp(-v * (t^alpha))* fv) exp(-v * (t^alpha)) * fv/g } par(mfrow = c(2,2)) v <- exp(fitW$x) for(t in c(1.5, 20, 60, 100)){ plot(log(v), frailt(v, t, alpha = 2.95, fitW)^(1/3), type="l", main = paste("age =", t), xlab = expression(log(theta)), ylab = expression(h( theta , t)^{1/3})) } ## ----W05setup, include=FALSE-------------------------------------------------- W05.cap = "Ten-Day Term Life Insurance Premia for Medflies of Various Ages" ## ----W05, fig.height = 4, fig.width = 6, fig.cap = W05.cap-------------------- # Life insurance premia for medflies of various ages premium <- function(v, t, k = 1, alpha, fit){ if("Weibullmix" %in% class(fit)) { R <- t for(i in 1:length(t)){ D <- exp(-v * t[i]^alpha) - exp(-v * (t[i] + k)^alpha) D <- D/exp(-v * t[i]^alpha) D[is.nan(D)] <- 1 R[i] <- sum(D * frailt(v, t[i], alpha, fit)) } } else R <- (Gfrailt(t,fit) - Gfrailt(t+k, fit))/Gfrailt(t, fit) R } v <- exp(fitW$x) R <- premium(v, day, k = 10, alpha = 2.95, fitW) plot(day, R, type = "l", col = 2, ylab = "Premium") R <- premium(v, day, k = 10, alpha = 2.95, fitG) lines(day, R, lty = 2) legend("topright", c("NPMLE", "Gamma"), col = 2:1, lty = 1:2)