## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ## ----setup, include=FALSE----------------------------------------------------- knitr::opts_chunk$set(echo = FALSE, warning = FALSE, message = FALSE) library(VizTest) library(ggplot2) # Datasets from library(wooldridge) library(carData) # For wrangling and analysis library(dplyr) library(tidyr) library(ggplot2) library(patchwork) library(lme4) library(marginaleffects) ## ----reg-obj, echo = TRUE, out.width = "100%", fig.cap = "Regular objects --- default output"---- #### 4.1 Regular object #### model1 <- lm(colGPA~skipped+alcohol+PC+male+car+job20,data=gpa1) # Parsing to viztest viztestObj <- viztest(model1, test_level = 0.05, range_levels = c(0.25,0.99), level_increment = 0.01) # Print print(viztestObj) ## ----reg-obj-plot, echo = TRUE, fig.show="hide", fig.height=4, fig.width=6, out.width = "100%", fig.cap = "Regular objects --- default output"---- f1 <- plot(viztestObj) ## ----reg-obj-plot2, echo = TRUE, fig.height=4, fig.width=6, out.width = "100%", fig.cap = "Inferential (black) and 95% (gray) Confidence Intervals", fig.show="hide"---- reg_plot_data <- plot(viztestObj, make_plot=FALSE) %>% mutate(lwr95 = est - qnorm(.975)*se, upr95 = est + qnorm(.975)*se) f2 <- ggplot(reg_plot_data, aes(y = label, x=est)) + geom_linerange(aes(xmin=lwr95, xmax=upr95), color="gray75") + geom_linerange(aes(xmin=lwr, xmax=upr), color="black", linewidth=3) + geom_point(color="white", size=.75) + geom_vline(xintercept=0, linetype=3) + theme_classic() + labs(x="Regression Coefficient", y="") ## ----reg-plot-both, out.width="100%", echo=FALSE, fig.cap = "Plots of Regression Output with Inferential Confidence Intervals", fig.height=3.5, fig.width=7---- (f1 + ggtitle("A) Default plot() Output") + theme(plot.title.position="plot")) + (f2 + ggtitle("B) plot() Output with 95% CI Included") + theme(plot.title.position="plot")) ## ----mltlvl-obj, echo = TRUE, out.width = "100%", fig.cap = "Multilevel regression objects --- default output with all reference lines"---- #### 4.2 Multilevel regression objects #### data(WVS, package='carData') # Poverty variable as a scale from -1 to 1 NewWVS <- WVS %>% mutate(povertyNum = as.numeric(poverty)-2) # The model model2 <- lmer(povertyNum ~ age + religion + degree + gender + (1 | country), NewWVS) # Creating custom object ## Extracting fixed effect coefficients and naming them named_coef_vec <- fixef(model2) coefNames <- c("(Intercept)","Age","Religious","Has a degree","Male") names(named_coef_vec) <- coefNames ## Extracting vcov matrix #### As matrix necessary to overwrite special object from lme4 vcov <- as.matrix(vcov(model2)) eff_vt <- make_vt_data(named_coef_vec, vcov) # Parsing to viztest viztestObj <- viztest(eff_vt,test_level = 0.05, range_levels = c(0.25,0.99),level_increment = 0.01) # Print print(viztestObj) ## ----mltlvl-obj-plot, echo = TRUE, out.width = "65%", fig.height=3.5, fig.width=5, fig.align="center", fig.cap = "Multilevel regression objects --- default output with all reference lines"---- # Plotting plot(viztestObj,ref_lines = "all", level = "ce") + labs(y="", x="Fixed Effects") ## ----predprob-obj, echo = TRUE, out.width = "100%", fig.cap = "Predicted probabilities --- default output with ambiguous reference lines"---- #### 4.3 Predicted probabilities #### data(TitanicSurvival, package="carData") NewTitanicSurvival <- TitanicSurvival %>% mutate(ageCat = case_when(age <= 10 ~ "0-10", age > 10 & age <=18 ~ "11-18", age > 18 & age <=30 ~ "19-30", age > 30 & age <=40 ~ "31-40", age > 40 & age <=50 ~ "41-50", age >50 ~ "51+")) # The model model3 <- glm(survived~sex*ageCat+passengerClass, data=NewTitanicSurvival,family = binomial(link="logit")) # Predicted values mes <- avg_predictions(model3, variables = list(ageCat = levels(NewTitanicSurvival$ageCat), sex=levels(NewTitanicSurvival$sex))) ## ----"print-titanic-intervals-norm"------------------------------------------- vt_titanic <- viztest(mes, test_level = 0.05, range_levels = c(0.25,0.99), level_increment = 0.01, include_zero=FALSE) vt_titanic ## ----avt-preds-plot, echo = TRUE, out.width = "65%", fig.height=3.5, fig.width=5, fig.align="center", fig.cap = "Average Predicted Probabilities --- Plot with Normal Theory Inferential CIs"---- mes <- mes %>% mutate(lwr76 = estimate - qnorm(.88)*std.error, upr76 = estimate + qnorm(.88)*std.error) ggplot(mes, aes(y = ageCat, x=estimate, xmin = lwr76, xmax=upr76, colour=sex)) + geom_pointrange(position = position_dodge(width=.5)) + scale_colour_manual("Sex",values=c("gray50", "black")) + theme_bw() + labs(x="Predicted Pr(Survival)\nInferential Confidence Intervals (76%)", y="Age Category") ## ----eval=TRUE,echo=T--------------------------------------------------------- ap_sim <- avg_predictions(model3, variables = list(ageCat = levels(NewTitanicSurvival$ageCat), sex=levels(NewTitanicSurvival$sex))) %>% inferences(method = "simulation", R=2500) ## ----echo=T------------------------------------------------------------------- post <- t(attr(ap_sim, "posterior")) colnames(post) <- paste0("b", 1:ncol(post)) post_vt <- make_vt_data(post, type="sim") vt_sim <- viztest(post_vt, cifun="hdi", include_zero=FALSE) vt_sim ## ----ave-preds-sim-plot, echo = TRUE, out.width = "65%", fig.height=3.5, fig.width=5, fig.align="center", fig.cap = "Average Predicted Probabilities --- Plot with Simulation-based Inferential HDIs"---- hdis <- apply(post, 2, \(x)hdi(x, 0.79)) mes <- mes %>% arrange(ageCat) %>% mutate(lwr79 =hdis[1,], upr79 = hdis[2,]) ggplot(mes, aes(y = ageCat, x=estimate, xmin = lwr79, xmax=upr79, colour=sex)) + geom_pointrange(position = position_dodge(width=.5)) + scale_colour_manual("Sex",values=c("gray50", "black")) + theme_bw() + labs(x="Predicted Pr(Survival)\nInferential Highest Density Regions (79%)", y="Age Category") ## ----echo=T------------------------------------------------------------------- #### 4.4 Descriptive quantities #### data(CES11, package="carData") NewCES11 <- CES11 %>% mutate(rel_imp=(as.numeric(importance)-1)/3) %>% group_by(province) %>% summarise(mean=mean(rel_imp), samp_var=var(rel_imp)/n()) # Creating a vtcustom object ## Vector for the means means <- NewCES11$mean names(means) <- NewCES11$province vt_ces_data <- make_vt_data(means, NewCES11$samp_var) # Passing to viztest viztestCES <- viztest(vt_ces_data, test_level = 0.05, range_levels = c(0.25,0.99), level_increment = 0.01, include_zero=FALSE) # Print viztestCES ## ----descr-obj, echo = TRUE, out.width = "50%", fig.height=3.5, fig.width=3.5, fig.align="center", fig.cap = "Descriptive quantities --- default output with ambiguous reference lines"---- # plotting plot(viztestCES, level = "ce", trans = \(x)x*100, ref_lines = "ambiguous")+ labs(y="Provinces", x="Average importance given to religion") + theme_bw()