## ----setup, include = FALSE--------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", dev = "ragg_png", out.height = "100%", out.width = "100%", fig.width = 7.2916667, fig.height = 7.2916667, message = FALSE, warning = FALSE, cache = FALSE ) library(dplyr) library(stringr) library(ggnormalviolin) library(ggplot2) library(purrr) library(simstandard) library(unusualprofile) ## ----more-setup, echo=FALSE--------------------------------------------------- myfont <- "Titillium Web" # font_add_google("Titillium Web", "Titillium Web") theme_set(theme_minimal(16, base_family = myfont)) update_geom_defaults(geom = "text", new = list(family = myfont)) update_geom_defaults(geom = "label", new = list(family = myfont)) update_geom_defaults(geom = "point", new = list(pch = 16)) #Rounds proportions to significant digits both near 0 and 1 PropRound <- function(p, maxDigits = 10) { d <- rep(2, length(p)) pp <- rep(0, length(p)) for (i in seq(1, length(p))) { if (p[i] > 0.99 || p[i] < 0.01) { d[i] <- min(maxDigits, 1 - 1 - floor(log10(abs( ifelse(p[i] < 0.5, p[i], 1 - p[i]) )))) } pp[i] <- formatC(round(p[i], digits = d[i]), digits = d[i], flag = "") if (round(p[i], digits = maxDigits) == 0) { pp[i] <- 0 } if (round(p[i], digits = maxDigits) == 1) { pp[i] <- 1 } gsub(" ", "", pp[i]) } return(gsub(" ", "", pp)) } numText <- function(x, digits = 2) { str_replace(formatC(x, digits = digits, format = "f"), pattern = "\\-", "−") } bmatrix <- function(x, digits = 2) { x_formatted <- apply(x, 1, formatC, digits = digits, format = "f") x_formatted <- apply(x_formatted, 1, str_remove, pattern = "^0") x_formatted[x == 0] <- "0" x_formatted[x == 1] <- "1" paste0("\\begin{bmatrix}\n", paste0( apply( x_formatted, MARGIN = 1, FUN = paste0, collapse = " & " ), collapse = "\\\\\n" ), "\n\\end{bmatrix}") } ## ----worked-model, echo = FALSE----------------------------------------------- model <- "X =~ 0.95 * X_1 + 0.90 * X_2 + 0.85 * X_3 + 0.60 * X_4" ## ----worked-data, echo = FALSE------------------------------------------------ # Create data.frame, and add the composite score d <- data.frame( X_1 = 2, X_2 = 3, X_3 = 1, X_4 = 2 ) %>% simstandard::add_composite_scores(m = model) ## ----standardized-scores, echo=FALSE------------------------------------------ # Standardized observed scores X <- d[1, -5] %>% t %>% as.vector %>% `names<-`(colnames(d)[-5]) ## ----one-dimensional, echo = FALSE, fig.align='center', fig.cap="A simple model with standardized loadings", out.width=300---- knitr::include_graphics("One_dimensional.svg") ## ----example-profile, echo = F, fig.cap="Figure 1. Example profile in a standard multivariate normal distribution."---- tibble( Variable = paste0("italic(X)[", 1:4, "]"), Score = X, vjust = c(1.5, -0.5, 1.5, 1.5) ) %>% ggplot(aes(Variable, Score)) + geom_normalviolin(aes(mu = 0, sigma = 1), fill = "gray", alpha = .4) + geom_line(aes(group = 1), color = "firebrick") + geom_point(pch = 16, color = "firebrick", size = 2) + geom_text(aes(label = Score, vjust = vjust), color = "firebrick") + geom_hline(yintercept = d$X_Composite) + scale_x_discrete( NULL, labels = function(l) { parse(text = l) } ) + scale_y_continuous() + annotate( geom = "text", x = 3.5, y = d$X_Composite, label = paste0("Composite = ", formatC(d$X_Composite, 2, format = "f")), vjust = -.6, size = 4.5 ) ## ----X-vector----------------------------------------------------------------- X <- c( X_1 = 2, X_2 = 3, X_3 = 1, X_4 = 2 ) X ## ----variable-names----------------------------------------------------------- v_observed <- names(X) v_latent <- "X" v_composite <- "X_Composite" v_names <- c(v_observed, v_composite) ## ----matrix-loadings---------------------------------------------------------- lambda <- c(0.95, 0.90, 0.85, 0.60) %>% matrix() %>% `rownames<-`(v_observed) %>% `colnames<-`(v_latent) lambda ## ----R-X---------------------------------------------------------------------- # Observed Correlations R_X <- lambda %*% t(lambda) %>% `diag<-`(1) R_X ## ----implied-corelations, results='asis', echo = FALSE------------------------ cat(paste0("$$R_{X} \\approx ", bmatrix(R_X), "$$")) ## ----weight-matrix------------------------------------------------------------ w <- cbind(diag(4), rep(1, 4)) %>% `rownames<-`(v_observed) %>% `colnames<-`(v_names) w ## ----sigma-calculations, include= FALSE--------------------------------------- Sigma <- (t(w) %*% R_X %*% w) Sigma ## ----Sigma-matrix, results='asis', echo = FALSE------------------------------- cat(paste0("$$\\Sigma \\approx ", bmatrix(Sigma), "$$")) ## ----ref.label="sigma-calculations", echo = TRUE------------------------------ Sigma <- (t(w) %*% R_X %*% w) Sigma ## ----R-matrix, results='asis', echo = FALSE----------------------------------- cat(paste0("$$R_{All} \\approx ", bmatrix(round(cov2cor(Sigma), 2)), "$$")) ## ----matrix-purist, echo = TRUE----------------------------------------------- D_root_inverted <- Sigma %>% diag() %>% sqrt() %>% diag() %>% solve() %>% `rownames<-`(v_names) %>% `colnames<-`(v_names) R_all <- D_root_inverted %*% Sigma %*% D_root_inverted R_all ## ----cor2cov------------------------------------------------------------------ # Convert covariance matrix to correlations R_all <- cov2cor(Sigma) R_all ## ----composite-calculations--------------------------------------------------- # Population means of observed variables mu_X <- rep_along(X, 0) # Population standard deviations of observed variables sd_X <- rep_along(X, 1) # Covariance Matrix Sigma_X <- diag(sd_X) %*% R_X %*% diag(sd_X) # Vector of ones ones <- rep_along(X, 1) # Standardized composite score z_C <- c(ones %*% (X - mu_X) / sqrt(ones %*% (Sigma_X) %*% ones)) ## ----expected-X--------------------------------------------------------------- # Predicted value of X, given composite score X_hat <- sd_X * z_C * R_all[v_observed, v_composite] + mu_X ## ----------------------------------------------------------------------------- d_mc <- c(sqrt(t(X - X_hat) %*% solve(Sigma_X) %*% (X - X_hat))) d_mc ## ----cm-p--------------------------------------------------------------------- # Number of observed variables k <- length(v_observed) # Number of composite variables j <- length(v_composite) # Cumulative distribution function p <- pchisq(d_mc ^ 2, df = k - j) p