rm(list=ls(all=TRUE))
library(rdhte)
library(rdrobust)
library(sandwich)
library(multcomp)
#> Loading required package: mvtnorm
#> Loading required package: survival
#> Loading required package: TH.data
#> Loading required package: MASS
#>
#> Attaching package: 'TH.data'
#> The following object is masked from 'package:MASS':
#>
#> geyser
# Generate data
set.seed(123)
n <- 5000
X <- runif(n, -1, 1) * 2 # Running variable in [-2,2]
# Cluster variable (C) with 10 clusters
C <- sample(1:10, n, replace = TRUE) # categorical variable (e.g., region, firm)
# Heterogeneity variables
W1 <- rbinom(n, 1, 0.5) # Binary variable (e.g., gender, policy group)
W2 <- sample(1:3, n, replace = TRUE) # W takes values 1, 2, or 3
W3 <- runif(n, 0, 10) # Continuous variable (e.g., income, age)
# Define heterogeneous treatment effects
tau_W1 <- 3 * W1 # The treatment effect depends on W
tau_W2 <- ifelse(W2 == 1, 2, ifelse(W2 == 2, 4, 6)) # Treatment effect varies by W
tau_W3 <- 2 + 0.5 * W3 # Treatment effect increases with W
# Define heterogeneous treatment effects
tau_W4 <- ifelse(W1 == 0 & W2 == 1, 2, # Low education, male
ifelse(W1 == 0 & W2 == 2, 4, # Medium education, male
ifelse(W1 == 0 & W2 == 3, 6, # High education, male
ifelse(W1 == 1 & W2 == 1, 3, # Low education, female
ifelse(W1 == 1 & W2 == 2, 5, # Medium education, female
7))))) # High education, female
error_term <- rep(rnorm(10, 0, 1), length.out = n) # Cluster-level errors
epsilon <- error_term + rnorm(n) # Add individual-level noise to the errors
# Generate outcome variable with heterogeneous treatment effect at X = 0
Y1 <- 3 + 2 * X + 1.5 * X^2 + 0.5 * X^3 + sin(2 * X) + tau_W1 * (X >= 0) + rnorm(n) # Treatment effect = 3*W at cutoff
Y2 <- 3 + 2 * X + 1.5 * X^2 + 0.5 * X^3 + sin(2 * X) + tau_W2 * (X >= 0) + rnorm(n) # Treatment effect = tau_W at cutoff
Y3 <- 3 + 2 * X + 1.5 * X^2 + 0.5 * X^3 + sin(2 * X) + tau_W3 * (X >= 0) + rnorm(n)
Y4 <- 3 + 2 * X + 1.5 * X^2 + 0.5 * X^3 + sin(2 * X) + tau_W4 * (X >= 0) + rnorm(n)
Y5 <- 3 + 2 * X + 1.5 * X^2 + 0.5 * X^3 + sin(2 * X) + tau_W2 * (X >= 0) + epsilon
### Case 1: one subgroup variable W2.
m1 <- rdhte(y = Y2, x = X, covs.hte = factor(W2))
summary(m1)
#> Sharp RD Heterogeneous Treatment Effects: Subgroups.
#>
#> Number of Obs. 5000
#> BW type mserd
#> Kernel Triangular
#> VCE method HC3
#>
#> Number of Obs. 1628 1679
#> Order est. (p) 1 1
#> Order bias (q) 2 2
#>
#> ================================================================================================================
#> Point Robust Inference
#> factor(W2) Estimate z Pr(>|z|) [ 95% C.I. ] Nh- Nh+ h- h+
#> ----------------------------------------------------------------------------------------------------------------
#> 1 2.006 7.491 0.000 [1.514 , 2.587] 281 242 0.630 0.630
#> 2 3.806 12.008 0.000 [3.052 , 4.243] 224 225 0.538 0.538
#> 3 6.004 18.152 0.000 [5.448 , 6.767] 230 218 0.506 0.506
#> ================================================================================================================
linfct <- c("`factor(W2)2` - `factor(W2)3` = 0", "`factor(W2)2` = 0")
rdhte_lincom(model = m1, linfct = linfct)
#> $individual
#> hypothesis estimate t_stat p_value conf.low conf.high
#> 1 `factor(W2)2` - `factor(W2)3` -2.199 -5.428 0 -3.348 -1.572
#> 2 factor(W2)2 3.806 12.008 0 3.052 4.243
#>
#> $joint
#> statistic.chi2 X2L p_value
#> 1 473.703 2 0
# Case 3: one continuous variable W3.
m2 <- rdhte(y = Y3, x = X, covs.hte = W3)
summary(m2)
#> Sharp RD Heterogeneous Treatment Effects: Continuous.
#>
#> Number of Obs. 5000
#> BW type mserd
#> Kernel Triangular
#> VCE method HC3
#>
#> Number of Obs. 2521 2479
#> Eff. Number of Obs. 0 0
#> Order est. (p) 1 1
#> Order bias (q) 2 2
#> BW est. (h) 0.452 0.452
#>
#> ========================================================================
#> Point Robust Inference
#> Estimate z Pr(>|z|) [ 95% C.I. ]
#> ------------------------------------------------------------------------
#> T 1.656 4.750 0.000 [1.071 , 2.577]
#> T:W3 0.551 8.053 0.000 [0.387 , 0.636]
#> ========================================================================
linfct <- c("T - T:W3 = 0")
rdhte_lincom(model = m2, linfct = linfct)
#> $individual
#> hypothesis estimate t_stat p_value conf.low conf.high
#> 1 T - T:W3 1.104 2.973 0.003 0.447 2.177
#>
#> $joint
#> statistic.chi2 X1L p_value
#> 1 8.84 1 0.003
# Case 3: two (or more) subgroup variables:
m3 <- rdhte(y = Y2, x = X, covs.hte = factor(W2):factor(W1))
summary(m3)
#> Sharp RD Heterogeneous Treatment Effects: Subgroups.
#>
#> Number of Obs. 5000
#> BW type mserd
#> Kernel Triangular
#> VCE method HC3
#>
#> Number of Obs. 825 803
#> Order est. (p) 1 1
#> Order bias (q) 2 2
#>
#> =========================================================================================================================
#> Point Robust Inference
#> factor(W2):factor(W1) Estimate z Pr(>|z|) [ 95% C.I. ] Nh- Nh+ h- h+
#> -------------------------------------------------------------------------------------------------------------------------
#> 1:0 2.279 4.561 0.000 [1.318 , 3.304] 100 93 0.466 0.466
#> 1:1 1.848 4.904 0.000 [1.072 , 2.501] 131 113 0.564 0.564
#> 2:0 3.944 11.296 0.000 [3.268 , 4.640] 158 166 0.704 0.704
#> 2:1 3.458 5.118 0.000 [2.030 , 4.549] 81 77 0.427 0.427
#> 3:0 5.793 9.631 0.000 [4.726 , 7.141] 120 107 0.508 0.508
#> 3:1 6.150 16.433 0.000 [5.359 , 6.810] 121 121 0.549 0.549
#> =========================================================================================================================
linfct <- c("`factor(W2):factor(W1)1:0` - `factor(W2):factor(W1)1:1`= 0")
rdhte_lincom(model = m3, linfct = linfct)
#> $individual
#> hypothesis estimate t_stat
#> 1 `factor(W2):factor(W1)1:0` - `factor(W2):factor(W1)1:1` 0.431 0.84
#> p_value conf.low conf.high
#> 1 0.401 -0.699 1.748
#>
#> $joint
#> statistic.chi2 X1L p_value
#> 1 0.706 1 0.401
# Case 4: two (or more) continuous variables (this includes polynomials).
m4 <- rdhte(y = Y3, x = X, covs.hte = "W2 + W3")
summary(m4)
#> Sharp RD Heterogeneous Treatment Effects: Continuous.
#>
#> Number of Obs. 5000
#> BW type mserd
#> Kernel Triangular
#> VCE method HC3
#>
#> Number of Obs. 2521 2479
#> Eff. Number of Obs. 0 0
#> Order est. (p) 1 1
#> Order bias (q) 2 2
#> BW est. (h) 0.452 0.452
#>
#> ========================================================================
#> Point Robust Inference
#> Estimate z Pr(>|z|) [ 95% C.I. ]
#> ------------------------------------------------------------------------
#> T 1.335 2.033 0.042 [0.045 , 2.491]
#> T:W2 0.157 1.204 0.229 [-0.166 , 0.697]
#> T:W3 0.550 8.063 0.000 [0.387 , 0.636]
#> ========================================================================
linfct <- c("T:W2 - T:W3 = 0")
rdhte_lincom(model = m4, linfct = linfct)
#> $individual
#> hypothesis estimate t_stat p_value conf.low conf.high
#> 1 T:W2 - T:W3 -0.393 -1.093 0.274 -0.689 0.195
#>
#> $joint
#> statistic.chi2 X1L p_value
#> 1 1.196 1 0.274
# Case 5: interaction between continous and subgroup varibles.
m5 <- rdhte(y = Y3, x = X, covs.hte = "W3*factor(W1)")
summary(m5)
#> Sharp RD Heterogeneous Treatment Effects: Continuous.
#>
#> Number of Obs. 5000
#> BW type mserd
#> Kernel Triangular
#> VCE method HC3
#>
#> Number of Obs. 2521 2479
#> Eff. Number of Obs. 0 0
#> Order est. (p) 1 1
#> Order bias (q) 2 2
#> BW est. (h) 0.452 0.452
#>
#> ===========================================================================
#> Point Robust Inference
#> Estimate z Pr(>|z|) [ 95% C.I. ]
#> ---------------------------------------------------------------------------
#> T 1.983 4.248 0.000 [1.156 , 3.136]
#> T:W3 0.594 5.737 0.000 [0.361 , 0.735]
#> T:factor.W1.0 -0.639 -0.910 0.363 [-2.214 , 0.810]
#> T:W3.factor.W1.1 -0.085 -0.481 0.631 [-0.318 , 0.193]
#> ===========================================================================
linfct <- c("T:W3.factor.W1.1 = 0")
rdhte_lincom(model = m5, linfct = linfct)
#> $individual
#> hypothesis estimate t_stat p_value conf.low conf.high
#> 1 T:W3.factor.W1.1 -0.085 -0.481 0.631 -0.318 0.193
#>
#> $joint
#> statistic.chi2 X1L p_value
#> 1 0.231 1 0.631
# Format: cases 1-3 can be either a formula or a expression. Cases 4-5 only work as a formula, thus they need to be included in quotes.
# Note: some expressions work either way, but with different interpretations. For example, W1 + W2 vs "W1 + W2".
# Exact expressions for setting the linear restrictions in `linfct` can be obtained from the names in the `coef` object returned by `rdhte`.