rdhte-examples

rm(list=ls(all=TRUE))
library(rdhte)
library(rdrobust)
library(sandwich)
library(multcomp)
#> Loading required package: mvtnorm
#> Loading required package: survival
#> Loading required package: TH.data
#> Loading required package: MASS
#> 
#> Attaching package: 'TH.data'
#> The following object is masked from 'package:MASS':
#> 
#>     geyser


# Generate data
set.seed(123)
n <- 5000
X <- runif(n, -1, 1) * 2  # Running variable in [-2,2]
# Cluster variable (C) with 10 clusters
C <- sample(1:10, n, replace = TRUE)  # categorical variable (e.g., region, firm)

# Heterogeneity variables
W1 <- rbinom(n, 1, 0.5)  # Binary variable (e.g., gender, policy group)
W2 <- sample(1:3, n, replace = TRUE)  # W takes values 1, 2, or 3
W3 <- runif(n, 0, 10)  # Continuous variable (e.g., income, age)

# Define heterogeneous treatment effects
tau_W1 <- 3 * W1  # The treatment effect depends on W
tau_W2 <- ifelse(W2 == 1, 2, ifelse(W2 == 2, 4, 6))  # Treatment effect varies by W
tau_W3 <- 2 + 0.5 * W3  # Treatment effect increases with W
# Define heterogeneous treatment effects
tau_W4 <- ifelse(W1 == 0 & W2 == 1, 2,  # Low education, male
                ifelse(W1 == 0 & W2 == 2, 4,  # Medium education, male
                       ifelse(W1 == 0 & W2 == 3, 6,  # High education, male
                              ifelse(W1 == 1 & W2 == 1, 3,  # Low education, female
                                     ifelse(W1 == 1 & W2 == 2, 5,  # Medium education, female
                                            7)))))  # High education, female

error_term <- rep(rnorm(10, 0, 1), length.out = n)  # Cluster-level errors
epsilon <- error_term + rnorm(n)  # Add individual-level noise to the errors

# Generate outcome variable with heterogeneous treatment effect at X = 0
Y1 <- 3 + 2 * X + 1.5 * X^2 + 0.5 * X^3 + sin(2 * X) + tau_W1 * (X >= 0) + rnorm(n)  # Treatment effect = 3*W at cutoff
Y2 <- 3 + 2 * X + 1.5 * X^2 + 0.5 * X^3 + sin(2 * X) + tau_W2 * (X >= 0) + rnorm(n)  # Treatment effect = tau_W at cutoff
Y3 <- 3 + 2 * X + 1.5 * X^2 + 0.5 * X^3 + sin(2 * X) + tau_W3 * (X >= 0) + rnorm(n)
Y4 <- 3 + 2 * X + 1.5 * X^2 + 0.5 * X^3 + sin(2 * X) + tau_W4 * (X >= 0) + rnorm(n)
Y5 <- 3 + 2 * X + 1.5 * X^2 + 0.5 * X^3 + sin(2 * X) + tau_W2 * (X >= 0) + epsilon  

### Case 1: one subgroup variable W2.
m1 <- rdhte(y = Y2, x = X, covs.hte = factor(W2))
summary(m1)
#> Sharp RD Heterogeneous Treatment Effects: Subgroups.
#> 
#> Number of Obs.                 5000
#> BW type                       mserd
#> Kernel                   Triangular
#> VCE method                      HC3
#> 
#> Number of Obs.                 1628         1679
#> Order est. (p)                    1            1
#> Order bias  (q)                   2            2
#> 
#> ================================================================================================================
#>                    Point        Robust Inference
#>   factor(W2)    Estimate           z    Pr(>|z|)      [ 95% C.I. ]             Nh-       Nh+        h-        h+
#> ----------------------------------------------------------------------------------------------------------------
#>            1       2.006       7.491       0.000   [1.514 , 2.587]             281       242     0.630     0.630
#>            2       3.806      12.008       0.000   [3.052 , 4.243]             224       225     0.538     0.538
#>            3       6.004      18.152       0.000   [5.448 , 6.767]             230       218     0.506     0.506
#> ================================================================================================================
linfct <- c("`factor(W2)2` - `factor(W2)3` = 0", "`factor(W2)2` = 0")
rdhte_lincom(model = m1, linfct = linfct)
#> $individual
#>                      hypothesis estimate t_stat p_value conf.low conf.high
#> 1 `factor(W2)2` - `factor(W2)3`   -2.199 -5.428       0   -3.348    -1.572
#> 2                   factor(W2)2    3.806 12.008       0    3.052     4.243
#> 
#> $joint
#>   statistic.chi2 X2L p_value
#> 1        473.703   2       0

# Case 3: one continuous variable W3.
m2 <- rdhte(y = Y3, x = X, covs.hte = W3)
summary(m2)
#> Sharp RD Heterogeneous Treatment Effects: Continuous.
#> 
#> Number of Obs.                 5000
#> BW type                       mserd
#> Kernel                   Triangular
#> VCE method                      HC3
#> 
#> Number of Obs.                 2521         2479
#> Eff. Number of Obs.               0            0
#> Order est. (p)                    1            1
#> Order bias  (q)                   2            2
#> BW est. (h)                   0.452        0.452
#> 
#> ========================================================================
#>                    Point        Robust Inference
#>                 Estimate           z    Pr(>|z|)      [ 95% C.I. ]      
#> ------------------------------------------------------------------------
#>            T       1.656       4.750       0.000   [1.071 , 2.577]      
#>         T:W3       0.551       8.053       0.000   [0.387 , 0.636]      
#> ========================================================================
linfct <- c("T - T:W3 = 0")
rdhte_lincom(model = m2, linfct = linfct)
#> $individual
#>   hypothesis estimate t_stat p_value conf.low conf.high
#> 1   T - T:W3    1.104  2.973   0.003    0.447     2.177
#> 
#> $joint
#>   statistic.chi2 X1L p_value
#> 1           8.84   1   0.003

# Case 3: two (or more) subgroup variables:
m3 <- rdhte(y = Y2, x = X, covs.hte = factor(W2):factor(W1))
summary(m3)
#> Sharp RD Heterogeneous Treatment Effects: Subgroups.
#> 
#> Number of Obs.                 5000
#> BW type                       mserd
#> Kernel                   Triangular
#> VCE method                      HC3
#> 
#> Number of Obs.                  825          803
#> Order est. (p)                    1            1
#> Order bias  (q)                   2            2
#> 
#> =========================================================================================================================
#>                             Point        Robust Inference
#> factor(W2):factor(W1)    Estimate           z    Pr(>|z|)      [ 95% C.I. ]             Nh-       Nh+        h-        h+
#> -------------------------------------------------------------------------------------------------------------------------
#>                   1:0       2.279       4.561       0.000   [1.318 , 3.304]             100        93     0.466     0.466
#>                   1:1       1.848       4.904       0.000   [1.072 , 2.501]             131       113     0.564     0.564
#>                   2:0       3.944      11.296       0.000   [3.268 , 4.640]             158       166     0.704     0.704
#>                   2:1       3.458       5.118       0.000   [2.030 , 4.549]              81        77     0.427     0.427
#>                   3:0       5.793       9.631       0.000   [4.726 , 7.141]             120       107     0.508     0.508
#>                   3:1       6.150      16.433       0.000   [5.359 , 6.810]             121       121     0.549     0.549
#> =========================================================================================================================
linfct <- c("`factor(W2):factor(W1)1:0` - `factor(W2):factor(W1)1:1`= 0")
rdhte_lincom(model = m3, linfct = linfct)
#> $individual
#>                                                hypothesis estimate t_stat
#> 1 `factor(W2):factor(W1)1:0` - `factor(W2):factor(W1)1:1`    0.431   0.84
#>   p_value conf.low conf.high
#> 1   0.401   -0.699     1.748
#> 
#> $joint
#>   statistic.chi2 X1L p_value
#> 1          0.706   1   0.401

# Case 4: two (or more) continuous variables (this includes polynomials). 
m4 <- rdhte(y = Y3, x = X, covs.hte = "W2 + W3")
summary(m4)
#> Sharp RD Heterogeneous Treatment Effects: Continuous.
#> 
#> Number of Obs.                 5000
#> BW type                       mserd
#> Kernel                   Triangular
#> VCE method                      HC3
#> 
#> Number of Obs.                 2521         2479
#> Eff. Number of Obs.               0            0
#> Order est. (p)                    1            1
#> Order bias  (q)                   2            2
#> BW est. (h)                   0.452        0.452
#> 
#> ========================================================================
#>                    Point        Robust Inference
#>                 Estimate           z    Pr(>|z|)      [ 95% C.I. ]      
#> ------------------------------------------------------------------------
#>            T       1.335       2.033       0.042   [0.045 , 2.491]      
#>         T:W2       0.157       1.204       0.229  [-0.166 , 0.697]      
#>         T:W3       0.550       8.063       0.000   [0.387 , 0.636]      
#> ========================================================================
linfct <- c("T:W2 - T:W3 = 0")
rdhte_lincom(model = m4, linfct = linfct)
#> $individual
#>    hypothesis estimate t_stat p_value conf.low conf.high
#> 1 T:W2 - T:W3   -0.393 -1.093   0.274   -0.689     0.195
#> 
#> $joint
#>   statistic.chi2 X1L p_value
#> 1          1.196   1   0.274

# Case 5: interaction between continous and subgroup varibles.
m5 <- rdhte(y = Y3, x = X, covs.hte = "W3*factor(W1)")
summary(m5)
#> Sharp RD Heterogeneous Treatment Effects: Continuous.
#> 
#> Number of Obs.                 5000
#> BW type                       mserd
#> Kernel                   Triangular
#> VCE method                      HC3
#> 
#> Number of Obs.                 2521         2479
#> Eff. Number of Obs.               0            0
#> Order est. (p)                    1            1
#> Order bias  (q)                   2            2
#> BW est. (h)                   0.452        0.452
#> 
#> ===========================================================================
#>                       Point        Robust Inference
#>                    Estimate           z    Pr(>|z|)      [ 95% C.I. ]      
#> ---------------------------------------------------------------------------
#>               T       1.983       4.248       0.000   [1.156 , 3.136]      
#>            T:W3       0.594       5.737       0.000   [0.361 , 0.735]      
#>   T:factor.W1.0      -0.639      -0.910       0.363  [-2.214 , 0.810]      
#> T:W3.factor.W1.1      -0.085      -0.481       0.631  [-0.318 , 0.193]      
#> ===========================================================================
linfct <- c("T:W3.factor.W1.1 = 0")
rdhte_lincom(model = m5, linfct = linfct)
#> $individual
#>         hypothesis estimate t_stat p_value conf.low conf.high
#> 1 T:W3.factor.W1.1   -0.085 -0.481   0.631   -0.318     0.193
#> 
#> $joint
#>   statistic.chi2 X1L p_value
#> 1          0.231   1   0.631

# Format: cases 1-3 can be either a formula or a expression. Cases 4-5 only work as a formula, thus they need to be included in quotes. 
# Note: some expressions work either way, but with different interpretations. For example, W1 + W2 vs "W1 + W2".
# Exact expressions for setting the linear restrictions in `linfct` can be obtained from the names in the `coef` object returned by `rdhte`.