## ---- include = FALSE---------------------------------------------------------
options(rmarkdown.html_vignette.check_title = FALSE)
knitr::opts_chunk$set(warning = FALSE, message = FALSE, fig.width=8, fig.height=5) 
knitr::opts_chunk$set(
  collapse = FALSE,
  comment = "#>"
)

## -----------------------------------------------------------------------------
if(!require(devtools)){
    install.packages("devtools", repos = "http://cran.us.r-project.org")
}

if(!require(sitepickR)){
    devtools::install_github("ElenaBadilloG/sitepickR")
    }

## -----------------------------------------------------------------------------
library(sitepickR)

## ---- results='asis'----------------------------------------------------------
rawCCD <- sitepickR::rawCCD
knitr::kable(head(rawCCD), format = "html")

## ---- results='asis'----------------------------------------------------------
dfCCD <- prepDF(rawCCD,
                 unitID="LEAID", subunitID="NCESSCH")
knitr::kable(dfCCD[1:10,(ncol(dfCCD)-5-1):ncol(dfCCD)], format = "html")

## -----------------------------------------------------------------------------
seed <- 1122 

## -----------------------------------------------------------------------------
calip <- 0.2 # maximum standard deviations of covariate distance around target population

## -----------------------------------------------------------------------------
Nu <- 100 # district sample size
K <- 10 # number of matches per district
Ns <- 5 # number of schools per potential district

## -----------------------------------------------------------------------------
uSampVarsCCD <- c("w.pct.frlunch", "w.pct.black", "w.pct.hisp", "w.pct.female") 

## -----------------------------------------------------------------------------
suSampVarsCCD <- c("sch.pct.frlunch", "sch.pct.black", "sch.pct.hisp", "sch.pct.female")

## -----------------------------------------------------------------------------
exactMatchVars <-  c("distr.type")

## -----------------------------------------------------------------------------
calipMatchVars <-  c("w.pct.black", "w.pct.hisp", "w.pct.female")

## -----------------------------------------------------------------------------
smOut <- selectMatch(df = dfCCD, # user dataset
                       unitID = "LEAID", # column name of unit ID in user dataset
                       subunitID = "NCESSCH", # column name of sub-unit ID in user dataset
                       unitVars = uSampVarsCCD, # name of unit level covariate columns
                       subunitSampVars = suSampVarsCCD, # name of sub-unit level covariate columns
                       exactMatchVars = exactMatchVars, # unit level categorical covariates on which to match exactly
                       calipMatchVars = calipMatchVars, # unit level numeric covariates on which to match within a radius
                       nUnitSamp = Nu, # original unit sample size
                       nRepUnits = K, # number of desired matches per initially selected unit
                       nsubUnits = Ns, # number of sub-units to sample from each candidate unit
                       calipValue = calip, # maximum distance on which to match specified unit level covariates (calipMatchVars)
                       seedN = seed, # random seed number
                       matchDistance = "mahalanobis", # metric used for matching units
                       sizeFlag = TRUE,
                       repFlag = FALSE, # pick matches without repetition
                       writeOut = FALSE, # write out a csv file for: 1) matched units and 2) selected sub-units
                       replacementUnitsFilename = "replacementsTable.csv", # filename for {districtA: [districtA replacement list]} table
                       subUnitTableFilename = "schoolsDirectory.csv" # filename for {districtA: [districtA schools]} table
)

## ---- results='asis'----------------------------------------------------------
districtReplTable <- smOut[[1]]
knitr::kable(head(districtReplTable), format = "html")

## ---- results='asis'----------------------------------------------------------
schoolDirectory <- smOut[[2]]
knitr::kable(head(dplyr::filter(schoolDirectory, !is.na(Sub_unit5_ID))), format = "html")


## ----fig.width=6, fig.height=5------------------------------------------------
unitLovePlot(smOut,
   title = "Standardized Mean Difference: \n Initially Selected Units vs. Population")

## ---- results='asis'----------------------------------------------------------
unitBalanceTab <- getSummary(smOut, diagnostic="unitBal")

knitr::kable(head(unitBalanceTab, 10), format = "html")

## ----fig.width=9, fig.height=6------------------------------------------------
matchBalance(smOut, 
  title = "Standardized Mean Difference: \
  Replacement Unit Groups (1...K) vs. Originally Selected Units")

## ---- results='asis'----------------------------------------------------------
matchBalanceTab <- getSummary(smOut, diagnostic="matchBal")
knitr::kable(head(matchBalanceTab, 20), format = "html")

## ----fig.width=7, fig.height=6------------------------------------------------
matchFreq(smOut,
             title="Match Frequency per Original Unit")

## ----fig.width=7, fig.height=6------------------------------------------------
matchCount(smOut,
             title="% of Successful Matches per Unit Group")

## ---- results='asis'----------------------------------------------------------
matchFreqTab <- getSummary(smOut, diagnostic="matchFreq")

knitr::kable(matchFreqTab, format = "html")

## ---- results='asis'----------------------------------------------------------
matchCountTab <- getSummary(smOut, diagnostic="matchCount")

knitr::kable(matchCountTab, format = "html")

## ----fig.width=9, fig.height=6------------------------------------------------
subUnitBalance(smOut,
     title="Standardized Mean Difference: \n Sub-units from Original + Replacement Unit Groups vs. Population")

## ---- results='asis'----------------------------------------------------------
subUnitBalanceTab <- getSummary(smOut, diagnostic="subunitBal")
knitr::kable(subUnitBalanceTab, format = "html")