# Record-linkage walkthrough using the reclin2 package.
#
# Links the two example data sets shipped with reclin2 (`linkexample1`,
# `linkexample2`): generate candidate pairs, compare them on a set of
# fields, score the pairs, select matches, and build the linked data set.
# Intermediate results are printed after each step so the effect of every
# call can be inspected.

library(reclin2)

# Load example data ----
data("linkexample1", "linkexample2")
print(linkexample1)
print(linkexample2)

# Fields used to compare the two records of a pair. Defined once because the
# same set is passed to every comparison and scoring call below.
link_vars <- c("lastname", "firstname", "address", "sex")

# Generate candidate pairs ----
# Block on postcode.
pairs <- pair_blocking(linkexample1, linkexample2, "postcode")
print(pairs)

# Compare pairs ----
pairs <- compare_pairs(pairs, on = link_vars)
print(pairs)

# Same comparison with `inplace = TRUE`; the result is deliberately not
# reassigned here, and `pairs` is printed again afterwards.
compare_pairs(pairs, on = link_vars, inplace = TRUE)
print(pairs)

# Repeat the comparison with a Jaro-Winkler default comparator (0.9).
compare_pairs(
  pairs,
  on = link_vars,
  default_comparator = cmp_jarowinkler(0.9),
  inplace = TRUE
)
print(pairs)

# Score pairs: EM model ----
m <- problink_em(~ lastname + firstname + address + sex, data = pairs)
print(m)

# NOTE(review): the selection steps below read their score from a `weights`
# column, which is presumably what `predict(..., add = TRUE)` adds to
# `pairs` -- confirm against the reclin2 documentation.
pairs <- predict(m, pairs = pairs, add = TRUE)
print(pairs)

# Score pairs: simple scores ----
# Both calls write to the same "score" variable, so the second call
# (explicit per-field weights) replaces the result of the first.
pairs <- score_simple(pairs, "score", on = link_vars)
pairs <- score_simple(
  pairs, "score",
  on = link_vars,
  w1 = c(lastname = 2, firstname = 2, address = 1, sex = 0.5),
  w0 = -1,
  wna = 0
)

# Select pairs ----
# Threshold selection on the `weights` score; the result is stored in the
# `threshold` variable.
pairs <- select_threshold(pairs, "threshold", score = "weights", threshold = 8)
print(pairs)

# Derive a `truth` variable by comparing the `id` column of both data sets,
# then cross-tabulate it against each selection.
pairs <- compare_vars(pairs, "truth", on_x = "id", on_y = "id")
print(pairs)

table(pairs$truth, pairs$threshold)

pairs <- select_greedy(pairs, "weights", variable = "greedy", threshold = 0)
table(pairs$truth, pairs$greedy)

pairs <- select_n_to_m(pairs, "weights", variable = "ntom", threshold = 0)
table(pairs$truth, pairs$ntom)

# Build the linked data set ----
linked_data_set <- link(pairs, selection = "ntom")
print(linked_data_set)