## ----setup, include = FALSE--------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) options(digits = 4) ## ----------------------------------------------------------------------------- library(coda.base) X = parliament2017[,-1] ## ----------------------------------------------------------------------------- B1 = pb_basis(X, method = "exact") ## ----fig.width=7.5, fig.height=4.5-------------------------------------------- plot_balance(B1) ## ----------------------------------------------------------------------------- apply(coordinates(X, B1), 2, var) ## ----echo=FALSE, fig.width=7.5, fig.height=4.5, warning=FALSE----------------- library(ggplot2) D = 10 # time_ = sapply(5:20, function(D){ # print(D) # x = matrix(rlnorm(100*D), ncol=D) # a = tic() # r = pb_basis(x, method = 'exact') # b = toc() # b$toc - b$tic # }) # dplot = data.frame( # parts = tail(5:20, 11), # time = tail(time_, 11) # ) dplot = structure(list(parts = 10:20, time = c(0.00499999999999545, 0.0119999999999436, 0.0260000000000673, 0.0749999999999318, 0.243000000000052, 0.831999999999994, 2.64400000000001, 9.56200000000001, 28.787, 95.4560000000001, 311.133)), class = "data.frame", row.names = c(NA, -11L)) ggplot(data=dplot) + geom_point(aes(x=parts, y=time)) + geom_segment(aes(x=parts, xend =parts, y = 0, yend=time)) + labs(x = 'Number of parts', y = 'Time in seconds (logarithmic scale)', title = 'Time needed to calculate Principal Balances', caption = "Times measured in a MacBook Pro (13-inch, 2017) \n2,3 GHz Dual-Core Intel Core i5.16 GB 2133 MHz LPDDR3") + scale_x_continuous(breaks = tail(5:20, 11)) + scale_y_continuous(trans = 'log', breaks = c(0.004, 0.04, 0.4, 4, 40, 400), labels = c("0.004", "0.04", "0.4", 4, 40, 400)) + theme_minimal() ## ----------------------------------------------------------------------------- D = as.dist(variation_array(X)) D ## ----fig.width=7.5, fig.height=4.5-------------------------------------------- B2 = pb_basis(X, method = 'cluster') plot_balance(B2) ## ----------------------------------------------------------------------------- apply(coordinates(X, B2), 2, var) ## ----fig.width=7.5, fig.height=4.5-------------------------------------------- B3 = pb_basis(X, method = 'constrained') plot_balance(B3) ## ----------------------------------------------------------------------------- apply(coordinates(X, B3), 2, var) ## ----eval=FALSE, echo=FALSE--------------------------------------------------- # hc = hclust_dendrogram(B1) # hcd = as.dendrogram(hc) # dd = dendro_data(hcd) # ggdendrogram(dd) # dd$segments = dd$segments %>% # mutate( # end_node = yend == 0 # ) # # p <- ggplot(dd$segments) + # geom_segment(aes(x = x, y = y, xend = xend, yend = yend, linetype = end_node))+ # geom_label(data = dd$labels, aes(x, y, label = label), # hjust = 0.5, angle = 90, size = 4) + # theme_void() + scale_linetype_discrete(guide=FALSE) # p # # ## Build tree # build_tree_order = function(B, ibalance){ # balance = B[,ibalance] # if(sum(balance != 0) == 2){ # return(ibalance) # } # L = NULL; R = NULL # left_ = balance < 0 # if(sum(left_) > 1){ # L = Recall(B, which(apply((B != 0 & left_) | (B == 0 & !left_), 2, all))) # } # right_ = balance > 0 # if(sum(right_) > 1){ # R = Recall(B, which(apply((B != 0 & right_) | (B == 0 & !right_), 2, all))) # } # return(unname(c(L, R, ibalance))) # } # ord_ = build_tree_order(B, which(apply(B != 0, 2, all))) # TREE = setNames(lapply(1:ncol(X), function(i) list('c' = i)), names(X)[ORDER]) # for(i in ord_){ # balance = B[,i] # name_ = paste(sort(names(balance)[balance != 0]), collapse = '_') # name_left = paste(sort(names(balance)[balance < 0]), collapse = '_') # name_right = paste(sort(names(balance)[balance > 0]), collapse = '_') # l_node = list( # list( # name = name_, # left = name_left, # right = name_right, # l = TREE[[name_left]]$c, # r = TREE[[name_right]]$c, # c = (TREE[[name_left]]$c+TREE[[name_right]]$c)/2 # )) # names(l_node) = name_ # TREE = c(TREE, l_node) # } # nodes = sapply(ord_, function(i){ # balance = B[,i] # paste(sort(names(balance)[balance != 0]), collapse = '_') # }) # TREE = TREE[nodes] # lapply(TREE, as_tibble) %>% bind_rows() # for(node in rev(nodes)){ # TREE[[node]] # } # # # lapply(nodes, function(name_){ # # tibble( # # name = name_, # # name_l = TREE[[name_]]$left, # # name_r = TREE[[name_]]$right, # # l = TREE[[name_]]$l, # # r = TREE[[name_]]$r, # # c = TREE[[name_]]$c, # # var = var(H[,i]) # # ) # # }) %>% bind_rows() # # # H = coordinates(X, B) # H_mean = colMeans(H) # H_var = apply(H, 2, var) # # names(X)[ORDER] # # l_nodes = lapply(rev(ord_), function(i){ # list( # # balance = B[,i], # mean = boot::inv.logit(H_mean[i]), # var = H_var[i] # ) # }) # names(l_nodes) = sapply(l_nodes, function(x) paste(sort(names(x$balance)[x$balance != 0]), collapse = '_')) # l_nodes ## ----eval=FALSE, include=FALSE------------------------------------------------ # library(coda.base) # X = iris[,1:4] # B = pb_basis(X, method = 'exact') # rownames(B) = names(X) # H = coordinates(X, B) # apply(H, 2, var) ## ----eval = FALSE, include=FALSE---------------------------------------------- # plot(H[,1], H[,2]) ## ----eval=FALSE, include=FALSE------------------------------------------------ # lX = split(X, iris$Species) # m_ = lapply(lX, colMeans) # s_ = lapply(lX, cov) # S = cov(X) # S_ = replicate(3, S, simplify = FALSE) ## ----eval=FALSE, include=FALSE------------------------------------------------ # Prob = mapply(function(m,s){ # mvtnorm::dmvnorm(X, m, s) # }, m_, S_) ## ----eval=FALSE, include=FALSE------------------------------------------------ # B_1 = pb_basis(Prob, method = 'exact') # H_1 = coordinates(Prob, B_1) # plot(H_1, col = iris$Species) ## ----eval=FALSE, include=FALSE------------------------------------------------ # B_2 = pc_basis(Prob) # H_2 = coordinates(Prob, B_2) # plot(-H_2, col = iris$Species) ## ----eval=FALSE, include=FALSE------------------------------------------------ # library(fpc) # dp = discrproj(X, iris$Species) # plot(dp$proj[,1:2], col = iris$Species) ## ----eval=FALSE, echo=FALSE--------------------------------------------------- # @article{cite-key, # Date-Added = {2020-06-13 08:44:39 +0000}, # Date-Modified = {2020-06-13 08:44:39 +0000}, # Id = {ref25}, # Title = {Pawlowsky-Glahn V, Egozcue JJ, Tolosana-Delgado R (2011) Principal balances. In Egozcue JJ, Tolosana-Delgado R, Ortego M (eds) Proceedings of the 4th international workshop on compositional data analysis, Girona, Spain, pp 1--10}, # Ty = {STD}} # # @article{cite-key, # Author = {Mart{\'\i}n-Fern{\'a}ndez, J. A. and Pawlowsky-Glahn, V. and Egozcue, J. J. and Tolosona-Delgado, R.}, # Da = {2018/04/01}, # Date-Added = {2020-06-13 08:42:47 +0000}, # Date-Modified = {2020-06-13 08:42:47 +0000}, # Doi = {10.1007/s11004-017-9712-z}, # Id = {Mart{\'\i}n-Fern{\'a}ndez2018}, # Isbn = {1874-8953}, # Journal = {Mathematical Geosciences}, # Number = {3}, # Pages = {273--298}, # Title = {Advances in Principal Balances for Compositional Data}, # Ty = {JOUR}, # Url = {https://doi.org/10.1007/s11004-017-9712-z}, # Volume = {50}, # Year = {2018}, # Bdsk-Url-1 = {https://doi.org/10.1007/s11004-017-9712-z}} # # Frank Ruskey LEcture note computer science 762 (1993) 205-206 # @String{j-LECT-NOTES-COMP-SCI = "Lecture Notes in Computer Science"} # @String{ack-nhfb = "Nelson H. F. Beebe, # University of Utah, # Department of Mathematics, 110 LCB, # 155 S 1400 E RM 233, # Salt Lake City, UT 84112-0090, USA, # Tel: +1 801 581 5254, # FAX: +1 801 581 4148, # e-mail: \path|beebe@math.utah.edu|, # \path|beebe@acm.org|, # \path|beebe@computer.org| (Internet), # URL: \path|http://www.math.utah.edu/~beebe/|"} # @Article{Ruskey:1993:SCG, # author = "F. Ruskey", # title = "Simple Combinatorial Gray Codes Constructed by # Reversing Sublists", # journal = j-LECT-NOTES-COMP-SCI, # volume = "762", # pages = "201--208", # year = "1993", # CODEN = "LNCSD9", # ISSN = "0302-9743 (print), 1611-3349 (electronic)", # ISSN-L = "0302-9743", # bibdate = "Wed Sep 15 10:01:31 MDT 1999", # bibsource = "http://www.math.utah.edu/pub/tex/bib/lncs1993.bib", # acknowledgement = ack-nhfb, # keywords = "algorithms; computation; ISAAC", # }