## ---- echo=TRUE, message=FALSE------------------------------------------------
library(philr); packageVersion("philr")
library(ape); packageVersion("ape")
library(ggplot2); packageVersion("ggplot2")

## ---- echo=TRUE, message=FALSE------------------------------------------------
library(mia); packageVersion("mia")
library(dplyr); packageVersion("dplyr")
data(GlobalPatterns, package = "mia")

## ---- message=FALSE, warning=FALSE--------------------------------------------
## Select prevalent taxa 
tse <-  GlobalPatterns %>% subsetByPrevalentTaxa(
                               detection = 3,
                               prevalence = 20/100,
                               as_relative = FALSE)

## Pick taxa that have notable abundance variation across sammples
variable.taxa <- apply(assays(tse)$counts, 1, function(x) sd(x)/mean(x) > 3.0)
tse <- tse[variable.taxa,]
# Collapse the tree!
# Otherwise the original tree with all nodes is kept
# (including those that were filtered out from rowData)
tree <- ape::keep.tip(phy = rowTree(tse), tip = rowLinks(tse)$nodeNum)
rowTree(tse) <- tree

## Add a new assay with a pseudocount 
assays(tse)$counts.shifted <- assays(tse)$counts + 1 

## ---- echo=FALSE--------------------------------------------------------------
tse

## ---- message=FALSE, warning=FALSE--------------------------------------------
library(ape); packageVersion("ape")
is.rooted(tree) # Is the tree Rooted?
is.binary(tree) # All multichotomies resolved?

## ---- message=FALSE, warning=FALSE--------------------------------------------
tree <- makeNodeLabel(tree, method="number", prefix='n')

# Add the modified tree back to the (`TreeSE`) data object 
rowTree(tse) <- tree

## -----------------------------------------------------------------------------
# Extract taxonomy table from the TreeSE object
tax <- rowData(tse)[,taxonomyRanks(tse)]

# Get name balances
name.balance(tree, tax, 'n1')

## -----------------------------------------------------------------------------
otu.table <- t(as(assays(tse)$counts.shifted, "matrix"))
tree <- rowTree(tse)
metadata <- colData(tse)
tax <- rowData(tse)[,taxonomyRanks(tse)]

otu.table[1:2,1:2] # OTU Table
tree # Phylogenetic Tree
head(metadata,2) # Metadata
head(tax,2) # taxonomy table

## -----------------------------------------------------------------------------
human.samples <- factor(colData(tse)$SampleType %in% c("Feces", "Mock", "Skin", "Tongue"))

## ---- message=FALSE-----------------------------------------------------------
gp.philr <- philr(otu.table, tree, 
                  part.weights='enorm.x.gm.counts', 
                  ilr.weights='blw.sqrt')
gp.philr[1:5,1:5]

## ---- message=FALSE-----------------------------------------------------------
gp.philr <- philr(tse, abund_values = "counts.shifted",
                  part.weights='enorm.x.gm.counts', 
                  ilr.weights='blw.sqrt')

## ---- message=FALSE-----------------------------------------------------------
pseq <- makePhyloseqFromTreeSummarizedExperiment(tse, abund_values="counts.shifted")
gp.philr <- philr(pseq, 
                  part.weights='enorm.x.gm.counts', 
                  ilr.weights='blw.sqrt')


## -----------------------------------------------------------------------------
# Distances between samples based on philr transformed data
gp.dist <- dist(gp.philr, method="euclidean") 

# Calculate MDS for the distance matrix
d <- as.data.frame(cmdscale(gp.dist))
colnames(d) <- paste0("PC", 1:2)

## -----------------------------------------------------------------------------
# Add some metadata for the visualization 
d$SampleType <- factor(metadata$SampleType)

# Create a plot
ggplot(data = d,
  aes(x=PC1, y=PC2, color=SampleType)) +
  geom_point() +
  labs(title = "Euclidean distances with phILR")

## ---- message=FALSE, warning=FALSE--------------------------------------------
library(glmnet); packageVersion('glmnet')
glmmod <- glmnet(gp.philr, human.samples, alpha=1, family="binomial")

## -----------------------------------------------------------------------------
top.coords <- as.matrix(coefficients(glmmod, s=0.2526))
top.coords <- rownames(top.coords)[which(top.coords != 0)]
(top.coords <- top.coords[2:length(top.coords)]) # remove the intercept as a coordinate

## -----------------------------------------------------------------------------
tc.names <- sapply(top.coords, function(x) name.balance(tree, tax, x))
tc.names

## -----------------------------------------------------------------------------
votes <- name.balance(tree, tax, 'n730', return.votes = c('up', 'down'))
votes[[c('up.votes', 'Family')]]   # Numerator at Family Level
votes[[c('down.votes', 'Family')]] # Denominator at Family Level

## ---- message=FALSE, warning=FALSE--------------------------------------------
library(ggtree); packageVersion("ggtree")
library(dplyr); packageVersion('dplyr')

## ---- message=FALSE, warning=FALSE--------------------------------------------
tc.nn <- name.to.nn(tree, top.coords)
tc.colors <- c('#a6cee3', '#1f78b4', '#b2df8a', '#33a02c', '#fb9a99')
p <- ggtree(tree, layout='fan') +
  geom_balance(node=tc.nn[1], fill=tc.colors[1], alpha=0.6) +
  geom_balance(node=tc.nn[2], fill=tc.colors[2], alpha=0.6) +
  geom_balance(node=tc.nn[3], fill=tc.colors[3], alpha=0.6) +
  geom_balance(node=tc.nn[4], fill=tc.colors[4], alpha=0.6) +
  geom_balance(node=tc.nn[5], fill=tc.colors[5], alpha=0.6)
p <- annotate_balance(tree, 'n16', p=p, labels = c('n16+', 'n16-'),
                 offset.text=0.15, bar=FALSE)
annotate_balance(tree, 'n730', p=p, labels = c('n730+', 'n730-'),
                 offset.text=0.15, bar=FALSE)

## -----------------------------------------------------------------------------
gp.philr.long <- convert_to_long(gp.philr, human.samples) %>%
  filter(coord %in% top.coords)

ggplot(gp.philr.long, aes(x=labels, y=value)) +
  geom_boxplot(fill='lightgrey') +
  facet_grid(.~coord, scales='free_x') +
  labs(x = 'Human', y = 'Balance Value') +
  theme_bw()

## ---- message=FALSE, warning=FALSE--------------------------------------------
library(tidyr); packageVersion('tidyr')

gp.philr.long %>%
  dplyr::rename(Human=labels) %>%
  dplyr::filter(coord %in% c('n16', 'n730')) %>%
  tidyr::spread(coord, value) %>%
  ggplot(aes(x=n16, y=n730, color=Human)) +
  geom_point(size=4) +
  labs(x = tc.names['n16'], y = tc.names['n730']) +
  theme_bw()

## ---- echo=TRUE, message=FALSE------------------------------------------------
sessionInfo()