## ----setup, include=FALSE-----------------------------------------------------
knitr::opts_chunk$set(echo = TRUE)

## ---- eval = FALSE------------------------------------------------------------
#  
#  library(devtools)
#  devtools::install_github("PYangLab/PhosR")

## ---- eval = FALSE------------------------------------------------------------
#  #if (!requireNamespace("BiocManager", quietly = TRUE))
#  #    install.packages("BiocManager")
#  #
#  #BiocManager::install("PhosR")

## -----------------------------------------------------------------------------
suppressPackageStartupMessages({
    library(PhosR)
})

## -----------------------------------------------------------------------------
data("phospho_L6_ratio")

ppe <- PhosphoExperiment(assays = list(Quantification = as.matrix(phospho.L6.ratio)))
class(ppe)

## -----------------------------------------------------------------------------
GeneSymbol(ppe) <- sapply(strsplit(rownames(ppe), ";"), "[[", 2)
Residue(ppe) <- gsub("[0-9]","", sapply(strsplit(rownames(ppe), ";"), "[[", 3))
Site(ppe) <- as.numeric(gsub("[A-Z]","", sapply(strsplit(rownames(ppe), ";"), "[[", 3)))
Sequence(ppe) <- sapply(strsplit(rownames(ppe), ";"), "[[", 4)

## -----------------------------------------------------------------------------
ppe <- PhosphoExperiment(assays = list(Quantification = as.matrix(phospho.L6.ratio)), 
                  Site = as.numeric(gsub("[A-Z]","", sapply(strsplit(rownames(ppe), ";"), "[[", 3))),
                  GeneSymbol = sapply(strsplit(rownames(ppe), ";"), "[[", 2),
                  Residue = gsub("[0-9]","", sapply(strsplit(rownames(ppe), ";"), "[[", 3)),
                  Sequence = sapply(strsplit(rownames(ppe), ";"), "[[", 4))

## -----------------------------------------------------------------------------
ppe

## -----------------------------------------------------------------------------
suppressPackageStartupMessages({
  library(PhosR)
})

## -----------------------------------------------------------------------------
data("phospho.cells.Ins.pe")

ppe <- phospho.cells.Ins.pe 
class(ppe)

## -----------------------------------------------------------------------------
ppe

## -----------------------------------------------------------------------------
grps = gsub("_[0-9]{1}", "", colnames(ppe))

## -----------------------------------------------------------------------------
# FL38B
gsub("Intensity.", "", grps)[1:12]
# Hepa1
gsub("Intensity.", "", grps)[13:24]

## -----------------------------------------------------------------------------
dim(ppe)

## -----------------------------------------------------------------------------
ppe_filtered <- selectGrps(ppe, grps, 0.5, n=1) 
dim(ppe_filtered)

## -----------------------------------------------------------------------------
# In cases where you have fewer replicates ( e.g.,triplicates), you may want to 
# select phosphosites quantified in 70% of replicates. 
ppe_filtered_v1 <- selectGrps(ppe, grps, 0.7, n=1) 
dim(ppe_filtered_v1)

## -----------------------------------------------------------------------------
set.seed(123)
ppe_imputed_tmp <- scImpute(ppe_filtered, 0.5, grps)[,colnames(ppe_filtered)]

## -----------------------------------------------------------------------------
set.seed(123)
ppe_imputed <- ppe_imputed_tmp
ppe_imputed[,seq(6)] <- ptImpute(ppe_imputed[,seq(7,12)], 
                                 ppe_imputed[,seq(6)], 
                                 percent1 = 0.6, percent2 = 0, paired = FALSE)
ppe_imputed[,seq(13,18)] <- ptImpute(ppe_imputed[,seq(19,24)], 
                                     ppe_imputed[,seq(13,18)], 
                                     percent1 = 0.6, percent2 = 0, 
                                     paired = FALSE)

## -----------------------------------------------------------------------------
ppe_imputed_scaled <- medianScaling(ppe_imputed, scale = FALSE, assay = "imputed")

## -----------------------------------------------------------------------------
p1 = plotQC(SummarizedExperiment::assay(ppe_filtered,"Quantification"), 
    labels=colnames(ppe_filtered), 
    panel = "quantify", grps = grps)
p2 = plotQC(SummarizedExperiment::assay(ppe_imputed_scaled,"scaled"), 
    labels=colnames(ppe_imputed_scaled), panel = "quantify", grps = grps)
ggpubr::ggarrange(p1, p2, nrow = 1)

## -----------------------------------------------------------------------------
p1 = plotQC(SummarizedExperiment::assay(ppe_filtered,"Quantification"), 
       labels=colnames(ppe_filtered), panel = "dendrogram", 
       grps = grps)
p2 = plotQC(SummarizedExperiment::assay(ppe_imputed_scaled,"scaled"), 
       labels=colnames(ppe_imputed_scaled), 
       panel = "dendrogram", grps = grps)
ggpubr::ggarrange(p1, p2, nrow = 1)

## -----------------------------------------------------------------------------
suppressPackageStartupMessages({
  library(PhosR)
  library(stringr)
  library(ggplot2)
})

## -----------------------------------------------------------------------------
data("phospho_L6_ratio_pe")
data("SPSs")

ppe <- phospho.L6.ratio.pe
ppe

## -----------------------------------------------------------------------------
colnames(ppe)[grepl("AICAR_", colnames(ppe))]
colnames(ppe)[grepl("^Ins_", colnames(ppe))]
colnames(ppe)[grepl("AICARIns_", colnames(ppe))]

## -----------------------------------------------------------------------------
dim(ppe)

## -----------------------------------------------------------------------------
sum(is.na(SummarizedExperiment::assay(ppe,"Quantification")))

## -----------------------------------------------------------------------------
sites = paste(sapply(GeneSymbol(ppe), function(x)x),";",
                 sapply(Residue(ppe), function(x)x),
                 sapply(Site(ppe), function(x)x),
                 ";", sep = "")

## -----------------------------------------------------------------------------
# take the grouping information
grps = gsub("_.+", "", colnames(ppe))
grps

## -----------------------------------------------------------------------------
plotQC(SummarizedExperiment::assay(ppe,"Quantification"), panel = "dendrogram", 
    grps=grps, labels = colnames(ppe)) +
    ggplot2::ggtitle("Before batch correction")

## -----------------------------------------------------------------------------
plotQC(SummarizedExperiment::assay(ppe,"Quantification"), grps=grps, 
    labels = colnames(ppe), panel = "pca") +
    ggplot2::ggtitle("Before batch correction")

## -----------------------------------------------------------------------------
design = model.matrix(~ grps - 1)
design

## -----------------------------------------------------------------------------
# phosphoproteomics data normalisation and batch correction using RUV
ctl = which(sites %in% SPSs)
ppe = RUVphospho(ppe, M = design, k = 3, ctl = ctl)

## -----------------------------------------------------------------------------
# plot after batch correction
p1 = plotQC(SummarizedExperiment::assay(ppe, "Quantification"), grps=grps, 
    labels = colnames(ppe), panel = "dendrogram" )
p2 = plotQC(SummarizedExperiment::assay(ppe, "normalised"), grps=grps, 
    labels = colnames(ppe), panel="dendrogram")
ggpubr::ggarrange(p1, p2, nrow = 1)

## -----------------------------------------------------------------------------
# plot after batch correction
p1 = plotQC(SummarizedExperiment::assay(ppe, "Quantification"), panel = "pca", 
    grps=grps, labels = colnames(ppe)) +
    ggplot2::ggtitle("Before Batch correction")
p2 = plotQC(SummarizedExperiment::assay(ppe, "normalised"), grps=grps, 
    labels = colnames(ppe), panel="pca") +
    ggplot2::ggtitle("After Batch correction")
ggpubr::ggarrange(p1, p2, nrow = 2)

## -----------------------------------------------------------------------------
data("phospho_L6_ratio_pe")
data("phospho.liver.Ins.TC.ratio.RUV.pe")
data("phospho.cells.Ins.pe")
ppe1 <- phospho.L6.ratio.pe
ppe2 <- phospho.liver.Ins.TC.ratio.RUV.pe
# ppe2 <- phospho.L1.Ins.ratio.subset.pe
ppe3 <- phospho.cells.Ins.pe

## -----------------------------------------------------------------------------
grp3 = gsub('_[0-9]{1}', '', colnames(ppe3))
ppe3 <- selectGrps(ppe3, grps = grp3, 0.5, n=1)
ppe3 <- tImpute(ppe3)
FL83B.ratio <- SummarizedExperiment::assay(ppe3, "imputed")[, 1:12] - 
    rowMeans(SummarizedExperiment::assay(ppe3, "imputed")[,grep("FL83B_Control", 
        colnames(ppe3))])
Hepa.ratio <- SummarizedExperiment::assay(ppe3,"imputed")[, 13:24] - 
    rowMeans(SummarizedExperiment::assay(ppe3,"imputed")[,grep("Hepa1.6_Control", 
        colnames(ppe3))])
SummarizedExperiment::assay(ppe3, "Quantification") <- 
    cbind(FL83B.ratio, Hepa.ratio)

## -----------------------------------------------------------------------------
ppe.list <- list(ppe1, ppe2, ppe3)
# ppe.list <- list(ppe1, ppe3)

cond.list <- list(grp1 = gsub("_.+", "", colnames(ppe1)),
                  grp2 = str_sub(colnames(ppe2), end=-5),
                  grp3 = str_sub(colnames(ppe3), end=-3))

## -----------------------------------------------------------------------------
inhouse_SPSs <- getSPS(ppe.list, conds = cond.list)

head(inhouse_SPSs)

## -----------------------------------------------------------------------------
sites = paste(sapply(GeneSymbol(ppe), function(x)x),";",
              sapply(Residue(ppe), function(x)x),
              sapply(Site(ppe), function(x)x), ";", sep = "")

ctl = which(sites %in% inhouse_SPSs)
ppe = RUVphospho(ppe, M = design, k = 3, ctl = ctl)

## -----------------------------------------------------------------------------
suppressPackageStartupMessages({
  library(calibrate)
  library(limma)
  library(directPA)
  library(org.Rn.eg.db)
  library(reactome.db)
  library(annotate)
  library(PhosR)
})

## -----------------------------------------------------------------------------
data("PhosphoSitePlus")

## ---- include = FALSE---------------------------------------------------------
data("phospho_L6_ratio")
data("SPSs")

##### Run batch correction
ppe <- phospho.L6.ratio.pe
sites = paste(sapply(GeneSymbol(ppe), function(x)x),";",
                 sapply(Residue(ppe), function(x)x),
                 sapply(Site(ppe), function(x)x),
                 ";", sep = "")
grps = gsub("_.+", "", colnames(ppe))
design = model.matrix(~ grps - 1)
ctl = which(sites %in% SPSs)
ppe = RUVphospho(ppe, M = design, k = 3, ctl = ctl)

## -----------------------------------------------------------------------------
sites = paste(sapply(GeneSymbol(ppe), function(x)x),";",
                 sapply(Residue(ppe), function(x)x),
                 sapply(Site(ppe), function(x)x),
                 ";", sep = "")

## -----------------------------------------------------------------------------
f <- gsub("_exp\\d", "", colnames(ppe))
X <- model.matrix(~ f - 1)
fit <- lmFit(SummarizedExperiment::assay(ppe, "normalised"), X)

## -----------------------------------------------------------------------------
table.AICAR <- topTable(eBayes(fit), number=Inf, coef = 1)
table.Ins <- topTable(eBayes(fit), number=Inf, coef = 3)
table.AICARIns <- topTable(eBayes(fit), number=Inf, coef = 2)


DE1.RUV <- c(sum(table.AICAR[,"adj.P.Val"] < 0.05), sum(table.Ins[,"adj.P.Val"] < 0.05), sum(table.AICARIns[,"adj.P.Val"] < 0.05))

# extract top-ranked phosphosites for each group comparison
contrast.matrix1 <- makeContrasts(fAICARIns-fIns, levels=X)  # defining group comparisons
contrast.matrix2 <- makeContrasts(fAICARIns-fAICAR, levels=X)  # defining group comparisons
fit1 <- contrasts.fit(fit, contrast.matrix1)
fit2 <- contrasts.fit(fit, contrast.matrix2)
table.AICARInsVSIns <- topTable(eBayes(fit1), number=Inf)
table.AICARInsVSAICAR <- topTable(eBayes(fit2), number=Inf)


DE2.RUV <- c(sum(table.AICARInsVSIns[,"adj.P.Val"] < 0.05), sum(table.AICARInsVSAICAR[,"adj.P.Val"] < 0.05))

o <- rownames(table.AICARInsVSIns)
Tc <- cbind(table.Ins[o,"logFC"], table.AICAR[o,"logFC"], table.AICARIns[o,"logFC"])

rownames(Tc) <- sites[match(o, rownames(ppe))]
rownames(Tc) <- gsub("(.*)(;[A-Z])([0-9]+)(;)", "\\1;\\3;", rownames(Tc))

colnames(Tc) <- c("Ins", "AICAR", "AICAR+Ins")

## -----------------------------------------------------------------------------
Tc.gene <- phosCollapse(Tc, id=gsub(";.+", "", rownames(Tc)), 
                        stat=apply(abs(Tc), 1, max), by = "max")
geneSet <- names(sort(Tc.gene[,1], 
                        decreasing = TRUE))[seq(round(nrow(Tc.gene) * 0.1))]

head(geneSet)

## -----------------------------------------------------------------------------
pathways = as.list(reactomePATHID2EXTID)

path_names = as.list(reactomePATHID2NAME)
name_id = match(names(pathways), names(path_names))
names(pathways) = unlist(path_names)[name_id]

pathways = pathways[which(grepl("Rattus norvegicus", names(pathways), ignore.case = TRUE))]

pathways = lapply(pathways, function(path) {
    gene_name = unname(getSYMBOL(path, data = "org.Rn.eg"))
    toupper(unique(gene_name))
})

## -----------------------------------------------------------------------------
path1 <- pathwayOverrepresent(geneSet, annotation=pathways, 
                                universe = rownames(Tc.gene), alter = "greater")
path2 <- pathwayRankBasedEnrichment(Tc.gene[,1], 
                                    annotation=pathways, 
                                    alter = "greater")

## -----------------------------------------------------------------------------
lp1 <- -log10(as.numeric(path2[names(pathways),1]))
lp2 <- -log10(as.numeric(path1[names(pathways),1]))
plot(lp1, lp2, ylab="Overrepresentation (-log10 pvalue)", xlab="Rank-based enrichment (-log10 pvalue)", main="Comparison of 1D pathway analyses", xlim = c(0, 10))

# select highly enriched pathways
sel <- which(lp1 > 1.5 & lp2 > 0.9)
textxy(lp1[sel], lp2[sel], gsub("_", " ", gsub("REACTOME_", "", names(pathways)))[sel])

## -----------------------------------------------------------------------------
# 2D direction site-centric kinase activity analyses
par(mfrow=c(1,2))
dpa1 <- directPA(Tc[,c(1,3)], direction=0, 
                 annotation=lapply(PhosphoSite.rat, function(x){gsub(";[STY]", ";", x)}), 
                 main="Direction pathway analysis")
dpa2 <- directPA(Tc[,c(1,3)], direction=pi*7/4, 
                 annotation=lapply(PhosphoSite.rat, function(x){gsub(";[STY]", ";", x)}), 
                 main="Direction pathway analysis")

# top activated kinases
dpa1$pathways[1:5,]
dpa2$pathways[1:5,]

## -----------------------------------------------------------------------------
z1 <- perturbPlot2d(Tc=Tc[,c(1,2)], 
                    annotation=lapply(PhosphoSite.rat, function(x){gsub(";[STY]", ";", x)}),
                    cex=1, xlim=c(-2, 4), ylim=c(-2, 4), 
                    main="Kinase perturbation analysis")
z2 <- perturbPlot2d(Tc=Tc[,c(1,3)], annotation=lapply(PhosphoSite.rat, function(x){gsub(";[STY]", ";", x)}),
                    cex=1, xlim=c(-2, 4), ylim=c(-2, 4), 
                    main="Kinase perturbation analysis")
z3 <- perturbPlot2d(Tc=Tc[,c(2,3)], annotation=lapply(PhosphoSite.rat, function(x){gsub(";[STY]", ";", x)}),
                    cex=1, xlim=c(-2, 4), ylim=c(-2, 4), 
                    main="Kinase perturbation analysis")

## -----------------------------------------------------------------------------
suppressPackageStartupMessages({
  library(parallel)
  library(ggplot2)
  library(ClueR)
  library(reactome.db)
  library(org.Mm.eg.db)
  library(annotate)
  library(PhosR)
})

## -----------------------------------------------------------------------------
# data("phospho_liverInsTC_RUV_pe")
data("phospho.liver.Ins.TC.ratio.RUV.pe")
ppe <- phospho.liver.Ins.TC.ratio.RUV.pe
ppe

## -----------------------------------------------------------------------------
# take grouping information

grps <- sapply(strsplit(colnames(ppe), "_"), 
            function(x)x[3])

# select differentially phosphorylated sites
sites.p <- matANOVA(SummarizedExperiment::assay(ppe, "Quantification"), 
                    grps)
ppm <- meanAbundance(SummarizedExperiment::assay(ppe, "Quantification"), grps)
sel <- which((sites.p < 0.05) & (rowSums(abs(ppm) > 1) != 0))
ppm_filtered <- ppm[sel,]

# summarise phosphosites information into gene level
ppm_gene <- phosCollapse(ppm_filtered, 
            gsub(";.+", "", rownames(ppm_filtered)), 
                stat = apply(abs(ppm_filtered), 1, max), by = "max")

# perform ClueR to identify optimal number of clusters
pathways = as.list(reactomePATHID2EXTID)

pathways = pathways[which(grepl("R-MMU", names(pathways), ignore.case = TRUE))]

pathways = lapply(pathways, function(path) {
    gene_name = unname(getSYMBOL(path, data = "org.Mm.eg"))
    toupper(unique(gene_name))
})

RNGkind("L'Ecuyer-CMRG")
set.seed(123)
c1 <- runClue(ppm_gene, annotation=pathways, 
            kRange = seq(2,10), rep = 5, effectiveSize = c(5, 100), 
            pvalueCutoff = 0.05, alpha = 0.5)

# Visualise the evaluation results
data <- data.frame(Success=as.numeric(c1$evlMat), Freq=rep(seq(2,10), each=5))
myplot <- ggplot(data, aes(x=Freq, y=Success)) + 
    geom_boxplot(aes(x = factor(Freq), fill="gray")) +
    stat_smooth(method="loess", colour="red", size=3, span = 0.5) +
    xlab("# of cluster") + 
    ylab("Enrichment score") + 
    theme_classic()
myplot

set.seed(123)
best <- clustOptimal(c1, rep=5, mfrow=c(2, 3), visualize = TRUE)

## -----------------------------------------------------------------------------
RNGkind("L'Ecuyer-CMRG")
set.seed(1)
PhosphoSite.mouse2 = mapply(function(kinase) {
  gsub("(.*)(;[A-Z])([0-9]+;)", "\\1;\\3", kinase)
}, PhosphoSite.mouse)

# perform ClueR to identify optimal number of clusters
c3 <- runClue(ppm_filtered, annotation=PhosphoSite.mouse2, kRange = 2:10, rep = 5, effectiveSize = c(5, 100), pvalueCutoff = 0.05, alpha = 0.5)

# Visualise the evaluation results
data <- data.frame(Success=as.numeric(c3$evlMat), Freq=rep(2:10, each=5))

myplot <- ggplot(data, aes(x=Freq, y=Success)) + geom_boxplot(aes(x = factor(Freq), fill="gray"))+
  stat_smooth(method="loess", colour="red", size=3, span = 0.5) + xlab("# of cluster")+ ylab("Enrichment score")+theme_classic()
myplot

set.seed(1)
best <- clustOptimal(c3, rep=10, mfrow=c(2, 3), visualize = TRUE)

# Finding enriched pathways from each cluster
best$enrichList

## -----------------------------------------------------------------------------
suppressPackageStartupMessages({

  library(PhosR)
  library(dplyr)
  library(ggplot2)
  library(GGally)
  library(ggpubr)
  library(calibrate)
  library(network)
})

## -----------------------------------------------------------------------------
data("KinaseMotifs")
data("KinaseFamily")
data("phospho_L6_ratio_pe")
data("SPSs")

ppe = phospho.L6.ratio.pe
sites = paste(sapply(GeneSymbol(ppe), function(x)x),";",
                 sapply(Residue(ppe), function(x)x),
                 sapply(Site(ppe), function(x)x),
                 ";", sep = "")
grps = gsub("_.+", "", colnames(ppe))
design = model.matrix(~ grps - 1)
ctl = which(sites %in% SPSs)
ppe = RUVphospho(ppe, M = design, k = 3, ctl = ctl)

## -----------------------------------------------------------------------------
data("phospho_L6_ratio")
data("SPSs")

##### Run batch correction
ppe <- phospho.L6.ratio.pe
sites = paste(sapply(GeneSymbol(ppe), function(x)x),";",
                 sapply(Residue(ppe), function(x)x),
                 sapply(Site(ppe), function(x)x),
                 ";", sep = "")
grps = gsub("_.+", "", colnames(ppe))
design = model.matrix(~ grps - 1)
ctl = which(sites %in% SPSs)
ppe = RUVphospho(ppe, M = design, k = 3, ctl = ctl)


phosphoL6 = SummarizedExperiment::assay(ppe, "normalised")

## -----------------------------------------------------------------------------
# filter for up-regulated phosphosites

phosphoL6.mean <- meanAbundance(phosphoL6, grps = gsub("_.+", "", colnames(phosphoL6)))
aov <- matANOVA(mat=phosphoL6, grps=gsub("_.+", "", colnames(phosphoL6)))
idx <- (aov < 0.05) & (rowSums(phosphoL6.mean > 0.5) > 0)
phosphoL6.reg <- phosphoL6[idx, ,drop = FALSE]

L6.phos.std <- standardise(phosphoL6.reg)
rownames(L6.phos.std) <- paste0(GeneSymbol(ppe), ";", Residue(ppe), Site(ppe), ";")[idx]

## -----------------------------------------------------------------------------
L6.phos.seq <- Sequence(ppe)[idx]

## ----fig.height=15, fig.width=5-----------------------------------------------
L6.matrices <- kinaseSubstrateScore(substrate.list = PhosphoSite.mouse, 
                                    mat = L6.phos.std, seqs = L6.phos.seq, 
                                    numMotif = 5, numSub = 1, verbose = FALSE)
set.seed(1)
L6.predMat <- kinaseSubstratePred(L6.matrices, top=30, verbose = FALSE) 

## ----fig.height=10, fig.width=10----------------------------------------------
kinaseOI = c("PRKAA1", "AKT1")

Signalomes_results <- Signalomes(KSR=L6.matrices, 
                                predMatrix=L6.predMat, 
                                exprsMat=L6.phos.std, 
                                KOI=kinaseOI)

## ----fig.height=6, fig.width=15-----------------------------------------------
### generate palette
my_color_palette <- grDevices::colorRampPalette(RColorBrewer::brewer.pal(8, "Accent"))
kinase_all_color <- my_color_palette(ncol(L6.matrices$combinedScoreMatrix))
names(kinase_all_color) <- colnames(L6.matrices$combinedScoreMatrix)
kinase_signalome_color <- kinase_all_color[colnames(L6.predMat)]

plotSignalomeMap(signalomes = Signalomes_results, color = kinase_signalome_color)

## ----fig.height=5, fig.width=5------------------------------------------------
plotKinaseNetwork(KSR = L6.matrices, predMatrix = L6.predMat, threshold = 0.9, color = kinase_all_color)

## -----------------------------------------------------------------------------
sessionInfo()