## ----settings, include = FALSE--------------------------------------------------------------------
#library(knitr)
#opts_chunk$set(warning=TRUE, message = FALSE, cache = FALSE, tidy = FALSE, tidy.opts = list(width.cutoff = 60))
# Console width used when printing results in the rendered document.
options(width = 100)

# Default knitr chunk options: collapse source and output into one block,
# prefix output lines with "#>", and tag source blocks with the
# "whiteCode" class.
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  class.source = "whiteCode"
)

## ---- message = FALSE, eval = FALSE---------------------------------------------------------------
#  ## From Bioconductor repository
#  if (!requireNamespace("BiocManager", quietly = TRUE)) {
#          install.packages("BiocManager")
#      }
#  BiocManager::install("fgga")
#  
#  ## Or from GitHub repository using devtools
#  BiocManager::install("devtools")
#  devtools::install_github("fspetale/fgga")

## ----setup, eval = TRUE, message=FALSE------------------------------------------------------------
library(fgga)

## ---- message = FALSE, eval = TRUE----------------------------------------------------------------
# Load the Canis lupus familiaris dataset and the example R objects
data(CfData)


## ---- message = FALSE, eval = TRUE----------------------------------------------------------------
# Summary of the components stored in the CfData object
summary(CfData)

# Characterized data: dimensions, first column and row names
dim(CfData$dxCf)

colnames(CfData$dxCf)[1:20]

rownames(CfData$dxCf)[1:10]

# Preview the first 10 rows of columns 51 to 61. The head() generic is used
# instead of calling the S3 method head.matrix() directly, so dispatch follows
# the actual class of the object.
head(CfData$dxCf[, 51:61], n = 10)

# GO annotation table: dimensions, first column and row names
dim(CfData$tableCfGO)

colnames(CfData$tableCfGO)[1:10]

rownames(CfData$tableCfGO)[1:10]

# Preview the first rows, restricted to the first 8 columns
head(CfData$tableCfGO)[, 1:8]

## ---- message = FALSE, eval = TRUE----------------------------------------------------------------
# Checking the amount of annotations by GO-term: one total per column of the
# annotation table. colSums() is the vectorized equivalent of
# apply(x, MARGIN = 2, sum) and keeps the column names on the result.
colSums(CfData$tableCfGO)

## ---- message = FALSE, eval = TRUE----------------------------------------------------------------
library(GO.db)
library(GOstats)

# Build a graph from the GO-terms in CfData$nodesGO using the GOMFPARENTS
# mapping (NOTE(review): presumably the Molecular Function parent relations
# provided by GO.db -- confirm).
mygraph <- GOGraph(CfData$nodesGO, GOMFPARENTS)

# Delete root node called all
# (keeps only the nodes listed in CfData$nodesGO)
mygraph <- subGraph(CfData$nodesGO, mygraph)

# We adapt the graph to the format used by FGGA:
# convert to an adjacency matrix, transpose it (which reverses the direction
# of every edge), then convert the matrix back to a graphNEL object.
mygraph <- t(as(mygraph, "matrix"))
mygraphGO <- as(mygraph, "graphNEL")

# We search the root GO-term
# NOTE(review): leaves(..., "in") presumably returns the nodes without
# incoming edges -- confirm against the graph package documentation.
rootGO <- leaves(mygraphGO, "in")

# Print the root term(s) found
rootGO

# Draw the resulting GO graph
plot(mygraphGO)

## ---- message = FALSE, eval = FALSE---------------------------------------------------------------
#  # We add GO-terms corresponding to Cellular Component subdomain
#  myGOs <- c(CfData[['nodesGO']], "GO:1902494", "GO:0032991", "GO:1990234",
#              "GO:0005575")
#  
#  # We build a graph respecting the GO constraints of inference to MF, CC and BP subdomains
#  mygraphGO <- preCoreFG(myGOs, domains="GOMF")
#  
#  plot(mygraphGO)

## ----message=FALSE, include=FALSE, results='hide'-------------------------------------------------
# Replace the graph with the one stored in CfData, coerced to graphNEL.
mygraphGO <- as(object = CfData[["graphCfGO"]], Class = "graphNEL")

# Recompute the root term(s) for this graph.
rootGO <- leaves(object = mygraphGO, degree.dir = "in")

## ---- message = FALSE, eval = TRUE----------------------------------------------------------------
# Derive the FGGA model structure from the GO graph.
modelFGGA <- fgga2bipartite(mygraphGO)

## ---- message = FALSE, eval = TRUE----------------------------------------------------------------
# Training subset: the first 750 entries of the stored training index
idsTrain <- CfData$indexGO[["indexTrain"]][seq_len(750)]

# Train one binary SVM classifier (radial kernel) per GO-term; the result is a
# list with one element per entry of CfData[["nodesGO"]]
modelSVMs <- lapply(
  X = CfData[["nodesGO"]],
  FUN = svmTrain,
  tableOntoTerms = CfData[["tableCfGO"]][idsTrain, ],
  dxCharacterized = CfData[["dxCf"]][idsTrain, ],
  graphOnto = mygraphGO,
  kernelSVM = "radial"
)

## ---- message = FALSE, eval = FALSE---------------------------------------------------------------
#  # We calculate the reliability of each GO-term
#  varianceGOs <- varianceOnto(tableOntoTerms = CfData[["tableCfGO"]][idsTrain, ],
#                          dxCharacterized = CfData[["dxCf"]][idsTrain, ],
#                          kFold = 5, graphOnto = mygraphGO, rootNode = rootGO,
#                          kernelSVM = "radial")
#  
#  varianceGOs

## ----echo=FALSE, message=FALSE--------------------------------------------------------------------
# Take the per-term variances stored in CfData, then display them.
varianceGOs <- CfData[["varianceGOs"]]

varianceGOs

## ---- message = FALSE, eval = TRUE----------------------------------------------------------------

# Test features: rows of the characterized data selected by the first 50
# entries of the stored test index
dxTestCharacterized <- CfData[["dxCf"]][
  CfData$indexGO[["indexTest"]][seq_len(50)],
]

# Apply the trained SVM models to the test rows.
# The argument name `svmMoldel` is spelled exactly as in the original call.
matrixGOTest <- svmOnto(
  svmMoldel = modelSVMs,
  dxCharacterized = dxTestCharacterized,
  rootNode = rootGO,
  varianceSVM = varianceGOs
)

# Preview the first rows, restricted to the first 8 columns
head(matrixGOTest)[, 1:8]

## ----message = FALSE, eval = TRUE-----------------------------------------------------------------
# Run msgFGGA on every row of the SVM output (MARGIN = 1), with at most 50
# iterations and a tolerance of 0.001 passed through to msgFGGA. The result of
# apply() is transposed before being stored.
matrixFGGATest <- t(apply(
  X = matrixGOTest,
  MARGIN = 1,
  FUN = msgFGGA,
  matrixFGGA = modelFGGA,
  graphOnto = mygraphGO,
  tmax = 50,
  epsilon = 0.001
))

# Preview the first rows, restricted to the first 8 columns
head(matrixFGGATest)[, 1:8]

## ---- message = FALSE, eval  = TRUE---------------------------------------------------------------
# Hierarchical performance measures for the FGGA predictions.
# First argument: rows of the annotation table selected by the row names of
# matrixFGGATest (presumably the reference annotations of the tested rows);
# second: the FGGA output; third: the GO graph.
fHierarchicalMeasures(CfData$tableCfGO[rownames(matrixFGGATest), ], 
                        matrixFGGATest, mygraphGO)

## ---- message = FALSE, eval = FALSE---------------------------------------------------------------
#  # Computing F-score
#  Fs <- fMeasures(CfData$tableCfGO[rownames(matrixFGGATest), ], matrixFGGATest)
#  
#  # Average F-score
#  Fs$perfByTerms[4]
#  
#  library(pROC)
#  
#  # Computing ROC curve to the first term
#  rocGO <- roc(CfData$tableCfGO[rownames(matrixFGGATest), 1],  matrixFGGATest[, 1])
#  
#  # AUC for the first term
#  auc(rocGO)
#  
#  # Computing precision at different recall levels to the first term
#  rocGO <- roc(CfData$tableCfGO[rownames(matrixFGGATest), 1],
#              matrixFGGATest[, 1], percent=TRUE)
#  PXR <- coords(rocGO, "all", ret = c("recall", "precision"), transpose = FALSE)
#  
#  # Average PxR for the first term
#  apply(as.matrix(PXR$precision[!is.na(PXR$precision)]), MARGIN = 2, mean)

## ----session,eval=TRUE,echo=FALSE-----------------------------------------------------------------
# Print the R session details (as reported by sessionInfo()) at the end of the
# document.
sessionInfo()