## ----style, echo = FALSE, results = 'asis'------------------------------------
BiocStyle::markdown()
knitr::opts_chunk$set(fig.wide = TRUE, fig.retina = 3)

## ----arch, echo=FALSE, out.width="80%", eval=TRUE, fig.cap="Integration of rawDiag and rawrr into the Spectra ecosystem (by courtesy of Johannes Rainer)."----
  knitr::include_graphics("arch.jpg")

## ----require------------------------------------------------------------------
suppressMessages(
  stopifnot(require(Spectra),
            require(MsBackendRawFileReader),
            require(tartare),
            require(BiocParallel))
)

## ----installAssemblies, echo=TRUE---------------------------------------------
if (isFALSE(rawrr::.checkDllInMonoPath())){
  rawrr::installRawFileReaderDLLs()
}

if (isFALSE(file.exists(rawrr:::.rawrrAssembly()))){
 rawrr::installRawrrExe()
}

## ----tartareEH4547, warning=FALSE, message=FALSE, eval=TRUE-------------------
# fetch via ExperimentHub
library(ExperimentHub)
eh <- ExperimentHub::ExperimentHub()

## ----tartare------------------------------------------------------------------
query(eh, c('tartare'))

## ----EH3220, message=FALSE, warning=FALSE-------------------------------------
EH3220 <- normalizePath(eh[["EH3220"]])
(rawfileEH3220 <- paste0(EH3220, ".raw"))
if (!file.exists(rawfileEH3220)){
  file.link(EH3220, rawfileEH3220)
}

EH3222 <- normalizePath(eh[["EH3222"]])
(rawfileEH3222 <- paste0(EH3222, ".raw"))
if (!file.exists(rawfileEH3222)){
  file.link(EH3222, rawfileEH3222)
}

EH4547  <- normalizePath(eh[["EH4547"]])
(rawfileEH4547  <- paste0(EH4547 , ".raw"))
if (!file.exists(rawfileEH4547 )){
  file.link(EH4547 , rawfileEH4547 )
}

## ----backendInitializeMsBackendRawFileReader, message=FALSE-------------------
beRaw <- Spectra::backendInitialize(
  MsBackendRawFileReader::MsBackendRawFileReader(),
  files = c(rawfileEH3220, rawfileEH3222, rawfileEH4547))

## ----show---------------------------------------------------------------------
beRaw

## ----rawrrFigure2, fig.cap = "Peptide spectrum match. The vertical grey lines indicate the *in-silico* computed y-ions of the peptide precusor LGGNEQVTR++ as calculated by the [protViz]( https://CRAN.R-project.org/package=protViz) package."----
(S <- (beRaw |>  
   filterScan("FTMS + c NSI Full ms2 487.2567@hcd27.00 [100.0000-1015.0000]") )[437]) |> 
  plotSpectra()

# supposed to be scanIndex 9594
S

# add yIonSeries to the plot
(yIonSeries <- protViz::fragmentIon("LGGNEQVTR")[[1]]$y[1:8])
names(yIonSeries) <- paste0("y", seq(1, length(yIonSeries)))
abline(v = yIonSeries, col='#DDDDDD88', lwd=5)
axis(3, yIonSeries, names(yIonSeries))

## ----defineFilterIon----------------------------------------------------------
setGeneric("filterIons", function(object, ...) standardGeneric("filterIons"))

setMethod("filterIons", "MsBackend",
  function(object, mZ=numeric(), tol=numeric(), ...) {
    
    keep <- lapply(peaksData(object, BPPARAM = bpparam()),
                   FUN=function(x){
       NN <- protViz::findNN(mZ, x[, 1])
       hit <- (error <- mZ - x[NN, 1]) < tol & x[NN, 2] >= quantile(x[, 2], .9)
       if (sum(hit) == length(mZ))
         TRUE
       else
         FALSE
                   })
    object[unlist(keep)]
  })

## ----applyFilterIons----------------------------------------------------------
start_time <- Sys.time()
X <- beRaw |> 
  MsBackendRawFileReader::filterScan("FTMS + c NSI Full ms2 487.2567@hcd27.00 [100.0000-1015.0000]") |>
  filterIons(yIonSeries, tol = 0.005) |> 
  Spectra::Spectra() |>
  Spectra::peaksData() 
end_time <- Sys.time()

## ----runTime------------------------------------------------------------------
end_time - start_time

## ----definePlotLGGNEQVTR------------------------------------------------------
## A helper plot function to visualize a peptide spectrum match for 
## the LGGNEQVTR peptide.
.plot.LGGNEQVTR <- function(x){
  
  yIonSeries <- protViz::fragmentIon("LGGNEQVTR")[[1]]$y[1:8]
  names(yIonSeries) <- paste0("y", seq(1, length(yIonSeries)))
  
  plot(x, type = 'h', xlim = range(yIonSeries))
  abline(v = yIonSeries, col = '#DDDDDD88', lwd=5)
  axis(3, yIonSeries, names(yIonSeries))
  
  # find nearest mZ value
  idx <- protViz::findNN(yIonSeries, x[,1])
  
  data.frame(
    ion = names(yIonSeries),
    mZ.yIon = yIonSeries,
    mZ = x[idx, 1],
    intensity = x[idx, 2]
  )
}

## ----filterIons2, fig.height=8, fig.cap = "Visualizing of the LGGNEQVTR spectrum matches.", echo=FALSE----
op <- par(mfrow=c(4, 1), mar=c(4, 4, 4, 1))
XC <- X |>
  lapply(FUN = .plot.LGGNEQVTR) |>
  Reduce(f = rbind) 

## ----sanityCheck, error=FALSE-------------------------------------------------
stats::aggregate(mZ ~ ion, data = XC, FUN = base::mean)
stats::aggregate(intensity ~ ion, data = XC, FUN = base::max)

## ----combinePeaks, fig.cap = "Combined LGGNEQVTR peptide spectrum match plot."----
X |>
  Spectra::combinePeaks(ppm=10, intensityFun=base::max) |>
  .plot.LGGNEQVTR()

## ----mgf----------------------------------------------------------------------
## Map Spectra variables to Mascot Server compatible vocabulary.
map <- c(custom = "TITLE",
         msLevel = "CHARGE",
         scanIndex = "SCANS",
         precursorMz = "PEPMASS",
         rtime = "RTINSECONDS")

## Compose custom TITLE
beRaw$custom <- paste0("File: ", beRaw$dataOrigin, " ; SpectrumID: ", S$scanIndex)

(mgf <- tempfile(fileext = '.mgf'))

(beRaw |>
  filterScan("FTMS + c NSI Full ms2 487.2567@hcd27.00 [100.0000-1015.0000]") )[437] |>
  Spectra::Spectra() |>
  Spectra::selectSpectraVariables(c("rtime", "precursorMz",
    "precursorCharge", "msLevel", "scanIndex", "custom")) |>
  MsBackendMgf::export(backend = MsBackendMgf::MsBackendMgf(),
                       file = mgf, map = map)
readLines(mgf) |> head(12)

## ----mgfTail------------------------------------------------------------------
readLines(mgf) |> tail()

## ----allMS2-------------------------------------------------------------------
S <- Spectra::backendInitialize(
  MsBackendRawFileReader::MsBackendRawFileReader(),
  files = c(rawfileEH4547)) |>
  Spectra() 

S

## ----writeMGF, eval=FALSE-----------------------------------------------------
#  S |>
#    MsBackendMgf::export(backend = MsBackendMgf::MsBackendMgf(),
#                         file = mgf,
#                         map = map)

## ----defineScanTypePattern----------------------------------------------------
## Define scanType patterns
scanTypePattern <- list(
  EThcD.lowres = "ITMS.+sa Full ms2.+@etd.+@hcd.+",
  ETciD.lowres = "ITMS.+sa Full ms2.+@etd.+@cid.+",
  CID.lowres = "ITMS[^@]+@cid[^@]+$",
  HCD.lowres = "ITMS[^@]+@hcd[^@]+$",
  EThcD.highres = "FTMS.+sa Full ms2.+@etd.+@hcd.+",
  HCD.highres = "FTMS[^@]+@hcd[^@]+$"
)

## -----------------------------------------------------------------------------
beRaw <- Spectra::backendInitialize(
  MsBackendRawFileReader::MsBackendRawFileReader(),
  files = c(rawrr::sampleFilePath()))

## -----------------------------------------------------------------------------
beRaw <- Spectra::backendInitialize(
  MsBackendRawFileReader::MsBackendRawFileReader(),
  files = rawrr::sampleFilePath())

beRaw$custom <- paste0("File: ", gsub("/srv/www/htdocs/Data2San/", "", beRaw$dataOrigin), " ; SpectrumID: ", beRaw$scanIndex)

## -----------------------------------------------------------------------------
.generate_mgf <- function(ext, pattern,  dir=tempdir(), ...){
  mgf <- file.path(dir, paste0(sub("\\.raw", "", unique(basename(beRaw$dataOrigin))),
                               ".", ext, ".mgf"))

  idx <- beRaw$scanType |> grepl(patter=pattern)

  if (sum(idx) == 0) return (NULL)

  message(paste0("Extracting ", sum(idx), " ",
                 pattern, " scans\n\t to file ", mgf, " ..."))

  beRaw[which(idx)] |>
    Spectra::Spectra() |>
    Spectra::selectSpectraVariables(c("rtime", "precursorMz",
    "precursorCharge", "msLevel", "scanIndex", "custom")) |>
    MsBackendMgf::export(backend = MsBackendMgf::MsBackendMgf(),
                         file = mgf,
                         map = map)

  mgf
}

#mapply(ext = names(scanTypePattern),
#      scanTypePattern,
#       FUN = .generate_mgf) |>
#  lapply(FUN = function(f){if (file.exists(f)) {readLines(f) |> head()}})

## ----defineTopN---------------------------------------------------------------
## Define a function that takes a matrix as input and derives
## the top n most intense peaks within a mass window.
## Of note, here, we require centroided data. (no profile mode!)
MsBackendRawFileReader:::.top_n

## ----applyProcessing----------------------------------------------------------
S_2 <- Spectra::addProcessing(S, MsBackendRawFileReader:::.top_n, n = 1) 

## ----plotSpectraMirror9594, fig.retina=3, fig.cap = "Spectra mirror plot of the filtered  (bottom) and unfiltered scan 9594.", error=TRUE----
Spectra::plotSpectraMirror(S[9594], S_2[9594], ppm = 50)

## ----compare9594mZValues------------------------------------------------------
S_2[9594] |> mz() |> unlist()
yIonSeries

## ----readBenchmarkData, fig.cap="I/O Benchmark. The XY plot graphs how many spectra the backend can read in one second versus the chunk size of the rawrr::readSpectrum method for different compute architectures."----

ioBm <- file.path(system.file(package = 'MsBackendRawFileReader'),
               'extdata', 'specs.csv') |>
  read.csv2(header=TRUE)

# perform and include a local IO benchmark
ioBmLocal <- ioBenchmark(1000, c(32, 64, 128, 256), rawfile = rawfileEH4547)


lattice::xyplot((1 / as.numeric(time)) * workers ~ size | factor(n) ,
                group = host,
                data = rbind(ioBm, ioBmLocal),
                horizontal = FALSE,
		scales=list(y = list(log = 10)),
                auto.key = TRUE,
                layout = c(3, 1),
                ylab = 'spectra read in one second',
                xlab = 'number of spectra / file')

## ----mzXML--------------------------------------------------------------------
mzXMLEH3219 <- normalizePath(eh[["EH3219"]])
mzXMLEH3221 <- normalizePath(eh[["EH3221"]])

## ----backendInitialize, message=FALSE, fig.cap='Aggregated intensities mzXML versus raw of the 1st 100 spectra.', message=FALSE, warning=FALSE, fig.width=5, fig.height=5----
if (require(mzR)){
  beMzXML <- Spectra::backendInitialize(
    Spectra::MsBackendMzR(),
    files = c(mzXMLEH3219))
  
  beRaw <- Spectra::backendInitialize(
    MsBackendRawFileReader::MsBackendRawFileReader(),
    files = c(rawfileEH3220))
  
  intensity.xml <- sapply(intensity(beMzXML[1:100]), sum)
  intensity.raw <- sapply(intensity(beRaw[1:100]), sum)
  
  plot(intensity.xml ~ intensity.raw, log = 'xy', asp = 1,
    pch = 16, col = rgb(0.5, 0.5, 0.5, alpha=0.5), cex=2)
  abline(lm(intensity.xml ~ intensity.raw), 
  	col='red')
}

## -----------------------------------------------------------------------------
if (require(mzR)){
  table(scanIndex(beRaw) %in% scanIndex(beMzXML))
}

## ----sessionInfo--------------------------------------------------------------
sessionInfo()