## ----setup, include=FALSE-----------------------------------------------------
# Global knitr chunk options applied to every chunk of the vignette:
# collapse source and output into one block, prefix output lines with "#>",
# hide messages and warnings, and leave the source formatting untouched.
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  message = FALSE,
  warning = FALSE,
  tidy = FALSE
)
library(BiocStyle)

# Probe whether the basilisk-managed Python environment can be started.
# The result is used as the `eval=` value of the Python-dependent chunks.
#
# The probe runs inside an immediately-invoked function so that on.exit()
# is attached to a real function frame: the process is then guaranteed to
# be stopped when the probe returns, regardless of how this script is
# evaluated (knitr, source(), or interactively), and `proc` does not leak
# into the global environment. Any error -- while starting or stopping the
# process -- is deliberately treated as "Python not available".
python_available <- tryCatch(
    (function() {
        proc <- basilisk::basiliskStart(immLynx::immLynxEnv)
        on.exit(basilisk::basiliskStop(proc), add = TRUE)
        TRUE
    })(),
    error = function(e) {
        FALSE
    }
)

## ----installation, eval = FALSE-----------------------------------------------
# if (!requireNamespace("BiocManager", quietly = TRUE))
#     install.packages("BiocManager")
# BiocManager::install("immLynx")

## ----load---------------------------------------------------------------------
# Attach the packages used by the vignette.
library(immLynx)
library(scran)
library(scater)

# Load the example dataset by name (presumably shipped with immLynx --
# confirm against the package's data/ directory).
data("immLynx_example")

# Auto-print the example object so the reader can see its structure.
immLynx_example

## ----extract------------------------------------------------------------------
# Extract TCR data for the TRB chain only and preview the first rows.
tcr_data <- extractTCRdata(immLynx_example,
                           chains = "TRB")
head(tcr_data)

# Extract both chains in the "wide" format.
# NOTE(review): `tcr_wide` is assigned but not used again in this script.
tcr_wide <- extractTCRdata(immLynx_example,
                           chains = "both",
                           format = "wide")

# Validate the TRB extraction and display the validation result.
validation <- validateTCRdata(tcr_data)
print(validation)

## ----summary------------------------------------------------------------------
# Summarise the repertoire for the TRB chain and print the result.
# The result is stored as `repertoire_summary` rather than `summary` so that
# it does not mask base::summary() for the rest of the session.
repertoire_summary <- summarizeTCRrepertoire(immLynx_example,
                                             chains = "TRB")
print(repertoire_summary)

## ----clustcr, eval=python_available-------------------------------------------
# sce <- runClustTCR(
#   immLynx_example,
#   chains = "TRB",
#   method = "mcl",
#   inflation = 2.0
# )
# 
# table(sce$clustcr_TRB)

## ----tcrdist, eval=python_available-------------------------------------------
# dist_results <- runTCRdist(
#   immLynx_example,
#   chains = "beta",
#   organism = "human"
# )
# 
# dim(dist_results$distances$pw_beta)

## ----olga, eval=python_available----------------------------------------------
# sce <- runOLGA(
#   immLynx_example,
#   chains = "TRB",
#   model = "humanTRB"
# )
# 
# hist(log10(sce$olga_pgen_TRB), breaks = 50)

## ----generate, eval=python_available------------------------------------------
# random_seqs <- generateOLGA(n = 100,
#                             model = "humanTRB")
# head(random_seqs)

## ----embeddings, eval=FALSE---------------------------------------------------
# sce <- runEmbeddings(
#   immLynx_example,
#   chains = "TRB",
#   model_name = "facebook/esm2_t12_35M_UR50D",
#   pool = "mean"
# )
# 
# sce <- scater::runUMAP(sce,
#                        dimred = "tcr_esm")
# scater::plotReducedDim(sce, dimred = "UMAP")

## ----metaclonotypist, eval=python_available-----------------------------------
# sce <- runMetaclonotypist(
#   immLynx_example,
#   chains = "beta",
#   method = "tcrdist",
#   max_edits = 2,
#   max_dist = 20
# )
# 
# table(sce$metaclone)

## ----sonnia, eval=FALSE-------------------------------------------------------
# background <- generateOLGA(n = 10000, model = "humanTRB")
# write.csv(background, "background.csv", row.names = FALSE)
# 
# sce <- runSoNNia(
#   immLynx_example,
#   chains = "TRB",
#   background_file = "background.csv"
# )

## ----workflow, eval=FALSE-----------------------------------------------------
# library(immLynx)
# library(scran)
# library(scater)
# library(ggplot2)
# 
# data("immLynx_example")
# 
# # Summarize the repertoire
# summary <- summarizeTCRrepertoire(immLynx_example)
# print(summary)
# 
# # Cluster TCRs by CDR3 similarity
# immLynx_example <- runClustTCR(
#   immLynx_example,
#   chains = "TRB",
#   method = "mcl"
# )
# 
# # Calculate generation probability
# immLynx_example <- runOLGA(
#   immLynx_example,
#   chains = "TRB"
# )
# 
# # Generate protein language model embeddings
# immLynx_example <- runEmbeddings(
#   immLynx_example,
#   chains = "TRB"
# )
# 
# # Visualize embeddings colored by cluster assignment
# immLynx_example <- scater::runUMAP(
#   immLynx_example,
#   dimred = "tcr_esm",
#   name = "tcr_umap"
# )
# 
# scater::plotReducedDim(
#   immLynx_example,
#   dimred = "tcr_umap",
#   colour_by = "clustcr_TRB"
# )
# 
# # Visualize embeddings colored by generation probability
# scater::plotReducedDim(
#   immLynx_example,
#   dimred = "tcr_umap",
#   colour_by = "olga_pgen_log10_TRB"
# )

## ----session, eval=TRUE-------------------------------------------------------
# Record the R version, platform and attached/loaded package versions so the
# vignette output can be reproduced.
sessionInfo()