## ----tfcensus-op--------------------------------------------------------------------------------------------
library(OmnipathR)
library(magrittr)
library(dplyr)

tfcensus <-
    import_omnipath_annotations(
        resources = 'TFcensus',
        entity_types = 'protein'
    ) %>%
    pull(genesymbol) %>%
    unique

## ----tfcensus-direct----------------------------------------------------------------------------------------
library(purrr)

tfcensus_direct <-
    tfcensus_download() %>%
    pull(`HGNC symbol`) %>%
    discard(is.na) %>%
    unique

## ----tfs-from-network---------------------------------------------------------------------------------------
tfs_from_network <-
    import_transcriptional_interactions(
        # confidence levels;
        # we use only the 3 levels with highest confidence
        dorothea_levels = c('A', 'B', 'C'),
        entity_types = 'protein'
    ) %>%
    pull(source_genesymbol) %>%
    unique

## ----tf-list------------------------------------------------------------------------------------------------
tfs <- union(tfcensus, tfs_from_network)

## ----localizations------------------------------------------------------------------------------------------
localizations <-
    import_omnipath_annotations(
        resources = c(
            'Ramilowski_location',
            'UniProt_location',
            'HPA_subcellular'
        ),
        entity_types = 'protein',
        wide = TRUE
    )

## ----loc-coverage-------------------------------------------------------------------------------------------
localizations %>%
map(function(x){x %>% pull(genesymbol) %>% n_distinct})

## ----ligrec-------------------------------------------------------------------------------------------------
ligands <-
    OmnipathR::import_omnipath_intercell(
        parent = 'ligand',
        topology = 'sec',
        consensus_percentile = 50,
        loc_consensus_percentile = 30,
        entity_types = 'protein'
    ) %>%
    pull(genesymbol) %>%
    unique %T>%
    length

receptors <-
    OmnipathR::import_omnipath_intercell(
        parent = 'receptor',
        topology = 'pmtm',
        consensus_percentile = 50,
        loc_consensus_percentile = 30,
        entity_types = 'protein'
    ) %>%
    pull(genesymbol) %>%
    unique %T>%
    length

transmitters <-
    OmnipathR::import_omnipath_intercell(
        causality = 'trans',
        topology = 'sec',
        consensus_percentile = 50,
        loc_consensus_percentile = 30,
        entity_types = 'protein'
    ) %>%
    pull(genesymbol) %>%
    unique %T>%
    length

receivers <-
    OmnipathR::import_omnipath_intercell(
        causality = 'rec',
        topology = 'pmtm',
        consensus_percentile = 50,
        loc_consensus_percentile = 30,
        entity_types = 'protein'
    ) %>%
    pull(genesymbol) %>%
    unique %T>%
    length

## ----ppi----------------------------------------------------------------------------------------------------
ppi <-
    import_omnipath_interactions(
        datasets = 'omnipath',
        entity_types = 'protein'
    )

## ----signalink----------------------------------------------------------------------------------------------
signalink_pathways <-
    import_omnipath_annotations(
        resources = 'SignaLink_pathway',
        entity_types = 'protein',
        wide = TRUE
    )

signalink_functions <-
    import_omnipath_annotations(
        resources = 'SignaLink_function',
        entity_types = 'protein',
        wide = TRUE
    )

## ----lig-of-interest----------------------------------------------------------------------------------------
ligands_of_interest <-
    intersect(
        signalink_pathways %>%
            filter(pathway == 'TGF') %>%
            pull(genesymbol),
        signalink_functions %>%
            filter(`function` == 'Ligand') %>%
            pull(genesymbol)
    ) %T>%
    length

## ----ppi-graph----------------------------------------------------------------------------------------------
ppi_graph <-
    ppi %>%
    interaction_graph

## ----in-network---------------------------------------------------------------------------------------------
library(igraph)

ligands_of_interest__in_network <-
    ligands_of_interest %>%
    keep(. %in% V(ppi_graph)$name)

tfs__in_network <-
    tfs %>%
    keep(. %in% V(ppi_graph)$name)

## ----find-paths, eval = FALSE-------------------------------------------------------------------------------
#  paths <-
#      ppi_graph %>%
#      find_all_paths(
#          start = ligands_of_interest__in_network,
#          end = tfs__in_network,
#          maxlen = 3,
#          attr = 'name'
#      )

## ----filter-paths, eval = FALSE-----------------------------------------------------------------------------
#  # only selecting Cytoplasm, of course we could
#  # consider other intracellular compartments
#  in_cytoplasm <-
#      localizations$UniProt_location %>%
#      filter(location == 'Cytoplasm') %>%
#      pull(genesymbol) %>%
#      unique
#  
#  in_nucleus <-
#      localizations$UniProt_location %>%
#      filter(location %in% c('Nucleus', 'Nucleolus')) %>%
#      pull(genesymbol) %>%
#      unique
#  
#  paths_selected <-
#      paths %>%
#      # removing single node paths
#      discard(
#          function(p){
#              length(p) == 1
#          }
#      ) %>%
#      # receptors are plasma membrane transmembrane
#      # according to our query to OmniPath
#      keep(
#          function(p){
#              p[2] %in% receptors
#          }
#      ) %>%
#      # making sure all further mediators are
#      # in the cytoplasm
#      keep(
#          function(p){
#              all(p %>% tail(-2) %>% is_in(in_cytoplasm))
#          }
#      ) %>%
#      # the last nodes are all TFs, implying that they are in the nucleus
#      # but we can optionally filter foall(p %>% tail(-2)r this too:
#      keep(
#          function(p){
#              last(p) %in% in_nucleus
#          }
#      ) %>%
#      # finally, we can remove paths which contain TFs also as intermediate
#      # nodes as these are redundant
#      discard(
#          function(p){
#              !any(p %>% head(-1) %>% is_in(tfs))
#          }
#      )

## ----path-stats, eval = FALSE-------------------------------------------------------------------------------
#  paths_selected %>% length
#  
#  paths_selected %>% map_int(length) %>% table

## ----intercell-network--------------------------------------------------------------------------------------
ligand_receptor <-
    import_intercell_network(
        ligand_receptor = TRUE,
        high_confidence = TRUE,
        entity_types = 'protein'
    ) %>%
    simplify_intercell_network

## ----tf-target-network--------------------------------------------------------------------------------------
tf_target <-
    import_transcriptional_interactions(
        # confidence levels;
        # we use only the 2 levels with highest confidence
        dorothea_levels = c('A', 'B'),
        # I added this only to have less interactions so the
        # examples here run faster; feel free to remove it,
        # then you will have more gene regulatory interactions
        # from a variety of databases
        datasets = 'tf_target',
        entity_types = 'protein',
        # A workaround to mitigate a temporary issue (05/01/2022)
        resources = c('ORegAnno', 'PAZAR')
    )

## ----ppi-2, eval = FALSE------------------------------------------------------------------------------------
#  ppi <-
#      import_omnipath_interactions(
#          datasets = 'omnipath',
#          entity_types = 'protein'
#      )

## ----annotate-networks--------------------------------------------------------------------------------------
ppi %<>%
    annotated_network('UniProt_location') %>%
    annotated_network('kinase.com') %>%
    # these columns define a unique interaction; 5 of them would be enough
    # for only the grouping, we include the rest only to keep them after
    # the summarize statement
    group_by(
        source,
        target,
        source_genesymbol,
        target_genesymbol,
        is_directed,
        is_stimulation,
        is_inhibition,
        sources,
        references,
        curation_effort,
        n_references,
        n_resources
    ) %>%
    summarize(
        location_source = list(unique(location_source)),
        location_target = list(unique(location_target)),
        family_source = list(unique(family_source)),
        family_target = list(unique(family_target)),
        subfamily_source = list(unique(subfamily_source)),
        subfamily_target = list(unique(subfamily_target))
    ) %>%
    ungroup

tf_target %<>%
    annotated_network('UniProt_location') %>%
    annotated_network('kinase.com') %>%
    group_by(
        source,
        target,
        source_genesymbol,
        target_genesymbol,
        is_directed,
        is_stimulation,
        is_inhibition,
        sources,
        references,
        curation_effort,
        # Workaround to mitigate a temporary issue with
        # DoRothEA data in OmniPath (05/01/2022)
        # dorothea_level,
        n_references,
        n_resources
    ) %>%
    summarize(
        location_source = list(unique(location_source)),
        location_target = list(unique(location_target)),
        family_source = list(unique(family_source)),
        family_target = list(unique(family_target)),
        subfamily_source = list(unique(subfamily_source)),
        subfamily_target = list(unique(subfamily_target))
    ) %>%
    ungroup

## ----tfs-2--------------------------------------------------------------------------------------------------
tfs <-
    tf_target %>%
    pull(source_genesymbol) %>%
    unique

## ----grow-paths-function------------------------------------------------------------------------------------
library(rlang)
library(tidyselect)

grow_paths <- function(paths, step, interactions, endpoints = NULL){

    right_target_col <- sprintf(
        'target_genesymbol_step%i',
        step
    )

    paths$growing %<>% one_step(step, interactions)

    # finished paths with their last node being an endpoint:
    paths[[sprintf('step%i', step)]] <-
        paths$growing %>%
        {`if`(
            is.null(endpoints),
            .,
            filter(., !!sym(right_target_col) %in% endpoints)
        )}

    # paths to be further extended:
    paths$growing %<>%
        {`if`(
            is.null(endpoints),
            NULL,
            filter(., !(!!sym(right_target_col) %in% endpoints))
        )}

    invisible(paths)

}


one_step <- function(paths, step, interactions){

    left_col <- sprintf('target_genesymbol_step%i', step - 1)
    right_col <- sprintf('source_genesymbol_step%i', step)
    by <- setNames(right_col, left_col)

    paths %>%
    # making sure even at the first step we have the suffix `_step1`
    {`if`(
        'target_genesymbol' %in% colnames(.),
        rename_with(
            .,
            function(x){sprintf('%s_step%i', x, step)}
        ),
        .
    )} %>%
    {`if`(
        step == 0,
        .,
        inner_join(
            .,
            # adding suffix for the next step
            interactions %>%
            rename_with(function(x){sprintf('%s_step%i', x, step)}),
            by = by
        )
    )} %>%
    # removing loops
    filter(
        select(., contains('_genesymbol')) %>%
        pmap_lgl(function(...){!any(duplicated(c(...)))})
    )

}

## ----build-paths--------------------------------------------------------------------------------------------
library(stringr)

steps <- 2 # to avoid OOM

paths <-
    seq(0, steps) %>%
    head(-1) %>%
    reduce(
        grow_paths,
        interactions = ppi,
        endpoints = tfs,
        .init = list(
            growing = ligand_receptor
        )
    ) %>%
    within(rm(growing)) %>%
    map2(
        names(.) %>% str_extract('\\d+$') %>% as.integer %>% add(1),
        one_step,
        interactions = tf_target
    )

## ----packages, eval = FALSE---------------------------------------------------------------------------------
#  library(OmnipathR)
#  library(magrittr)
#  library(dplyr)
#  library(purrr)
#  library(igraph)
#  library(rlang)
#  library(tidyselect)
#  library(stringr)
#  library(rmarkdown)

## ----install, eval = FALSE----------------------------------------------------------------------------------
#  missing_packages <-
#      setdiff(
#          c(
#              'magrittr',
#              'dplyr',
#              'purrr',
#              'igraph',
#              'rlang',
#              'tidyselect',
#              'stringr',
#              'rmarkdown',
#              'devtools'
#          ),
#          installed.packages()
#      )
#  
#  if(length(missing_packages)){
#      install.packages(missing_packages)
#  }
#  
#  if(!'OmnipathR' %in% installed.packages()){
#      library(devtools)
#      devtools::install_github('saezlab/OmnipathR')
#  }

## ----render, eval = FALSE-----------------------------------------------------------------------------------
#  library(rmarkdown)
#  render('paths.Rmd', output_format = 'html_document')

## ----session------------------------------------------------------------------------------------------------
sessionInfo()