This vignette demonstrates how a protein-protein interaction (PPI) graph may be constructed from the STRING database (stringDB).
Other networks can also be used with netSmooth, but we mostly rely on networks from stringDB. StringDB provides networks for multiple species, such as human, mouse, zebrafish, C. elegans and D. melanogaster. It is also possible to prune the network differently; for our purposes, we keep only the edges with the highest confidence scores. Below, we show how to obtain and prune the human network from stringDB. Specifically, we use the workflow below.
# Attach the packages used below. library() stops with an error when a
# package is missing, whereas require() only returns FALSE and lets the
# script fail later with a less obvious message.
library(STRINGdb)
library(igraph)
library(biomaRt)
# 1. getSTRINGdb for human (species = 9606 selects the human network)
string_db <- STRINGdb$new(species = 9606)
# full network for that species; it is used as an igraph graph below
human_graph <- string_db$get_graph()
# 2. keep only the high-confidence edges
# combined_score is read from the edge attributes of the graph
edge.scores <- edge_attr(human_graph, "combined_score")
# cutoff: the 90th percentile of all edge scores
ninetyth.percentile <- quantile(edge.scores, probs = 0.9)
# label/value pair describing the cutoff (not used again in the code shown here)
thresh <- data.frame(
  name = "90th percentile",
  val = ninetyth.percentile
)
# retain edges scoring strictly above the cutoff; vertices left without any
# edge are removed as well (delete.vertices = TRUE is the default)
human_graph <- subgraph.edges(
  human_graph,
  eids = E(human_graph)[combined_score > ninetyth.percentile],
  delete.vertices = TRUE
)
# 3. adjacency matrix of the pruned graph
adj_matrix <- as_adjacency_matrix(human_graph)
# 4. map gene ids to protein ids
### get gene/protein ids via Biomart
mart <- useMart(host = 'grch37.ensembl.org',
                biomart = 'ENSEMBL_MART_ENSEMBL',
                dataset = 'hsapiens_gene_ensembl')
### extract protein ids from the human network
### (second '.'-separated field of each row name; presumably the names look
### like '<taxon id>.<protein id>' -- confirm against the STRINGdb output)
protein_ids <- vapply(strsplit(rownames(adj_matrix), '\\.'),
                      function(x) x[2], character(1))
### get protein to gene id mappings
mart_results <- getBM(attributes = c("ensembl_gene_id",
                                     "ensembl_peptide_id"),
                      filters = "ensembl_peptide_id", values = protein_ids,
                      mart = mart)
### replace protein ids with gene ids
### (proteins without a match in mart_results keep their protein id)
hit <- match(protein_ids, mart_results$ensembl_peptide_id)
mapped <- !is.na(hit)
newnames <- protein_ids
newnames[mapped] <- mart_results$ensembl_gene_id[hit[mapped]]
rownames(adj_matrix) <- newnames
colnames(adj_matrix) <- newnames
### names can repeat after the replacement: keep the first row/column per name
first_seen <- !duplicated(newnames)
ppi <- adj_matrix[first_seen, first_seen, drop = FALSE]
### drop nodes whose row sums to zero
nullrows <- Matrix::rowSums(ppi) == 0
## ppi is the network with gene ids
ppi <- ppi[!nullrows, !nullrows, drop = FALSE]
sessionInfo()
## R version 4.4.0 beta (2024-04-15 r86425)
## Platform: x86_64-pc-linux-gnu
## Running under: Ubuntu 22.04.4 LTS
## 
## Matrix products: default
## BLAS:   /home/biocbuild/bbs-3.19-bioc/R/lib/libRblas.so 
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: America/New_York
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] BiocStyle_2.32.0
## 
## loaded via a namespace (and not attached):
##  [1] digest_0.6.35       R6_2.5.1            bookdown_0.39      
##  [4] fastmap_1.1.1       xfun_0.43           cachem_1.0.8       
##  [7] knitr_1.46          htmltools_0.5.8.1   rmarkdown_2.26     
## [10] lifecycle_1.0.4     cli_3.6.2           sass_0.4.9         
## [13] jquerylib_0.1.4     compiler_4.4.0      tools_4.4.0        
## [16] evaluate_0.23       bslib_0.7.0         yaml_2.3.8         
## [19] BiocManager_1.30.22 jsonlite_1.8.8      rlang_1.1.3