## Loading required package: kinship2
## Loading required package: Matrix
## Loading required package: quadprog
## Loading required package: igraph
## 
## Attaching package: 'igraph'
## The following objects are masked from 'package:stats':
## 
##     decompose, spectrum
## The following object is masked from 'package:base':
## 
##     union
## 
## Attaching package: 'FamAgg'
## The following object is masked from 'package:igraph':
## 
##     cliques
## The following object is masked from 'package:kinship2':
## 
##     pedigree

Package: FamAgg
Authors: J. Rainer, D. Taliun, C.X. Weichenberger
Modified: 2021-05-19 15:08:57
Compiled: Wed May 19 17:38:29 2021

1 Introduction

This package provides basic pedigree analysis and plotting utilities as well as a variety of methods to evaluate familial clustering of cases from a given trait. Identification of families or groups of individuals within families with significant aggregation of cases can also aid in the selection of interesting and promising individuals for whole genome or exome sequencing projects.

For kinship coefficient calculations and pedigree plotting the package relies on and extends the functionality of the kinship2 package [1].

If you use this package please cite Rainer et al. [2].

2 Basic pedigree operations

In the examples below we perform some simple pedigree operations, such as plotting the pedigree for an individual or family, finding the closest common ancestor for a set of individuals in a pedigree or retrieving the identifiers (IDs) of all ancestors for an individual. Basic pedigree information is stored in FAData objects, thus we first generate such an object from a subset of the Minnesota Breast Cancer Study provided by the kinship2 package. In the example below, we generate the FAData providing a data.frame with the pedigree data, alternatively, the pedigree information could be imported from a file (see Section 3). Upon data set creation the kinship matrix (i.e. a matrix containing the kinship coefficient between each pair of individuals in the whole pedigree) is internally calculated using the functionality from the kinship2 package [1].

library(FamAgg)

## Load the Minnesota Breast Cancer Study data and keep only
## families 4 to 14 of the whole data set.
data(minnbreast)
mbsub <- minnbreast[minnbreast$famid %in% 4:14, ]
## Select the pedigree columns and rename them in a single step.
mbped <- setNames(
    mbsub[, c("famid", "id", "fatherid", "motherid", "sex")],
    c("family", "id", "father", "mother", "sex")
)
## Defining the optional argument age, named by the individuals' IDs.
endage <- setNames(mbsub$endage, mbsub$id)
## Create the object.
fad <- FAData(pedigree = mbped, age = endage)

We can access all the pedigree information stored in this object using the pedigree method, but also using $. The row names of the pedigree data.frame as well as the names of the vectors returned by $ are the IDs of the individuals in the pedigree.

## Use the pedigree method to access the full pedigree
## data.frame,
head(pedigree(fad))
##   family id father mother sex
## 1      4  1     NA     NA   M
## 2      4  2     NA     NA   F
## 3      4  3     25      4   F
## 4      4  4      1      2   F
## 5      4  5      1      2   M
## 6      4  6      1      2   M
## or access individual columns using $.
## The ID of the father (NA for "founders", i.e. individuals without
## a father defined in the pedigree):
head(fad$father)
##  1  2  3  4  5  6 
## NA NA 25  1  1  1
## Mother:
head(fad$mother)
##  1  2  3  4  5  6 
## NA NA  4  2  2  2
## Sex:
head(fad$sex)
## 1 2 3 4 5 6 
## M F F F M M 
## Levels: M F
## We can also access the age of each individual, if
## provided.
head(age(fad)) 
##        1        2        3        4        5        6 
##       NA 78.05886 55.50000 48.00000 75.00342 53.63997

To extract the pedigree for a single family we can use the family method, specifying either the ID of the family or the ID of an individual in the family.

## Extract the pedigree information from family "4": first the number
## of individuals in the family, then the first rows of its pedigree.
nrow(family(fad, family = 4))
## [1] 43
head(family(fad, family = 4))
##   family id father mother sex
## 1      4  1     NA     NA   M
## 2      4  2     NA     NA   F
## 3      4  3     25      4   F
## 4      4  4      1      2   F
## 5      4  5      1      2   M
## 6      4  6      1      2   M
## ...which is the same as extracting the family pedigree
## for an individual of this family.
head(family(fad, id = 3))
##   family id father mother sex
## 1      4  1     NA     NA   M
## 2      4  2     NA     NA   F
## 3      4  3     25      4   F
## 4      4  4      1      2   F
## 5      4  5      1      2   M
## 6      4  6      1      2   M
## Note that IDs are internally always converted to character,
## thus, using id=3 and id="3" returns the same information.
head(family(fad, id = "3")) 
##   family id father mother sex
## 1      4  1     NA     NA   M
## 2      4  2     NA     NA   F
## 3      4  3     25      4   F
## 4      4  4      1      2   F
## 5      4  5      1      2   M
## 6      4  6      1      2   M

Alternatively, we could subset the FAData to individuals of a single family.

## Subset the object to the individuals of family "4" and tabulate the
## family IDs to confirm that only this family is left.
fam4 <- fad[fad$family == "4", ]
table(fam4$family) 
## 
##  4 
## 43

To explore this family we can plot its pedigree. By default, the plotting capabilities of the kinship2 package are used to plot pedigrees, but alternatively, if all required dependencies are available, the HaploPainter [3] perl script (http://haplopainter.sourceforge.net/) can be used instead. The switchPlotfun function can be used to switch the plotting back-end. Available arguments are ks2paint and haplopaint for kinship2 and HaploPainter plotting, respectively. Note, however, that HaploPainter can only export plots to a file, whereas kinship2 plotting can, in addition to exporting the plot, also show it as a standard R plot.

Below we use the switchPlotfun to ensure the use of kinship2 plotting (usually not required) and plot the full available pedigree of individual 3. If the age of individuals is available, it will be plotted below the individual’s ID.

## Select the kinship2 plotting back-end ("ks2paint").
switchPlotfun("ks2paint")
## By supplying device="plot", we specify that we wish to visualize the
## pedigree in an R plot. This is the default for "ks2paint", anyway.
plotPed(fad, id = 3, device = "plot") 

The pedigree for an individual or a list of individuals can be extracted using the buildPed method. By default the method first tries to identify all parents up to 3 generations in the pedigree, and subsequently all children of the individuals and all identified parents.

## Build the pedigree for individual 3 and count the individuals in it.
fullPed <- buildPed(fad, id = "3")
nrow(fullPed) 
## [1] 29

Alternatively, we can extract the smallest possible pedigree for a list of individuals by specifying prune=TRUE. Internally, the function transforms the pedigree into a graph, tries to find all paths between the individuals and returns the sub-graph of all individuals along with individuals along the paths between them.

## Find the subpedigree for individuals 21, 22 and 17; prune=TRUE
## requests the smallest possible pedigree connecting them.
buildPed(fad, id = c(21, 22, 17), prune = TRUE) 
##    family id father mother sex
## 3       4  3     25      4   F
## 4       4  4      1      2   F
## 1       4  1     NA     NA   M
## 8       4  8      1      2   F
## 17      4 17     28      8   M
## 21      4 21     24      3   M
## 22      4 22     24      3   F
## 2       4  2     NA     NA   F
## 25      4 25     NA     NA   M
## 28      4 28     NA     NA   M
## 24      4 24     NA     NA   M

And the pedigree plot for that subset of the whole family:

## Plot the pruned pedigree connecting individuals 21, 22 and 17.
plotPed(fad, id = c(21, 22, 17), prune = TRUE) 

Note that the pedigree returned by the buildPed method for an individual might differ from the pedigree of a whole family. The pedigree returned by buildPed contains only individuals that share kinship with the specified individual. To exemplify this, we plot the pedigree for the family 14 in the Minnesota Breast Cancer data set. Note that the individuals in the pedigree plot depicted as diamonds are individuals with unknown gender. (The message “Did not plot…” is issued by the kinship2 plotting function and indicates singletons that are assigned to the family but have neither parents nor children.)

## Plot the pedigree of the whole family 14. NOTE(review): cex = 0.4
## presumably shrinks the plot symbols/labels -- confirm in ?plotPed.
plotPed(fad, family = "14", cex = 0.4) 

## Did not plot the following people: 457 463 470 471 26067 26068 26098 26099

In this family, founder 441 is the founder of two family branches. Building the pedigree for individual 440 will not include any of the individuals of the second branch, as he does not share kinship with any of them. The pedigree built for 447 on the other hand contains also individuals from the second branch as she shares kinship with them (via her mother 441).

## Check whether individual 26064 from the second branch is part of the
## pedigree of individual 440. `%in%` tests membership directly and,
## unlike any(... == ...), can never evaluate to NA.
"26064" %in% buildPed(fad, id = "440")$id
## [1] FALSE
## What about the pedigree of 447?
"26064" %in% buildPed(fad, id = "447")$id
## [1] TRUE

A family pedigree may consist of many founder couples (i.e. individuals for which neither father nor mother is defined in the pedigree). To identify the pedigree’s founder couple (being the couple with the largest number of offspring generations in the pedigree) the findFounders method can be used. Note that the function always returns only one couple, even if there might be two founder couples in the family pedigree with the same number of offspring generations.

## Find the founder couple for family 4.
findFounders(fad, "4") 
## [1] "1" "2"

Alternatively, it might be of interest to determine the closest common ancestor between individuals in a pedigree. Below we use the getCommonAncestor method to identify the common ancestors of individuals 21, 22 and 17 (which, as we know from the pedigree shown above, are individuals 1 and 2).

## Find the closest common ancestor of individuals 21, 22 and 17.
getCommonAncestor(fad, id = c(21, 22, 17)) 
## [1] "1" "2"

Other useful methods are getChildren, getAncestors and getSiblings, that return the children (or all offspring generations up to a specified level), the parents (or all ancestors) or the siblings for the specified individuals, respectively.

## Get the children of ID 4 (max.generations = 1 restricts the result
## to the first offspring generation).
getChildren(fad, id = "4", max.generations = 1)
## [1] "3"
## Get all offspring generations.
getChildren(fad, id = "4")
## [1] "3"  "21" "22" "23"
## Get all ancestors.
getAncestors(fad, id = "4")
## [1] "1" "2"
## Get the siblings (the result below also contains the queried ID itself).
getSiblings(fad, id = c("4")) 
## [1] "4"  "5"  "6"  "7"  "8"  "9"  "10"

In the whole Minnesota Breast Cancer data set there are 426 families corresponding to 426 founders that had cancer during the screening phase between 1944 and 1952. In the code block below we identify the affected founders per family.

## Add the trait information to the FAData object.
cancer <- mbsub$cancer
names(cancer) <- as.character(mbsub$id)
trait(fad) <- cancer

## Identify the affected founders.
## First all affected individuals.
affIds <- affectedIndividuals(fad)
## Identify the founder couple of each family; the family IDs are
## determined once and re-used to name the resulting list.
famIds <- unique(fad$family)
founders <- lapply(famIds, function(z) findFounders(fad, family = z))
names(founders) <- famIds

## Track the affected founder: keep, per family, only the founders
## that are among the affected individuals.
affFounders <- lapply(founders, function(z) z[z %in% affIds])
## Interestingly, not all founders are affected! It seems in some cases
## parents of the affected participants in the screening phase have also
## been included.
## Drop the families without any affected founder.
affFounders <- affFounders[lengths(affFounders) > 0]

## The number of families analyzed.
length(founders)
## [1] 10
## The number of families with affected founder.
length(affFounders)
## [1] 2

Unexpectedly, one of the founders is affected in only a few families. For the other families additional (unaffected) ancestors might have been added at a later time point.

Next we get the number of affected individuals that are related to these affected founders.

## Get the IDs of all individuals sharing kinship with the affected
## founders identified above.
kin2affFounders <- shareKinship(fad, unlist(affFounders))

## How many of these are affected?
sum(kin2affFounders %in% affIds)
## [1] 7
## How many affected are not related to an affected founder?
sum(!(affIds %in% kin2affFounders))
## [1] 21

2.1 Pedigree analysis methods

In this section we perform some more advanced pedigree operations. First, we identify all individuals in the pedigree that share kinship with individual 4.

## Get all individuals sharing kinship with individual 4 (the result
## below also contains individual 4 itself).
shareKinship(fad, id = "4") 
##  [1] "1"  "2"  "3"  "4"  "5"  "6"  "7"  "8"  "9"  "10" "11" "12" "13" "14" "15"
## [16] "16" "17" "18" "19" "20" "21" "22" "23"

Next, we determine generations within the pedigree. Generations can only be estimated for a single family, since in most instances e.g. the year of birth is not available. Thus, generations are estimated considering the relation between individuals, starting from the founder couple, i.e. generation 0, assigning generation 1 to their children and all the mates of their children and so on. The estimateGenerations method calculates such generation numbers for each family defined in the object (or for a single family, if the family ID is provided). The result is returned as a list with the list names corresponding to the family ID and the list elements being the estimated generation numbers (with names corresponding to the ID of the respective individual).

## Estimate generation levels for all families; only the results for
## the first three families are shown.
estimateGenerations(fad)[1:3] 
## $`4`
##  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 
##  0  0  2  1  1  1  1  1  1  1  2  2  2  2  2  2  2  2  2  2  3  3  3  2  1  1 
## 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 
##  1  1  1 NA NA NA NA NA NA NA NA NA NA NA NA NA NA 
## 
## $`5`
##    44    45    46    47    48    49    50    51    52    53    54    55    56 
##     0     0     2     2     2     2     2     2     2     2     1     3     3 
##    57    58    59    60    61    62    63    64    65    66    67    68    69 
##     3     3     3     3     3     3     3     3     2     2     2    NA     2 
##    70    71    72    73    74    75    76    77    78    79 26050 26051 
##     1    NA    NA    NA    NA    NA    NA    NA    NA     2    NA    NA 
## 
## $`6`
##    80    81    82    83    84    85    86    87    88    89    90    91    92 
##     0     0     2     2     1     1     1     1     1     1     1     1     1 
##    93    94    95    96    97    98    99   100   101   102   103   104   105 
##     1     2     2     2     2     2     2     2     3     3     3     3     2 
##   106   107   108   109   110   111   112   113   114   115   116   117   118 
##     2     1     1     1    NA    NA    NA    NA     2    NA    NA    NA    NA 
## 26052 26053 
##     3     3

Individuals without generation level (i.e. with an NA) are not connected to any other individual in the pedigree (and thus most likely represent errors in the pedigree).

In addition, it is also possible to calculate generation levels relative to a (single) specified individual:

## Generation levels relative to individual 4.
gens <- generationsFrom(fad, id = "4") 

We can render these generation numbers into the pedigree:

## Plot family 4, passing the generation numbers in gens as label2.
plotPed(fad, family = 4, label2 = gens) 

## Did not plot the following people: 30 31 32 33 34 35 36 37 38 39 40 41 42 43

2.2 Additional plotting options

If trait information is available, it might be of interest to highlight affected individuals in the pedigree. Trait information should always be coded as 0 (or FALSE) for unaffected and 1 (or TRUE) for affected. In the example below, we use the cancer information from the Minnesota Breast Cancer Study.

## Build the cancer trait vector, named by the individuals' IDs, and
## assign it as the trait of the FAData object.
tcancer <- setNames(mbsub$cancer, mbsub$id)
trait(fad) <- tcancer

We can now extract the trait information from the object or identify directly the phenotyped or affected individuals.

## Extract the trait information (0: unaffected, 1: affected), named
## by the individuals' IDs.
head(trait(fad))
## 1 2 3 4 5 6 
## 0 0 0 1 0 0
## We can also extract the IDs of the affected individuals.
head(affectedIndividuals(fad))
## [1] "4"   "11"  "37"  "54"  "84"  "122"
## Or the IDs of the phenotyped individuals.
head(phenotypedIndividuals(fad)) 
## [1] "1" "2" "3" "4" "5" "6"

Plotting a FAData object with trait information results in a pedigree plot with highlighted affected individuals (for kinship2 pedigree plotting: affected, unaffected and not phenotyped are represented as filled symbols, open symbols and symbols with a question mark inside, respectively).

## Plotting the pedigree for family "9"; with the trait set, affected
## individuals are highlighted in the plot.
plotPed(fad, family = "9")