## ----warning=FALSE, message=FALSE------------------------------------------ library(FamAgg) data(minnbreast) ## Subsetting to only few families of the whole data set. mbsub <- minnbreast[minnbreast$famid %in% 4:14, ] mbped <- mbsub[, c("famid", "id", "fatherid", "motherid", "sex")] ## Renaming column names. colnames(mbped) <- c("family", "id", "father", "mother", "sex") ## Defining the optional argument age. endage <- mbsub$endage names(endage) <- mbsub$id ## Create the object. fad <- FAData(pedigree=mbped, age=endage) ## -------------------------------------------------------------------------- ## Use the pedigree method to access the full pedigree ## data.frame, head(pedigree(fad)) ## or access individual columns using $. ## The ID of the father (0 representing "founders"): head(fad$father) ## Mother: head(fad$mother) ## Sex: head(fad$sex) ## We can also access the age of each individual, if ## provided. head(age(fad)) ## -------------------------------------------------------------------------- ## Extract the pedigree information from family "4"... nrow(family(fad, family=4)) head(family(fad, family=4)) ## ...which is the same as extracting the family pedigree ## for an individual of this family. head(family(fad, id=3)) ## Note that IDs are internally always converted to character, ## thus, using id=3 and id="3" return the same information. head(family(fad, id="3")) ## -------------------------------------------------------------------------- ## Subset the object to a single family. fam4 <- fad[fad$family == "4", ] table(fam4$family) ## ----family-4-pedigree, message=FALSE, fig.align='center'------------------ switchPlotfun("ks2paint") ## By supplying device="plot", we specify that we wish to visualize the ## pedigree in an R plot. This is the default for "ks2paint", anyway. plotPed(fad, id=3, device="plot") ## -------------------------------------------------------------------------- ## Build the pedigree for individual 3. fullPed <- buildPed(fad, id="3") nrow(fullPed) ## -------------------------------------------------------------------------- ## Find the subpedigree for individuals 21, 22 and 17. buildPed(fad, id=c(21, 22, 17), prune=TRUE) ## ----family-sub-pedigree, message=FALSE, fig.align='center'---------------- plotPed(fad, id=c(21, 22, 17), prune=TRUE) ## ----pedigree-family-14, message=FALSE, fig.align='center'----------------- plotPed(fad, family="14", cex=0.4) ## -------------------------------------------------------------------------- ## Check if we have individual 26064 from the second branch in the pedigree ## of individual 440. any(buildPed(fad, id="440")$id == "26064") ## What for the pedigree of 447? any(buildPed(fad, id="447")$id == "26064") ## -------------------------------------------------------------------------- ## Find founders for family 4. findFounders(fad, "4") ## -------------------------------------------------------------------------- ## Find the closest common ancestor. getCommonAncestor(fad, id=c(21, 22, 17)) ## -------------------------------------------------------------------------- ## Get the children of ID 4. getChildren(fad, id="4", max.generations=1) ## Get the offsprings. getChildren(fad, id="4") ## Get all ancestors. getAncestors(fad, id="4") ## Get the siblings. getSiblings(fad, id=c("4")) ## -------------------------------------------------------------------------- ## Add the trait information to the FAData object. cancer <- mbsub$cancer names(cancer) <- as.character(mbsub$id) trait(fad) <- cancer ## Identify the affected founders. ## First all affected individuals. affIds <- affectedIndividuals(fad) ## Identify founders for each family. founders <- lapply(unique(fad$family), function(z){ return(findFounders(fad, family=z)) }) names(founders) <- unique(fad$family) ## Track the affected founder. affFounders <- lapply(founders, function(z){ return(z[z %in% affIds]) }) ## Interestingly, not all founders are affected! It seems in some cases ## parents of the affected participants in the screening phase have also ## been included. affFounders <- affFounders[unlist(lapply(affFounders, length)) > 0] ## The number of families analyzed. length(founders) ## The number of families with affected founder. length(affFounders) ## -------------------------------------------------------------------------- kin2affFounders <- shareKinship(fad, unlist(affFounders)) ## How many of these are affected? sum(kin2affFounders %in% affIds) ## How many affected are not related to an affected founder? sum(!(affIds %in% kin2affFounders)) ## -------------------------------------------------------------------------- ## Get all individuals sharing kinship with individual 4. shareKinship(fad, id="4") ## -------------------------------------------------------------------------- ## Estimate generation levels for all families. estimateGenerations(fad)[1:3] ## -------------------------------------------------------------------------- gens <- generationsFrom(fad, id="4") ## ----family-four-gens-rel-to-four, message=FALSE, fig.align='center'------- plotPed(fad, family=4, label2=gens) ## ----results='hide', message=FALSE----------------------------------------- ## Extract the cancer trait information. tcancer <- mbsub$cancer names(tcancer) <- mbsub$id ## Set the trait. trait(fad) <- tcancer ## -------------------------------------------------------------------------- ## Extract the trait information. head(trait(fad)) ## We can also extract the IDs of the affected individuals. head(affectedIndividuals(fad)) ## Or the IDs of the phenotyped individuals. head(phenotypedIndividuals(fad)) ## ----family-pedigree-affected, message=FALSE, fig.align='center'----------- ## Plotting the pedigree for family "9". plotPed(fad, family="9") ## ----family-pedigree-affected-highlighted, message=FALSE, fig.align='center'---- ## Plotting the pedigree for family "9". plotPed(fad, family="9", highlight.ids=list(a=c("185", "201", "198"), b=c("193"))) ## -------------------------------------------------------------------------- ## Transform the full pedigree to a graph. fullGraph <- ped2graph(pedigree(fad)) ## In addition, build the graph for a single family. singleFam <- ped2graph(family(fad, family=4)) ## ----graph-plots, fig.align='center'--------------------------------------- ## Build the layout. plot(fullGraph) lay <- layout_(singleFam, on_grid()) plot(singleFam, layout=lay) ## -------------------------------------------------------------------------- subgr <- connectedSubgraph(singleFam, nodes=c("7", "8", "27", "17")) ## ----subgraph-plot, fig.align='center'------------------------------------- ## Plot the graph. plot(subgr) ## Similar to buildPed/plotPed with prune=TRUE. plotPed(fad, id=c("7", "8", "17", "27"), prune=TRUE) ## ----message=FALSE--------------------------------------------------------- ## Import a "ped" file. pedFile <- system.file("txt/minnbreastsub.ped.gz", package="FamAgg") ## Quick glance at the file. readLines(pedFile, n=1) fad <- FAData(pedFile) head(pedigree(fad)) ## ----message=FALSE--------------------------------------------------------- ## Create the FAData by reading data from a txt file. pedFile <- system.file("txt/minnbreastsub.txt", package="FamAgg") fad <- FAData(pedigree=pedFile, header=TRUE, id.col="id", family.col="famid", father.col="fatherid", mother.col="motherid") ## -------------------------------------------------------------------------- tmpF <- tempfile() ## Subset the pedigree to family 4 fam4 <- fad[fad$family == 4, ] ## Export data in ped format. export(fam4, tmpF, format="ped") ## ----warning=TRUE, message=FALSE------------------------------------------- library(FamAgg) set.seed(18011977) data(minnbreast) ## Subset the dataset to reduce processing time. mbsub <- minnbreast[minnbreast$famid %in% c(4:60, 432), ] ## Uncomment the line below to use the whole dataset instead. ## mbsub <- minnbreast ## Define the number of simulations we perform. ## nsim <- 10000 nsim <- 1000 mbped <- mbsub[, c("famid", "id", "fatherid", "motherid", "sex")] ## Renaming column names. colnames(mbped) <- c("family", "id", "father", "mother", "sex") ## Create the FAData object. fad <- FAData(pedigree=mbped) ## Define the trait. tcancer <- mbsub$cancer names(tcancer) <- as.character(mbsub$id) ## ----warning=FALSE, message=TRUE------------------------------------------- ## Calculate the genealogical index of familiality. gi <- genealogicalIndexTest(fad, trait=tcancer, traitName="cancer", nsim=nsim) ## Display the result. result(gi) ## ----warning=FALSE, eval=FALSE--------------------------------------------- # ## Calculate the genealogical index of familiality using random sampling from # ## a sex matched control set. # giSexMatch <- genealogicalIndexTest(fad, trait=tcancer, traitName="cancer", nsim=nsim, # controlSetMethod="getSexMatched") # # ## Use an external vector to perform the matching. # ## The results are essentially identical. # giExtMatch <- genealogicalIndexTest(fad, trait=tcancer, traitName="cancer", nsim=nsim, # controlSetMethod="getExternalMatched", match.using=fad$sex) ## ----message=FALSE--------------------------------------------------------- ## Evaluate the proportion of male and femal cases. table(gi$sex[affectedIndividuals(gi)]) ## We can use the gender information to perform stratified sampling, i.e. ## in each permutation a random set of 3 male and 15 females will be selected. giStrata <- genealogicalIndexTest(fad, trait=tcancer, traitName="cancer", nsim=nsim, strata=fad$sex) result(giStrata) ## ----mbreast-genealogical-index-result, message=FALSE, warning=FALSE, fig.align='center'---- ## Plot the result. plotRes(giStrata) ## ----message=FALSE--------------------------------------------------------- library(gap) ## Adding the trait information, so the extracted pedigree data.frame will ## also contain a column "affected" with that information. trait(fad) <- tcancer ## Extract the pedigree and re-format it for the gif function. pedi <- pedigree(fad) ## Remove singletons. pedi <- removeSingletons(pedi) pedi[is.na(pedi$father), "father"] <- 0 pedi[is.na(pedi$mother), "mother"] <- 0 ## Identify the affected individuals. affIds <- as.numeric(pedi$id[which(pedi$affected == 1)]) ## Execute the gif method contained in the gap package. gifRes <- gif(pedi[, c("id", "father", "mother")], affIds) ## Calculate the GIF using FamAgg's genealogicalIndexTest. gifT <- genealogicalIndexTest(fad, trait=tcancer, nsim=100) ## Comparing the results: gifRes[[1]] == result(gifT)$genealogical_index ## ----message=FALSE, warning=FALSE------------------------------------------ ## Perform the analysis (no strata etc) separately for each family. giFam <- genealogicalIndexTest(fad, trait=tcancer, nsim=nsim, perFamilyTest=TRUE, traitName="Cancer") ## Display the result from the analysis. head(result(giFam)) ## ----warning=FALSE--------------------------------------------------------- ## Estimate the risk for each individual using the familial incidence rate method. ## We use the endage provided in the Minnesota Breast Cancer Record as ## a measure for time at risk. fr <- familialIncidenceRate(fad, trait=tcancer, timeAtRisk=mbsub$endage) ## ----mbreast-mean-fr-per-family, message=FALSE, warning=FALSE, fig.align='center'---- ## Split the FIR by family and average the values within each. frFam <- split(fr, f=fad$family) frFamAvg <- lapply(frFam, mean, na.rm=TRUE) ## Sort and plot the averages. frFamAvg <- sort(unlist(frFamAvg), decreasing=TRUE) plot(frFamAvg, type="h", xaxt="n", xlab="", ylab="mean FIR", main="Per family averaged familial incidence rate") axis(side=1, las=2, at=1:length(frFamAvg), label=names(frFamAvg)) ## ----warning=FALSE, message=FALSE------------------------------------------ ## Estimate the risk for each individual using the familial incidence rate method. ## We use the endage provided in the Minnesota Breast Cancer Record as ## a measure for time at risk. frTest <- familialIncidenceRateTest(fad, trait=tcancer, traitName="cancer", timeAtRisk=mbsub$endage, nsim=nsim) ## -------------------------------------------------------------------------- head(familialIncidenceRate(frTest)) head(frTest$fir) ## -------------------------------------------------------------------------- head(result(frTest)) ## -------------------------------------------------------------------------- frRes <- result(frTest) frSig <- frRes[which(frRes$padj < 0.05), ] ## Split by family. frFam <- split(frSig, frSig$family) frRes <- data.frame(family=names(frFam), no_sign_fir=unlist(lapply(frFam, nrow))) ## Determine the number of phenotyped and affected individuals per family. noPheNAff <- sapply(names(frFam), function(z){ fam <- family(frTest, family=z) return(c(no_pheno=sum(!is.na(fam$affected)), no_aff=length(which(fam$affected == 1)) )) }) frRes <- cbind(frRes, t(noPheNAff)) ## Display the number of phenotyped and affected individuals as well as ## the number of individuals within the families with a significant FIR. frRes[order(frRes[, "no_sign_fir"], decreasing=TRUE), ] ## -------------------------------------------------------------------------- ## Perform the kinship sum test. kinSum <- kinshipSumTest(fad, trait=tcancer, traitName="cancer", nsim=nsim, strata=fad$sex) head(result(kinSum)) ## -------------------------------------------------------------------------- ## Extract the IDs of the individuals with significant kinship. By default, the raw ## p-values are adjusted for multiple hypothesis testing using the method from ## Benjamini and Hochberg. kinSumRes <- result(kinSum) kinSumIds <- as.character(kinSumRes[kinSumRes$padj < 0.1, "affected_id"]) ## From which families are these? table(kinSumRes[kinSumIds, "family"]) ## ----mbreast-family-432-FIR-compared-to-others, message=FALSE, warning=FALSE, fig.align='center'---- ## Get the familial ratio of the significant in this family, of all in this family, ## and of all others. famId <- kinSumRes[1, "family"] ## Extract the family. fam <- family(kinSum, family=famId) ## Stratify individuals in affected/unaffected. strat <- rep("All, unaff.", length(kinSum$id)) strat[which(kinSum$affected > 0)] <- "All, aff." strat[kinSum$id %in% fam$id] <- paste0("Fam ", famId, ", unaff.") strat[kinSum$id %in% fam$id[which(fam$affected > 0)]] <- paste0("Fam ",famId,", aff.") famData <- data.frame(fr=fr, group=strat) boxplot(fr~group, data=famData, na.rm=TRUE, ylab="FIR", col=rep(c("#FBB4AE", "#B3CDE3"), 2)) ## ----mbreast-family-432-affected, message=FALSE, warning=FALSE, fig.align='center'---- ## Plot the pedigree for the family of the selected individual removing ## all individuals that were not phenotypes. plotPed(kinSum, id=kinSumIds[1], cex=0.3, only.phenotyped=TRUE) ## ----mbreast-family-432-affecte-res, message=FALSE, warning=FALSE, fig.align='center'---- plotRes(kinSum, id=kinSumIds[1]) ## ----message=FALSE--------------------------------------------------------- ## Calculate the kinship test. kinGroup <- kinshipGroupTest(fad, trait=tcancer, traitName="cancer", nsim=nsim, strata=fad$sex) head(result(kinGroup)) ## -------------------------------------------------------------------------- kinGroupRes <- result(kinGroup) ## Create a data.frame with the summarized results. resTab <- data.frame(total_families=length(unique(kinGroup$family)), ratio_sign=length(unique( kinGroupRes[kinGroupRes$ratio_padj < 0.05, "family"] )), kinship_sign=length(unique( kinGroupRes[kinGroupRes$kinship_padj < 0.05, "family"] )) ) resTab ## ----mbreast-family-432-affecte-res-kinship, message=FALSE, warning=FALSE, fig.align='center'---- plotPed(kinGroup, id=kinGroupRes[kinGroupRes$family == "432", "group_id"][1], prune=TRUE, label1=fr) ## ----message=FALSE--------------------------------------------------------- ## First we load the trait/affected information into the FAData object. trait(fad) <- tcancer ## Next we determine the number of phenotyped individuals per family. famAff <- pedigree(fad)[, c("family", "affected")] ## Exclude individuals that were not phenotyped. famAff <- famAff[!is.na(famAff$affected), ] ## Calculate the number of phenotyped per family. famSize <- table(famAff$family) keepFams <- names(famSize)[famSize < 22] ## Extract the family and restrict to those on which we can perform the analysis. famCliq <- fad$family famCliq <- famCliq[famCliq %in% keepFams] ## ----message=FALSE--------------------------------------------------------- probRes <- probabilityTest(fad, trait=tcancer, traitName="Cancer", cliques=famCliq, nsim=nsim) probResTab <- result(probRes) head(probResTab) ## ----mbreast-prob-clique-1, message=FALSE, warning=FALSE, fig.align='center'---- plotPed(probRes, id=probResTab[1, "group_id"])