## ----style, echo = FALSE, results = 'asis'------------------------------- BiocStyle::markdown() ## ----global_options, include=FALSE-------------------------------------------------------------------------- knitr::opts_chunk$set(fig.width=10, fig.height=7, warning=FALSE, message=FALSE) options(width=110) set.seed(123) ## ----------------------------------------------------------------------------------------------------------- ## load ABAEnrichment package library(ABAEnrichment) ## create input vector with candidate genes gene_ids = c('NCAPG', 'APOL4', 'NGFR', 'NXPH4', 'C21orf59', 'CACNG2', 'AGTR1', 'ANO1', 'BTBD3', 'MTUS1', 'CALB1', 'GYG1', 'PAX2') genes = rep(1, length(gene_ids)) names(genes) = gene_ids genes ## ---- eval=FALSE-------------------------------------------------------------------------------------------- # ## run enrichment analyses with default parameters for the adult and developing human brain # res_adult = aba_enrich(genes, dataset='adult') # res_devel = aba_enrich(genes, dataset='5_stages') ## ----results='hide'----------------------------------------------------------------------------------------- ## run enrichment analysis with less cutoffs and randomsets to save computation time res_devel = aba_enrich(genes, dataset='5_stages', cutoff_quantiles=c(0.5,0.7,0.9), n_randsets=100) ## ----------------------------------------------------------------------------------------------------------- ## extract first element from the output list, which contains the statistics fwers_devel = res_devel[[1]] ## see results for the brain regions with highest enrichment for children (3-11 yrs, age_category 3) fwers_3 = fwers_devel[fwers_devel[,1]==3, ] head(fwers_3) ## ----------------------------------------------------------------------------------------------------------- res_devel[2:3] ## ----eval=FALSE--------------------------------------------------------------------------------------------- # ## run enrichment analysis, with randomsets dependent on gene length # res_len = aba_enrich(genes, gene_len=TRUE) # ## run the same analysis using gene coordinates from GRCh38 instead of the default GRCh37 # res_len_hg20 = aba_enrich(genes, gene_len=TRUE, ref_genome='grch38') ## ----------------------------------------------------------------------------------------------------------- ## assign random scores to the genes used above genes = sample(1:50, length(gene_ids)) names(genes) = gene_ids genes ## ---- results='hide'---------------------------------------------------------------------------------------- ## test for enrichment of expressed genes with high scores in the adult brain using the Wilcoxon rank-sum test res_wilcox = aba_enrich(genes, test='wilcoxon', cutoff_quantiles=c(0.5,0.7,0.9), n_randsets=100) ## ----------------------------------------------------------------------------------------------------------- head(res_wilcox[[1]]) ## ----------------------------------------------------------------------------------------------------------- ## create input vector with a candidate region on chromosome 8 ## and background regions on chromosome 7, 8 and 9 genes = c(1, rep(0,6)) names(genes) = c('8:82000000-83000000', '7:1300000-56800000', '7:74900000-148700000', '8:7400000-44300000', '8:47600000-146300000', '9:0-39200000', '9:69700000-140200000') genes ## ----results='hide'----------------------------------------------------------------------------------------- ## run enrichment analysis for the adult human brain res_region = aba_enrich(genes, dataset='adult', cutoff_quantiles=c(0.5,0.7,0.9), n_randsets=100) ## ----------------------------------------------------------------------------------------------------------- ## look at the results from the enrichment analysis fwers_region = res_region[[1]] head(fwers_region) ## see which genes are located in the candidate region input_genes = res_region[[2]] candidate_genes = input_genes[input_genes==1] candidate_genes ## ----------------------------------------------------------------------------------------------------------- ## get expression data for the first 5 brain regions from the last aba_enrich-analysis top_regions = fwers_region[1:5, 'structure_id'] top_regions expr = get_expression(top_regions, background=FALSE) head(expr) ## ---- eval=FALSE-------------------------------------------------------------------------------------------- # ## get expression data independent from previous aba_enrich analysis # regions = c('Allen:12926', 'Allen:4738', 'Allen:4671', 'Allen:12909', 'Allen:4718') # gene_ids = c('CHMP4C', 'FABP12', 'FABP4', 'FABP5', 'FABP9', 'IMPA1', # 'PAG1', 'PMP2', 'SLC10A5', 'SNX16', 'ZFAND1') # expr2 = get_expression(regions, gene_ids=gene_ids, dataset='adult', background=FALSE) ## ----------------------------------------------------------------------------------------------------------- ## get expression data for the first 5 brain regions from the last aba_enrich-analysis top_regions = fwers_region[1:5, 'structure_id'] plot_expression(top_regions, background=FALSE) ## ----------------------------------------------------------------------------------------------------------- ## plot the same expression data without dendrogram plot_expression(top_regions, dendro=FALSE, background=FALSE) ## ----------------------------------------------------------------------------------------------------------- ## plot expression of some genes for the frontal neocortex (Allen:10161) in age category 3 (children, 3-11 yrs) gene_ids = c('ENSG00000157764', 'ENSG00000163041', 'ENSG00000182158', 'ENSG00000147889', 'ENSG00000103126', 'ENSG00000184634') plot_expression('Allen:10161', gene_ids=gene_ids, dataset='5_stages', age_category=3) ## ----------------------------------------------------------------------------------------------------------- ## get IDs of the substructures of the frontal neocortex (Allen:10161) for which expression data are available subs = get_sampled_substructures('Allen:10161') subs ## get the full name of those substructures get_name(subs) ## get the superstructures of the frontal neocortex (from general to special) supers = get_superstructures('Allen:10161') supers ## get the full names of those superstructures get_name(supers) ## ----------------------------------------------------------------------------------------------------------- ## get structure IDs of brain regions that contain 'accumbens' in their names get_id('accumbens') ## get structure IDs of brain regions that contain 'telencephalon' in their names get_id('telencephalon') ## ----------------------------------------------------------------------------------------------------------- ## get all brain regions included in ABAEnrichment together with thier IDs all_regions = get_id('') head(all_regions) ## ---- results='hide'---------------------------------------------------------------------------------------- ## run an enrichment analysis with 7 candidate and 7 background genes for the developing brain gene_ids = c('FOXJ1', 'NTS', 'LTBP1', 'STON2', 'KCNJ6', 'AGT', 'ANO3', 'TTR', 'ELAVL4', 'BEAN1', 'TOX', 'EPN3', 'PAX2', 'KLHL1') genes = rep(c(1,0), each=7) names(genes) = gene_ids res = aba_enrich(genes, dataset='5_stages', cutoff_quantiles=c(0.5,0.7,0.9), n_randsets=100) ## ----------------------------------------------------------------------------------------------------------- head(res[[1]]) ## see which candidate genes are annotated to brain regions with a FWER < 0.01 anno = get_annotated_genes(res, fwer_threshold=0.1) anno ## ----------------------------------------------------------------------------------------------------------- ## find out which of the above genes have expression above the 70% and 90% expression-cutoff, respectively, ## in the basal ganglia of the adult human brain (Allen:4276) anno2 = get_annotated_genes(structure_ids='Allen:4276', dataset='adult', cutoff_quantiles=c(0.7,0.9), genes=gene_ids) anno2 ## ----------------------------------------------------------------------------------------------------------- ## use previously defined input genes vector genes ## ----results='hide'----------------------------------------------------------------------------------------- ## run enrichment analysis with developmental effect score res_dev_effect = aba_enrich(genes, dataset='dev_effect', cutoff_quantiles=c(0.5,0.7,0.9), n_randsets=100) ## ----------------------------------------------------------------------------------------------------------- ## see the 5 brain regions with the lowest FWERs top_regions = res_dev_effect[[1]][1:5, ] top_regions ## ----------------------------------------------------------------------------------------------------------- ## plot developmental effect score of the 5 brain structures with the lowest FWERs plot_expression(top_regions[ ,'structure_id']) ## ----------------------------------------------------------------------------------------------------------- sessionInfo()