## ----style, echo = FALSE, results = 'asis'------------------------------- BiocStyle::markdown() ## ----setup, echo=FALSE, warning=FALSE------------------------------------ library(knitr) set.seed(42) opts_chunk$set(comment=NA, fig.align="center", warning=FALSE) ## ----installation, eval=FALSE-------------------------------------------- ## source("http://bioconductor.org/biocLite.R") ## biocLite("pcaExplorer") ## ----installation-github, eval=FALSE------------------------------------- ## library("devtools") ## install_github("federicomarini/pcaExplorer") ## ----loadlibrary, message=FALSE------------------------------------------ library("pcaExplorer") ## ------------------------------------------------------------------------ citation("pcaExplorer") ## ----installairway, eval=FALSE------------------------------------------- ## source("https://bioconductor.org/biocLite.R") ## biocLite("airway") ## ----loadairway, message=FALSE------------------------------------------- library(airway) library(DESeq2) data(airway) dds_airway <- DESeqDataSet(airway,design= ~ cell + dex) dds_airway rld_airway <- rlogTransformation(dds_airway) rld_airway ## ----launchairway, eval=FALSE-------------------------------------------- ## pcaExplorer(dds = dds_airway, ## rlt = rld_airway) ## ----annoairway, message = FALSE----------------------------------------- library(org.Hs.eg.db) genenames_airway <- mapIds(org.Hs.eg.db,keys = rownames(dds_airway),column = "SYMBOL",keytype="ENSEMBL") annotation_airway <- data.frame(gene_name = genenames_airway, row.names = rownames(dds_airway), stringsAsFactors = FALSE) head(annotation_airway) ## ----annofuncs, eval=FALSE----------------------------------------------- ## ## anno_df_orgdb <- get_annotation_orgdb(dds = dds_airway, ## orgdb_species = "org.Hs.eg.db", ## idtype = "ENSEMBL") ## ## anno_df_biomart <- get_annotation(dds = dds_airway, ## biomart_dataset = "hsapiens_gene_ensembl", ## idtype = "ensembl_gene_id") ## ## ----echo=FALSE---------------------------------------------------------- anno_df_orgdb <- get_annotation_orgdb(dds = dds_airway, orgdb_species = "org.Hs.eg.db", idtype = "ENSEMBL") ## ------------------------------------------------------------------------ head(anno_df_orgdb) ## ----launchairwayanno, eval=FALSE---------------------------------------- ## pcaExplorer(dds = dds_airway, ## rlt = rld_airway, ## annotation = annotation_airway) # or anno_df_orgdb, or anno_df_biomart ## ----ddsmultifac--------------------------------------------------------- dds_multifac <- makeExampleDESeqDataSet_multifac(betaSD_condition = 3,betaSD_tissue = 1) ## ----prepsimul----------------------------------------------------------- dds_multifac <- makeExampleDESeqDataSet_multifac(betaSD_condition = 1,betaSD_tissue = 3) dds_multifac rld_multifac <- rlogTransformation(dds_multifac) rld_multifac ## checking how the samples cluster on the PCA plot pcaplot(rld_multifac,intgroup = c("condition","tissue")) ## ----launchsimul, eval=FALSE--------------------------------------------- ## pcaExplorer(dds = dds_multifac, ## rlt = rld_multifac) ## ----func-pcaplot-------------------------------------------------------- pcaplot(rld_airway,intgroup = c("cell","dex"),ntop = 1000, pcX = 1, pcY = 2, title = "airway dataset PCA on samples - PC1 vs PC2") # on a different set of principal components... pcaplot(rld_airway,intgroup = c("dex"),ntop = 1000, pcX = 1, pcY = 4, title = "airway dataset PCA on samples - PC1 vs PC4", ellipse = TRUE) ## ----func-pcaplot3d, eval=FALSE------------------------------------------ ## pcaplot3d(rld_airway,intgroup = c("cell","dex"),ntop = 1000, ## pcX = 1, pcY = 2, pcZ = 3) ## # will open up in the viewer ## ----func-pcascree------------------------------------------------------- pcaobj_airway <- prcomp(t(assay(rld_airway))) pcascree(pcaobj_airway,type="pev", title="Proportion of explained proportion of variance - airway dataset") ## ----func-correlatepcs--------------------------------------------------- res_pcairway <- correlatePCs(pcaobj_airway,colData(dds_airway)) res_pcairway plotPCcorrs(res_pcairway) ## ----func-hiloadings----------------------------------------------------- # extract the table of the genes with high loadings hi_loadings(pcaobj_airway,topN = 10,exprTable=counts(dds_airway)) # or alternatively plot the values hi_loadings(pcaobj_airway,topN = 10,annotation = annotation_airway) ## ----func-genespca------------------------------------------------------- groups_airway <- colData(dds_airway)$dex cols_airway <- scales::hue_pal()(2)[groups_airway] # with many genes, do not plot the labels of the genes genespca(rld_airway,ntop=5000, choices = c(1,2), arrowColors=cols_airway,groupNames=groups_airway, alpha = 0.2, useRownamesAsLabels=FALSE, varname.size = 5 ) # with a smaller number of genes, plot gene names included in the annotation genespca(rld_airway,ntop=100, choices = c(1,2), arrowColors=cols_airway,groupNames=groups_airway, alpha = 0.7, varname.size = 5, annotation = annotation_airway ) ## ----func-topGOtable, eval=FALSE----------------------------------------- ## # example not run due to quite long runtime ## dds_airway <- DESeq(dds_airway) ## res_airway <- results(dds_airway) ## res_airway$symbol <- mapIds(org.Hs.eg.db, ## keys=row.names(res_airway), ## column="SYMBOL", ## keytype="ENSEMBL", ## multiVals="first") ## res_airway$entrez <- mapIds(org.Hs.eg.db, ## keys=row.names(res_airway), ## column="ENTREZID", ## keytype="ENSEMBL", ## multiVals="first") ## resOrdered <- as.data.frame(res_airway[order(res_airway$padj),]) ## head(resOrdered) ## # extract DE genes ## de_df <- resOrdered[resOrdered$padj < .05 & !is.na(resOrdered$padj),] ## de_symbols <- de_df$symbol ## # extract background genes ## bg_ids <- rownames(dds_airway)[rowSums(counts(dds_airway)) > 0] ## bg_symbols <- mapIds(org.Hs.eg.db, ## keys=bg_ids, ## column="SYMBOL", ## keytype="ENSEMBL", ## multiVals="first") ## # run the function ## topgoDE_airway <- topGOtable(de_symbols, bg_symbols, ## ontology = "BP", ## mapping = "org.Hs.eg.db", ## geneID = "symbol") ## ----func-pca2go, eval=FALSE--------------------------------------------- ## pca2go_airway <- pca2go(rld_airway, ## annotation = annotation_airway, ## organism = "Hs", ## ensToGeneSymbol = TRUE, ## background_genes = bg_ids) ## # for a smooth interactive exploration, use DT::datatable ## datatable(pca2go_airway$PC1$posLoad) ## # display it in the normal R session... ## head(pca2go_airway$PC1$posLoad) ## # ... or use it for running the app and display in the dedicated tab ## pcaExplorer(dds_airway,rld_airway, ## pca2go = pca2go_airway, ## annotation = annotation_airway) ## ## ----func, message = FALSE, eval = FALSE--------------------------------- ## goquick_airway <- limmaquickpca2go(rld_airway, ## pca_ngenes = 10000, ## inputType = "ENSEMBL", ## organism = "Hs") ## # display it in the normal R session... ## head(goquick_airway$PC1$posLoad) ## # ... or use it for running the app and display in the dedicated tab ## pcaExplorer(dds_airway,rld_airway, ## pca2go = goquick_airway, ## annotation = annotation_airway) ## ## ## ----func-makedataset---------------------------------------------------- dds_simu <- makeExampleDESeqDataSet_multifac(betaSD_condition = 3,betaSD_tissue = 0.5) dds_simu dds2_simu <- makeExampleDESeqDataSet_multifac(betaSD_condition = 0.5,betaSD_tissue = 2) dds2_simu rld_simu <- rlogTransformation(dds_simu) rld2_simu <- rlogTransformation(dds2_simu) pcaplot(rld_simu,intgroup = c("condition","tissue")) + ggplot2::ggtitle("Simulated data - Big condition effect, small tissue effect") pcaplot(rld2_simu,intgroup = c("condition","tissue")) + ggplot2::ggtitle("Simulated data - Small condition effect, bigger tissue effect") ## ----eval=FALSE---------------------------------------------------------- ## distro_expr(rld_airway,plot_type = "density") ## distro_expr(rld_airway,plot_type = "violin") ## ------------------------------------------------------------------------ distro_expr(rld_airway,plot_type = "boxplot") ## ------------------------------------------------------------------------ dds <- makeExampleDESeqDataSet_multifac(betaSD_condition = 3,betaSD_tissue = 1) rlt <- DESeq2::rlogTransformation(dds) set.seed(42) geneprofiler(rlt,paste0("gene",sample(1:1000,20)), plotZ = FALSE) ## ----eval=FALSE---------------------------------------------------------- ## anno_df_biomart <- get_annotation(dds = dds_airway, ## biomart_dataset = "hsapiens_gene_ensembl", ## idtype = "ensembl_gene_id") ## ## anno_df_orgdb <- get_annotation_orgdb(dds = dds_airway, ## orgdb_species = "org.Hs.eg.db", ## idtype = "ENSEMBL") ## ------------------------------------------------------------------------ # use a subset of the counts to reduce plotting time, it can be time consuming with many samples pair_corr(counts(dds_airway)[1:100,]) ## ------------------------------------------------------------------------ sessionInfo()