## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) knitr::opts_chunk$set(fig.width=6, fig.height=6, dpi=300,echo = FALSE) knitr::opts_chunk$set(fig.pos = "H", out.extra = "") ## ----echo=TRUE, eval=FALSE---------------------------------------------------- # library(RFlocalfdr.data) # data(smoking) # ?smoking # y<-smoking$y # smoking_data<-smoking$rma # y.numeric <-ifelse((y=="never-smoked"),0,1) # ## ----echo=FALSE, eval=FALSE--------------------------------------------------- # if (!require("BiocManager", quietly = TRUE)) # install.packages("BiocManager") # # BiocManager::install("GEOquery") # BiocManager::install("annotate") # # install.packages("geneExpressionFromGEO") # library(geneExpressionFromGEO) # DF1 <- getGeneExpressionFromGEO("GSE994", FALSE, FALSE) #retrieveGeneSymbols, verbose = FALSE) # # aa<-match(gsub("([A-Z0-9]*).*","\\1", rownames(smoking_data)), colnames(DF1)) # all.equal(aa,1:57) # #[1] TRUE # # temp <- t(DF1[1:57]) # #but class temp is a data.frame not a ‘AffyBatch’ object. So how do we normalize it? # # ## library(affy) # ## rma.data <- rma(data) # ## or # ## rma.data <- expresso(data, # ## bgcorrect.method = "rma", # ## normalize.method = "quantiles", # ## pmcorrect.method = "pmonly", # ## summary.method = "medianpolish") # ## ----echo=TRUE, eval=FALSE--------------------------------------------------- # library(ranger) # rf1 <-ranger(y=y.numeric ,x=smoking_data,importance="impurity",seed=123, num.trees = 10000, # classification=TRUE) # t2 <-count_variables(rf1) # imp<-log(rf1$variable.importance) # #png("./supp_figures/smoking_log_importances.png") # plot(density(imp),xlab="log importances",main="") # #dev.off() # ## ----echo=TRUE, eval=FALSE--------------------------------------------------- # # starting from a randomForest model # library(RFlocalfdr) # library(randomForest) # sert.seed(123) # rf2 <-randomForest(y=factor(y.numeric) ,x=smoking_data,importance=TRUE, ntree = 10000) # imp.rf2 <- log(rf2$importance[,"MeanDecreaseGini"]) # t2.rf2 <-varUsed(rf2) # plot(density(imp.rf2),xlab="log importances",main="") # # cutoffs <- c(2,3,4,5) # res.con<- determine_cutoff(imp.rf2,t2.rf2 ,cutoff=cutoffs,plot=c(2,3,4,5)) # # plot(cutoffs,res.con[,3],pch=15,col="red",cex=1.5,ylab="max(abs(y - t1))") # cutoffs[which.min(res.con[,3])] # #2 # # ## ----simulation2, echo=FALSE, fig.cap="A small simulated data set. Each band contains blocks of size {1, 2, 4, 8, 16, 32, 64}, and each block consists of correlated (identical variables).", fig.align="center", out.width = '50%'---- knitr::include_graphics("./supp_figures/smoking_log_importances.png") ## ----echo=TRUE, eval=FALSE---------------------------------------------------- # cutoffs <- c(2,3,4,5) # #png("./supp_figures/smoking_data_determine_cutoff.png") # res.con<- determine_cutoff(imp,t2,cutoff=cutoffs,plot=c(2,3,4,5)) # #dev.off() # # #png("./supp_figures/smoking_data_determine_cutoffs_2.png") # plot(cutoffs,res.con[,3],pch=15,col="red",cex=1.5,ylab="max(abs(y - t1))") # #dev.off() # cutoffs[which.min(res.con[,3])] # ## ----echo=TRUE,eval=FALSE----------------------------------------------------- # temp<-imp[t2 > 3] # temp <- temp - min(temp) + .Machine$double.eps # qq <- plotQ(temp,debug.flag = 1) # ppp<-run.it.importances(qq,temp,debug.flag = 0) # # # #png("./supp_figures/smoking_significant_genes.png") # #aa<-significant.genes(ppp,temp,cutoff=0.05,do.plot=1) # aa<-significant.genes(ppp,temp,cutoff=0.05,debug.flag=0,do.plot=TRUE,use_95_q=TRUE) # #dev.off() # length(aa$probabilities) # 17 # # aa<-significant.genes(ppp,temp,cutoff=0.05,debug.flag=0,do.plot=TRUE,use_95_q=FALSE) # length(aa$probabilities) # 19 # ## ----echo=TRUE,eval=FALSE----------------------------------------------------- # sessionInfo() # ## ----echo=TRUE,eval=FALSE----------------------------------------------------- # devtools::install_github("parsifal9/RFlocalfdr", build_vignettes = TRUE, force = TRUE) # # library(RFlocalfdr) # data(smoking) # ?smoking # y<-smoking$y # smoking_data<-smoking$rma # y.numeric <-ifelse((y=="never-smoked"),0,1) # # library(ranger) # rf1 <-ranger(y=y.numeric ,x=smoking_data,importance="impurity",seed=123, num.trees = 10000, # classification=TRUE) # t2 <-count_variables(rf1) # imp<-log(rf1$variable.importance) # #png("./supp_figures/smoking_log_importances.png") # plot(density(imp),xlab="log importances",main="") # #dev.off() # # # # cutoffs <- c(2,3,4,5) # #png("./supp_figures/smoking_data_determine_cutoff.png") # res.con<- determine_cutoff(imp,t2,cutoff=cutoffs,plot=c(2,3,4,5)) # #dev.off() # # #png("./supp_figures/smoking_data_determine_cutoffs_2.png") # plot(cutoffs,res.con[,3],pch=15,col="red",cex=1.5,ylab="max(abs(y - t1))") # #dev.off() # cutoffs[which.min(res.con[,3])] # # # temp<-imp[t2 > 3] # temp <- temp - min(temp) + .Machine$double.eps # qq <- plotQ(temp,debug.flag = 1) # ppp<-run.it.importances(qq,temp,debug.flag = 0) # # # #png("./supp_figures/smoking_significant_genes.png") # #aa<-significant.genes(ppp,temp,cutoff=0.05,do.plot=1) # aa<-significant.genes(ppp,temp,cutoff=0.05,debug.flag=0,do.plot=TRUE,use_95_q=TRUE) # #dev.off() # length(aa$probabilities) # 17 -- Roc gets 30 # # aa<-significant.genes(ppp,temp,cutoff=0.05,debug.flag=0,do.plot=TRUE,use_95_q=FALSE) # length(aa$probabilities) # 19-- Roc gets 30 #