## ----setupQualityMeasures, message=FALSE, echo=FALSE, warning=FALSE----------- knitr::opts_chunk$set( echo = TRUE, message = FALSE, warning = FALSE, fig.height = 6, fig.width = 7, results = "asis" ) # Define colors constants---- COL.OVD <- "#66C2A5" COL.OVO <- "#A6D854" COL.OVCL <- "#FC8D62" COL.HLD <- "#8DA0CB" COL.HLO <- "#E78AC3" getNum <- function(str.vect) { sapply(strsplit(str.vect, "[_]"), "[[", 2) } ## ----NanoStringQC, message=TRUE, echo=TRUE------------------------------------ library(nanostringr) expOVD <- NanoStringQC(ovd.r, subset(expQC, OVD == "Yes")) expOVO <- NanoStringQC(ovo.r, subset(expQC, OVO == "Yes")) expOVCL <- NanoStringQC(ovc.r, subset(expQC, OVCL == "Yes")) expHLD <- NanoStringQC(hld.r, subset(expQC, HLD == "Yes")) expHLO <- NanoStringQC(hlo.r, subset(expQC, HLO == "Yes")) expQC <- rbind(expHLD, expOVD, expHLO, expOVO, expOVCL) expQC$cohort <- factor( x = c( rep("HLD", nrow(expHLD)), rep("OVD", nrow(expOVD)), rep("HLO", nrow(expHLO)), rep("OVO", nrow(expOVO)), rep("OVCL", nrow(expOVCL)) ), levels = c("HLD", "OVD", "OVCL", "HLO", "OVO"), labels = c("HL", "OC", "OVCL", "HLO", "OVO") ) ## ----perFOVPlot, fig.cap="Samples that failed imaging QC based on percent fields of view (FOV) counted across cohorts."---- boxplot(perFOV ~ cohort, ylab = "% FOV", main = "% FOV by Cohort", data = expQC, pch = 20, col = c(COL.HLD, COL.OVD, COL.OVCL, COL.HLO, COL.OVO)) abline(h = 75, lty = 2, col = "red") grid(NULL, NULL, lwd = 1) ## ----linPCPlot, fig.cap="Plot of $R^2$ of positive control probes from samples across all cohorts."---- boxplot(linPC ~ cohort, ylab = expression(R ^ 2), main = "Linearity of Positive Controls by Cohort", data = expQC, pch = 20, col = c(COL.HLD, COL.OVD, COL.OVCL, COL.HLO, COL.OVO), ylim = c(0, 1)) abline(h = 0.95, lty = 2, col = "red") grid(NULL, NULL, lwd = 1) ## ----averageHKPlot, fig.cap="Average log expression of Housekeeping genes by Cohort."---- boxplot(averageHK ~ cohort, ylab = "Average log HK expression", main = "Average log expression of Housekeeping genes by Cohort", data = expQC, pch = 20, col = c(COL.HLD, COL.OVD, COL.OVCL, COL.HLO, COL.OVO)) abline(h = 50, lty = 2, col = "red") grid(NULL, NULL, lwd = 1) ## ----lodPlot, fig.cap="Limit of detection by cohort."------------------------- boxplot(lod ~ cohort, ylab = "LOD", main = "Limit of detection (LOD) by Cohort", data = expQC, pch = 20, col = c(COL.HLD, COL.OVD, COL.OVCL, COL.HLO, COL.OVO)) abline(h = 50, lty = 2, col = "red") grid(NULL, NULL, lwd = 1) ## ----pergdPlot, fig.cap="Percent genes of total (excluding controls) detected above the limit of detection."---- boxplot(pergd ~ cohort, data = expQC, border = "white", ylab = "% Genes Detected", main = "Percent of Genes Detected Above \n the Limit of Detection", pch = 20, col = c(COL.HLD, COL.OVD, COL.OVCL, COL.HLO, COL.OVO)) abline(h = 50, lty = 2, col = "red") grid(NULL, NULL, lwd = 1) stripchart(pergd ~ cohort, data = expQC, vertical = TRUE, method = "jitter", pch = 20, cex = 0.4 , col = "#3A6EE3", add = TRUE) ## ----snPlot, fig.cap="Signal to Noise versus % Gene Detected by cohort."------ sn <- 100 detect <- 60 plot(expOVD$sn, expOVD$pergd, pch = 20, col = COL.OVD, xaxt = "n", ylim = c(0, 100), xlim = range(expOVD$sn), xlab = "Signal to Noise Ratio", ylab = "% Genes Detected") points(expOVO$sn, expOVO$pergd, pch = 20, col = COL.OVO) points(expOVCL$sn, expOVCL$pergd, pch = 20, col = COL.OVCL) points(expHLD$sn, expHLD$pergd, pch = 20, col = COL.HLD) points(expHLO$sn, expHLO$pergd, pch = 20, col = COL.HLO) axis(1, at = seq(0, max(expQC$sn) + 1, 300)) abline(v = sn, col = "red", lwd = 2) abline(h = detect, lty = 2) title("Signal to Noise vs \n Ratio of Genes Detected") legend("bottomright", c("HL", "OC", "OVCL", "HLO", "OVO"), pch = 20, bty = 'n', col = c(COL.HLD, COL.OVD, COL.OVCL, COL.HLO, COL.OVO)) ## ----snZoom, fig.cap="Signal to Noise versus % Gene Detected by cohort zoomed in to the area of possible failures."---- plot(expOVD$sn, expOVD$pergd, pch = 20, col = COL.OVD, xaxt = "n", ylim = c(0, 100), xlim = c(0, 6000), xlab = "Signal to Noise Ratio ", ylab = "Ratio of Genes Detected") points(expOVO$sn, expOVO$pergd, pch = 20, col = COL.OVO) points(expOVCL$sn, expOVCL$pergd, pch = 20, col = COL.OVCL) points(expHLD$sn, expHLD$pergd, pch = 20, col = COL.HLD) points(expHLO$sn, expHLO$pergd, pch = 20, col = COL.HLO) axis(1, at = seq(0, max(expQC$sn) + 1, 300)) abline(v = sn, col = "red", lwd = 2) abline(h = detect, lty = 2) title("Signal to Noise vs \n Ratio of Genes Detected (Zooming-in)") legend("bottomright", c("HL", "OC", "OVCL", "HLO", "OVO"), pch = 20, bty = 'n', col = c(COL.HLD, COL.OVD, COL.OVCL, COL.HLO, COL.OVO)) ## ----HKnorm------------------------------------------------------------------- expHLD0 <- expHLD any(expHLD0$QCFlag == "Failed") expHLD0$sampleID[which(expHLD0$QCFlag == "Failed")] ## ----remove_samples----------------------------------------------------------- expHLD <- dplyr::filter(expHLD0, sampleID != "HL1_18" & sampleID != "HL2_18") hld <- hld.r[, !colnames(hld.r) %in% c("HL1_18", "HL2_18")] ## ----normalize_HK------------------------------------------------------------- # If data already log normalized hld.n <- HKnorm(hld, is.logged = TRUE) # Otherwise, normalize to HK hld.n <- HKnorm(hld) hld1 <- hld.n[, grep("HL1", colnames(hld.n))] exp.hld1 <- subset(expHLD, geneRLF == "HL1") hld2 <- hld.n[, grep("HL2", colnames(hld.n))] exp.hld2 <- subset(expHLD, geneRLF == "HL2") ## ----refMethod---------------------------------------------------------------- r <- 3 # The number of references to use choice.refs <- exp.hld1$sampleID[sample((1:dim(exp.hld1)[1]), r, replace = F)] # select reference samples randomly R1 <- t(hld1[, choice.refs]) R2 <- t(hld2[, paste("HL2", getNum(choice.refs), sep = "_")]) Y <- t(hld2[, !colnames(hld2) %in% paste("HL2", getNum(choice.refs), sep = "_")]) S2.r <- t(refMethod(Y, R1, R2)) # Data from CodeSet 2 now calibrated for CodeSet 1 ## ----plot_gene, fig.cap='HL1 vs HL2 gene expression with and without ccorrection'---- set.seed(2016) gene <- sample(1:nrow(hld1), 1) par(mfrow = c(1, 2)) plot(t(hld1[gene, ]), t(hld2[gene, ]), xlab = "HL1", ylab = "HL2", main = "No Correction") abline(0, 1) plot(t(hld1[gene, !(colnames(hld1) %in% choice.refs)]), t(S2.r[gene, ]), xlab = "HL1", ylab = "HL2", main = "Corrected") abline(0, 1)