Martin Morgan
October 29, 2014
Analysis and comprehension of high-throughput genomic data
Packages, vignettes, work flows
Objects
getClass()
, showMethods(..., where=search())
,
selectMethod()
method?"substr,<tab>"
to select help on methods, class?D<tab>
for help on classesExample
suppressPackageStartupMessages({
library(Biostrings)
})
data(phiX174Phage) # sample data, see ?phiX174Phage
phiX174Phage
## A DNAStringSet instance of length 6
## width seq names
## [1] 5386 GAGTTTTATCGCTTCCATGAC...ATTGGCGTATCCAACCTGCA Genbank
## [2] 5386 GAGTTTTATCGCTTCCATGAC...ATTGGCGTATCCAACCTGCA RF70s
## [3] 5386 GAGTTTTATCGCTTCCATGAC...ATTGGCGTATCCAACCTGCA SS78
## [4] 5386 GAGTTTTATCGCTTCCATGAC...ATTGGCGTATCCAACCTGCA Bull
## [5] 5386 GAGTTTTATCGCTTCCATGAC...ATTGGCGTATCCAACCTGCA G97
## [6] 5386 GAGTTTTATCGCTTCCATGAC...ATTGGCGTATCCAACCTGCA NEB03
m <- consensusMatrix(phiX174Phage)[1:4,] # nucl. x position counts
polymorphic <- which(colSums(m != 0) > 1)
m[, polymorphic]
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9]
## A 4 5 4 3 0 0 5 2 0
## C 0 0 0 0 5 1 0 0 5
## G 2 1 2 3 0 0 1 4 0
## T 0 0 0 0 1 5 0 0 1
showMethods(class=class(phiX174Phage), where=search())
Genomic range
seqnames
), start, end, and optionally strandWhy genomic ranges?
Data objects
GenomicRanges::GRanges
seqnames()
start()
, end()
, width()
strand()
mcols()
: 'metadata' associated with each range, stored as a
DataFrame
GenomicRanges::GRangesList
length()
, names()
, [
, [[
) <!– ] –>unlist()
and relist()
.GenomicAlignments::GAlignments, GAlignmentsList, GAlignemntPairs; VariantAnnotation::VCF, VRanges
Example: GRanges
## 'Annotation' package; more later...
suppressPackageStartupMessages({
library(TxDb.Hsapiens.UCSC.hg19.knownGene)
})
promoters <- promoters(TxDb.Hsapiens.UCSC.hg19.knownGene)
## 'GRanges' with 2 metadata columns
promoters
## GRanges object with 82960 ranges and 2 metadata columns:
## seqnames ranges strand | tx_id tx_name
## <Rle> <IRanges> <Rle> | <integer> <character>
## [1] chr1 [ 9874, 12073] + | 1 uc001aaa.3
## [2] chr1 [ 9874, 12073] + | 2 uc010nxq.1
## [3] chr1 [ 9874, 12073] + | 3 uc010nxr.1
## [4] chr1 [ 67091, 69290] + | 4 uc001aal.1
## [5] chr1 [319084, 321283] + | 5 uc001aaq.2
## ... ... ... ... ... ... ...
## [82956] chrY [27605479, 27607678] - | 78803 uc004fwx.1
## [82957] chrY [27606222, 27608421] - | 78804 uc022cpc.1
## [82958] chrY [27607233, 27609432] - | 78805 uc004fwz.3
## [82959] chrY [27635755, 27637954] - | 78806 uc022cpd.1
## [82960] chrY [59360655, 59362854] - | 78807 uc011ncc.1
## -------
## seqinfo: 93 sequences (1 circular) from hg19 genome
head(table(seqnames(promoters)))
##
## chr1 chr2 chr3 chr4 chr5 chr6
## 7967 5092 4328 2888 3366 4220
table(strand(promoters))
##
## + - *
## 42198 40762 0
seqinfo(promoters)
## Seqinfo object with 93 sequences (1 circular) from hg19 genome:
## seqnames seqlengths isCircular genome
## chr1 249250621 FALSE hg19
## chr2 243199373 FALSE hg19
## chr3 198022430 FALSE hg19
## chr4 191154276 FALSE hg19
## chr5 180915260 FALSE hg19
## ... ... ... ...
## chrUn_gl000245 36651 FALSE hg19
## chrUn_gl000246 38154 FALSE hg19
## chrUn_gl000247 36422 FALSE hg19
## chrUn_gl000248 39786 FALSE hg19
## chrUn_gl000249 38502 FALSE hg19
## vector-like access
promoters[ seqnames(promoters) %in% c("chr1", "chr2") ]
## GRanges object with 13059 ranges and 2 metadata columns:
## seqnames ranges strand | tx_id tx_name
## <Rle> <IRanges> <Rle> | <integer> <character>
## [1] chr1 [ 9874, 12073] + | 1 uc001aaa.3
## [2] chr1 [ 9874, 12073] + | 2 uc010nxq.1
## [3] chr1 [ 9874, 12073] + | 3 uc010nxr.1
## [4] chr1 [ 67091, 69290] + | 4 uc001aal.1
## [5] chr1 [319084, 321283] + | 5 uc001aaq.2
## ... ... ... ... ... ... ...
## [13055] chr2 [242617330, 242619529] - | 13055 uc002wcb.2
## [13056] chr2 [242751523, 242753722] - | 13056 uc002wck.1
## [13057] chr2 [242794933, 242797132] - | 13057 uc010fzs.3
## [13058] chr2 [242800859, 242803058] - | 13058 uc002wcq.4
## [13059] chr2 [242800859, 242803058] - | 13059 uc010fzt.3
## -------
## seqinfo: 93 sequences (1 circular) from hg19 genome
## metadata
mcols(promoters)
## DataFrame with 82960 rows and 2 columns
## tx_id tx_name
## <integer> <character>
## 1 1 uc001aaa.3
## 2 2 uc010nxq.1
## 3 3 uc010nxr.1
## 4 4 uc001aal.1
## 5 5 uc001aaq.2
## ... ... ...
## 82956 78803 uc004fwx.1
## 82957 78804 uc022cpc.1
## 82958 78805 uc004fwz.3
## 82959 78806 uc022cpd.1
## 82960 78807 uc011ncc.1
length(unique(promoters$tx_name))
## [1] 82960
## exons, grouped by transcript
exByTx <- exonsBy(TxDb.Hsapiens.UCSC.hg19.knownGene, "tx", use.names=TRUE)
## list-like subsetting
exByTx[1:10] # also logical, character, ...
## GRangesList object of length 10:
## $uc001aaa.3
## GRanges object with 3 ranges and 3 metadata columns:
## seqnames ranges strand | exon_id exon_name exon_rank
## <Rle> <IRanges> <Rle> | <integer> <character> <integer>
## [1] chr1 [11874, 12227] + | 1 <NA> 1
## [2] chr1 [12613, 12721] + | 3 <NA> 2
## [3] chr1 [13221, 14409] + | 5 <NA> 3
##
## $uc010nxq.1
## GRanges object with 3 ranges and 3 metadata columns:
## seqnames ranges strand | exon_id exon_name exon_rank
## [1] chr1 [11874, 12227] + | 1 <NA> 1
## [2] chr1 [12595, 12721] + | 2 <NA> 2
## [3] chr1 [13403, 14409] + | 6 <NA> 3
##
## $uc010nxr.1
## GRanges object with 3 ranges and 3 metadata columns:
## seqnames ranges strand | exon_id exon_name exon_rank
## [1] chr1 [11874, 12227] + | 1 <NA> 1
## [2] chr1 [12646, 12697] + | 4 <NA> 2
## [3] chr1 [13221, 14409] + | 5 <NA> 3
##
## ...
## <7 more elements>
## -------
## seqinfo: 93 sequences (1 circular) from hg19 genome
exByTx[["uc001aaa.3"]] # also numeric
## GRanges object with 3 ranges and 3 metadata columns:
## seqnames ranges strand | exon_id exon_name exon_rank
## <Rle> <IRanges> <Rle> | <integer> <character> <integer>
## [1] chr1 [11874, 12227] + | 1 <NA> 1
## [2] chr1 [12613, 12721] + | 3 <NA> 2
## [3] chr1 [13221, 14409] + | 5 <NA> 3
## -------
## seqinfo: 93 sequences (1 circular) from hg19 genome
## accessors return typed-List, e.g., IntegerList
width(exByTx)
## IntegerList of length 82960
## [["uc001aaa.3"]] 354 109 1189
## [["uc010nxq.1"]] 354 127 1007
## [["uc010nxr.1"]] 354 52 1189
## [["uc001aal.1"]] 918
## [["uc001aaq.2"]] 32
## [["uc001aar.2"]] 62
## [["uc009vjk.2"]] 192 58 2500
## [["uc001aau.3"]] 169 58 4143
## [["uc021oeh.1"]] 58 248 406 514
## [["uc021oei.1"]] 894
## ...
## <82950 more elements>
log10(width(exByTx))
## NumericList of length 82960
## [["uc001aaa.3"]] 2.54900326202579 2.03742649794062 3.07518185461869
## [["uc010nxq.1"]] 2.54900326202579 2.10380372095596 3.00302947055362
## [["uc010nxr.1"]] 2.54900326202579 1.7160033436348 3.07518185461869
## [["uc001aal.1"]] 2.96284268120124
## [["uc001aaq.2"]] 1.50514997831991
## [["uc001aar.2"]] 1.79239168949825
## [["uc009vjk.2"]] 2.28330122870355 1.76342799356294 3.39794000867204
## [["uc001aau.3"]] 2.22788670461367 1.76342799356294 3.61731493329829
## [["uc021oeh.1"]] 1.76342799356294 2.39445168082622 2.60852603357719 2.71096311899528
## [["uc021oei.1"]] 2.95133751879592
## ...
## <82950 more elements>
## 'easy' to ask basic questions, e.g., ...
hist(unlist(log10(width(exByTx)))) # widths of exons
exByTx[which.max(max(width(exByTx)))] # transcript with largest exon
## GRangesList object of length 1:
## $uc031qjh.1
## GRanges object with 1 range and 3 metadata columns:
## seqnames ranges strand | exon_id exon_name
## <Rle> <IRanges> <Rle> | <integer> <character>
## [1] chr12 [102591363, 102796374] + | 164764 <NA>
## exon_rank
## <integer>
## [1] 1
##
## -------
## seqinfo: 93 sequences (1 circular) from hg19 genome
exByTx[which.max(elementLengths(exByTx))] # transcript with most exons
## GRangesList object of length 1:
## $uc031qqx.1
## GRanges object with 5065 ranges and 3 metadata columns:
## seqnames ranges strand | exon_id exon_name
## <Rle> <IRanges> <Rle> | <integer> <character>
## [1] chr14 [107283004, 107283085] - | 192985 <NA>
## [2] chr14 [107282819, 107282992] - | 192984 <NA>
## [3] chr14 [107281146, 107281249] - | 192983 <NA>
## [4] chr14 [107281126, 107281145] - | 192982 <NA>
## [5] chr14 [107276018, 107276044] - | 192981 <NA>
## ... ... ... ... ... ... ...
## [5061] chr14 [106067906, 106068064] - | 187862 <NA>
## [5062] chr14 [106054457, 106054734] - | 187853 <NA>
## [5063] chr14 [106052986, 106052998] - | 187849 <NA>
## [5064] chr14 [105994262, 105994283] - | 187848 <NA>
## [5065] chr14 [105994256, 105994261] - | 187847 <NA>
## exon_rank
## <integer>
## [1] 1
## [2] 2
## [3] 3
## [4] 4
## [5] 5
## ... ...
## [5061] 5061
## [5062] 5062
## [5063] 5063
## [5064] 5064
## [5065] 5065
##
## -------
## seqinfo: 93 sequences (1 circular) from hg19 genome
There are many neat range-based operations (more later)!
Some detail
What is an experiment?
Why integrate?
Data objects
exprs()
): matrix of expression valuesfeatureData(); fData()
): probeset or gene
identifiersphenoData(); pData()
: data.frame
of relevant
informationexptData()
): Instance of class MIAME
.assay(), assays()
): arbitrary matrix-like objectrowData()
): GRanges
or GRangesList
;
use GRangesList
with names and 0-length elements to represent
assays without ranges.colData()
): DataFrame
of relevant information.exptData()
): List
of arbitrary information.Example: ExpressionSet
(see vignettes in Biobase).
suppressPackageStartupMessages({
library(ALL)
})
data(ALL)
ALL
## ExpressionSet (storageMode: lockedEnvironment)
## assayData: 12625 features, 128 samples
## element names: exprs
## protocolData: none
## phenoData
## sampleNames: 01005 01010 ... LAL4 (128 total)
## varLabels: cod diagnosis ... date last seen (21 total)
## varMetadata: labelDescription
## featureData: none
## experimentData: use 'experimentData(object)'
## pubMedIds: 14684422 16243790
## Annotation: hgu95av2
## 'Phenotype' (sample) and 'feature' data
head(pData(ALL))
## cod diagnosis sex age BT remission CR date.cr t(4;11) t(9;22)
## 01005 1005 5/21/1997 M 53 B2 CR CR 8/6/1997 FALSE TRUE
## 01010 1010 3/29/2000 M 19 B2 CR CR 6/27/2000 FALSE FALSE
## 03002 3002 6/24/1998 F 52 B4 CR CR 8/17/1998 NA NA
## 04006 4006 7/17/1997 M 38 B1 CR CR 9/8/1997 TRUE FALSE
## 04007 4007 7/22/1997 M 57 B2 CR CR 9/17/1997 FALSE FALSE
## 04008 4008 7/30/1997 M 17 B1 CR CR 9/27/1997 FALSE FALSE
## cyto.normal citog mol.biol fusion protein mdr kinet ccr
## 01005 FALSE t(9;22) BCR/ABL p210 NEG dyploid FALSE
## 01010 FALSE simple alt. NEG <NA> POS dyploid FALSE
## 03002 NA <NA> BCR/ABL p190 NEG dyploid FALSE
## 04006 FALSE t(4;11) ALL1/AF4 <NA> NEG dyploid FALSE
## 04007 FALSE del(6q) NEG <NA> NEG dyploid FALSE
## 04008 FALSE complex alt. NEG <NA> NEG hyperd. FALSE
## relapse transplant f.u date last seen
## 01005 FALSE TRUE BMT / DEATH IN CR <NA>
## 01010 TRUE FALSE REL 8/28/2000
## 03002 TRUE FALSE REL 10/15/1999
## 04006 TRUE FALSE REL 1/23/1998
## 04007 TRUE FALSE REL 11/4/1997
## 04008 TRUE FALSE REL 12/15/1997
head(featureNames(ALL))
## [1] "1000_at" "1001_at" "1002_f_at" "1003_s_at" "1004_at" "1005_at"
## access to pData columns; matrix-like subsetting; exprs()
ALL[, ALL$sex %in% "M"]
## ExpressionSet (storageMode: lockedEnvironment)
## assayData: 12625 features, 83 samples
## element names: exprs
## protocolData: none
## phenoData
## sampleNames: 01005 01010 ... 83001 (83 total)
## varLabels: cod diagnosis ... date last seen (21 total)
## varMetadata: labelDescription
## featureData: none
## experimentData: use 'experimentData(object)'
## pubMedIds: 14684422 16243790
## Annotation: hgu95av2
range(exprs(ALL))
## [1] 1.984919 14.126571
## 30% 'most variable' features (c.f., genefilter::varFilter)
iqr <- apply(exprs(ALL), 1, IQR)
ALL[iqr > quantile(iqr, 0.7), ]
## ExpressionSet (storageMode: lockedEnvironment)
## assayData: 3788 features, 128 samples
## element names: exprs
## protocolData: none
## phenoData
## sampleNames: 01005 01010 ... LAL4 (128 total)
## varLabels: cod diagnosis ... date last seen (21 total)
## varMetadata: labelDescription
## featureData: none
## experimentData: use 'experimentData(object)'
## pubMedIds: 14684422 16243790
## Annotation: hgu95av2
Example: SummarizedExperiment
(see vignettes in GenomicRanges).
suppressPackageStartupMessages({
library(airway)
})
data(airway)
airway
## class: SummarizedExperiment
## dim: 64102 8
## exptData(1): ''
## assays(1): counts
## rownames(64102): ENSG00000000003 ENSG00000000005 ... LRG_98 LRG_99
## rowData metadata column names(0):
## colnames(8): SRR1039508 SRR1039509 ... SRR1039520 SRR1039521
## colData names(9): SampleName cell ... Sample BioSample
## column and row data
colData(airway)
## DataFrame with 8 rows and 9 columns
## SampleName cell dex albut Run avgLength
## <factor> <factor> <factor> <factor> <factor> <integer>
## SRR1039508 GSM1275862 N61311 untrt untrt SRR1039508 126
## SRR1039509 GSM1275863 N61311 trt untrt SRR1039509 126
## SRR1039512 GSM1275866 N052611 untrt untrt SRR1039512 126
## SRR1039513 GSM1275867 N052611 trt untrt SRR1039513 87
## SRR1039516 GSM1275870 N080611 untrt untrt SRR1039516 120
## SRR1039517 GSM1275871 N080611 trt untrt SRR1039517 126
## SRR1039520 GSM1275874 N061011 untrt untrt SRR1039520 101
## SRR1039521 GSM1275875 N061011 trt untrt SRR1039521 98
## Experiment Sample BioSample
## <factor> <factor> <factor>
## SRR1039508 SRX384345 SRS508568 SAMN02422669
## SRR1039509 SRX384346 SRS508567 SAMN02422675
## SRR1039512 SRX384349 SRS508571 SAMN02422678
## SRR1039513 SRX384350 SRS508572 SAMN02422670
## SRR1039516 SRX384353 SRS508575 SAMN02422682
## SRR1039517 SRX384354 SRS508576 SAMN02422673
## SRR1039520 SRX384357 SRS508579 SAMN02422683
## SRR1039521 SRX384358 SRS508580 SAMN02422677
rowData(airway)
## GRangesList object of length 64102:
## $ENSG00000000003
## GRanges object with 17 ranges and 2 metadata columns:
## seqnames ranges strand | exon_id exon_name
## <Rle> <IRanges> <Rle> | <integer> <character>
## [1] X [99883667, 99884983] - | 667145 ENSE00001459322
## [2] X [99885756, 99885863] - | 667146 ENSE00000868868
## [3] X [99887482, 99887565] - | 667147 ENSE00000401072
## [4] X [99887538, 99887565] - | 667148 ENSE00001849132
## [5] X [99888402, 99888536] - | 667149 ENSE00003554016
## ... ... ... ... ... ... ...
## [13] X [99890555, 99890743] - | 667156 ENSE00003512331
## [14] X [99891188, 99891686] - | 667158 ENSE00001886883
## [15] X [99891605, 99891803] - | 667159 ENSE00001855382
## [16] X [99891790, 99892101] - | 667160 ENSE00001863395
## [17] X [99894942, 99894988] - | 667161 ENSE00001828996
##
## ...
## <64101 more elements>
## -------
## seqinfo: 722 sequences (1 circular) from an unspecified genome
## access colData; matrix-like subsetting; assay() / assays()
airway[, airway$dex %in% "trt"]
## class: SummarizedExperiment
## dim: 64102 4
## exptData(1): ''
## assays(1): counts
## rownames(64102): ENSG00000000003 ENSG00000000005 ... LRG_98 LRG_99
## rowData metadata column names(0):
## colnames(4): SRR1039509 SRR1039513 SRR1039517 SRR1039521
## colData names(9): SampleName cell ... Sample BioSample
head(assay(airway))
## SRR1039508 SRR1039509 SRR1039512 SRR1039513 SRR1039516
## ENSG00000000003 679 448 873 408 1138
## ENSG00000000005 0 0 0 0 0
## ENSG00000000419 467 515 621 365 587
## ENSG00000000457 260 211 263 164 245
## ENSG00000000460 60 55 40 35 78
## ENSG00000000938 0 0 2 0 1
## SRR1039517 SRR1039520 SRR1039521
## ENSG00000000003 1047 770 572
## ENSG00000000005 0 0 0
## ENSG00000000419 799 417 508
## ENSG00000000457 331 233 229
## ENSG00000000460 63 76 60
## ENSG00000000938 0 0 0
assays(airway)
## List of length 1
## names(1): counts
## library size
colSums(assay(airway))
## SRR1039508 SRR1039509 SRR1039512 SRR1039513 SRR1039516 SRR1039517
## 20637971 18809481 25348649 15163415 24448408 30818215
## SRR1039520 SRR1039521
## 19126151 21164133
hist(rowMeans(log10(assay(airway))))
Calculate the GC content of human chr1 in the hg19 build, excluding regions where the sequence is “N”. You will need to
[[
, chromosome 1 (“chr1”). <!– ]] –>alphabetFrequency()
to calculate the count or frequency
of the nucleotides in chr1library(BSgenome.Hsapiens.UCSC.hg19)
## Loading required package: BSgenome
## Loading required package: rtracklayer
chr1seq <- BSgenome.Hsapiens.UCSC.hg19[["chr1"]]
chr1alf <- alphabetFrequency(chr1seq)
chr1gc <- sum(chr1alf[c("G", "C")]) / sum(chr1alf[c("A", "C", "G", "T")])
Calculate the GC content of 'exome' (approximately, all genic regions) on chr1. You will need to
genes()
to extract genic regions of all genes, then
subsetting operations to restrict to chromosome 1.getSeq,BSgenome-method
to extract sequences from
chromosome 1 of the BSgenome object.alphabetFrequency()
(with the argument collapse=TRUE
–
why?) and standard R operations to extract the gc content of
the genes.library(TxDb.Hsapiens.UCSC.hg19.knownGene)
genes <- genes(TxDb.Hsapiens.UCSC.hg19.knownGene)
genes1 <- genes[seqnames(genes) %in% "chr1"]
seq1 <- getSeq(BSgenome.Hsapiens.UCSC.hg19, genes1)
alf1 <- alphabetFrequency(seq1, collapse=TRUE)
gc1 <- sum(alf1[c("G", "C")]) / sum(alf1[c("A", "C", "G", "T")])
How does the GC content just calculated compare to the average of
the GC content of each exon? Answer this using
alphabetFrequency()
but with collapse=FALSE)
, and adjust the
calculation of GC content to act on a matrix, rather than
vector. Why are these numbers different?
alf2 <- alphabetFrequency(seq1, collapse=FALSE)
gc2 <- rowSums(alf2[, c("G", "C")]) / rowSums(alf2[,c("A", "C", "G", "T")])
Plot a histogram of per-gene GC content, annotating with
information about chromosome and exome GC content. Use base
graphics hist()
, abline()
, plot(density(...))
,
plot(ecdf(...))
, etc. (one example is below). If this is too
easy, prepare a short presentation for the class illustrating how
to visualize this type of information using another R graphics
package, e.g., ggplot2, {r CRANpkg("ggvis")
, or
{r CRANpkg(“lattice”)}.
plot(density(gc2))
abline(v=c(chr1gc, gc1), col=c("red", "blue"), lwd=2)
This exercise illustrates how integrated containers can be used to effectively manage data; it does NOT represent a suitable way to analyze RNASeq differential expression data.
Load the airway package and airway
data
set. Explore it a litte, e.g., determining its dimensions (number
of regions of interest and samples), the information describing
samples, and the range of values in the count
assay. The data are
from an RNA-seq experiment. The colData()
describe treatment
groups and other information. The assay()
is the (raw) number of
short reads overlapping each region of interest, in each
sample. The solution to this exercise is summarized above.
Create a subset of the data set that contains only the 30% most variable (using IQR as a metric) observations. Plot the distribution of asinh-transformed (a log-like transformation, except near 0) row mean counts
iqr <- apply(assay(airway), 1, IQR)
airway1 <- airway[iqr > quantile(iqr, 0.7),]
plot(density(rowMeans(asinh(assay(airway1)))))
Use the genefilter package rowttests
function
(consult it's help page!) to compare asinh-transformed read counts
between the two dex
treatment groups for each row. Explore the
result in various ways, e.g., finding the 'most' differentially
expressed genes, the genes with largest (absolute) difference
between treatment groups, adding adjusted P values (via
p.adjust()
, in the stats package), etc. Can you obtain the
read counts for each treatment group, for the most differentially
expressed gene?
suppressPackageStartupMessages({
library(genefilter)
})
ttest <- rowttests(asinh(assay(airway1)), airway1$dex)
ttest$p.adj <- p.adjust(ttest$p.value, method="BH")
ttest[head(order(ttest$p.adj)),]
## statistic dm p.value p.adj
## ENSG00000179593 24.61562 5.463536 2.956680e-07 0.005671209
## ENSG00000101342 15.22622 2.697598 5.065386e-06 0.014773935
## ENSG00000101347 15.69846 2.485261 4.233805e-06 0.014773935
## ENSG00000134253 -15.46740 -1.483468 4.619047e-06 0.014773935
## ENSG00000143494 -14.93364 -2.760705 5.675890e-06 0.014773935
## ENSG00000152583 17.03482 3.054983 2.617828e-06 0.014773935
split(assay(airway1)[order(ttest$p.adj)[1], ], airway1$dex)
## $trt
## SRR1039509 SRR1039513 SRR1039517 SRR1039521
## 81 87 129 213
##
## $untrt
## SRR1039508 SRR1039512 SRR1039516 SRR1039520
## 0 0 0 0
Add the statistics of differential expression to the airway1
SummarizedExperiment. Confirm that the statistics have been
added.
mcols(rowData(airway1)) <- ttest
head(mcols(airway1))
## DataFrame with 6 rows and 4 columns
## statistic dm p.value p.adj
## <numeric> <numeric> <numeric> <numeric>
## 1 -1.62895587 -0.38927224 0.15444536 0.6325992
## 2 0.09683305 0.01803387 0.92601249 0.9799343
## 3 -0.67163380 -0.09939038 0.52681603 0.8637800
## 4 -0.81787455 -0.16759915 0.44468651 0.8286889
## 5 0.55526482 0.17003218 0.59878922 0.8865085
## 6 -2.55199111 -0.29224482 0.04337408 0.4154906
Publications (General Bioconductor)
Other