Mark D. Robinson, University of Zurich
31.07.2014
voom
(10 min)diffSplice
NEWvoom
and/or diffSplice
library("edgeR")
rm(list=ls())
Load in count table
setwd("/data/RNA_SEQ_edgeR_voom_featureCounts/")
load("ds1.Rdata")
setwd("~")
ls()
[1] "counts"
head(counts[,1:7],3)
ES.07985 DE.07981 GT.66339 FG.08004 PE.07980 FE.66350
ENSG00000254468 0 0 0 1 0 0
ENSG00000177951 44 50 24 37 38 41
ENSG00000188076 0 0 0 0 0 0
ES.66342
ENSG00000254468 1
ENSG00000177951 162
ENSG00000188076 0
dim(counts)
[1] 30727 27
grp <- as.factor(substr(colnames(counts),1,2))
table(grp)
grp
DE ES FE FG GT IS PE PH
3 3 4 3 3 2 6 3
setwd("/data/RNA_SEQ_edgeR_voom_featureCounts/")
load("ds2.Rdata")
setwd("~")
ls()
head(counts,3)
class(counts)
md
class(md)
grp <- md$condition
o <- order(grp)
pairs(log2(1+counts[,o[1:7]]), pch=".",lower.panel=NULL)
d <- DGEList(counts=counts, group=grp)
d <- calcNormFactors(d)
d$samples
group lib.size norm.factors
ES.07985 ES 769440 0.9721
DE.07981 DE 947798 1.0560
GT.66339 GT 548227 0.8668
FG.08004 FG 726088 1.2161
PE.07980 PE 688903 1.0602
FE.66350 FE 521573 1.1357
ES.66342 ES 2281754 0.9036
DE.66333 DE 2834208 0.9781
GT.66341 GT 1596550 0.7663
FG.66346 FG 2070020 1.1650
PE.66344 PE 1978504 1.0385
FE.66331 FE 1517276 1.0618
PH.66332 PH 840380 1.3760
PH.66336 PH 852548 1.3735
PE.66345 PE 915042 1.2394
PE.66330 PE 938173 1.2296
FE.66348 FE 629687 1.1131
FE.66334 FE 647313 1.1129
ES.66335 ES 2048439 0.7027
DE.66349 DE 1843746 0.7489
GT.66337 GT 1761089 0.8636
FG.66351 FG 1417106 0.9454
PE.66338 PE 2317354 0.8958
PH.66340 PH 4087566 1.1389
PE.66329 PE 3700620 1.0793
IS.66347 IS 12583285 0.5621
IS.66343 IS 33286768 0.9297
dim(d)
[1] 30727 27
cps <- cpm(d)
k <- rowSums(cps>=1)>2
d <- d[k,]
dim(d)
[1] 21707 27
cols <- as.numeric(d$samples$group)
plotMDS(d,col=cols)
par(mfrow=c(2,2))
plotMDS(d,col=cols, main="500 / lLFC")
plotMDS(d,col=cols, method="bcv", main="500 / BCV")
plotMDS(d,col=cols, top=2000, main="2000 / lLFC")
plotMDS(d,col=cols, top=2000, method="bcv", main="2000 / BCV")
#mm <- model.matrix(~group,data=d$samples)
mm <- model.matrix(~-1+grp)
mm
grpDE grpES grpFE grpFG grpGT grpIS grpPE grpPH
1 0 1 0 0 0 0 0 0
2 1 0 0 0 0 0 0 0
3 0 0 0 0 1 0 0 0
4 0 0 0 1 0 0 0 0
5 0 0 0 0 0 0 1 0
6 0 0 1 0 0 0 0 0
7 0 1 0 0 0 0 0 0
8 1 0 0 0 0 0 0 0
9 0 0 0 0 1 0 0 0
10 0 0 0 1 0 0 0 0
11 0 0 0 0 0 0 1 0
12 0 0 1 0 0 0 0 0
13 0 0 0 0 0 0 0 1
14 0 0 0 0 0 0 0 1
15 0 0 0 0 0 0 1 0
16 0 0 0 0 0 0 1 0
17 0 0 1 0 0 0 0 0
18 0 0 1 0 0 0 0 0
19 0 1 0 0 0 0 0 0
20 1 0 0 0 0 0 0 0
21 0 0 0 0 1 0 0 0
22 0 0 0 1 0 0 0 0
23 0 0 0 0 0 0 1 0
24 0 0 0 0 0 0 0 1
25 0 0 0 0 0 0 1 0
26 0 0 0 0 0 1 0 0
27 0 0 0 0 0 1 0 0
attr(,"assign")
[1] 1 1 1 1 1 1 1 1
attr(,"contrasts")
attr(,"contrasts")$grp
[1] "contr.treatment"
d <- estimateGLMCommonDisp(d,mm)
d <- estimateGLMTrendedDisp(d,mm)
d <- estimateGLMTagwiseDisp(d,mm)
par(mfrow=c(1,1))
plotBCV(d)
plotMeanVar(d,show.raw=TRUE, show.tagwise=TRUE, show.binned=TRUE)
par(mfrow=c(1,2))
plotSmear(d, pair=c("ES","DE"), ylim=c(-5,5))
plotSmear(d, pair=c("DE","GT"), ylim=c(-5,5))
f <- glmFit(d,mm)
con <- makeContrasts("DE-ES"=grpDE-grpES,levels=colnames(mm))
con
Contrasts
Levels DE-ES
grpDE 1
grpES -1
grpFE 0
grpFG 0
grpGT 0
grpIS 0
grpPE 0
grpPH 0
lrt <- glmLRT(f,contrast=con)
topTags(lrt,20)
Coefficient: 1*grpDE -1*grpES
logFC logCPM LR PValue FDR
ENSG00000095596 8.532 6.658 295.5 3.189e-66 6.923e-62
ENSG00000076356 7.978 7.297 280.2 6.962e-63 7.556e-59
ENSG00000164292 10.583 6.440 262.0 6.295e-59 4.555e-55
ENSG00000185823 7.349 5.685 241.4 1.936e-54 1.051e-50
ENSG00000125798 9.891 8.044 227.5 2.074e-51 9.003e-48
ENSG00000132130 11.314 6.398 223.1 1.880e-50 6.801e-47
ENSG00000126005 -4.501 6.464 218.4 2.059e-49 6.384e-46
ENSG00000104332 4.707 6.784 217.4 3.278e-49 8.895e-46
ENSG00000158815 14.932 7.295 202.7 5.407e-46 1.304e-42
ENSG00000119242 6.266 6.655 198.9 3.606e-45 7.828e-42
ENSG00000182798 5.062 7.548 185.1 3.694e-42 7.289e-39
ENSG00000266283 11.071 8.362 181.4 2.338e-41 4.230e-38
ENSG00000120875 7.807 5.529 178.2 1.193e-40 1.991e-37
ENSG00000110799 7.639 4.880 177.5 1.693e-40 2.624e-37
ENSG00000121966 7.529 5.850 174.7 6.890e-40 9.971e-37
ENSG00000185155 6.432 4.827 171.7 3.206e-39 4.349e-36
ENSG00000164736 12.848 5.288 171.5 3.560e-39 4.546e-36
ENSG00000124942 6.854 7.487 170.6 5.400e-39 6.512e-36
ENSG00000141448 13.186 6.579 166.0 5.478e-38 6.259e-35
ENSG00000110148 6.790 6.114 162.3 3.517e-37 3.817e-34
cps <- cpm(d)
o <- order(colnames(counts))
barplot( cps["ENSG00000095596",o], col=cols[o], las=2)
grp <- relevel(grp, ref="ES")
mm1 <- model.matrix(~grp)
mm1
(Intercept) grpDE grpFE grpFG grpGT grpIS grpPE grpPH
1 1 0 0 0 0 0 0 0
2 1 1 0 0 0 0 0 0
3 1 0 0 0 1 0 0 0
4 1 0 0 1 0 0 0 0
5 1 0 0 0 0 0 1 0
6 1 0 1 0 0 0 0 0
7 1 0 0 0 0 0 0 0
8 1 1 0 0 0 0 0 0
9 1 0 0 0 1 0 0 0
10 1 0 0 1 0 0 0 0
11 1 0 0 0 0 0 1 0
12 1 0 1 0 0 0 0 0
13 1 0 0 0 0 0 0 1
14 1 0 0 0 0 0 0 1
15 1 0 0 0 0 0 1 0
16 1 0 0 0 0 0 1 0
17 1 0 1 0 0 0 0 0
18 1 0 1 0 0 0 0 0
19 1 0 0 0 0 0 0 0
20 1 1 0 0 0 0 0 0
21 1 0 0 0 1 0 0 0
22 1 0 0 1 0 0 0 0
23 1 0 0 0 0 0 1 0
24 1 0 0 0 0 0 0 1
25 1 0 0 0 0 0 1 0
26 1 0 0 0 0 1 0 0
27 1 0 0 0 0 1 0 0
attr(,"assign")
[1] 0 1 1 1 1 1 1 1
attr(,"contrasts")
attr(,"contrasts")$grp
[1] "contr.treatment"
f1 <- glmFit(d,mm1)
lrt1 <- glmLRT(f1,coef=2)
topTags(lrt1,10)
Coefficient: grpDE
logFC logCPM LR PValue FDR
ENSG00000095596 8.532 6.658 295.5 3.189e-66 6.923e-62
ENSG00000076356 7.978 7.297 280.2 6.962e-63 7.556e-59
ENSG00000164292 10.583 6.440 262.0 6.295e-59 4.555e-55
ENSG00000185823 7.349 5.685 241.4 1.936e-54 1.051e-50
ENSG00000125798 9.891 8.044 227.5 2.074e-51 9.003e-48
ENSG00000132130 11.314 6.398 223.1 1.880e-50 6.801e-47
ENSG00000126005 -4.501 6.464 218.4 2.059e-49 6.384e-46
ENSG00000104332 4.707 6.784 217.4 3.278e-49 8.895e-46
ENSG00000158815 14.932 7.295 202.7 5.407e-46 1.304e-42
ENSG00000119242 6.266 6.655 198.9 3.606e-45 7.828e-42
lrt2 <- glmLRT(f1,coef=2:8)
topTags(lrt2,10)
Coefficient: grpDE grpFE grpFG grpGT grpIS grpPE grpPH
logFC.grpDE logFC.grpFE logFC.grpFG logFC.grpGT
ENSG00000095596 8.532 -5.6090 5.4212 -2.5693
ENSG00000185823 7.349 0.1045 -0.9652 -2.8885
ENSG00000182798 5.062 -5.3870 -4.4118 -3.0604
ENSG00000215866 4.090 -6.3872 -2.8365 -1.1766
ENSG00000259048 -3.460 -10.6658 -8.4672 -9.7041
ENSG00000164265 -3.231 -11.3976 -11.2619 -8.5869
ENSG00000235590 -3.119 4.1737 -7.4417 -8.1857
ENSG00000143768 2.774 -7.0026 -10.9438 -8.6882
ENSG00000121351 -2.694 11.3601 0.7422 0.3227
ENSG00000163827 1.087 -10.0383 -14.3370 -5.8485
logFC.grpIS logFC.grpPE logFC.grpPH logCPM LR PValue FDR
ENSG00000095596 -4.154 -2.7383 -3.531 6.658 1830 0 0
ENSG00000185823 -3.113 -2.6988 -3.802 5.685 1560 0 0
ENSG00000182798 -7.625 -5.1707 -4.987 7.548 2125 0 0
ENSG00000215866 -8.597 -4.7484 -7.145 5.853 1597 0 0
ENSG00000259048 -14.006 -11.5784 -15.724 8.177 2027 0 0
ENSG00000164265 -12.015 -11.0028 -14.349 8.895 2002 0 0
ENSG00000235590 2.561 -7.3002 -8.194 9.283 1708 0 0
ENSG00000143768 -11.428 -7.6863 -10.603 8.844 1692 0 0
ENSG00000121351 11.013 -0.3279 1.671 7.255 1782 0 0
ENSG00000163827 -8.040 -10.6184 -14.337 8.317 2023 0 0
par(mfrow=c(1,3))
barplot( cps["ENSG00000182798",o], col=cols[o], las=2)
barplot( cps["ENSG00000164736",o], col=cols[o], las=2, main="SOX17")
barplot( cps["ENSG00000204531",o], col=cols[o], las=2, main="OCT4")
d1 <- d
d1$genes <- data.frame(ensid=rownames(d$counts),
round(cps,1)) # add extra columns to output
f3 <- glmFit(d1,mm1)
lrt3 <- glmLRT(f3,coef=2)
tt <- topTags(lrt3,n=Inf)$table
write.table(tt, file="LRT3.xls",row.names=FALSE, sep="\t", quote=FALSE)
topTags
qCount()
in QuasRcountOverlaps()
easyRNASeq()
library("parallel")
detectCores()
[1] 4
anno <- dir(,".gtf$")
anno
[1] "Drosophila_melanogaster.BDGP5.75.gtf"
f <- dir(,".bam$")
f
[1] "GSM461178_s.bam" "GSM461179_s.bam"
library("Rsubread")
fcs <- featureCounts(f[2], annot.ext=anno,nthreads=4,
isGTFAnnotationFile=TRUE)
#names(fcs)
#head(fcs$counts,3)
library("Rsubread")
fcp <- featureCounts(f[1], annot.ext=anno,
isGTFAnnotationFile=TRUE,
nthreads=4, isPairedEnd=TRUE))
wget
sf <- system.file("extdata", package = "pasillaBamSubset")
fs <- dir(sf,".bam$",full=TRUE)
fs
[1] "/Users/mark/Library/R/3.1/library/pasillaBamSubset/extdata/untreated1_chr4.bam"
[2] "/Users/mark/Library/R/3.1/library/pasillaBamSubset/extdata/untreated3_chr4.bam"
v <- voom(d, mm, plot=TRUE)
vf <- lmFit(v,mm) # 'mm' defined above
cf <- contrasts.fit(vf,con) # 'con' defined above
cf <- eBayes(cf)
topTable(cf)
logFC AveExpr t P.Value adj.P.Val B
ENSG00000104332 4.682 5.3343 16.82 4.643e-17 1.008e-12 28.60
ENSG00000182798 5.049 2.3421 16.02 1.802e-16 1.781e-12 27.44
ENSG00000126005 -4.485 5.7958 -15.47 4.669e-16 1.781e-12 26.29
ENSG00000147869 7.395 1.6008 15.35 5.733e-16 1.781e-12 26.23
ENSG00000076356 7.788 5.7036 15.35 5.745e-16 1.781e-12 25.22
ENSG00000158815 11.806 1.6305 15.55 4.016e-16 1.781e-12 24.92
ENSG00000164292 9.515 5.0669 15.50 4.418e-16 1.781e-12 24.89
ENSG00000164736 9.722 0.6449 15.09 9.219e-16 2.501e-12 24.24
ENSG00000003056 -3.454 8.4616 -13.94 7.795e-15 1.538e-11 23.84
ENSG00000125798 8.925 7.0947 14.11 5.645e-15 1.361e-11 22.79
rm(list=ls())
load("ds3.Rdata")
head(ecounts,3)
GSM461176 GSM461177 GSM461178 GSM461179 GSM461180 GSM461181 GSM461182
1 0 0 0 0 0 0 0
2 0 0 0 0 0 0 0
3 0 1 1 2 0 0 2
head(gid)
[1] "FBgn0000003" "FBgn0000008" "FBgn0000008" "FBgn0000008" "FBgn0000008"
[6] "FBgn0000008"
head(eid)
[1] "FBgn0000003:001" "FBgn0000008:001" "FBgn0000008:002" "FBgn0000008:003"
[5] "FBgn0000008:004" "FBgn0000008:005"
md
treatment libtype sampleid
1 Untreated SINGLE GSM461176
2 Untreated PAIRED GSM461177
3 Untreated PAIRED GSM461178
4 CG8144_RNAi SINGLE GSM461179
5 CG8144_RNAi PAIRED GSM461180
6 CG8144_RNAi PAIRED GSM461181
7 Untreated SINGLE GSM461182
dx <- DGEList(ecounts,group=md$treatment)
dx <- calcNormFactors(dx)
dx$samples
group lib.size norm.factors
GSM461176 Untreated 21094780 0.9718
GSM461177 Untreated 11177434 1.0391
GSM461178 Untreated 12641679 0.9555
GSM461179 CG8144_RNAi 19308849 1.0127
GSM461180 CG8144_RNAi 12073251 1.0061
GSM461181 CG8144_RNAi 13828766 1.0070
GSM461182 Untreated 14923812 1.0101
xmm <- model.matrix(~libtype+treatment,data=md)
xmm
(Intercept) libtypeSINGLE treatmentUntreated
1 1 1 1
2 1 0 1
3 1 0 1
4 1 1 0
5 1 0 0
6 1 0 0
7 1 1 1
attr(,"assign")
[1] 0 1 2
attr(,"contrasts")
attr(,"contrasts")$libtype
[1] "contr.treatment"
attr(,"contrasts")$treatment
[1] "contr.treatment"
vx <- voom(dx,xmm,plot=TRUE)
fx <- lmFit(vx,xmm)
ex <- diffSplice(fx,geneid=gid,exonid=eid)
Total number of exons: 77026
Total number of genes: 15369
Number of genes with 1 exon: 3153
Mean number of exons in a gene: 5
Max number of exons in a gene: 115
topSplice(ex)
ExonID GeneID logFC t P.Value FDR
6991 FBgn0005630:021 FBgn0005630 -2.458 -13.184 1.190e-26 8.789e-22
33534 FBgn0034072:009 FBgn0034072 -2.462 -16.048 1.643e-24 6.069e-20
8369 FBgn0010909:013 FBgn0010909 2.244 12.834 6.892e-24 1.697e-19
69604 FBgn0261451:037 FBgn0261451 -6.051 -11.108 2.261e-23 4.176e-19
74391 FBgn0263289:042 FBgn0263289 2.204 11.491 3.187e-23 4.709e-19
69599 FBgn0261451:032 FBgn0261451 -3.435 -10.858 1.418e-22 1.723e-18
69603 FBgn0261451:036 FBgn0261451 -5.717 -10.839 1.633e-22 1.723e-18
9891 FBgn0013733:022 FBgn0013733 1.618 10.473 2.162e-20 1.997e-16
69600 FBgn0261451:033 FBgn0261451 -4.072 -9.944 1.035e-19 8.496e-16
71099 FBgn0261885:005 FBgn0261885 -3.129 -11.854 8.533e-19 6.304e-15
par(mfrow=c(1,2))
plotSplice(ex, geneid="FBgn0005630")
plotSplice(ex, geneid="FBgn0034072")