library(Biostrings)
dna <- DNAStringSet(c("AAACTG", "CCCTTCAAC", "TACGAA"))
dna
## A DNAStringSet instance of length 3
## width seq
## [1] 6 AAACTG
## [2] 9 CCCTTCAAC
## [3] 6 TACGAA
length(dna)
## [1] 3
dna[c(1, 3, 1)]
## A DNAStringSet instance of length 3
## width seq
## [1] 6 AAACTG
## [2] 6 TACGAA
## [3] 6 AAACTG
width(dna)
## [1] 6 9 6
reverseComplement(dna)
## A DNAStringSet instance of length 3
## width seq
## [1] 6 CAGTTT
## [2] 9 GTTGAAGGG
## [3] 6 TTCGTA
Biostrings themes
length()
, [
, …reverseComplement()
Help!
methods(class="DNAStringSet")
?"DNAStringSet"
browseVignettes(package="Biostrings")
library(GenomicRanges)
gr <- GRanges(c("chr1:100-120", "chr1:115-130", "chr2:200-220"))
gr
## GRanges object with 3 ranges and 0 metadata columns:
## seqnames ranges strand
## <Rle> <IRanges> <Rle>
## [1] chr1 [100, 120] *
## [2] chr1 [115, 130] *
## [3] chr2 [200, 220] *
## -------
## seqinfo: 2 sequences from an unspecified genome; no seqlengths
shift(gr, 1)
## GRanges object with 3 ranges and 0 metadata columns:
## seqnames ranges strand
## <Rle> <IRanges> <Rle>
## [1] chr1 [101, 121] *
## [2] chr1 [116, 131] *
## [3] chr2 [201, 221] *
## -------
## seqinfo: 2 sequences from an unspecified genome; no seqlengths
reduce(gr)
## GRanges object with 2 ranges and 0 metadata columns:
## seqnames ranges strand
## <Rle> <IRanges> <Rle>
## [1] chr1 [100, 130] *
## [2] chr2 [200, 220] *
## -------
## seqinfo: 2 sequences from an unspecified genome; no seqlengths
anno <- GRanges(c("chr1:110-150", "chr2:150-210"))
countOverlaps(anno, gr)
## [1] 2 1
rtracklayer::import.bed()
and HelloRanges)length()
, [
, etc.Intra-range operations
shift()
, narrow()
, flank()
Inter-range operations
reduce()
, coverage()
, gaps()
, disjoin()
Between-range operations
countOverlaps()
, findOverlaps()
, summarizeOverlaps()
Help!
methods(class="GRanges")
methods(class="GRangesList")
?"GRanges"
?"GRangesList"
browseVignettes(package="GenomicRanges")
Component parts
counts <- read.csv("lecture-02-data/airway_counts.csv", row.names=1)
counts <- as.matrix(counts)
head(counts, 3)
## SRR1039508 SRR1039509 SRR1039512 SRR1039513 SRR1039516
## ENSG00000000003 679 448 873 408 1138
## ENSG00000000419 467 515 621 365 587
## ENSG00000000457 260 211 263 164 245
## SRR1039517 SRR1039520 SRR1039521
## ENSG00000000003 1047 770 572
## ENSG00000000419 799 417 508
## ENSG00000000457 331 233 229
colData <- read.csv("lecture-02-data/airway_colData.csv", row.names=1)
colData[, 1:4]
## SampleName cell dex albut
## SRR1039508 GSM1275862 N61311 untrt untrt
## SRR1039509 GSM1275863 N61311 trt untrt
## SRR1039512 GSM1275866 N052611 untrt untrt
## SRR1039513 GSM1275867 N052611 trt untrt
## SRR1039516 GSM1275870 N080611 untrt untrt
## SRR1039517 GSM1275871 N080611 trt untrt
## SRR1039520 GSM1275874 N061011 untrt untrt
## SRR1039521 GSM1275875 N061011 trt untrt
rowRanges <- readRDS("lecture-02-data/airway_rowRanges.rds")
rowRanges
## GRangesList object of length 33469:
## $ENSG00000000003
## GRanges object with 17 ranges and 2 metadata columns:
## seqnames ranges strand | exon_id exon_name
## <Rle> <IRanges> <Rle> | <integer> <character>
## [1] X [99883667, 99884983] - | 667145 ENSE00001459322
## [2] X [99885756, 99885863] - | 667146 ENSE00000868868
## [3] X [99887482, 99887565] - | 667147 ENSE00000401072
## [4] X [99887538, 99887565] - | 667148 ENSE00001849132
## [5] X [99888402, 99888536] - | 667149 ENSE00003554016
## ... ... ... ... . ... ...
## [13] X [99890555, 99890743] - | 667156 ENSE00003512331
## [14] X [99891188, 99891686] - | 667158 ENSE00001886883
## [15] X [99891605, 99891803] - | 667159 ENSE00001855382
## [16] X [99891790, 99892101] - | 667160 ENSE00001863395
## [17] X [99894942, 99894988] - | 667161 ENSE00001828996
##
## ...
## <33468 more elements>
## -------
## seqinfo: 722 sequences (1 circular) from an unspecified genome
Could manipulate independently…
cidx <- colData$dex == "trt"
plot(
rowMeans(1 + counts[, cidx]) ~ rowMeans(1 + counts[, !cidx]),
log="xy"
)
colData
rows had been re-ordered?Solution: coordinate in a single object – SummarizedExperiment
library(SummarizedExperiment)
se <- SummarizedExperiment(counts, rowRanges = rowRanges, colData = colData)
cidx <- se$dex == "trt"
plot(
rowMeans(1 + assay(se)[, cidx]) ~ rowMeans(1 + assay(se)[, !cidx]),
log="xy"
)
dim()
, two-dimensional [
, …assay()
, rowData()
/ rowRanges()
, colData()
, …Help!
methods(class="SummarizedExperiment")
?"SummarizedExperiment"
browseVignettes(package="SummarizedExperiment")
More than 1300 ‘software’ packages
Discovery and use, e.g., DESeq2