\name{BSgenome-utils}

\alias{BSgenome-utils}
\alias{matchPWM,BSgenome-method}
\alias{countPWM,BSgenome-method}
\alias{vmatchPattern,BSgenome-method}
\alias{vcountPattern,BSgenome-method}

\title{BSgenome utilities}

\description{
  Utilities for BSgenome objects.
}

\usage{
\S4method{matchPWM}{BSgenome}(pwm, subject, min.score = "80\%", exclude = "")
\S4method{countPWM}{BSgenome}(pwm, subject, min.score = "80\%", exclude = "")
\S4method{vmatchPattern}{BSgenome}(pattern, subject, algorithm="auto",
    max.mismatch=0, min.mismatch=0, with.indels=FALSE, fixed=TRUE,
    exclude = "")
\S4method{vcountPattern}{BSgenome}(pattern, subject, algorithm="auto",
    max.mismatch=0, min.mismatch=0, with.indels=FALSE, fixed=TRUE,
    exclude = "")
}

\arguments{
  \item{pwm}{
    A numeric matrix with row names A, C, G and T representing a Position
    Weight Matrix.
  }
  \item{pattern}{
    The pattern string.
  }
  \item{subject}{
    A \link{BSgenome} object containing the subject sequences.
  }
  \item{min.score}{
    The minimum score for counting a match. Can be given as a character
    string containing a percentage (e.g. \code{"85\%"}) of the highest
    possible score or as a single number.
  }
  \item{algorithm}{
    One of the following: \code{"auto"}, \code{"naive-exact"},
    \code{"naive-inexact"}, \code{"boyer-moore"}, \code{"shift-or"}
    or \code{"indels"}.
  }
  \item{max.mismatch, min.mismatch}{
    The maximum and minimum number of mismatching letters allowed (see
    \code{?`\link[Biostrings]{lowlevel-matching}`} for the details).
    If non-zero, an inexact matching algorithm is used.
  }
  \item{with.indels}{
    If \code{TRUE} then indels are allowed. In that case, \code{min.mismatch}
    must be \code{0} and \code{max.mismatch} is interpreted as the maximum
    "edit distance" allowed between the pattern and a match.
    Note that in order to avoid pollution by redundant matches,
    only the "best local matches" are returned.
    Roughly speaking, a "best local match" is a match that is locally
    both the closest (to the pattern P) and the shortest.
More precisely, a substring S' of the subject S is a "best local match" iff:
    \preformatted{
       (a) nedit(P, S') <= max.mismatch
       (b) for every substring S1 of S':
               nedit(P, S1) > nedit(P, S')
       (c) for every substring S2 of S that contains S':
               nedit(P, S2) >= nedit(P, S')
    }
    One nice property of "best local matches" is that their first and last
    letters are guaranteed to be aligned with letters in P (i.e. they match
    letters in P).
  }
  \item{fixed}{
    If \code{FALSE} then IUPAC extended letters are interpreted as ambiguities
    (see \code{?`\link[Biostrings]{lowlevel-matching}`} for the details).
  }
  \item{exclude}{
    A character vector with strings that will be used to filter out
    chromosomes whose names match these strings.
  }
}

\value{
  A \link[IRanges]{RangedData} object for \code{matchPWM} and
  \code{vmatchPattern} with two values columns: strand (factor) and
  string (DNAStringSet).
  A data.frame object for \code{countPWM} and \code{vcountPattern}
  with three columns: seqname (factor), strand (factor), and
  count (integer).
}

\author{P. Aboyoun}

\seealso{
  \code{\link[Biostrings]{matchPWM}},
  \code{\link[Biostrings]{matchPattern}},
  \code{\link[BSgenome]{bsapply}}
}

\examples{
library(BSgenome.Celegans.UCSC.ce2)
data(HNF4alpha)

pwm <- PWM(HNF4alpha)
matchPWM(pwm, Celegans)
countPWM(pwm, Celegans)

pattern <- consensusString(HNF4alpha)
vmatchPattern(pattern, Celegans, fixed = "subject")
vcountPattern(pattern, Celegans, fixed = "subject")
}

\keyword{methods}
\keyword{utilities}