\name{trimLRPatterns}

\alias{trimLRPatterns}
\alias{trimLRPatterns,XString-method}
\alias{trimLRPatterns,XStringSet-method}

\title{Trim Flanking Patterns from Sequences}

\description{
  The \code{trimLRPatterns} function trims left and/or right flanking patterns
  from sequences.
}

\usage{
trimLRPatterns(Lpattern = "", Rpattern = "", subject,
               max.Lmismatch = 0, max.Rmismatch = 0,
               with.Lindels = FALSE, with.Rindels = FALSE,
               Lfixed = TRUE, Rfixed = TRUE, ranges = FALSE)
}

\arguments{
  \item{Lpattern}{
    The left part of the pattern.
  }
  \item{Rpattern}{
    The right part of the pattern.
  }
  \item{subject}{
    An \link{XString} or \link{XStringSet} object containing the target
    sequence(s).
  }
  \item{max.Lmismatch}{
    Either an integer vector of length \code{nLp = nchar(Lpattern)} whose
    elements \code{max.Lmismatch[i]} represent the maximum number of
    acceptable mismatching letters when aligning
    \code{substring(Lpattern, nLp - i + 1, nLp)} with
    \code{substring(subject, 1, i)} or a single numeric value in
    \code{(0, 1)} that represents a constant maximum mismatch rate for
    each of the \code{nLp} alignments.
    Negative numbers in integer vector inputs are used to prevent trimming
    at the i-th location.
    If an integer vector input has \code{length(max.Lmismatch) < nLp}, then
    \code{max.Lmismatch} will be augmented with enough -1's at the beginning
    of the vector to bring it up to length \code{nLp}.
    If non-zero, an inexact matching algorithm is used (see the
    \code{\link{matchPattern}} function for more information).
  }
  \item{max.Rmismatch}{
    Either an integer vector of length \code{nRp = nchar(Rpattern)} whose
    elements \code{max.Rmismatch[i]} represent the maximum number of
    acceptable mismatching letters when aligning
    \code{substring(Rpattern, 1, i)} with
    \code{substring(subject, nS - i + 1, nS)} (where
    \code{nS = nchar(subject)}) or a single numeric value in
    \code{(0, 1)} that represents a constant maximum mismatch rate for
    each of the \code{nRp} alignments.
Negative numbers in integer vector inputs are used to prevent trimming
    at the i-th location.
    If an integer vector input has \code{length(max.Rmismatch) < nRp}, then
    \code{max.Rmismatch} will be augmented with enough -1's at the beginning
    of the vector to bring it up to length \code{nRp}.
    If non-zero, an inexact matching algorithm is used (see the
    \code{\link{matchPattern}} function for more information).
  }
  \item{with.Lindels}{
    If \code{TRUE} then indels are allowed in the left part of the pattern.
    In that case \code{max.Lmismatch} is interpreted as the maximum
    "edit distance" allowed in the left part of the pattern.
    See the \code{with.indels} argument of the \code{\link{matchPattern}}
    function for more information.
  }
  \item{with.Rindels}{
    Same as \code{with.Lindels} but for the right part of the pattern.
  }
  \item{Lfixed}{
    Only with a \link{DNAString} or \link{RNAString} subject can a
    \code{Lfixed} value other than the default (\code{TRUE}) be used.
    With \code{Lfixed=FALSE}, ambiguities (i.e. letters from the IUPAC
    Extended Genetic Alphabet (see \code{\link{IUPAC_CODE_MAP}}) that are
    not from the base alphabet) in the left pattern \emph{and} in the
    subject are interpreted as wildcards i.e. they match any letter that
    they stand for.
    See the \code{fixed} argument of the \code{\link{matchPattern}} function
    for more information.
  }
  \item{Rfixed}{
    Same as \code{Lfixed} but for the right part of the pattern.
  }
  \item{ranges}{
    If \code{TRUE}, then return the ranges to use to trim \code{subject}.
    If \code{FALSE}, then return the trimmed \code{subject}.
  }
}

\value{
  A new \link{XString} or \link{XStringSet} object with the flanking patterns
  within the specified edit distances removed.
  If \code{ranges = TRUE}, the ranges to use to trim \code{subject} are
  returned instead.
}

\author{P.
Aboyoun}

\seealso{
  \code{\link{matchPattern}},
  \code{\link{matchLRPatterns}},
  \link{match-utils},
  \link{XString-class},
  \link{XStringSet-class}
}

\examples{
Lpattern <- "TTCTGCTTG"
Rpattern <- "GATCGGAAG"
subject <- DNAString("TTCTGCTTGACGTGATCGGA")
subjectSet <- DNAStringSet(c("TGCTTGACGGCAGATCGG", "TTCTGCTTGGATCGGAAG"))

## Only allow for perfect matches on the flanks
trimLRPatterns(Lpattern = Lpattern, subject = subject)
trimLRPatterns(Rpattern = Rpattern, subject = subject)
trimLRPatterns(Lpattern = Lpattern, Rpattern = Rpattern, subject = subjectSet)

## Allow for perfect matches on the flanking overlaps
trimLRPatterns(Lpattern = Lpattern, Rpattern = Rpattern, subject = subjectSet,
               max.Lmismatch = rep(0, 9), max.Rmismatch = rep(0, 9))

## Allow for mismatches on the flanks
trimLRPatterns(Lpattern = Lpattern, Rpattern = Rpattern, subject = subject,
               max.Lmismatch = 0.2, max.Rmismatch = 0.2)
maxMismatches <- as.integer(0.2 * 1:9)
maxMismatches
trimLRPatterns(Lpattern = Lpattern, Rpattern = Rpattern, subject = subjectSet,
               max.Lmismatch = maxMismatches, max.Rmismatch = maxMismatches)

## Produce ranges that can be an input into other functions
trimLRPatterns(Lpattern = Lpattern, Rpattern = Rpattern, subject = subjectSet,
               max.Lmismatch = rep(0, 9), max.Rmismatch = rep(0, 9),
               ranges = TRUE)
trimLRPatterns(Lpattern = Lpattern, Rpattern = Rpattern, subject = subject,
               max.Lmismatch = 0.2, max.Rmismatch = 0.2, ranges = TRUE)
}

\keyword{methods}