\name{complexity.dust} \docType{methods} \alias{complexity.dust} \alias{complexity.dust,DNAStringSet-method} \alias{complexity.dust,ShortRead-method} \alias{complexity.dust,SFFContainer-method} \title{Sequence Complexity Using the DUST Algorithm} \description{ This function computes the complexity of each sequence using the DUST algorithm and plots a histogram of the resulting scores. } \usage{ complexity.dust(object, xlab="Complexity score (0=high, 100=low)", ylab="Number of sequences", xlim=c(0, 100), col="firebrick1", breaks=100, ...) } \arguments{ \item{object}{An object of class \link{DNAStringSet}, \link{ShortRead} or \link{SFFContainer}.} \item{xlab}{The X axis label.} \item{ylab}{The Y axis label.} \item{xlim}{The limits of the X axis.} \item{col}{The plotting color.} \item{breaks}{The number of breaks in the histogram (see \sQuote{hist}).} \item{\dots}{Arguments to be passed to methods, such as graphical parameters (see \sQuote{par}).} } \details{ The complexity score is based on how often different trinucleotides occur and is scaled between 0 and 100; higher scores indicate lower complexity. A homopolymer sequence (e.g. TTTTTTTTTT) has a score of 100, a sequence of dinucleotide repeats (e.g. TATATATATA) has a score around 49, and a sequence of trinucleotide repeats (e.g. TAGTAGTAG) has a score around 32. Sequences with a score above 7 can be considered low-complexity. } \value{ A numeric vector containing the complexity score for each sequence. } \references{ Schmieder R. and Edwards R. (2011) Quality control and preprocessing of metagenomic datasets. \emph{Bioinformatics}, 2011 Mar 15;27(6):863-864. } \author{ Christian Ruckert } \keyword{methods}