%\VignetteIndexEntry{motifStack Vignette}
%\VignetteDepends{motifStack}
%\VignetteKeywords{sequence logo}
%\VignettePackage{motifStack}
\documentclass[12pt]{article}
\usepackage{hyperref}
\usepackage{url}
\usepackage[numbers]{natbib}
\usepackage{graphicx}
\bibliographystyle{plainnat}

\newcommand{\Rfunction}[1]{{\texttt{#1}}}
\newcommand{\Robject}[1]{{\texttt{#1}}}
\newcommand{\Rpackage}[1]{{\textit{#1}}}

\author{Jianhong Ou\footnote{jianhong.ou@umassmed.edu}, Lihua Julie Zhu\footnote{Julie.Zhu@umassmed.edu}}
\begin{document}
\title{motifStack guide}
\maketitle
\tableofcontents
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Introduction}
A sequence logo has been widely used as a graphical representation of an alignment of multiple amino acid or nucleic acid sequences.
There is a package seqLogo~\cite{Oliver} implemented in R to draw DNA sequence logos.
However, it does not support amino acid sequence logos.
We have developed the motifStack package for drawing sequence logos for protein, DNA and RNA sequences.
motifStack provides the flexibility for users to select the font type and symbol colors.
Compared to seqLogo, motifStack also has the capability for graphical representation of multiple motifs.

\section{Prepare environment}
You will need Ghostscript: the full path to the executable can be set by the environment variable \texttt{R\_GSCMD}.
If this is unset, a Ghostscript executable will be searched for by name on your path.
For example, on Unix, Linux or Mac, \texttt{gs} is used for searching, and on Windows the setting of the environment variable \texttt{GSC} is used; otherwise the commands \texttt{gswin64c.exe} then \texttt{gswin32c.exe} are tried.
Example on Windows: assume that gswin32c.exe is installed at C:\textbackslash Program Files\textbackslash gs\textbackslash gs9.06\textbackslash bin, then open R and try:
\begin{scriptsize}
Sys.setenv(R\_GSCMD="\textbackslash"C:\textbackslash\textbackslash Program Files\textbackslash\textbackslash gs\textbackslash\textbackslash gs9.06\textbackslash\textbackslash bin\textbackslash\textbackslash gswin32c.exe\textbackslash"")
\end{scriptsize}

\section{Examples of using motifStack}
\subsection{plot a DNA sequence logo with different fonts and colors}
Users can select different fonts and colors to draw the sequence logo.
% Echoed chunk: reads the bin_SOLEXA count matrix shipped with the package,
% wraps it in a "pcm" object and plots it with three font/color settings.
\begin{scriptsize}
<<>>=
library(motifStack)
pcm <- read.table(file.path(find.package("motifStack"),
                            "extdata", "bin_SOLEXA.pcm"))
pcm <- pcm[,3:ncol(pcm)]
rownames(pcm) <- c("A","C","G","T")
motif <- new("pcm", mat=as.matrix(pcm), name="bin_SOLEXA")
##pfm object
#motif <- pcm2pfm(pcm)
#motif <- new("pfm", mat=motif, name="bin_SOLEXA")
plot(motif)
#try a different font
plot(motif, font="mono,Courier")
#try a different font and a different color group
motif@color <- colorset(colorScheme='basepairing')
plot(motif,font="Times")
@
\end{scriptsize}
% Figure chunk (not echoed): the same three plots in a 3x1 layout, each titled
% with the font and color scheme actually used for that panel.
\begin{figure}[htb]
\centering
<<fig=TRUE,echo=FALSE>>=
opar <- par(mfrow=c(3,1))
motif@color <- colorset(colorScheme='auto')
motif@name <- "bin_SOLEXA, font='Helvetica', color='auto'"
plot(motif)
motif@name <- "bin_SOLEXA, font='mono,Courier', color='auto'"
plot(motif, font="mono,Courier")
motif@color <- colorset(colorScheme='basepairing')
motif@name <- "bin_SOLEXA, font='Times', color='basepairing'"
plot(motif, font="Times")
# restore the graphics parameters saved in opar
par(opar)
@
\caption{DNA sequence logo}
\label{fig:font}
\end{figure}

\subsection{plot an amino acid sequence logo}
Given that motifStack allows any letters to be used as symbols, it can also be used to draw amino acid sequence logos.
% Echoed chunk: reads the CAP example table shipped with the package, converts
% it to a "pfm" object with the amino-acid "chemistry" color set and plots it.
\begin{scriptsize}
<<>>=
library(motifStack)
protein<-read.table(file.path(find.package("motifStack"),"extdata","cap.txt"))
protein<-t(protein[,1:20])
motif<-pcm2pfm(protein)
motif<-new("pfm", mat=motif, name="CAP",
           color=colorset(alphabet="AA",colorScheme="chemistry"))
plot(motif)
@
\end{scriptsize}
\begin{figure}[htb]
\centering
<<fig=TRUE,echo=FALSE>>=
plot(motif)
@
\caption{Amino acid sequence logo}
\label{fig:protein}
\end{figure}

\subsection{plot sequence logo stack}
motifStack is designed to show multiple motifs in the same canvas.
To show the sequence logo stack, the distances between motifs need to be calculated first, for example by using MotIV\cite{Eloi2010}::motifDistances, which implements STAMP\cite{Mahony2007}.
After alignment, users can use the plotMotifLogoStack function to draw a sequence logo stack, the plotMotifLogoStackWithTree function to show the distance tree with the sequence logo stack, or the plotMotifStackWithRadialPhylog function to plot the sequence logo stack in radial style in the same canvas.
There is also a shortcut function named motifStack.
Use the stack layout to call plotMotifLogoStack, the treeview layout to call plotMotifLogoStackWithTree and the radialPhylog layout to call plotMotifStackWithRadialPhylog.
% Echoed chunk: builds the list of "pfm" objects used by the stack/tree figures
% (motifs) and a random sample of 50 MotifDb fly motifs for the radial figure
% (motifs2); pfms and color are reused by later chunks.
\begin{scriptsize}
<<>>=
library(motifStack)
#####Input#####
pcms<-readPCM(file.path(find.package("motifStack"), "extdata"),"pcm$")
pcms<-lapply(pcms,function(.ele){.ele<-.ele[,3:ncol(.ele)];rownames(.ele)<-c("A","C","G","T");.ele})
motifs<-lapply(pcms,pcm2pfm)
motifs<-lapply(names(motifs), function(.ele, motifs){new("pfm",mat=motifs[[.ele]], name=.ele)},motifs)
##plot stacks
motifStack(motifs, layout="stack", ncex=1.0)
motifStack(motifs, layout="tree")
###When the number of motifs is too large to be shown in a vertical stack,
###motifStack can draw them in a radial style.
library("MotifDb")
matrix.fly <- query(MotifDb, "Dmelanogaster")
motifs2 <- as.list(matrix.fly)
motifs2 <- motifs2[grepl("Dmelanogaster\\-FlyFactorSurvey\\-", names(motifs2))]
names(motifs2) <- gsub("Dmelanogaster_FlyFactorSurvey_", "",
                       gsub("_FBgn\\d+$", "",
                            gsub("[^a-zA-Z0-9]","_",
                                 gsub("(_\\d+)+$", "", names(motifs2)))))
motifs2 <- motifs2[unique(names(motifs2))]
pfms <- sample(motifs2, 50)
motifs2 <- lapply(names(pfms), function(.ele, pfms){new("pfm",mat=pfms[[.ele]], name=.ele)},pfms)
library(RColorBrewer)
color <- brewer.pal(12, "Set3")
@
% Static listing (not evaluated): the radial call is only drawn once, in the
% figure chunk further below.
\begin{Schunk}
\begin{Sinput}
> motifStack(motifs2, layout="radialPhylog",
+            col.bg=rep(color, each=5), col.bg.alpha=0.3,
+            col.leaves=rep(color, each=5),
+            col.inner.label.circle=rep(color,5),
+            col.outer.label.circle=rep(color,5), outer.label.circle.width=0.1,
+            angle=350)
\end{Sinput}
\end{Schunk}
\end{scriptsize}
\begin{figure}[htb]
\centering
<<fig=TRUE,echo=FALSE>>=
motifStack(motifs, layout="stack", ncex=1.0)
@
\caption{sequence logo stack}
\label{fig:logostack}
\end{figure}
\begin{figure}[htb]
\centering
<<fig=TRUE,echo=FALSE>>=
motifStack(motifs, layout="tree")
@
\caption{sequence logo stack with hierarchical cluster tree}
\label{fig:treestack}
\end{figure}
\begin{figure}[htb]
\centering
<<fig=TRUE,echo=FALSE>>=
motifStack(motifs2, layout="radialPhylog",
           circle=0.4, cleaves = 0.2,
           clabel.leaves = 0.5,
           col.bg=rep(color, each=5), col.bg.alpha=0.3,
           col.leaves=rep(color, each=5),
           col.inner.label.circle=rep(color,5),
           col.outer.label.circle=rep(color,5),
           outer.label.circle.width=0.1,
           angle=350)
@
\caption{sequence logo stack in radial style}
\label{fig:radialstack}
\end{figure}

\subsection{plot a sequence logo cloud}
We can also plot a sequence logo cloud for DNA sequence logos.
% Echoed chunk: assigns the sampled motifs (pfms) to five groups of ten,
% clusters them with MotIV, reorders them by the leaves of the resulting
% phylog tree and computes the motif signatures drawn by motifCloud.
% NOTE(review): jaspar.scores is never passed explicitly below; presumably
% MotIV::motifDistances picks it up as its default score table -- confirm
% before removing or renaming it.
\begin{scriptsize}
<<>>=
groups <- rep(paste("group",1:5,sep=""), each=10)
names(groups) <- names(pfms)
group.col <- brewer.pal(5, "Set3")
names(group.col)<-paste("group",1:5,sep="")
jaspar.scores <- MotIV::readDBScores(file.path(find.package("MotIV"),
                                               "extdata",
                                               "jaspar2010_PCC_SWU.scores"))
d <- MotIV::motifDistances(pfms)
hc <- MotIV::motifHclust(d)
phylog <- hclust2phylog(hc)
leaves <- names(phylog$leaves)
pfms <- pfms[leaves]
pfms <- lapply(names(pfms), function(.ele, pfms){new("pfm",mat=pfms[[.ele]], name=.ele)},pfms)
motifSig <- motifSignature(pfms, phylog, groupDistance=0.1)
@
% Static listing (not evaluated): the cloud is drawn once, in the figure chunk.
\begin{Schunk}
\begin{Sinput}
> motifCloud(motifSig, scale=c(6, .5),
+            layout="rectangles", group.col=group.col, groups=groups,
+            draw.legend=TRUE)
\end{Sinput}
\end{Schunk}
\end{scriptsize}
\begin{figure}[htb]
\centering
<<fig=TRUE,echo=FALSE>>=
motifCloud(motifSig, scale=c(6, .5),
           layout="rectangles",
           group.col=group.col,
           groups=groups,
           draw.legend=TRUE)
@
\caption{a sequence logo cloud with rectangle packing layout}
\label{fig:motifCloud}
\end{figure}

\section{References}
\begin{thebibliography}{99}
\bibitem[Oliver Bembom ()]{Oliver} seqLogo: Sequence logos for DNA sequence alignments. R package version 1.22.0.
\bibitem[Eloi et al. (2010)]{Eloi2010} MotIV: Motif Identification and Validation. Eloi Mercier and Raphael Gottardo (2010). R package version 1.10.0.
\bibitem[Mahony et al. (2007)]{Mahony2007} STAMP: a web tool for exploring DNA-binding motif similarities. Mahony S, Benos PV, Nucleic Acids Res. 2007, 35(Web Server issue): W253-W258.
\end{thebibliography}

\section{Session Info}
<<>>=
sessionInfo()
@
\end{document}