\section{Image segmentation and feature extraction}

\subsection{Preliminaries}

Load the \Bioconductor{} package \Biocexptpkg{HD2013SGI}.
<<imageProcessing1>>=
library("HD2013SGI")
dir.create(file.path("result","Figures"),recursive=TRUE,showWarnings=FALSE)
@

We adapted image segmentation and feature extraction methods from previous
work\cite{carpenter2006cellprofiler,fuchs2010clustering,held2010cellcognition},
using the \Bioconductor{} package \Biocpkg{EBImage}\cite{pau2010ebimage}. The data set comprised
5.6 terabytes. Here, exemplarily we show the image segmentation and feature extraction
on a clipped imaged of size 340 pixels $\times$ 490 pixels.

\subsection{Image segmentation}
Images were obtained from the InCell Analyzer 2000 as 12-bit TIFF
images of size 2,048 pixels $\times$ 2,048 pixels at three colors. Techically the
images were stored as 16bit-images, but their dynamic range only
extended to 12bit ($0,\ldots,4095$). The package
contains a clipped image of size 340 pixels $\times$ 490 pixels. The three channels
were saved in separate TIFF files, and we load them into the workspace
as follows.
<<imageProcessing2>>=
fnch1 = system.file(file.path("images","image-DAPI.tif"), package="HD2013SGI")
fnch2 = system.file(file.path("images","image-FITC.tif"), package="HD2013SGI")
fnch3 = system.file(file.path("images","image-Cy3.tif"),  package="HD2013SGI")

Img1 = readImage(fnch1)
Img2 = readImage(fnch2)
Img3 = readImage(fnch3)
@

The images \Robject{Img1}, \Robject{Img2} and \Robject{Img3} are represented as arrays of
floating point numbers. Their dynamic range depends in each case on the staining reagent,
the power of the light source and the excitation time. For display purposes, the following
commands obtain linearly transformed versions of the images; note however that the
subsequent analysis will be performed on the original images.
<<imageProcessing3>>=
ImgDAPI = normalize(Img1, inputRange=c(0.002, 0.03))
ImgFITC = normalize(Img2, inputRange=c(0.004, 0.04))
ImgCy3 = normalize(Img3,  inputRange=c(0.002, 0.03))
@
<<imageProcessing3,eval=FALSE>>=
display(combine(ImgDAPI, ImgFITC, ImgCy3))
@
%
The \Rfunction{display} function will open the images in a browser, and you can navigate
through the three channels with the arrows on the top of the page.
%
<<imageProcessing4,echo=FALSE>>=
writeImage(ImgDAPI,file.path("result","Figures","image-ImgDAPI.jpg"),quality=98)
writeImage(ImgFITC,file.path("result","Figures","image-ImgFITC.jpg"),quality=98)
writeImage(ImgCy3,file.path("result","Figures","image-ImgCy3.jpg"),quality=98)
@

\begingroup
\parindent=0pt
\vspace{1.5ex}
\hspace{0.195\textwidth}
\hfill
\begin{minipage}[t]{0.195\textwidth}
  \centering
  \includegraphics[width=\textwidth]{result/Figures/image-ImgDAPI.jpg}\\
  \Robject{ImgDAPI}
\end{minipage}
\hfill
\begin{minipage}[t]{0.195\textwidth}
  \centering
  \includegraphics[width=\textwidth]{result/Figures/image-ImgFITC.jpg}\\
  \Robject{ImgFITC}
\end{minipage}
\hfill
\begin{minipage}[t]{0.195\textwidth}
  \centering
  \includegraphics[width=\textwidth]{result/Figures/image-ImgCy3.jpg}\\
  \Robject{ImgCy3}
\end{minipage}
\hfill
\hspace{0.195\textwidth}\vspace{1.5ex}
\par\endgroup

We can combine the three channels to a single false color image using red for
Actin, yellow for $\alpha$-Tubulin, and blue for the DAPI staining.
<<imageProcessing5>>=
ImgColor = Image(combine(ImgCy3,ImgFITC,ImgDAPI),colormode="Color")
@
<<imageProcessing6,eval=FALSE>>=
display(ImgColor)
@
<<imageProcessing7,echo=FALSE>>=
writeImage(ImgColor,file.path("result","Figures","image-ImgColor.jpg"),quality=98)
@

\begingroup
\parindent=0pt
\vspace{1.5ex}
\hspace{0.195\textwidth}
\hfill
\hspace{0.195\textwidth}
\hfill
\begin{minipage}[t]{0.195\textwidth}
  \centering
  \includegraphics[width=\textwidth]{result/Figures/image-ImgColor.jpg}\\
  \Robject{ImgFITC}
\end{minipage}
\hfill
\hspace{0.195\textwidth}
\hfill
\hspace{0.195\textwidth}\vspace{1.5ex}
\par\endgroup

In a first processing step we smoothed the images by linear filtering with
a Gaussian kernel with a small bandwidth of 1 pixel, respectively
3 pixels.
<<imageProcessing8>>=
Filter1 = makeBrush(size=51,shape = "gaussian",sigma=1)/12.91571
Filter3 = makeBrush(size=51,shape = "gaussian",sigma=3)/12.91571
Img1smooth = filter2(Img1, filter=Filter1)
Img2smooth = filter2(Img2, filter=Filter3)
Img3smooth = filter2(Img3, filter=Filter3)
@
%
% The magic number 12.91571 compensates for a bug in a previous
% version of EBImage (based on imageMagick).
The smoothed images can be displayed as above after normalization. The difference to the
original images is almost not visible, but it helped to smoothen the segmentation
boundaries that were generated by the next steps.
%
<<imageProcessing9>>=
smoothDAPI = normalize(Img1smooth*12.91571, inputRange=c(0.002, 0.03))
smoothFITC = normalize(Img2smooth*12.91571, inputRange=c(0.004, 0.04))
smoothCy3 = normalize(Img3smooth*12.91571,  inputRange=c(0.002, 0.03))
@
<<imageProcessing10,eval=FALSE>>=
display(combine(smoothDAPI,smoothFITC,smoothCy3))
@
<<imageProcessing11,echo=FALSE>>=
writeImage(smoothDAPI, file.path("result","Figures","image-smoothDAPI.jpg"),quality=98)
writeImage(smoothFITC, file.path("result","Figures","image-smoothFITC.jpg"),quality=98)
writeImage(smoothCy3,  file.path("result","Figures","image-smoothCy3.jpg"), quality=98)
@

\begingroup
\parindent=0pt
\vspace{1.5ex}
\hspace{0.195\textwidth}
\hfill
\begin{minipage}[t]{0.195\textwidth}
  \centering
  \includegraphics[width=\textwidth]{result/Figures/image-smoothDAPI.jpg}\\
  \Robject{smoothDAPI}
\end{minipage}
\hfill
\begin{minipage}[t]{0.195\textwidth}
  \centering
  \includegraphics[width=\textwidth]{result/Figures/image-smoothFITC.jpg}\\
  \Robject{smoothFITC}
\end{minipage}
\hfill
\begin{minipage}[t]{0.195\textwidth}
  \centering
  \includegraphics[width=\textwidth]{result/Figures/image-smoothCy3.jpg}\\
  \Robject{smoothCy3}
\end{minipage}
\hfill
\hspace{0.195\textwidth}\vspace{1.5ex}
\par\endgroup
Nuclei were segmented by adaptive thresholding with a window size of 10 pixels
(corresponding to 7.4 $\mu m$). See the next row of images for the
outcomes of each of the following steps.
<<imageProcessing12>>=
nucleusThresh = thresh(Img1smooth, w = 10, h = 10, offset = 0.0001)
@
A morphological operation (opening) removes tiny segments and
smoothens their shapes.
<<imageProcessing13>>=
nucleusOpening = opening(nucleusThresh, kern=makeBrush(3, shape="disc"))
@
An integer value is assigned to individually label the segmented regions.
<<imageProcessing14>>=
nucleusSeed = bwlabel(nucleusOpening)
@
%
The resulting segmentation (\Robject{nucleusSeed}), shown below,
nicely separates the different nuclei from each other, but
unfortunately in some cases shows holes and does not cover all of the
area with nuclear staining in the image. Therefore, we generated a second
segmentation, to be used as a mask, that covered the whole nuclear stained region, but which
was in many places connected across neighbouring nuclei. To do so, we started with a less
stringent adaptive thresholding and applied to it another
morphological operation, which fills holes that are surrounded by foreground pixels.
<<imageProcessing15>>=
nucleusFill = fillHull(thresh(Img1smooth, w = 20, h = 20, offset = 0.00005))
@
%
To improve on what we had in
(\Robject{nucleusSeed}), we propagated the segmented objects until the
mask defined
\Robject{nucleusFill} was filled. Boundaries between nuclei, in those
places where the mask was connected, were found by
Voronoi tessellation on a metric space that depends on the gradient field
of \Robject{smoothDAPI} using the algorithm by Jones et al.~\cite{jones2005voronoi}.
<<imageProcessing16>>=
nucleusRegions = propagate(Img1smooth, nucleusSeed, mask=nucleusFill)
@
We can now display the five above images from the nucleus segmentation.
<<imageProcessing17,eval=FALSE>>=
display(combine(nucleusThresh,
                nucleusOpening,
                colorLabels(nucleusSeed),
                nucleusFill,
                colorLabels(nucleusRegions)))
@
<<imageProcessing18,echo=FALSE>>=
writeImage(nucleusThresh,
           file.path("result","Figures","image-seg-nucleusThresh.jpg"),quality=98)
writeImage(nucleusOpening,
           file.path("result","Figures","image-seg-nucleusOpening.jpg"),quality=98)
writeImage(colorLabels(nucleusSeed),
           file.path("result","Figures","image-seg-nucleusSeed.jpg"),quality=98)
writeImage(nucleusFill,
           file.path("result","Figures","image-seg-nucleusFill.jpg"),quality=98)
writeImage(colorLabels(nucleusRegions),
           file.path("result","Figures","image-seg-nucleusRegions.jpg"),quality=98)
@

\begingroup
\parindent=0pt
\vspace{1.5ex}
\begin{minipage}[t]{0.195\textwidth}
  \centering
  \includegraphics[width=\textwidth]{result/Figures/image-seg-nucleusThresh.jpg}\\
  \Robject{nucleusThresh}
\end{minipage}
\hfill
\begin{minipage}[t]{0.195\textwidth}
  \centering
  \includegraphics[width=\textwidth]{result/Figures/image-seg-nucleusOpening.jpg}\\
  \Robject{nucleusOpening}
\end{minipage}
\hfill
\begin{minipage}[t]{0.195\textwidth}
  \centering
  \includegraphics[width=\textwidth]{result/Figures/image-seg-nucleusSeed.jpg}\\
  \Robject{nucleusSeed}
\end{minipage}
\hfill
\begin{minipage}[t]{0.195\textwidth}
  \centering
  \includegraphics[width=\textwidth]{result/Figures/image-seg-nucleusFill.jpg}\\
  \Robject{nucleusFill}
\end{minipage}
\hfill
\begin{minipage}[t]{0.195\textwidth}
  \centering
  \includegraphics[width=\textwidth]{result/Figures/image-seg-nucleusRegions.jpg}\\
  \Robject{nucleusRegions}
\end{minipage}\vspace{1.5ex}
\par\endgroup

To segment the cell bodies, we applied adaptive thresholding followed
by morphological
opening on the actin channel \Robject{Img3smooth}, similar to what we did for the nuclei on
the DAPI channel (\Robject{Img1smooth}).
<<imageProcessing19>>=
cytoplasmThresh = thresh(Img3smooth, w = 20, h = 20, offset = 0.000001)
cytoplasmOpening = opening(cytoplasmThresh,kern=makeBrush(3,shape="disc"))
@
%
Because the cell bodies often covered large sections of whole image, adaptive thresholding
did not always detect all cellular areas, and we employed a second segmentation
by a global, large threshold.
%
<<imageProcessing20>>=
globalThreshold = 0.0003
cytoplasmOpening2 = opening(Img3smooth > globalThreshold)
@
%
To define the mask of image area covered by cell bodies, we combined the two cytoplasmic
masks and the nuclear mask by taking their union. 
% \fixme{Could we just use \Robject{`|`} for this?}
%
<<imageProcessing21>>=
cytoplasmCombined = cytoplasmOpening
cytoplasmCombined[cytoplasmOpening2 > cytoplasmCombined] =
              cytoplasmOpening2[cytoplasmOpening2 > cytoplasmCombined]
cytoplasmCombined[nucleusFill > cytoplasmCombined] =
              nucleusFill[nucleusFill > cytoplasmCombined]
@
%
To define the cellular bodies, the nucleus segmentation was extended by a
Voronoi tessellation-based propagation algorithm\cite{jones2005voronoi}.
%
<<imageProcessing22>>=
cytoplasmRegions = propagate(Img3smooth, nucleusRegions, lambda=1.0e-4, mask=cytoplasmCombined)
@
%
The three image masks for the segmentation of the cytoplam are
displayed below.
%
<<imageProcessing23,eval=FALSE>>=
display(combine(cytoplasmThresh,
                cytoplasmOpening,
                cytoplasmOpening2,
                cytoplasmCombined,
                colorLabels(cytoplasmRegions)))
@
<<imageProcessing24,echo=FALSE>>=
writeImage(cytoplasmThresh,
           file.path("result","Figures","image-seg-cytoplasmThresh.jpg"),quality=98)
writeImage(cytoplasmOpening,
           file.path("result","Figures","image-seg-cytoplasmOpening.jpg"),quality=98)
writeImage(cytoplasmOpening2,
           file.path("result","Figures","image-seg-cytoplasmOpening2.jpg"),quality=98)
writeImage(cytoplasmCombined,
           file.path("result","Figures","image-seg-cytoplasmCombined.jpg"),quality=98)
writeImage(colorLabels(cytoplasmRegions),
           file.path("result","Figures","image-seg-cytoplasmRegions.jpg"),quality=98)
@

\begingroup
\parindent=0pt
\vspace{1.5ex}
\begin{minipage}[t]{0.195\textwidth}
  \centering
  \includegraphics[width=\textwidth]{result/Figures/image-seg-cytoplasmThresh.jpg}\\
  \Robject{cytoplasmThresh}
\end{minipage}
\hfill
\begin{minipage}[t]{0.195\textwidth}
  \centering
  \includegraphics[width=\textwidth]{result/Figures/image-seg-cytoplasmOpening.jpg}\\
  \Robject{cytoplasmOpening}
\end{minipage}
\hfill
\begin{minipage}[t]{0.195\textwidth}
  \centering
  \includegraphics[width=\textwidth]{result/Figures/image-seg-cytoplasmOpening2.jpg}\\
  \Robject{cytoplasmOpening2}
\end{minipage}
\hfill
\begin{minipage}[t]{0.195\textwidth}
  \centering
  \includegraphics[width=\textwidth]{result/Figures/image-seg-cytoplasmCombined.jpg}\\
  \Robject{cytoplasmCombined}
\end{minipage}
\hfill
\begin{minipage}[t]{0.195\textwidth}
  \centering
  \includegraphics[width=\textwidth]{result/Figures/image-seg-cytoplasmRegions.jpg}\\
  \Robject{cytoplasmRegions}
\end{minipage}\vspace{1.5ex}
\par\endgroup

As an alternative representation, below we display the nucleus segmentation and cell
body segmentation on top of the original images.
<<imageProcessing25>>=
  Imgout1 = paintObjects(nucleusRegions,ImgDAPI, col='#ff00ff')
  Imgout2 = paintObjects(nucleusRegions,ImgColor, col='#ff00ff')
  Imgout3 = paintObjects(cytoplasmRegions,
                         paintObjects(nucleusRegions,
                                      Image(ImgDAPI,colormode="Color"),
                                      col='#ff00ff'),
                         col='#ff0000')
  Imgout4 = paintObjects(cytoplasmRegions,
                         paintObjects(nucleusRegions,
                                      Image(ImgCy3,colormode="Color"),
                                      col='#ff00ff'),
                         col='#ff0000')
  Imgout5 = paintObjects(cytoplasmRegions,
                         paintObjects(nucleusRegions,ImgColor, col='#ff00ff'),
                         col='#ff0000')
@
<<imageProcessing26,echo=FALSE>>=
writeImage(Imgout1,
           file.path("result","Figures","image-seg-Imgout1.jpg"),quality=98)
writeImage(Imgout2,
           file.path("result","Figures","image-seg-Imgout2.jpg"),quality=98)
writeImage(Imgout3,
           file.path("result","Figures","image-seg-Imgout3.jpg"),quality=98)
writeImage(Imgout4,
           file.path("result","Figures","image-seg-Imgout4.jpg"),quality=98)
writeImage(Imgout5,
           file.path("result","Figures","image-seg-Imgout5.jpg"),quality=98)
@

\begingroup
\parindent=0pt
\vspace{1.5ex}
\begin{minipage}[t]{0.195\textwidth}
  \centering
  \includegraphics[width=\textwidth]{result/Figures/image-seg-Imgout1.jpg}\\
  \Robject{nucleusRegions}\\
  DAPI
\end{minipage}
\hfill
\begin{minipage}[t]{0.195\textwidth}
  \centering
  \includegraphics[width=\textwidth]{result/Figures/image-seg-Imgout2.jpg}\\
  \Robject{nucleusRegions}\\
  {\color{Red}Actin}/{\color{Green}$\alpha$-Tubulin}/{\color{Blue}DAPI}
\end{minipage}
\hfill
\begin{minipage}[t]{0.195\textwidth}
  \centering
  \includegraphics[width=\textwidth]{result/Figures/image-seg-Imgout3.jpg}\\
  \Robject{cytoplasmRegions}\\
  DAPI
\end{minipage}
\hfill
\begin{minipage}[t]{0.195\textwidth}
  \centering
  \includegraphics[width=\textwidth]{result/Figures/image-seg-Imgout4.jpg}\\
  \Robject{cytoplasmRegions}\\
  Actin
\end{minipage}
\hfill
\begin{minipage}[t]{0.195\textwidth}
  \centering
  \includegraphics[width=\textwidth]{result/Figures/image-seg-Imgout5.jpg}\\
  \Robject{cytoplasmRegions}\\
  {\color{Red}Actin}/{\color{Green}$\alpha$-Tubulin}/{\color{Blue}DAPI}
\end{minipage}\vspace{1.5ex}
\par\endgroup

\subsection{Feature extraction}
Features for intensity, shape and texture were extracted for each cell
from the DAPI channel using the nucleus segmentation (\Robject{nucleusRegions}) and from the
actin and tubulin channels using the cell body segmentation (\Robject{cytoplasmRegions}).
%
<<imageProcessing27>>=
F1 = computeFeatures(nucleusRegions,  Img1, xname="nuc",  refnames="nuc")
F2 = computeFeatures(cytoplasmRegions,Img2, xname="cell", refnames="tub")
F3 = computeFeatures(cytoplasmRegions,Img3, xname="cell", refnames="act")
@
%
Additional features were extracted from the joint distribution of the
DAPI and the tubulin signals using the cell body mask.
%
<<imageProcessing28>>=
F4 = computeFeatures(cytoplasmRegions, (Img1 - mean(Img1)) * (Img2 - mean(Img2)),
                     xname="cell", refnames="nuctub")
@
%
The below figure shows three phenotypic features for the cells segmented in the
example image.
%
<<imageProcessing29,echo=FALSE,results=hide>>=
pdf(file=file.path("result","Figures","image-features.pdf"),width=12,height=6)
vp = viewport(layout=grid.layout(nrow=3,ncol=7,
                                 widths=c(dim(Img1)[1],rep(c(0.07,0.7)*dim(Img1)[1],3)),
                                 heights=c(0.01*dim(Img1)[2],dim(Img1)[2],0.15*dim(Img1)[2]),respect=TRUE))
pushViewport(vp)
pushViewport(viewport(layout.pos.row=2,layout.pos.col=1,xscale=c(0.5,dim(Img1)[1]+0.5),yscale=c(dim(Img1)[2]+0.5,0.5)))
grid.raster(Imgout5)
#grid.points(F1[,1],F1[,2],default.units="native",gp=gpar(col="white"))
grid.polyline(x=c(F1[,1],rep(dim(Img1)[1]+0.5,nrow(F1))),
              y=c(F1[,2],F1[,2]),id=c(seq_len(nrow(F1)),seq_len(nrow(F1))),
              default.units="native",gp=gpar(col="gray"))
popViewport()

#########
## nucleus area
f = F1[,"nuc.0.s.area"]
r = range(f,finite=TRUE)
pushViewport(viewport(layout.pos.row=2,layout.pos.col=3,
                      xscale=r,
                      yscale=c(dim(Img1)[2]+0.5,0.5)))
grid.rect(gp=gpar(fill="gray90",col=NA))
grid.polyline(x=c(rep(r[1]-0.1*diff(r),nrow(F1)),rep(r[2],nrow(F1))),
              y=c(F1[,2],F1[,2]),id=c(seq_len(nrow(F1)),seq_len(nrow(F1))),
              default.units="native",gp=gpar(col="gray"))
m = mean(f)
grid.lines(x=unit(c(m,m),"native"),
          y=unit(c(0,1),"npc")+unit(c(-1,1),"lines"))
grid.points(f,F1[,2],default.units="native",pch=20)
grid.xaxis()
grid.text(x=unit(0.5,"npc"),y=unit(-3,"lines"),just=c("center","top"),label="nucleus area")
popViewport()

#########
## cell eccentricity
f = F3[,"cell.0.m.eccentricity"]
r = range(f,finite=TRUE)
pushViewport(viewport(layout.pos.row=2,layout.pos.col=5,
                      xscale=r,
                      yscale=c(dim(Img1)[2]+0.5,0.5)))
grid.rect(gp=gpar(fill="gray90",col=NA))
grid.polyline(x=c(rep(r[1]-0.1*diff(r),nrow(F1)),rep(r[2],nrow(F1))),
              y=c(F1[,2],F1[,2]),id=c(seq_len(nrow(F1)),seq_len(nrow(F1))),
              default.units="native",gp=gpar(col="gray"))
m = mean(f)
grid.lines(x=unit(c(m,m),"native"),
          y=unit(c(0,1),"npc")+unit(c(-1,1),"lines"))
grid.points(f,F1[,2],default.units="native",pch=20)
grid.xaxis()
grid.text(x=unit(0.5,"npc"),y=unit(-3,"lines"),just=c("center","top"),label="cell eccentricity")
popViewport()

#########
## tubulin intensity
f = F2[,"cell.tub.b.mean"]
r = range(f,finite=TRUE)
pushViewport(viewport(layout.pos.row=2,layout.pos.col=7,
                      xscale=r,
                      yscale=c(dim(Img1)[2]+0.5,0.5)))
grid.rect(gp=gpar(fill="gray90",col=NA))
grid.polyline(x=c(rep(r[1]-0.1*diff(r),nrow(F1)),f),
              y=c(F1[,2],F1[,2]),id=c(seq_len(nrow(F1)),seq_len(nrow(F1))),
              default.units="native",gp=gpar(col="gray"))
m = mean(f)
grid.lines(x=unit(c(m,m),"native"),
          y=unit(c(0,1),"npc")+unit(c(-1,1),"lines"))
grid.points(f,F1[,2],default.units="native",pch=20)
grid.xaxis()
grid.text(x=unit(0.5,"npc"),y=unit(-3,"lines"),just=c("center","top"),label="tubulin intensity")
popViewport()
popViewport()
dev.off()
@

\begingroup
\parindent=0pt
\vspace{1.5ex}
\includegraphics[width=\textwidth]{result/Figures/image-features.pdf}
\vspace{1.5ex}
\par\endgroup

Features were summarized
per experiment by the arithmetic mean over all cells, which in the
figure above is indicated by the black vertical lines. The number of
segmented nuclei was used as a proxy for cell count.
%
<<imageProcessing29>>=
F = cbind(F1, F2, F3, F4)
Fwell = c(count=nrow(F), apply(F, 2, mean))
@
%
The equivalent of \Robject{Fwell}, computed for all images in the
screen, together with associated metadata, is available as
R-object \Robject{featuresPerWell} from the package and can be loaded
by \Rfunction{data("featuresPerWell", package="HD2013SGI")}; see the
next section.