\documentclass[online]{chaksem}

\usepackage[T1]{fontenc}
\usepackage[fleqn]{amsmath}
\usepackage{alltt}
\usepackage{amscd,amssymb,stmaryrd}
\usepackage{pstricks,pst-node,pst-plot,pst-coil}
\usepackage{graphicx}
\usepackage{haskell}


\renewcommand{\ttdefault}{cmtt}

\newcommand{\TODO}[1]{\textbf{--- #1 ---}}

\newcommand{\scheme}[1]{\langle\text{#1}\rangle}
\newcommand{\code}[1]{\texttt{#1}}

\newcommand{\docase}[1]{%
  \renewcommand{\arraystretch}{.8}
  \begin{array}[t]{@{~~~\code{->}~}l@{~\code=~}l}%
    \multicolumn{2}{@{}l}{\code{case}}\\
    #1
  \end{array}%
  }

% names
%
\newcommand{\Haskell}{\textsc{Haskell}}
\newcommand{\iHaskell}{\textit{\rmfamily i}\textsc{Haskell}}
\newcommand{\CToHS}{\textsc{\bfseries c\boldmath${\to}$hs}}
\newcommand{\GtkHS}{\textsc{Gtk+Haskell}}

% symbols
%
\newcommand{\DEMO}{\textbf{\ \ding{71}}}

% names of languages etc
%
\newcommand{\Nepal}{\textsc{Nepal}}
\newcommand{\lambdaPA}{\(\lambda_{\text{PA}}\)}

% Haskell environment
%
\hsmargin.5\leftmargini

% fonts
%
\DeclareMathAlphabet{\mathsfsbc}{T1}{cmss}{sbc}{n}

% misc. classes of symbols
%
\newcommand{\meta}[1]{\mathit{#1}}      % meta-level variables
\newcommand{\nt}[1]{%                   % non-terminal
  \mathsfsbc{#1}}
\newcommand{\defconst}[1]{\mathsf{#1}}  % defined constant
\newcommand{\mathname}[1]{%             % multi-character name in math mode
  \mathord{\mathit{#1}}}
\newcommand{\tup}[1]{\overline{#1}}     % tuple
\newcommand{\up}{^\uparrow}

% forms of the calculus
%
\newcommand{\trec}[2]{\mu_t#1.#2}
\newcommand{\all}[2]{\forall#1.#2}
%\newcommand{\plist}[1]{[\mkern-1mu|#1|\mkern-1mu]}
\newcommand{\plist}[1]{[\mkern-.8mu|#1|\mkern-.8mu]}
%
\newcommand{\abst}[2]{\lambda#1.#2}
\newcommand{\tabst}[3]{\lambda#1^{#2}.#3}
\newcommand{\appl}[2]{#1\;#2}
\newcommand{\tvrec}[3]{\mu_v#1^{#2}.#3}
\newcommand{\vrec}[2]{\mu_v#1.#2}
\newcommand{\letin}[3]{\mathbf{let}~{{#1}={#2}}~\mathbf{in}~#3}
%
\newcommand{\Int}{\defconst{Int}}
\newcommand{\IntArr}{\defconst{IntArr}}
\newcommand{\Float}{\defconst{Float}}
\newcommand{\Bool}{\defconst{Bool}}
\newcommand{\BoolArr}{\defconst{BoolArr}}
%
\newcommand{\false}{\defconst{false}}
\newcommand{\true}{\defconst{true}}
%
\newcommand{\pair}[2]{\langle#1,#2\rangle}
\newcommand{\pfst}{\defconst{fst}}
\newcommand{\psnd}{\defconst{snd}}
%
\newcommand{\sleft}{\defconst{left}}
\newcommand{\sright}{\defconst{right}}
\newcommand{\scase}{\defconst{case}}
%
\newcommand{\pmap}{\defconst{pmap}}
\newcommand{\mapP}{\defconst{mapP}}
\newcommand{\packP}{\defconst{packP}}
%\newcommand{\pzero}{[||]}
%\newcommand{\punit}[1]{[|#1|]}
%\newcommand{\pjoin}[2]{#1\mathbin{{+}\!\!\!{+}\!\!\!{+}}#2}
%\newcommand{\pmap}[3]{[|{#1}\mid{{#2}\leftarrow{#3}}|]}
\newcommand{\ate}[3]{[|{#1}\mid{{#2}\leftarrow{#3}}|]}
%
\newcommand{\tyidx}[1]{_{\langle#1\rangle}}
\newcommand{\typrom}[1]{_{\langle\!\langle#1\rangle\!\rangle}}
%
\newcommand{\ppoly}[1]{poly\typrom{#1}}
%
%\newcommand{\dist}{\mathbin\bullet}
\newcommand{\dist}{\defconst{rep}}
%\newcommand{\distS}{\mathbin{\underline\bullet}}
\newcommand{\distS}{\defconst{rep}^0}
%\newcommand{\distL}{\mathbin{\overline\bullet}}
\newcommand{\distL}{\defconst{rep}^1}
\newcommand{\intdist}{\mathbin{\dist_\Int}}
%\newcommand{\intdistS}{\mathbin{\distS_\Int}}
\newcommand{\intdistS}{\distS_\Int}
%\newcommand{\intdistL}{\mathbin{\distL_\Int}}
\newcommand{\intdistL}{\distL_\Int}
%\newcommand{\booldist}{\mathbin{\dist_\Bool}}
\newcommand{\booldist}{\dist_\Bool}
%\newcommand{\booldistS}{\mathbin{\distS_\Bool}}
\newcommand{\booldistS}{\distS_\Bool}
%\newcommand{\booldistL}{\mathbin{\distL_\Bool}}
\newcommand{\booldistL}{\distL_\Bool}
%\newcommand{\tdist}[1]{\mathbin{\dist\tyidx{#1}}}
\newcommand{\tdist}[1]{\dist\tyidx{#1}}
%\newcommand{\tdistS}[1]{\mathbin{\distS\tyidx{#1}}}
\newcommand{\tdistS}[1]{\distS\tyidx{#1}}
%\newcommand{\tdistL}[1]{\mathbin{\distL\tyidx{#1}}}
\newcommand{\tdistL}[1]{\distL\tyidx{#1}}
%\newcommand{\tdistC}[1]{\mathbin{\tdist{#1}^C}}
\newcommand{\tdistC}[1]{\tdist{#1}^C}
%\newcommand{\tdistCS}[1]{\mathbin{\tdistS{#1}^C}}
\newcommand{\tdistCS}[1]{\tdist{#1}^{C0}}
%\newcommand{\tdistCL}[1]{\mathbin{\tdistL{#1}^C}}
\newcommand{\tdistCL}[1]{\tdist{#1}^{C1}}
\newcommand{\join}{\mathbin{{+}\!\!\!{+}\!\!\!{+}}}
\newcommand{\joinC}{\mathbin{\join^C}}
\newcommand{\append}{\mathbin{{+}\!\!\!{+}}}
\newcommand{\appendP}{\mathbin{{+}\!\!\!{+}\!\!\!{+}}}
\newcommand{\indexP}{\mathbin{!|}}
\newcommand{\intindexP}{\mathbin{!|_\Int}}
\newcommand{\boolindexP}{\mathbin{!|_\Bool}}
\newcommand{\indexS}{\mathbin{\underline{\indexP}}}
\newcommand{\tindex}[1]{\mathbin{\indexP\tyidx{#1}}}
\newcommand{\tindexP}[1]{\mathbin{\indexS\tyidx{#1}}}
\newcommand{\tindexS}[1]{\mathbin{\indexS\tyidx{#1}}}
\newcommand{\tindexL}[1]{\mathbin{\indexL\tyidx{#1}}}
\newcommand{\tindexC}[1]{\mathbin{\indexP\tyidx{#1}^C}}
\newcommand{\tindexCS}[1]{\mathbin{\indexS\tyidx{#1}^C}}
\newcommand{\tindexCL}[1]{\mathbin{\tindexL{#1}^C}}

\newcommand{\intjoin}{\mathbin{\join_\Int}}
\newcommand{\booljoin}{\mathbin{\join_\Bool}}
\newcommand{\tjoin}[1]{\mathbin{\join\tyidx{#1}}}
\newcommand{\tjoinC}[1]{\mathbin{\join\tyidx{#1}^C}}
%\newcommand{\len}{\#}
\newcommand{\len}{\defconst{len}}
\newcommand{\tlen}[1]{\mathbin{\len\tyidx{#1}}}
\newcommand{\idx}{\mathbin{!|}}
\newcommand{\tidx}[1]{\mathbin{\idx\tyidx{#1}}}
%
\newcommand{\app}{\mathbin{\$}}
\newcommand{\papp}{\mathbin{\app\up}}
\newcommand{\pabst}[2]{\lambda\up#1.#2}
\newcommand{\pappl}[2]{#1\papp#2}
%
\newcommand{\tupval}[1]{\langle#1\rangle}
%
\newcommand{\Nothing}{\langle\rangle}
\newcommand{\Just}[1]{\langle{#1}\rangle}
%
\newcommand{\ind}{\defconst{index}}
\newcommand{\range}[2]{#1\ldots#2}
\newcommand{\suma}{\defconst{sum}}
\newcommand{\bnot}{\defconst{not}}
\newcommand{\flatten}{\defconst{flatten}}
\newcommand{\segd}{\defconst{segd}}
\newcommand{\partition}{\defconst{partition}}
\newcommand{\fp}{{\Uparrow}}
\newcommand{\from}{\leftarrow}

% schemes
%
\newcommand{\elim}[1]{\mathcal E\llbracket#1\rrbracket}
\newcommand{\vect}[1]{\mathcal V\llbracket#1\rrbracket}
\newcommand{\lift}[1]{^{\uparrow#1}}
\newcommand{\lifts}[1]{\mathcal L\llbracket#1\rrbracket}
\newcommand{\repr}[1]{\mathcal F\llbracket#1\rrbracket}
\newcommand{\arepr}[1]{\mathcal A\llbracket#1\rrbracket}

% set the second argument centered into a box as wide as the first argument
% (both arguments are set in the same mode as the context)
%
\makeatletter
\newcommand{\substbox}[2]{%
  \newif\ifouterm@th
  \ifmmode\outerm@thtrue\else\outerm@thfalse\fi
  \setbox0=\hbox{\ifouterm@th$#1$\else#1\fi}%
  \hbox to\wd0{\hfil\ifouterm@th$#2$\else#2\fi\hfil}
  }
\newcommand{\substboxl}[2]{%
  \newif\ifouterm@th
  \ifmmode\outerm@thtrue\else\outerm@thfalse\fi
  \setbox0=\hbox{\ifouterm@th$#1$\else#1\fi}%
  \hbox to\wd0{\ifouterm@th$#2$\else#2\fi\hfil}
  }
\newcommand{\substboxr}[2]{%
  \newif\ifouterm@th
  \ifmmode\outerm@thtrue\else\outerm@thfalse\fi
  \setbox0=\hbox{\ifouterm@th$#1$\else#1\fi}%
  \hbox to\wd0{\hfil\ifouterm@th$#2$\else#2\fi}
  }
\makeatother

% PSTricks stuff

\psset{unit=1ex,                        % scale with font
       linewidth=.6pt,                  % lines shouldn't be too heavy
       framearc=.1}

\newgray{dgray}{.5}
\newgray{bggray}{.8}
\newcmykcolor{lightgreen}{.25 0 .25 0}
\newcmykcolor{ultralightgreen}{.12 0 .12 0}
\newcmykcolor{lightmag}{.05 .25 0 0}
\newcmykcolor{altlightgreen}{.3 0 .1 0}
\newcmykcolor{lightyellow}{0 0 .5 0}
\newcmykcolor{magg}{.3 1 0 0}
%\newcmykcolor{dgreen}{1 0 .6 .3}
\newcmykcolor{dgreen}{1 0 .7 .4}
\newcmykcolor{lgreen}{1 0 .6 0}
\newcmykcolor{dblue}{1 1 0 .3}
\newcmykcolor{lblue}{1 .6 0 0}
\newcmykcolor{lightblue}{.5 .5 0 .1}
\newcmykcolor{ultralightblue}{.12 .12 0 0}
\newcmykcolor{ultralightred}{0 .12 .12 0}
%\newcmykcolor{dorange}{0 .6 .8 .3}
%\newcmykcolor{dorange}{0 .3 .5 .2}
\newcmykcolor{dorange}{0 .8 1 0}
\newcmykcolor{lred}{0 1 1 .0}
\newgray{fggray}{.7}

\newcommand{\bgcolbox}[2]{%
  \psframebox[fillcolor=#1,linestyle=none,fillstyle=solid]{#2}}
\newenvironment{bgcolmpbox}[3][\textwidth]{%
  \bgcolbox{#2}{\begin{minipage}{#1}#3\end{minipage}}}

\makeatletter
\newcommand{\fouerase}[1]{%
  \newif\ifouterm@th
  \ifmmode\outerm@thtrue\else\outerm@thfalse\fi
  \setbox0=\hbox{\ifouterm@th$#1$\else#1\fi}%
%  \unhcopy0\hskip-\wd0\hskip-.15ex%
  #1\hskip-\wd0\hskip-.15ex%
  \fou{{\red\raise.25em\hbox{\rule{\wd0}{1pt}\rule{.15ex}{1pt}}}}%
  }
\makeatother

\makeatletter
\newcommand{\sndtrd}[2]{%
  \newif\ifouterm@th
  \ifmmode\outerm@thtrue\else\outerm@thfalse\fi
  \setbox0=\hbox{\ifouterm@th$#1$\else#1\fi}%
%  \unhcopy0\hskip-\wd0%  % doesn't work in math mode
  #1\hskip-\wd0%          % this does
  \trd{#2}%
  }
\newcommand{\trdfou}[2]{%
  \newif\ifouterm@th
  \ifmmode\outerm@thtrue\else\outerm@thfalse\fi
  \setbox0=\hbox{\ifouterm@th$#1$\else#1\fi}%
%  \unhcopy0\hskip-\wd0%  % doesn't work in math mode
  #1\hskip-\wd0%          % this does
  \fou{#2}%
  }
\newcommand{\foufif}[2]{%
  \newif\ifouterm@th
  \ifmmode\outerm@thtrue\else\outerm@thfalse\fi
  \setbox0=\hbox{\ifouterm@th$#1$\else#1\fi}%
%  \unhcopy0\hskip-\wd0%  % doesn't work in math mode
  #1\hskip-\wd0%          % this does
  \fif{#2}%
  }
\makeatother

\let\emcol=\dorange
\let\headcol=\dblue

\newcommand{\slideem}[1]{\bgroup\emcol#1\egroup}
\newcommand{\slidehead}[1]{\bgroup\headcol#1\egroup}
%\newcommand{\slideem}[1]{\bgroup\bfseries#1\egroup}
%\newcommand{\slidehead}[1]{\bgroup\itshape#1\egroup}

% chaksem config
%
\let\toprulecol=\emcol
\let\botrulecol=\headcol

\begin{document}

\begin{slide}
  \vspace*{-.9\bigskipamount}
  \begin{center}
    \vspace*{\fill}
    \Large
    {\dblue More Types for\\
      Nested Data Parallel Programming}
    \\[1.2em]      
    \normalsize
    \renewcommand{\arraystretch}{1}
    \begin{tabular}{cc}
      Manuel M. T. Chakravarty
      &Gabriele Keller\\
      {\small\dgreen University of New South Wales}
      &{\small\dgreen University of Technology, Sydney}\\
      \small\texttt{chak@cse.unsw.edu.au}
      &\small\texttt{keller@it.uts.edu.au}\\
    \end{tabular}
    \par\vfill
    \rule{.25\textwidth}{.5pt}
    \par\vfill\bigskip
        \hspace*{\fill}\begin{minipage}{.75\textwidth}
        \begin{slumerate}
        \item \normalsize Nested Data Parallelism
        \item \normalsize Flattening-based Implementation
        \item \normalsize Vectorisation of Functions
        \item \normalsize Type Transformation and Specialisation
        \end{slumerate}
      \end{minipage}\hspace*{\fill}
  \end{center}%
  \vspace*{-.5\bigskipamount}
\setfooter{%
  \bgcolbox{bggray}{{\upshape Produced with \LaTeX\ seminar style \& PSTricks}}}
\end{slide}

% 20+10 minutes => 10-12 slides

\begin{slide}
  \let\toprulecol=\dgreen
  \let\botrulecol=\headcol
  \heading{Nested Data Parallelism}

  \begin{second}
    {\headcol Flat (Regular) Data Parallelism:}
    %
    \begin{center}
%      {\darkgray\rule{20ex}{.7pt}}\par
      {\small \( 
        \substbox{}{\llap{\text{Dense matrix:}}}%\(
        \left(  
          \begin{matrix}
            5 & 1 & 8 \\
            1 & 2 & -2 \\
            9 & 3 & 5
          \end{matrix} 
        \right)
        *
        \left(  \begin{matrix}
            -3 \\
            2 \\
            9
          \end{matrix} 
        \right)
        \)}
      \par\trd{{\darkgray\rule{20ex}{.7pt}}}
    \end{center}
  \end{second}
  %
  \vspace*{-\medskipamount}
  \begin{third}\small
    \begin{haskell}
      \hskwd{type} Vector &= [Float]\\
      \hskwd{type} Matrix &= [Vector]\\
      \hsnoalign{\hskip0pt}
      mvm &:: Matrix \to Vector \to Vector\\
      mvm mat v &= 
      [{\dgreen{}sum [x * y \mid (x, y)\hsfrom{}zip row v]} \mid row\hsfrom{}mat]
    \end{haskell}%
%    \begin{haskell}
%      \hskwd{type} Vector &= Array \substboxl{(Int, Int)}{Int}Float\\
%      \hskwd{type} Matrix &= Array (Int, Int) Float\\
%      \hsnoalign{\hskip0pt}
%      mvm &:: Matrix \to Vector \to Vector\\
%      mvm mat v &= \hslet{(1, n) = bounds v}{%
%        listArray (1, n) \\
%        \hskip\hsmargin[{\dgreen{}sum [mat!(i, j)*v!j \mid j\hsfrom[1..n]]} \mid i\hsfrom[1..n]]
%        }
%    \end{haskell}%
  \end{third}%
\end{slide}

\begin{slide}
  {\headcol Nested, or Irregular Data Parallelism:}
  %
  \begin{center}
%      {\darkgray\rule{20ex}{.7pt}}\par
    {\small \(\substbox{}{\llap{\text{Sparse matrix:}}}% \(
      \left(  \begin{matrix}
          5 & 0 & 8 \\
          0 & 0 & 0 \\
          9 & 0 & 0
        \end{matrix} 
      \right)
      *
      \left(  \begin{matrix}
          -3 \\
          2 \\
          9
        \end{matrix} 
      \right)
      \)}
    \par\snd{{\darkgray\rule{20ex}{.7pt}}}
  \end{center}
  %
  \newcommand{\plistfou}[1]{[\mkern-1mu\trdfou{\hskip0pt}{{\emcol|}}#1\trdfou{\hskip0pt}{{\emcol|}}\mkern-1mu]}
  \begin{second}
    \begin{haskell}
      \hskwd{type} SparseRow    &= \plistfou{(Int, Float)}
%      [\mkern-1mu\sndtrd{\hskip0pt}{|}(Int, Float)\sndtrd{\hskip0pt}{|}\mkern-1mu]
\\%      \hscom{index, value}\\
      \hskwd{type} SparseMatrix &= \plistfou{SparseRow}\\
%      [\mkern-1mu\sndtrd{\hskip0pt}{|}SparseRow\sndtrd{\hskip0pt}{|}\mkern-1mu]\\
      \hskip0pt\\
      \hsnoalign{\headcol \text{E.g.,}~
        {\plistfou{\plistfou{(0,5),(2,8)},\plistfou{},\plistfou{(0,9)}}}}
    \end{haskell}%
  \end{second}%
  %
  \vspace*{-1.4\bigskipamount}
  \begin{third}
    \begin{haskell}
      smvm &:: SparseMatrix\to\plistfou{Float}\to\plistfou{Float}\\
      smvm sm vec &= \hsbody{
        \plistfou{sum\trdfou{\hskip0pt}{{\emcol{}P}}
          \underbrace{\plistfou{x*(vec\mathbin{!\trdfou{!}{{\emcol|}}}{col})\mid(col, x)\from{row}}}_{%
            \(\text{\small{}products of one row}\)}\mid{row\from{sm}}}
      }
    \end{haskell}%
  \end{third}%
\end{slide}

\begin{slide}
  {\headcol Parallel Arrays:}
  %
  \begin{haskell}
    \hskwd{data} \plist\alpha &&\hscom{parallel array with elements \<\alpha\>}
  \end{haskell}
  %
  \begin{slitemize}
  \item "Special" syntax like for lists: \<\plist{e_1,\ldots,e_n}\>
  \item Prelude functions carry a "\<P\>" suffix: \<mapP\>, \<replicateP\>,
    etc.
  \item Parallel array comprehensions
  \end{slitemize}

  \begin{second}
    {\headcol Main differences when compared to lists:}
    %
    \begin{slumerate}
    \item Construction is strict in all elements
    \item Collective operations (like \<mapP\>, \<foldP\>, \<filterP\>, etc.)
      execute in parallel
    \item On distributed-memory machines, distributed across PEs
    \item Parallel arrays are always finite
    \item No constructors (such as \<[\mkern3mu]\> and \<(:)\>) for
      construction and pattern-matching
    \end{slumerate}
    %
    \begin{center}\emcol
      Programming style as with Nesl's vectors [Blelloch 1996]
    \end{center}
  \end{second}
\end{slide}

\begin{slide}
  {\headcol Flat Versus Nested DP:}
  %
  \begin{slitemize}
  \item Nested DP is more {\emcol expressive,} whereas flat DP is {\emcol
      easier} to implement
  \item Thus, traditional focus on flat DP (Fortran, Sisal, etc.)
  \item Nested DP is especially interesting in a purely functional context
  \end{slitemize}

  \begin{second}
    {\headcol The {\emcol Flattening} Transformation:}
    %
    \begin{slitemize}
    \item Maps nested DP to flat DP while preserving the available parallelism
    \item Nesl [Blelloch et al.] \& Proteus [Prins et al.]
    \end{slitemize}
  \end{second}

  \begin{third}
    {\headcol Alternatives to Flattening:}
    %
    \begin{slitemize}
    \item Thread-based implementation [Blelloch et al.]
    \item PSciCo@CMU [Harper et al.]
    \item Focus on shared-memory architectures
    \item Challenge: Threads can be very fine grained
    \end{slitemize}
  \end{third}
\end{slide}

\begin{slide}
  \begin{center}\small
    \psset{xunit=1ex, yunit=1ex}
    \hspace*{-6em}\begin{pspicture}(65,45)
%      \psaxes[Dx=5,Dy=5](65,45)
      % irregular algo
      \rput(15,45){\psline[linewidth=5pt,linecolor=dgreen](0,0)(4.25,0)}
      \rput(19.75,45){\psline[linewidth=5pt,linecolor=dgreen](0,0)(1.5,0)}
      \rput(21.75,45){\psline[linewidth=5pt,linecolor=dgreen](0,0)(4.5,0)}
      \rput(26.75,45){\psline[linewidth=5pt,linecolor=dgreen](0,0)(11.5,0)}
      \rput(38.75,45){\psline[linewidth=5pt,linecolor=dgreen](0,0)(1.5,0)}
      \rput(40.75,45){\psline[linewidth=5pt,linecolor=dgreen](0,0)(4.25,0)}
      %
      \rput(15,43){\psline[linewidth=5pt,linecolor=lgreen](0,0)(4.25,0)}
      \rput(19.75,43){\psline[linewidth=5pt,linecolor=lgreen](0,0)(1.5,0)}
      \rput(21.75,43){\psline[linewidth=5pt,linecolor=lgreen](0,0)(4.5,0)}
      \rput(26.75,43){\psline[linewidth=5pt,linecolor=lgreen](0,0)(11.5,0)}
      \rput(38.75,43){\psline[linewidth=5pt,linecolor=lgreen](0,0)(1.5,0)}
      \rput(40.75,43){\psline[linewidth=5pt,linecolor=lgreen](0,0)(4.25,0)}
      %
      \rput(17.125,38){%
        \pscircle[linestyle=none,fillstyle=solid,fillcolor=dblue](0,0){.5}%
        \rput(0,4){
          \psline[linewidth=1.5pt,linecolor=darkgray]{->}(0,0)(0,-3)%
          \darkgray\rput(1.5,-1.5){\footnotesize$\Sigma$}}
        }
      \rput(20.5,38){%
        \pscircle[linestyle=none,fillstyle=solid,fillcolor=dblue](0,0){.5}%
        \rput(0,4){
          \psline[linewidth=1.5pt,linecolor=darkgray]{->}(0,0)(0,-3)%
          \darkgray\rput(1.5,-1.5){\footnotesize$\Sigma$}}
        }
      \rput(24,38){%
        \pscircle[linestyle=none,fillstyle=solid,fillcolor=dblue](0,0){.5}%
        \rput(0,4){
          \psline[linewidth=1.5pt,linecolor=darkgray]{->}(0,0)(0,-3)%
          \darkgray\rput(1.5,-1.5){\footnotesize$\Sigma$}}
        }
      \rput(32,38){%
        \pscircle[linestyle=none,fillstyle=solid,fillcolor=dblue](0,0){.5}%
        \rput(0,4){
          \psline[linewidth=1.5pt,linecolor=darkgray]{->}(0,0)(0,-3)%
          \darkgray\rput(1.5,-1.5){\footnotesize$\Sigma$}}
        }
      \rput(39.5,38){%
        \pscircle[linestyle=none,fillstyle=solid,fillcolor=dblue](0,0){.5}%
        \rput(0,4){
          \psline[linewidth=1.5pt,linecolor=darkgray]{->}(0,0)(0,-3)%
          \darkgray\rput(1.5,-1.5){\footnotesize$\Sigma$}}
        }
      \rput(43,38){%
        \pscircle[linestyle=none,fillstyle=solid,fillcolor=dblue](0,0){.5}%
        \rput(0,4){
          \psline[linewidth=1.5pt,linecolor=darkgray]{->}(0,0)(0,-3)%
          \darkgray\rput(1.5,-1.5){\footnotesize$\Sigma$}}
        }
      \rput[r](15,44){\darkgray$\Pi$~}
      \rput[l](47,44){%
        \shortstack{\footnotesize nested vectors (list shape)}}
      \rput(59,39){\darkgray
        \shortstack{irregular algorithm\\(algorithmic partitioning)}}
      \begin{second}
      % flattening
      \rput(15,35){\psline[linestyle=dotted](0,0)(30,0)}
      \rput[r](15,35){\footnotesize \foufif{Flattening}{{\emcol{}Flattening}}~~}
      \rput(15,32){\psline[linewidth=5pt,linecolor=dgreen](0,0)(30,0)}
      \rput(15,30){\psline[linewidth=5pt,linecolor=lgreen](0,0)(30,0)}
      \rput(15,28){\psline[linewidth=1pt,linecolor=dorange](0,0)(4.25,0)}
      \rput(19.75,28){\psline[linewidth=1pt,linecolor=dorange](0,0)(1.5,0)}
      \rput(21.75,28){\psline[linewidth=1pt,linecolor=dorange](0,0)(4.5,0)}
      \rput(26.75,28){\psline[linewidth=1pt,linecolor=dorange](0,0)(11.5,0)}
      \rput(38.75,28){\psline[linewidth=1pt,linecolor=dorange](0,0)(1.5,0)}
      \rput(40.75,28){\psline[linewidth=1pt,linecolor=dorange](0,0)(4.25,0)}
      \rput(30,23){%
        \psline[linewidth=5pt,fillstyle=solid,linecolor=dblue](-3,0)(3,0)%
        \rput(0,4){
          \psline[linewidth=1.5pt,linecolor=darkgray]{->}(0,0)(0,-3)%
          \darkgray\rput(2,-1.5){\footnotesize$\Sigma^\uparrow$}}
        }
      \rput[r](15,31){\darkgray$\Pi$~}
      \rput[l](47,31){%
        \shortstack{\footnotesize flat vectors (vector shape)}}
      \rput[l](47,28){%
        \shortstack{\footnotesize segment desc (rt shape desc)}}
      \rput(59,25){\darkgray
        vectorised code}
      \end{second}%
      \begin{third}
      % partitioning
      \rput(15,20){\psline[linestyle=dotted](0,0)(30,0)}
      \rput[r](15,20){\footnotesize Partitioning~~}
      \rput(15,17){\psline[linewidth=5pt,linecolor=dgreen](0,0)(7,0)}
      \rput(22.5,17){\psline[linewidth=5pt,linecolor=dgreen](0,0)(7,0)}
      \rput(30,17){\psline[linewidth=5pt,linecolor=dgreen](0,0)(7,0)}
      \rput(37.5,17){\psline[linewidth=5pt,linecolor=dgreen](0,0)(7.5,0)}
      \rput(15,15){\psline[linewidth=5pt,linecolor=lgreen](0,0)(7,0)}
      \rput(22.5,15){\psline[linewidth=5pt,linecolor=lgreen](0,0)(7,0)}
      \rput(30,15){\psline[linewidth=5pt,linecolor=lgreen](0,0)(7,0)}
      \rput(37.5,15){\psline[linewidth=5pt,linecolor=lgreen](0,0)(7.5,0)}
      \rput(15,13){\psline[linewidth=1pt,linecolor=dorange](0,0)(4.25,0)}
      \rput(19.75,13){\psline[linewidth=1pt,linecolor=dorange](0,0)(1.5,0)}
      \rput(21.75,13){\psline[linewidth=1pt,linecolor=dorange](0,0)(4.5,0)}
      \rput(26.75,13){\psline[linewidth=1pt,linecolor=dorange](0,0)(11.5,0)}
      \rput(38.75,13){\psline[linewidth=1pt,linecolor=dorange](0,0)(1.5,0)}
      \rput(40.75,13){\psline[linewidth=1pt,linecolor=dorange](0,0)(4.25,0)}
      \rput(18.5,8){%
        \psline[linewidth=5pt,fillstyle=solid,linecolor=lblue](-1.5,0)(1.5,0)%
        \rput(0,4){
          \psline[linewidth=1.5pt,linecolor=darkgray]{->}(0,0)(0,-3)%
          \darkgray\rput(2,-1.5){\footnotesize$\Sigma^\uparrow$}}
        }
      \rput(26,8){%
        \psline[linewidth=5pt,fillstyle=solid,linecolor=lblue](-1,0)(1,0)%
        \rput(0,4){
          \psline[linewidth=1.5pt,linecolor=darkgray]{->}(0,0)(0,-3)%
          \darkgray\rput(2,-1.5){\footnotesize$\Sigma^\uparrow$}}
        }
      \rput(33.5,8){%
        \psline[linewidth=5pt,fillstyle=solid,linecolor=lblue](-0.5,0)(0.5,0)%
        \rput(0,4){
          \psline[linewidth=1.5pt,linecolor=darkgray]{->}(0,0)(0,-3)%
          \darkgray\rput(2,-1.5){\footnotesize$\Sigma^\uparrow$}}
        }
      \rput(41,8){%
        \psline[linewidth=5pt,fillstyle=solid,linecolor=lblue](-0.5,0)(0.5,0)%
        \rput(0,4){
          \psline[linewidth=1.5pt,linecolor=darkgray]{->}(0,0)(0,-3)%
          \darkgray\rput(2,-1.5){\footnotesize$\Sigma^\uparrow$}}
        }
      \rput(30,5.5){%
        \psline[linewidth=1.5pt,fillstyle=solid,linecolor=darkgray]{<->}(-5,0)(5,0)%
        }
      \rput(18.5,3){%
        \psline[linewidth=5pt,fillstyle=solid,linecolor=dblue](-1,0)(1,0)%
        }
      \rput(26,3){%
        \psline[linewidth=5pt,fillstyle=solid,linecolor=dblue](-1,0)(1,0)%
        }
      \rput(33.5,3){%
        \psline[linewidth=5pt,fillstyle=solid,linecolor=dblue](-1,0)(1,0)%
        }
      \rput(41,3){%
        \psline[linewidth=5pt,fillstyle=solid,linecolor=dblue](-1,0)(1,0)%
        }
      \rput[r](15,16){\darkgray$\Pi$~}
      \rput[l](47,16){%
        \shortstack{\footnotesize partitioned vectors}}
      \rput(59,12){\darkgray
        \shortstack{partitioned code\\(machine-level partitioning)}}
    \end{third}%
    \begin{fourth}
      % fusion
      \rput[r](15,10){\footnotesize Fusion~~}
      \psellipse[linewidth=1pt,linecolor=darkgray](18.5,13)(4,6)
      \psellipse[linewidth=1pt,linecolor=darkgray](26,13)(4,6)
      \psellipse[linewidth=1pt,linecolor=darkgray](33.5,13)(4,6)
      \psellipse[linewidth=1pt,linecolor=darkgray](41,13)(4,6)
      \rput(59,8.5){\darkgray
        deep computations}
    \end{fourth}
    \end{pspicture}  
  \end{center}
\end{slide}

\begin{slide}
  \heading{Flattening: The Story So Far}

  {\headcol Restrictions:}
  %
  \begin{slitemize}
  \item Types: only basic types, tuples, and parallel arrays (no recursive
    types and no sum types)
  \item Currying problematic
  \item Polymorphism removed during compilation
  \item Separate compilation of modules problematic
  \item Only specialised languages
  \end{slitemize}

  \begin{second}
    {\headcol Our New and Extended Formulation:}
    %
    \begin{slitemize}
    \item Removes all of the above restrictions
    \item Partially based on generic programming [Hinze 2000]: more
      systematic; eases separate compilation
    \item Can handle Haskell- or ML-based NDP
    \end{slitemize}
  \end{second}
\end{slide}

\begin{slide}
  \heading{Lambda Calculus with Parallel Arrays: \lambdaPA}
  
  {\headcol Types:}
  %
  \[
  \renewcommand{\arraystretch}{1}
  \begin{array}{@{}lcl}
    \nt{T} & \rightarrow & () \mid \Bool \mid \Int \mid \nt{V} \\
           & |           & \nt{T}_1 \times \nt{T}_2 \mid \nt{T}_1 + \nt{T}_2
                           \mid \nt{T}_1 \to \nt{T}_2 
                           \mid \trec{\nt{V}}{\nt{T}}\\
           & |           & {\emcol \plist{\nt{T}}}
  \end{array}
  \]

  \begin{second}
    {\headcol Expressions:}
    %
    \[
    \renewcommand{\arraystretch}{1}
    \begin{array}{@{}lcl}
      \nt{E} & \rightarrow & \nt{C} \mid \nt{V}
                             \mid \tabst{\nt{V}}{\nt{T}}{\nt{E}}
                             \mid \appl{\nt{E}_1}{\nt{E}_2}
                             \mid \tvrec{\nt{V}}{\nt{T}}{\nt{E}}  
    \end{array}
  \]
  \end{second}
  \newpage

  \begin{haskell}
    \hsnoalign{\hskip-.5\leftmargini{}\textsc{\headcol Products}}
    \pair\cdot\cdot\tyidx{\alpha,\beta} 
    &:: \alpha\to\beta\to\alpha\times\beta
    \\
    \pfst\tyidx{\alpha,\beta} 
    &:: \alpha\times\beta\to\alpha
    \\
    \psnd\tyidx{\alpha,\beta} 
    &:: \alpha\times\beta\to\beta
    \\
    \hsnoalign{\hskip-.5\leftmargini{}\textsc{\headcol Sums\rule{0pt}{2em}}}
    \sleft\tyidx{\alpha,\beta} 
    &:: \alpha\to\alpha+\beta
    \\
    \sright\tyidx{\alpha,\beta} 
    &:: \beta\to\alpha+\beta
    \\
    \scase\tyidx{\alpha,\beta,\gamma} 
    &::
    (\alpha\to\gamma)\times(\beta\to\gamma)\times(\alpha+\beta)\to\gamma
    \\
    \hsnoalign{\hskip-.5\leftmargini{}\textsc{\headcol Parallel Arrays%
      \rule{0pt}{2em}}}
    \tdist{\alpha}
    &:: {\alpha\times\Int\to\plist\alpha}
    \hskip3.4em\hscom{replication}
    \\
    \tlen{\alpha}
    &:: \plist\alpha\to\Int
    \hskip5.3em\hscom{length}
    \\
    (\tjoin{\alpha})
    &:: \plist\alpha\times\plist\alpha\to\plist\alpha
    \hskip1.95em\hscom{concatenation}
    \\
    (\tidx{\alpha})
    &:: \plist\alpha\times\Int\to\alpha
    \hskip3.45em\hscom{indexing}
    \\
    \mapP\tyidx{\alpha,\beta} 
    &:: (\alpha\to\beta)\times\plist\alpha\to\plist\beta
    \hscom{dp application}
  \end{haskell}
\end{slide}

\begin{slide}
  \heading{The Big Picture}
  %
  {\headcol Two type transformations and two value transformations:}
  %
  \begin{slumerate}
  \item {\emcol Function Vectorisation:} Lifts computations into vector space
    %
    \begin{itemize}
    \item For every function, generates a vectorised variant operating on
      arrays of input values in parallel
    \item Type transformation \<\repr\cdot\>; value transformation
      \<\vect\cdot\>
    \end{itemize}
  \item \snd{{\emcol Structure Vectorisation:} Represents nested structures by
      flat arrays}
      %
    \begin{second}
      \begin{itemize}
      \item Arrays of nested structures are represented by nested structures
        of arrays
      \item Type-indexed type \<\plist\alpha\> and primitives like
        \<\tdist{\alpha}\> and \<(\tjoin{\alpha})\> are specialised
      \end{itemize}
    \end{second}
  \end{slumerate}
  %
  \vspace*{-0.8\bigskipamount}
  \begin{third}
    \begin{center}
      \(
      \begin{CD}
        \tau             @>f>>               \sigma\\
        @V{\repr\cdot}VV                     @VV{\repr\cdot}V\\
        \repr\tau        @>>\pfst (\vect{f})> \repr\sigma
      \end{CD}
      \)
    \end{center}
  \end{third}
\end{slide}

\begin{slide}
  \heading{Vectorisation}

  {\headcol Type Transformation \<\repr\cdot\>:}
  %
  \begin{slitemize}
  \item Each abstraction is replaced by a pair of abstractions:
    %
    \begin{itemize}
    \item The original one and
    \item its {\emcol lifted} variant, i.e., its data parallel extension
    \end{itemize}
  \end{slitemize}

  \begin{haskell}
    \repr{\tau_1\to\tau_2}  &&= (\repr{\tau_1}\to\repr{\tau_2}) \times
                                (\plist{\tau_1}\to\plist{\tau_2})
  \end{haskell}%
  %
  \begin{second}
    {\headcol For example,}
    %
    \begin{slitemize}
    \item The lifted version of addition, \<+\up\>, is the elementwise
      addition of two arrays
    \item The lifted version of vector summation, \<sumP\up\>, is the
      {\emcol simultaneous} summation of all subarrays in an array of arrays
    \item The transformation extends this to user-defined functions
    \end{slitemize}
  \end{second}
\end{slide}

\begin{slide}
  {\headcol Expression Transformation:}

  Given the source expressions \<\plist{x+y\mid{x\from{e}}}\>, we have in
  \lambdaPA,
  %
  \begin{haskell}
    \hskwd{let} f=\tabst{x}\Int{x+y} \hskwd{in}
    \mapP\pair{f}{e}
  \end{haskell}%
  %
  \begin{second}
    {\headcol Functions have to be replaced by pairs of functions:}
    %
    \begin{haskell}
      (+) = \pair{(+')}{(+\up)} 
      :: \pair{\Int\times\Int\to\Int}{\plist\Int\times\plist\Int\to\plist\Int}
    \end{haskell}%
  \end{second}%
  %
  \begin{third}
    {\headcol Thus, we get}
    %
    \begin{haskell}
      \hslet{%
        f=\langle\tabst{x}\Int{\pfst (+) \pair{x}{y}},\\
        \phantom{f={}\langle}\tabst{x}{\plist\Int}{\psnd (+) 
          \pair{x}{\dist'\pair{\len' x}{y}}}\rangle}{
        \fouerase{\mapP\pair{f}{e}}~\fou{(\psnd f) e}}
    \end{haskell}%
  \end{third}%
  %
  \begin{fifth}
    {\headcol We can reduce this to}\quad 
    \<e\mathbin{+\up}\dist'\pair{\len' e}{y}\>
  \end{fifth}
\end{slide}

\begin{slide}
  \heading[Structure Vectorisation:]{Separation of Structure and Data}

  \begin{slitemize}
  \item Data parallel operations are most efficient over flat arrays of
    primitive data types (ints, floats, etc.)
  \item However, we can have complex user-defined algebraic types as elements
    of parallel arrays
  \end{slitemize}

  \begin{second}
    $\Rightarrow$\quad \emcol Separate structure from primitive data items
  \end{second}
\end{slide}

\begin{slide}
\let\toprulecol=\magg
\let\botrulecol=\emcol
  {\headcol Nested parallel arrays:}
  %
  \begin{center}
  \begin{pspicture}(70,10)
  %\psaxes[Dx=5,Dy=2](70,10)
    \rput(30,0){\<\plist{{\dgreen 5},2,{\magg 0},1}\>%
      \(\rlap{\text{\hspace*{3.1em}\small--- segment descriptor}}\)}
    \rput(30,8){\psline[linewidth=1.5pt]{->}(0,0)(0,-3)}
    \rput(30,3){\<\plist{{\dgreen 1,2,3,4,5},6,7{\magg,}10}\>%
      \(\rlap{\text{\quad\small--- data items}}\)}
    \rput(30,10){%
      \<\plist{{\dgreen \plist{1,2,3,4,5}},\plist{6,7},{\magg \plist{}},\plist{8}}\>}
  \end{pspicture}
  \end{center}

  \bigskip\bigskip
  {\headcol Tuples in parallel arrays:}
  %
  \begin{center}
  \begin{pspicture}(70,10)
  %\psaxes[Dx=5,Dy=2](70,10)
%    \rput(30,0){\<\plist{{True, True, False, True}}\>}
    \rput(30,8){\psline[linewidth=1.5pt]{->}(0,0)(0,-3)}
    \rput(30,-1){%
      \(
      \begin{array}{@{}l@{}l@{}l@{}l@{}l@{}l@{\quad}l}
        [\mkern-.8mu|&10, & -1, & -5, & 3&|\mkern-.8mu]
        &\rlap{\text{\small{}--- first component}}\\{}
        [\mkern-.8mu|&\mathit{True}, & \mathit{True}, & \mathit{False}, &\mathit{True}&|\mkern-.8mu]
        &\rlap{\text{\small{}--- second component}}\\{}
      \end{array}
      \)}
%    \rput(30,3){%
%      \<\plist{{\substboxr{True}{10},\substboxr{True}{-1},\substboxr{False}{5},\substboxr{True}{3}}}\>}
    \rput(30,10){%
      \<\plist{({10}, {True}), ({-1}, {True}), 
        ({5}, {False}), ({3}, {True})}\>}
  \end{pspicture}
  \end{center}
\end{slide}

\begin{slide}
  {\headcol How about sum types?}
  %
  \begin{haskell}
    \plist{[\alpha]} = \plist{\trec{list}{{\dgreen()}+(\alpha\times{list})}}
  \end{haskell}

  {\headcol E.g.,}
  %
  \begin{align*}
    &\plist{{\dgreen[\mkern3mu]}, [1, 2], {\dgreen[\mkern3mu]}, {\dgreen[\mkern3mu]}, [3]}\\
    &\quad\Longrightarrow~
    \pair{\underbrace{\plist{{\dgreen\false}, \true, {\dgreen\false}, {\dgreen\false}, \true}}_{\langle\text{selector}\rangle}}{
      \pair{{\dgreen\plist{[\mkern3mu], [\mkern3mu], [\mkern3mu]}}}{\plist{[1, 2], [3]}}}
  \end{align*}

  \begin{second}
    {\headcol Overall, we get}
    %
    \begin{haskell}
      \trec{list}{(\underbrace{(\Int\times\BoolArr)}_{\langle\text{selector}\rangle}\times(\Int\times(\plist\alpha\times{list})))}    
    \end{haskell}
  \end{second}
\end{slide}

\begin{slide}
  {\headcol Rose trees in hierarchical $n$-body codes}

\begin{figure}
\begin{center}
\psset{yunit=1.6ex,
       xunit=2.1ex,
       linewidth=.6pt}     
\begin{pspicture}(30,20)

  \rput(20,15){\pscircle[fillcolor=black,fillstyle=solid]{0.6}}

\rput(2,5){\pscircle[fillcolor=black,fillstyle=solid]{0.4}}
\rput(4,3){\pscircle[fillcolor=black,fillstyle=solid]{0.55}}
\rput(5.5,9){\pscircle[fillcolor=black,fillstyle=solid]{0.3}}
\rput(5,5){\pscircle[fillcolor=black,fillstyle=solid]{0.35}}

\begin{second}
\rput(2.2,5.2){\psbezier[linecolor=fggray]{->}(0,0)(1,1)(9,6)(17.6,9.8)}
\rput(4.2,3.2){\psbezier[linecolor=fggray]{->}(0,0)(1,1)(7,7)(15.4,11.4)}
\rput(5.7,9.2){\psbezier[linecolor=fggray]{->}(0,0)(1,1)(7,4)(14.1,5.9)}
\rput(5.2,5.2){\psbezier[linecolor=fggray]{->}(0,0)(1,1)(7,6)(14.4,9.6)}

\end{second}
\end{pspicture}
\end{center}
\end{figure}
\end{slide}


\begin{slide}
  {\headcol Rose trees in hierarchical $n$-body codes}

\begin{figure}
\begin{center}
\psset{yunit=1.6ex,
       xunit=2.1ex,
       linewidth=.6pt}     
\begin{pspicture}(30,20)

  \rput(20,15){\pscircle[fillcolor=black,fillstyle=solid]{0.6}}

\rput(2,5){\pscircle[fillcolor=black,fillstyle=solid]{0.4}}
\rput(4,3){\pscircle[fillcolor=black,fillstyle=solid]{0.55}}
\rput(5.5,9){\pscircle[fillcolor=black,fillstyle=solid]{0.3}}
\rput(5,5){\pscircle[fillcolor=black,fillstyle=solid]{0.35}}

\rput(4.1,4.5){\pscircle[linecolor=red,fillcolor=red,fillstyle=solid]{0.7}}

\rput(4.5,5){\psbezier[linecolor=lred,linewidth=1.1pt]{<->}(0,0)(1,2)(6,9)(14.7,9.9)}
\rput(2,5){\psline{->}(1.6,-0.5)}
\rput(4,3){\psline{->}(0.1,1.)}
\rput(5.5,9){\psline{->}(-1.3,-4.1)}
\rput(5,5){\psline{->}(-0.6,-0.3)}

\end{pspicture}
\end{center}
\end{figure}
\end{slide}

\begin{slide}
\begin{figure}
\begin{center}
\psset{yunit=1.6ex,
       xunit=2.1ex,
       linewidth=.6pt}     
\begin{pspicture}(0,5)(30,17)
%\psaxes(20,8)

\rput(2,9){\psframe(8,8)}

\begin{second}
\rput(6,9){\psline[linecolor=fggray](0,8)}
\rput(2,13){\psline[linecolor=fggray](8,0)}
\end{second}
\begin{third}
\rput(-.2,0){
\rput(4,9){\psline[linecolor=fggray](0,8)}
\rput(8,9){\psline[linecolor=fggray](0,4)}
%
\rput(2,11){\psline[linecolor=fggray](8,0)}  

\rput(2,15){\psline[linecolor=fggray](4,0)}}
%
%
%
\end{third}
\begin{fourth}
\rput(-.4,0){
\rput(2,10){\psline[linecolor=fggray](2,0)}
\rput(3,9){\psline[linecolor=fggray](0,2)}
%

\rput(3,9){\psline[linecolor=fggray](0,2)}
%
\rput(4,12){\psline[linecolor=fggray](2,0)}
\rput(5,11){\psline[linecolor=fggray](0,2)}
}
\end{fourth}


{\footnotesize
\rput(5.5,12.5){$\llap{$p_6$}$}
\rput(4.5,12.5){$\llap{$p_7$}$}
\rput(7,12){$\llap{$p_4$}$}
\rput(9,10){$\llap{$p_5$}$}
\rput(5,14){$\llap{$p_2$}$}
\rput(5,16){$\llap{$p_3$}$}
\rput(3.5,10.5){$\llap{$p_8$}$}
\rput(3.5,9.5){$\llap{$p_9$}$}
\rput(8,15){$\llap{$p_1$}$}
}
%
% Tree
%
\begin{fifth}
\rput(13,11.8){\blue{$p_6$}}
\rput(14.4,11.8){\blue{$p_7$}}
\rput(15.5,11.8){\blue{$p_8$}}
\rput(16.8,11.8){\blue{$p_9$}}
%
\rput(14,13.5){{$c_4$}}
\rput(15.6,13.5){{$c_5$}}
\rput(16.5,13.5){\blue{$p_2$}}
\rput(17.8,13.5){\blue{$p_3$}}
\rput(18.9,13.5){\blue{$p_4$}}
\rput(20.2,13.5){\blue{$p_5$}}
%
\rput(15,15.2){{$c_1$}}
\rput(17,15.2){{$c_2$}}
\rput(19.4,15.2){{$c_3$}}
\rput(20.9,15.2){\blue{$p_1$}}
%
\rput(18,17){$c_0$}
%
\rput(15.4,15.6){\psline(2.2,1.1)}
\rput(20.6,15.6){\psline(-2.2,1.1)}
\rput(17.7,16.4){\psline(-0.7,-0.7)}
\rput(18.5,16.4){\psline(0.7,-0.7)}
%
\rput(14.5,14.8){\psline(-0.4,-0.8)}
\rput(16.8,14.8){\psline(-0.4,-0.8)}
\rput(19.2,14.8){\psline(-0.4,-0.8)}
%
\rput(15.2,14.8){\psline(0.4,-0.8)}
\rput(17.2,14.8){\psline(0.4,-0.8)}
\rput(19.6,14.8){\psline(0.4,-0.8)}
%
\rput(14,13.0){\psline(0.2,-0.7)}
\rput(15.7,13.0){\psline(-0.2,-0.7)}
%
\rput(13.7,13.1){\psline(-.8,-0.8)}
\rput(15.9,13.1){\psline(0.8,-0.8)}
%
%\rput(14,5){
%\begin{small}
%\(\left\{
%\begin{array}{@{}c@{\hskip3pt}c@{\hskip3pt}c@{\hskip3pt}c@{\hskip3pt}c@{\hskip3pt}c@{\hskip3pt}c@{\hskip3pt}c@{\hskip3pt}c@{\hskip3pt}c@{\hskip3pt}c@{\hskip3pt}c@{\hskip3pt}c@{\hskip3pt}c@{\hskip3pt}c@{}}
%c_0, & c_1, & c_4, & c_5, & p_6, & p_7, & p_8, & p_9, & c_2, & p_2,  & p_3, &
%c_3,  & p_4, &
%p_5, & p_1\\ 
%\{2,2,2,0\} &
%\{2,2\}&
%\{0,0\}&
%\{0,0\}&
%\{\}&
%\{\}&
%\{\}&
%\{\}&
%\{0,0\}&
%\{\}&
%\{\}&
%\{0,0\}&
%\{\}&
%\{\}&
%\{\}
%\end{array}
%\right\}\)
%\end{small}
%}
\end{fifth}
\end{pspicture}
\begin{sixth}\small
%\psset{yunit=1.6ex,
%       xunit=2.5ex,
%       linewidth=.6pt}     
\psset{unit=.7ex,                        % scale with font
       linewidth=.6pt}                  % lines shouldn't be too heavy
\begin{pspicture}(0,25)(60,0)
\rput(20,21){\((c_0, (\plist{c_1, c_2, c_3, p_1}, (\plist{2,2,2,0},\;\;))) \)}
\rput(25,14){\((\plist{c_4, c_5, p_2, p_3, p_4, p_5}, (\plist{2,2,0,0,0,0},\;\;))\) }
\rput(25,7){\((\plist{p_6, p_7, p_8, p_9}, (\plist{0,0,0,0},\;\;))\)}
\rput(25,0){\((\plist{},(\plist{}, \huge{\times}))\)}
%\rput(36.5,21){\pscircle{0.6}}
%\rput(43,14){\pscircle{0.6}}
%\rput(38,7){\pscircle{0.6}}

\rput(0,0){\pscurve{o->}(36.5,21)(35,19)(32,17.5)(-1,16)(0,14)}
\rput(0,0){\pscurve{o->}(45,14)(44,12)(34,9.5)(5,9)(6,7)}
\rput(0,0){\pscurve{o->}(40,7)(39,5)(32,2.5)(14,2)(18,0)}
\end{pspicture}  
\end{sixth}
\end{center}
\end{figure}
\end{slide}

\begin{slide}
  \heading{Specialisation as an Exercise in Generic Programming}

  {\headcol Problems with an ad-hoc transformation:}
  %
  \begin{slitemize}
  \item Recursive types: Termination of transformation and generated code
  \item Seperate compilation in the presence of polymorphism
  \item Complexity
  \end{slitemize}

  \begin{second}
    {\headcol Generic Programming offers a better solution [Hinze 2000]:}
    %
    \begin{slitemize}
    \item Representation of \<\plist\tau\> follows the structure of \<\tau\>
    \item \trd{Regard parallel arrays \<\plist\tau\> as a {\emcol
          type-indexed} type}
    \item \trd{Regard operations (e.g., \<\tdist\tau\>) on them as {\emcol
          type-indexed} values}
    \item \trd{Then, we get the specialisation procedure for free}
    \end{slitemize}
  \end{second}
\end{slide}

\begin{slide}
  \vspace*{-1.5\bigskipamount}
  {\headcol Definition of \<\plist\tau\> as a type-indexed type:}
  %
  \vspace*{-0.5\bigskipamount}
  \begin{haskell}
    \plist{()}                     &&= \Int\\
    \plist{\Bool}                  &&= \Int\times\BoolArr\\
    \plist{\Int}                   &&= \Int\times\IntArr\\
    \plist{\tau_1\times\tau_2}     &&= \plist{\tau_1}\times\plist{\tau_2}\\
    \plist{\tau_1+\tau_2}          &&= 
      \BoolArr\times\plist{\tau_1}\times\plist{\tau_2}\\
    \plist{\tau_1\to\tau_2}        &&= 
      \plist{\tau_1}\to\plist{\tau_2}\\
    \plist{\plist\tau}             &&= \plist{\Int}\times\plist\tau
  \end{haskell}
  %
  \vspace*{-2\bigskipamount}\par
  {\headcol Partial definition of \<\tdist\tau\> as a type-indexed value:}
  %
  \vspace*{-0.5\bigskipamount}
  \begin{haskell}
    \tdistS{\alpha}               
     &&:: \repr{\alpha}\times\Int\to\repr{\plist\alpha}\\
    \tdistS{()}                &\pair{n}{x} &= n\\
    \tdistS{\Int}              &\pair{n}{x} &= \pair{n}{\intdistS \pair{n}{x}}\\
    \tdistS{\alpha+\beta} &\pair{n}{x} &= \ldots\\ 
    \tdistS{\alpha\times\beta} &\pair{n}{x} &= 
      \pair{\tdistS\alpha \pair{n}{\pfst x}}{
        \tdistS\beta \pair{n}{\psnd x}}\\    
    \tdistS{\alpha\to\beta}    &\pair{n}{x} &= \psnd x\\
    \tdistS{\plist\alpha} &\pair{n}{x} &= \ldots
  \end{haskell}%
\end{slide}

\begin{slide}
\let\toprulecol=\emcol
\let\botrulecol=\headcol
  \heading{Preliminary Benchmarks}{\dgreen\rule{.1pt}{.1pt}}
  %
    \savedata{\optimdata}[{{1, 5.62}, {2, 2.979389}, {3, 2.101845}, {4, 1.580166},
    {5, 1.294903}, {6, 1.079464}, {7, 0.941284}, {8, 0.819273}, {9, 0.740404},
    {10, 0.671757}, {11, 0.612657}, {12, 0.566856}, {13, 0.525612}, {14,
      0.495851}, {15, 0.467015}, {16, 0.433649}, {17, 0.415749}, {18, 0.393696},
    {19, 0.375319}, {20, 0.361736}}]
  \savedata{\bkdata}[{2, 7.62},
  {3, 5.339078}, 
  {4, 3.987710}, 
  {5, 3.235798}, 
  {6, 2.714308}, 
  {8, 2.034804}, 
  {9, 1.844242}, 
  {10, 1.663300}, 
  {12, 1.400130}, 
  {14,  1.214645}, 
  {15, 1.137211}, 
  {16, 1.053647}, 
  {17, 1.011222}, 
  {18, 0.960002},
  {19, 0.911698}, 
  {20,0.869761}}] 
  \savedata{\absspeed}[
  {2, 0.56430446194225714},
  {3, 0.80538250237213238}, 
  {4, 1.0783131170521427}, 
  {5, 1.3288839414574085} 
  {6, 1.5841975192203686}, 
  {8, 2.1132256472859305}, 
  {9, 2.3315812133114853}, 
  {10, 2.5852221487404554}, 
  {12, 3.0711433938277155}, 
  {14, 3.5401290088873703}, 
  {15, 3.7811804493625192}, 
  {16, 4.0810632023818219}, 
  {17, 4.2522809036986926}, 
  {18, 4.4791573350888854},
  {19, 4.7164740955886701}
  {20, 4.9438868838680969}
  }] 
  \savedata{\scaleele}[{{1, 1.0},
  {2, 1.6345633282528735},
  {3, 2.3170119585411864},
  {4, 3.0819546807107607},
  {5, 3.7608994650564562},
  {6, 4.5114982991558774},
  {7, 5.1737838951899748},
  {8, 5.9442945147710224},
  {9, 6.5774901270117399},
  {10, 7.2496453330594246},
  {11, 7.948982872961543},
  {12, 8.5912471597725002},
  {13, 9.265389679078865},
  {14, 9.8214987970176537},
  {15, 10.42793058038821},
  {16, 11.230280710897523},
  {17, 11.713798469749777},
  {18, 12.369950418597091},
  {19, 12.975628731825461},
  {20, 13.462856890107703}}]
  \savedata{\scaletwn}[{{1, 1.0},
  {2, 1.8543307086614174},
  {3, 2.6465243624461006},
  {4, 3.5433870567318086},
  {5, 4.3667744401844617},
  {6, 5.2057467317636767},
  {8, 6.9441577665465575},
  {9, 7.6616843125793697},
  {10, 8.4951602236517765},
  {12, 10.091920035996658},
  {14, 11.633028580367103},
  {15, 12.425134825463349},
  {16, 13.410563499919803},
  {17, 13.973192830061056},
  {18, 14.718719336001383},
  {19, 15.49855324899254},
  {20, 16.245842248617723}}]
  \savedata{\blub}[{{0, 0}, {1, 0}}]
  %
  \begin{center}\small
  \psset{yunit=3.2ex,
         xunit=1.9ex,
         linewidth=.6pt}                  % lines shouldn't be too heavy
  \begin{pspicture}(21,-1)(80,6)
%  \rput(0,7.2){\textrm{t[sec]}}
%  \rput(24.5,7.2){\textrm{Speedup}}
%  \rput(23.5,-.8){\textrm{PE}}
%  \rput(2,0.125){\psaxes[Dx=2,Dy=1.0]{->}(22,7)}
%  \rput(2,0.125){\dataplot[plotstyle=curve,showpoints=true,dotstyle=square]{\optimdata}}
%  \rput(2,0.125){\dataplot[plotstyle=curve,showpoints=true,dotstyle=triangle]{\bkdata}}
  %
  \psset{yunit=1.0ex,
         xunit=1.73ex,
         linewidth=.6pt}                  % lines shouldn't be too heavy
  \rput(30,0){\psaxes[Dx=5,Dy=5]{->}(22,22)}
  \rput(30,0){\dataplot[linecolor=dblue,plotstyle=curve,showpoints=true,dotstyle=square]{\scaleele}}
  \rput(30,0){\dataplot[linecolor=dorange,plotstyle=curve,showpoints=true,dotstyle=triangle]{\scaletwn}}
  \rput(30,0){\dataplot[linecolor=dgreen,plotstyle=curve,showpoints=true]{\absspeed}}
  \rput(30,0){\psplot[plotpoints=20,linestyle=dashed]{0}{20}{x}}
  %
  \rput[l](34,12.8){\textrm{\emcol relative (20,000)}}
  \rput(31,12.8){\dataplot[linecolor=dorange,plotstyle=curve,showpoints=true,dotstyle=triangle]{\blub}}
  \rput[l](34,15){\textrm{\headcol relative (9,000)}}
  \rput(31,15){\dataplot[linecolor=dblue,plotstyle=curve,showpoints=true,dotstyle=square]{\blub}}
  %
  \rput[l](34,17.2){\textrm{optimal}}
  \rput(31,17.2){\psline[linestyle=dashed](1.5,0)}
  \rput[l](34,19.4){\textrm{\dgreen absolute (20,000)}}
  \rput(31,19.4){\dataplot[linecolor=dgreen,plotstyle=curve,showpoints=true]{\blub}}
  \rput(52,-2){[PE]}
  %
  \end{pspicture}
  \end{center}
  %
  \bigskip\bigskip
  \begin{slitemize}
  \item Barnes-Hut hierarchical $n$-body algorithm on a Cray T3E
  \item No full compiler yet; manual transformation
  \item Absolute speedup is relative to a sequential C program
  \end{slitemize}
\end{slide}

\begin{slide}
  \heading{Conclusions}
  %
  \begin{slitemize}
  \item Extension of existing languages, instead of specialised languages
  \item Presentation of flattening in the conventional setting of
    lambda calculus (currying)
  \item Transformation of data structures by generic programming
    %
    \begin{itemize}
    \item Recursive types \& sum types
    \item Seperate compilation
    \item Systematic \& well-founded
    \end{itemize}
  \end{slitemize}

  \bigskip\bigskip
  \begin{center}
    Remaining problem: functionals in parallel arrays
  \end{center}
\end{slide}

\begin{slide}
  \heading[Functionals in Parallel Arrays:]{Control Parallelism in Disguise}

  \begin{haskell}
    &\quad\plist{f\hsap a \mid f\hsfrom\plist{foo, bar}}\\
    =\\
    &\quad{}mapP (\lambda f. f a) \plist{foo, bar}
  \end{haskell}
  %
  implies that \<foo a\> and \<bar a\> are evaluated in parallel!
\end{slide}

\end{document}
