%%% ====================================================================
%%% @LaTeX-file{
%%% filename = "changn95.ltx",
%%% date = "20 November 1995",
%%% time = "21:41:31 EST",
%%% author = "David M. Jones",
%%% email = "jacm@theory.lcs.mit.edu",
%%% url = "http://theory.lcs.mit.edu/~jacm/",
%%% address = "Journal of the ACM
%%% MIT Laboratory for Computer Science
%%% Room NE43-316
%%% 545 Technology Square
%%% Cambridge, MA 02139
%%% USA",
%%% telephone = "(617) 253-5936",
%%% FAX = "(617) 253-3480",
%%% checksum = "21203 110 491 4001",
%%% codetable = "ISO/ASCII",
%%% supported = "yes",
%%% docstring = "Copyright (c) 1995 by ACM, Inc.
%%% Permission to make digital or hard copies of part
%%% or all of this work for personal or classroom use
%%% is granted without fee provided that copies are
%%% not made or distributed for profit or direct
%%% commercial advantage and that copies bear this
%%% notice and the full citation on the first page.
%%% Copyrights for components of this work owned by
%%% others than ACM must be honored. Abstracting with
%%% credit is permitted. To copy otherwise, to
%%% republish, to post on servers, or to redistribute
%%% to lists, requires prior specific permission
%%% and/or a fee. Request permissions from
%%% Publications Dept, ACM Inc., fax +1 (212)
%%% 869-0481, or permissions@acm.org.
%%%
%%% This is a LaTeX2e file. To process it, you will
%%% need a copy of the acmabs document class, which is
%%% available via the following URL:
%%%
%%% http://theory.lcs.mit.edu/~jacm/acmart/acmabs.cls
%%% ",
%%% }
%%% ====================================================================
\documentclass{acmabs}
\begin{document}
\Journal{Journal of the ACM}
\refkey{ChangN95}
\title{Bounds on the Speedup and Efficiency of Partial Synchronization in
Parallel Processing Systems}
\author{C.~S. Chang \and R. Nelson}
\Pages{204--231}
\Month{January}
\Year{1995}
\Volume{42}
\Number{1}
\maketitle
\begin{abstract}
In this paper, we derive bounds on the speedup and efficiency of applications
that schedule tasks on a set of parallel processors. We assume that the
application runs an algorithm that consists of $N$ iterations and before
starting its $i + 1$st iteration, a processor must wait for data (i.e.,
synchronize) evaluated in the $i$th iteration by a subset of the other
processors of the system. Processing times and interconnections between
iterations are modeled by random variables with possibly deterministic
distributions. Scientific applications consisting of iterations of recursive
equations are examples of applications that can be modeled within this
formulation. We consider the efficiency of such applications and show that,
although efficiency decreases with an increase in the number of processors,
it has a nonzero limit when the number of processors increases to infinity.
We obtain a lower bound for the efficiency by solving an equation that
depends on the distribution of task service times and the expected number of
tasks needed to be synchronized. We also show that the lower bound is
approached if the topology of the processor graph is ``spread-out,'' a notion
we define in the paper.
\end{abstract}
\begin{categories}
C.1.2[parallel processors]; C.4[performance attributes]; G.M[queuing theory]
\end{categories}
\begin{terms}
Measurement, Performance
\end{terms}
\begin{keywords}
Large deviations theory, synchronization
\end{keywords}
\end{document}