OPAL (Object Oriented Parallel Accelerator Library)  2.2.0
OPAL
VRB.cpp
Go to the documentation of this file.
1 // -*- C++ -*-
2 /***************************************************************************
3  *
4  * The IPPL Framework
5  *
6  * This program was prepared by PSI.
7  * All rights in the program are reserved by PSI.
8  * Neither PSI nor the author(s)
9  * makes any warranty, express or implied, or assumes any liability or
10  * responsibility for the use of this software
11  *
12  * Visit www.amas.web.psi for more details
13  *
14  ***************************************************************************/
15 
16 // -*- C++ -*-
17 /***************************************************************************
18  *
19  * The IPPL Framework
20  *
21  *
22  * Visit http://people.web.psi.ch/adelmann/ for more details
23  *
24  ***************************************************************************/
25 
26 // include files
27 #include "VRB.h"
28 #include "Utility/PAssert.h"
29 
30 #ifdef __VRB_DIAGNOSTIC__
31 #include <cstdio>
32 #include <cstdlib>
33 #endif
34 
35 //
36 // A local function we use for the recursive bisection.
37 //
38 
39 static void
40 recurseCoordinateVRB(int dim,
41  const int *strides, const int* sizes, const int *offsets,
42  int nprocs, int firstProc, int *procs );
43 
45 
46 //
47 // vnodeRecurseiveBisection
48 //
49 // This is the user visible function for decomposing along coordinate
50 // directions.
51 //
52 // This function just sets up the arguments for the recursive
53 // algorithm. That algorithm works by subdividing blocks of
54 // vnodes and blocks of processors.
55 //
56 // The blocks are identified with the offsets of the lower left
57 // corner and the sizes of the block in each dimension.
58 //
59 // The processors are identified by a continuous range from
60 // a firstProc and a number of procs nprocs.
61 //
62 // In addition to the input arguments, it calculates for the recursion:
63 //
64 // strides: The strides to go from vnode to its neighbors in N dimensions.
65 // offsets: The lower left corner of the block of vnodes.
66 //
67 
68 void
69 vnodeRecursiveBisection(int dim, const int *sizes, int nprocs, int *procs)
70 {
71  int i;
72 
73  // Initialize the lower left corner of the vnodes to the origin.
74  int *offsets = new int[dim];
75  for (i=0; i<dim; ++i)
76  offsets[i] = 0;
77 
78  // Initialize the strides with C semantics (last index varies fastest).
79  int *strides = new int[dim];
80  strides[dim-1] = 1;
81  for (i=dim-2; i>=0; --i)
82  strides[i] = strides[i+1]*sizes[i+1];
83 
84  // Dive into the recursion.
85  recurseCoordinateVRB(dim,strides,sizes,offsets,nprocs,0,procs);
86 
87  // Clean up after ourselves.
88  delete [] offsets;
89  delete [] strides;
90 }
91 
93 
94 //
95 // assign
96 //
97 // Given a description of a block of vnodes, fill the proc array
98 // with a processor number x.
99 //
100 // The block of vnodes is described by:
101 // dim: The dimension of space.
102 // strides: The strides to find neighbors of a vnode.
103 // offsets: The lower left corner of the block of vnodes
104 // sizes : the size of the block in each dimension.
105 //
106 
107 static void
108 assign(int dim, const int *strides, const int *sizes, const int *offsets,
109  int x, int *procs)
110 {
111  // Make sure the input is sensible.
112  PAssert_GT(dim, 0);
113  PAssert(sizes);
114  PAssert(offsets);
115  PAssert(procs);
116 
117  int i;
118 
119 #ifdef __VRB__DIAGNOSTIC__
120  printf("---------- assign ----------\n");
121  printf("dim=%d, sizes=",dim);
122  for (i=0; i<dim; ++i)
123  printf(" %d",sizes[i]);
124  printf(", offsets=");
125  for (i=0; i<dim; ++i)
126  printf(" %d",offsets[i]);
127  printf(", strides=");
128  for (i=0; i<dim; ++i)
129  printf(" %d",strides[i]);
130  printf(", procs=%lx\n",procs);
131  printf("----------------------------\n");
132 #endif
133 
134  // Termination condition: one dimension.
135  if ( dim==1 )
136  // Fill the one row with x.
137  for (i=0; i<sizes[0]; ++i)
138  procs[offsets[0]+i] = x;
139 
140  // Otherwise, loop over the outermost dimension and recurse.
141  else
142  {
143  // For each slab, fill it.
144  for (i=0; i<sizes[0]; ++i)
145  assign(dim-1,strides+1,sizes+1,offsets+1,x,procs+(offsets[0]+i)*strides[0]);
146  }
147 }
148 
150 
151 //
152 // RecurseCoordinateVRB
153 //
154 // Perform the recursion, finding the procs for each vnode.
155 //
156 // Inputs:
157 // dim : The dimension of the breakdown.
158 // strides : The strides between the slabs of the proc array.
159 // sizes : The size of the block of vnodes.
160 // offsets : The lower left corner of the block of vnodes.
161 // nprocs : The number of processors to put these vnodes on.
162 // firstProc : The first proc we are putting these vnodes on.
163 //
164 // Output:
165 // procs: Array to store the processor for each vnode.
166 //
167 
168 static void
169 recurseCoordinateVRB(int dim,
170  const int* strides, const int* sizes, const int *offsets,
171  int nprocs, int firstProc, int *procs )
172 {
173  // Make sure the input is sensible.
174  PAssert_GT(dim, 0);
175  PAssert(sizes);
176  PAssert_GT(nprocs, 0);
177  PAssert(procs);
178  PAssert_GE(firstProc, 0);
179  for (int i=0; i<dim; ++i)
180  {
181  PAssert_GT(sizes[i], 0);
182  PAssert_GE(offsets[i], 0);
183  }
184 
185 #ifdef __VRB_DIAGNOSTIC__
186  printf("---------- RecurseCoordinateVRB ----------\n");
187  printf("dim= %d, sizes=",dim);
188  for (i=0; i<dim; ++i)
189  printf(" %d",sizes[i]);
190  printf(", offsets=");
191  for (i=0; i<dim; ++i)
192  printf(" %d",offsets[i]);
193  printf(", nprocs= %d, firstProc= %d\n",nprocs,firstProc);
194  printf("------------------------------------------\n");
195 #endif
196 
197  // If we just have one proc, all the vnodes are on this proc.
198  if ( nprocs == 1 )
199  {
200  // Fill the hypercube defined by sizes,offsets with
201  // the value firstProc.
202  assign(dim,strides,sizes,offsets,firstProc,procs);
203  }
204 
205  // If there is more than one processor left,
206  // recurse by splitting the procs into two groups and
207  // the work into two groups, and allocating work to procs.
208  else
209  {
210  int d;
211 
212  // Calculate the total number of vnodes.
213  int totalVnodes = sizes[0];
214  for (d=1; d<dim; ++d)
215  totalVnodes *= sizes[d];
216  PAssert_GE(totalVnodes, nprocs);
217 
218  // Find the number of processors on each side.
219  int leftProcs = nprocs/2;
220  int rightProcs = nprocs-leftProcs;
221 
222  // Decide which dimension to split.
223  // Just loop over the dimensions and pick the biggest.
224  int splitDim = 0;
225  for (d=1; d<dim; ++d)
226  if ( sizes[d] > sizes[splitDim] )
227  splitDim = d;
228 
229  // Get the number of vnodes in that dimension.
230  int splitSize = sizes[splitDim];
231 
232  // Find where along that dimension to split.
233  // Balance the work between the two procs as well as possible.
234 
235  // The number of vnodes on the left.
236  // Start with none on the left.
237  int leftVnodes = 0;
238 
239  // The degree to which things are out of balance.
240  double outOfBalance = splitSize/(double)rightProcs;
241 
242  // The number of vnodes in a hyperslab perpendicular
243  // to the split direction.
244  int crossSize = 1;
245  for (d=0; d<dim; ++d)
246  if ( d != splitDim )
247  crossSize *= sizes[d];
248 
249  // Consider all possible split locations and pick the best.
250  for (int l=1; l<splitSize; ++l)
251  {
252  // How far out of balance is this?
253  int r = splitSize - l;
254  double b = l/(double)leftProcs - r/(double)rightProcs;
255 
256  // Get the absolute value of the unbalance.
257  if ( b<0 )
258  b=-b;
259 
260  // Compare to the best so far.
261  // If is better balance and we have at least as many
262  // procs on each side of the divide as we have vnodes,
263  // then keep this split.
264  if ( (b < outOfBalance) &&
265  (l*crossSize>=leftProcs) &&
266  (r*crossSize>=rightProcs) )
267  {
268  // It is better, keep it.
269  leftVnodes = l;
270  outOfBalance = b;
271  }
272  }
273 
274  // If we couldn't find a good split, die.
275  PAssert_GT(leftVnodes, 0);
276 
277  // We now know what dimension to split on, and where in
278  // that dimension to split. Recurse.
279 
280  // Make a copy of the sizes array.
281  int *newSizes = new int[dim];
282  for (d=0; d<dim; ++d)
283  newSizes[d] = sizes[d];
284 
285  // Make a copy of the offsets array.
286  int *newOffsets = new int[dim];
287  for (d=0; d<dim; ++d)
288  newOffsets[d] = offsets[d];
289 
290  // Get the sizes for the left.
291  newSizes[splitDim] = leftVnodes;
292 
293  // Recurse for the left.
294  recurseCoordinateVRB(dim,strides,newSizes,newOffsets,leftProcs,firstProc,procs);
295 
296  // Get the sizes and offsets for the right.
297  newSizes[splitDim] = splitSize - leftVnodes;
298  newOffsets[splitDim] += leftVnodes;
299 
300  // Recurse for the right.
301  recurseCoordinateVRB(dim,strides,newSizes,newOffsets,rightProcs,firstProc+leftProcs,procs);
302 
303  // Delete the memory.
304  delete [] newSizes;
305  delete [] newOffsets;
306  }
307 }
308 
310 
311 #ifdef __VRB_DIAGNOSTIC__
312 
313 //
314 // print out a hypercube of proc data.
315 //
316 
317 static void
318 print(int dim, const int *sizes, const int *procs)
319 {
320  if ( dim == 1 )
321  {
322  for ( int i=0; i<*sizes; ++i)
323  printf("%4d",procs[i]);
324  }
325  else
326  {
327  int skip = 1;
328  int i;
329  for (i=1; i<dim; ++i)
330  skip *= sizes[i];
331  for (i=0; i<sizes[0]; ++i)
332  {
333  print(dim-1,sizes+1,procs+skip*i);
334  printf("\n");
335  }
336  printf("\n");
337  }
338 }
339 
341 
342 int
343 main(int argc, char *argv[])
344 {
345  // The number of dimensions is the number of args to the program.
346  int dim = argc-2;
347  PAssert_GT(dim, 0);
348 
349  // Get the number of procs.
350  int nprocs = atoi(argv[1]);
351  PAssert_GT(nprocs, 0);
352 
353  // Get the size of each dimension.
354  int *sizes = new int[dim];
355  int totalVnodes = 1;
356  for ( int d = 0; d<dim; ++d )
357  {
358  sizes[d] = atoi(argv[d+2]);
359  totalVnodes *= sizes[d];
360  }
361  int *procs = new int[totalVnodes];
362 
363  // Do it.
364  vnodeRecursiveBisection(dim,sizes,nprocs,procs);
365 
366  // Print it out.
367  print(dim,sizes,procs);
368 
369  return 0;
370 }
371 
372 #endif
373 
374 /***************************************************************************
375  * $RCSfile: VRB.cpp,v $ $Author: adelmann $
376  * $Revision: 1.1.1.1 $ $Date: 2003/01/23 07:40:27 $
377  * IPPL_VERSION_ID: $Id: VRB.cpp,v 1.1.1.1 2003/01/23 07:40:27 adelmann Exp $
378  ***************************************************************************/
void vnodeRecursiveBisection(int dim, const int *sizes, int nprocs, int *procs)
Definition: VRB.cpp:69
void assign(const BareField< T, Dim > &a, RHS b, OP op, ExprTag< true >)
#define PAssert_GE(a, b)
Definition: PAssert.h:124
#define PAssert_GT(a, b)
Definition: PAssert.h:123
int main(int argc, char *argv[])
Definition: Main.cpp:107
#define PAssert(c)
Definition: PAssert.h:117