OPAL (Object Oriented Parallel Accelerator Library)  2024.1
OPAL
VRB.cpp
Go to the documentation of this file.
1 // -*- C++ -*-
2 /***************************************************************************
3  *
4  * The IPPL Framework
5  *
6  * This program was prepared by PSI.
7  * All rights in the program are reserved by PSI.
8  * Neither PSI nor the author(s)
9  * makes any warranty, express or implied, or assumes any liability or
10  * responsibility for the use of this software
11  *
12  * Visit www.amas.web.psi for more details
13  *
14  ***************************************************************************/
15 
16 // -*- C++ -*-
17 /***************************************************************************
18  *
19  * The IPPL Framework
20  *
21  *
22  * Visit http://people.web.psi.ch/adelmann/ for more details
23  *
24  ***************************************************************************/
25 
26 // include files
27 #include "VRB.h"
28 #include "Utility/PAssert.h"
29 
30 #ifdef __VRB_DIAGNOSTIC__
31 #include <cstdio>
32 #include <cstdlib>
33 #endif
34 
35 //
36 // A local function we use for the recursive bisection.
37 //
38 
39 static void
40 recurseCoordinateVRB(int dim,
41  const int *strides, const int* sizes, const int *offsets,
42  int nprocs, int firstProc, int *procs );
43 
45 
46 //
47 // vnodeRecurseiveBisection
48 //
49 // This is the user visible function for decomposing along coordinate
50 // directions.
51 //
52 // This function just sets up the arguments for the recursive
53 // algorithm. That algorithm works by subdividing blocks of
54 // vnodes and blocks of processors.
55 //
56 // The blocks are identified with the offsets of the lower left
57 // corner and the sizes of the block in each dimension.
58 //
59 // The processors are identified by a continuous range from
60 // a firstProc and a number of procs nprocs.
61 //
62 // In addition to the input arguments, it calculates for the recursion:
63 //
64 // strides: The strides to go from vnode to its neighbors in N dimensions.
65 // offsets: The lower left corner of the block of vnodes.
66 //
67 
68 void
69 vnodeRecursiveBisection(int dim, const int *sizes, int nprocs, int *procs)
70 {
71  int i;
72 
73  // Initialize the lower left corner of the vnodes to the origin.
74  int *offsets = new int[dim];
75  for (i=0; i<dim; ++i)
76  offsets[i] = 0;
77 
78  // Initialize the strides with C semantics (last index varies fastest).
79  int *strides = new int[dim];
80  strides[dim-1] = 1;
81  for (i=dim-2; i>=0; --i)
82  strides[i] = strides[i+1]*sizes[i+1];
83 
84  // Dive into the recursion.
85  recurseCoordinateVRB(dim,strides,sizes,offsets,nprocs,0,procs);
86 
87  // Clean up after ourselves.
88  delete [] offsets;
89  delete [] strides;
90 }
91 
93 
94 //
95 // assign
96 //
97 // Given a description of a block of vnodes, fill the proc array
98 // with a processor number x.
99 //
100 // The block of vnodes is described by:
101 // dim: The dimension of space.
102 // strides: The strides to find neighbors of a vnode.
103 // offsets: The lower left corner of the block of vnodes
104 // sizes : the size of the block in each dimension.
105 //
106 
107 static void
108 assign(int dim, const int *strides, const int *sizes, const int *offsets,
109  int x, int *procs)
110 {
111  // Make sure the input is sensible.
112  PAssert_GT(dim, 0);
113  PAssert(sizes);
114  PAssert(offsets);
115  PAssert(procs);
116 
117  int i;
118 
119 #ifdef __VRB__DIAGNOSTIC__
120  printf("---------- assign ----------\n");
121  printf("dim=%d, sizes=",dim);
122  for (i=0; i<dim; ++i)
123  printf(" %d",sizes[i]);
124  printf(", offsets=");
125  for (i=0; i<dim; ++i)
126  printf(" %d",offsets[i]);
127  printf(", strides=");
128  for (i=0; i<dim; ++i)
129  printf(" %d",strides[i]);
130  printf(", procs=%lx\n",procs);
131  printf("----------------------------\n");
132 #endif
133 
134  // Termination condition: one dimension.
135  if ( dim==1 )
136  // Fill the one row with x.
137  for (i=0; i<sizes[0]; ++i)
138  procs[offsets[0]+i] = x;
139 
140  // Otherwise, loop over the outermost dimension and recurse.
141  else
142  {
143  // For each slab, fill it.
144  for (i=0; i<sizes[0]; ++i)
145  assign(dim-1,strides+1,sizes+1,offsets+1,x,procs+(offsets[0]+i)*strides[0]);
146  }
147 }
148 
150 
151 //
152 // RecurseCoordinateVRB
153 //
154 // Perform the recursion, finding the procs for each vnode.
155 //
156 // Inputs:
157 // dim : The dimension of the breakdown.
158 // strides : The strides between the slabs of the proc array.
159 // sizes : The size of the block of vnodes.
160 // offsets : The lower left corner of the block of vnodes.
161 // nprocs : The number of processors to put these vnodes on.
162 // firstProc : The first proc we are putting these vnodes on.
163 //
164 // Output:
165 // procs: Array to store the processor for each vnode.
166 //
167 
168 static void
169 recurseCoordinateVRB(int dim,
170  const int* strides, const int* sizes, const int *offsets,
171  int nprocs, int firstProc, int *procs )
172 {
173  // Make sure the input is sensible.
174  PAssert_GT(dim, 0);
175  PAssert(sizes);
176  PAssert_GT(nprocs, 0);
177  PAssert(procs);
178  PAssert_GE(firstProc, 0);
179  for (int i=0; i<dim; ++i)
180  {
181  PAssert_GT(sizes[i], 0);
182  PAssert_GE(offsets[i], 0);
183  }
184 
185 #ifdef __VRB_DIAGNOSTIC__
186  printf("---------- RecurseCoordinateVRB ----------\n");
187  printf("dim= %d, sizes=",dim);
188  for (i=0; i<dim; ++i)
189  printf(" %d",sizes[i]);
190  printf(", offsets=");
191  for (i=0; i<dim; ++i)
192  printf(" %d",offsets[i]);
193  printf(", nprocs= %d, firstProc= %d\n",nprocs,firstProc);
194  printf("------------------------------------------\n");
195 #endif
196 
197  // If we just have one proc, all the vnodes are on this proc.
198  if ( nprocs == 1 )
199  {
200  // Fill the hypercube defined by sizes,offsets with
201  // the value firstProc.
202  assign(dim,strides,sizes,offsets,firstProc,procs);
203  }
204 
205  // If there is more than one processor left,
206  // recurse by splitting the procs into two groups and
207  // the work into two groups, and allocating work to procs.
208  else
209  {
210  int d;
211 
212 #ifndef NOPAssert
213  // Calculate the total number of vnodes.
214  int totalVnodes = sizes[0];
215  for (d=1; d<dim; ++d)
216  totalVnodes *= sizes[d];
217  PAssert_GE(totalVnodes, nprocs);
218 #endif
219  // Find the number of processors on each side.
220  int leftProcs = nprocs/2;
221  int rightProcs = nprocs-leftProcs;
222 
223  // Decide which dimension to split.
224  // Just loop over the dimensions and pick the biggest.
225  int splitDim = 0;
226  for (d=1; d<dim; ++d)
227  if ( sizes[d] > sizes[splitDim] )
228  splitDim = d;
229 
230  // Get the number of vnodes in that dimension.
231  int splitSize = sizes[splitDim];
232 
233  // Find where along that dimension to split.
234  // Balance the work between the two procs as well as possible.
235 
236  // The number of vnodes on the left.
237  // Start with none on the left.
238  int leftVnodes = 0;
239 
240  // The degree to which things are out of balance.
241  double outOfBalance = splitSize/(double)rightProcs;
242 
243  // The number of vnodes in a hyperslab perpendicular
244  // to the split direction.
245  int crossSize = 1;
246  for (d=0; d<dim; ++d)
247  if ( d != splitDim )
248  crossSize *= sizes[d];
249 
250  // Consider all possible split locations and pick the best.
251  for (int l=1; l<splitSize; ++l)
252  {
253  // How far out of balance is this?
254  int r = splitSize - l;
255  double b = l/(double)leftProcs - r/(double)rightProcs;
256 
257  // Get the absolute value of the unbalance.
258  if ( b<0 )
259  b=-b;
260 
261  // Compare to the best so far.
262  // If is better balance and we have at least as many
263  // procs on each side of the divide as we have vnodes,
264  // then keep this split.
265  if ( (b < outOfBalance) &&
266  (l*crossSize>=leftProcs) &&
267  (r*crossSize>=rightProcs) )
268  {
269  // It is better, keep it.
270  leftVnodes = l;
271  outOfBalance = b;
272  }
273  }
274 
275  // If we couldn't find a good split, die.
276  PAssert_GT(leftVnodes, 0);
277 
278  // We now know what dimension to split on, and where in
279  // that dimension to split. Recurse.
280 
281  // Make a copy of the sizes array.
282  int *newSizes = new int[dim];
283  for (d=0; d<dim; ++d)
284  newSizes[d] = sizes[d];
285 
286  // Make a copy of the offsets array.
287  int *newOffsets = new int[dim];
288  for (d=0; d<dim; ++d)
289  newOffsets[d] = offsets[d];
290 
291  // Get the sizes for the left.
292  newSizes[splitDim] = leftVnodes;
293 
294  // Recurse for the left.
295  recurseCoordinateVRB(dim,strides,newSizes,newOffsets,leftProcs,firstProc,procs);
296 
297  // Get the sizes and offsets for the right.
298  newSizes[splitDim] = splitSize - leftVnodes;
299  newOffsets[splitDim] += leftVnodes;
300 
301  // Recurse for the right.
302  recurseCoordinateVRB(dim,strides,newSizes,newOffsets,rightProcs,firstProc+leftProcs,procs);
303 
304  // Delete the memory.
305  delete [] newSizes;
306  delete [] newOffsets;
307  }
308 }
309 
311 
312 #ifdef __VRB_DIAGNOSTIC__
313 
314 //
315 // print out a hypercube of proc data.
316 //
317 
318 static void
319 print(int dim, const int *sizes, const int *procs)
320 {
321  if ( dim == 1 )
322  {
323  for ( int i=0; i<*sizes; ++i)
324  printf("%4d",procs[i]);
325  }
326  else
327  {
328  int skip = 1;
329  int i;
330  for (i=1; i<dim; ++i)
331  skip *= sizes[i];
332  for (i=0; i<sizes[0]; ++i)
333  {
334  print(dim-1,sizes+1,procs+skip*i);
335  printf("\n");
336  }
337  printf("\n");
338  }
339 }
340 
342 
343 int
344 main(int argc, char *argv[])
345 {
346  // The number of dimensions is the number of args to the program.
347  int dim = argc-2;
348  PAssert_GT(dim, 0);
349 
350  // Get the number of procs.
351  int nprocs = atoi(argv[1]);
352  PAssert_GT(nprocs, 0);
353 
354  // Get the size of each dimension.
355  int *sizes = new int[dim];
356  int totalVnodes = 1;
357  for ( int d = 0; d<dim; ++d )
358  {
359  sizes[d] = atoi(argv[d+2]);
360  totalVnodes *= sizes[d];
361  }
362  int *procs = new int[totalVnodes];
363 
364  // Do it.
365  vnodeRecursiveBisection(dim,sizes,nprocs,procs);
366 
367  // Print it out.
368  print(dim,sizes,procs);
369 
370  return 0;
371 }
372 
373 #endif
374 
375 /***************************************************************************
376  * $RCSfile: VRB.cpp,v $ $Author: adelmann $
377  * $Revision: 1.1.1.1 $ $Date: 2003/01/23 07:40:27 $
378  * IPPL_VERSION_ID: $Id: VRB.cpp,v 1.1.1.1 2003/01/23 07:40:27 adelmann Exp $
379  ***************************************************************************/
#define PAssert_GE(a, b)
Definition: PAssert.h:109
int main(int argc, char *argv[])
Definition: Main.cpp:139
void vnodeRecursiveBisection(int dim, const int *sizes, int nprocs, int *procs)
Definition: VRB.cpp:69
void assign(const BareField< T, Dim > &a, RHS b, OP op, ExprTag< true >)
#define PAssert(c)
Definition: PAssert.h:102
#define PAssert_GT(a, b)
Definition: PAssert.h:108