OPAL (Object Oriented Parallel Accelerator Library) 2022.1
OPAL
VRB.cpp
Go to the documentation of this file.
1// -*- C++ -*-
2/***************************************************************************
3 *
4 * The IPPL Framework
5 *
6 * This program was prepared by PSI.
7 * All rights in the program are reserved by PSI.
8 * Neither PSI nor the author(s)
9 * makes any warranty, express or implied, or assumes any liability or
10 * responsibility for the use of this software
11 *
12 * Visit www.amas.web.psi for more details
13 *
14 ***************************************************************************/
15
16// -*- C++ -*-
17/***************************************************************************
18 *
19 * The IPPL Framework
20 *
21 *
22 * Visit http://people.web.psi.ch/adelmann/ for more details
23 *
24 ***************************************************************************/
25
26// include files
27#include "VRB.h"
28#include "Utility/PAssert.h"
29
30#ifdef __VRB_DIAGNOSTIC__
31#include <cstdio>
32#include <cstdlib>
33#endif
34
35//
36// A local function we use for the recursive bisection.
37//
38
39static void
40recurseCoordinateVRB(int dim,
41 const int *strides, const int* sizes, const int *offsets,
42 int nprocs, int firstProc, int *procs );
43
45
46//
47// vnodeRecurseiveBisection
48//
49// This is the user visible function for decomposing along coordinate
50// directions.
51//
52// This function just sets up the arguments for the recursive
53// algorithm. That algorithm works by subdividing blocks of
54// vnodes and blocks of processors.
55//
56// The blocks are identified with the offsets of the lower left
57// corner and the sizes of the block in each dimension.
58//
59// The processors are identified by a continuous range from
60// a firstProc and a number of procs nprocs.
61//
62// In addition to the input arguments, it calculates for the recursion:
63//
64// strides: The strides to go from vnode to its neighbors in N dimensions.
65// offsets: The lower left corner of the block of vnodes.
66//
67
68void
69vnodeRecursiveBisection(int dim, const int *sizes, int nprocs, int *procs)
70{
71 int i;
72
73 // Initialize the lower left corner of the vnodes to the origin.
74 int *offsets = new int[dim];
75 for (i=0; i<dim; ++i)
76 offsets[i] = 0;
77
78 // Initialize the strides with C semantics (last index varies fastest).
79 int *strides = new int[dim];
80 strides[dim-1] = 1;
81 for (i=dim-2; i>=0; --i)
82 strides[i] = strides[i+1]*sizes[i+1];
83
84 // Dive into the recursion.
85 recurseCoordinateVRB(dim,strides,sizes,offsets,nprocs,0,procs);
86
87 // Clean up after ourselves.
88 delete [] offsets;
89 delete [] strides;
90}
91
93
94//
95// assign
96//
97// Given a description of a block of vnodes, fill the proc array
98// with a processor number x.
99//
100// The block of vnodes is described by:
101// dim: The dimension of space.
102// strides: The strides to find neighbors of a vnode.
103// offsets: The lower left corner of the block of vnodes
104// sizes : the size of the block in each dimension.
105//
106
107static void
108assign(int dim, const int *strides, const int *sizes, const int *offsets,
109 int x, int *procs)
110{
111 // Make sure the input is sensible.
112 PAssert_GT(dim, 0);
113 PAssert(sizes);
114 PAssert(offsets);
115 PAssert(procs);
116
117 int i;
118
119#ifdef __VRB__DIAGNOSTIC__
120 printf("---------- assign ----------\n");
121 printf("dim=%d, sizes=",dim);
122 for (i=0; i<dim; ++i)
123 printf(" %d",sizes[i]);
124 printf(", offsets=");
125 for (i=0; i<dim; ++i)
126 printf(" %d",offsets[i]);
127 printf(", strides=");
128 for (i=0; i<dim; ++i)
129 printf(" %d",strides[i]);
130 printf(", procs=%lx\n",procs);
131 printf("----------------------------\n");
132#endif
133
134 // Termination condition: one dimension.
135 if ( dim==1 )
136 // Fill the one row with x.
137 for (i=0; i<sizes[0]; ++i)
138 procs[offsets[0]+i] = x;
139
140 // Otherwise, loop over the outermost dimension and recurse.
141 else
142 {
143 // For each slab, fill it.
144 for (i=0; i<sizes[0]; ++i)
145 assign(dim-1,strides+1,sizes+1,offsets+1,x,procs+(offsets[0]+i)*strides[0]);
146 }
147}
148
150
151//
152// RecurseCoordinateVRB
153//
154// Perform the recursion, finding the procs for each vnode.
155//
156// Inputs:
157// dim : The dimension of the breakdown.
158// strides : The strides between the slabs of the proc array.
159// sizes : The size of the block of vnodes.
160// offsets : The lower left corner of the block of vnodes.
161// nprocs : The number of processors to put these vnodes on.
162// firstProc : The first proc we are putting these vnodes on.
163//
164// Output:
165// procs: Array to store the processor for each vnode.
166//
167
168static void
169recurseCoordinateVRB(int dim,
170 const int* strides, const int* sizes, const int *offsets,
171 int nprocs, int firstProc, int *procs )
172{
173 // Make sure the input is sensible.
174 PAssert_GT(dim, 0);
175 PAssert(sizes);
176 PAssert_GT(nprocs, 0);
177 PAssert(procs);
178 PAssert_GE(firstProc, 0);
179 for (int i=0; i<dim; ++i)
180 {
181 PAssert_GT(sizes[i], 0);
182 PAssert_GE(offsets[i], 0);
183 }
184
185#ifdef __VRB_DIAGNOSTIC__
186 printf("---------- RecurseCoordinateVRB ----------\n");
187 printf("dim= %d, sizes=",dim);
188 for (i=0; i<dim; ++i)
189 printf(" %d",sizes[i]);
190 printf(", offsets=");
191 for (i=0; i<dim; ++i)
192 printf(" %d",offsets[i]);
193 printf(", nprocs= %d, firstProc= %d\n",nprocs,firstProc);
194 printf("------------------------------------------\n");
195#endif
196
197 // If we just have one proc, all the vnodes are on this proc.
198 if ( nprocs == 1 )
199 {
200 // Fill the hypercube defined by sizes,offsets with
201 // the value firstProc.
202 assign(dim,strides,sizes,offsets,firstProc,procs);
203 }
204
205 // If there is more than one processor left,
206 // recurse by splitting the procs into two groups and
207 // the work into two groups, and allocating work to procs.
208 else
209 {
210 int d;
211
212#ifndef NOPAssert
213 // Calculate the total number of vnodes.
214 int totalVnodes = sizes[0];
215 for (d=1; d<dim; ++d)
216 totalVnodes *= sizes[d];
217 PAssert_GE(totalVnodes, nprocs);
218#endif
219 // Find the number of processors on each side.
220 int leftProcs = nprocs/2;
221 int rightProcs = nprocs-leftProcs;
222
223 // Decide which dimension to split.
224 // Just loop over the dimensions and pick the biggest.
225 int splitDim = 0;
226 for (d=1; d<dim; ++d)
227 if ( sizes[d] > sizes[splitDim] )
228 splitDim = d;
229
230 // Get the number of vnodes in that dimension.
231 int splitSize = sizes[splitDim];
232
233 // Find where along that dimension to split.
234 // Balance the work between the two procs as well as possible.
235
236 // The number of vnodes on the left.
237 // Start with none on the left.
238 int leftVnodes = 0;
239
240 // The degree to which things are out of balance.
241 double outOfBalance = splitSize/(double)rightProcs;
242
243 // The number of vnodes in a hyperslab perpendicular
244 // to the split direction.
245 int crossSize = 1;
246 for (d=0; d<dim; ++d)
247 if ( d != splitDim )
248 crossSize *= sizes[d];
249
250 // Consider all possible split locations and pick the best.
251 for (int l=1; l<splitSize; ++l)
252 {
253 // How far out of balance is this?
254 int r = splitSize - l;
255 double b = l/(double)leftProcs - r/(double)rightProcs;
256
257 // Get the absolute value of the unbalance.
258 if ( b<0 )
259 b=-b;
260
261 // Compare to the best so far.
262 // If is better balance and we have at least as many
263 // procs on each side of the divide as we have vnodes,
264 // then keep this split.
265 if ( (b < outOfBalance) &&
266 (l*crossSize>=leftProcs) &&
267 (r*crossSize>=rightProcs) )
268 {
269 // It is better, keep it.
270 leftVnodes = l;
271 outOfBalance = b;
272 }
273 }
274
275 // If we couldn't find a good split, die.
276 PAssert_GT(leftVnodes, 0);
277
278 // We now know what dimension to split on, and where in
279 // that dimension to split. Recurse.
280
281 // Make a copy of the sizes array.
282 int *newSizes = new int[dim];
283 for (d=0; d<dim; ++d)
284 newSizes[d] = sizes[d];
285
286 // Make a copy of the offsets array.
287 int *newOffsets = new int[dim];
288 for (d=0; d<dim; ++d)
289 newOffsets[d] = offsets[d];
290
291 // Get the sizes for the left.
292 newSizes[splitDim] = leftVnodes;
293
294 // Recurse for the left.
295 recurseCoordinateVRB(dim,strides,newSizes,newOffsets,leftProcs,firstProc,procs);
296
297 // Get the sizes and offsets for the right.
298 newSizes[splitDim] = splitSize - leftVnodes;
299 newOffsets[splitDim] += leftVnodes;
300
301 // Recurse for the right.
302 recurseCoordinateVRB(dim,strides,newSizes,newOffsets,rightProcs,firstProc+leftProcs,procs);
303
304 // Delete the memory.
305 delete [] newSizes;
306 delete [] newOffsets;
307 }
308}
309
311
312#ifdef __VRB_DIAGNOSTIC__
313
314//
315// print out a hypercube of proc data.
316//
317
318static void
319print(int dim, const int *sizes, const int *procs)
320{
321 if ( dim == 1 )
322 {
323 for ( int i=0; i<*sizes; ++i)
324 printf("%4d",procs[i]);
325 }
326 else
327 {
328 int skip = 1;
329 int i;
330 for (i=1; i<dim; ++i)
331 skip *= sizes[i];
332 for (i=0; i<sizes[0]; ++i)
333 {
334 print(dim-1,sizes+1,procs+skip*i);
335 printf("\n");
336 }
337 printf("\n");
338 }
339}
340
342
343int
344main(int argc, char *argv[])
345{
346 // The number of dimensions is the number of args to the program.
347 int dim = argc-2;
348 PAssert_GT(dim, 0);
349
350 // Get the number of procs.
351 int nprocs = atoi(argv[1]);
352 PAssert_GT(nprocs, 0);
353
354 // Get the size of each dimension.
355 int *sizes = new int[dim];
356 int totalVnodes = 1;
357 for ( int d = 0; d<dim; ++d )
358 {
359 sizes[d] = atoi(argv[d+2]);
360 totalVnodes *= sizes[d];
361 }
362 int *procs = new int[totalVnodes];
363
364 // Do it.
365 vnodeRecursiveBisection(dim,sizes,nprocs,procs);
366
367 // Print it out.
368 print(dim,sizes,procs);
369
370 return 0;
371}
372
373#endif
374
375/***************************************************************************
376 * $RCSfile: VRB.cpp,v $ $Author: adelmann $
377 * $Revision: 1.1.1.1 $ $Date: 2003/01/23 07:40:27 $
378 * IPPL_VERSION_ID: $Id: VRB.cpp,v 1.1.1.1 2003/01/23 07:40:27 adelmann Exp $
379 ***************************************************************************/
int main(int argc, char *argv[])
Definition: Main.cpp:128
void vnodeRecursiveBisection(int dim, const int *sizes, int nprocs, int *procs)
Definition: VRB.cpp:69
void assign(const BareField< T, Dim > &a, RHS b, OP op, ExprTag< true >)
#define PAssert(c)
Definition: PAssert.h:102
#define PAssert_GE(a, b)
Definition: PAssert.h:109
#define PAssert_GT(a, b)
Definition: PAssert.h:108