src/FieldLayout/VRB.cpp

Go to the documentation of this file.
00001 // -*- C++ -*-
00002 /***************************************************************************
00003  *
00004  * The IPPL Framework
00005  * 
00006  * This program was prepared by PSI. 
00007  * All rights in the program are reserved by PSI.
00008  * Neither PSI nor the author(s)
00009  * makes any warranty, express or implied, or assumes any liability or
00010  * responsibility for the use of this software
00011  *
00012  * Visit http://www.acl.lanl.gov/POOMS for more details
00013  *
00014  ***************************************************************************/
00015 
00016 // -*- C++ -*-
00017 /***************************************************************************
00018  *
00019  * The IPPL Framework
00020  * 
00021  *
00022  * Visit http://people.web.psi.ch/adelmann/ for more details
00023  *
00024  ***************************************************************************/
00025 
00026 // include files
00027 #include "VRB.h"
00028 #include "Utility/PAssert.h"
00029 
00030 #ifdef __VRB_DIAGNOSTIC__
00031 #include <stdio.h>
00032 #include <stdlib.h>
00033 #endif
00034 
00035 //
00036 // A local function we use for the recursive bisection.
00037 //
00038 
00039 static void 
00040 recurseCoordinateVRB(int dim, 
00041                      const int *strides, const int* sizes, const int *offsets, 
00042                      int nprocs, int firstProc, int *procs );
00043 
00045 
00046 //
00047 // vnodeRecurseiveBisection
00048 //
00049 // This is the user visible function for decomposing along coordinate
00050 // directions.
00051 //
00052 // This function just sets up the arguments for the recursive
00053 // algorithm.  That algorithm works by subdividing blocks of 
00054 // vnodes and blocks of processors. 
00055 //
00056 // The blocks are identified with the offsets of the lower left 
00057 // corner and the sizes of the block in each dimension.  
00058 //
00059 // The processors are identified by a continuous range from 
00060 // a firstProc and a number of procs nprocs.
00061 //
00062 // In addition to the input arguments, it calculates for the recursion:
00063 //
00064 // strides: The strides to go from vnode to its neighbors in N dimensions.
00065 // offsets: The lower left corner of the block of vnodes.
00066 //
00067 
00068 void
00069 vnodeRecursiveBisection(int dim, const int *sizes, int nprocs, int *procs)
00070 {
00071   int i;
00072 
00073   // Initialize the lower left corner of the vnodes to the origin.
00074   int *offsets = new int[dim];
00075   for (i=0; i<dim; ++i)
00076     offsets[i] = 0;
00077 
00078   // Initialize the strides with C semantics (last index varies fastest).
00079   int *strides = new int[dim];
00080   strides[dim-1] = 1;
00081   for (i=dim-2; i>=0; --i)
00082     strides[i] = strides[i+1]*sizes[i+1];
00083 
00084   // Dive into the recursion.
00085   recurseCoordinateVRB(dim,strides,sizes,offsets,nprocs,0,procs);
00086 
00087   // Clean up after ourselves.
00088   delete [] offsets;
00089   delete [] strides;
00090 }
00091 
00093 
00094 //
00095 // assign
00096 //
00097 // Given a description of a block of vnodes, fill the proc array
00098 // with a processor number x.
00099 //
00100 // The block of vnodes is described by:
00101 // dim:     The dimension of space.
00102 // strides: The strides to find neighbors of a vnode.
00103 // offsets: The lower left corner of the block of vnodes
00104 // sizes  : the size of the block in each dimension.
00105 //
00106 
00107 static void 
00108 assign(int dim, const int *strides, const int *sizes, const int *offsets, 
00109        int x, int *procs)
00110 {
00111   // Make sure the input is sensible.
00112   PAssert(dim>0);
00113   PAssert(sizes);
00114   PAssert(offsets);
00115   PAssert(procs);
00116 
00117   int i;
00118 
00119 #ifdef __VRB__DIAGNOSTIC__
00120   printf("---------- assign ----------\n");
00121   printf("dim=%d, sizes=",dim);
00122   for (i=0; i<dim; ++i)
00123     printf(" %d",sizes[i]);
00124   printf(", offsets=");
00125   for (i=0; i<dim; ++i)
00126     printf(" %d",offsets[i]);
00127   printf(", strides=");
00128   for (i=0; i<dim; ++i)
00129     printf(" %d",strides[i]);
00130   printf(", procs=%lx\n",procs);
00131   printf("----------------------------\n");
00132 #endif
00133 
00134   // Termination condition: one dimension.
00135   if ( dim==1 )
00136     // Fill the one row with x.
00137     for (int i=0; i<sizes[0]; ++i)
00138       procs[offsets[0]+i] = x;
00139 
00140   // Otherwise, loop over the outermost dimension and recurse.
00141   else
00142     {
00143       // For each slab, fill it.
00144       for (i=0; i<sizes[0]; ++i)
00145         assign(dim-1,strides+1,sizes+1,offsets+1,x,procs+(offsets[0]+i)*strides[0]);
00146     }
00147 }
00148 
00150 
00151 //
00152 // RecurseCoordinateVRB
00153 //
00154 // Perform the recursion, finding the procs for each vnode.
00155 //
00156 // Inputs:
00157 // dim     : The dimension of the breakdown.
00158 // strides : The strides between the slabs of the proc array.
00159 // sizes   : The size of the block of vnodes.
00160 // offsets : The lower left corner of the block of vnodes.
00161 // nprocs  : The number of processors to put these vnodes on.
00162 // firstProc : The first proc we are putting these vnodes on.
00163 // 
00164 // Output:
00165 // procs: Array to store the processor for each vnode.
00166 // 
00167 
00168 static void 
00169 recurseCoordinateVRB(int dim, 
00170                      const int* strides, const int* sizes, const int *offsets, 
00171                      int nprocs, int firstProc, int *procs )
00172 {
00173   // Make sure the input is sensible.
00174   PAssert(dim>0);
00175   PAssert(sizes);
00176   PAssert(nprocs>0);
00177   PAssert(procs);
00178   PAssert(firstProc>=0);
00179   for (int i=0; i<dim; ++i) 
00180     {
00181       PAssert(sizes[i]>0);
00182       PAssert(offsets[i]>=0);
00183     }
00184 
00185 #ifdef __VRB_DIAGNOSTIC__
00186   printf("---------- RecurseCoordinateVRB ----------\n");
00187   printf("dim= %d, sizes=",dim);
00188   for (i=0; i<dim; ++i)
00189     printf(" %d",sizes[i]);
00190   printf(", offsets=");
00191   for (i=0; i<dim; ++i)
00192     printf(" %d",offsets[i]);
00193   printf(", nprocs= %d, firstProc= %d\n",nprocs,firstProc);
00194   printf("------------------------------------------\n");
00195 #endif
00196 
00197   // If we just have one proc, all the vnodes are on this proc.
00198   if ( nprocs == 1 )
00199     {
00200       // Fill the hypercube defined by sizes,offsets with
00201       // the value firstProc.
00202       assign(dim,strides,sizes,offsets,firstProc,procs);
00203     }
00204 
00205   // If there is more than one processor left, 
00206   // recurse by splitting the procs into two groups and
00207   // the work into two groups, and allocating work to procs.
00208   else
00209     {
00210       int d;
00211 
00212       // Calculate the total number of vnodes.
00213       int totalVnodes = sizes[0];
00214       for (d=1; d<dim; ++d)
00215         totalVnodes *= sizes[d];
00216       PAssert(totalVnodes>=nprocs);
00217 
00218       // Find the number of processors on each side.
00219       int leftProcs = nprocs/2;
00220       int rightProcs = nprocs-leftProcs;
00221 
00222       // Decide which dimension to split.
00223       // Just loop over the dimensions and pick the biggest.
00224       int splitDim = 0;
00225       for (d=1; d<dim; ++d)
00226         if ( sizes[d] > sizes[splitDim] )
00227           splitDim = d;
00228 
00229       // Get the number of vnodes in that dimension.
00230       int splitSize = sizes[splitDim];
00231 
00232       // Find where along that dimension to split.
00233       // Balance the work between the two procs as well as possible.
00234 
00235       // The number of vnodes on the left.
00236       // Start with none on the left.
00237       int leftVnodes = 0;
00238 
00239       // The degree to which things are out of balance.
00240       double outOfBalance = splitSize/(double)rightProcs;
00241 
00242       // The number of vnodes in a hyperslab perpendicular
00243       // to the split direction.
00244       int crossSize = 1;
00245       for (d=0; d<dim; ++d)
00246         if ( d != splitDim )
00247           crossSize *= sizes[d];
00248 
00249       // Consider all possible split locations and pick the best.
00250       for (int l=1; l<splitSize; ++l)
00251         {
00252           // How far out of balance is this?
00253           int r = splitSize - l;
00254           double b = l/(double)leftProcs - r/(double)rightProcs;
00255 
00256           // Get the absolute value of the unbalance.
00257           if ( b<0 ) 
00258             b=-b;
00259 
00260           // Compare to the best so far.
00261           // If is better balance and we have at least as many
00262           // procs on each side of the divide as we have vnodes,
00263           // then keep this split.
00264           if ( (b < outOfBalance) &&
00265                (l*crossSize>=leftProcs) &&
00266                (r*crossSize>=rightProcs) )
00267             {
00268               // It is better, keep it.
00269               leftVnodes = l;
00270               outOfBalance = b;
00271             }
00272         }
00273 
00274       // If we couldn't find a good split, die.
00275       PAssert(leftVnodes>0);
00276 
00277       // We now know what dimension to split on, and where in 
00278       // that dimension to split.  Recurse.
00279 
00280       // Make a copy of the sizes array.
00281       int *newSizes = new int[dim];
00282       for (d=0; d<dim; ++d)
00283         newSizes[d] = sizes[d];
00284 
00285       // Make a copy of the offsets array.
00286       int *newOffsets = new int[dim];
00287       for (d=0; d<dim; ++d)
00288         newOffsets[d] = offsets[d];
00289       
00290       // Get the sizes for the left.
00291       newSizes[splitDim] = leftVnodes;
00292 
00293       // Recurse for the left.
00294       recurseCoordinateVRB(dim,strides,newSizes,newOffsets,leftProcs,firstProc,procs);
00295 
00296       // Get the sizes and offsets for the right.
00297       newSizes[splitDim] = splitSize - leftVnodes;
00298       newOffsets[splitDim] += leftVnodes;
00299 
00300       // Recurse for the right.
00301       recurseCoordinateVRB(dim,strides,newSizes,newOffsets,rightProcs,firstProc+leftProcs,procs);
00302 
00303       // Delete the memory.
00304       delete [] newSizes;
00305       delete [] newOffsets;
00306     }
00307 }
00308 
00310 
00311 #ifdef __VRB_DIAGNOSTIC__
00312 
00313 //
00314 // print out a hypercube of proc data.
00315 //
00316 
00317 static void 
00318 print(int dim, const int *sizes, const int *procs)
00319 {
00320   if ( dim == 1 )
00321     {
00322       for ( int i=0; i<*sizes; ++i)
00323         printf("%4d",procs[i]);
00324     }
00325   else
00326     {
00327       int skip = 1;
00328       int i;
00329       for (i=1; i<dim; ++i)
00330         skip *= sizes[i];
00331       for (i=0; i<sizes[0]; ++i)
00332         {
00333           print(dim-1,sizes+1,procs+skip*i);
00334           printf("\n");
00335         }
00336       printf("\n");
00337     }
00338 }
00339 
00341 
00342 int 
00343 main(int argc, char *argv[])
00344 {
00345   // The number of dimensions is the number of args to the program.
00346   int dim = argc-2;
00347   PAssert(dim>0);
00348 
00349   // Get the number of procs.
00350   int nprocs = atoi(argv[1]);
00351   PAssert(nprocs>0);
00352 
00353   // Get the size of each dimension.
00354   int *sizes = new int[dim];
00355   int totalVnodes = 1;
00356   for ( int d = 0; d<dim; ++d )
00357     {
00358       sizes[d] = atoi(argv[d+2]);
00359       totalVnodes *= sizes[d];
00360     }
00361   int *procs = new int[totalVnodes];
00362 
00363   // Do it.
00364   vnodeRecursiveBisection(dim,sizes,nprocs,procs);
00365 
00366   // Print it out.
00367   print(dim,sizes,procs);
00368 
00369   return 0;
00370 }
00371 
00372 #endif
00373 
00374 /***************************************************************************
00375  * $RCSfile: VRB.cpp,v $   $Author: adelmann $
00376  * $Revision: 1.1.1.1 $   $Date: 2003/01/23 07:40:27 $
00377  * IPPL_VERSION_ID: $Id: VRB.cpp,v 1.1.1.1 2003/01/23 07:40:27 adelmann Exp $ 
00378  ***************************************************************************/

Generated on Mon Jan 16 13:23:47 2006 for IPPL by  doxygen 1.4.6