IPPL: src/FieldLayout/MultiBalancer.h Source File

00001 // -*- C++ -*-
00002 /***************************************************************************
00003  *
00004  * The IPPL Framework
00005  * 
00006  *
00007  * Visit http://people.web.psi.ch/adelmann/ for more details
00008  *
00009  ***************************************************************************/
00010 
00012 // Class MultiBalancer
00014 
00015 #ifndef MULTI_BALANCER_H
00016 #define MULTI_BALANCER_H
00017 
00019 //
00020 // Description
00021 // The guts of the load balancing algorithm for ConejoBalancer.
00022 // Inputs weights for vnodes and figures out where the vnodes should go.
00023 //
00025 
00026 // include files
00027 #ifdef IPPL_STDSTL
00028 #include <vector>
00029 using std::vector;
00030 #else
00031 #include <vector.h>
00032 #endif // IPPL_STDSTL
00033 
00034 /*
00035 
00036   This is a workhorse class for ConejoBalancer.
00037   It inputs the weights for all the vnodes in the system and
00038   calculates what processor they will go to.
00039 
00040   This works only at the vnode level.  The idea is that this class is
00041   used by a higher level class which knows about Fields and actually
00042   does the redistribution.
00043 
00044   **************************************************
00045   General Problem Outline
00046   **************************************************
00047 
00048   There are M materials
00049   There are V vnodes
00050   There are P processors
00051 
00052   Each time step proceeds in a series of phases, one for each
00053   material.  Each of those phases syncronizes often, so each phase
00054   must be load balanced.
00055 
00056   Each vnode has a weight associated with each material.  It can
00057   have nonzero weight associated with any number of materials. 
00058   The weights represent the amount of computation that must be
00059   performed for that vnode in each phase.
00060 
00061   The total time for a time step is the sum of the times for each
00062   material.  The time for each material is the maximum over the 
00063   processors of the total weight on that processor.  The total weight 
00064   on each processor is the sum of the weights on that processor. 
00065   Algebraically that looks like:
00066     T = sum_m max_p sum_v T_{mpv}
00067   
00068   Our goal is to chose a distribution of vnodes on processors that
00069   minimize T.  This problem is bound to be NP complete, so there are
00070   two ways to find a solution:
00071   1. Try all combinations. This is expensive.
00072   2. Find a good heuristic.  This is the approach taken here.
00073 
00074   **************************************************
00075   General Algorithm Outline
00076   **************************************************
00077 
00078   There are three phases.  
00079 
00080   1. For each material input a series of blocks of vnodes.  The idea
00081   is that the user will input the vnodes from one processor after
00082   another for each material in turn since the high level interface is
00083   expected to input a Field of weights for each material.
00084 
00085   2. Tell the balancer to figure out what the new distribution should
00086   be.  It does this by looping over the vnodes, assigning each one to
00087   a processor.  It figures out the processor by finding the processor
00088   that minimizes T above when adding each vnode.
00089 
00090   3. Read out the new destination processor for each vnode.
00091 
00092   The skeleton of how MultiBalancer would be used is then something
00093   like:
00094 
00095   // Construct with the number of procs and vnodes.
00096   MultiBalancer balancer(procs,vnodes);
00097 
00098   // Tell the balancer about the vnodes.
00099   // Loop over materials
00100   for (int m=0; m<materials; ++m_)
00101     {
00102       // Loop over input from each processor
00103       for (int p=0; p<procs; ++p)
00104         {
00105           // Tell the balancer the weights for the vnodes
00106           // on the processor where they start out.
00107           balancer.appendWeights(...);
00108         }
00109     }
00110   
00111   // Tell the balancer to figure out the new distribution.
00112   balancer.distribute();
00113 
00114   // Read out the new distributions.  
00115   for (MultiBalancer::iterator newProc=balancer.begin();
00116        newProc != balancer.end();
00117        ++newProc)
00118     {
00119       // Get the new processor for this vnode.
00120       ... *newProc ...
00121     }
00122 
00123 */
00124 
00125 // class definition for MultiBalancer
00126 class MultiBalancer
00127 {
00128 public:
00129 
00130   // Constructor.
00131   // Tell it the number of processors.
00132   MultiBalancer(int procs, int vnodes);
00133 
00134   // Destructor.
00135   ~MultiBalancer();
00136 
00137   // Tell it to start recording data for a new material.
00138   void newMaterial();
00139   
00140   // Append weights for the current material on the next processor.
00141   // Inputs are:
00142   // begin, end: iterators to loop over the weights.
00143   void appendWeights(double *begin, double* end);
00144 
00145   // Distribute the vnodes to the processors.
00146   void distribute();
00147 
00148   // Interface for reading the data out again.
00149   // What we output is a processor for each
00150   // vnode, so just hold a vector of integers.
00151   typedef vector<int>::iterator iterator;
00152 
00153   // Begin and end iterators for iterating over the vnodes
00154   // for outputting the destination processor.
00155   // The vnodes are read out in the order in which they were inserted.
00156   iterator begin() { return m_vnodeProcs.begin(); }
00157   iterator end()   { return m_vnodeProcs.end(); }
00158 
00159 private:
00160 
00161   // This thing knows the number of processors, vnodes, materials it has.
00162   int m_procs;
00163   int m_vnodes;
00164   int m_materials;
00165 
00166   // Record what phase it is in.
00167   int m_phase;
00168   enum { noMaterials, someMaterials, distributed };
00169   
00170   // A vector with one weight per vnode.
00171   typedef vector<double> VnodeWeights_t;
00172 
00173   // A vector with one weight per material.
00174   typedef vector<double> MaterialWeights_t;
00175 
00176   // A vector with one weight per processor.
00177   typedef vector<double> ProcWeights_t;
00178 
00179   // A vector of vectors to hold weights for all materials on each vnode.
00180   typedef vector<VnodeWeights_t*> MaterialVnodeWeights_t;
00181 
00182   // A vector of vectors to hold weights for all materials on each proc.
00183   typedef vector<VnodeWeights_t*> MaterialProcWeights_t;
00184 
00185   // A vector of integers to hold the destination processors.
00186   typedef vector<int> VnodeDestProcs_t;
00187 
00188   // Record of the weights when the come in from other processors.
00189   MaterialVnodeWeights_t m_inputWeights;
00190 
00191   // The list of processors where each vnode will end up.
00192   VnodeDestProcs_t m_vnodeProcs;
00193 
00194   // The weight assigned to each processor.
00195   ProcWeights_t m_procWeights;
00196 
00197   // The maximum weights for each material.
00198   MaterialWeights_t m_materialMaxWeights;
00199 
00200   // The weight of each material on each processor.
00201   MaterialProcWeights_t m_materialProcWeights;
00202 
00203   //
00204   // We need to do an indirect sort.
00205   // That is, we need to sort a list of integers
00206   // where the integers are offsets in an array of doubles,
00207   // and the sort criterion is the size of the doubles.
00208   //
00209   // So, we need to build a comparator object to be passed to
00210   // the sort algorithm.
00211   //
00212   struct IndirectComparator
00213   {
00214     // A vector with one weight per vnode.
00215     typedef vector<double> VnodeWeights_t;
00216 
00217     // Store a pointer to the weights.
00218     VnodeWeights_t::const_iterator m_weights;
00219 
00220     // Construct with a pointer to the weights.
00221     IndirectComparator(VnodeWeights_t::const_iterator p)
00222       : m_weights(p) {}
00223 
00224     // Given two integers, compare the weights at those offsets.
00225     bool operator()(int i, int j) { return m_weights[i] < m_weights[j]; }
00226   };
00227 
00228   //
00229   // Accumulate the total weight per vnode into a container.
00230   //
00231   void calcTotalWeights(VnodeWeights_t& vnodeTotalWeights);
00232 
00233   //
00234   // Build an indirection vector that can be used to access the
00235   // vnodes in order of total weight.
00236   //
00237   void calcIndirectSort(vector<int>& vnodeIndirect,
00238                         const VnodeWeights_t& vnodeTotalWeights);
00239 
00240   //
00241   // Find what proc a vnode should be put on.
00242   //
00243   int findProcForVnode(int vnode);
00244 
00245   // Allow the tester to see in here.
00246   friend void multiBalancerTester();
00247 
00248 };
00249 
00251 #endif //MULTI_BALANCER_H
00252 
00253 /***************************************************************************
00254  * $RCSfile: MultiBalancer.h,v $   $Author: adelmann $
00255  * $Revision: 1.1.1.1 $   $Date: 2003/01/23 07:40:27 $
00256  * IPPL_VERSION_ID: $Id: MultiBalancer.h,v 1.1.1.1 2003/01/23 07:40:27 adelmann Exp $ 
00257  ***************************************************************************/