00001 // -*- C++ -*- 00002 /*************************************************************************** 00003 * 00004 * The IPPL Framework 00005 * 00006 * 00007 * Visit http://people.web.psi.ch/adelmann/ for more details 00008 * 00009 ***************************************************************************/ 00010 00012 // Class MultiBalancer 00014 00015 #ifndef MULTI_BALANCER_H 00016 #define MULTI_BALANCER_H 00017 00019 // 00020 // Description 00021 // The guts of the load balancing algorithm for ConejoBalancer. 00022 // Inputs weights for vnodes and figures out where the vnodes should go. 00023 // 00025 00026 // include files 00027 #ifdef IPPL_STDSTL 00028 #include <vector> 00029 using std::vector; 00030 #else 00031 #include <vector.h> 00032 #endif // IPPL_STDSTL 00033 00034 /* 00035 00036 This is a workhorse class for ConejoBalancer. 00037 It inputs the weights for all the vnodes in the system and 00038 calculates what processor they will go to. 00039 00040 This works only at the vnode level. The idea is that this class is 00041 used by a higher level class which knows about Fields and actually 00042 does the redistribution. 00043 00044 ************************************************** 00045 General Problem Outline 00046 ************************************************** 00047 00048 There are M materials 00049 There are V vnodes 00050 There are P processors 00051 00052 Each time step proceeds in a series of phases, one for each 00053 material. Each of those phases syncronizes often, so each phase 00054 must be load balanced. 00055 00056 Each vnode has a weight associated with each material. It can 00057 have nonzero weight associated with any number of materials. 00058 The weights represent the amount of computation that must be 00059 performed for that vnode in each phase. 00060 00061 The total time for a time step is the sum of the times for each 00062 material. The time for each material is the maximum over the 00063 processors of the total weight on that processor. The total weight 00064 on each processor is the sum of the weights on that processor. 00065 Algebraically that looks like: 00066 T = sum_m max_p sum_v T_{mpv} 00067 00068 Our goal is to chose a distribution of vnodes on processors that 00069 minimize T. This problem is bound to be NP complete, so there are 00070 two ways to find a solution: 00071 1. Try all combinations. This is expensive. 00072 2. Find a good heuristic. This is the approach taken here. 00073 00074 ************************************************** 00075 General Algorithm Outline 00076 ************************************************** 00077 00078 There are three phases. 00079 00080 1. For each material input a series of blocks of vnodes. The idea 00081 is that the user will input the vnodes from one processor after 00082 another for each material in turn since the high level interface is 00083 expected to input a Field of weights for each material. 00084 00085 2. Tell the balancer to figure out what the new distribution should 00086 be. It does this by looping over the vnodes, assigning each one to 00087 a processor. It figures out the processor by finding the processor 00088 that minimizes T above when adding each vnode. 00089 00090 3. Read out the new destination processor for each vnode. 00091 00092 The skeleton of how MultiBalancer would be used is then something 00093 like: 00094 00095 // Construct with the number of procs and vnodes. 00096 MultiBalancer balancer(procs,vnodes); 00097 00098 // Tell the balancer about the vnodes. 00099 // Loop over materials 00100 for (int m=0; m<materials; ++m_) 00101 { 00102 // Loop over input from each processor 00103 for (int p=0; p<procs; ++p) 00104 { 00105 // Tell the balancer the weights for the vnodes 00106 // on the processor where they start out. 00107 balancer.appendWeights(...); 00108 } 00109 } 00110 00111 // Tell the balancer to figure out the new distribution. 00112 balancer.distribute(); 00113 00114 // Read out the new distributions. 00115 for (MultiBalancer::iterator newProc=balancer.begin(); 00116 newProc != balancer.end(); 00117 ++newProc) 00118 { 00119 // Get the new processor for this vnode. 00120 ... *newProc ... 00121 } 00122 00123 */ 00124 00125 // class definition for MultiBalancer 00126 class MultiBalancer 00127 { 00128 public: 00129 00130 // Constructor. 00131 // Tell it the number of processors. 00132 MultiBalancer(int procs, int vnodes); 00133 00134 // Destructor. 00135 ~MultiBalancer(); 00136 00137 // Tell it to start recording data for a new material. 00138 void newMaterial(); 00139 00140 // Append weights for the current material on the next processor. 00141 // Inputs are: 00142 // begin, end: iterators to loop over the weights. 00143 void appendWeights(double *begin, double* end); 00144 00145 // Distribute the vnodes to the processors. 00146 void distribute(); 00147 00148 // Interface for reading the data out again. 00149 // What we output is a processor for each 00150 // vnode, so just hold a vector of integers. 00151 typedef vector<int>::iterator iterator; 00152 00153 // Begin and end iterators for iterating over the vnodes 00154 // for outputting the destination processor. 00155 // The vnodes are read out in the order in which they were inserted. 00156 iterator begin() { return m_vnodeProcs.begin(); } 00157 iterator end() { return m_vnodeProcs.end(); } 00158 00159 private: 00160 00161 // This thing knows the number of processors, vnodes, materials it has. 00162 int m_procs; 00163 int m_vnodes; 00164 int m_materials; 00165 00166 // Record what phase it is in. 00167 int m_phase; 00168 enum { noMaterials, someMaterials, distributed }; 00169 00170 // A vector with one weight per vnode. 00171 typedef vector<double> VnodeWeights_t; 00172 00173 // A vector with one weight per material. 00174 typedef vector<double> MaterialWeights_t; 00175 00176 // A vector with one weight per processor. 00177 typedef vector<double> ProcWeights_t; 00178 00179 // A vector of vectors to hold weights for all materials on each vnode. 00180 typedef vector<VnodeWeights_t*> MaterialVnodeWeights_t; 00181 00182 // A vector of vectors to hold weights for all materials on each proc. 00183 typedef vector<VnodeWeights_t*> MaterialProcWeights_t; 00184 00185 // A vector of integers to hold the destination processors. 00186 typedef vector<int> VnodeDestProcs_t; 00187 00188 // Record of the weights when the come in from other processors. 00189 MaterialVnodeWeights_t m_inputWeights; 00190 00191 // The list of processors where each vnode will end up. 00192 VnodeDestProcs_t m_vnodeProcs; 00193 00194 // The weight assigned to each processor. 00195 ProcWeights_t m_procWeights; 00196 00197 // The maximum weights for each material. 00198 MaterialWeights_t m_materialMaxWeights; 00199 00200 // The weight of each material on each processor. 00201 MaterialProcWeights_t m_materialProcWeights; 00202 00203 // 00204 // We need to do an indirect sort. 00205 // That is, we need to sort a list of integers 00206 // where the integers are offsets in an array of doubles, 00207 // and the sort criterion is the size of the doubles. 00208 // 00209 // So, we need to build a comparator object to be passed to 00210 // the sort algorithm. 00211 // 00212 struct IndirectComparator 00213 { 00214 // A vector with one weight per vnode. 00215 typedef vector<double> VnodeWeights_t; 00216 00217 // Store a pointer to the weights. 00218 VnodeWeights_t::const_iterator m_weights; 00219 00220 // Construct with a pointer to the weights. 00221 IndirectComparator(VnodeWeights_t::const_iterator p) 00222 : m_weights(p) {} 00223 00224 // Given two integers, compare the weights at those offsets. 00225 bool operator()(int i, int j) { return m_weights[i] < m_weights[j]; } 00226 }; 00227 00228 // 00229 // Accumulate the total weight per vnode into a container. 00230 // 00231 void calcTotalWeights(VnodeWeights_t& vnodeTotalWeights); 00232 00233 // 00234 // Build an indirection vector that can be used to access the 00235 // vnodes in order of total weight. 00236 // 00237 void calcIndirectSort(vector<int>& vnodeIndirect, 00238 const VnodeWeights_t& vnodeTotalWeights); 00239 00240 // 00241 // Find what proc a vnode should be put on. 00242 // 00243 int findProcForVnode(int vnode); 00244 00245 // Allow the tester to see in here. 00246 friend void multiBalancerTester(); 00247 00248 }; 00249 00251 #endif //MULTI_BALANCER_H 00252 00253 /*************************************************************************** 00254 * $RCSfile: MultiBalancer.h,v $ $Author: adelmann $ 00255 * $Revision: 1.1.1.1 $ $Date: 2003/01/23 07:40:27 $ 00256 * IPPL_VERSION_ID: $Id: MultiBalancer.h,v 1.1.1.1 2003/01/23 07:40:27 adelmann Exp $ 00257 ***************************************************************************/