src/expde/grid/parallel.h

Go to the documentation of this file.
00001 
00002 //    expde: expression templates for partial differential equations.
00003 //    Copyright (C) 2001  Christoph Pflaum
00004 //    This program is free software; you can redistribute it and/or modify
00005 //    it under the terms of the GNU General Public License as published by
00006 //    the Free Software Foundation; either version 2 of the License, or
00007 //    (at your option) any later version.
00008 //
00009 //    This program is distributed in the hope that it will be useful,
00010 //    but WITHOUT ANY WARRANTY; without even the implied warranty of
00011 //    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012 //    GNU General Public License for more details.
00013 //
00014 //                 SEE  Notice1.doc made by 
00015 //                 LAWRENCE LIVERMORE NATIONAL LABORATORY
00016 //
00017 
00018 // ------------------------------------------------------------
00019 // parallel.h
00020 //
00021 // ------------------------------------------------------------
00022 
00023 #ifndef PARALLEL_H_
00024 #define PARALLEL_H_
00025        
00026 class Parallel_Info;
00027 
00029 // Erklaerung:
00030 // 3-dimensionale strukturierte Aufteilung der Prozessoren
00031 // Hierzu gibt es verschiedene Moeglichkeiten; es wird eine optimale gesucht
00032 // Eine Moeglichkeit der Aufteilung ist in einem Objekt vom Typ Partitioning 
00033 // abgespeichert.
00034 // Die unterschiedlichen Aufteilung werden durch
00035 // a) durch unterschiedlich feine Aufteilungen der Tiefe p_level
00036 //
00037 
00038 
00039 
00040 
00041 //  Partitioning
00042 // ---------------------------------
00043 
00044 // hier gehoert noch folgendes rein:
00045 // n_nec:   Anzahl der notwendigen Prozessoren
00046 // p_level: Feinheit des Gitters zur Aufteilung der Prozessoren
00047 // opt_I,opt_J,opt_K 
00048 
00049 class Partitioning {
00050  private:
00051   bool *start;   // matrix of size^3=(2^p_level)^3
00052   int size;
00053  public:
00054   Partitioning(int level);
00055   ~Partitioning();
00056   void Put_proc(int i, int j, int k);
00057   bool Is_this_a_proc(Index3D Ind);
00058   void Print();
00059 };
00060 
00061 //  quality of decomposition
00062 // ---------------------------------
00063 class quality {
00064  public:
00065   quality(int n_p, int m_i, int a_i) : p_nec(n_p), minimal_intersec(m_i),
00066     maximal_intersec(a_i) {};
00067   quality() : p_nec(-1), minimal_intersec(0), maximal_intersec(0) {};
00068   bool operator<(quality other);
00069   bool positive_quality() { return p_nec>0; };
00070   int  Give_p_nec() { return p_nec; };
00071   void Print() {
00072     cout << " Quality: " << p_nec << " processors and itersec: "
00073          << minimal_intersec << endl;
00074   }
00075  private:
00076   int p_nec;            // number of necessary processors
00077   int minimal_intersec; // number of minimal intesection 
00078                         // for boundary processors
00079   int maximal_intersec; // number of minimal intesection 
00080 };
00081 
00082 inline bool quality::operator<(quality other) {
00083   /*
00084   if(other.p_nec>p_nec)  return true;
00085   if(other.p_nec==p_nec) return other.minimal_intersec>minimal_intersec;
00086   return false;
00087   */
00088   if(other.p_nec<=0) return false;
00089   if(p_nec<=0)       return true;
00090   if(other.maximal_intersec<maximal_intersec)  return true;
00091   if(other.maximal_intersec==maximal_intersec) 
00092     return other.minimal_intersec>minimal_intersec;
00093   return false;
00094 }
00095 
00096 //  rough domain
00097 // ---------------------------------
00098 //  rough_domain is constructed by looking at points
00099 //  it stores all cells adjecent to an interior point to be
00100 //  a interior cell, these cells must be covered by the processors
00101 class rough_domain {
00102  private:
00103   int p_level;       // level procesors can be 0 <= p_level_min, ...
00104                      // n_parallel = p_level_min+1,...
00105   int refine_level;  // refine_level    can be 0,1, ...
00106   int n_interior;    // Zweipotenz(p_level+refine_level)
00107   int n_side;        // Zweipotenz(refine_level)
00108   bool   *start;     // rough domain informations
00109                      // size: n_interior-1 x n_interior-1 x n_interior-1
00110                      // entspricht: ]0,1[^3
00111   void set(int i, int j, int k);
00112   int l;
00113   int ll;
00114   // periodic
00115   bool is_periodic;
00116 
00117  public:
00118   rough_domain(int P_level, int Refine_level, All_Domains* dom,
00119                D3vector A_bounding, double H_bounding);
00120   ~rough_domain();
00121 
00122   int Give_n_side() { return n_side; }
00123   /*
00124   int p_necessary(int I, int J, int K,
00125                   int p);               // number of processors necessary for
00126                                         // I,J,K = 0,1, ..., n_side-1
00127   */
00128   quality calc_quality(int I, int J, int K,
00129                        int p);          // number of processors necessary for
00130                                         // I,J,K = 0,1, ..., n_side-1
00131   int Calc_optimal_partitioning(int p,  // p number of processors
00132                                 int& opt_I, int& opt_J, int& opt_K,
00133                                 Partitioning* parti);
00134   void Set_partitioning(int opt_I,int opt_J,int opt_K,Partitioning* parti);
00135   bool In_domain(int i, int j, int k);               // cell indomain ??
00136   int Give_n_interior() { return n_interior; };
00137 };
00138 
00139 
00140 
00141 class Grid_base;
00142 
00143 //  Fuer Hashtable proc: Processes
00144 // ---------------------------------
00145 class Point_hashtable_proc {
00146   friend class Parallel_Info;
00147   friend class Grid_base;
00148  private:
00149   Index3D ind;      // midpoint of processes
00150   int num_proc;     // number   of process
00151 
00152   Point_hashtable_proc* next;
00153  public:
00154   bool used_on_coarser_grid;
00155   bool interior_processor;    // braucht man das ??????
00156 
00157   //  I Index des  Mittelpunktes der Zelle oder Kante
00158   Point_hashtable_proc(Index3D I) : ind(I) { 
00159     num_proc = -1;
00160     next = NULL;
00161     interior_processor   = true;
00162     used_on_coarser_grid = false;
00163   };
00164   Index3D Give_Index()    { return ind; }
00165   int     Give_num_proc() { return num_proc; }
00166   void    Set_num_proc(int p) {  num_proc=p; }
00167 
00168  ~Point_hashtable_proc() {
00169     if(next!=NULL) delete next;
00170   };
00171 };
00172 
00173 inline int hashtable_proc_function(int x, int y, int z, int length) {
00174   return (x + 101*y + 10007*z) % length;
00175 }
00176 
00177 //  Fuer Iteration durch Hashtable proc:
00178 // -----------------------------------------
00179 // folgendes define verwendet : point_proc:
00180 #define iterate_hash_proc  for(p_iter=0;p_iter<hashtable_proc_leng;++p_iter) for(point_proc=hashtable_proc_start[p_iter];point_proc!=NULL;point_proc=point_proc->next) 
00181 
00182 //  Fuer Hashtable proc_corner:
00183 // ---------------------------------
00184 class Point_hashtable_proc_corner {
00185   friend class Parallel_Info;
00186   friend class Grid_base;
00187  private:
00188   Index3D ind;      // corner point 
00189   int num_proc;     // number of responsible process
00190 
00191   Point_hashtable_proc_corner* next;
00192  public:
00193   int maximal_level; // with respect to points
00194 
00195   //  I Index des  Mittelpunktes der Zelle oder Kante
00196   Point_hashtable_proc_corner(Index3D I) : ind(I) { 
00197     num_proc = -1;
00198     next = NULL;
00199     maximal_level = 0;
00200   };
00201   Index3D Give_Index()    { return ind; }
00202   int     Give_num_proc() { return num_proc; }
00203   void    Set_num_proc(int p) {  num_proc=p; }
00204 
00205  ~Point_hashtable_proc_corner() {
00206     if(next!=NULL) delete next;
00207   };
00208 };
00209 
00210 inline int hashtable_proc_corner_function(int x, int y, int z, int length) {
00211   return (x + 101*y + 10007*z) % length;
00212 }
00213 
00214 //  Fuer Iteration durch Hashtable proc_corner: 
00215 // -----------------------------------------
00216 // folgendes define verwendet : point_proc:
00217 #define iterate_hash_proc_corner  for(p_iter_corner=0;p_iter_corner<hashtable_proc_corner_leng;++p_iter_corner) for(point_proc_corner=hashtable_proc_corner_start[p_iter_corner];point_proc_corner!=NULL;point_proc_corner=point_proc_corner->next) 
00218 
00219 
00220 
00221 
00222 
00223 
00224 
00225 
00226 //  Class describing processes
00227 // ----------------------------
00228 
00229 //class Parallel_Info : public Grid_gen_parameters {
00230 class Parallel_Info  {
00231  public:
00232   // function for testing pre_Grid generation
00233   Parallel_Info(int n_max, All_Domains* dom, int number_processes, 
00234                 Grid_gen_parameters& gpara);
00235   Parallel_Info(double meshsize, All_Domains* dom, int number_processes, 
00236                 Grid_gen_parameters& gpara);
00237 
00238   int Give_my_rank() const { return my_rank; };
00239   Index3D Give_my_index() const { return my_index; }
00240   Index3D Give_my_level_index(int level); // level here with respect to 
00241                                           // cellpoint
00242   Index3D Give_level_index_of(int rang, int level);
00243   int Give_coarsest_level_of(int rang);
00244    
00245   int Give_n_parallel() const { return n_parallel; } 
00246   int Give_my_coarsest_level()            // coarsest level for this process
00247     { return my_coarsest_level; }         // with respect to points
00248 
00249   bool exists_process(Index3D I);
00250   bool I_am_active();
00251 
00252   inline int give_number_of_processes();
00253   inline int give_number_of_active_processes();
00254   void process_info();
00255 
00256   void Print_processors();
00257   void Print_region_processes_UCD_normalized(ofstream *Datei);
00258 
00259   MPI_Comm Give_comm() { return comm; };
00260   bool        I_am_responsible_for(Index3D I) { return I<my_index; }
00261   inline bool I_am_responsible_for(Index3D I, int level); // level w.r. points
00262   bool        I_am_responsible_for(IndexBo Ib);
00263 
00264   inline All_Domains* Give_domain() { return domain; }
00265   
00266   ~Parallel_Info();
00267 
00268  protected:
00269   Parallel_Info(All_Domains* dom, MPI_Comm comm);
00270 
00271   // ^^^^^^^^^^^^^^^^^^^^  Part A: Processes ^^^^^^^^^^^^^^^^^^^
00272   void Generate_processes();
00273   void Generate_seriel_process();
00274 
00275   // informations for all processes  ///////////////////////////////
00276   All_Domains* domain;  // domain
00277   MPI_Comm comm;
00278   int p;          // number of processes
00279   int n_parallel; 
00280 
00281   void Add_proc_cell(Index3D I);               // Add cell for process
00282   void Add_proc_corner(Index3D I, int l);      // Add corner process
00283 
00284   // transform computational coordinate in physical coordinate
00285   inline D3vector coord_in_domain(const Index3D  I) const;
00286 
00287   // for bounding box
00288   double   H_bounding;   
00289   D3vector A_bounding;
00290 
00291   // for making grid through one grid point
00292   void make_grid_with_grid_point(D3vector gp, double fms);
00293 
00294   // for old bounding box and local box
00295   bool contained_in_old_bounding_box(Index3D I) const;
00296   bool contained_in_boxes(Index3D I) const;  // local and old bounding
00297 
00298   // maximal and minimal level
00299   int max_level;
00300   int min_level;
00301 
00302   // informations for all processes  ///////////////////////////////
00303   int my_rank;             // my rank of process
00304   Index3D my_index;        // my index
00305   int my_coarsest_level;   // coarsest level for this process
00306                            // with respect to points
00307   Index3D *my_level_index; // my index in coarser grids than n_parallel
00308   Index3D Give_next_on_level(int d,          // 0 <= d < 26
00309                              Index3D me);    // next proc index
00310                                              // if it exists or not  
00311 
00312   // void Set_coarser_index_and_level();
00313   void Calc_coarse_processors();
00314   void Recursion_calc_coarse_processors(Index3D I);
00315   
00316 
00317   // ^^^^^^^^^^^^^^^^^^^^  Part  B:  Communication ^^^^^^^^^^^^^^^^
00318   // buffers used for several communications
00319   int* Give_buffer_send_info(int l);
00320   int* Give_buffer_receive_info(int l);
00321 
00322   // communication general in direction d (dir_3D)
00323   void Set_rank_3D();
00324   int give_next_rank_source(dir_3D d,int level);       // -1 if not exists
00325   int give_next_rank_destination(dir_3D d,int level);  // -1 if not exists
00326 
00327   // communication general in edge direction d (Edges_cell)
00328   int give_next_rank_source(Edges_cell ed);       // -1 if not exists
00329   int give_next_rank_destination(Edges_cell ed);  // -1 if not exists
00330   int give_next_rank_source(Edges_cell ed, int level);      // -1 if not exists
00331   int give_next_rank_destination(Edges_cell ed, int level); // -1 if not exists
00332 
00333   // communication general in corner direction d (dir_sons)
00334   int give_next_rank_source(dir_sons ed);       // -1 if not exists
00335   int give_next_rank_destination(dir_sons ed);  // -1 if not exists
00336   int give_next_rank_source(dir_sons ed, int level);       // -1 if not exists
00337   int give_next_rank_destination(dir_sons ed, int level);  // -1 if not exists
00338 
00339   // for communication during expression template evaluation
00340   // -------------------------------------------------------
00341   // I. storage for faces:
00342   // a) number of points for each face
00343   int** number_receive_face;
00344   int** number_send_face;
00345      // number of points for each face for prolongation
00346   int** number_receive_face_prolongation;
00347   int** number_send_face_prolongation;
00348   // b) where the start of "var" is
00349   double**** var_receive;
00350   double**** var_send;
00351      // where the start of "var" for prolongation is
00352   double**** var_receive_prolongation;
00353   double**** var_send_prolongation;
00354   // c) buffer
00355   double* Give_buffer_send(int d, int l); 
00356   double* Give_buffer_receive(int d, int l);
00357 
00358 
00359   // Explaination:
00360   // double** xxxx[num][level][direction];
00361 
00362   // interior points at faces: contained in var_receive[...]
00363   //                                 and in    var_send[...]
00364   // Zellfreiheitsgrade and Randpunkte:
00365   //                           contained in var_receive[E]  [N]  [T]
00366   //                                 and in    var_send[E]  [N]  [T]
00367   // II. storage for boundary stencil:
00368   // a) number of points for each direction
00369   int number_receive_bo_stencil[Max_number_levels][26];
00370   int number_send_bo_stencil[Max_number_levels][26];
00371   // b) where the start of "stencil" is
00372   double** bo_stencil_receive[Max_number_levels][26];
00373   double** bo_stencil_send[Max_number_levels][26];
00374   // c) buffer
00375   // same as above
00376 
00377   // II. storage for stencil:
00378   // a) number of points for each direction
00379   int number_receive_stencil[Max_number_levels][26];
00380   int number_send_stencil[Max_number_levels][26];
00381   // b) where the start of "stencil" is
00382   double** stencil_receive[Max_number_levels][26];
00383   double** stencil_send[Max_number_levels][26];
00384   // c) buffer
00385   // same as above
00386 
00387 
00388 
00389 
00390 // private:
00391   void Recursion_study_cells(Index3D I, Partitioning* parti);
00392   // for partitioning
00393   int refine_level;       // 0,1,2,... optimzation parameter for partitioning
00394   int p_level;            // level procesors can be 0 <= p_level_min, ...
00395                           // n_parallel = p_level_min+1,...
00396   int number_of_active_procs; // number of active processors
00397 
00398   // for old bounding box
00399   Index1D Ixl, Ixr;
00400   Index1D Iyl, Iyr;
00401   Index1D Izl, Izr;
00402 
00403   double  pxl, pxr;
00404   double  pyl, pyr;
00405   double  pzl, pzr;
00406 
00407   // values for a local box
00408   IndexSurface coarse_faces[6];
00409   bool         info_coarse_faces[6];
00410   IndexEdge    coarse_edges[12];
00411   bool         info_coarse_edges[12];
00412   Index3D      coarse_corners[8];
00413   bool         info_coarse_corners[8];
00414 
00415   // buffers used for several communications
00416   int* buffer_send_info;
00417   int* buffer_receive_info;
00418   int length_send_info;
00419   int length_receive_info;
00420 
00421   // for communication
00422   // ------------------
00423   // next processor for level >= n_parallel-1
00424   int next_rank_source[6];
00425   int next_rank_destination[6];
00426 
00427   int next_rank_source_edges[12];
00428   int next_rank_destination_edges[12];
00429 
00430   int next_rank_source_corners[8];
00431   int next_rank_destination_corners[8];
00432 
00433   // next processor for level < n_parallel-1
00434   int* next_rank_source_coarse[6];
00435   int* next_rank_destination_coarse[6];
00436 
00437   int* next_rank_source_coarse_edges[12];
00438   int* next_rank_destination_coarse_edges[12];
00439 
00440   int* next_rank_source_coarse_corners[8];
00441   int* next_rank_destination_coarse_corners[8];
00442 
00443   // buffers ...
00444   double* buffer_send[26];
00445   double* buffer_receive[26];
00446   int length_send[26];
00447   int length_receive[26];
00448 
00449   // Hashtable
00450   // for iteration
00451   int p_iter;
00452   int p_iter_corner;
00453   // Funktionen zur Anwendung:
00454   int  Give_proc_number(Index3D I);
00455   bool Exists_proc(Index3D I);
00456   int  Give_proc_corner_number(Index3D I);
00457   bool Exists_proc_corner(Index3D I);
00458 
00459 
00460   Point_hashtable_proc* hashtable_proc_point(Index3D I) const;
00461   Point_hashtable_proc_corner* hashtable_proc_corner_point(Index3D I) const;
00462 
00463   void Initialize_hash_proc(int lenght);
00464   void Initialize_hash_proc_corner(int lenght);
00465 
00466   // Speicher des hashtable_proc:
00467   Point_hashtable_proc* point_proc;          //fuer Iterationen
00468   int hashtable_proc_leng;               // Laenge des hashtable
00469   int hashtable_proc_occ;                // Besetzungszahl des hashtable
00470   Point_hashtable_proc** hashtable_proc_start;  //Zeiger auf Tabelle
00471 
00472   // Speicher des hashtable_proc_corner:
00473   Point_hashtable_proc_corner* point_proc_corner;  //fuer Iterationen
00474   int hashtable_proc_corner_leng;                  // Laenge des hashtable
00475   int hashtable_proc_corner_occ;                // Besetzungszahl des hashtable
00476   Point_hashtable_proc_corner** hashtable_proc_corner_start; 
00477                                                 //Zeiger auf Tabelle
00478 };
00479 
00480 
00481 
00483 // Implementierung einiger Memberfunktionen
00485 
00486 // transform computational coordinate in physical coordinate
00487 inline D3vector Parallel_Info::coord_in_domain(const Index3D  I) const {
00488   D3vector V;
00489   V = I.coordinate();
00490   return D3vector(A_bounding.x + V.x*H_bounding,
00491                   A_bounding.y + V.y*H_bounding,
00492                   A_bounding.z + V.z*H_bounding);
00493 };
00494 
00495 inline int Parallel_Info::give_number_of_processes() {
00496   return p;
00497 };
00498 
00499 inline int Parallel_Info::give_number_of_active_processes() {
00500   return number_of_active_procs;
00501 }
00502 
00503 inline bool Parallel_Info::I_am_active() {
00504   if(parallel_version)
00505     return my_rank<number_of_active_procs;
00506   else 
00507     return true;
00508 }
00509 
00510 // hash proc
00511 
00512 inline Point_hashtable_proc* Parallel_Info::hashtable_proc_point(Index3D I)
00513      const {
00514   Point_hashtable_proc* point;
00515   point = hashtable_proc_start
00516     [hashtable_proc_function(I.ind_x.index,I.ind_y.index,I.ind_z.index,
00517                          hashtable_proc_leng)];
00518   while(point!=NULL) {
00519     if(point->ind==I) return point;
00520     point = point->next;
00521   }
00522   return NULL;
00523 }
00524 
00525 inline Point_hashtable_proc_corner* 
00526 Parallel_Info::hashtable_proc_corner_point(Index3D I)
00527      const {
00528   static Point_hashtable_proc_corner* point;
00529   point = hashtable_proc_corner_start
00530     [hashtable_proc_corner_function(I.ind_x.index,I.ind_y.index,I.ind_z.index,
00531                                     hashtable_proc_corner_leng)];
00532   while(point!=NULL) {
00533     if(point->ind==I) return point;
00534     point = point->next;
00535   }
00536   return NULL;
00537 }
00538 
00539 inline bool Parallel_Info::exists_process(Index3D I) {
00540   return hashtable_proc_point(I)!=NULL;
00541 }
00542 
00543 
00544 inline int Parallel_Info::give_next_rank_source(dir_3D d,int level) {
00545   if(level>=n_parallel-1)
00546     return next_rank_source[d];
00547   return next_rank_source_coarse[d][level];
00548 }
00549 
00550 inline int Parallel_Info::give_next_rank_destination(dir_3D d,int level) {
00551   if(level>=n_parallel-1)
00552     return next_rank_destination[d];
00553   return next_rank_destination_coarse[d][level];
00554 }
00555 
00556 inline int Parallel_Info::give_next_rank_source(Edges_cell d,
00557                                                 int level) {
00558   if(level>=n_parallel-1)
00559     return next_rank_source_edges[d];
00560   return next_rank_source_coarse_edges[d][level];
00561 }
00562 
00563 inline int Parallel_Info::give_next_rank_destination(Edges_cell d,
00564                                                      int level) {
00565   if(level>=n_parallel-1)
00566     return next_rank_destination_edges[d];
00567   return next_rank_destination_coarse_edges[d][level];
00568 }
00569 
00570 inline int Parallel_Info::give_next_rank_source(dir_sons d,
00571                                                 int level) {
00572   if(level>=n_parallel-1)
00573     return next_rank_source_corners[d];
00574   return next_rank_source_coarse_corners[d][level];
00575 }
00576 
00577 inline int Parallel_Info::give_next_rank_destination(dir_sons d,
00578                                                      int level) {
00579   if(level>=n_parallel-1)
00580     return next_rank_destination_corners[d];
00581   return next_rank_destination_coarse_corners[d][level];
00582 }
00583 
00584 
00585 
00586 inline bool Parallel_Info::I_am_responsible_for(IndexBo Ib) {
00587   return (Ib.I.next(Ib.d,MAX(n_parallel,Ib.I.Tiefe())+1)) < my_index;
00588 }
00589 
00590 inline bool Parallel_Info::I_am_responsible_for(Index3D I, int level) {
00591   // level w.r. points
00592   return I < Give_my_level_index(level+1);
00593 }
00594 
00595 #endif
00596      

Generated on Mon Jan 16 13:23:41 2006 for IPPL by  doxygen 1.4.6