src/Utility/IpplInfo.cpp

Go to the documentation of this file.
00001 // -*- C++ -*-
00002 /***************************************************************************
00003  *
00004  * The IPPL Framework
00005  * 
00006  * This program was prepared by PSI. 
00007  * All rights in the program are reserved by PSI.
00008  * Neither PSI nor the author(s)
00009  * makes any warranty, express or implied, or assumes any liability or
00010  * responsibility for the use of this software
00011  *
00012  * Visit http://www.acl.lanl.gov/POOMS for more details
00013  *
00014  ***************************************************************************/
00015 
00016 // -*- C++ -*-
00017 /***************************************************************************
00018  *
00019  * The IPPL Framework
00020  * 
00021  *
00022  * Visit http://people.web.psi.ch/adelmann/ for more details
00023  *
00024  ***************************************************************************/
00025 
00026 // include files
#include "Utility/IpplInfo.h"
#include "Utility/IpplStats.h"
#include "Utility/PAssert.h"
#include "Utility/RandomNumberGen.h"
#include "Utility/Pstring.h"
#include "Utility/vmap.h"
#include "DataSource/DataConnectCreator.h"
#include "Message/CommCreator.h"
#include "Message/Communicate.h"
#include "Profile/Profiler.h"
#include "IpplVersions.h"

// #include <string.h>
#include <limits.h>
#include <unistd.h>
#include <stdio.h>
#include <signal.h>
00043 
#if defined(IPPL_SUN)
// The SunOS system headers do not declare gethostname, so provide the
// C-linkage prototype ourselves.
extern "C" int gethostname(char*, int);
#elif defined(IPPL_TFLOP)
// On this platform an additional header would be needed for gethostname;
// the include is currently disabled.
// #include <sysent.h>
#endif
00053 
00054 
00056 // public static members of IpplInfo, initialized to default values
00057 Communicate *IpplInfo::Comm = new Communicate();
00058 IpplStats  *IpplInfo::Stats = new IpplStats();
00059 Inform *IpplInfo::Info = new Inform("Ippl");
00060 Inform *IpplInfo::Warn = new Inform("Warning", cerr);
00061 Inform *IpplInfo::Error = new Inform("Error", cerr, INFORM_ALL_NODES);
00062 Inform *IpplInfo::Debug = new Inform("**DEBUG**", cerr, INFORM_ALL_NODES);
00063 
00064 // should we use the optimization of deferring guard cell fills until
00065 // absolutely needed?  Can be changed to true by specifying the
00066 // flag --defergcfill
00067 bool IpplInfo::deferGuardCellFills = false;
00068 
00069 // should we use the compression capabilities in {[Bare]Field,LField}? Can be
00070 // changed to false by specifying the flag --nofieldcompression
00071 bool IpplInfo::noFieldCompression = false;
00072 
00073 // private static members of IpplInfo, initialized to default values
00074 int  IpplInfo::NumCreated = 0;
00075 bool IpplInfo::CommInitialized = false;
00076 bool IpplInfo::PrintStats = false;
00077 bool IpplInfo::NeedDeleteComm = false;
00078 int  IpplInfo::MyArgc = 0;
00079 char **IpplInfo::MyArgv = 0;
00080 int  IpplInfo::MyNode = 0;
00081 int  IpplInfo::TotalNodes = 1;
00082 int  IpplInfo::NumSMPs = 1;
00083 int* IpplInfo::SMPIDList = 0;
00084 int* IpplInfo::SMPNodeList = 0;
00085 bool IpplInfo::UseChecksums = false;
00086 bool IpplInfo::Retransmit = false;
00087 int  IpplInfo::MaxFFTNodes = 0;
00088 int  IpplInfo::ChunkSize = 512*1024; // 512K == 64K doubles
00089 bool IpplInfo::PerSMPParallelIO = false;
00090 bool IpplInfo::offsetStorage = false;
00091 bool IpplInfo::extraCompressChecks = false;
00092 bool IpplInfo::useDirectIO = false;
00093 
00094 
00095 #ifdef IPPL_COMM_ALARMS
00096 // A timeout quantity, in seconds, to allow us to wait a certain number
00097 // of seconds before we signal a timeout when we're trying to rece
00098 // a message.  By default, this will be zero; change it with the
00099 // --msgtimeout <seconds> flag
00100 unsigned int IpplInfo::CommTimeoutSeconds = 0;
00101 #endif
00102 
00103 
00105 // print out current state to the given output stream
00106 ostream& operator<<(ostream& o, const IpplInfo&) {
00107   o << "------------------------------------------\n";
00108   o << "IPPL Framework Application Summary:\n";
00109   o << "  Running on node " << IpplInfo::myNode();
00110   o << ", out of " << IpplInfo::getNodes() << " total.\n";
00111   o << "  Number of SMPs: " << IpplInfo::getSMPs() << "\n";
00112   o << "  Relative SMP node: " << IpplInfo::mySMPNode();
00113   o << ", out of " << IpplInfo::getSMPNodes(IpplInfo::mySMP());
00114   o << " nodes.\n";
00115   o << "  Communication method: " << IpplInfo::Comm->name() << "\n";
00116   o << "  Disc read chunk size: " << IpplInfo::chunkSize() << " bytes.\n";
00117   o << "  Deferring guard cell fills? ";
00118   o << IpplInfo::deferGuardCellFills << "\n";
00119   o << "  Turning off Field compression? ";
00120   o << IpplInfo::noFieldCompression << "\n";
00121   o << "  Offsetting storage? ";
00122   o << IpplInfo::offsetStorage << "\n";
00123   o << "  Using extra compression checks in expressions? ";
00124   o << IpplInfo::extraCompressChecks << "\n";
00125   o << "  Use per-SMP parallel IO? ";
00126   o << IpplInfo::perSMPParallelIO() << "\n";
00127   o << "  Computing message CRC checksums? ";
00128   o << IpplInfo::useChecksums() << "\n";
00129   o << "  Retransmit messages on error (only if checkums on)? ";
00130   o << IpplInfo::retransmit() << "\n";
00131 
00132 #ifdef IPPL_DIRECTIO
00133   o << "  Use Direct-IO? " << IpplInfo::useDirectIO << "\n";
00134 #endif
00135 
00136 #ifdef IPPL_COMM_ALARMS
00137   if (IpplInfo::getCommTimeout() > 0) {
00138     o << "  Allowed message receive timeout length (in seconds): ";
00139     o << IpplInfo::getCommTimeout() << "\n";
00140   }
00141 #endif
00142 
00143   o << "  Elapsed wall-clock time (in seconds): ";
00144   o << IpplInfo::Stats->getTime().clock_time() << "\n";
00145   o << "  Elapsed CPU-clock time (in seconds) : ";
00146   o << IpplInfo::Stats->getTime().cpu_time() << "\n";
00147   o << "------------------------------------------\n";
00148   return o;
00149 }
00150 
00151 
00153 // Constructor 1: parse argc, argv, and create proper Communicate object
00154 // The second argument controls whether the IPPL-specific command line
00155 // arguments are stripped out (the default) or left in (if the setting
00156 // is IpplInfo::KEEP).
00157 IpplInfo::IpplInfo(int& argc, char**& argv, int removeargs) {
00158 
00159 #ifdef __MWERKS__
00160   MetrowerksInitialize(argc,argv);
00161 #endif
00162 
00163   int i;                        // loop variables
00164   int connectoption = (-1);     // for connection method option
00165   int retargc;                  // number of args to return to caller
00166   char **retargv;               // arguments to return
00167   bool printsummary = false;    // print summary at end of constructor
00168 
00169   //Inform dbgmsg("IpplInfo(argc,argv)", INFORM_ALL_NODES);
00170 
00171   // determine whether we should strip out ippl-specific arguments, or keep
00172   bool stripargs = (removeargs != KEEP);
00173 
00174   // You can only specify argc, argv once; if it is done again, print a warning
00175   // and continue as if we had not given argc, argv.
00176   if ( CommInitialized ) {
00177     WARNMSG("Attempt to create IpplInfo with argc, argv again." << endl);
00178     WARNMSG("Using previous argc,argv settings." << endl);
00179   } else {
00180     // dbgmsg << "Starting initialization: argc = " << argc << ", " << endl;
00181     // for (unsigned int dbgi=0; dbgi < argc; ++dbgi)
00182     //   dbgmsg << "  argv[" << dbgi << "] = '" << argv[dbgi] << "'" << endl;
00183 
00184     // first make a pass through the arguments, figure out whether we should
00185     // run in parallel, and start up the parallel environment.  After this,
00186     // process all the other cmdline args
00187     string commtype;
00188     bool startcomm = false;
00189     bool comminit = true;         // do comm. system's init call
00190     int nprocs = (-1);          // num of processes to start; -1 means default
00191     TAU_PROFILE_INIT(argc, argv);
00192     for (i=1; i < argc; ++i) {
00193       if ( ( strcmp(argv[i], "--processes") == 0 ) ||
00194            ( strcmp(argv[i], "-procs") == 0 ) ) {
00195         // The user specified how many processes to use. This may not be useful
00196         // for all communication methods.
00197         if ( (i + 1) < argc && argv[i+1][0] != '-' && atoi(argv[i+1]) > 0 )
00198           nprocs = atoi(argv[++i]);
00199         else
00200           param_error(argv[i],
00201                       "Please specify a positive number of processes", 0);
00202       } else if ( ( strcmp(argv[i], "--commlib") == 0 ) ||
00203                   ( strcmp(argv[i], "-comm") == 0 ) ) {
00204         // The user specified what kind of comm library to use
00205         if ( (i + 1) < argc && argv[i+1][0] != '-' ) {
00206           commtype = argv[++i];
00207           startcomm = true;
00208         } else {
00209           param_error(argv[i], "Please use one of: ",
00210                       CommCreator::getAllLibraryNames(), 0);
00211         }
00212       } else if ( strcmp(argv[i], "--nocomminit") == 0 ) {
00213         // The user requested that we do not let the run-time system call
00214         // whatever initialization routine it might have (like MPI_Init).
00215         // This is in case another agency has already done the initialization.
00216         comminit = false;
00217       } else if ( ( strcmp(argv[i], "-p4amslave") == 0 ) ||
00218                   ( strcmp(argv[i], "-p4wd") == 0 ) ) {
00219         // HACK HACK HACK: this is a special test for the case where
00220         // this process was started by the latest mpich's mpirun.  If the
00221         // argument mpich gives to slave processes changes from -p4amslave
00222         // to something else, THIS TEST WILL NEED TO BE CHANGED.  Oh why
00223         // oh why couldn't mpich give ALL the cmdline args to ALL the
00224         // processes??? Ack.
00225         commtype = "mpi";
00226         startcomm = true;
00227       }
00228     }
00229 
00230     // create Communicate object now.
00231     // dbgmsg << "Setting up parallel environment ..." << endl;
00232     if (startcomm && nprocs != 0 && nprocs != 1) {
00233       // dbgmsg << "  commlibarg=" << commtype << endl;
00234       // dbgmsg << ", nprocs=" << nprocs << endl;
00235       Communicate *newcomm = CommCreator::create(commtype.c_str(),
00236                                                  argc, argv,
00237                                                  nprocs, comminit);
00238       if (newcomm == 0) {
00239         if (CommCreator::supported(commtype.c_str()))
00240           param_error("--commlib", "Could not initialize this ",
00241                       "communication library.", commtype.c_str());
00242         else if (CommCreator::known(commtype.c_str()))
00243           param_error("--commlib", "This communication library is not ",
00244                       "available.", commtype.c_str());
00245         else
00246           param_error("--commlib", "Please use one of: ",
00247                       CommCreator::getAllLibraryNames(), 0);
00248       } else {
00249         // success, we have a new comm object
00250         NeedDeleteComm = true;
00251         delete Comm;
00252         Comm = newcomm;
00253 
00254         // cache our node number and node count
00255         MyNode = Comm->myNode();
00256         TotalNodes = Comm->getNodes();
00257         find_smp_nodes();
00258 
00259         // advance the default random number generator
00260         IpplRandom.AdvanceSeed(Comm->myNode());
00261 
00262         // dbgmsg << "  Comm creation successful." << endl;
00263         // dbgmsg << *this << endl;
00264       }
00265     }
00266 
00267     // dbgmsg << "After comm init: argc = " << argc << ", " << endl;
00268     // for (unsigned int dbgi=0; dbgi < argc; ++dbgi)
00269     //   dbgmsg << "  argv[" << dbgi << "] = '" << argv[dbgi] << "'" << endl;
00270 
00271     // keep track of which arguments we do no use; these are returned
00272     retargc = 1;
00273     retargv = new char*[argc];
00274     retargv[0] = argv[0];       // we always return arg 0 (the exec. name)
00275 
00276     // if we're not stripping out arguments, just save all the args
00277     if (!stripargs)
00278       for (i=1; i < argc; ++i)
00279         retargv[retargc++] = argv[i];
00280 
00281     TAU_PROFILE_SET_NODE(MyNode);
00282     // Parse command-line options, looking for ippl options.  When found,
00283     // save their suggested values and use them at the end to create data, etc.
00284     for (i=1; i < argc; ++i) {
00285       if ( ( strcmp(argv[i], "--processes") == 0 ) ||
00286            ( strcmp(argv[i], "-procs") == 0 ) ) {
00287         // handled above
00288         if ( (i + 1) < argc && argv[i+1][0] != '-' && atoi(argv[i+1]) > 0 )
00289           ++i;
00290 
00291       } else if ( ( strcmp(argv[i], "--nocomminit") == 0 ) ) {
00292         // handled above, nothing to do here but skip the arg
00293 
00294       } else if ( ( strcmp(argv[i], "--summary") == 0 ) ) {
00295         // set flag to print out summary of Ippl library settings at the
00296         // end of this constructor
00297         printsummary = true;
00298 
00299       } else if ( ( strcmp(argv[i], "--version") == 0 ) ) {
00300         printVersion(false);
00301         IpplInfo::abort(0, 0);
00302 
00303       } else if ( ( strcmp(argv[i], "--checksums") == 0 ) ||
00304                   ( strcmp(argv[i], "--checksum") == 0 ) ) {
00305         UseChecksums = true;
00306 
00307       } else if ( ( strcmp(argv[i], "--retransmit") == 0 ) ) {
00308         Retransmit = true;
00309 
00310       } else if ( ( strcmp(argv[i], "--versionall") == 0 ) ||
00311                   ( strcmp(argv[i], "-vall") == 0 ) ) {
00312         printVersion(true);
00313         IpplInfo::abort(0, 0);
00314 
00315       } else if ( ( strcmp(argv[i], "--time") == 0 ) ||
00316                   ( strcmp(argv[i], "-time") == 0 ) ||
00317                   ( strcmp(argv[i], "--statistics") == 0 ) ||
00318                   ( strcmp(argv[i], "-stats") == 0 ) ) {
00319         // The user specified that the program stats be printed at
00320         // the end of the program.
00321         PrintStats = true;
00322 
00323       } else if ( ( strcmp(argv[i], "--info") == 0 ) ) {
00324         // Set the output level for informative messages.
00325         if ( (i + 1) < argc && argv[i+1][0] != '-' && atoi(argv[i+1]) >= 0 )
00326            Info->setOutputLevel(atoi(argv[++i]));
00327         else
00328           param_error(argv[i],
00329                       "Please specify an output level from 0 to 5", 0);
00330 
00331       } else if ( ( strcmp(argv[i], "--warn") == 0 ) ) {
00332         // Set the output level for warning messages.
00333         if ( (i + 1) < argc && argv[i+1][0] != '-' && atoi(argv[i+1]) >= 0 )
00334            Warn->setOutputLevel(atoi(argv[++i]));
00335         else
00336           param_error(argv[i],
00337                       "Please specify an output level from 0 to 5", 0);
00338 
00339       } else if ( ( strcmp(argv[i], "--error") == 0 ) ) {
00340         // Set the output level for error messages.
00341         if ( (i + 1) < argc && argv[i+1][0] != '-' && atoi(argv[i+1]) >= 0 )
00342            Error->setOutputLevel(atoi(argv[++i]));
00343         else
00344           param_error(argv[i],
00345                       "Please specify an output level from 0 to 5", 0);
00346 
00347       } else if ( ( strcmp(argv[i], "--debug") == 0 ) ) {
00348         // Set the output level for debug messages.
00349         if ( (i + 1) < argc && argv[i+1][0] != '-' && atoi(argv[i+1]) >= 0 )
00350            Debug->setOutputLevel(atoi(argv[++i]));
00351         else
00352           param_error(argv[i],
00353                       "Please specify an output level from 0 to 5", 0);
00354 
00355       } else if ( ( strcmp(argv[i], "--connect") == 0 ) ) {
00356         // Set the default external connection method
00357         if ( (i + 1) < argc && argv[i+1][0] != '-' )
00358           connectoption = ++i;
00359         else
00360           param_error(argv[i], "Please use one of: ",
00361                       DataConnectCreator::getAllMethodNames(), 0);
00362 
00363       } else if ( ( strcmp(argv[i], "--connectnodes") == 0 ) ) {
00364         // Set the number of nodes that are used in connections, by default
00365         if ( (i + 1) < argc && argv[i+1][0] != '-' && atoi(argv[i+1]) > 0 )
00366           DataConnectCreator::setDefaultNodes(atoi(argv[++i]));
00367         else
00368           param_error(argv[i],
00369                       "Please specify a number of nodes for connections > 0",
00370                       0);
00371 
00372       } else if ( ( strcmp(argv[i], "--commlib") == 0 ) ||
00373                   ( strcmp(argv[i], "-comm") == 0 ) ) {
00374         // handled above
00375         if ( (i + 1) < argc && argv[i+1][0] != '-' )
00376           ++i;
00377 
00378       } else if   ( strcmp(argv[i], "--profile") == 0 )  {
00379         // handled above in TAU_PROFILE_INIT(argc, argv);
00380         if ( (i + 1) < argc && argv[i+1][0] != '-' )
00381           ++i;
00382 
00383       } else if ( ( strcmp(argv[i], "--persmppario") == 0 ) ) {
00384         // Turn on the ability to use per-smp parallel IO
00385         PerSMPParallelIO = true;
00386 
00387       } else if ( ( strcmp(argv[i], "--nopersmppario") == 0 ) ) {
00388         // Turn off the ability to use per-smp parallel IO
00389         PerSMPParallelIO = false;
00390 
00391       } else if ( ( strcmp(argv[i], "--chunksize") == 0 ) ) {
00392         // Set the I/O chunk size, used to limit how many items
00393         // are read in or written to disk at one time.
00394         if ( (i + 1) < argc && argv[i+1][0] != '-' && atoi(argv[i+1]) >= 0 ) {
00395           ChunkSize = atoi(argv[++i]);
00396           char units = toupper(argv[i][strlen(argv[i])-1]);
00397           if (units == 'K')
00398             ChunkSize *= 1024;
00399           else if (units == 'M')
00400             ChunkSize *= 1024*1024;
00401           else if (units == 'G')
00402             ChunkSize *= 1024*1024*1024;
00403         } else {
00404           param_error(argv[i],
00405                       "Please specify a timeout value (in seconds)", 0);
00406         }
00407 #ifdef IPPL_COMM_ALARMS
00408       } else if ( ( strcmp(argv[i], "--msgtimeout") == 0 ) ) {
00409         // Set the timeout period for receiving messages
00410         if ( (i + 1) < argc && argv[i+1][0] != '-' && atoi(argv[i+1]) >= 0 )
00411           CommTimeoutSeconds = atoi(argv[++i]);
00412         else
00413           param_error(argv[i],
00414                       "Please specify a timeout value (in seconds)", 0);
00415 #endif
00416 
00417       } else if ( ( strcmp(argv[i], "--defergcfill") == 0 ) ) {
00418         // Turn on the defer guard cell fill optimization
00419         deferGuardCellFills = true;
00420 
00421       } else if ( ( strcmp(argv[i], "--offsetstorage") == 0 ) ) {
00422         // Turn on the offset-storage modification to LFields
00423         offsetStorage = true;
00424 
00425       } else if ( ( strcmp(argv[i], "--extracompcheck") == 0 ) ) {
00426         // Turn on the extra compression checks in expressions
00427         extraCompressChecks = true;
00428 
00429       } else if ( ( strcmp(argv[i], "--nofieldcompression") == 0 ) ) {
00430         // Turn off compression in the Field classes
00431         noFieldCompression = true;
00432 
00433       } else if ( ( strcmp(argv[i], "--directio") == 0 ) ) {
00434         // Turn on the use of Direct-IO, if possible
00435 #ifdef IPPL_DIRECTIO
00436         useDirectIO = true;
00437 #else
00438         param_error(argv[i],
00439                     "Direct-IO is not available in this build of IPPL", 0);
00440 #endif
00441       } else if ( ( strcmp(argv[i], "--maxfftnodes") == 0 ) ) {
00442         // Limit the number of nodes that can participate in FFT operations
00443         if ( (i + 1) < argc && argv[i+1][0] != '-' && atoi(argv[i+1]) > 0 )
00444           MaxFFTNodes = atoi(argv[++i]);
00445         else
00446           param_error(argv[i],
00447                       "Please specify a maximum number of FFT nodes > 0", 0);
00448 
00449       } else if ( ( strcmp(argv[i], "--help") == 0 ) ||
00450                   ( strcmp(argv[i], "-h") == 0 ) ||
00451                   ( strcmp(argv[i], "-?") == 0 ) ) {
00452         // print out summary of command line switches and exit
00453         INFOMSG("Usage: " << argv[0] << " [<option> <option> ...]\n");
00454         INFOMSG("       The possible values for <option> are:\n");
00455         INFOMSG("   --help          : Display this command-line summary.\n");
00456         INFOMSG("   --summary       : Print IPPL lib summary at start.\n");
00457         INFOMSG("   --processes <n> : Number of parallel nodes to use.\n");
00458         INFOMSG("   --commlib <x>   : Selects a parallel comm. library.\n");
00459         INFOMSG("                     <x> = ");
00460         INFOMSG(CommCreator::getAllLibraryNames() << "\n");
00461         INFOMSG("   --nocomminit    : IPPL does not do communication\n");
00462         INFOMSG("                     initialization, assume already done.\n");
00463         INFOMSG("   --connect <x>   : Select external connection method.\n");
00464         INFOMSG("                     <x> = ");
00465         INFOMSG(DataConnectCreator::getAllMethodNames() << "\n");
00466         INFOMSG("   --time          : Show total time used in execution.\n");
00467         INFOMSG("   --notime        : Do not show timing info (default).\n");
00468         INFOMSG("   --info <n>      : Set info message level.  0 = off.\n");
00469         INFOMSG("   --warn <n>      : Set warning message level.  0 = off.\n");
00470         INFOMSG("   --error <n>     : Set error message level.  0 = off.\n");
00471         INFOMSG("   --debug <n>     : Set debug message level.  0 = off.\n");
00472 #ifdef PROFILING_ON
00473         INFOMSG("   --profile <gr>  : Enable profiling for groups (e.g., M+P+io) \n");
00474         INFOMSG("             M - Message, P - Pete, V - Viz, A - Assign, I - IO\n");
00475         INFOMSG("             F - Field, L - Layout, S - Sparse, D - Domainmap \n");
00476         INFOMSG("             Ut - Utility, R - Region, Ff - FFT \n");
00477         INFOMSG("             U - User, 1 - User1, 2 - User2, 3 - User3, 4 - User4\n");
00478  
00479 #endif //PROFILING_ON
00480         INFOMSG("   --defergcfill   : Turn on deferred guard cell fills.\n");
00481         INFOMSG("   --nofieldcompression: Turn off compression in the Field classes.\n");
00482         INFOMSG("   --offsetstorage : Turn on random LField storage offsets.\n");
00483         INFOMSG("   --extracompcheck: Turn on extra compression checks in evaluator.\n");
00484 #ifdef IPPL_COMM_ALARMS
00485         INFOMSG("   --msgtimeout <n>: Set receive timeout time, in secs.\n");
00486 #endif
00487         INFOMSG("   --checksums     : Turn on CRC checksums for messages.\n");
00488         INFOMSG("   --retransmit    : Resent messages if a CRC error occurs.\n");
00489         INFOMSG("   --maxfftnodes <n> : Limit the nodes that work on FFT's.\n");
00490         INFOMSG("   --chunksize <n> : Set I/O chunk size.  Can end w/K,M,G.\n");
00491         INFOMSG("   --persmppario   : Enable on-SMP parallel IO option.\n");
00492         INFOMSG("   --nopersmppario : Disable on-SMP parallel IO option (default).\n");
00493 #ifdef IPPL_DIRECTIO
00494         INFOMSG("   --directio      : Use Direct-IO if possible.\n");
00495 #endif
00496         INFOMSG("   --version       : Print a brief version summary.\n");
00497         INFOMSG("   --versionall    : Print a detailed version summary.\n");
00498         INFOMSG(endl);
00499         IpplInfo::abort(0, 0);
00500 
00501       } else {
00502         // Unknown option; just ignore it.
00503         DEBUGMSG(level3 << "Unknown command-line option " << argv[i] << endl);
00504         if (stripargs)
00505           retargv[retargc++] = argv[i];
00506       }
00507     }
00508 
00509     // We can get on with creating and initializing all globally-used objects.
00510 
00511     // Select the default connection method
00512     if ( connectoption >= 0 ) {
00513       if ( ! DataConnectCreator::setDefaultMethod(argv[connectoption]) ) {
00514         if (DataConnectCreator::supported(argv[connectoption]))
00515           param_error(argv[connectoption - 1], "Could not initialize this ",
00516                       "connection.", argv[connectoption]);
00517         else if (DataConnectCreator::known(argv[connectoption]))
00518           param_error(argv[connectoption - 1],"This connection method is not ",
00519                       "available.", argv[connectoption]);
00520         else
00521           param_error(argv[connectoption - 1], "Please use one of: ",
00522                       DataConnectCreator::getAllMethodNames(), 0);
00523       }
00524     }
00525 
00526     // indicate back to the caller which arguments are left
00527     MyArgc = retargc;
00528     MyArgv = retargv;
00529     if (stripargs) {
00530       argc = retargc;
00531       argv = retargv;
00532     }
00533 
00534     // Inform dbgmsg("IpplInfo::IpplInfo", INFORM_ALL_NODES);
00535     // dbgmsg << "Created IpplInfo.  node = " << MyNode << " out of ";
00536     // dbgmsg << TotalNodes << ", commlib = " << Comm->name() << endl;
00537 
00538     // now, at end, start the timer running, and print out a summary if asked
00539     Stats->getTime().stop();
00540     Stats->getTime().clear();
00541     Stats->getTime().start();
00542   }
00543 
00544   // indicate we've created one more Ippl object
00545   CommInitialized = true;
00546   NumCreated++;
00547 
00548   // At the very end, print out a summary if requested
00549   if (printsummary)
00550     INFOMSG(*this << endl);
00551 }
00552 
00553 
00555 // Constructor 2: default constructor.
00556 IpplInfo::IpplInfo() {
00557   // just indicate we've also been created
00558   NumCreated++;
00559 }
00560 
00561 
00563 // Constructor 3: copy constructor.
00564 IpplInfo::IpplInfo(const IpplInfo&) {
00565   // just indicate we've also been created
00566   NumCreated++;
00567 }
00568 
00569 
00571 // Destructor: need to delete comm library if this is the last IpplInfo
00572 IpplInfo::~IpplInfo() {
00573   // indicate we have one less instance; if this is the last one,
00574   // close communication and clean up
00575   // Inform dbgmsg("IpplInfo::~IpplInfo", INFORM_ALL_NODES);
00576   // dbgmsg << "In destructor: Current NumCreated = " << NumCreated << endl;
00577   if ((--NumCreated) == 0) {
00578     // at end of program, print statistics if requested to do so
00579     if (PrintStats) {
00580       Inform statsmsg("Stats", INFORM_ALL_NODES);
00581       statsmsg << *this;
00582       printStatistics(statsmsg);
00583     }
00584 
00585     // Delete the communications object, if necessary, to shut down parallel
00586     // environment
00587     if (NeedDeleteComm) {
00588       // dbgmsg << "  Deleting comm object, since now NumCreated = ";
00589       // dbgmsg << NumCreated << endl;
00590       delete Comm;
00591       NeedDeleteComm = false;
00592     }
00593     CommInitialized = false;
00594 
00595     // delete other dynamically-allocated static objects
00596     delete [] MyArgv;
00597     if (SMPIDList != 0)
00598       delete [] SMPIDList;
00599     if (SMPNodeList != 0)
00600       delete [] SMPNodeList;
00601   }
00602 #ifdef __MWERKS__
00603   MetrowerksFinalize();
00604 #endif
00605 }
00606 
00607 
00609 // equal operator
00610 IpplInfo& IpplInfo::operator=(const IpplInfo&) {
00611   // nothing to do, we don't even need to indicate we've made another
00612   return *this;
00613 }
00614 
00615 
00617 // abort: kill the comm and exit the program, in an emergency.  This
00618 // will exit with an error code.  If the given exit code is < 0, the
00619 // program will call the system abort().  If the exit code is >= 0,
00620 // the program will call the system exit() with the given error code.
00621 void IpplInfo::abort(const char *msg, int exitcode) {
00622   // print out message, if one was provided
00623   if (msg != 0) {
00624     ERRORMSG(msg << endl);
00625   }
00626 
00627   // print out final stats, if necessary
00628   if (PrintStats) {
00629     Inform statsmsg("Stats", INFORM_ALL_NODES);
00630     statsmsg << IpplInfo();
00631     printStatistics(statsmsg);
00632   }
00633 
00634   // delete communication object, if necessary
00635   if (NeedDeleteComm) {
00636     NeedDeleteComm = false;
00637     delete Comm;
00638     Comm = 0;
00639   }
00640 
00641   // that's it, folks
00642   TAU_PROFILE_EXIT(msg);
00643   if (exitcode < 0)
00644     ::abort();
00645   else
00646     ::exit(exitcode);
00647 }
00648 
00649 
00651 // Signal to ALL the nodes that we should exit or abort.  If we abort,
00652 // a core file will be produced.  If we exit, no core file will be made.
00653 // The node which calls abortAllNodes will print out the given message;
00654 // the other nodes will print out that they are aborting due to a message
00655 // from this node.
00656 void IpplInfo::abortAllNodes(const char *msg, bool abortThisNode) {
00657   // print out message, if one was provided
00658   if (msg != 0) {
00659     ERRORMSG(msg << endl);
00660   }
00661 
00662   // print out final stats, if necessary
00663   if (PrintStats) {
00664     Inform statsmsg("Stats", INFORM_ALL_NODES);
00665     statsmsg << IpplInfo();
00666     printStatistics(statsmsg);
00667   }
00668 
00669   // broadcast out the kill message, if necessary
00670   if (getNodes() > 1)
00671     Comm->broadcast_others(new Message, IPPL_ABORT_TAG);
00672 
00673   // Now quit ourselves, if necessary
00674   if (abortThisNode)
00675     ::abort();
00676 }
00677 
00678 void IpplInfo::exitAllNodes(const char *msg, bool exitThisNode) {
00679   // print out message, if one was provided
00680   if (msg != 0) {
00681     ERRORMSG(msg << endl);
00682   }
00683 
00684   // print out final stats, if necessary
00685   if (PrintStats) {
00686     Inform statsmsg("Stats", INFORM_ALL_NODES);
00687     statsmsg << IpplInfo();
00688     printStatistics(statsmsg);
00689   }
00690 
00691   // broadcast out the kill message, if necessary
00692   if (getNodes() > 1)
00693     Comm->broadcast_others(new Message, IPPL_EXIT_TAG);
00694 
00695   // Now quit ourselves
00696   if (exitThisNode)
00697     exit(1);
00698 }
00699 
00700 
00702 // getNodes: return the number of 'Nodes' in use for the computation
00703 int IpplInfo::getNodes() {
00704   return TotalNodes;
00705 }
00706 
00707 
00709 // getContexts: return the number of 'Contexts' for the given node
00710 int IpplInfo::getContexts(const int n) {
00711   return Comm->getContexts(n);
00712 }
00713 
00714 
00716 // getProcesses: return the number of 'Processes' for the given Node and Context
00717 int IpplInfo::getProcesses(const int n, const int c) {
00718   return Comm->getProcesses(n, c);
00719 }
00720 
00721 
00723 // myNode: return which Node we are running on right now
00724 int IpplInfo::myNode() {
00725   return MyNode;
00726 }
00727 
00728 
00730 // getSMPs: return number of SMP's (each of which may be running
00731 // several processes)
00732 int IpplInfo::getSMPs() {
00733   return NumSMPs;
00734 }
00735 
00736 
00738 // getSMPNodes: return number of nodes on the SMP with the given index
00739 int IpplInfo::getSMPNodes(unsigned int smpindx) {
00740   int num = 0;
00741   if (SMPIDList == 0) {
00742     num = 1;
00743   } else {
00744     for (unsigned int i=0; i < TotalNodes; ++i)
00745       if (SMPIDList[i] == smpindx)
00746         num++;
00747   }
00748   return num;
00749 }
00750 
00751 
00753 // mySMP: return ID of my SMP (numbered 0 ... getSMPs() - 1)
00754 int IpplInfo::mySMP() {
00755   return (SMPIDList != 0 ? SMPIDList[MyNode] : 0);
00756 }
00757 
00758 
00760 // mySMPNode: return relative node number within the nodes on our SMP
00761 int IpplInfo::mySMPNode() {
00762   return (SMPNodeList != 0 ? SMPNodeList[MyNode] : 0);
00763 }
00764 
00765 
00767 // printVersion: print out a version summary.  If the argument is true,
00768 // print out a detailed listing, otherwise a summary.
00769 void IpplInfo::printVersion(bool printFull) {
00770   INFOMSG("IPPL Framework version " << version() << endl);
00771   INFOMSG("Last build date: " << compileDate() << " by user ");
00772   INFOMSG(compileUser() << endl);
00773   INFOMSG("Built for architecture: " << compileArch() << endl);
00774   INFOMSG("Built for machine: " << compileMachine() << endl);
00775   INFOMSG("Compile-time options: " << compileOptions() << endl);
00776   if (printFull) {
00777     INFOMSG("Compile line: " << compileLine() << endl);
00778     if (versionListSize() > 0) {
00779       INFOMSG("Summary of source files:" << endl);
00780       for (unsigned int n=0; n < versionListSize(); ++n)
00781         INFOMSG(versionList(n) << endl);
00782     }
00783   }
00784 }
00785 
00786 
00788 // here: as in stop in IpplInfo::here (in the debugger)
00789 void IpplInfo::here() 
00790 {
00791 }
00792 
00794 // print out statistics to the given Inform stream
00795 void IpplInfo::printStatistics(Inform &o) { Stats->print(o); }
00796 
00797 
00799 // version: return the name of this version of Ippl, as a string
00800 // (from Versions.h)
00801 const char *IpplInfo::version() {
00802   return ippl_version_name;
00803 }
00804 
00805 
00807 // versionList: return one of the summary lines from the source code
00808 // version list
00809 const char *IpplInfo::versionList(unsigned int n) {
00810   return ippl_version_log[n];
00811 }
00812 
00813 
00815 // versionListSize: return the number of source code files listed in
00816 // the versionList array.
00817 unsigned int IpplInfo::versionListSize() {
00818   return ippl_version_numfiles;
00819 }
00820 
00821 
00823 // compileArch: return the architecture on which this library was built
00824 // (from IpplVersions.h)
00825 const char *IpplInfo::compileArch() {
00826   return ippl_compile_arch;
00827 }
00828 
00829 
00831 // compileDate: return the date on which this library was prepared for
00832 // compilation (from IpplVersions.h)
00833 const char *IpplInfo::compileDate() {
00834   return ippl_compile_date;
00835 }
00836 
00837 
00839 // compileLine: return the compiler command used to compile each source file
00840 // (from IpplVersions.h)
00841 const char *IpplInfo::compileLine() {
00842   return ippl_compile_line;
00843 }
00844 
00845 
00847 // compileMachine: return the machine on which this library was
00848 // compiled (from IpplVersions.h)
00849 const char *IpplInfo::compileMachine() {
00850   return ippl_compile_machine;
00851 }
00852 
00853 
00855 // compileOptions: return the option list used to compile this library
00856 // (from IpplVersions.h)
00857 const char *IpplInfo::compileOptions() {
00858   return ippl_compile_options;
00859 }
00860 
00861 
00863 // compileUser: return the username of the user who compiled this
00864 // library (from IpplVersions.h)
00865 const char *IpplInfo::compileUser() {
00866   return ippl_compile_user;
00867 }
00868 
00869 
00871 // param_error: print out an error message when an illegal cmd-line
00872 // parameter is encountered.
00873 // Arguments are: parameter, error message, bad value (if any)
00874 void IpplInfo::param_error(const char *param, const char *msg,
00875                             const char *bad) {
00876   if ( param != 0 )
00877     ERRORMSG(param << " ");
00878   if ( bad != 0 )
00879     ERRORMSG(bad << " ");
00880   if ( msg != 0 )
00881     ERRORMSG(": " << msg);
00882   ERRORMSG(endl);
00883   abort(0, 0);
00884 }
00885 
00886 void IpplInfo::param_error(const char *param, const char *msg1,
00887                             const char *msg2, const char *bad) {
00888   if ( param != 0 )
00889     ERRORMSG(param << " ");
00890   if ( bad != 0 )
00891     ERRORMSG(bad << " ");
00892   if ( msg1 != 0 )
00893     ERRORMSG(": " << msg1);
00894   if ( msg2 != 0 )
00895     ERRORMSG(msg2);
00896   ERRORMSG(endl);
00897   abort(0, 0);
00898 }
00899 
00900 
00902 // find out how many SMP's there are, and which processor we are on
00903 // our local SMP (e.g., if there are two SMP's with 4 nodes each,
00904 // the process will have a node number from 0 ... 7, and an SMP node
00905 // number from 0 ... 3
00906 void IpplInfo::find_smp_nodes() {
00907   // Inform dbgmsg("IpplInfo::find_smp_nodes", INFORM_ALL_NODES);
00908 
00909   // create a tag for use in sending info to/from other nodes
00910   int tag = Comm->next_tag(IPPL_MAKE_HOST_MAP_TAG, IPPL_TAG_CYCLE);
00911 
00912   // create arrays to store the Node -> SMP mapping, and the relative
00913   // SMP node number
00914   if (SMPIDList != 0)
00915     delete [] SMPIDList;
00916   if (SMPNodeList != 0)
00917     delete [] SMPNodeList;
00918   SMPIDList   = new int[TotalNodes];
00919   SMPNodeList = new int[TotalNodes];
00920 
00921   // obtain the hostname and processor ID to send out
00922   char name[1024];
00923   if (gethostname(name, 1023) != 0) {
00924     ERRORMSG("Could not get hostname ... using localhost." << endl);
00925     strcpy(name, "localhost");
00926   }
00927   string NodeName(name,strlen(name));
00928   // dbgmsg << "My hostname is " << NodeName << endl;
00929 
00930   // all other nodes send their hostname to node 0; node 0 gets the names,
00931   // maps pnode ID's -> SMP ID's, then broadcasts all the necessary info to
00932   // all other nodes
00933   if (MyNode != 0) {
00934     // other nodes send their node name to node 0
00935     Message *msg = new Message;
00936     ::putMessage(*msg,NodeName);
00937     // dbgmsg << "Sending my name to node 0." << endl;
00938     Comm->send(msg, 0, tag);
00939 
00940     // receive back the SMPIDList mapping
00941     int node = 0;
00942     msg = Comm->receive_block(node, tag);
00943     PInsist(msg != 0 && node == 0,
00944       "SPMDList map not received from master in IpplInfo::find_smp_nodes!!");
00945     ::getMessage_iter(*msg, SMPIDList);
00946     ::getMessage_iter(*msg, SMPNodeList);
00947     delete msg;
00948   }
00949   else {
00950     // collect node names from everyone else, and then retransmit the collected
00951     // list.
00952     SMPIDList[0] = 0;
00953     vmap<string,int> smpMap;
00954     vmap<string,int>::iterator smpiter;
00955     smpMap.insert(vmap<string,int>::value_type(NodeName, 0));
00956     int unreceived = TotalNodes - 1;
00957     while (unreceived-- > 0) {
00958       // get the hostname from the remote node
00959       int node = COMM_ANY_NODE;
00960       Message *msg = Comm->receive_block(node, tag);
00961       PInsist(msg != 0,
00962         "Hostname not received by master in IpplInfo::find_smp_nodes!!");
00963       string nodename;
00964       ::getMessage(*msg,nodename);
00965       delete msg;
00966       // dbgmsg <<"Received name '"<< nodename <<"' from node "<< node<<endl;
00967 
00968       // put it in the mapping from hostname -> SMP ID, if necessary
00969       smpiter = smpMap.find(nodename);
00970       if (smpiter == smpMap.end())
00971         smpMap.insert(vmap<string,int>::value_type(nodename,smpMap.size()));
00972 
00973       // from the hostname, get the SMP ID number and store it in SMPIDList
00974       SMPIDList[node] = smpMap[nodename];
00975     }
00976 
00977     // convert from SMPID mapping -> relative node number
00978     for (int smpindx = 0; smpindx < smpMap.size(); ++smpindx) {
00979       unsigned int smpnodes = 0;
00980       for (unsigned int n=0; n < TotalNodes; ++n) {
00981         if (SMPIDList[n] == smpindx)
00982           SMPNodeList[n] = smpnodes++;
00983       }
00984     }
00985 
00986     // broadcast SMP info to other nodes
00987     if (TotalNodes > 1) {
00988       Message *msg = new Message;
00989       ::putMessage(*msg, SMPIDList, SMPIDList + TotalNodes);
00990       ::putMessage(*msg, SMPNodeList, SMPNodeList + TotalNodes);
00991       Comm->broadcast_others(msg, tag);
00992     }
00993   }
00994 
00995   // compute number of SMP's ... necessary for all but node 0, but we'll do
00996   // it for all
00997   NumSMPs = 0;
00998   for (unsigned int ns=0; ns < TotalNodes; ++ns)
00999     if (SMPNodeList[ns] == 0)
01000       NumSMPs++;
01001 
01002   // dbgmsg << "Results of SMP mapping: NumSMPs = " << NumSMPs << endl;
01003   // for (unsigned int n=0; n < TotalNodes; ++n) {
01004   //   dbgmsg << "  n=" << n << ", SMPID=" << SMPIDList[n] << ", SMPNode=";
01005   //   dbgmsg << SMPNodeList[n] << endl;
01006   // }
01007 }
01008 
01009 
01010 #ifdef IPPL_RUNTIME_ERRCHECK
01011 
01012 // special routine used in runtime debugging error detection
01013 void __C_runtime_error ( int trap_code, char *name, int line_no, ... ) {
01014   switch ( trap_code ) {
01015     /* Subscript range violations: */
01016   case BRK_RANGE:
01017     fprintf ( stderr, "error: Subscript range violation" );
01018     break;
01019 
01020     /* Others (unknown trap codes): */
01021   default:
01022     fprintf ( stderr, "error: Trap %d ", trap_code );
01023     break;
01024   }
01025 
01026   fprintf ( stderr, " in '%s'", name);
01027   if ( line_no != -1 )
01028     fprintf ( stderr, " (line %d)", line_no );
01029   exit (99);
01030 }
01031 #endif
01032 
01033 
01034 /***************************************************************************
01035  * $RCSfile: IpplInfo.cpp,v $   $Author: adelmann $
01036  * $Revision: 1.1.1.1 $   $Date: 2003/01/23 07:40:33 $
01037  * IPPL_VERSION_ID: $Id: IpplInfo.cpp,v 1.1.1.1 2003/01/23 07:40:33 adelmann Exp $ 
01038  ***************************************************************************/

Generated on Mon Jan 16 13:23:59 2006 for IPPL by  doxygen 1.4.6