FEMAXX (Finite Element Maxwell Eigensolver): src/eigsolv/KnyazevLOBPCG.cpp Source File

00001 //**************************************************************************
00002 //
00003 //                                 NOTICE
00004 //
00005 // This software is a result of the research described in the report
00006 //
00007 // " A comparison of algorithms for modal analysis in the absence 
00008 //   of a sparse direct method", P. Arbenz, R. Lehoucq, and U. Hetmaniuk,
00009 //  Sandia National Laboratories, Technical cout SAND2003-1028J.
00010 //
00011 // It is based on the Epetra, AztecOO, and ML packages defined in the Trilinos
00012 // framework ( http://software.sandia.gov/trilinos/ ).
00013 //
00014 // The distribution of this software follows also the rules defined in Trilinos.
00015 // This notice shall be marked on any reproduction of this software, in whole or
00016 // in part.
00017 //
00018 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00019 // license for use of this work by or on behalf of the U.S. Government.
00020 //
00021 // This program is distributed in the hope that it will be useful, but
00022 // WITHOUT ANY WARRANTY; without even the implied warranty of
00023 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
00024 //
00025 // Code Authors: U. Hetmaniuk (ulhetma@sandia.gov), R. Lehoucq (rblehou@sandia.gov)
00026 //
00027 //**************************************************************************
00028 
00029 #include "KnyazevLOBPCG.h"
00030 
00031 
00032 KnyazevLOBPCG::KnyazevLOBPCG(const Epetra_Comm &_Comm, const Epetra_Operator *KK,
00033                              const Epetra_Operator *PP, 
00034                              double _tol, int _maxIter, int _verb) :
00035     myVerify(_Comm),
00036     callBLAS(),
00037     callFortran(),
00038     modalTool(_Comm),
00039     mySort(),
00040     MyComm(_Comm),
00041     K(KK),
00042     M(0),
00043     Prec(PP),
00044     MyWatch(_Comm),
00045     tolEigenSolve(_tol),
00046     maxIterEigenSolve(_maxIter),
00047     blockSize(0),
00048     normWeight(0),
00049     verbose(_verb),
00050     historyCount(0),
00051     resHistory(0),
00052     memRequested(0.0),
00053     highMem(0.0),
00054     massOp(0),
00055     numRestart(0),
00056     outerIter(0),
00057     precOp(0),
00058     residual(0),
00059     stifOp(0),
00060     timeLocalProj(0.0),
00061     timeLocalSolve(0.0),
00062     timeLocalUpdate(0.0),
00063     timeMassOp(0.0),
00064     timeNorm(0.0),
00065     timeOuterLoop(0.0),
00066     timePostProce(0.0),
00067     timePrecOp(0.0),
00068     timeResidual(0.0),
00069     timeStifOp(0.0)
00070 {
00071 
00072 }
00073 
00074 
00075 KnyazevLOBPCG::KnyazevLOBPCG(const Epetra_Comm &_Comm, const Epetra_Operator *KK,
00076                              const Epetra_Operator *MM, const Epetra_Operator *PP,
00077                              Projector *YHCP,
00078                              double _tol, int _maxIter, int _verb,
00079                              double *_weight) :
00080     myVerify(_Comm),
00081     callBLAS(),
00082     callFortran(),
00083     modalTool(_Comm),
00084     mySort(),
00085     MyComm(_Comm),
00086     K(KK),
00087     M(MM),
00088     Prec(PP),
00089     YHC(YHCP),
00090     MyWatch(_Comm),
00091     tolEigenSolve(_tol),
00092     maxIterEigenSolve(_maxIter),
00093     blockSize(0),
00094     normWeight(_weight),
00095     verbose(_verb),
00096     historyCount(0),
00097     resHistory(0),
00098     memRequested(0.0),
00099     highMem(0.0),
00100     massOp(0),
00101     numRestart(0),
00102     outerIter(0),
00103     precOp(0),
00104     residual(0),
00105     stifOp(0),
00106     timeLocalProj(0.0),
00107     timeLocalSolve(0.0),
00108     timeLocalUpdate(0.0),
00109     timeMassOp(0.0),
00110     timeNorm(0.0),
00111     timeOuterLoop(0.0),
00112     timePostProce(0.0),
00113     timePrecOp(0.0),
00114     timeResidual(0.0),
00115     timeStifOp(0.0)
00116 {
00117 
00118 }
00119 
00120 
00121 KnyazevLOBPCG::~KnyazevLOBPCG() {
00122 
00123     if (resHistory) {
00124         delete[] resHistory;
00125         resHistory = 0;
00126     }
00127 
00128 }
00129 
00130 
00131 int KnyazevLOBPCG::solve(int numEigen, Epetra_MultiVector &Q, double *lambda) {
00132 
00133     // Computes the smallest eigenvalues and the corresponding eigenvectors
00134     // of the generalized eigenvalue problem
00135     // 
00136     //      K X = M X Lambda
00137     // 
00138     // using a Locally Optimal Block Preconditioned Conjugate Gradient method. 
00139     //
00140     // Note that if M is not specified, then  K X = X Lambda is solved.
00141     // Note that the blocksize is equal to the number of requested eigenvalues.
00142     // 
00143     // Ref: A. Knyazev, "Toward the optimal preconditioned eigensolver:
00144     // Locally optimal block preconditioner conjugate gradient method",
00145     // SIAM J. Sci. Comput., vol 23, n 2, pp. 517-541
00146     // Ref: A. Knyazev and M. Argentati, "Implementation of a preconditioned
00147     // eigensolver using Hypre", Numerical Linear Algebra with Applications (submitted)
00148     // 
00149     // Input variables:
00150     // 
00151     // numEigen  (integer) = Number of eigenmodes requested
00152     // 
00153     // Q (Epetra_MultiVector) = Converged eigenvectors
00154     //                   The number of columns of Q must be equal to numEigen.
00155     //                   The rows of Q are distributed across processors.
00156     //                   At exit, the first numEigen columns contain the eigenvectors requested.
00157     // 
00158     // lambda (array of doubles) = Converged eigenvalues
00159     //                   At input, it must be of size numEigen.
00160     //                   At exit, the first numEigen locations contain the eigenvalues requested.
00161     //
00162     // Return information on status of computation
00163     // 
00164     // info >=   0 >> Number of converged eigenpairs at the end of computation
00165     // 
00166     // // Failure due to input arguments
00167     // 
00168     // info = -  1 >> The stiffness matrix K has not been specified.
00169     // info = -  2 >> The maps for the matrix K and the matrix M differ.
00170     // info = -  3 >> The maps for the matrix K and the preconditioner P differ.
00171     // info = -  4 >> The maps for the vectors and the matrix K differ.
00172     // info = -  5 >> Q is too small for the number of eigenvalues requested.
00173     // info = -  6 >> Q is too small for the computation parameters.
00174     //
00175     // info = - 10 >> Failure during the mass orthonormalization
00176     // 
00177     // info = - 20 >> Error in LAPACK during the local eigensolve
00178     //
00179     // info = - 30 >> MEMORY
00180     //
00181 
00182     // Check the input parameters
00183   
00184     if (numEigen <= 0) {
00185         return 0;
00186     }
00187 
00188     int info = myVerify.inputArguments(numEigen, K, M, Prec, Q, numEigen);
00189     if (info < 0)
00190         return info;
00191 
00192     int myPid = MyComm.MyPID();
00193 
00194     // Get the weight for approximating the M-inverse norm
00195     Epetra_Vector *vectWeight = 0;
00196     if (normWeight) {
00197         vectWeight = new Epetra_Vector(View, Q.Map(), normWeight);
00198     }
00199 
00200     int knownEV = 0;
00201     int localVerbose = verbose*(myPid==0);
00202 
00203     // Define local block vectors
00204     //
00205     // MX = Working vectors (storing M*X if M is specified, else pointing to X)
00206     // KX = Working vectors (storing K*X)
00207     //
00208     // R = Residuals
00209     //
00210     // H = Preconditioned search space
00211     // MH = Working vectors (storing M*H if M is specified, else pointing to H)
00212     // KH = Working vectors (storing K*H)
00213     //
00214     // P = Search directions
00215     // MP = Working vectors (storing M*P if M is specified, else pointing to P)
00216     // KP = Working vectors (storing K*P)
00217 
00218     int xr = Q.MyLength();
00219     Epetra_MultiVector X(View, Q, 0, numEigen);
00220 
00221     blockSize = numEigen;
00222 
00223     int tmp;
00224     tmp = (M == 0) ? 6*numEigen*xr : 9*numEigen*xr;
00225 
00226     double *work1 = new (nothrow) double[tmp]; 
00227     if (work1 == 0) {
00228         if (vectWeight)
00229             delete vectWeight;
00230         info = -30;
00231         return info;
00232     }
00233     memRequested += sizeof(double)*tmp/(1024.0*1024.0);
00234 
00235     highMem = (highMem > currentSize()) ? highMem : currentSize();
00236 
00237     double *tmpD = work1;
00238 
00239     Epetra_MultiVector KX(View, Q.Map(), tmpD, xr, numEigen);
00240     tmpD = tmpD + xr*numEigen;
00241 
00242     Epetra_MultiVector MX(View, Q.Map(), (M) ? tmpD : X.Values(), xr, numEigen);
00243     tmpD = (M) ? tmpD + xr*numEigen : tmpD;
00244 
00245     Epetra_MultiVector R(View, Q.Map(), tmpD, xr, numEigen);
00246     tmpD = tmpD + xr*numEigen;
00247 
00248     Epetra_MultiVector H(View, Q.Map(), tmpD, xr, numEigen);
00249     tmpD = tmpD + xr*numEigen;
00250 
00251     Epetra_MultiVector KH(View, Q.Map(), tmpD, xr, numEigen);
00252     tmpD = tmpD + xr*numEigen;
00253 
00254     Epetra_MultiVector MH(View, Q.Map(), (M) ? tmpD : H.Values(), xr, numEigen);
00255     tmpD = (M) ? tmpD + xr*numEigen : tmpD;
00256 
00257     Epetra_MultiVector P(View, Q.Map(), tmpD, xr, numEigen);
00258     tmpD = tmpD + xr*numEigen;
00259 
00260     Epetra_MultiVector KP(View, Q.Map(), tmpD, xr, numEigen);
00261     tmpD = tmpD + xr*numEigen;
00262 
00263     Epetra_MultiVector MP(View, Q.Map(), (M) ? tmpD : P.Values(), xr, numEigen);
00264 
00265     // Define arrays
00266     //
00267     // theta = Store the local eigenvalues (size: numEigen)
00268     // normR = Store the norm of residuals (size: numEigen)
00269     //
00270     // MM = Local mass matrix              (size: 3*numEigen x 3*numEigen)
00271     // KK = Local stiffness matrix         (size: 3*numEigen x 3*numEigen)
00272     //
00273     // S = Local eigenvectors              (size: 3*numEigen x 3*numEigen)
00274 
00275     int lwork2;
00276     lwork2 = 2*numEigen + 27*numEigen*numEigen;
00277     double *work2 = new (nothrow) double[lwork2];
00278     if (work2 == 0) {
00279         if (vectWeight)
00280             delete vectWeight;
00281         delete[] work1;
00282         info = -30;
00283         return info;
00284     }
00285 
00286     highMem = (highMem > currentSize()) ? highMem : currentSize();
00287 
00288     tmpD = work2;
00289 
00290     double *theta = tmpD;
00291     tmpD = tmpD + numEigen;
00292 
00293     double *normR = tmpD;
00294     tmpD = tmpD + numEigen;
00295 
00296     double *MM = tmpD;
00297     tmpD = tmpD + 9*numEigen*numEigen;
00298 
00299     double *KK = tmpD;
00300     tmpD = tmpD + 9*numEigen*numEigen;
00301 
00302     double *S = tmpD;
00303 
00304     memRequested += sizeof(double)*lwork2/(1024.0*1024.0);
00305 
00306     // Define an array to store the residuals history
00307     if (localVerbose > 2) {
00308         resHistory = new (nothrow) double[maxIterEigenSolve*numEigen];
00309         if (resHistory == 0) {
00310             if (vectWeight)
00311                 delete vectWeight;
00312             delete[] work1;
00313             delete[] work2;
00314             info = -30;
00315             return info;
00316         }
00317         historyCount = 0;
00318     }
00319 
00320     // Miscellaneous definitions
00321 
00322     bool reStart = false;
00323     numRestart = 0;
00324 
00325     int localSize = 0;
00326     int firstIndex = knownEV;
00327     int i, j;
00328 
00329     if (localVerbose > 0) {
00330         cout << endl;
00331         cout << " *|* Problem: ";
00332         if (M) 
00333             cout << "K*Q = M*Q D ";
00334         else
00335             cout << "K*Q = Q D ";
00336         if (Prec)
00337             cout << " with preconditioner";
00338         cout << endl;
00339         cout << " *|* Algorithm = LOBPCG (Knyazev's version)" << endl;
00340         cout << " *|* Size of blocks = " << blockSize << endl;
00341         cout << " *|* Number of requested eigenvalues = " << numEigen << endl;
00342         cout.precision(2);
00343         cout.setf(ios::scientific, ios::floatfield);
00344         cout << " *|* Tolerance for convergence = " << tolEigenSolve << endl;
00345         cout << " *|* Norm used for convergence: ";
00346         if (normWeight)
00347             cout << "weighted L2-norm with user-provided weights" << endl;
00348         else
00349             cout << "L^2-norm" << endl;
00350         cout << "\n -- Start iterations -- \n";
00351     }
00352 
00353     timeOuterLoop -= MyWatch.WallTime();
00354     for (outerIter = 1; outerIter <= maxIterEigenSolve; ++outerIter) {
00355 
00356         highMem = (highMem > currentSize()) ? highMem : currentSize();
00357 
00358         int workingSize = numEigen - knownEV;
00359 
00360         Epetra_MultiVector  XI(View, X, firstIndex, workingSize);
00361         Epetra_MultiVector MXI(View, MX, firstIndex, workingSize);
00362         Epetra_MultiVector KXI(View, KX, firstIndex, workingSize);
00363 
00364         Epetra_MultiVector  HI(View, H, firstIndex, workingSize);
00365         Epetra_MultiVector MHI(View, MH, firstIndex, workingSize);
00366         Epetra_MultiVector KHI(View, KH, firstIndex, workingSize);
00367 
00368         Epetra_MultiVector  PI(View, P, firstIndex, workingSize);
00369         Epetra_MultiVector MPI(View, MP, firstIndex, workingSize);
00370         Epetra_MultiVector KPI(View, KP, firstIndex, workingSize);
00371 
00372         Epetra_MultiVector  RI(View, R, firstIndex, workingSize);
00373 
00374         if ((outerIter == 1) || (reStart == true)) {
00375 
00376             reStart = false;
00377             localSize = numEigen;
00378 
00379             // Apply the mass matrix to X
00380             timeMassOp -= MyWatch.WallTime();
00381             if (M)
00382                 M->Apply(XI, MXI);
00383             timeMassOp += MyWatch.WallTime();
00384             massOp += workingSize;
00385 
00386             // Apply the stiffness matrix to X
00387             timeStifOp -= MyWatch.WallTime();
00388             K->Apply(XI, KXI);
00389             timeStifOp += MyWatch.WallTime();
00390             stifOp += workingSize;
00391 
00392         } // if ((outerIter == 1) || (reStart == true))
00393         else {
00394 
00395             // Apply the preconditioner on the residuals
00396             if (Prec) {
00397                 timePrecOp -= MyWatch.WallTime();
00398                 Prec->ApplyInverse(RI, MHI);
00399                 timePrecOp += MyWatch.WallTime();
00400                 precOp += workingSize;
00401             }
00402             else {
00403                 memcpy(MHI.Values(), RI.Values(), xr*workingSize*sizeof(double));
00404             }
00405       
00406       
00407             if (YHC) {
00408                 YHC->Apply(MHI, HI);
00409             } 
00410       
00411             // Apply the mass matrix on H
00412             timeMassOp -= MyWatch.WallTime();
00413             if (M)
00414                 M->Apply(HI, MHI);
00415             timeMassOp += MyWatch.WallTime();
00416             massOp += workingSize;
00417       
00418             // Apply the stiffness matrix to H
00419             timeStifOp -= MyWatch.WallTime();
00420             K->Apply(HI, KHI);
00421             timeStifOp += MyWatch.WallTime();
00422             stifOp += workingSize;
00423 
00424             if (localSize == numEigen)
00425                 localSize += workingSize;
00426 
00427         } // if ((outerIter == 1) || (reStart==true))
00428 
00429         // Form "local" mass and stiffness matrices
00430         // Note: Use S as a temporary workspace
00431         timeLocalProj -= MyWatch.WallTime();
00432         modalTool.localProjection(numEigen, numEigen, xr, X.Values(), xr, KX.Values(), xr, 
00433                                   KK, localSize, S);
00434         modalTool.localProjection(numEigen, numEigen, xr, X.Values(), xr, MX.Values(), xr, 
00435                                   MM, localSize, S);
00436         if (localSize > numEigen) {
00437             double *pointer = KK + numEigen*localSize;
00438             modalTool.localProjection(numEigen, workingSize, xr, X.Values(), xr, KHI.Values(), xr,
00439                                       pointer, localSize, S);
00440             modalTool.localProjection(workingSize, workingSize, xr, HI.Values(), xr, KHI.Values(), xr,
00441                                       pointer + numEigen, localSize, S);
00442             pointer = MM + numEigen*localSize;
00443             modalTool.localProjection(numEigen, workingSize, xr, X.Values(), xr, MHI.Values(), xr, 
00444                                       pointer, localSize, S);
00445             modalTool.localProjection(workingSize, workingSize, xr, HI.Values(), xr, MHI.Values(), xr,
00446                                       pointer + numEigen, localSize, S);
00447             if (localSize > numEigen + workingSize) {
00448                 pointer = KK + (numEigen + workingSize)*localSize;
00449                 modalTool.localProjection(numEigen, workingSize, xr, X.Values(), xr, KPI.Values(), xr,
00450                                           pointer, localSize, S);
00451                 modalTool.localProjection(workingSize, workingSize, xr, HI.Values(), xr, KPI.Values(), xr,
00452                                           pointer + numEigen, localSize, S);
00453                 modalTool.localProjection(workingSize, workingSize, xr, PI.Values(), xr, KPI.Values(), xr,
00454                                           pointer + numEigen + workingSize, localSize, S);
00455                 pointer = MM + (numEigen + workingSize)*localSize;
00456                 modalTool.localProjection(numEigen, workingSize, xr, X.Values(), xr, MPI.Values(), xr,
00457                                           pointer, localSize, S);
00458                 modalTool.localProjection(workingSize, workingSize, xr, HI.Values(), xr, MPI.Values(), xr,
00459                                           pointer + numEigen, localSize, S);
00460                 modalTool.localProjection(workingSize, workingSize, xr, PI.Values(), xr, MPI.Values(), xr,
00461                                           pointer + numEigen + workingSize, localSize, S);
00462             } // if (localSize > numEigen + workingSize)
00463         } // if (localSize > numEigen)
00464         timeLocalProj += MyWatch.WallTime();
00465 
00466         // Perform a spectral decomposition
00467         timeLocalSolve -= MyWatch.WallTime();
00468         int nevLocal = localSize;
00469         info = modalTool.directSolver(localSize, KK, localSize, MM, localSize, nevLocal,
00470                                       S, localSize, theta, localVerbose, 
00471                                       (blockSize == 1) ? 1 : 0);
00472         timeLocalSolve += MyWatch.WallTime();
00473 
00474         if (info < 0) {
00475             // Stop when spectral decomposition has a critical failure
00476             break;
00477         } // if (info < 0)
00478 
00479         // Check for restarting
00480         if ((theta[0] < 0.0) || (nevLocal < numEigen)) {
00481             if (localVerbose > 0) {
00482                 cout << " Iteration " << outerIter;
00483                 cout << " - Failure for spectral decomposition - RESTART with new random search\n";
00484             }
00485             if (workingSize == 1) {
00486                 XI.Random();
00487             }
00488             else {
00489                 Epetra_MultiVector Xinit(View, XI, 1, workingSize-1);
00490                 Xinit.Random();
00491             } // if (workingSize == 1)
00492             reStart = true;
00493             numRestart += 1;
00494             info = 0;
00495             continue;
00496         } // if ((theta[0] < 0.0) || (nevLocal < numEigen))
00497 
00498         if ((localSize == numEigen+workingSize) && (nevLocal == numEigen)) {
00499             for (j = 0; j < nevLocal; ++j)
00500                 memcpy(S+j*numEigen, S+j*localSize, numEigen*sizeof(double));
00501             localSize = numEigen;
00502         }
00503 
00504         if ((localSize == numEigen+2*workingSize) && (nevLocal <= numEigen+workingSize)) {
00505             for (j = 0; j < nevLocal; ++j)
00506                 memcpy(S+j*(numEigen+workingSize), S+j*localSize, (numEigen+workingSize)*sizeof(double));
00507             localSize = numEigen + workingSize;
00508         }
00509 
00510         // Update the spaces
00511         // Note: Use R as a temporary work space
00512         timeLocalUpdate -= MyWatch.WallTime();
00513 
00514         memcpy(R.Values(), X.Values(), xr*numEigen*sizeof(double));
00515         callBLAS.GEMM('N', 'N', xr, numEigen, numEigen, 1.0, R.Values(), xr, S, localSize,
00516                       0.0, X.Values(), xr);
00517         if (M) {
00518             memcpy(R.Values(), MX.Values(), xr*numEigen*sizeof(double));
00519             callBLAS.GEMM('N', 'N', xr, numEigen, numEigen, 1.0, R.Values(), xr, S, localSize,
00520                           0.0, MX.Values(), xr);
00521         }
00522         memcpy(R.Values(), KX.Values(), xr*numEigen*sizeof(double));
00523         callBLAS.GEMM('N', 'N', xr, numEigen, numEigen, 1.0, R.Values(), xr, S, localSize,
00524                       0.0, KX.Values(), xr);
00525 
00526         if (localSize == numEigen + workingSize) {
00527             callBLAS.GEMM('N', 'N', xr, numEigen, workingSize, 1.0, HI.Values(), xr,
00528                           S + numEigen, localSize, 0.0, P.Values(), xr);
00529             if (M) {
00530                 callBLAS.GEMM('N', 'N', xr, numEigen, workingSize, 1.0, MHI.Values(), xr,
00531                               S + numEigen, localSize, 0.0, MP.Values(), xr);
00532             }
00533             callBLAS.GEMM('N', 'N', xr, numEigen, workingSize, 1.0, KHI.Values(), xr,
00534                           S + numEigen, localSize, 0.0, KP.Values(), xr);
00535         }
00536 
00537         if (localSize > numEigen + workingSize) {
00538             memcpy(RI.Values(), PI.Values(), xr*workingSize*sizeof(double));
00539             callBLAS.GEMM('N', 'N', xr, numEigen, workingSize, 1.0, HI.Values(), xr,
00540                           S + numEigen, localSize, 0.0, P.Values(), xr);
00541             callBLAS.GEMM('N', 'N', xr, numEigen, workingSize, 1.0, RI.Values(), xr,
00542                           S + numEigen + workingSize, localSize, 1.0, P.Values(), xr);
00543             if (M) {
00544                 memcpy(RI.Values(), MPI.Values(), xr*workingSize*sizeof(double));
00545                 callBLAS.GEMM('N', 'N', xr, numEigen, workingSize, 1.0, MHI.Values(), xr,
00546                               S + numEigen, localSize, 0.0, MP.Values(), xr);
00547                 callBLAS.GEMM('N', 'N', xr, numEigen, workingSize, 1.0, RI.Values(), xr,
00548                               S + numEigen + workingSize, localSize, 1.0, MP.Values(), xr);
00549             } 
00550             memcpy(RI.Values(), KPI.Values(), xr*workingSize*sizeof(double));
00551             callBLAS.GEMM('N', 'N', xr, numEigen, workingSize, 1.0, KHI.Values(), xr,
00552                           S + numEigen, localSize, 0.0, KP.Values(), xr);
00553             callBLAS.GEMM('N', 'N', xr, numEigen, workingSize, 1.0, RI.Values(), xr,
00554                           S + numEigen + workingSize, localSize, 1.0, KP.Values(), xr);
00555         }
00556 
00557         if (localSize > numEigen) {
00558             callBLAS.AXPY(xr*numEigen, 1.0, P.Values(), X.Values());
00559             if (M)
00560                 callBLAS.AXPY(xr*numEigen, 1.0, MP.Values(), MX.Values());
00561             callBLAS.AXPY(xr*numEigen, 1.0, KP.Values(), KX.Values());
00562         }
00563         timeLocalUpdate += MyWatch.WallTime();
00564 
00565         // Compute the residuals
00566         timeResidual -= MyWatch.WallTime();
00567         memcpy(R.Values(), KX.Values(), xr*numEigen*sizeof(double));
00568         for (j = 0; j < numEigen; ++j) {
00569             callBLAS.AXPY(xr, -theta[j], MX.Values() + j*xr, R.Values() + j*xr);
00570         }
00571         timeResidual += MyWatch.WallTime();
00572         residual += numEigen;
00573 
00574         // Compute the norms of the residuals
00575         timeNorm -= MyWatch.WallTime();
00576         if (vectWeight) {
00577             R.NormWeighted(*vectWeight, normR);
00578         }
00579         else {
00580             R.Norm2(normR);
00581         }
00582         // Scale the norms of residuals with the eigenvalues
00583         for (j = 0; j < numEigen; ++j) {
00584             normR[j] = (theta[j] == 0.0) ? normR[j] : normR[j]/theta[j];
00585         }
00586         timeNorm += MyWatch.WallTime();
00587 
00588         // When required, monitor some orthogonalities
00589         if (verbose > 2) {
00590             accuracyCheck(&X, &MX, &R);
00591         } // if (verbose > 2)
00592 
00593         if (localSize == numEigen + workingSize)
00594             localSize += workingSize;
00595 
00596         // Count the converged eigenvectors
00597         timeNorm -= MyWatch.WallTime();
00598         knownEV = 0;
00599         for (i=0; i < numEigen; ++i) {
00600             if (normR[i] < tolEigenSolve) {
00601                 lambda[i] = theta[i];
00602                 knownEV += 1;
00603             }
00604         }
00605         timeNorm += MyWatch.WallTime();
00606 
00607         // Store the residual history
00608         if (localVerbose > 2) {
00609             memcpy(resHistory + historyCount, normR, numEigen*sizeof(double));
00610             historyCount += numEigen;
00611         }
00612 
00613         // Print information on current iteration
00614         if (localVerbose > 0) {
00615             cout << " Iteration " << outerIter;
00616             cout << " - Number of converged eigenvectors " << knownEV << endl;
00617         }
00618 
00619         if (localVerbose > 1) {
00620             cout << endl;
00621             cout.precision(2);
00622             cout.setf(ios::scientific, ios::floatfield);
00623             for (i=0; i<numEigen; ++i) {
00624                 cout << " Iteration " << outerIter << " - Scaled Norm of Residual " << i;
00625                 cout << " = " << normR[i] << endl;
00626             }
00627             cout << endl;
00628             cout.precision(2);
00629             for (i=0; i<numEigen; ++i) {
00630                 cout << " Iteration " << outerIter << " - Ritz eigenvalue " << i;
00631                 cout.setf((fabs(theta[i]) < 0.01) ? ios::scientific : ios::fixed, ios::floatfield);
00632                 cout << " = " << theta[i] << endl;
00633             }
00634             cout << endl;
00635         }
00636 
00637         // Convergence test
00638         if (knownEV >= numEigen) {
00639             if (localVerbose == 1) {
00640                 cout << endl;
00641                 cout.precision(2);
00642                 cout.setf(ios::scientific, ios::floatfield);
00643                 for (i=0; i<numEigen; ++i) {
00644                     cout << " Iteration " << outerIter << " - Scaled Norm of Residual " << i;
00645                     cout << " = " << normR[i] << endl;
00646                 }
00647                 cout << endl;
00648             }
00649             break;
00650         }
00651 
00652         // Update the sizes
00653         if ((knownEV > 0) && (localSize > numEigen)) {
00654             if (localSize == numEigen + workingSize)
00655                 localSize = 2*numEigen - knownEV;
00656             if (localSize == numEigen + 2*workingSize)
00657                 localSize = 3*numEigen - 2*knownEV;
00658         }
00659 
00660         // Put the unconverged vectors at the end if needed
00661 
00662         for (i=0; i<numEigen; ++i) {
00663             if (normR[i] >= tolEigenSolve) {
00664                 firstIndex = i;
00665                 break;
00666             }
00667         }
00668 
00669         // Continue the loop if no motion of vectors is necessary
00670         if (firstIndex == numEigen-1)
00671             continue;
00672 
00673         while (firstIndex < knownEV) {
00674 
00675             for (j = firstIndex; j < numEigen; ++j) {
00676                 if (normR[j] < tolEigenSolve) {
00677                     callFortran.SWAP(1, normR + j, 1, normR + firstIndex, 1);
00678                     callFortran.SWAP(xr, X.Values() + j*xr, 1, X.Values() + firstIndex*xr, 1);
00679                     callFortran.SWAP(xr, KX.Values() + j*xr, 1, KX.Values() + firstIndex*xr, 1);
00680                     callFortran.SWAP(xr, R.Values() + j*xr, 1, R.Values() + firstIndex*xr, 1);
00681                     callFortran.SWAP(xr, H.Values() + j*xr, 1, H.Values() + firstIndex*xr, 1);
00682                     callFortran.SWAP(xr, KH.Values() + j*xr, 1, KH.Values() + firstIndex*xr, 1);
00683                     callFortran.SWAP(xr, P.Values() + j*xr, 1, P.Values() + firstIndex*xr, 1);
00684                     callFortran.SWAP(xr, KP.Values() + j*xr, 1, KP.Values() + firstIndex*xr, 1);
00685                     if (M) {
00686                         callFortran.SWAP(xr, MX.Values() + j*xr, 1, MX.Values() + firstIndex*xr, 1);
00687                         callFortran.SWAP(xr, MH.Values() + j*xr, 1, MH.Values() + firstIndex*xr, 1);
00688                         callFortran.SWAP(xr, MP.Values() + j*xr, 1, MP.Values() + firstIndex*xr, 1);
00689                     }
00690                     break;
00691                 }
00692             }
00693 
00694             for (i = firstIndex; i < numEigen; ++i) {
00695                 if (normR[i] >= tolEigenSolve) {
00696                     firstIndex = i;
00697                     break;
00698                 }
00699             }
00700 
00701         }
00702 
00703     } // for (outerIter = 1; outerIter <= maxIterEigenSolve; ++outerIter)
00704     timeOuterLoop += MyWatch.WallTime();
00705     highMem = (highMem > currentSize()) ? highMem : currentSize();
00706 
00707     // Clean memory
00708     delete[] work1;
00709     delete[] work2;
00710     if (vectWeight)
00711         delete vectWeight;
00712 
00713     // Sort the eigenpairs
00714     timePostProce -= MyWatch.WallTime();
00715     if ((info == 0) && (knownEV > 0)) {
00716         mySort.sortScalars_Vectors(knownEV, lambda, Q.Values(), Q.MyLength());
00717     }
00718     timePostProce += MyWatch.WallTime();
00719 
00720     return (info == 0) ? knownEV : info;
00721 
00722 }
00723 
00724 
00725 void KnyazevLOBPCG::accuracyCheck(const Epetra_MultiVector *X, const Epetra_MultiVector *MX,
00726                                   const Epetra_MultiVector *R) const {
00727 
00728     cout.precision(2);
00729     cout.setf(ios::scientific, ios::floatfield);
00730     double tmp;
00731 
00732     int myPid = MyComm.MyPID();
00733 
00734     if (X) {
00735         if (M) {
00736             if (MX) {
00737                 tmp = myVerify.errorEquality(X, MX, M);
00738                 if (myPid == 0)
00739                     cout << " >> Difference between MX and M*X = " << tmp << endl;
00740             }
00741             tmp = myVerify.errorOrthonormality(X, M);
00742             if (myPid == 0)
00743                 cout << " >> Error in X^T M X - I = " << tmp << endl;
00744         }
00745         else {
00746             tmp = myVerify.errorOrthonormality(X, 0);
00747             if (myPid == 0)
00748                 cout << " >> Error in X^T X - I = " << tmp << endl;
00749         }
00750     }
00751 
00752     if ((R) && (X)) {
00753         tmp = myVerify.errorOrthogonality(X, R);
00754         if (myPid == 0)
00755             cout << " >> Orthogonality X^T R up to " << tmp << endl;
00756     }
00757 
00758 }
00759 
00760 
00761 void KnyazevLOBPCG::initializeCounters() {
00762 
00763     historyCount = 0;
00764     if (resHistory) {
00765         delete[] resHistory;
00766         resHistory = 0;
00767     }
00768 
00769     memRequested = 0.0;
00770     highMem = 0.0;
00771 
00772     massOp = 0;
00773     numRestart = 0;
00774     outerIter = 0;
00775     precOp = 0;
00776     residual = 0;
00777     stifOp = 0;
00778 
00779     timeLocalProj = 0.0;
00780     timeLocalSolve = 0.0;
00781     timeLocalUpdate = 0.0;
00782     timeMassOp = 0.0;
00783     timeNorm = 0.0;
00784     timeOuterLoop = 0.0;
00785     timePostProce = 0.0;
00786     timePrecOp = 0.0;
00787     timeResidual = 0.0;
00788     timeStifOp = 0.0;
00789 
00790 
00791 }
00792 
00793 
00794 void KnyazevLOBPCG::algorithmInfo() const {
00795 
00796     int myPid = MyComm.MyPID();
00797   
00798     if (myPid ==0) {
00799         cout << " Algorithm: LOBPCG (Knyazev's version) with Cholesky-based local eigensolver\n";
00800         cout << " Block Size: " << blockSize << endl;
00801     }
00802 
00803 }
00804 
00805 
00806 void KnyazevLOBPCG::historyInfo() const {
00807 
00808     if (resHistory) {
00809         int j;
00810         cout << " Block Size: " << blockSize << endl;
00811         cout << endl;
00812         cout << " Residuals\n";
00813         cout << endl;
00814         cout.precision(4);
00815         cout.setf(ios::scientific, ios::floatfield);
00816         for (j = 0; j < historyCount; ++j) {
00817             cout << resHistory[j] << endl;
00818         }
00819         cout << endl;
00820     }
00821 
00822 }
00823 
00824 
00825 void KnyazevLOBPCG::memoryInfo() const {
00826 
00827     int myPid = MyComm.MyPID();
00828 
00829     double maxHighMem = 0.0;
00830     double tmp = highMem;
00831     MyComm.MaxAll(&tmp, &maxHighMem, 1);
00832 
00833     double maxMemRequested = 0.0;
00834     tmp = memRequested;
00835     MyComm.MaxAll(&tmp, &maxMemRequested, 1);
00836 
00837     if (myPid == 0) {
00838         cout.precision(2);
00839         cout.setf(ios::fixed, ios::floatfield);
00840         cout << " Memory requested per processor by the eigensolver   = (EST) ";
00841         cout.width(6);
00842         cout << maxMemRequested << " MB " << endl;
00843         cout << endl;
00844         cout << " High water mark in eigensolver                      = (EST) ";
00845         cout.width(6);
00846         cout << maxHighMem << " MB " << endl;
00847         cout << endl;
00848     }
00849 
00850 }
00851 
00852 
00853 void KnyazevLOBPCG::operationInfo() const {
00854 
00855     int myPid = MyComm.MyPID();
00856   
00857     if (myPid == 0) {
00858         cout << " --- Operations ---\n\n";
00859         cout << " Total number of mass matrix multiplications      = ";
00860         cout.width(9);
00861         cout << massOp << endl;
00862         cout << " Total number of stiffness matrix operations      = ";
00863         cout.width(9);
00864         cout << stifOp << endl;
00865         cout << " Total number of preconditioner applications      = ";
00866         cout.width(9);
00867         cout << precOp << endl;
00868         cout << " Total number of computed eigen-residuals         = ";
00869         cout.width(9);
00870         cout << residual << endl;
00871         cout << "\n";
00872         cout << " Total number of outer iterations                 = ";
00873         cout.width(9);
00874         cout << outerIter << endl;
00875         cout << "       Number of restarts                         = ";
00876         cout.width(9);
00877         cout << numRestart << endl;
00878         cout << "\n";
00879     }
00880 
00881 }
00882 
00883 
00884 void KnyazevLOBPCG::timeInfo() const {
00885 
00886     int myPid = MyComm.MyPID();
00887   
00888     if (myPid == 0) {
00889         cout << " --- Timings ---\n\n";
00890         cout.setf(ios::fixed, ios::floatfield);
00891         cout.precision(2);
00892         cout << " Total time for outer loop                       = ";
00893         cout.width(9);
00894         cout << timeOuterLoop << " s" << endl;
00895         cout << "       Time for mass matrix operations           = ";
00896         cout.width(9);
00897         cout << timeMassOp << " s     ";
00898         cout.width(5);
00899         cout << 100*timeMassOp/timeOuterLoop << " %\n";
00900         cout << "       Time for stiffness matrix operations      = ";
00901         cout.width(9);
00902         cout << timeStifOp << " s     ";
00903         cout.width(5);
00904         cout << 100*timeStifOp/timeOuterLoop << " %\n";
00905         cout << "       Time for preconditioner applications      = ";
00906         cout.width(9);
00907         cout << timePrecOp << " s     ";
00908         cout.width(5);
00909         cout << 100*timePrecOp/timeOuterLoop << " %\n";
00910         cout << "       Time for local projection                 = ";
00911         cout.width(9);
00912         cout << timeLocalProj << " s     ";
00913         cout.width(5);
00914         cout << 100*timeLocalProj/timeOuterLoop << " %\n";
00915         cout << "       Time for local eigensolve                 = ";
00916         cout.width(9);
00917         cout << timeLocalSolve << " s     ";
00918         cout.width(5);
00919         cout << 100*timeLocalSolve/timeOuterLoop << " %\n";
00920         cout << "       Time for local update                     = ";
00921         cout.width(9);
00922         cout << timeLocalUpdate << " s     ";
00923         cout.width(5);
00924         cout << 100*timeLocalUpdate/timeOuterLoop << " %\n";
00925         cout << "       Time for residual computations            = ";
00926         cout.width(9);
00927         cout << timeResidual << " s     ";
00928         cout.width(5);
00929         cout << 100*timeResidual/timeOuterLoop << " %\n";
00930         cout << "       Time for residuals norm computations      = ";
00931         cout.width(9);
00932         cout << timeNorm << " s     ";
00933         cout.width(5);
00934         cout << 100*timeNorm/timeOuterLoop << " %\n";
00935         cout << "\n";
00936         cout << " Total time for post-processing                  = ";
00937         cout.width(9);
00938         cout << timePostProce << " s\n";
00939         cout << endl;
00940     } // if (myPid == 0)
00941 
00942 }
00943 
00944