00001 
00002 
00003 
00004 
00005 
00006 
00007 
00008 
00009 
00010 
00011 
00012 
00013 
00014 
00015 
00016 
00017 
00018 
00019 
00020 
00021 
00022 
00023 
00024 
00025 
00026 
00027 
00028 
00029 #include "LOBPCG.h"
00030 
00031 
00032 LOBPCG::LOBPCG(const Epetra_Comm &_Comm, const Epetra_Operator *KK, 
00033                const Epetra_Operator *PP, Projector *YHCP, int _blk,
00034                double _tol, int _maxIter, int _verb) :
00035     myVerify(_Comm),
00036     callBLAS(),
00037     callFortran(),
00038     modalTool(_Comm),
00039     mySort(),
00040     MyComm(_Comm),
00041     K(KK),
00042     M(0),
00043     Prec(PP),
00044     YHC(YHCP),
00045     MyWatch(_Comm),
00046     tolEigenSolve(_tol),
00047     maxIterEigenSolve(_maxIter),
00048     blockSize(_blk),
00049     normWeight(0),
00050     verbose(_verb),
00051     historyCount(0),
00052     resHistory(0),
00053     memRequested(0.0),
00054     highMem(0.0),
00055     massOp(0),
00056     numRestart(0),
00057     outerIter(0),
00058     precOp(0),
00059     residual(0),
00060     stifOp(0),
00061     timeLocalProj(0.0),
00062     timeLocalSolve(0.0),
00063     timeLocalUpdate(0.0),
00064     timeMassOp(0.0),
00065     timeNorm(0.0),
00066     timeOrtho(0.0),
00067     timeOuterLoop(0.0),
00068     timePostProce(0.0),
00069     timePrecOp(0.0),
00070     timeResidual(0.0),
00071     timeRestart(0.0),
00072     timeStifOp(0.0)
00073 {
00074 
00075 }
00076 
00077 
00078 LOBPCG::LOBPCG(const Epetra_Comm &_Comm, const Epetra_Operator *KK,
00079                const Epetra_Operator *MM, const Epetra_Operator *PP, Projector *YHCP, int _blk,
00080                double _tol, int _maxIter, int _verb,
00081                double *_weight) :
00082     myVerify(_Comm),
00083     callBLAS(),
00084     callFortran(),
00085     modalTool(_Comm),
00086     mySort(),
00087     MyComm(_Comm),
00088     K(KK),
00089     M(MM),
00090     Prec(PP),
00091     YHC(YHCP),
00092     MyWatch(_Comm),
00093     tolEigenSolve(_tol),
00094     maxIterEigenSolve(_maxIter),
00095     blockSize(_blk),
00096     normWeight(_weight),
00097     verbose(_verb),
00098     historyCount(0),
00099     resHistory(0),
00100     memRequested(0.0),
00101     highMem(0.0),
00102     massOp(0),
00103     numRestart(0),
00104     outerIter(0),
00105     precOp(0),
00106     residual(0),
00107     stifOp(0),
00108     timeLocalProj(0.0),
00109     timeLocalSolve(0.0),
00110     timeLocalUpdate(0.0),
00111     timeMassOp(0.0),
00112     timeNorm(0.0),
00113     timeOrtho(0.0),
00114     timeOuterLoop(0.0),
00115     timePostProce(0.0),
00116     timePrecOp(0.0),
00117     timeResidual(0.0),
00118     timeRestart(0.0),
00119     timeStifOp(0.0)
00120 {
00121 
00122 }
00123 
00124 
00125 LOBPCG::~LOBPCG() {
00126 
00127     if (resHistory) {
00128         delete[] resHistory;
00129         resHistory = 0;
00130     }
00131 
00132 }
00133 
00134 
00135 int LOBPCG::solve(int numEigen, Epetra_MultiVector &Q, double *lambda) {
00136 
00137     
00138     
00139     
00140     
00141     
00142     
00143     
00144     
00145     
00146     
00147     
00148     
00149     
00150     
00151     
00152     
00153     
00154     
00155     
00156     
00157     
00158     
00159     
00160     
00161     
00162     
00163     
00164     
00165     
00166     
00167     
00168     
00169     
00170     
00171     
00172     
00173     
00174     
00175     
00176     
00177     
00178     
00179     
00180     
00181     
00182     
00183 
00184     
00185   
00186     if (numEigen <= 0) {
00187         return 0;
00188     }
00189 
00190     int info = myVerify.inputArguments(numEigen, K, M, Prec, Q, minimumSpaceDimension(numEigen));
00191     if (info < 0)
00192         return info;
00193 
00194     int myPid = MyComm.MyPID();
00195 
00196     
00197     Epetra_Vector *vectWeight = 0;
00198     if (normWeight) {
00199         vectWeight = new Epetra_Vector(View, Q.Map(), normWeight);
00200     }
00201 
00202     int knownEV = 0;
00203     int localVerbose = verbose*(myPid==0);
00204 
00205     
00206     
00207     
00208     
00209     
00210     
00211     
00212     
00213     
00214     
00215     
00216     
00217     
00218     
00219 
00220     int xr = Q.MyLength();
00221     Epetra_MultiVector X(View, Q, numEigen, blockSize);
00222 
00223     int tmp;
00224     tmp = (M == 0) ? 6*blockSize*xr : 9*blockSize*xr;
00225 
00226     double *work1 = new (nothrow) double[tmp]; 
00227     if (work1 == 0) {
00228         if (vectWeight)
00229             delete vectWeight;
00230         info = -30;
00231         return info;
00232     }
00233     memRequested += sizeof(double)*tmp/(1024.0*1024.0);
00234 
00235     highMem = (highMem > currentSize()) ? highMem : currentSize();
00236 
00237     double *tmpD = work1;
00238 
00239     Epetra_MultiVector KX(View, Q.Map(), tmpD, xr, blockSize);
00240     tmpD = tmpD + xr*blockSize;
00241 
00242     Epetra_MultiVector MX(View, Q.Map(), (M) ? tmpD : X.Values(), xr, blockSize);
00243     tmpD = (M) ? tmpD + xr*blockSize : tmpD;
00244 
00245     Epetra_MultiVector R(View, Q.Map(), tmpD, xr, blockSize);
00246     tmpD = tmpD + xr*blockSize;
00247 
00248     Epetra_MultiVector H(View, Q.Map(), tmpD, xr, blockSize);
00249     tmpD = tmpD + xr*blockSize;
00250 
00251     Epetra_MultiVector KH(View, Q.Map(), tmpD, xr, blockSize);
00252     tmpD = tmpD + xr*blockSize;
00253 
00254     Epetra_MultiVector MH(View, Q.Map(), (M) ? tmpD : H.Values(), xr, blockSize);
00255     tmpD = (M) ? tmpD + xr*blockSize : tmpD;
00256 
00257     Epetra_MultiVector P(View, Q.Map(), tmpD, xr, blockSize);
00258     tmpD = tmpD + xr*blockSize;
00259 
00260     Epetra_MultiVector KP(View, Q.Map(), tmpD, xr, blockSize);
00261     tmpD = tmpD + xr*blockSize;
00262 
00263     Epetra_MultiVector MP(View, Q.Map(), (M) ? tmpD : P.Values(), xr, blockSize);
00264 
00265     
00266     
00267     
00268     
00269     
00270     
00271     
00272     
00273     
00274 
00275     int lwork2;
00276     lwork2 = 4*blockSize + 27*blockSize*blockSize;
00277     double *work2 = new (nothrow) double[lwork2];
00278     if (work2 == 0) {
00279         if (vectWeight)
00280             delete vectWeight;
00281         delete[] work1;
00282         info = -30;
00283         return info;
00284     }
00285 
00286     highMem = (highMem > currentSize()) ? highMem : currentSize();
00287 
00288     tmpD = work2;
00289 
00290     double *theta = tmpD;
00291     tmpD = tmpD + 3*blockSize;
00292 
00293     double *normR = tmpD;
00294     tmpD = tmpD + blockSize;
00295 
00296     double *MM = tmpD;
00297     tmpD = tmpD + 9*blockSize*blockSize;
00298 
00299     double *KK = tmpD;
00300     tmpD = tmpD + 9*blockSize*blockSize;
00301 
00302     double *S = tmpD;
00303 
00304     memRequested += sizeof(double)*lwork2/(1024.0*1024.0);
00305 
00306     
00307     if (localVerbose > 2) {
00308         resHistory = new (nothrow) double[maxIterEigenSolve*blockSize];
00309         if (resHistory == 0) {
00310             if (vectWeight)
00311                 delete vectWeight;
00312             delete[] work1;
00313             delete[] work2;
00314             info = -30;
00315             return info;
00316         }
00317         historyCount = 0;
00318     }
00319 
00320     
00321 
00322     bool reStart = false;
00323     numRestart = 0;
00324 
00325     int localSize = 0;
00326     int twoBlocks = 2*blockSize;
00327     int threeBlocks = 3*blockSize;
00328     int nFound = blockSize;
00329     int i, j;
00330 
00331     if (localVerbose > 0) {
00332         cout << endl;
00333         cout << " *|* Problem: ";
00334         if (M) 
00335             cout << "K*Q = M*Q D ";
00336         else
00337             cout << "K*Q = Q D ";
00338         if (Prec)
00339             cout << " with preconditioner";
00340         cout << endl;
00341         cout << " *|* Algorithm = LOBPCG" << endl;
00342         cout << " *|* Size of blocks = " << blockSize << endl;
00343         cout << " *|* Number of requested eigenvalues = " << numEigen << endl;
00344         cout.precision(2);
00345         cout.setf(ios::scientific, ios::floatfield);
00346         cout << " *|* Tolerance for convergence = " << tolEigenSolve << endl;
00347         cout << " *|* Norm used for convergence: ";
00348         if (normWeight){
00349             cout << "weighted L2-norm with user-provided weights" << endl;
00350         }
00351         else {
00352             cout << "L^2-norm" << endl;
00353         }
00354         cout << "\n -- Start iterations -- \n";
00355     }
00356 
00357     timeOuterLoop -= MyWatch.WallTime();
00358     for (outerIter = 1; outerIter <= maxIterEigenSolve; ++outerIter) {
00359 
00360         highMem = (highMem > currentSize()) ? highMem : currentSize();
00361 
00362         if ((outerIter == 1) || (reStart == true)) {
00363 
00364             reStart = false;
00365             localSize = blockSize;
00366 
00367             if (nFound > 0) {
00368 
00369                 Epetra_MultiVector X2(View, X, blockSize-nFound, nFound);
00370                 Epetra_MultiVector MX2(View, MX, blockSize-nFound, nFound);
00371                 Epetra_MultiVector KX2(View, KX, blockSize-nFound, nFound);
00372 
00373                 
00374                 timeMassOp -= MyWatch.WallTime();
00375                 if (M)
00376                     M->Apply(X2, MX2);
00377                 timeMassOp += MyWatch.WallTime();
00378                 massOp += nFound;
00379 
00380                 if (knownEV > 0) {
00381                     
00382                     
00383                     Epetra_MultiVector copyQ(View, Q, 0, knownEV);
00384                     timeOrtho -= MyWatch.WallTime();
00385                     info = modalTool.massOrthonormalize(X, MX, M, copyQ, nFound, 0, R.Values(), 1.0);
00386                     timeOrtho += MyWatch.WallTime();
00387                     
00388                     if (info < 0) {
00389                         info = -10;
00390                         if (vectWeight)
00391                             delete vectWeight;
00392                         delete[] work1;
00393                         delete[] work2;
00394                         return info;
00395                     }
00396                 }
00397 
00398                 
00399                 timeStifOp -= MyWatch.WallTime();
00400                 K->Apply(X2, KX2);
00401                 timeStifOp += MyWatch.WallTime();
00402                 stifOp += nFound;
00403 
00404             } 
00405 
00406         } 
00407         else {
00408 
00409             
00410             if (Prec) {
00411                 timePrecOp -= MyWatch.WallTime();
00412                 Prec->ApplyInverse(R, H);
00413                 timePrecOp += MyWatch.WallTime();
00414                 precOp += blockSize;
00415             }
00416             else {
00417                 memcpy(H.Values(), R.Values(), xr*blockSize*sizeof(double));
00418             }
00419 
00420             
00421             timeMassOp -= MyWatch.WallTime();
00422             if (M)
00423                 M->Apply(H, MH);
00424             timeMassOp += MyWatch.WallTime();
00425             massOp += blockSize;
00426 
00427             
00428             if (knownEV > 0) {
00429                 
00430                 
00431                 Epetra_MultiVector copyQ(View, Q, 0, knownEV);
00432                 timeOrtho -= MyWatch.WallTime();
00433                 modalTool.massOrthonormalize(H, MH, M, copyQ, blockSize, 1, R.Values());
00434                 timeOrtho += MyWatch.WallTime();
00435             }   
00436       
00437             
00438             if (YHC) {
00439                 YHC->Apply(H, H);
00440             }      
00441       
00442   
00443 
00444             
00445             
00446             timeStifOp -= MyWatch.WallTime();
00447             K->Apply(H, KH);
00448             timeStifOp += MyWatch.WallTime();
00449             stifOp += blockSize;
00450 
00451             if (localSize == blockSize)
00452                 localSize += blockSize;
00453 
00454         } 
00455     
00456  
00457         
00458         
00459         timeLocalProj -= MyWatch.WallTime();
00460         modalTool.localProjection(blockSize, blockSize, xr, X.Values(), xr, KX.Values(), xr,
00461                                   KK, localSize, S);
00462         modalTool.localProjection(blockSize, blockSize, xr, X.Values(), xr, MX.Values(), xr,
00463                                   MM, localSize, S);
00464         if (localSize > blockSize) {
00465             modalTool.localProjection(blockSize, blockSize, xr, X.Values(), xr, KH.Values(), xr,
00466                                       KK + blockSize*localSize, localSize, S);
00467             modalTool.localProjection(blockSize, blockSize, xr, H.Values(), xr, KH.Values(), xr,
00468                                       KK + blockSize*localSize + blockSize, localSize, S);
00469             modalTool.localProjection(blockSize, blockSize, xr, X.Values(), xr, MH.Values(), xr,
00470                                       MM + blockSize*localSize, localSize, S);
00471             modalTool.localProjection(blockSize, blockSize, xr, H.Values(), xr, MH.Values(), xr,
00472                                       MM + blockSize*localSize + blockSize, localSize, S);
00473             if (localSize > twoBlocks) {
00474                 modalTool.localProjection(blockSize, blockSize, xr, X.Values(), xr, KP.Values(), xr,
00475                                           KK + twoBlocks*localSize, localSize, S);
00476                 modalTool.localProjection(blockSize, blockSize, xr, H.Values(), xr, KP.Values(), xr,
00477                                           KK + twoBlocks*localSize + blockSize, localSize, S);
00478                 modalTool.localProjection(blockSize, blockSize, xr, P.Values(), xr, KP.Values(), xr,
00479                                           KK + twoBlocks*localSize + twoBlocks, localSize, S);
00480                 modalTool.localProjection(blockSize, blockSize, xr, X.Values(), xr, MP.Values(), xr,
00481                                           MM + twoBlocks*localSize, localSize, S);
00482                 modalTool.localProjection(blockSize, blockSize, xr, H.Values(), xr, MP.Values(), xr,
00483                                           MM + twoBlocks*localSize + blockSize, localSize, S);
00484                 modalTool.localProjection(blockSize, blockSize, xr, P.Values(), xr, MP.Values(), xr,
00485                                           MM + twoBlocks*localSize + twoBlocks, localSize, S);
00486             } 
00487         } 
00488         timeLocalProj += MyWatch.WallTime();
00489 
00490         
00491         timeLocalSolve -= MyWatch.WallTime();
00492         int nevLocal = localSize;
00493         info = modalTool.directSolver(localSize, KK, localSize, MM, localSize, nevLocal,
00494                                       S, localSize, theta, localVerbose, 
00495                                       (blockSize == 1) ? 1 : 0);
00496         timeLocalSolve += MyWatch.WallTime();
00497 
00498         if (info < 0) {
00499             
00500             break;
00501         } 
00502 
00503         
00504         if ((theta[0] < 0.0) || (nevLocal < blockSize)) {
00505             if (localVerbose > 0) {
00506                 cout << " Iteration " << outerIter;
00507                 cout << "- Failure for spectral decomposition - RESTART with new random search\n";
00508             }
00509             if (blockSize == 1) {
00510                 X.Random();
00511                 nFound = blockSize;
00512             }
00513             else {
00514                 Epetra_MultiVector Xinit(View, X, 1, blockSize-1);
00515                 Xinit.Random();
00516                 nFound = blockSize - 1;
00517             } 
00518             reStart = true;
00519             numRestart += 1;
00520             info = 0;
00521             continue;
00522         } 
00523 
00524         if ((localSize == twoBlocks) && (nevLocal == blockSize)) {
00525             for (j = 0; j < nevLocal; ++j) 
00526                 memcpy(S + j*blockSize, S + j*twoBlocks, blockSize*sizeof(double)); 
00527             localSize = blockSize;
00528         }
00529 
00530         if ((localSize == threeBlocks) && (nevLocal <= twoBlocks)) {
00531             for (j = 0; j < nevLocal; ++j) 
00532                 memcpy(S + j*twoBlocks, S + j*threeBlocks, twoBlocks*sizeof(double)); 
00533             localSize = twoBlocks;
00534         }
00535 
00536         
00537         timeResidual -= MyWatch.WallTime();
00538         callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, KX.Values(), xr,
00539                       S, localSize, 0.0, R.Values(), xr);
00540         if (localSize >= twoBlocks) {
00541             callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, KH.Values(), xr,
00542                           S + blockSize, localSize, 1.0, R.Values(), xr);
00543             if (localSize == threeBlocks) {
00544                 callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, KP.Values(), xr,
00545                               S + twoBlocks, localSize, 1.0, R.Values(), xr);
00546             }
00547         }
00548         for (j = 0; j < blockSize; ++j)
00549             callBLAS.SCAL(localSize, theta[j], S + j*localSize);
00550         callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, -1.0, MX.Values(), xr,
00551                       S, localSize, 1.0, R.Values(), xr);
00552         if (localSize >= twoBlocks) {
00553             callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, -1.0, MH.Values(), xr,
00554                           S + blockSize, localSize, 1.0, R.Values(), xr);
00555             if (localSize == threeBlocks) {
00556                 callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, -1.0, MP.Values(), xr,
00557                               S + twoBlocks, localSize, 1.0, R.Values(), xr);
00558             }
00559         }
00560         for (j = 0; j < blockSize; ++j)
00561             callBLAS.SCAL(localSize, 1.0/theta[j], S + j*localSize);
00562         timeResidual += MyWatch.WallTime();
00563     
00564         
00565         timeNorm -= MyWatch.WallTime();
00566         if (vectWeight)
00567             R.NormWeighted(*vectWeight, normR);
00568         else
00569             R.Norm2(normR);
00570 
00571         
00572         
00573         nFound = 0;
00574         for (j = 0; j < blockSize; ++j) {
00575             normR[j] = (theta[j] == 0.0) ? normR[j] : normR[j]/theta[j];
00576             if (normR[j] < tolEigenSolve) 
00577                 nFound += 1;
00578         }
00579         timeNorm += MyWatch.WallTime();
00580 
00581         
00582         if (localVerbose > 2) {
00583             memcpy(resHistory + historyCount*blockSize, normR, blockSize*sizeof(double));
00584             historyCount += 1;
00585         }
00586 
00587         
00588         if (localVerbose > 0) {
00589             cout << " Iteration " << outerIter << " - Number of converged eigenvectors ";
00590             cout << knownEV + nFound << endl;
00591         }
00592 
00593         if (localVerbose > 1) {
00594             cout << endl;
00595             cout.precision(2);
00596             cout.setf(ios::scientific, ios::floatfield);
00597             for (i=0; i<blockSize; ++i) {
00598                 cout << " Iteration " << outerIter << " - Scaled Norm of Residual " << i;
00599                 cout << " = " << normR[i] << endl;
00600             }
00601             cout << endl;
00602             cout.precision(2);
00603             for (i=0; i<blockSize; ++i) {
00604                 cout << " Iteration " << outerIter << " - Ritz eigenvalue " << i;
00605                 cout.setf((fabs(theta[i]) < 0.01) ? ios::scientific : ios::fixed, ios::floatfield);
00606                 cout << " = " << theta[i] << endl;
00607             }
00608             cout << endl;
00609         }
00610 
00611         if (nFound == 0) {
00612             
00613             
00614             timeLocalUpdate -= MyWatch.WallTime();
00615             memcpy(R.Values(), X.Values(), xr*blockSize*sizeof(double));
00616             callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, R.Values(), xr, S, localSize,
00617                           0.0, X.Values(), xr);
00618             memcpy(R.Values(), KX.Values(), xr*blockSize*sizeof(double));
00619             callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, R.Values(), xr, S, localSize,
00620                           0.0, KX.Values(), xr);
00621             if (M) {
00622                 memcpy(R.Values(), MX.Values(), xr*blockSize*sizeof(double));
00623                 callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, R.Values(), xr, S, localSize,
00624                               0.0, MX.Values(), xr);
00625             }
00626             if (localSize == twoBlocks) {
00627                 callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, H.Values(), xr, 
00628                               S+blockSize, localSize, 0.0, P.Values(), xr);
00629                 callBLAS.AXPY(xr*blockSize, 1.0, P.Values(), X.Values());
00630                 callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, KH.Values(), xr, 
00631                               S+blockSize, localSize, 0.0, KP.Values(), xr);
00632                 callBLAS.AXPY(xr*blockSize, 1.0, KP.Values(), KX.Values());
00633                 if (M) {
00634                     callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, MH.Values(), xr, 
00635                                   S+blockSize, localSize, 0.0, MP.Values(), xr);
00636                     callBLAS.AXPY(xr*blockSize, 1.0, MP.Values(), MX.Values());
00637                 }
00638             } 
00639             if (localSize == threeBlocks) {
00640                 memcpy(R.Values(), P.Values(), xr*blockSize*sizeof(double));
00641                 callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, R.Values(), xr, 
00642                               S + twoBlocks, localSize, 0.0, P.Values(), xr);
00643                 callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, H.Values(), xr, 
00644                               S + blockSize, localSize, 1.0, P.Values(), xr);
00645                 callBLAS.AXPY(xr*blockSize, 1.0, P.Values(), X.Values());
00646                 memcpy(R.Values(), KP.Values(), xr*blockSize*sizeof(double));
00647                 callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, R.Values(), xr,
00648                               S + twoBlocks, localSize, 0.0, KP.Values(), xr);
00649                 callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, KH.Values(), xr,
00650                               S + blockSize, localSize, 1.0, KP.Values(), xr);
00651                 callBLAS.AXPY(xr*blockSize, 1.0, KP.Values(), KX.Values());
00652                 if (M) {
00653                     memcpy(R.Values(), MP.Values(), xr*blockSize*sizeof(double));
00654                     callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, R.Values(), xr,
00655                                   S + twoBlocks, localSize, 0.0, MP.Values(), xr);
00656                     callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, MH.Values(), xr,
00657                                   S + blockSize, localSize, 1.0, MP.Values(), xr);
00658                     callBLAS.AXPY(xr*blockSize, 1.0, MP.Values(), MX.Values());
00659                 }
00660             } 
00661             timeLocalUpdate += MyWatch.WallTime();
00662             
00663             timeResidual -= MyWatch.WallTime();
00664             memcpy(R.Values(), KX.Values(), blockSize*xr*sizeof(double));
00665             for (j = 0; j < blockSize; ++j) 
00666                 callBLAS.AXPY(xr, -theta[j], MX.Values() + j*xr, R.Values() + j*xr);
00667             timeResidual += MyWatch.WallTime();
00668             
00669             if (verbose > 2) {
00670                 if (knownEV == 0) {
00671                     accuracyCheck(&X, &MX, &R, 0, 0, 0);
00672                 }
00673                 else {
00674                     Epetra_MultiVector copyQ(View, Q, 0, knownEV);
00675                     accuracyCheck(&X, &MX, &R, ©Q, (localSize>blockSize) ? &H : 0,
00676                                   (localSize>twoBlocks) ? &P : 0);
00677                 }
00678             } 
00679             if (localSize < threeBlocks)
00680                 localSize += blockSize;
00681             continue;
00682         } 
00683 
00684         
00685         int firstIndex = blockSize;
00686         for (j = 0; j < blockSize; ++j) {
00687             if (normR[j] >= tolEigenSolve) {
00688                 firstIndex = j;
00689                 break;
00690             }
00691         } 
00692         while (firstIndex < nFound) {
00693             for (j = firstIndex; j < blockSize; ++j) {
00694                 if (normR[j] < tolEigenSolve) {
00695                     
00696                     callFortran.SWAP(localSize, S + j*localSize, 1, S + firstIndex*localSize, 1);
00697                     callFortran.SWAP(1, theta + j, 1, theta + firstIndex, 1);
00698                     callFortran.SWAP(1, normR + j, 1, normR + firstIndex, 1);
00699                     break;
00700                 }
00701             } 
00702             for (j = 0; j < blockSize; ++j) {
00703                 if (normR[j] >= tolEigenSolve) {
00704                     firstIndex = j;
00705                     break;
00706                 }
00707             } 
00708         } 
00709 
00710         
00711         memcpy(lambda + knownEV, theta, nFound*sizeof(double));
00712 
00713         
00714         if (knownEV + nFound >= numEigen) {
00715             callBLAS.GEMM('N', 'N', xr, nFound, blockSize, 1.0, X.Values(), xr,
00716                           S, localSize, 0.0, R.Values(), xr);
00717             if (localSize >= twoBlocks) {
00718                 callBLAS.GEMM('N', 'N', xr, nFound, blockSize, 1.0, H.Values(), xr,
00719                               S + blockSize, localSize, 1.0, R.Values(), xr);
00720                 if (localSize == threeBlocks) {
00721                     callBLAS.GEMM('N', 'N', xr, nFound, blockSize, 1.0, P.Values(), xr,
00722                                   S + twoBlocks, localSize, 1.0, R.Values(), xr);
00723                 }
00724             }
00725             memcpy(Q.Values() + knownEV*xr, R.Values(), nFound*xr*sizeof(double));
00726             knownEV += nFound;
00727             if (localVerbose == 1) {
00728                 cout << endl;
00729                 cout.precision(2);
00730                 cout.setf(ios::scientific, ios::floatfield);
00731                 for (i=0; i<blockSize; ++i) {
00732                     cout << " Iteration " << outerIter << " - Scaled Norm of Residual " << i;
00733                     cout << " = " << normR[i] << endl;
00734                 }
00735                 cout << endl;
00736             }
00737             break;
00738         }
00739 
00740         
00741         callBLAS.GEMM('N', 'N', xr, nFound, blockSize, 1.0, X.Values(), xr,
00742                       S, localSize, 0.0, Q.Values() + knownEV*xr, xr);
00743         if (localSize >= twoBlocks) {
00744             callBLAS.GEMM('N', 'N', xr, nFound, blockSize, 1.0, H.Values(), xr,
00745                           S + blockSize, localSize, 1.0, Q.Values() + knownEV*xr, xr);
00746             if (localSize >= threeBlocks)
00747                 callBLAS.GEMM('N', 'N', xr, nFound, blockSize, 1.0, P.Values(), xr,
00748                               S + twoBlocks, localSize, 1.0, Q.Values() + knownEV*xr, xr);
00749         }
00750         knownEV += nFound;
00751 
00752         
00753         timeRestart -= MyWatch.WallTime();
00754         int leftOver = (nevLocal < blockSize + nFound) ? nevLocal - nFound : blockSize;
00755         double *Snew = S + nFound*localSize;
00756         memcpy(R.Values(), X.Values(), blockSize*xr*sizeof(double));
00757         callBLAS.GEMM('N', 'N', xr, leftOver, blockSize, 1.0, R.Values(), xr,
00758                       Snew, localSize, 0.0, X.Values(), xr);
00759         memcpy(R.Values(), KX.Values(), blockSize*xr*sizeof(double));
00760         callBLAS.GEMM('N', 'N', xr, leftOver, blockSize, 1.0, R.Values(), xr,
00761                       Snew, localSize, 0.0, KX.Values(), xr);
00762         if (M) {
00763             memcpy(R.Values(), MX.Values(), blockSize*xr*sizeof(double));
00764             callBLAS.GEMM('N', 'N', xr, leftOver, blockSize, 1.0, R.Values(), xr,
00765                           Snew, localSize, 0.0, MX.Values(), xr);
00766         }
00767         if (localSize >= twoBlocks) {
00768             callBLAS.GEMM('N', 'N', xr, leftOver, blockSize, 1.0, H.Values(), xr,
00769                           Snew+blockSize, localSize, 1.0, X.Values(), xr);
00770             callBLAS.GEMM('N', 'N', xr, leftOver, blockSize, 1.0, KH.Values(), xr,
00771                           Snew+blockSize, localSize, 1.0, KX.Values(), xr);
00772             if (M) {
00773                 callBLAS.GEMM('N','N', xr, leftOver, blockSize, 1.0, MH.Values(), xr,
00774                               Snew+blockSize, localSize, 1.0, MX.Values(), xr);
00775             }
00776             if (localSize >= threeBlocks) {
00777                 callBLAS.GEMM('N', 'N', xr, leftOver, blockSize, 1.0, P.Values(), xr,
00778                               Snew+twoBlocks, localSize, 1.0, X.Values(), xr);
00779                 callBLAS.GEMM('N', 'N', xr, leftOver, blockSize, 1.0, KP.Values(), xr,
00780                               Snew+twoBlocks, localSize, 1.0, KX.Values(), xr);
00781                 if (M) {
00782                     callBLAS.GEMM('N', 'N', xr, leftOver, blockSize, 1.0, MP.Values(), xr,
00783                                   Snew+twoBlocks, localSize, 1.0, MX.Values(), xr);
00784                 }
00785             }
00786         }
00787         if (nevLocal < blockSize + nFound) {
00788             
00789             Epetra_MultiVector Xtmp(View, X, leftOver, blockSize - leftOver);
00790             Xtmp.Random();
00791         }
00792         else {
00793             nFound = 0;
00794         }
00795         reStart = true;
00796         timeRestart += MyWatch.WallTime();
00797 
00798     } 
00799     timeOuterLoop += MyWatch.WallTime();
00800     highMem = (highMem > currentSize()) ? highMem : currentSize();
00801 
00802     
00803     delete[] work1;
00804     delete[] work2;
00805     if (vectWeight)
00806         delete vectWeight;
00807 
00808     
00809     timePostProce -= MyWatch.WallTime();
00810     if ((info == 0) && (knownEV > 0)) {
00811         mySort.sortScalars_Vectors(knownEV, lambda, Q.Values(), Q.MyLength());
00812     }
00813     timePostProce += MyWatch.WallTime();
00814 
00815     return (info == 0) ? knownEV : info;
00816 
00817 }
00818 
00819 
00820 void LOBPCG::accuracyCheck(const Epetra_MultiVector *X, const Epetra_MultiVector *MX,
00821                            const Epetra_MultiVector *R, const Epetra_MultiVector *Q,
00822                            const Epetra_MultiVector *H, const Epetra_MultiVector *P) const {
00823 
00824     cout.precision(2);
00825     cout.setf(ios::scientific, ios::floatfield);
00826     double tmp;
00827 
00828     int myPid = MyComm.MyPID();
00829 
00830     if (X) {
00831         if (M) {
00832             if (MX) {
00833                 tmp = myVerify.errorEquality(X, MX, M);
00834                 if (myPid == 0)
00835                     cout << " >> Difference between MX and M*X = " << tmp << endl;
00836             }
00837             tmp = myVerify.errorOrthonormality(X, M);
00838             if (myPid == 0)
00839                 cout << " >> Error in X^T M X - I = " << tmp << endl;
00840         }
00841         else {
00842             tmp = myVerify.errorOrthonormality(X, 0);
00843             if (myPid == 0)
00844                 cout << " >> Error in X^T X - I = " << tmp << endl;
00845         }
00846     }
00847 
00848     if ((R) && (X)) {
00849         tmp = myVerify.errorOrthogonality(X, R);
00850         if (myPid == 0)
00851             cout << " >> Orthogonality X^T R up to " << tmp << endl;
00852     }
00853 
00854     if (Q == 0)
00855         return;
00856 
00857     if (M) {
00858         tmp = myVerify.errorOrthonormality(Q, M);
00859         if (myPid == 0)
00860             cout << " >> Error in Q^T M Q - I = " << tmp << endl;
00861         if (X) {
00862             tmp = myVerify.errorOrthogonality(Q, X, M);
00863             if (myPid == 0)
00864                 cout << " >> Orthogonality Q^T M X up to " << tmp << endl;
00865         }
00866         if (H) {
00867             tmp = myVerify.errorOrthogonality(Q, H, M);
00868             if (myPid == 0)
00869                 cout << " >> Orthogonality Q^T M H up to " << tmp << endl;
00870         }
00871         if (P) {
00872             tmp = myVerify.errorOrthogonality(Q, P, M);
00873             if (myPid == 0)
00874                 cout << " >> Orthogonality Q^T M P up to " << tmp << endl;
00875         }
00876     }
00877     else {
00878         tmp = myVerify.errorOrthonormality(Q, 0);
00879         if (myPid == 0)
00880             cout << " >> Error in Q^T Q - I = " << tmp << endl;
00881         if (X) {
00882             tmp = myVerify.errorOrthogonality(Q, X, 0);
00883             if (myPid == 0)
00884                 cout << " >> Orthogonality Q^T X up to " << tmp << endl;
00885         }
00886         if (H) {
00887             tmp = myVerify.errorOrthogonality(Q, H, 0);
00888             if (myPid == 0)
00889                 cout << " >> Orthogonality Q^T H up to " << tmp << endl;
00890         }
00891         if (P) {
00892             tmp = myVerify.errorOrthogonality(Q, P, 0);
00893             if (myPid == 0)
00894                 cout << " >> Orthogonality Q^T P up to " << tmp << endl;
00895         }
00896     }
00897 
00898 }
00899 
00900 
00901 void LOBPCG::initializeCounters() {
00902 
00903     historyCount = 0;
00904     if (resHistory) {
00905         delete[] resHistory;
00906         resHistory = 0;
00907     }
00908 
00909     memRequested = 0.0;
00910     highMem = 0.0;
00911 
00912     massOp = 0;
00913     numRestart = 0;
00914     outerIter = 0;
00915     precOp = 0;
00916     residual = 0;
00917     stifOp = 0;
00918 
00919     timeLocalProj = 0.0;
00920     timeLocalSolve = 0.0;
00921     timeLocalUpdate = 0.0;
00922     timeMassOp = 0.0;
00923     timeNorm = 0.0;
00924     timeOrtho = 0.0;
00925     timeOuterLoop = 0.0;
00926     timePostProce = 0.0;
00927     timePrecOp = 0.0;
00928     timeResidual = 0.0;
00929     timeRestart = 0.0;
00930     timeStifOp = 0.0;
00931 
00932 
00933 }
00934 
00935 
00936 void LOBPCG::algorithmInfo() const {
00937 
00938     int myPid = MyComm.MyPID();
00939   
00940     if (myPid ==0) {
00941         cout << " Algorithm: LOBPCG (block version) with Cholesky-based local eigensolver\n";
00942         cout << " Block Size: " << blockSize << endl;
00943     }
00944 
00945 }
00946 
00947 
00948 void LOBPCG::historyInfo() const {
00949 
00950     if (resHistory) {
00951         int j;
00952         cout << " Block Size: " << blockSize << endl;
00953         cout << endl;
00954         cout << " Residuals\n";
00955         cout << endl;
00956         cout.precision(4);
00957         cout.setf(ios::scientific, ios::floatfield);
00958         for (j = 0; j < historyCount; ++j) {
00959             int ii;
00960             for (ii = 0; ii < blockSize; ++ii)
00961                 cout << resHistory[blockSize*j + ii] << endl;
00962         }
00963         cout << endl;
00964     }
00965 
00966 }
00967 
00968 
00969 void LOBPCG::memoryInfo() const {
00970 
00971     int myPid = MyComm.MyPID();
00972 
00973     double maxHighMem = 0.0;
00974     double tmp = highMem;
00975     MyComm.MaxAll(&tmp, &maxHighMem, 1);
00976 
00977     double maxMemRequested = 0.0;
00978     tmp = memRequested;
00979     MyComm.MaxAll(&tmp, &maxMemRequested, 1);
00980 
00981     if (myPid == 0) {
00982         cout.precision(2);
00983         cout.setf(ios::fixed, ios::floatfield);
00984         cout << " Memory requested per processor by the eigensolver   = (EST) ";
00985         cout.width(6);
00986         cout << maxMemRequested << " MB " << endl;
00987         cout << endl;
00988         cout << " High water mark in eigensolver                      = (EST) ";
00989         cout.width(6);
00990         cout << maxHighMem << " MB " << endl;
00991         cout << endl;
00992     }
00993 
00994 }
00995 
00996 
00997 void LOBPCG::operationInfo() const {
00998 
00999     int myPid = MyComm.MyPID();
01000   
01001     if (myPid == 0) {
01002         cout << " --- Operations ---\n\n";
01003         cout << " Total number of mass matrix multiplications      = ";
01004         cout.width(9);
01005         cout << massOp + modalTool.getNumProj_MassMult() + modalTool.getNumNorm_MassMult() << endl;
01006         cout << "       Mass multiplications in solver loop        = ";
01007         cout.width(9);
01008         cout << massOp << endl;
01009         cout << "       Mass multiplications for orthogonalization = ";
01010         cout.width(9);
01011         cout << modalTool.getNumProj_MassMult() << endl;
01012         cout << "       Mass multiplications for normalization     = ";
01013         cout.width(9);
01014         cout << modalTool.getNumNorm_MassMult() << endl;
01015         cout << " Total number of stiffness matrix operations      = ";
01016         cout.width(9);
01017         cout << stifOp << endl;
01018         cout << " Total number of preconditioner applications      = ";
01019         cout.width(9);
01020         cout << precOp << endl;
01021         cout << " Total number of computed eigen-residuals         = ";
01022         cout.width(9);
01023         cout << residual << endl;
01024         cout << "\n";
01025         cout << " Total number of outer iterations                 = ";
01026         cout.width(9);
01027         cout << outerIter << endl;
01028         cout << "       Number of restarts                         = ";
01029         cout.width(9);
01030         cout << numRestart << endl;
01031         cout << "\n";
01032     }
01033 
01034 }
01035 
01036 
01037 void LOBPCG::timeInfo() const {
01038 
01039     int myPid = MyComm.MyPID();
01040   
01041     if (myPid == 0) {
01042         cout << " --- Timings ---\n\n";
01043         cout.setf(ios::fixed, ios::floatfield);
01044         cout.precision(2);
01045         cout << " Total time for outer loop                       = ";
01046         cout.width(9);
01047         cout << timeOuterLoop << " s" << endl;
01048         cout << "       Time for mass matrix operations           = ";
01049         cout.width(9);
01050         cout << timeMassOp << " s     ";
01051         cout.width(5);
01052         cout << 100*timeMassOp/timeOuterLoop << " %\n";
01053         cout << "       Time for stiffness matrix operations      = ";
01054         cout.width(9);
01055         cout << timeStifOp << " s     ";
01056         cout.width(5);
01057         cout << 100*timeStifOp/timeOuterLoop << " %\n";
01058         cout << "       Time for preconditioner applications      = ";
01059         cout.width(9);
01060         cout << timePrecOp << " s     ";
01061         cout.width(5);
01062         cout << 100*timePrecOp/timeOuterLoop << " %\n";
01063         cout << "       Time for orthogonalizations               = ";
01064         cout.width(9);
01065         cout << timeOrtho << " s     ";
01066         cout.width(5);
01067         cout << 100*timeOrtho/timeOuterLoop << " %\n";
01068         cout << "            Projection step          : ";
01069         cout.width(9);
01070         cout << modalTool.getTimeProj() << " s\n";
01071         cout << "                 Q^T mult.  :: ";
01072         cout.width(9);
01073         cout << modalTool.getTimeProj_QtMult() << " s\n";
01074         cout << "                 Q mult.    :: ";
01075         cout.width(9);
01076         cout << modalTool.getTimeProj_QMult() << " s\n";
01077         cout << "                 Mass mult. :: ";
01078         cout.width(9);
01079         cout << modalTool.getTimeProj_MassMult() << " s\n";
01080         cout << "            Normalization step       : ";
01081         cout.width(9);
01082         cout << modalTool.getTimeNorm() << " s\n";
01083         cout << "                 Mass mult. :: ";
01084         cout.width(9);
01085         cout << modalTool.getTimeNorm_MassMult() << " s\n";
01086         cout << "       Time for local projection                 = ";
01087         cout.width(9);
01088         cout << timeLocalProj << " s     ";
01089         cout.width(5);
01090         cout << 100*timeLocalProj/timeOuterLoop << " %\n";
01091         cout << "       Time for local eigensolve                 = ";
01092         cout.width(9);
01093         cout << timeLocalSolve << " s     ";
01094         cout.width(5);
01095         cout << 100*timeLocalSolve/timeOuterLoop << " %\n";
01096         cout << "       Time for local update                     = ";
01097         cout.width(9);
01098         cout << timeLocalUpdate << " s     ";
01099         cout.width(5);
01100         cout << 100*timeLocalUpdate/timeOuterLoop << " %\n";
01101         cout << "       Time for residual computations            = ";
01102         cout.width(9);
01103         cout << timeResidual << " s     ";
01104         cout.width(5);
01105         cout << 100*timeResidual/timeOuterLoop << " %\n";
01106         cout << "       Time for residuals norm computations      = ";
01107         cout.width(9);
01108         cout << timeNorm << " s     ";
01109         cout.width(5);
01110         cout << 100*timeNorm/timeOuterLoop << " %\n";
01111         cout << "       Time for restarting space definition      = ";
01112         cout.width(9);
01113         cout << timeRestart << " s     ";
01114         cout.width(5);
01115         cout << 100*timeRestart/timeOuterLoop << " %\n";
01116         cout << "\n";
01117         cout << " Total time for post-processing                  = ";
01118         cout.width(9);
01119         cout << timePostProce << " s\n";
01120         cout << endl;
01121     } 
01122 
01123 }
01124 
01125