00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029 #include "KnyazevLOBPCG.h"
00030
00031
00032 KnyazevLOBPCG::KnyazevLOBPCG(const Epetra_Comm &_Comm, const Epetra_Operator *KK,
00033 const Epetra_Operator *PP,
00034 double _tol, int _maxIter, int _verb) :
00035 myVerify(_Comm),
00036 callBLAS(),
00037 callFortran(),
00038 modalTool(_Comm),
00039 mySort(),
00040 MyComm(_Comm),
00041 K(KK),
00042 M(0),
00043 Prec(PP),
00044 MyWatch(_Comm),
00045 tolEigenSolve(_tol),
00046 maxIterEigenSolve(_maxIter),
00047 blockSize(0),
00048 normWeight(0),
00049 verbose(_verb),
00050 historyCount(0),
00051 resHistory(0),
00052 memRequested(0.0),
00053 highMem(0.0),
00054 massOp(0),
00055 numRestart(0),
00056 outerIter(0),
00057 precOp(0),
00058 residual(0),
00059 stifOp(0),
00060 timeLocalProj(0.0),
00061 timeLocalSolve(0.0),
00062 timeLocalUpdate(0.0),
00063 timeMassOp(0.0),
00064 timeNorm(0.0),
00065 timeOuterLoop(0.0),
00066 timePostProce(0.0),
00067 timePrecOp(0.0),
00068 timeResidual(0.0),
00069 timeStifOp(0.0)
00070 {
00071
00072 }
00073
00074
00075 KnyazevLOBPCG::KnyazevLOBPCG(const Epetra_Comm &_Comm, const Epetra_Operator *KK,
00076 const Epetra_Operator *MM, const Epetra_Operator *PP,
00077 Projector *YHCP,
00078 double _tol, int _maxIter, int _verb,
00079 double *_weight) :
00080 myVerify(_Comm),
00081 callBLAS(),
00082 callFortran(),
00083 modalTool(_Comm),
00084 mySort(),
00085 MyComm(_Comm),
00086 K(KK),
00087 M(MM),
00088 Prec(PP),
00089 YHC(YHCP),
00090 MyWatch(_Comm),
00091 tolEigenSolve(_tol),
00092 maxIterEigenSolve(_maxIter),
00093 blockSize(0),
00094 normWeight(_weight),
00095 verbose(_verb),
00096 historyCount(0),
00097 resHistory(0),
00098 memRequested(0.0),
00099 highMem(0.0),
00100 massOp(0),
00101 numRestart(0),
00102 outerIter(0),
00103 precOp(0),
00104 residual(0),
00105 stifOp(0),
00106 timeLocalProj(0.0),
00107 timeLocalSolve(0.0),
00108 timeLocalUpdate(0.0),
00109 timeMassOp(0.0),
00110 timeNorm(0.0),
00111 timeOuterLoop(0.0),
00112 timePostProce(0.0),
00113 timePrecOp(0.0),
00114 timeResidual(0.0),
00115 timeStifOp(0.0)
00116 {
00117
00118 }
00119
00120
00121 KnyazevLOBPCG::~KnyazevLOBPCG() {
00122
00123 if (resHistory) {
00124 delete[] resHistory;
00125 resHistory = 0;
00126 }
00127
00128 }
00129
00130
00131 int KnyazevLOBPCG::solve(int numEigen, Epetra_MultiVector &Q, double *lambda) {
00132
00133
00134
00135
00136
00137
00138
00139
00140
00141
00142
00143
00144
00145
00146
00147
00148
00149
00150
00151
00152
00153
00154
00155
00156
00157
00158
00159
00160
00161
00162
00163
00164
00165
00166
00167
00168
00169
00170
00171
00172
00173
00174
00175
00176
00177
00178
00179
00180
00181
00182
00183
00184 if (numEigen <= 0) {
00185 return 0;
00186 }
00187
00188 int info = myVerify.inputArguments(numEigen, K, M, Prec, Q, numEigen);
00189 if (info < 0)
00190 return info;
00191
00192 int myPid = MyComm.MyPID();
00193
00194
00195 Epetra_Vector *vectWeight = 0;
00196 if (normWeight) {
00197 vectWeight = new Epetra_Vector(View, Q.Map(), normWeight);
00198 }
00199
00200 int knownEV = 0;
00201 int localVerbose = verbose*(myPid==0);
00202
00203
00204
00205
00206
00207
00208
00209
00210
00211
00212
00213
00214
00215
00216
00217
00218 int xr = Q.MyLength();
00219 Epetra_MultiVector X(View, Q, 0, numEigen);
00220
00221 blockSize = numEigen;
00222
00223 int tmp;
00224 tmp = (M == 0) ? 6*numEigen*xr : 9*numEigen*xr;
00225
00226 double *work1 = new (nothrow) double[tmp];
00227 if (work1 == 0) {
00228 if (vectWeight)
00229 delete vectWeight;
00230 info = -30;
00231 return info;
00232 }
00233 memRequested += sizeof(double)*tmp/(1024.0*1024.0);
00234
00235 highMem = (highMem > currentSize()) ? highMem : currentSize();
00236
00237 double *tmpD = work1;
00238
00239 Epetra_MultiVector KX(View, Q.Map(), tmpD, xr, numEigen);
00240 tmpD = tmpD + xr*numEigen;
00241
00242 Epetra_MultiVector MX(View, Q.Map(), (M) ? tmpD : X.Values(), xr, numEigen);
00243 tmpD = (M) ? tmpD + xr*numEigen : tmpD;
00244
00245 Epetra_MultiVector R(View, Q.Map(), tmpD, xr, numEigen);
00246 tmpD = tmpD + xr*numEigen;
00247
00248 Epetra_MultiVector H(View, Q.Map(), tmpD, xr, numEigen);
00249 tmpD = tmpD + xr*numEigen;
00250
00251 Epetra_MultiVector KH(View, Q.Map(), tmpD, xr, numEigen);
00252 tmpD = tmpD + xr*numEigen;
00253
00254 Epetra_MultiVector MH(View, Q.Map(), (M) ? tmpD : H.Values(), xr, numEigen);
00255 tmpD = (M) ? tmpD + xr*numEigen : tmpD;
00256
00257 Epetra_MultiVector P(View, Q.Map(), tmpD, xr, numEigen);
00258 tmpD = tmpD + xr*numEigen;
00259
00260 Epetra_MultiVector KP(View, Q.Map(), tmpD, xr, numEigen);
00261 tmpD = tmpD + xr*numEigen;
00262
00263 Epetra_MultiVector MP(View, Q.Map(), (M) ? tmpD : P.Values(), xr, numEigen);
00264
00265
00266
00267
00268
00269
00270
00271
00272
00273
00274
00275 int lwork2;
00276 lwork2 = 2*numEigen + 27*numEigen*numEigen;
00277 double *work2 = new (nothrow) double[lwork2];
00278 if (work2 == 0) {
00279 if (vectWeight)
00280 delete vectWeight;
00281 delete[] work1;
00282 info = -30;
00283 return info;
00284 }
00285
00286 highMem = (highMem > currentSize()) ? highMem : currentSize();
00287
00288 tmpD = work2;
00289
00290 double *theta = tmpD;
00291 tmpD = tmpD + numEigen;
00292
00293 double *normR = tmpD;
00294 tmpD = tmpD + numEigen;
00295
00296 double *MM = tmpD;
00297 tmpD = tmpD + 9*numEigen*numEigen;
00298
00299 double *KK = tmpD;
00300 tmpD = tmpD + 9*numEigen*numEigen;
00301
00302 double *S = tmpD;
00303
00304 memRequested += sizeof(double)*lwork2/(1024.0*1024.0);
00305
00306
00307 if (localVerbose > 2) {
00308 resHistory = new (nothrow) double[maxIterEigenSolve*numEigen];
00309 if (resHistory == 0) {
00310 if (vectWeight)
00311 delete vectWeight;
00312 delete[] work1;
00313 delete[] work2;
00314 info = -30;
00315 return info;
00316 }
00317 historyCount = 0;
00318 }
00319
00320
00321
00322 bool reStart = false;
00323 numRestart = 0;
00324
00325 int localSize = 0;
00326 int firstIndex = knownEV;
00327 int i, j;
00328
00329 if (localVerbose > 0) {
00330 cout << endl;
00331 cout << " *|* Problem: ";
00332 if (M)
00333 cout << "K*Q = M*Q D ";
00334 else
00335 cout << "K*Q = Q D ";
00336 if (Prec)
00337 cout << " with preconditioner";
00338 cout << endl;
00339 cout << " *|* Algorithm = LOBPCG (Knyazev's version)" << endl;
00340 cout << " *|* Size of blocks = " << blockSize << endl;
00341 cout << " *|* Number of requested eigenvalues = " << numEigen << endl;
00342 cout.precision(2);
00343 cout.setf(ios::scientific, ios::floatfield);
00344 cout << " *|* Tolerance for convergence = " << tolEigenSolve << endl;
00345 cout << " *|* Norm used for convergence: ";
00346 if (normWeight)
00347 cout << "weighted L2-norm with user-provided weights" << endl;
00348 else
00349 cout << "L^2-norm" << endl;
00350 cout << "\n -- Start iterations -- \n";
00351 }
00352
00353 timeOuterLoop -= MyWatch.WallTime();
00354 for (outerIter = 1; outerIter <= maxIterEigenSolve; ++outerIter) {
00355
00356 highMem = (highMem > currentSize()) ? highMem : currentSize();
00357
00358 int workingSize = numEigen - knownEV;
00359
00360 Epetra_MultiVector XI(View, X, firstIndex, workingSize);
00361 Epetra_MultiVector MXI(View, MX, firstIndex, workingSize);
00362 Epetra_MultiVector KXI(View, KX, firstIndex, workingSize);
00363
00364 Epetra_MultiVector HI(View, H, firstIndex, workingSize);
00365 Epetra_MultiVector MHI(View, MH, firstIndex, workingSize);
00366 Epetra_MultiVector KHI(View, KH, firstIndex, workingSize);
00367
00368 Epetra_MultiVector PI(View, P, firstIndex, workingSize);
00369 Epetra_MultiVector MPI(View, MP, firstIndex, workingSize);
00370 Epetra_MultiVector KPI(View, KP, firstIndex, workingSize);
00371
00372 Epetra_MultiVector RI(View, R, firstIndex, workingSize);
00373
00374 if ((outerIter == 1) || (reStart == true)) {
00375
00376 reStart = false;
00377 localSize = numEigen;
00378
00379
00380 timeMassOp -= MyWatch.WallTime();
00381 if (M)
00382 M->Apply(XI, MXI);
00383 timeMassOp += MyWatch.WallTime();
00384 massOp += workingSize;
00385
00386
00387 timeStifOp -= MyWatch.WallTime();
00388 K->Apply(XI, KXI);
00389 timeStifOp += MyWatch.WallTime();
00390 stifOp += workingSize;
00391
00392 }
00393 else {
00394
00395
00396 if (Prec) {
00397 timePrecOp -= MyWatch.WallTime();
00398 Prec->ApplyInverse(RI, MHI);
00399 timePrecOp += MyWatch.WallTime();
00400 precOp += workingSize;
00401 }
00402 else {
00403 memcpy(MHI.Values(), RI.Values(), xr*workingSize*sizeof(double));
00404 }
00405
00406
00407 if (YHC) {
00408 YHC->Apply(MHI, HI);
00409 }
00410
00411
00412 timeMassOp -= MyWatch.WallTime();
00413 if (M)
00414 M->Apply(HI, MHI);
00415 timeMassOp += MyWatch.WallTime();
00416 massOp += workingSize;
00417
00418
00419 timeStifOp -= MyWatch.WallTime();
00420 K->Apply(HI, KHI);
00421 timeStifOp += MyWatch.WallTime();
00422 stifOp += workingSize;
00423
00424 if (localSize == numEigen)
00425 localSize += workingSize;
00426
00427 }
00428
00429
00430
00431 timeLocalProj -= MyWatch.WallTime();
00432 modalTool.localProjection(numEigen, numEigen, xr, X.Values(), xr, KX.Values(), xr,
00433 KK, localSize, S);
00434 modalTool.localProjection(numEigen, numEigen, xr, X.Values(), xr, MX.Values(), xr,
00435 MM, localSize, S);
00436 if (localSize > numEigen) {
00437 double *pointer = KK + numEigen*localSize;
00438 modalTool.localProjection(numEigen, workingSize, xr, X.Values(), xr, KHI.Values(), xr,
00439 pointer, localSize, S);
00440 modalTool.localProjection(workingSize, workingSize, xr, HI.Values(), xr, KHI.Values(), xr,
00441 pointer + numEigen, localSize, S);
00442 pointer = MM + numEigen*localSize;
00443 modalTool.localProjection(numEigen, workingSize, xr, X.Values(), xr, MHI.Values(), xr,
00444 pointer, localSize, S);
00445 modalTool.localProjection(workingSize, workingSize, xr, HI.Values(), xr, MHI.Values(), xr,
00446 pointer + numEigen, localSize, S);
00447 if (localSize > numEigen + workingSize) {
00448 pointer = KK + (numEigen + workingSize)*localSize;
00449 modalTool.localProjection(numEigen, workingSize, xr, X.Values(), xr, KPI.Values(), xr,
00450 pointer, localSize, S);
00451 modalTool.localProjection(workingSize, workingSize, xr, HI.Values(), xr, KPI.Values(), xr,
00452 pointer + numEigen, localSize, S);
00453 modalTool.localProjection(workingSize, workingSize, xr, PI.Values(), xr, KPI.Values(), xr,
00454 pointer + numEigen + workingSize, localSize, S);
00455 pointer = MM + (numEigen + workingSize)*localSize;
00456 modalTool.localProjection(numEigen, workingSize, xr, X.Values(), xr, MPI.Values(), xr,
00457 pointer, localSize, S);
00458 modalTool.localProjection(workingSize, workingSize, xr, HI.Values(), xr, MPI.Values(), xr,
00459 pointer + numEigen, localSize, S);
00460 modalTool.localProjection(workingSize, workingSize, xr, PI.Values(), xr, MPI.Values(), xr,
00461 pointer + numEigen + workingSize, localSize, S);
00462 }
00463 }
00464 timeLocalProj += MyWatch.WallTime();
00465
00466
00467 timeLocalSolve -= MyWatch.WallTime();
00468 int nevLocal = localSize;
00469 info = modalTool.directSolver(localSize, KK, localSize, MM, localSize, nevLocal,
00470 S, localSize, theta, localVerbose,
00471 (blockSize == 1) ? 1 : 0);
00472 timeLocalSolve += MyWatch.WallTime();
00473
00474 if (info < 0) {
00475
00476 break;
00477 }
00478
00479
00480 if ((theta[0] < 0.0) || (nevLocal < numEigen)) {
00481 if (localVerbose > 0) {
00482 cout << " Iteration " << outerIter;
00483 cout << " - Failure for spectral decomposition - RESTART with new random search\n";
00484 }
00485 if (workingSize == 1) {
00486 XI.Random();
00487 }
00488 else {
00489 Epetra_MultiVector Xinit(View, XI, 1, workingSize-1);
00490 Xinit.Random();
00491 }
00492 reStart = true;
00493 numRestart += 1;
00494 info = 0;
00495 continue;
00496 }
00497
00498 if ((localSize == numEigen+workingSize) && (nevLocal == numEigen)) {
00499 for (j = 0; j < nevLocal; ++j)
00500 memcpy(S+j*numEigen, S+j*localSize, numEigen*sizeof(double));
00501 localSize = numEigen;
00502 }
00503
00504 if ((localSize == numEigen+2*workingSize) && (nevLocal <= numEigen+workingSize)) {
00505 for (j = 0; j < nevLocal; ++j)
00506 memcpy(S+j*(numEigen+workingSize), S+j*localSize, (numEigen+workingSize)*sizeof(double));
00507 localSize = numEigen + workingSize;
00508 }
00509
00510
00511
00512 timeLocalUpdate -= MyWatch.WallTime();
00513
00514 memcpy(R.Values(), X.Values(), xr*numEigen*sizeof(double));
00515 callBLAS.GEMM('N', 'N', xr, numEigen, numEigen, 1.0, R.Values(), xr, S, localSize,
00516 0.0, X.Values(), xr);
00517 if (M) {
00518 memcpy(R.Values(), MX.Values(), xr*numEigen*sizeof(double));
00519 callBLAS.GEMM('N', 'N', xr, numEigen, numEigen, 1.0, R.Values(), xr, S, localSize,
00520 0.0, MX.Values(), xr);
00521 }
00522 memcpy(R.Values(), KX.Values(), xr*numEigen*sizeof(double));
00523 callBLAS.GEMM('N', 'N', xr, numEigen, numEigen, 1.0, R.Values(), xr, S, localSize,
00524 0.0, KX.Values(), xr);
00525
00526 if (localSize == numEigen + workingSize) {
00527 callBLAS.GEMM('N', 'N', xr, numEigen, workingSize, 1.0, HI.Values(), xr,
00528 S + numEigen, localSize, 0.0, P.Values(), xr);
00529 if (M) {
00530 callBLAS.GEMM('N', 'N', xr, numEigen, workingSize, 1.0, MHI.Values(), xr,
00531 S + numEigen, localSize, 0.0, MP.Values(), xr);
00532 }
00533 callBLAS.GEMM('N', 'N', xr, numEigen, workingSize, 1.0, KHI.Values(), xr,
00534 S + numEigen, localSize, 0.0, KP.Values(), xr);
00535 }
00536
00537 if (localSize > numEigen + workingSize) {
00538 memcpy(RI.Values(), PI.Values(), xr*workingSize*sizeof(double));
00539 callBLAS.GEMM('N', 'N', xr, numEigen, workingSize, 1.0, HI.Values(), xr,
00540 S + numEigen, localSize, 0.0, P.Values(), xr);
00541 callBLAS.GEMM('N', 'N', xr, numEigen, workingSize, 1.0, RI.Values(), xr,
00542 S + numEigen + workingSize, localSize, 1.0, P.Values(), xr);
00543 if (M) {
00544 memcpy(RI.Values(), MPI.Values(), xr*workingSize*sizeof(double));
00545 callBLAS.GEMM('N', 'N', xr, numEigen, workingSize, 1.0, MHI.Values(), xr,
00546 S + numEigen, localSize, 0.0, MP.Values(), xr);
00547 callBLAS.GEMM('N', 'N', xr, numEigen, workingSize, 1.0, RI.Values(), xr,
00548 S + numEigen + workingSize, localSize, 1.0, MP.Values(), xr);
00549 }
00550 memcpy(RI.Values(), KPI.Values(), xr*workingSize*sizeof(double));
00551 callBLAS.GEMM('N', 'N', xr, numEigen, workingSize, 1.0, KHI.Values(), xr,
00552 S + numEigen, localSize, 0.0, KP.Values(), xr);
00553 callBLAS.GEMM('N', 'N', xr, numEigen, workingSize, 1.0, RI.Values(), xr,
00554 S + numEigen + workingSize, localSize, 1.0, KP.Values(), xr);
00555 }
00556
00557 if (localSize > numEigen) {
00558 callBLAS.AXPY(xr*numEigen, 1.0, P.Values(), X.Values());
00559 if (M)
00560 callBLAS.AXPY(xr*numEigen, 1.0, MP.Values(), MX.Values());
00561 callBLAS.AXPY(xr*numEigen, 1.0, KP.Values(), KX.Values());
00562 }
00563 timeLocalUpdate += MyWatch.WallTime();
00564
00565
00566 timeResidual -= MyWatch.WallTime();
00567 memcpy(R.Values(), KX.Values(), xr*numEigen*sizeof(double));
00568 for (j = 0; j < numEigen; ++j) {
00569 callBLAS.AXPY(xr, -theta[j], MX.Values() + j*xr, R.Values() + j*xr);
00570 }
00571 timeResidual += MyWatch.WallTime();
00572 residual += numEigen;
00573
00574
00575 timeNorm -= MyWatch.WallTime();
00576 if (vectWeight) {
00577 R.NormWeighted(*vectWeight, normR);
00578 }
00579 else {
00580 R.Norm2(normR);
00581 }
00582
00583 for (j = 0; j < numEigen; ++j) {
00584 normR[j] = (theta[j] == 0.0) ? normR[j] : normR[j]/theta[j];
00585 }
00586 timeNorm += MyWatch.WallTime();
00587
00588
00589 if (verbose > 2) {
00590 accuracyCheck(&X, &MX, &R);
00591 }
00592
00593 if (localSize == numEigen + workingSize)
00594 localSize += workingSize;
00595
00596
00597 timeNorm -= MyWatch.WallTime();
00598 knownEV = 0;
00599 for (i=0; i < numEigen; ++i) {
00600 if (normR[i] < tolEigenSolve) {
00601 lambda[i] = theta[i];
00602 knownEV += 1;
00603 }
00604 }
00605 timeNorm += MyWatch.WallTime();
00606
00607
00608 if (localVerbose > 2) {
00609 memcpy(resHistory + historyCount, normR, numEigen*sizeof(double));
00610 historyCount += numEigen;
00611 }
00612
00613
00614 if (localVerbose > 0) {
00615 cout << " Iteration " << outerIter;
00616 cout << " - Number of converged eigenvectors " << knownEV << endl;
00617 }
00618
00619 if (localVerbose > 1) {
00620 cout << endl;
00621 cout.precision(2);
00622 cout.setf(ios::scientific, ios::floatfield);
00623 for (i=0; i<numEigen; ++i) {
00624 cout << " Iteration " << outerIter << " - Scaled Norm of Residual " << i;
00625 cout << " = " << normR[i] << endl;
00626 }
00627 cout << endl;
00628 cout.precision(2);
00629 for (i=0; i<numEigen; ++i) {
00630 cout << " Iteration " << outerIter << " - Ritz eigenvalue " << i;
00631 cout.setf((fabs(theta[i]) < 0.01) ? ios::scientific : ios::fixed, ios::floatfield);
00632 cout << " = " << theta[i] << endl;
00633 }
00634 cout << endl;
00635 }
00636
00637
00638 if (knownEV >= numEigen) {
00639 if (localVerbose == 1) {
00640 cout << endl;
00641 cout.precision(2);
00642 cout.setf(ios::scientific, ios::floatfield);
00643 for (i=0; i<numEigen; ++i) {
00644 cout << " Iteration " << outerIter << " - Scaled Norm of Residual " << i;
00645 cout << " = " << normR[i] << endl;
00646 }
00647 cout << endl;
00648 }
00649 break;
00650 }
00651
00652
00653 if ((knownEV > 0) && (localSize > numEigen)) {
00654 if (localSize == numEigen + workingSize)
00655 localSize = 2*numEigen - knownEV;
00656 if (localSize == numEigen + 2*workingSize)
00657 localSize = 3*numEigen - 2*knownEV;
00658 }
00659
00660
00661
00662 for (i=0; i<numEigen; ++i) {
00663 if (normR[i] >= tolEigenSolve) {
00664 firstIndex = i;
00665 break;
00666 }
00667 }
00668
00669
00670 if (firstIndex == numEigen-1)
00671 continue;
00672
00673 while (firstIndex < knownEV) {
00674
00675 for (j = firstIndex; j < numEigen; ++j) {
00676 if (normR[j] < tolEigenSolve) {
00677 callFortran.SWAP(1, normR + j, 1, normR + firstIndex, 1);
00678 callFortran.SWAP(xr, X.Values() + j*xr, 1, X.Values() + firstIndex*xr, 1);
00679 callFortran.SWAP(xr, KX.Values() + j*xr, 1, KX.Values() + firstIndex*xr, 1);
00680 callFortran.SWAP(xr, R.Values() + j*xr, 1, R.Values() + firstIndex*xr, 1);
00681 callFortran.SWAP(xr, H.Values() + j*xr, 1, H.Values() + firstIndex*xr, 1);
00682 callFortran.SWAP(xr, KH.Values() + j*xr, 1, KH.Values() + firstIndex*xr, 1);
00683 callFortran.SWAP(xr, P.Values() + j*xr, 1, P.Values() + firstIndex*xr, 1);
00684 callFortran.SWAP(xr, KP.Values() + j*xr, 1, KP.Values() + firstIndex*xr, 1);
00685 if (M) {
00686 callFortran.SWAP(xr, MX.Values() + j*xr, 1, MX.Values() + firstIndex*xr, 1);
00687 callFortran.SWAP(xr, MH.Values() + j*xr, 1, MH.Values() + firstIndex*xr, 1);
00688 callFortran.SWAP(xr, MP.Values() + j*xr, 1, MP.Values() + firstIndex*xr, 1);
00689 }
00690 break;
00691 }
00692 }
00693
00694 for (i = firstIndex; i < numEigen; ++i) {
00695 if (normR[i] >= tolEigenSolve) {
00696 firstIndex = i;
00697 break;
00698 }
00699 }
00700
00701 }
00702
00703 }
00704 timeOuterLoop += MyWatch.WallTime();
00705 highMem = (highMem > currentSize()) ? highMem : currentSize();
00706
00707
00708 delete[] work1;
00709 delete[] work2;
00710 if (vectWeight)
00711 delete vectWeight;
00712
00713
00714 timePostProce -= MyWatch.WallTime();
00715 if ((info == 0) && (knownEV > 0)) {
00716 mySort.sortScalars_Vectors(knownEV, lambda, Q.Values(), Q.MyLength());
00717 }
00718 timePostProce += MyWatch.WallTime();
00719
00720 return (info == 0) ? knownEV : info;
00721
00722 }
00723
00724
00725 void KnyazevLOBPCG::accuracyCheck(const Epetra_MultiVector *X, const Epetra_MultiVector *MX,
00726 const Epetra_MultiVector *R) const {
00727
00728 cout.precision(2);
00729 cout.setf(ios::scientific, ios::floatfield);
00730 double tmp;
00731
00732 int myPid = MyComm.MyPID();
00733
00734 if (X) {
00735 if (M) {
00736 if (MX) {
00737 tmp = myVerify.errorEquality(X, MX, M);
00738 if (myPid == 0)
00739 cout << " >> Difference between MX and M*X = " << tmp << endl;
00740 }
00741 tmp = myVerify.errorOrthonormality(X, M);
00742 if (myPid == 0)
00743 cout << " >> Error in X^T M X - I = " << tmp << endl;
00744 }
00745 else {
00746 tmp = myVerify.errorOrthonormality(X, 0);
00747 if (myPid == 0)
00748 cout << " >> Error in X^T X - I = " << tmp << endl;
00749 }
00750 }
00751
00752 if ((R) && (X)) {
00753 tmp = myVerify.errorOrthogonality(X, R);
00754 if (myPid == 0)
00755 cout << " >> Orthogonality X^T R up to " << tmp << endl;
00756 }
00757
00758 }
00759
00760
00761 void KnyazevLOBPCG::initializeCounters() {
00762
00763 historyCount = 0;
00764 if (resHistory) {
00765 delete[] resHistory;
00766 resHistory = 0;
00767 }
00768
00769 memRequested = 0.0;
00770 highMem = 0.0;
00771
00772 massOp = 0;
00773 numRestart = 0;
00774 outerIter = 0;
00775 precOp = 0;
00776 residual = 0;
00777 stifOp = 0;
00778
00779 timeLocalProj = 0.0;
00780 timeLocalSolve = 0.0;
00781 timeLocalUpdate = 0.0;
00782 timeMassOp = 0.0;
00783 timeNorm = 0.0;
00784 timeOuterLoop = 0.0;
00785 timePostProce = 0.0;
00786 timePrecOp = 0.0;
00787 timeResidual = 0.0;
00788 timeStifOp = 0.0;
00789
00790
00791 }
00792
00793
00794 void KnyazevLOBPCG::algorithmInfo() const {
00795
00796 int myPid = MyComm.MyPID();
00797
00798 if (myPid ==0) {
00799 cout << " Algorithm: LOBPCG (Knyazev's version) with Cholesky-based local eigensolver\n";
00800 cout << " Block Size: " << blockSize << endl;
00801 }
00802
00803 }
00804
00805
00806 void KnyazevLOBPCG::historyInfo() const {
00807
00808 if (resHistory) {
00809 int j;
00810 cout << " Block Size: " << blockSize << endl;
00811 cout << endl;
00812 cout << " Residuals\n";
00813 cout << endl;
00814 cout.precision(4);
00815 cout.setf(ios::scientific, ios::floatfield);
00816 for (j = 0; j < historyCount; ++j) {
00817 cout << resHistory[j] << endl;
00818 }
00819 cout << endl;
00820 }
00821
00822 }
00823
00824
00825 void KnyazevLOBPCG::memoryInfo() const {
00826
00827 int myPid = MyComm.MyPID();
00828
00829 double maxHighMem = 0.0;
00830 double tmp = highMem;
00831 MyComm.MaxAll(&tmp, &maxHighMem, 1);
00832
00833 double maxMemRequested = 0.0;
00834 tmp = memRequested;
00835 MyComm.MaxAll(&tmp, &maxMemRequested, 1);
00836
00837 if (myPid == 0) {
00838 cout.precision(2);
00839 cout.setf(ios::fixed, ios::floatfield);
00840 cout << " Memory requested per processor by the eigensolver = (EST) ";
00841 cout.width(6);
00842 cout << maxMemRequested << " MB " << endl;
00843 cout << endl;
00844 cout << " High water mark in eigensolver = (EST) ";
00845 cout.width(6);
00846 cout << maxHighMem << " MB " << endl;
00847 cout << endl;
00848 }
00849
00850 }
00851
00852
00853 void KnyazevLOBPCG::operationInfo() const {
00854
00855 int myPid = MyComm.MyPID();
00856
00857 if (myPid == 0) {
00858 cout << " --- Operations ---\n\n";
00859 cout << " Total number of mass matrix multiplications = ";
00860 cout.width(9);
00861 cout << massOp << endl;
00862 cout << " Total number of stiffness matrix operations = ";
00863 cout.width(9);
00864 cout << stifOp << endl;
00865 cout << " Total number of preconditioner applications = ";
00866 cout.width(9);
00867 cout << precOp << endl;
00868 cout << " Total number of computed eigen-residuals = ";
00869 cout.width(9);
00870 cout << residual << endl;
00871 cout << "\n";
00872 cout << " Total number of outer iterations = ";
00873 cout.width(9);
00874 cout << outerIter << endl;
00875 cout << " Number of restarts = ";
00876 cout.width(9);
00877 cout << numRestart << endl;
00878 cout << "\n";
00879 }
00880
00881 }
00882
00883
00884 void KnyazevLOBPCG::timeInfo() const {
00885
00886 int myPid = MyComm.MyPID();
00887
00888 if (myPid == 0) {
00889 cout << " --- Timings ---\n\n";
00890 cout.setf(ios::fixed, ios::floatfield);
00891 cout.precision(2);
00892 cout << " Total time for outer loop = ";
00893 cout.width(9);
00894 cout << timeOuterLoop << " s" << endl;
00895 cout << " Time for mass matrix operations = ";
00896 cout.width(9);
00897 cout << timeMassOp << " s ";
00898 cout.width(5);
00899 cout << 100*timeMassOp/timeOuterLoop << " %\n";
00900 cout << " Time for stiffness matrix operations = ";
00901 cout.width(9);
00902 cout << timeStifOp << " s ";
00903 cout.width(5);
00904 cout << 100*timeStifOp/timeOuterLoop << " %\n";
00905 cout << " Time for preconditioner applications = ";
00906 cout.width(9);
00907 cout << timePrecOp << " s ";
00908 cout.width(5);
00909 cout << 100*timePrecOp/timeOuterLoop << " %\n";
00910 cout << " Time for local projection = ";
00911 cout.width(9);
00912 cout << timeLocalProj << " s ";
00913 cout.width(5);
00914 cout << 100*timeLocalProj/timeOuterLoop << " %\n";
00915 cout << " Time for local eigensolve = ";
00916 cout.width(9);
00917 cout << timeLocalSolve << " s ";
00918 cout.width(5);
00919 cout << 100*timeLocalSolve/timeOuterLoop << " %\n";
00920 cout << " Time for local update = ";
00921 cout.width(9);
00922 cout << timeLocalUpdate << " s ";
00923 cout.width(5);
00924 cout << 100*timeLocalUpdate/timeOuterLoop << " %\n";
00925 cout << " Time for residual computations = ";
00926 cout.width(9);
00927 cout << timeResidual << " s ";
00928 cout.width(5);
00929 cout << 100*timeResidual/timeOuterLoop << " %\n";
00930 cout << " Time for residuals norm computations = ";
00931 cout.width(9);
00932 cout << timeNorm << " s ";
00933 cout.width(5);
00934 cout << 100*timeNorm/timeOuterLoop << " %\n";
00935 cout << "\n";
00936 cout << " Total time for post-processing = ";
00937 cout.width(9);
00938 cout << timePostProce << " s\n";
00939 cout << endl;
00940 }
00941
00942 }
00943
00944