00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029 #include "LOBPCG.h"
00030
00031
00032 LOBPCG::LOBPCG(const Epetra_Comm &_Comm, const Epetra_Operator *KK,
00033 const Epetra_Operator *PP, Projector *YHCP, int _blk,
00034 double _tol, int _maxIter, int _verb) :
00035 myVerify(_Comm),
00036 callBLAS(),
00037 callFortran(),
00038 modalTool(_Comm),
00039 mySort(),
00040 MyComm(_Comm),
00041 K(KK),
00042 M(0),
00043 Prec(PP),
00044 YHC(YHCP),
00045 MyWatch(_Comm),
00046 tolEigenSolve(_tol),
00047 maxIterEigenSolve(_maxIter),
00048 blockSize(_blk),
00049 normWeight(0),
00050 verbose(_verb),
00051 historyCount(0),
00052 resHistory(0),
00053 memRequested(0.0),
00054 highMem(0.0),
00055 massOp(0),
00056 numRestart(0),
00057 outerIter(0),
00058 precOp(0),
00059 residual(0),
00060 stifOp(0),
00061 timeLocalProj(0.0),
00062 timeLocalSolve(0.0),
00063 timeLocalUpdate(0.0),
00064 timeMassOp(0.0),
00065 timeNorm(0.0),
00066 timeOrtho(0.0),
00067 timeOuterLoop(0.0),
00068 timePostProce(0.0),
00069 timePrecOp(0.0),
00070 timeResidual(0.0),
00071 timeRestart(0.0),
00072 timeStifOp(0.0)
00073 {
00074
00075 }
00076
00077
00078 LOBPCG::LOBPCG(const Epetra_Comm &_Comm, const Epetra_Operator *KK,
00079 const Epetra_Operator *MM, const Epetra_Operator *PP, Projector *YHCP, int _blk,
00080 double _tol, int _maxIter, int _verb,
00081 double *_weight) :
00082 myVerify(_Comm),
00083 callBLAS(),
00084 callFortran(),
00085 modalTool(_Comm),
00086 mySort(),
00087 MyComm(_Comm),
00088 K(KK),
00089 M(MM),
00090 Prec(PP),
00091 YHC(YHCP),
00092 MyWatch(_Comm),
00093 tolEigenSolve(_tol),
00094 maxIterEigenSolve(_maxIter),
00095 blockSize(_blk),
00096 normWeight(_weight),
00097 verbose(_verb),
00098 historyCount(0),
00099 resHistory(0),
00100 memRequested(0.0),
00101 highMem(0.0),
00102 massOp(0),
00103 numRestart(0),
00104 outerIter(0),
00105 precOp(0),
00106 residual(0),
00107 stifOp(0),
00108 timeLocalProj(0.0),
00109 timeLocalSolve(0.0),
00110 timeLocalUpdate(0.0),
00111 timeMassOp(0.0),
00112 timeNorm(0.0),
00113 timeOrtho(0.0),
00114 timeOuterLoop(0.0),
00115 timePostProce(0.0),
00116 timePrecOp(0.0),
00117 timeResidual(0.0),
00118 timeRestart(0.0),
00119 timeStifOp(0.0)
00120 {
00121
00122 }
00123
00124
00125 LOBPCG::~LOBPCG() {
00126
00127 if (resHistory) {
00128 delete[] resHistory;
00129 resHistory = 0;
00130 }
00131
00132 }
00133
00134
00135 int LOBPCG::solve(int numEigen, Epetra_MultiVector &Q, double *lambda) {
00136
00137
00138
00139
00140
00141
00142
00143
00144
00145
00146
00147
00148
00149
00150
00151
00152
00153
00154
00155
00156
00157
00158
00159
00160
00161
00162
00163
00164
00165
00166
00167
00168
00169
00170
00171
00172
00173
00174
00175
00176
00177
00178
00179
00180
00181
00182
00183
00184
00185
00186 if (numEigen <= 0) {
00187 return 0;
00188 }
00189
00190 int info = myVerify.inputArguments(numEigen, K, M, Prec, Q, minimumSpaceDimension(numEigen));
00191 if (info < 0)
00192 return info;
00193
00194 int myPid = MyComm.MyPID();
00195
00196
00197 Epetra_Vector *vectWeight = 0;
00198 if (normWeight) {
00199 vectWeight = new Epetra_Vector(View, Q.Map(), normWeight);
00200 }
00201
00202 int knownEV = 0;
00203 int localVerbose = verbose*(myPid==0);
00204
00205
00206
00207
00208
00209
00210
00211
00212
00213
00214
00215
00216
00217
00218
00219
00220 int xr = Q.MyLength();
00221 Epetra_MultiVector X(View, Q, numEigen, blockSize);
00222
00223 int tmp;
00224 tmp = (M == 0) ? 6*blockSize*xr : 9*blockSize*xr;
00225
00226 double *work1 = new (nothrow) double[tmp];
00227 if (work1 == 0) {
00228 if (vectWeight)
00229 delete vectWeight;
00230 info = -30;
00231 return info;
00232 }
00233 memRequested += sizeof(double)*tmp/(1024.0*1024.0);
00234
00235 highMem = (highMem > currentSize()) ? highMem : currentSize();
00236
00237 double *tmpD = work1;
00238
00239 Epetra_MultiVector KX(View, Q.Map(), tmpD, xr, blockSize);
00240 tmpD = tmpD + xr*blockSize;
00241
00242 Epetra_MultiVector MX(View, Q.Map(), (M) ? tmpD : X.Values(), xr, blockSize);
00243 tmpD = (M) ? tmpD + xr*blockSize : tmpD;
00244
00245 Epetra_MultiVector R(View, Q.Map(), tmpD, xr, blockSize);
00246 tmpD = tmpD + xr*blockSize;
00247
00248 Epetra_MultiVector H(View, Q.Map(), tmpD, xr, blockSize);
00249 tmpD = tmpD + xr*blockSize;
00250
00251 Epetra_MultiVector KH(View, Q.Map(), tmpD, xr, blockSize);
00252 tmpD = tmpD + xr*blockSize;
00253
00254 Epetra_MultiVector MH(View, Q.Map(), (M) ? tmpD : H.Values(), xr, blockSize);
00255 tmpD = (M) ? tmpD + xr*blockSize : tmpD;
00256
00257 Epetra_MultiVector P(View, Q.Map(), tmpD, xr, blockSize);
00258 tmpD = tmpD + xr*blockSize;
00259
00260 Epetra_MultiVector KP(View, Q.Map(), tmpD, xr, blockSize);
00261 tmpD = tmpD + xr*blockSize;
00262
00263 Epetra_MultiVector MP(View, Q.Map(), (M) ? tmpD : P.Values(), xr, blockSize);
00264
00265
00266
00267
00268
00269
00270
00271
00272
00273
00274
00275 int lwork2;
00276 lwork2 = 4*blockSize + 27*blockSize*blockSize;
00277 double *work2 = new (nothrow) double[lwork2];
00278 if (work2 == 0) {
00279 if (vectWeight)
00280 delete vectWeight;
00281 delete[] work1;
00282 info = -30;
00283 return info;
00284 }
00285
00286 highMem = (highMem > currentSize()) ? highMem : currentSize();
00287
00288 tmpD = work2;
00289
00290 double *theta = tmpD;
00291 tmpD = tmpD + 3*blockSize;
00292
00293 double *normR = tmpD;
00294 tmpD = tmpD + blockSize;
00295
00296 double *MM = tmpD;
00297 tmpD = tmpD + 9*blockSize*blockSize;
00298
00299 double *KK = tmpD;
00300 tmpD = tmpD + 9*blockSize*blockSize;
00301
00302 double *S = tmpD;
00303
00304 memRequested += sizeof(double)*lwork2/(1024.0*1024.0);
00305
00306
00307 if (localVerbose > 2) {
00308 resHistory = new (nothrow) double[maxIterEigenSolve*blockSize];
00309 if (resHistory == 0) {
00310 if (vectWeight)
00311 delete vectWeight;
00312 delete[] work1;
00313 delete[] work2;
00314 info = -30;
00315 return info;
00316 }
00317 historyCount = 0;
00318 }
00319
00320
00321
00322 bool reStart = false;
00323 numRestart = 0;
00324
00325 int localSize = 0;
00326 int twoBlocks = 2*blockSize;
00327 int threeBlocks = 3*blockSize;
00328 int nFound = blockSize;
00329 int i, j;
00330
00331 if (localVerbose > 0) {
00332 cout << endl;
00333 cout << " *|* Problem: ";
00334 if (M)
00335 cout << "K*Q = M*Q D ";
00336 else
00337 cout << "K*Q = Q D ";
00338 if (Prec)
00339 cout << " with preconditioner";
00340 cout << endl;
00341 cout << " *|* Algorithm = LOBPCG" << endl;
00342 cout << " *|* Size of blocks = " << blockSize << endl;
00343 cout << " *|* Number of requested eigenvalues = " << numEigen << endl;
00344 cout.precision(2);
00345 cout.setf(ios::scientific, ios::floatfield);
00346 cout << " *|* Tolerance for convergence = " << tolEigenSolve << endl;
00347 cout << " *|* Norm used for convergence: ";
00348 if (normWeight){
00349 cout << "weighted L2-norm with user-provided weights" << endl;
00350 }
00351 else {
00352 cout << "L^2-norm" << endl;
00353 }
00354 cout << "\n -- Start iterations -- \n";
00355 }
00356
00357 timeOuterLoop -= MyWatch.WallTime();
00358 for (outerIter = 1; outerIter <= maxIterEigenSolve; ++outerIter) {
00359
00360 highMem = (highMem > currentSize()) ? highMem : currentSize();
00361
00362 if ((outerIter == 1) || (reStart == true)) {
00363
00364 reStart = false;
00365 localSize = blockSize;
00366
00367 if (nFound > 0) {
00368
00369 Epetra_MultiVector X2(View, X, blockSize-nFound, nFound);
00370 Epetra_MultiVector MX2(View, MX, blockSize-nFound, nFound);
00371 Epetra_MultiVector KX2(View, KX, blockSize-nFound, nFound);
00372
00373
00374 timeMassOp -= MyWatch.WallTime();
00375 if (M)
00376 M->Apply(X2, MX2);
00377 timeMassOp += MyWatch.WallTime();
00378 massOp += nFound;
00379
00380 if (knownEV > 0) {
00381
00382
00383 Epetra_MultiVector copyQ(View, Q, 0, knownEV);
00384 timeOrtho -= MyWatch.WallTime();
00385 info = modalTool.massOrthonormalize(X, MX, M, copyQ, nFound, 0, R.Values(), 1.0);
00386 timeOrtho += MyWatch.WallTime();
00387
00388 if (info < 0) {
00389 info = -10;
00390 if (vectWeight)
00391 delete vectWeight;
00392 delete[] work1;
00393 delete[] work2;
00394 return info;
00395 }
00396 }
00397
00398
00399 timeStifOp -= MyWatch.WallTime();
00400 K->Apply(X2, KX2);
00401 timeStifOp += MyWatch.WallTime();
00402 stifOp += nFound;
00403
00404 }
00405
00406 }
00407 else {
00408
00409
00410 if (Prec) {
00411 timePrecOp -= MyWatch.WallTime();
00412 Prec->ApplyInverse(R, H);
00413 timePrecOp += MyWatch.WallTime();
00414 precOp += blockSize;
00415 }
00416 else {
00417 memcpy(H.Values(), R.Values(), xr*blockSize*sizeof(double));
00418 }
00419
00420
00421 timeMassOp -= MyWatch.WallTime();
00422 if (M)
00423 M->Apply(H, MH);
00424 timeMassOp += MyWatch.WallTime();
00425 massOp += blockSize;
00426
00427
00428 if (knownEV > 0) {
00429
00430
00431 Epetra_MultiVector copyQ(View, Q, 0, knownEV);
00432 timeOrtho -= MyWatch.WallTime();
00433 modalTool.massOrthonormalize(H, MH, M, copyQ, blockSize, 1, R.Values());
00434 timeOrtho += MyWatch.WallTime();
00435 }
00436
00437
00438 if (YHC) {
00439 YHC->Apply(H, H);
00440 }
00441
00442
00443
00444
00445
00446 timeStifOp -= MyWatch.WallTime();
00447 K->Apply(H, KH);
00448 timeStifOp += MyWatch.WallTime();
00449 stifOp += blockSize;
00450
00451 if (localSize == blockSize)
00452 localSize += blockSize;
00453
00454 }
00455
00456
00457
00458
00459 timeLocalProj -= MyWatch.WallTime();
00460 modalTool.localProjection(blockSize, blockSize, xr, X.Values(), xr, KX.Values(), xr,
00461 KK, localSize, S);
00462 modalTool.localProjection(blockSize, blockSize, xr, X.Values(), xr, MX.Values(), xr,
00463 MM, localSize, S);
00464 if (localSize > blockSize) {
00465 modalTool.localProjection(blockSize, blockSize, xr, X.Values(), xr, KH.Values(), xr,
00466 KK + blockSize*localSize, localSize, S);
00467 modalTool.localProjection(blockSize, blockSize, xr, H.Values(), xr, KH.Values(), xr,
00468 KK + blockSize*localSize + blockSize, localSize, S);
00469 modalTool.localProjection(blockSize, blockSize, xr, X.Values(), xr, MH.Values(), xr,
00470 MM + blockSize*localSize, localSize, S);
00471 modalTool.localProjection(blockSize, blockSize, xr, H.Values(), xr, MH.Values(), xr,
00472 MM + blockSize*localSize + blockSize, localSize, S);
00473 if (localSize > twoBlocks) {
00474 modalTool.localProjection(blockSize, blockSize, xr, X.Values(), xr, KP.Values(), xr,
00475 KK + twoBlocks*localSize, localSize, S);
00476 modalTool.localProjection(blockSize, blockSize, xr, H.Values(), xr, KP.Values(), xr,
00477 KK + twoBlocks*localSize + blockSize, localSize, S);
00478 modalTool.localProjection(blockSize, blockSize, xr, P.Values(), xr, KP.Values(), xr,
00479 KK + twoBlocks*localSize + twoBlocks, localSize, S);
00480 modalTool.localProjection(blockSize, blockSize, xr, X.Values(), xr, MP.Values(), xr,
00481 MM + twoBlocks*localSize, localSize, S);
00482 modalTool.localProjection(blockSize, blockSize, xr, H.Values(), xr, MP.Values(), xr,
00483 MM + twoBlocks*localSize + blockSize, localSize, S);
00484 modalTool.localProjection(blockSize, blockSize, xr, P.Values(), xr, MP.Values(), xr,
00485 MM + twoBlocks*localSize + twoBlocks, localSize, S);
00486 }
00487 }
00488 timeLocalProj += MyWatch.WallTime();
00489
00490
00491 timeLocalSolve -= MyWatch.WallTime();
00492 int nevLocal = localSize;
00493 info = modalTool.directSolver(localSize, KK, localSize, MM, localSize, nevLocal,
00494 S, localSize, theta, localVerbose,
00495 (blockSize == 1) ? 1 : 0);
00496 timeLocalSolve += MyWatch.WallTime();
00497
00498 if (info < 0) {
00499
00500 break;
00501 }
00502
00503
00504 if ((theta[0] < 0.0) || (nevLocal < blockSize)) {
00505 if (localVerbose > 0) {
00506 cout << " Iteration " << outerIter;
00507 cout << "- Failure for spectral decomposition - RESTART with new random search\n";
00508 }
00509 if (blockSize == 1) {
00510 X.Random();
00511 nFound = blockSize;
00512 }
00513 else {
00514 Epetra_MultiVector Xinit(View, X, 1, blockSize-1);
00515 Xinit.Random();
00516 nFound = blockSize - 1;
00517 }
00518 reStart = true;
00519 numRestart += 1;
00520 info = 0;
00521 continue;
00522 }
00523
00524 if ((localSize == twoBlocks) && (nevLocal == blockSize)) {
00525 for (j = 0; j < nevLocal; ++j)
00526 memcpy(S + j*blockSize, S + j*twoBlocks, blockSize*sizeof(double));
00527 localSize = blockSize;
00528 }
00529
00530 if ((localSize == threeBlocks) && (nevLocal <= twoBlocks)) {
00531 for (j = 0; j < nevLocal; ++j)
00532 memcpy(S + j*twoBlocks, S + j*threeBlocks, twoBlocks*sizeof(double));
00533 localSize = twoBlocks;
00534 }
00535
00536
00537 timeResidual -= MyWatch.WallTime();
00538 callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, KX.Values(), xr,
00539 S, localSize, 0.0, R.Values(), xr);
00540 if (localSize >= twoBlocks) {
00541 callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, KH.Values(), xr,
00542 S + blockSize, localSize, 1.0, R.Values(), xr);
00543 if (localSize == threeBlocks) {
00544 callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, KP.Values(), xr,
00545 S + twoBlocks, localSize, 1.0, R.Values(), xr);
00546 }
00547 }
00548 for (j = 0; j < blockSize; ++j)
00549 callBLAS.SCAL(localSize, theta[j], S + j*localSize);
00550 callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, -1.0, MX.Values(), xr,
00551 S, localSize, 1.0, R.Values(), xr);
00552 if (localSize >= twoBlocks) {
00553 callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, -1.0, MH.Values(), xr,
00554 S + blockSize, localSize, 1.0, R.Values(), xr);
00555 if (localSize == threeBlocks) {
00556 callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, -1.0, MP.Values(), xr,
00557 S + twoBlocks, localSize, 1.0, R.Values(), xr);
00558 }
00559 }
00560 for (j = 0; j < blockSize; ++j)
00561 callBLAS.SCAL(localSize, 1.0/theta[j], S + j*localSize);
00562 timeResidual += MyWatch.WallTime();
00563
00564
00565 timeNorm -= MyWatch.WallTime();
00566 if (vectWeight)
00567 R.NormWeighted(*vectWeight, normR);
00568 else
00569 R.Norm2(normR);
00570
00571
00572
00573 nFound = 0;
00574 for (j = 0; j < blockSize; ++j) {
00575 normR[j] = (theta[j] == 0.0) ? normR[j] : normR[j]/theta[j];
00576 if (normR[j] < tolEigenSolve)
00577 nFound += 1;
00578 }
00579 timeNorm += MyWatch.WallTime();
00580
00581
00582 if (localVerbose > 2) {
00583 memcpy(resHistory + historyCount*blockSize, normR, blockSize*sizeof(double));
00584 historyCount += 1;
00585 }
00586
00587
00588 if (localVerbose > 0) {
00589 cout << " Iteration " << outerIter << " - Number of converged eigenvectors ";
00590 cout << knownEV + nFound << endl;
00591 }
00592
00593 if (localVerbose > 1) {
00594 cout << endl;
00595 cout.precision(2);
00596 cout.setf(ios::scientific, ios::floatfield);
00597 for (i=0; i<blockSize; ++i) {
00598 cout << " Iteration " << outerIter << " - Scaled Norm of Residual " << i;
00599 cout << " = " << normR[i] << endl;
00600 }
00601 cout << endl;
00602 cout.precision(2);
00603 for (i=0; i<blockSize; ++i) {
00604 cout << " Iteration " << outerIter << " - Ritz eigenvalue " << i;
00605 cout.setf((fabs(theta[i]) < 0.01) ? ios::scientific : ios::fixed, ios::floatfield);
00606 cout << " = " << theta[i] << endl;
00607 }
00608 cout << endl;
00609 }
00610
00611 if (nFound == 0) {
00612
00613
00614 timeLocalUpdate -= MyWatch.WallTime();
00615 memcpy(R.Values(), X.Values(), xr*blockSize*sizeof(double));
00616 callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, R.Values(), xr, S, localSize,
00617 0.0, X.Values(), xr);
00618 memcpy(R.Values(), KX.Values(), xr*blockSize*sizeof(double));
00619 callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, R.Values(), xr, S, localSize,
00620 0.0, KX.Values(), xr);
00621 if (M) {
00622 memcpy(R.Values(), MX.Values(), xr*blockSize*sizeof(double));
00623 callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, R.Values(), xr, S, localSize,
00624 0.0, MX.Values(), xr);
00625 }
00626 if (localSize == twoBlocks) {
00627 callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, H.Values(), xr,
00628 S+blockSize, localSize, 0.0, P.Values(), xr);
00629 callBLAS.AXPY(xr*blockSize, 1.0, P.Values(), X.Values());
00630 callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, KH.Values(), xr,
00631 S+blockSize, localSize, 0.0, KP.Values(), xr);
00632 callBLAS.AXPY(xr*blockSize, 1.0, KP.Values(), KX.Values());
00633 if (M) {
00634 callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, MH.Values(), xr,
00635 S+blockSize, localSize, 0.0, MP.Values(), xr);
00636 callBLAS.AXPY(xr*blockSize, 1.0, MP.Values(), MX.Values());
00637 }
00638 }
00639 if (localSize == threeBlocks) {
00640 memcpy(R.Values(), P.Values(), xr*blockSize*sizeof(double));
00641 callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, R.Values(), xr,
00642 S + twoBlocks, localSize, 0.0, P.Values(), xr);
00643 callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, H.Values(), xr,
00644 S + blockSize, localSize, 1.0, P.Values(), xr);
00645 callBLAS.AXPY(xr*blockSize, 1.0, P.Values(), X.Values());
00646 memcpy(R.Values(), KP.Values(), xr*blockSize*sizeof(double));
00647 callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, R.Values(), xr,
00648 S + twoBlocks, localSize, 0.0, KP.Values(), xr);
00649 callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, KH.Values(), xr,
00650 S + blockSize, localSize, 1.0, KP.Values(), xr);
00651 callBLAS.AXPY(xr*blockSize, 1.0, KP.Values(), KX.Values());
00652 if (M) {
00653 memcpy(R.Values(), MP.Values(), xr*blockSize*sizeof(double));
00654 callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, R.Values(), xr,
00655 S + twoBlocks, localSize, 0.0, MP.Values(), xr);
00656 callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, MH.Values(), xr,
00657 S + blockSize, localSize, 1.0, MP.Values(), xr);
00658 callBLAS.AXPY(xr*blockSize, 1.0, MP.Values(), MX.Values());
00659 }
00660 }
00661 timeLocalUpdate += MyWatch.WallTime();
00662
00663 timeResidual -= MyWatch.WallTime();
00664 memcpy(R.Values(), KX.Values(), blockSize*xr*sizeof(double));
00665 for (j = 0; j < blockSize; ++j)
00666 callBLAS.AXPY(xr, -theta[j], MX.Values() + j*xr, R.Values() + j*xr);
00667 timeResidual += MyWatch.WallTime();
00668
00669 if (verbose > 2) {
00670 if (knownEV == 0) {
00671 accuracyCheck(&X, &MX, &R, 0, 0, 0);
00672 }
00673 else {
00674 Epetra_MultiVector copyQ(View, Q, 0, knownEV);
00675 accuracyCheck(&X, &MX, &R, ©Q, (localSize>blockSize) ? &H : 0,
00676 (localSize>twoBlocks) ? &P : 0);
00677 }
00678 }
00679 if (localSize < threeBlocks)
00680 localSize += blockSize;
00681 continue;
00682 }
00683
00684
00685 int firstIndex = blockSize;
00686 for (j = 0; j < blockSize; ++j) {
00687 if (normR[j] >= tolEigenSolve) {
00688 firstIndex = j;
00689 break;
00690 }
00691 }
00692 while (firstIndex < nFound) {
00693 for (j = firstIndex; j < blockSize; ++j) {
00694 if (normR[j] < tolEigenSolve) {
00695
00696 callFortran.SWAP(localSize, S + j*localSize, 1, S + firstIndex*localSize, 1);
00697 callFortran.SWAP(1, theta + j, 1, theta + firstIndex, 1);
00698 callFortran.SWAP(1, normR + j, 1, normR + firstIndex, 1);
00699 break;
00700 }
00701 }
00702 for (j = 0; j < blockSize; ++j) {
00703 if (normR[j] >= tolEigenSolve) {
00704 firstIndex = j;
00705 break;
00706 }
00707 }
00708 }
00709
00710
00711 memcpy(lambda + knownEV, theta, nFound*sizeof(double));
00712
00713
00714 if (knownEV + nFound >= numEigen) {
00715 callBLAS.GEMM('N', 'N', xr, nFound, blockSize, 1.0, X.Values(), xr,
00716 S, localSize, 0.0, R.Values(), xr);
00717 if (localSize >= twoBlocks) {
00718 callBLAS.GEMM('N', 'N', xr, nFound, blockSize, 1.0, H.Values(), xr,
00719 S + blockSize, localSize, 1.0, R.Values(), xr);
00720 if (localSize == threeBlocks) {
00721 callBLAS.GEMM('N', 'N', xr, nFound, blockSize, 1.0, P.Values(), xr,
00722 S + twoBlocks, localSize, 1.0, R.Values(), xr);
00723 }
00724 }
00725 memcpy(Q.Values() + knownEV*xr, R.Values(), nFound*xr*sizeof(double));
00726 knownEV += nFound;
00727 if (localVerbose == 1) {
00728 cout << endl;
00729 cout.precision(2);
00730 cout.setf(ios::scientific, ios::floatfield);
00731 for (i=0; i<blockSize; ++i) {
00732 cout << " Iteration " << outerIter << " - Scaled Norm of Residual " << i;
00733 cout << " = " << normR[i] << endl;
00734 }
00735 cout << endl;
00736 }
00737 break;
00738 }
00739
00740
00741 callBLAS.GEMM('N', 'N', xr, nFound, blockSize, 1.0, X.Values(), xr,
00742 S, localSize, 0.0, Q.Values() + knownEV*xr, xr);
00743 if (localSize >= twoBlocks) {
00744 callBLAS.GEMM('N', 'N', xr, nFound, blockSize, 1.0, H.Values(), xr,
00745 S + blockSize, localSize, 1.0, Q.Values() + knownEV*xr, xr);
00746 if (localSize >= threeBlocks)
00747 callBLAS.GEMM('N', 'N', xr, nFound, blockSize, 1.0, P.Values(), xr,
00748 S + twoBlocks, localSize, 1.0, Q.Values() + knownEV*xr, xr);
00749 }
00750 knownEV += nFound;
00751
00752
00753 timeRestart -= MyWatch.WallTime();
00754 int leftOver = (nevLocal < blockSize + nFound) ? nevLocal - nFound : blockSize;
00755 double *Snew = S + nFound*localSize;
00756 memcpy(R.Values(), X.Values(), blockSize*xr*sizeof(double));
00757 callBLAS.GEMM('N', 'N', xr, leftOver, blockSize, 1.0, R.Values(), xr,
00758 Snew, localSize, 0.0, X.Values(), xr);
00759 memcpy(R.Values(), KX.Values(), blockSize*xr*sizeof(double));
00760 callBLAS.GEMM('N', 'N', xr, leftOver, blockSize, 1.0, R.Values(), xr,
00761 Snew, localSize, 0.0, KX.Values(), xr);
00762 if (M) {
00763 memcpy(R.Values(), MX.Values(), blockSize*xr*sizeof(double));
00764 callBLAS.GEMM('N', 'N', xr, leftOver, blockSize, 1.0, R.Values(), xr,
00765 Snew, localSize, 0.0, MX.Values(), xr);
00766 }
00767 if (localSize >= twoBlocks) {
00768 callBLAS.GEMM('N', 'N', xr, leftOver, blockSize, 1.0, H.Values(), xr,
00769 Snew+blockSize, localSize, 1.0, X.Values(), xr);
00770 callBLAS.GEMM('N', 'N', xr, leftOver, blockSize, 1.0, KH.Values(), xr,
00771 Snew+blockSize, localSize, 1.0, KX.Values(), xr);
00772 if (M) {
00773 callBLAS.GEMM('N','N', xr, leftOver, blockSize, 1.0, MH.Values(), xr,
00774 Snew+blockSize, localSize, 1.0, MX.Values(), xr);
00775 }
00776 if (localSize >= threeBlocks) {
00777 callBLAS.GEMM('N', 'N', xr, leftOver, blockSize, 1.0, P.Values(), xr,
00778 Snew+twoBlocks, localSize, 1.0, X.Values(), xr);
00779 callBLAS.GEMM('N', 'N', xr, leftOver, blockSize, 1.0, KP.Values(), xr,
00780 Snew+twoBlocks, localSize, 1.0, KX.Values(), xr);
00781 if (M) {
00782 callBLAS.GEMM('N', 'N', xr, leftOver, blockSize, 1.0, MP.Values(), xr,
00783 Snew+twoBlocks, localSize, 1.0, MX.Values(), xr);
00784 }
00785 }
00786 }
00787 if (nevLocal < blockSize + nFound) {
00788
00789 Epetra_MultiVector Xtmp(View, X, leftOver, blockSize - leftOver);
00790 Xtmp.Random();
00791 }
00792 else {
00793 nFound = 0;
00794 }
00795 reStart = true;
00796 timeRestart += MyWatch.WallTime();
00797
00798 }
00799 timeOuterLoop += MyWatch.WallTime();
00800 highMem = (highMem > currentSize()) ? highMem : currentSize();
00801
00802
00803 delete[] work1;
00804 delete[] work2;
00805 if (vectWeight)
00806 delete vectWeight;
00807
00808
00809 timePostProce -= MyWatch.WallTime();
00810 if ((info == 0) && (knownEV > 0)) {
00811 mySort.sortScalars_Vectors(knownEV, lambda, Q.Values(), Q.MyLength());
00812 }
00813 timePostProce += MyWatch.WallTime();
00814
00815 return (info == 0) ? knownEV : info;
00816
00817 }
00818
00819
00820 void LOBPCG::accuracyCheck(const Epetra_MultiVector *X, const Epetra_MultiVector *MX,
00821 const Epetra_MultiVector *R, const Epetra_MultiVector *Q,
00822 const Epetra_MultiVector *H, const Epetra_MultiVector *P) const {
00823
00824 cout.precision(2);
00825 cout.setf(ios::scientific, ios::floatfield);
00826 double tmp;
00827
00828 int myPid = MyComm.MyPID();
00829
00830 if (X) {
00831 if (M) {
00832 if (MX) {
00833 tmp = myVerify.errorEquality(X, MX, M);
00834 if (myPid == 0)
00835 cout << " >> Difference between MX and M*X = " << tmp << endl;
00836 }
00837 tmp = myVerify.errorOrthonormality(X, M);
00838 if (myPid == 0)
00839 cout << " >> Error in X^T M X - I = " << tmp << endl;
00840 }
00841 else {
00842 tmp = myVerify.errorOrthonormality(X, 0);
00843 if (myPid == 0)
00844 cout << " >> Error in X^T X - I = " << tmp << endl;
00845 }
00846 }
00847
00848 if ((R) && (X)) {
00849 tmp = myVerify.errorOrthogonality(X, R);
00850 if (myPid == 0)
00851 cout << " >> Orthogonality X^T R up to " << tmp << endl;
00852 }
00853
00854 if (Q == 0)
00855 return;
00856
00857 if (M) {
00858 tmp = myVerify.errorOrthonormality(Q, M);
00859 if (myPid == 0)
00860 cout << " >> Error in Q^T M Q - I = " << tmp << endl;
00861 if (X) {
00862 tmp = myVerify.errorOrthogonality(Q, X, M);
00863 if (myPid == 0)
00864 cout << " >> Orthogonality Q^T M X up to " << tmp << endl;
00865 }
00866 if (H) {
00867 tmp = myVerify.errorOrthogonality(Q, H, M);
00868 if (myPid == 0)
00869 cout << " >> Orthogonality Q^T M H up to " << tmp << endl;
00870 }
00871 if (P) {
00872 tmp = myVerify.errorOrthogonality(Q, P, M);
00873 if (myPid == 0)
00874 cout << " >> Orthogonality Q^T M P up to " << tmp << endl;
00875 }
00876 }
00877 else {
00878 tmp = myVerify.errorOrthonormality(Q, 0);
00879 if (myPid == 0)
00880 cout << " >> Error in Q^T Q - I = " << tmp << endl;
00881 if (X) {
00882 tmp = myVerify.errorOrthogonality(Q, X, 0);
00883 if (myPid == 0)
00884 cout << " >> Orthogonality Q^T X up to " << tmp << endl;
00885 }
00886 if (H) {
00887 tmp = myVerify.errorOrthogonality(Q, H, 0);
00888 if (myPid == 0)
00889 cout << " >> Orthogonality Q^T H up to " << tmp << endl;
00890 }
00891 if (P) {
00892 tmp = myVerify.errorOrthogonality(Q, P, 0);
00893 if (myPid == 0)
00894 cout << " >> Orthogonality Q^T P up to " << tmp << endl;
00895 }
00896 }
00897
00898 }
00899
00900
00901 void LOBPCG::initializeCounters() {
00902
00903 historyCount = 0;
00904 if (resHistory) {
00905 delete[] resHistory;
00906 resHistory = 0;
00907 }
00908
00909 memRequested = 0.0;
00910 highMem = 0.0;
00911
00912 massOp = 0;
00913 numRestart = 0;
00914 outerIter = 0;
00915 precOp = 0;
00916 residual = 0;
00917 stifOp = 0;
00918
00919 timeLocalProj = 0.0;
00920 timeLocalSolve = 0.0;
00921 timeLocalUpdate = 0.0;
00922 timeMassOp = 0.0;
00923 timeNorm = 0.0;
00924 timeOrtho = 0.0;
00925 timeOuterLoop = 0.0;
00926 timePostProce = 0.0;
00927 timePrecOp = 0.0;
00928 timeResidual = 0.0;
00929 timeRestart = 0.0;
00930 timeStifOp = 0.0;
00931
00932
00933 }
00934
00935
00936 void LOBPCG::algorithmInfo() const {
00937
00938 int myPid = MyComm.MyPID();
00939
00940 if (myPid ==0) {
00941 cout << " Algorithm: LOBPCG (block version) with Cholesky-based local eigensolver\n";
00942 cout << " Block Size: " << blockSize << endl;
00943 }
00944
00945 }
00946
00947
00948 void LOBPCG::historyInfo() const {
00949
00950 if (resHistory) {
00951 int j;
00952 cout << " Block Size: " << blockSize << endl;
00953 cout << endl;
00954 cout << " Residuals\n";
00955 cout << endl;
00956 cout.precision(4);
00957 cout.setf(ios::scientific, ios::floatfield);
00958 for (j = 0; j < historyCount; ++j) {
00959 int ii;
00960 for (ii = 0; ii < blockSize; ++ii)
00961 cout << resHistory[blockSize*j + ii] << endl;
00962 }
00963 cout << endl;
00964 }
00965
00966 }
00967
00968
00969 void LOBPCG::memoryInfo() const {
00970
00971 int myPid = MyComm.MyPID();
00972
00973 double maxHighMem = 0.0;
00974 double tmp = highMem;
00975 MyComm.MaxAll(&tmp, &maxHighMem, 1);
00976
00977 double maxMemRequested = 0.0;
00978 tmp = memRequested;
00979 MyComm.MaxAll(&tmp, &maxMemRequested, 1);
00980
00981 if (myPid == 0) {
00982 cout.precision(2);
00983 cout.setf(ios::fixed, ios::floatfield);
00984 cout << " Memory requested per processor by the eigensolver = (EST) ";
00985 cout.width(6);
00986 cout << maxMemRequested << " MB " << endl;
00987 cout << endl;
00988 cout << " High water mark in eigensolver = (EST) ";
00989 cout.width(6);
00990 cout << maxHighMem << " MB " << endl;
00991 cout << endl;
00992 }
00993
00994 }
00995
00996
00997 void LOBPCG::operationInfo() const {
00998
00999 int myPid = MyComm.MyPID();
01000
01001 if (myPid == 0) {
01002 cout << " --- Operations ---\n\n";
01003 cout << " Total number of mass matrix multiplications = ";
01004 cout.width(9);
01005 cout << massOp + modalTool.getNumProj_MassMult() + modalTool.getNumNorm_MassMult() << endl;
01006 cout << " Mass multiplications in solver loop = ";
01007 cout.width(9);
01008 cout << massOp << endl;
01009 cout << " Mass multiplications for orthogonalization = ";
01010 cout.width(9);
01011 cout << modalTool.getNumProj_MassMult() << endl;
01012 cout << " Mass multiplications for normalization = ";
01013 cout.width(9);
01014 cout << modalTool.getNumNorm_MassMult() << endl;
01015 cout << " Total number of stiffness matrix operations = ";
01016 cout.width(9);
01017 cout << stifOp << endl;
01018 cout << " Total number of preconditioner applications = ";
01019 cout.width(9);
01020 cout << precOp << endl;
01021 cout << " Total number of computed eigen-residuals = ";
01022 cout.width(9);
01023 cout << residual << endl;
01024 cout << "\n";
01025 cout << " Total number of outer iterations = ";
01026 cout.width(9);
01027 cout << outerIter << endl;
01028 cout << " Number of restarts = ";
01029 cout.width(9);
01030 cout << numRestart << endl;
01031 cout << "\n";
01032 }
01033
01034 }
01035
01036
01037 void LOBPCG::timeInfo() const {
01038
01039 int myPid = MyComm.MyPID();
01040
01041 if (myPid == 0) {
01042 cout << " --- Timings ---\n\n";
01043 cout.setf(ios::fixed, ios::floatfield);
01044 cout.precision(2);
01045 cout << " Total time for outer loop = ";
01046 cout.width(9);
01047 cout << timeOuterLoop << " s" << endl;
01048 cout << " Time for mass matrix operations = ";
01049 cout.width(9);
01050 cout << timeMassOp << " s ";
01051 cout.width(5);
01052 cout << 100*timeMassOp/timeOuterLoop << " %\n";
01053 cout << " Time for stiffness matrix operations = ";
01054 cout.width(9);
01055 cout << timeStifOp << " s ";
01056 cout.width(5);
01057 cout << 100*timeStifOp/timeOuterLoop << " %\n";
01058 cout << " Time for preconditioner applications = ";
01059 cout.width(9);
01060 cout << timePrecOp << " s ";
01061 cout.width(5);
01062 cout << 100*timePrecOp/timeOuterLoop << " %\n";
01063 cout << " Time for orthogonalizations = ";
01064 cout.width(9);
01065 cout << timeOrtho << " s ";
01066 cout.width(5);
01067 cout << 100*timeOrtho/timeOuterLoop << " %\n";
01068 cout << " Projection step : ";
01069 cout.width(9);
01070 cout << modalTool.getTimeProj() << " s\n";
01071 cout << " Q^T mult. :: ";
01072 cout.width(9);
01073 cout << modalTool.getTimeProj_QtMult() << " s\n";
01074 cout << " Q mult. :: ";
01075 cout.width(9);
01076 cout << modalTool.getTimeProj_QMult() << " s\n";
01077 cout << " Mass mult. :: ";
01078 cout.width(9);
01079 cout << modalTool.getTimeProj_MassMult() << " s\n";
01080 cout << " Normalization step : ";
01081 cout.width(9);
01082 cout << modalTool.getTimeNorm() << " s\n";
01083 cout << " Mass mult. :: ";
01084 cout.width(9);
01085 cout << modalTool.getTimeNorm_MassMult() << " s\n";
01086 cout << " Time for local projection = ";
01087 cout.width(9);
01088 cout << timeLocalProj << " s ";
01089 cout.width(5);
01090 cout << 100*timeLocalProj/timeOuterLoop << " %\n";
01091 cout << " Time for local eigensolve = ";
01092 cout.width(9);
01093 cout << timeLocalSolve << " s ";
01094 cout.width(5);
01095 cout << 100*timeLocalSolve/timeOuterLoop << " %\n";
01096 cout << " Time for local update = ";
01097 cout.width(9);
01098 cout << timeLocalUpdate << " s ";
01099 cout.width(5);
01100 cout << 100*timeLocalUpdate/timeOuterLoop << " %\n";
01101 cout << " Time for residual computations = ";
01102 cout.width(9);
01103 cout << timeResidual << " s ";
01104 cout.width(5);
01105 cout << 100*timeResidual/timeOuterLoop << " %\n";
01106 cout << " Time for residuals norm computations = ";
01107 cout.width(9);
01108 cout << timeNorm << " s ";
01109 cout.width(5);
01110 cout << 100*timeNorm/timeOuterLoop << " %\n";
01111 cout << " Time for restarting space definition = ";
01112 cout.width(9);
01113 cout << timeRestart << " s ";
01114 cout.width(5);
01115 cout << 100*timeRestart/timeOuterLoop << " %\n";
01116 cout << "\n";
01117 cout << " Total time for post-processing = ";
01118 cout.width(9);
01119 cout << timePostProce << " s\n";
01120 cout << endl;
01121 }
01122
01123 }
01124
01125