00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050 #include <iomanip>
00051 #include "rlog/rlog.h"
00052 #include "Teuchos_ParameterList.hpp"
00053 #include "JDBSYM.h"
00054
00055 JDBSYM::JDBSYM(const Epetra_Comm &_Comm, const Epetra_Operator *KK,
00056 const Epetra_Operator *MM, const Epetra_Operator *PP,
00057 const Epetra_Operator *YHCP,
00058 Teuchos::ParameterList& params) :
00059 myVerify(_Comm),
00060 callBLAS(),
00061 callLAPACK(),
00062 ortho(),
00063 modalTool(_Comm),
00064 mySort(),
00065 MyComm(_Comm),
00066 K(KK),
00067 M(MM),
00068 Prec(PP),
00069 MyWatch(_Comm),
00070 YHC(YHCP),
00071 linSolver(0),
00072 projK(0),
00073 projP(0),
00074 historyCount(0),
00075 resHistory(0),
00076 maxSpaceSize(0),
00077 sumSpaceSize(0),
00078 spaceSizeHistory(0),
00079 maxIterPCG(0),
00080 sumIterPCG(0),
00081 iterPCGHistory(0),
00082 memRequested(0.0),
00083 highMem(0.0),
00084 numCorrectionSolve(0),
00085 numOrtho(0),
00086 outerIter(0),
00087 precOp(0),
00088 residual(0),
00089 stifOp(0),
00090 timeBuildG(0.0),
00091 timeBuildH(0.0),
00092 timeCorrectionRHS(0.0),
00093 timeCorrectionSolve(0.0),
00094 timeLocalProj(0.0),
00095 timeLocalSolve(0.0),
00096 timeLocalUpdate(0.0),
00097 timeNorm(0.0),
00098 timeOrtho(0.0),
00099 timeOuterLoop(0.0),
00100 timePostProce(0.0),
00101 timePrecOp(0.0),
00102 timeResidual(0.0),
00103 timeRestart(0.0),
00104 timeStifOp(0.0),
00105 params_(params)
00106 {}
00107
00108 JDBSYM::~JDBSYM() {
00109 delete[] resHistory;
00110 resHistory = 0;
00111 delete[] spaceSizeHistory;
00112 spaceSizeHistory = 0;
00113 delete[] iterPCGHistory;
00114 iterPCGHistory = 0;
00115 delete linSolver;
00116 linSolver = 0;
00117 delete projK;
00118 projK = 0;
00119 delete projP;
00120 projP = 0;
00121 }
00122
00123
00124
00125
00126
00127
00128
00129
00130
00131 int JDBSYM::jdbsym(double tau, double jdtol,
00132 int kmax, int jmax, int jmin, int blockSize, int blkwise,
00133 int V0dim, double *V0,
00134 opType _optype, double eps_tr, double toldecay,
00135 int strategy, int max_it, int max_inner_it,
00136 int clvl,
00137 int &k_conv, double *Q, double *lambda, int &it) {
00138
00139
00140
00141
00142
00143
00144
00145 highMem = (highMem > currentSize()) ? highMem : currentSize();
00146
00147 double* normWeight = 0;
00148
00149
00150
00151 double *V = 0, *Vtmp = 0, *U = 0, *Qb = 0, *Y = 0;
00152 double *s = 0, *Res = 0, *resnrm = 0;
00153 double *matK = 0, *H = 0, *Hlu = 0, *G = 0, *Glu = 0;
00154 double *temp1 = 0, *temp3 = 0;
00155
00156 int *Hpiv = 0, *Gpiv = 0, *idx1 = 0, *idx2 = 0;
00157 int *convind = 0, *keepind = 0, *solvestep = 0;
00158 int *actcorrits = 0;
00159
00160
00161 double *matdummy;
00162
00163
00164 double alpha, it_tol;
00165
00166 int k, j, actblockSize, found, conv, keep, Gisused;
00167 int Misempty, Pisempty, act, cnt, idummy, info;
00168
00169
00170 Epetra_Map MyMap = K->OperatorDomainMap();
00171
00172 int nGlobal = MyMap.NumGlobalElements();
00173 int nLocal = MyMap.NumMyElements();
00174
00175 int myPid = MyComm.MyPID();
00176
00177
00178
00179
00180
00181
00182
00183
00184 Misempty = (M == 0);
00185 Pisempty = (Prec == 0);
00186
00187
00188 if (clvl > 0) {
00189 ostringstream buf;
00190 buf << "JDBSYM parameters:" << endl;
00191 buf << "\tSolving ";
00192 if (Misempty)
00193 buf << "K*x = lambda*x ";
00194 else
00195 buf << "K*x = lambda*M*x ";
00196 if (Pisempty)
00197 buf << "without ";
00198 else
00199 buf << "with ";
00200 buf << "preconditioning" << endl;
00201 buf << "\tN=";
00202 buf.width(10);
00203 buf << nGlobal << " ITMAX=";
00204 buf.width(4);
00205 buf << max_it << endl;
00206 buf << "\tKMAX=";
00207 buf.width(3);
00208 buf << kmax << " JMIN=";
00209 buf.width(3);
00210 buf << jmin << " JMAX=";
00211 buf.width(3);
00212 buf << jmax << " V0DIM=";
00213 buf.width(3);
00214 buf << V0dim << endl;
00215 buf << "\tBLKSIZE=";
00216 buf.width(5);
00217 buf << blockSize << " BLKWISE=";
00218 if (blkwise)
00219 buf << "TRUE\n";
00220 else
00221 buf << "FALSE\n";
00222 buf << "\tTAU=";
00223 buf.precision(4);
00224 buf.setf(ios::scientific, ios::floatfield);
00225 buf << tau << " JDTOL= " << jdtol << " STRATEGY= " << strategy;
00226 buf << " LINSOLVER= QMRS OPTYPE=";
00227 switch (_optype) {
00228 case OP_UNSYM1:
00229 buf << "UNSYM1" << endl;
00230 break;
00231 case OP_UNSYM2:
00232 buf << "UNSYM2" << endl;
00233 break;
00234 case OP_SYM:
00235 buf << "SYM" << endl;
00236 break;
00237 }
00238 buf << "\tLINITMAX=";
00239 buf.width(5);
00240 buf << max_inner_it << " EPS_TR=" << eps_tr << " TOLDECAY=" << toldecay;
00241 rInfo(buf.str().c_str());
00242 }
00243
00244
00245 assert(0 < jdtol);
00246 assert(0 < kmax && kmax <= nGlobal);
00247 assert(0 < jmax && jmax <= nGlobal);
00248 assert(0 < jmin && jmin < jmax);
00249 assert(0 <= max_it);
00250 assert(blockSize <= jmin);
00251 assert(blockSize <= jmax - jmin);
00252 assert(0 < blockSize && blockSize <= kmax);
00253 assert(blkwise == 0 || blkwise == 1);
00254 assert(0 <= V0dim && V0dim < jmax);
00255 assert(_optype == OP_UNSYM1 || _optype == OP_UNSYM2 || _optype == OP_SYM);
00256 assert(0 <= max_inner_it);
00257 assert(0.0 <= eps_tr);
00258 assert(1.0 < toldecay);
00259
00260
00261 V = new (nothrow) double[nLocal*jmax];
00262 U = new (nothrow) double[jmax*jmax];
00263 s = new (nothrow) double[jmax];
00264 Res = new (nothrow) double[nLocal*blockSize];
00265 resnrm = new (nothrow) double[blockSize];
00266 matK = new (nothrow) double[jmax*jmax];
00267 Vtmp = new (nothrow) double[jmax*jmax];
00268 assert(V && U && s && Res && resnrm && matK && Vtmp);
00269 memRequested += sizeof(double)*(nLocal*jmax + 3*jmax*jmax + jmax + nLocal*blockSize +
00270 blockSize)/(1024.0*1024.0);
00271
00272
00273 Epetra_MultiVector *VV = new Epetra_MultiVector(View, MyMap, V, nLocal, jmax);
00274 Epetra_MultiVector *QQ = new Epetra_MultiVector(View, MyMap, Q, nLocal, kmax);
00275 Epetra_MultiVector *RR = new Epetra_MultiVector(View, MyMap, Res, nLocal, blockSize);
00276
00277
00278 Epetra_MultiVector *YY = 0;
00279 if (!Pisempty) {
00280 H = new (nothrow) double[kmax*kmax];
00281 Hlu = new (nothrow) double[kmax*kmax];
00282 Hpiv = new (nothrow) int[kmax];
00283 Y = new (nothrow) double[nLocal*kmax];
00284 assert(H && Hlu && Hpiv && Y);
00285 memRequested += sizeof(double)*(nLocal*kmax + 2*kmax*kmax)/(1024.0*1024.0);
00286 memRequested += sizeof(int)*(kmax)/(1024.0*1024.0);
00287 YY = new Epetra_MultiVector(View, MyMap, Y, nLocal, kmax);
00288 }
00289
00290 Gisused = (_optype == OP_UNSYM2 || _optype == OP_SYM) && (!Misempty);
00291 if (Gisused) {
00292 G = new (nothrow) double[kmax*kmax];
00293 Glu = new (nothrow) double[kmax*kmax];
00294 Gpiv = new (nothrow) int[kmax];
00295 assert(G && Glu && Gpiv);
00296 memRequested += sizeof(double)*(2*kmax*kmax)/(1024.0*1024.0);
00297 memRequested += sizeof(int)*(kmax)/(1024.0*1024.0);
00298 }
00299
00300
00301 Epetra_MultiVector *QbQb = 0;
00302 if (!Misempty) {
00303 Qb = new (nothrow) double[nLocal*kmax];
00304 assert(Qb != 0);
00305 memRequested += sizeof(double)*(nLocal*kmax)/(1024.0*1024.0);
00306 QbQb = new Epetra_MultiVector(View, MyMap, Qb, nLocal, kmax);
00307 }
00308
00309 idx1 = new (nothrow) int[jmax];
00310 idx2 = new (nothrow) int[jmax];
00311 assert(idx1 && idx2);
00312 memRequested += sizeof(int)*(2*jmax)/(1024.0*1024.0);
00313
00314
00315 convind = new (nothrow) int[blockSize];
00316 keepind = new (nothrow) int[blockSize];
00317 solvestep = new (nothrow) int[blockSize];
00318 actcorrits = new (nothrow) int[blockSize];
00319 assert(convind && keepind && solvestep && actcorrits);
00320 memRequested += sizeof(int)*(4*blockSize)/(1024.0*1024.0);
00321
00322
00323 int ltemp1 = 8*nLocal;
00324 temp1 = new (nothrow) double[ltemp1];
00325
00326
00327 int ltemp3 = (jmax > kmax) ? jmax : kmax;
00328 temp3 = new (nothrow) double[ltemp3];
00329
00330 assert(temp1 && temp3);
00331 memRequested += sizeof(double)*(nLocal+jmax)/(1024.0*1024.0);
00332
00333
00334 Epetra_MultiVector *copyQ = 0;
00335 Epetra_MultiVector *copyQb = 0;
00336 Epetra_MultiVector *copyY = 0;
00337
00338 if (projK)
00339 delete projK;
00340 projK = new Projectors(MyComm, K, M, Prec, _optype, 0, kmax, Hpiv, Gpiv, 0.0, Hlu, Glu,
00341 copyQ, copyQb, copyY, temp1, temp1 + nLocal);
00342
00343 if (projP)
00344 delete projP;
00345 projP = new Projectors(MyComm, K, M, Prec, _optype, 0, kmax, Hpiv, Gpiv, 0.0, Hlu, Glu,
00346 copyQ, copyQb, copyY, temp1, temp1 + nLocal);
00347
00348
00349 if (linSolver)
00350 delete linSolver;
00351 linSolver = new LinSolvers(projK, projP, temp1 + 2*nLocal);
00352
00353
00354 Epetra_Vector *vectWeight = 0;
00355 if (normWeight) {
00356 vectWeight = new Epetra_Vector(View, MyMap, normWeight);
00357 }
00358
00359
00360
00361
00362
00363
00364
00365
00366 highMem = (highMem > currentSize()) ? highMem : currentSize();
00367 timeOuterLoop -= MyWatch.WallTime();
00368
00369
00370 memcpy(V, V0, nLocal*V0dim*sizeof(double));
00371 j = V0dim;
00372
00373
00374 if (V0dim < blockSize) {
00375 Epetra_MultiVector Vinit(View, *VV, V0dim, blockSize - V0dim);
00376 Vinit.Random();
00377 j = blockSize;
00378 }
00379
00380
00381
00382 if (YHC) {
00383 Epetra_MultiVector Vinit(View, *VV, 0, blockSize);
00384 YHC->Apply(Vinit, Vinit);
00385 }
00386
00387
00388 timeOrtho -= MyWatch.WallTime();
00389 if (Misempty) {
00390 for (cnt = 0; cnt < j; cnt ++) {
00391 Epetra_Vector Vcnt(View, *VV, cnt);
00392 if (cnt > 0) {
00393 Epetra_MultiVector Vprevious(View, *VV, 0, cnt);
00394 ortho.ModifiedGS(&Vcnt, &Vprevious);
00395 }
00396 Vcnt.Norm2(&alpha);
00397 Vcnt.Scale(1.0/alpha);
00398 }
00399 }
00400 else {
00401 for (cnt = 0; cnt < j; cnt ++) {
00402 Epetra_Vector Vcnt(View, *VV, cnt);
00403 if (cnt == 0) {
00404 ortho.IteratedClassicalGS(&Vcnt, &alpha, 0, temp1, M);
00405 } else {
00406 Epetra_MultiVector Vprevious(View, *VV, 0, cnt);
00407 ortho.IteratedClassicalGS(&Vcnt, &alpha, &Vprevious, temp1, M);
00408 }
00409 alpha = 1.0/alpha;
00410 assert(alpha > 0.0);
00411 Vcnt.Scale(alpha);
00412 }
00413 }
00414 timeOrtho += MyWatch.WallTime();
00415 numOrtho += j;
00416
00417
00418
00419
00420 for (cnt = 0; cnt < j; cnt += blockSize) {
00421 int numVec = (cnt + blockSize < j) ? blockSize : j - cnt;
00422 Epetra_MultiVector Vcnt(View, *VV, cnt, numVec);
00423 timeStifOp -= MyWatch.WallTime();
00424 K->Apply(Vcnt, *RR);
00425 timeStifOp += MyWatch.WallTime();
00426 stifOp += numVec;
00427 timeLocalProj -= MyWatch.WallTime();
00428 callBLAS.GEMM('T', 'N', j, numVec, nLocal, 1.0, V, nLocal, Res, nLocal, 0.0, Vtmp, j);
00429 int iC;
00430 for (iC = 0; iC < numVec; ++iC) {
00431 MyComm.SumAll(Vtmp + iC*j, matK + (cnt+iC)*jmax, j);
00432 }
00433 timeLocalProj += MyWatch.WallTime();
00434 }
00435
00436
00437 k = 0; it = 0; actblockSize = blockSize;
00438 for(act = 0; act < blockSize; act ++)
00439 solvestep[act] = 1;
00440
00441
00442
00443
00444
00445
00446
00447 while (it < max_it) {
00448
00449 highMem = (highMem > currentSize()) ? highMem : currentSize();
00450
00451
00452
00453
00454
00455
00456
00457
00458
00459
00460 timeLocalSolve -= MyWatch.WallTime();
00461 info = modalTool.directSolver(j, matK, jmax, 0, 0, j, U, jmax, s, 0, 10);
00462 timeLocalSolve += MyWatch.WallTime();
00463
00464 if (info != 0)
00465 rError("Error solving the projected eigenproblem. dsyev: info = %d", info);
00466 rAssert(info == 0);
00467
00468
00469 sorteig(j, s, U, jmax, tau, temp3, idx1, idx2, strategy);
00470
00471
00472
00473
00474
00475
00476
00477
00478
00479
00480
00481
00482
00483
00484 found = 1;
00485 while(found) {
00486
00487
00488 conv = 0; keep = 0;
00489
00490 timeLocalUpdate -= MyWatch.WallTime();
00491 callBLAS.GEMM('N', 'N', nLocal, actblockSize, j, 1.0, V, nLocal, U, jmax,
00492 0.0, Q+k*nLocal, nLocal);
00493 timeLocalUpdate += MyWatch.WallTime();
00494
00495
00496 timeResidual -= MyWatch.WallTime();
00497 if (Misempty==1) {
00498
00499 Epetra_MultiVector QVec(View, *QQ, k, actblockSize);
00500 K->Apply(QVec, *RR);
00501 for (act = 0; act < actblockSize; ++act)
00502 callBLAS.AXPY(nLocal, -s[act], QVec.Values() + act*nLocal, Res + act*nLocal);
00503 } else {
00504
00505 Epetra_MultiVector QVec(View, *QQ, k, actblockSize);
00506 Epetra_MultiVector QbVec(View, *QbQb, k, actblockSize);
00507 M->Apply(QVec, QbVec);
00508 K->Apply(QVec, *RR);
00509 for (act = 0; act < actblockSize; ++act)
00510 callBLAS.AXPY(nLocal, -s[act], QbVec.Values() + act*nLocal, Res + act*nLocal);
00511 }
00512 timeResidual += MyWatch.WallTime();
00513 residual += actblockSize;
00514
00515
00516 timeBuildG -= MyWatch.WallTime();
00517 if (Gisused) {
00518
00519
00520 callBLAS.GEMM('T', 'N', k+actblockSize, actblockSize, nLocal, 1.0, Q, nLocal,
00521 Q+k*nLocal, nLocal, 0.0, Glu, k+actblockSize);
00522 for (act = 0; act < actblockSize; ++act) {
00523 int idummy = k + act;
00524 MyComm.SumAll(Glu + act*(k+actblockSize), G + idummy*kmax, idummy+1);
00525 int iC;
00526 for (iC = 0; iC < idummy; ++iC)
00527 G[k + act + iC*kmax] = G[iC + idummy*kmax];
00528 }
00529 }
00530 timeBuildG += MyWatch.WallTime();
00531
00532
00533 if (!Pisempty) {
00534 if (!Misempty) {
00535
00536 matdummy = Qb;
00537 } else {
00538
00539 matdummy = Q;
00540 }
00541
00542 timePrecOp -= MyWatch.WallTime();
00543 if (_optype == OP_UNSYM1) {
00544 Epetra_MultiVector QVec(View, *QQ, k, actblockSize);
00545 Epetra_MultiVector YVec(View, *QQ, k, actblockSize);
00546 Prec->ApplyInverse(QVec, YVec);
00547 } else {
00548 Epetra_MultiVector DVec(View, MyMap, matdummy + k*nLocal, nLocal, actblockSize);
00549 Epetra_MultiVector YVec(View, MyMap, Y + k*nLocal, nLocal, actblockSize);
00550 Prec->ApplyInverse(DVec, YVec);
00551 }
00552 timePrecOp += MyWatch.WallTime();
00553 precOp += actblockSize;
00554
00555 timeBuildH -= MyWatch.WallTime();
00556
00557
00558 callBLAS.GEMM('T', 'N', k+actblockSize, actblockSize, nLocal, 1.0, matdummy, nLocal,
00559 Y+k*nLocal, nLocal, 0.0, Hlu, k+actblockSize);
00560 for (act = 0; act < actblockSize; ++act) {
00561 int idummy = k + act;
00562 MyComm.SumAll(Hlu + act*(k+actblockSize), H + idummy*kmax, idummy+1);
00563 }
00564 if (kmax > 1) {
00565
00566
00567 callBLAS.GEMM('T', 'N', k+actblockSize, actblockSize, nLocal, 1.0, Y, nLocal,
00568 matdummy+k*nLocal, nLocal, 0.0, Hlu, k+actblockSize);
00569 for (act = 0; act < actblockSize; ++act) {
00570 int idummy = k + act;
00571 MyComm.SumAll(Hlu + act*(k+actblockSize), temp3, idummy);
00572 int iC;
00573 for (iC = 0; iC < idummy; ++iC)
00574 H[k + act + iC*kmax] = temp3[iC];
00575 }
00576 }
00577 timeBuildH += MyWatch.WallTime();
00578 }
00579
00580
00581 timeNorm -= MyWatch.WallTime();
00582 if (vectWeight)
00583 RR->NormWeighted(*vectWeight, resnrm);
00584 else
00585 RR->Norm2(resnrm);
00586 for (act = 0; act < actblockSize; ++act) {
00587
00588 if (resnrm[act] < jdtol) { convind[conv] = act; conv = conv + 1; } else { keepind[keep] = act; keep = keep + 1; }
00589 }
00590 timeNorm += MyWatch.WallTime();
00591
00592
00593 if (clvl > 2) {
00594 Epetra_MultiVector copyQ(View, *QQ, 0, k + actblockSize);
00595 Epetra_MultiVector copyQb(View, *QbQb, 0, k + actblockSize);
00596 Epetra_MultiVector copyY(View, *YY, 0, k + actblockSize);
00597 Epetra_MultiVector copyV(View, *VV, 0, j);
00598 validate(©Q, ©Qb, ©Y, ©V, H, kmax, _optype, temp1);
00599 }
00600
00601
00602
00603
00604
00605 found = ((blkwise==1 && conv==actblockSize) || (blkwise==0 && conv!=0))
00606 && (j > actblockSize || k == kmax - actblockSize);
00607
00608
00609
00610
00611
00612
00613
00614
00615
00616
00617
00618 if (found) {
00619
00620
00621 for(act = 0; act < conv; act++)
00622 lambda[k+act] = s[convind[act]];
00623
00624
00625 for(act = 0; act < keep; act++)
00626 s[act] = s[keepind[act]];
00627
00628
00629 idummy = j - actblockSize;
00630 for(act = 0; act < idummy; act ++)
00631 s[act+keep] = s[act+actblockSize];
00632
00633
00634 timeLocalUpdate -= MyWatch.WallTime();
00635 for (act = 0; act < nLocal; act += jmax) {
00636 cnt = (act + jmax < nLocal) ? jmax : nLocal - act;
00637 int iC;
00638 for (iC = 0; iC < j; ++iC)
00639 memcpy(Vtmp + iC*cnt, V + act + iC*nLocal, cnt*sizeof(double));
00640 callBLAS.GEMM('N', 'N', cnt, idummy, j, 1.0, Vtmp, cnt, U+actblockSize*jmax, jmax,
00641 0.0, V + act + keep*nLocal, nLocal);
00642 }
00643 timeLocalUpdate += MyWatch.WallTime();
00644
00645
00646 for(act = 0; act < keep; act++)
00647 memcpy(V + act*nLocal, Q + (k+keepind[act])*nLocal, nLocal*sizeof(double));
00648
00649
00650 for(act = 0; act < conv; act++)
00651 if (act != convind[act])
00652 memcpy(Q + (k+act)*nLocal, Q + (k+convind[act])*nLocal, nLocal*sizeof(double));
00653
00654 if (Misempty==0) {
00655 for(act = 0; act < conv; act++)
00656 if (act != convind[act])
00657 memcpy(Qb + (k+act)*nLocal, Qb + (k+convind[act])*nLocal, nLocal*sizeof(double));
00658 }
00659
00660
00661 if (Pisempty==0) {
00662 for (act = 0; act < conv; act++)
00663 if (act != convind[act])
00664 memcpy(Y + (k+act)*nLocal, Y + (k+convind[act])*nLocal, nLocal*sizeof(double));
00665
00666 for(act=0; act < conv; act++) {
00667 if (act != convind[act]) {
00668 idummy = k + act;
00669
00670 memcpy(H + idummy*kmax, H + (k+convind[act])*kmax, idummy*sizeof(double));
00671
00672 H[idummy*(kmax+1)] = H[(k+convind[act])*(kmax+1)];
00673
00674 int iD;
00675 for (iD = 0; iD < idummy; ++iD)
00676 H[idummy + iD*kmax] = H[k+convind[act] + iD*kmax];
00677 }
00678 }
00679 }
00680
00681
00682 j = j - conv;
00683
00684
00685 for (act = 0; act < j; act++) {
00686 memset(matK + act*jmax, 0, act*sizeof(double));
00687 matK[act*jmax + act] = s[act];
00688 }
00689
00690
00691 memset(U, 0, j*jmax*sizeof(double));
00692 for (act = 0; act < j; act++)
00693 U[act*jmax + act] = 1.0;
00694
00695
00696
00697
00698
00699
00700
00701 if (strategy == 1)
00702 for(act = 0; act < conv; act ++)
00703 if (lambda[k+act] > tau)
00704 tau = lambda[k+act];
00705
00706
00707 k = k + conv;
00708
00709
00710 actblockSize = (blockSize > kmax-k) ? kmax-k : blockSize;
00711
00712
00713 if (k == kmax)
00714 break;
00715
00716
00717 for(act = 0; act < keep; act++)
00718 solvestep[act] = solvestep[keepind[act]];
00719
00720 for(act = keep; act < blockSize; act++)
00721 solvestep[act] = 1;
00722
00723 }
00724
00725
00726
00727
00728
00729
00730
00731
00732
00733
00734 timeRestart -= MyWatch.WallTime();
00735 if (j+actblockSize > jmax) {
00736
00737 idummy = j; j = jmin;
00738
00739 for (act = 0; act < nLocal; act += jmax) {
00740 cnt = (act + jmax < nLocal) ? jmax : nLocal - act;
00741 int iC;
00742 for (iC = 0; iC < idummy; ++iC)
00743 memcpy(Vtmp + iC*cnt, V + act + iC*nLocal, cnt*sizeof(double));
00744 callBLAS.GEMM('N', 'N', cnt, j, idummy, 1.0, Vtmp, cnt, U, jmax, 0.0, V + act, nLocal);
00745 }
00746
00747 for (act = 0; act < j; act++) {
00748 memset(matK + act*jmax, 0, act*sizeof(double));
00749 matK[act*jmax + act] = s[act];
00750 memset(U + act*jmax, 0, j*sizeof(double));
00751 U[act*jmax + act] = 1.0;
00752 }
00753
00754 }
00755 timeRestart += MyWatch.WallTime();
00756
00757 }
00758
00759
00760 if (k == kmax)
00761 break;
00762
00763
00764
00765
00766
00767
00768
00769
00770
00771
00772 if (!Pisempty) {
00773 idummy = k + actblockSize;
00774 int iD;
00775 for (iD = 0; iD < idummy; ++iD) {
00776 memcpy(Hlu + iD*kmax, H + iD*kmax, idummy*sizeof(double));
00777 }
00778 callLAPACK.GETRF(idummy, idummy, Hlu, kmax, Hpiv, &info);
00779 if (info != 0)
00780 rError("Factorization of H failed: info=%d", info);
00781 rAssert(info == 0);
00782 }
00783
00784
00785 if (Gisused) {
00786 idummy = k + actblockSize;
00787 int iD;
00788 for (iD = 0; iD < idummy; ++iD) {
00789 memcpy(Glu + iD*kmax, G + iD*kmax, idummy*sizeof(double));
00790 }
00791 callLAPACK.GETRF(idummy, idummy, Glu, kmax, Gpiv, &info);
00792 if (info != 0)
00793 rError("Factorization of G failed: info=%d", info);
00794 rAssert(info == 0);
00795 }
00796
00797
00798 for (act = 0; act < actblockSize; act ++) {
00799
00800
00801
00802 Epetra_Vector vVec(View, *VV, j);
00803 vVec.PutScalar(0.0);
00804
00805
00806
00807
00808
00809 if (resnrm[act] < eps_tr) {
00810 projK->resetPro_theta(s[act]);
00811 projP->resetPro_theta(s[act]);
00812 } else {
00813 projK->resetPro_theta(tau);
00814 projP->resetPro_theta(tau);
00815 }
00816
00817 it_tol = pow(toldecay, (double)(-solvestep[act]));
00818 solvestep[act] = solvestep[act] + 1;
00819
00820
00821 int prok = k + actblockSize;
00822 projK->resetPro_k(prok);
00823 projP->resetPro_k(prok);
00824
00825 if (copyQ)
00826 delete copyQ;
00827 copyQ = new Epetra_MultiVector(View, *QQ, 0, prok);
00828 projK->resetPro_Q(copyQ);
00829 projP->resetPro_Q(copyQ);
00830
00831 if (M) {
00832 if (copyQb)
00833 delete copyQb;
00834 copyQb = new Epetra_MultiVector(View, *QbQb, 0, prok);
00835 projK->resetPro_Qb(copyQb);
00836 projP->resetPro_Qb(copyQb);
00837 }
00838
00839 if (Prec) {
00840 if (copyY)
00841 delete copyY;
00842 copyY = new Epetra_MultiVector(View, *YY, 0, prok);
00843 projK->resetPro_Y(copyY);
00844 projP->resetPro_Y(copyY);
00845 }
00846
00847
00848
00849 timeCorrectionRHS -= MyWatch.WallTime();
00850 Epetra_Vector rVec(View, MyMap, Res + act*nLocal);
00851 projK->MakeRHS(rVec);
00852 timeCorrectionRHS += MyWatch.WallTime();
00853
00854
00855 timeCorrectionSolve -= MyWatch.WallTime();
00856 info = linSolver->solve(rVec, vVec, it_tol, max_inner_it);
00857 timeCorrectionSolve += MyWatch.WallTime();
00858 numCorrectionSolve += 1;
00859
00860
00861 if (info >= 0) {
00862 sumIterPCG += info;
00863 maxIterPCG = (info > maxIterPCG) ? info : maxIterPCG;
00864 actcorrits[act] = info;
00865 }
00866 if (info == -1) {
00867 sumIterPCG += max_inner_it;
00868 maxIterPCG = max_inner_it;
00869 actcorrits[act] = -1;
00870 info = 0;
00871 }
00872 if (info < 0)
00873 rError("Error detected in correction equation solver (info=%d).", info);
00874 rAssert(info >= 0);
00875 info = 0;
00876
00877
00878
00879
00880 if (YHC) {
00881 YHC->Apply(vVec, vVec);
00882 }
00883
00884
00885
00886
00887 timeOrtho -= MyWatch.WallTime();
00888 if (!Misempty) {
00889 ortho.ModifiedGSB(&vVec, copyQ, copyQb);
00890 Epetra_MultiVector copyV(View, *VV, 0, j);
00891 ortho.IteratedClassicalGS(&vVec, &alpha, ©V, temp1, M);
00892 } else {
00893 ortho.ModifiedGS(&vVec, copyQ);
00894 Epetra_MultiVector copyV(View, *VV, 0, j);
00895 ortho.IteratedClassicalGS(&vVec, &alpha, ©V, temp1);
00896 }
00897 alpha = 1.0 / alpha;
00898 vVec.Scale(alpha);
00899 timeOrtho += MyWatch.WallTime();
00900 numOrtho += 1;
00901
00902
00903 Epetra_Vector t1(View, MyMap, temp1);
00904 timeStifOp -= MyWatch.WallTime();
00905 K->Apply(vVec, t1);
00906 timeStifOp += MyWatch.WallTime();
00907 stifOp += 1;
00908 idummy = j+1;
00909 timeLocalProj -= MyWatch.WallTime();
00910 callBLAS.GEMV('T', nLocal, idummy, 1.0, V, nLocal, temp1, 0.0, temp3);
00911 MyComm.SumAll(temp3, matK + j*jmax, idummy);
00912 timeLocalProj += MyWatch.WallTime();
00913
00914
00915 j ++;
00916
00917 }
00918
00919
00920 it += 1;
00921
00922
00923 print_status(clvl*(myPid == 0), it, k, j - blockSize, kmax, blockSize, actblockSize,
00924 s, resnrm, actcorrits);
00925
00926 }
00927 timeOuterLoop += MyWatch.WallTime();
00928
00929 highMem = (highMem > currentSize()) ? highMem : currentSize();
00930
00931 k_conv = k;
00932
00933
00934 if (copyQ)
00935 delete copyQ;
00936 if (copyQb)
00937 delete copyQb;
00938 if (copyY)
00939 delete copyY;
00940 if (VV)
00941 delete VV;
00942 if (QQ)
00943 delete QQ;
00944 if (RR)
00945 delete RR;
00946 if (YY)
00947 delete YY;
00948 if (QbQb)
00949 delete QbQb;
00950 if (V)
00951 delete[] V;
00952 if (Vtmp)
00953 delete[] Vtmp;
00954 if (U)
00955 delete[] U;
00956 if (Qb)
00957 delete[] Qb;
00958 if (Y)
00959 delete[] Y;
00960 if (s)
00961 delete[] s;
00962 if (Res)
00963 delete[] Res;
00964 if (resnrm)
00965 delete[] resnrm;
00966 if (matK)
00967 delete[] matK;
00968 if (H)
00969 delete[] H;
00970 if (Hlu)
00971 delete[] Hlu;
00972 if (Hpiv)
00973 delete[] Hpiv;
00974 if (G)
00975 delete[] G;
00976 if (Glu)
00977 delete[] Glu;
00978 if (Gpiv)
00979 delete[] Gpiv;
00980 if (temp1)
00981 delete[] temp1;
00982 if (temp3)
00983 delete[] temp3;
00984 if (idx1)
00985 delete[] idx1;
00986 if (idx2)
00987 delete[] idx2;
00988 if (convind)
00989 delete[] convind;
00990 if (keepind)
00991 delete[] keepind;
00992 if (solvestep)
00993 delete[] solvestep;
00994 if (actcorrits)
00995 delete[] actcorrits;
00996 if (vectWeight)
00997 delete vectWeight;
00998
00999 return info;
01000
01001 }
01002
01003
01004
01005
01006
01007
01008
01009
01010
01011 void JDBSYM::validate(Epetra_MultiVector *Q, Epetra_MultiVector *MQ, Epetra_MultiVector *Y,
01012 Epetra_MultiVector *V, double *H, int ldh, opType _optype, double *work1) {
01013
01014 int myPid = MyComm.MyPID();
01015
01016 double VAL_TOL = 0.0;
01017 callLAPACK.LAMCH('E', VAL_TOL);
01018 VAL_TOL *= 100.0;
01019
01020 double s = 0.0;
01021 double maxS;
01022
01023 int r, c;
01024
01025 int k = Q->NumVectors();
01026 int j = V->NumVectors();
01027
01028 Epetra_Vector w1(View, Q->Map(), work1);
01029
01030
01031 if (M) {
01032 maxS = 0.0;
01033 for (c = 0; c < k; c ++) {
01034 M->Apply(*((*Q)(c)), w1);
01035 w1.Update(1.0, *((*MQ)(c)), -1.0);
01036 w1.NormInf(&s);
01037 maxS = (fabs(s) > maxS) ? fabs(s) : maxS;
01038 }
01039 if ((myPid == 0) && (maxS > VAL_TOL))
01040 rWarning("Norm of MQ - M*Q = %.4e", maxS);
01041 }
01042
01043
01044 if (M == 0) {
01045 maxS = 0.0;
01046 for (r = 0; r < k; r ++) {
01047 for (c = 0; c < k; c ++) {
01048 (*Q)(r)->Dot(*((*Q)(c)), &s);
01049 if (r == c)
01050 s -= 1.0;
01051 maxS = (fabs(s) > maxS) ? fabs(s) : maxS;
01052 }
01053 }
01054 if ((myPid == 0) && (maxS > VAL_TOL))
01055 rWarning("Norm of Q'*Q - I = %.4e", maxS);
01056 } else {
01057 maxS = 0.0;
01058 for (r = 0; r < k; r ++) {
01059 for (c = 0; c < k; c ++) {
01060 (*MQ)(r)->Dot(*((*Q)(c)), &s);
01061 if (r == c)
01062 s -= 1.0;
01063 maxS = (fabs(s) > maxS) ? fabs(s) : maxS;
01064 }
01065 }
01066 if ((myPid == 0) && (maxS > VAL_TOL))
01067 rWarning("Norm of MQ'*Q - I = %.4e", maxS);
01068 }
01069
01070
01071 if (M == 0) {
01072 maxS = 0.0;
01073 for (r = 0; r < j; r ++) {
01074 for (c = 0; c < j; c ++) {
01075 (*V)(r)->Dot(*((*V)(c)), &s);
01076 if (r == c)
01077 s -= 1.0;
01078 maxS = (fabs(s) > maxS) ? fabs(s) : maxS;
01079 }
01080 }
01081 if ((myPid == 0) && (maxS > VAL_TOL))
01082 rWarning("Norm of V'*V - I = %.4e", maxS);
01083 } else {
01084 for (r = 0; r < j; r ++) {
01085 for (c = 0; c < j; c ++) {
01086 M->Apply(*((*V)(r)), w1);
01087 w1.Dot(*((*V)(c)), &s);
01088 if (r == c)
01089 s -= 1.0;
01090 maxS = (fabs(s) > maxS) ? fabs(s) : maxS;
01091 }
01092 }
01093 if ((myPid == 0) && (maxS > VAL_TOL))
01094 rWarning("Norm of V'*M*V - I = %.4e", maxS);
01095 }
01096
01097
01098 if (Prec) {
01099 maxS = 0.0;
01100 if ((M == 0) || (_optype == OP_UNSYM1)) {
01101 for (c = 0; c < k; c ++) {
01102 Prec->ApplyInverse(*((*Q)(c)), w1);
01103 w1.Update(1.0, *((*Y)(c)), -1.0);
01104 w1.NormInf(&s);
01105 maxS = (fabs(s) > maxS) ? fabs(s) : maxS;
01106 }
01107 if ((myPid == 0) && (maxS > VAL_TOL))
01108 rWarning("Norm of Y - Prec^{-1}*Q = %.4e", maxS);
01109 } else {
01110 for (c = 0; c < k; c ++) {
01111 Prec->ApplyInverse(*((*MQ)(c)), w1);
01112 w1.Update(1.0, *((*Y)(c)), -1.0);
01113 w1.NormInf(&s);
01114 maxS = (fabs(s) > maxS) ? fabs(s) : maxS;
01115 }
01116 if ((myPid == 0) && (maxS > VAL_TOL))
01117 rWarning("Norm of Y - Prec^{-1}*M*Q = %.4e", maxS);
01118 }
01119 }
01120
01121
01122 if (Prec) {
01123 maxS = 0.0;
01124 if (M == 0) {
01125 for (r = 0; r < k; r ++) {
01126 for (c = 0; c < k; c ++) {
01127 (*Q)(r)->Dot(*((*Y)(c)), &s);
01128 s -= H[c*ldh + r];
01129 maxS = (fabs(s) > maxS) ? fabs(s) : maxS;
01130 }
01131 }
01132 if ((myPid == 0) && (maxS > VAL_TOL))
01133 rWarning("Norm of Q'*Y - H = %.4e", maxS);
01134 } else {
01135 for (r = 0; r < k; r ++) {
01136 for (c = 0; c < k; c ++) {
01137 (*MQ)(r)->Dot(*((*Y)(c)), &s);
01138 s -= H[c*ldh + r];
01139 maxS = (fabs(s) > maxS) ? fabs(s) : maxS;
01140 }
01141 }
01142 if ((myPid == 0) && (maxS > VAL_TOL))
01143 rWarning("Norm of MQ'*Y - H = %.4e", maxS);
01144 }
01145 }
01146
01147 }
01148
01149
01150
01151 void JDBSYM::print_status(int clvl, int it, int k, int j, int kmax,
01152 int _blkSize, int actblkSize,
01153 double *s, double *resnrm, int *actcorrits) {
01154 const int max_vals = 5;
01155 ostringstream buf;
01156
01157 if (clvl > 0) {
01158 if (_blkSize == 1) {
01159 if (it == 1) {
01160 buf << " IT K J RES CGIT RITZVALS(1:" << max_vals << ")";
01161 rInfo(buf.str().c_str());
01162 buf.str("");
01163 int idummy = 27 + ( 13 > max_vals*10 ? 13 : max_vals*10);
01164 for (int i = 0; i < idummy; i ++)
01165 buf << '-';
01166 rInfo(buf.str().c_str());
01167 buf.str("");
01168 }
01169 buf << right << scientific
01170 << setw(4) << it << ' '
01171 << setw(3) << k << ' '
01172 << setw(3) << j << ' '
01173 << setw(9) << setprecision(2) << resnrm[0] << ' '
01174 << setw(4) << actcorrits[0];
01175 for (int i = 0; i < (j < max_vals ? j : max_vals); i ++)
01176 buf << ' ' << setw(9) << setprecision(2) << s[i];
01177 } else {
01178 buf << " Iteration " << it;
01179 buf << " - Number of converged eigenvectors " << k << endl;
01180
01181 if (clvl > 2) {
01182 for (int i = 0; i < _blkSize; ++i) {
01183 resHistory[historyCount*_blkSize+i] = (i < actblkSize) ? resnrm[i] : 0.0;
01184 spaceSizeHistory[historyCount] = j;
01185 iterPCGHistory[historyCount*_blkSize+i] = (i < actblkSize) ? actcorrits[i] : 0;
01186 }
01187 historyCount += 1;
01188 }
01189
01190 if (clvl > 1) {
01191 buf.precision(2);
01192 buf.setf(ios::scientific, ios::floatfield);
01193 for (int i = 0; i < actblkSize; ++i) {
01194 buf << " Iteration " << it;
01195 buf << " - Norm of Residual " << i;
01196 buf << " = " << resnrm[i] << endl;
01197 }
01198 for (int i = 0; i < actblkSize; ++i) {
01199 buf << " Iteration " << it << " - Inner iterations for vector " << i;
01200 buf << " = " << actcorrits[i] << endl;
01201 }
01202 for (int i = 0; i < actblkSize; ++i) {
01203 buf << " Iteration " << it << " - Ritz eigenvalue " << i;
01204 buf << " = " << s[i];
01205 }
01206 }
01207 }
01208 }
01209 rInfo(buf.str().c_str());
01210 }
01211
01212
01213
01214
01215
01216
01217
01218
01219
01220
01221
01222
01223
01224
01225
01226
01227
01228
01229
01230
01231
01232
01233
01234 void JDBSYM::sorteig(int j, double S[], double U[], int ldu, double tau,
01235 double dtemp[], int idx1[], int idx2[], int strategy) {
01236
01237 int i;
01238
01239
01240 switch (strategy) {
01241 case 0:
01242 for (i = 0; i < j; i ++)
01243 dtemp[i] = fabs(S[i] - tau);
01244 break;
01245 case 1:
01246 for (i = 0; i < j; i ++)
01247 if (S[i] < tau)
01248 dtemp[i] = Epetra_MaxDouble;
01249 else
01250 dtemp[i] = fabs(S[i] - tau);
01251 break;
01252 default:
01253 assert(0);
01254 }
01255 for (i = 0; i < j; i ++)
01256 idx1[i] = i;
01257
01258
01259 quicksort(j, dtemp, idx1);
01260
01261
01262 for (i = 0; i < j; i ++)
01263 idx2[idx1[i]] = i;
01264
01265
01266 memcpy(dtemp, S, j * sizeof(double));
01267 for (i = 0; i < j; i ++)
01268 S[i] = dtemp[idx1[i]];
01269
01270
01271 for (i = 0; i < j; i ++) {
01272 if (i != idx1[i]) {
01273 memcpy(dtemp, U+i*ldu, j*sizeof(double));
01274 memcpy(U+i*ldu, U+idx1[i]*ldu, j*sizeof(double));
01275 memcpy(U+idx1[i]*ldu, dtemp, j*sizeof(double));
01276 idx1[idx2[i]] = idx1[i];
01277 idx2[idx1[i]] = idx2[i];
01278 }
01279 }
01280
01281 }
01282
01283
01284
01285
01286
01287
01288
01289
01290
01291
01292
01293 void JDBSYM::quicksort(int n, double arr[], int idx[]) {
01294
01295 double v, td;
01296 int i, j, l, r, ti, tos, stack[32];
01297
01298 l = 0; r = n-1; tos = -1;
01299 for (;;) {
01300 while (r > l) {
01301 v = arr[r]; i = l; j = r-1;
01302 for (;;) {
01303 while (arr[i] < v) i ++;
01304
01305 while (arr[j] >= v && j > l) j --;
01306 if (i >= j) break;
01307 td = arr[i]; arr[i] = arr[j]; arr[j] = td;
01308 ti = idx[i]; idx[i] = idx[j]; idx[j] = ti;
01309 }
01310 td = arr[i]; arr[i] = arr[r]; arr[r] = td;
01311 ti = idx[i]; idx[i] = idx[r]; idx[r] = ti;
01312 if (i-l > r-i) { stack[++tos] = l; stack[++tos] = i-1; l = i+1; } else { stack[++tos] = i+1; stack[++tos] = r; r = i-1; }
01313 assert(tos < 32);
01314 }
01315 if (tos == -1) break;
01316 r = stack[tos--]; l = stack[tos--];
01317 }
01318
01319 }
01320
01321 void JDBSYM::set_defaults(Teuchos::ParameterList& params, int kmax) {
01322
01323
01324 params.set("jmin", kmax < 5? 5 : kmax+1);
01325 params.set("jmax", kmax+10);
01326 params.set("tau", 0.0);
01327 params.set("eps_tr", 1e-3);
01328 params.set("tol_decay", 1.5);
01329 params.set("max_it", 200);
01330 params.set("max_inner_it", 50);
01331 params.set("block_size", 1);
01332 params.set("block_wise", 0);
01333 params.set("strategy", 0);
01334 params.set("op_type", "sym");
01335 params.set("clvl", 1);
01336 }
01337
01338 int JDBSYM::solve(int numEigen, Epetra_MultiVector &Q, double *lambda) {
01339
01340 int info = 0;
01341
01342 info = myVerify.inputArguments(numEigen, K, M, Prec, Q, minimumSpaceDimension(numEigen));
01343 if (info < 0)
01344 return info;
01345
01346 if ((params_.get<int>("clvl") > 2) && (MyComm.MyPID() == 0)) {
01347 resHistory = new double[params_.get<int>("max_it")*params_.get<int>("block_size")];
01348 spaceSizeHistory = new int[params_.get<int>("max_it")];
01349 iterPCGHistory = new int[params_.get<int>("max_it")*params_.get<int>("block_size")];
01350 }
01351
01352 JDBSYM::opType optype;
01353 if (params_.get<string>("op_type") == "sym")
01354 optype = OP_SYM;
01355 else if (params_.get<string>("op_type") == "unsym1")
01356 optype = OP_UNSYM1;
01357 else if (params_.get<string>("op_type") == "unsym2")
01358 optype = OP_UNSYM2;
01359 else
01360 assert(false);
01361
01362 int knownEV = 0;
01363
01364 info = jdbsym(params_.get<double>("tau"),
01365 params_.get<double>("tol"),
01366 numEigen,
01367 params_.get<int>("jmax"),
01368 params_.get<int>("jmin"),
01369 params_.get<int>("block_size"),
01370 params_.get<int>("block_wise"),
01371 0,
01372 0,
01373 optype,
01374 params_.get<double>("eps_tr"),
01375 params_.get<double>("tol_decay"),
01376 params_.get<int>("strategy"),
01377 params_.get<int>("max_it"),
01378 params_.get<int>("max_inner_it"),
01379 params_.get<int>("clvl"),
01380 knownEV,
01381 Q.Values(),
01382 lambda,
01383 outerIter);
01384
01385
01386
01387
01388 timePostProce -= MyWatch.WallTime();
01389 if ((info == 0) && (knownEV > 0)) {
01390 mySort.sortScalars_Vectors(knownEV, lambda, Q.Values(), Q.MyLength());
01391 }
01392 timePostProce += MyWatch.WallTime();
01393
01394 return (info == 0) ? knownEV : info;
01395 }
01396
01397
01398 void JDBSYM::initializeCounters() {
01399
01400 historyCount = 0;
01401 if (resHistory) {
01402 delete[] resHistory;
01403 resHistory = 0;
01404 }
01405
01406 maxSpaceSize = 0;
01407 sumSpaceSize = 0;
01408 if (spaceSizeHistory) {
01409 delete[] spaceSizeHistory;
01410 spaceSizeHistory = 0;
01411 }
01412
01413 maxIterPCG = 0;
01414 sumIterPCG = 0;
01415 if (iterPCGHistory) {
01416 delete[] iterPCGHistory;
01417 iterPCGHistory = 0;
01418 }
01419
01420 memRequested = 0.0;
01421 highMem = 0.0;
01422
01423 numCorrectionSolve = 0;
01424 numOrtho = 0;
01425 outerIter = 0;
01426 precOp = 0;
01427 residual = 0;
01428 stifOp = 0;
01429
01430 timeBuildG = 0.0;
01431 timeBuildH = 0.0;
01432 timeCorrectionRHS = 0.0;
01433 timeCorrectionSolve = 0.0;
01434 timeLocalProj = 0.0;
01435 timeLocalSolve = 0.0;
01436 timeLocalUpdate = 0.0;
01437 timeNorm = 0.0;
01438 timeOrtho = 0.0;
01439 timeOuterLoop = 0.0;
01440 timePostProce = 0.0;
01441 timePrecOp = 0.0;
01442 timeResidual = 0.0;
01443 timeRestart = 0.0;
01444 timeStifOp = 0.0;
01445 }
01446
01447
01448 void JDBSYM::algorithmInfo() const {
01449 rDebug("Algorithm: Jacobi-Davidson algorithm (Geus' implementation)");
01450 rDebug("Block Size: %d", params_.get<int>("block_size"));
01451 }
01452
01453 void JDBSYM::historyInfo() const {
01454
01455 if (resHistory) {
01456 ostringstream buf;
01457 int j;
01458 buf << " Block Size: " << params_.get<int>("block_size") << endl;
01459 buf << endl;
01460 buf << " Residuals Search Space Dim. Inner Iter. \n";
01461 buf << endl;
01462 buf.precision(4);
01463 buf.setf(ios::scientific, ios::floatfield);
01464 for (j = 0; j < historyCount; ++j) {
01465 int ii;
01466 for (ii = 0; ii < params_.get<int>("block_size"); ++ii) {
01467 buf << resHistory[params_.get<int>("block_size")*j + ii] << " ";
01468 buf.width(4);
01469 buf << spaceSizeHistory[j] << " ";
01470 buf.width(4);
01471 buf << iterPCGHistory[j] << endl;
01472 }
01473 }
01474 rDebug(buf.str().c_str());
01475 }
01476
01477 }
01478
01479 void JDBSYM::memoryInfo() const {
01480 double maxHighMem = 0.0;
01481 double tmp = highMem;
01482 MyComm.MaxAll(&tmp, &maxHighMem, 1);
01483
01484 double maxMemRequested = 0.0;
01485 tmp = memRequested;
01486 MyComm.MaxAll(&tmp, &maxMemRequested, 1);
01487
01488 rDebug("Memory requested per processor by the eigensolver (EST) = %.2f MB", maxMemRequested);
01489 rDebug("High water mark in eigensolver (EST) = %.2f MB", maxHighMem);
01490 }
01491
01492
01493 void JDBSYM::operationInfo() const {
01494 ostringstream buf;
01495 buf << "JDBSYM operations:\n";
01496 buf << "\tTotal number of stiffness matrix operations = ";
01497 buf.width(9);
01498 buf << stifOp << endl;
01499 buf << "\tTotal number of preconditioner applications = ";
01500 buf.width(9);
01501 buf << precOp << endl;
01502 buf << "\tTotal number of linear solves = ";
01503 buf.width(9);
01504 buf << numCorrectionSolve << endl;
01505 buf.setf(ios::fixed, ios::floatfield);
01506 buf.precision(2);
01507 buf.width(9);
01508 buf << "\t\tAverage number of iter. per solve : ";
01509 buf.width(9);
01510 buf << ((double) sumIterPCG)*params_.get<int>("block_size")/((double) numCorrectionSolve) << endl;
01511 buf << "\t\tMaximum number of iter. per solve : ";
01512 buf.width(9);
01513 buf << maxIterPCG << endl;
01514 buf << "\tTotal number of computed eigen-residuals = ";
01515 buf.width(9);
01516 buf << residual << endl;
01517 buf << "\tTotal number of orthogonalization steps = ";
01518 buf.width(9);
01519 buf << numOrtho << endl;
01520 buf << "\tTotal number of outer iterations = ";
01521 buf.width(9);
01522 buf << outerIter << endl;
01523 buf << "\tMaximum size of search space = ";
01524 buf.width(9);
01525 buf << maxSpaceSize << endl;
01526 buf << "\tAverage size of search space = ";
01527 buf.setf(ios::fixed, ios::floatfield);
01528 buf.precision(2);
01529 buf.width(9);
01530 buf << ((double) sumSpaceSize)/((double) outerIter);
01531 rInfo(buf.str().c_str());
01532 }
01533
01534
01535 void JDBSYM::timeInfo() const {
01536 ostringstream buf;
01537 buf << "JDBSYM timings:\n";
01538 buf.setf(ios::fixed, ios::floatfield);
01539 buf.precision(2);
01540 buf << "\tTotal time for outer loop = ";
01541 buf.width(9);
01542 buf << timeOuterLoop << " s" << endl;
01543 buf << "\t\tTime for stiffness matrix operations = ";
01544 buf.width(9);
01545 buf << timeStifOp << " s ";
01546 buf.width(5);
01547 buf << 100*timeStifOp/timeOuterLoop << " %\n";
01548 buf << "\t\tTime for local projection = ";
01549 buf.width(9);
01550 buf << timeLocalProj << " s ";
01551 buf.width(5);
01552 buf << 100*timeLocalProj/timeOuterLoop << " %\n";
01553 buf << "\t\tTime for local update = ";
01554 buf.width(9);
01555 buf << timeLocalUpdate << " s ";
01556 buf.width(5);
01557 buf << 100*timeLocalUpdate/timeOuterLoop << " %\n";
01558 buf << "\t\tTime for preconditioner applications = ";
01559 buf.width(9);
01560 buf << timePrecOp << " s ";
01561 buf.width(5);
01562 buf << 100*timePrecOp/timeOuterLoop << " %\n";
01563 buf << "\t\tTime for making the correction RHS = ";
01564 buf.width(9);
01565 buf << timeCorrectionRHS << " s ";
01566 buf.width(5);
01567 buf << 100*timeCorrectionRHS/timeOuterLoop << " %\n";
01568 buf << "\t\tTime for solving the correction equation = ";
01569 buf.width(9);
01570 buf << timeCorrectionSolve << " s ";
01571 buf.width(5);
01572 buf << 100*timeCorrectionSolve/timeOuterLoop << " %\n";
01573 buf << "\t\t\tPreconditioner Mult. : ";
01574 buf.width(9);
01575 buf << projP->getTimeApplyInverse() << " s" << endl;
01576 buf << "\t\t\tLinear Operator Mult. : ";
01577 buf.width(9);
01578 buf << projK->getTimeApply() << " s" << endl;
01579 buf << "\t\tTime for orthogonalizations = ";
01580 buf.width(9);
01581 buf << timeOrtho << " s ";
01582 buf.width(5);
01583 buf << 100*timeOrtho/timeOuterLoop << " %\n";
01584 buf << "\t\tTime for restart = ";
01585 buf.width(9);
01586 buf << timeRestart << " s ";
01587 buf.width(5);
01588 buf << 100*timeRestart/timeOuterLoop << " %\n";
01589 buf << "\t\tTime for for building Q^T*Q = ";
01590 buf.width(9);
01591 buf << timeBuildG << " s ";
01592 buf.width(5);
01593 buf << 100*timeBuildG/timeOuterLoop << " %\n";
01594 buf << "\t\tTime for for building Q^T*M*P^{-1}*M*Q = ";
01595 buf.width(9);
01596 buf << timeBuildH << " s ";
01597 buf.width(5);
01598 buf << 100*timeBuildH/timeOuterLoop << " %\n";
01599 buf << "\t\tTime for residual computations = ";
01600 buf.width(9);
01601 buf << timeResidual << " s ";
01602 buf.width(5);
01603 buf << 100*timeResidual/timeOuterLoop << " %\n";
01604 buf << "\tTotal time for post-processing = ";
01605 buf.width(9);
01606 buf << timePostProce << " s";
01607 rInfo(buf.str().c_str());
01608 }
01609
01610