00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029 #include "BlockPCGSolver.h"
00030
00031
00032 BlockPCGSolver::BlockPCGSolver(const Epetra_Comm &_Comm, const Epetra_Operator *KK,
00033 double _tol, int _iMax, int _verb)
00034 : MyComm(_Comm),
00035 callBLAS(),
00036 callLAPACK(),
00037 callFortran(),
00038 K(KK),
00039 Prec(0),
00040 mlPrec(false),
00041 ml_handle(0),
00042 ml_agg(0),
00043 vectorPCG(0),
00044 tolCG(_tol),
00045 iterMax(_iMax),
00046 verbose(_verb),
00047 AMG_NLevels(0),
00048 workSpace(0),
00049 numSolve(0),
00050 maxIter(0),
00051 sumIter(0),
00052 minIter(10000)
00053 {
00054
00055 }
00056
00057
00058 BlockPCGSolver::BlockPCGSolver(const Epetra_Comm &_Comm, const Epetra_Operator *KK,
00059 Epetra_Operator *PP,
00060 double _tol, int _iMax, int _verb)
00061 : MyComm(_Comm),
00062 callBLAS(),
00063 callLAPACK(),
00064 callFortran(),
00065 K(KK),
00066 Prec(PP),
00067 mlPrec(false),
00068 ml_handle(0),
00069 ml_agg(0),
00070 vectorPCG(0),
00071 tolCG(_tol),
00072 iterMax(_iMax),
00073 verbose(_verb),
00074 AMG_NLevels(0),
00075 workSpace(0),
00076 numSolve(0),
00077 maxIter(0),
00078 sumIter(0),
00079 minIter(10000)
00080 {
00081
00082 }
00083
00084
00085 BlockPCGSolver::~BlockPCGSolver() {
00086
00087 if ((mlPrec == true) && (Prec)) {
00088 delete Prec;
00089 Prec = 0;
00090 mlPrec = false;
00091 ML_Destroy(&ml_handle);
00092 ML_Aggregate_Destroy(&ml_agg);
00093 }
00094
00095 if (vectorPCG) {
00096 delete vectorPCG;
00097 vectorPCG = 0;
00098 }
00099
00100 if (workSpace) {
00101 delete[] workSpace;
00102 workSpace = 0;
00103 }
00104
00105 }
00106
00107
00108 void BlockPCGSolver::setAMGPreconditioner(int smoother, int degree, int numDofs,
00109 const Epetra_MultiVector *Z) {
00110
00111
00112 Epetra_RowMatrix *KK = dynamic_cast<Epetra_RowMatrix*>(const_cast<Epetra_Operator*>(K));
00113 if (KK == 0) {
00114 cerr << endl;
00115 cerr << " !!! For AMG preconditioner, the matrix must be 'Epetra_RowMatrix' object !!!\n";
00116 cerr << endl;
00117 return;
00118 }
00119
00120
00121
00122 AMG_NLevels = 10;
00123
00124 ML_Set_PrintLevel(verbose);
00125
00126 ML_Create(&ml_handle, AMG_NLevels);
00127
00128 EpetraMatrix2MLMatrix(ml_handle, 0, KK);
00129
00130 ML_Aggregate_Create(&ml_agg);
00131 ML_Aggregate_Set_MaxCoarseSize(ml_agg, 1);
00132 ML_Aggregate_Set_Threshold(ml_agg, 0.0);
00133
00134 if (Z) {
00135 ML_Aggregate_Set_NullSpace(ml_agg, numDofs, Z->NumVectors(), Z->Values(), Z->MyLength());
00136 }
00137
00138 AMG_NLevels = ML_Gen_MGHierarchy_UsingAggregation(ml_handle, 0, ML_INCREASING, ml_agg);
00139
00140
00141
00142 if (smoother == 1) {
00143 int j;
00144 for (j = 0; j < AMG_NLevels-1; j++) {
00145 ML_Gen_Smoother_MLS(ml_handle, j, ML_BOTH, 30., degree);
00146 }
00147
00148 #if defined(SUPERLU) || defined(DSUPERLU)
00149 ML_Gen_CoarseSolverSuperLU(ml_handle, AMG_NLevels-1);
00150 #endif
00151 }
00152
00153
00154 if (smoother == 2) {
00155 ML_Gen_Smoother_SymGaussSeidel(ml_handle, ML_ALL_LEVELS, ML_BOTH, 1, ML_DEFAULT);
00156 }
00157
00158 if (smoother == 3) {
00159 int j;
00160 for (j = 0; j < AMG_NLevels; j++) {
00161 ML_Gen_Smoother_MLS(ml_handle, j, ML_BOTH, 30., degree);
00162 }
00163 }
00164
00165 ML_Gen_Solver(ml_handle, ML_MGV, 0, AMG_NLevels-1);
00166
00167 mlPrec = true;
00168
00169 Prec = new ML_Epetra::MultiLevelOperator(ml_handle, MyComm, K->OperatorDomainMap(),
00170 K->OperatorDomainMap());
00171
00172 }
00173
00174
00175 void BlockPCGSolver::setPreconditioner(Epetra_Operator *PP) {
00176
00177 Prec = PP;
00178 mlPrec = false;
00179
00180 }
00181
00182
00183 int BlockPCGSolver::Apply(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const {
00184
00185 return K->Apply(X, Y);
00186
00187 }
00188
00189
00190 int BlockPCGSolver::ApplyInverse(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const {
00191
00192 int xcol = X.NumVectors();
00193 int info = 0;
00194
00195 if (Y.NumVectors() < xcol)
00196 return -1;
00197
00198
00199 if (xcol == 1) {
00200
00201
00202 if (vectorPCG == 0) {
00203 vectorPCG = new AztecOO();
00204
00205
00206 Epetra_Operator *KK = const_cast<Epetra_Operator*>(K);
00207 if (dynamic_cast<Epetra_RowMatrix*>(KK) == 0)
00208 vectorPCG->SetUserOperator(KK);
00209 else
00210 vectorPCG->SetUserMatrix(dynamic_cast<Epetra_RowMatrix*>(KK));
00211
00212 vectorPCG->SetAztecOption(AZ_max_iter, iterMax);
00213 vectorPCG->SetAztecOption(AZ_kspace, iterMax);
00214 if (verbose < 3)
00215 vectorPCG->SetAztecOption(AZ_output, AZ_last);
00216 if (verbose < 2)
00217 vectorPCG->SetAztecOption(AZ_output, AZ_none);
00218
00219 vectorPCG->SetAztecOption(AZ_solver, AZ_cg);
00220
00221 if (Prec)
00222 vectorPCG->SetPrecOperator(Prec);
00223 else {
00224 if (K->HasNormInf()) {
00225 vectorPCG->SetAztecOption(AZ_precond, AZ_Neumann);
00226 vectorPCG->SetAztecOption(AZ_poly_ord, 3);
00227 }
00228 }
00229
00230 }
00231
00232 double *valX = X.Values();
00233 double *valY = Y.Values();
00234
00235 int xrow = X.MyLength();
00236
00237 bool allocated = false;
00238 if (valX == valY) {
00239 valX = new double[xrow];
00240 allocated = true;
00241
00242 memcpy(valX, valY, xrow*sizeof(double));
00243 }
00244
00245 Epetra_MultiVector rhs(View, X.Map(), valX, xrow, xcol);
00246 vectorPCG->SetRHS(&rhs);
00247
00248 Y.PutScalar(0.0);
00249 vectorPCG->SetLHS(&Y);
00250
00251 vectorPCG->recursiveIterate(iterMax, tolCG);
00252
00253 numSolve += xcol;
00254
00255 int iter = vectorPCG->NumIters();
00256 maxIter = (iter > maxIter) ? iter : maxIter;
00257 minIter = (iter < minIter) ? iter : minIter;
00258 sumIter += iter;
00259
00260 if (allocated == true)
00261 delete[] valX;
00262
00263 return info;
00264
00265 }
00266
00267
00268 info = Solve(X, Y, xcol);
00269
00270 return info;
00271
00272 }
00273
00274
00275 int BlockPCGSolver::Solve(const Epetra_MultiVector &X, Epetra_MultiVector &Y, int blkSize) const {
00276
00277 int xrow = X.MyLength();
00278 int xcol = X.NumVectors();
00279 int ycol = Y.NumVectors();
00280
00281 int info = 0;
00282 int localVerbose = verbose*(MyComm.MyPID() == 0);
00283
00284
00285 double eps = 0.0;
00286 callLAPACK.LAMCH('E', eps);
00287
00288 double *valX = X.Values();
00289
00290 int NB = 3 + callFortran.LAENV(1, "dsytrd", "u", blkSize, -1, -1, -1, 6, 1);
00291 int lworkD = (blkSize > NB) ? blkSize*blkSize : NB*blkSize;
00292
00293 int wSize = 4*blkSize*xrow + 3*blkSize + 2*blkSize*blkSize + lworkD;
00294
00295 bool useY = true;
00296 if (ycol % blkSize != 0) {
00297
00298 wSize += blkSize*xrow;
00299 useY = false;
00300 }
00301
00302 if (workSpace == 0){
00303 workSpace = new (nothrow) double[wSize];
00304 if (workSpace == 0) {
00305 info = -1;
00306 return info;
00307 }
00308 }
00309
00310 double *pointer = workSpace;
00311
00312
00313 double *PtKP = pointer;
00314 pointer = pointer + blkSize*blkSize;
00315
00316
00317 double *coeff = pointer;
00318 pointer = pointer + blkSize*blkSize;
00319
00320
00321 double *workD = pointer;
00322 pointer = pointer + lworkD;
00323
00324
00325 double *da = pointer;
00326 pointer = pointer + blkSize;
00327
00328
00329 double *initNorm = pointer;
00330 pointer = pointer + blkSize;
00331
00332
00333 double *resNorm = pointer;
00334 pointer = pointer + blkSize;
00335
00336
00337 double *valR = pointer;
00338 pointer = pointer + xrow*blkSize;
00339 Epetra_MultiVector R(View, X.Map(), valR, xrow, blkSize);
00340
00341
00342 double *valZ = pointer;
00343 pointer = pointer + xrow*blkSize;
00344 Epetra_MultiVector Z(View, X.Map(), valZ, xrow, blkSize);
00345
00346
00347 double *valP = pointer;
00348 pointer = pointer + xrow*blkSize;
00349 Epetra_MultiVector P(View, X.Map(), valP, xrow, blkSize);
00350
00351
00352 double *valKP = pointer;
00353 pointer = pointer + xrow*blkSize;
00354 Epetra_MultiVector KP(View, X.Map(), valKP, xrow, blkSize);
00355
00356
00357 double *valSOL = (useY == true) ? Y.Values() : pointer;
00358
00359 int iRHS;
00360 for (iRHS = 0; iRHS < xcol; iRHS += blkSize) {
00361
00362 int numVec = (iRHS + blkSize < xcol) ? blkSize : xcol - iRHS;
00363
00364
00365 if (numVec < blkSize) {
00366 R.Random();
00367 }
00368 memcpy(valR, valX + iRHS*xrow, numVec*xrow*sizeof(double));
00369
00370
00371 valSOL = (useY == true) ? Y.Values() + iRHS*xrow : valSOL;
00372 Epetra_MultiVector SOL(View, X.Map(), valSOL, xrow, blkSize);
00373 SOL.PutScalar(0.0);
00374
00375 int ii;
00376 int iter;
00377 int nFound = 0;
00378
00379 R.Norm2(initNorm);
00380
00381 if (localVerbose > 1) {
00382 cout << endl;
00383 cout << " Vectors " << iRHS << " to " << iRHS + numVec - 1 << endl;
00384 if (localVerbose > 2) {
00385 fprintf(stderr,"\n");
00386 for (ii = 0; ii < numVec; ++ii) {
00387 cout << " ... Initial Residual Norm " << ii << " = " << initNorm[ii] << endl;
00388 }
00389 cout << endl;
00390 }
00391 }
00392
00393
00394 for (iter = 1; iter <= iterMax; ++iter) {
00395
00396
00397 if (Prec)
00398 Prec->ApplyInverse(R, Z);
00399 else
00400 Z = R;
00401
00402
00403 if (iter == 1) {
00404 P = Z;
00405 }
00406 else {
00407
00408 callBLAS.GEMM('T', 'N', blkSize, blkSize, xrow, 1.0, KP.Values(), xrow, Z.Values(), xrow,
00409 0.0, workD, blkSize);
00410 MyComm.SumAll(workD, coeff, blkSize*blkSize);
00411
00412
00413 callBLAS.GEMM('T', 'N', blkSize, blkSize, blkSize, 1.0, PtKP, blkSize, coeff, blkSize,
00414 0.0, workD, blkSize);
00415 for (ii = 0; ii < blkSize; ++ii)
00416 callFortran.SCAL_INCX(blkSize, da[ii], workD + ii, blkSize);
00417 callBLAS.GEMM('N', 'N', blkSize, blkSize, blkSize, 1.0, PtKP, blkSize, workD, blkSize,
00418 0.0, coeff, blkSize);
00419
00420
00421
00422 memcpy(KP.Values(), P.Values(), xrow*blkSize*sizeof(double));
00423 callBLAS.GEMM('N', 'N', xrow, blkSize, blkSize, 1.0, KP.Values(), xrow, coeff, blkSize,
00424 0.0, P.Values(), xrow);
00425
00426 P.Update(1.0, Z, -1.0);
00427
00428 }
00429
00430 K->Apply(P, KP);
00431
00432
00433 callBLAS.GEMM('T', 'N', blkSize, blkSize, xrow, 1.0, P.Values(), xrow, KP.Values(), xrow,
00434 0.0, workD, blkSize);
00435 MyComm.SumAll(workD, PtKP, blkSize*blkSize);
00436
00437
00438 callFortran.SYEV('V', 'U', blkSize, PtKP, blkSize, da, workD, lworkD, &info);
00439 if (info) {
00440
00441 break;
00442 }
00443
00444
00445 for (ii = 0; ii < blkSize; ++ii) {
00446 if (da[ii] < 0.0) {
00447 if (MyComm.MyPID() == 0) {
00448 cerr << endl << endl;
00449 cerr << " !!! Negative eigenvalue for P^tKP (" << da[ii] << ") !!!";
00450 cerr << endl << endl;
00451 }
00452 exit(-1);
00453 }
00454 else {
00455 da[ii] = (da[ii] == 0.0) ? 0.0 : 1.0/da[ii];
00456 }
00457 }
00458
00459
00460 callBLAS.GEMM('T', 'N', blkSize, blkSize, xrow, 1.0, P.Values(), xrow, R.Values(), xrow,
00461 0.0, workD, blkSize);
00462 MyComm.SumAll(workD, coeff, blkSize*blkSize);
00463
00464
00465 callBLAS.GEMM('T', 'N', blkSize, blkSize, blkSize, 1.0, PtKP, blkSize, coeff, blkSize,
00466 0.0, workD, blkSize);
00467 for (ii = 0; ii < blkSize; ++ii)
00468 callFortran.SCAL_INCX(blkSize, da[ii], workD + ii, blkSize);
00469 callBLAS.GEMM('N', 'N', blkSize, blkSize, blkSize, 1.0, PtKP, blkSize, workD, blkSize,
00470 0.0, coeff, blkSize);
00471
00472
00473 callBLAS.GEMM('N', 'N', xrow, blkSize, blkSize, 1.0, P.Values(), xrow, coeff, blkSize,
00474 1.0, valSOL, xrow);
00475
00476
00477 callBLAS.GEMM('N', 'N', xrow, blkSize, blkSize, -1.0, KP.Values(), xrow, coeff, blkSize,
00478 1.0, R.Values(), xrow);
00479
00480
00481 R.Norm2(resNorm);
00482 nFound = 0;
00483 for (ii = 0; ii < numVec; ++ii) {
00484 if (resNorm[ii] <= tolCG*initNorm[ii])
00485 nFound += 1;
00486 }
00487
00488 if (localVerbose > 1) {
00489 cout << " Vectors " << iRHS << " to " << iRHS + numVec - 1;
00490 cout << " -- Iteration " << iter << " -- " << nFound << " converged vectors\n";
00491 if (localVerbose > 2) {
00492 cout << endl;
00493 for (ii = 0; ii < numVec; ++ii) {
00494 cout << " ... ";
00495 cout.width(5);
00496 cout << ii << " ... Residual = ";
00497 cout.precision(2);
00498 cout.setf(ios::scientific, ios::floatfield);
00499 cout << resNorm[ii] << " ... Right Hand Side = " << initNorm[ii] << endl;
00500 }
00501 cout << endl;
00502 }
00503 }
00504
00505 if (nFound == numVec) {
00506 break;
00507 }
00508
00509 }
00510
00511 if (useY == false) {
00512
00513 memcpy(Y.Values() + xrow*iRHS, valSOL, numVec*xrow*sizeof(double));
00514 }
00515
00516 numSolve += nFound;
00517
00518 if (nFound == numVec) {
00519 minIter = (iter < minIter) ? iter : minIter;
00520 maxIter = (iter > maxIter) ? iter : maxIter;
00521 sumIter += iter;
00522 }
00523
00524 }
00525
00526 return info;
00527
00528 }
00529
00530