FEMAXX (Finite Element Maxwell Eigensolver): src/eigsolv/BlockPCGSolver.cpp Source File

00001 //**************************************************************************
00002 //
00003 //                                 NOTICE
00004 //
00005 // This software is a result of the research described in the report
00006 //
00007 // " A comparison of algorithms for modal analysis in the absence 
00008 //   of a sparse direct method", P. Arbenz, R. Lehoucq, and U. Hetmaniuk,
00009 //  Sandia National Laboratories, Technical report SAND2003-1028J.
00010 //
00011 // It is based on the Epetra, AztecOO, and ML packages defined in the Trilinos
00012 // framework ( http://software.sandia.gov/trilinos/ ).
00013 //
00014 // The distribution of this software follows also the rules defined in Trilinos.
00015 // This notice shall be marked on any reproduction of this software, in whole or
00016 // in part.
00017 //
00018 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00019 // license for use of this work by or on behalf of the U.S. Government.
00020 //
00021 // This program is distributed in the hope that it will be useful, but
00022 // WITHOUT ANY WARRANTY; without even the implied warranty of
00023 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
00024 //
00025 // Code Authors: U. Hetmaniuk (ulhetma@sandia.gov), R. Lehoucq (rblehou@sandia.gov)
00026 //
00027 //**************************************************************************
00028 
00029 #include "BlockPCGSolver.h"
00030 
00031 
00032 BlockPCGSolver::BlockPCGSolver(const Epetra_Comm &_Comm, const Epetra_Operator *KK,
00033                                double _tol, int _iMax, int _verb)
00034     : MyComm(_Comm),
00035       callBLAS(),
00036       callLAPACK(),
00037       callFortran(),
00038       K(KK),
00039       Prec(0),
00040       mlPrec(false),
00041       ml_handle(0),
00042       ml_agg(0),
00043       vectorPCG(0),
00044       tolCG(_tol),
00045       iterMax(_iMax),
00046       verbose(_verb),
00047       AMG_NLevels(0),
00048       workSpace(0),
00049       numSolve(0),
00050       maxIter(0),
00051       sumIter(0),
00052       minIter(10000)
00053 {
00054 
00055 }
00056 
00057 
00058 BlockPCGSolver::BlockPCGSolver(const Epetra_Comm &_Comm, const Epetra_Operator *KK,
00059                                Epetra_Operator *PP, 
00060                                double _tol, int _iMax, int _verb)
00061     : MyComm(_Comm),
00062       callBLAS(),
00063       callLAPACK(),
00064       callFortran(),
00065       K(KK),
00066       Prec(PP),
00067       mlPrec(false),
00068       ml_handle(0),
00069       ml_agg(0),
00070       vectorPCG(0),
00071       tolCG(_tol),
00072       iterMax(_iMax),
00073       verbose(_verb),
00074       AMG_NLevels(0),
00075       workSpace(0),
00076       numSolve(0),
00077       maxIter(0),
00078       sumIter(0),
00079       minIter(10000)
00080 {
00081 
00082 }
00083 
00084 
00085 BlockPCGSolver::~BlockPCGSolver() {
00086 
00087     if ((mlPrec == true) && (Prec)) {
00088         delete Prec;
00089         Prec = 0;
00090         mlPrec = false;
00091         ML_Destroy(&ml_handle);
00092         ML_Aggregate_Destroy(&ml_agg);
00093     }
00094 
00095     if (vectorPCG) {
00096         delete vectorPCG;
00097         vectorPCG = 0;
00098     }
00099 
00100     if (workSpace) {
00101         delete[] workSpace;
00102         workSpace = 0;
00103     }
00104 
00105 }
00106 
00107 
00108 void BlockPCGSolver::setAMGPreconditioner(int smoother, int degree, int numDofs,
00109                                           const Epetra_MultiVector *Z) {
00110 
00111     // Note the constness is cast away for ML
00112     Epetra_RowMatrix *KK = dynamic_cast<Epetra_RowMatrix*>(const_cast<Epetra_Operator*>(K));
00113     if (KK == 0) {
00114         cerr << endl;
00115         cerr << " !!! For AMG preconditioner, the matrix must be 'Epetra_RowMatrix' object !!!\n";
00116         cerr << endl;
00117         return;
00118     }
00119   
00120     // Generate an ML multilevel preconditioner
00121 
00122     AMG_NLevels = 10;
00123 
00124     ML_Set_PrintLevel(verbose);
00125 
00126     ML_Create(&ml_handle, AMG_NLevels);
00127   
00128     EpetraMatrix2MLMatrix(ml_handle, 0, KK);
00129   
00130     ML_Aggregate_Create(&ml_agg);
00131     ML_Aggregate_Set_MaxCoarseSize(ml_agg, 1);
00132     ML_Aggregate_Set_Threshold(ml_agg, 0.0);
00133   
00134     if (Z) {
00135         ML_Aggregate_Set_NullSpace(ml_agg, numDofs, Z->NumVectors(), Z->Values(), Z->MyLength());
00136     }
00137 
00138     AMG_NLevels = ML_Gen_MGHierarchy_UsingAggregation(ml_handle, 0, ML_INCREASING, ml_agg);
00139   
00140     // Set a smoother for the MG method
00141     // Ray recommends to use MLS (polynomial)
00142     if (smoother == 1) {
00143         int j;
00144         for (j = 0; j < AMG_NLevels-1; j++) {
00145             ML_Gen_Smoother_MLS(ml_handle, j, ML_BOTH, 30., degree);
00146         }
00147 
00148 #if defined(SUPERLU) || defined(DSUPERLU)
00149         ML_Gen_CoarseSolverSuperLU(ml_handle, AMG_NLevels-1);
00150 #endif
00151     }
00152 
00153     // Note that Symmetric Gauss Seidel does not parallelize well
00154     if (smoother == 2) {
00155         ML_Gen_Smoother_SymGaussSeidel(ml_handle, ML_ALL_LEVELS, ML_BOTH, 1, ML_DEFAULT);
00156     }
00157 
00158     if (smoother == 3) {
00159         int j;
00160         for (j = 0; j < AMG_NLevels; j++) {
00161             ML_Gen_Smoother_MLS(ml_handle, j, ML_BOTH, 30., degree);
00162         }
00163     }
00164 
00165     ML_Gen_Solver(ml_handle, ML_MGV, 0, AMG_NLevels-1);
00166 
00167     mlPrec = true;
00168   
00169     Prec = new ML_Epetra::MultiLevelOperator(ml_handle, MyComm, K->OperatorDomainMap(), 
00170                                              K->OperatorDomainMap());
00171 
00172 }
00173 
00174 
00175 void BlockPCGSolver::setPreconditioner(Epetra_Operator *PP) {
00176 
00177     Prec = PP;
00178     mlPrec = false;
00179 
00180 }
00181 
00182 
00183 int BlockPCGSolver::Apply(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const {
00184 
00185     return K->Apply(X, Y);
00186 
00187 }
00188 
00189 
00190 int BlockPCGSolver::ApplyInverse(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const {
00191 
00192     int xcol = X.NumVectors();
00193     int info = 0;
00194 
00195     if (Y.NumVectors() < xcol)
00196         return -1;
00197 
00198     // Use AztecOO's PCG for one right-hand side
00199     if (xcol == 1) {
00200 
00201         // Define the AztecOO object 
00202         if (vectorPCG == 0) {
00203             vectorPCG = new AztecOO();
00204 
00205             // Cast away the constness for AztecOO
00206             Epetra_Operator *KK = const_cast<Epetra_Operator*>(K);
00207             if (dynamic_cast<Epetra_RowMatrix*>(KK) == 0)
00208                 vectorPCG->SetUserOperator(KK);
00209             else
00210                 vectorPCG->SetUserMatrix(dynamic_cast<Epetra_RowMatrix*>(KK));
00211 
00212             vectorPCG->SetAztecOption(AZ_max_iter, iterMax);
00213             vectorPCG->SetAztecOption(AZ_kspace, iterMax);
00214             if (verbose < 3)
00215                 vectorPCG->SetAztecOption(AZ_output, AZ_last);
00216             if (verbose < 2)
00217                 vectorPCG->SetAztecOption(AZ_output, AZ_none);
00218 
00219             vectorPCG->SetAztecOption(AZ_solver, AZ_cg);
00220 
00221             if (Prec)
00222                 vectorPCG->SetPrecOperator(Prec);
00223             else {
00224                 if (K->HasNormInf()) {
00225                     vectorPCG->SetAztecOption(AZ_precond, AZ_Neumann);
00226                     vectorPCG->SetAztecOption(AZ_poly_ord, 3);
00227                 }
00228             }
00229 
00230         }
00231 
00232         double *valX = X.Values();
00233         double *valY = Y.Values();
00234 
00235         int xrow = X.MyLength();
00236 
00237         bool allocated = false;
00238         if (valX == valY) {
00239             valX = new double[xrow];
00240             allocated = true;
00241             // Copy valY into valX
00242             memcpy(valX, valY, xrow*sizeof(double));
00243         }
00244 
00245         Epetra_MultiVector rhs(View, X.Map(), valX, xrow, xcol);
00246         vectorPCG->SetRHS(&rhs);
00247 
00248         Y.PutScalar(0.0);
00249         vectorPCG->SetLHS(&Y);
00250 
00251         vectorPCG->recursiveIterate(iterMax, tolCG);
00252 
00253         numSolve += xcol;
00254 
00255         int iter = vectorPCG->NumIters();
00256         maxIter = (iter > maxIter) ? iter : maxIter;
00257         minIter = (iter < minIter) ? iter : minIter;
00258         sumIter += iter;
00259 
00260         if (allocated == true)
00261             delete[] valX;
00262 
00263         return info;
00264 
00265     } // if (xcol == 1)
00266 
00267     // Use block PCG for multiple right-hand sides
00268     info = Solve(X, Y, xcol);
00269 
00270     return info;
00271 
00272 }
00273 
00274 
00275 int BlockPCGSolver::Solve(const Epetra_MultiVector &X, Epetra_MultiVector &Y, int blkSize) const {
00276 
00277     int xrow = X.MyLength();
00278     int xcol = X.NumVectors();
00279     int ycol = Y.NumVectors();
00280 
00281     int info = 0;
00282     int localVerbose = verbose*(MyComm.MyPID() == 0);
00283 
00284     // Machine epsilon to check singularities
00285     double eps = 0.0;
00286     callLAPACK.LAMCH('E', eps);
00287 
00288     double *valX = X.Values();
00289 
00290     int NB = 3 + callFortran.LAENV(1, "dsytrd", "u", blkSize, -1, -1, -1, 6, 1);
00291     int lworkD = (blkSize > NB) ? blkSize*blkSize : NB*blkSize; 
00292 
00293     int wSize = 4*blkSize*xrow + 3*blkSize + 2*blkSize*blkSize + lworkD;
00294 
00295     bool useY = true;
00296     if (ycol % blkSize != 0) {
00297         // Allocate an extra block to store the solutions
00298         wSize += blkSize*xrow;
00299         useY = false;
00300     }
00301 
00302     if (workSpace == 0){
00303         workSpace = new (nothrow) double[wSize];
00304         if (workSpace == 0) {
00305             info = -1;
00306             return info;
00307         }
00308     } // if (workSpace == 0)
00309 
00310     double *pointer = workSpace;
00311 
00312     // Array to store the matrix PtKP
00313     double *PtKP = pointer;
00314     pointer = pointer + blkSize*blkSize;
00315 
00316     // Array to store coefficient matrices
00317     double *coeff = pointer;
00318     pointer = pointer + blkSize*blkSize;
00319 
00320     // Workspace array
00321     double *workD = pointer;
00322     pointer = pointer + lworkD; 
00323 
00324     // Array to store the eigenvalues of P^t K P
00325     double *da = pointer;
00326     pointer = pointer + blkSize;
00327 
00328     // Array to store the norms of right hand sides
00329     double *initNorm = pointer;
00330     pointer = pointer + blkSize;
00331 
00332     // Array to store the norms of residuals
00333     double *resNorm = pointer;
00334     pointer = pointer + blkSize;
00335   
00336     // Array to store the residuals
00337     double *valR = pointer;
00338     pointer = pointer + xrow*blkSize;
00339     Epetra_MultiVector R(View, X.Map(), valR, xrow, blkSize);
00340 
00341     // Array to store the preconditioned residuals
00342     double *valZ = pointer;
00343     pointer = pointer + xrow*blkSize;
00344     Epetra_MultiVector Z(View, X.Map(), valZ, xrow, blkSize);
00345 
00346     // Array to store the search directions
00347     double *valP = pointer;
00348     pointer = pointer + xrow*blkSize;
00349     Epetra_MultiVector P(View, X.Map(), valP, xrow, blkSize);
00350 
00351     // Array to store the image of the search directions
00352     double *valKP = pointer;
00353     pointer = pointer + xrow*blkSize;
00354     Epetra_MultiVector KP(View, X.Map(), valKP, xrow, blkSize);
00355 
00356     // Pointer to store the solutions
00357     double *valSOL = (useY == true) ? Y.Values() : pointer;
00358 
00359     int iRHS;
00360     for (iRHS = 0; iRHS < xcol; iRHS += blkSize) {
00361 
00362         int numVec = (iRHS + blkSize < xcol) ? blkSize : xcol - iRHS;
00363 
00364         // Set the initial residuals to the right hand sides
00365         if (numVec < blkSize) {
00366             R.Random();
00367         }
00368         memcpy(valR, valX + iRHS*xrow, numVec*xrow*sizeof(double));
00369 
00370         // Set the initial guess to zero
00371         valSOL = (useY == true) ? Y.Values() + iRHS*xrow : valSOL;
00372         Epetra_MultiVector SOL(View, X.Map(), valSOL, xrow, blkSize);
00373         SOL.PutScalar(0.0);
00374 
00375         int ii;
00376         int iter;
00377         int nFound = 0;
00378 
00379         R.Norm2(initNorm);
00380 
00381         if (localVerbose > 1) {
00382             cout << endl;
00383             cout << " Vectors " << iRHS << " to " << iRHS + numVec - 1 << endl;
00384             if (localVerbose > 2) {
00385                 fprintf(stderr,"\n");
00386                 for (ii = 0; ii < numVec; ++ii) {
00387                     cout << " ... Initial Residual Norm " << ii << " = " << initNorm[ii] << endl;
00388                 }
00389                 cout << endl;
00390             }
00391         }
00392 
00393         // Iteration loop
00394         for (iter = 1; iter <= iterMax; ++iter) {
00395 
00396             // Apply the preconditioner
00397             if (Prec)
00398                 Prec->ApplyInverse(R, Z);
00399             else
00400                 Z = R;
00401 
00402             // Define the new search directions
00403             if (iter == 1) {
00404                 P = Z;
00405             }
00406             else {
00407                 // Compute P^t K Z
00408                 callBLAS.GEMM('T', 'N', blkSize, blkSize, xrow, 1.0, KP.Values(), xrow, Z.Values(), xrow,
00409                               0.0, workD, blkSize);
00410                 MyComm.SumAll(workD, coeff, blkSize*blkSize);
00411 
00412                 // Compute the coefficient (P^t K P)^{-1} P^t K Z
00413                 callBLAS.GEMM('T', 'N', blkSize, blkSize, blkSize, 1.0, PtKP, blkSize, coeff, blkSize,
00414                               0.0, workD, blkSize);
00415                 for (ii = 0; ii < blkSize; ++ii)
00416                     callFortran.SCAL_INCX(blkSize, da[ii], workD + ii, blkSize);
00417                 callBLAS.GEMM('N', 'N', blkSize, blkSize, blkSize, 1.0, PtKP, blkSize, workD, blkSize,
00418                               0.0, coeff, blkSize);
00419 
00420                 // Update the search directions 
00421                 // Note: Use KP as a workspace
00422                 memcpy(KP.Values(), P.Values(), xrow*blkSize*sizeof(double));
00423                 callBLAS.GEMM('N', 'N', xrow, blkSize, blkSize, 1.0, KP.Values(), xrow, coeff, blkSize,
00424                               0.0, P.Values(), xrow);
00425 
00426                 P.Update(1.0, Z, -1.0);
00427 
00428             } // if (iter == 1)
00429 
00430             K->Apply(P, KP);
00431 
00432             // Compute P^t K P
00433             callBLAS.GEMM('T', 'N', blkSize, blkSize, xrow, 1.0, P.Values(), xrow, KP.Values(), xrow,
00434                           0.0, workD, blkSize);
00435             MyComm.SumAll(workD, PtKP, blkSize*blkSize);
00436 
00437             // Eigenvalue decomposition of P^t K P
00438             callFortran.SYEV('V', 'U', blkSize, PtKP, blkSize, da, workD, lworkD, &info);
00439             if (info) {
00440                 // Break the loop as spectral decomposition failed
00441                 break;
00442             } // if (info)
00443 
00444             // Compute the pseudo-inverse of the eigenvalues
00445             for (ii = 0; ii < blkSize; ++ii) {
00446                 if (da[ii] < 0.0) {
00447                     if (MyComm.MyPID() == 0) {
00448                         cerr << endl << endl;
00449                         cerr << " !!! Negative eigenvalue for P^tKP (" << da[ii] << ") !!!";
00450                         cerr << endl << endl;
00451                     }
00452                     exit(-1);
00453                 }
00454                 else {
00455                     da[ii] = (da[ii] == 0.0) ? 0.0 : 1.0/da[ii];
00456                 }
00457             } // for (ii = 0; ii < blkSize; ++ii)
00458 
00459             // Compute P^t R
00460             callBLAS.GEMM('T', 'N', blkSize, blkSize, xrow, 1.0, P.Values(), xrow, R.Values(), xrow,
00461                           0.0, workD, blkSize);
00462             MyComm.SumAll(workD, coeff, blkSize*blkSize);
00463 
00464             // Compute the coefficient (P^t K P)^{-1} P^t R
00465             callBLAS.GEMM('T', 'N', blkSize, blkSize, blkSize, 1.0, PtKP, blkSize, coeff, blkSize,
00466                           0.0, workD, blkSize);
00467             for (ii = 0; ii < blkSize; ++ii)
00468                 callFortran.SCAL_INCX(blkSize, da[ii], workD + ii, blkSize);
00469             callBLAS.GEMM('N', 'N', blkSize, blkSize, blkSize, 1.0, PtKP, blkSize, workD, blkSize,
00470                           0.0, coeff, blkSize);
00471 
00472             // Update the solutions
00473             callBLAS.GEMM('N', 'N', xrow, blkSize, blkSize, 1.0, P.Values(), xrow, coeff, blkSize,
00474                           1.0, valSOL, xrow);
00475 
00476             // Update the residuals
00477             callBLAS.GEMM('N', 'N', xrow, blkSize, blkSize, -1.0, KP.Values(), xrow, coeff, blkSize,
00478                           1.0, R.Values(), xrow);
00479 
00480             // Check convergence 
00481             R.Norm2(resNorm);
00482             nFound = 0;
00483             for (ii = 0; ii < numVec; ++ii) {
00484                 if (resNorm[ii] <= tolCG*initNorm[ii])
00485                     nFound += 1;
00486             }
00487 
00488             if (localVerbose > 1) {
00489                 cout << " Vectors " << iRHS << " to " << iRHS + numVec - 1;
00490                 cout << " -- Iteration " << iter << " -- " << nFound << " converged vectors\n"; 
00491                 if (localVerbose > 2) {
00492                     cout << endl;
00493                     for (ii = 0; ii < numVec; ++ii) {
00494                         cout << " ... ";
00495                         cout.width(5);
00496                         cout << ii << " ... Residual = ";
00497                         cout.precision(2);
00498                         cout.setf(ios::scientific, ios::floatfield);
00499                         cout << resNorm[ii] << " ... Right Hand Side = " << initNorm[ii] << endl;
00500                     }
00501                     cout << endl;
00502                 }
00503             }
00504 
00505             if (nFound == numVec) {
00506                 break;
00507             }
00508 
00509         }  // for (iter = 1; iter <= maxIter; ++iter)
00510 
00511         if (useY == false) {
00512             // Copy the solutions back into Y
00513             memcpy(Y.Values() + xrow*iRHS, valSOL, numVec*xrow*sizeof(double));
00514         }
00515 
00516         numSolve += nFound;
00517 
00518         if (nFound == numVec) {
00519             minIter = (iter < minIter) ? iter : minIter;
00520             maxIter = (iter > maxIter) ? iter : maxIter;
00521             sumIter += iter;
00522         }
00523 
00524     } // for (iRHS = 0; iRHS < xcol; iRHS += blkSize)
00525 
00526     return info;
00527 
00528 }
00529 
00530