FEMAXX (Finite Element Maxwell Eigensolver): src/eigsolv/JDBSYM.cpp Source File

00001 //**************************************************************************
00002 //
00003 //                                 NOTICE
00004 //
00005 // This software is a result of the research described in the report
00006 //
00007 // " A comparison of algorithms for modal analysis in the absence
00008 //   of a sparse direct method", P. Arbenz, R. Lehoucq, and U. Hetmaniuk,
00009 //  Sandia National Laboratories, Technical report SAND2003-1028J.
00010 //
00011 // It is based on the Epetra, AztecOO, and ML packages defined in the Trilinos
00012 // framework ( http://software.sandia.gov/trilinos/ ).
00013 //
00014 // The distribution of this software follows also the rules defined in Trilinos.
00015 // This notice shall be marked on any reproduction of this software, in whole or
00016 // in part.
00017 //
00018 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00019 // license for use of this work by or on behalf of the U.S. Government.
00020 //
00021 // This program is distributed in the hope that it will be useful, but
00022 // WITHOUT ANY WARRANTY; without even the implied warranty of
00023 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
00024 //
00025 // Code Authors: U. Hetmaniuk (ulhetma@sandia.gov), R. Lehoucq (rblehou@sandia.gov)
00026 //
00027 //**************************************************************************
00028 /**************************************************************************
00029  *                                                                         *
00030  *               Swiss Federal Institute of Technology (ETH)               *
00031  *                       CH-8092 Zuerich, Switzerland                      *
00032  *                                                                         *
00033  *                       (C) 1999 All Rights Reserved                      *
00034  *                                                                         *
00035  *                                NOTICE                                   *
00036  *                                                                         *
00037  *  Permission to use, copy, modify, and distribute this software and      *
00038  *  its documentation for any purpose and without fee is hereby granted    *
00039  *  provided that the above copyright notice appear in all copies and      *
00040  *  that both the copyright notice and this permission notice appear in    *
00041  *  supporting documentation.                                              *
00042  *                                                                         *
00043  *  Neither the Swiss Federal Institute of Technology nor the author make  *
00044  *  any representations about the suitability of this software for any     *
00045  *  purpose.  This software is provided ``as is'' without express or       *
00046  *  implied warranty.                                                      *
00047  *                                                                         *
00048  ***************************************************************************/
00049 
00050 #include <iomanip>
00051 #include "rlog/rlog.h"
00052 #include "Teuchos_ParameterList.hpp"
00053 #include "JDBSYM.h"
00054 
00055 JDBSYM::JDBSYM(const Epetra_Comm &_Comm, const Epetra_Operator *KK,
00056                const Epetra_Operator *MM, const Epetra_Operator *PP,
00057                const Epetra_Operator *YHCP,
00058                Teuchos::ParameterList& params) :
00059     myVerify(_Comm),
00060     callBLAS(),
00061     callLAPACK(),
00062     ortho(),
00063     modalTool(_Comm),
00064     mySort(),
00065     MyComm(_Comm),
00066     K(KK),
00067     M(MM),
00068     Prec(PP),
00069     MyWatch(_Comm),
00070     YHC(YHCP),
00071     linSolver(0),
00072     projK(0),
00073     projP(0),
00074     historyCount(0),
00075     resHistory(0),
00076     maxSpaceSize(0),
00077     sumSpaceSize(0),
00078     spaceSizeHistory(0),
00079     maxIterPCG(0),
00080     sumIterPCG(0),
00081     iterPCGHistory(0),
00082     memRequested(0.0),
00083     highMem(0.0),
00084     numCorrectionSolve(0),
00085     numOrtho(0),
00086     outerIter(0),
00087     precOp(0),
00088     residual(0),
00089     stifOp(0),
00090     timeBuildG(0.0),
00091     timeBuildH(0.0),
00092     timeCorrectionRHS(0.0),
00093     timeCorrectionSolve(0.0),
00094     timeLocalProj(0.0),
00095     timeLocalSolve(0.0),
00096     timeLocalUpdate(0.0),
00097     timeNorm(0.0),
00098     timeOrtho(0.0),
00099     timeOuterLoop(0.0),
00100     timePostProce(0.0),
00101     timePrecOp(0.0),
00102     timeResidual(0.0),
00103     timeRestart(0.0),
00104     timeStifOp(0.0),
00105     params_(params)
00106 {}
00107 
00108 JDBSYM::~JDBSYM() {
00109     delete[] resHistory;
00110     resHistory = 0;
00111     delete[] spaceSizeHistory;
00112     spaceSizeHistory = 0;
00113     delete[] iterPCGHistory;
00114     iterPCGHistory = 0;
00115     delete linSolver;
00116     linSolver = 0;
00117     delete projK;
00118     projK = 0;
00119     delete projP;
00120     projP = 0;
00121 }
00122 
00123 
00124 /****************************************************************************
00125  *                                                                          *
00126  * Main eigensolver routine                                                 *
00127  *                                                                          *
00128  ****************************************************************************/
00129 
00130 
00131 int JDBSYM::jdbsym(double tau, double jdtol,
00132                    int kmax, int jmax, int jmin, int blockSize, int blkwise,
00133                    int V0dim, double *V0,
00134                    opType _optype, double eps_tr, double toldecay,
00135                    int strategy, int max_it, int max_inner_it,
00136                    int clvl,
00137                    int &k_conv, double *Q, double *lambda, int &it) {
00138 
00139     //***************************************************************************
00140     //                                                                          *
00141     // Local variables                                                          *
00142     //                                                                          *
00143     //***************************************************************************/
00144 
00145     highMem = (highMem > currentSize()) ? highMem : currentSize();
00146 
00147     double* normWeight = 0;         // disable M-inverse norm (for now)
00148 
00149     // allocatables:
00150     // initialize with 0, so we can free even unallocated ptrs */
00151     double *V = 0, *Vtmp = 0, *U = 0, *Qb = 0, *Y = 0;
00152     double *s = 0, *Res = 0, *resnrm = 0;
00153     double *matK = 0, *H = 0, *Hlu = 0, *G = 0, *Glu = 0;
00154     double *temp1 = 0, *temp3 = 0;
00155 
00156     int *Hpiv = 0, *Gpiv = 0, *idx1 = 0, *idx2 = 0;
00157     int *convind = 0, *keepind = 0, *solvestep = 0;
00158     int *actcorrits = 0;
00159 
00160     // non-allocated ptrs */
00161     double *matdummy;
00162 
00163     // scalar vars */
00164     double alpha, it_tol;
00165 
00166     int k, j, actblockSize, found, conv, keep, Gisused;
00167     int Misempty, Pisempty, act, cnt, idummy, info;
00168 
00169     // Define Epetra_Map and the global and local sizes */
00170     Epetra_Map MyMap = K->OperatorDomainMap();
00171 
00172     int nGlobal = MyMap.NumGlobalElements();
00173     int nLocal = MyMap.NumMyElements();
00174 
00175     int myPid = MyComm.MyPID();
00176 
00177     //***************************************************************************
00178     //                                                                          *
00179     // Execution starts here...                                                 *
00180     //                                                                          *
00181     //***************************************************************************
00182 
00183     // Set "derived" parameters
00184     Misempty = (M == 0);
00185     Pisempty = (Prec == 0);
00186 
00187     // print info header
00188     if (clvl > 0) {
00189         ostringstream buf;
00190         buf << "JDBSYM parameters:" << endl;
00191         buf << "\tSolving  ";
00192         if (Misempty)
00193             buf << "K*x = lambda*x  ";
00194         else
00195             buf << "K*x = lambda*M*x  ";
00196         if (Pisempty)
00197             buf << "without ";
00198         else
00199             buf << "with ";
00200         buf << "preconditioning" << endl;
00201         buf << "\tN=";
00202         buf.width(10);
00203         buf << nGlobal << "  ITMAX=";
00204         buf.width(4);
00205         buf << max_it << endl;
00206         buf << "\tKMAX=";
00207         buf.width(3);
00208         buf << kmax << "  JMIN=";
00209         buf.width(3);
00210         buf << jmin << "  JMAX=";
00211         buf.width(3);
00212         buf << jmax << "  V0DIM=";
00213         buf.width(3);
00214         buf << V0dim << endl;
00215         buf << "\tBLKSIZE=";
00216         buf.width(5);
00217         buf << blockSize << "  BLKWISE=";
00218         if (blkwise)
00219             buf << "TRUE\n";
00220         else
00221             buf << "FALSE\n";
00222         buf << "\tTAU=";
00223         buf.precision(4);
00224         buf.setf(ios::scientific, ios::floatfield);
00225         buf << tau << "  JDTOL= " << jdtol << " STRATEGY= " << strategy;
00226         buf << "  LINSOLVER=    QMRS  OPTYPE=";
00227         switch (_optype) {
00228         case OP_UNSYM1:
00229             buf << "UNSYM1" << endl;
00230             break;
00231         case OP_UNSYM2:
00232             buf << "UNSYM2" << endl;
00233             break;
00234         case OP_SYM:
00235             buf << "SYM" << endl;
00236             break;
00237         }
00238         buf << "\tLINITMAX=";
00239         buf.width(5);
00240         buf << max_inner_it << "  EPS_TR=" << eps_tr << "  TOLDECAY=" << toldecay;
00241         rInfo(buf.str().c_str());
00242     }
00243 
00244     // Validate input parameters
00245     assert(0 < jdtol);
00246     assert(0 < kmax && kmax <= nGlobal);
00247     assert(0 < jmax && jmax <= nGlobal);
00248     assert(0 < jmin && jmin < jmax);
00249     assert(0 <= max_it);
00250     assert(blockSize <= jmin);
00251     assert(blockSize <= jmax - jmin);
00252     assert(0 < blockSize && blockSize <= kmax);
00253     assert(blkwise == 0 || blkwise == 1);
00254     assert(0 <= V0dim && V0dim < jmax);
00255     assert(_optype == OP_UNSYM1 || _optype == OP_UNSYM2 || _optype == OP_SYM);
00256     assert(0 <= max_inner_it);
00257     assert(0.0 <= eps_tr);
00258     assert(1.0 < toldecay);
00259 
00260     // Allocating memory for matrices & vectors
00261     V = new (nothrow) double[nLocal*jmax];
00262     U = new (nothrow) double[jmax*jmax];
00263     s = new (nothrow) double[jmax];
00264     Res = new (nothrow) double[nLocal*blockSize];
00265     resnrm = new (nothrow) double[blockSize];
00266     matK = new (nothrow) double[jmax*jmax];
00267     Vtmp = new (nothrow) double[jmax*jmax];
00268     assert(V && U && s && Res && resnrm && matK && Vtmp);
00269     memRequested += sizeof(double)*(nLocal*jmax + 3*jmax*jmax + jmax + nLocal*blockSize +
00270                                     blockSize)/(1024.0*1024.0);
00271 
00272     // Define Epetra_MultiVector objects
00273     Epetra_MultiVector *VV = new Epetra_MultiVector(View, MyMap, V, nLocal, jmax);
00274     Epetra_MultiVector *QQ = new Epetra_MultiVector(View, MyMap, Q, nLocal, kmax);
00275     Epetra_MultiVector *RR = new Epetra_MultiVector(View, MyMap, Res, nLocal, blockSize);
00276 
00277     // Allocate matrices H, Y and G only if necessary
00278     Epetra_MultiVector *YY = 0;
00279     if (!Pisempty) {
00280         H = new (nothrow) double[kmax*kmax];
00281         Hlu = new (nothrow) double[kmax*kmax];
00282         Hpiv = new (nothrow) int[kmax];
00283         Y = new (nothrow) double[nLocal*kmax];
00284         assert(H && Hlu && Hpiv && Y);
00285         memRequested += sizeof(double)*(nLocal*kmax + 2*kmax*kmax)/(1024.0*1024.0);
00286         memRequested += sizeof(int)*(kmax)/(1024.0*1024.0);
00287         YY = new Epetra_MultiVector(View, MyMap, Y, nLocal, kmax);
00288     }
00289 
00290     Gisused = (_optype == OP_UNSYM2 || _optype == OP_SYM) && (!Misempty);
00291     if (Gisused) {
00292         G   = new (nothrow) double[kmax*kmax];
00293         Glu = new (nothrow) double[kmax*kmax];
00294         Gpiv = new (nothrow) int[kmax];
00295         assert(G && Glu && Gpiv);
00296         memRequested += sizeof(double)*(2*kmax*kmax)/(1024.0*1024.0);
00297         memRequested += sizeof(int)*(kmax)/(1024.0*1024.0);
00298     }
00299 
00300     // Analogous for Qb
00301     Epetra_MultiVector *QbQb = 0;
00302     if (!Misempty) {
00303         Qb = new (nothrow) double[nLocal*kmax];
00304         assert(Qb != 0);
00305         memRequested += sizeof(double)*(nLocal*kmax)/(1024.0*1024.0);
00306         QbQb = new Epetra_MultiVector(View, MyMap, Qb, nLocal, kmax);
00307     }
00308 
00309     idx1 = new (nothrow) int[jmax];
00310     idx2 = new (nothrow) int[jmax];
00311     assert(idx1 && idx2);
00312     memRequested += sizeof(int)*(2*jmax)/(1024.0*1024.0);
00313 
00314     // Idices for (non-)converged approximations
00315     convind = new (nothrow) int[blockSize];
00316     keepind = new (nothrow) int[blockSize];
00317     solvestep = new (nothrow) int[blockSize];
00318     actcorrits = new (nothrow) int[blockSize];
00319     assert(convind && keepind && solvestep && actcorrits);
00320     memRequested += sizeof(int)*(4*blockSize)/(1024.0*1024.0);
00321 
00322     // temp1 is used for the orthogonalization, the linear solves
00323     int ltemp1 = 8*nLocal;
00324     temp1 = new (nothrow) double[ltemp1];
00325 
00326     // temp3 is used for computing inner products
00327     int ltemp3 = (jmax > kmax) ? jmax : kmax;
00328     temp3 = new (nothrow) double[ltemp3];
00329 
00330     assert(temp1 && temp3);
00331     memRequested += sizeof(double)*(nLocal+jmax)/(1024.0*1024.0);
00332 
00333     // Allocating work-space for projection operations
00334     Epetra_MultiVector *copyQ = 0;
00335     Epetra_MultiVector *copyQb = 0;
00336     Epetra_MultiVector *copyY = 0;
00337 
00338     if (projK)
00339         delete projK;
00340     projK = new Projectors(MyComm, K, M, Prec, _optype, 0, kmax, Hpiv, Gpiv, 0.0, Hlu, Glu,
00341                            copyQ, copyQb, copyY, temp1, temp1 + nLocal);
00342 
00343     if (projP)
00344         delete projP;
00345     projP = new Projectors(MyComm, K, M, Prec, _optype, 0, kmax, Hpiv, Gpiv, 0.0, Hlu, Glu,
00346                            copyQ, copyQb, copyY, temp1, temp1 + nLocal);
00347 
00348     // Define the linear solver
00349     if (linSolver)
00350         delete linSolver;
00351     linSolver = new LinSolvers(projK, projP, temp1 + 2*nLocal);
00352 
00353     // Get the weight for approximating the M-inverse norm
00354     Epetra_Vector *vectWeight = 0;
00355     if (normWeight) {
00356         vectWeight = new Epetra_Vector(View, MyMap, normWeight);
00357     }
00358 
00359     //*************************************************************************
00360     //                                                                        *
00361     // Generate initial search subspace V. Vectors are taken from V0 and if   *
00362     // necessary randomly generated.                                          *
00363     //                                                                        *
00364     //*************************************************************************
00365 
00366     highMem = (highMem > currentSize()) ? highMem : currentSize();
00367     timeOuterLoop -= MyWatch.WallTime();
00368 
00369     // copy V0 to V
00370     memcpy(V, V0, nLocal*V0dim*sizeof(double));
00371     j = V0dim;
00372 
00373     // if V0dim < blockSize: generate additional random vectors //
00374     if (V0dim < blockSize) {
00375         Epetra_MultiVector Vinit(View, *VV, V0dim, blockSize - V0dim);
00376         Vinit.Random();
00377         j = blockSize;
00378     }
00379 
00380     // Applying projector to vectors of V:
00381     // v = (I - Y * H^-1 * C^T) * v
00382     if (YHC) {
00383         Epetra_MultiVector Vinit(View, *VV, 0, blockSize);
00384         YHC->Apply(Vinit, Vinit);
00385     }
00386 
00387     // (M-)orthogonalize columns of V //
00388     timeOrtho -= MyWatch.WallTime();
00389     if (Misempty) {
00390         for (cnt = 0; cnt < j; cnt ++) {
00391             Epetra_Vector Vcnt(View, *VV, cnt);
00392             if (cnt > 0) {
00393                 Epetra_MultiVector Vprevious(View, *VV, 0, cnt);
00394                 ortho.ModifiedGS(&Vcnt, &Vprevious);
00395             }
00396             Vcnt.Norm2(&alpha);
00397             Vcnt.Scale(1.0/alpha);
00398         }
00399     } // if (Misempty)
00400     else {
00401         for (cnt = 0; cnt < j; cnt ++) {
00402             Epetra_Vector Vcnt(View, *VV, cnt);
00403             if (cnt == 0) {
00404                 ortho.IteratedClassicalGS(&Vcnt, &alpha, 0, temp1, M);
00405             } else {
00406                 Epetra_MultiVector Vprevious(View, *VV, 0, cnt);
00407                 ortho.IteratedClassicalGS(&Vcnt, &alpha, &Vprevious, temp1, M);
00408             }
00409             alpha = 1.0/alpha;
00410             assert(alpha > 0.0);
00411             Vcnt.Scale(alpha);
00412         }
00413     } // if (Misempty)
00414     timeOrtho += MyWatch.WallTime();
00415     numOrtho += j;
00416 
00417     // Generate interaction matrix matK = V'*K*V.
00418     // Note: Only the upper triangle is computed
00419     //       Use Res as a workspace
00420     for (cnt = 0; cnt < j; cnt += blockSize) {
00421         int numVec = (cnt + blockSize < j) ? blockSize : j - cnt;
00422         Epetra_MultiVector Vcnt(View, *VV, cnt, numVec);
00423         timeStifOp -= MyWatch.WallTime();
00424         K->Apply(Vcnt, *RR);
00425         timeStifOp += MyWatch.WallTime();
00426         stifOp += numVec;
00427         timeLocalProj -= MyWatch.WallTime();
00428         callBLAS.GEMM('T', 'N', j, numVec, nLocal, 1.0, V, nLocal, Res, nLocal, 0.0, Vtmp, j);
00429         int iC;
00430         for (iC = 0; iC < numVec; ++iC) {
00431             MyComm.SumAll(Vtmp + iC*j, matK + (cnt+iC)*jmax, j);
00432         }
00433         timeLocalProj += MyWatch.WallTime();
00434     } // for (cnt = 0; cnt < j; cnt += blockSize)
00435 
00436     // Other initializations
00437     k = 0; it = 0; actblockSize = blockSize;
00438     for(act = 0; act < blockSize; act ++)
00439         solvestep[act] = 1;
00440 
00441     //***************************************************************************
00442     //                                                                          *
00443     // Main JD-iteration loop                                                   *
00444     //                                                                          *
00445     //***************************************************************************/
00446 
00447     while (it < max_it) {
00448 
00449         highMem = (highMem > currentSize()) ? highMem : currentSize();
00450 
00451         //***************************************************************************
00452         //                                                                          *
00453         // Solving the projected eigenproblem                                       *
00454         //                                                                          *
00455         // matK*u = V'*K*V*u = s*u                                                  *
00456         // matK is symmetric, only the upper triangle is stored                     *
00457         //                                                                          *
00458         //***************************************************************************/
00459 
00460         timeLocalSolve -= MyWatch.WallTime();
00461         info = modalTool.directSolver(j, matK, jmax, 0, 0, j, U, jmax, s, 0, 10);
00462         timeLocalSolve += MyWatch.WallTime();
00463 
00464         if (info != 0)
00465             rError("Error solving the projected eigenproblem. dsyev: info = %d", info);
00466         rAssert(info == 0);
00467 
00468         // sort eigenpairs, such that |S(i)-tau| <= |S(i+1)-tau| for i=1..j-1
00469         sorteig(j, s, U, jmax, tau, temp3, idx1, idx2, strategy);
00470 
00471         //***************************************************************************
00472         //                                                                          *
00473         // Convergence/Restart Check                                                *
00474         //                                                                          *
00475         // In case of convergence, strip off a whole block or just the converged    *
00476         // ones and put 'em into Q.  Update the matrices Q, V, U, s and if          *
00477         // necessary Qb, Y, H and G.                                                *
00478         //                                                                          *
00479         // In case of a restart update the V, U and M matrices and recompute the    *
00480         // Eigenvectors                                                             *
00481         //                                                                          *
00482         //***************************************************************************
00483 
00484         found = 1;
00485         while(found) {
00486 
00487             // conv/keep = Number of converged/non-converged Approximations
00488             conv = 0; keep = 0;
00489 
00490             timeLocalUpdate -= MyWatch.WallTime();
00491             callBLAS.GEMM('N', 'N', nLocal, actblockSize, j, 1.0, V, nLocal, U, jmax,
00492                           0.0, Q+k*nLocal, nLocal);
00493             timeLocalUpdate += MyWatch.WallTime();
00494 
00495             // Compute the residual and update the matrix Qb if necessary.
00496             timeResidual -= MyWatch.WallTime();
00497             if (Misempty==1) {
00498                 // M is Identity
00499                 Epetra_MultiVector QVec(View, *QQ, k, actblockSize);
00500                 K->Apply(QVec, *RR);
00501                 for (act = 0; act < actblockSize; ++act)
00502                     callBLAS.AXPY(nLocal, -s[act], QVec.Values() + act*nLocal, Res + act*nLocal);
00503             } else {
00504                 // M is NOT Identity
00505                 Epetra_MultiVector QVec(View, *QQ, k, actblockSize);
00506                 Epetra_MultiVector QbVec(View, *QbQb, k, actblockSize);
00507                 M->Apply(QVec, QbVec);
00508                 K->Apply(QVec, *RR);
00509                 for (act = 0; act < actblockSize; ++act)
00510                     callBLAS.AXPY(nLocal, -s[act], QbVec.Values() + act*nLocal, Res + act*nLocal);
00511             }
00512             timeResidual += MyWatch.WallTime();
00513             residual += actblockSize;
00514 
00515             // Update G, if necessary
00516             timeBuildG -= MyWatch.WallTime();
00517             if (Gisused) {
00518                 // Compute G(:,k+act) = Q' * q
00519                 // Use Glu as workspace
00520                 callBLAS.GEMM('T', 'N', k+actblockSize, actblockSize, nLocal, 1.0, Q, nLocal,
00521                               Q+k*nLocal, nLocal, 0.0, Glu, k+actblockSize);
00522                 for (act = 0; act < actblockSize; ++act) {
00523                     int idummy = k + act;
00524                     MyComm.SumAll(Glu + act*(k+actblockSize), G + idummy*kmax, idummy+1);
00525                     int iC;
00526                     for (iC = 0; iC < idummy; ++iC)
00527                         G[k + act + iC*kmax] = G[iC + idummy*kmax];
00528                 }
00529             }
00530             timeBuildG += MyWatch.WallTime();
00531 
00532             // Finally update matrices H, Y  if necessary */
00533             if (!Pisempty) {
00534                 if (!Misempty) {
00535                     // If B exists, then also Qb does
00536                     matdummy = Qb;
00537                 } else {
00538                     // Without B, no Qb
00539                     matdummy = Q;
00540                 }
00541                 // Calculation of Y depends on optype (see header of Projectors.c)
00542                 timePrecOp -= MyWatch.WallTime();
00543                 if (_optype == OP_UNSYM1) {
00544                     Epetra_MultiVector QVec(View, *QQ, k, actblockSize);
00545                     Epetra_MultiVector YVec(View, *QQ, k, actblockSize);
00546                     Prec->ApplyInverse(QVec, YVec);
00547                 } else {
00548                     Epetra_MultiVector DVec(View, MyMap, matdummy + k*nLocal, nLocal, actblockSize);
00549                     Epetra_MultiVector YVec(View, MyMap, Y + k*nLocal, nLocal, actblockSize);
00550                     Prec->ApplyInverse(DVec, YVec);
00551                 }
00552                 timePrecOp += MyWatch.WallTime();
00553                 precOp += actblockSize;
00554 
00555                 timeBuildH -= MyWatch.WallTime();
00556                 // update H, starting with the column ... */
00557                 // Note: Use Hlu as workspace             */
00558                 callBLAS.GEMM('T', 'N', k+actblockSize, actblockSize, nLocal, 1.0, matdummy, nLocal,
00559                               Y+k*nLocal, nLocal, 0.0, Hlu, k+actblockSize);
00560                 for (act = 0; act < actblockSize; ++act) {
00561                     int idummy = k + act;
00562                     MyComm.SumAll(Hlu + act*(k+actblockSize), H + idummy*kmax, idummy+1);
00563                 }
00564                 if (kmax > 1) {
00565                     // ... and then the row
00566                     // Note: Use Hlu as workspace
00567                     callBLAS.GEMM('T', 'N', k+actblockSize, actblockSize, nLocal, 1.0, Y, nLocal,
00568                                   matdummy+k*nLocal, nLocal, 0.0, Hlu, k+actblockSize);
00569                     for (act = 0; act < actblockSize; ++act) {
00570                         int idummy = k + act;
00571                         MyComm.SumAll(Hlu + act*(k+actblockSize), temp3, idummy);
00572                         int iC;
00573                         for (iC = 0; iC < idummy; ++iC)
00574                             H[k + act + iC*kmax] = temp3[iC];
00575                     }
00576                 }
00577                 timeBuildH += MyWatch.WallTime();
00578             } // if (!Pisempty)
00579 
00580             // Compute norm of the residual and update arrays convind/keepind*/
00581             timeNorm -= MyWatch.WallTime();
00582             if (vectWeight)
00583                 RR->NormWeighted(*vectWeight, resnrm);
00584             else
00585                 RR->Norm2(resnrm);
00586             for (act = 0; act < actblockSize; ++act) {
00587                 //resnrm[act] = resnrm[act]/s[act];
00588                 if (resnrm[act] < jdtol) { convind[conv] = act; conv = conv + 1; } else { keepind[keep] = act; keep = keep + 1; }
00589             }
00590             timeNorm += MyWatch.WallTime();
00591 
00592             // Perform some (expensive runtime validation checks) */
00593             if (clvl > 2) {
00594                 Epetra_MultiVector copyQ(View, *QQ, 0, k + actblockSize);
00595                 Epetra_MultiVector copyQb(View, *QbQb, 0, k + actblockSize);
00596                 Epetra_MultiVector copyY(View, *YY, 0, k + actblockSize);
00597                 Epetra_MultiVector copyV(View, *VV, 0, j);
00598                 validate(&copyQ, &copyQb, &copyY, &copyV, H, kmax, _optype, temp1);
00599             }
00600 
00601             // Check whether the blkwise-mode is chosen and ALL the
00602             // approximations converged, or whether the strip-off mode is
00603             // active and SOME of the approximations converged
00604 
00605             found = ((blkwise==1 && conv==actblockSize) || (blkwise==0 && conv!=0))
00606                 && (j > actblockSize || k == kmax - actblockSize);
00607 
00608             //**************************************************************************
00609             //*                                                                        *
00610             //* Convergence Case                                                       *
00611             //*                                                                        *
00612             //* In case of convergence, strip off a whole block or just the converged  *
00613             //* ones and put 'em into Q.  Update the matrices Q, V, U, s and if        *
00614             //* necessary Qb, Y, H and G.                                              *
00615             //*                                                                        *
00616             //**************************************************************************
00617 
00618             if (found) {
00619 
00620                 // Store Eigenvalues */
00621                 for(act = 0; act < conv; act++)
00622                     lambda[k+act] = s[convind[act]];
00623 
00624                 // Re-use non approximated Ritz-Values */
00625                 for(act = 0; act < keep; act++)
00626                     s[act] = s[keepind[act]];
00627 
00628                 // Shift the others in the right position */
00629                 idummy = j - actblockSize;
00630                 for(act = 0; act < idummy; act ++)
00631                     s[act+keep] = s[act+actblockSize];
00632 
00633                 // Update V. Re-use the V-Vectors not looked at yet. */
00634                 timeLocalUpdate -= MyWatch.WallTime();
00635                 for (act = 0; act < nLocal; act += jmax) {
00636                     cnt = (act + jmax < nLocal) ? jmax : nLocal - act;
00637                     int iC;
00638                     for (iC = 0; iC < j; ++iC)
00639                         memcpy(Vtmp + iC*cnt, V + act + iC*nLocal, cnt*sizeof(double));
00640                     callBLAS.GEMM('N', 'N', cnt, idummy, j, 1.0, Vtmp, cnt, U+actblockSize*jmax, jmax,
00641                                   0.0, V + act + keep*nLocal, nLocal);
00642                 }
00643                 timeLocalUpdate += MyWatch.WallTime();
00644 
00645                 // Insert the not converged approximations as first columns in V */
00646                 for(act = 0; act < keep; act++)
00647                     memcpy(V + act*nLocal, Q + (k+keepind[act])*nLocal, nLocal*sizeof(double));
00648 
00649                 // Store Eigenvectors */
00650                 for(act = 0; act < conv; act++)
00651                     if (act != convind[act])
00652                         memcpy(Q + (k+act)*nLocal, Q + (k+convind[act])*nLocal, nLocal*sizeof(double));
00653                 // Update Qb if necessary */
00654                 if (Misempty==0) {
00655                     for(act = 0; act < conv; act++)
00656                         if (act != convind[act])
00657                             memcpy(Qb + (k+act)*nLocal, Qb + (k+convind[act])*nLocal, nLocal*sizeof(double));
00658                 }
00659 
00660                 // Update H and Y if necessary */
00661                 if (Pisempty==0) {
00662                     for (act = 0; act < conv; act++)
00663                         if (act != convind[act])
00664                             memcpy(Y + (k+act)*nLocal, Y + (k+convind[act])*nLocal, nLocal*sizeof(double));
00665 
00666                     for(act=0; act < conv; act++) {
00667                         if (act != convind[act]) {
00668                             idummy = k + act;
00669                             // Copy column ... */
00670                             memcpy(H + idummy*kmax, H + (k+convind[act])*kmax, idummy*sizeof(double));
00671                             // ... diagonal element ... */
00672                             H[idummy*(kmax+1)] = H[(k+convind[act])*(kmax+1)];
00673                             // ... and row */
00674                             int iD;
00675                             for (iD = 0; iD < idummy; ++iD)
00676                                 H[idummy + iD*kmax] = H[k+convind[act] + iD*kmax];
00677                         }
00678                     }
00679                 }
00680 
00681                 // Update SearchSpaceSize j */
00682                 j = j - conv;
00683 
00684                 // Let matK become a diagonal matrix with the Ritz values as entries ... */
00685                 for (act = 0; act < j; act++) {
00686                     memset(matK + act*jmax, 0, act*sizeof(double));
00687                     matK[act*jmax + act] = s[act];
00688                 }
00689 
00690                 // ... and U the Identity(jnew,jnew) */
00691                 memset(U, 0, j*jmax*sizeof(double));
00692                 for (act = 0; act < j; act++)
00693                     U[act*jmax + act] = 1.0;
00694 
00695                 // Avoid computation of zero eigenvalues:
00696                 //
00697                 //   If STRATEGY == 1: set tau to the largest of the now
00698                 //   converged eigenvalues.
00699                 //
00700                 //   Warning: This may not work well if BLKSIZE > 1.
00701                 if (strategy == 1)
00702                     for(act = 0; act < conv; act ++)
00703                         if (lambda[k+act] > tau)
00704                             tau = lambda[k+act];
00705 
00706                 // Update Converged-Eigenpair-counter and Pro_k */
00707                 k = k + conv;
00708 
00709                 // Update the new blocksize */
00710                 actblockSize = (blockSize > kmax-k) ? kmax-k : blockSize;
00711 
00712                 // Exit this loop when kmax eigenpairs have converged */
00713                 if (k == kmax)
00714                     break;
00715 
00716                 // Counter for the linear-solver-accuracy */
00717                 for(act = 0; act < keep; act++)
00718                     solvestep[act] = solvestep[keepind[act]];
00719 
00720                 for(act = keep; act < blockSize; act++)
00721                     solvestep[act] = 1;
00722 
00723             } // if(found)
00724 
00725             //*************************************************************************
00726             //                                                                        *
00727             // Restart                                                                *
00728             //                                                                        *
00729             // The Eigenvector-Aproximations corresponding to the first jmin          *
00730             // Petrov-Vectors are kept.                                               *
00731             //                                                                        *
00732             //*************************************************************************
00733 
00734             timeRestart -= MyWatch.WallTime();
00735             if (j+actblockSize > jmax) {
00736 
00737                 idummy = j; j = jmin;
00738 
00739                 for (act = 0; act < nLocal; act += jmax) {
00740                     cnt = (act + jmax < nLocal) ? jmax : nLocal - act;
00741                     int iC;
00742                     for (iC = 0; iC < idummy; ++iC)
00743                         memcpy(Vtmp + iC*cnt, V + act + iC*nLocal, cnt*sizeof(double));
00744                     callBLAS.GEMM('N', 'N', cnt, j, idummy, 1.0, Vtmp, cnt, U, jmax, 0.0, V + act, nLocal);
00745                 }
00746 
00747                 for (act = 0; act < j; act++) {
00748                     memset(matK + act*jmax, 0, act*sizeof(double));
00749                     matK[act*jmax + act] = s[act];
00750                     memset(U + act*jmax, 0, j*sizeof(double));
00751                     U[act*jmax + act] = 1.0;
00752                 }
00753 
00754             }
00755             timeRestart += MyWatch.WallTime();
00756 
00757         } // while(found)
00758 
00759         // Exit main iteration loop when kmax eigenpairs have converged */
00760         if (k == kmax)
00761             break;
00762 
00763         //***************************************************************************
00764         //                                                                          *
00765         // Solving the correction equations                                         *
00766         //                                                                          *
00767         // Depending on the input-arguments we choose an appropriate lin.solver.    *
00768         //                                                                          *
00769         //***************************************************************************
00770 
00771         // calculate Hlu (LU-factorization), if necessary
00772         if (!Pisempty) {
00773             idummy = k + actblockSize;
00774             int iD;
00775             for (iD = 0; iD < idummy; ++iD) {
00776                 memcpy(Hlu + iD*kmax, H + iD*kmax, idummy*sizeof(double));
00777             }
00778             callLAPACK.GETRF(idummy, idummy, Hlu, kmax, Hpiv, &info);
00779             if (info != 0)
00780                 rError("Factorization of H failed: info=%d", info);
00781             rAssert(info == 0);
00782         }
00783 
00784         // Factorize G if necessary
00785         if (Gisused) {
00786             idummy = k + actblockSize;
00787             int iD;
00788             for (iD = 0; iD < idummy; ++iD) {
00789                 memcpy(Glu + iD*kmax, G + iD*kmax, idummy*sizeof(double));
00790             }
00791             callLAPACK.GETRF(idummy, idummy, Glu, kmax, Gpiv, &info);
00792             if (info != 0)
00793                 rError("Factorization of G failed: info=%d", info);
00794             rAssert(info == 0);
00795         }
00796 
00797         // Solve actblockSize times the correction equation ...
00798         for (act = 0; act < actblockSize; act ++) {
00799 
00800             // Setting start-value for vector v as zeros(n,1).
00801             // Guarantees (M-)orthogonality
00802             Epetra_Vector vVec(View, *VV, j);
00803             vVec.PutScalar(0.0);
00804 
00805             // Adaptive accuracy and shift for the lin.solver. In case the
00806             // residual is big, we don't need a too precise solution for the
00807             // correction equation, since even in exact arithmetic the
00808             // solution wouldn't be too usefull for the Eigenproblem.
00809             if (resnrm[act] < eps_tr) {
00810                 projK->resetPro_theta(s[act]);
00811                 projP->resetPro_theta(s[act]);
00812             } else {
00813                 projK->resetPro_theta(tau);
00814                 projP->resetPro_theta(tau);
00815             }
00816 
00817             it_tol = pow(toldecay, (double)(-solvestep[act]));
00818             solvestep[act] = solvestep[act] + 1;
00819 
00820             // Tells the Projectors-Lib what size Q, Qb, Y etc. has */
00821             int prok = k + actblockSize;
00822             projK->resetPro_k(prok);
00823             projP->resetPro_k(prok);
00824 
00825             if (copyQ)
00826                 delete copyQ;
00827             copyQ = new Epetra_MultiVector(View, *QQ, 0, prok);
00828             projK->resetPro_Q(copyQ);
00829             projP->resetPro_Q(copyQ);
00830 
00831             if (M) {
00832                 if (copyQb)
00833                     delete copyQb;
00834                 copyQb = new Epetra_MultiVector(View, *QbQb, 0, prok);
00835                 projK->resetPro_Qb(copyQb);
00836                 projP->resetPro_Qb(copyQb);
00837             }
00838 
00839             if (Prec) {
00840                 if (copyY)
00841                     delete copyY;
00842                 copyY = new Epetra_MultiVector(View, *YY, 0, prok);
00843                 projK->resetPro_Y(copyY);
00844                 projP->resetPro_Y(copyY);
00845             }
00846 
00847             // Apply preconditioner to the right hand side of the correction
00848             // equation and project if necessary
00849             timeCorrectionRHS -= MyWatch.WallTime();
00850             Epetra_Vector rVec(View, MyMap, Res + act*nLocal);
00851             projK->MakeRHS(rVec);
00852             timeCorrectionRHS += MyWatch.WallTime();
00853 
00854             // Solve the correction equation
00855             timeCorrectionSolve -= MyWatch.WallTime();
00856             info = linSolver->solve(rVec, vVec, it_tol, max_inner_it);
00857             timeCorrectionSolve += MyWatch.WallTime();
00858             numCorrectionSolve += 1;
00859 
00860             // Actualizing profiling data
00861             if (info >= 0) {
00862                 sumIterPCG += info;
00863                 maxIterPCG = (info > maxIterPCG) ? info : maxIterPCG;
00864                 actcorrits[act] = info;
00865             }
00866             if (info == -1) {
00867                 sumIterPCG += max_inner_it;
00868                 maxIterPCG = max_inner_it;
00869                 actcorrits[act] = -1;
00870                 info = 0;
00871             }
00872             if (info < 0)
00873                 rError("Error detected in correction equation solver (info=%d).", info);
00874             rAssert(info >= 0);
00875             info = 0;
00876 
00877             // Applying projector :
00878             // v = (I - Y * H^-1 * C^T) * v
00879 
00880             if (YHC) {
00881                 YHC->Apply(vVec, vVec);
00882             }
00883 
00884             // (M-)orthonormalize v to Q, cause the implicit
00885             // orthogonalization in the solvers may be too inaccurate. Then
00886             // apply "IteratedCGS" to prevent numerical breakdown
00887             timeOrtho -= MyWatch.WallTime();
00888             if (!Misempty) {
00889                 ortho.ModifiedGSB(&vVec, copyQ, copyQb);
00890                 Epetra_MultiVector copyV(View, *VV, 0, j);
00891                 ortho.IteratedClassicalGS(&vVec, &alpha, &copyV, temp1, M);
00892             } else {
00893                 ortho.ModifiedGS(&vVec, copyQ);
00894                 Epetra_MultiVector copyV(View, *VV, 0, j);
00895                 ortho.IteratedClassicalGS(&vVec, &alpha, &copyV, temp1);
00896             }
00897             alpha = 1.0 / alpha;
00898             vVec.Scale(alpha);
00899             timeOrtho += MyWatch.WallTime();
00900             numOrtho += 1;
00901 
00902             // update interaction matrix matK
00903             Epetra_Vector t1(View, MyMap, temp1);
00904             timeStifOp -= MyWatch.WallTime();
00905             K->Apply(vVec, t1);
00906             timeStifOp += MyWatch.WallTime();
00907             stifOp += 1;
00908             idummy = j+1;
00909             timeLocalProj -= MyWatch.WallTime();
00910             callBLAS.GEMV('T', nLocal, idummy, 1.0, V, nLocal, temp1, 0.0, temp3);
00911             MyComm.SumAll(temp3, matK + j*jmax, idummy);
00912             timeLocalProj += MyWatch.WallTime();
00913 
00914             // Increasing SearchSpaceSize j
00915             j ++;
00916 
00917         }   // for (act = 0; act < actblockSize; act ++)
00918 
00919         // Increase iteration-counter for outer loop
00920         it += 1;
00921 
00922         // Print information line
00923         print_status(clvl*(myPid == 0), it, k, j - blockSize, kmax, blockSize, actblockSize,
00924                      s, resnrm, actcorrits);
00925 
00926     } // while (it < maxIterEigenSolve)
00927     timeOuterLoop += MyWatch.WallTime();
00928 
00929     highMem = (highMem > currentSize()) ? highMem : currentSize();
00930 
00931     k_conv = k;
00932 
00933     // Free the memory
00934     if (copyQ)
00935         delete copyQ;
00936     if (copyQb)
00937         delete copyQb;
00938     if (copyY)
00939         delete copyY;
00940     if (VV)
00941         delete VV;
00942     if (QQ)
00943         delete QQ;
00944     if (RR)
00945         delete RR;
00946     if (YY)
00947         delete YY;
00948     if (QbQb)
00949         delete QbQb;
00950     if (V)
00951         delete[] V;
00952     if (Vtmp)
00953         delete[] Vtmp;
00954     if (U)
00955         delete[] U;
00956     if (Qb)
00957         delete[] Qb;
00958     if (Y)
00959         delete[] Y;
00960     if (s)
00961         delete[] s;
00962     if (Res)
00963         delete[] Res;
00964     if (resnrm)
00965         delete[] resnrm;
00966     if (matK)
00967         delete[] matK;
00968     if (H)
00969         delete[] H;
00970     if (Hlu)
00971         delete[] Hlu;
00972     if (Hpiv)
00973         delete[] Hpiv;
00974     if (G)
00975         delete[] G;
00976     if (Glu)
00977         delete[] Glu;
00978     if (Gpiv)
00979         delete[] Gpiv;
00980     if (temp1)
00981         delete[] temp1;
00982     if (temp3)
00983         delete[] temp3;
00984     if (idx1)
00985         delete[] idx1;
00986     if (idx2)
00987         delete[] idx2;
00988     if (convind)
00989         delete[] convind;
00990     if (keepind)
00991         delete[] keepind;
00992     if (solvestep)
00993         delete[] solvestep;
00994     if (actcorrits)
00995         delete[] actcorrits;
00996     if (vectWeight)
00997         delete vectWeight;
00998 
00999     return info;
01000 
01001 } /* JDBSYM::jdbsym(.....) */
01002 
01003 
01004 /****************************************************************************
01005  *                                                                          *
01006  * Supporting functions                                                     *
01007  *                                                                          *
01008  ****************************************************************************/
01009 
01010 // VALIDATE - perform runtime check to validate JD-routine //
01011 void JDBSYM::validate(Epetra_MultiVector *Q, Epetra_MultiVector *MQ, Epetra_MultiVector *Y,
01012                       Epetra_MultiVector *V, double *H, int ldh, opType _optype, double *work1) {
01013 
01014     int myPid = MyComm.MyPID();
01015 
01016     double VAL_TOL = 0.0;
01017     callLAPACK.LAMCH('E', VAL_TOL);
01018     VAL_TOL *= 100.0;
01019 
01020     double s = 0.0;
01021     double maxS;
01022 
01023     int r, c;
01024 
01025     int k = Q->NumVectors();
01026     int j = V->NumVectors();
01027 
01028     Epetra_Vector w1(View, Q->Map(), work1);
01029 
01030     // Check MQ = M*Q
01031     if (M) {
01032         maxS = 0.0;
01033         for (c = 0; c < k; c ++) {
01034             M->Apply(*((*Q)(c)), w1);
01035             w1.Update(1.0, *((*MQ)(c)), -1.0);
01036             w1.NormInf(&s);
01037             maxS = (fabs(s) > maxS) ? fabs(s) : maxS;
01038         }
01039         if ((myPid == 0) && (maxS > VAL_TOL))
01040             rWarning("Norm of MQ - M*Q = %.4e", maxS);
01041     }
01042 
01043     // Check Q'*Q = I or Q'*MQ = I resp. //
01044     if (M == 0) {
01045         maxS = 0.0;
01046         for (r = 0; r < k; r ++) {
01047             for (c = 0; c < k; c ++) {
01048                 (*Q)(r)->Dot(*((*Q)(c)), &s);
01049                 if (r == c)
01050                     s -= 1.0;
01051                 maxS = (fabs(s) > maxS) ? fabs(s) : maxS;
01052             }
01053         }
01054         if ((myPid == 0) && (maxS > VAL_TOL))
01055             rWarning("Norm of Q'*Q - I = %.4e", maxS);
01056     } else {
01057         maxS = 0.0;
01058         for (r = 0; r < k; r ++) {
01059             for (c = 0; c < k; c ++) {
01060                 (*MQ)(r)->Dot(*((*Q)(c)), &s);
01061                 if (r == c)
01062                     s -= 1.0;
01063                 maxS = (fabs(s) > maxS) ? fabs(s) : maxS;
01064             }
01065         }
01066         if ((myPid == 0) && (maxS > VAL_TOL))
01067             rWarning("Norm of MQ'*Q - I = %.4e", maxS);
01068     }
01069 
01070     // Check V'*V = I or V'*M*V = I resp. //
01071     if (M == 0) {
01072         maxS = 0.0;
01073         for (r = 0; r < j; r ++) {
01074             for (c = 0; c < j; c ++) {
01075                 (*V)(r)->Dot(*((*V)(c)), &s);
01076                 if (r == c)
01077                     s -= 1.0;
01078                 maxS = (fabs(s) > maxS) ? fabs(s) : maxS;
01079             }
01080         }
01081         if ((myPid == 0) && (maxS > VAL_TOL))
01082             rWarning("Norm of V'*V - I = %.4e", maxS);
01083     } else {
01084         for (r = 0; r < j; r ++) {
01085             for (c = 0; c < j; c ++) {
01086                 M->Apply(*((*V)(r)), w1);
01087                 w1.Dot(*((*V)(c)), &s);
01088                 if (r == c)
01089                     s -= 1.0;
01090                 maxS = (fabs(s) > maxS) ? fabs(s) : maxS;
01091             }
01092         }
01093         if ((myPid == 0) && (maxS > VAL_TOL))
01094             rWarning("Norm of V'*M*V - I = %.4e", maxS);
01095     }
01096 
01097     // Check Y = inv(K)*Q or Y = inv(K)*MQ resp. //
01098     if (Prec) {
01099         maxS = 0.0;
01100         if ((M == 0) || (_optype == OP_UNSYM1)) {
01101             for (c = 0; c < k; c ++) {
01102                 Prec->ApplyInverse(*((*Q)(c)), w1);
01103                 w1.Update(1.0, *((*Y)(c)), -1.0);
01104                 w1.NormInf(&s);
01105                 maxS = (fabs(s) > maxS) ? fabs(s) : maxS;
01106             }
01107             if ((myPid == 0) && (maxS > VAL_TOL))
01108                 rWarning("Norm of Y - Prec^{-1}*Q = %.4e", maxS);
01109         } else {
01110             for (c = 0; c < k; c ++) {
01111                 Prec->ApplyInverse(*((*MQ)(c)), w1);
01112                 w1.Update(1.0, *((*Y)(c)), -1.0);
01113                 w1.NormInf(&s);
01114                 maxS = (fabs(s) > maxS) ? fabs(s) : maxS;
01115             }
01116             if ((myPid == 0) && (maxS > VAL_TOL))
01117                 rWarning("Norm of Y - Prec^{-1}*M*Q = %.4e", maxS);
01118         }
01119     }
01120 
01121     // Check H = Q'*Y or H = MQ'*Y resp. //
01122     if (Prec) {
01123         maxS = 0.0;
01124         if (M == 0) {
01125             for (r = 0; r < k; r ++) {
01126                 for (c = 0; c < k; c ++) {
01127                     (*Q)(r)->Dot(*((*Y)(c)), &s);
01128                     s -= H[c*ldh + r];
01129                     maxS = (fabs(s) > maxS) ? fabs(s) : maxS;
01130                 }
01131             }
01132             if ((myPid == 0) && (maxS > VAL_TOL))
01133                 rWarning("Norm of Q'*Y - H = %.4e", maxS);
01134         } else {
01135             for (r = 0; r < k; r ++) {
01136                 for (c = 0; c < k; c ++) {
01137                     (*MQ)(r)->Dot(*((*Y)(c)), &s);
01138                     s -= H[c*ldh + r];
01139                     maxS = (fabs(s) > maxS) ? fabs(s) : maxS;
01140                 }
01141             }
01142             if ((myPid == 0) && (maxS > VAL_TOL))
01143                 rWarning("Norm of MQ'*Y - H = %.4e", maxS);
01144         }
01145     }
01146 
01147 }
01148 
01149 
01150 // PRINT_STATUS - print status line (called for each outer iteration)
01151 void JDBSYM::print_status(int clvl, int it, int k, int j, int kmax,
01152                           int _blkSize, int actblkSize,
01153                           double *s, double *resnrm, int *actcorrits) {
01154     const int max_vals = 5;
01155     ostringstream buf;
01156 
01157     if (clvl > 0) {
01158         if (_blkSize == 1) {
01159             if (it == 1) {
01160                 buf << "  IT   K   J       RES CGIT  RITZVALS(1:" << max_vals << ")";
01161                 rInfo(buf.str().c_str());
01162                 buf.str("");
01163                 int idummy = 27 + ( 13 > max_vals*10 ? 13 : max_vals*10);
01164                 for (int i = 0; i < idummy; i ++)
01165                     buf << '-';
01166                 rInfo(buf.str().c_str());
01167                 buf.str("");
01168             }
01169             buf << right << scientific
01170                 << setw(4) << it << ' '
01171                 << setw(3) << k << ' '
01172                 << setw(3) << j << ' '
01173                 << setw(9) << setprecision(2) << resnrm[0] << ' '
01174                 << setw(4) << actcorrits[0];
01175             for (int i = 0; i < (j < max_vals ? j : max_vals); i ++)
01176                 buf << ' ' << setw(9) << setprecision(2) << s[i];
01177         } else {
01178             buf << " Iteration " << it;
01179             buf << " - Number of converged eigenvectors " << k << endl;
01180 
01181             if (clvl > 2) {
01182                 for (int i = 0; i < _blkSize; ++i) {
01183                     resHistory[historyCount*_blkSize+i] = (i < actblkSize) ? resnrm[i] : 0.0;
01184                     spaceSizeHistory[historyCount] = j;
01185                     iterPCGHistory[historyCount*_blkSize+i] = (i < actblkSize) ? actcorrits[i] : 0;
01186                 }
01187                 historyCount += 1;
01188             }
01189 
01190             if (clvl > 1) {
01191                 buf.precision(2);
01192                 buf.setf(ios::scientific, ios::floatfield);
01193                 for (int i = 0; i < actblkSize; ++i) {
01194                     buf << " Iteration " << it;
01195                     buf << " - Norm of Residual " << i;
01196                     buf << " = " << resnrm[i] << endl;
01197                 }
01198                 for (int i = 0; i < actblkSize; ++i) {
01199                     buf << " Iteration " << it << " - Inner iterations for vector " << i;
01200                     buf << " = " << actcorrits[i] << endl;
01201                 }
01202                 for (int i = 0; i < actblkSize; ++i) {
01203                     buf << " Iteration " << it << " - Ritz eigenvalue " << i;
01204                     buf << " = " << s[i];
01205                 }
01206             }
01207         }
01208     }
01209     rInfo(buf.str().c_str());
01210 }
01211 
01212 
01213 /*
01214  * SORTEIG
01215  *
01216  * Default behaviour (strategy == 0):
01217  *
01218  *   Sort eigenpairs (S(i),U(:,i)), such that
01219  *
01220  *       |S(i) - tau| <= |S(i+1) -tau| for i=1..j-1.
01221  *
01222  *     j  : dimension of S
01223  *     ldu: leading dimension of U
01224  *   dtemp: double array of length j
01225  *     idx: int array of length j
01226  *
01227  * Alternate behaviour (strategy == 1):
01228  *
01229  *   Same as above but put all S(i) < tau to the end. This is used to
01230  *   avoid computation of zero eigenvalues.
01231  */
01232 
01233 
01234 void JDBSYM::sorteig(int j, double S[], double U[], int ldu, double tau,
01235                      double dtemp[], int idx1[], int idx2[], int strategy) {
01236 
01237     int i;
01238 
01239     /* setup vector to be sorted and index vector */
01240     switch (strategy) {
01241     case 0:
01242         for (i = 0; i < j; i ++)
01243             dtemp[i] = fabs(S[i] - tau);
01244         break;
01245     case 1:
01246         for (i = 0; i < j; i ++)
01247             if (S[i] < tau)
01248                 dtemp[i] = Epetra_MaxDouble;
01249             else
01250                 dtemp[i] = fabs(S[i] - tau);
01251         break;
01252     default:
01253         assert(0);
01254     }
01255     for (i = 0; i < j; i ++)
01256         idx1[i] = i;
01257 
01258     /* sort dtemp in ascending order carrying itemp along */
01259     quicksort(j, dtemp, idx1);
01260 
01261     /* compute 'inverse' index vector */
01262     for (i = 0; i < j; i ++)
01263         idx2[idx1[i]] = i;
01264 
01265     /* sort eigenvalues */
01266     memcpy(dtemp, S, j * sizeof(double));
01267     for (i = 0; i < j; i ++)
01268         S[i] = dtemp[idx1[i]];
01269 
01270     /* sort eigenvectors (in place) */
01271     for (i = 0; i < j; i ++) {
01272         if (i != idx1[i]) {
01273             memcpy(dtemp, U+i*ldu, j*sizeof(double));
01274             memcpy(U+i*ldu, U+idx1[i]*ldu, j*sizeof(double));
01275             memcpy(U+idx1[i]*ldu, dtemp, j*sizeof(double));
01276             idx1[idx2[i]] = idx1[i];
01277             idx2[idx1[i]] = idx2[i];
01278         }
01279     }
01280 
01281 }
01282 
01283 
01284 /*
01285  * QUICKSORT 
01286  *
01287  * Sorts a double array using a non-recursive quicksort algorithm in
01288  * ascending order carrying along an int array.
01289  *  
01290  */
01291 
01292 
01293 void JDBSYM::quicksort(int n, double arr[], int idx[]) {
01294 
01295     double v, td;
01296     int i, j, l, r, ti, tos, stack[32];
01297 
01298     l = 0; r = n-1; tos = -1;
01299     for (;;) {
01300         while (r > l) {
01301             v = arr[r]; i = l; j = r-1;
01302             for (;;) {
01303                 while (arr[i] < v) i ++;
01304                 /* j > l prevents underflow */
01305                 while (arr[j] >= v && j > l) j --;
01306                 if (i >= j) break;
01307                 td = arr[i]; arr[i] = arr[j]; arr[j] = td;
01308                 ti = idx[i]; idx[i] = idx[j]; idx[j] = ti;
01309             }
01310             td = arr[i]; arr[i] = arr[r]; arr[r] = td;
01311             ti = idx[i]; idx[i] = idx[r]; idx[r] = ti;
01312             if (i-l > r-i) { stack[++tos] = l; stack[++tos] = i-1; l = i+1; } else { stack[++tos] = i+1; stack[++tos] = r; r = i-1; }
01313             assert(tos < 32);
01314         }
01315         if (tos == -1) break;
01316         r = stack[tos--]; l = stack[tos--];
01317     }
01318 
01319 }
01320 
01321 void JDBSYM::set_defaults(Teuchos::ParameterList& params, int kmax) {
01322     // The tol parameter is set directly before constructing the solver object
01323     // params.set("tol", 1e-8);
01324     params.set("jmin", kmax < 5? 5 : kmax+1);
01325     params.set("jmax", kmax+10);
01326     params.set("tau", 0.0);
01327     params.set("eps_tr", 1e-3);
01328     params.set("tol_decay", 1.5);
01329     params.set("max_it", 200);
01330     params.set("max_inner_it", 50);
01331     params.set("block_size", 1);
01332     params.set("block_wise", 0);
01333     params.set("strategy", 0);
01334     params.set("op_type", "sym");
01335     params.set("clvl", 1);
01336 }
01337 
01338 int JDBSYM::solve(int numEigen, Epetra_MultiVector &Q, double *lambda) {
01339 
01340     int info = 0;
01341 
01342     info = myVerify.inputArguments(numEigen, K, M, Prec, Q, minimumSpaceDimension(numEigen));
01343     if (info < 0)
01344         return info;
01345 
01346     if ((params_.get<int>("clvl") > 2) && (MyComm.MyPID() == 0)) {
01347         resHistory = new double[params_.get<int>("max_it")*params_.get<int>("block_size")];
01348         spaceSizeHistory = new int[params_.get<int>("max_it")];
01349         iterPCGHistory = new int[params_.get<int>("max_it")*params_.get<int>("block_size")];
01350     }
01351 
01352     JDBSYM::opType optype;
01353     if (params_.get<string>("op_type") == "sym")
01354         optype = OP_SYM;
01355     else if (params_.get<string>("op_type") == "unsym1")
01356         optype = OP_UNSYM1;
01357     else if (params_.get<string>("op_type") == "unsym2")
01358         optype = OP_UNSYM2;
01359     else
01360         assert(false);
01361 
01362     int knownEV = 0;
01363 
01364     info = jdbsym(params_.get<double>("tau"),
01365                   params_.get<double>("tol"),
01366                   numEigen,
01367                   params_.get<int>("jmax"),
01368                   params_.get<int>("jmin"),
01369                   params_.get<int>("block_size"),
01370                   params_.get<int>("block_wise"),
01371                   0, // V0
01372                   0, // V0dim
01373                   optype,
01374                   params_.get<double>("eps_tr"),
01375                   params_.get<double>("tol_decay"),
01376                   params_.get<int>("strategy"),
01377                   params_.get<int>("max_it"),
01378                   params_.get<int>("max_inner_it"),
01379                   params_.get<int>("clvl"),
01380                   knownEV,
01381                   Q.Values(),
01382                   lambda,
01383                   outerIter);
01384     //  info = jdbsym(tau, tolEigenSolve, numEigen, 2*numEigen, numEigen, 0, 0, 0, OP_SYM,
01385     //                sqrt(tolEigenSolve), 1.5, 0, verbose, knownEV, Q.Values(), lambda, outerIter);
01386 
01387     // Sort the eigenpairs
01388     timePostProce -= MyWatch.WallTime();
01389     if ((info == 0) && (knownEV > 0)) {
01390         mySort.sortScalars_Vectors(knownEV, lambda, Q.Values(), Q.MyLength());
01391     }
01392     timePostProce += MyWatch.WallTime();
01393 
01394     return (info == 0) ? knownEV : info;
01395 }
01396 
01397 
01398 void JDBSYM::initializeCounters() {
01399 
01400     historyCount = 0;
01401     if (resHistory) {
01402         delete[] resHistory;
01403         resHistory = 0;
01404     }
01405 
01406     maxSpaceSize = 0;
01407     sumSpaceSize = 0;
01408     if (spaceSizeHistory) {
01409         delete[] spaceSizeHistory;
01410         spaceSizeHistory = 0;
01411     }
01412 
01413     maxIterPCG = 0;
01414     sumIterPCG = 0;
01415     if (iterPCGHistory) {
01416         delete[] iterPCGHistory;
01417         iterPCGHistory = 0;
01418     }
01419 
01420     memRequested = 0.0;
01421     highMem = 0.0;
01422 
01423     numCorrectionSolve = 0;
01424     numOrtho = 0;
01425     outerIter = 0;
01426     precOp = 0;
01427     residual = 0;
01428     stifOp = 0;
01429 
01430     timeBuildG = 0.0;
01431     timeBuildH = 0.0;
01432     timeCorrectionRHS = 0.0;
01433     timeCorrectionSolve = 0.0;
01434     timeLocalProj = 0.0;
01435     timeLocalSolve = 0.0;
01436     timeLocalUpdate = 0.0;
01437     timeNorm = 0.0;
01438     timeOrtho = 0.0;
01439     timeOuterLoop = 0.0;
01440     timePostProce = 0.0;
01441     timePrecOp = 0.0;
01442     timeResidual = 0.0;
01443     timeRestart = 0.0;
01444     timeStifOp = 0.0;
01445 }
01446 
01447 
01448 void JDBSYM::algorithmInfo() const {
01449     rDebug("Algorithm: Jacobi-Davidson algorithm (Geus' implementation)");
01450     rDebug("Block Size: %d", params_.get<int>("block_size"));
01451 }
01452 
01453 void JDBSYM::historyInfo() const {
01454 
01455     if (resHistory) {
01456         ostringstream buf;
01457         int j;
01458         buf << " Block Size: " << params_.get<int>("block_size") << endl;
01459         buf << endl;
01460         buf << " Residuals    Search Space Dim.   Inner Iter. \n";
01461         buf << endl;
01462         buf.precision(4);
01463         buf.setf(ios::scientific, ios::floatfield);
01464         for (j = 0; j < historyCount; ++j) {
01465             int ii;
01466             for (ii = 0; ii < params_.get<int>("block_size"); ++ii) {
01467                 buf << resHistory[params_.get<int>("block_size")*j + ii] << "      ";
01468                 buf.width(4);
01469                 buf << spaceSizeHistory[j] << "          ";
01470                 buf.width(4);
01471                 buf << iterPCGHistory[j] << endl;
01472             }
01473         }
01474         rDebug(buf.str().c_str());
01475     }
01476 
01477 }
01478 
01479 void JDBSYM::memoryInfo() const {
01480     double maxHighMem = 0.0;
01481     double tmp = highMem;
01482     MyComm.MaxAll(&tmp, &maxHighMem, 1);
01483 
01484     double maxMemRequested = 0.0;
01485     tmp = memRequested;
01486     MyComm.MaxAll(&tmp, &maxMemRequested, 1);
01487 
01488     rDebug("Memory requested per processor by the eigensolver (EST) = %.2f MB", maxMemRequested);
01489     rDebug("High water mark in eigensolver                    (EST) = %.2f MB", maxHighMem);
01490 }
01491 
01492 
01493 void JDBSYM::operationInfo() const {
01494     ostringstream buf;
01495     buf << "JDBSYM operations:\n";
01496     buf << "\tTotal number of stiffness matrix operations      = ";
01497     buf.width(9);
01498     buf << stifOp << endl;
01499     buf << "\tTotal number of preconditioner applications      = ";
01500     buf.width(9);
01501     buf << precOp << endl;
01502     buf << "\tTotal number of linear solves                    = ";
01503     buf.width(9);
01504     buf << numCorrectionSolve << endl;
01505     buf.setf(ios::fixed, ios::floatfield);
01506     buf.precision(2);
01507     buf.width(9);
01508     buf << "\t\tAverage number of iter. per solve   : ";
01509     buf.width(9);
01510     buf << ((double) sumIterPCG)*params_.get<int>("block_size")/((double) numCorrectionSolve) << endl;
01511     buf << "\t\tMaximum number of iter. per solve   : ";
01512     buf.width(9);
01513     buf << maxIterPCG << endl;
01514     buf << "\tTotal number of computed eigen-residuals         = ";
01515     buf.width(9);
01516     buf << residual << endl;
01517     buf << "\tTotal number of orthogonalization steps          = ";
01518     buf.width(9);
01519     buf << numOrtho << endl;
01520     buf << "\tTotal number of outer iterations                 = ";
01521     buf.width(9);
01522     buf << outerIter << endl;
01523     buf << "\tMaximum size of search space                     = ";
01524     buf.width(9);
01525     buf << maxSpaceSize << endl;
01526     buf << "\tAverage size of search space                     = ";
01527     buf.setf(ios::fixed, ios::floatfield);
01528     buf.precision(2);
01529     buf.width(9);
01530     buf << ((double) sumSpaceSize)/((double) outerIter);
01531     rInfo(buf.str().c_str());
01532 }
01533 
01534 
01535 void JDBSYM::timeInfo() const {
01536     ostringstream buf;
01537     buf << "JDBSYM timings:\n";
01538     buf.setf(ios::fixed, ios::floatfield);
01539     buf.precision(2);
01540     buf << "\tTotal time for outer loop                         = ";
01541     buf.width(9);
01542     buf << timeOuterLoop << " s" << endl;
01543     buf << "\t\tTime for stiffness matrix operations      = ";
01544     buf.width(9);
01545     buf << timeStifOp << " s     ";
01546     buf.width(5);
01547     buf << 100*timeStifOp/timeOuterLoop << " %\n";
01548     buf << "\t\tTime for local projection                 = ";
01549     buf.width(9);
01550     buf << timeLocalProj << " s     ";
01551     buf.width(5);
01552     buf << 100*timeLocalProj/timeOuterLoop << " %\n";
01553     buf << "\t\tTime for local update                     = ";
01554     buf.width(9);
01555     buf << timeLocalUpdate << " s     ";
01556     buf.width(5);
01557     buf << 100*timeLocalUpdate/timeOuterLoop << " %\n";
01558     buf << "\t\tTime for preconditioner applications      = ";
01559     buf.width(9);
01560     buf << timePrecOp << " s     ";
01561     buf.width(5);
01562     buf << 100*timePrecOp/timeOuterLoop << " %\n";
01563     buf << "\t\tTime for making the correction RHS        = ";
01564     buf.width(9);
01565     buf << timeCorrectionRHS << " s     ";
01566     buf.width(5);
01567     buf << 100*timeCorrectionRHS/timeOuterLoop << " %\n";
01568     buf << "\t\tTime for solving the correction equation  = ";
01569     buf.width(9);
01570     buf << timeCorrectionSolve << " s     ";
01571     buf.width(5);
01572     buf << 100*timeCorrectionSolve/timeOuterLoop << " %\n";
01573     buf << "\t\t\tPreconditioner Mult.     : ";
01574     buf.width(9);
01575     buf << projP->getTimeApplyInverse() << " s" << endl;
01576     buf << "\t\t\tLinear Operator Mult.    : ";
01577     buf.width(9);
01578     buf << projK->getTimeApply() << " s" << endl;
01579     buf << "\t\tTime for orthogonalizations               = ";
01580     buf.width(9);
01581     buf << timeOrtho << " s     ";
01582     buf.width(5);
01583     buf << 100*timeOrtho/timeOuterLoop << " %\n";
01584     buf << "\t\tTime for restart                          = ";
01585     buf.width(9);
01586     buf << timeRestart << " s     ";
01587     buf.width(5);
01588     buf << 100*timeRestart/timeOuterLoop << " %\n";
01589     buf << "\t\tTime for for building Q^T*Q               = ";
01590     buf.width(9);
01591     buf << timeBuildG << " s     ";
01592     buf.width(5);
01593     buf << 100*timeBuildG/timeOuterLoop << " %\n";
01594     buf << "\t\tTime for for building Q^T*M*P^{-1}*M*Q    = ";
01595     buf.width(9);
01596     buf << timeBuildH << " s     ";
01597     buf.width(5);
01598     buf << 100*timeBuildH/timeOuterLoop << " %\n";
01599     buf << "\t\tTime for residual computations            = ";
01600     buf.width(9);
01601     buf << timeResidual << " s     ";
01602     buf.width(5);
01603     buf << 100*timeResidual/timeOuterLoop << " %\n";
01604     buf << "\tTotal time for post-processing                    = ";
01605     buf.width(9);
01606     buf << timePostProce << " s";
01607     rInfo(buf.str().c_str());
01608 }
01609 
01610