00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029 #include "ModalTools.h"
00030
00031
00032 ModalTools::ModalTools(const Epetra_Comm &_Comm) :
00033 callFortran(),
00034 callBLAS(),
00035 callLAPACK(),
00036 MyComm(_Comm),
00037 MyWatch(_Comm),
00038 eps(0.0),
00039 timeQtMult(0.0),
00040 timeQMult(0.0),
00041 timeProj_MassMult(0.0),
00042 timeNorm_MassMult(0.0),
00043 timeProj(0.0),
00044 timeNorm(0.0),
00045 numProj_MassMult(0),
00046 numNorm_MassMult(0) {
00047
00048 callLAPACK.LAMCH('E', eps);
00049
00050 }
00051
00052
00053 int ModalTools::makeSimpleLumpedMass(const Epetra_Operator *M, double *normWeight) const {
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067 if (M == 0)
00068 return -1;
00069
00070 int row = (M->OperatorDomainMap()).NumMyElements();
00071
00072 double *zVal = new double[row];
00073
00074 Epetra_Vector z(View, M->OperatorDomainMap(), zVal);
00075 Epetra_Vector Mz(View, M->OperatorDomainMap(), normWeight);
00076
00077 z.PutScalar(1.0);
00078 M->Apply(z, Mz);
00079
00080 delete[] zVal;
00081
00082 int i;
00083 double rho = 0.0;
00084 for (i = 0; i < row; ++i)
00085 rho += normWeight[i];
00086
00087 normWeight[0] = rho;
00088 MyComm.SumAll(normWeight, &rho, 1);
00089
00090 int info = 0;
00091 if (rho <= 0.0) {
00092 info = -2;
00093 if (MyComm.MyPID() == 0)
00094 cerr << "\n !!! The mass matrix gives a negative mass: " << rho << " !!! \n\n";
00095 return info;
00096 }
00097 rho = rho/(M->OperatorDomainMap()).NumGlobalElements();
00098
00099
00100
00101
00102
00103
00104 rho = sqrt(rho/(M->OperatorDomainMap()).NumGlobalElements());
00105
00106 for (i = 0; i < row; ++i)
00107 normWeight[i] = rho;
00108
00109 return info;
00110
00111 }
00112
00113
00114 int ModalTools::massOrthonormalize(Epetra_MultiVector &X, Epetra_MultiVector &MX,
00115 const Epetra_Operator *M, const Epetra_MultiVector &Q,
00116 int howMany, int type, double *WS, double kappa) {
00117
00118
00119
00120
00121
00122
00123
00124
00125
00126
00127
00128
00129
00130
00131
00132
00133
00134
00135
00136
00137
00138
00139
00140
00141
00142
00143
00144
00145
00146
00147
00148
00149
00150
00151
00152
00153
00154
00155
00156
00157
00158
00159 int info = 0;
00160
00161
00162 timeProj -= MyWatch.WallTime();
00163 if (type != 2) {
00164
00165 int xc = howMany;
00166 int xr = X.MyLength();
00167 int qc = Q.NumVectors();
00168
00169 Epetra_MultiVector XX(View, X, X.NumVectors()-howMany, howMany);
00170
00171 Epetra_MultiVector MXX(View, X.Map(), (M) ? MX.Values() + xr*(MX.NumVectors()-howMany)
00172 : X.Values() + xr*(X.NumVectors()-howMany),
00173 xr, howMany);
00174
00175
00176
00177
00178 double *oldDot = new double[xc];
00179 MXX.Dot(XX, oldDot);
00180
00182 int zz;
00183 for (zz = 0; zz < xc; ++zz) {
00184 oldDot[zz] = sqrt(oldDot[zz]);
00185 double tmp = 1.0/oldDot[zz];
00186 XX(zz)->Scale(tmp);
00187 if (M)
00188 MXX(zz)->Scale(tmp);
00189 }
00191
00192
00193 double *qTmx = new double[2*qc*xc];
00194
00195
00196 timeQtMult -= MyWatch.WallTime();
00197 callBLAS.GEMM('T', 'N', qc, xc, xr, 1.0, Q.Values(), xr, MXX.Values(), xr,
00198 0.0, qTmx + qc*xc, qc);
00199 MyComm.SumAll(qTmx + qc*xc, qTmx, qc*xc);
00200 timeQtMult += MyWatch.WallTime();
00201
00202
00203 timeQMult -= MyWatch.WallTime();
00204 callBLAS.GEMM('N', 'N', xr, xc, qc, -1.0, Q.Values(), xr, qTmx, qc,
00205 1.0, XX.Values(), xr);
00206 timeQMult += MyWatch.WallTime();
00207
00208
00209 if (M) {
00210 if ((qc >= xc) || (WS == 0)) {
00211 timeProj_MassMult -= MyWatch.WallTime();
00212 M->Apply(XX, MXX);
00213 timeProj_MassMult += MyWatch.WallTime();
00214 numProj_MassMult += xc;
00215 }
00216 else {
00217 Epetra_MultiVector MQ(View, Q.Map(), WS, Q.MyLength(), qc);
00218 timeProj_MassMult -= MyWatch.WallTime();
00219 M->Apply(Q, MQ);
00220 timeProj_MassMult += MyWatch.WallTime();
00221 numProj_MassMult += qc;
00222 callBLAS.GEMM('N', 'N', xr, xc, qc, -1.0, MQ.Values(), xr, qTmx, qc,
00223 1.0, MXX.Values(), xr);
00224 }
00225 }
00226
00227 double newDot = 0.0;
00228 int j;
00229 for (j = 0; j < xc; ++j) {
00230
00231 MXX(j)->Dot(*(XX(j)), &newDot);
00232
00233
00234 if (kappa*newDot < 1.0) {
00235
00236
00237 timeQtMult -= MyWatch.WallTime();
00238 callBLAS.GEMM('T', 'N', qc, xc, xr, 1.0, Q.Values(), xr, MXX.Values(), xr,
00239 0.0, qTmx + qc*xc, qc);
00240 MyComm.SumAll(qTmx + qc*xc, qTmx, qc*xc);
00241 timeQtMult += MyWatch.WallTime();
00242
00243 timeQMult -= MyWatch.WallTime();
00244 callBLAS.GEMM('N', 'N', xr, xc, qc, -1.0, Q.Values(), xr, qTmx, qc,
00245 1.0, XX.Values(), xr);
00246 timeQMult += MyWatch.WallTime();
00247
00248
00249 if (M) {
00250 if ((qc >= xc) || (WS == 0)) {
00251 timeProj_MassMult -= MyWatch.WallTime();
00252 M->Apply(XX, MXX);
00253 timeProj_MassMult += MyWatch.WallTime();
00254 numProj_MassMult += xc;
00255 }
00256 else {
00257 Epetra_MultiVector MQ(View, Q.Map(), WS, Q.MyLength(), qc);
00258 timeProj_MassMult -= MyWatch.WallTime();
00259 M->Apply(Q, MQ);
00260 timeProj_MassMult += MyWatch.WallTime();
00261 numProj_MassMult += qc;
00262 callBLAS.GEMM('N', 'N', xr, xc, qc, -1.0, MQ.Values(), xr, qTmx, qc,
00263 1.0, MXX.Values(), xr);
00264 }
00265 }
00266
00267 break;
00268 }
00269 }
00270
00272 for (zz = 0; zz < xc; ++zz) {
00273 XX(zz)->Scale(oldDot[zz]);
00274 if (M)
00275 MXX(zz)->Scale(oldDot[zz]);
00276 }
00278
00279 delete[] qTmx;
00280 delete[] oldDot;
00281
00282 }
00283 timeProj += MyWatch.WallTime();
00284
00285
00286 timeNorm -= MyWatch.WallTime();
00287 if (type != 1) {
00288
00289 int j;
00290 int xc = X.NumVectors();
00291 int xr = X.MyLength();
00292 int globalSize = X.GlobalLength();
00293 int shift = (type == 2) ? 0 : Q.NumVectors();
00294 int mxc = (M) ? MX.NumVectors() : X.NumVectors();
00295
00296 bool allocated = false;
00297 if (WS == 0) {
00298 allocated = true;
00299 WS = new double[xr];
00300 }
00301
00302 double *oldMXj = WS;
00303 double *MXX = (M) ? MX.Values() : X.Values();
00304 double *product = new double[2*xc];
00305
00306 double dTmp;
00307
00308 for (j = 0; j < howMany; ++j) {
00309
00310 int numX = xc - howMany + j;
00311 int numMX = mxc - howMany + j;
00312
00313
00314 if (numX + shift >= globalSize) {
00315 Epetra_Vector XXj(View, X, numX);
00316 XXj.PutScalar(0.0);
00317 if (M) {
00318 Epetra_Vector MXXj(View, MX, numMX);
00319 MXXj.PutScalar(0.0);
00320 }
00321 info = -1;
00322 }
00323
00324 int numTrials;
00325 bool rankDef = true;
00326 for (numTrials = 0; numTrials < 10; ++numTrials) {
00327
00328 double *Xj = X.Values() + xr*numX;
00329 double *MXj = MXX + xr*numMX;
00330
00331 double oldDot = 0.0;
00332 dTmp = callBLAS.DOT(xr, Xj, MXj);
00333 MyComm.SumAll(&dTmp, &oldDot, 1);
00334
00335 memcpy(oldMXj, MXj, xr*sizeof(double));
00336
00337 if (numX > 0) {
00338
00339
00340
00341 callBLAS.GEMV('T', xr, numX, 1.0, X.Values(), xr, MXj, 0.0, product + xc);
00342 MyComm.SumAll(product + xc, product, numX);
00343 callBLAS.GEMV('N', xr, numX, -1.0, X.Values(), xr, product, 1.0, Xj);
00344 if (M) {
00345 if (xc == mxc) {
00346 callBLAS.GEMV('N', xr, numX, -1.0, MXX, xr, product, 1.0, MXj);
00347 }
00348 else {
00349 Epetra_Vector XXj(View, X, numX);
00350 Epetra_Vector MXXj(View, MX, numMX);
00351 timeNorm_MassMult -= MyWatch.WallTime();
00352 M->Apply(XXj, MXXj);
00353 timeNorm_MassMult += MyWatch.WallTime();
00354 numNorm_MassMult += 1;
00355 }
00356 }
00357
00358 double dot = 0.0;
00359 dTmp = callBLAS.DOT(xr, Xj, MXj);
00360 MyComm.SumAll(&dTmp, &dot, 1);
00361
00362 if (kappa*dot < oldDot) {
00363 callBLAS.GEMV('T', xr, numX, 1.0, X.Values(), xr, MXj, 0.0, product + xc);
00364 MyComm.SumAll(product + xc, product, numX);
00365 callBLAS.GEMV('N', xr, numX, -1.0, X.Values(), xr, product, 1.0, Xj);
00366 if (M) {
00367 if (xc == mxc) {
00368 callBLAS.GEMV('N', xr, numX, -1.0, MXX, xr, product, 1.0, MXj);
00369 }
00370 else {
00371 Epetra_Vector XXj(View, X, numX);
00372 Epetra_Vector MXXj(View, MX, numMX);
00373 timeNorm_MassMult -= MyWatch.WallTime();
00374 M->Apply(XXj, MXXj);
00375 timeNorm_MassMult += MyWatch.WallTime();
00376 numNorm_MassMult += 1;
00377 }
00378 }
00379 }
00380
00381 }
00382
00383 double norm = 0.0;
00384 dTmp = callBLAS.DOT(xr, Xj, oldMXj);
00385 MyComm.SumAll(&dTmp, &norm, 1);
00386
00387 if (norm > oldDot*eps*eps) {
00388 norm = 1.0/sqrt(norm);
00389 callBLAS.SCAL(xr, norm, Xj);
00390 if (M)
00391 callBLAS.SCAL(xr, norm, MXj);
00392 rankDef = false;
00393 break;
00394 }
00395 else {
00396 info += 1;
00397 Epetra_Vector XXj(View, X, numX);
00398 XXj.Random();
00399 Epetra_Vector MXXj(View, MX, numMX);
00400 if (M) {
00401 timeNorm_MassMult -= MyWatch.WallTime();
00402 M->Apply(XXj, MXXj);
00403 timeNorm_MassMult += MyWatch.WallTime();
00404 numNorm_MassMult += 1;
00405 }
00406 if (type == 0)
00407 massOrthonormalize(XXj, MXXj, M, Q, 1, 1, WS, kappa);
00408 }
00409
00410 }
00411
00412 if (rankDef == true) {
00413 Epetra_Vector XXj(View, X, numX);
00414 XXj.PutScalar(0.0);
00415 if (M) {
00416 Epetra_Vector MXXj(View, MX, numMX);
00417 MXXj.PutScalar(0.0);
00418 }
00419 info = -1;
00420 break;
00421 }
00422
00423 }
00424
00425 delete[] product;
00426
00427 if (allocated == true) {
00428 delete[] WS;
00429 }
00430
00431 }
00432 timeNorm += MyWatch.WallTime();
00433
00434 return info;
00435
00436 }
00437
00438
00439 void ModalTools::localProjection(int numRow, int numCol, int dotLength,
00440 double *U, int ldU, double *MatV, int ldV,
00441 double *UtMatV, int ldUtMatV, double *work) const {
00442
00443
00444
00445
00446
00447
00448
00449
00450
00451
00452
00453
00454
00455
00456
00457
00458
00459
00460
00461
00462
00463
00464
00465
00466 int j;
00467 int offSet = numRow*numCol;
00468
00469 callBLAS.GEMM('T', 'N', numRow, numCol, dotLength, 1.0, U, ldU, MatV, ldV,
00470 0.0, work + offSet, numRow);
00471 MyComm.SumAll(work + offSet, work, offSet);
00472
00473 double *source = work;
00474 double *result = UtMatV;
00475 int howMany = numRow*sizeof(double);
00476
00477 for (j = 0; j < numCol; ++j) {
00478 memcpy(result, source, howMany);
00479
00480 source = source + numRow;
00481 result = result + ldUtMatV;
00482 }
00483
00484 }
00485
00486
00487 int ModalTools::directSolver(int size, double *KK, int ldK, double *MM, int ldM,
00488 int &nev, double *EV, int ldEV, double *theta, int verbose, int esType) const {
00489
00490
00491
00492
00493
00494
00495
00496
00497
00498
00499
00500
00501
00502
00503
00504
00505
00506
00507
00508
00509
00510
00511
00512
00513
00514
00515
00516
00517
00518
00519
00520
00521
00522
00523
00524
00525
00526
00527
00528
00529
00530
00531
00532
00533
00534
00535
00536
00537 double *kp = 0;
00538 double *mp = 0;
00539 double *tt = 0;
00540
00541 double *U = 0;
00542
00543 int i, j;
00544 int rank = 0;
00545 int info = 0;
00546 int tmpI;
00547
00548 int NB = 5 + callFortran.LAENV(1, "dsytrd", "u", size, -1, -1, -1, 6, 1);
00549 int lwork = size*NB;
00550 double *work = new double[lwork];
00551
00552 double tol = sqrt(eps);
00553
00554 switch (esType) {
00555
00556 default:
00557 case 0:
00558
00559
00560
00561
00562 kp = new double[size*size];
00563 mp = new double[size*size];
00564 tt = new double[size];
00565 U = new double[size*size];
00566
00567 if (verbose > 4)
00568 cout << endl;
00569
00570
00571 tmpI = sizeof(double);
00572 for (rank = size; rank > 0; --rank) {
00573 memset(kp, 0, size*size*tmpI);
00574 for (i = 0; i < rank; ++i) {
00575 memcpy(kp + i*size, KK + i*ldK, (i+1)*tmpI);
00576 memcpy(mp + i*size, MM + i*ldM, (i+1)*tmpI);
00577 }
00578
00579 info = 0;
00580 callFortran.SYGV(1, 'V', 'U', rank, kp, size, mp, size, tt, work, lwork, &info);
00581
00582 if (info < 0) {
00583 if (verbose > 0) {
00584 cerr << endl;
00585 cerr << " In DSYGV, argument " << -info << "has an illegal value.\n";
00586 cerr << endl;
00587 }
00588 return -20;
00589 }
00590 if (info > 0) {
00591 if (info > rank)
00592 rank = info - rank;
00593 continue;
00594 }
00595
00596 for (i = 0; i < size; ++i) {
00597 memcpy(mp + i*size, MM + i*ldM, (i+1)*tmpI);
00598 for (j = 0; j < i; ++j)
00599 mp[i + j*size] = mp[j + i*size];
00600 }
00601 callBLAS.GEMM('N', 'N', size, rank, size, 1.0, mp, size, kp, size, 0.0, U, size);
00602 callBLAS.GEMM('T', 'N', rank, rank, size, 1.0, kp, size, U, size, 0.0, mp, rank);
00603 double maxNorm = 0.0;
00604 double maxOrth = 0.0;
00605 for (i = 0; i < rank; ++i) {
00606 for (j = i; j < rank; ++j) {
00607 if (j == i) {
00608 maxNorm = (fabs(mp[j+i*rank]-1.0) > maxNorm) ? fabs(mp[j+i*rank]-1.0) : maxNorm;
00609 }
00610 else {
00611 maxOrth = (fabs(mp[j+i*rank]) > maxOrth) ? fabs(mp[j+i*rank]) : maxOrth;
00612 }
00613 }
00614 }
00615 if (verbose > 4) {
00616 cout << " >> Local eigensolve >> Size: " << rank;
00617 cout.precision(2);
00618 cout.setf(ios::scientific, ios::floatfield);
00619 cout << " Normalization error: " << maxNorm;
00620 cout << " Orthogonality error: " << maxOrth;
00621 cout << endl;
00622 }
00623 if ((maxNorm <= tol) && (maxOrth <= tol))
00624 break;
00625 }
00626
00627 if (verbose > 4)
00628 cout << endl;
00629
00630
00631 memset(EV, 0, nev*ldEV*tmpI);
00632 nev = (rank < nev) ? rank : nev;
00633 memcpy(theta, tt, nev*tmpI);
00634 tmpI = rank*tmpI;
00635 for (i = 0; i < nev; ++i) {
00636 memcpy(EV + i*ldEV, kp + i*size, tmpI);
00637 }
00638
00639 break;
00640
00641 case 1:
00642
00643
00644
00645
00646 kp = new double[size*size];
00647 mp = new double[size*size];
00648 tt = new double[size];
00649
00650
00651 tmpI = sizeof(double);
00652 for (i = 0; i < size; ++i) {
00653 memcpy(kp + i*size, KK + i*ldK, (i+1)*tmpI);
00654 memcpy(mp + i*size, MM + i*ldM, (i+1)*tmpI);
00655 }
00656
00657
00658 info = 0;
00659 callFortran.SYGV(1, 'V', 'U', size, kp, size, mp, size, tt, work, lwork, &info);
00660
00661
00662 if (info < 0) {
00663 if (verbose > 0) {
00664 cerr << endl;
00665 cerr << " In DSYGV, argument " << -info << "has an illegal value.\n";
00666 cerr << endl;
00667 }
00668 return -20;
00669 }
00670 if (info > 0) {
00671 if (info > size)
00672 nev = 0;
00673 else {
00674 if (verbose > 0) {
00675 cerr << endl;
00676 cerr << " In DSYGV, DPOTRF or DSYEV returned an error code (" << info << ").\n";
00677 cerr << endl;
00678 }
00679 return -20;
00680 }
00681 }
00682
00683
00684 memcpy(theta, tt, nev*tmpI);
00685 tmpI = size*tmpI;
00686 for (i = 0; i < nev; ++i) {
00687 memcpy(EV + i*ldEV, kp + i*size, tmpI);
00688 }
00689
00690 break;
00691
00692 case 10:
00693
00694
00695
00696
00697 kp = new double[size*size];
00698 tt = new double[size];
00699
00700
00701 tmpI = sizeof(double);
00702 for (i=0; i<size; ++i) {
00703 memcpy(kp + i*size, KK + i*ldK, (i+1)*tmpI);
00704 }
00705
00706
00707 callFortran.SYEV('V', 'U', size, kp, size, tt, work, lwork, &info);
00708
00709
00710 if (info != 0) {
00711 if (verbose > 0) {
00712 cerr << endl;
00713 if (info < 0)
00714 cerr << " In DSYEV, argument " << -info << " has an illegal value\n";
00715 else
00716 cerr << " In DSYEV, the algorithm failed to converge (" << info << ").\n";
00717 cerr << endl;
00718 }
00719 info = -20;
00720 break;
00721 }
00722
00723
00724 memcpy(theta, tt, nev*tmpI);
00725 tmpI = size*tmpI;
00726 for (i = 0; i < nev; ++i) {
00727 memcpy(EV + i*ldEV, kp + i*size, tmpI);
00728 }
00729
00730 break;
00731
00732 }
00733
00734
00735
00736 if (kp)
00737 delete[] kp;
00738 if (mp)
00739 delete[] mp;
00740 if (tt)
00741 delete[] tt;
00742 if (work)
00743 delete[] work;
00744 if (U)
00745 delete[] U;
00746
00747 return info;
00748
00749 }
00750
00751