00001 #include <vector>
00002 #include <iomanip>
00003
00004 #include <sys/times.h>
00005 #include <unistd.h>
00006
00007 #include <blas.h>
00008 #include <matrix.h>
00009 #include <vector.h>
00010
00011 using namespace std;
00012
00013 template <typename TYPE>
00014 void fillMatrix(colarray::Matrix<TYPE>& A) {
00015 for (int i=0; i < A._m; i ++)
00016 for (int j=0; j < A._n; j ++)
00017 if (i == j)
00018 A(i,j) = 5*(A._n + A._m);
00019 else
00020 A(i,j) = i - j;
00021 }
00022
00023 template <typename TYPE>
00024 void bench() {
00025 colarray::Matrix<TYPE> *A;
00026 colarray::Vector<int> *piv;
00027 int nList[] = {10, 20, 50, 100, 200, 500, 1000, 1500, 2000, 2500, 3000};
00028 int nofTest = sizeof(nList) / sizeof(nList[0]);
00029 int n, count, info;
00030 clock_t t1, t2;
00031 double tWall, tTot, MFlops;
00032 struct tms timeBuf;
00033
00034 long clk_tck = sysconf(_SC_CLK_TCK);
00035
00036 for (int i = 0; i < nofTest; i ++) {
00037 n = nList[i];
00038 A = new colarray::Matrix< TYPE >(n, n);
00039 piv = new colarray::ColumnVector< blas::int_t >(n);
00040
00041 count = 0; tTot = 0.0;
00042 do {
00043 fillMatrix(*A);
00044
00045 t1 = times(&timeBuf);
00046 blas::getrf(*A, *piv, info);
00047 assert(info == 0);
00048 t2 = times(&timeBuf);
00049
00050 tWall = static_cast< double >(t2-t1) / clk_tck;
00051 tTot += tWall;
00052 count ++;
00053
00054 } while (tTot < 2.0);
00055
00056 tTot /= count;
00057
00058 delete A;
00059 delete piv;
00060
00061 MFlops = 2.0/3.0 * n*n*n / tTot / 1e6;
00062 cout << "n=" << setw(6) << n <<
00063 ", Mflops=" << fixed << setw(8) << setprecision(1) << MFlops << endl;
00064 }
00065
00066 }
00067
00068 int main() {
00069
00070 cout << "DGETRF performance:" << endl;
00071 bench< double >();
00072 cout << endl << "SGETRF performance:" << endl;
00073 bench< float >();
00074
00075 }