00001 #include <vector>
00002 #include <iomanip>
00003
00004 #include <sys/times.h>
00005 #include <unistd.h>
00006
00007 #include <blas.h>
00008 #include <matrix.h>
00009 #include <vector.h>
00010
00011 using namespace std;
00012
00013 template <typename TYPE>
00014 void bench() {
00015 colarray::Matrix<TYPE> *A, *B, *C;
00016 int nList[] = {10, 20, 50, 100, 200, 500, 1000, 1500, 2000, 2500, 3000};
00017 int nofTest = sizeof(nList) / sizeof(nList[0]);
00018 int n, count;
00019 clock_t t1, t2;
00020 double tWall, MFlops;
00021 struct tms timeBuf;
00022
00023 long clk_tck = sysconf(_SC_CLK_TCK);
00024
00025 for (int i = 0; i < nofTest; i ++) {
00026 n = nList[i];
00027 A = new colarray::Matrix<TYPE>(n, n);
00028 B = new colarray::Matrix<TYPE>(n, n);
00029 C = new colarray::Matrix<TYPE>(n, n);
00030
00031 *A = 2.5345;
00032 *B = 4.5675;
00033 *C = 0;
00034
00035 t1 = times(&timeBuf);
00036 count = 0;
00037 do {
00038
00039 blas::gemm('N', 'N', *A, *B, *C, 1.0, 0.0);
00040 t2 = times(&timeBuf);
00041 tWall = static_cast< double >(t2-t1) / clk_tck;
00042 count ++;
00043
00044 } while (tWall < 2.0);
00045
00046 tWall /= count;
00047
00048 delete A;
00049 delete B;
00050 delete C;
00051
00052 MFlops = 2*static_cast< double >(n)*n*n / tWall / 1e6;
00053 cout << "n=" << setw(6) << n <<
00054 ", Mflops=" << fixed << setw(8) << setprecision(1) << MFlops << endl;
00055 }
00056
00057 }
00058
00059 int main() {
00060
00061 cout << "DGEMM performance:" << endl;
00062 bench< double >();
00063 cout << endl << "SGEMM performance:" << endl;
00064 bench< float >();
00065
00066 }