00001 #include <vector> 00002 #include <iomanip> 00003 00004 #include <sys/times.h> 00005 #include <unistd.h> 00006 00007 #include <blas.h> 00008 #include <matrix.h> 00009 #include <vector.h> 00010 00011 using namespace std; 00012 00013 template <typename TYPE> 00014 void bench() { 00015 colarray::Matrix<TYPE> *A, *B, *C; 00016 int nList[] = {10, 20, 50, 100, 200, 500, 1000, 1500, 2000, 2500, 3000}; 00017 int nofTest = sizeof(nList) / sizeof(nList[0]); 00018 int n, count; 00019 clock_t t1, t2; 00020 double tWall, MFlops; 00021 struct tms timeBuf; 00022 00023 long clk_tck = sysconf(_SC_CLK_TCK); 00024 00025 for (int i = 0; i < nofTest; i ++) { 00026 n = nList[i]; 00027 A = new colarray::Matrix<TYPE>(n, n); 00028 B = new colarray::Matrix<TYPE>(n, n); 00029 C = new colarray::Matrix<TYPE>(n, n); 00030 00031 *A = 2.5345; 00032 *B = 4.5675; 00033 *C = 0; 00034 00035 t1 = times(&timeBuf); 00036 count = 0; 00037 do { 00038 00039 blas::gemm('N', 'N', *A, *B, *C, 1.0, 0.0); 00040 t2 = times(&timeBuf); 00041 tWall = static_cast< double >(t2-t1) / clk_tck; 00042 count ++; 00043 00044 } while (tWall < 2.0); 00045 00046 tWall /= count; 00047 00048 delete A; 00049 delete B; 00050 delete C; 00051 00052 MFlops = 2*static_cast< double >(n)*n*n / tWall / 1e6; 00053 cout << "n=" << setw(6) << n << 00054 ", Mflops=" << fixed << setw(8) << setprecision(1) << MFlops << endl; 00055 } 00056 00057 } 00058 00059 int main() { 00060 00061 cout << "DGEMM performance:" << endl; 00062 bench< double >(); 00063 cout << endl << "SGEMM performance:" << endl; 00064 bench< float >(); 00065 00066 }