// Fragment of a templated micro-kernel body. MR and NR are compile-time tile
// dimensions and T is the element type. The function name, parameter list and
// braces are not part of this excerpt; `k`, `a`, `b`, `c`, `ldc`, `a2` and `b2`
// are declared outside the visible lines.
// NOTE(review): the three stages below presumably evaluate pairwise squared
// distances as a2[i] - 2 * dot(a_i, b_j) + b2[j] -- confirm against the full source.
1 template<
int MR,
int NR,
typename T>
// Local MR x NR accumulator; element (i, j) lives at c_reg[ j * MR + i ].
// `{ 0.0 }` sets the first element explicitly and value-initializes the rest,
// so the whole tile starts at zero.
16 T c_reg[ MR * NR ] = { 0.0 };
// Stage 1: for every p in [0, k), add the product a[ p * MR + i ] * b[ p * NR + j ]
// into tile element (i, j).
19 for (
int p = 0; p < k; p ++ )
21 for (
int j = 0; j < NR; j ++ )
23 for (
int i = 0; i < MR; i ++ )
24 c_reg[ j * MR + i ] += a[ p * MR + i ] * b[ p * NR + j ];
// Stage 2: add the values read from c into the tile, where element (i, j) of c
// is at c[ j * ldc + i ].
// NOTE(review): several original lines are elided just before this loop; it may
// be guarded by a condition that is not visible here -- verify.
30 for (
int j = 0; j < NR; j ++ )
32 for (
int i = 0; i < MR; i ++ )
33 c_reg[ j * MR + i ] += c[ j * ldc + i ];
// Stage 3: scale each tile element by -2.0, then add a2[ i ] + b2[ j ].
// NOTE(review): both statements below are presumably inside the inner loop's
// braces (stripped from this excerpt); as literally shown, only the first
// statement would be the loop body -- confirm against the full source.
37 for (
int j = 0; j < NR; j ++ )
40 for (
int i = 0; i < MR; i ++ )
42 c_reg[ j * MR + i ] *= -2.0;
43 c_reg[ j * MR + i ] += a2[ i ] + b2[ j ];
Definition: hmlp_internal.hpp:38