HMLP: High-performance Machine Learning Primitives
gsks_d12x16.hpp
#include <stdio.h>
#include <stdlib.h>    /** exit */
#include <math.h>
#include <algorithm>   /** std::max */

#include <hmlp.h>
#include <hmlp_internal.hpp>

/** external BLIS-style rank-k micro-kernels used by the operators below */
BLIS_GEMM_KERNEL(bli_sgemm_opt_12x32_l2,float);
BLIS_GEMM_KERNEL(bli_dgemm_opt_12x16_l2,double);

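/** Presumed prototype declared by the BLIS_GEMM_KERNEL macro, inferred from the
 *  call site inside the double-precision operator below; the actual macro lives
 *  in hmlp_internal.hpp and may differ:
 *
 *    void bli_dgemm_opt_12x16_l2
 *    (
 *      int k,
 *      double *alpha,
 *      double *a, double *b,
 *      double *beta,
 *      double *c, int rs_c, int cs_c,
 *      aux_s<double, double, double, double> *aux
 *    );
 */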
/** single-precision (12x32) variant; struct name assumed, operator not implemented */
struct gsks_gaussian_int_s12x32
{
  inline GSKS_OPERATOR(float) const
  {
    printf( "not implemented yet\n" );
    exit( 1 );
  };
};

/** double-precision 12x16 Gaussian kernel summation micro-kernel; struct name assumed */
struct gsks_gaussian_int_d12x16
{
  const size_t mr         = 16;
  const size_t nr         = 12;
  const size_t pack_mr    = 16;
  const size_t pack_nr    = 12;
  const size_t align_size = 64;
  const bool   row_major  = false;

  //inline void operator()
  //(
  //  kernel_s<double> *ker,
  //  int k,
  //  int rhs,
  //  double *u,
  //  double *a, double *aa,
  //  double *b, double *bb,
  //  double *w,
  //  double *c, int ldc,
  //  aux_s<double, double, double, double> *aux
  //) const

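  /** GSKS_OPERATOR(double), defined in hmlp_internal.hpp, presumably expands to the
   *  operator() signature spelled out in the comment above. */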
  inline GSKS_OPERATOR(double) const
  {
    double ctmp[ mr * nr ];
    double alpha = 1.0;
    /** accumulate into ctmp only after the first kc panel ( aux->pc > 0 ) */
    double beta = aux->pc ? 1.0 : 0.0;

    /** load the partial results of previous kc panels into ctmp */
    if ( aux->pc )
    {
      for ( size_t j = 0; j < aux->jb; j ++ )
        for ( size_t i = 0; i < aux->ib; i ++ )
          //ctmp[ j * mr + i ] = c[ ( aux->j + j ) * ldc + ( aux->i + i ) ];
          ctmp[ j * mr + i ] = c[ j * ldc + i ];
    }

    /** rank-k update via the BLIS micro-kernel: ctmp[ j * mr + i ] (+)= a_i^T b_j */
    bli_dgemm_opt_12x16_l2
    (
      k,
      &alpha,
      b,
      a,
      &beta,
      ctmp, mr, 1,
      aux
    );

    /** turn inner products into squared distances and accumulate the weighted Gaussian */
    for ( size_t j = 0; j < aux->jb; j ++ )
    {
      for ( size_t i = 0; i < aux->ib; i ++ )
      {
        /** ||a_i - b_j||^2 = ||a_i||^2 - 2 a_i^T b_j + ||b_j||^2, clamped at zero */
        ctmp[ j * mr + i ] *= -2.0;
        ctmp[ j * mr + i ] += aa[ i ] + bb[ j ];
        ctmp[ j * mr + i ] = std::max( ctmp[ j * mr + i ], (double)0 );
        u[ i ] += std::exp( ker->scal * ctmp[ j * mr + i ] ) * w[ j ];
      }
    }

  }; /** end inline GSKS_OPERATOR(double) */
}; /** end struct gsks_gaussian_int_d12x16 */
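
For reference, the double-precision operator above evaluates a Gaussian kernel summation over one mr-by-nr tile: the BLIS call produces the inner products a_i^T b_j, the arrays aa and bb play the role of the squared norms ||a_i||^2 and ||b_j||^2, the loop forms ||a_i - b_j||^2 = ||a_i||^2 - 2 a_i^T b_j + ||b_j||^2, and the result is accumulated as u[ i ] += exp( scal * ||a_i - b_j||^2 ) * w[ j ]. The sketch below is a minimal, unoptimized restatement of that computation and is not part of HMLP; the helper name, the column-wise panel layout with leading dimensions mr and nr, and processing the full k dimension in one pass are assumptions made for illustration.

#include <algorithm>
#include <cmath>
#include <cstddef>

/** Hypothetical reference for what the d12x16 GSKS micro-kernel computes. */
void gsks_gaussian_reference
(
  double scal,                        /** kernel bandwidth term, e.g. -1 / ( 2 h^2 ) */
  size_t k,                           /** shared (contraction) dimension */
  size_t ib, size_t jb,               /** active rows/columns of the tile */
  size_t mr, size_t nr,               /** packing leading dimensions (assumed layout) */
  const double *a, const double *aa,  /** packed panel a and squared norms ||a_i||^2 */
  const double *b, const double *bb,  /** packed panel b and squared norms ||b_j||^2 */
  const double *w,                    /** weights */
  double *u                           /** accumulated output */
)
{
  for ( size_t j = 0; j < jb; j ++ )
  {
    for ( size_t i = 0; i < ib; i ++ )
    {
      /** inner product a_i^T b_j over the packed panels */
      double c = 0.0;
      for ( size_t p = 0; p < k; p ++ )
        c += a[ p * mr + i ] * b[ p * nr + j ];
      /** squared distance, clamped at zero against rounding */
      double d = std::max( aa[ i ] - 2.0 * c + bb[ j ], 0.0 );
      u[ i ] += std::exp( scal * d ) * w[ j ];
    }
  }
}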