HMLP: High-performance Machine Learning Primitives
gsks_d12x16.hpp
#include <stdio.h>
#include <stdlib.h>    /** exit */
#include <math.h>
#include <algorithm>   /** std::max */

#include <hmlp.h>
#include <hmlp_internal.hpp>

/** external BLIS-style rank-k micro-kernels used by the operators below */
BLIS_GEMM_KERNEL(bli_sgemm_opt_12x32_l2,float);
BLIS_GEMM_KERNEL(bli_dgemm_opt_12x16_l2,double);

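/** Presumed prototype declared by the BLIS_GEMM_KERNEL macro, inferred from the
 *  call site inside the double-precision operator below; the actual macro lives
 *  in hmlp_internal.hpp and may differ:
 *
 *    void bli_dgemm_opt_12x16_l2
 *    (
 *      int k,
 *      double *alpha,
 *      double *a, double *b,
 *      double *beta,
 *      double *c, int rs_c, int cs_c,
 *      aux_s<double, double, double, double> *aux
 *    );
 */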
/** single-precision (12x32) variant; struct name assumed, operator not implemented */
struct gsks_gaussian_int_s12x32
{
  inline GSKS_OPERATOR(float) const
  {
    printf( "not implemented yet\n" );
    exit( 1 );
  };
};

/** double-precision 12x16 Gaussian kernel summation micro-kernel; struct name assumed */
struct gsks_gaussian_int_d12x16
{
  const size_t mr         = 16;
  const size_t nr         = 12;
  const size_t pack_mr    = 16;
  const size_t pack_nr    = 12;
  const size_t align_size = 64;
  const bool   row_major  = false;

  //inline void operator()
  //(
  //  kernel_s<double> *ker,
  //  int k,
  //  int rhs,
  //  double *u,
  //  double *a, double *aa,
  //  double *b, double *bb,
  //  double *w,
  //  double *c, int ldc,
  //  aux_s<double, double, double, double> *aux
  //) const

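  /** GSKS_OPERATOR(double), defined in hmlp_internal.hpp, presumably expands to the
   *  operator() signature spelled out in the comment above. */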
  inline GSKS_OPERATOR(double) const
  {
    double ctmp[ mr * nr ];
    double alpha = 1.0;
    /** accumulate into ctmp only after the first kc panel ( aux->pc > 0 ) */
    double beta = aux->pc ? 1.0 : 0.0;

    /** load the partial results of previous kc panels into ctmp */
    if ( aux->pc )
    {
      for ( size_t j = 0; j < aux->jb; j ++ )
        for ( size_t i = 0; i < aux->ib; i ++ )
          //ctmp[ j * mr + i ] = c[ ( aux->j + j ) * ldc + ( aux->i + i ) ];
          ctmp[ j * mr + i ] = c[ j * ldc + i ];
    }

    /** rank-k update via the BLIS micro-kernel: ctmp[ j * mr + i ] (+)= a_i^T b_j */
    bli_dgemm_opt_12x16_l2
    (
      k,
      &alpha,
      b,
      a,
      &beta,
      ctmp, mr, 1,
      aux
    );

    /** turn inner products into squared distances and accumulate the weighted Gaussian */
    for ( size_t j = 0; j < aux->jb; j ++ )
    {
      for ( size_t i = 0; i < aux->ib; i ++ )
      {
        /** ||a_i - b_j||^2 = ||a_i||^2 - 2 a_i^T b_j + ||b_j||^2, clamped at zero */
        ctmp[ j * mr + i ] *= -2.0;
        ctmp[ j * mr + i ] += aa[ i ] + bb[ j ];
        ctmp[ j * mr + i ] = std::max( ctmp[ j * mr + i ], (double)0 );
        u[ i ] += std::exp( ker->scal * ctmp[ j * mr + i ] ) * w[ j ];
      }
    }

  }; /** end inline GSKS_OPERATOR(double) */
}; /** end struct gsks_gaussian_int_d12x16 */
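
For reference, the double-precision operator above evaluates a Gaussian kernel summation over one mr-by-nr tile: the BLIS call produces the inner products a_i^T b_j, the arrays aa and bb play the role of the squared norms ||a_i||^2 and ||b_j||^2, the loop forms ||a_i - b_j||^2 = ||a_i||^2 - 2 a_i^T b_j + ||b_j||^2, and the result is accumulated as u[ i ] += exp( scal * ||a_i - b_j||^2 ) * w[ j ]. The sketch below is a minimal, unoptimized restatement of that computation and is not part of HMLP; the helper name, the column-wise panel layout with leading dimensions mr and nr, and processing the full k dimension in one pass are assumptions made for illustration.

#include <algorithm>
#include <cmath>
#include <cstddef>

/** Hypothetical reference for what the d12x16 GSKS micro-kernel computes. */
void gsks_gaussian_reference
(
  double scal,                        /** kernel bandwidth term, e.g. -1 / ( 2 h^2 ) */
  size_t k,                           /** shared (contraction) dimension */
  size_t ib, size_t jb,               /** active rows/columns of the tile */
  size_t mr, size_t nr,               /** packing leading dimensions (assumed layout) */
  const double *a, const double *aa,  /** packed panel a and squared norms ||a_i||^2 */
  const double *b, const double *bb,  /** packed panel b and squared norms ||b_j||^2 */
  const double *w,                    /** weights */
  double *u                           /** accumulated output */
)
{
  for ( size_t j = 0; j < jb; j ++ )
  {
    for ( size_t i = 0; i < ib; i ++ )
    {
      /** inner product a_i^T b_j over the packed panels */
      double c = 0.0;
      for ( size_t p = 0; p < k; p ++ )
        c += a[ p * mr + i ] * b[ p * nr + j ];
      /** squared distance, clamped at zero against rounding */
      double d = std::max( aa[ i ] - 2.0 * c + bb[ j ], 0.0 );
      u[ i ] += std::exp( scal * d ) * w[ j ];
    }
  }
}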