4 #include <hmlp_internal.hpp> 11 inline void operator()
24 v8df_t c07_0, c07_1, c07_2, c07_3, c07_4, c07_5, c07_6, c07_7;
25 v8df_t c15_0, c15_1, c15_2, c15_3, c15_4, c15_5, c15_6, c15_7;
26 v8df_t c23_0, c23_1, c23_2, c23_3, c23_4, c23_5, c23_6, c23_7;
35 __asm__
volatile(
"prefetcht0 0(%0) \n\t" : :
"r"( c ) );
36 __asm__
volatile(
"prefetcht0 192(%0) \n\t" : :
"r"( c ) );
37 __asm__
volatile(
"prefetcht0 384(%0) \n\t" : :
"r"( c ) );
38 __asm__
volatile(
"prefetcht0 576(%0) \n\t" : :
"r"( c ) );
39 __asm__
volatile(
"prefetcht0 768(%0) \n\t" : :
"r"( c ) );
40 __asm__
volatile(
"prefetcht0 960(%0) \n\t" : :
"r"( c ) );
41 __asm__
volatile(
"prefetcht0 1152(%0) \n\t" : :
"r"( c ) );
42 __asm__
volatile(
"prefetcht0 1344(%0) \n\t" : :
"r"( c ) );
44 #include "rank_k_int_d24x8.segment" 47 printf(
"%lf, %lf, %lf, %lf\n", c07_0.d[0], c07_1.d[0], c07_2.d[0], c07_3.d[0], c07_4.d[0], c07_5.d[0], c07_6.d[0], c07_7.d[0] );
48 printf(
"%lf, %lf, %lf, %lf\n", c07_0.d[1], c07_1.d[1], c07_2.d[1], c07_3.d[1], c07_4.d[1], c07_5.d[1], c07_6.d[1], c07_7.d[1] );
49 printf(
"%lf, %lf, %lf, %lf\n", c07_0.d[2], c07_1.d[2], c07_2.d[2], c07_3.d[2], c07_4.d[2], c07_5.d[2], c07_6.d[2], c07_7.d[2] );
50 printf(
"%lf, %lf, %lf, %lf\n", c07_0.d[3], c07_1.d[3], c07_2.d[3], c07_3.d[3], c07_4.d[3], c07_5.d[3], c07_6.d[3], c07_7.d[3] );
51 printf(
"%lf, %lf, %lf, %lf\n", c07_0.d[4], c07_1.d[4], c07_2.d[4], c07_3.d[4], c07_4.d[4], c07_5.d[4], c07_6.d[4], c07_7.d[4] );
52 printf(
"%lf, %lf, %lf, %lf\n", c07_0.d[5], c07_1.d[5], c07_2.d[5], c07_3.d[5], c07_4.d[5], c07_5.d[5], c07_6.d[5], c07_7.d[5] );
53 printf(
"%lf, %lf, %lf, %lf\n", c07_0.d[6], c07_1.d[6], c07_2.d[6], c07_3.d[6], c07_4.d[6], c07_5.d[6], c07_6.d[6], c07_7.d[6] );
54 printf(
"%lf, %lf, %lf, %lf\n", c07_0.d[7], c07_1.d[7], c07_2.d[7], c07_3.d[7], c07_4.d[7], c07_5.d[7], c07_6.d[7], c07_7.d[7] );
// Write the vector variables back through c. For each index j = 0..7 the
// three vectors c07_j, c15_j and c23_j are stored at element offsets 0, 8
// and 16 from c.
// NOTE(review): as listed, every group targets the same three addresses, so
// each group would overwrite the previous one. The embedded line numbers skip
// between groups (59 -> 63, 65 -> 69, ...), so a statement that advances c
// may have been dropped from this listing -- confirm against the full file.
// NOTE(review): _mm512_store_pd is presumably the aligned 512-bit store; if
// so, c (and c + 8, c + 16) must be 64-byte aligned -- confirm at the caller.
// j = 0
57 _mm512_store_pd( c + 0 , c07_0.v );
58 _mm512_store_pd( c + 8 , c15_0.v );
59 _mm512_store_pd( c + 16 , c23_0.v );
// j = 1
63 _mm512_store_pd( c + 0 , c07_1.v );
64 _mm512_store_pd( c + 8 , c15_1.v );
65 _mm512_store_pd( c + 16 , c23_1.v );
// j = 2
69 _mm512_store_pd( c + 0 , c07_2.v );
70 _mm512_store_pd( c + 8 , c15_2.v );
71 _mm512_store_pd( c + 16 , c23_2.v );
// j = 3
75 _mm512_store_pd( c + 0 , c07_3.v );
76 _mm512_store_pd( c + 8 , c15_3.v );
77 _mm512_store_pd( c + 16 , c23_3.v );
// j = 4
81 _mm512_store_pd( c + 0 , c07_4.v );
82 _mm512_store_pd( c + 8 , c15_4.v );
83 _mm512_store_pd( c + 16 , c23_4.v );
// j = 5
87 _mm512_store_pd( c + 0 , c07_5.v );
88 _mm512_store_pd( c + 8 , c15_5.v );
89 _mm512_store_pd( c + 16 , c23_5.v );
// j = 6
93 _mm512_store_pd( c + 0 , c07_6.v );
94 _mm512_store_pd( c + 8 , c15_6.v );
95 _mm512_store_pd( c + 16 , c23_6.v );
// j = 7
99 _mm512_store_pd( c + 0 , c07_7.v );
100 _mm512_store_pd( c + 8 , c15_7.v );
101 _mm512_store_pd( c + 16 , c23_7.v );
Definition: hmlp_internal.hpp:38
Definition: rank_k_int_d24x8.hpp:9