HMLP: High-performance Machine Learning Primitives
common.h
/*
 * Compile-time tuning constants and optional prefetch switches.
 * The numeric macros are always defined; the PREFETCH_* switches are
 * left commented out, i.e. undefined, and can be enabled by
 * uncommenting the corresponding line.
 */

/* Optional switch, currently disabled. */
//#define PREFETCH_C_L2

/* Must be a multiple of 2; this is checked at compile time just below. */
#define A_L1_PREFETCH_DIST 4

#if ((A_L1_PREFETCH_DIST) % 2) != 0
#error "A_L1_PREFETCH_DIST must be a multiple of 2"
#endif

/*
 * The pointer of B is moved ahead by one iteration of k before the loop
 * starts. Therefore, prefetching 3 k iterations ahead.
 * NOTE(review): the value below is 4 while the sentence above says 3;
 * confirm how the code using this macro accounts for the one-iteration
 * advance of the B pointer.
 */
#define B_L1_PREFETCH_DIST 4

/*
 * NOTE(review): undocumented in the original; presumably the number of
 * trailing loop iterations that are handled separately. Confirm against
 * the code that uses it.
 */
#define TAIL_NITER 8

/*
 * Optional switches, all currently disabled. Their exact effect is
 * determined by the code that tests them, which is not part of this file.
 */
//#define PREFETCH_A_BEFORE
//#define PREFETCH_B_BEFORE
//#define PREFETCH_A_AFTER
//#define PREFETCH_B_AFTER