HMLP: High-performance Machine Learning Primitives
Main Page
Namespaces
Classes
Files
File List
bli_avx512_macros.h
1
#ifndef BLIS_AVX512_MACROS_H
2
#define BLIS_AVX512_MACROS_H
3
4
//
5
// Assembly macros to make AVX-512 with AT&T syntax somewhat less painful
6
//
7
8
// COMMENT_BEGIN is the string literal "#" and COMMENT_END expands to nothing.
// NOTE(review): presumably these bracket comments placed inside the generated
// assembly text; no user is visible in this header -- confirm against callers.
#define COMMENT_BEGIN "#"
9
#define COMMENT_END
10
11
// Core string builders used by every instruction macro in this header.
//   STRINGIFY(...)  turns its argument tokens into one string literal.
//   ASM(...)        stringifies its arguments (after macro expansion, because
//                   they pass through ASM's own parameter first) and appends
//                   "\n\t", so consecutive ASM(...) uses form consecutive
//                   lines of one concatenated string.
//   LABEL(label)    stringifies `label` and appends ":\n\t".
#define STRINGIFY(...) #__VA_ARGS__
12
#define ASM(...) STRINGIFY(__VA_ARGS__) "\n\t"
13
#define LABEL(label) STRINGIFY(label) ":\n\t"
14
15
// Vector register names: the argument is pasted onto xmm/ymm/zmm and the
// result is prefixed with a doubled percent sign.
// NOTE(review): the "%%" is presumably the extended-asm escape for a literal
// '%' (AT&T register prefix) -- confirm these are only used in asm templates.
#define XMM(x) %% xmm##x
16
#define YMM(x) %% ymm##x
17
#define ZMM(x) %% zmm##x
18
// Fixed names for the 32-bit (E*) and 64-bit (R*) legacy general-purpose
// registers, each spelled with a doubled percent sign in front of the
// lower-case register name.
#define EAX %%eax
19
#define EBX %%ebx
20
#define ECX %%ecx
21
#define EDX %%edx
22
#define EBP %%ebp
23
#define EDI %%edi
24
#define ESI %%esi
25
#define RAX %%rax
26
#define RBX %%rbx
27
#define RCX %%rcx
28
#define RDX %%rdx
29
#define RBP %%rbp
30
#define RDI %%rdi
31
#define RSI %%rsi
32
// Numbered registers.
//   K(x)   pastes x onto "k"  (k<x>).
//   R(x)   pastes x onto "r"  (r<x>);  R8..R15 are the fixed-name equivalents.
//   RD(x)  pastes x between "r" and "d" (r<x>d); R8D..R15D are the fixed-name
//          equivalents.
// All expansions carry the same doubled-percent prefix as the other register
// macros in this header.
#define K(x) %% k##x
33
#define R(x) %% r##x
34
#define R8 %%r8
35
#define R9 %%r9
36
#define R10 %%r10
37
#define R11 %%r11
38
#define R12 %%r12
39
#define R13 %%r13
40
#define R14 %%r14
41
#define R15 %%r15
42
#define RD(x) %% r##x##d
43
#define R8D %%r8d
44
#define R9D %%r9d
45
#define R10D %%r10d
46
#define R11D %%r11d
47
#define R12D %%r12d
48
#define R13D %%r13d
49
#define R14D %%r14d
50
#define R15D %%r15d
51
// IMM(x) pastes '$' directly onto x (e.g. IMM(1) -> $1), the AT&T spelling of
// an immediate. Because it relies on token pasting, x must be a single token
// that can be pasted after '$'.
// VAR(x) expands to %[x].
// NOTE(review): %[x] looks like a named extended-asm operand reference --
// confirm against the asm statements that use it.
#define IMM(x) $##x
52
#define VAR(x) %[x]
53
54
// Memory-operand spellings, one per argument count:
//   MEM_4(reg,off,scale,disp) -> disp(reg,off,scale)
//   MEM_3(reg,off,scale)      -> (reg,off,scale)
//   MEM_2(reg,disp)           -> disp(reg)
//   MEM_1(reg)                -> (reg)
// Note the displacement is the LAST macro argument but is emitted FIRST.
#define MEM_4(reg,off,scale,disp) disp(reg,off,scale)
55
#define MEM_3(reg,off,scale) (reg,off,scale)
56
#define MEM_2(reg,disp) disp(reg)
57
#define MEM_1(reg) (reg)
58
59
// Same four operand shapes as MEM(...), each followed by the text %{1to8%}.
// NOTE(review): %{ and %} are presumably the extended-asm escapes for literal
// braces, making this the {1to8} embedded-broadcast decorator -- confirm.
#define MEM_1TO8_4(reg,off,scale,disp) MEM(reg,off,scale,disp) %{1to8%}
60
#define MEM_1TO8_3(reg,off,scale) MEM(reg,off,scale) %{1to8%}
61
#define MEM_1TO8_2(reg,disp) MEM(reg,disp) %{1to8%}
62
#define MEM_1TO8_1(reg) MEM(reg) %{1to8%}
63
64
// Same four operand shapes as MEM(...), each followed by the text %{1to16%}.
// NOTE(review): %{ and %} are presumably the extended-asm escapes for literal
// braces, making this the {1to16} embedded-broadcast decorator -- confirm.
#define MEM_1TO16_4(reg,off,scale,disp) MEM(reg,off,scale,disp) %{1to16%}
65
#define MEM_1TO16_3(reg,off,scale) MEM(reg,off,scale) %{1to16%}
66
#define MEM_1TO16_2(reg,disp) MEM(reg,disp) %{1to16%}
67
#define MEM_1TO16_1(reg) MEM(reg) %{1to16%}
68
69
// Argument-count dispatch.
// GET_MACRO discards its first four arguments and yields the fifth. Each
// dispatcher passes its own arguments followed by the _4, _3, _2, _1 variant
// names, so the number of caller arguments (1..4) shifts the matching variant
// into the fifth position; that variant is then invoked with the same
// arguments. Calling a dispatcher with zero or more than four arguments is
// not supported by this scheme.
#define GET_MACRO(_1,_2,_3,_4,NAME,...) NAME
70
#define MEM(...) GET_MACRO(__VA_ARGS__,MEM_4,MEM_3,MEM_2,MEM_1)(__VA_ARGS__)
71
#define MEM_1TO8(...) GET_MACRO(__VA_ARGS__,MEM_1TO8_4,MEM_1TO8_3,MEM_1TO8_2,MEM_1TO8_1)(__VA_ARGS__)
72
#define MEM_1TO16(...) GET_MACRO(__VA_ARGS__,MEM_1TO16_4,MEM_1TO16_3,MEM_1TO16_2,MEM_1TO16_1)(__VA_ARGS__)
73
74
// Mask-register helpers.
//   MASK_K(n)            -> %{%% k<n> %}          (mask decorator text)
//   MASK_KZ(n)           -> %{%% k<n> %} %{z%}    (same, followed by %{z%})
//   KMOV(to,from)        emits "kmovw from, to"   (macro args are reversed)
//   JKNZD(kreg,label)    emits "kortestw kreg, kreg" then "jnz label",
//                        i.e. two ASM lines: a test of kreg against itself
//                        followed by a conditional jump to label.
//   KXNORW(_0,_1,_2)     emits "kxnorw _2, _1, _0"
//   KSHIFTRW(_0,_1,_2)   emits "kshiftrw _2, _1, _0"
// The instruction macros take their operands in the opposite order from the
// text they generate.
#define MASK_K(n) %{%% k##n %}
75
#define MASK_KZ(n) %{%% k##n %} %{z%}
76
#define KMOV(to,from) ASM(kmovw from, to)
77
#define JKNZD(kreg,label) \
78
ASM(kortestw kreg, kreg) \
79
ASM(jnz label)
80
#define KXNORW(_0, _1, _2) ASM(kxnorw _2, _1, _0)
81
#define KSHIFTRW(_0, _1, _2) ASM(kshiftrw _2, _1, _0)
82
83
// Alignment directives: .p2align takes a power-of-two exponent, so 4 -> 16
// bytes (ALIGN16) and 5 -> 32 bytes (ALIGN32).
#define ALIGN16 ASM(.p2align 4)
84
#define ALIGN32 ASM(.p2align 5)
85
// Emits the x86 time-stamp-counter read instruction. The mnemonic is "rdtsc";
// it was previously misspelled "rdstc", which is not an instruction and would
// fail to assemble wherever this macro was used.
#define RDTSC ASM(rdtsc)
86
// Scalar instruction wrappers. Each macro emits one ASM line whose operands
// are the macro arguments in REVERSE order (e.g. MOV(_0, _1) -> "mov _1, _0"),
// so callers write operands in the opposite order from the generated AT&T
// text. Single-argument macros (SAL1, SAR1, the jumps) pass their operand
// through unchanged.
#define MOV(_0, _1) ASM(mov _1, _0)
87
#define MOVD(_0, _1) ASM(movd _1, _0)
88
#define MOVL(_0, _1) ASM(movl _1, _0)
89
#define MOVQ(_0, _1) ASM(movq _1, _0)
90
#define VMOVD(_0, _1) ASM(vmovd _1, _0)
91
#define VMOVQ(_0, _1) ASM(vmovq _1, _0)
92
#define CMP(_0, _1) ASM(cmp _1, _0)
93
#define AND(_0, _1) ASM(and _1, _0)
94
#define ADD(_0, _1) ASM(add _1, _0)
95
#define SUB(_0, _1) ASM(sub _1, _0)
96
#define SAL(_0, _1) ASM(sal _1, _0)
97
#define SHLX(_0, _1, _2) ASM(shlx _2, _1, _0)
98
#define SAR(_0, _1) ASM(sar _1, _0)
99
#define SAL1(_0) ASM(sal _0)
100
#define SAR1(_0) ASM(sar _0)
101
#define LEA(_0, _1) ASM(lea _1, _0)
102
#define TEST(_0, _1) ASM(test _1, _0)
103
//#define DEC(_0) ASM(dec _0)
104
#define DEC(_0) SUB(_0, IMM(1)) // decrement is emitted as "sub $1, _0" rather than "dec _0" (disabled variant kept above); NOTE(review): reason not stated here -- possibly flag behavior, confirm
105
#define JLE(_0) ASM(jle _0)
106
#define JNZ(_0) ASM(jnz _0)
107
#define JZ(_0) ASM(jz _0)
108
#define JNE(_0) ASM(jne _0)
109
#define JE(_0) ASM(je _0)
110
#define JNC(_0) ASM(jnc _0)
111
#define JC(_0) ASM(jc _0)
112
#define JMP(_0) ASM(jmp _0)
113
// Vector instruction wrappers. As with the scalar wrappers, every macro emits
// a single ASM line with its arguments in REVERSE order: two-argument forms
// produce "op _1, _0", three-argument forms "op _2, _1, _0", and the
// four-argument forms (VINSERTF64X4, VSHUFF64X2) "op _3, _2, _1, _0".
// The macro name is the upper-case form of the mnemonic it emits.
#define VGATHERDPS(_0, _1) ASM(vgatherdps _1, _0)
114
#define VSCATTERDPS(_0, _1) ASM(vscatterdps _1, _0)
115
#define VGATHERDPD(_0, _1) ASM(vgatherdpd _1, _0)
116
#define VSCATTERDPD(_0, _1) ASM(vscatterdpd _1, _0)
117
#define VMULPS(_0, _1, _2) ASM(vmulps _2, _1, _0)
118
#define VMULPD(_0, _1, _2) ASM(vmulpd _2, _1, _0)
119
#define VPMULLD(_0, _1, _2) ASM(vpmulld _2, _1, _0)
120
#define VPADDD(_0, _1, _2) ASM(vpaddd _2, _1, _0)
121
#define VPSLLD(_0, _1, _2) ASM(vpslld _2, _1, _0)
122
#define VPXORD(_0, _1, _2) ASM(vpxord _2, _1, _0)
123
#define VFMADD132PS(_0, _1, _2) ASM(vfmadd132ps _2, _1, _0)
124
#define VFMADD213PS(_0, _1, _2) ASM(vfmadd213ps _2, _1, _0)
125
#define VFMADD231PS(_0, _1, _2) ASM(vfmadd231ps _2, _1, _0)
126
#define VFMADD132PD(_0, _1, _2) ASM(vfmadd132pd _2, _1, _0)
127
#define VFMADD213PD(_0, _1, _2) ASM(vfmadd213pd _2, _1, _0)
128
#define VFMADD231PD(_0, _1, _2) ASM(vfmadd231pd _2, _1, _0)
129
#define VMOVAPS(_0, _1) ASM(vmovaps _1, _0)
130
#define VMOVUPS(_0, _1) ASM(vmovups _1, _0)
131
#define VMOVAPD(_0, _1) ASM(vmovapd _1, _0)
132
#define VMOVUPD(_0, _1) ASM(vmovupd _1, _0)
133
#define VBROADCASTSS(_0, _1) ASM(vbroadcastss _1, _0)
134
#define VBROADCASTSD(_0, _1) ASM(vbroadcastsd _1, _0)
135
#define VPBROADCASTD(_0, _1) ASM(vpbroadcastd _1, _0)
136
#define VPBROADCASTQ(_0, _1) ASM(vpbroadcastq _1, _0)
137
#define VBROADCASTF64X4(_0, _1) ASM(vbroadcastf64x4 _1, _0)
138
#define VINSERTF64X4(_0, _1, _2, _3) ASM(vinsertf64x4 _3, _2, _1, _0)
139
#define VUNPCKLPD(_0, _1, _2) ASM(vunpcklpd _2, _1, _0)
140
#define VUNPCKHPD(_0, _1, _2) ASM(vunpckhpd _2, _1, _0)
141
#define VSHUFF64X2(_0, _1, _2, _3) ASM(vshuff64x2 _3, _2, _1, _0)
142
// Prefetch wrappers. LEVEL is token-pasted into the mnemonic, so it must be a
// single token that yields a valid instruction name:
//   PREFETCH(LEVEL,ADDRESS)       -> "prefetcht<LEVEL> ADDRESS"
//   PREFETCHW0(ADDRESS)           -> "prefetchw ADDRESS"
//   PREFETCHW1(ADDRESS)           -> "prefetchwt1 ADDRESS"
//   VGATHERPFDPS / VSCATTERPFDPS  -> "vgatherpf<LEVEL>dps" / "vscatterpf<LEVEL>dps"
//   VGATHERPFDPD / VSCATTERPFDPD  -> "vgatherpf<LEVEL>dpd" / "vscatterpf<LEVEL>dpd"
#define PREFETCH(LEVEL,ADDRESS) ASM(prefetcht##LEVEL ADDRESS)
143
#define PREFETCHW0(ADDRESS) ASM(prefetchw ADDRESS)
144
#define PREFETCHW1(ADDRESS) ASM(prefetchwt1 ADDRESS)
145
#define VGATHERPFDPS(LEVEL,ADDRESS) ASM(vgatherpf##LEVEL##dps ADDRESS)
146
#define VSCATTERPFDPS(LEVEL,ADDRESS) ASM(vscatterpf##LEVEL##dps ADDRESS)
147
#define VGATHERPFDPD(LEVEL,ADDRESS) ASM(vgatherpf##LEVEL##dpd ADDRESS)
148
#define VSCATTERPFDPD(LEVEL,ADDRESS) ASM(vscatterpf##LEVEL##dpd ADDRESS)
149
150
#endif
kernel
mic
knl
bli_avx512_macros.h
Generated by
1.8.11