1 |
|
#ifndef PACKING_HPP |
2 |
|
#define PACKING_HPP |
3 |
|
|
4 |
|
namespace hmlp |
5 |
|
{ |
6 |
|
|
7 |
|
|
8 |
|
|
9 |
|
/**
 *  Arbitrary packing routine.
 *
 *  Abstract interface: the only member function is the pure virtual call
 *  operator, so a concrete packer must derive from this struct and
 *  implement it.
 *
 *  NB, T and TPACK are not used by the interface itself; they are made
 *  available to the derived implementations. T is the type of the input
 *  data and TPACK is the type of the packed data.
 */
template<size_t NB, typename T, typename TPACK>
struct pack_pbxib
{
  /** structure closure, e.g. ldx */

  /**
   *  Virtual destructor, so that a derived packer can be destroyed safely
   *  through a pointer to this interface.
   */
  virtual ~pack_pbxib() = default;

  /**
   *  Loop over ib * pb of X to fill packX. Type cast from T to TPACK
   *  if necessary.
   */
  inline virtual void operator ()
  (
    /** k is the number cols, pc is the col offset, pb is the packed size */
    size_t k, size_t pc, size_t pb,
    /** m is the number rows, ic is the row offset, ib is the packed size */
    size_t m, size_t ic, size_t ib,
    /** input data in type T */
    T *X,
    /** packed data in type TPACK */
    TPACK *packX
  ) = 0;

}; /** end struct pack_pbxib */
33 |
|
|
34 |
|
|
35 |
|
/**
 *  Arbitrary unpacking routine.
 *
 *  Abstract interface: the call operator is pure virtual and must be
 *  implemented by a derived struct. The two strides below are stored here
 *  so that every implementation shares them.
 *
 *  NB, T and TPACK are not used by the interface itself; they are made
 *  available to the derived implementations.
 */
template<size_t NB, typename T, typename TPACK>
struct unpack_ibxjb
{
  /** structure closure, e.g. ldx, rs_c and cs_c */
  size_t rs_c = 0;
  size_t cs_c = 0;

  /**
   *  Virtual destructor, so that a derived unpacker can be destroyed safely
   *  through a pointer to this interface.
   */
  virtual ~unpack_ibxjb() = default;

  /**
   *  Unpacking entry point. The parameters come in two triples,
   *  ( m, ic, ib ) and ( n, jc, jb ), followed by the data in type T ( X )
   *  and the packed data in type TPACK ( packX ). Their exact use is
   *  defined by the derived implementation.
   */
  inline virtual void operator ()
  (
    size_t m, size_t ic, size_t ib,
    size_t n, size_t jc, size_t jb,
    T *X,
    TPACK *packX
  ) = 0;

}; /** end struct unpack_ibxjb */
52 |
|
|
53 |
|
|
54 |
|
|
55 |
|
|
56 |
|
|
57 |
|
|
58 |
|
|
59 |
|
|
60 |
|
|
61 |
|
|
62 |
|
|
63 |
|
|
64 |
|
|
65 |
|
/** column-major matrix packing routine */ |
66 |
|
template<int NB, typename T, typename TPACK> |
67 |
|
struct pack2D_pbxib : public pack_pbxib<NB, T, TPACK> |
68 |
|
{ |
69 |
|
/** structure closure, e.g. ldx */ |
70 |
|
bool trans = false; |
71 |
|
size_t ldx = 0; |
72 |
|
|
73 |
|
/** |
74 |
|
* Loop over ib * pb of X to fill packX. Type cast from T to TPACK |
75 |
|
* if necessary. |
76 |
|
*/ |
77 |
|
inline virtual void operator () |
78 |
|
( |
79 |
|
/** k is the number cols, pc is the col offset, pb is the packed size */ |
80 |
|
size_t k, size_t pc, size_t pb, |
81 |
|
/** m is the number rows, ic is the row offset, ib is the packed size */ |
82 |
|
size_t m, size_t ic, size_t ib, |
83 |
|
/** input data in type T */ |
84 |
|
T *X, |
85 |
|
/** packed data in type TPACK */ |
86 |
|
TPACK *packX |
87 |
|
) |
88 |
|
{ |
89 |
|
T *x_pntr[ NB ]; |
90 |
|
|
91 |
|
if ( trans ) |
92 |
|
{ |
93 |
|
/** ( pc, ic ) offset */ |
94 |
|
X += ( ic * ldx + pc ); |
95 |
|
|
96 |
|
for ( auto i = 0; i < ib; i ++ ) |
97 |
|
{ |
98 |
|
x_pntr[ i ] = X + ldx * i; |
99 |
|
} |
100 |
|
for ( auto i = ib; i < NB; i ++ ) |
101 |
|
{ |
102 |
|
x_pntr[ i ] = X; |
103 |
|
} |
104 |
|
|
105 |
|
for ( auto p = 0; p < pb; p ++ ) |
106 |
|
{ |
107 |
|
for ( auto i = 0; i < ib; i ++ ) |
108 |
|
{ |
109 |
|
*packX ++ = *x_pntr[ i ] ++; |
110 |
|
} |
111 |
|
for ( auto i = ib; i < NB; i ++ ) |
112 |
|
{ |
113 |
|
*packX ++ = 0; |
114 |
|
} |
115 |
|
} |
116 |
|
} |
117 |
|
else |
118 |
|
{ |
119 |
|
/** ( ic, pc ) offset */ |
120 |
|
X += ( pc * ldx + ic ); |
121 |
|
|
122 |
|
for ( auto i = 0; i < ib; i ++ ) |
123 |
|
{ |
124 |
|
x_pntr[ i ] = X + i; |
125 |
|
} |
126 |
|
for ( auto i = ib; i < NB; i ++ ) |
127 |
|
{ |
128 |
|
x_pntr[ i ] = X; |
129 |
|
} |
130 |
|
|
131 |
|
for ( auto p = 0; p < pb; p ++ ) |
132 |
|
{ |
133 |
|
for ( auto i = 0; i < ib; i ++ ) |
134 |
|
{ |
135 |
|
*packX = *x_pntr[ i ]; |
136 |
|
packX ++; |
137 |
|
x_pntr[ i ] += ldx; |
138 |
|
} |
139 |
|
for ( auto i = ib; i < NB; i ++ ) |
140 |
|
{ |
141 |
|
*packX ++ = 0; |
142 |
|
} |
143 |
|
} |
144 |
|
} |
145 |
|
}; |
146 |
|
}; /** end struct pack2D_pbxib */ |
147 |
|
|
148 |
|
|
149 |
|
|
150 |
|
|
151 |
|
|
152 |
|
/** arbitrary unpacking routine */ |
153 |
|
template<size_t NB, typename T, typename TPACK> |
154 |
|
struct unpack2D_ibxjb : public unpack_ibxjb<NB, T, TPACK> |
155 |
|
{ |
156 |
|
/** structure closure, e.g. ldx */ |
157 |
|
|
158 |
|
inline virtual void operator () |
159 |
|
( |
160 |
|
size_t m, size_t ic, size_t ib, |
161 |
|
size_t n, size_t jc, size_t jb, |
162 |
|
T *X, |
163 |
|
TPACK *packX |
164 |
|
) |
165 |
|
{ |
166 |
|
for ( size_t j = 0; j < jb; j ++ ) |
167 |
|
for ( size_t i = 0; i < ib; i ++ ) |
168 |
|
X[ ( jc + j ) * this->cs_c + ( ic + i ) * this->rs_c ] |
169 |
|
= packX[ j * NB + i ]; |
170 |
|
}; |
171 |
|
|
172 |
|
}; /** end struct unpack2D_ibxjb */ |
173 |
|
|
174 |
|
/**
 *  Abstract interface of an object that can be packed into, and unpacked
 *  from, a buffer of type TPACK.
 *
 *  Both Pack and Unpack are pure virtual and must be implemented by a
 *  derived struct. NB and T are not used by the interface itself; they are
 *  made available to the derived implementations.
 */
template<size_t NB, typename T, typename TPACK>
struct MatrifyableObject
{
  /** Two size fields, both zero until a derived struct sets them. */
  size_t m = 0;
  size_t n = 0;

  /**
   *  Virtual destructor, so that a derived object can be destroyed safely
   *  through a pointer to this interface.
   */
  virtual ~MatrifyableObject() = default;

  /**
   *  Packing entry point. The parameters come in two triples,
   *  ( m, ic, ib ) and ( n, jc, jb ), followed by the packed buffer.
   *  Their exact use is defined by the derived implementation.
   */
  inline virtual void Pack
  (
    size_t m, size_t ic, size_t ib,
    size_t n, size_t jc, size_t jb,
    TPACK *packX
  ) = 0;

  /**
   *  Unpacking entry point, with the same parameter list as Pack.
   */
  inline virtual void Unpack
  (
    size_t m, size_t ic, size_t ib,
    size_t n, size_t jc, size_t jb,
    TPACK *packX
  ) = 0;

}; /** end struct MatrifyableObject */
195 |
|
|
196 |
|
|
197 |
|
template<size_t NB, typename T, typename TPACK> |
198 |
|
struct MatrixLike : public MatrifyableObject<NB, T, TPACK> |
199 |
|
{ |
200 |
|
T* X = NULL; |
201 |
|
|
202 |
|
size_t rs = 0; |
203 |
|
|
204 |
|
size_t cs = 0; |
205 |
|
|
206 |
|
bool trans = false; |
207 |
|
|
208 |
|
inline virtual void Set( T* X, size_t m, size_t n, size_t rs, size_t cs, bool trans ) |
209 |
|
{ |
210 |
|
this->X = X; |
211 |
|
this->m = m; |
212 |
|
this->n = n; |
213 |
|
this->rs = rs; |
214 |
|
this->cs = cs; |
215 |
|
this->trans = trans; |
216 |
|
}; |
217 |
|
|
218 |
|
/** |
219 |
|
* packX is ib-by-jb (column-majored) withd leading dimension NB |
220 |
|
*/ |
221 |
|
inline virtual void Pack |
222 |
|
( |
223 |
|
size_t m, size_t ic, size_t ib, |
224 |
|
size_t n, size_t jc, size_t jb, |
225 |
|
TPACK *packX |
226 |
|
) |
227 |
|
{ |
228 |
|
T *x_pntr[ NB ]; |
229 |
|
|
230 |
|
/** Shift by ( ic, jc ) offset */ |
231 |
|
T *x = X + ic * rs + jc * cs; |
232 |
|
|
233 |
|
if ( trans ) |
234 |
|
{ |
235 |
|
/** Set x_pntr to the initial position for pointer calculation */ |
236 |
|
for ( size_t j = 0; j < jb; j ++ ) x_pntr[ j ] = x + j * cs; |
237 |
|
|
238 |
|
/** Loop over each row */ |
239 |
|
for ( size_t i = 0; i < ib; i ++ ) |
240 |
|
{ |
241 |
|
for ( size_t j = 0; j < jb; j ++ ) |
242 |
|
{ |
243 |
|
*packX ++ = *x_pntr[ j ]; |
244 |
|
x_pntr[ j ] += rs; |
245 |
|
} |
246 |
|
for ( size_t j = jb; j < NB; j ++ ) *packX ++ = 0; |
247 |
|
} |
248 |
|
} |
249 |
|
else |
250 |
|
{ |
251 |
|
/** Set x_pntr to the initial position for pointer calculation */ |
252 |
|
for ( size_t i = 0; i < ib; i ++ ) x_pntr[ i ] = x + i * rs; |
253 |
|
//for ( size_t i = ib; i < NB; i ++ ) x_pntr[ i ] = x; |
254 |
|
|
255 |
|
/** Loop over each column */ |
256 |
|
for ( size_t j = 0; j < jb; j ++ ) |
257 |
|
{ |
258 |
|
for ( size_t i = 0; i < ib; i ++ ) |
259 |
|
{ |
260 |
|
*packX ++ = *x_pntr[ i ]; |
261 |
|
x_pntr[ i ] += cs; |
262 |
|
} |
263 |
|
for ( size_t i = ib; i < NB; i ++ ) *packX ++ = 0; |
264 |
|
} |
265 |
|
} |
266 |
|
}; |
267 |
|
|
268 |
|
inline virtual void Unpack |
269 |
|
( |
270 |
|
size_t m, size_t ic, size_t ib, |
271 |
|
size_t n, size_t jc, size_t jb, |
272 |
|
TPACK *packX |
273 |
|
) |
274 |
|
{ |
275 |
|
T *x_pntr[ NB ]; |
276 |
|
|
277 |
|
/** Shift by ( ic, jc ) offset */ |
278 |
|
T *x = X + ic * rs + jc * cs; |
279 |
|
|
280 |
|
if ( trans ) |
281 |
|
{ |
282 |
|
/** Set x_pntr to the initial position for pointer calculation */ |
283 |
|
for ( size_t j = 0; j < jb; j ++ ) x_pntr[ j ] = x + j * cs; |
284 |
|
|
285 |
|
/** Loop over each row */ |
286 |
|
for ( size_t i = 0; i < ib; i ++ ) |
287 |
|
{ |
288 |
|
for ( size_t j = 0; j < jb; j ++ ) |
289 |
|
{ |
290 |
|
*x_pntr[ j ] = *packX ++; |
291 |
|
x_pntr[ j ] += rs; |
292 |
|
} |
293 |
|
for ( size_t j = jb; j < NB; j ++ ) packX ++; |
294 |
|
} |
295 |
|
} |
296 |
|
else |
297 |
|
{ |
298 |
|
/** Set x_pntr to the initial position for pointer calculation */ |
299 |
|
for ( size_t i = 0; i < ib; i ++ ) x_pntr[ i ] = x + i * rs; |
300 |
|
//for ( size_t i = ib; i < NB; i ++ ) x_pntr[ i ] = x; |
301 |
|
|
302 |
|
/** Loop over each column */ |
303 |
|
for ( size_t j = 0; j < jb; j ++ ) |
304 |
|
{ |
305 |
|
for ( size_t i = 0; i < ib; i ++ ) |
306 |
|
{ |
307 |
|
*x_pntr[ i ] = *packX ++; |
308 |
|
x_pntr[ i ] += cs; |
309 |
|
} |
310 |
|
for ( size_t i = ib; i < NB; i ++ ) packX ++; |
311 |
|
} |
312 |
|
} |
313 |
|
}; |
314 |
|
|
315 |
|
|
316 |
|
|
317 |
|
}; /** end struct MatrixLike */ |
318 |
|
|
319 |
|
|
320 |
|
}; /** end namespace hmlp */ |
321 |
|
|
322 |
|
|
323 |
|
#endif /** define PACKING_HPP */ |