HMLP: High-performance Machine Learning Primitives
packing.hpp
1 #ifndef PACKING_HPP
2 #define PACKING_HPP
3 
4 namespace hmlp
5 {
6 
7 
8 
/**
 *  @brief Abstract interface for a functor that packs part of X into packX.
 *
 *  Concrete packers derive from this struct and implement operator().
 *  The implementation visible in this file (pack2D_pbxib) emits pb groups
 *  of NB packed entries each: ib values read from X followed by zeros.
 *
 *  @tparam NB     number of packed entries written per group
 *  @tparam T      element type of the source buffer X
 *  @tparam TPACK  element type of the destination buffer packX
 */
template<size_t NB, typename T, typename TPACK>
struct pack_pbxib
{
  /** Virtual so that a packer owned through a base pointer is destroyed correctly. */
  virtual ~pack_pbxib() = default;

  /**
   *  @brief Pack one block of X into packX.
   *
   *  @param k      NOTE(review): presumably the full extent of the p dimension;
   *                not read by the implementation in this file
   *  @param pc     starting offset along the p dimension
   *  @param pb     number of groups to pack along the p dimension
   *  @param m      NOTE(review): presumably the full extent of the i dimension;
   *                not read by the implementation in this file
   *  @param ic     starting offset along the i dimension
   *  @param ib     number of source values per group
   *  @param X      source buffer
   *  @param packX  destination buffer
   */
  virtual void operator ()
  (
    size_t k, size_t pc, size_t pb,
    size_t m, size_t ic, size_t ib,
    T *X,
    TPACK *packX
  ) = 0;

};
/**
 *  @brief Abstract interface for a functor that copies a packed ib-by-jb
 *         block back into a strided buffer X.
 *
 *  The struct name is the one used by unpack2D_ibxjb, which derives from
 *  unpack_ibxjb<NB, T, TPACK> and reads rs_c / cs_c through `this`.
 *
 *  @tparam NB     leading dimension of the packed block
 *  @tparam T      element type of the destination buffer X
 *  @tparam TPACK  element type of the packed buffer packX
 */
template<size_t NB, typename T, typename TPACK>
struct unpack_ibxjb
{
  /** Stride, in elements of X, between consecutive i indices. */
  size_t rs_c = 0;
  /** Stride, in elements of X, between consecutive j indices. */
  size_t cs_c = 0;

  /** Virtual so that an unpacker owned through a base pointer is destroyed correctly. */
  virtual ~unpack_ibxjb() = default;

  /**
   *  @brief Unpack one block of packX into X.
   *
   *  @param m      NOTE(review): presumably the full extent of the i dimension;
   *                not read by the implementation in this file
   *  @param ic     starting offset along the i dimension
   *  @param ib     number of entries along the i dimension
   *  @param n      NOTE(review): presumably the full extent of the j dimension;
   *                not read by the implementation in this file
   *  @param jc     starting offset along the j dimension
   *  @param jb     number of entries along the j dimension
   *  @param X      destination buffer
   *  @param packX  packed source buffer
   */
  virtual void operator ()
  (
    size_t m, size_t ic, size_t ib,
    size_t n, size_t jc, size_t jb,
    T *X,
    TPACK *packX
  ) = 0;

};
66 template<int NB, typename T, typename TPACK>
67 struct pack2D_pbxib : public pack_pbxib<NB, T, TPACK>
68 {
70  bool trans = false;
71  size_t ldx = 0;
72 
77  inline virtual void operator ()
78  (
80  size_t k, size_t pc, size_t pb,
82  size_t m, size_t ic, size_t ib,
84  T *X,
86  TPACK *packX
87  )
88  {
89  T *x_pntr[ NB ];
90 
91  if ( trans )
92  {
94  X += ( ic * ldx + pc );
95 
96  for ( auto i = 0; i < ib; i ++ )
97  {
98  x_pntr[ i ] = X + ldx * i;
99  }
100  for ( auto i = ib; i < NB; i ++ )
101  {
102  x_pntr[ i ] = X;
103  }
104 
105  for ( auto p = 0; p < pb; p ++ )
106  {
107  for ( auto i = 0; i < ib; i ++ )
108  {
109  *packX ++ = *x_pntr[ i ] ++;
110  }
111  for ( auto i = ib; i < NB; i ++ )
112  {
113  *packX ++ = 0;
114  }
115  }
116  }
117  else
118  {
120  X += ( pc * ldx + ic );
121 
122  for ( auto i = 0; i < ib; i ++ )
123  {
124  x_pntr[ i ] = X + i;
125  }
126  for ( auto i = ib; i < NB; i ++ )
127  {
128  x_pntr[ i ] = X;
129  }
130 
131  for ( auto p = 0; p < pb; p ++ )
132  {
133  for ( auto i = 0; i < ib; i ++ )
134  {
135  *packX = *x_pntr[ i ];
136  packX ++;
137  x_pntr[ i ] += ldx;
138  }
139  for ( auto i = ib; i < NB; i ++ )
140  {
141  *packX ++ = 0;
142  }
143  }
144  }
145  };
146 };
153 template<size_t NB, typename T, typename TPACK>
154 struct unpack2D_ibxjb : public unpack_ibxjb<NB, T, TPACK>
155 {
158  inline virtual void operator ()
159  (
160  size_t m, size_t ic, size_t ib,
161  size_t n, size_t jc, size_t jb,
162  T *X,
163  TPACK *packX
164  )
165  {
166  for ( size_t j = 0; j < jb; j ++ )
167  for ( size_t i = 0; i < ib; i ++ )
168  X[ ( jc + j ) * this->cs_c + ( ic + i ) * this->rs_c ]
169  = packX[ j * NB + i ];
170  };
171 
172 };
/**
 *  @brief Abstract interface for an m-by-n object whose blocks can be copied
 *         to and from a packed buffer.
 *
 *  The struct name is the one used by MatrixLike, which derives from
 *  MatrifyableObject<NB, T, TPACK> and assigns this->m / this->n.
 *
 *  @tparam NB     number of packed entries per group
 *  @tparam T      element type of the underlying object
 *  @tparam TPACK  element type of the packed buffer
 */
template<size_t NB, typename T, typename TPACK>
struct MatrifyableObject
{
  /** Extent of the i dimension. */
  size_t m = 0;
  /** Extent of the j dimension. */
  size_t n = 0;

  /** Virtual so that an object owned through a base pointer is destroyed correctly. */
  virtual ~MatrifyableObject() = default;

  /**
   *  @brief Copy the block starting at ( ic, jc ) with extent ib-by-jb into packX.
   *
   *  @param m      NOTE(review): shadows the member m; not read by the
   *                implementation in this file
   *  @param ic     starting offset along the i dimension
   *  @param ib     number of entries along the i dimension
   *  @param n      NOTE(review): shadows the member n; not read by the
   *                implementation in this file
   *  @param jc     starting offset along the j dimension
   *  @param jb     number of entries along the j dimension
   *  @param packX  destination packed buffer
   */
  virtual void Pack
  (
    size_t m, size_t ic, size_t ib,
    size_t n, size_t jc, size_t jb,
    TPACK *packX
  ) = 0;

  /**
   *  @brief Copy packX back into the block starting at ( ic, jc ) with
   *         extent ib-by-jb. Parameters mirror Pack(); packX is the source.
   */
  virtual void Unpack
  (
    size_t m, size_t ic, size_t ib,
    size_t n, size_t jc, size_t jb,
    TPACK *packX
  ) = 0;

};
197 template<size_t NB, typename T, typename TPACK>
198 struct MatrixLike : public MatrifyableObject<NB, T, TPACK>
199 {
200  T* X = NULL;
201 
202  size_t rs = 0;
203 
204  size_t cs = 0;
205 
206  bool trans = false;
207 
208  inline virtual void Set( T* X, size_t m, size_t n, size_t rs, size_t cs, bool trans )
209  {
210  this->X = X;
211  this->m = m;
212  this->n = n;
213  this->rs = rs;
214  this->cs = cs;
215  this->trans = trans;
216  };
217 
221  inline virtual void Pack
222  (
223  size_t m, size_t ic, size_t ib,
224  size_t n, size_t jc, size_t jb,
225  TPACK *packX
226  )
227  {
228  T *x_pntr[ NB ];
229 
231  T *x = X + ic * rs + jc * cs;
232 
233  if ( trans )
234  {
236  for ( size_t j = 0; j < jb; j ++ ) x_pntr[ j ] = x + j * cs;
237 
239  for ( size_t i = 0; i < ib; i ++ )
240  {
241  for ( size_t j = 0; j < jb; j ++ )
242  {
243  *packX ++ = *x_pntr[ j ];
244  x_pntr[ j ] += rs;
245  }
246  for ( size_t j = jb; j < NB; j ++ ) *packX ++ = 0;
247  }
248  }
249  else
250  {
252  for ( size_t i = 0; i < ib; i ++ ) x_pntr[ i ] = x + i * rs;
253  //for ( size_t i = ib; i < NB; i ++ ) x_pntr[ i ] = x;
254 
256  for ( size_t j = 0; j < jb; j ++ )
257  {
258  for ( size_t i = 0; i < ib; i ++ )
259  {
260  *packX ++ = *x_pntr[ i ];
261  x_pntr[ i ] += cs;
262  }
263  for ( size_t i = ib; i < NB; i ++ ) *packX ++ = 0;
264  }
265  }
266  };
267 
268  inline virtual void Unpack
269  (
270  size_t m, size_t ic, size_t ib,
271  size_t n, size_t jc, size_t jb,
272  TPACK *packX
273  )
274  {
275  T *x_pntr[ NB ];
276 
278  T *x = X + ic * rs + jc * cs;
279 
280  if ( trans )
281  {
283  for ( size_t j = 0; j < jb; j ++ ) x_pntr[ j ] = x + j * cs;
284 
286  for ( size_t i = 0; i < ib; i ++ )
287  {
288  for ( size_t j = 0; j < jb; j ++ )
289  {
290  *x_pntr[ j ] = *packX ++;
291  x_pntr[ j ] += rs;
292  }
293  for ( size_t j = jb; j < NB; j ++ ) packX ++;
294  }
295  }
296  else
297  {
299  for ( size_t i = 0; i < ib; i ++ ) x_pntr[ i ] = x + i * rs;
300  //for ( size_t i = ib; i < NB; i ++ ) x_pntr[ i ] = x;
301 
303  for ( size_t j = 0; j < jb; j ++ )
304  {
305  for ( size_t i = 0; i < ib; i ++ )
306  {
307  *x_pntr[ i ] = *packX ++;
308  x_pntr[ i ] += cs;
309  }
310  for ( size_t i = ib; i < NB; i ++ ) packX ++;
311  }
312  }
313  };
314 
315 
316 
317 };
320 };
323 #endif
Definition: packing.hpp:154
Definition: packing.hpp:67
Definition: packing.hpp:37
Definition: packing.hpp:175
Definition: packing.hpp:11
Definition: packing.hpp:198
Definition: gofmm.hpp:83