/**
 *  HMLP (High-Performance Machine Learning Primitives)
 *
 *  Copyright (C) 2014-2017, The University of Texas at Austin
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program. If not, see the LICENSE file.
 *
 **/
20 |
|
|
21 |
|
|
22 |
|
/** GNBX templates */ |
23 |
|
#include <primitives/nbody.hpp> |
24 |
|
|
25 |
|
/** Haswell micro-kernels */ |
26 |
|
#include <rank_k_d8x6.hpp> |
27 |
|
#include <rank_k_d6x8.hpp> |
28 |
|
|
29 |
|
using namespace hmlp; |
30 |
|
|
31 |
|
/**
 *  @brief Pass-through element functor: hands back the value it is given.
 *
 *  operator() takes a value plus three integer arguments and returns the
 *  value unchanged; the integers are accepted but never read.
 *
 *  The struct also carries two public pointer members, A2 and B2. Nothing
 *  in this struct reads, writes or initializes them, so after a plain
 *  default construction they hold indeterminate values.
 */
template<typename T>
struct identity
{
  /** Auxiliary pointers; not touched by operator(). */
  T** A2;
  T** B2;

  /** Return x as is. The trailing integer arguments are ignored. */
  T operator()( const T& x, int /* i */, int /* j */, int /* b */ ) const
  {
    return x;
  }
};
41 |
|
|
42 |
|
void nbody |
43 |
|
( |
44 |
|
int m, int n, int k, |
45 |
|
float *A, int lda, |
46 |
|
float *B, int ldb, |
47 |
|
float *C, int ldc |
48 |
|
) |
49 |
|
{ |
50 |
|
/** microkernel */ |
51 |
|
//rank_k_asm_d6x8 semiringkernel; |
52 |
|
//rank_k_asm_d6x8 microkernel; |
53 |
|
rank_k_asm_d8x6 semiringkernel; |
54 |
|
rank_k_asm_d8x6 microkernel; |
55 |
|
|
56 |
|
//const size_t PACK_MR = rank_k_asm_d6x8::pack_mr; |
57 |
|
//const size_t PACK_NR = rank_k_asm_d6x8::pack_nr; |
58 |
|
const size_t PACK_MR = rank_k_asm_d8x6::pack_mr; |
59 |
|
const size_t PACK_NR = rank_k_asm_d8x6::pack_nr; |
60 |
|
const size_t MC = 72; |
61 |
|
const size_t NC = 2040; |
62 |
|
const size_t KC = 256; |
63 |
|
|
64 |
|
/** ObjA */ |
65 |
|
MatrixLike<PACK_MR, float, double> ObjA; |
66 |
|
ObjA.Set( A, m, k, 1, lda, false ); |
67 |
|
|
68 |
|
/** ObjB */ |
69 |
|
MatrixLike<PACK_NR, float, double> ObjB; |
70 |
|
ObjB.Set( B, k, n, 1, ldb, true ); |
71 |
|
|
72 |
|
/** ObjC */ |
73 |
|
//MatrixLike<PACK_MR, double, double> ObjC; |
74 |
|
MatrixLike<PACK_MR, float, double> ObjC; |
75 |
|
ObjC.Set( C, m, n, 1, ldc, false ); |
76 |
|
|
77 |
|
/** General N-body operator (these 6 types are essential) */ |
78 |
|
nbody::nbody<MC, NC, KC, double, double, double> |
79 |
|
( |
80 |
|
0, m, n, k, |
81 |
|
ObjA, |
82 |
|
ObjB, |
83 |
|
ObjC, |
84 |
|
semiringkernel, |
85 |
|
microkernel |
86 |
|
); |
87 |
|
|
88 |
|
}; /** end gnbx() */ |
89 |
|
|
90 |
|
|
91 |
|
void nbody |
92 |
|
( |
93 |
|
int m, int n, int k, |
94 |
|
double *A, int lda, |
95 |
|
double *B, int ldb, |
96 |
|
double *C, int ldc |
97 |
|
) |
98 |
|
{ |
99 |
|
/** microkernel */ |
100 |
|
rank_k_asm_s16x6 semiringkernel; |
101 |
|
rank_k_asm_s16x6 microkernel; |
102 |
|
|
103 |
|
const size_t PACK_MR = rank_k_asm_s16x6::pack_mr; |
104 |
|
const size_t PACK_NR = rank_k_asm_s16x6::pack_nr; |
105 |
|
const size_t MC = 144; |
106 |
|
const size_t NC = 2040; |
107 |
|
const size_t KC = 256; |
108 |
|
|
109 |
|
/** ObjA, stored in double, computed in float */ |
110 |
|
MatrixLike<PACK_MR, double, float> ObjA; |
111 |
|
ObjA.Set( A, m, k, 1, lda, false ); |
112 |
|
|
113 |
|
/** ObjB, stored in double, computed in float */ |
114 |
|
MatrixLike<PACK_NR, double, float> ObjB; |
115 |
|
ObjB.Set( B, k, n, 1, ldb, true ); |
116 |
|
|
117 |
|
/** ObjC, stored in double, computed in float */ |
118 |
|
MatrixLike<PACK_MR, double, float> ObjC; |
119 |
|
ObjC.Set( C, m, n, 1, ldc, false ); |
120 |
|
|
121 |
|
/** General N-body operator (these 6 types are essential) */ |
122 |
|
nbody::nbody<MC, NC, KC, float, float, float> |
123 |
|
( |
124 |
|
0, m, n, k, |
125 |
|
ObjA, |
126 |
|
ObjB, |
127 |
|
ObjC, |
128 |
|
semiringkernel, |
129 |
|
microkernel |
130 |
|
); |
131 |
|
|
132 |
|
}; /** end nbody() */ |
133 |
|
|
134 |
|
//void gnbx_simple
//(
//  int m, int n, int k,
//  double *A, int lda,
//  double *B, int ldb,
//  double *C, int ldc
//)
//{
//  std::plus<float> op1;
//  std::multiplies<float> op2;
//  identity<float> opkernel;
//  float initV = 0.0;
//
//  const size_t MR = 8;
//  const size_t NR = 4;
//  const size_t MC = 128;
//  const size_t NC = 4096;
//  const size_t KC = 384;
//
//  /** ObjA, stored in double, computed in float */
//  MatrixLike<MR, double, float> ObjA;
//  ObjA.Set( A, m, k, 1, lda, false );
//
//  /** ObjB, stored in double, computed in float */
//  MatrixLike<NR, double, float> ObjB;
//  ObjB.Set( B, k, n, 1, ldb, true );
//
//  /** ObjC, stored in double, computed in float */
//  MatrixLike<MR, double, float> ObjC;
//  ObjC.Set( C, m, n, 1, ldc, false );
//
//  /** General N-body operator (these 6 types are essential) */
//  gnbx::gnbx<MR, NR, MC, NC, KC, float, float, float, float>
//  (
//    0, m, n, k,
//    ObjA,
//    ObjB,
//    ObjC,
//    opkernel, op1, op2, initV
//  );
//
//}; /** end gnbx_simple() */