27 #include <hmlp_base.hpp> 68 name = string(
"gemm" );
71 double flops = 0.0, mops = 0.0;
72 cost = 2.0 * C.
row() * C.
col();
73 event.Set( name + label, flops, mops );
82 assert( !this->TryEnqueue() );
85 void Execute(
Worker* user_worker )
87 string transA, transB;
88 if ( A.IsTransposed() ) transA =
"Transpose";
89 else transA =
"No transpose";
90 if ( B.IsTransposed() ) transB =
"Transpose";
91 else transB =
"No transpose";
97 assert( A.
row() == m );
98 assert( B.
row() == k );
99 assert( B.
col() == n );
107 xgemm( transA.data(), transB.data(), m, n, k,
136 this->stealable =
false;
139 name = string(
"gemmBarrier" );
146 event.Set( name + label, flops, mops );
152 void Execute(
Worker* user_worker ) {};
161 task->
Set( alpha, A, B, beta, C );
163 task->DependencyAnalysis();
170 template<
size_t NB = 512,
typename T>
186 while ( AL.
col() < A.
col() )
189 size_t b = std::min( AR.
col(), NB );
194 A0, A1, A2, b, RIGHT );
201 CreatexgemmTask( alpha, A1, B1, beta, C );
219 template<
size_t NB = 512,
typename T>
231 while ( BL.
col() < B.
col() )
233 size_t b = std::min( BR.
col(), NB );
238 C0, C1, C2, b, RIGHT );
240 Repartition1x2To1x3( BL, BR,
242 B0, B1, B2, b, RIGHT );
245 xgemm_var1( alpha, A, B1, beta, C1 );
262 template<
size_t NB = 512,
typename T>
275 while ( AT.
row() < A.
row() )
277 size_t b = std::min( AB.
row(), NB );
284 Repartition2x1To3x1( CT, C0,
289 xgemm_var2( alpha, A1, B, beta, C1 );
305 template<
size_t NB = 512,
typename T>
335 if ( hmlp_is_in_epoch_session() )
351 begXGEMMtask->
Set( alpha, A, B, beta, C );
352 begXGEMMtask->Submit();
353 begXGEMMtask->DependencyAnalysis();
360 xgemm_var3( alpha, A, B, beta, C );
366 endXGEMMtask->
Set( alpha, A, B, beta, C );
371 begXGEMMtask->TryEnqueue();
376 xgemm_var3( alpha, A, B, beta, C );
382 void xgemm( hmlpOperation_t transA, hmlpOperation_t transB,
385 const bool TRANS =
true;
386 const bool NOTRANS =
true;
392 if ( transA == HMLP_OP_T ) Aview.
Set(
true, A );
393 else Aview.
Set(
false, A );
394 if ( transB == HMLP_OP_T ) Bview.
Set(
true, B );
395 else Bview.
Set(
false, B );
399 xgemm( alpha, Aview, Bview, beta, Cview );
407 xgemm( HMLP_OP_N, HMLP_OP_N, alpha, A, B, beta, C );
void Repartition2x1To3x1(View< T > &AT, View< T > &A0, View< T > &A1, View< T > &AB, View< T > &A2, size_t mb, SideType side)
Definition: View.hpp:523
void Partition1x2(View< T > &A1, View< T > &A2, size_t nb, SideType side)
Definition: View.hpp:180
void CallBackWhileWaiting()
This is the callback function for the owner of the nested task.
Definition: runtime.cpp:417
This task is generated by the top level routine.
Definition: gemm.hpp:123
void DependencyAnalysis(ReadWriteType type, Task *task)
If leaf r/w blocks were created, then the r/w dependency applies to all leaf r/w blocks covered by th...
Definition: View.hpp:312
void ContinueWith3x1To2x1(View< T > &AT, View< T > &A0, View< T > &A1, View< T > &AB, View< T > &A2, SideType side)
Definition: View.hpp:557
void CreateLeafMatrixBlocks(size_t mb, size_t nb)
Definition: View.hpp:267
void Set(T alpha, View< T > &A, View< T > &B, T beta, View< T > &C)
Definition: gemm.hpp:129
size_t row()
Definition: View.hpp:345
void xgemm(const char *transA, const char *transB, int m, int n, int k, double alpha, const double *A, int lda, const double *B, int ldb, double beta, double *C, int ldc)
DGEMM wrapper.
Definition: blas_lapack.cpp:130
void Submit()
Ask the runtime to create a normal task in file.
Definition: runtime.cpp:264
size_t col()
Definition: View.hpp:348
T * data()
Definition: View.hpp:354
void DependencyAnalysis()
Definition: gemm.hpp:76
void Set(T alpha, View< T > &A, View< T > &B, T beta, View< T > &C)
Definition: gemm.hpp:54
void Repartition1x2To1x3(View< T > &AL, View< T > &AR, View< T > &A0, View< T > &A1, View< T > &A2, size_t nb, SideType side)
Definition: View.hpp:458
void Set(bool TRANS, Data< T > &buff)
Definition: View.hpp:60
size_t ld()
Definition: View.hpp:351
void DependencyAnalysis()
Definition: gemm.hpp:150
void ContinueWith1x3To1x2(View< T > &AL, View< T > &AR, View< T > &A0, View< T > &A1, View< T > &A2, SideType side)
Definition: View.hpp:490
Definition: runtime.hpp:174
void Partition2x1(View< T > &A1, View< T > &A2, size_t mb, SideType side)
Definition: View.hpp:155
Definition: thread.hpp:166