27 #include <external/blas_lapack_prototypes.h> 28 #include <external/mpi_prototypes.h> 43 HMLP_ERROR_NOT_INITIALIZED,
44 HMLP_ERROR_ALLOC_FAILED,
45 HMLP_ERROR_INVALID_VALUE,
46 HMLP_ERROR_EXECUTION_FAILED,
47 HMLP_ERROR_NOT_SUPPORTED,
48 HMLP_ERROR_INTERNAL_ERROR
/**
 *  Declaration of the hmlp_init overload that receives pointers to the
 *  program's argument count and argument vector.
 *
 *  @param argc  pointer to the argument count
 *  @param argv  pointer to the argument vector
 *  @return      an hmlpError_t status code
 *
 *  NOTE(review): presumably the arguments are forwarded to the runtime/MPI
 *  initialization; the implementation is not visible here -- confirm.
 */
52 hmlpError_t hmlp_init(
int *argc,
char ***argv );
/**
 *  Declaration of the hmlp_init overload that receives pointers to the
 *  program's argument count and argument vector together with an MPI
 *  communicator.
 *
 *  @param argc  pointer to the argument count
 *  @param argv  pointer to the argument vector
 *  @param comm  MPI communicator handle
 *  @return      an hmlpError_t status code
 *
 *  NOTE(review): how `comm` is used (e.g. duplicated vs. stored) is not
 *  visible from this declaration -- confirm against the implementation.
 */
53 hmlpError_t hmlp_init(
int *argc,
char ***argv, MPI_Comm comm );
/**
 *  Declaration of the hmlp_init overload that takes no arguments.
 *
 *  @return  an hmlpError_t status code
 */
54 hmlpError_t hmlp_init();
/**
 *  Declaration of the hmlp_init overload that takes only an MPI
 *  communicator.
 *
 *  @param comm  MPI communicator handle
 *  @return      an hmlpError_t status code
 */
55 hmlpError_t hmlp_init( MPI_Comm comm );
/**
 *  Declaration of hmlp_set_num_workers.
 *
 *  @param n_worker  requested number of workers
 *  @return          an hmlpError_t status code
 *
 *  NOTE(review): the valid range of `n_worker` and whether this may be
 *  called after initialization are not visible here -- confirm.
 */
56 hmlpError_t hmlp_set_num_workers(
int n_worker );
/**
 *  Declaration of hmlp_run; takes no arguments.
 *
 *  @return  an hmlpError_t status code
 *
 *  NOTE(review): presumably executes the work registered with the runtime;
 *  blocking behavior is not visible from this declaration -- confirm.
 */
57 hmlpError_t hmlp_run();
/**
 *  Declaration of hmlp_finalize; takes no arguments.
 *
 *  @return  an hmlpError_t status code
 *
 *  NOTE(review): presumably the counterpart of hmlp_init -- confirm against
 *  the implementation.
 */
58 hmlpError_t hmlp_finalize();
/**
 *  Declaration of hmlp_is_in_epoch_session; takes no arguments and returns
 *  an int.
 *
 *  NOTE(review): the return value is presumably a boolean-style flag
 *  (nonzero while inside an epoch session) -- confirm.
 */
60 int hmlp_is_in_epoch_session();
/**
 *  Declaration of hmlp_get_mpi_rank; takes no arguments and returns an int.
 *
 *  NOTE(review): presumably the calling process's rank in the communicator
 *  used by the runtime; which communicator is not visible here -- confirm.
 */
61 int hmlp_get_mpi_rank();
/**
 *  Declaration of hmlp_get_mpi_size; takes no arguments and returns an int.
 *
 *  NOTE(review): presumably the number of processes in the communicator
 *  used by the runtime; which communicator is not visible here -- confirm.
 */
62 int hmlp_get_mpi_size();
68 hmlpOperation_t transA, hmlpOperation_t transB,
78 hmlpOperation_t transA, hmlpOperation_t transB,
88 hmlpOperation_t transA, hmlpOperation_t transB,
96 void gkmx_dconv_relu_pool
98 hmlpOperation_t transA, hmlpOperation_t transB,
106 hmlpOperation_t transA, hmlpOperation_t transB,
115 int w0,
int h0,
int d0,
int s,
int p,
int batchSize,
117 int w1,
int h1,
int d1,
124 int w0,
int h0,
int d0,
int s,
int p,
int batchSize,
126 int w1,
int h1,
int d1,
133 int w0,
int h0,
int d0,
int s,
int p,
int batchSize,
135 int w1,
int h1,
int d1,
142 int w0,
int h0,
int d0,
int s,
int p,
int batchSize,
144 int w1,
int h1,
int d1,
204 int m,
int n,
int k,
int r,
205 double *A,
double *A2,
int *amap,
206 double *B,
double *B2,
int *bmap,
212 int m,
int n,
int k,
int r,
213 double *A,
double *A2,
int *amap,
214 double *B,
double *B2,
int *bmap,
220 int m,
int n,
int k,
int r,
221 double *A,
double *A2,
int *amap,
222 double *B,
double *B2,
int *bmap,
227 #include <cuda_runtime.h> 228 #include <cublas_v2.h> 229 #include <cusparse_v2.h> 230 #include <thrust/pair.h> 235 hmlpOperation_t transX,
237 double* X2array[],
const double* Xarray[],
double* X,
int ldx,
244 hmlpOperation_t transA, hmlpOperation_t transB,
246 const double *Aarray[],
int lda,
247 const double *Barray[],
int ldb,
248 double *Carray[],
int ldc,
255 hmlpOperation_t transA, hmlpOperation_t transB,
257 const double *Aarray,
int lda,
int loa,
258 const double *Barray,
int ldb,
int lob,
259 double *Carray,
int ldc,
int loc,
266 hmlpOperation_t transA, hmlpOperation_t transB,
268 const double *Aarray,
int lda,
int loa,
269 const double *Barray,
int ldb,
int lob,
270 float *Carray,
int ldc,
int loc,
277 hmlpOperation_t transA, hmlpOperation_t transB,
279 double *Aarray[],
double *A2array[],
int lda,
280 double *Barray[],
double *B2array[],
int ldb,
281 thrust::pair<double,int> *Carray[],
int ldc,
289 double *Aarray[],
double *A2array[],
int lda,
290 double *Barray[],
double *B2array[],
int ldb,
291 thrust::pair<double,int> *Carray[],
int ldc,
298 hmlpOperation_t transA, hmlpOperation_t transB,
300 const double *Aarray[],
int lda,
301 const double *Barray[],
int ldb,
302 double *Carray[],
int ldc,
309 hmlpOperation_t transA, hmlpOperation_t transB,
311 const double *Aarray,
int lda,
int loa,
312 const double *Barray,
int ldb,
int lob,
313 double *Carray,
int ldc,
int loc,
316 #endif // end ifdef HMLP_USE_CUDA 318 #endif // define HMLP_H