Head

GCC Code Coverage Report

Directory:	.		Exec	Total	Coverage
File:	package/x86_64/haswell/gkmx.cpp	Lines:	0	7	0.0 %
Date:	2019-01-14	Branches:	0	0	0.0 %


/**
 *  HMLP (High-Performance Machine Learning Primitives)
 *
 *  Copyright (C) 2014-2017, The University of Texas at Austin
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program. If not, see the LICENSE file.
 *
 **/




/** GKMX templates */
#include <primitives/gkmx.hpp>

/** Haswell micro-kernels */
#include <rank_k_d8x6.hpp>

using namespace hmlp;

/**
 *  @brief Pass-through element functor: operator() returns its input value
 *         unchanged.
 *
 *  The three integer arguments of operator() are accepted but never used.
 *  A2 and B2 are public pointer members that this struct itself never reads;
 *  they default to nullptr so that a default-constructed functor carries no
 *  indeterminate pointer values when it is copied.
 */
template<typename T>
struct identity
{
  /**
   *  @param x  value to forward
   *  @return   a copy of x; the three int arguments are ignored
   */
  inline T operator()( const T& x, int /* i */, int /* j */, int /* b */ ) const
  {
    return x;
  }

  /**
   *  Auxiliary pointers, not read inside this struct.
   *  NOTE(review): presumably consumed by the gkmx templates -- confirm.
   */
  T** A2 = nullptr;
  T** B2 = nullptr;
};

/**
 *  @brief Converting element functor: operator() returns its TV input
 *         converted to TC.
 *
 *  The conversion uses static_cast, so only conversions the language
 *  considers well-defined compile (a C-style cast could silently fall back to
 *  a reinterpreting cast). The three integer arguments of operator() are
 *  accepted but never used. A2 and B2 are public pointer members that this
 *  struct itself never reads; they default to nullptr so that a
 *  default-constructed functor carries no indeterminate pointer values when
 *  it is copied.
 *
 *  @tparam TC  result type of the conversion
 *  @tparam TV  input type of the conversion
 */
template<typename TC, typename TV>
struct downcast
{
  /**
   *  @param x  value to convert
   *  @return   x converted to TC; the three int arguments are ignored
   */
  inline TC operator()( const TV& x, int /* i */, int /* j */, int /* b */ ) const
  {
    return static_cast<TC>( x );
  }

  /**
   *  Auxiliary pointers, not read inside this struct.
   *  NOTE(review): presumably consumed by the gkmx templates -- confirm.
   */
  TV** A2 = nullptr;
  TV** B2 = nullptr;
};

/**
 *  @brief Double-precision entry point that forwards to gkmx::gkmx, passing
 *         a rank_k_asm_d8x6 object for both kernel arguments.
 *
 *  All arguments are handed to gkmx::gkmx unchanged, followed by a fixed
 *  batch id of 0 and the two kernel objects.
 *
 *  @param transA, transB  operation flags forwarded for A and B
 *  @param m, n, k         integer sizes forwarded as-is
 *                         (NOTE(review): presumably the problem dimensions --
 *                         confirm against gkmx.hpp)
 *  @param A, lda          double buffer and its accompanying integer
 *  @param B, ldb          double buffer and its accompanying integer
 *  @param C, ldc          double buffer and its accompanying integer
 */
void gkmx_dfma
(
  hmlpOperation_t transA, hmlpOperation_t transB,
  int m, int n, int k,
  double *A, int lda,
  double *B, int ldb,
  double *C, int ldc
)
{
  /** The same kernel type fills both kernel slots of gkmx::gkmx. */
  rank_k_asm_d8x6 rankk_semiring;
  rank_k_asm_d8x6 rankk_micro;

  /**
   *  NOTE(review): the numeric and boolean template arguments are presumably
   *  blocking sizes and feature switches; their meaning is defined in
   *  gkmx.hpp -- confirm there before changing them.
   */
  gkmx::gkmx<
    72, 960, 256, 8, 6, 72, 960, 8, 6, 32, false, true,
    rank_k_asm_d8x6, rank_k_asm_d8x6,
    double, double, double, double>
  (
    transA, transB, m, n, k,
    A, lda, B, ldb, C, ldc,
    0, /* batchId */
    rankk_semiring, rankk_micro
  );
}


/**
 *  @brief Double-precision entry point that forwards to gkmx::gkmm with
 *         std::plus / std::multiplies as the two binary operators, an
 *         identity<double> functor as the element kernel and 0.0 as the
 *         initial value.
 *
 *  All arguments are handed to gkmx::gkmm unchanged, followed by a fixed
 *  batch id of 0 and the four objects listed above.
 *
 *  @param transA, transB  operation flags forwarded for A and B
 *  @param m, n, k         integer sizes forwarded as-is
 *                         (NOTE(review): presumably the problem dimensions --
 *                         confirm against gkmx.hpp)
 *  @param A, lda          double buffer and its accompanying integer
 *  @param B, ldb          double buffer and its accompanying integer
 *  @param C, ldc          double buffer and its accompanying integer
 */
void gkmx_dfma_simple
(
  hmlpOperation_t transA, hmlpOperation_t transB,
  int m, int n, int k,
  double *A, int lda,
  double *B, int ldb,
  double *C, int ldc
)
{
  /** Binary operators passed to gkmm, in this order: plus, then multiplies. */
  std::plus<double> add_op;
  std::multiplies<double> mul_op;

  /** Element functor that returns its input unchanged. */
  identity<double> passthrough;

  /** Kept as a non-const lvalue so template deduction inside gkmm is unchanged. */
  double zero = 0.0;

  /**
   *  NOTE(review): the numeric and boolean template arguments are presumably
   *  blocking sizes and feature switches; their meaning is defined in
   *  gkmx.hpp -- confirm there before changing them.
   */
  gkmx::gkmm<72, 960, 256, 8, 6, 72, 960, 8, 6, 32, false, true>
  (
    transA, transB, m, n, k,
    A, lda, B, ldb, C, ldc,
    0, /* batchId */
    passthrough, add_op, mul_op, zero
  );
}

/**
 *  @brief Mixed-precision entry point (double A and B, float C) that is
 *         currently a stub.
 *
 *  The gkmm invocation below is commented out, so this function only
 *  constructs its local operator objects and returns: none of the arguments
 *  is read and C is never written.
 *
 *  @param transA, transB  unused while the call is disabled
 *  @param m, n, k         unused while the call is disabled
 *  @param A, lda          unused while the call is disabled
 *  @param B, ldb          unused while the call is disabled
 *  @param C, ldc          unused while the call is disabled
 */
void gkmx_mixfma_simple
(
  hmlpOperation_t transA, hmlpOperation_t transB,
  int m, int n, int k,
  double *A, int lda,
  double *B, int ldb,
  float  *C, int ldc
)
{
  /** Operators prepared for the disabled call: plus, then multiplies. */
  std::plus<double> add_op;
  std::multiplies<double> mul_op;

  /**
   *  Element functor converting double to float. The alternative that was
   *  left commented out here is:
   */
  // identity<double> narrowing;
  downcast<float, double> narrowing;

  double zero = 0.0;

  /** Disabled call, kept for reference (it passes no batch id): */
  // gkmm
  // <104, 4096, 256, 8, 4, 104, 4096, 8, 4, 32,
  // false>
  // (
  //   transA, transB,
  //   m, n, k,
  //   A, lda,
  //   B, ldb,
  //   C, ldc,
  //   narrowing, add_op, mul_op, zero
  // );
}



Generated by: GCOVR (Version 3.2)

Line	Exec	Source
1		/**
2		* HMLP (High-Performance Machine Learning Primitives)
3		*
4		* Copyright (C) 2014-2017, The University of Texas at Austin
5		*
6		* This program is free software: you can redistribute it and/or modify
7		* it under the terms of the GNU General Public License as published by
8		* the Free Software Foundation, either version 3 of the License, or
9		* (at your option) any later version.
10		*
11		* This program is distributed in the hope that it will be useful,
12		* but WITHOUT ANY WARRANTY; without even the implied warranty of
13		* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14		* GNU General Public License for more details.
15		*
16		* You should have received a copy of the GNU General Public License
17		* along with this program. If not, see the LICENSE file.
18		*
19		**/
20
21
22
23
24		/** GKMX templates */
25		#include <primitives/gkmx.hpp>
26
27		/** Haswell micro-kernels */
28		#include <rank_k_d8x6.hpp>
29
30		using namespace hmlp;
31
32		template<typename T>
33		struct identity
34		{
35		inline T operator()( const T& x, int i, int j, int b ) const
36		{
37		return x;
38		}
39		T** A2;
40		T** B2;
41		};
42
43		template<typename TC, typename TV>
44		struct downcast
45		{
46		inline TC operator()( const TV& x, int i, int j, int b ) const
47		{
48		return (TC)x;
49		}
50		TV** A2;
51		TV** B2;
52		};
53
54		void gkmx_dfma
55		(
56		hmlpOperation_t transA, hmlpOperation_t transB,
57		int m, int n, int k,
58		double *A, int lda,
59		double *B, int ldb,
60		double *C, int ldc
61		)
62		{
63		rank_k_asm_d8x6 semiringkernel;
64		rank_k_asm_d8x6 microkernel;
65
66		gkmx::gkmx<
67		72, 960, 256, 8, 6,
68		72, 960, 8, 6, 32,
69		false, true,
70		rank_k_asm_d8x6,
71		rank_k_asm_d8x6,
72		double, double, double, double>
73		(
74		transA, transB,
75		m, n, k,
76		A, lda,
77		B, ldb,
78		C, ldc,
79		0, // batchId
80		semiringkernel,
81		microkernel
82		);
83
84		};
85
86
87		void gkmx_dfma_simple
88		(
89		hmlpOperation_t transA, hmlpOperation_t transB,
90		int m, int n, int k,
91		double *A, int lda,
92		double *B, int ldb,
93		double *C, int ldc
94		)
95		{
96		std::plus<double> op1;
97		std::multiplies<double> op2;
98
99		identity<double> opkernel;
100
101		double initV = 0.0;
102
103		gkmx::gkmm<
104		72, 960, 256, 8, 6,
105		72, 960, 8, 6, 32,
106		false, true>
107		(
108		transA, transB,
109		m, n, k,
110		A, lda,
111		B, ldb,
112		C, ldc,
113		0, // batchId
114		opkernel, op1, op2, initV
115		);
116		};
117
118		void gkmx_mixfma_simple
119		(
120		hmlpOperation_t transA, hmlpOperation_t transB,
121		int m, int n, int k,
122		double *A, int lda,
123		double *B, int ldb,
124		float *C, int ldc
125		)
126		{
127		std::plus<double> op1;
128		std::multiplies<double> op2;
129
130		//identity<double> opkernel;
131		downcast<float, double> opkernel;
132
133		double initV = 0.0;
134
135		//gkmm
136		//<104, 4096, 256, 8, 4, 104, 4096, 8, 4, 32,
137		//false>
138		//(
139		// transA, transB,
140		// m, n, k,
141		// A, lda,
142		// B, ldb,
143		// C, ldc,
144		// opkernel, op1, op2, initV
145		//);
146		};
147