GCC Code Coverage Report
Directory: . Exec Total Coverage
File: frame/base/util.hpp Lines: 0 57 0.0 %
Date: 2019-01-14 Branches: 0 60 0.0 %

Line Exec Source
1
/**
2
 *  HMLP (High-Performance Machine Learning Primitives)
3
 *
4
 *  Copyright (C) 2014-2017, The University of Texas at Austin
5
 *
6
 *  This program is free software: you can redistribute it and/or modify
7
 *  it under the terms of the GNU General Public License as published by
8
 *  the Free Software Foundation, either version 3 of the License, or
9
 *  (at your option) any later version.
10
 *
11
 *  This program is distributed in the hope that it will be useful,
12
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
 *  GNU General Public License for more details.
15
 *
16
 *  You should have received a copy of the GNU General Public License
17
 *  along with this program. If not, see the LICENSE file.
18
 *
19
 **/
20
21
22
#ifndef HMLP_UTIL_HPP
23
#define HMLP_UTIL_HPP
24
25
#include <stdio.h>
26
#include <stdlib.h>
27
#include <cmath>
28
#include <tuple>
29
#include <limits>
30
#include <algorithm>
31
#include <type_traits>
32
#include <cstdint>
33
#include <omp.h>
34
35
36
#include <hmlp.h>
37
38
#define HANDLE_ERROR( err ) (hmlp::handleError( err, __FILE__, __LINE__ ))
39
40
41
42
using namespace std;
43
44
45
namespace hmlp
46
{
47
48
/**
49
 *  \breif Handling runtime error with information.
50
 */
51
void handleError( hmlpError_t error, const char* file, int line );
52
53
/**
54
 *  \brief The default function to allocate memory for HMLP.
55
 *         Memory allocated by this function is aligned. Most of the
56
 *         HMLP primitives require memory alignment.
57
 */
58
template<int ALIGN_SIZE, typename T>
59
T *hmlp_malloc( int m, int n, int size )
60
{
61
  T *ptr = NULL;
62
#ifdef HMLP_MIC_AVX512
63
  int err = hbw_posix_memalign( (void**)&ptr, (size_t)ALIGN_SIZE, size * m * n );
64
#else
65
  int err = posix_memalign( (void**)&ptr, (size_t)ALIGN_SIZE, size * m * n );
66
#endif
67
68
  if ( err )
69
  {
70
    printf( "hmlp_malloc(): posix_memalign() failures\n" );
71
    exit( 1 );
72
  }
73
74
  return ptr;
75
};
76
77
/** @brief Another interface. */
78
template<int ALIGN_SIZE, typename T>
79
T *hmlp_malloc( int n )
80
{
81
  return hmlp_malloc<ALIGN_SIZE, T>( n, 1, sizeof(T) );
82
};
83
84
/**
85
 *  @brief Free the aligned memory.
86
 */
87
template<typename T>
88
void hmlp_free( T *ptr )
89
{
90
#ifdef HMLP_MIC_AVX512
91
  if ( ptr ) hbw_free( ptr );
92
#else
93
  if ( ptr ) free( ptr );
94
#endif
95
};
96
97
template<typename T>
98
void hmlp_print_binary( T number )
99
{
100
  char binary[ 33 ];
101
102
  for ( int i = 31; i >= 0; i -- )
103
  {
104
    if ( i % 5 ) printf( " " );
105
    else         printf( "%d", i / 5 );
106
  }
107
  printf( "\n" );
108
109
110
  for ( size_t i = 0; i < sizeof(T) * 4; i ++ )
111
  {
112
    if ( number & 1 ) binary[ 31 - i ] = '1';
113
    else              binary[ 31 - i ] = '0';
114
    number >>= 1;
115
    if ( i == 31 ) break;
116
  }
117
  binary[ 32 ] = '\0';
118
119
  //size_t i = 0;
120
121
  //while ( number )
122
  //{
123
  //  if ( i < 32 && ( number & 1 )  ) binary[ i ] = '1';
124
  //  else                             binary[ i ] = '0';
125
  //  //if ( number & 1) printf( "1" );
126
  //  //else             printf( "0" );
127
  //  number >>= 1;
128
  //  i ++;
129
  //}
130
  //binary[ 32 ] = 3;
131
  //printf("\n");
132
  printf( "%s\n", binary );
133
};
134
135
136
137
/**
138
 *  @brief Split into m x n, get the subblock starting from ith row
139
 *         and jth column. (for STRASSEN)
140
 */
141
template<typename T>
142
void hmlp_acquire_mpart
143
(
144
  hmlpOperation_t transX,
145
  int m,
146
  int n,
147
  T *src_buff,
148
  int lda,
149
  int x,
150
  int y,
151
  int i,
152
  int j,
153
  T **dst_buff
154
)
155
{
156
  //printf( "m: %d, n: %d, lda: %d, x: %d, y: %d, i: %d, j: %d\n", m, n, lda, x, y, i, j );
157
  if ( transX == HMLP_OP_N )
158
  {
159
    *dst_buff = &src_buff[ ( m / x * i ) + ( n / y * j ) * lda ]; //src( m/x*i, n/y*j )
160
  }
161
  else
162
  {
163
    *dst_buff = &src_buff[ ( m / x * i ) * lda + ( n / y * j ) ]; //src( m/x*i, n/y*j )
164
    /* x,y,i,j split partition dimension and id after the transposition */
165
  }
166
};
167
168
template<typename T>
169
T hmlp_norm
170
(
171
  int m, int n,
172
  T *A, int lda
173
)
174
{
175
  T nrm2 = 0.0;
176
  for ( int j = 0; j < n; j ++ )
177
  {
178
    for ( int i = 0; i < m; i ++ )
179
    {
180
      nrm2 += A[ j * lda + i ] * A[ j * lda + i ];
181
    }
182
  }
183
  return std::sqrt( nrm2 );
184
}; // end hmlp_norm()
185
186
187
template<typename TA, typename TB>
188
TB hmlp_relative_error
189
(
190
  int m, int n,
191
  TA *A, int lda,
192
  TB *B, int ldb
193
)
194
{
195
  TB nrmB = 0.0;
196
  TB nrm2 = 0.0;
197
198
  std::tuple<int, int, TB> max_error( -1, -1, 0.0 );
199
  for ( int j = 0; j < n; j ++ )
200
  {
201
    for ( int i = 0; i < m; i ++ )
202
    {
203
      TA a = A[ j * lda + i ];
204
      TB b = B[ j * ldb + i ];
205
      TB r = a - b;
206
      nrmB += b * b;
207
      nrm2 += r * r;
208
      if ( r * r > std::get<2>( max_error ) )
209
      {
210
        max_error = std::make_tuple( i, j, r );
211
      }
212
    }
213
  }
214
  nrm2 = std::sqrt( nrm2 ) / std::sqrt( nrmB );
215
216
  if ( nrm2 > 1E-7 )
217
  {
218
    printf( "relative error % .2E maxinum elemenwise error % .2E ( %d, %d )\n",
219
        nrm2,
220
        std::get<2>( max_error ),
221
        std::get<0>( max_error ), std::get<1>( max_error ) );
222
  }
223
224
  return nrm2;
225
};
226
227
228
template<typename TA, typename TB>
229
TB hmlp_relative_error
230
(
231
  int m, int n,
232
  TA *A, int lda, int loa,
233
  TB *B, int ldb, int lob,
234
  int batchSize
235
)
236
{
237
  TB err = 0.0;
238
  for ( int b = 0; b < batchSize; b ++ )
239
  {
240
    err +=
241
    hmlp_relative_error
242
    (
243
      m, n,
244
      A + b * loa, lda,
245
      B + b * lob, ldb
246
    );
247
  }
248
  if ( err > 1E-7 )
249
  {
250
    printf( "average relative error % .2E\n", err / batchSize );
251
  }
252
  return err;
253
};
254
255
256
257
template<typename T>
258
int hmlp_count_error
259
(
260
  int m, int n,
261
  T *A, int lda,
262
  T *B, int ldb
263
)
264
{
265
  int error_count = 0;
266
  std::tuple<int, int> err_location( -1, -1 );
267
268
  for ( int j = 0; j < n; j ++ )
269
  {
270
    for ( int i = 0; i < m; i ++ )
271
    {
272
      T a = A[ j * lda + i ];
273
      T b = B[ j * ldb + i ];
274
      if ( a != b )
275
      {
276
        err_location = std::make_tuple( i, j );
277
        error_count ++;
278
      }
279
    }
280
  }
281
  if ( error_count )
282
  {
283
    printf( "total error count %d\n", error_count );
284
  }
285
  return error_count;
286
};
287
288
289
template<typename T>
290
int hmlp_count_error
291
(
292
  int m, int n,
293
  T *A, int lda, int loa,
294
  T *B, int ldb, int lob,
295
  int batchSize
296
)
297
{
298
  int error_count = 0;
299
  for ( int b = 0; b < batchSize; b ++ )
300
  {
301
    error_count +=
302
    hmlp_count_error
303
    (
304
      m, n,
305
      A + b * loa, lda,
306
      B + b * lob, ldb
307
    );
308
  }
309
  if ( error_count )
310
  {
311
    printf( "total error count %d\n", error_count );
312
  }
313
  return error_count;
314
};
315
316
317
318
template<bool IGNOREZERO=false, bool COLUMNINDEX=true, typename T>
319
void hmlp_printmatrix( int m, int n, T *A, int lda )
320
{
321
  if ( COLUMNINDEX )
322
  {
323
    for ( int j = 0; j < n; j ++ )
324
    {
325
      if ( j % 5 == 0 || j == 0 || j == n - 1 )
326
      {
327
        if ( is_same<T, pair<double, size_t>>::value || is_same<T, pair<float, size_t>>::value )
328
        {
329
          printf( "col[%10d] ", j );
330
        }
331
        else
332
        {
333
          printf( "col[%4d] ", j );
334
        }
335
      }
336
      else
337
      {
338
        if ( is_same<T, pair<double, size_t>>::value || is_same<T, pair<float, size_t>>::value )
339
        {
340
          printf( "                " );
341
        }
342
        else
343
        {
344
          printf( "          " );
345
        }
346
      }
347
    }
348
    printf( "\n" );
349
    if ( is_same<T, pair<double, size_t>>::value || is_same<T, pair<float, size_t>>::value )
350
    {
351
      printf( "===============================================================================\n" );
352
    }
353
    else
354
    {
355
      printf( "===========================================================\n" );
356
    }
357
  }
358
  printf( "A = [\n" );
359
  for ( int i = 0; i < m; i ++ )
360
  {
361
    for ( int j = 0; j < n; j ++ )
362
    {
363
      if ( is_same<T, pair<double, size_t>>::value )
364
      {
365
        auto* A_pair = reinterpret_cast<pair<double, size_t>*>( A );
366
        printf( "(% .1E,%5lu)", (double) A_pair[ j * lda + i ].first, A_pair[ j * lda + i ].second );
367
      }
368
      else if ( is_same<T, pair<float, size_t>>::value )
369
      {
370
        auto* A_pair = reinterpret_cast<pair<float, size_t>*>( A );
371
        printf( "(% .1E,%5lu)", (double) A_pair[ j * lda + i ].first, A_pair[ j * lda + i ].second );
372
      }
373
      else if ( is_same<T, double>::value )
374
      {
375
        auto* A_double = reinterpret_cast<double*>( A );
376
        if ( std::fabs( A_double[ j * lda + i ] ) < 1E-15 )
377
        {
378
          printf( "          " );
379
        }
380
        else
381
        {
382
          printf( "% .4E ", (double) A_double[ j * lda + i ] );
383
        }
384
      }
385
      else if ( is_same<T, double>::value )
386
      {
387
        auto* A_float = reinterpret_cast<float*>( A );
388
        if ( std::fabs( A_float[ j * lda + i ] ) < 1E-15 )
389
        {
390
          printf( "          " );
391
        }
392
        else
393
        {
394
          printf( "% .4E ", (double) A_float[ j * lda + i ] );
395
        }
396
      }
397
    }
398
    printf(";\n");
399
  }
400
  printf("];\n");
401
};
402
403
404
//__host__ __device__
405
static inline int hmlp_ceildiv( int x, int y )
406
{
407
    return ( x + y - 1 ) / y;
408
}
409
410
static inline int hmlp_read_nway_from_env( const char* env )
411
{
412
  int number = 1;
413
  char* str = getenv( env );
414
  if( str != NULL )
415
  {
416
    number = strtol( str, NULL, 10 );
417
  }
418
  return number;
419
};
420
421
/**
422
 *  @brief A swap function. Just in case we do not have one.
423
 *         (for GSKNN)
424
 */
425
template<typename T>
426
inline void swap( T *x, int i, int j )
427
{
428
  T tmp = x[ i ];
429
  x[ i ] = x[ j ];
430
  x[ j ] = tmp;
431
};
432
433
/**
434
 *  @brief This function is called after the root of the heap
435
 *         is replaced by an new candidate. We need to readjust
436
 *         such the heap condition is satisfied.
437
 */
438
template<typename T>
439
inline void heap_adjust
440
(
441
  T *D,
442
  int s,
443
  int n,
444
  int *I
445
)
446
{
447
  int j;
448
449
  while ( 2 * s + 1 < n )
450
  {
451
    j = 2 * s + 1;
452
    if ( ( j + 1 ) < n )
453
    {
454
      if ( D[ j ] < D[ j + 1 ] ) j ++;
455
    }
456
    if ( D[ s ] < D[ j ] )
457
    {
458
      swap<T>( D, s, j );
459
      swap<int>( I, s, j );
460
      s = j;
461
    }
462
    else break;
463
  }
464
}
465
466
template<typename T>
467
inline void heap_select
468
(
469
  int m,
470
  int r,
471
  T *x,
472
  int *alpha,
473
  T *D,
474
  int *I
475
)
476
{
477
  int i;
478
479
  for ( i = 0; i < m; i ++ )
480
  {
481
    if ( x[ i ] > D[ 0 ] )
482
    {
483
      continue;
484
    }
485
    else // Replace the root with the new query.
486
    {
487
      D[ 0 ] = x[ i ];
488
      I[ 0 ] = alpha[ i ];
489
      heap_adjust<T>( D, 0, r, I );
490
    }
491
  }
492
};
493
494
495
template<typename T>
496
void HeapAdjust
497
(
498
  size_t s, size_t n,
499
  std::pair<T, size_t> *NN
500
)
501
{
502
  while ( 2 * s + 1 < n )
503
  {
504
    size_t j = 2 * s + 1;
505
    if ( ( j + 1 ) < n )
506
    {
507
      if ( NN[ j ].first < NN[ j + 1 ].first ) j ++;
508
    }
509
    if ( NN[ s ].first < NN[ j ].first )
510
    {
511
      std::swap( NN[ s ], NN[ j ] );
512
      s = j;
513
    }
514
    else break;
515
  }
516
}; /** end HeapAdjust() */
517
518
template<typename T>
519
void HeapSelect
520
(
521
  size_t n, size_t k,
522
  std::pair<T, size_t> *Query,
523
  std::pair<T, size_t> *NN
524
)
525
{
526
  for ( size_t i = 0; i < n; i ++ )
527
  {
528
    if ( Query[ i ].first > NN[ 0 ].first )
529
    {
530
      continue;
531
    }
532
    else // Replace the root with the new query.
533
    {
534
      NN[ 0 ] = Query[ i ];
535
      HeapAdjust<T>( 0, k, NN );
536
    }
537
  }
538
}; /** end HeapSelect() */
539
540
541
/**
542
 *  @brief A bubble sort for reference.
543
 */
544
template<typename T>
545
void bubble_sort
546
(
547
  int n,
548
  T *D,
549
  int *I
550
)
551
{
552
  int i, j;
553
554
  for ( i = 0; i < n - 1; i ++ )
555
  {
556
    for ( j = 0; j < n - 1 - i; j ++ )
557
    {
558
      if ( D[ j ] > D[ j + 1 ] )
559
      {
560
        swap<T>( D, j, j + 1 );
561
        swap<int>( I, j, j + 1 );
562
      }
563
    }
564
  }
565
}; /** end bubble_sort() */
566
567
568
/**
569
 *
570
 *
571
 **/
572
class Statistic
573
{
574
  public:
575
576
    Statistic()
577
    {
578
      _num = 0;
579
      _max = std::numeric_limits<double>::min();
580
      _min = std::numeric_limits<double>::max();
581
      _avg = 0.0;
582
    };
583
584
    std::size_t _num;
585
586
    double _max;
587
588
    double _min;
589
590
    double _avg;
591
592
    void Update( double query )
593
    {
594
      // Compute the sum
595
      _avg = _num * _avg;
596
      _num += 1;
597
      _max = std::max( _max, query );
598
      _min = std::min( _min, query );
599
      _avg += query;
600
      _avg /= _num;
601
    };
602
603
    void Print()
604
    {
605
      printf( "num %5lu min %.1E max %.1E avg %.1E\n", _num, _min, _max, _avg );
606
    };
607
608
}; /** end class Statistic */
609
610
}; /** end namespace hmlp */
611
612
#endif /** define HMLP_UTIL_HPP */