GCC Code Coverage Report
Directory: . Exec Total Coverage
File: frame/base/Data.hpp Lines: 0 126 0.0 %
Date: 2019-01-14 Branches: 0 710 0.0 %

Line Exec Source
1
/**
2
 *  HMLP (High-Performance Machine Learning Primitives)
3
 *
4
 *  Copyright (C) 2014-2017, The University of Texas at Austin
5
 *
6
 *  This program is free software: you can redistribute it and/or modify
7
 *  it under the terms of the GNU General Public License as published by
8
 *  the Free Software Foundation, either version 3 of the License, or
9
 *  (at your option) any later version.
10
 *
11
 *  This program is distributed in the hope that it will be useful,
12
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
 *  GNU General Public License for more details.
15
 *
16
 *  You should have received a copy of the GNU General Public License
17
 *  along with this program. If not, see the LICENSE file.
18
 *
19
 **/
20
21
#ifndef DATA_HPP
22
#define DATA_HPP
23
24
/** Use mmap as simple out-of-core solution. */
25
#include <sys/types.h>
26
#include <sys/mman.h>
27
#include <sys/stat.h>
28
#include <unistd.h>
29
#include <fcntl.h>
30
#include <math.h>
31
32
/** Use STL and inherit vector<T>. */
33
#include <cassert>
34
#include <typeinfo>
35
#include <algorithm>
36
#include <random>
37
#include <chrono>
38
#include <set>
39
#include <unordered_set>
40
#include <map>
41
#include <unordered_map>
42
#include <vector>
43
#include <deque>
44
#include <map>
45
#include <string>
46
47
/** std::istringstream */
48
#include <iostream>
49
#include <fstream>
50
#include <sstream>
51
52
/** Use HMLP support (device, read/write). */
53
#include <base/device.hpp>
54
#include <base/runtime.hpp>
55
#include <base/util.hpp>
56
57
58
/** -lmemkind */
59
#ifdef HMLP_MIC_AVX512
60
#include <hbwmalloc.h>
61
#include <hbw_allocator.h>
62
#endif // ifdef HMLP}_MIC_AVX512
63
64
/** gpu related */
65
#ifdef HMLP_USE_CUDA
66
#include <hmlp_gpu.hpp>
67
#include <thrust/tuple.h>
68
#include <thrust/host_vector.h>
69
#include <thrust/device_vector.h>
70
#include <thrust/system/cuda/experimental/pinned_allocator.h>
71
#include <thrust/system_error.h>
72
#include <thrust/system/cuda/error.h>
73
74
template<class T>
75
class managed_allocator
76
{
77
  public:
78
79
    T* allocate( size_t n )
80
    {
81
      T* result = nullptr;
82
83
      cudaError_t error = cudaMallocManaged(
84
          &result, n * sizeof(T), cudaMemAttachGlobal );
85
86
      if ( error != cudaSuccess )
87
      {
88
        throw thrust::system_error( error, thrust::cuda_category(),
89
            "managed_allocator::allocate(): cudaMallocManaged" );
90
      }
91
92
      return result;
93
    }; /** end allocate() */
94
95
    void deallocate( T* ptr, size_t )
96
    {
97
      cudaError_t error = cudaFree( ptr );
98
99
      if ( error != cudaSuccess )
100
      {
101
        throw thrust::system_error( error, thrust::cuda_category(),
102
            "managed_allocator::deallocate(): cudaFree" );
103
      }
104
    }; /** end deallocate() */
105
};
106
#endif // ifdef HMLP_USE_CUDA
107
108
109
110
111
112
/** debug flag */
113
#define DEBUG_DATA 1
114
115
116
using namespace std;
117
118
119
120
121
namespace hmlp
122
{
123
124
#ifdef HMLP_MIC_AVX512
125
/** use hbw::allocator for Intel Xeon Phi */
126
template<class T, class Allocator = hbw::allocator<T> >
127
#elif  HMLP_USE_CUDA
128
/** use pinned (page-lock) memory for NVIDIA GPUs */
129
template<class T, class Allocator = thrust::system::cuda::experimental::pinned_allocator<T> >
130
#else
131
/** use default stl allocator */
132
template<class T, class Allocator = std::allocator<T> >
133
#endif
134
class Data : public ReadWrite, public vector<T, Allocator>
135
#ifdef HMLP_USE_CUDA
136
/** inheritate the interface fot the host-device model. */
137
, public DeviceMemory<T>
138
#endif
139
{
140
  public:
141
142
    /** Default empty constructor. */
143
    Data() : vector<T, Allocator>() {};
144
145
    /** Copy constructor for hmlp::Data. */
146
    Data( const Data<T>& other_data ) : vector<T, Allocator>( other_data )
147
    {
148
      resize_( other_data.row(), other_data.col() );
149
    }
150
151
    /** TODO: Copy constructor for std::vector. */
152
    Data( size_t m, size_t n, const vector<T>& other_vector ) : vector<T, Allocator>( other_vector )
153
    {
154
      assert( other_vector.size() == m * n );
155
      resize( m, n );
156
    };
157
158
    Data( size_t m, size_t n ) { resize( m, n ); };
159
160
    Data( size_t m, size_t n, T initT ) { resize( m, n, initT ); };
161
162
    Data( size_t m, size_t n, string &filename ) : Data( m, n )
163
    {
164
      this->read( m, n, filename );
165
    };
166
167
    void resize( size_t m, size_t n ) { resize_( m, n ); };
168
169
    void resize( size_t m, size_t n, T initT ) { resize_( m, n, initT ); };
170
171
    void reserve( size_t m, size_t n ) { reserve_( m, n ); };
172
173
    void clear() { clear_(); };
174
175
    void read( size_t m, size_t n, string &filename )
176
    {
177
      assert( this->m == m );
178
      assert( this->n == n );
179
      assert( this->size() == m * n );
180
181
      cout << filename << endl;
182
183
      ifstream file( filename.data(), ios::in|ios::binary|ios::ate );
184
      if ( file.is_open() )
185
      {
186
        auto size = file.tellg();
187
        assert( size == m * n * sizeof(T) );
188
        file.seekg( 0, ios::beg );
189
        file.read( (char*)this->data(), size );
190
        file.close();
191
      }
192
    };
193
194
		void write( std::string &filename )
195
		{
196
			ofstream myFile ( filename.data(), ios::out | ios::binary );
197
      myFile.write( (char*)(this->data()), this->size() * sizeof(T) );
198
		};
199
200
    template<int SKIP_ATTRIBUTES = 0, bool TRANS = false>
201
		void readmtx( size_t m, size_t n, string &filename )
202
		{
203
      assert( this->m == m );
204
      assert( this->n == n );
205
      assert( this->size() == m * n );
206
207
      cout << filename << endl;
208
209
      ifstream file( filename.data() );
210
			string line;
211
			if ( file.is_open() )
212
			{
213
				size_t j = 0;
214
				while ( getline( file, line ) )
215
				{
216
					if ( j == 0 ) printf( "%s\n", line.data() );
217
218
219
					if ( j % 1000 == 0 ) printf( "%4lu ", j ); fflush( stdout );
220
					if ( j >= n )
221
					{
222
						printf( "more data then execpted n %lu\n", n );
223
					}
224
225
          /** Replace all ',' and ';' with white space ' ' */
226
					replace( line.begin(), line.end(), ',', '\n' );
227
					replace( line.begin(), line.end(), ';', '\n' );
228
229
					istringstream iss( line );
230
231
					for ( size_t i = 0; i < m + SKIP_ATTRIBUTES; i ++ )
232
					{
233
						T tmp;
234
						if ( !( iss >> tmp ) )
235
						{
236
							printf( "line %lu does not have enough elements (only %lu)\n", j, i );
237
					    printf( "%s\n", line.data() );
238
							exit( 1 );
239
						}
240
						if ( i >= SKIP_ATTRIBUTES )
241
						{
242
							if ( TRANS ) (*this)[ j * m + i ] = tmp;
243
							else         (*this)[ i * n + j ] = tmp;
244
						}
245
					}
246
					j ++;
247
				}
248
				printf( "\nfinish readmatrix %s\n", filename.data() );
249
			}
250
		};
251
252
253
254
    tuple<size_t, size_t> shape() { return make_tuple( m, n ); };
255
256
257
    T* rowdata( size_t i )
258
    {
259
      assert( i < m );
260
      return ( this->data() + i );
261
    };
262
263
    T* columndata( size_t j )
264
    {
265
      assert( j < n );
266
      return ( this->data() + j * m );
267
    };
268
269
		T getvalue( size_t i ) { return (*this)[ i ]; };
270
271
		void setvalue( size_t i, T v ) { (*this)[ i ] = v; };
272
273
		T getvalue( size_t i, size_t j ) { return (*this)( i, j ); };
274
275
		void setvalue( size_t i, size_t j, T v ) { (*this)( i, j ) = v; };
276
277
    /** ESSENTIAL: return number of coumns */
278
    size_t row() const noexcept { return m; };
279
280
    /** ESSENTIAL: return number of rows */
281
    size_t col() const noexcept { return n; };
282
283
    /** ESSENTIAL: return an element */
284
    T& operator()( size_t i, size_t j )       { return (*this)[ m * j + i ]; };
285
    T  operator()( size_t i, size_t j ) const { return (*this)[ m * j + i ]; };
286
287
288
    /** ESSENTIAL: return a submatrix */
289
    Data<T> operator()( const vector<size_t>& I, const vector<size_t>& J ) const
290
    {
291
      Data<T> KIJ( I.size(), J.size() );
292
      for ( int j = 0; j < J.size(); j ++ )
293
      {
294
        for ( int i = 0; i < I.size(); i ++ )
295
        {
296
          KIJ[ j * I.size() + i ] = (*this)[ m * J[ j ] + I[ i ] ];
297
        }
298
      }
299
      return KIJ;
300
    };
301
302
    /** ESSENTIAL: */
303
    pair<T, size_t> ImportantSample( size_t j )
304
    {
305
      size_t i = std::rand() % m;
306
      pair<T, size_t> sample( (*this)( i, j ), i );
307
      return sample;
308
    };
309
310
311
    Data<T> operator()( const vector<size_t> &jmap )
312
    {
313
      Data<T> submatrix( m, jmap.size() );
314
      #pragma omp parallel for
315
      for ( int j = 0; j < jmap.size(); j ++ )
316
        for ( int i = 0; i < m; i ++ )
317
          submatrix[ j * m + i ] = (*this)[ m * jmap[ j ] + i ];
318
      return submatrix;
319
    };
320
321
    template<typename TINDEX>
322
    void GatherColumns( bool TRANS, vector<TINDEX> &jmap, Data<T> &submatrix )
323
    {
324
      if ( TRANS )
325
      {
326
        submatrix.resize( jmap.size(), m );
327
        for ( int j = 0; j < jmap.size(); j ++ )
328
          for ( int i = 0; i < m; i ++ )
329
            submatrix[ i * jmap.size() + j ] = (*this)[ m * jmap[ j ] + i ];
330
      }
331
      else
332
      {
333
        submatrix.resize( m, jmap.size() );
334
        for ( int j = 0; j < jmap.size(); j ++ )
335
          for ( int i = 0; i < m; i ++ )
336
            submatrix[ j * m + i ] = (*this)[ m * jmap[ j ] + i ];
337
      }
338
    };
339
340
    void setvalue( T value )
341
    {
342
      for ( auto it = this->begin(); it != this->end(); it ++ )
343
        (*it) = value;
344
    };
345
346
    template<bool SYMMETRIC = false>
347
    void rand( T a, T b )
348
    {
349
      default_random_engine generator;
350
      uniform_real_distribution<T> distribution( a, b );
351
352
      if ( SYMMETRIC ) assert( m == n );
353
354
      for ( std::size_t j = 0; j < n; j ++ )
355
      {
356
        for ( std::size_t i = 0; i < m; i ++ )
357
        {
358
          if ( SYMMETRIC )
359
          {
360
            if ( i > j )
361
              (*this)[ j * m + i ] = distribution( generator );
362
            else
363
              (*this)[ j * m + i ] = (*this)[ i * m + j ];
364
          }
365
          else
366
          {
367
            (*this)[ j * m + i ] = distribution( generator );
368
          }
369
        }
370
      }
371
    };
372
373
    template<bool SYMMETRIC = false>
374
    void rand()
375
    {
376
      rand<SYMMETRIC>( 0.0, 1.0 );
377
    };
378
379
    void randn( T mu, T sd )
380
    {
381
      unsigned seed = std::chrono::system_clock::now().time_since_epoch().count();
382
      std::default_random_engine generator( seed );
383
      std::normal_distribution<T> distribution( mu, sd );
384
      for ( std::size_t i = 0; i < m * n; i ++ )
385
      {
386
        (*this)[ i ] = distribution( generator );
387
      }
388
    };
389
390
    void randn() { randn( 0.0, 1.0 ); };
391
392
    template<bool USE_LOWRANK=true>
393
    void randspd( T a, T b )
394
    {
395
      std::default_random_engine generator;
396
      std::uniform_real_distribution<T> distribution( a, b );
397
398
      assert( m == n );
399
400
      if ( USE_LOWRANK )
401
      {
402
        hmlp::Data<T> X( ( std::rand() % n ) / 10 + 1, n );
403
        X.randn( a, b );
404
        xgemm
405
        (
406
          "T", "N",
407
          n, n, X.row(),
408
          1.0, X.data(), X.row(),
409
               X.data(), X.row(),
410
          0.0, this->data(), this->row()
411
        );
412
      }
413
      else /** diagonal dominating */
414
      {
415
        for ( std::size_t j = 0; j < n; j ++ )
416
        {
417
          for ( std::size_t i = 0; i < m; i ++ )
418
          {
419
            if ( i > j )
420
              (*this)[ j * m + i ] = distribution( generator );
421
            else
422
              (*this)[ j * m + i ] = (*this)[ i * m + j ];
423
424
            /** Make sure diagonal dominated */
425
            (*this)[ j * m + j ] += std::abs( (*this)[ j * m + i ] );
426
          }
427
        }
428
      }
429
    };
430
431
    void randspd()
432
    {
433
      randspd<true>( 0.0, 1.0 );
434
    };
435
436
    bool HasIllegalValue()
437
    {
438
      for ( auto it = this->begin(); it != this->end(); it ++ )
439
      {
440
        if ( std::isinf( *it ) ) return true;
441
        if ( std::isnan( *it ) ) return true;
442
      }
443
      return false;
444
    };
445
446
    void Print()
447
    {
448
      printf( "Data in %lu * %lu\n", m, n );
449
      if ( m < 11 && n < 7 )
450
      {
451
        hmlp_printmatrix( m, n, this->data(), m );
452
      }
453
      else
454
      {
455
        hmlp_printmatrix( 10, 6, this->data(), m );
456
      }
457
    };
458
459
    void WriteFile( char *name )
460
    {
461
      FILE * pFile;
462
      pFile = fopen ( name, "w" );
463
      fprintf( pFile, "%s=[\n", name );
464
      for ( size_t i = 0; i < m; i ++ )
465
      {
466
        for ( size_t j = 0; j < n; j ++ )
467
        {
468
          fprintf( pFile, "%lf,", (*this)( i, j ) );
469
        }
470
        fprintf( pFile, ";\n" );
471
      }
472
      fprintf( pFile, "];\n" );
473
      fclose( pFile );
474
    };
475
476
    template<typename TINDEX>
477
    double flops( TINDEX na, TINDEX nb ) { return 0.0; };
478
479
#ifdef HMLP_USE_CUDA
480
481
    void CacheD( hmlp::Device *dev )
482
    {
483
      DeviceMemory<T>::CacheD( dev, this->size() * sizeof(T) );
484
    };
485
486
    void AllocateD( hmlp::Device *dev )
487
    {
488
      double beg = omp_get_wtime();
489
      DeviceMemory<T>::AllocateD( dev, m * n * sizeof(T) );
490
      double alloc_time = omp_get_wtime() - beg;
491
      printf( "AllocateD %5.3lf\n", alloc_time );
492
    };
493
494
    void FreeD( Device *dev )
495
    {
496
      DeviceMemory<T>::FreeD( dev, this->size() * sizeof(T) );
497
    };
498
499
    void PrefetchH2D( Device *dev, int stream_id )
500
    {
501
      double beg = omp_get_wtime();
502
      DeviceMemory<T>::PrefetchH2D
503
        ( dev, stream_id, m * n * sizeof(T), this->data() );
504
      double alloc_time = omp_get_wtime() - beg;
505
      //printf( "PrefetchH2D %5.3lf\n", alloc_time );
506
    };
507
508
    void PrefetchD2H( Device *dev, int stream_id )
509
    {
510
      DeviceMemory<T>::PrefetchD2H
511
        ( dev, stream_id, m * n * sizeof(T), this->data() );
512
    };
513
514
    void WaitPrefetch( Device *dev, int stream_id )
515
    {
516
      DeviceMemory<T>::Wait( dev, stream_id );
517
    };
518
519
    void FetchH2D( Device *dev )
520
    {
521
      DeviceMemory<T>::FetchH2D( dev, m * n * sizeof(T), this->data() );
522
    };
523
524
    void FetchD2H( Device *dev )
525
    {
526
      DeviceMemory<T>::FetchD2H( dev, m * n * sizeof(T), this->data() );
527
    };
528
#endif
529
530
  private:
531
532
    void resize_( size_t m, size_t n )
533
    {
534
      vector<T, Allocator>::resize( m * n );
535
      this->m = m;
536
      this->n = n;
537
    };
538
539
    void resize_( size_t m, size_t n, T initT )
540
    {
541
      vector<T, Allocator>::resize( m * n, initT );
542
      this->m = m; this->n = n;
543
    };
544
545
    void reserve_( size_t m, size_t n )
546
    {
547
      vector<T, Allocator>::reserve( m * n );
548
    }
549
550
    void clear_()
551
    {
552
      vector<T, Allocator>::clear();
553
      this->m = 0;
554
      this->n = 0;
555
    }
556
557
    size_t m = 0;
558
559
    size_t n = 0;
560
561
}; /** end class Data */
562
563
564
565
566
567
568
template<typename T, class Allocator = std::allocator<T> >
569
class SparseData : public ReadWrite
570
{
571
  public:
572
573
    /** (Default) constructor. */
574
    SparseData( size_t m = 0, size_t n = 0, size_t nnz = 0, bool issymmetric = true )
575
    {
576
      Resize( m, n, nnz, issymmetric );
577
    };
578
579
    /** Adjust the storage size. */
580
    void Resize( size_t m, size_t n, size_t nnz, bool issymmetric )
581
    {
582
      this->m = m;
583
      this->n = n;
584
      this->nnz = nnz;
585
      this->val.resize( nnz, 0.0 );
586
      this->row_ind.resize( nnz, 0 );
587
      this->col_ptr.resize( n + 1, 0 );
588
      this->issymmetric = issymmetric;
589
    }; /** end Resize() */
590
591
    /** Construct from three arrays: val[ nnz ],row_ind[ nnz ], and col_ptr[ n + 1 ]. */
592
    void fromCSC( size_t m, size_t n, size_t nnz, bool issymmetric,
593
        const T *val, const size_t *row_ind, const size_t *col_ptr )
594
    {
595
      Resize( m, n, nnz, issymmetric );
596
      for ( size_t i = 0; i < nnz; i ++ )
597
      {
598
        this->val[ i ] = val[ i ];
599
        this->row_ind[ i ] = row_ind[ i ];
600
      }
601
      for ( size_t j = 0; j < n + 1; j ++ )
602
      {
603
        this->col_ptr[ j ] = col_ptr[ j ];
604
      }
605
    }; /** end fromCSC()*/
606
607
    /** Retrive an element K( i, j ).  */
608
    T operator () ( size_t i, size_t j ) const
609
    {
610
      if ( issymmetric && i < j ) std::swap( i, j );
611
      auto row_beg = col_ptr[ j ];
612
      auto row_end = col_ptr[ j + 1 ];
613
      /** Early return if there is no nonzero entry in this column. */
614
      if ( row_beg == row_end ) return 0;
615
      /** Search (BST) for row indices. */
616
      auto lower = find( row_ind.begin() + row_beg, row_ind.begin() + row_end - 1, i );
617
      //if ( lower != row_ind.end() ) printf( "lower %lu, i %lu, j %lu, row_beg %lu, row_end %lu\n",
618
      //    *lower, i, j, row_beg, row_end ); fflush( stdout );
619
      /** If the lower bound matches, then return the value. */
620
      if ( *lower == i ) return val[ distance( row_ind.begin(), lower ) ];
621
      /** Otherwise, return 0. */
622
      return 0;
623
    }; /** end operator () */
624
625
    /** Retrive a subblock K( I, J ).*/
626
    Data<T> operator()( const vector<size_t> &I, const vector<size_t> &J ) const
627
    {
628
      Data<T> KIJ( I.size(), J.size() );
629
      /** Evaluate Kij element by element. */
630
      for ( size_t j = 0; j < J.size(); j ++ )
631
        for ( size_t i = 0; i < I.size(); i ++ )
632
          KIJ( i, j ) = (*this)( I[ i ], J[ j ] );
633
      /** Return submatrix KIJ. */
634
      return KIJ;
635
    }; /** end operator () */
636
637
    size_t ColPtr( size_t j ) { return col_ptr[ j ]; };
638
639
    size_t RowInd( size_t offset ) { return row_ind[ offset ]; };
640
641
    T Value( size_t offset ) { return val[ offset ]; };
642
643
    pair<T, size_t> ImportantSample( size_t j )
644
    {
645
      size_t offset = col_ptr[ j ] + rand() % ( col_ptr[ j + 1 ] - col_ptr[ j ] );
646
      pair<T, size_t> sample( val[ offset ], row_ind[ offset ] );
647
      return sample;
648
    };
649
650
    void Print()
651
    {
652
      for ( size_t j = 0; j < n; j ++ ) printf( "%8lu ", j );
653
      printf( "\n" );
654
      for ( size_t i = 0; i < m; i ++ )
655
      {
656
        for ( size_t j = 0; j < n; j ++ )
657
        {
658
          printf( "% 3.1E ", (*this)( i, j ) );
659
        }
660
        printf( "\n" );
661
      }
662
    }; // end Print()
663
664
665
    /**
666
     *  @brief Read matrix market format (ijv) format. Only lower triangular
667
     *         part is stored
668
     */
669
    template<bool LOWERTRIANGULAR, bool ISZEROBASE, bool IJONLY = false>
670
    void readmtx( string &filename )
671
    {
672
      size_t m_mtx, n_mtx, nnz_mtx;
673
674
      vector<deque<size_t>> full_row_ind( n );
675
      vector<deque<T>> full_val( n );
676
677
      // Read all tuples.
678
      printf( "%s ", filename.data() ); fflush( stdout );
679
      ifstream file( filename.data() );
680
      string line;
681
      if ( file.is_open() )
682
      {
683
        size_t nnz_count = 0;
684
685
        while ( std::getline( file, line ) )
686
        {
687
          if ( line.size() )
688
          {
689
            if ( line[ 0 ] != '%' )
690
            {
691
              std::istringstream iss( line );
692
              iss >> m_mtx >> n_mtx >> nnz_mtx;
693
              assert( this->m == m_mtx );
694
              assert( this->n == n_mtx );
695
              assert( this->nnz == nnz_mtx );
696
              break;
697
            }
698
          }
699
        }
700
701
        while ( std::getline( file, line ) )
702
        {
703
          if ( nnz_count % ( nnz / 10 ) == 0 )
704
          {
705
            printf( "%lu%% ", ( nnz_count * 100 ) / nnz ); fflush( stdout );
706
          }
707
708
          std::istringstream iss( line );
709
710
					size_t i, j;
711
          T v;
712
713
          if ( IJONLY )
714
          {
715
            if ( !( iss >> i >> j ) )
716
            {
717
              printf( "line %lu has illegle format\n", nnz_count );
718
              break;
719
            }
720
            v = 1;
721
          }
722
          else
723
          {
724
            if ( !( iss >> i >> j >> v ) )
725
            {
726
              printf( "line %lu has illegle format\n", nnz_count );
727
              break;
728
            }
729
          }
730
731
          if ( !ISZEROBASE )
732
          {
733
            i -= 1;
734
            j -= 1;
735
          }
736
737
          if ( v != 0.0 )
738
          {
739
            full_row_ind[ j ].push_back( i );
740
            full_val[ j ].push_back( v );
741
742
            if ( LOWERTRIANGULAR && i > j  )
743
            {
744
              full_row_ind[ i ].push_back( j );
745
              full_val[ i ].push_back( v );
746
            }
747
          }
748
          nnz_count ++;
749
        }
750
        assert( nnz_count == nnz );
751
      }
752
      printf( "Done.\n" ); fflush( stdout );
753
      // Close the file.
754
      file.close();
755
756
      //printf( "Here nnz %lu\n", nnz );
757
758
      // Recount nnz for the full storage.
759
      size_t full_nnz = 0;
760
      for ( size_t j = 0; j < n; j ++ )
761
      {
762
        col_ptr[ j ] = full_nnz;
763
        full_nnz += full_row_ind[ j ].size();
764
      }
765
      nnz = full_nnz;
766
      col_ptr[ n ] = full_nnz;
767
      row_ind.resize( full_nnz );
768
      val.resize( full_nnz );
769
770
      //printf( "Here nnz %lu\n", nnz );
771
772
      //full_nnz = 0;
773
      //for ( size_t j = 0; j < n; j ++ )
774
      //{
775
      //  for ( size_t i = 0; i < full_row_ind[ j ].size(); i ++ )
776
      //  {
777
      //    row_ind[ full_nnz ] = full_row_ind[ j ][ i ];
778
      //    val[ full_nnz ] = full_val[ j ][ i ];
779
      //    full_nnz ++;
780
      //  }
781
      //}
782
783
      //printf( "Close the file. Reformat.\n" );
784
785
      #pragma omp parallel for
786
      for ( size_t j = 0; j < n; j ++ )
787
      {
788
        for ( size_t i = 0; i < full_row_ind[ j ].size(); i ++ )
789
        {
790
          row_ind[ col_ptr[ j ] + i ] = full_row_ind[ j ][ i ];
791
          val[ col_ptr[ j ] + i ] = full_val[ j ][ i ];
792
        }
793
      }
794
795
      printf( "finish readmatrix %s\n", filename.data() ); fflush( stdout );
796
    };
797
798
799
    size_t row() { return m; };
800
801
    size_t col() { return n; };
802
803
    template<typename TINDEX>
804
    double flops( TINDEX na, TINDEX nb ) { return 0.0; };
805
806
  private:
807
808
    size_t m = 0;
809
810
    size_t n = 0;
811
812
    size_t nnz = 0;
813
814
    bool issymmetric = false;
815
816
    vector<T, Allocator> val;
817
818
    vector<size_t> row_ind;
819
820
    vector<size_t> col_ptr;
821
822
}; /** end class CSC */
823
824
825
826
#ifdef HMLP_MIC_AVX512
827
template<class T, class Allocator = hbw::allocator<T> >
828
#else
829
template<class T, class Allocator = std::allocator<T> >
830
#endif
831
class OOCData : public ReadWrite
832
{
833
  public:
834
835
    OOCData() {};
836
837
    OOCData( size_t m, size_t n, string filename ) { Set( m, n, filename ); };
838
839
    ~OOCData()
840
    {
841
      /** Unmap */
842
      int rc = munmap( mmappedData, m * n * sizeof(T) );
843
      assert( rc == 0 );
844
      close( fd );
845
      printf( "finish readmatrix %s\n", filename.data() );
846
    };
847
848
    void Set( size_t m, size_t n, string filename )
849
    {
850
      this->m = m;
851
      this->n = n;
852
      this->filename = filename;
853
      /** Open the file */
854
      fd = open( filename.data(), O_RDONLY, 0 );
855
      assert( fd != -1 );
856
#ifdef __APPLE__
857
      mmappedData = (T*)mmap( NULL, m * n * sizeof(T),
858
          PROT_READ, MAP_PRIVATE, fd, 0 );
859
#else /** Assume Linux */
860
      mmappedData = (T*)mmap( NULL, m * n * sizeof(T),
861
          PROT_READ, MAP_PRIVATE | MAP_POPULATE, fd, 0 );
862
#endif
863
      assert( mmappedData != MAP_FAILED );
864
      cout << filename << endl;
865
    };
866
867
    //template<typename TINDEX>
868
    T operator()( size_t i, size_t j ) const
869
    {
870
      assert( i < m && j < n );
871
      return mmappedData[ j * m + i ];
872
    };
873
874
    //template<typename TINDEX>
875
    Data<T> operator()( const vector<size_t>& I, const vector<size_t>& J ) const
876
    {
877
      Data<T> KIJ( I.size(), J.size() );
878
      for ( int j = 0; j < J.size(); j ++ )
879
        for ( int i = 0; i < I.size(); i ++ )
880
          KIJ[ j * I.size() + i ] = (*this)( I[ i ], J[ j ] );
881
      return KIJ;
882
    };
883
884
    template<typename TINDEX>
885
    pair<T, TINDEX> ImportantSample( TINDEX j )
886
    {
887
      TINDEX i = std::rand() % m;
888
      pair<T, TINDEX> sample( (*this)( i, j ), i );
889
      return sample;
890
    };
891
892
    size_t row() const noexcept { return m; };
893
894
    size_t col() const noexcept { return n; };
895
896
    template<typename TINDEX>
897
    double flops( TINDEX na, TINDEX nb ) { return 0.0; };
898
899
  private:
900
901
    size_t m = 0;
902
903
    size_t n = 0;
904
905
    string filename;
906
907
    /** Use mmap */
908
    T *mmappedData = NULL;
909
910
    int fd = -1;
911
912
}; /** end class OOCData */
913
914
915
916
917
918
919
920
}; /** end namespace hmlp */
921
922
#endif /** define DATA_HPP */