1 |
|
/** |
2 |
|
* HMLP (High-Performance Machine Learning Primitives) |
3 |
|
* |
4 |
|
* Copyright (C) 2014-2017, The University of Texas at Austin |
5 |
|
* |
6 |
|
* This program is free software: you can redistribute it and/or modify |
7 |
|
* it under the terms of the GNU General Public License as published by |
8 |
|
* the Free Software Foundation, either version 3 of the License, or |
9 |
|
* (at your option) any later version. |
10 |
|
* |
11 |
|
* This program is distributed in the hope that it will be useful, |
12 |
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 |
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
14 |
|
* GNU General Public License for more details. |
15 |
|
* |
16 |
|
* You should have received a copy of the GNU General Public License |
17 |
|
* along with this program. If not, see the LICENSE file. |
18 |
|
* |
19 |
|
**/ |
20 |
|
|
21 |
|
#ifndef DATA_HPP |
22 |
|
#define DATA_HPP |
23 |
|
|
24 |
|
/** Use mmap as simple out-of-core solution. */ |
25 |
|
#include <sys/types.h> |
26 |
|
#include <sys/mman.h> |
27 |
|
#include <sys/stat.h> |
28 |
|
#include <unistd.h> |
29 |
|
#include <fcntl.h> |
30 |
|
#include <math.h> |
31 |
|
|
32 |
|
/** Use STL and inherit vector<T>. */ |
33 |
|
#include <cassert> |
34 |
|
#include <typeinfo> |
35 |
|
#include <algorithm> |
36 |
|
#include <random> |
37 |
|
#include <chrono> |
38 |
|
#include <set> |
39 |
|
#include <unordered_set> |
40 |
|
#include <map> |
41 |
|
#include <unordered_map> |
42 |
|
#include <vector> |
43 |
|
#include <deque> |
44 |
|
#include <map> |
45 |
|
#include <string> |
46 |
|
|
47 |
|
/** std::istringstream */ |
48 |
|
#include <iostream> |
49 |
|
#include <fstream> |
50 |
|
#include <sstream> |
51 |
|
|
52 |
|
/** Use HMLP support (device, read/write). */ |
53 |
|
#include <base/device.hpp> |
54 |
|
#include <base/runtime.hpp> |
55 |
|
#include <base/util.hpp> |
56 |
|
|
57 |
|
|
58 |
|
/** -lmemkind */ |
59 |
|
#ifdef HMLP_MIC_AVX512 |
60 |
|
#include <hbwmalloc.h> |
61 |
|
#include <hbw_allocator.h> |
62 |
|
#endif // ifdef HMLP}_MIC_AVX512 |
63 |
|
|
64 |
|
/** gpu related */ |
65 |
|
#ifdef HMLP_USE_CUDA |
66 |
|
#include <hmlp_gpu.hpp> |
67 |
|
#include <thrust/tuple.h> |
68 |
|
#include <thrust/host_vector.h> |
69 |
|
#include <thrust/device_vector.h> |
70 |
|
#include <thrust/system/cuda/experimental/pinned_allocator.h> |
71 |
|
#include <thrust/system_error.h> |
72 |
|
#include <thrust/system/cuda/error.h> |
73 |
|
|
74 |
|
template<class T> |
75 |
|
class managed_allocator |
76 |
|
{ |
77 |
|
public: |
78 |
|
|
79 |
|
T* allocate( size_t n ) |
80 |
|
{ |
81 |
|
T* result = nullptr; |
82 |
|
|
83 |
|
cudaError_t error = cudaMallocManaged( |
84 |
|
&result, n * sizeof(T), cudaMemAttachGlobal ); |
85 |
|
|
86 |
|
if ( error != cudaSuccess ) |
87 |
|
{ |
88 |
|
throw thrust::system_error( error, thrust::cuda_category(), |
89 |
|
"managed_allocator::allocate(): cudaMallocManaged" ); |
90 |
|
} |
91 |
|
|
92 |
|
return result; |
93 |
|
}; /** end allocate() */ |
94 |
|
|
95 |
|
void deallocate( T* ptr, size_t ) |
96 |
|
{ |
97 |
|
cudaError_t error = cudaFree( ptr ); |
98 |
|
|
99 |
|
if ( error != cudaSuccess ) |
100 |
|
{ |
101 |
|
throw thrust::system_error( error, thrust::cuda_category(), |
102 |
|
"managed_allocator::deallocate(): cudaFree" ); |
103 |
|
} |
104 |
|
}; /** end deallocate() */ |
105 |
|
}; |
106 |
|
#endif // ifdef HMLP_USE_CUDA |
107 |
|
|
108 |
|
|
109 |
|
|
110 |
|
|
111 |
|
|
112 |
|
/** debug flag */ |
113 |
|
#define DEBUG_DATA 1 |
114 |
|
|
115 |
|
|
116 |
|
using namespace std; |
117 |
|
|
118 |
|
|
119 |
|
|
120 |
|
|
121 |
|
namespace hmlp |
122 |
|
{ |
123 |
|
|
124 |
|
#ifdef HMLP_MIC_AVX512 |
125 |
|
/** use hbw::allocator for Intel Xeon Phi */ |
126 |
|
template<class T, class Allocator = hbw::allocator<T> > |
127 |
|
#elif HMLP_USE_CUDA |
128 |
|
/** use pinned (page-lock) memory for NVIDIA GPUs */ |
129 |
|
template<class T, class Allocator = thrust::system::cuda::experimental::pinned_allocator<T> > |
130 |
|
#else |
131 |
|
/** use default stl allocator */ |
132 |
|
template<class T, class Allocator = std::allocator<T> > |
133 |
|
#endif |
134 |
|
class Data : public ReadWrite, public vector<T, Allocator> |
135 |
|
#ifdef HMLP_USE_CUDA |
136 |
|
/** inheritate the interface fot the host-device model. */ |
137 |
|
, public DeviceMemory<T> |
138 |
|
#endif |
139 |
|
{ |
140 |
|
public: |
141 |
|
|
142 |
|
/** Default empty constructor. */ |
143 |
|
Data() : vector<T, Allocator>() {}; |
144 |
|
|
145 |
|
/** Copy constructor for hmlp::Data. */ |
146 |
|
Data( const Data<T>& other_data ) : vector<T, Allocator>( other_data ) |
147 |
|
{ |
148 |
|
resize_( other_data.row(), other_data.col() ); |
149 |
|
} |
150 |
|
|
151 |
|
/** TODO: Copy constructor for std::vector. */ |
152 |
|
Data( size_t m, size_t n, const vector<T>& other_vector ) : vector<T, Allocator>( other_vector ) |
153 |
|
{ |
154 |
|
assert( other_vector.size() == m * n ); |
155 |
|
resize( m, n ); |
156 |
|
}; |
157 |
|
|
158 |
|
Data( size_t m, size_t n ) { resize( m, n ); }; |
159 |
|
|
160 |
|
Data( size_t m, size_t n, T initT ) { resize( m, n, initT ); }; |
161 |
|
|
162 |
|
Data( size_t m, size_t n, string &filename ) : Data( m, n ) |
163 |
|
{ |
164 |
|
this->read( m, n, filename ); |
165 |
|
}; |
166 |
|
|
167 |
|
void resize( size_t m, size_t n ) { resize_( m, n ); }; |
168 |
|
|
169 |
|
void resize( size_t m, size_t n, T initT ) { resize_( m, n, initT ); }; |
170 |
|
|
171 |
|
void reserve( size_t m, size_t n ) { reserve_( m, n ); }; |
172 |
|
|
173 |
|
void clear() { clear_(); }; |
174 |
|
|
175 |
|
void read( size_t m, size_t n, string &filename ) |
176 |
|
{ |
177 |
|
assert( this->m == m ); |
178 |
|
assert( this->n == n ); |
179 |
|
assert( this->size() == m * n ); |
180 |
|
|
181 |
|
cout << filename << endl; |
182 |
|
|
183 |
|
ifstream file( filename.data(), ios::in|ios::binary|ios::ate ); |
184 |
|
if ( file.is_open() ) |
185 |
|
{ |
186 |
|
auto size = file.tellg(); |
187 |
|
assert( size == m * n * sizeof(T) ); |
188 |
|
file.seekg( 0, ios::beg ); |
189 |
|
file.read( (char*)this->data(), size ); |
190 |
|
file.close(); |
191 |
|
} |
192 |
|
}; |
193 |
|
|
194 |
|
void write( std::string &filename ) |
195 |
|
{ |
196 |
|
ofstream myFile ( filename.data(), ios::out | ios::binary ); |
197 |
|
myFile.write( (char*)(this->data()), this->size() * sizeof(T) ); |
198 |
|
}; |
199 |
|
|
200 |
|
template<int SKIP_ATTRIBUTES = 0, bool TRANS = false> |
201 |
|
void readmtx( size_t m, size_t n, string &filename ) |
202 |
|
{ |
203 |
|
assert( this->m == m ); |
204 |
|
assert( this->n == n ); |
205 |
|
assert( this->size() == m * n ); |
206 |
|
|
207 |
|
cout << filename << endl; |
208 |
|
|
209 |
|
ifstream file( filename.data() ); |
210 |
|
string line; |
211 |
|
if ( file.is_open() ) |
212 |
|
{ |
213 |
|
size_t j = 0; |
214 |
|
while ( getline( file, line ) ) |
215 |
|
{ |
216 |
|
if ( j == 0 ) printf( "%s\n", line.data() ); |
217 |
|
|
218 |
|
|
219 |
|
if ( j % 1000 == 0 ) printf( "%4lu ", j ); fflush( stdout ); |
220 |
|
if ( j >= n ) |
221 |
|
{ |
222 |
|
printf( "more data then execpted n %lu\n", n ); |
223 |
|
} |
224 |
|
|
225 |
|
/** Replace all ',' and ';' with white space ' ' */ |
226 |
|
replace( line.begin(), line.end(), ',', '\n' ); |
227 |
|
replace( line.begin(), line.end(), ';', '\n' ); |
228 |
|
|
229 |
|
istringstream iss( line ); |
230 |
|
|
231 |
|
for ( size_t i = 0; i < m + SKIP_ATTRIBUTES; i ++ ) |
232 |
|
{ |
233 |
|
T tmp; |
234 |
|
if ( !( iss >> tmp ) ) |
235 |
|
{ |
236 |
|
printf( "line %lu does not have enough elements (only %lu)\n", j, i ); |
237 |
|
printf( "%s\n", line.data() ); |
238 |
|
exit( 1 ); |
239 |
|
} |
240 |
|
if ( i >= SKIP_ATTRIBUTES ) |
241 |
|
{ |
242 |
|
if ( TRANS ) (*this)[ j * m + i ] = tmp; |
243 |
|
else (*this)[ i * n + j ] = tmp; |
244 |
|
} |
245 |
|
} |
246 |
|
j ++; |
247 |
|
} |
248 |
|
printf( "\nfinish readmatrix %s\n", filename.data() ); |
249 |
|
} |
250 |
|
}; |
251 |
|
|
252 |
|
|
253 |
|
|
254 |
|
tuple<size_t, size_t> shape() { return make_tuple( m, n ); }; |
255 |
|
|
256 |
|
|
257 |
|
T* rowdata( size_t i ) |
258 |
|
{ |
259 |
|
assert( i < m ); |
260 |
|
return ( this->data() + i ); |
261 |
|
}; |
262 |
|
|
263 |
|
T* columndata( size_t j ) |
264 |
|
{ |
265 |
|
assert( j < n ); |
266 |
|
return ( this->data() + j * m ); |
267 |
|
}; |
268 |
|
|
269 |
|
T getvalue( size_t i ) { return (*this)[ i ]; }; |
270 |
|
|
271 |
|
void setvalue( size_t i, T v ) { (*this)[ i ] = v; }; |
272 |
|
|
273 |
|
T getvalue( size_t i, size_t j ) { return (*this)( i, j ); }; |
274 |
|
|
275 |
|
void setvalue( size_t i, size_t j, T v ) { (*this)( i, j ) = v; }; |
276 |
|
|
277 |
|
/** ESSENTIAL: return number of coumns */ |
278 |
|
size_t row() const noexcept { return m; }; |
279 |
|
|
280 |
|
/** ESSENTIAL: return number of rows */ |
281 |
|
size_t col() const noexcept { return n; }; |
282 |
|
|
283 |
|
/** ESSENTIAL: return an element */ |
284 |
|
T& operator()( size_t i, size_t j ) { return (*this)[ m * j + i ]; }; |
285 |
|
T operator()( size_t i, size_t j ) const { return (*this)[ m * j + i ]; }; |
286 |
|
|
287 |
|
|
288 |
|
/** ESSENTIAL: return a submatrix */ |
289 |
|
Data<T> operator()( const vector<size_t>& I, const vector<size_t>& J ) const |
290 |
|
{ |
291 |
|
Data<T> KIJ( I.size(), J.size() ); |
292 |
|
for ( int j = 0; j < J.size(); j ++ ) |
293 |
|
{ |
294 |
|
for ( int i = 0; i < I.size(); i ++ ) |
295 |
|
{ |
296 |
|
KIJ[ j * I.size() + i ] = (*this)[ m * J[ j ] + I[ i ] ]; |
297 |
|
} |
298 |
|
} |
299 |
|
return KIJ; |
300 |
|
}; |
301 |
|
|
302 |
|
/** ESSENTIAL: */ |
303 |
|
pair<T, size_t> ImportantSample( size_t j ) |
304 |
|
{ |
305 |
|
size_t i = std::rand() % m; |
306 |
|
pair<T, size_t> sample( (*this)( i, j ), i ); |
307 |
|
return sample; |
308 |
|
}; |
309 |
|
|
310 |
|
|
311 |
|
Data<T> operator()( const vector<size_t> &jmap ) |
312 |
|
{ |
313 |
|
Data<T> submatrix( m, jmap.size() ); |
314 |
|
#pragma omp parallel for |
315 |
|
for ( int j = 0; j < jmap.size(); j ++ ) |
316 |
|
for ( int i = 0; i < m; i ++ ) |
317 |
|
submatrix[ j * m + i ] = (*this)[ m * jmap[ j ] + i ]; |
318 |
|
return submatrix; |
319 |
|
}; |
320 |
|
|
321 |
|
template<typename TINDEX> |
322 |
|
void GatherColumns( bool TRANS, vector<TINDEX> &jmap, Data<T> &submatrix ) |
323 |
|
{ |
324 |
|
if ( TRANS ) |
325 |
|
{ |
326 |
|
submatrix.resize( jmap.size(), m ); |
327 |
|
for ( int j = 0; j < jmap.size(); j ++ ) |
328 |
|
for ( int i = 0; i < m; i ++ ) |
329 |
|
submatrix[ i * jmap.size() + j ] = (*this)[ m * jmap[ j ] + i ]; |
330 |
|
} |
331 |
|
else |
332 |
|
{ |
333 |
|
submatrix.resize( m, jmap.size() ); |
334 |
|
for ( int j = 0; j < jmap.size(); j ++ ) |
335 |
|
for ( int i = 0; i < m; i ++ ) |
336 |
|
submatrix[ j * m + i ] = (*this)[ m * jmap[ j ] + i ]; |
337 |
|
} |
338 |
|
}; |
339 |
|
|
340 |
|
void setvalue( T value ) |
341 |
|
{ |
342 |
|
for ( auto it = this->begin(); it != this->end(); it ++ ) |
343 |
|
(*it) = value; |
344 |
|
}; |
345 |
|
|
346 |
|
template<bool SYMMETRIC = false> |
347 |
|
void rand( T a, T b ) |
348 |
|
{ |
349 |
|
default_random_engine generator; |
350 |
|
uniform_real_distribution<T> distribution( a, b ); |
351 |
|
|
352 |
|
if ( SYMMETRIC ) assert( m == n ); |
353 |
|
|
354 |
|
for ( std::size_t j = 0; j < n; j ++ ) |
355 |
|
{ |
356 |
|
for ( std::size_t i = 0; i < m; i ++ ) |
357 |
|
{ |
358 |
|
if ( SYMMETRIC ) |
359 |
|
{ |
360 |
|
if ( i > j ) |
361 |
|
(*this)[ j * m + i ] = distribution( generator ); |
362 |
|
else |
363 |
|
(*this)[ j * m + i ] = (*this)[ i * m + j ]; |
364 |
|
} |
365 |
|
else |
366 |
|
{ |
367 |
|
(*this)[ j * m + i ] = distribution( generator ); |
368 |
|
} |
369 |
|
} |
370 |
|
} |
371 |
|
}; |
372 |
|
|
373 |
|
template<bool SYMMETRIC = false> |
374 |
|
void rand() |
375 |
|
{ |
376 |
|
rand<SYMMETRIC>( 0.0, 1.0 ); |
377 |
|
}; |
378 |
|
|
379 |
|
void randn( T mu, T sd ) |
380 |
|
{ |
381 |
|
unsigned seed = std::chrono::system_clock::now().time_since_epoch().count(); |
382 |
|
std::default_random_engine generator( seed ); |
383 |
|
std::normal_distribution<T> distribution( mu, sd ); |
384 |
|
for ( std::size_t i = 0; i < m * n; i ++ ) |
385 |
|
{ |
386 |
|
(*this)[ i ] = distribution( generator ); |
387 |
|
} |
388 |
|
}; |
389 |
|
|
390 |
|
void randn() { randn( 0.0, 1.0 ); }; |
391 |
|
|
392 |
|
template<bool USE_LOWRANK=true> |
393 |
|
void randspd( T a, T b ) |
394 |
|
{ |
395 |
|
std::default_random_engine generator; |
396 |
|
std::uniform_real_distribution<T> distribution( a, b ); |
397 |
|
|
398 |
|
assert( m == n ); |
399 |
|
|
400 |
|
if ( USE_LOWRANK ) |
401 |
|
{ |
402 |
|
hmlp::Data<T> X( ( std::rand() % n ) / 10 + 1, n ); |
403 |
|
X.randn( a, b ); |
404 |
|
xgemm |
405 |
|
( |
406 |
|
"T", "N", |
407 |
|
n, n, X.row(), |
408 |
|
1.0, X.data(), X.row(), |
409 |
|
X.data(), X.row(), |
410 |
|
0.0, this->data(), this->row() |
411 |
|
); |
412 |
|
} |
413 |
|
else /** diagonal dominating */ |
414 |
|
{ |
415 |
|
for ( std::size_t j = 0; j < n; j ++ ) |
416 |
|
{ |
417 |
|
for ( std::size_t i = 0; i < m; i ++ ) |
418 |
|
{ |
419 |
|
if ( i > j ) |
420 |
|
(*this)[ j * m + i ] = distribution( generator ); |
421 |
|
else |
422 |
|
(*this)[ j * m + i ] = (*this)[ i * m + j ]; |
423 |
|
|
424 |
|
/** Make sure diagonal dominated */ |
425 |
|
(*this)[ j * m + j ] += std::abs( (*this)[ j * m + i ] ); |
426 |
|
} |
427 |
|
} |
428 |
|
} |
429 |
|
}; |
430 |
|
|
431 |
|
void randspd() |
432 |
|
{ |
433 |
|
randspd<true>( 0.0, 1.0 ); |
434 |
|
}; |
435 |
|
|
436 |
|
bool HasIllegalValue() |
437 |
|
{ |
438 |
|
for ( auto it = this->begin(); it != this->end(); it ++ ) |
439 |
|
{ |
440 |
|
if ( std::isinf( *it ) ) return true; |
441 |
|
if ( std::isnan( *it ) ) return true; |
442 |
|
} |
443 |
|
return false; |
444 |
|
}; |
445 |
|
|
446 |
|
void Print() |
447 |
|
{ |
448 |
|
printf( "Data in %lu * %lu\n", m, n ); |
449 |
|
if ( m < 11 && n < 7 ) |
450 |
|
{ |
451 |
|
hmlp_printmatrix( m, n, this->data(), m ); |
452 |
|
} |
453 |
|
else |
454 |
|
{ |
455 |
|
hmlp_printmatrix( 10, 6, this->data(), m ); |
456 |
|
} |
457 |
|
}; |
458 |
|
|
459 |
|
void WriteFile( char *name ) |
460 |
|
{ |
461 |
|
FILE * pFile; |
462 |
|
pFile = fopen ( name, "w" ); |
463 |
|
fprintf( pFile, "%s=[\n", name ); |
464 |
|
for ( size_t i = 0; i < m; i ++ ) |
465 |
|
{ |
466 |
|
for ( size_t j = 0; j < n; j ++ ) |
467 |
|
{ |
468 |
|
fprintf( pFile, "%lf,", (*this)( i, j ) ); |
469 |
|
} |
470 |
|
fprintf( pFile, ";\n" ); |
471 |
|
} |
472 |
|
fprintf( pFile, "];\n" ); |
473 |
|
fclose( pFile ); |
474 |
|
}; |
475 |
|
|
476 |
|
template<typename TINDEX> |
477 |
|
double flops( TINDEX na, TINDEX nb ) { return 0.0; }; |
478 |
|
|
479 |
|
#ifdef HMLP_USE_CUDA |
480 |
|
|
481 |
|
void CacheD( hmlp::Device *dev ) |
482 |
|
{ |
483 |
|
DeviceMemory<T>::CacheD( dev, this->size() * sizeof(T) ); |
484 |
|
}; |
485 |
|
|
486 |
|
void AllocateD( hmlp::Device *dev ) |
487 |
|
{ |
488 |
|
double beg = omp_get_wtime(); |
489 |
|
DeviceMemory<T>::AllocateD( dev, m * n * sizeof(T) ); |
490 |
|
double alloc_time = omp_get_wtime() - beg; |
491 |
|
printf( "AllocateD %5.3lf\n", alloc_time ); |
492 |
|
}; |
493 |
|
|
494 |
|
void FreeD( Device *dev ) |
495 |
|
{ |
496 |
|
DeviceMemory<T>::FreeD( dev, this->size() * sizeof(T) ); |
497 |
|
}; |
498 |
|
|
499 |
|
void PrefetchH2D( Device *dev, int stream_id ) |
500 |
|
{ |
501 |
|
double beg = omp_get_wtime(); |
502 |
|
DeviceMemory<T>::PrefetchH2D |
503 |
|
( dev, stream_id, m * n * sizeof(T), this->data() ); |
504 |
|
double alloc_time = omp_get_wtime() - beg; |
505 |
|
//printf( "PrefetchH2D %5.3lf\n", alloc_time ); |
506 |
|
}; |
507 |
|
|
508 |
|
void PrefetchD2H( Device *dev, int stream_id ) |
509 |
|
{ |
510 |
|
DeviceMemory<T>::PrefetchD2H |
511 |
|
( dev, stream_id, m * n * sizeof(T), this->data() ); |
512 |
|
}; |
513 |
|
|
514 |
|
void WaitPrefetch( Device *dev, int stream_id ) |
515 |
|
{ |
516 |
|
DeviceMemory<T>::Wait( dev, stream_id ); |
517 |
|
}; |
518 |
|
|
519 |
|
void FetchH2D( Device *dev ) |
520 |
|
{ |
521 |
|
DeviceMemory<T>::FetchH2D( dev, m * n * sizeof(T), this->data() ); |
522 |
|
}; |
523 |
|
|
524 |
|
void FetchD2H( Device *dev ) |
525 |
|
{ |
526 |
|
DeviceMemory<T>::FetchD2H( dev, m * n * sizeof(T), this->data() ); |
527 |
|
}; |
528 |
|
#endif |
529 |
|
|
530 |
|
private: |
531 |
|
|
532 |
|
void resize_( size_t m, size_t n ) |
533 |
|
{ |
534 |
|
vector<T, Allocator>::resize( m * n ); |
535 |
|
this->m = m; |
536 |
|
this->n = n; |
537 |
|
}; |
538 |
|
|
539 |
|
void resize_( size_t m, size_t n, T initT ) |
540 |
|
{ |
541 |
|
vector<T, Allocator>::resize( m * n, initT ); |
542 |
|
this->m = m; this->n = n; |
543 |
|
}; |
544 |
|
|
545 |
|
void reserve_( size_t m, size_t n ) |
546 |
|
{ |
547 |
|
vector<T, Allocator>::reserve( m * n ); |
548 |
|
} |
549 |
|
|
550 |
|
void clear_() |
551 |
|
{ |
552 |
|
vector<T, Allocator>::clear(); |
553 |
|
this->m = 0; |
554 |
|
this->n = 0; |
555 |
|
} |
556 |
|
|
557 |
|
size_t m = 0; |
558 |
|
|
559 |
|
size_t n = 0; |
560 |
|
|
561 |
|
}; /** end class Data */ |
562 |
|
|
563 |
|
|
564 |
|
|
565 |
|
|
566 |
|
|
567 |
|
|
568 |
|
template<typename T, class Allocator = std::allocator<T> > |
569 |
|
class SparseData : public ReadWrite |
570 |
|
{ |
571 |
|
public: |
572 |
|
|
573 |
|
/** (Default) constructor. */ |
574 |
|
SparseData( size_t m = 0, size_t n = 0, size_t nnz = 0, bool issymmetric = true ) |
575 |
|
{ |
576 |
|
Resize( m, n, nnz, issymmetric ); |
577 |
|
}; |
578 |
|
|
579 |
|
/** Adjust the storage size. */ |
580 |
|
void Resize( size_t m, size_t n, size_t nnz, bool issymmetric ) |
581 |
|
{ |
582 |
|
this->m = m; |
583 |
|
this->n = n; |
584 |
|
this->nnz = nnz; |
585 |
|
this->val.resize( nnz, 0.0 ); |
586 |
|
this->row_ind.resize( nnz, 0 ); |
587 |
|
this->col_ptr.resize( n + 1, 0 ); |
588 |
|
this->issymmetric = issymmetric; |
589 |
|
}; /** end Resize() */ |
590 |
|
|
591 |
|
/** Construct from three arrays: val[ nnz ],row_ind[ nnz ], and col_ptr[ n + 1 ]. */ |
592 |
|
void fromCSC( size_t m, size_t n, size_t nnz, bool issymmetric, |
593 |
|
const T *val, const size_t *row_ind, const size_t *col_ptr ) |
594 |
|
{ |
595 |
|
Resize( m, n, nnz, issymmetric ); |
596 |
|
for ( size_t i = 0; i < nnz; i ++ ) |
597 |
|
{ |
598 |
|
this->val[ i ] = val[ i ]; |
599 |
|
this->row_ind[ i ] = row_ind[ i ]; |
600 |
|
} |
601 |
|
for ( size_t j = 0; j < n + 1; j ++ ) |
602 |
|
{ |
603 |
|
this->col_ptr[ j ] = col_ptr[ j ]; |
604 |
|
} |
605 |
|
}; /** end fromCSC()*/ |
606 |
|
|
607 |
|
/** Retrive an element K( i, j ). */ |
608 |
|
T operator () ( size_t i, size_t j ) const |
609 |
|
{ |
610 |
|
if ( issymmetric && i < j ) std::swap( i, j ); |
611 |
|
auto row_beg = col_ptr[ j ]; |
612 |
|
auto row_end = col_ptr[ j + 1 ]; |
613 |
|
/** Early return if there is no nonzero entry in this column. */ |
614 |
|
if ( row_beg == row_end ) return 0; |
615 |
|
/** Search (BST) for row indices. */ |
616 |
|
auto lower = find( row_ind.begin() + row_beg, row_ind.begin() + row_end - 1, i ); |
617 |
|
//if ( lower != row_ind.end() ) printf( "lower %lu, i %lu, j %lu, row_beg %lu, row_end %lu\n", |
618 |
|
// *lower, i, j, row_beg, row_end ); fflush( stdout ); |
619 |
|
/** If the lower bound matches, then return the value. */ |
620 |
|
if ( *lower == i ) return val[ distance( row_ind.begin(), lower ) ]; |
621 |
|
/** Otherwise, return 0. */ |
622 |
|
return 0; |
623 |
|
}; /** end operator () */ |
624 |
|
|
625 |
|
/** Retrive a subblock K( I, J ).*/ |
626 |
|
Data<T> operator()( const vector<size_t> &I, const vector<size_t> &J ) const |
627 |
|
{ |
628 |
|
Data<T> KIJ( I.size(), J.size() ); |
629 |
|
/** Evaluate Kij element by element. */ |
630 |
|
for ( size_t j = 0; j < J.size(); j ++ ) |
631 |
|
for ( size_t i = 0; i < I.size(); i ++ ) |
632 |
|
KIJ( i, j ) = (*this)( I[ i ], J[ j ] ); |
633 |
|
/** Return submatrix KIJ. */ |
634 |
|
return KIJ; |
635 |
|
}; /** end operator () */ |
636 |
|
|
637 |
|
size_t ColPtr( size_t j ) { return col_ptr[ j ]; }; |
638 |
|
|
639 |
|
size_t RowInd( size_t offset ) { return row_ind[ offset ]; }; |
640 |
|
|
641 |
|
T Value( size_t offset ) { return val[ offset ]; }; |
642 |
|
|
643 |
|
pair<T, size_t> ImportantSample( size_t j ) |
644 |
|
{ |
645 |
|
size_t offset = col_ptr[ j ] + rand() % ( col_ptr[ j + 1 ] - col_ptr[ j ] ); |
646 |
|
pair<T, size_t> sample( val[ offset ], row_ind[ offset ] ); |
647 |
|
return sample; |
648 |
|
}; |
649 |
|
|
650 |
|
void Print() |
651 |
|
{ |
652 |
|
for ( size_t j = 0; j < n; j ++ ) printf( "%8lu ", j ); |
653 |
|
printf( "\n" ); |
654 |
|
for ( size_t i = 0; i < m; i ++ ) |
655 |
|
{ |
656 |
|
for ( size_t j = 0; j < n; j ++ ) |
657 |
|
{ |
658 |
|
printf( "% 3.1E ", (*this)( i, j ) ); |
659 |
|
} |
660 |
|
printf( "\n" ); |
661 |
|
} |
662 |
|
}; // end Print() |
663 |
|
|
664 |
|
|
665 |
|
/** |
666 |
|
* @brief Read matrix market format (ijv) format. Only lower triangular |
667 |
|
* part is stored |
668 |
|
*/ |
669 |
|
template<bool LOWERTRIANGULAR, bool ISZEROBASE, bool IJONLY = false> |
670 |
|
void readmtx( string &filename ) |
671 |
|
{ |
672 |
|
size_t m_mtx, n_mtx, nnz_mtx; |
673 |
|
|
674 |
|
vector<deque<size_t>> full_row_ind( n ); |
675 |
|
vector<deque<T>> full_val( n ); |
676 |
|
|
677 |
|
// Read all tuples. |
678 |
|
printf( "%s ", filename.data() ); fflush( stdout ); |
679 |
|
ifstream file( filename.data() ); |
680 |
|
string line; |
681 |
|
if ( file.is_open() ) |
682 |
|
{ |
683 |
|
size_t nnz_count = 0; |
684 |
|
|
685 |
|
while ( std::getline( file, line ) ) |
686 |
|
{ |
687 |
|
if ( line.size() ) |
688 |
|
{ |
689 |
|
if ( line[ 0 ] != '%' ) |
690 |
|
{ |
691 |
|
std::istringstream iss( line ); |
692 |
|
iss >> m_mtx >> n_mtx >> nnz_mtx; |
693 |
|
assert( this->m == m_mtx ); |
694 |
|
assert( this->n == n_mtx ); |
695 |
|
assert( this->nnz == nnz_mtx ); |
696 |
|
break; |
697 |
|
} |
698 |
|
} |
699 |
|
} |
700 |
|
|
701 |
|
while ( std::getline( file, line ) ) |
702 |
|
{ |
703 |
|
if ( nnz_count % ( nnz / 10 ) == 0 ) |
704 |
|
{ |
705 |
|
printf( "%lu%% ", ( nnz_count * 100 ) / nnz ); fflush( stdout ); |
706 |
|
} |
707 |
|
|
708 |
|
std::istringstream iss( line ); |
709 |
|
|
710 |
|
size_t i, j; |
711 |
|
T v; |
712 |
|
|
713 |
|
if ( IJONLY ) |
714 |
|
{ |
715 |
|
if ( !( iss >> i >> j ) ) |
716 |
|
{ |
717 |
|
printf( "line %lu has illegle format\n", nnz_count ); |
718 |
|
break; |
719 |
|
} |
720 |
|
v = 1; |
721 |
|
} |
722 |
|
else |
723 |
|
{ |
724 |
|
if ( !( iss >> i >> j >> v ) ) |
725 |
|
{ |
726 |
|
printf( "line %lu has illegle format\n", nnz_count ); |
727 |
|
break; |
728 |
|
} |
729 |
|
} |
730 |
|
|
731 |
|
if ( !ISZEROBASE ) |
732 |
|
{ |
733 |
|
i -= 1; |
734 |
|
j -= 1; |
735 |
|
} |
736 |
|
|
737 |
|
if ( v != 0.0 ) |
738 |
|
{ |
739 |
|
full_row_ind[ j ].push_back( i ); |
740 |
|
full_val[ j ].push_back( v ); |
741 |
|
|
742 |
|
if ( LOWERTRIANGULAR && i > j ) |
743 |
|
{ |
744 |
|
full_row_ind[ i ].push_back( j ); |
745 |
|
full_val[ i ].push_back( v ); |
746 |
|
} |
747 |
|
} |
748 |
|
nnz_count ++; |
749 |
|
} |
750 |
|
assert( nnz_count == nnz ); |
751 |
|
} |
752 |
|
printf( "Done.\n" ); fflush( stdout ); |
753 |
|
// Close the file. |
754 |
|
file.close(); |
755 |
|
|
756 |
|
//printf( "Here nnz %lu\n", nnz ); |
757 |
|
|
758 |
|
// Recount nnz for the full storage. |
759 |
|
size_t full_nnz = 0; |
760 |
|
for ( size_t j = 0; j < n; j ++ ) |
761 |
|
{ |
762 |
|
col_ptr[ j ] = full_nnz; |
763 |
|
full_nnz += full_row_ind[ j ].size(); |
764 |
|
} |
765 |
|
nnz = full_nnz; |
766 |
|
col_ptr[ n ] = full_nnz; |
767 |
|
row_ind.resize( full_nnz ); |
768 |
|
val.resize( full_nnz ); |
769 |
|
|
770 |
|
//printf( "Here nnz %lu\n", nnz ); |
771 |
|
|
772 |
|
//full_nnz = 0; |
773 |
|
//for ( size_t j = 0; j < n; j ++ ) |
774 |
|
//{ |
775 |
|
// for ( size_t i = 0; i < full_row_ind[ j ].size(); i ++ ) |
776 |
|
// { |
777 |
|
// row_ind[ full_nnz ] = full_row_ind[ j ][ i ]; |
778 |
|
// val[ full_nnz ] = full_val[ j ][ i ]; |
779 |
|
// full_nnz ++; |
780 |
|
// } |
781 |
|
//} |
782 |
|
|
783 |
|
//printf( "Close the file. Reformat.\n" ); |
784 |
|
|
785 |
|
#pragma omp parallel for |
786 |
|
for ( size_t j = 0; j < n; j ++ ) |
787 |
|
{ |
788 |
|
for ( size_t i = 0; i < full_row_ind[ j ].size(); i ++ ) |
789 |
|
{ |
790 |
|
row_ind[ col_ptr[ j ] + i ] = full_row_ind[ j ][ i ]; |
791 |
|
val[ col_ptr[ j ] + i ] = full_val[ j ][ i ]; |
792 |
|
} |
793 |
|
} |
794 |
|
|
795 |
|
printf( "finish readmatrix %s\n", filename.data() ); fflush( stdout ); |
796 |
|
}; |
797 |
|
|
798 |
|
|
799 |
|
size_t row() { return m; }; |
800 |
|
|
801 |
|
size_t col() { return n; }; |
802 |
|
|
803 |
|
template<typename TINDEX> |
804 |
|
double flops( TINDEX na, TINDEX nb ) { return 0.0; }; |
805 |
|
|
806 |
|
private: |
807 |
|
|
808 |
|
size_t m = 0; |
809 |
|
|
810 |
|
size_t n = 0; |
811 |
|
|
812 |
|
size_t nnz = 0; |
813 |
|
|
814 |
|
bool issymmetric = false; |
815 |
|
|
816 |
|
vector<T, Allocator> val; |
817 |
|
|
818 |
|
vector<size_t> row_ind; |
819 |
|
|
820 |
|
vector<size_t> col_ptr; |
821 |
|
|
822 |
|
}; /** end class CSC */ |
823 |
|
|
824 |
|
|
825 |
|
|
826 |
|
#ifdef HMLP_MIC_AVX512 |
827 |
|
template<class T, class Allocator = hbw::allocator<T> > |
828 |
|
#else |
829 |
|
template<class T, class Allocator = std::allocator<T> > |
830 |
|
#endif |
831 |
|
class OOCData : public ReadWrite |
832 |
|
{ |
833 |
|
public: |
834 |
|
|
835 |
|
OOCData() {}; |
836 |
|
|
837 |
|
OOCData( size_t m, size_t n, string filename ) { Set( m, n, filename ); }; |
838 |
|
|
839 |
|
~OOCData() |
840 |
|
{ |
841 |
|
/** Unmap */ |
842 |
|
int rc = munmap( mmappedData, m * n * sizeof(T) ); |
843 |
|
assert( rc == 0 ); |
844 |
|
close( fd ); |
845 |
|
printf( "finish readmatrix %s\n", filename.data() ); |
846 |
|
}; |
847 |
|
|
848 |
|
void Set( size_t m, size_t n, string filename ) |
849 |
|
{ |
850 |
|
this->m = m; |
851 |
|
this->n = n; |
852 |
|
this->filename = filename; |
853 |
|
/** Open the file */ |
854 |
|
fd = open( filename.data(), O_RDONLY, 0 ); |
855 |
|
assert( fd != -1 ); |
856 |
|
#ifdef __APPLE__ |
857 |
|
mmappedData = (T*)mmap( NULL, m * n * sizeof(T), |
858 |
|
PROT_READ, MAP_PRIVATE, fd, 0 ); |
859 |
|
#else /** Assume Linux */ |
860 |
|
mmappedData = (T*)mmap( NULL, m * n * sizeof(T), |
861 |
|
PROT_READ, MAP_PRIVATE | MAP_POPULATE, fd, 0 ); |
862 |
|
#endif |
863 |
|
assert( mmappedData != MAP_FAILED ); |
864 |
|
cout << filename << endl; |
865 |
|
}; |
866 |
|
|
867 |
|
//template<typename TINDEX> |
868 |
|
T operator()( size_t i, size_t j ) const |
869 |
|
{ |
870 |
|
assert( i < m && j < n ); |
871 |
|
return mmappedData[ j * m + i ]; |
872 |
|
}; |
873 |
|
|
874 |
|
//template<typename TINDEX> |
875 |
|
Data<T> operator()( const vector<size_t>& I, const vector<size_t>& J ) const |
876 |
|
{ |
877 |
|
Data<T> KIJ( I.size(), J.size() ); |
878 |
|
for ( int j = 0; j < J.size(); j ++ ) |
879 |
|
for ( int i = 0; i < I.size(); i ++ ) |
880 |
|
KIJ[ j * I.size() + i ] = (*this)( I[ i ], J[ j ] ); |
881 |
|
return KIJ; |
882 |
|
}; |
883 |
|
|
884 |
|
template<typename TINDEX> |
885 |
|
pair<T, TINDEX> ImportantSample( TINDEX j ) |
886 |
|
{ |
887 |
|
TINDEX i = std::rand() % m; |
888 |
|
pair<T, TINDEX> sample( (*this)( i, j ), i ); |
889 |
|
return sample; |
890 |
|
}; |
891 |
|
|
892 |
|
size_t row() const noexcept { return m; }; |
893 |
|
|
894 |
|
size_t col() const noexcept { return n; }; |
895 |
|
|
896 |
|
template<typename TINDEX> |
897 |
|
double flops( TINDEX na, TINDEX nb ) { return 0.0; }; |
898 |
|
|
899 |
|
private: |
900 |
|
|
901 |
|
size_t m = 0; |
902 |
|
|
903 |
|
size_t n = 0; |
904 |
|
|
905 |
|
string filename; |
906 |
|
|
907 |
|
/** Use mmap */ |
908 |
|
T *mmappedData = NULL; |
909 |
|
|
910 |
|
int fd = -1; |
911 |
|
|
912 |
|
}; /** end class OOCData */ |
913 |
|
|
914 |
|
|
915 |
|
|
916 |
|
|
917 |
|
|
918 |
|
|
919 |
|
|
920 |
|
}; /** end namespace hmlp */ |
921 |
|
|
922 |
|
#endif /** define DATA_HPP */ |