1 |
|
/** |
2 |
|
* HMLP (High-Performance Machine Learning Primitives) |
3 |
|
* |
4 |
|
* Copyright (C) 2014-2017, The University of Texas at Austin |
5 |
|
* |
6 |
|
* This program is free software: you can redistribute it and/or modify |
7 |
|
* it under the terms of the GNU General Public License as published by |
8 |
|
* the Free Software Foundation, either version 3 of the License, or |
9 |
|
* (at your option) any later version. |
10 |
|
* |
11 |
|
* This program is distributed in the hope that it will be useful, |
12 |
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 |
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
14 |
|
* GNU General Public License for more details. |
15 |
|
* |
16 |
|
* You should have received a copy of the GNU General Public License |
17 |
|
* along with this program. If not, see the LICENSE file. |
18 |
|
* |
19 |
|
**/ |
20 |
|
|
21 |
|
#ifndef TREE_HPP |
22 |
|
#define TREE_HPP |
23 |
|
|
24 |
|
|
25 |
|
#include <assert.h>

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <deque>
#include <functional>
#include <iostream>
#include <map>
#include <random>
#include <set>
#include <string>
#include <type_traits>
#include <typeinfo>
#include <unordered_map>
#include <utility>
#include <vector>
36 |
|
|
37 |
|
|
38 |
|
|
39 |
|
|
40 |
|
/** Use HMLP related support. */ |
41 |
|
#include <hmlp.h> |
42 |
|
#include <hmlp_base.hpp> |
43 |
|
/** Use HMLP primitives. */ |
44 |
|
#include <primitives/combinatorics.hpp> |
45 |
|
/** Use STL and HMLP namespaces. */ |
46 |
|
using namespace std; |
47 |
|
using namespace hmlp; |
48 |
|
|
49 |
|
#define REPORT_ANN_STATUS 0 |
50 |
|
|
51 |
|
//#define DEBUG_TREE 1 |
52 |
|
|
53 |
|
|
54 |
|
/** Global flag: set (once) when a splitter returns an unbalanced partition,
 *  so the fallback warning in Node::Split() is printed only one time. */
/* NOTE(review): a non-inline global defined in a header will produce
 * duplicate-symbol link errors if this header is included from multiple
 * translation units — consider `inline` (C++17) or moving it to a .cpp. */
bool has_uneven_split = false;
55 |
|
|
56 |
|
|
57 |
|
|
58 |
|
|
59 |
|
namespace hmlp |
60 |
|
{ |
61 |
|
|
62 |
|
/**
 * @brief Static helpers that encode tree-node positions as Morton IDs.
 *
 * A Morton ID packs the root-to-node path (one bit per level, stored in
 * the high bits) together with the node depth (stored in the low
 * LEVELOFFSET bits).
 */
class MortonHelper
{
  public:

    /** Recursion state: (path bits from the root, current depth). */
    typedef pair<size_t, size_t> Recursor;

    /** State of the root node: empty path at depth zero. */
    static Recursor Root()
    {
      return Recursor( 0, 0 );
    };

    /** Descend to the left child: append a 0 bit, one level deeper. */
    static Recursor RecurLeft( Recursor r )
    {
      return Recursor( r.first << 1, r.second + 1 );
    };

    /** Descend to the right child: append a 1 bit, one level deeper. */
    static Recursor RecurRight( Recursor r )
    {
      return Recursor( ( r.first << 1 ) | 1, r.second + 1 );
    };

    /** Pack (path, depth) into a single Morton ID. */
    static size_t MortonID( Recursor r )
    {
      /** Shift the path above the depth field, then append the depth. */
      return ( r.first << Shift( r.second ) ) + r.second;
    };

    /** Morton ID of the sibling node: flip the last path bit. */
    static size_t SiblingMortonID( Recursor r )
    {
      size_t sibling_path = r.first ^ 1;
      return ( sibling_path << Shift( r.second ) ) + r.second;
    };

    /** @brief return the MPI rank that owns it. */
    static int Morton2Rank( size_t it, int size )
    {
      /** mpidepth = floor( log2( size ) ). */
      size_t mpidepth = 0;
      for ( ; size > 1; size >>= 1 ) mpidepth ++;
      /** Clip the node depth to the depth covered by MPI ranks. */
      size_t itdepth = Depth( it );
      if ( itdepth > mpidepth ) itdepth = mpidepth;
      size_t itshift = Shift( itdepth );
      return ( it >> itshift ) << ( mpidepth - itdepth );
    }; /** end Morton2Rank() */

    /** Collect offsets of all descendants of r that live at depth. */
    static void Morton2Offsets( Recursor r, size_t depth, vector<size_t> &offsets )
    {
      if ( r.second == depth )
      {
        offsets.push_back( r.first );
        return;
      }
      Morton2Offsets( RecurLeft( r ), depth, offsets );
      Morton2Offsets( RecurRight( r ), depth, offsets );
    }; /** end Morton2Offsets() */

    /** Offsets (at the given depth) of all descendants of node me. */
    static vector<size_t> Morton2Offsets( size_t me, size_t depth )
    {
      size_t mydepth = Depth( me );
      assert( mydepth <= depth );
      Recursor r( me >> Shift( mydepth ), mydepth );
      vector<size_t> offsets;
      Morton2Offsets( r, depth, offsets );
      return offsets;
    }; /** end Morton2Offsets() */

    /**
     * @brief Check if ``it'' is ``me'''s ancestor by checking two facts:
     *        1) itlevel <= mylevel, and
     *        2) the morton bits above itlevel are identical. For example,
     *
     *        me = 01110 100 (level 4)
     *        it = 01100 010 (level 2) is my parent
     *
     *        me = 00000 001 (level 1)
     *        it = 00000 011 (level 3) is not my parent
     */
    static bool IsMyParent( size_t me, size_t it )
    {
      size_t itlevel = Depth( it );
      size_t mylevel = Depth( me );
      size_t itshift = Shift( itlevel );
      bool is_my_parent = ( itlevel <= mylevel ) && !( ( me ^ it ) >> itshift );
#ifdef DEBUG_TREE
      hmlp_print_binary( me );
      hmlp_print_binary( it );
      hmlp_print_binary( ( me ^ it ) >> itshift );
      printf( "ismyparent %d itlevel %lu mylevel %lu shift %lu fixed shift %d\n",
          is_my_parent, itlevel, mylevel, itshift, 1 << LEVELOFFSET );
#endif
      return is_my_parent;
    }; /** end IsMyParent() */

    /** True if any query in querys lies inside the subtree of target. */
    template<typename TQUERY>
    static bool ContainAny( size_t target, TQUERY &querys )
    {
      for ( auto & q : querys )
      {
        if ( IsMyParent( q, target ) ) return true;
      }
      return false;
    }; /** end ContainAny() */

  private:

    /** Extract the depth stored in the low LEVELOFFSET bits. */
    static size_t Depth( size_t it )
    {
      return it & ( ( (size_t)1 << LEVELOFFSET ) - 1 );
    }; /** end Depth() */

    /** Left-shift amount that places path bits above the depth field. */
    static size_t Shift( size_t depth )
    {
      return ( 1 << LEVELOFFSET ) - depth + LEVELOFFSET;
    }; /** end Shift() */

    /** By default 4 bits encode the depth, i.e. levels 0-15. */
    const static int LEVELOFFSET = 4;

}; /** end class MortonHelper */
191 |
|
|
192 |
|
|
193 |
|
/** Strict weak ordering on (value, index) pairs by the value component. */
template<typename T>
bool less_first( const pair<T, size_t> &a, const pair<T, size_t> &b )
{
  return a.first < b.first;
};
198 |
|
/** Strict weak ordering on (value, index) pairs by the index component. */
template<typename T>
bool less_second( const pair<T, size_t> &a, const pair<T, size_t> &b )
{
  return a.second < b.second;
};
203 |
|
/** Equality on (value, index) pairs by the index component only. */
template<typename T>
bool equal_second( const pair<T, size_t> &a, const pair<T, size_t> &b )
{
  return a.second == b.second;
};
208 |
|
|
209 |
|
|
210 |
|
|
211 |
|
/**
 * Merge two length-k neighbor lists A and B into A, keeping the k
 * candidates with the smallest distances and removing duplicate gids.
 * aux is a caller-provided scratch buffer (resized to 2k on demand).
 */
template<typename T>
void MergeNeighbors( size_t k, pair<T, size_t> *A,
    pair<T, size_t> *B, vector<pair<T, size_t>> &aux )
{
  /** Make sure the scratch buffer holds exactly 2k candidates. */
  if ( aux.size() != 2 * k ) aux.resize( 2 * k );

  /** Gather both candidate lists into the scratch buffer. */
  copy( A, A + k, aux.begin() );
  copy( B, B + k, aux.begin() + k );

  /** Comparators: by gid (for dedup) and by distance (for ranking). */
  auto by_gid = []( const pair<T, size_t> &x, const pair<T, size_t> &y )
  { return x.second < y.second; };
  auto same_gid = []( const pair<T, size_t> &x, const pair<T, size_t> &y )
  { return x.second == y.second; };
  auto by_dist = []( const pair<T, size_t> &x, const pair<T, size_t> &y )
  { return x.first < y.first; };

  /** Group duplicates together, drop them, then rank by distance. */
  sort( aux.begin(), aux.end(), by_gid );
  auto last = unique( aux.begin(), aux.end(), same_gid );
  sort( aux.begin(), last, by_dist );

  /** Write the k best candidates back to A. */
  copy( aux.begin(), aux.begin() + k, A );
}; /** end MergeNeighbors() */
227 |
|
|
228 |
|
|
229 |
|
/**
 * Merge n pairs of length-k neighbor lists stored contiguously in A and
 * B (row i occupies entries [i*k, (i+1)*k)); results are written into A.
 * Rows are processed in parallel, each thread reusing its own scratch.
 */
template<typename T>
void MergeNeighbors( size_t k, size_t n,
    vector<pair<T, size_t>> &A, vector<pair<T, size_t>> &B )
{
  assert( A.size() >= n * k && B.size() >= n * k );
  #pragma omp parallel
  {
    /** One scratch buffer per thread, reused across rows. */
    vector<pair<T, size_t>> scratch( 2 * k );
    #pragma omp for
    for ( size_t j = 0; j < n; j ++ )
    {
      MergeNeighbors( k, A.data() + j * k, B.data() + j * k, scratch );
    }
  }
}; /** end MergeNeighbors() */
244 |
|
|
245 |
|
|
246 |
|
|
247 |
|
|
248 |
|
|
249 |
|
|
250 |
|
|
251 |
|
|
252 |
|
|
253 |
|
namespace tree |
254 |
|
{ |
255 |
|
/** |
256 |
|
* @brief Permuate the order of gids for each internal node |
257 |
|
* to the order of leaf nodes. |
258 |
|
* |
259 |
|
* @para The parallelism is exploited in the task level using a |
260 |
|
* bottom up traversal. |
261 |
|
*/ |
262 |
|
template<typename NODE> |
263 |
|
class IndexPermuteTask : public Task |
264 |
|
{ |
265 |
|
public: |
266 |
|
|
267 |
|
NODE *arg; |
268 |
|
|
269 |
|
void Set( NODE *user_arg ) |
270 |
|
{ |
271 |
|
name = string( "Permutation" ); |
272 |
|
arg = user_arg; |
273 |
|
// Need an accurate cost model. |
274 |
|
cost = 1.0; |
275 |
|
}; |
276 |
|
|
277 |
|
void DependencyAnalysis() |
278 |
|
{ |
279 |
|
arg->DependencyAnalysis( RW, this ); |
280 |
|
if ( !arg->isleaf ) |
281 |
|
{ |
282 |
|
arg->lchild->DependencyAnalysis( R, this ); |
283 |
|
arg->rchild->DependencyAnalysis( R, this ); |
284 |
|
} |
285 |
|
this->TryEnqueue(); |
286 |
|
}; |
287 |
|
|
288 |
|
|
289 |
|
void Execute( Worker* user_worker ) |
290 |
|
{ |
291 |
|
auto &gids = arg->gids; |
292 |
|
auto *lchild = arg->lchild; |
293 |
|
auto *rchild = arg->rchild; |
294 |
|
|
295 |
|
if ( !arg->isleaf ) |
296 |
|
{ |
297 |
|
auto &lgids = lchild->gids; |
298 |
|
auto &rgids = rchild->gids; |
299 |
|
gids = lgids; |
300 |
|
gids.insert( gids.end(), rgids.begin(), rgids.end() ); |
301 |
|
} |
302 |
|
}; |
303 |
|
|
304 |
|
}; /** end class IndexPermuteTask */ |
305 |
|
|
306 |
|
|
307 |
|
/** |
308 |
|
* @brief |
309 |
|
* |
310 |
|
*/ |
311 |
|
template<typename NODE> |
312 |
|
class SplitTask : public Task |
313 |
|
{ |
314 |
|
public: |
315 |
|
|
316 |
|
NODE *arg = NULL; |
317 |
|
|
318 |
|
void Set( NODE *user_arg ) |
319 |
|
{ |
320 |
|
name = string( "Split" ); |
321 |
|
arg = user_arg; |
322 |
|
// Need an accurate cost model. |
323 |
|
cost = 1.0; |
324 |
|
}; |
325 |
|
|
326 |
|
void DependencyAnalysis() { arg->DependOnParent( this ); }; |
327 |
|
|
328 |
|
void Execute( Worker* user_worker ) { arg->Split(); }; |
329 |
|
|
330 |
|
}; /** end class SplitTask */ |
331 |
|
|
332 |
|
|
333 |
|
///** |
334 |
|
// * @brief This is the default ball tree splitter. Given coordinates, |
335 |
|
// * compute the direction from the two most far away points. |
336 |
|
// * Project all points to this line and split into two groups |
337 |
|
// * using a median select. |
338 |
|
// * |
339 |
|
// * @para |
340 |
|
// * |
341 |
|
// * @TODO Need to explit the parallelism. |
342 |
|
// */ |
343 |
|
//template<int N_SPLIT, typename T> |
344 |
|
//struct centersplit |
345 |
|
//{ |
346 |
|
// /** closure */ |
347 |
|
// Data<T> *Coordinate = NULL; |
348 |
|
// |
349 |
|
// inline vector<vector<size_t> > operator() |
350 |
|
// ( |
351 |
|
// vector<size_t>& gids |
352 |
|
// ) const |
353 |
|
// { |
354 |
|
// assert( N_SPLIT == 2 ); |
355 |
|
// |
356 |
|
// Data<T> &X = *Coordinate; |
357 |
|
// size_t d = X.row(); |
358 |
|
// size_t n = gids.size(); |
359 |
|
// |
360 |
|
// T rcx0 = 0.0, rx01 = 0.0; |
361 |
|
// size_t x0, x1; |
362 |
|
// vector<vector<size_t> > split( N_SPLIT ); |
363 |
|
// |
364 |
|
// |
365 |
|
// vector<T> centroid = combinatorics::Mean( d, n, X, gids ); |
366 |
|
// vector<T> direction( d ); |
367 |
|
// vector<T> projection( n, 0.0 ); |
368 |
|
// |
369 |
|
// //printf( "After Mean\n" ); |
370 |
|
// |
371 |
|
// // Compute the farest x0 point from the centroid |
372 |
|
// for ( size_t i = 0; i < n; i ++ ) |
373 |
|
// { |
374 |
|
// T rcx = 0.0; |
375 |
|
// for ( size_t p = 0; p < d; p ++ ) |
376 |
|
// { |
377 |
|
// T tmp = X( p, gids[ i ] ) - centroid[ p ]; |
378 |
|
// |
379 |
|
// |
380 |
|
// rcx += tmp * tmp; |
381 |
|
// } |
382 |
|
// if ( rcx > rcx0 ) |
383 |
|
// { |
384 |
|
// rcx0 = rcx; |
385 |
|
// x0 = i; |
386 |
|
// } |
387 |
|
// } |
388 |
|
// |
389 |
|
// //printf( "After Farest\n" ); |
390 |
|
// //for ( int p = 0; p < d; p ++ ) |
391 |
|
// //{ |
392 |
|
// //} |
393 |
|
// //printf( "\n" ); |
394 |
|
// |
395 |
|
// // Compute the farest point x1 from x0 |
396 |
|
// for ( size_t i = 0; i < n; i ++ ) |
397 |
|
// { |
398 |
|
// T rxx = 0.0; |
399 |
|
// for ( size_t p = 0; p < d; p ++ ) |
400 |
|
// { |
401 |
|
// T tmp = X( p, gids[ i ] ) - X( p, gids[ x0 ] ); |
402 |
|
// rxx += tmp * tmp; |
403 |
|
// } |
404 |
|
// if ( rxx > rx01 ) |
405 |
|
// { |
406 |
|
// rx01 = rxx; |
407 |
|
// x1 = i; |
408 |
|
// } |
409 |
|
// } |
410 |
|
// |
411 |
|
// |
412 |
|
// |
413 |
|
// // Compute direction |
414 |
|
// for ( size_t p = 0; p < d; p ++ ) |
415 |
|
// { |
416 |
|
// direction[ p ] = X( p, gids[ x1 ] ) - X( p, gids[ x0 ] ); |
417 |
|
// } |
418 |
|
// |
419 |
|
// //printf( "After Direction\n" ); |
420 |
|
// //for ( int p = 0; p < d; p ++ ) |
421 |
|
// //{ |
422 |
|
// // printf( "%5.2lf ", direction[ p ] ); |
423 |
|
// //} |
424 |
|
// //printf( "\n" ); |
425 |
|
// //exit( 1 ); |
426 |
|
// |
427 |
|
// |
428 |
|
// |
429 |
|
// // Compute projection |
430 |
|
// projection.resize( n, 0.0 ); |
431 |
|
// for ( size_t i = 0; i < n; i ++ ) |
432 |
|
// for ( size_t p = 0; p < d; p ++ ) |
433 |
|
// projection[ i ] += X( p, gids[ i ] ) * direction[ p ]; |
434 |
|
// |
435 |
|
// //printf( "After Projetion\n" ); |
436 |
|
// //for ( int p = 0; p < d; p ++ ) |
437 |
|
// //{ |
438 |
|
// // printf( "%5.2lf ", projec[ p ] ); |
439 |
|
// //} |
440 |
|
// //printf( "\n" ); |
441 |
|
// |
442 |
|
// |
443 |
|
// |
444 |
|
// /** Parallel median search */ |
445 |
|
// T median; |
446 |
|
// |
447 |
|
// if ( 1 ) |
448 |
|
// { |
449 |
|
// median = combinatorics::Select( n, n / 2, projection ); |
450 |
|
// } |
451 |
|
// else |
452 |
|
// { |
453 |
|
// auto proj_copy = projection; |
454 |
|
// sort( proj_copy.begin(), proj_copy.end() ); |
455 |
|
// median = proj_copy[ n / 2 ]; |
456 |
|
// } |
457 |
|
// |
458 |
|
// |
459 |
|
// |
460 |
|
// split[ 0 ].reserve( n / 2 + 1 ); |
461 |
|
// split[ 1 ].reserve( n / 2 + 1 ); |
462 |
|
// |
463 |
|
// /** TODO: Can be parallelized */ |
464 |
|
// vector<size_t> middle; |
465 |
|
// for ( size_t i = 0; i < n; i ++ ) |
466 |
|
// { |
467 |
|
// if ( projection[ i ] < median ) split[ 0 ].push_back( i ); |
468 |
|
// else if ( projection[ i ] > median ) split[ 1 ].push_back( i ); |
469 |
|
// else middle.push_back( i ); |
470 |
|
// } |
471 |
|
// |
472 |
|
// for ( size_t i = 0; i < middle.size(); i ++ ) |
473 |
|
// { |
474 |
|
// if ( split[ 0 ].size() <= split[ 1 ].size() ) split[ 0 ].push_back( middle[ i ] ); |
475 |
|
// else split[ 1 ].push_back( middle[ i ] ); |
476 |
|
// } |
477 |
|
// |
478 |
|
// |
479 |
|
// //printf( "split median %lf left %d right %d\n", |
480 |
|
// // median, |
481 |
|
// // (int)split[ 0 ].size(), (int)split[ 1 ].size() ); |
482 |
|
// |
483 |
|
// //if ( split[ 0 ].size() > 0.6 * n || |
484 |
|
// // split[ 1 ].size() > 0.6 * n ) |
485 |
|
// //{ |
486 |
|
// // for ( int i = 0; i < n; i ++ ) |
487 |
|
// // { |
488 |
|
// // printf( "%E ", projection[ i ] ); |
489 |
|
// // } |
490 |
|
// // printf( "\n" ); |
491 |
|
// //} |
492 |
|
// |
493 |
|
// |
494 |
|
// return split; |
495 |
|
// }; |
496 |
|
//}; |
497 |
|
// |
498 |
|
// |
499 |
|
///** |
500 |
|
// * @brief This is the splitter used in the randomized tree. Given |
501 |
|
// * coordinates, project all points onto a random direction |
502 |
|
// * and split into two groups using a median select. |
503 |
|
// * |
504 |
|
// * @para |
505 |
|
// * |
506 |
|
// * @TODO Need to explit the parallelism. |
507 |
|
// */ |
508 |
|
//template<int N_SPLIT, typename T> |
509 |
|
//struct randomsplit |
510 |
|
//{ |
511 |
|
// /** Closure */ |
512 |
|
// Data<T> *Coordinate = NULL; |
513 |
|
// |
514 |
|
// inline vector<vector<size_t> > operator() |
515 |
|
// ( |
516 |
|
// vector<size_t>& gids |
517 |
|
// ) const |
518 |
|
// { |
519 |
|
// assert( N_SPLIT == 2 ); |
520 |
|
// |
521 |
|
// Data<T> &X = *Coordinate; |
522 |
|
// size_t d = X.row(); |
523 |
|
// size_t n = gids.size(); |
524 |
|
// |
525 |
|
// vector<vector<size_t> > split( N_SPLIT ); |
526 |
|
// |
527 |
|
// vector<T> direction( d ); |
528 |
|
// vector<T> projection( n, 0.0 ); |
529 |
|
// |
530 |
|
// // Compute random direction |
531 |
|
// static default_random_engine generator; |
532 |
|
// normal_distribution<T> distribution; |
533 |
|
// for ( int p = 0; p < d; p ++ ) |
534 |
|
// { |
535 |
|
// direction[ p ] = distribution( generator ); |
536 |
|
// } |
537 |
|
// |
538 |
|
// // Compute projection |
539 |
|
// projection.resize( n, 0.0 ); |
540 |
|
// for ( size_t i = 0; i < n; i ++ ) |
541 |
|
// for ( size_t p = 0; p < d; p ++ ) |
542 |
|
// projection[ i ] += X( p, gids[ i ] ) * direction[ p ]; |
543 |
|
// |
544 |
|
// |
545 |
|
// // Parallel median search |
546 |
|
// // T median = Select( n, n / 2, projection ); |
547 |
|
// auto proj_copy = projection; |
548 |
|
// sort( proj_copy.begin(), proj_copy.end() ); |
549 |
|
// T median = proj_copy[ n / 2 ]; |
550 |
|
// |
551 |
|
// split[ 0 ].reserve( n / 2 + 1 ); |
552 |
|
// split[ 1 ].reserve( n / 2 + 1 ); |
553 |
|
// |
554 |
|
// /** TODO: Can be parallelized */ |
555 |
|
// vector<size_t> middle; |
556 |
|
// for ( size_t i = 0; i < n; i ++ ) |
557 |
|
// { |
558 |
|
// if ( projection[ i ] < median ) split[ 0 ].push_back( i ); |
559 |
|
// else if ( projection[ i ] > median ) split[ 1 ].push_back( i ); |
560 |
|
// else middle.push_back( i ); |
561 |
|
// } |
562 |
|
// |
563 |
|
// for ( size_t i = 0; i < middle.size(); i ++ ) |
564 |
|
// { |
565 |
|
// if ( split[ 0 ].size() <= split[ 1 ].size() ) split[ 0 ].push_back( middle[ i ] ); |
566 |
|
// else split[ 1 ].push_back( middle[ i ] ); |
567 |
|
// } |
568 |
|
// |
569 |
|
// |
570 |
|
// //printf( "split median %lf left %d right %d\n", |
571 |
|
// // median, |
572 |
|
// // (int)split[ 0 ].size(), (int)split[ 1 ].size() ); |
573 |
|
// |
574 |
|
// //if ( split[ 0 ].size() > 0.6 * n || |
575 |
|
// // split[ 1 ].size() > 0.6 * n ) |
576 |
|
// //{ |
577 |
|
// // for ( int i = 0; i < n; i ++ ) |
578 |
|
// // { |
579 |
|
// // printf( "%E ", projection[ i ] ); |
580 |
|
// // } |
581 |
|
// // printf( "\n" ); |
582 |
|
// //} |
583 |
|
// |
584 |
|
// |
585 |
|
// return split; |
586 |
|
// }; |
587 |
|
//}; |
588 |
|
|
589 |
|
|
590 |
|
/** |
591 |
|
* @brief |
592 |
|
*/ |
593 |
|
//template<typename SETUP, int N_CHILDREN, typename NODEDATA> |
594 |
|
template<typename SETUP, typename NODEDATA> |
595 |
|
class Node : public ReadWrite |
596 |
|
{ |
597 |
|
public: |
598 |
|
|
599 |
|
/** Deduce data type from SETUP. */ |
600 |
|
typedef typename SETUP::T T; |
601 |
|
/** Use binary trees. */ |
602 |
|
static const int N_CHILDREN = 2; |
603 |
|
|
604 |
|
Node( SETUP* setup, size_t n, size_t l, |
605 |
|
Node *parent, unordered_map<size_t, Node*> *morton2node, Lock *treelock ) |
606 |
|
{ |
607 |
|
this->setup = setup; |
608 |
|
this->n = n; |
609 |
|
this->l = l; |
610 |
|
this->morton = 0; |
611 |
|
this->treelist_id = 0; |
612 |
|
this->gids.resize( n ); |
613 |
|
this->isleaf = false; |
614 |
|
this->parent = parent; |
615 |
|
this->lchild = NULL; |
616 |
|
this->rchild = NULL; |
617 |
|
this->morton2node = morton2node; |
618 |
|
this->treelock = treelock; |
619 |
|
for ( int i = 0; i < N_CHILDREN; i++ ) kids[ i ] = NULL; |
620 |
|
}; |
621 |
|
|
622 |
|
Node( SETUP *setup, int n, int l, vector<size_t> gids, |
623 |
|
Node *parent, unordered_map<size_t, Node*> *morton2node, Lock *treelock ) |
624 |
|
{ |
625 |
|
this->setup = setup; |
626 |
|
this->n = n; |
627 |
|
this->l = l; |
628 |
|
this->morton = 0; |
629 |
|
this->treelist_id = 0; |
630 |
|
this->gids = gids; |
631 |
|
this->isleaf = false; |
632 |
|
this->parent = parent; |
633 |
|
this->lchild = NULL; |
634 |
|
this->rchild = NULL; |
635 |
|
this->morton2node = morton2node; |
636 |
|
this->treelock = treelock; |
637 |
|
for ( int i = 0; i < N_CHILDREN; i++ ) kids[ i ] = NULL; |
638 |
|
}; |
639 |
|
|
640 |
|
|
641 |
|
/** |
642 |
|
* Constructor of local essential tree (LET) node: |
643 |
|
* This constructor will only be used in the distributed environment. |
644 |
|
*/ |
645 |
|
Node( size_t morton ) { this->morton = morton; }; |
646 |
|
|
647 |
|
/** (Default) destructor */ |
648 |
|
~Node() {}; |
649 |
|
|
650 |
|
void Resize( int n ) |
651 |
|
{ |
652 |
|
this->n = n; |
653 |
|
gids.resize( n ); |
654 |
|
}; |
655 |
|
|
656 |
|
|
657 |
|
void Split() |
658 |
|
{ |
659 |
|
try |
660 |
|
{ |
661 |
|
/** Early return if this is a leaf node. */ |
662 |
|
if ( isleaf ) return; |
663 |
|
|
664 |
|
int m = setup->m; |
665 |
|
int max_depth = setup->max_depth; |
666 |
|
|
667 |
|
double beg = omp_get_wtime(); |
668 |
|
auto split = setup->splitter( gids ); |
669 |
|
double splitter_time = omp_get_wtime() - beg; |
670 |
|
//printf( "splitter %5.3lfs\n", splitter_time ); |
671 |
|
|
672 |
|
if ( std::abs( (int)split[ 0 ].size() - (int)split[ 1 ].size() ) > 1 ) |
673 |
|
{ |
674 |
|
if ( !has_uneven_split ) |
675 |
|
{ |
676 |
|
printf( "\n\nWARNING! uneven split. Using random split instead %lu %lu\n\n", |
677 |
|
split[ 0 ].size(), split[ 1 ].size() ); |
678 |
|
has_uneven_split = true; |
679 |
|
} |
680 |
|
//printf( "split[ 0 ].size() %lu split[ 1 ].size() %lu\n", |
681 |
|
// split[ 0 ].size(), split[ 1 ].size() ); |
682 |
|
split[ 0 ].resize( gids.size() / 2 ); |
683 |
|
split[ 1 ].resize( gids.size() - ( gids.size() / 2 ) ); |
684 |
|
//#pragma omp parallel for |
685 |
|
for ( size_t i = 0; i < gids.size(); i ++ ) |
686 |
|
{ |
687 |
|
if ( i < gids.size() / 2 ) split[ 0 ][ i ] = i; |
688 |
|
else split[ 1 ][ i - ( gids.size() / 2 ) ] = i; |
689 |
|
} |
690 |
|
} |
691 |
|
|
692 |
|
for ( size_t i = 0; i < N_CHILDREN; i ++ ) |
693 |
|
{ |
694 |
|
int nchild = split[ i ].size(); |
695 |
|
|
696 |
|
/** TODO: need a better way */ |
697 |
|
kids[ i ]->Resize( nchild ); |
698 |
|
for ( int j = 0; j < nchild; j ++ ) |
699 |
|
{ |
700 |
|
kids[ i ]->gids[ j ] = gids[ split[ i ][ j ] ]; |
701 |
|
} |
702 |
|
} |
703 |
|
} |
704 |
|
catch ( const exception & e ) |
705 |
|
{ |
706 |
|
cout << e.what() << endl; |
707 |
|
} |
708 |
|
}; /** end Split() */ |
709 |
|
|
710 |
|
|
711 |
|
/** |
712 |
|
* @brief Check if this node contain any query using morton. |
713 |
|
* Notice that queries[] contains gids; thus, morton[] |
714 |
|
* needs to be accessed using gids. |
715 |
|
* |
716 |
|
*/ |
717 |
|
bool ContainAny( vector<size_t> &queries ) |
718 |
|
{ |
719 |
|
if ( !setup->morton.size() ) |
720 |
|
{ |
721 |
|
printf( "Morton id was not initialized.\n" ); |
722 |
|
exit( 1 ); |
723 |
|
} |
724 |
|
for ( size_t i = 0; i < queries.size(); i ++ ) |
725 |
|
{ |
726 |
|
if ( MortonHelper::IsMyParent( setup->morton[ queries[ i ] ], morton ) ) |
727 |
|
{ |
728 |
|
#ifdef DEBUG_TREE |
729 |
|
printf( "\n" ); |
730 |
|
hmlp_print_binary( setup->morton[ queries[ i ] ] ); |
731 |
|
hmlp_print_binary( morton ); |
732 |
|
printf( "\n" ); |
733 |
|
#endif |
734 |
|
return true; |
735 |
|
} |
736 |
|
} |
737 |
|
return false; |
738 |
|
|
739 |
|
}; /** end ContainAny() */ |
740 |
|
|
741 |
|
|
742 |
|
bool ContainAny( set<Node*> &querys ) |
743 |
|
{ |
744 |
|
if ( !setup->morton.size() ) |
745 |
|
{ |
746 |
|
printf( "Morton id was not initialized.\n" ); |
747 |
|
exit( 1 ); |
748 |
|
} |
749 |
|
for ( auto it = querys.begin(); it != querys.end(); it ++ ) |
750 |
|
{ |
751 |
|
if ( MortonHelper::IsMyParent( (*it)->morton, morton ) ) |
752 |
|
{ |
753 |
|
return true; |
754 |
|
} |
755 |
|
} |
756 |
|
return false; |
757 |
|
|
758 |
|
}; /** end ContainAny() */ |
759 |
|
|
760 |
|
|
761 |
|
void Print() |
762 |
|
{ |
763 |
|
printf( "l %lu offset %lu n %lu\n", this->l, this->offset, this->n ); |
764 |
|
hmlp_print_binary( this->morton ); |
765 |
|
}; |
766 |
|
|
767 |
|
|
768 |
|
/** Support dependency analysis. */ |
769 |
|
void DependOnChildren( Task *task ) |
770 |
|
{ |
771 |
|
if ( this->lchild ) this->lchild->DependencyAnalysis( R, task ); |
772 |
|
if ( this->rchild ) this->rchild->DependencyAnalysis( R, task ); |
773 |
|
this->DependencyAnalysis( RW, task ); |
774 |
|
/** Try to enqueue if there is no dependency. */ |
775 |
|
task->TryEnqueue(); |
776 |
|
}; |
777 |
|
|
778 |
|
void DependOnParent( Task *task ) |
779 |
|
{ |
780 |
|
this->DependencyAnalysis( R, task ); |
781 |
|
if ( this->lchild ) this->lchild->DependencyAnalysis( RW, task ); |
782 |
|
if ( this->rchild ) this->rchild->DependencyAnalysis( RW, task ); |
783 |
|
/** Try to enqueue if there is no dependency. */ |
784 |
|
task->TryEnqueue(); |
785 |
|
}; |
786 |
|
|
787 |
|
void DependOnNoOne( Task *task ) |
788 |
|
{ |
789 |
|
this->DependencyAnalysis( RW, task ); |
790 |
|
/** Try to enqueue if there is no dependency. */ |
791 |
|
task->TryEnqueue(); |
792 |
|
}; |
793 |
|
|
794 |
|
|
795 |
|
/** This is the call back pointer to the shared setup. */ |
796 |
|
SETUP *setup = NULL; |
797 |
|
|
798 |
|
/** Per node private data */ |
799 |
|
NODEDATA data; |
800 |
|
|
801 |
|
/** Number of points in this node. */ |
802 |
|
size_t n; |
803 |
|
|
804 |
|
/** Level in the tree */ |
805 |
|
size_t l; |
806 |
|
|
807 |
|
/** Morton ID and offset. */ |
808 |
|
size_t morton = 0; |
809 |
|
size_t offset = 0; |
810 |
|
|
811 |
|
/** ID in top-down topology order. */ |
812 |
|
size_t treelist_id; |
813 |
|
|
814 |
|
vector<size_t> gids; |
815 |
|
|
816 |
|
/** These two prunning lists are used when no NN pruning. */ |
817 |
|
set<size_t> FarIDs; |
818 |
|
set<Node*> FarNodes; |
819 |
|
set<size_t> FarNodeMortonIDs; |
820 |
|
|
821 |
|
/** Only leaf nodes will have this list. */ |
822 |
|
set<size_t> NearIDs; |
823 |
|
set<Node*> NearNodes; |
824 |
|
set<size_t> NearNodeMortonIDs; |
825 |
|
|
826 |
|
/** These two prunning lists are used when in NN pruning. */ |
827 |
|
set<size_t> NNFarIDs; |
828 |
|
set<Node*> NNFarNodes; |
829 |
|
set<Node*> ProposedNNFarNodes; |
830 |
|
set<size_t> NNFarNodeMortonIDs; |
831 |
|
|
832 |
|
/** Only leaf nodes will have this list. */ |
833 |
|
set<size_t> NNNearIDs; |
834 |
|
set<Node*> NNNearNodes; |
835 |
|
set<Node*> ProposedNNNearNodes; |
836 |
|
set<size_t> NNNearNodeMortonIDs; |
837 |
|
|
838 |
|
/** Node interaction lists recorded in MortonID. */ |
839 |
|
//set<size_t> HSSNear; |
840 |
|
//set<size_t> HSSFar; |
841 |
|
//set<size_t> FMMNear; |
842 |
|
//set<size_t> FMMFar; |
843 |
|
|
844 |
|
|
845 |
|
/** DistFar[ p ] contains a pair of gid and cached KIJ received from p. */ |
846 |
|
vector<map<size_t, Data<T>>> DistFar; |
847 |
|
vector<map<size_t, Data<T>>> DistNear; |
848 |
|
|
849 |
|
|
850 |
|
|
851 |
|
|
852 |
|
/** Lock for exclusively modifying or accessing the tree. */ |
853 |
|
Lock *treelock = NULL; |
854 |
|
|
855 |
|
/** All points to other tree nodes. */ |
856 |
|
Node *kids[ N_CHILDREN ]; |
857 |
|
Node *lchild = NULL; |
858 |
|
Node *rchild = NULL; |
859 |
|
Node *sibling = NULL; |
860 |
|
Node *parent = NULL; |
861 |
|
unordered_map<size_t, Node*> *morton2node = NULL; |
862 |
|
|
863 |
|
bool isleaf; |
864 |
|
|
865 |
|
private: |
866 |
|
|
867 |
|
}; /** end class Node */ |
868 |
|
|
869 |
|
|
870 |
|
/** |
871 |
|
* @brief Data and setup that are shared with all nodes. |
872 |
|
* |
873 |
|
* |
874 |
|
*/ |
875 |
|
template<typename SPLITTER, typename DATATYPE> |
876 |
|
class Setup |
877 |
|
{ |
878 |
|
public: |
879 |
|
|
880 |
|
typedef DATATYPE T; |
881 |
|
|
882 |
|
Setup() {}; |
883 |
|
|
884 |
|
~Setup() {}; |
885 |
|
|
886 |
|
/** maximum leaf node size */ |
887 |
|
size_t m = 0; |
888 |
|
|
889 |
|
/** by default we use 4 bits = 0-15 levels */ |
890 |
|
size_t max_depth = 15; |
891 |
|
|
892 |
|
/** Coordinates (accessed with gids) */ |
893 |
|
//Data<T> *X = NULL; |
894 |
|
|
895 |
|
/** neighbors<distance, gid> (accessed with gids) */ |
896 |
|
Data<pair<T, size_t>> *NN = NULL; |
897 |
|
|
898 |
|
/** MortonIDs of all indices. */ |
899 |
|
vector<size_t> morton; |
900 |
|
|
901 |
|
/** Tree splitter */ |
902 |
|
SPLITTER splitter; |
903 |
|
|
904 |
|
|
905 |
|
|
906 |
|
/** |
907 |
|
* @brief Check if this node contain any query using morton. |
908 |
|
* Notice that queries[] contains gids; thus, morton[] |
909 |
|
* needs to be accessed using gids. |
910 |
|
* |
911 |
|
*/ |
912 |
|
vector<size_t> ContainAny( vector<size_t> &queries, size_t target ) |
913 |
|
{ |
914 |
|
vector<size_t> validation( queries.size(), 0 ); |
915 |
|
|
916 |
|
if ( !morton.size() ) |
917 |
|
{ |
918 |
|
printf( "Morton id was not initialized.\n" ); |
919 |
|
exit( 1 ); |
920 |
|
} |
921 |
|
|
922 |
|
for ( size_t i = 0; i < queries.size(); i ++ ) |
923 |
|
{ |
924 |
|
/** notice that setup->morton only contains local morton ids */ |
925 |
|
//auto it = this->setup->morton.find( queries[ i ] ); |
926 |
|
|
927 |
|
//if ( it != this->setup->morton.end() ) |
928 |
|
//{ |
929 |
|
// if ( tree::IsMyParent( *it, this->morton ) ) validation[ i ] = 1; |
930 |
|
//} |
931 |
|
|
932 |
|
|
933 |
|
//if ( tree::IsMyParent( morton[ queries[ i ] ], target ) ) |
934 |
|
if ( MortonHelper::IsMyParent( morton[ queries[ i ] ], target ) ) |
935 |
|
validation[ i ] = 1; |
936 |
|
|
937 |
|
} |
938 |
|
return validation; |
939 |
|
|
940 |
|
}; /** end ContainAny() */ |
941 |
|
|
942 |
|
|
943 |
|
|
944 |
|
|
945 |
|
|
946 |
|
|
947 |
|
|
948 |
|
|
949 |
|
|
950 |
|
}; /** end class Setup */ |
951 |
|
|
952 |
|
|
953 |
|
/** */ |
954 |
|
template<class SETUP, class NODEDATA> |
955 |
|
class Tree |
956 |
|
{ |
957 |
|
public: |
958 |
|
|
959 |
|
typedef typename SETUP::T T; |
960 |
|
/** Define our tree node type as NODE. */ |
961 |
|
typedef Node<SETUP, NODEDATA> NODE; |
962 |
|
|
963 |
|
static const int N_CHILDREN = 2; |
964 |
|
|
965 |
|
|
966 |
|
|
967 |
|
/** data shared by all tree nodes */ |
968 |
|
SETUP setup; |
969 |
|
|
970 |
|
/** number of points */ |
971 |
|
size_t n = 0; |
972 |
|
|
973 |
|
/** maximum leaf node size */ |
974 |
|
size_t m = 0; |
975 |
|
|
976 |
|
/** depth of local tree */ |
977 |
|
size_t depth = 0; |
978 |
|
|
979 |
|
|
980 |
|
/** Mutex for exclusive right to modify treelist and morton2node. */ |
981 |
|
Lock lock; |
982 |
|
|
983 |
|
/** Local tree nodes in the top-down order. */ |
984 |
|
vector<NODE*> treelist; |
985 |
|
|
986 |
|
/** |
987 |
|
* Map MortonID to tree nodes. When distributed tree inherits Tree, |
988 |
|
* morton2node will also contain distributed and LET node. |
989 |
|
*/ |
990 |
|
unordered_map<size_t, NODE*> morton2node; |
991 |
|
|
992 |
|
/** (Default) Tree constructor. */ |
993 |
|
Tree() {}; |
994 |
|
|
995 |
|
/** (Default) Tree destructor. */ |
996 |
|
~Tree() |
997 |
|
{ |
998 |
|
//printf( "~Tree() shared treelist.size() %lu treequeue.size() %lu\n", |
999 |
|
// treelist.size(), treequeue.size() ); |
1000 |
|
for ( int i = 0; i < treelist.size(); i ++ ) |
1001 |
|
{ |
1002 |
|
if ( treelist[ i ] ) delete treelist[ i ]; |
1003 |
|
} |
1004 |
|
morton2node.clear(); |
1005 |
|
//printf( "end ~Tree() shared\n" ); |
1006 |
|
}; |
1007 |
|
|
1008 |
|
/** Currently only used in DrawInteraction() */ |
1009 |
|
void Offset( NODE *node, size_t offset ) |
1010 |
|
{ |
1011 |
|
if ( node ) |
1012 |
|
{ |
1013 |
|
node->offset = offset; |
1014 |
|
if ( node->lchild ) |
1015 |
|
{ |
1016 |
|
Offset( node->lchild, offset + 0 ); |
1017 |
|
Offset( node->rchild, offset + node->lchild->gids.size() ); |
1018 |
|
} |
1019 |
|
} |
1020 |
|
}; /** end Offset() */ |
1021 |
|
|
1022 |
|
|
1023 |
|
void RecursiveMorton( NODE *node, MortonHelper::Recursor r ) |
1024 |
|
{ |
1025 |
|
/** Return dirctly while no children exist. */ |
1026 |
|
if ( !node ) return; |
1027 |
|
/** Set my MortonID. */ |
1028 |
|
node->morton = MortonHelper::MortonID( r ); |
1029 |
|
/** Recur to children. */ |
1030 |
|
RecursiveMorton( node->lchild, MortonHelper::RecurLeft( r ) ); |
1031 |
|
RecursiveMorton( node->rchild, MortonHelper::RecurRight( r ) ); |
1032 |
|
/** Fill the */ |
1033 |
|
if ( !node->lchild ) |
1034 |
|
{ |
1035 |
|
for ( auto it : node->gids ) setup.morton[ it ] = node->morton; |
1036 |
|
} |
1037 |
|
}; |
1038 |
|
|
1039 |
|
|
1040 |
|
    /**
     * @brief Allocate the local tree using the local root
     *        with n points and depth l.
     *
     * This routine will be called in two situations:
     * 1) called by Tree::TreePartition(), or
     * 2) called by MPITree::TreePartition().
     */
    void AllocateNodes( NODE *root )
    {
      /** Compute the global tree depth using std::log2(). */
      int glb_depth = std::ceil( std::log2( (double)n / m ) );
      if ( glb_depth > setup.max_depth ) glb_depth = setup.max_depth;
      /**
       * Compute the local tree depth (levels below the local root).
       * NOTE(review): depth is size_t; if glb_depth < root->l (e.g. when
       * n <= m), this subtraction wraps to a huge value — confirm callers
       * guarantee glb_depth >= root->l.
       */
      depth = glb_depth - root->l;

      /** Clean up and reserve space for local tree nodes. */
      for ( auto node_ptr : treelist ) delete node_ptr;
      treelist.clear();
      morton2node.clear();
      /** A perfect binary tree of local depth d has 2^(d+1)-1 nodes. */
      treelist.reserve( 1 << ( depth + 1 ) );
      deque<NODE*> treequeue;
      /** Push root into the treequeue to seed the BFS. */
      treequeue.push_back( root );

      /**
       * Allocate children with BFS (queue solution). Every leaf enqueues a
       * NULL sentinel, so the loop terminates once a sentinel reaches the
       * front (all real nodes processed).
       */
      while ( auto *node = treequeue.front() )
      {
        /** Assign local treenode_id (position in BFS order). */
        node->treelist_id = treelist.size();
        /** Account for the depth of the distributed tree. */
        if ( node->l < glb_depth )
        {
          for ( int i = 0; i < N_CHILDREN; i ++ )
          {
            /** Children start with 0 gids; splitting assigns them later. */
            node->kids[ i ] = new NODE( &setup, 0, node->l + 1, node, &morton2node, &lock );
            treequeue.push_back( node->kids[ i ] );
          }
          node->lchild = node->kids[ 0 ];
          node->rchild = node->kids[ 1 ];
          /** Wire up sibling pointers between the two children. */
          if ( node->lchild ) node->lchild->sibling = node->rchild;
          if ( node->rchild ) node->rchild->sibling = node->lchild;
        }
        else
        {
          /** Leaf nodes are annotated with this flag */
          node->isleaf = true;
          treequeue.push_back( NULL );
        }
        treelist.push_back( node );
        treequeue.pop_front();
      }

    }; /** end AllocateNodes() */
1098 |
|
|
1099 |
|
|
1100 |
|
|
1101 |
|
    /** @brief Shared-memory tree partition. */
    void TreePartition()
    {
      /** Timers (currently accumulated but not reported). */
      double beg, alloc_time, split_time, morton_time, permute_time;

      /** Problem size and leaf size come from the SETUP policy. */
      this->n = setup.ProblemSize();
      this->m = setup.LeafNodeSize();

      /** Reset and initialize global indices with lexicographical order. */
      global_indices.clear();
      for ( size_t i = 0; i < n; i ++ ) global_indices.push_back( i );

      /** Reset the warning flag and clean up the treelist */
      has_uneven_split = false;

      /** Allocate all tree nodes in advance; the root owns all gids. */
      beg = omp_get_wtime();
      AllocateNodes( new NODE( &setup, n, 0, global_indices, NULL, &morton2node, &lock ) );
      alloc_time = omp_get_wtime() - beg;

      /** Recursive splitting (top-down). */
      beg = omp_get_wtime();
      SplitTask<NODE> splittask;
      TraverseDown( splittask );
      ExecuteAllTasks();
      split_time = omp_get_wtime() - beg;


      /** Compute node and point MortonID. */
      setup.morton.resize( n );
      /** Compute MortonID recursively. */
      RecursiveMorton( treelist[ 0 ], MortonHelper::Root() );

      /** Assign per-node offsets (used by DrawInteraction()). */
      Offset( treelist[ 0 ], 0 );

      /** Construct morton2node map for the local tree. */
      morton2node.clear();
      for ( size_t i = 0; i < treelist.size(); i ++ )
      {
        morton2node[ treelist[ i ]->morton ] = treelist[ i ];
      }

      /** Adjust gids to the appropriate order. */
      IndexPermuteTask<NODE> indexpermutetask;
      TraverseUp( indexpermutetask );
      ExecuteAllTasks();

    }; /** end TreePartition() */
1150 |
|
|
1151 |
|
|
1152 |
|
|
1153 |
|
vector<size_t> GetPermutation() |
1154 |
|
{ |
1155 |
|
int n_nodes = 1 << this->depth; |
1156 |
|
auto level_beg = this->treelist.begin() + n_nodes - 1; |
1157 |
|
|
1158 |
|
vector<size_t> perm; |
1159 |
|
|
1160 |
|
for ( int node_ind = 0; node_ind < n_nodes; node_ind ++ ) |
1161 |
|
{ |
1162 |
|
auto *node = *(level_beg + node_ind); |
1163 |
|
auto gids = node->gids; |
1164 |
|
perm.insert( perm.end(), gids.begin(), gids.end() ); |
1165 |
|
} |
1166 |
|
|
1167 |
|
return perm; |
1168 |
|
}; /** end GetTreePermutation() */ |
1169 |
|
|
1170 |
|
|
1171 |
|
|
1172 |
|
|
1173 |
|
|
1174 |
|
/** |
1175 |
|
* |
1176 |
|
*/ |
1177 |
|
template<typename KNNTASK> |
1178 |
|
Data<pair<T, size_t>> AllNearestNeighbor( size_t n_tree, size_t k, |
1179 |
|
size_t max_depth, pair<T, size_t> initNN, |
1180 |
|
KNNTASK &dummy ) |
1181 |
|
{ |
1182 |
|
/** k-by-N, neighbor pairs. */ |
1183 |
|
Data<pair<T, size_t>> NN( k, setup.ProblemSize(), initNN ); |
1184 |
|
|
1185 |
|
/** Use leaf_size = 4 * k. */ |
1186 |
|
setup.m = 4 * k; |
1187 |
|
if ( setup.m < 32 ) setup.m = 32; |
1188 |
|
setup.NN = &NN; |
1189 |
|
|
1190 |
|
if ( REPORT_ANN_STATUS ) |
1191 |
|
{ |
1192 |
|
printf( "========================================================\n"); |
1193 |
|
} |
1194 |
|
|
1195 |
|
/** This loop has to be sequential to avoid race condiditon on NN. */ |
1196 |
|
for ( int t = 0; t < n_tree; t ++ ) |
1197 |
|
{ |
1198 |
|
/** Report accuracy */ |
1199 |
|
double knn_acc = 0.0; |
1200 |
|
size_t num_acc = 0; |
1201 |
|
/** Randomize metric tree and exhausted search for each leaf node. */ |
1202 |
|
TreePartition(); |
1203 |
|
TraverseLeafs( dummy ); |
1204 |
|
ExecuteAllTasks(); |
1205 |
|
|
1206 |
|
size_t n_nodes = 1 << depth; |
1207 |
|
auto level_beg = treelist.begin() + n_nodes - 1; |
1208 |
|
for ( size_t node_ind = 0; node_ind < n_nodes; node_ind ++ ) |
1209 |
|
{ |
1210 |
|
auto *node = *(level_beg + node_ind); |
1211 |
|
knn_acc += node->data.knn_acc; |
1212 |
|
num_acc += node->data.num_acc; |
1213 |
|
} |
1214 |
|
if ( REPORT_ANN_STATUS ) |
1215 |
|
{ |
1216 |
|
printf( "ANN iter %2d, average accuracy %.2lf%% (over %4lu samples)\n", |
1217 |
|
t, knn_acc / num_acc, num_acc ); |
1218 |
|
} |
1219 |
|
|
1220 |
|
/** Increase leaf_size with less than 80% accuracy. */ |
1221 |
|
if ( knn_acc / num_acc < 0.8 ) |
1222 |
|
{ |
1223 |
|
if ( 2.0 * setup.m < 2048 ) setup.m = 2.0 * setup.m; |
1224 |
|
} |
1225 |
|
else break; |
1226 |
|
|
1227 |
|
|
1228 |
|
#ifdef DEBUG_TREE |
1229 |
|
printf( "Iter %2d NN 0 ", t ); |
1230 |
|
for ( size_t i = 0; i < NN.row(); i ++ ) |
1231 |
|
{ |
1232 |
|
printf( "%E(%lu) ", NN[ i ].first, NN[ i ].second ); |
1233 |
|
} |
1234 |
|
printf( "\n" ); |
1235 |
|
#endif |
1236 |
|
} /** end for each tree. */ |
1237 |
|
|
1238 |
|
if ( REPORT_ANN_STATUS ) |
1239 |
|
{ |
1240 |
|
printf( "========================================================\n\n"); |
1241 |
|
} |
1242 |
|
|
1243 |
|
/** Sort neighbor pairs in ascending order. */ |
1244 |
|
#pragma omp parallel for |
1245 |
|
for ( size_t j = 0; j < NN.col(); j ++ ) |
1246 |
|
sort( NN.data() + j * NN.row(), NN.data() + ( j + 1 ) * NN.row() ); |
1247 |
|
|
1248 |
|
/** Check for illegle values. */ |
1249 |
|
for ( auto &neig : NN ) |
1250 |
|
{ |
1251 |
|
if ( neig.second < 0 || neig.second >= NN.col() ) |
1252 |
|
{ |
1253 |
|
printf( "Illegle neighbor gid %lu\n", neig.second ); |
1254 |
|
break; |
1255 |
|
} |
1256 |
|
} |
1257 |
|
|
1258 |
|
/** Return a matrix of neighbor pairs. */ |
1259 |
|
return NN; |
1260 |
|
|
1261 |
|
}; /** end AllNearestNeighbor() */ |
1262 |
|
|
1263 |
|
|
1264 |
|
Data<int> CheckAllInteractions() |
1265 |
|
{ |
1266 |
|
/** Get the total depth of the tree. */ |
1267 |
|
int total_depth = treelist.back()->l; |
1268 |
|
/** Number of total leaf nodes. */ |
1269 |
|
int num_leafs = 1 << total_depth; |
1270 |
|
/** Create a 2^l-by-2^l table to check all interactions. */ |
1271 |
|
Data<int> A( num_leafs, num_leafs, 0 ); |
1272 |
|
/** Now traverse all tree nodes (excluding the root). */ |
1273 |
|
for ( int t = 1; t < treelist.size(); t ++ ) |
1274 |
|
{ |
1275 |
|
auto *node = treelist[ t ]; |
1276 |
|
/** Loop over all near interactions. */ |
1277 |
|
for ( auto *it : node->NNNearNodes ) |
1278 |
|
{ |
1279 |
|
auto I = MortonHelper::Morton2Offsets( node->morton, total_depth ); |
1280 |
|
auto J = MortonHelper::Morton2Offsets( it->morton, total_depth ); |
1281 |
|
for ( auto i : I ) for ( auto j : J ) A( i, j ) += 1; |
1282 |
|
} |
1283 |
|
/** Loop over all far interactions. */ |
1284 |
|
for ( auto *it : node->NNFarNodes ) |
1285 |
|
{ |
1286 |
|
auto I = MortonHelper::Morton2Offsets( node->morton, total_depth ); |
1287 |
|
auto J = MortonHelper::Morton2Offsets( it->morton, total_depth ); |
1288 |
|
for ( auto i : I ) for ( auto j : J ) A( i, j ) += 1; |
1289 |
|
} |
1290 |
|
} |
1291 |
|
|
1292 |
|
for ( size_t i = 0; i < num_leafs; i ++ ) |
1293 |
|
{ |
1294 |
|
for ( size_t j = 0; j < num_leafs; j ++ ) printf( "%d", A( i, j ) ); |
1295 |
|
printf( "\n" ); |
1296 |
|
} |
1297 |
|
|
1298 |
|
|
1299 |
|
return A; |
1300 |
|
}; /** end CheckAllInteractions() */ |
1301 |
|
|
1302 |
|
|
1303 |
|
|
1304 |
|
|
1305 |
|
|
1306 |
|
|
1307 |
|
|
1308 |
|
|
1309 |
|
|
1310 |
|
template<typename TASK, typename... Args> |
1311 |
|
void TraverseLeafs( TASK &dummy, Args&... args ) |
1312 |
|
{ |
1313 |
|
/** Contain at lesat one tree node. */ |
1314 |
|
assert( this->treelist.size() ); |
1315 |
|
|
1316 |
|
int n_nodes = 1 << this->depth; |
1317 |
|
auto level_beg = this->treelist.begin() + n_nodes - 1; |
1318 |
|
|
1319 |
|
if ( out_of_order_traversal ) |
1320 |
|
{ |
1321 |
|
for ( int node_ind = 0; node_ind < n_nodes; node_ind ++ ) |
1322 |
|
{ |
1323 |
|
auto *node = *(level_beg + node_ind); |
1324 |
|
RecuTaskSubmit( node, dummy, args... ); |
1325 |
|
} |
1326 |
|
} |
1327 |
|
else |
1328 |
|
{ |
1329 |
|
int nthd_glb = omp_get_max_threads(); |
1330 |
|
/** do not parallelize if there is less nodes than threads */ |
1331 |
|
#pragma omp parallel for if ( n_nodes > nthd_glb / 2 ) schedule( dynamic ) |
1332 |
|
for ( int node_ind = 0; node_ind < n_nodes; node_ind ++ ) |
1333 |
|
{ |
1334 |
|
auto *node = *(level_beg + node_ind); |
1335 |
|
RecuTaskExecute( node, dummy, args... ); |
1336 |
|
} |
1337 |
|
} |
1338 |
|
}; /** end TraverseLeafs() */ |
1339 |
|
|
1340 |
|
|
1341 |
|
|
1342 |
|
|
1343 |
|
|
1344 |
|
template<typename TASK, typename... Args> |
1345 |
|
void TraverseUp( TASK &dummy, Args&... args ) |
1346 |
|
{ |
1347 |
|
/** contain at lesat one tree node */ |
1348 |
|
assert( this->treelist.size() ); |
1349 |
|
|
1350 |
|
/** |
1351 |
|
* traverse the local tree without the root |
1352 |
|
* |
1353 |
|
* IMPORTANT: local root alias of the distributed leaf node |
1354 |
|
* IMPORTANT: here l must be int, size_t will wrap over |
1355 |
|
* |
1356 |
|
*/ |
1357 |
|
|
1358 |
|
int local_begin_level = ( treelist[ 0 ]->l ) ? 1 : 0; |
1359 |
|
|
1360 |
|
/** traverse level-by-level in sequential */ |
1361 |
|
for ( int l = this->depth; l >= local_begin_level; l -- ) |
1362 |
|
{ |
1363 |
|
size_t n_nodes = 1 << l; |
1364 |
|
auto level_beg = this->treelist.begin() + n_nodes - 1; |
1365 |
|
|
1366 |
|
|
1367 |
|
if ( out_of_order_traversal ) |
1368 |
|
{ |
1369 |
|
/** loop over each node at level-l */ |
1370 |
|
for ( size_t node_ind = 0; node_ind < n_nodes; node_ind ++ ) |
1371 |
|
{ |
1372 |
|
auto *node = *(level_beg + node_ind); |
1373 |
|
RecuTaskSubmit( node, dummy, args... ); |
1374 |
|
} |
1375 |
|
} |
1376 |
|
else |
1377 |
|
{ |
1378 |
|
int nthd_glb = omp_get_max_threads(); |
1379 |
|
/** do not parallelize if there is less nodes than threads */ |
1380 |
|
#pragma omp parallel for if ( n_nodes > nthd_glb / 2 ) schedule( dynamic ) |
1381 |
|
for ( size_t node_ind = 0; node_ind < n_nodes; node_ind ++ ) |
1382 |
|
{ |
1383 |
|
auto *node = *(level_beg + node_ind); |
1384 |
|
RecuTaskExecute( node, dummy, args... ); |
1385 |
|
} |
1386 |
|
} |
1387 |
|
} |
1388 |
|
}; /** end TraverseUp() */ |
1389 |
|
|
1390 |
|
|
1391 |
|
|
1392 |
|
|
1393 |
|
|
1394 |
|
|
1395 |
|
template<typename TASK, typename... Args> |
1396 |
|
void TraverseDown( TASK &dummy, Args&... args ) |
1397 |
|
{ |
1398 |
|
/** contain at lesat one tree node */ |
1399 |
|
assert( this->treelist.size() ); |
1400 |
|
|
1401 |
|
/** |
1402 |
|
* traverse the local tree without the root |
1403 |
|
* |
1404 |
|
* IMPORTANT: local root alias of the distributed leaf node |
1405 |
|
* IMPORTANT: here l must be int, size_t will wrap over |
1406 |
|
* |
1407 |
|
*/ |
1408 |
|
int local_begin_level = ( treelist[ 0 ]->l ) ? 1 : 0; |
1409 |
|
|
1410 |
|
for ( int l = local_begin_level; l <= this->depth; l ++ ) |
1411 |
|
{ |
1412 |
|
size_t n_nodes = 1 << l; |
1413 |
|
auto level_beg = this->treelist.begin() + n_nodes - 1; |
1414 |
|
|
1415 |
|
if ( out_of_order_traversal ) |
1416 |
|
{ |
1417 |
|
/** loop over each node at level-l */ |
1418 |
|
for ( size_t node_ind = 0; node_ind < n_nodes; node_ind ++ ) |
1419 |
|
{ |
1420 |
|
auto *node = *(level_beg + node_ind); |
1421 |
|
RecuTaskSubmit( node, dummy, args... ); |
1422 |
|
} |
1423 |
|
} |
1424 |
|
else |
1425 |
|
{ |
1426 |
|
int nthd_glb = omp_get_max_threads(); |
1427 |
|
/** do not parallelize if there is less nodes than threads */ |
1428 |
|
#pragma omp parallel for if ( n_nodes > nthd_glb / 2 ) schedule( dynamic ) |
1429 |
|
for ( size_t node_ind = 0; node_ind < n_nodes; node_ind ++ ) |
1430 |
|
{ |
1431 |
|
auto *node = *(level_beg + node_ind); |
1432 |
|
RecuTaskExecute( node, dummy, args... ); |
1433 |
|
} |
1434 |
|
} |
1435 |
|
} |
1436 |
|
}; /** end TraverseDown() */ |
1437 |
|
|
1438 |
|
|
1439 |
|
|
1440 |
|
    /**
     * @brief For unordered traversal, we just call local
     *        downward traversal (ordering does not matter, so any
     *        full-tree visit works).
     */
    template<typename TASK, typename... Args>
    void TraverseUnOrdered( TASK &dummy, Args&... args )
    {
      TraverseDown( dummy, args... );
    }; /** end TraverseUnOrdered() */
1449 |
|
|
1450 |
|
|
1451 |
|
void DependencyCleanUp() |
1452 |
|
{ |
1453 |
|
//for ( size_t i = 0; i < treelist.size(); i ++ ) |
1454 |
|
//{ |
1455 |
|
// treelist[ i ]->DependencyCleanUp(); |
1456 |
|
//} |
1457 |
|
for ( auto node : treelist ) node->DependencyCleanUp(); |
1458 |
|
|
1459 |
|
for ( auto it : morton2node ) |
1460 |
|
{ |
1461 |
|
auto *node = it.second; |
1462 |
|
if ( node ) node->DependencyCleanUp(); |
1463 |
|
} |
1464 |
|
}; /** end DependencyCleanUp() */ |
1465 |
|
|
1466 |
|
    /** Flush the runtime: run all submitted tasks, then clear their
     *  dependencies so the next traversal starts from a clean state. */
    void ExecuteAllTasks()
    {
      hmlp_run();
      DependencyCleanUp();
    }; /** end ExecuteAllTasks() */
1471 |
|
|
1472 |
|
|
1473 |
|
    /** @brief Whether traversals submit out-of-order runtime tasks
     *         (true) or execute in-place with OpenMP (false). */
    bool DoOutOfOrder() { return out_of_order_traversal; };
1474 |
|
|
1475 |
|
|
1476 |
|
/** @brief Summarize all events in each level. */ |
1477 |
|
template<typename SUMMARY> |
1478 |
|
void Summary( SUMMARY &summary ) |
1479 |
|
{ |
1480 |
|
assert( N_CHILDREN == 2 ); |
1481 |
|
|
1482 |
|
for ( std::size_t l = 0; l <= depth; l ++ ) |
1483 |
|
{ |
1484 |
|
size_t n_nodes = 1 << l; |
1485 |
|
auto level_beg = treelist.begin() + n_nodes - 1; |
1486 |
|
for ( size_t node_ind = 0; node_ind < n_nodes; node_ind ++ ) |
1487 |
|
{ |
1488 |
|
auto *node = *(level_beg + node_ind); |
1489 |
|
summary( node ); |
1490 |
|
} |
1491 |
|
} |
1492 |
|
}; /** end void Summary() */ |
1493 |
|
|
1494 |
|
|
1495 |
|
  private:

    /** Submit runtime tasks (true) or execute in-place with OpenMP (false). */
    bool out_of_order_traversal = true;

  protected:

    /** Scratch list of all point gids (0..n-1), handed to the root node
     *  by TreePartition(). */
    vector<size_t> global_indices;

}; /** end class Tree */
}; /** end namespace tree */
}; /** end namespace hmlp */

#endif /** define TREE_HPP */