GCC Code Coverage Report
Directory: .
File:      frame/base/tci.cpp
Date:      2019-01-14

            Exec   Total   Coverage
Lines:         3     118      2.5 %
Branches:      0      62      0.0 %

Line  Exec  Source
   1        #include <base/tci.hpp>
   2
   3        namespace hmlp
   4        {
   5
   6
   7
   8        Range::Range( int beg, int end, int inc )
   9        {
  10          info = make_tuple( beg, end, inc );
  11        };
  12
  13        int Range::beg() { return get<0>( info ); };
  14
  15        int Range::end() { return get<1>( info ); };
  16
  17        int Range::inc() { return get<2>( info ); };
  18
  19        void Range::Print( int prefix )
  20        {
  21          printf( "%2d %5d %5d %5d\n", prefix, beg(), end(), inc() );
  22          fflush( stdout );
  23        };
  24
  25
  26
  27
  28        /** @brief Shared-memory lock that calls either a pthread or an omp mutex. */
  29   345  Lock::Lock()
  30        {
  31        #ifdef USE_PTHREAD_RUNTIME
  32          if ( pthread_mutex_init( &lock, NULL ) )
  33          {
  34            printf( "pthread_mutex_init(): cannot initialize locks properly\n" );
  35          }
  36        #else
  37   345    omp_init_lock( &lock );
  38        #endif
  39   345  }; /** end Lock::Lock() */
  40
  41        Lock::~Lock()
  42        {
  43        #ifdef USE_PTHREAD_RUNTIME
  44          if ( pthread_mutex_destroy( &lock ) )
  45          {
  46            printf( "pthread_mutex_destroy(): cannot destroy locks properly\n" );
  47          }
  48        #else
  49          omp_destroy_lock( &lock );
  50        #endif
  51        }; /** end Lock::~Lock() */
  52
  53        void Lock::Acquire()
  54        {
  55        #ifdef USE_PTHREAD_RUNTIME
  56          if ( pthread_mutex_lock( &lock ) )
  57          {
  58            printf( "pthread_mutex_lock(): cannot acquire locks properly\n" );
  59          }
  60        #else
  61          omp_set_lock( &lock );
  62        #endif
  63        };
  64
  65        void Lock::Release()
  66        {
  67        #ifdef USE_PTHREAD_RUNTIME
  68          if ( pthread_mutex_unlock( &lock ) )
  69          {
  70            printf( "pthread_mutex_unlock(): cannot release locks properly\n" );
  71          }
  72        #else
  73          omp_unset_lock( &lock );
  74        #endif
  75        };
  76
  77
  78
  79
  80        namespace tci
  81        {
  82
  83        void Context::Barrier( int size )
  84        {
  85          /** Early return if there is only one thread. */
  86          if ( size < 2 ) return;
  87
  88          //printf( "%2d size %2d Barrier( BEG ) %lu\n", omp_get_thread_num(), size, this );
  89          //fflush( stdout );
  90
  91
  92          /** Get my barrier sense. */
  93          bool my_sense = barrier_sense;
  94          /** Check how many threads in the communicator have arrived. */
  95          int my_threads_arrived;
  96          #pragma omp atomic capture
  97          my_threads_arrived = ++ barrier_threads_arrived;
  98
  99          //printf( "%2d my_threads_arrived %d\n", omp_get_thread_num(),
 100          //    my_threads_arrived ); fflush( stdout );
 101
 102          /** If I am the last thread to arrive, then reset. */
 103          if ( my_threads_arrived == size )
 104          {
 105            barrier_threads_arrived = 0;
 106            barrier_sense = !barrier_sense;
 107          }
 108          /** Otherwise, wait until barrier_sense is changed. */
 109          else
 110          {
 111            volatile bool *listener = &barrier_sense;
 112            while ( *listener == my_sense ) {}
 113          }
 114
 115          //printf( "%2d size %2d Barrier( END )\n", omp_get_thread_num(), size );
 116          //fflush( stdout );
 117
 118        }; /** end Context::Barrier() */
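
For reference, a minimal, self-contained sketch of the same sense-reversing
barrier idea (illustration only, not part of the measured tci.cpp; the
SenseBarrier name and the demo in main() are hypothetical):

#include <omp.h>
#include <cstdio>

struct SenseBarrier
{
  bool sense = false;     /** Flipped by the last thread of each round. */
  int  arrived = 0;       /** Number of threads that have reached the barrier. */

  void wait( int size )
  {
    if ( size < 2 ) return;
    bool my_sense = sense;                /** Remember the sense of this round. */
    int my_arrived;
    #pragma omp atomic capture
    my_arrived = ++ arrived;
    if ( my_arrived == size )
    {
      arrived = 0;                        /** Last thread resets the counter ... */
      sense = !sense;                     /** ... and releases the waiters.      */
    }
    else
    {
      volatile bool *listener = &sense;
      while ( *listener == my_sense ) {}  /** Spin until the sense flips. */
    }
  }
};

int main()
{
  SenseBarrier barrier;
  #pragma omp parallel
  {
    printf( "thread %2d before barrier\n", omp_get_thread_num() );
    barrier.wait( omp_get_num_threads() );
    printf( "thread %2d after barrier\n", omp_get_thread_num() );
  }
  return 0;
}
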
 119
 120
 121
 122        /** (Default) within OpenMP parallel construct (all threads). */
 123        Comm::Comm()
 124        {
 125          /** Assign all threads to the communicator. */
 126          size = omp_get_num_threads();
 127          /** Assign my rank (tid) in the communicator. */
 128          rank = omp_get_thread_num();
 129        }; /** end Comm::Comm() */
 130
 131        Comm::Comm( Context* context ) : Comm::Comm()
 132        {
 133          /** Assign the shared context. */
 134          this->context = context;
 135        }; /** end Comm::Comm() */
 136
 137        Comm::Comm( Comm* parent, Context* context, int assigned_size, int assigned_rank )
 138        {
 139          /** Use the assigned size as my size. */
 140          size = assigned_size;
 141          /** Use the assigned rank as my rank. */
 142          rank = assigned_rank;
 143          /** Assign the shared context. */
 144          this->context = context;
 145          /** Assign the parent communicator pointer. */
 146          this->parent = parent;
 147        }; /** end Comm::Comm() */
 148
 149        Comm Comm::Split( int num_splits )
 150        {
 151          /** Early return if possible. */
 152          if ( num_splits == 1 || size <= 1 ) return Comm( this, context, size, rank );
 153          /** Prepare to create gang_size subcommunicators. */
 154          gang_size = num_splits;
 155          /**
 156           *  By default, we split threads evenly using "close" affinity.
 157           *  Threads with the same color will be in the same subcomm.
 158           *
 159           *  example: (num_splits=2)
 160           *
 161           *  rank       0  1  2  3  4  5  6  7
 162           *  color      0  0  0  0  1  1  1  1  (gang_rank)
 163           *  first      0  0  0  0  4  4  4  4
 164           *  last       4  4  4  4  8  8  8  8
 165           *  child_rank 0  1  2  3  0  1  2  3
 166           *             4  4  4  4  4  4  4  4  (child_size)
 167           */
 168          size_t color = ( num_splits * rank ) / size;
 169          size_t first = ( ( color + 0 ) * size ) / num_splits;
 170          size_t last  = ( ( color + 1 ) * size ) / num_splits;
 171          size_t child_rank = rank - first;
 172          size_t child_size = last - first;
 173          /** Use color to be the gang_rank. */
 174          gang_rank = color;
 175          /** Create new contexts. */
 176          Context** child_contexts;
 177          if ( Master() ) child_contexts = new Context*[ num_splits ];
 178          /** Master bcast its buffer. */
 179          Bcast( child_contexts, 0 );
 180          /** The master of each gang will allocate the new context. */
 181          if ( child_rank == 0 ) child_contexts[ color ] = new Context();
 182          Barrier();
 183          /** Create and return the subcommunicator. */
 184          Comm child_comm( this, child_contexts[ color ], child_size, child_rank );
 185          Barrier();
 186          if ( Master() ) delete [] child_contexts;
 187          return child_comm;
 188        }; /** end Comm::Split() */
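
The split arithmetic above can be checked in isolation; a standalone sketch
(illustration only, not part of the measured source) that reproduces the
example table for size = 8 and num_splits = 2:

#include <cstdio>

int main()
{
  int size = 8, num_splits = 2;
  for ( int rank = 0; rank < size; rank ++ )
  {
    int color = ( num_splits * rank ) / size;           /** gang_rank              */
    int first = ( ( color + 0 ) * size ) / num_splits;  /** first rank of my gang  */
    int last  = ( ( color + 1 ) * size ) / num_splits;  /** one past the last rank */
    printf( "rank %d color %d first %d last %d child_rank %d child_size %d\n",
        rank, color, first, last, rank - first, last - first );
  }
  return 0;
}
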
 189
 190
 191        bool Comm::Master() { return rank == 0; };
 192
 193        void Comm::Barrier() { context->Barrier( size ); };
 194
 195        void Comm::Send( void** sent_object ) { context->buffer = *sent_object; };
 196
 197        void Comm::Recv( void** recv_object ) { *recv_object = context->buffer; };
 198
 199        void Comm::Create1DLocks( int n )
 200        {
 201          if ( Master() )
 202          {
 203            lock1d = new vector<Lock>( n );
 204          }
 205          Bcast( lock1d, 0 );
 206        };
 207
 208        void Comm::Destroy1DLocks()
 209        {
 210          Barrier();
 211          if ( Master() ) delete lock1d;
 212        };
 213
 214        void Comm::Create2DLocks( int m, int n )
 215        {
 216          if ( Master() )
 217          {
 218            lock2d = new vector<vector<Lock>>( m );
 219            for ( int i = 0; i < m; i ++ ) (*lock2d)[ i ].resize( n );
 220          }
 221          Bcast( lock2d, 0 );
 222        };
 223
 224
 225        void Comm::Destroy2DLocks()
 226        {
 227          Barrier();
 228          if ( Master() ) delete lock2d;
 229        };
 230
 231        void Comm::Acquire1DLocks( int j )
 232        {
 233          if ( lock1d )
 234          {
 235            auto n = lock1d->size();
 236            (*lock1d)[ j % n ].Acquire();
 237          }
 238          else if ( parent ) parent->Acquire1DLocks( j );
 239        };
 240
 241        void Comm::Release1DLocks( int j )
 242        {
 243          if ( lock1d )
 244          {
 245            auto n = lock1d->size();
 246            (*lock1d)[ j % n ].Release();
 247          }
 248          else if ( parent ) parent->Release1DLocks( j );
 249        };
 250
 251        void Comm::Acquire2DLocks( int i, int j )
 252        {
 253          if ( lock2d )
 254          {
 255            auto m = (*lock2d).size();
 256            auto n = (*lock2d)[ 0 ].size();
 257            (*lock2d)[ i % m ][ j % n ].Acquire();
 258          }
 259          else if ( parent ) parent->Acquire2DLocks( i, j );
 260        };
 261
 262        void Comm::Release2DLocks( int i, int j )
 263        {
 264          if ( lock2d )
 265          {
 266            auto m = (*lock2d).size();
 267            auto n = (*lock2d)[ 0 ].size();
 268            (*lock2d)[ i % m ][ j % n ].Release();
 269          }
 270          else if ( parent ) parent->Release2DLocks( i, j );
 271        };
 272
 273        int Comm::GetCommSize() { return size; };
 274
 275        int Comm::GetCommRank() { return rank; };
 276
 277        int Comm::GetGangSize() { return gang_size; };
 278
 279        int Comm::GetGangRank() { return gang_rank; };
 280
 281        void Comm::Print( int prefix )
 282        {
 283          printf( "%2d size %2d rank %2d gang_size %2d gang_rank %2d\n",
 284              prefix, size, rank, gang_size, gang_rank );
 285          fflush( stdout );
 286        };
 287
 288
 289        int Comm::BalanceOver1DGangs( int n, int default_size, int nb )
 290        {
 291          size_t nparts = gang_size;
 292          if ( nparts > 1 ) return ( ( n - 1 ) / ( nb * nparts ) + 1 ) * nb;
 293          return default_size;
 294        };
 295
 296        Range Comm::DistributeOver1DThreads( int beg, int end, int nb )
 297        {
 298          size_t nparts = size;
 299          /** Select the proper partitioning policy. */
 300          SchedulePolicy strategy = HMLP_SCHEDULE_DEFAULT;
 301          //SchedulePolicy strategy = HMLP_SCHEDULE_UNIFORM;
 302          /** Return the tuple accordingly. */
 303          switch ( strategy )
 304          {
 305            case HMLP_SCHEDULE_DEFAULT:
 306            {
 307              /** Default is Round Robin. */
 308            }
 309            case HMLP_SCHEDULE_ROUND_ROBIN:
 310            {
 311              return Range( beg + rank * nb, end, nparts * nb );
 312            }
 313            case HMLP_SCHEDULE_UNIFORM:
 314            {
 315              int len = ( ( end - beg - 1 ) / ( nparts * nb ) + 1 ) * nb;
 316              beg = beg + rank * len;
 317              end = std::min( end, beg + len );
 318              return Range( beg, end, nb );
 319              //printf( "GetRange(): HMLP_SCHEDULE_UNIFORM not yet implemented.\n" );
 320              //exit( 1 );
 321            }
 322            case HMLP_SCHEDULE_HEFT:
 323            {
 324              printf( "GetRange(): HMLP_SCHEDULE_HEFT not yet implemented.\n" );
 325              exit( 1 );
 326            }
 327            default:
 328            {
 329              exit( 1 );
 330            }
 331          }
 332        }; /** end Comm::DistributeOver1DThreads() */
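
A standalone sketch of the default round-robin policy (illustration only, not
part of the measured source): the returned Range( beg + rank * nb, end,
nparts * nb ) means thread `rank` visits blocks of width nb starting at
beg + rank * nb and strides by size * nb. With the hypothetical values
beg = 0, end = 100, nb = 16, size = 4:

#include <cstdio>

int main()
{
  int beg = 0, end = 100, nb = 16, size = 4;
  for ( int rank = 0; rank < size; rank ++ )
  {
    printf( "rank %d blocks:", rank );
    /** Same iteration space as Range( beg + rank * nb, end, size * nb ). */
    for ( int i = beg + rank * nb; i < end; i += size * nb )
      printf( " [%d,%d)", i, ( i + nb < end ) ? i + nb : end );
    printf( "\n" );
  }
  return 0;
}
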
 333
 334
 335        Range Comm::DistributeOver1DGangs( int beg, int end, int nb )
 336        {
 337          size_t nparts = gang_size;
 338          /** Select the proper partitioning policy. */
 339          SchedulePolicy strategy = HMLP_SCHEDULE_DEFAULT;
 340          //SchedulePolicy strategy = HMLP_SCHEDULE_UNIFORM;
 341          /** Return the tuple accordingly. */
 342          switch ( strategy )
 343          {
 344            case HMLP_SCHEDULE_DEFAULT:
 345            {
 346              /** Default is Round Robin. */
 347            }
 348            case HMLP_SCHEDULE_ROUND_ROBIN:
 349            {
 350              return Range( beg + gang_rank * nb, end, nparts * nb );
 351            }
 352            case HMLP_SCHEDULE_UNIFORM:
 353            {
 354              int len = ( ( end - beg - 1 ) / ( nparts * nb ) + 1 ) * nb;
 355              beg = beg + gang_rank * len;
 356              end = std::min( end, beg + len );
 357              return Range( beg, end, nb );
 358              //printf( "GetRange(): HMLP_SCHEDULE_UNIFORM not yet implemented.\n" );
 359              //exit( 1 );
 360            }
 361            case HMLP_SCHEDULE_HEFT:
 362            {
 363              printf( "GetRange(): HMLP_SCHEDULE_HEFT not yet implemented.\n" );
 364              exit( 1 );
 365            }
 366            default:
 367            {
 368              exit( 1 );
 369            }
 370          }
 371        }; /** end Comm::DistributeOver1DGangs() */
 372
 373
 374
 375        }; /** end namespace tci */
 376        }; /** end namespace hmlp */
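
For orientation, a hypothetical end-to-end usage sketch (not part of the
measured source). It assumes base/tci.hpp declares hmlp::tci::Context and
hmlp::tci::Comm exactly as defined above and that the program is compiled
with OpenMP enabled:

#include <base/tci.hpp>

int main()
{
  hmlp::tci::Context context;            /** One context shared by the whole team. */
  #pragma omp parallel
  {
    hmlp::tci::Comm comm( &context );    /** size/rank are taken from OpenMP. */
    comm.Barrier();                      /** Synchronize every thread in the team. */
    auto child = comm.Split( 2 );        /** Two gangs with "close" affinity. */
    child.Barrier();                     /** Synchronize within my gang only. */
    comm.Print( 0 );                     /** prefix, size, rank, gang_size, gang_rank */
  }
  return 0;
}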