HMLP: High-performance Machine Learning Primitives
thread.hpp
#ifndef HMLP_THREAD_HPP
#define HMLP_THREAD_HPP

#include <string>
#include <stdio.h>
#include <iostream>
#include <cstddef>
#include <cassert>
#include <map>
#include <set>
#include <tuple>
#include <omp.h>

#include <base/tci.hpp>
#include <base/device.hpp>
#include <base/util.hpp> /* hmlp_malloc() and hmlp_free(); path assumed to match the other base/ headers */

using namespace std;

namespace hmlp
{

/** Scheduling strategies accepted by GetRange() below. */
typedef enum
{
  HMLP_SCHEDULE_DEFAULT,
  HMLP_SCHEDULE_ROUND_ROBIN,
  HMLP_SCHEDULE_UNIFORM,
  HMLP_SCHEDULE_HEFT
} SchedulePolicy;

class range
{
  public:

    range( int beg, int end, int inc );

    int beg();

    int end();

    int inc();

  private:

    std::tuple<int, int, int> info;

};

range GetRange
(
  SchedulePolicy strategy,
  int beg, int end, int nb,
  int tid, int nparts
);

range GetRange
(
  int beg, int end, int nb,
  int tid, int nparts
);

range GetRange
(
  int beg, int end, int nb
);
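
/** Usage sketch (illustrative; beg, end, nb, tid, and nparts are
    placeholders): thread tid asks for its share of [beg, end) in blocks
    of nb and walks it with the returned stride.

      range r = GetRange( beg, end, nb, tid, nparts );
      for ( int i = r.beg(); i < r.end(); i += r.inc() )
      {
        // ... process the block starting at i ...
      }
 */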


class thread_communicator
{
  public:

    thread_communicator();

    thread_communicator( int jc_nt, int pc_nt, int ic_nt, int jr_nt );
114 
115  void Create( int level, int num_threads, int *config );
116 
117  //void Initialize( int )
118 
119 
120  void Barrier();
121 
122  void Send( void** buffer );
123 
124  void Recv( void** buffer );
125 
126  void Print();
127 
128  int GetNumThreads();
129 
130  int GetNumGroups();
131 
132  friend ostream& operator<<( ostream& os, const thread_communicator& obj );
133 
134  thread_communicator *kids;
135 
136  string name;
137 
138 
139 
140 
141 
142 
143  private:
144 
145  void *sent_object;
146 
147  int comm_id;
148 
149  int n_threads = 1;
150 
151  int n_groups = 1;
152 
153  volatile bool barrier_sense;
154 
155  int barrier_threads_arrived;
156 
157 };
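
/** Usage sketch (illustrative; comm and is_master are placeholders):
    Send()/Recv() around a Barrier() broadcast a pointer from one thread
    to the rest of its team, the same pattern Worker::Bcast() uses below.

      void *object = NULL;
      if ( is_master ) comm->Send( &object );
      comm->Barrier();
      if ( !is_master ) comm->Recv( &object );
 */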

class Worker
{
  public:

    Worker();

    //worker( int jc_nt, int pc_nt, int ic_nt, int jr_nt );

    Worker( thread_communicator *my_comm );

    void Communicator( thread_communicator *comm );

    /** Thread id and group id within the owning communicator. */
    int tid = 0;

    int gid = 0;

    int child_gid = 0;

    /** Per-loop thread ids for the BLIS-style jc/pc/ic/jr GEMM loops. */
    int jc_id;

    int pc_id;

    int ic_id;

    int jr_id;

    int ic_jr;

    /** Team sizes for each of those loops. */
    int jc_nt;

    int pc_nt;

    int ic_nt;

    int jr_nt;

    /** Communicators for each level of the thread hierarchy. */
    thread_communicator *my_comm;

    thread_communicator *jc_comm;

    thread_communicator *pc_comm;

    thread_communicator *ic_comm;

    bool Master();

    void Barrier();

    void InitWithCommunicator( thread_communicator* comm, size_t tid, size_t gid );

    Worker Split();

    thread_communicator *comm = NULL;

    template<typename Arg>
    void Bcast( Arg& buffer )
    {
      /** The master publishes its buffer; the rest of the team copies it after the barrier. */
      if ( Master() ) comm->Send( (void**)&buffer );
      Barrier();
      if ( !Master() ) comm->Recv( (void**)&buffer );
    }

    template<int ALIGN_SIZE, typename T>
    T *AllocateSharedMemory( size_t count )
    {
      T* ptr = NULL;
      /** Only the master allocates; the pointer is then broadcast to the team. */
      if ( Master() ) ptr = hmlp_malloc<ALIGN_SIZE, T>( count );
      Bcast( ptr );
      return ptr;
    }

    template<typename T>
    void FreeSharedMemory( T *ptr )
    {
      if ( Master() ) hmlp_free( ptr );
    }
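
    /** Usage sketch (illustrative; worker, m, n are placeholders): the
        master allocates one aligned buffer, Bcast() hands the same pointer
        to every thread in the team, and the master frees it once everyone
        is done.

          double *A = worker.AllocateSharedMemory<64, double>( m * n );
          // ... all threads read and write A ...
          worker.Barrier();
          worker.FreeSharedMemory( A );
     */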

    size_t BalanceOver1DGangs( size_t n, size_t default_size, size_t nb );

    tuple<size_t, size_t, size_t> DistributeOver1DGangs(
        size_t beg, size_t end, size_t nb );

    tuple<size_t, size_t, size_t> DistributeOver1DThreads(
        size_t beg, size_t end, size_t nb );
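
    /** Usage sketch (illustrative; worker, n, and nb are placeholders):
        the returned (beg, end, inc) triple drives a strided loop over this
        thread's share of [0, n), analogous to GetRange() above.

          size_t ibeg, iend, iinc;
          tie( ibeg, iend, iinc ) = worker.DistributeOver1DThreads( 0, n, nb );
          for ( size_t i = ibeg; i < iend; i += iinc )
          {
            // ... process the block starting at i ...
          }
     */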
    void SetDevice( class Device *device );

    class Device *GetDevice();

    //bool Execute( vector<hmlp::Task*> &batch );

    bool Execute( class Task *task );

    void WaitExecute();

    float EstimateCost( class Task* task );

    class Scheduler *scheduler;

#ifdef USE_PTHREAD_RUNTIME
    pthread_t pthreadid;
#endif

  private:

    class Task *current_task = NULL;

    class Device *device = NULL;

}; /** end class Worker */

}; /** end namespace hmlp */

#endif /** define HMLP_THREAD_HPP */