1 |
|
#ifndef HMLP_TPI_HPP |
2 |
|
#define HMLP_TPI_HPP |
3 |
|
|
4 |
|
#include <assert.h> |
5 |
|
#include <stdio.h> |
6 |
|
#include <string> |
7 |
|
#include <vector> |
8 |
|
#include <tuple> |
9 |
|
|
10 |
|
#include <omp.h> |
11 |
|
|
12 |
|
#include <base/util.hpp> |
13 |
|
|
14 |
|
using namespace std; |
15 |
|
|
16 |
|
|
17 |
|
namespace hmlp |
18 |
|
{ |
19 |
|
/**
 *  @brief Identifiers for the available scheduling policies.
 *
 *  The enumerators carry no explicit initializers, so they take the
 *  values 0, 1, 2, 3 in declaration order. This stays an unscoped enum so
 *  that existing code can keep using the bare HMLP_SCHEDULE_* names and
 *  their implicit conversion to int.
 */
enum SchedulePolicy
{
  HMLP_SCHEDULE_DEFAULT,
  HMLP_SCHEDULE_ROUND_ROBIN,
  HMLP_SCHEDULE_UNIFORM,
  HMLP_SCHEDULE_HEFT
};
26 |
|
|
27 |
|
|
28 |
|
/**
 *  @brief Integer range described by three values: begin, end, increment.
 *
 *  Only declarations live in this header; every member function is
 *  defined out of line. The accessors are intentionally left non-const
 *  here so that the declarations keep matching those definitions.
 */
class Range
{
  public:

    /** Build a range from a begin value, an end value and an increment. */
    Range( int beg, int end, int inc );

    /** Accessor paired with the constructor's `beg` argument. */
    int beg();

    /** Accessor paired with the constructor's `end` argument. */
    int end();

    /** Accessor paired with the constructor's `inc` argument. */
    int inc();

    /**
     *  Print this range.
     *  NOTE(review): the meaning of `prefix` is set by the out-of-line
     *  definition, which is not visible from this header.
     */
    void Print( int prefix );

  private:

    /**
     *  The three integers backing the range.
     *  NOTE(review): presumably stored as (beg, end, inc) -- confirm
     *  against the constructor definition.
     */
    std::tuple<int, int, int> info;

};
47 |
|
|
48 |
|
|
49 |
|
/** @brief Wrapper for omp or pthread mutex. */ |
50 |
|
class Lock |
51 |
|
{ |
52 |
|
public: |
53 |
|
|
54 |
|
Lock(); |
55 |
|
|
56 |
|
~Lock(); |
57 |
|
|
58 |
|
void Acquire(); |
59 |
|
|
60 |
|
void Release(); |
61 |
|
|
62 |
|
private: |
63 |
|
#ifdef USE_PTHREAD_RUNTIME |
64 |
|
pthread_mutex_t lock; |
65 |
|
#else |
66 |
|
omp_lock_t lock; |
67 |
|
#endif |
68 |
|
}; /** end class Lock */ |
69 |
|
|
70 |
|
|
71 |
|
namespace tci |
72 |
|
{ |
73 |
|
|
74 |
|
/**
 *  @brief State object handed by pointer to communicators (see the
 *         Comm( Context* ) constructor) so that several threads can
 *         refer to the same instance.
 *
 *  NOTE(review): the two barrier_* fields are `volatile`, which on its
 *  own gives neither atomicity nor ordering. Whatever synchronization
 *  they need has to come from the out-of-line Barrier() definition --
 *  confirm there. Their types are left untouched because that definition
 *  depends on them.
 */
class Context
{
  public:

    /** One pointer-sized slot; null until something is stored in it. */
    void* buffer = nullptr;

    /** Barrier flag, initially false. */
    volatile bool barrier_sense = false;

    /** Barrier counter, initially zero. */
    volatile int barrier_threads_arrived = 0;

    /**
     *  Barrier over this context.
     *  NOTE(review): `size` is presumably the number of participating
     *  threads -- confirm in the definition.
     */
    void Barrier( int size );

}; /** end class Context */
87 |
|
|
88 |
|
|
89 |
|
class Comm |
90 |
|
{ |
91 |
|
public: |
92 |
|
|
93 |
|
Comm(); |
94 |
|
|
95 |
|
Comm( Context* context ); |
96 |
|
|
97 |
|
Comm( Comm* parent, Context* context, int assigned_size, int assigned_rank ); |
98 |
|
|
99 |
|
Comm Split( int num_groups ); |
100 |
|
|
101 |
|
bool Master(); |
102 |
|
|
103 |
|
void Barrier(); |
104 |
|
|
105 |
|
void Send( void** sent_object ); |
106 |
|
|
107 |
|
void Recv( void** recv_object ); |
108 |
|
|
109 |
|
template<typename Arg> |
110 |
|
void Bcast( Arg& buffer, int root ) |
111 |
|
{ |
112 |
|
if ( rank == root ) Send( (void**)&buffer ); |
113 |
|
Barrier(); |
114 |
|
if ( rank != root ) Recv( (void**)&buffer ); |
115 |
|
}; |
116 |
|
|
117 |
|
template<int ALIGN_SIZE, typename T> |
118 |
|
T *AllocateSharedMemory( size_t count ) |
119 |
|
{ |
120 |
|
T* ptr = NULL; |
121 |
|
if ( Master() ) ptr = hmlp_malloc<ALIGN_SIZE, T>( count ); |
122 |
|
Bcast( ptr, 0 ); |
123 |
|
return ptr; |
124 |
|
}; |
125 |
|
|
126 |
|
template<typename T> |
127 |
|
void FreeSharedMemory( T *ptr ) |
128 |
|
{ |
129 |
|
Barrier(); |
130 |
|
if ( Master() ) hmlp_free( ptr ); |
131 |
|
}; |
132 |
|
|
133 |
|
void Create1DLocks( int n ); |
134 |
|
|
135 |
|
void Destroy1DLocks(); |
136 |
|
|
137 |
|
void Create2DLocks( int m, int n ); |
138 |
|
|
139 |
|
void Destroy2DLocks(); |
140 |
|
|
141 |
|
void Acquire1DLocks( int i ); |
142 |
|
|
143 |
|
void Release1DLocks( int i ); |
144 |
|
|
145 |
|
void Acquire2DLocks( int i , int j ); |
146 |
|
|
147 |
|
void Release2DLocks( int i , int j ); |
148 |
|
|
149 |
|
int GetCommSize(); |
150 |
|
|
151 |
|
int GetCommRank(); |
152 |
|
|
153 |
|
int GetGangSize(); |
154 |
|
|
155 |
|
int GetGangRank(); |
156 |
|
|
157 |
|
int BalanceOver1DGangs( int n, int default_size, int nb ); |
158 |
|
|
159 |
|
Range DistributeOver1DThreads( int beg, int end, int nb ); |
160 |
|
|
161 |
|
Range DistributeOver1DGangs( int beg, int end, int nb ); |
162 |
|
|
163 |
|
void Print( int prefix ); |
164 |
|
|
165 |
|
Comm* parent = NULL; |
166 |
|
|
167 |
|
private: |
168 |
|
|
169 |
|
string name; |
170 |
|
|
171 |
|
int rank = 0; |
172 |
|
|
173 |
|
int size = 1; |
174 |
|
|
175 |
|
int gang_rank = 0; |
176 |
|
|
177 |
|
int gang_size = 1; |
178 |
|
|
179 |
|
Context* context = NULL; |
180 |
|
|
181 |
|
vector<hmlp::Lock>* lock1d = NULL; |
182 |
|
|
183 |
|
vector<vector<hmlp::Lock>>* lock2d = NULL; |
184 |
|
|
185 |
|
}; /** end class Comm */ |
186 |
|
|
187 |
|
template<typename FUNC, typename... Args> |
188 |
|
void Parallelize( tci::Comm* comm, FUNC func, Args&&... args ) |
189 |
|
{ |
190 |
|
if ( comm ) |
191 |
|
{ |
192 |
|
func( *comm, args... ); |
193 |
|
} |
194 |
|
else |
195 |
|
{ |
196 |
|
/** Create a shared context pointer for communication. */ |
197 |
|
Context context; |
198 |
|
/** Create a parallel section with omp_get_num_threads(). */ |
199 |
|
#pragma omp parallel |
200 |
|
{ |
201 |
|
/** Create a global communicator with the shared context. */ |
202 |
|
Comm CommGLB( &context ); |
203 |
|
/** Now call the function in parallel. */ |
204 |
|
func( CommGLB, args... ); |
205 |
|
} /** end pragma omp parallel */ |
206 |
|
} |
207 |
|
}; /** end Parallelize() */ |
208 |
|
|
209 |
|
}; /** end namespace tci */ |
210 |
|
}; /** end namespace hmlp */ |
211 |
|
#endif |