/**
 *  HMLP: High-Performance Machine Learning Primitives
 *  device.hpp
 */
22 #ifndef HMLP_DEVICE_HPP
23 #define HMLP_DEVICE_HPP
24 
/* C system headers */
#include <omp.h>
#include <stdio.h>

/* C++ standard library */
#include <cassert>
#include <cstddef>
#include <map>
#include <set>
#include <string>

//#include <iostream>
33 
34 
/** number of CacheLine slots in each per-device Cache (see class Cache) */
#define MAX_LINE 16
36 
37 
38 
39 
40 namespace hmlp
41 {
42 
/** Returns the Device object representing the host CPU (defined in runtime.cpp). */
class Device *hmlp_get_device_host();
44 
45 
46 
/** Coherence state of a CacheLine's device-side copy. */
typedef enum
{
  CACHE_CLEAN,  /**< device copy matches the host copy */
  CACHE_DIRTY   /**< device copy has been modified since the last sync */
} CacheStatus;
52 
53 class CacheLine
54 {
55  public:
56 
57  CacheLine();
58 
59  void Setup( hmlp::Device *device, size_t line_size );
60 
61  bool isClean();
62 
63  void Bind( void *ptr_h );
64 
65  bool isCache( void *ptr_h, size_t size );
66 
67  char *device_data();
68 
69  private:
70 
71  void *ptr_h = NULL;
72 
73  char *ptr_d = NULL;
74 
75  CacheStatus status;
76 
77  size_t line_size;
78 
79 };
80 
81 
82 class Cache
83 {
84  public:
85 
86  Cache();
87 
88  void Setup( hmlp::Device *device );
89 
90  CacheLine *Read( size_t size );
91 
92  private:
93 
94  size_t fifo = 0;
95 
96  class CacheLine line[ MAX_LINE ];
97 };
98 
99 
100 
101 
102 
103 
104 
105 
/** Kinds of compute devices the runtime can manage. */
typedef enum
{
  HOST,        /**< the CPU host */
  NVIDIA_GPU,  /**< CUDA-capable GPU */
  OTHER_GPU,   /**< non-NVIDIA GPU */
  TI_DSP       /**< TI digital signal processor */
} DeviceType;
113 
114 
125 class Device
126 {
127  public:
128 
129  Device();
130 
131  virtual class CacheLine *getline( size_t size );
132 
133  virtual void prefetchd2h( void *ptr_h, void *ptr_d, size_t size, int stream_id );
134 
135  virtual void prefetchh2d( void *ptr_d, void *ptr_h, size_t size, int stream_id );
136 
137  virtual void waitexecute();
138 
139  virtual void wait( int stream_id );
140 
141  virtual void *malloc( size_t size );
142 
143  virtual void malloc( void *ptr_d, size_t size );
144 
145  virtual size_t get_memory_left();
146 
147  virtual void free( void *ptr_d, size_t size );
148 
149  DeviceType devicetype;
150 
151  std::string name;
152 
153  class Cache cache;
154 
155  private:
156 
157  class Worker *workers;
158 };
159 
160 
161 template<class T>
163 {
165  //friend class DeviceMemory;
166 
167  public:
168 
170  {
171  this->host = hmlp_get_device_host();
172  distribution.insert( host );
173  };
174 
175  void CacheD( hmlp::Device *dev, size_t size )
176  {
177  if ( !isCached( size ) )
178  {
180  cache = dev->getline( size );
181  cache->Bind( this );
182  Redistribute<true>( host );
183  }
184  };
185 
187  //void asCache( DeviceMemory *target )
188  //{
189  // cache = target;
190  //};
191 
192  void AllocateD( hmlp::Device *dev, size_t size )
193  {
194  if ( !device_map.count( dev ) )
195  {
196  T *ptr_d = (T*)dev->malloc( size );
197  if ( !ptr_d ) return;
198  device_map[ dev ] = ptr_d;
199  }
200  };
201 
203  void FreeD( hmlp::Device *dev, size_t size )
204  {
205  if ( device_map.count( dev ) )
206  {
207  if ( !device_map[ dev ] )
208  {
209  printf( "NULL device ptr in device_map\n" ); fflush( stdout );
210  }
211  dev->free( device_map[ dev ], size );
212  device_map.erase( dev );
213  distribution.erase( dev );
214  }
215  };
216 
218  void PrefetchH2D( hmlp::Device *dev, int stream_id, size_t size, T* ptr_h )
219  {
220  this->stream_id = stream_id;
221 
222  if ( cache )
223  {
224  //printf( "Is cached\n" ); fflush( stdout );
225  CacheD( dev, size );
226  if ( !distribution.count( dev ) )
227  {
228  dev->prefetchh2d( cache->device_data(), ptr_h, size, stream_id );
230  Redistribute<false>( dev );
231  }
232  }
233  else
234  {
235  //printf( "Not cached\n" ); fflush( stdout );
236  if ( !distribution.count( dev ) )
237  {
238  //printf( "PrefetchH2D: target device does not have the latest copy.\n" );
239  AllocateD( dev, size );
240  //if ( !device_map.count( dev ) )
241  //{
242  // //printf( "allocate %lu bytes on %s\n", size, dev->name.data() );
243  // T *ptr_d = (T*)dev->malloc( size );
244  // if ( !ptr_d ) return;
245  // device_map[ dev ] = ptr_d;
246  //}
247  //printf( "memcpy H2D\n" );
248  dev->prefetchh2d( device_map[ dev ], ptr_h, size, stream_id );
250  //printf( "redistribute\n" );
251  Redistribute<false>( dev );
252  }
253  else
254  {
255  //printf( "PrefetchH2D: target device has the latest copy\n" );
256  assert( device_map.find( dev ) != device_map.end() );
257  }
258  }
259  };
260 
262  void PrefetchD2H( hmlp::Device *dev, int stream_id, size_t size, T* ptr_h )
263  {
264  this->stream_id = stream_id;
265 
266  if ( cache )
267  {
268  CacheD( dev, size );
269  if ( !distribution.count( host ) )
270  {
271  dev->prefetchd2h( ptr_h, cache->device_data(), size, stream_id );
272  Redistribute<false>( host );
273  }
274  }
275  else
276  {
277  if ( !distribution.count( host ) )
278  {
279  //printf( "PrefetchD2H: host does not have the latest copy.\n" );
280  assert( device_map.count( dev ) );
281  dev->prefetchd2h( ptr_h, device_map[ dev ], size, stream_id );
282  Redistribute<false>( host );
283  }
284  else
285  {
286  //printf( "PrefetchD2H: host has the latest copy\n" );
287  assert( device_map.count( host ) );
288  }
289  }
290  };
291 
292  void FetchH2D( hmlp::Device *dev, size_t size, T* ptr_h )
293  {
294  PrefetchH2D( dev, stream_id, size, ptr_h );
295  dev->wait( stream_id );
296  };
297 
298  void FetchD2H( hmlp::Device *dev, size_t size, T* ptr_h )
299  {
300  PrefetchD2H( dev, stream_id, size, ptr_h );
301  dev->wait( stream_id );
302  };
303 
305  void Wait( hmlp::Device *dev, int stream_id )
306  {
307  dev->wait( stream_id );
308  };
309 
315  template<bool OVERWRITE>
317  {
318  assert( dev );
319  if ( OVERWRITE ) distribution.clear();
320  distribution.insert( dev );
321  };
322 
323  bool is_up_to_date( hmlp::Device *dev )
324  {
325  return distribution.count( dev );
326  };
327 
328  T* device_data( hmlp::Device *dev )
329  {
330  if ( cache )
331  {
332  return (T*)cache->device_data();
333  }
334  else
335  {
336  auto it = device_map.find( dev );
337  if ( it == device_map.end() )
338  {
339  printf( "no device pointer for the target device\n" );
340  return NULL;
341  }
342  return device_map[ dev ];
343  }
344  };
345 
346 
347  private:
348 
349  int stream_id = 0;
350 
351  hmlp::Device *host = NULL;
352 
354  std::map<hmlp::Device*, T*> device_map;
355 
357  std::set<hmlp::Device*> distribution;
358 
359  CacheLine *cache = NULL;
360 
361  bool isCached( size_t size )
362  {
363  bool iscached = false;
364  if ( cache )
365  {
366  iscached = cache->isCache( this, size );
367  }
368  return iscached;
369  };
370 
371 }; // end class DeviceMemory
372 
373 }; // end namespace hmlp
374 
375 #endif //#define HMLP_DEVICE_HPP
Definition: device.hpp:162
class Device * hmlp_get_device_host()
Definition: runtime.cpp:1483
void AllocateD(hmlp::Device *dev, size_t size)
Definition: device.hpp:192
void FreeD(hmlp::Device *dev, size_t size)
Definition: device.hpp:203
void PrefetchH2D(hmlp::Device *dev, int stream_id, size_t size, T *ptr_h)
Definition: device.hpp:218
void CacheD(hmlp::Device *dev, size_t size)
Definition: device.hpp:175
DeviceMemory()
Definition: device.hpp:169
Definition: device.hpp:53
void Redistribute(hmlp::Device *dev)
Definition: device.hpp:316
This class describes devices or accelerators that require a master thread to control. A device can accept tasks from multiple workers. All received tasks are expected to be executed independently in a time-sharing fashion. Whether these tasks are executed in parallel, sequential or with some built-in context switching scheme does not matter.
Definition: device.hpp:125
Definition: gofmm.hpp:83
void PrefetchD2H(hmlp::Device *dev, int stream_id, size_t size, T *ptr_h)
Definition: device.hpp:262
Definition: device.hpp:82
Definition: thread.hpp:166