GCC Code Coverage Report
Directory: . Exec Total Coverage
File: frame/base/device.hpp Lines: 1 1 100.0 %
Date: 2019-01-14 Branches: 0 0 0.0 %

Line Exec Source
1
/**
2
 *  HMLP (High-Performance Machine Learning Primitives)
3
 *
4
 *  Copyright (C) 2014-2017, The University of Texas at Austin
5
 *
6
 *  This program is free software: you can redistribute it and/or modify
7
 *  it under the terms of the GNU General Public License as published by
8
 *  the Free Software Foundation, either version 3 of the License, or
9
 *  (at your option) any later version.
10
 *
11
 *  This program is distributed in the hope that it will be useful,
12
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
 *  GNU General Public License for more details.
15
 *
16
 *  You should have received a copy of the GNU General Public License
17
 *  along with this program. If not, see the LICENSE file.
18
 *
19
 **/
20
21
22
#ifndef HMLP_DEVICE_HPP
23
#define HMLP_DEVICE_HPP
24
25
#include <string>
26
#include <stdio.h>
27
//#include <iostream>
28
#include <cstddef>
29
#include <cassert>
30
#include <map>
31
#include <set>
32
#include <omp.h>
33
34
35
#define MAX_LINE 16
36
37
38
39
40
namespace hmlp
41
{
42
43
/**
 *  Forward-declares hmlp::Device and the accessor that returns a pointer
 *  to a Device. The definition is not in this header; DeviceMemory's
 *  constructor stores the returned pointer as its `host` device.
 */
class Device *hmlp_get_device_host();
44
45
46
47
/** Values that the `status` member of a CacheLine can take. */
enum CacheStatus
{
  CACHE_CLEAN = 0,
  CACHE_DIRTY = 1
};
52
53
class CacheLine
54
{
55
  public:
56
57
    CacheLine();
58
59
    void Setup( hmlp::Device *device, size_t line_size );
60
61
    bool isClean();
62
63
    void Bind( void *ptr_h );
64
65
    bool isCache( void *ptr_h, size_t size );
66
67
    char *device_data();
68
69
  private:
70
71
    void *ptr_h = NULL;
72
73
    char *ptr_d = NULL;
74
75
    CacheStatus status;
76
77
    size_t line_size;
78
79
};
80
81
82
class Cache
83
{
84
  public:
85
86
    Cache();
87
88
    void Setup( hmlp::Device *device );
89
90
    CacheLine *Read( size_t size );
91
92
  private:
93
94
    size_t fifo = 0;
95
96
    class CacheLine line[ MAX_LINE ];
97
};
98
99
100
101
102
103
104
105
106
/** Kinds of device a hmlp::Device can describe (see Device::devicetype). */
enum DeviceType
{
  HOST       = 0,
  NVIDIA_GPU = 1,
  OTHER_GPU  = 2,
  TI_DSP     = 3
};
113
114
115
/**
116
 *  @brief This class describes devices or accelerators that require
117
 *         a master thread to control. A device can accept tasks from
118
 *         multiple workers. All received tasks are expected to be
119
 *         executed independently in a time-sharing fashion.
120
 *         Whether these tasks are executed in parallel, sequential
121
 *         or with some built-in context switching scheme does not
122
 *         matter.
123
 *
124
 */
125
1
class Device
126
{
127
  public:
128
129
    Device();
130
131
    virtual class CacheLine *getline( size_t size );
132
133
    virtual void prefetchd2h( void *ptr_h, void *ptr_d, size_t size, int stream_id );
134
135
    virtual void prefetchh2d( void *ptr_d, void *ptr_h, size_t size, int stream_id );
136
137
    virtual void waitexecute();
138
139
    virtual void wait( int stream_id );
140
141
    virtual void *malloc( size_t size );
142
143
    virtual void malloc( void *ptr_d, size_t size );
144
145
    virtual size_t get_memory_left();
146
147
    virtual void free( void *ptr_d, size_t size );
148
149
    DeviceType devicetype;
150
151
    std::string name;
152
153
    class Cache cache;
154
155
  private:
156
157
    class Worker *workers;
158
};
159
160
161
template<class T>
162
class DeviceMemory
163
{
164
  /** allow other instance to access device_map */
165
  //friend class DeviceMemory;
166
167
  public:
168
169
    DeviceMemory()
170
    {
171
      this->host = hmlp_get_device_host();
172
      distribution.insert( host );
173
    };
174
175
    void CacheD( hmlp::Device *dev, size_t size )
176
    {
177
      if ( !isCached( size ) )
178
      {
179
        /** request a new cache location on the device */
180
        cache = dev->getline( size );
181
        cache->Bind( this );
182
        Redistribute<true>( host );
183
      }
184
    };
185
186
    ///** this will be the cache of target */
187
    //void asCache( DeviceMemory *target )
188
    //{
189
    //  cache = target;
190
    //};
191
192
    void AllocateD( hmlp::Device *dev, size_t size )
193
    {
194
      if ( !device_map.count( dev ) )
195
      {
196
        T *ptr_d = (T*)dev->malloc( size );
197
        if ( !ptr_d ) return;
198
        device_map[ dev ] = ptr_d;
199
      }
200
    };
201
202
    /** free device memory, remove from the map and the distribution */
203
    void FreeD( hmlp::Device *dev, size_t size )
204
    {
205
      if ( device_map.count( dev ) )
206
      {
207
        if ( !device_map[ dev ] )
208
        {
209
          printf( "NULL device ptr in device_map\n" ); fflush( stdout );
210
        }
211
        dev->free( device_map[ dev ], size );
212
        device_map.erase( dev );
213
        distribution.erase( dev );
214
      }
215
    };
216
217
    /** */
218
    void PrefetchH2D( hmlp::Device *dev, int stream_id, size_t size, T* ptr_h )
219
    {
220
      this->stream_id = stream_id;
221
222
      if ( cache )
223
      {
224
        //printf( "Is cached\n" ); fflush( stdout );
225
        CacheD( dev, size );
226
        if ( !distribution.count( dev ) )
227
        {
228
          dev->prefetchh2d( cache->device_data(), ptr_h, size, stream_id );
229
          /** TODO need to be careful about the definition here. */
230
          Redistribute<false>( dev );
231
        }
232
      }
233
      else
234
      {
235
        //printf( "Not cached\n" ); fflush( stdout );
236
        if ( !distribution.count( dev ) )
237
        {
238
          //printf( "PrefetchH2D: target device does not have the latest copy.\n" );
239
          AllocateD( dev, size );
240
          //if ( !device_map.count( dev ) )
241
          //{
242
          //  //printf( "allocate %lu bytes on %s\n", size, dev->name.data() );
243
          //  T *ptr_d = (T*)dev->malloc( size );
244
          //  if ( !ptr_d ) return;
245
          //  device_map[ dev ] = ptr_d;
246
          //}
247
          //printf( "memcpy H2D\n" );
248
          dev->prefetchh2d( device_map[ dev ], ptr_h, size, stream_id );
249
          /** TODO: maybe update the distribution here? */
250
          //printf( "redistribute\n" );
251
          Redistribute<false>( dev );
252
        }
253
        else /** the device has the latest copy */
254
        {
255
          //printf( "PrefetchH2D: target device has the latest copy\n" );
256
          assert( device_map.find( dev ) != device_map.end() );
257
        }
258
      }
259
    };
260
261
    /** if host does not have the latest copy */
262
    void PrefetchD2H( hmlp::Device *dev, int stream_id, size_t size, T* ptr_h )
263
    {
264
      this->stream_id = stream_id;
265
266
      if ( cache )
267
      {
268
        CacheD( dev, size );
269
        if ( !distribution.count( host ) )
270
        {
271
          dev->prefetchd2h( ptr_h, cache->device_data(), size, stream_id );
272
          Redistribute<false>( host );
273
        }
274
      }
275
      else
276
      {
277
        if ( !distribution.count( host ) )
278
        {
279
          //printf( "PrefetchD2H: host does not have the latest copy.\n" );
280
          assert( device_map.count( dev ) );
281
          dev->prefetchd2h( ptr_h, device_map[ dev ], size, stream_id );
282
          Redistribute<false>( host );
283
        }
284
        else /** the host has the latest copy */
285
        {
286
          //printf( "PrefetchD2H: host has the latest copy\n" );
287
          assert( device_map.count( host ) );
288
        }
289
      }
290
    };
291
292
    void FetchH2D( hmlp::Device *dev, size_t size, T* ptr_h )
293
    {
294
      PrefetchH2D( dev, stream_id, size, ptr_h );
295
      dev->wait( stream_id );
296
    };
297
298
    void FetchD2H( hmlp::Device *dev, size_t size, T* ptr_h )
299
    {
300
      PrefetchD2H( dev, stream_id, size, ptr_h );
301
      dev->wait( stream_id );
302
    };
303
304
    /** */
305
    void Wait( hmlp::Device *dev, int stream_id )
306
    {
307
      dev->wait( stream_id );
308
    };
309
310
    /**
311
     *  Redistribute() changes the data distribution by adding
312
     *  an new device to the distribution or flushing the
313
     *  distribution if we force to OVERWRITE the distribution.
314
     */
315
    template<bool OVERWRITE>
316
    void Redistribute( hmlp::Device *dev )
317
    {
318
      assert( dev );
319
      if ( OVERWRITE ) distribution.clear();
320
      distribution.insert( dev );
321
    };
322
323
    bool is_up_to_date( hmlp::Device *dev )
324
    {
325
      return distribution.count( dev );
326
    };
327
328
    T* device_data( hmlp::Device *dev )
329
    {
330
      if ( cache )
331
      {
332
        return (T*)cache->device_data();
333
      }
334
      else
335
      {
336
        auto it = device_map.find( dev );
337
        if ( it == device_map.end() )
338
        {
339
          printf( "no device pointer for the target device\n" );
340
          return NULL;
341
        }
342
        return device_map[ dev ];
343
      }
344
    };
345
346
347
  private:
348
349
    int stream_id = 0;
350
351
    hmlp::Device *host = NULL;
352
353
    /** map a device to its data pointer */
354
    std::map<hmlp::Device*, T*> device_map;
355
356
    /** distribution */
357
    std::set<hmlp::Device*> distribution;
358
359
    CacheLine *cache = NULL;
360
361
    bool isCached( size_t size )
362
    {
363
      bool iscached = false;
364
      if ( cache )
365
      {
366
        iscached = cache->isCache( this, size );
367
      }
368
      return iscached;
369
    };
370
371
}; // end class DeviceMemory
372
373
}; // end namespace hmlp
374
375
#endif //#define HMLP_DEVICE_HPP