GCC Code Coverage Report
Directory: . Exec Total Coverage
File: frame/containers/OOCCovMatrix.hpp Lines: 0 51 0.0 %
Date: 2019-01-14 Branches: 0 80 0.0 %

Line Exec Source
1
#ifndef OOCCOVMATRIX_HPP
2
#define OOCCOVMATRIX_HPP
3
4
#include <exception>
5
6
/** BLAS/LAPACK support */
7
#include <base/blas_lapack.hpp>
8
/** GEMM task support */
9
#include <primitives/gemm.hpp>
10
/** MLPGaussNewton uses VirtualMatrix<T> as base */
11
#include <containers/VirtualMatrix.hpp>
12
/** For GOFMM compatability */
13
#include <containers/SPDMatrix.hpp>
14
15
using namespace std;
16
using namespace hmlp;
17
18
namespace hmlp
19
{
20
21
22
template<typename T>
23
class CovTask : public Task
24
{
25
  public:
26
27
    vector<OOCData<T>> *arg = NULL;
28
29
    vector<size_t> ids;
30
    vector<size_t> I;
31
    vector<size_t> J;
32
33
    Data<T> *KIJ = NULL;
34
35
36
    int *count = NULL;
37
38
    void Set( vector<OOCData<T>> *user_arg,
39
        const vector<size_t> user_ids,
40
        const vector<size_t> user_I,
41
        const vector<size_t> user_J, Data<T> *user_KIJ, int *user_count )
42
    {
43
      arg  = user_arg;
44
      ids  = user_ids;
45
      I    = user_I;
46
      J    = user_J;
47
      KIJ  = user_KIJ;
48
      count = user_count;
49
    };
50
51
    /** Directly enqueue. */
52
    void DependencyAnalysis() { this->TryEnqueue(); };
53
54
    void Execute( Worker* user_worker )
55
    {
56
      try
57
      {
58
        Data<T> C( I.size(), J.size(), 0 );
59
        assert( arg && KIJ );
60
        assert( KIJ->row() == I.size() && KIJ->col() == J.size() );
61
62
        for ( auto id : ids )
63
        {
64
          assert( id < arg->size() );
65
          OOCData<T> &X = (*arg)[ id ];
66
          /** Allocate temporary buffers. */
67
          Data<T> A( X.row(), I.size() );
68
          Data<T> B( X.row(), J.size() );
69
70
          for ( size_t j = 0; j < I.size(); j ++ )
71
            for ( size_t i = 0; i < X.row(); i ++ )
72
              A( i, j ) = X( i, I[ j ] );
73
74
          for ( size_t j = 0; j < J.size(); j ++ )
75
            for ( size_t i = 0; i < X.row(); i ++ )
76
              B( i, j ) = X( i, J[ j ] );
77
78
          /** Further create subtasks. */
79
          //gemm::xgemm( HMLP_OP_T, HMLP_OP_N, (T)1.0, A, B, (T)1.0, C );
80
          xgemm( "T", "N", I.size(), J.size(), X.row(),
81
              1.0, A.data(), A.row(),
82
                   B.data(), B.row(),
83
              1.0, C.data(), C.row() );
84
        }
85
86
        assert( KIJ->row() == I.size() && KIJ->col() == J.size() );
87
        for ( size_t j = 0; j < J.size(); j ++ )
88
          for ( size_t i = 0; i < I.size(); i ++ )
89
            #pragma omp atomic update
90
            (*KIJ)( i, j ) += C( i, j );
91
      }
92
      catch ( exception & e )
93
      {
94
        cout << "Standard execption: " << e.what() << endl;
95
      }
96
    };
97
98
};
99
100
101
template<typename T>
102
class CovReduceTask : public Task
103
{
104
  public:
105
106
    vector<CovTask<T>*> subtasks;
107
108
    int count = 0;
109
110
    const size_t batch_size = 32;
111
112
    void Set( vector<OOCData<T>> *arg,
113
        const vector<size_t> I,
114
        const vector<size_t> J, Data<T> *KIJ )
115
    {
116
      name = string( "CovReduce" );
117
      vector<size_t> ids;
118
119
      /** Create subtasks for each OOCData<T> */
120
      for ( size_t i = 0; i < arg->size(); i ++ )
121
      {
122
        ids.push_back( i );
123
        if ( ids.size() == batch_size )
124
        {
125
          subtasks.push_back( new CovTask<T>() );
126
          subtasks.back()->Submit();
127
          subtasks.back()->Set( arg, ids, I, J, KIJ, &count );
128
          ids.clear();
129
        }
130
      }
131
      if ( ids.size() )
132
      {
133
        subtasks.push_back( new CovTask<T>() );
134
        subtasks.back()->Submit();
135
        subtasks.back()->Set( arg, ids, I, J, KIJ, &count );
136
        ids.clear();
137
      }
138
    };
139
140
    void DependencyAnalysis()
141
    {
142
      for ( auto task : subtasks )
143
      {
144
        Scheduler::DependencyAdd( task, this );
145
        task->DependencyAnalysis();
146
      }
147
    };
148
149
    void Execute( Worker* user_worker ) {};
150
};
151
152
153
template<typename T>
154
class OOCCovMatrix : public VirtualMatrix<T>
155
{
156
  public:
157
158
    OOCCovMatrix( size_t d, size_t n, size_t nb, string filename )
159
    :
160
    VirtualMatrix<T>( d, d )
161
    {
162
      this->nb = nb;
163
      for ( int i = 0; i < n; i += nb )
164
      {
165
        int ib = min( nb, n - i );
166
        Samples.resize( Samples.size() + 1 );
167
        printf( "ib %d d %lu\n", ib, d );
168
        Samples.back().Set( ib, d, filename + to_string( i ) );
169
        //X.Set( ib, d, filename + to_string( i ) );
170
      }
171
172
      int comm_rank; mpi::Comm_rank( MPI_COMM_WORLD, &comm_rank );
173
      int comm_size; mpi::Comm_size( MPI_COMM_WORLD, &comm_size );
174
175
      D.resize( d, 0 );
176
      #pragma omp parallel for
177
      for ( size_t i = comm_rank; i < d; i += comm_size )
178
      {
179
        D[ i ] = (*this)( i, i );
180
        if ( D[ i ] <= 0 ) D[ i ] = 1.0;
181
      }
182
      auto Dsend = D;
183
      mpi::Allreduce( Dsend.data(), D.data(), D.size(), MPI_SUM, MPI_COMM_WORLD );
184
      printf( "Finish diagonal\n" ); fflush( stdout );
185
    };
186
187
188
    /** Need additional support for diagonal evaluation */
189
    Data<T> Diagonal( const vector<size_t> &I )
190
    {
191
      Data<T> DII( I.size(), 1 );
192
      for ( auto i = 0; i < I.size(); i ++ ) DII[ i ] = D[ I[ i ] ];
193
      return DII;
194
    };
195
196
    /** ESSENTIAL: this is an abstract function  */
197
    virtual T operator()( size_t i, size_t j )
198
    {
199
      auto KIJ = (*this)( vector<size_t>( 1, i ), vector<size_t>( 1, j ) );
200
      return KIJ[ 0 ];
201
    };
202
203
    /** ESSENTIAL: return a submatrix */
204
    virtual Data<T> operator()
205
		(
206
      const vector<size_t> &I,
207
      const vector<size_t> &J
208
    )
209
    {
210
      Data<T> KIJ( I.size(), J.size(), 0 );
211
212
      if ( !I.size() || !J.size() ) return KIJ;
213
214
      for ( auto &i : I ) assert( i < this->row() );
215
      for ( auto &j : J ) assert( j < this->col() );
216
217
      double beg = omp_get_wtime();
218
      double ooc_time = 0;
219
220
      //if ( hmlp_is_in_epoch_session() )
221
      if ( 0 )
222
      {
223
        auto *task = new CovReduceTask<T>();
224
        task->Set( &Samples, I, J, &KIJ );
225
        task->Submit();
226
        task->DependencyAnalysis();
227
        task->CallBackWhileWaiting();
228
      }
229
      else
230
      {
231
        for ( auto &X : Samples )
232
        {
233
          Data<T> A( X.row(), I.size() );
234
          Data<T> B( X.row(), J.size() );
235
236
          double ooc_beg = omp_get_wtime();
237
238
          for ( size_t j = 0; j < I.size(); j ++ )
239
            for ( size_t i = 0; i < X.row(); i ++ )
240
              A( i, j ) = X( i, I[ j ] );
241
242
          for ( size_t j = 0; j < J.size(); j ++ )
243
            for ( size_t i = 0; i < X.row(); i ++ )
244
              B( i, j ) = X( i, J[ j ] );
245
246
          ooc_time += omp_get_wtime() - ooc_beg;
247
248
          assert( !A.HasIllegalValue() );
249
          assert( !B.HasIllegalValue() );
250
          //printf( "I %lu J %lu X.row() %lu X.col() %lu\n", I[ 0 ], J[ 0 ], X.row(), X.col() );
251
          //for ( size_t i = 0; i < all_rows.size(); i ++ ) all_rows[ i ] = i;
252
          //auto A = X( all_rows, I );
253
          //printf( "I %lu J %lu X.row() %lu X.col() %lu Finish A\n", I[ 0 ], J[ 0 ], X.row(), X.col() );
254
          //auto B = X( all_rows, J );
255
          //printf( "I %lu J %lu X.row() %lu X.col() %lu Finish B\n", I[ 0 ], J[ 0 ], X.row(), X.col() );
256
          //gemm::xgemm( HMLP_OP_T, HMLP_OP_N, (T)1.0, A, B, (T)1.0, KIJ );
257
          {
258
            xgemm( "T", "N", I.size(), J.size(), X.row(),
259
                1.0, A.data(), A.row(),
260
                     B.data(), B.row(),
261
                1.0, KIJ.data(), KIJ.row() );
262
          }
263
        }
264
      }
265
      //assert( !KIJ.HasIllegalValue() );
266
267
      double KIJ_time = omp_get_wtime() - beg;
268
269
      if ( !reported && I.size() >= 512 && J.size() >= 512  )
270
      {
271
        printf( "KIJ %lu %lu in %lfs ooc %lfs\n", I.size(), J.size(), KIJ_time, ooc_time );
272
        reported = true;
273
      }
274
275
276
      return KIJ;
277
    };
278
279
280
    template<typename TINDEX>
281
    double flops( TINDEX na, TINDEX nb )
282
    {
283
      return 0.0;
284
    };
285
286
  private:
287
288
    bool reported = false;
289
290
    size_t n_samples = 0;
291
292
    size_t nb = 512;
293
294
    vector<T> D;
295
296
    /** n_samples / nb files, each with n_samples */
297
    vector<OOCData<T>> Samples;
298
299
}; /** end class OOCCovMatrix */
300
}; /** end namespace hmlp */
301
302
#endif /** define OOCCOVMATRIX_HPP */