HMLP: High-performance Machine Learning Primitives
regression.hpp
#ifndef REGRESSION_HPP
#define REGRESSION_HPP


#include <limits>
#include <cmath> /** std::exp() in SoftMax() */

#include <iostream>
#include <fstream>
#include <sstream>

#include <stdio.h>

#include <hmlp.h>

#include <gofmm.hpp>
#include <Data.hpp>
#include <containers/KernelMatrix.hpp>


using namespace std;
using namespace hmlp;


namespace hmlp
{

template<typename T>
class Regression
{
  public:

    Regression( size_t d, size_t n, Data<T> *X, Data<T> *Y )
    {
      this->d = d;
      this->n = n;
      this->X = X;
      this->Y = Y;
    };

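    /**
     *  Ridge regression via the normal equations: solves
     *  ( X * X' + lambda * I ) * W = X * Y with a Cholesky
     *  factorization (xposv). TODO: Support SVD.
     */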
    Data<T> Ridge( kernel_s<T> &kernel, size_t niter )
    {
      size_t nrhs = Y->col();

      Data<T> XXt( d, d, 0.0 );
      Data<T> XY( d, nrhs, 0.0 );

      /** XXt = X * X', stored in the lower triangle */
      xsyrk( "Lower", "No transpose", d, n,
          1.0, X->data(), d, 0.0, XXt.data(), d );

      /** regularize: XXt += lambda * I */
      for ( size_t i = 0; i < d; i ++ ) XXt( i, i ) += lambda;

      /** d-by-nrhs right-hand side XY = X * Y */
      xgemm( "No transpose", "No transpose", d, nrhs, n,
          1.0, X->data(), d,
               Y->data(), Y->row(),
          0.0, XY.data(), d );

      /** Cholesky solve; XY is overwritten with the solution W */
      xposv( "Lower", d, nrhs, XXt.data(), d, XY.data(), d );

      return XY;
    };
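    /**
     *  TODO: Lasso is currently a stub. A minimal sketch of the missing
     *  piece, assuming a proximal-gradient (ISTA) solver where each step
     *  soft-thresholds the gradient update; the helper below is
     *  illustrative, not part of HMLP:
     *
     *  @code
     *  // one ISTA step per entry: w = SoftThreshold( w - alpha * g, alpha * lambda )
     *  template<typename T>
     *  T SoftThreshold( T w, T t )
     *  {
     *    if ( w >  t ) return w - t;
     *    if ( w < -t ) return w + t;
     *    return 0.0;
     *  };
     *  @endcode
     */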
    Data<T> Lasso( kernel_s<T> &kernel, size_t niter )
    {
      /** Not implemented yet; return an all-zero placeholder solution. */
      return Data<T>( d, Y->col(), 0.0 );
    };
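    /**
     *  Kernel softmax regression via gradient descent: with logits
     *  P = K * W, the cross-entropy gradient is
     *  K * ( softmax( P ) - onehot( Y ) ), and each step takes
     *  W -= ( alpha / n ) * gradient. K is compressed with GOFMM so
     *  each K * x product costs roughly O( n log n ) instead of O( n^2 ).
     */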
    Data<T> SoftMax( kernel_s<T> &kernel, size_t nclass, size_t niter )
    {
      KernelMatrix<T> K( n, n, d, kernel, *X );

      /** compress K with GOFMM for fast matrix-vector products */
      gofmm::SimpleGOFMM<T, KernelMatrix<T>> H( K, 1E-3, 0.03 );

      Data<T> W( n, nclass, 1.0 );
      Data<T> P( n, nclass, 0.0 );

      for ( size_t it = 0; it < niter; it ++ )
      {
        Data<T> Gradient( n, nclass, 0.0 );

        /** logits P = K * W */
        H.Multiply( P, W );

        #pragma omp parallel for
        for ( size_t i = 0; i < n; i ++ )
        {
          /** softmax over row i (the exponential was missing) */
          T prob_all = 0.0;
          for ( size_t j = 0; j < nclass; j ++ ) P( i, j ) = std::exp( P( i, j ) );
          for ( size_t j = 0; j < nclass; j ++ ) prob_all += P( i, j );
          for ( size_t j = 0; j < nclass; j ++ ) P( i, j ) /= prob_all;
          /** cross-entropy gradient: subtract the one-hot label */
          P( i, (size_t)(*Y)[ i ] ) -= 1.0;
        }

        /** Gradient = K * ( softmax( P ) - onehot( Y ) ) */
        H.Multiply( Gradient, P );

        #pragma omp parallel for
        for ( size_t i = 0; i < n; i ++ )
        {
          for ( size_t j = 0; j < nclass; j ++ )
          {
            W( i, j ) += ( -1.0 * alpha / n ) * Gradient( i, j );
          }
        }
      }

      /** recompute P = K * W for the final training accuracy */
      H.Multiply( P, W );

      size_t n_correct = 0;
      for ( size_t i = 0; i < n; i ++ )
      {
        size_t goal = (*Y)[ i ];
        size_t pred = 0;
        T prob = 0.0;
        for ( size_t j = 0; j < nclass; j ++ )
        {
          if ( P( i, j ) > prob )
          {
            pred = j;
            prob = P( i, j );
          }
        }
        if ( pred == goal ) n_correct ++;
      }

      printf( "Accuracy: %lf\n", (double)n_correct / n );

      {
        ofstream fout( "weight.dat", ios::out | ios::binary );
        fout.write( (char*)W.data(), W.size() * sizeof(T) );
        fout.close();
      }

      return W;
    };


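    /**
     *  Kernel ridge regression via gradient descent: each iteration
     *  forms G = K * W + B - Y + lambda * W and updates
     *  W += ( -alpha / n ) * K * G. Every 100 iterations the training
     *  accuracy is printed by rounding the predictions to the nearest
     *  integer label.
     */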
    Data<T> Solve( kernel_s<T> &kernel, size_t niter )
    {
      KernelMatrix<T> K( n, n, d, kernel, *X );

      /** compress K with GOFMM for fast matrix-vector products */
      gofmm::SimpleGOFMM<T, KernelMatrix<T>> H( K, 1E-3, 0.03 );

      Data<T> W( n, (size_t)1, 0.0 );
      Data<T> B( n, (size_t)1, 0.0 );

      for ( size_t it = 0; it < niter; it ++ )
      {
        hmlp::Data<T> Gradient( n, (size_t)1, 0.0 );

        /** Gradient = K * W */
        //K.Multiply( Gradient, W );
        H.Multiply( Gradient, W );

        /** residual: Gradient = K * W + B - Y */
        for ( size_t i = 0; i < n; i ++ )
          Gradient[ i ] += B[ i ] - (*Y)[ i ];

        //for ( size_t i = 0; i < n; i ++ )
        //  B[ i ] += ( -1.0 * alpha / n ) * Gradient[ i ];

        /** add the ridge penalty */
        for ( size_t i = 0; i < n; i ++ )
          Gradient[ i ] += lambda * W[ i ];

        /** scale by the step size */
        for ( size_t i = 0; i < n; i ++ )
          Gradient[ i ] = ( -1.0 * alpha / n ) * Gradient[ i ];

        /** W += K * Gradient */
        //K.Multiply( W, Gradient );
        hmlp::Data<T> tmp( n, (size_t)1, 0.0 );
        H.Multiply( tmp, Gradient );
        for ( size_t i = 0; i < n; i ++ )
          W[ i ] += tmp[ i ];

        /** report training accuracy every 100 iterations */
        if ( it % 100 == 0 )
        {
          hmlp::Data<T> Z = B;
          //K.Multiply( Z, W );
          H.Multiply( Z, W );

          size_t n_correct = 0;
          for ( size_t i = 0; i < n; i ++ )
          {
            /** round the prediction to the nearest integer label */
            double pred = (int)( Z[ i ] + 0.5 );
            double goal = (*Y)[ i ];
            if ( pred == goal ) n_correct ++;
          }

          printf( "it %4zu Accuracy: %lf\n", it, (double)n_correct / n );
        }
      }

      /** final training accuracy */
      hmlp::Data<T> Z = B;
      //K.Multiply( Z, W );
      H.Multiply( Z, W );

      size_t n_correct = 0;
      for ( size_t i = 0; i < n; i ++ )
      {
        double pred = (int)( Z[ i ] + 0.5 );
        double goal = (*Y)[ i ];

        //printf( "pred %lf goal %lf\n", pred, goal );

        if ( pred == goal ) n_correct ++;
      }

      printf( "Accuracy: %lf\n", (double)n_correct / n );

      {
        std::ofstream fout( "weight.dat", std::ios::out | std::ios::binary );
        fout.write( (char*)W.data(), W.size() * sizeof(T) );
        fout.close();
      }
      {
        std::ofstream fout( "bias.dat", std::ios::out | std::ios::binary );
        fout.write( (char*)B.data(), B.size() * sizeof(T) );
        fout.close();
      }

      return W;
    };

  private:

    size_t d = 0;
    size_t n = 0;

    /** ridge regularization weight */
    T lambda = 0.01;

    /** gradient descent step size */
    T alpha = 1.0;

    Data<T> *X = NULL;
    Data<T> *Y = NULL;

}; /** end class Regression */

}; /** end namespace hmlp */

#endif /** define REGRESSION_HPP */
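A minimal usage sketch (not part of the header): it assumes column-major
d-by-n storage in Data<T>, a Data<T>::rand() initializer, and the kernel_s
fields ( type, scal ) as used by the GOFMM test drivers; all sizes and the
all-zero labels below are illustrative only.

#include <regression.hpp>

int main( int argc, char *argv[] )
{
  using T = double;
  size_t d = 10, n = 1000;

  /** random d-by-n design matrix and n-by-1 labels (all zero in this sketch) */
  hmlp::Data<T> X( d, n ); X.rand();
  hmlp::Data<T> Y( n, (size_t)1, 0.0 );

  /** Gaussian kernel with unit bandwidth */
  kernel_s<T> kernel;
  kernel.type = KS_GAUSSIAN;
  kernel.scal = -0.5;

  hmlp::Regression<T> regression( d, n, &X, &Y );

  /** kernel ridge regression with 1000 gradient descent steps */
  auto W = regression.Solve( kernel, 1000 );

  return 0;
}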