/**
 *  HMLP: High-performance Machine Learning Primitives
 *  cluster.hpp
 */
1 #ifndef CLUSTER_HPP
2 #define CLUSTER_HPP
3 
#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <limits>
#include <set>
#include <vector>

#include <hmlp.h>

#include <gofmm.hpp>

#include <Data.hpp>
#include <containers/KernelMatrix.hpp>
#include <primitives/lanczos.hpp>
14 
15 namespace hmlp
16 {
17 
18 template<typename PARAM, typename T>
20 {
21  public:
22 
23  VirtualNormalizedGraph( size_t n, PARAM *param, T sigma )
24  {
25  this->n = n;
26  this->param = param;
27  this->sigma = sigma;
28 
29  assert( param );
30  hmlp::Data<T> ones( n, (size_t)1, 1.0 );
31  Degree.resize( n, (size_t)1, 0.0 );
32  param->Multiply( Degree, ones );
33  };
34 
37  {
38  assert( param );
39  assert( y.row() == n && x.row() == n );
40  assert( y.col() == x.col() );
41 
42  size_t nrhs = y.col();
43  hmlp::Data<T> temp = x;
44 
46  for ( size_t j = 0; j < nrhs; j ++ )
47  for ( size_t i = 0; i < n; i ++ )
48  temp( i, j ) /= std::sqrt( Degree[ i ] );
49 
51  for ( size_t j = 0; j < nrhs; j ++ )
52  for ( size_t i = 0; i < n; i ++ )
53  y( i, j ) = 0.0;
54 
56  param->Multiply( y, temp );
57 
59  for ( size_t j = 0; j < nrhs; j ++ )
60  for ( size_t i = 0; i < n; i ++ )
61  y( i, j ) /= std::sqrt( Degree[ i ] );
62 
64  if ( sigma )
65  {
66  for ( size_t j = 0; j < nrhs; j ++ )
67  for ( size_t i = 0; i < n; i ++ )
68  y( i, j ) += sigma * x( i, j );
69  }
70  };
71 
72  void Multiply( hmlp::Data<T> &y )
73  {
74  assert( param );
75  hmlp::Data<T> x = y;
76  Multiply( y, x );
77  };
78 
79  private:
80 
81  size_t n = 0;
82 
83  T sigma = 0.0;
84 
85  PARAM *param = NULL;
86 
87  hmlp::Data<T> Degree;
88 
89 };
93 #ifdef HMLP_MIC_AVX512
94 
95 template<class T, class Allocator = hbw::allocator<T> >
96 #elif HMLP_USE_CUDA
97 
98 template<class T, class Allocator = thrust::system::cuda::experimental::pinned_allocator<T> >
99 #else
100 
101 template<class T, class Allocator = std::allocator<T> >
102 #endif
103 class Cluster
104 {
105  public:
106 
107  Cluster( size_t d, size_t n, size_t ncluster,
108  hmlp::Data<T> *X, std::vector<int, Allocator> *assignments )
109  {
110  this->d = d;
111  this->n = n;
112  this->ncluster = ncluster;
113  this->X = X;
114  this->assignments = assignments;
115 
116  assert( assignments->size() == n );
117  assert( X->size() == d * n );
118 
120  X2.resize( n, 1, 0.0 );
121 
122  for ( size_t i = 0; i < n; i ++ )
123  {
124  for ( size_t p = 0; p < d; p ++ )
125  {
126  auto xpi = (*X)( p, i );
127  X2[ i ] += xpi * xpi;
128  }
129  };
130  };
131 
132  void ProvideLabels( size_t nclass, std::vector<int, Allocator> *labels )
133  {
134  this->nclass = nclass;
135  this->labels = labels;
136  };
137 
138  void InitializeAssignments()
139  {
140  for ( size_t i = 0; i < n; i ++ )
141  {
142  (*assignments)[ i ] = i % ncluster;
143  }
144  };
145 
146 
148  std::vector<size_t> Centroids(
149  std::vector<int> &amap,
150  std::vector<T, Allocator> &centroids,
151  std::vector<T, Allocator> &centroid_sqnorms )
152  {
153  std::vector<size_t> cluster_sizes( ncluster, 0 );
154 
156  centroids.resize( d * ncluster );
157  centroid_sqnorms.resize( ncluster );
158 
160  for ( size_t i = 0; i < centroids.size(); i ++ )
161  centroids[ i ] = 0.0;
162  for ( size_t i = 0; i < centroid_sqnorms.size(); i ++ )
163  centroid_sqnorms[ i ] = 0.0;
164 
166  for ( size_t i = 0; i < amap.size(); i ++ )
167  {
168  auto pi = (*assignments)[ amap[ i ] ];
169  assert( pi >= 0 && pi < ncluster );
170  for ( size_t p = 0; p < d; p ++ )
171  {
172  centroids[ p + pi * d ] += (*X)( p, (size_t)amap[ i ] );
173  cluster_sizes[ pi ] ++;
174  }
175  };
176 
178  for ( size_t j = 0; j < ncluster; j ++ )
179  {
180  if ( cluster_sizes[ j ] )
181  {
182  for ( size_t p = 0; p < d; p ++ )
183  centroids[ p + j * d ] /= cluster_sizes[ j ];
184  }
185  else
186  {
188  for ( size_t p = 0; p < d; p ++ )
189  {
190  centroids[ p + j * d ] = (*X)( p, std::rand() % n );
191  }
192  }
193  }
194 
196  for ( size_t j = 0; j < ncluster; j ++ )
197  for ( size_t p = 0; p < d; p ++ )
198  centroid_sqnorms[ j ] += centroids[ p + j * d ] * centroids[ p + j * d ];
199 
200  return cluster_sizes;
201 
202  };
206  virtual void Kmeans( std::vector<int> &amap, size_t niter, T tol )
207  {
209  hmlp::Data<T> centroids( d, ncluster, 0.0 );
210  hmlp::Data<T> centroid_sqnorms( 1, ncluster, 0.0 );
211  std::vector<int> centroid_maps( ncluster, 0 );
212  for ( size_t i = 0; i < ncluster; i ++ )
213  centroid_maps[ i ] = i;
214 
216  hmlp::Data<T> distance2centroids( n, 1, std::numeric_limits<T>::max() );
217 
218 
219  //printf( "X: " );
220  //X->Print();
221 
222  //for ( size_t i = 0; i < n; i ++ )
223  // printf( "%5.4E ", (*X)[ i ] );
224  //printf( "\n" );
225 
227  for ( size_t iter = 0; iter < niter; iter ++ )
228  {
229  double beg_t, end_t, centroid_t, gsknn_t;
230 
231  //for ( size_t i = 0; i < n; i ++ )
232  // printf( "%d ", (*assignments)[ i ] );
233  //printf( "\n" );
234 
236  beg_t = omp_get_wtime();
237  auto cluster_sizes = Centroids( amap, centroids, centroid_sqnorms );
238  centroid_t = omp_get_wtime() - beg_t;
239 
240  //printf( "Centroids: " );
241  //centroids.Print();
242 
243 
245  gsknn( amap.size(), ncluster, d, 1,
246  X->data(), X2.data(), amap.data(),
247  centroids.data(), centroid_sqnorms.data(), centroid_maps.data(),
248  distance2centroids.data(),
249  assignments->data() );
250  //printf( "d2c: " );
251  //for ( size_t i = 0; i < n; i ++ )
252  // printf( "%5.4E ", distance2centroids[ i ] );
253  //printf( "\n" );
254 
255 
256  T quality = 0.00;
257  for ( size_t i = 0; i < n; i ++ )
258  quality += distance2centroids[ i ] * distance2centroids[ i ];
259  quality = std::sqrt( quality );
260 
261  //printf( "Kmeans iteration #%2lu quality %E\n", iter, quality );
262  }
263 
264  };
267  virtual void Kmeans( size_t niter, T tol )
268  {
269  std::vector<int> amap( n, 0 );
270  for ( size_t i = 0; i < n; i ++ ) amap[ i ] = i;
271  Kmeans( amap, niter, tol );
272 
273  };
277  template<typename VIRTUALMATRIX>
278  void Spectral( VIRTUALMATRIX &G )
279  {
281  //KernelMatrix<T> K( n, n, d, kernel, *X );
282 
284  //T spectrum_shift = 0.0;
285  //VirtualNormalizedGraph<KernelMatrix<T>, T> G( n, &K, spectrum_shift );
286 
288  size_t neig = ncluster;
289 
291  hmlp::Data<T> Sigma( neig, 1, 0.0 );
292  hmlp::Data<T> V( neig, n );
293 
295  size_t num_krylov = 5 * neig;
296  hmlp::lanczos( G, n, neig, num_krylov, Sigma, V );
297 
299  for ( size_t i = 0; i < n; i ++ )
300  {
301  T vi_norm = hmlp::xnrm2( neig, V.columndata( i ), 1 );
302  for ( size_t p = 0; p < neig; p ++ )
303  {
304  V( p, i ) /= vi_norm;
305  }
307  (*assignments)[ i ] = i % ncluster;
308  }
309 
311  Cluster<T> spherical_cluster( neig, n, ncluster, &V, assignments );
312  spherical_cluster.Kmeans( 30, 1E-3 );
313 
314  };
316  void Spectral( kernel_s<T> &kernel )
317  {
319  KernelMatrix<T> K( n, n, d, kernel, *X );
320 
322  T spectrum_shift = 0.0;
323  VirtualNormalizedGraph<KernelMatrix<T>, T> G( n, &K, spectrum_shift );
324 
326  Spectral( G );
327  };
328 
330  void Spectral( kernel_s<T> &kernel, T stol, T budget )
331  {
333  KernelMatrix<T> K( n, n, d, kernel, *X );
334 
336  hmlp::gofmm::SimpleGOFMM<T, KernelMatrix<T>> H( K, stol, budget );
337 
339  T spectrum_shift = 0.0;
341  G( n, &H, spectrum_shift );
342 
344  Spectral( G );
345 
346  };
353  std::vector<size_t> ClusterPermutation( std::vector<int> &amap,
354  std::vector<std::vector<int>> &bmap )
355  {
356  std::vector<size_t> cluster_sizes( ncluster, 0 );
357 
358  for ( size_t j = 0; j < ncluster; j ++ )
359  {
360  if ( !bmap[ j ].size() ) bmap[ j ].reserve( n );
361  bmap[ j ].clear();
362  }
363 
364  for ( size_t i = 0; i < amap.size(); i ++ )
365  {
366  auto pi = (*assignments)[ amap[ i ] ];
367  assert( pi >= 0 && pi < ncluster );
368  bmap[ pi ].push_back( i );
369  cluster_sizes[ pi ] ++;
370  }
371 
372  return cluster_sizes;
373  };
374 
375 
376 
    /** Kernel kmeans over the points listed in amap using the exact
     *  kernel matrix. Distances are degree-weighted (normalized-cut
     *  style): d( i, c ) = Kii/Di^2 - 2 Kic/( Di Dc ) + Kcc/Dc^2.
     *  NOTE(review): tol is currently unused — the loop always runs
     *  niter iterations; quality is computed but never tested. */
    virtual void KernelKmeans( kernel_s<T> &kernel,
        std::vector<int> &amap, size_t niter, T tol )
    {
      KernelMatrix<T> K( n, n, d, kernel, *X );

      /** Cache the kernel diagonal; entries must be positive. */
      std::vector<T> Diag( n, 0 );
      #pragma omp parallel for
      for ( size_t i = 0; i < n; i ++ )
      {
        Diag[ i ] = K( i, i );
        assert( Diag[ i ] > 0.0 );
      }

      hmlp::Data<T> ones( n, 1, 1.0 );

      /** Degree vector D = K * 1, computed and cached inside K. */
      K.ComputeDegree();
      std::vector<T, Allocator> &Degree = K.GetDegree();

      /** bmap[ j ] lists (per ClusterPermutation) the positions within
       *  amap currently assigned to cluster j.
       *  NOTE(review): Degree is later indexed with these positions,
       *  which is only consistent when amap is the identity — confirm
       *  callers never pass a proper subset. */
      std::vector<std::vector<int>> bmap( ncluster );

      /** Similarity[ j ][ i ] holds K( i, cluster j ) sums, then is
       *  overwritten in place with the distance to centroid j. */
      std::vector<hmlp::Data<T>> Similarity( ncluster );
      for ( size_t j = 0; j < ncluster; j ++ )
        Similarity[ j ].resize( amap.size(), 1, 0.0 );

      /** Identity map over amap positions, used as the output index. */
      std::vector<int> umap( amap.size(), 0 );
      for ( size_t i = 0; i < amap.size(); i ++ ) umap[ i ] = i;

      for ( size_t iter = 0; iter < niter; iter ++ )
      {
        hmlp::Data<T> distance2centroids( n, 1, std::numeric_limits<T>::max() );

        /** Scratch buffer for the subset-amap branch below. */
        std::vector<T, Allocator> centroids( n, 0.0 );

        auto cluster_sizes = ClusterPermutation( amap, bmap );

        for ( size_t j = 0; j < ncluster; j ++ )
        {
          T Kcc = 0.0;   /** intra-cluster kernel sum for cluster j */
          T Dcc = 0.0;   /** total degree of cluster j */

          #pragma omp parallel for
          for ( size_t i = 0; i < amap.size(); i ++ )
            Similarity[ j ][ i ] = 0.0;

          /** Similarity[ j ] = K( amap, bmap[ j ] ) * 1. */
          K.Multiply(
              1, Similarity[ j ], umap,
              amap,
              bmap[ j ],
              ones, bmap[ j ] );

          if ( amap.size() == n )
          {
            /** Full point set: Kcc can be read off Similarity. */
            for ( size_t i = 0; i < bmap[ j ].size(); i ++ )
              Kcc += Similarity[ j ][ bmap[ j ][ i ] ];
          }
          else
          {
            /** Subset: needs a dedicated intra-cluster mat-vec. */
            K.Multiply( 1, centroids, bmap[ j ], bmap[ j ], ones );
            for ( size_t i = 0; i < bmap[ j ].size(); i ++ )
              Kcc += centroids[ bmap[ j ][ i ] ];
          }

          for ( size_t i = 0; i < bmap[ j ].size(); i ++ )
            Dcc += Degree[ bmap[ j ][ i ] ];

          /** Turn similarities into distances and greedily reassign. */
          #pragma omp parallel for
          for ( size_t i = 0; i < amap.size(); i ++ )
          {
            T Kii = Diag[ amap[ i ] ];
            T Kic = Similarity[ j ][ amap[ i ] ];
            Similarity[ j ][ amap[ i ] ] =
              Kii / ( Degree[ amap[ i ] ] * Degree[ amap[ i ] ] ) -
              ( 2.0 / Degree[ amap[ i ] ] ) * ( Kic / Dcc ) +
              Kcc / ( Dcc * Dcc );

            if ( Similarity[ j ][ amap[ i ] ] <= distance2centroids[ amap[ i ] ] )
            {
              distance2centroids[ amap[ i ] ] = Similarity[ j ][ amap[ i ] ];
              (*assignments)[ amap[ i ] ] = j;
            }
          }
        }

        /** Clustering quality (sum of squared distances); currently
         *  informational only — no convergence test against tol. */
        T quality = 0.0;
        for ( size_t i = 0; i < amap.size(); i ++ )
          quality += distance2centroids[ amap[ i ] ] * distance2centroids[ amap[ i ] ];
      }

    };
496 
497  virtual void KernelKmeans( kernel_s<T> &kernel, size_t niter, T tol )
498  {
499  std::vector<int> amap( n, 0 );
500  for ( size_t i = 0; i < n; i ++ ) amap[ i ] = i;
501  KernelKmeans( kernel, amap, niter, tol );
502 
503  };
    /** Kernel kmeans accelerated with GOFMM compression: the kernel
     *  matrix is compressed once, and all cluster-similarity mat-vecs
     *  are evaluated through the compressed tree.
     *  NOTE(review): tol is reused as the GOFMM compression accuracy
     *  and is NOT used as a convergence tolerance — the loop always
     *  runs niter iterations. */
    virtual void KernelKmeans(
      kernel_s<T> &kernel,
      std::vector<int> &amap, size_t niter, T tol )
    {


      KernelMatrix<T> K( n, n, d, kernel, *X );

      /** NOTE(review): tree_ptr is never freed — presumably owned by
       *  this call; confirm Compress() ownership semantics. */
      auto *tree_ptr = hmlp::gofmm::Compress<T>( K, tol, budget );
      auto &tree = *tree_ptr;

      /** Cache the kernel diagonal; entries must be positive. */
      std::vector<T> Diag( n, 0 );
      #pragma omp parallel for
      for ( size_t i = 0; i < n; i ++ )
      {
        Diag[ i ] = K( i, i );
        assert( Diag[ i ] > 0.0 );
      }

      hmlp::Data<T> ones( n, 1, 1.0 );

      /** Degree = K * 1, evaluated through the compressed tree. */
      auto Degree = hmlp::gofmm::Evaluate( tree, ones );

      /** Accuracy report: compare the compressed evaluation against
       *  ntest exact rows. The ASKIT/HODLR columns are disabled
       *  (hardcoded 0) — only the GOFMM error is live. */
      std::size_t ntest = 100;
      T nnerr_avg = 0.0;
      T nonnerr_avg = 0.0;
      T fmmerr_avg = 0.0;
      for ( size_t i = 0; i < ntest; i ++ )
      {
        hmlp::Data<T> potentials( 1, 1 );
        T nnerr = 0.0;     /** ASKIT-NN error (evaluation disabled) */
        T nonnerr = 0.0;   /** ASKIT error (evaluation disabled) */
        for ( size_t p = 0; p < potentials.col(); p ++ )
        {
          potentials[ p ] = Degree( i, p );
        }
        auto fmmerr = ComputeError( tree, i, potentials );

        /** NOTE(review): i is unsigned, so this debug branch is dead. */
        if ( i < 0 )
        {
          printf( "gid %6lu, ASKIT %3.1E, HODLR %3.1E, GOFMM %3.1E\n",
              i, nnerr, nonnerr, fmmerr );
        }
        nnerr_avg += nnerr;
        nonnerr_avg += nonnerr;
        fmmerr_avg += fmmerr;
      }
      printf( "========================================================\n");
      printf( " ASKIT %3.1E, HODLR %3.1E, GOFMM %3.1E\n",
          nnerr_avg / ntest , nonnerr_avg / ntest, fmmerr_avg / ntest );
      printf( "========================================================\n");
      // ------------------------------------------------------------------------

      /** bmap[ j ] lists (per ClusterPermutation) the positions within
       *  amap assigned to cluster j — consistent with the global
       *  indexing below only when amap is the identity; confirm. */
      std::vector<std::vector<int>> bmap( ncluster );

      /** n-by-ncluster; holds K * indicators, then is overwritten in
       *  place with point-to-centroid distances. */
      hmlp::Data<T> Similarity;

      /** One-hot cluster membership matrix, rebuilt every iteration. */
      hmlp::Data<T> indicators( n, ncluster );

      for ( size_t iter = 0; iter < niter; iter ++ )
      {
        hmlp::Data<T> distance2centroids( n, 1, std::numeric_limits<T>::max() );

        std::vector<T> centroids( n, 0.0 );

        auto cluster_sizes = ClusterPermutation( amap, bmap );

        #pragma omp parallel for
        for ( size_t i = 0; i < n; i ++ )
        {
          for ( size_t j = 0; j < ncluster; j ++ )
          {
            indicators( i, j ) = 0.0;
          }
          indicators( i, (size_t)(*assignments)[ i ] ) = 1.0;
        }

        /** Similarity( i, j ) = sum over cluster j of K( i, k ). */
        Similarity = hmlp::gofmm::Evaluate( tree, indicators );

        for ( size_t j = 0; j < ncluster; j ++ )
        {
          T Kcc = 0.0;   /** intra-cluster kernel sum for cluster j */
          T Dcc = 0.0;   /** total degree of cluster j */

          for ( size_t i = 0; i < bmap[ j ].size(); i ++ )
            Kcc += Similarity( (size_t)bmap[ j ][ i ], j );

          for ( size_t i = 0; i < bmap[ j ].size(); i ++ )
            Dcc += Degree[ bmap[ j ][ i ] ];

          /** Degree-weighted distance to centroid j; greedy reassign. */
          #pragma omp parallel for
          for ( size_t i = 0; i < amap.size(); i ++ )
          {
            T Kii = Diag[ amap[ i ] ];
            T Kic = Similarity( (size_t)amap[ i ], j );

            Similarity( (size_t)amap[ i ], j ) =
              Kii / ( Degree[ amap[ i ] ] * Degree[ amap[ i ] ] ) -
              ( 2.0 / Degree[ amap[ i ] ] ) * ( Kic / Dcc ) +
              Kcc / ( Dcc * Dcc );

            if ( Similarity( (size_t)amap[ i ], j ) <= distance2centroids[ amap[ i ] ] )
            {
              distance2centroids[ amap[ i ] ] = Similarity( (size_t)amap[ i ], j );
              (*assignments)[ amap[ i ] ] = j;
            }
          }
        }

        /** Quality is computed but never tested — no convergence exit. */
        T quality = 0.0;
        for ( size_t i = 0; i < amap.size(); i ++ )
          quality += distance2centroids[ amap[ i ] ] * distance2centroids[ amap[ i ] ];
      }


    };
666 
667  virtual void KernelKmeans( kernel_s<T> &kernel, size_t niter, T tol, T budget )
668  {
669  std::vector<int> amap( n, 0 );
670  for ( size_t i = 0; i < n; i ++ ) amap[ i ] = i;
671  KernelKmeans( kernel, amap, niter, tol, budget );
672 
673  };
    /** Placeholder for a local refinement pass after kernel kmeans;
     *  intentionally empty — not implemented yet. */
    virtual void KernelKmeansRefinement()
    {
    };
682 
684  {
686  assert( labels && nclass );
687 
689  Confusion.resize( 0, 0 );
690  Confusion.resize( ncluster + 1, nclass + 1, 0.0 );
691 
693  for ( size_t i = 0; i < n; i ++ )
694  {
695  auto icluster = (*assignments)[ i ];
696  auto iclass = (*labels)[ i ];
697  Confusion( icluster, iclass ) += 1.0;
698  }
699 
700  for ( size_t q = 0; q < nclass; q ++ )
701  {
702  for ( size_t p = 0; p < ncluster; p ++ )
703  {
704  auto Cpq = Confusion( p, q );
705  Confusion( p, nclass ) += Cpq;
706  Confusion( ncluster, q ) += Cpq;
707  Confusion( ncluster, nclass ) += Cpq;
708  }
709  }
710 
711  //Confusion.Print();
712 
714  if ( nclass == ncluster )
715  {
716  T num_of_correct_assignments = 0.0;
717  std::set<int> cluster_pivots;
718  std::set<int> class_pivots;
719 
721  for ( int q = 0; q < nclass; q ++ )
722  class_pivots.insert( q );
723 
725  for ( int p = 0; p < ncluster; p ++ )
726  cluster_pivots.insert( p );
727 
728  while ( cluster_pivots.size() && class_pivots.size() )
729  {
730  int pivot_p = -1;
731  int pivot_q = -1;
732  size_t max_entry = 0;
733 
735  for ( auto pit = cluster_pivots.begin(); pit != cluster_pivots.end(); pit ++ )
736  {
737  for ( auto qit = class_pivots.begin(); qit != class_pivots.end(); qit ++ )
738  {
739  if ( Confusion( *pit, *qit ) >= max_entry )
740  {
741  max_entry = Confusion( *pit, *qit );
742  pivot_p = *pit;
743  pivot_q = *qit;
744  }
745  }
746  }
747 
748  num_of_correct_assignments += max_entry;
749  cluster_pivots.erase( pivot_p );
750  class_pivots.erase( pivot_q );
751  }
752 
753  printf( "Accuracy: %4.2E\n", num_of_correct_assignments / n );
754  }
755 
756  };
761  T NMI()
762  {
764  assert( labels && nclass );
765 
766  T nmi = 0.0;
767  T nmi_antecedent = 0.0;
768  T nmi_consequent = 0.0;
769 
771  ComputeConfusion();
772 
774  for ( size_t q = 0; q < nclass; q ++ )
775  {
776  for ( size_t p = 0; p < ncluster; p ++ )
777  {
778  auto Cpq = Confusion( p, q );
779  auto Cp = Confusion( p, nclass );
780  auto Cq = Confusion( ncluster, q );
781  if ( Cpq > 0.0 )
782  {
783  nmi_antecedent += (-2.0) * ( Cpq / n ) * std::log2( n * Cpq / ( Cp * Cq ) );
784  }
785  }
786  }
787 
789  for ( size_t q = 0; q < nclass; q ++ )
790  {
791  auto Cq = Confusion( ncluster, q );
792  nmi_consequent += ( Cq / n ) * std::log2( Cq / n );
793  }
794 
796  for ( size_t p = 0; p < ncluster; p ++ )
797  {
798  auto Cp = Confusion( p, nclass );
799  nmi_consequent += ( Cp / n ) * std::log2( Cp / n );
800  }
801 
802  nmi = ( nmi_antecedent / nmi_consequent );
803 
804  printf( "NMI: %E / %E = %4.2E\n", nmi_antecedent, nmi_consequent, nmi );
805 
806  return nmi;
807 
808  };
    /** Placeholder for evaluating the normalized-cut objective of the
     *  current assignments; intentionally empty — not implemented. */
    virtual void NormalizedCut()
    {

    };
816 
817  private:
818 
819  size_t n = 0;
820 
821  size_t d = 0;
822 
823  size_t ncluster = 0;
824 
825  size_t nclass = 0;
826 
828  hmlp::Data<T> *X = NULL;
829 
830  hmlp::Data<T> X2;
831 
832  std::vector<int, Allocator> *assignments = NULL;
833 
834  std::vector<int, Allocator> *labels = NULL;
835 
837  hmlp::Data<T> Confusion;
838 
839 
840 };
843 };
845 #endif
/* Doxygen cross-reference index (extraction residue, preserved as a comment
 * so the header stays compilable):
 * Definition: KernelMatrix.hpp:162
 * double xnrm2(int n, double *x, int incx) -- DNRM2 wrapper. Definition: blas_lapack.cpp:83
 * void ComputeConfusion() -- Definition: cluster.hpp:683
 * std::vector< size_t > ClusterPermutation(std::vector< int > &amap, std::vector< std::vector< int >> &bmap) -- Definition: cluster.hpp:353
 * std::vector< size_t > Centroids(std::vector< int > &amap, std::vector< T, Allocator > &centroids, std::vector< T, Allocator > &centroid_sqnorms) -- Definition: cluster.hpp:148
 * Definition: gofmm.hpp:3779
 * virtual void KernelKmeansRefinement() -- Definition: cluster.hpp:679
 * void Spectral(VIRTUALMATRIX &G) -- Definition: cluster.hpp:278
 * virtual void KernelKmeans(kernel_s< T > &kernel, std::vector< int > &amap, size_t niter, T tol, T budget) -- Definition: cluster.hpp:506
 * T NMI() -- Definition: cluster.hpp:761
 * Definition: KernelMatrix.hpp:54
 * virtual void NormalizedCut() -- Definition: cluster.hpp:812
 * virtual void Kmeans(std::vector< int > &amap, size_t niter, T tol) -- Definition: cluster.hpp:206
 * virtual void Kmeans(size_t niter, T tol) -- Definition: cluster.hpp:267
 * void lanczos(VIRTUALMATRIX &A, size_t n, size_t r, size_t nkrylov, std::vector< T > &Sigma, std::vector< T > &V) -- Definition: lanczos.hpp:20
 * void Multiply(hmlp::Data< T > &y, hmlp::Data< T > &x) -- Definition: cluster.hpp:36
 * size_t col() const noexcept -- Definition: Data.hpp:281
 * size_t row() const noexcept -- Definition: Data.hpp:278
 * Definition: cluster.hpp:103
 * Definition: cluster.hpp:19
 * void Spectral(kernel_s< T > &kernel, T stol, T budget) -- Definition: cluster.hpp:330
 * void Spectral(kernel_s< T > &kernel) -- Definition: cluster.hpp:316
 * Cluster(size_t d, size_t n, size_t ncluster, hmlp::Data< T > *X, std::vector< int, Allocator > *assignments) -- Definition: cluster.hpp:107
 * virtual void KernelKmeans(kernel_s< T > &kernel, std::vector< int > &amap, size_t niter, T tol) -- Definition: cluster.hpp:377
 * Definition: gofmm.hpp:83
 */