12 #include <containers/KernelMatrix.hpp> 13 #include <primitives/lanczos.hpp> 18 template<
typename PARAM,
typename T>
31 Degree.resize( n, (
size_t)1, 0.0 );
32 param->Multiply( Degree, ones );
39 assert( y.
row() == n && x.
row() == n );
40 assert( y.
col() == x.
col() );
42 size_t nrhs = y.
col();
46 for (
size_t j = 0; j < nrhs; j ++ )
47 for (
size_t i = 0; i < n; i ++ )
48 temp( i, j ) /= std::sqrt( Degree[ i ] );
51 for (
size_t j = 0; j < nrhs; j ++ )
52 for (
size_t i = 0; i < n; i ++ )
56 param->Multiply( y, temp );
59 for (
size_t j = 0; j < nrhs; j ++ )
60 for (
size_t i = 0; i < n; i ++ )
61 y( i, j ) /= std::sqrt( Degree[ i ] );
66 for (
size_t j = 0; j < nrhs; j ++ )
67 for (
size_t i = 0; i < n; i ++ )
68 y( i, j ) += sigma * x( i, j );
93 #ifdef HMLP_MIC_AVX512 95 template<
class T,
class Allocator = hbw::allocator<T> >
98 template<
class T,
class Allocator = thrust::system::cuda::experimental::pinned_allocator<T> >
101 template<
class T,
class Allocator = std::allocator<T> >
112 this->ncluster = ncluster;
114 this->assignments = assignments;
116 assert( assignments->size() == n );
117 assert( X->size() == d * n );
120 X2.resize( n, 1, 0.0 );
122 for (
size_t i = 0; i < n; i ++ )
124 for (
size_t p = 0; p < d; p ++ )
126 auto xpi = (*X)( p, i );
127 X2[ i ] += xpi * xpi;
132 void ProvideLabels(
size_t nclass, std::vector<int, Allocator> *labels )
134 this->nclass = nclass;
135 this->labels = labels;
138 void InitializeAssignments()
140 for (
size_t i = 0; i < n; i ++ )
142 (*assignments)[ i ] = i % ncluster;
149 std::vector<int> &amap,
150 std::vector<T, Allocator> ¢roids,
151 std::vector<T, Allocator> ¢roid_sqnorms )
153 std::vector<size_t> cluster_sizes( ncluster, 0 );
156 centroids.resize( d * ncluster );
157 centroid_sqnorms.resize( ncluster );
160 for (
size_t i = 0; i < centroids.size(); i ++ )
161 centroids[ i ] = 0.0;
162 for (
size_t i = 0; i < centroid_sqnorms.size(); i ++ )
163 centroid_sqnorms[ i ] = 0.0;
166 for (
size_t i = 0; i < amap.size(); i ++ )
168 auto pi = (*assignments)[ amap[ i ] ];
169 assert( pi >= 0 && pi < ncluster );
170 for (
size_t p = 0; p < d; p ++ )
172 centroids[ p + pi * d ] += (*X)( p, (size_t)amap[ i ] );
173 cluster_sizes[ pi ] ++;
178 for (
size_t j = 0; j < ncluster; j ++ )
180 if ( cluster_sizes[ j ] )
182 for (
size_t p = 0; p < d; p ++ )
183 centroids[ p + j * d ] /= cluster_sizes[ j ];
188 for (
size_t p = 0; p < d; p ++ )
190 centroids[ p + j * d ] = (*X)( p, std::rand() % n );
196 for (
size_t j = 0; j < ncluster; j ++ )
197 for (
size_t p = 0; p < d; p ++ )
198 centroid_sqnorms[ j ] += centroids[ p + j * d ] * centroids[ p + j * d ];
200 return cluster_sizes;
206 virtual void Kmeans( std::vector<int> &amap,
size_t niter, T tol )
211 std::vector<int> centroid_maps( ncluster, 0 );
212 for (
size_t i = 0; i < ncluster; i ++ )
213 centroid_maps[ i ] = i;
216 hmlp::Data<T> distance2centroids( n, 1, std::numeric_limits<T>::max() );
227 for (
size_t iter = 0; iter < niter; iter ++ )
229 double beg_t, end_t, centroid_t, gsknn_t;
236 beg_t = omp_get_wtime();
237 auto cluster_sizes = Centroids( amap, centroids, centroid_sqnorms );
238 centroid_t = omp_get_wtime() - beg_t;
245 gsknn( amap.size(), ncluster, d, 1,
246 X->data(), X2.data(), amap.data(),
247 centroids.data(), centroid_sqnorms.data(), centroid_maps.data(),
248 distance2centroids.data(),
249 assignments->data() );
257 for (
size_t i = 0; i < n; i ++ )
258 quality += distance2centroids[ i ] * distance2centroids[ i ];
259 quality = std::sqrt( quality );
267 virtual void Kmeans(
size_t niter, T tol )
269 std::vector<int> amap( n, 0 );
270 for (
size_t i = 0; i < n; i ++ ) amap[ i ] = i;
271 Kmeans( amap, niter, tol );
277 template<
typename VIRTUALMATRIX>
288 size_t neig = ncluster;
295 size_t num_krylov = 5 * neig;
299 for (
size_t i = 0; i < n; i ++ )
301 T vi_norm =
hmlp::xnrm2( neig, V.columndata( i ), 1 );
302 for (
size_t p = 0; p < neig; p ++ )
304 V( p, i ) /= vi_norm;
307 (*assignments)[ i ] = i % ncluster;
311 Cluster<T> spherical_cluster( neig, n, ncluster, &V, assignments );
312 spherical_cluster.
Kmeans( 30, 1E-3 );
322 T spectrum_shift = 0.0;
339 T spectrum_shift = 0.0;
341 G( n, &H, spectrum_shift );
354 std::vector<std::vector<int>> &bmap )
356 std::vector<size_t> cluster_sizes( ncluster, 0 );
358 for (
size_t j = 0; j < ncluster; j ++ )
360 if ( !bmap[ j ].size() ) bmap[ j ].reserve( n );
364 for (
size_t i = 0; i < amap.size(); i ++ )
366 auto pi = (*assignments)[ amap[ i ] ];
367 assert( pi >= 0 && pi < ncluster );
368 bmap[ pi ].push_back( i );
369 cluster_sizes[ pi ] ++;
372 return cluster_sizes;
378 std::vector<int> &amap,
size_t niter, T tol )
384 std::vector<T> Diag( n, 0 );
385 #pragma omp parallel for 386 for (
size_t i = 0; i < n; i ++ )
388 Diag[ i ] = K( i, i );
389 assert( Diag[ i ] > 0.0 );
397 std::vector<T, Allocator> &Degree = K.GetDegree();
400 std::vector<std::vector<int>> bmap( ncluster );
403 std::vector<hmlp::Data<T>> Similarity( ncluster );
404 for (
size_t j = 0; j < ncluster; j ++ )
405 Similarity[ j ].resize( amap.size(), 1, 0.0 );
408 std::vector<int> umap( amap.size(), 0 );
409 for (
size_t i = 0; i < amap.size(); i ++ ) umap[ i ] = i;
412 for (
size_t iter = 0; iter < niter; iter ++ )
415 hmlp::Data<T> distance2centroids( n, 1, std::numeric_limits<T>::max() );
418 std::vector<T, Allocator> centroids( n, 0.0 );
421 auto cluster_sizes = ClusterPermutation( amap, bmap );
424 for (
size_t j = 0; j < ncluster; j ++ )
430 #pragma omp parallel for 431 for (
size_t i = 0; i < amap.size(); i ++ )
432 Similarity[ j ][ i ] = 0.0;
439 1, Similarity[ j ], umap,
445 if ( amap.size() == n )
448 for (
size_t i = 0; i < bmap[ j ].size(); i ++ )
449 Kcc += Similarity[ j ][ bmap[ j ][ i ] ];
454 K.Multiply( 1, centroids, bmap[ j ], bmap[ j ], ones );
455 for (
size_t i = 0; i < bmap[ j ].size(); i ++ )
456 Kcc += centroids[ bmap[ j ][ i ] ];
461 for (
size_t i = 0; i < bmap[ j ].size(); i ++ )
462 Dcc += Degree[ bmap[ j ][ i ] ];
467 #pragma omp parallel for 468 for (
size_t i = 0; i < amap.size(); i ++ )
470 T Kii = Diag[ amap[ i ] ];
471 T Kic = Similarity[ j ][ amap[ i ] ];
472 Similarity[ j ][ amap[ i ] ] =
473 Kii / ( Degree[ amap[ i ] ] * Degree[ amap[ i ] ] ) -
474 ( 2.0 / Degree[ amap[ i ] ] ) * ( Kic / Dcc ) +
477 if ( Similarity[ j ][ amap[ i ] ] <= distance2centroids[ amap[ i ] ] )
479 distance2centroids[ amap[ i ] ] = Similarity[ j ][ amap[ i ] ];
480 (*assignments)[ amap[ i ] ] = j;
489 for (
size_t i = 0; i < amap.size(); i ++ )
490 quality += distance2centroids[ amap[ i ] ] * distance2centroids[ amap[ i ] ];
497 virtual void KernelKmeans(
kernel_s<T> &kernel,
size_t niter, T tol )
499 std::vector<int> amap( n, 0 );
500 for (
size_t i = 0; i < n; i ++ ) amap[ i ] = i;
501 KernelKmeans( kernel, amap, niter, tol );
508 std::vector<int> &amap,
size_t niter, T tol, T budget )
516 auto *tree_ptr = hmlp::gofmm::Compress<T>( K, tol, budget );
517 auto &tree = *tree_ptr;
520 std::vector<T> Diag( n, 0 );
521 #pragma omp parallel for 522 for (
size_t i = 0; i < n; i ++ )
524 Diag[ i ] = K( i, i );
525 assert( Diag[ i ] > 0.0 );
532 auto Degree = hmlp::gofmm::Evaluate( tree, ones );
536 std::size_t ntest = 100;
543 for (
size_t i = 0; i < ntest; i ++ )
558 for (
size_t p = 0; p < potentials.
col(); p ++ )
560 potentials[ p ] = Degree( i, p );
562 auto fmmerr = ComputeError( tree, i, potentials );
567 printf(
"gid %6lu, ASKIT %3.1E, HODLR %3.1E, GOFMM %3.1E\n",
568 i, nnerr, nonnerr, fmmerr );
571 nonnerr_avg += nonnerr;
572 fmmerr_avg += fmmerr;
574 printf(
"========================================================\n");
575 printf(
" ASKIT %3.1E, HODLR %3.1E, GOFMM %3.1E\n",
576 nnerr_avg / ntest , nonnerr_avg / ntest, fmmerr_avg / ntest );
577 printf(
"========================================================\n");
591 std::vector<std::vector<int>> bmap( ncluster );
600 for (
size_t iter = 0; iter < niter; iter ++ )
603 hmlp::Data<T> distance2centroids( n, 1, std::numeric_limits<T>::max() );
606 std::vector<T> centroids( n, 0.0 );
609 auto cluster_sizes = ClusterPermutation( amap, bmap );
612 #pragma omp parallel for 613 for (
size_t i = 0; i < n; i ++ )
615 for (
size_t j = 0; j < ncluster; j ++ )
617 indicators( i, j ) = 0.0;
619 indicators( i, (
size_t)(*assignments)[ i ] ) = 1.0;
623 Similarity = hmlp::gofmm::Evaluate( tree, indicators );
626 for (
size_t j = 0; j < ncluster; j ++ )
632 for (
size_t i = 0; i < bmap[ j ].size(); i ++ )
633 Kcc += Similarity( (
size_t)bmap[ j ][ i ], j );
636 for (
size_t i = 0; i < bmap[ j ].size(); i ++ )
637 Dcc += Degree[ bmap[ j ][ i ] ];
640 #pragma omp parallel for 641 for (
size_t i = 0; i < amap.size(); i ++ )
643 T Kii = Diag[ amap[ i ] ];
644 T Kic = Similarity( (
size_t)amap[ i ], j );
646 Similarity( (
size_t)amap[ i ], j ) =
647 Kii / ( Degree[ amap[ i ] ] * Degree[ amap[ i ] ] ) -
648 ( 2.0 / Degree[ amap[ i ] ] ) * ( Kic / Dcc ) +
651 if ( Similarity( (
size_t)amap[ i ], j ) <= distance2centroids[ amap[ i ] ] )
653 distance2centroids[ amap[ i ] ] = Similarity( (
size_t)amap[ i ], j );
654 (*assignments)[ amap[ i ] ] = j;
660 for (
size_t i = 0; i < amap.size(); i ++ )
661 quality += distance2centroids[ amap[ i ] ] * distance2centroids[ amap[ i ] ];
667 virtual void KernelKmeans(
kernel_s<T> &kernel,
size_t niter, T tol, T budget )
669 std::vector<int> amap( n, 0 );
670 for (
size_t i = 0; i < n; i ++ ) amap[ i ] = i;
671 KernelKmeans( kernel, amap, niter, tol, budget );
686 assert( labels && nclass );
689 Confusion.resize( 0, 0 );
690 Confusion.resize( ncluster + 1, nclass + 1, 0.0 );
693 for (
size_t i = 0; i < n; i ++ )
695 auto icluster = (*assignments)[ i ];
696 auto iclass = (*labels)[ i ];
697 Confusion( icluster, iclass ) += 1.0;
700 for (
size_t q = 0; q < nclass; q ++ )
702 for (
size_t p = 0; p < ncluster; p ++ )
704 auto Cpq = Confusion( p, q );
705 Confusion( p, nclass ) += Cpq;
706 Confusion( ncluster, q ) += Cpq;
707 Confusion( ncluster, nclass ) += Cpq;
714 if ( nclass == ncluster )
716 T num_of_correct_assignments = 0.0;
717 std::set<int> cluster_pivots;
718 std::set<int> class_pivots;
721 for (
int q = 0; q < nclass; q ++ )
722 class_pivots.insert( q );
725 for (
int p = 0; p < ncluster; p ++ )
726 cluster_pivots.insert( p );
728 while ( cluster_pivots.size() && class_pivots.size() )
732 size_t max_entry = 0;
735 for (
auto pit = cluster_pivots.begin(); pit != cluster_pivots.end(); pit ++ )
737 for (
auto qit = class_pivots.begin(); qit != class_pivots.end(); qit ++ )
739 if ( Confusion( *pit, *qit ) >= max_entry )
741 max_entry = Confusion( *pit, *qit );
748 num_of_correct_assignments += max_entry;
749 cluster_pivots.erase( pivot_p );
750 class_pivots.erase( pivot_q );
753 printf(
"Accuracy: %4.2E\n", num_of_correct_assignments / n );
764 assert( labels && nclass );
767 T nmi_antecedent = 0.0;
768 T nmi_consequent = 0.0;
774 for (
size_t q = 0; q < nclass; q ++ )
776 for (
size_t p = 0; p < ncluster; p ++ )
778 auto Cpq = Confusion( p, q );
779 auto Cp = Confusion( p, nclass );
780 auto Cq = Confusion( ncluster, q );
783 nmi_antecedent += (-2.0) * ( Cpq / n ) * std::log2( n * Cpq / ( Cp * Cq ) );
789 for (
size_t q = 0; q < nclass; q ++ )
791 auto Cq = Confusion( ncluster, q );
792 nmi_consequent += ( Cq / n ) * std::log2( Cq / n );
796 for (
size_t p = 0; p < ncluster; p ++ )
798 auto Cp = Confusion( p, nclass );
799 nmi_consequent += ( Cp / n ) * std::log2( Cp / n );
802 nmi = ( nmi_antecedent / nmi_consequent );
804 printf(
"NMI: %E / %E = %4.2E\n", nmi_antecedent, nmi_consequent, nmi );
832 std::vector<int, Allocator> *assignments = NULL;
834 std::vector<int, Allocator> *labels = NULL;
Definition: KernelMatrix.hpp:162
double xnrm2(int n, double *x, int incx)
DNRM2 wrapper.
Definition: blas_lapack.cpp:83
void ComputeConfusion()
Definition: cluster.hpp:683
std::vector< size_t > ClusterPermutation(std::vector< int > &amap, std::vector< std::vector< int >> &bmap)
Definition: cluster.hpp:353
std::vector< size_t > Centroids(std::vector< int > &amap, std::vector< T, Allocator > ¢roids, std::vector< T, Allocator > ¢roid_sqnorms)
Definition: cluster.hpp:148
Definition: gofmm.hpp:3779
virtual void KernelKmeansRefinement()
Definition: cluster.hpp:679
void Spectral(VIRTUALMATRIX &G)
Definition: cluster.hpp:278
virtual void KernelKmeans(kernel_s< T > &kernel, std::vector< int > &amap, size_t niter, T tol, T budget)
Definition: cluster.hpp:506
T NMI()
Definition: cluster.hpp:761
Definition: KernelMatrix.hpp:54
virtual void NormalizedCut()
Definition: cluster.hpp:812
virtual void Kmeans(std::vector< int > &amap, size_t niter, T tol)
Definition: cluster.hpp:206
virtual void Kmeans(size_t niter, T tol)
Definition: cluster.hpp:267
void lanczos(VIRTUALMATRIX &A, size_t n, size_t r, size_t nkrylov, std::vector< T > &Sigma, std::vector< T > &V)
Definition: lanczos.hpp:20
void Multiply(hmlp::Data< T > &y, hmlp::Data< T > &x)
Definition: cluster.hpp:36
size_t col() const noexcept
Definition: Data.hpp:281
size_t row() const noexcept
Definition: Data.hpp:278
Definition: cluster.hpp:103
Definition: cluster.hpp:19
void Spectral(kernel_s< T > &kernel, T stol, T budget)
Definition: cluster.hpp:330
void Spectral(kernel_s< T > &kernel)
Definition: cluster.hpp:316
Cluster(size_t d, size_t n, size_t ncluster, hmlp::Data< T > *X, std::vector< int, Allocator > *assignments)
Definition: cluster.hpp:107
virtual void KernelKmeans(kernel_s< T > &kernel, std::vector< int > &amap, size_t niter, T tol)
Definition: cluster.hpp:377