1 |
|
/** |
2 |
|
* HMLP (High-Performance Machine Learning Primitives) |
3 |
|
* |
4 |
|
* Copyright (C) 2014-2017, The University of Texas at Austin |
5 |
|
* |
6 |
|
* This program is free software: you can redistribute it and/or modify |
7 |
|
* it under the terms of the GNU General Public License as published by |
8 |
|
* the Free Software Foundation, either version 3 of the License, or |
9 |
|
* (at your option) any later version. |
10 |
|
* |
11 |
|
* This program is distributed in the hope that it will be useful, |
12 |
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 |
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
14 |
|
* GNU General Public License for more details. |
15 |
|
* |
16 |
|
* You should have received a copy of the GNU General Public License |
17 |
|
* along with this program. If not, see the LICENSE file. |
18 |
|
* |
19 |
|
**/ |
20 |
|
|
21 |
|
#ifndef VIRTUALMATRIX_HPP |
22 |
|
#define VIRTUALMATRIX_HPP |
23 |
|
|
24 |
|
/** Use hmlp::Data<T> for a concrete dense submatrix */ |
25 |
|
#include <Data.hpp> |
26 |
|
|
27 |
|
using namespace std; |
28 |
|
|
29 |
|
|
30 |
|
/** |
31 |
|
* @breif GOFMM relies on an arbitrary distance metric that |
32 |
|
* described the order between matrix element Kij. |
33 |
|
* Such metric can be the |
34 |
|
* Euclidian distance i.e. "GEOMETRY_DISTANCE", or |
35 |
|
* arbitrary distances defined in the Gram vector space |
36 |
|
* i.e. "KERNEL_DISTANCE" or "ANGLE_DISTANCE" |
37 |
|
*/ |
38 |
|
typedef enum |
39 |
|
{ |
40 |
|
GEOMETRY_DISTANCE, |
41 |
|
KERNEL_DISTANCE, |
42 |
|
ANGLE_DISTANCE, |
43 |
|
USER_DISTANCE |
44 |
|
} DistanceMetric; |
45 |
|
|
46 |
|
|
47 |
|
namespace hmlp |
48 |
|
{ |
49 |
|
|
50 |
|
template<typename T> |
51 |
|
class SPDMatrixMPISupport |
52 |
|
{ |
53 |
|
public: |
54 |
|
virtual void SendIndices( vector<size_t> ids, int dest, mpi::Comm comm ) {}; |
55 |
|
virtual void RecvIndices( int src, mpi::Comm comm, mpi::Status *status ) {}; |
56 |
|
virtual void BcastIndices( vector<size_t> ids, int root, mpi::Comm comm ) {}; |
57 |
|
virtual void RequestIndices( const vector<vector<size_t>>& ids ) {}; |
58 |
|
}; /** end class SPDMatrixMPISupport */ |
59 |
|
|
60 |
|
|
61 |
|
|
62 |
|
template<typename DATATYPE, class Allocator = std::allocator<DATATYPE>> |
63 |
|
/** |
64 |
|
* @brief VirtualMatrix is the abstract base class for matrix-free |
65 |
|
* access and operations. Most of the public functions will |
66 |
|
* be virtual. To inherit VirtualMatrix, you "must" implement |
67 |
|
* the evaluation operator. Otherwise, the code won't compile. |
68 |
|
*/ |
69 |
|
class VirtualMatrix : public SPDMatrixMPISupport<DATATYPE> |
70 |
|
{ |
71 |
|
public: |
72 |
|
|
73 |
|
typedef DATATYPE T; |
74 |
|
|
75 |
|
VirtualMatrix() {}; |
76 |
|
|
77 |
|
VirtualMatrix( size_t m, size_t n ) { resize( m, n ); }; |
78 |
|
|
79 |
|
virtual void resize( size_t m, size_t n ) { this->m = m; this->n = n; }; |
80 |
|
|
81 |
|
/** ESSENTIAL: return number of coumns */ |
82 |
|
size_t row() { return m; }; |
83 |
|
|
84 |
|
/** ESSENTIAL: return number of rows */ |
85 |
|
size_t col() { return n; }; |
86 |
|
|
87 |
|
/** ESSENTIAL: this is an abstract function */ |
88 |
|
virtual T operator () ( size_t i, size_t j ) = 0; |
89 |
|
|
90 |
|
/** ESSENTIAL: return a submatrix */ |
91 |
|
virtual Data<T> operator() ( const vector<size_t>& I, const vector<size_t>& J ) |
92 |
|
{ |
93 |
|
Data<T> KIJ( I.size(), J.size() ); |
94 |
|
for ( size_t j = 0; j < J.size(); j ++ ) |
95 |
|
for ( size_t i = 0; i < I.size(); i ++ ) |
96 |
|
KIJ( i, j ) = (*this)( I[ i ], J[ j ] ); |
97 |
|
return KIJ; |
98 |
|
}; |
99 |
|
|
100 |
|
Data<T> KernelDistances( const vector<size_t> &I, const vector<size_t> &J ) |
101 |
|
{ |
102 |
|
auto KIJ = (*this)( I, J ); |
103 |
|
auto DII = Diagonal( I ); |
104 |
|
auto DJJ = Diagonal( J ); |
105 |
|
for ( size_t j = 0; j < J.size(); j ++ ) |
106 |
|
{ |
107 |
|
for ( size_t i = 0; i < I.size(); i ++ ) |
108 |
|
{ |
109 |
|
auto kij = KIJ( i, j ); |
110 |
|
auto kii = DII[ i ]; |
111 |
|
auto kjj = DJJ[ j ]; |
112 |
|
KIJ( i, j ) = kii - 2.0 * kij + kjj; |
113 |
|
} |
114 |
|
} |
115 |
|
return KIJ; |
116 |
|
}; |
117 |
|
|
118 |
|
Data<T> AngleDistances( const vector<size_t> &I, const vector<size_t> &J ) |
119 |
|
{ |
120 |
|
auto KIJ = (*this)( I, J ); |
121 |
|
auto DII = Diagonal( I ); |
122 |
|
auto DJJ = Diagonal( J ); |
123 |
|
for ( size_t j = 0; j < J.size(); j ++ ) |
124 |
|
{ |
125 |
|
for ( size_t i = 0; i < I.size(); i ++ ) |
126 |
|
{ |
127 |
|
auto kij = KIJ( i, j ); |
128 |
|
auto kii = DII[ i ]; |
129 |
|
auto kjj = DJJ[ j ]; |
130 |
|
KIJ( i, j ) = 1.0 - ( kij * kij ) / ( kii * kjj ); |
131 |
|
} |
132 |
|
} |
133 |
|
return KIJ; |
134 |
|
}; |
135 |
|
|
136 |
|
virtual Data<T> UserDistances( const vector<size_t> &I, const vector<size_t> &J ) |
137 |
|
{ |
138 |
|
printf( "UserDistances(): not defined.\n" ); exit( 1 ); |
139 |
|
return Data<T>( I.size(), J.size(), 0 ); |
140 |
|
}; |
141 |
|
|
142 |
|
virtual Data<T> GeometryDistances( const vector<size_t> &I, const vector<size_t> &J ) |
143 |
|
{ |
144 |
|
printf( "GeometricDistances(): not defined.\n" ); exit( 1 ); |
145 |
|
return Data<T>( I.size(), J.size(), 0 ); |
146 |
|
}; |
147 |
|
|
148 |
|
|
149 |
|
Data<T> Distances( DistanceMetric metric, const vector<size_t> &I, const vector<size_t> &J ) |
150 |
|
{ |
151 |
|
switch ( metric ) |
152 |
|
{ |
153 |
|
case KERNEL_DISTANCE: return KernelDistances( I, J ); |
154 |
|
case ANGLE_DISTANCE: return AngleDistances( I, J ); |
155 |
|
case GEOMETRY_DISTANCE: return GeometryDistances( I, J ); |
156 |
|
case USER_DISTANCE: return UserDistances( I, J ); |
157 |
|
default: |
158 |
|
{ |
159 |
|
printf( "ERROR: Unknown distance type." ); |
160 |
|
exit( 1 ); |
161 |
|
} |
162 |
|
} |
163 |
|
}; |
164 |
|
|
165 |
|
virtual Data<pair<T, size_t>> NeighborSearch( |
166 |
|
DistanceMetric metric, size_t kappa, |
167 |
|
const vector<size_t> &Q, |
168 |
|
const vector<size_t> &R, pair<T, size_t> init ) |
169 |
|
{ |
170 |
|
Data<pair<T, size_t>> NN( kappa, Q.size(), init ); |
171 |
|
|
172 |
|
/** Compute all pairwise distances. */ |
173 |
|
auto DRQ = Distances( metric, R, Q ); |
174 |
|
/** Sanity check: distance must be >= 0. */ |
175 |
|
for ( auto &dij: DRQ ) dij = std::max( dij, T(0) ); |
176 |
|
|
177 |
|
/** Loop over each query. */ |
178 |
|
for ( size_t j = 0; j < Q.size(); j ++ ) |
179 |
|
{ |
180 |
|
vector<pair<T, size_t>> candidates( R.size() ); |
181 |
|
for ( size_t i = 0; i < R.size(); i ++) |
182 |
|
{ |
183 |
|
candidates[ i ].first = DRQ( i, j ); |
184 |
|
candidates[ i ].second = R[ i ]; |
185 |
|
} |
186 |
|
/** Sort the query according to distances. */ |
187 |
|
sort( candidates.begin(), candidates.end() ); |
188 |
|
/** Fill-in the neighbor list. */ |
189 |
|
for ( size_t i = 0; i < kappa; i ++ ) NN( i, j ) = candidates[ i ]; |
190 |
|
} |
191 |
|
|
192 |
|
return NN; |
193 |
|
}; |
194 |
|
|
195 |
|
virtual Data<T> Diagonal( const vector<size_t> &I ) |
196 |
|
{ |
197 |
|
Data<T> DII( I.size(), 1, 0.0 ); |
198 |
|
for ( size_t i = 0; i < I.size(); i ++ ) |
199 |
|
DII[ i ] = (*this)( I[ i ], I[ i ] ); |
200 |
|
return DII; |
201 |
|
}; |
202 |
|
|
203 |
|
virtual pair<T, size_t> ImportantSample( size_t j ) |
204 |
|
{ |
205 |
|
size_t i = std::rand() % m; |
206 |
|
pair<T, size_t> sample( (*this)( i, j ), i ); |
207 |
|
return sample; |
208 |
|
}; /** end ImportantSample() */ |
209 |
|
|
210 |
|
virtual pair<T, int> ImportantSample( int j ) |
211 |
|
{ |
212 |
|
int i = std::rand() % m; |
213 |
|
pair<T, int> sample( (*this)( i, j ), i ); |
214 |
|
return sample; |
215 |
|
}; /** end ImportantSample() */ |
216 |
|
|
217 |
|
private: |
218 |
|
|
219 |
|
size_t m = 0; |
220 |
|
|
221 |
|
size_t n = 0; |
222 |
|
|
223 |
|
}; /** end class VirtualMatrix */ |
224 |
|
|
225 |
|
|
226 |
|
|
227 |
|
#ifdef HMLP_MIC_AVX512 |
228 |
|
template<typename T, class Allocator = hbw::allocator<T> > |
229 |
|
#else |
230 |
|
template<typename T, class Allocator = std::allocator<T> > |
231 |
|
#endif |
232 |
|
/** |
233 |
|
* @brief DistVirtualMatrix is the abstract base class for matrix-free |
234 |
|
* access and operations. Most of the public functions will |
235 |
|
* be virtual. To inherit DistVirtualMatrix, you "must" implement |
236 |
|
* the evaluation operator. Otherwise, the code won't compile. |
237 |
|
* |
238 |
|
* Two virtual functions must be implemented: |
239 |
|
* |
240 |
|
* T operator () ( size_t i, size_t j ), and |
241 |
|
* Data<T> operator () ( vector<size_t> &I, vector<size_t> &J ). |
242 |
|
* |
243 |
|
* These two functions can involve nonblocking MPI routines, but |
244 |
|
* blocking collborative communication routines are not allowed. |
245 |
|
* |
246 |
|
* DistVirtualMatrix inherits mpi::MPIObject, which is initalized |
247 |
|
* with the provided comm. MPIObject duplicates comm into |
248 |
|
* sendcomm and recvcomm, which allows concurrent multi-threaded |
249 |
|
* nonblocking send/recv. |
250 |
|
* |
251 |
|
* For example, RequestKIJ( I, J, p ) sends I and J to rank-p, |
252 |
|
* requesting the submatrix. MPI process rank-p has at least |
253 |
|
* one thread will execute BackGroundProcess( do_terminate ), |
254 |
|
* waiting for incoming requests. |
255 |
|
* Rank-p then invoke K( I, J ) locally, and send the submatrix |
256 |
|
* back to the clients. Overall the pattern usually looks like |
257 |
|
* |
258 |
|
* Data<T> operator () ( vector<size_t> &I, vector<size_t> &J ). |
259 |
|
* { |
260 |
|
* for each submatrix KAB entirely owned by p |
261 |
|
* KAB = RequestKIJ( A, B ) |
262 |
|
* pack KAB back to KIJ |
263 |
|
* |
264 |
|
* return KIJ |
265 |
|
* } |
266 |
|
* |
267 |
|
*/ |
268 |
|
class DistVirtualMatrix : public VirtualMatrix<T, Allocator>, |
269 |
|
public mpi::MPIObject |
270 |
|
{ |
271 |
|
public: |
272 |
|
|
273 |
|
/** (Default) constructor */ |
274 |
|
DistVirtualMatrix( size_t m, size_t n, mpi::Comm comm ) |
275 |
|
: VirtualMatrix<T, Allocator>( m, n ), mpi::MPIObject( comm ) |
276 |
|
{}; |
277 |
|
|
278 |
|
|
279 |
|
}; /** end class DistVirtualMatrix */ |
280 |
|
|
281 |
|
|
282 |
|
}; /** end namespace hmlp */ |
283 |
|
|
284 |
|
#endif /** define VIRTUALMATRIX_HPP */ |