HMLP: High-performance Machine Learning Primitives
kernel_matrix.hpp
1 #ifndef _PVFMM_KERNEL_MATRIX_HPP_
2 #define _PVFMM_KERNEL_MATRIX_HPP_
3 
4 #include <omp.h>
5 
6 #include <pvfmm/cheb_utils.hpp>
7 
8 namespace pvfmm {
9 
10 template <class Real> class KernelMatrix {
11 
12  static const Integer DIM = 3;
13  static const Integer ELEMDIM = DIM - 1;
14  static const Integer Nface = 2 * DIM;
15 
16  public:
17 
18  KernelMatrix(size_t m=0, size_t n=0) {
19  size_t N = std::max(m, n);
20  Integer order = 6, depth = 0;
21  auto& ker = Stokes3D<Real>::FxU();
22  Integer Ncoeff = order * (order + 1) / 2 * ker.Dim(0);
23  depth = log(N / (2.0 * DIM * Ncoeff))/log(4.0);
24  Initialize(order, depth, ker);
25  }
26 
27  Real operator() (size_t i, size_t j) const {
28  return (GetElem(i,j) + GetElem(j,i)) * 0.5; // Symmetric part
29  }
30 
31  Long Rows() const {
32  return row_;
33  }
34 
35  Long Columns() const {
36  return col_;
37  }
38 
39  private:
40 
41  void Initialize(Integer order, Integer depth, const KernelFunction<Real, DIM>& ker) {
42  order_ = order;
43  depth_ = depth;
44  ker_ = &ker;
45  { // Set Ncoeff_
46  Ncoeff_ = 1;
47  for (Integer i = 0; i < ELEMDIM; i++) Ncoeff_ = (Ncoeff_ * (order_ + i)) / (i + 1);
48  }
49  GridDim_ = pvfmm::pow<Long>(2, depth);
50  row_ = Nface * pvfmm::pow<ELEMDIM, Long>(GridDim_) * ker_->Dim(0) * Ncoeff_;
51  col_ = Nface * pvfmm::pow<ELEMDIM, Long>(GridDim_) * ker_->Dim(1) * Ncoeff_;
52  }
53 
54  Real GetElem(Long t, Long s) const {
55  if (t >= row_ || s >= col_) return (s==t ? 1.0 : 0.0);
56 
57  Integer src_coeff_idx = s % (ker_->Dim(0) * Ncoeff_);
58  Integer trg_coeff_idx = t % (ker_->Dim(1) * Ncoeff_);
59 
60  Integer src_elem = s / (pvfmm::pow<ELEMDIM, Long>(GridDim_) * ker_->Dim(0) * Ncoeff_);
61  Integer trg_elem = t / (pvfmm::pow<ELEMDIM, Long>(GridDim_) * ker_->Dim(1) * Ncoeff_);
62 
63  StaticArray<Real, DIM> scoord, tcoord;
64  { // Set scoord
65  Long idx = (s / (ker_->Dim(0) * Ncoeff_)) % pvfmm::pow<ELEMDIM, Long>(GridDim_);
66  Real s = pvfmm::pow<Real>(0.5, depth_);
67  for (Integer i = 0; i < DIM; i++) {
68  if (i == src_elem / 2) {
69  scoord[i] = (src_elem % 2 ? 1.0 - s : 0.0);
70  } else {
71  scoord[i] = (idx % GridDim_) * (1.0 / GridDim_);
72  idx = idx / GridDim_;
73  }
74  }
75  }
76  { // Set tcoord
77  Long idx = (t / (ker_->Dim(1) * Ncoeff_)) % pvfmm::pow<ELEMDIM, Long>(GridDim_);
78  Real s = pvfmm::pow<Real>(0.5, depth_);
79  for (Integer i = 0; i < DIM; i++) {
80  if (i == trg_elem / 2) {
81  tcoord[i] = (trg_elem % 2 ? 1.0 - s : 0.0);
82  } else {
83  tcoord[i] = (idx % GridDim_) * (1.0 / GridDim_);
84  idx = idx / GridDim_;
85  }
86  }
87  }
88 
89  return GetMatrixElem(order_, scoord, src_elem, tcoord, trg_elem, src_coeff_idx, trg_coeff_idx);
90  }
91 
92  Real GetMatrixElem(Integer order, ConstIterator<Real> scoord, Integer src_elem, ConstIterator<Real> tcoord, Integer trg_elem, Long src_coeff_idx, Long trg_coeff_idx) const {
93  Real elem = 0;
94  StaticArray<Long, DIM + 2> idx_arr;
95  for (Integer i = 0; i < DIM; i++) idx_arr[i] = (tcoord[i] - scoord[i]) * GridDim_;
96  if ( 0 || std::abs(idx_arr[0]) < 500
97  && std::abs(idx_arr[1]) < 500
98  && std::abs(idx_arr[2]) < 500 )
99  { // Store near-interaction matrices
100  idx_arr[DIM + 0] = src_elem;
101  idx_arr[DIM + 1] = trg_elem;
102  Long idx = cantor_signed(idx_arr, DIM + 2);
103  #pragma omp critical
104  { // Set elem
105  if (mat.find(idx) == mat.end()) { // Compute and store new matrix
106  mat[idx] = Integ(order_, scoord, src_elem, depth_, tcoord, trg_elem, depth_, *ker_);
107  }
108  elem = mat[idx][src_coeff_idx][trg_coeff_idx];
109  }
110  } else { // TODO: implement optimized algorithm for well-separated octants
111  elem = IntegFar(order_, scoord, src_elem, depth_, tcoord, trg_elem, depth_, *ker_, src_coeff_idx, trg_coeff_idx);
112  //Matrix<Real> M = Integ(order_, scoord, src_elem, depth_, tcoord, trg_elem, depth_, *ker_);
113  //auto elem_ = M[src_coeff_idx][trg_coeff_idx];
114  //static Real max_err = 0;
115  //max_err = std::max(fabs(elem_-elem), max_err);
116  //std::cout<<max_err<<'\n';
117  }
118  return elem;
119  }
120 
121  static void ReferenceElemNodes(Integer order, Vector<Vector<Real>> &ref_coord) {
122  ref_coord.ReInit(Nface);
123  Long Neval = pvfmm::pow<Long>(order, ELEMDIM);
124  { // Set ref_coord
125  Vector<Real> ref_coord_;
126  ChebBasis<Real>::template Nodes<ELEMDIM>(order, ref_coord_);
127  assert(ref_coord_.Dim() == Neval * ELEMDIM);
128  if (ELEMDIM == DIM) {
129  assert(Nface == 1);
130  ref_coord[0].Swap(ref_coord_);
131  } else if (ELEMDIM == DIM - 1) {
132  for (Integer elem = 0; elem < Nface; elem++) {
133  Integer k0 = (elem >> 1);
134  ref_coord[elem].ReInit(Neval * DIM);
135  for (Long j = 0; j < Neval; j++) {
136  for (Integer k = 0; k < ELEMDIM; k++) {
137  ref_coord[elem][j * DIM + ((k + k0 + 1) % DIM)] = ref_coord_[j * ELEMDIM + k];
138  }
139  }
140  if (elem & 1) {
141  for (Long j = 0; j < Neval; j++) {
142  ref_coord[elem][j * DIM + k0] = 1;
143  }
144  } else {
145  for (Long j = 0; j < Neval; j++) {
146  ref_coord[elem][j * DIM + k0] = 0;
147  }
148  }
149  }
150  }
151  }
152  }
153 
154  static Vector<Real> ChebNodes(ConstIterator<Real> coord, Integer elem, Integer depth, Integer order) {
155  static Vector<Vector<Real>> ref_nodes;
156  #pragma omp critical (REF_NODES_CRITICAL)
157  if (!ref_nodes.Dim()) {
158  ReferenceElemNodes(order, ref_nodes);
159  }
160 
161  Vector<Real> nodes;
162  nodes = ref_nodes[elem];
163 
164  Integer N = nodes.Dim() / DIM;
165  Real s = pvfmm::pow<Real>(0.5, depth);
166  for (Integer i = 0; i < N; i++) {
167  for (Integer j = DIM - DIM; j < DIM; j++) {
168  nodes[i * DIM + j] = nodes[i * DIM + j] * s + coord[j];
169  }
170  }
171  return nodes;
172  }
173 
174  static Matrix<Real> Integ(Integer order, ConstIterator<Real> scoord, Integer src_elem, Integer src_depth, ConstIterator<Real> tcoord, Integer trg_elem, Integer trg_depth, const KernelFunction<Real, DIM> &ker, Real tol = -1) {
175  Matrix<Real> Mcoeff2nodes;
176  { // Set Mcoeff2nodes
177  Vector<Real> trg_nodes = ChebNodes(tcoord, trg_elem, trg_depth, order);
178  Integer Ntrg = trg_nodes.Dim() / DIM;
179  PVFMM_ASSERT(Ntrg);
180 
181  for (Integer i = 0; i < Ntrg; i++) { // Shift trg_nodes by scoord
182  for (Integer j = 0; j < DIM; j++) {
183  trg_nodes[i * DIM + j] -= scoord[j];
184  }
185  }
186 
187  Vector<Matrix<Real>> Mcoeff(Ntrg);
188  Real s = pvfmm::pow<Real>(0.5, src_depth);
189  #pragma omp parallel for schedule(dynamic)
190  for (Integer i = 0; i < Ntrg; i++) {
191  ChebBasis<Real>::template Integ<DIM, ELEMDIM>(Mcoeff[i], order, trg_nodes.Begin() + i * DIM, s, src_elem, ker, tol);
192  }
193 
194  Mcoeff2nodes.ReInit(Mcoeff[0].Dim(0), Mcoeff[0].Dim(1) * Ntrg);
195  for (Integer j = 0; j < Mcoeff[0].Dim(0); j++) {
196  for (Integer i0 = 0; i0 < Mcoeff[0].Dim(1); i0++) {
197  for (Integer i1 = 0; i1 < Ntrg; i1++) {
198  Mcoeff2nodes[j][i1 * Mcoeff[0].Dim(1) + i0] = Mcoeff[i1][j][i0];
199  }
200  }
201  }
202  }
203 
204  Matrix<Real> Mcoeff2coeff;
205  { // Set Mcoeff2coeff
206  Integer Ntrg = Mcoeff2nodes.Dim(1) / ker.Dim(1);
207  assert(Ntrg * ker.Dim(1) == Mcoeff2nodes.Dim(1));
208  for (Integer i = 0; i < Mcoeff2nodes.Dim(0); i++) {
209  Matrix<Real> M(Ntrg, ker.Dim(1), Mcoeff2nodes[i], false);
210  M = M.Transpose();
211  }
212 
213  Vector<Real> coeff;
214  const Vector<Real> fn_v(Mcoeff2nodes.Dim(0) * Mcoeff2nodes.Dim(1), Mcoeff2nodes.Begin(), false);
215  ChebBasis<Real>::template Approx<ELEMDIM>(order, fn_v, coeff);
216  ChebBasis<Real>::template Truncate<ELEMDIM>(coeff, order, order);
217  Mcoeff2coeff.ReInit(Mcoeff2nodes.Dim(0), coeff.Dim() / Mcoeff2nodes.Dim(0), coeff.Begin());
218  }
219 
220  return Mcoeff2coeff;
221  }
222 
223  static Long cantor_idx(ConstIterator<Long> x, Integer N) {
224  Long idx = 0;
225  for (Integer d = 0; d < N; d++) {
226  Long sum = 0;
227  for (Integer i = 0; i < N - d; i++) {
228  sum += std::abs(x[i]);
229  }
230  Long offset = 1;
231  for (Integer i = 0; i < N - d; i++) {
232  offset = offset * (sum + i) / (i + 1);
233  }
234  idx += offset;
235  }
236  return idx;
237  }
238 
239  static Long cantor_signed(ConstIterator<Long> x, Integer N) {
240  Long idx = cantor_idx(x, N);
241  for (Integer i = 0; i < N; i++) {
242  idx = 2 * idx + (x[i] < 0 ? 0 : 1);
243  }
244  return idx;
245  }
246 
247 
248 
249 
250  template <Integer DIM, Integer SUBDIM> static Real IntegFar(Integer order, const Vector<Real>& r_trg, Real side, Integer src_face, const KernelFunction<Real, DIM>& ker, Integer src_coeff_idx, Integer trg_coeff_idx) {
251  static const Real eps = ChebBasis<Real>::machine_eps() * 64;
252  Real side_inv = 1.0 / side;
253  Integer Nq = 6;
254 
255  static Vector<Real> qp, qw, p0;
256  #pragma omp critical
257  if (!qp.Dim()) {
258  ChebBasis<Real>::quad_rule(Nq, qp, qw);
259  ChebBasis<Real>::EvalBasis1D(order, qp, p0);
260  }
261 
262  Integer Ncoeff;
263  { // Set Ncoeff
264  Ncoeff = 1;
265  for (Integer i = 0; i < SUBDIM; i++) Ncoeff = (Ncoeff * (order + i)) / (i + 1);
266  }
267  StaticArray<Integer, 2> kdim;
268  kdim[0] = ker.Dim(0);
269  kdim[1] = ker.Dim(1);
270 
271  Integer sidx0=0, sidx1=0, sidx2=0;
272  Integer tidx0=0, tidx1=0, tidx2=0;
273  { // Set sidx
274  sidx2 = src_coeff_idx / Ncoeff;
275  Integer idx=0, sidx = src_coeff_idx % Ncoeff;
276  for (Integer i0=0;i0<order;i0++){
277  for (Integer i1=0;i0+i1<order;i1++){
278  if(idx==sidx) {
279  sidx0 = i1;
280  sidx1 = i0;
281  }
282  idx++;
283  }
284  }
285  }
286  { // Set tidx
287  tidx2 = trg_coeff_idx / Ncoeff;
288  Integer idx=0, tidx = trg_coeff_idx % Ncoeff;
289  for (Integer i0=0;i0<order;i0++){
290  for (Integer i1=0;i0+i1<order;i1++){
291  if(idx==tidx) {
292  tidx0 = i1;
293  tidx1 = i0;
294  }
295  idx++;
296  }
297  }
298  }
299 
300  PVFMM_ASSERT(SUBDIM==2 && DIM==3);
301  Vector<Real> n_src(pvfmm::pow<SUBDIM, Integer>(Nq)*DIM);
302  Vector<Real> r_src(pvfmm::pow<SUBDIM, Integer>(Nq)*DIM);
303  Vector<Real> v_src(pvfmm::pow<SUBDIM, Integer>(Nq)*kdim[0]);
304  n_src.SetZero();
305  v_src.SetZero();
306  for (Integer i0=0;i0<Nq;i0++){
307  for (Integer i1=0;i1<Nq;i1++){
308  n_src[(i0 * Nq + i1) * DIM + src_face >> 1] = (src_face & 1 ? -1.0 : 1.0);
309  r_src[(i0 * Nq + i1) * DIM + ((src_face>>1)+0)%DIM] = (src_face & 1 ? 1.0 : 0.0) * side;
310  r_src[(i0 * Nq + i1) * DIM + ((src_face>>1)+1)%DIM] = qp[i0] * side;
311  r_src[(i0 * Nq + i1) * DIM + ((src_face>>1)+2)%DIM] = qp[i1] * side;
312  v_src[(i0 * Nq + i1) * kdim[0] + sidx2] = p0[sidx0 * Nq + i0] * p0[sidx1 * Nq + i1] * qw[i0] * qw[i1]*side*side;
313  }
314  }
315 
316  Long Ntrg = r_trg.Dim() / DIM;
317  Vector<Real> v_trg(Ntrg * kdim[1]);
318  v_trg.SetZero();
319  ker(r_src, n_src, v_src, r_trg, v_trg);
320 
321  Vector<Real> fn_v(Ntrg), coeff;
322  for (Long i=0;i<Ntrg;i++) fn_v[i] = v_trg[i * kdim[1] + tidx2];
323  ChebBasis<Real>::template Approx<ELEMDIM>(order, fn_v, coeff);
324  return coeff[trg_coeff_idx % Ncoeff];
325  }
326 
327  static Real IntegFar(Integer order, ConstIterator<Real> scoord, Integer src_elem, Integer src_depth, ConstIterator<Real> tcoord, Integer trg_elem, Integer trg_depth, const KernelFunction<Real, DIM> &ker, Integer src_coeff_idx, Integer trg_coeff_idx) {
328  Vector<Real> trg_nodes = ChebNodes(tcoord, trg_elem, trg_depth, order);
329  Integer Ntrg = trg_nodes.Dim() / DIM; PVFMM_ASSERT(Ntrg);
330  for (Integer i = 0; i < Ntrg; i++) {
331  for (Integer j = 0; j < DIM; j++) {
332  trg_nodes[i * DIM + j] -= scoord[j];
333  }
334  }
335  Real s = pvfmm::pow<Real>(0.5, src_depth);
336  return IntegFar<DIM, ELEMDIM>(order, trg_nodes, s, src_elem, ker, src_coeff_idx, trg_coeff_idx);
337  }
338 
339  mutable std::map<Long, Matrix<Real>> mat;
340  const KernelFunction<Real, DIM>* ker_;
341  Integer order_, depth_, Ncoeff_;
342  Long GridDim_, row_, col_;
343 };
344 
345 } // end namespace
346 
347 #endif //_PVFMM_KERNEL_MATRIX_HPP_
Definition: cheb_utils.hpp:12
Definition: cheb_utils.hpp:1394
Definition: kernel_matrix.hpp:10
Definition: matrix.hpp:15
Definition: kernel.hpp:1010
Definition: matrix.hpp:12
static void EvalBasis1D(Integer order, const Vector< ValueType > &x, Vector< ValueType > &y)
Returns the values of all Chebyshev polynomials up to degree d, evaluated at points in the input vect...
Definition: cheb_utils.hpp:1406