HMLP: High-performance Machine Learning Primitives
cheb_utils.hpp
1 #ifndef _PVFMM_CHEB_UTILS_HPP_
2 #define _PVFMM_CHEB_UTILS_HPP_
3 
4 #include <pvfmm/kernel.hpp>
5 #include <pvfmm/matrix.hpp>
6 #include <pvfmm/vector.hpp>
7 #include <pvfmm/common.hpp>
8 #include <pvfmm/legendre_rule.hpp>
9 
10 #include <type_traits>
11 
12 namespace pvfmm {
13 
14 template <class ValueType, class Derived> class BasisInterface {
15 
16  public:
17  template <Integer DIM> static void Nodes(Integer order, Vector<ValueType>& nodes) {
18  if (DIM == 1) {
19  Derived::Nodes1D(order, nodes);
20  return;
21  }
22 
23  Vector<ValueType> nodes1d;
24  Derived::Nodes1D(order, nodes1d);
25 
26  Integer order_DIM = pvfmm::pow<Integer>(order, DIM);
27  if (nodes.Dim() != order_DIM * DIM) nodes.ReInit(order_DIM * DIM);
28 
29  StaticArray<Integer, DIM> idx;
30  for (Integer i = 0; i < DIM; i++) idx[i] = 0;
31  Integer itr = 0;
32  for (Integer j = 0; j < order_DIM; j++) {
33  for (Integer i = 0; i < DIM; i++) {
34  if (idx[i] == order) idx[i] = 0;
35  nodes[itr + i] = nodes1d[idx[i]];
36  }
37  itr += DIM;
38  idx[0]++;
39  for (Integer i = 1; i < DIM; i++) {
40  if (idx[i - 1] == order) idx[i]++;
41  }
42  }
43  }
44 
50  template <Integer DIM> static void Approx(Integer order, const Vector<ValueType>& fn_v, Vector<ValueType>& coeff) {
52  { // Precompute
53  static Vector<Matrix<ValueType>> precomp(1000);
54  PVFMM_ASSERT(order < precomp.Dim());
55  if (precomp[order].Dim(0) * precomp[order].Dim(1) == 0) {
56  #pragma omp critical(BASIS_APPROX)
57  if (precomp[order].Dim(0) * precomp[order].Dim(1) == 0) {
58  Vector<ValueType> x, p;
59  Derived::Nodes1D(order, x);
60  Derived::EvalBasis1D(order, x, p);
61  Matrix<ValueType> Mp1(order, order, p.Begin(), false);
62  Mp1.pinv().Swap(precomp[order]);
63  }
64  }
65  Mp.ReInit(precomp[order].Dim(0), precomp[order].Dim(1), precomp[order].Begin(), false);
66  }
67 
68  Integer order_DIM = pvfmm::pow<Integer>(order, DIM);
69  Integer order_DIM_ = pvfmm::pow<Integer>(order, DIM - 1);
70  Long dof = fn_v.Dim() / order_DIM;
71  PVFMM_ASSERT(fn_v.Dim() == dof * order_DIM);
72 
73  // Create work buffers
74  Long buff_size = dof * order_DIM;
75  Vector<ValueType> buff(2 * buff_size);
76  Iterator<ValueType> buff1 = buff.Begin() + buff_size * 0;
77  Iterator<ValueType> buff2 = buff.Begin() + buff_size * 1;
78 
79  Vector<ValueType> fn(order_DIM * dof, (Iterator<ValueType>)fn_v.Begin(), false);
80  for (Integer k = 0; k < DIM; k++) { // Apply Mp along k-dimension
81  Matrix<ValueType> Mi(dof * order_DIM_, order, fn.Begin(), false);
82  Matrix<ValueType> Mo(dof * order_DIM_, order, buff2, false);
83  Matrix<ValueType>::GEMM(Mo, Mi, Mp);
84 
85  Matrix<ValueType> Mo_t(order, dof * order_DIM_, buff1, false);
86  for (Long i = 0; i < Mo.Dim(0); i++) {
87  for (Long j = 0; j < Mo.Dim(1); j++) {
88  Mo_t[j][i] = Mo[i][j];
89  }
90  }
91  fn.ReInit(order_DIM * dof, buff1, false);
92  }
93 
94  { // Rearrange and write to coeff
95  Vector<ValueType> tensor(order_DIM * dof, buff1, false);
96  tensor2coeff<DIM>(order, tensor, coeff);
97  }
98  }
99 
100  template <Integer DIM> static void Approx_(Integer order, const Vector<ValueType>& fn_v, Vector<ValueType>& coeff, ValueType scale) {
102  { // Precompute
103  static Vector<Matrix<ValueType>> precomp(1000);
104  PVFMM_ASSERT(order < precomp.Dim());
105  if (precomp[order].Dim(0) * precomp[order].Dim(1) == 0) {
106  #pragma omp critical(BASIS_APPROX)
107  if (precomp[order].Dim(0) * precomp[order].Dim(1) == 0) {
108  Vector<ValueType> x, p;
109  Derived::Nodes1D(order, x);
110  for (Integer i = 0; i < order; i++) x[i] = (x[i] - 0.5) * scale + 0.5;
111  Derived::EvalBasis1D(order, x, p);
112  Matrix<ValueType> Mp1(order, order, p.Begin(), false);
113  Mp1.pinv().Swap(precomp[order]);
114  }
115  }
116  Mp.ReInit(precomp[order].Dim(0), precomp[order].Dim(1), precomp[order].Begin(), false);
117  }
118 
119  Integer order_DIM = pvfmm::pow<Integer>(order, DIM);
120  Integer order_DIM_ = pvfmm::pow<Integer>(order, DIM - 1);
121  Long dof = fn_v.Dim() / order_DIM;
122  PVFMM_ASSERT(fn_v.Dim() == dof * order_DIM);
123 
124  // Create work buffers
125  Long buff_size = dof * order_DIM;
126  Vector<ValueType> buff(2 * buff_size);
127  Iterator<ValueType> buff1 = buff.Begin() + buff_size * 0;
128  Iterator<ValueType> buff2 = buff.Begin() + buff_size * 1;
129 
130  Vector<ValueType> fn(order_DIM * dof, (Iterator<ValueType>)fn_v.Begin(), false);
131  for (Integer k = 0; k < DIM; k++) { // Apply Mp along k-dimension
132  Matrix<ValueType> Mi(dof * order_DIM_, order, fn.Begin(), false);
133  Matrix<ValueType> Mo(dof * order_DIM_, order, buff2, false);
134  Matrix<ValueType>::GEMM(Mo, Mi, Mp);
135 
136  Matrix<ValueType> Mo_t(order, dof * order_DIM_, buff1, false);
137  for (Long i = 0; i < Mo.Dim(0); i++) {
138  for (Long j = 0; j < Mo.Dim(1); j++) {
139  Mo_t[j][i] = Mo[i][j];
140  }
141  }
142  fn.ReInit(order_DIM * dof, buff1, false);
143  }
144 
145  { // Rearrange and write to coeff
146  Vector<ValueType> tensor(order_DIM * dof, buff1, false);
147  tensor2coeff<DIM>(order, tensor, coeff);
148  }
149  }
150 
157  template <Integer DIM> static void Eval(Integer order, const Vector<ValueType>& coeff, ConstIterator<Vector<ValueType>> in_x, Vector<ValueType>& out) {
158  Integer Ncoeff = 1;
159  for (Integer i = 0; i < DIM; i++) {
160  Ncoeff = (Ncoeff * (order + i)) / (i + 1);
161  }
162  Long dof = coeff.Dim() / Ncoeff;
163  PVFMM_ASSERT(coeff.Dim() == Ncoeff * dof);
164 
165  // Precomputation
166  Long buff_size = dof;
167  StaticArray<Matrix<ValueType>, DIM> Mp;
168  for (Integer i = 0; i < DIM; i++) {
169  Integer n = in_x[i].Dim();
170  if (!n) return;
171  Mp[i].ReInit(order, n);
172  Vector<ValueType> p(order * n, Mp[i].Begin(), false);
173  Derived::EvalBasis1D(order, in_x[i], p);
174  buff_size *= std::max(order, n);
175  }
176 
177  // Create work buffers
178  Vector<ValueType> buff(2 * buff_size);
179  Iterator<ValueType> buff1 = buff.Begin() + buff_size * 0;
180  Iterator<ValueType> buff2 = buff.Begin() + buff_size * 1;
181 
182  { // Rearrange coefficients into a tensor.
183  Vector<ValueType> tensor(dof * pvfmm::pow<Integer>(order, DIM), buff1, false);
184  coeff2tensor<DIM>(order, coeff, tensor);
185  }
186 
187  { // ReInit out
188  Long len = dof;
189  for (Integer i = 0; i < DIM; i++) len *= in_x[i].Dim();
190  if (out.Dim() != len) out.ReInit(len);
191  }
192 
193  for (Integer k = 0; k < DIM; k++) { // Apply Mp along k-dimension
194  Integer order_DIM = pvfmm::pow<Integer>(order, DIM - k - 1);
195  for (Integer i = 0; i < k; i++) order_DIM *= in_x[i].Dim();
196 
197  Matrix<ValueType> Mi(dof * order_DIM, order, buff1, false);
198  Matrix<ValueType> Mo(dof * order_DIM, in_x[k].Dim(), buff2, false);
199  Matrix<ValueType>::GEMM(Mo, Mi, Mp[k]);
200 
201  Matrix<ValueType> Mo_t(in_x[k].Dim(), dof * order_DIM, buff1, false);
202  if (k == DIM - 1) Mo_t.ReInit(in_x[k].Dim(), dof * order_DIM, out.Begin(), false);
203  for (Long i = 0; i < Mo.Dim(0); i++) {
204  for (Long j = 0; j < Mo.Dim(1); j++) {
205  Mo_t[j][i] = Mo[i][j];
206  }
207  }
208  }
209  }
210 
216  template <Integer DIM> static ValueType TruncErr(Integer order, const Vector<ValueType>& coeff) {
217  Integer Ncoeff = 1;
218  { // Set Ncoeff
219  for (Integer i = 0; i < DIM; i++) Ncoeff = (Ncoeff * (order + i)) / (i + 1);
220  }
221  Long dof = coeff.Dim() / Ncoeff;
222  PVFMM_ASSERT(coeff.Dim() == Ncoeff * dof);
223 
224  ValueType err = 0;
225  for (Long l = 0; l < dof; l++) { // TODO: optimize this
226  Long offset0 = l * Ncoeff;
227 
228  Integer indx0 = 0;
229  Integer indx1 = 0;
230  StaticArray<Integer, DIM + 1> i0;
231  for (Integer i = 0; i <= DIM; i++) i0[i] = 0;
232 
233  Integer sum = 0;
234  while (1) {
235  if (sum < order) {
236  if (sum == order - 1) err += pvfmm::fabs<ValueType>(coeff[offset0 + indx0]);
237  indx0++;
238  }
239  indx1++;
240  sum++;
241 
242  i0[0]++;
243  for (Integer j = 0; j < DIM && i0[j] == order; j++) {
244  i0[j] = 0;
245  i0[j + 1]++;
246  sum = sum + 1 - order;
247  }
248  if (i0[DIM]) break;
249  }
250  }
251 
252  return err;
253  }
254 
260  template <Integer DIM> static void Grad(Integer order, const Vector<ValueType>& coeff_in, Vector<ValueType>* coeff_out) {
261  Integer Ncoeff = 1;
262  for (Integer i = 0; i < DIM; i++) {
263  Ncoeff = (Ncoeff * (order + i)) / (i + 1);
264  }
265  Long dof = coeff_in.Dim() / Ncoeff;
266  PVFMM_ASSERT(coeff_in.Dim() == Ncoeff * dof);
267 
268  Matrix<ValueType> Mdiff;
269  { // Precompute
270  static Vector<Matrix<ValueType>> precomp(1000);
271  PVFMM_ASSERT(order < precomp.Dim());
272  if (precomp[order].Dim(0) * precomp[order].Dim(1) == 0) {
273  #pragma omp critical(BASIS_GRAD)
274  if (precomp[order].Dim(0) * precomp[order].Dim(1) == 0) {
276  diff_1d(order, &M);
277  M.Swap(precomp[order]);
278  }
279  }
280  Mdiff.ReInit(precomp[order].Dim(0), precomp[order].Dim(1), precomp[order].Begin(), false);
281  }
282 
283  // Create work buffers
284  Long buff_size = dof * pvfmm::pow<Integer>(order, DIM);
285  Vector<ValueType> buff((3 + DIM) * buff_size);
286  Vector<ValueType> buff0(buff_size * 1, buff.Begin() + buff_size * 0, false);
287  Vector<ValueType> buff1(buff_size * 1, buff.Begin() + buff_size * 1, false);
288  Vector<ValueType> buff2(buff_size * 1, buff.Begin() + buff_size * 2, false);
289  Vector<ValueType> buff3(buff_size * DIM, buff.Begin() + buff_size * 3, false);
290 
291  { // buff0 <-- coeff2tensor(coeff_in);
292  coeff2tensor<DIM>(order, coeff_in, buff0);
293  }
294 
295  for (Integer k = 0; k < DIM; k++) { // buff2 <-- Grad(buff0)
296  Long N0 = pvfmm::pow<Integer>(order, k);
297  Long N1 = order;
298  Long N2 = pvfmm::pow<Integer>(order, DIM - k - 1);
299 
300  for (Long i3 = 0; i3 < dof; i3++) { // buff1 <-- transpose(buff0)
301  for (Long i2 = 0; i2 < N2; i2++) {
302  for (Long i1 = 0; i1 < N1; i1++) {
303  for (Long i0 = 0; i0 < N0; i0++) {
304  buff1[((i3 * N2 + i2) * N0 + i0) * N1 + i1] = buff0[((i3 * N2 + i2) * N1 + i1) * N0 + i0];
305  }
306  }
307  }
308  }
309 
310  { // buff2 <-- buff1 * Mdiff
311  Matrix<ValueType> Mi(dof * N0 * N2, N1, buff1.Begin(), false);
312  Matrix<ValueType> Mo(dof * N0 * N2, N1, buff2.Begin(), false);
313  Matrix<ValueType>::GEMM(Mo, Mi, Mdiff);
314  }
315 
316  for (Long i3 = 0; i3 < dof; i3++) { // buff3 <-- transpose(buff2)
317  for (Long i2 = 0; i2 < N2; i2++) {
318  for (Long i1 = 0; i1 < N1; i1++) {
319  for (Long i0 = 0; i0 < N0; i0++) {
320  buff3[(((i2 * N1 + i1) * N0 + i0) * dof + i3) * DIM + k] = buff2[((i3 * N2 + i2) * N0 + i0) * N1 + i1];
321  }
322  }
323  }
324  }
325  }
326 
327  { // coeff_out <-- tensor2coeff(buff2);
328  tensor2coeff<DIM>(order, buff3, *coeff_out);
329  }
330  }
331 
332  template <Integer DIM, Integer SUBDIM> static void Integ(Matrix<ValueType>& Mcoeff, Integer order, ConstIterator<ValueType> trg_, ValueType side, Integer src_face, const KernelFunction<ValueType, DIM>& ker, ValueType tol = -1, Integer Nq = 0) {
333  if (!Nq) Nq = order;
334  Integ_<DIM, SUBDIM>(Mcoeff, order, trg_, side, src_face, ker, Nq);
335  if (tol < 0) tol = 1e-10; //machine_eps() * 256;
336  ValueType err = tol + 1;
337  Matrix<ValueType> Mtmp;
338  while (err > tol) {
339  err = 0;
340  ValueType max_val = pvfmm::pow<SUBDIM>(side);
341  Nq = std::max((Integer)(Nq * 1.26), Nq + 1);
342  Integ_<DIM, SUBDIM>(Mtmp, order, trg_, side, src_face, ker, Nq);
343  for (Integer i = 0; i < Mtmp.Dim(0) * Mtmp.Dim(1); i++) {
344  err = std::max(err, pvfmm::fabs<ValueType>(Mtmp[0][i] - Mcoeff[0][i]));
345  max_val = std::max(max_val, pvfmm::fabs<ValueType>(Mtmp[0][i]));
346  }
347  err /= max_val;
348  Mcoeff = Mtmp;
349  if (Nq>200) {
350  PVFMM_WARN("Failed to converge, error = "<<err);
351  break;
352  }
353  }
354  Mcoeff = Mcoeff.Transpose();
355  }
356 
357  template <Integer DIM> static void tensor2coeff(Integer order, const Vector<ValueType>& tensor, Vector<ValueType>& coeff) {
358  Integer Ncoeff = 1, Ntensor = pvfmm::pow<Integer>(order, DIM);
359  for (Integer i = 0; i < DIM; i++) Ncoeff = (Ncoeff * (order + i)) / (i + 1);
360  Long dof = tensor.Dim() / Ntensor;
361  PVFMM_ASSERT(tensor.Dim() == Ntensor * dof);
362  if (coeff.Dim() != Ncoeff * dof) coeff.ReInit(Ncoeff * dof);
363 
364  for (Long l = 0; l < dof; l++) { // TODO: optimize this
365  Long offset0 = l * Ncoeff;
366 
367  Integer indx0 = 0;
368  Integer indx1 = 0;
369  StaticArray<Integer, DIM + 1> i0;
370  for (Integer i = 0; i <= DIM; i++) i0[i] = 0;
371 
372  Integer sum = 0;
373  while (1) {
374  if (sum < order) {
375  coeff[offset0 + indx0] = tensor[l + indx1 * dof];
376  indx0++;
377  }
378  indx1++;
379  sum++;
380 
381  i0[0]++;
382  for (Integer j = 0; j < DIM && i0[j] == order; j++) {
383  i0[j] = 0;
384  i0[j + 1]++;
385  sum = sum + 1 - order;
386  }
387  if (i0[DIM]) break;
388  }
389  }
390  }
391 
392  template <Integer DIM> static void coeff2tensor(Integer order, const Vector<ValueType>& coeff, Vector<ValueType>& tensor) {
393  Integer Ncoeff = 1, Ntensor = pvfmm::pow<Integer>(order, DIM);
394  for (Integer i = 0; i < DIM; i++) Ncoeff = (Ncoeff * (order + i)) / (i + 1);
395  Long dof = coeff.Dim() / Ncoeff;
396  PVFMM_ASSERT(coeff.Dim() == Ncoeff * dof);
397  if (tensor.Dim() != Ntensor * dof) tensor.ReInit(Ntensor * dof);
398 
399  for (Long l = 0; l < dof; l++) { // TODO: optimize this
400  Long offset0 = l * Ncoeff;
401  Long offset1 = l * Ntensor;
402 
403  Integer indx0 = 0;
404  Integer indx1 = 0;
405  StaticArray<Integer, DIM + 1> i0;
406  for (Integer i = 0; i <= DIM; i++) i0[i] = 0;
407 
408  Integer sum = 0;
409  while (1) {
410  if (sum < order) {
411  tensor[offset1 + indx1] = coeff[offset0 + indx0];
412  indx0++;
413  } else {
414  tensor[offset1 + indx1] = 0;
415  }
416  indx1++;
417  sum++;
418 
419  i0[0]++;
420  for (Integer j = 0; j < DIM && i0[j] == order; j++) {
421  i0[j] = 0;
422  i0[j + 1]++;
423  sum = sum + 1 - order;
424  }
425  if (i0[DIM]) break;
426  }
427  }
428  }
429 
430  template <Integer DIM> static void Truncate(Vector<ValueType> &coeff0, Integer order0, Integer order1) {
431  PVFMM_ASSERT(order1 <= order0);
432  Integer Ncoeff0 = 1, Ncoeff1 = 1;
433  for (Integer i = 0; i < DIM; i++) Ncoeff0 = (Ncoeff0 * (order0 + i)) / (i + 1);
434  for (Integer i = 0; i < DIM; i++) Ncoeff1 = (Ncoeff1 * (order1 + i)) / (i + 1);
435 
436  Long dof = coeff0.Dim() / Ncoeff0;
437  PVFMM_ASSERT(coeff0.Dim() == Ncoeff0 * dof);
438  Vector<ValueType> coeff1(dof * Ncoeff1);
439  coeff1.SetZero();
440 
441  for (Long l = 0; l < dof; l++) { // TODO: optimize this
442  Long offset0 = l * Ncoeff0;
443  Long offset1 = l * Ncoeff1;
444 
445  Integer indx0 = 0;
446  Integer indx1 = 0;
447  StaticArray<Integer, DIM + 1> i0;
448  for (Integer i = 0; i <= DIM; i++) i0[i] = 0;
449 
450  Integer sum = 0;
451  while (1) {
452  if (sum < order1) coeff1[offset1 + indx1] = coeff0[offset0 + indx0];
453  if (sum < order0) indx0++;
454  if (sum < order1) indx1++;
455  sum++;
456 
457  i0[0]++;
458  for (Integer j = 0; j < DIM && i0[j] == order0; j++) {
459  i0[j] = 0;
460  i0[j + 1]++;
461  sum = sum + 1 - order0;
462  }
463  if (i0[DIM]) break;
464  }
465  }
466  coeff0 = coeff1;
467  }
468 
469  template <Integer DIM> static void Reflect(Vector<ValueType> &coeff, Integer order, Integer dir) {
470  PVFMM_ASSERT(dir < DIM);
471  Integer Ncoeff = 1;
472  for (Integer i = 0; i < DIM; i++) Ncoeff = (Ncoeff * (order + i)) / (i + 1);
473 
474  Long dof = coeff.Dim() / Ncoeff;
475  PVFMM_ASSERT(coeff.Dim() == Ncoeff * dof);
476 
477  for (Long l = 0; l < dof; l++) { // TODO: optimize this
478  Long offset = l * Ncoeff;
479 
480  Integer indx = 0;
481  StaticArray<Integer, DIM + 1> i0;
482  for (Integer i = 0; i <= DIM; i++) i0[i] = 0;
483 
484  Integer sum = 0;
485  while (1) {
486  if (sum < order) coeff[offset + indx] = coeff[offset + indx] * (i0[dir] % 2 ? -1 : 1) * (1);
487  if (sum < order) indx++;
488  sum++;
489 
490  i0[0]++;
491  for (Integer j = 0; j < DIM && i0[j] == order; j++) {
492  i0[j] = 0;
493  i0[j + 1]++;
494  sum = sum + 1 - order;
495  }
496  if (i0[DIM]) break;
497  }
498  }
499  }
500 
501  template <Integer DIM, Integer CONTINUITY> static void MakeContinuous(Vector<ValueType> &coeff0, Vector<ValueType> &coeff1, Integer order, Integer dir0, Integer dir1) {
502  if (dir0>=2*DIM || dir1>=2*DIM) return;
503  Integer Ncoeff = 1;
504  for (Integer i = 0; i < DIM; i++) Ncoeff = (Ncoeff * (order + i)) / (i + 1);
505 
506  Long dof = coeff0.Dim() / Ncoeff;
507  PVFMM_ASSERT(coeff0.Dim() == Ncoeff * dof);
508  PVFMM_ASSERT(coeff1.Dim() == Ncoeff * dof);
509 
510  static Matrix<Matrix<ValueType>> M(2*DIM, 2*DIM);
511  if (M[dir0][dir1].Dim(0) != 2 * Ncoeff) {
512  Integer Ngrid = pvfmm::pow<Integer>(order, DIM - 1);
513  Vector<ValueType> nodes;
514  Nodes<1>(order, nodes);
515 
516  Matrix<ValueType> M_diff(2*Ncoeff, Ngrid);
517  { // Set M_diff
518  M_diff.SetZero();
519  StaticArray<Vector<ValueType>, DIM> nodes_;
520  for (Integer i = 0; i < DIM; i++) { // Set nodes_
521  nodes_[i].ReInit(nodes.Dim(), nodes.Begin(), false);
522  }
523  Vector<ValueType> nodes0, nodes1;
524  nodes0.PushBack(0);
525  nodes1.PushBack(1);
526 
527  Vector<ValueType> value;
528  Vector<ValueType> coeff(Ncoeff);
529  coeff.SetZero();
530  for (Integer i = 0; i < Ncoeff; i++) {
531  coeff[i]=0.5;
532  value.ReInit(Ngrid, M_diff[i + Ncoeff * 0], false);
533  nodes_[dir0/2].ReInit(1, (dir0 & 1 ? nodes1.Begin() : nodes0.Begin()), false);
534  Eval<DIM>(order, coeff, nodes_, value);
535  nodes_[dir0/2].ReInit(nodes.Dim(), nodes.Begin(), false);
536 
537  coeff[i]=-0.5;
538  value.ReInit(Ngrid, M_diff[i + Ncoeff * 1], false);
539  nodes_[dir1/2].ReInit(1, (dir1 & 1 ? nodes1.Begin() : nodes0.Begin()), false);
540  Eval<DIM>(order, coeff, nodes_, value);
541  nodes_[dir1/2].ReInit(nodes.Dim(), nodes.Begin(), false);
542 
543  coeff[i]=0;
544  }
545  }
546 
547  Matrix<ValueType> M_grad(2 * Ncoeff, 2 * Ncoeff);
548  { // Set M_grad
549  M_grad.SetZero();
550  Vector<ValueType> coeff(Ncoeff * Ncoeff), coeff_grad;
551  coeff.SetZero();
552  for(Integer i = 0; i < Ncoeff; i++) coeff[i * Ncoeff + i] = 1;
553  Grad<DIM>(order, coeff, &coeff_grad);
554  for (Integer i = 0; i < Ncoeff; i++){
555  for (Integer j = 0; j < Ncoeff; j++){
556  M_grad[i + Ncoeff * 0][j + Ncoeff * 0] = coeff_grad[j + (i * DIM + dir0/2) * Ncoeff];
557  M_grad[i + Ncoeff * 1][j + Ncoeff * 1] = coeff_grad[j + (i * DIM + dir1/2) * Ncoeff];
558  }
559  }
560  }
561 
562  auto fn_perturb = [&](std::function<ValueType(ValueType)> fn, bool even) { // Set M0
563  Matrix<ValueType> M0(Ngrid, 2 * Ncoeff);
564  M0.SetZero();
565  { // dir0
566  Integer N0=pvfmm::pow<Integer>(order, dir0/2);
567  Integer N1=pvfmm::pow<Integer>(order, 1);
568  Integer N2=pvfmm::pow<Integer>(order, DIM - dir0/2 - 1);
569  PVFMM_ASSERT(N0 * N2 == Ngrid);
570  Vector<ValueType> val(Ngrid * order), coeff;
571  val.SetZero();
572  for (Integer i0=0;i0<N0;i0++){
573  for (Integer i2=0;i2<N2;i2++){
574  for (Integer i1=0;i1<N1;i1++){
575  val[(i2*N1+i1)*N0+i0] = (dir0 & 1 ? fn(nodes[i1]) : fn(1.0 - nodes[i1])) * (even ? 1.0 : -1.0);
576  }
577  coeff.ReInit(Ncoeff, M0[i2 * N0 + i0] + Ncoeff * 0, false);
578  Approx<DIM>(order, val, coeff);
579  for (Integer i1=0;i1<N1;i1++){
580  val[(i2*N1+i1)*N0+i0] = 0;
581  }
582  }
583  }
584  }
585  { // dir1
586  Integer N0=pvfmm::pow<Integer>(order, dir1/2);
587  Integer N1=pvfmm::pow<Integer>(order, 1);
588  Integer N2=pvfmm::pow<Integer>(order, DIM - dir1/2 - 1);
589  PVFMM_ASSERT(N0 * N2 == Ngrid);
590  Vector<ValueType> val(Ngrid * order), coeff;
591  val.SetZero();
592  for (Integer i0=0;i0<N0;i0++){
593  for (Integer i2=0;i2<N2;i2++){
594  for (Integer i1=0;i1<N1;i1++){
595  val[(i2*N1+i1)*N0+i0] = (dir1 & 1 ? fn(nodes[i1]) : fn(1.0 - nodes[i1]));
596  }
597  coeff.ReInit(Ncoeff, M0[i2 * N0 + i0] + Ncoeff * 1, false);
598  Approx<DIM>(order, val, coeff);
599  for (Integer i1=0;i1<N1;i1++){
600  val[(i2*N1+i1)*N0+i0] = 0;
601  }
602  }
603  }
604  }
605  return M0;
606  };
607 
608  if (CONTINUITY == 0) {
609  auto fn0 = [](ValueType x) {return x;};
610  Matrix<ValueType> M0 = fn_perturb(fn0, 0);
611  M[dir0][dir1] = M_diff * M0;
612  } else if (CONTINUITY == 1) {
613  auto fn0 = [](ValueType x) {return (-2*x + 3) * x * x;};
614  auto fn1 = [](ValueType x) {return (1 - x) * x * x;};
615  Matrix<ValueType> M0 = fn_perturb(fn0, 0);
616  Matrix<ValueType> M1 = fn_perturb(fn1, 1);
617  M[dir0][dir1] = M_diff * M0 + M_grad * M_diff * M1;
618  } else if (CONTINUITY == 2) {
619  auto fn0 = [](ValueType x) {return x*x*x*(6*x*x-15*x+10);};
620  auto fn1 = [](ValueType x) {return x*x*x*(-3*x*x+7*x-4);};
621  auto fn2 = [](ValueType x) {return x*x*x*(0.5*x*x-1*x+0.5);};
622  Matrix<ValueType> M0 = fn_perturb(fn0, 0);
623  Matrix<ValueType> M1 = fn_perturb(fn1, 1);
624  Matrix<ValueType> M2 = fn_perturb(fn2, 0);
625  M[dir0][dir1] = M_diff * M0 + M_grad * M_diff * M1 + M_grad * M_grad * M_diff * M2;
626  }
627 
628  for (Integer i=0;i<2*Ncoeff;i++){
629  M[dir0][dir1][i][i]+=1.0;
630  }
631 
632  if(0){
633  //Matrix<ValueType> Mgrid2coeff;
634  //{ // Set Mgrid2coeff
635  // Integer Ngrid = pvfmm::pow<Integer>(order, DIM);
636  // Matrix<ValueType> M(Ngrid, Ncoeff);
637  // Vector<ValueType> val(Ngrid);
638  // val.SetZero();
639  // for (Integer i=0;i<Ngrid;i++) {
640  // val[i]=1.0;
641  // Vector<ValueType> coeff(Ncoeff, M[i], false);
642  // Approx<DIM>(order, val, coeff);
643  // val[i]=0.0;
644  // }
645 
646  // Mgrid2coeff.ReInit(2*Ngrid, 2*Ncoeff);
647  // Mgrid2coeff.SetZero();
648  // for(Integer i=0;i<Ngrid;i++){
649  // for(Integer j=0;j<Ncoeff;j++){
650  // Mgrid2coeff[i+Ngrid*0][j+Ncoeff*0]=M[i][j];
651  // Mgrid2coeff[i+Ngrid*1][j+Ncoeff*1]=M[i][j];
652  // }
653  // }
654  //}
655 
656  //Matrix<ValueType> Mcoeff2grid;
657  //{ // Set Mgrid2coeff
658  // StaticArray<Vector<ValueType>, DIM> nodes_;
659  // for (Integer i = 0; i < DIM; i++) { // Set nodes_
660  // nodes_[i].ReInit(nodes.Dim(), nodes.Begin(), false);
661  // }
662 
663  // Integer Ngrid = pvfmm::pow<Integer>(order, DIM);
664  // Matrix<ValueType> M(Ncoeff, Ngrid);
665  // Vector<ValueType> coeff(Ncoeff);
666  // coeff.SetZero();
667  // for (Integer i=0;i<Ncoeff;i++) {
668  // coeff[i]=1.0;
669  // Vector<ValueType> val(Ngrid, M[i], false);
670  // Eval<DIM>(order, coeff, nodes_, val);
671  // coeff[i]=0.0;
672  // }
673 
674  // Mcoeff2grid.ReInit(2*Ncoeff, 2*Ngrid);
675  // Mcoeff2grid.SetZero();
676  // for(Integer i=0;i<Ncoeff;i++){
677  // for(Integer j=0;j<Ngrid;j++){
678  // Mcoeff2grid[i+Ncoeff*0][j+Ngrid*0]=M[i][j];
679  // Mcoeff2grid[i+Ncoeff*1][j+Ngrid*1]=M[i][j];
680  // }
681  // }
682  //}
683 
684  //if(0){
685  // Integer Ngrid0 = Ngrid*order;
686  // Matrix<ValueType> MM(2*Ngrid0 + 2*Ngrid, 2*Ngrid0);
687  // MM.SetZero();
688  // for (Integer i=0;i<2*Ngrid0;i++) MM[i][i]=1;
689  // Matrix<ValueType> M0_(Ngrid, 2 * Ngrid0, MM[2 * Ngrid0 + Ngrid * 0], false); M0_ = (Mgrid2coeff * M_diff).Transpose();
690  // Matrix<ValueType> M1_(Ngrid, 2 * Ngrid0, MM[2 * Ngrid0 + Ngrid * 1], false); M1_ = (Mgrid2coeff * M_grad * M_diff).Transpose();
691  // for (Long i=0;i<2*Ngrid*2*Ngrid0;i++) MM[0][2*Ngrid0*2*Ngrid0 +i] *= 10000;
692  // MM = MM.Transpose().pinv();
693  // M[dir].ReInit(2 * Ngrid0, 2 * Ngrid0, MM.Begin());
694  // M[dir] = Mcoeff2grid * M[dir] * Mgrid2coeff;
695  //} else {
696  // PVFMM_ASSERT(DIM==2);
697  // Vector<ValueType> coeff_weight;
698  // for (Integer i=0;i<order;i++) {
699  // for (Integer j=0;j<order;j++) {
700  // if(i+j<order) coeff_weight.PushBack(pvfmm::pow<ValueType>(1.5, i+j)*1e-4);
701  // }
702  // }
703  // PVFMM_ASSERT(coeff_weight.Dim()==Ncoeff);
704 
705  // auto M0_ = M_diff.Transpose();
706  // auto M1_ = (M_grad * M_diff).Transpose();
707 
708  // Matrix<ValueType> MM(2*Ncoeff + 6*Ngrid, 2*Ncoeff);
709  // MM.SetZero();
710  // for (Integer i=0;i<Ncoeff;i++) {
711  // MM[i+Ncoeff*0][i+Ncoeff*0]=coeff_weight[i];
712  // MM[i+Ncoeff*1][i+Ncoeff*1]=coeff_weight[i];
713  // }
714  // for (Integer i=0;i<Ngrid;i++) {
715  // for (Integer j=0;j<Ncoeff;j++) {
716  // MM[2 * Ncoeff + 0 * Ngrid +i][0 * Ncoeff + j] = M0_[0 * Ngrid + i][0 * Ncoeff + j];
717  // MM[2 * Ncoeff + 0 * Ngrid +i][1 * Ncoeff + j] = M0_[0 * Ngrid + i][1 * Ncoeff + j];
718 
719  // MM[2 * Ncoeff + 1 * Ngrid +i][0 * Ncoeff + j] = M1_[0 * Ngrid + i][0 * Ncoeff + j];
720  // MM[2 * Ncoeff + 1 * Ngrid +i][1 * Ncoeff + j] = M1_[0 * Ngrid + i][1 * Ncoeff + j];
721 
722  // MM[2 * Ncoeff + 2 * Ngrid +i][0 * Ncoeff + j] = M0_[0 * Ngrid + i][1 * Ncoeff + j];
723  // MM[2 * Ncoeff + 3 * Ngrid +i][1 * Ncoeff + j] = M0_[0 * Ngrid + i][0 * Ncoeff + j];
724 
725  // MM[2 * Ncoeff + 4 * Ngrid +i][0 * Ncoeff + j] = M1_[0 * Ngrid + i][1 * Ncoeff + j];
726  // MM[2 * Ncoeff + 5 * Ngrid +i][1 * Ncoeff + j] = M1_[0 * Ngrid + i][0 * Ncoeff + j];
727  // }
728  // }
729 
730  // Matrix<ValueType> MMM(2*Ncoeff + 6*Ngrid, 2*Ncoeff);
731  // MMM.SetZero();
732  // for (Integer i=0;i<Ncoeff;i++) {
733  // MMM[i+Ncoeff*0][i+Ncoeff*0]=coeff_weight[i];
734  // MMM[i+Ncoeff*1][i+Ncoeff*1]=coeff_weight[i];
735  // }
736  // for (Integer i=0;i<Ngrid;i++) {
737  // for (Integer j=0;j<Ncoeff;j++) {
738  // // MMM[2 * Ncoeff + 0 * Ngrid +i][0 * Ncoeff + j] = M0_[0 * Ngrid + i][0 * Ncoeff + j];
739  // // MMM[2 * Ncoeff + 0 * Ngrid +i][1 * Ncoeff + j] = M0_[0 * Ngrid + i][1 * Ncoeff + j];
740 
741  // // MMM[2 * Ncoeff + 1 * Ngrid +i][0 * Ncoeff + j] = M1_[0 * Ngrid + i][0 * Ncoeff + j];
742  // // MMM[2 * Ncoeff + 1 * Ngrid +i][1 * Ncoeff + j] = M1_[0 * Ngrid + i][1 * Ncoeff + j];
743 
744  // MMM[2 * Ncoeff + 2 * Ngrid +i][0 * Ncoeff + j] = M0_[0 * Ngrid + i][1 * Ncoeff + j];
745  // MMM[2 * Ncoeff + 3 * Ngrid +i][1 * Ncoeff + j] = M0_[0 * Ngrid + i][0 * Ncoeff + j];
746 
747  // MMM[2 * Ncoeff + 4 * Ngrid +i][0 * Ncoeff + j] = M1_[0 * Ngrid + i][1 * Ncoeff + j];
748  // MMM[2 * Ncoeff + 5 * Ngrid +i][1 * Ncoeff + j] = M1_[0 * Ngrid + i][0 * Ncoeff + j];
749  // }
750  // }
751 
752 
753  // M[dir] = (MM.pinv(1e-10) * MMM).Transpose();
754  //}
755  //M[dir]=M[dir]*M[dir];
771  }
772  }
773 
774  Matrix<ValueType> x(dof, 2 * Ncoeff), y(dof, 2 * Ncoeff);
775  for (Long i = 0; i < dof; i++) {
776  for (Integer j = 0; j < Ncoeff; j++) {
777  x[i][Ncoeff * 0 + j] = coeff0[i * Ncoeff + j];
778  x[i][Ncoeff * 1 + j] = coeff1[i * Ncoeff + j];
779  }
780  }
781  Matrix<ValueType>::GEMM(y, x, M[dir0][dir1]);
782  for (Long i = 0; i < dof; i++) {
783  for (Integer j = 0; j < Ncoeff; j++) {
784  coeff0[i * Ncoeff + j] = y[i][Ncoeff * 0 + j];
785  coeff1[i * Ncoeff + j] = y[i][Ncoeff * 1 + j];
786  }
787  }
788  }
789 
790  template <Integer DIM, Integer CONTINUITY> static void MakeContinuousEdge(Vector<ValueType> &coeff0, Vector<ValueType> &coeff1, Integer order, Integer dir0, Integer dir1, Integer norm0, Integer norm1) {
791  PVFMM_ASSERT(DIM==2);
792  if (dir0>=2*DIM || dir1>=2*DIM) return;
793  Integer Ncoeff = 1;
794  for (Integer i = 0; i < DIM; i++) Ncoeff = (Ncoeff * (order + i)) / (i + 1);
795 
796  Long dof = coeff0.Dim() / Ncoeff;
797  PVFMM_ASSERT(coeff0.Dim() == Ncoeff * dof);
798  PVFMM_ASSERT(coeff1.Dim() == Ncoeff * dof);
799 
800  static Matrix<Matrix<ValueType>> M(2*DIM, 2*DIM);
801  static Matrix<Matrix<ValueType>> MM(2*DIM, 2*DIM);
802  if (M[dir0][dir1].Dim(0) != 2 * Ncoeff) {
803  Integer Ngrid = pvfmm::pow<Integer>(order, DIM - 1);
804  Vector<ValueType> nodes;
805  Nodes<1>(order, nodes);
806 
807  Matrix<ValueType> Mtrunc(2*Ncoeff, 2*Ncoeff);
808  { // Set Mtrunc
810  w.SetZero();
811  for (Integer i=0;i<order;i++){
812  for (Integer j=0;j<order;j++){
813  if (i+j<order) {
814  w.PushBack(i<order-CONTINUITY*2-1 && j<order-CONTINUITY*2-1);
815  }
816  }
817  }
818  Mtrunc.SetZero();
819  for (Integer i=0;i<Ncoeff;i++){
820  Mtrunc[i + Ncoeff * 0][i + Ncoeff * 0] = w[i];
821  Mtrunc[i + Ncoeff * 1][i + Ncoeff * 1] = w[i];
822  }
823  }
824 
825  Matrix<ValueType> M_diff(2*Ncoeff, Ngrid);
826  { // Set M_diff
827  M_diff.SetZero();
828  StaticArray<Vector<ValueType>, DIM> nodes_;
829  for (Integer i = 0; i < DIM; i++) { // Set nodes_
830  nodes_[i].ReInit(nodes.Dim(), nodes.Begin(), false);
831  }
832  Vector<ValueType> nodes0, nodes1;
833  nodes0.PushBack(0);
834  nodes1.PushBack(1);
835 
836  Vector<ValueType> value;
837  Vector<ValueType> coeff(Ncoeff);
838  coeff.SetZero();
839  for (Integer i = 0; i < Ncoeff; i++) {
840  coeff[i]=0.5;
841  value.ReInit(Ngrid, M_diff[i + Ncoeff * 0], false);
842  nodes_[dir0/2].ReInit(1, (dir0 & 1 ? nodes1.Begin() : nodes0.Begin()), false);
843  Eval<DIM>(order, coeff, nodes_, value);
844  nodes_[dir0/2].ReInit(nodes.Dim(), nodes.Begin(), false);
845 
846  coeff[i]=-0.5;
847  value.ReInit(Ngrid, M_diff[i + Ncoeff * 1], false);
848  nodes_[dir1/2].ReInit(1, (dir1 & 1 ? nodes1.Begin() : nodes0.Begin()), false);
849  Eval<DIM>(order, coeff, nodes_, value);
850  nodes_[dir1/2].ReInit(nodes.Dim(), nodes.Begin(), false);
851 
852  coeff[i]=0;
853  }
854  }
855 
856  Matrix<ValueType> M_grad(2 * Ncoeff, 2 * Ncoeff);
857  { // Set M_grad
858  M_grad.SetZero();
859  Vector<ValueType> coeff(Ncoeff * Ncoeff), coeff_grad;
860  coeff.SetZero();
861  for(Integer i = 0; i < Ncoeff; i++) coeff[i * Ncoeff + i] = 1;
862  Grad<DIM>(order, coeff, &coeff_grad);
863  for (Integer i = 0; i < Ncoeff; i++){
864  for (Integer j = 0; j < Ncoeff; j++){
865  M_grad[i + Ncoeff * 0][j + Ncoeff * 0] = coeff_grad[j + (i * DIM + dir0/2) * Ncoeff];
866  M_grad[i + Ncoeff * 1][j + Ncoeff * 1] = coeff_grad[j + (i * DIM + dir1/2) * Ncoeff];
867  }
868  }
869  }
870 
871  auto fn_perturb = [&](std::function<ValueType(ValueType)> fn, bool even) { // Set M0
872  Matrix<ValueType> M0(Ngrid, 2 * Ncoeff);
873  M0.SetZero();
874  { // dir0
875  Integer N0=pvfmm::pow<Integer>(order, dir0/2);
876  Integer N1=pvfmm::pow<Integer>(order, 1);
877  Integer N2=pvfmm::pow<Integer>(order, DIM - dir0/2 - 1);
878  PVFMM_ASSERT(N0 * N2 == Ngrid);
879  Vector<ValueType> val(Ngrid * order), coeff;
880  val.SetZero();
881  for (Integer i0=0;i0<N0;i0++){
882  for (Integer i2=0;i2<N2;i2++){
883  for (Integer i1=0;i1<N1;i1++){
884  val[(i2*N1+i1)*N0+i0] = (dir0 & 1 ? fn(nodes[i1]) : fn(1.0 - nodes[i1])) * (even ? 1.0 : -1.0);
885  }
886  coeff.ReInit(Ncoeff, M0[i2 * N0 + i0] + Ncoeff * 0, false);
887  Approx<DIM>(order, val, coeff);
888  for (Integer i1=0;i1<N1;i1++){
889  val[(i2*N1+i1)*N0+i0] = 0;
890  }
891  }
892  }
893  }
894  { // dir1
895  Integer N0=pvfmm::pow<Integer>(order, dir1/2);
896  Integer N1=pvfmm::pow<Integer>(order, 1);
897  Integer N2=pvfmm::pow<Integer>(order, DIM - dir1/2 - 1);
898  PVFMM_ASSERT(N0 * N2 == Ngrid);
899  Vector<ValueType> val(Ngrid * order), coeff;
900  val.SetZero();
901  for (Integer i0=0;i0<N0;i0++){
902  for (Integer i2=0;i2<N2;i2++){
903  for (Integer i1=0;i1<N1;i1++){
904  val[(i2*N1+i1)*N0+i0] = (dir1 & 1 ? fn(nodes[i1]) : fn(1.0 - nodes[i1]));
905  }
906  coeff.ReInit(Ncoeff, M0[i2 * N0 + i0] + Ncoeff * 1, false);
907  Approx<DIM>(order, val, coeff);
908  for (Integer i1=0;i1<N1;i1++){
909  val[(i2*N1+i1)*N0+i0] = 0;
910  }
911  }
912  }
913  }
914  return M0;
915  };
916 
917  Matrix<ValueType> Mfilter[2];
918  { // Set Mfilter
919  Mfilter[0].ReInit(2*Ncoeff, 2*Ncoeff);
920  Mfilter[1].ReInit(2*Ncoeff, 2*Ncoeff);
921  Mfilter[0].SetZero();
922  Mfilter[1].SetZero();
923  for (Integer i=0;i<Ncoeff;i++) {
924  Mfilter[0][i + Ncoeff * 0][i + Ncoeff * 0] = 1;
925  Mfilter[1][i + Ncoeff * 1][i + Ncoeff * 1] = 1;
926  }
927  }
928 
929  if (CONTINUITY == 0) {
930  auto fn0 = [](ValueType x) {return x;};
931  Matrix<ValueType> M0 = fn_perturb(fn0, 0);
932  M[dir0][dir1] = M_diff * M0;
933  } else if (CONTINUITY == 1) {
934  auto fn0 = [](ValueType x) {return (-2*x + 3) * x * x;};
935  auto fn1 = [](ValueType x) {return (1 - x) * x * x;};
936  Matrix<ValueType> M0 = fn_perturb(fn0, 0);
937  Matrix<ValueType> M1 = fn_perturb(fn1, 1);
938  M[dir0][dir1] = M_diff * M0;
939  if (dir0 & 1) M[dir0][dir1] += (M_grad+M_grad) * (Mfilter[0] * M_diff * M1 * Mfilter[0]);
940  else M[dir0][dir1] -= (M_grad+M_grad) * (Mfilter[0] * M_diff * M1 * Mfilter[0]);
941  if (dir1 & 1) M[dir0][dir1] -= (M_grad+M_grad) * (Mfilter[1] * M_diff * M1 * Mfilter[1]);
942  else M[dir0][dir1] += (M_grad+M_grad) * (Mfilter[1] * M_diff * M1 * Mfilter[1]);
943  }
944  if (CONTINUITY == 1) {
945  auto fn1 = [](ValueType x) {return (1 - x) * x * x;};
946  Matrix<ValueType> M1 = fn_perturb(fn1, 1);
947  MM[dir0][dir1] = M_grad * M_diff * M1;
948 
949  for (Integer i=0;i<Ncoeff;i++){
950  for (Integer j=0;j<2*Ncoeff;j++){
951  MM[dir0][dir1][i + Ncoeff*1][j]*=-1;
952  }
953  }
954 
955  for (Integer i=0;i<2*Ncoeff;i++){
956  for (Integer j=0;j<Ncoeff;j++){
957  MM[dir0][dir1][i][j + Ncoeff*0]*=(dir0 & 1 ? 1.0 : -1.0);
958  MM[dir0][dir1][i][j + Ncoeff*1]*=(dir1 & 1 ? 1.0 : -1.0);
959  }
960  }
961  }
962 
963  for (Integer i=0;i<2*Ncoeff;i++){
964  M[dir0][dir1][i][i]+=1.0;
965  MM[dir0][dir1][i][i]+=1.0;
966  }
967  M[dir0][dir1] = Mtrunc * M[dir0][dir1] * Mtrunc;
968  MM[dir0][dir1] = Mtrunc * MM[dir0][dir1] * Mtrunc;
969 
970  for (Integer i=0;i<10;i++) {
971  M[dir0][dir1]=M[dir0][dir1]*M[dir0][dir1];
972  }
973  }
974 
975  Matrix<ValueType> x(dof, 2 * Ncoeff), y(dof, 2 * Ncoeff);
976  for (Long i = 0; i < dof; i++) {
977  for (Integer j = 0; j < Ncoeff; j++) {
978  x[i][Ncoeff * 0 + j] = coeff0[i * Ncoeff + j];
979  x[i][Ncoeff * 1 + j] = coeff1[i * Ncoeff + j];
980  }
981  }
982  Matrix<ValueType>::GEMM(y, x, M[dir0][dir1]);
983  {
984  Matrix<ValueType> xx(1, 2*Ncoeff), yy(1, 2*Ncoeff);
985  for (Integer j = 0; j < Ncoeff; j++) {
986  xx[0][Ncoeff * 0 + j] = coeff0[norm0 * Ncoeff + j];
987  xx[0][Ncoeff * 1 + j] = coeff1[norm1 * Ncoeff + j];
988  }
989  Matrix<ValueType>::GEMM(yy, xx, MM[dir0][dir1]);
990  for (Integer j = 0; j < Ncoeff; j++) {
991  y[norm0][Ncoeff * 0 + j] = yy[0][Ncoeff * 0 + j];
992  y[norm1][Ncoeff * 1 + j] = yy[0][Ncoeff * 1 + j];
993  }
994  }
995  for (Long i = 0; i < dof; i++) {
996  for (Integer j = 0; j < Ncoeff; j++) {
997  coeff0[i * Ncoeff + j] = y[i][Ncoeff * 0 + j];
998  coeff1[i * Ncoeff + j] = y[i][Ncoeff * 1 + j];
999  }
1000  }
1001  }
1002 
1003 
1004  //private:
1005  BasisInterface() {
1006  void (*EvalBasis1D)(Integer, const Vector<ValueType>&, Vector<ValueType>&) = Derived::EvalBasis1D;
1007  void (*Nodes1D)(Integer, Vector<ValueType>&) = Derived::Nodes1D;
1008  }
1009 
1010  static void cheb_nodes_1d(Integer order, Vector<ValueType>& nodes) {
1011  if (nodes.Dim() != order) nodes.ReInit(order);
1012  for (Integer i = 0; i < order; i++) {
1013  nodes[i] = -pvfmm::cos<ValueType>((i + (ValueType)0.5) * pvfmm::const_pi<ValueType>() / order) * 0.5 + 0.5;
1014  }
1015  }
1016 
1017  static void cheb_basis_1d(Integer order, const Vector<ValueType>& x, Vector<ValueType>& y) {
1018  Integer n = x.Dim();
1019  if (y.Dim() != order * n) y.ReInit(order * n);
1020 
1021  if (order > 0) {
1022  for (Long i = 0; i < n; i++) {
1023  y[i] = 1.0;
1024  }
1025  }
1026  if (order > 1) {
1027  for (Long i = 0; i < n; i++) {
1028  y[i + n] = x[i] * 2.0 - 1.0;
1029  }
1030  }
1031  for (Integer i = 2; i < order; i++) {
1032  for (Long j = 0; j < n; j++) {
1033  y[i * n + j] = 2 * y[n + j] * y[i * n - 1 * n + j] - y[i * n - 2 * n + j];
1034  }
1035  }
1036  }
1037 
1038  static void quad_rule(Integer order, Vector<ValueType>& x, Vector<ValueType>& w) {
1039  static Vector<Vector<ValueType>> x_lst(10000);
1040  static Vector<Vector<ValueType>> w_lst(x_lst.Dim());
1041  PVFMM_ASSERT(order < x_lst.Dim());
1042 
1043  if (x.Dim() != order) x.ReInit(order);
1044  if (w.Dim() != order) w.ReInit(order);
1045 
1046  bool done = false;
1047  #pragma omp critical(QUAD_RULE)
1048  if (x_lst[order].Dim()) {
1049  Vector<ValueType>& x_ = x_lst[order];
1050  Vector<ValueType>& w_ = w_lst[order];
1051  for (Integer i = 0; i < order; i++) {
1052  x[i] = x_[i];
1053  w[i] = w_[i];
1054  }
1055  done = true;
1056  }
1057  if (done) return;
1058 
1059  Vector<ValueType> x_(order);
1060  Vector<ValueType> w_(order);
1061  if (std::is_same<ValueType, double>::value || std::is_same<ValueType, float>::value) { // Gauss-Legendre quadrature nodes and weights
1062  Vector<double> xd(order);
1063  Vector<double> wd(order);
1064  int kind = 1;
1065  double alpha = 0.0, beta = 0.0, a = -1.0, b = 1.0;
1066  cgqf(order, kind, (double)alpha, (double)beta, (double)a, (double)b, &xd[0], &wd[0]);
1067  for (Integer i = 0; i < order; i++) {
1068  x_[i] = 0.5 * xd[i] + 0.5;
1069  w_[i] = 0.5 * wd[i];
1070  }
1071  } else { // Chebyshev quadrature nodes and weights
1072  cheb_nodes_1d(order, x_);
1073  for (Integer i = 0; i < order; i++) w_[i] = 0;
1074 
1075  Vector<ValueType> V_cheb(order * order);
1076  cheb_basis_1d(order, x_, V_cheb);
1077  for (size_t i = 0; i < order; i++) V_cheb[i] /= 2.0;
1078  Matrix<ValueType> M(order, order, V_cheb.Begin());
1079 
1080  Vector<ValueType> w_sample(order);
1081  w_sample.SetZero();
1082  for (Integer i = 0; i < order; i += 2) w_sample[i] = -((ValueType)2.0 / (i + 1) / (i - 1));
1083 
1084  for (Integer i = 0; i < order; i++) {
1085  for (Integer j = 0; j < order; j++) {
1086  w_[j] += M[i][j] * w_sample[i] / order;
1087  }
1088  }
1089  }
1090  #pragma omp critical(QUAD_RULE)
1091  if (!x_lst[order].Dim()) { // Set x_lst, w_lst
1092  x_lst[order].Swap(x_);
1093  w_lst[order].Swap(w_);
1094  }
1095  quad_rule(order, x, w);
1096  }
1097 
1098  static ValueType machine_eps() {
1099  ValueType eps = 1.0;
1100  while (eps + (ValueType)1.0 > 1.0) eps *= 0.5;
1101  return eps;
1102  }
1103 
1104  template <Integer DIM, Integer SUBDIM> static void Integ_(Matrix<ValueType>& Mcoeff, Integer order, ConstIterator<ValueType> trg_, ValueType side, Integer src_face, const KernelFunction<ValueType, DIM>& ker, Integer Nq = 0) {
1105  static const ValueType eps = machine_eps() * 64;
1106  ValueType side_inv = 1.0 / side;
1107  if (!Nq) Nq = order;
1108 
1109  Vector<ValueType> qp, qw;
1110  quad_rule(Nq, qp, qw);
1111 
1112  Integer Ncoeff;
1113  { // Set Ncoeff
1114  Ncoeff = 1;
1115  for (Integer i = 0; i < SUBDIM; i++) Ncoeff = (Ncoeff * (order + i)) / (i + 1);
1116  }
1117  StaticArray<Integer, 2> kdim;
1118  kdim[0] = ker.Dim(0);
1119  kdim[1] = ker.Dim(1);
1120 
1121  StaticArray<Integer, DIM> perm0;
1122  StaticArray<ValueType, DIM> trg; // target after rotation
1123  { // Set perm0
1124  PVFMM_ASSERT(0 <= src_face && src_face < 2 * DIM);
1125  if (SUBDIM == DIM - 1) {
1126  for (Integer i = 0; i < DIM; i++) {
1127  perm0[i] = (i + (src_face >> 1) + 1) % DIM;
1128  }
1129  } else {
1130  for (Integer i = 0; i < DIM; i++) {
1131  perm0[i] = i;
1132  }
1133  }
1134  for (Integer i = 0; i < DIM; i++) trg[i] = trg_[perm0[i]];
1135  if (SUBDIM == DIM - 1) trg[DIM - 1] -= side * (src_face & 1);
1136  }
1137 
1139  { // Set r
1140  Vector<ValueType> r_;
1141  r_.PushBack(0);
1142  for (Integer i = 0; i < SUBDIM; i++) {
1143  r_.PushBack(fabs(trg[i] - 0.0));
1144  r_.PushBack(fabs(trg[i] - side));
1145  }
1146  std::sort(r_.Begin(), r_.Begin() + r_.Dim());
1147 
1148  ValueType r0, r1 = r_[r_.Dim() - 1];
1149  r0 = (r1 > side ? r1 - side : 0.0);
1150  for (Integer i = SUBDIM; i < DIM; i++) r0 = std::max(r0, fabs(trg[i]));
1151  if (r0 > eps) r.PushBack(-r0);
1152  r.PushBack(r0);
1153 
1154  for (Integer i = 0; i < r_.Dim(); i++) {
1155  if (r_[i] > r0) {
1156  while (r[r.Dim() - 1] > 0.0 && 3.0 * r[r.Dim() - 1] < r_[i]) r.PushBack(3.0 * r[r.Dim() - 1]);
1157  r.PushBack(r_[i]);
1158  }
1159  }
1160  }
1161 
1162  // Work vectors
1163  StaticArray<Vector<ValueType>, SUBDIM> eval_mesh;
1164  StaticArray<Vector<ValueType>, SUBDIM> eval_poly;
1165  Vector<ValueType> eval_coord_tmp;
1166  Vector<ValueType> eval_coord;
1167  Vector<ValueType> kern_value;
1168 
1169  // Temporary vectors
1170  Vector<ValueType> r_src, n_src, v_src;
1171  { // Init r_src, n_src, v_src
1172  r_src.ReInit(DIM);
1173  for (Integer k = 0; k < DIM; k++) r_src[k] = 0.0;
1174  if (SUBDIM == DIM - 1) {
1175  n_src.ReInit(DIM);
1176  for (Integer k = 0; k < DIM; k++) n_src[k] = 0.0;
1177  n_src[src_face >> 1] = (src_face & 1 ? -1.0 : 1.0);
1178  }
1179  v_src.ReInit(kdim[0]);
1180  }
1181  Vector<ValueType> v0;
1182  Vector<ValueType> v1;
1183 
1184  Matrix<ValueType> Mtensor(kdim[1] * kdim[0], pvfmm::pow<Integer>(order, SUBDIM));
1185  Mtensor.SetZero();
1186 
1187  for (Integer i0 = 0; i0 < r.Dim() - 1; i0++) { // for each layer
1188  for (Integer i1 = 0; i1 < 2 * SUBDIM; i1++) { // for each direction
1189  StaticArray<ValueType, 2 * SUBDIM> range0;
1190  StaticArray<ValueType, 2 * SUBDIM> range1;
1191  { // Set range0, range1
1192  for (Integer k = 0; k < SUBDIM; k++) {
1193  if (i1 >> 1 == k) {
1194  ValueType s = (i1 & 1 ? 1.0 : -1.0);
1195  range0[k * 2 + 0] = trg[k] + s * r[i0 + 0];
1196  range0[k * 2 + 1] = trg[k] + s * r[i0 + 0];
1197  range1[k * 2 + 0] = trg[k] + s * r[i0 + 1];
1198  range1[k * 2 + 1] = trg[k] + s * r[i0 + 1];
1199  } else {
1200  range0[k * 2 + 0] = trg[k] - fabs(r[i0 + 0]);
1201  range0[k * 2 + 1] = trg[k] + fabs(r[i0 + 0]);
1202  range1[k * 2 + 0] = trg[k] - fabs(r[i0 + 1]);
1203  range1[k * 2 + 1] = trg[k] + fabs(r[i0 + 1]);
1204  }
1205  }
1206  for (Integer k = 0; k < 2 * SUBDIM; k++) {
1207  if (range0[k] > side) range0[k] = side;
1208  if (range0[k] < 0.0) range0[k] = 0.0;
1209  if (range1[k] > side) range1[k] = side;
1210  if (range1[k] < 0.0) range1[k] = 0.0;
1211  }
1212 
1213  bool continue_flag = false;
1214  for (Integer k = 0; k < SUBDIM; k++) { // continue if volume if 0
1215  if (i1 >> 1 == k) {
1216  if (fabs(range0[2 * k + 0] - range1[2 * k + 0]) < eps && fabs(range0[2 * k + 1] - range1[2 * k + 1]) < eps) {
1217  continue_flag = true;
1218  break;
1219  }
1220  } else {
1221  if (fabs(range0[2 * k + 0] - range0[2 * k + 1]) < eps && fabs(range1[2 * k + 0] - range1[2 * k + 1]) < eps) {
1222  continue_flag = true;
1223  break;
1224  }
1225  }
1226  }
1227  if (continue_flag) continue;
1228  }
1229  for (Integer i2 = 0; i2 < Nq; i2++) { // for each quadrature point
1230  StaticArray<ValueType, 2 * SUBDIM> range;
1231  for (Integer k = 0; k < 2 * SUBDIM; k++) { // Set range
1232  range[k] = range0[k] + (range1[k] - range0[k]) * qp[i2];
1233  }
1234  for (Integer k = 0; k < SUBDIM; k++) { // Set eval_mesh
1235  if (k == (i1 >> 1)) {
1236  eval_mesh[k].ReInit(1);
1237  eval_mesh[k][0] = range[2 * k];
1238  } else {
1239  eval_mesh[k].ReInit(Nq);
1240  for (Integer l = 0; l < Nq; l++) eval_mesh[k][l] = range[2 * k + 0] + (range[2 * k + 1] - range[2 * k + 0]) * qp[l];
1241  }
1242  }
1243  { // Set eval_coord
1244  Integer N = 1;
1245  eval_coord.ReInit(0);
1246  for (Integer k = 0; k < SUBDIM; k++) {
1247  Integer Nk = eval_mesh[k].Dim();
1248  eval_coord_tmp.Swap(eval_coord);
1249  eval_coord.ReInit(Nk * N * DIM);
1250  for (Integer l0 = 0; l0 < Nk; l0++) {
1251  for (Integer l1 = 0; l1 < N; l1++) {
1252  for (Integer l2 = 0; l2 < k; l2++) {
1253  eval_coord[DIM * (N * l0 + l1) + l2] = eval_coord_tmp[DIM * l1 + l2];
1254  }
1255  eval_coord[DIM * (N * l0 + l1) + k] = trg[k] - eval_mesh[k][l0];
1256  }
1257  }
1258  N *= Nk;
1259  }
1260  StaticArray<ValueType, DIM> c;
1261  for (Integer k = 0; k < N; k++) { // Rotate
1262  for (Integer l = 0; l < SUBDIM; l++) c[l] = eval_coord[k * DIM + l];
1263  for (Integer l = SUBDIM; l < DIM; l++) c[l] = trg[l];
1264  for (Integer l = 0; l < DIM; l++) eval_coord[k * DIM + perm0[l]] = c[l];
1265  }
1266  }
1267  for (Integer k = 0; k < SUBDIM; k++) { // Set eval_poly
1268  Integer N = eval_mesh[k].Dim();
1269  for (Integer l = 0; l < eval_mesh[k].Dim(); l++) { // Scale eval_mesh to [0, 1]
1270  eval_mesh[k][l] *= side_inv;
1271  }
1272  Derived::EvalBasis1D(order, eval_mesh[k], eval_poly[k]);
1273  if (k == (i1 >> 1)) {
1274  assert(N == 1);
1275  ValueType qscal = fabs(range1[i1] - range0[i1]);
1276  for (Integer l0 = 0; l0 < order; l0++) {
1277  eval_poly[k][l0] *= qscal * qw[i2];
1278  }
1279  } else {
1280  assert(N == Nq);
1281  ValueType qscal = (range[2 * k + 1] - range[2 * k + 0]);
1282  for (Integer l0 = 0; l0 < order; l0++) {
1283  for (Integer l1 = 0; l1 < N; l1++) {
1284  eval_poly[k][N * l0 + l1] *= qscal * qw[l1];
1285  }
1286  }
1287  }
1288  }
1289  { // Set kern_value
1290  Integer N = eval_coord.Dim() / DIM;
1291  kern_value.ReInit(kdim[0] * N * kdim[1]);
1292  kern_value.SetZero();
1293  for (Integer j = 0; j < kdim[0]; j++) { // Evaluate ker
1294  for (Integer k = 0; k < kdim[0]; k++) v_src[k] = 0.0;
1295  v_src[j] = 1.0;
1296  Vector<ValueType> ker_value(N * kdim[1], kern_value.Begin() + j * N * kdim[1], false);
1297  ker(r_src, n_src, v_src, eval_coord, ker_value);
1298  }
1299  { // Transpose
1300  v0.ReInit(kern_value.Dim());
1301  for (Integer k = 0; k < v0.Dim(); k++) v0[k] = kern_value[k];
1302  Matrix<ValueType> M0(kdim[0], N * kdim[1], v0.Begin(), false);
1303  Matrix<ValueType> M1(N * kdim[1], kdim[0], kern_value.Begin(), false);
1304  for (Integer l0 = 0; l0 < M1.Dim(0); l0++) { // Transpose
1305  for (Integer l1 = 0; l1 < M1.Dim(1); l1++) {
1306  M1[l0][l1] = M0[l1][l0];
1307  }
1308  }
1309  }
1310  }
1311  { // Set Update M
1312  Matrix<ValueType> Mkern(eval_mesh[SUBDIM - 1].Dim(), kern_value.Dim() / eval_mesh[SUBDIM - 1].Dim(), kern_value.Begin(), false);
1313  for (Integer k = SUBDIM - 1; k >= 0; k--) { // Compute v0
1314  Matrix<ValueType> Mpoly(order, eval_mesh[k].Dim(), eval_poly[k].Begin(), false);
1315 
1316  v1.ReInit(Mpoly.Dim(0) * Mkern.Dim(1));
1317  Matrix<ValueType> Mcoef(Mpoly.Dim(0), Mkern.Dim(1), v1.Begin(), false);
1318  Matrix<ValueType>::GEMM(Mcoef, Mpoly, Mkern);
1319 
1320  v0.ReInit(v1.Dim());
1321  Matrix<ValueType> Mt(Mkern.Dim(1), Mpoly.Dim(0), v0.Begin(), false);
1322  for (Integer l0 = 0; l0 < Mt.Dim(0); l0++) { // Transpose
1323  for (Integer l1 = 0; l1 < Mt.Dim(1); l1++) {
1324  Mt[l0][l1] = Mcoef[l1][l0];
1325  }
1326  }
1327 
1328  if (k > 0) { // Reinit Mkern
1329  Mkern.ReInit(eval_mesh[k - 1].Dim(), v0.Dim() / eval_mesh[k - 1].Dim(), v0.Begin(), false);
1330  }
1331  }
1332 
1333  assert(v0.Dim() == Mtensor.Dim(0) * Mtensor.Dim(1));
1334  for (Integer k = 0; k < v0.Dim(); k++) { // Update M
1335  Mtensor[0][k] += v0[k];
1336  }
1337  }
1338  }
1339  if (r[i0] < 0.0) break;
1340  }
1341  }
1342 
1343  Mtensor = Mtensor.Transpose();
1344  { // Set Mcoeff
1345  if (Mcoeff.Dim(0) != kdim[1] || Mcoeff.Dim(1) != kdim[0] * Ncoeff) {
1346  Mcoeff.ReInit(kdim[1], kdim[0] * Ncoeff);
1347  }
1348  Vector<ValueType> Mtensor_(Mtensor.Dim(0) * Mtensor.Dim(1), Mtensor.Begin(), false);
1349  Vector<ValueType> Mcoeff_(Mcoeff.Dim(0) * Mcoeff.Dim(1), Mcoeff.Begin(), false);
1350  tensor2coeff<SUBDIM>(order, Mtensor_, Mcoeff_);
1351  }
1352  }
1353 
1354  static void diff_1d(Integer order, Matrix<ValueType>* M) {
1355  Vector<ValueType> nodes;
1356  Nodes<1>(order, nodes);
1357  Integer N = nodes.Dim();
1358 
1359  Matrix<ValueType> M0(N, N);
1360  for (Integer i = 0; i < N; i++) {
1361  for (Integer j = 0; j < N; j++) {
1362  M0[i][j] = 0;
1363  for (Integer l = 0; l < N; l++) {
1364  if (l != i) {
1365  ValueType Mij = 1;
1366  for (Integer k = 0; k < N; k++) {
1367  if (k != i) {
1368  if (l == k) {
1369  Mij *= 1 / (nodes[i] - nodes[k]);
1370  } else {
1371  Mij *= (nodes[j] - nodes[k]) / (nodes[i] - nodes[k]);
1372  }
1373  }
1374  }
1375  M0[i][j] += Mij;
1376  }
1377  }
1378  }
1379  }
1380 
1382  Derived::EvalBasis1D(order, nodes, p);
1383  Matrix<ValueType> Mp(order, N, p.Begin(), false);
1384  M0 = Mp * M0;
1385 
1386  Vector<ValueType> coeff;
1387  Approx<1>(order, Vector<ValueType>(M0.Dim(0) * M0.Dim(1), M0.Begin(), false), coeff);
1388  (*M) = Matrix<ValueType>(M0.Dim(0), coeff.Dim() / M0.Dim(0), coeff.Begin(), false);
1389  }
1390 
1391  friend Derived;
1392 };
1393 
1394 template <class ValueType> class ChebBasis : public BasisInterface<ValueType, ChebBasis<ValueType>> {
1395 
1396  public:
1397  ChebBasis();
1398 
1399  static void Nodes1D(Integer order, Vector<ValueType>& nodes) { BasisInterface<ValueType, ChebBasis<ValueType>>::cheb_nodes_1d(order, nodes); }
1400 
1406  static void EvalBasis1D(Integer order, const Vector<ValueType>& x, Vector<ValueType>& y) { BasisInterface<ValueType, ChebBasis<ValueType>>::cheb_basis_1d(order, x, y); }
1407 
1409 };
1410 
1411 } // end namespace
1412 
1413 #endif //_PVFMM_CHEB_UTILS_HPP_
Definition: cheb_utils.hpp:14
Definition: cheb_utils.hpp:12
Definition: cheb_utils.hpp:1394
Definition: matrix.hpp:15
static ValueType TruncErr(Integer order, const Vector< ValueType > &coeff)
Returns the sum of the absolute value of coefficients of the highest order terms as an estimate of tr...
Definition: cheb_utils.hpp:216
Definition: matrix.hpp:12
static void Eval(Integer order, const Vector< ValueType > &coeff, ConstIterator< Vector< ValueType >> in_x, Vector< ValueType > &out)
Evaluates values from input coefficients at points on a regular grid defined by in_x, in_y, in_z the values in the input vector.
Definition: cheb_utils.hpp:157
static void Approx(Integer order, const Vector< ValueType > &fn_v, Vector< ValueType > &coeff)
Computes approximation from function values at node points.
Definition: cheb_utils.hpp:50
static void EvalBasis1D(Integer order, const Vector< ValueType > &x, Vector< ValueType > &y)
Returns the values of all Chebyshev polynomials up to degree d, evaluated at points in the input vect...
Definition: cheb_utils.hpp:1406
static void Grad(Integer order, const Vector< ValueType > &coeff_in, Vector< ValueType > *coeff_out)
Compute gradient.
Definition: cheb_utils.hpp:260
Definition: kernel.hpp:14