From 5c29e90388446e0076de557f47d390a024a55308 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?St=C3=A9phane=20Del=20Pino?= <stephane.delpino44@gmail.com>
Date: Fri, 12 Jul 2024 18:48:05 +0200
Subject: [PATCH] Improve performances for reconstruction using quadrature

A quadrature approach is (a priori) needed for polynomial
reconstructions of degrees higher than 1
---
 .../PolynomialCenteredCanonicalBasisView.hpp  |   4 +-
 src/scheme/PolynomialReconstruction.cpp       | 199 +++++++++++++++---
 src/scheme/test_reconstruction.cpp            |  52 ++---
 tests/test_PolynomialReconstruction.cpp       |  10 +-
 4 files changed, 204 insertions(+), 61 deletions(-)

diff --git a/src/scheme/PolynomialCenteredCanonicalBasisView.hpp b/src/scheme/PolynomialCenteredCanonicalBasisView.hpp
index dbe9a5d20..2417ec159 100644
--- a/src/scheme/PolynomialCenteredCanonicalBasisView.hpp
+++ b/src/scheme/PolynomialCenteredCanonicalBasisView.hpp
@@ -135,8 +135,8 @@ class PolynomialCenteredCanonicalBasisView
       const double x_xj = (x - m_xj)[0];
 
       std::remove_const_t<DataType> result = m_coefficients[m_degree];
-      for (ssize_t i_coeffiencient = m_degree - 1; i_coeffiencient >= 0; --i_coeffiencient) {
-        result = x_xj * result + m_coefficients[i_coeffiencient];
+      for (ssize_t i = m_degree - 1; i >= 0; --i) {
+        result = x_xj * result + m_coefficients[i];
       }
 
       return result;
diff --git a/src/scheme/PolynomialReconstruction.cpp b/src/scheme/PolynomialReconstruction.cpp
index 7557c284e..a105a1207 100644
--- a/src/scheme/PolynomialReconstruction.cpp
+++ b/src/scheme/PolynomialReconstruction.cpp
@@ -3,6 +3,13 @@
 #include <scheme/DiscreteFunctionUtils.hpp>
 #include <scheme/DiscreteFunctionVariant.hpp>
 
+#include <analysis/GaussLegendreQuadratureDescriptor.hpp>
+#include <analysis/GaussQuadratureDescriptor.hpp>
+#include <analysis/QuadratureFormula.hpp>
+#include <analysis/QuadratureManager.hpp>
+#include <geometry/SquareTransformation.hpp>
+#include <geometry/TriangleTransformation.hpp>
+
 class PolynomialReconstruction::MutableDiscreteFunctionDPkVariant
 {
  public:
@@ -105,12 +112,6 @@ PolynomialReconstruction::_build(
   const std::shared_ptr<const MeshType>& p_mesh,
   const std::vector<std::shared_ptr<const DiscreteFunctionVariant>>& discrete_function_variant_list) const
 {
-  const size_t degree = m_descriptor.degree();
-
-  if (degree != 1) {
-    throw NotImplementedError("only degree 1 is available");
-  }
-
   const MeshType& mesh = *p_mesh;
 
   using Rd = TinyVector<MeshType::Dimension>;
@@ -146,12 +147,17 @@ PolynomialReconstruction::_build(
   }();
 
   const size_t basis_dimension =
-    DiscreteFunctionDPk<MeshType::Dimension, double>::BasisViewType::dimensionFromDegree(degree);
+    DiscreteFunctionDPk<MeshType::Dimension, double>::BasisViewType::dimensionFromDegree(m_descriptor.degree());
 
   const Stencil& stencil = StencilManager::instance().getStencil(mesh.connectivity());
 
-  auto xj            = MeshDataManager::instance().getMeshData(mesh).xj();
-  auto cell_is_owned = mesh.connectivity().cellIsOwned();
+  auto xr = mesh.xr();
+  auto xj = MeshDataManager::instance().getMeshData(mesh).xj();
+  auto Vj = MeshDataManager::instance().getMeshData(mesh).Vj();
+
+  auto cell_is_owned       = mesh.connectivity().cellIsOwned();
+  auto cell_type           = mesh.connectivity().cellType();
+  auto cell_to_node_matrix = mesh.connectivity().cellToNodeMatrix();
 
   const size_t max_stencil_size = [&]() {
     size_t max_size = 0;
@@ -179,11 +185,12 @@ PolynomialReconstruction::_build(
         if constexpr (is_discrete_function_P0_v<DiscreteFunctionT>) {
           using DataType = std::remove_const_t<std::decay_t<typename DiscreteFunctionT::data_type>>;
           mutable_discrete_function_dpk_variant_list.push_back(
-            DiscreteFunctionDPk<MeshType::Dimension, DataType>(p_mesh, degree));
+            DiscreteFunctionDPk<MeshType::Dimension, DataType>(p_mesh, m_descriptor.degree()));
         } else if constexpr (is_discrete_function_P0_vector_v<DiscreteFunctionT>) {
           using DataType = std::remove_const_t<std::decay_t<typename DiscreteFunctionT::data_type>>;
           mutable_discrete_function_dpk_variant_list.push_back(
-            DiscreteFunctionDPkVector<MeshType::Dimension, DataType>(p_mesh, degree, discrete_function.size()));
+            DiscreteFunctionDPkVector<MeshType::Dimension, DataType>(p_mesh, m_descriptor.degree(),
+                                                                     discrete_function.size()));
         } else {
           // LCOV_EXCL_START
           throw UnexpectedError("unexpected discrete function type");
@@ -198,11 +205,19 @@ PolynomialReconstruction::_build(
   SmallArray<SmallArray<double>> G_pool(Kokkos::DefaultExecutionSpace::concurrency());
   SmallArray<SmallMatrix<double>> X_pool(Kokkos::DefaultExecutionSpace::concurrency());
 
+  SmallArray<SmallArray<double>> inv_Vj_wq_detJ_ek_pool(Kokkos::DefaultExecutionSpace::concurrency());
+  SmallArray<SmallArray<double>> mean_j_of_ejk_pool(Kokkos::DefaultExecutionSpace::concurrency());
+  SmallArray<SmallArray<double>> mean_i_of_ejk_pool(Kokkos::DefaultExecutionSpace::concurrency());
+
   for (size_t i = 0; i < A_pool.size(); ++i) {
     A_pool[i] = SmallMatrix<double>(max_stencil_size, basis_dimension - 1);
     B_pool[i] = SmallMatrix<double>(max_stencil_size, number_of_columns);
     G_pool[i] = SmallArray<double>(basis_dimension - 1);
     X_pool[i] = SmallMatrix<double>(basis_dimension - 1, number_of_columns);
+
+    inv_Vj_wq_detJ_ek_pool[i] = SmallArray<double>(basis_dimension);
+    mean_j_of_ejk_pool[i]     = SmallArray<double>(basis_dimension - 1);
+    mean_i_of_ejk_pool[i]     = SmallArray<double>(basis_dimension - 1);
   }
 
   parallel_for(
@@ -224,20 +239,20 @@ PolynomialReconstruction::_build(
               using DiscreteFunctionT = std::decay_t<decltype(discrete_function)>;
               if constexpr (is_discrete_function_P0_v<DiscreteFunctionT>) {
                 using DataType     = std::decay_t<typename DiscreteFunctionT::data_type>;
-                const DataType& pj = discrete_function[cell_j_id];
+                const DataType& qj = discrete_function[cell_j_id];
                 for (size_t i = 0; i < stencil_cell_list.size(); ++i) {
                   const CellId cell_i_id = stencil_cell_list[i];
-                  const DataType& pi_pj  = discrete_function[cell_i_id] - pj;
+                  const DataType& qi_qj  = discrete_function[cell_i_id] - qj;
                   if constexpr (std::is_arithmetic_v<DataType>) {
-                    B(i, column_begin) = pi_pj;
+                    B(i, column_begin) = qi_qj;
                   } else if constexpr (is_tiny_vector_v<DataType>) {
                     for (size_t kB = column_begin, k = 0; k < DataType::Dimension; ++k, ++kB) {
-                      B(i, kB) = pi_pj[k];
+                      B(i, kB) = qi_qj[k];
                     }
                   } else if constexpr (is_tiny_matrix_v<DataType>) {
                     for (size_t k = 0; k < DataType::NumberOfRows; ++k) {
                       for (size_t l = 0; l < DataType::NumberOfColumns; ++l) {
-                        B(i, column_begin + k * DataType::NumberOfColumns + l) = pi_pj(k, l);
+                        B(i, column_begin + k * DataType::NumberOfColumns + l) = qi_qj(k, l);
                       }
                     }
                   }
@@ -250,16 +265,16 @@ PolynomialReconstruction::_build(
                 }
               } else if constexpr (is_discrete_function_P0_vector_v<DiscreteFunctionT>) {
                 using DataType       = std::decay_t<typename DiscreteFunctionT::data_type>;
-                const auto pj_vector = discrete_function[cell_j_id];
-                for (size_t l = 0; l < pj_vector.size(); ++l) {
-                  const DataType& pj = pj_vector[l];
+                const auto qj_vector = discrete_function[cell_j_id];
+                for (size_t l = 0; l < qj_vector.size(); ++l) {
+                  const DataType& qj = qj_vector[l];
                   for (size_t i = 0; i < stencil_cell_list.size(); ++i) {
                     const CellId cell_i_id = stencil_cell_list[i];
-                    const DataType& pi_pj  = discrete_function[cell_i_id][l] - pj;
-                    B(i, column_begin + l) = pi_pj;
+                    const DataType& qi_qj  = discrete_function[cell_i_id][l] - qj;
+                    B(i, column_begin + l) = qi_qj;
                   }
                 }
-                column_begin += pj_vector.size();
+                column_begin += qj_vector.size();
               } else {
                 // LCOV_EXCL_START
                 throw UnexpectedError("invalid discrete function type");
@@ -271,19 +286,141 @@ PolynomialReconstruction::_build(
 
         ShrinkMatrixView A(A_pool[t], stencil_cell_list.size());
 
-        const Rd& Xj = xj[cell_j_id];
+#warning remove after rework
+        std::string ENV_SWITCH = []() {
+          auto value = std::getenv("INTEGRATE");
+          if (value == nullptr) {
+            return std::string{""};
+          } else {
+            return std::string{value};
+          }
+        }();
 
-        for (size_t i = 0; i < stencil_cell_list.size(); ++i) {
-          const CellId cell_i_id = stencil_cell_list[i];
-          const Rd Xi_Xj         = xj[cell_i_id] - Xj;
-          for (size_t l = 0; l < basis_dimension - 1; ++l) {
-            A(i, l) = Xi_Xj[l];
+        if ((m_descriptor.degree() == 1) and ((MeshType::Dimension != 2) or (ENV_SWITCH == ""))) {
+          const Rd& Xj = xj[cell_j_id];
+          for (size_t i = 0; i < stencil_cell_list.size(); ++i) {
+            const CellId cell_i_id = stencil_cell_list[i];
+            const Rd Xi_Xj         = xj[cell_i_id] - Xj;
+            for (size_t l = 0; l < basis_dimension - 1; ++l) {
+              A(i, l) = Xi_Xj[l];
+            }
+          }
+        } else {
+          if constexpr (MeshType::Dimension == 2) {
+            SmallArray<double> inv_Vj_wq_detJ_ek = inv_Vj_wq_detJ_ek_pool[t];
+            SmallArray<double> mean_j_of_ejk     = mean_j_of_ejk_pool[t];
+            SmallArray<double> mean_i_of_ejk     = mean_i_of_ejk_pool[t];
+
+            auto compute_mean_ejk = [&inv_Vj_wq_detJ_ek](const auto& quadrature, const auto& T, const Rd& Xj,
+                                                         const double Vi, const size_t degree, const size_t dimension,
+                                                         SmallArray<double>& mean_of_ejk) {
+              mean_of_ejk.fill(0);
+
+              for (size_t i_q = 0; i_q < quadrature.numberOfPoints(); ++i_q) {
+                const double wq   = quadrature.weight(i_q);
+                const Rd& xi_q    = quadrature.point(i_q);
+                const double detT = [&] {
+                  if constexpr (std::is_same_v<TriangleTransformation<2>, std::decay_t<decltype(T)>>) {
+                    return T.jacobianDeterminant();
+                  } else {
+                    return T.jacobianDeterminant(xi_q);
+                  }
+                }();
+
+                const Rd X_Xj = T(xi_q) - Xj;
+
+                const double x_xj = X_Xj[0];
+                const double y_yj = X_Xj[1];
+
+                {
+                  size_t k               = 0;
+                  inv_Vj_wq_detJ_ek[k++] = wq * detT / Vi;
+                  for (; k <= degree; ++k) {
+                    inv_Vj_wq_detJ_ek[k] = x_xj * inv_Vj_wq_detJ_ek[k - 1];
+                  }
+
+                  for (size_t i_y = 1; i_y <= degree; ++i_y) {
+                    const size_t begin_i_y_1 = ((i_y - 1) * (2 * degree - i_y + 4)) / 2;
+                    for (size_t l = 0; l <= degree - i_y; ++l) {
+                      inv_Vj_wq_detJ_ek[k++] = y_yj * inv_Vj_wq_detJ_ek[begin_i_y_1 + l];
+                    }
+                  }
+                }
+
+                for (size_t k = 1; k < dimension; ++k) {
+                  mean_of_ejk[k - 1] += inv_Vj_wq_detJ_ek[k];
+                }
+              }
+            };
+
+            const Rd& Xj = xj[cell_j_id];
+
+            if (cell_type[cell_j_id] == CellType::Triangle) {
+              auto cell_node = cell_to_node_matrix[cell_j_id];
+
+              const TriangleTransformation<2> T{xr[cell_node[0]], xr[cell_node[1]], xr[cell_node[2]]};
+
+              const auto& quadrature =
+                QuadratureManager::instance().getTriangleFormula(GaussQuadratureDescriptor{m_descriptor.degree()});
+
+              compute_mean_ejk(quadrature, T, Xj, Vj[cell_j_id], m_descriptor.degree(), basis_dimension, mean_j_of_ejk);
+
+            } else if (cell_type[cell_j_id] == CellType::Quadrangle) {
+              auto cell_node = cell_to_node_matrix[cell_j_id];
+
+              const SquareTransformation<2> T{xr[cell_node[0]], xr[cell_node[1]], xr[cell_node[2]], xr[cell_node[3]]};
+
+              const auto& quadrature = QuadratureManager::instance().getSquareFormula(
+                GaussLegendreQuadratureDescriptor{m_descriptor.degree() + 1});
+
+              compute_mean_ejk(quadrature, T, Xj, Vj[cell_j_id], m_descriptor.degree(), basis_dimension, mean_j_of_ejk);
+
+            } else {
+              throw NotImplementedError("unexpected cell type");
+            }
+
+            for (size_t i = 0; i < stencil_cell_list.size(); ++i) {
+              const CellId cell_i_id = stencil_cell_list[i];
+
+              auto cell_node = cell_to_node_matrix[cell_i_id];
+
+              if (cell_type[cell_i_id] == CellType::Triangle) {
+                const TriangleTransformation<2> T{xr[cell_node[0]], xr[cell_node[1]], xr[cell_node[2]]};
+
+                const auto& quadrature =
+                  QuadratureManager::instance().getTriangleFormula(GaussQuadratureDescriptor{m_descriptor.degree()});
+
+                compute_mean_ejk(quadrature, T, Xj, Vj[cell_i_id], m_descriptor.degree(), basis_dimension,
+                                 mean_i_of_ejk);
+
+              } else if (cell_type[cell_i_id] == CellType::Quadrangle) {
+                const SquareTransformation<2> T{xr[cell_node[0]], xr[cell_node[1]], xr[cell_node[2]], xr[cell_node[3]]};
+
+                const auto& quadrature = QuadratureManager::instance().getSquareFormula(
+                  GaussLegendreQuadratureDescriptor{m_descriptor.degree() + 1});
+
+                compute_mean_ejk(quadrature, T, Xj, Vj[cell_i_id], m_descriptor.degree(), basis_dimension,
+                                 mean_i_of_ejk);
+
+              } else {
+                throw NotImplementedError("unexpected cell type");
+              }
+
+              for (size_t l = 0; l < basis_dimension - 1; ++l) {
+                A(i, l) = mean_i_of_ejk[l] - mean_j_of_ejk[l];
+              }
+            }
+
+          } else {
+            throw NotImplementedError("unexpected dimension");
           }
         }
 
         if (m_descriptor.rowWeighting()) {
           // Add row weighting (give more importance to cells that are
           // closer to j)
+          const Rd& Xj = xj[cell_j_id];
+
           for (size_t i = 0; i < stencil_cell_list.size(); ++i) {
             const CellId cell_i_id = stencil_cell_list[i];
             const double weight    = 1. / l2Norm(xj[cell_i_id] - Xj);
@@ -331,6 +468,12 @@ PolynomialReconstruction::_build(
           Givens::solveCollectionInPlace(A, X, B);
         }
 
+        // Results are now written from the {ejk} basis to the {ek} basis
+        if (m_descriptor.degree() > 1) {
+          throw NotImplementedError(
+            "write the result from the {ejk} basis to the {ek} basis to get correct polynomial values");
+        }
+
         column_begin = 0;
         for (size_t i_dpk_variant = 0; i_dpk_variant < mutable_discrete_function_dpk_variant_list.size();
              ++i_dpk_variant) {
diff --git a/src/scheme/test_reconstruction.cpp b/src/scheme/test_reconstruction.cpp
index 6639b8493..42ec98cc9 100644
--- a/src/scheme/test_reconstruction.cpp
+++ b/src/scheme/test_reconstruction.cpp
@@ -58,32 +58,32 @@ void
 test_reconstruction(const std::vector<std::shared_ptr<const DiscreteFunctionVariant>>& discrete_function_variant_list)
 {
   PolynomialReconstructionDescriptor descriptor{1};
-  {
-    std::cout << "** variable list one by one (50 times)\n";
-    Timer t;
-    for (auto discrete_function_v : discrete_function_variant_list) {
-      std::visit(
-        [&](auto&& discrete_function) {
-          auto mesh_v = discrete_function.meshVariant();
-          std::visit(
-            [&](auto&& p_mesh) {
-              using DiscreteFunctionT = std::decay_t<decltype(discrete_function)>;
-              PolynomialReconstruction reconstruction{descriptor};
-              if constexpr (is_discrete_function_P0_v<DiscreteFunctionT>) {
-                for (size_t i = 0; i < 50; ++i) {
-                  auto res = reconstruction.build(*p_mesh, discrete_function);
-                }
-              } else if constexpr (is_discrete_function_P0_vector_v<DiscreteFunctionT>) {
-                std::cout << "Omitting discrete function p0 vector!\n";
-              }
-            },
-            mesh_v->variant());
-        },
-        discrete_function_v->discreteFunction());
-    }
-    t.pause();
-    std::cout << "t = " << t << '\n';
-  }
+  // {
+  //   std::cout << "** variable list one by one (50 times)\n";
+  //   Timer t;
+  //   for (auto discrete_function_v : discrete_function_variant_list) {
+  //     std::visit(
+  //       [&](auto&& discrete_function) {
+  //         auto mesh_v = discrete_function.meshVariant();
+  //         std::visit(
+  //           [&](auto&& p_mesh) {
+  //             using DiscreteFunctionT = std::decay_t<decltype(discrete_function)>;
+  //             PolynomialReconstruction reconstruction{descriptor};
+  //             if constexpr (is_discrete_function_P0_v<DiscreteFunctionT>) {
+  //               for (size_t i = 0; i < 50; ++i) {
+  //                 auto res = reconstruction.build(*p_mesh, discrete_function);
+  //               }
+  //             } else if constexpr (is_discrete_function_P0_vector_v<DiscreteFunctionT>) {
+  //               std::cout << "Omitting discrete function p0 vector!\n";
+  //             }
+  //           },
+  //           mesh_v->variant());
+  //       },
+  //       discrete_function_v->discreteFunction());
+  //   }
+  //   t.pause();
+  //   std::cout << "t = " << t << '\n';
+  // }
 
   {
     std::cout << "** variable list at once (50 times)\n";
diff --git a/tests/test_PolynomialReconstruction.cpp b/tests/test_PolynomialReconstruction.cpp
index d9b33b911..0e9398fab 100644
--- a/tests/test_PolynomialReconstruction.cpp
+++ b/tests/test_PolynomialReconstruction.cpp
@@ -438,7 +438,7 @@ TEST_CASE("PolynomialReconstruction", "[scheme]")
 
                 max_x_slope_error = std::max(max_x_slope_error, std::abs(reconstructed_slope - 1.7));
               }
-              REQUIRE(parallel::allReduceMax(max_x_slope_error) == Catch::Approx(0).margin(1E-14));
+              REQUIRE(parallel::allReduceMax(max_x_slope_error) == Catch::Approx(0).margin(1E-13));
             }
 
             {
@@ -498,7 +498,7 @@ TEST_CASE("PolynomialReconstruction", "[scheme]")
 
                 max_x_slope_error = std::max(max_x_slope_error, l2Norm(reconstructed_slope - R3{1.7, -0.6, 3.1}));
               }
-              REQUIRE(parallel::allReduceMax(max_x_slope_error) == Catch::Approx(0).margin(1E-14));
+              REQUIRE(parallel::allReduceMax(max_x_slope_error) == Catch::Approx(0).margin(1E-13));
             }
 
             {
@@ -509,7 +509,7 @@ TEST_CASE("PolynomialReconstruction", "[scheme]")
 
                 max_y_slope_error = std::max(max_y_slope_error, l2Norm(reconstructed_slope - R3{-2.2, 1.3, -1.1}));
               }
-              REQUIRE(parallel::allReduceMax(max_y_slope_error) == Catch::Approx(0).margin(1E-14));
+              REQUIRE(parallel::allReduceMax(max_y_slope_error) == Catch::Approx(0).margin(1E-13));
             }
           }
         }
@@ -630,7 +630,7 @@ TEST_CASE("PolynomialReconstruction", "[scheme]")
                   max_x_slope_error = std::max(max_x_slope_error, std::abs(reconstructed_slope - slope[i]));
                 }
               }
-              REQUIRE(parallel::allReduceMax(max_x_slope_error) == Catch::Approx(0).margin(1E-14));
+              REQUIRE(parallel::allReduceMax(max_x_slope_error) == Catch::Approx(0).margin(1E-13));
             }
 
             {
@@ -644,7 +644,7 @@ TEST_CASE("PolynomialReconstruction", "[scheme]")
                   max_y_slope_error = std::max(max_y_slope_error, std::abs(reconstructed_slope - slope[i]));
                 }
               }
-              REQUIRE(parallel::allReduceMax(max_y_slope_error) == Catch::Approx(0).margin(1E-14));
+              REQUIRE(parallel::allReduceMax(max_y_slope_error) == Catch::Approx(0).margin(1E-13));
             }
           }
         }
-- 
GitLab