#include <scheme/PolynomialReconstruction.hpp>

#include <algebra/Givens.hpp>
#include <algebra/ShrinkMatrixView.hpp>
#include <algebra/ShrinkVectorView.hpp>
#include <algebra/SmallMatrix.hpp>
#include <analysis/GaussLegendreQuadratureDescriptor.hpp>
#include <analysis/GaussQuadratureDescriptor.hpp>
#include <analysis/QuadratureFormula.hpp>
#include <analysis/QuadratureManager.hpp>
#include <geometry/CubeTransformation.hpp>
#include <geometry/LineTransformation.hpp>
#include <geometry/PrismTransformation.hpp>
#include <geometry/PyramidTransformation.hpp>
#include <geometry/SquareTransformation.hpp>
#include <geometry/TetrahedronTransformation.hpp>
#include <geometry/TriangleTransformation.hpp>
#include <mesh/MeshData.hpp>
#include <mesh/MeshDataManager.hpp>
#include <mesh/MeshFlatFaceBoundary.hpp>
#include <mesh/NamedBoundaryDescriptor.hpp>
#include <mesh/StencilDescriptor.hpp>
#include <mesh/StencilManager.hpp>
#include <scheme/DiscreteFunctionDPkVariant.hpp>
#include <scheme/DiscreteFunctionUtils.hpp>
#include <scheme/DiscreteFunctionVariant.hpp>

template <size_t Dimension>
PUGS_INLINE auto
symmetrize_vector(const TinyVector<Dimension>& normal, const TinyVector<Dimension>& u)
{
  return u - 2 * dot(u, normal) * normal;
}

template <size_t Dimension>
PUGS_INLINE auto
symmetrize_matrix(const TinyVector<Dimension>& normal, const TinyMatrix<Dimension>& A)
{
  const TinyMatrix S = TinyMatrix<Dimension>{identity} - 2 * tensorProduct(normal, normal);
  return S * A * S;
}

template <size_t Dimension>
PUGS_INLINE auto
symmetrize_coordinates(const TinyVector<Dimension>& origin,
                       const TinyVector<Dimension>& normal,
                       const TinyVector<Dimension>& u)
{
  return u - 2 * dot(u - origin, normal) * normal;
}

class PolynomialReconstruction::MutableDiscreteFunctionDPkVariant
{
 public:
  using Variant = std::variant<DiscreteFunctionDPk<1, double>,
                               DiscreteFunctionDPk<1, TinyVector<1>>,
                               DiscreteFunctionDPk<1, TinyVector<2>>,
                               DiscreteFunctionDPk<1, TinyVector<3>>,
                               DiscreteFunctionDPk<1, TinyMatrix<1>>,
                               DiscreteFunctionDPk<1, TinyMatrix<2>>,
                               DiscreteFunctionDPk<1, TinyMatrix<3>>,

                               DiscreteFunctionDPk<2, double>,
                               DiscreteFunctionDPk<2, TinyVector<1>>,
                               DiscreteFunctionDPk<2, TinyVector<2>>,
                               DiscreteFunctionDPk<2, TinyVector<3>>,
                               DiscreteFunctionDPk<2, TinyMatrix<1>>,
                               DiscreteFunctionDPk<2, TinyMatrix<2>>,
                               DiscreteFunctionDPk<2, TinyMatrix<3>>,

                               DiscreteFunctionDPk<3, double>,
                               DiscreteFunctionDPk<3, TinyVector<1>>,
                               DiscreteFunctionDPk<3, TinyVector<2>>,
                               DiscreteFunctionDPk<3, TinyVector<3>>,
                               DiscreteFunctionDPk<3, TinyMatrix<1>>,
                               DiscreteFunctionDPk<3, TinyMatrix<2>>,
                               DiscreteFunctionDPk<3, TinyMatrix<3>>,

                               DiscreteFunctionDPkVector<1, double>,
                               DiscreteFunctionDPkVector<1, TinyVector<1>>,
                               DiscreteFunctionDPkVector<1, TinyVector<2>>,
                               DiscreteFunctionDPkVector<1, TinyVector<3>>,
                               DiscreteFunctionDPkVector<1, TinyMatrix<1>>,
                               DiscreteFunctionDPkVector<1, TinyMatrix<2>>,
                               DiscreteFunctionDPkVector<1, TinyMatrix<3>>,

                               DiscreteFunctionDPkVector<2, double>,
                               DiscreteFunctionDPkVector<2, TinyVector<1>>,
                               DiscreteFunctionDPkVector<2, TinyVector<2>>,
                               DiscreteFunctionDPkVector<2, TinyVector<3>>,
                               DiscreteFunctionDPkVector<2, TinyMatrix<1>>,
                               DiscreteFunctionDPkVector<2, TinyMatrix<2>>,
                               DiscreteFunctionDPkVector<2, TinyMatrix<3>>,

                               DiscreteFunctionDPkVector<3, double>,
                               DiscreteFunctionDPkVector<3, TinyVector<1>>,
                               DiscreteFunctionDPkVector<3, TinyVector<2>>,
                               DiscreteFunctionDPkVector<3, TinyVector<3>>,
                               DiscreteFunctionDPkVector<3, TinyMatrix<1>>,
                               DiscreteFunctionDPkVector<3, TinyMatrix<2>>,
                               DiscreteFunctionDPkVector<3, TinyMatrix<3>>>;

 private:
  Variant m_mutable_discrete_function_dpk;

 public:
  PUGS_INLINE
  const Variant&
  mutableDiscreteFunctionDPk() const
  {
    return m_mutable_discrete_function_dpk;
  }

  template <typename DiscreteFunctionDPkT>
  PUGS_INLINE auto&&
  get() const
  {
    static_assert(is_discrete_function_dpk_v<DiscreteFunctionDPkT>, "invalid template argument");

#ifndef NDEBUG
    if (not std::holds_alternative<DiscreteFunctionDPkT>(this->m_mutable_discrete_function_dpk)) {
      std::ostringstream error_msg;
      error_msg << "invalid discrete function type\n";
      error_msg << "- required " << rang::fgB::red << demangle<DiscreteFunctionDPkT>() << rang::fg::reset << '\n';
      error_msg << "- contains " << rang::fgB::yellow
                << std::visit([](auto&& f) -> std::string { return demangle<decltype(f)>(); },
                              this->m_mutable_discrete_function_dpk)
                << rang::fg::reset;
      throw NormalError(error_msg.str());
    }
#endif   // NDEBUG

    return std::get<DiscreteFunctionDPkT>(this->mutableDiscreteFunctionDPk());
  }

  template <size_t Dimension, typename DataType>
  MutableDiscreteFunctionDPkVariant(const DiscreteFunctionDPk<Dimension, DataType>& discrete_function_dpk)
    : m_mutable_discrete_function_dpk{discrete_function_dpk}
  {
    static_assert(std::is_same_v<DataType, double> or                       //
                    std::is_same_v<DataType, TinyVector<1, double>> or      //
                    std::is_same_v<DataType, TinyVector<2, double>> or      //
                    std::is_same_v<DataType, TinyVector<3, double>> or      //
                    std::is_same_v<DataType, TinyMatrix<1, 1, double>> or   //
                    std::is_same_v<DataType, TinyMatrix<2, 2, double>> or   //
                    std::is_same_v<DataType, TinyMatrix<3, 3, double>>,
                  "DiscreteFunctionDPk with this DataType is not allowed in variant");
  }

  template <size_t Dimension, typename DataType>
  MutableDiscreteFunctionDPkVariant(const DiscreteFunctionDPkVector<Dimension, DataType>& discrete_function_dpk)
    : m_mutable_discrete_function_dpk{discrete_function_dpk}
  {
    static_assert(std::is_same_v<DataType, double> or                       //
                    std::is_same_v<DataType, TinyVector<1, double>> or      //
                    std::is_same_v<DataType, TinyVector<2, double>> or      //
                    std::is_same_v<DataType, TinyVector<3, double>> or      //
                    std::is_same_v<DataType, TinyMatrix<1, 1, double>> or   //
                    std::is_same_v<DataType, TinyMatrix<2, 2, double>> or   //
                    std::is_same_v<DataType, TinyMatrix<3, 3, double>>,
                  "DiscreteFunctionDPkVector with this DataType is not allowed in variant");
  }

  MutableDiscreteFunctionDPkVariant& operator=(MutableDiscreteFunctionDPkVariant&&)      = default;
  MutableDiscreteFunctionDPkVariant& operator=(const MutableDiscreteFunctionDPkVariant&) = default;

  MutableDiscreteFunctionDPkVariant(const MutableDiscreteFunctionDPkVariant&) = default;
  MutableDiscreteFunctionDPkVariant(MutableDiscreteFunctionDPkVariant&&)      = default;

  MutableDiscreteFunctionDPkVariant()  = delete;
  ~MutableDiscreteFunctionDPkVariant() = default;
};

template <typename ConformTransformationT>
void
_computeEjkMean(const QuadratureFormula<1>& quadrature,
                const ConformTransformationT& T,
                const TinyVector<1>& Xj,
                const double Vi,
                const size_t degree,
                const size_t dimension,
                SmallArray<double>& inv_Vj_wq_detJ_ek,
                SmallArray<double>& mean_of_ejk)
{
  using Rd = TinyVector<1>;
  mean_of_ejk.fill(0);

  for (size_t i_q = 0; i_q < quadrature.numberOfPoints(); ++i_q) {
    const double wq   = quadrature.weight(i_q);
    const Rd& xi_q    = quadrature.point(i_q);
    const double detT = T.jacobianDeterminant();

    const Rd X_Xj = T(xi_q) - Xj;

    const double x_xj = X_Xj[0];

    {
      size_t k               = 0;
      inv_Vj_wq_detJ_ek[k++] = wq * detT / Vi;
      for (; k <= degree; ++k) {
        inv_Vj_wq_detJ_ek[k] = x_xj * inv_Vj_wq_detJ_ek[k - 1];
      }
    }

    for (size_t k = 1; k < dimension; ++k) {
      mean_of_ejk[k - 1] += inv_Vj_wq_detJ_ek[k];
    }
  }
}

template <typename ConformTransformationT>
void
_computeEjkMean(const QuadratureFormula<2>& quadrature,
                const ConformTransformationT& T,
                const TinyVector<2>& Xj,
                const double Vi,
                const size_t degree,
                const size_t dimension,
                SmallArray<double>& inv_Vj_wq_detJ_ek,
                SmallArray<double>& mean_of_ejk)
{
  using Rd = TinyVector<2>;

  mean_of_ejk.fill(0);

  for (size_t i_q = 0; i_q < quadrature.numberOfPoints(); ++i_q) {
    const double wq   = quadrature.weight(i_q);
    const Rd& xi_q    = quadrature.point(i_q);
    const double detT = [&] {
      if constexpr (std::is_same_v<TriangleTransformation<2>, std::decay_t<decltype(T)>>) {
        return T.jacobianDeterminant();
      } else {
        return T.jacobianDeterminant(xi_q);
      }
    }();

    const Rd X_Xj = T(xi_q) - Xj;

    const double x_xj = X_Xj[0];
    const double y_yj = X_Xj[1];

    {
      size_t k               = 0;
      inv_Vj_wq_detJ_ek[k++] = wq * detT / Vi;
      for (; k <= degree; ++k) {
        inv_Vj_wq_detJ_ek[k] = x_xj * inv_Vj_wq_detJ_ek[k - 1];
      }

      for (size_t i_y = 1; i_y <= degree; ++i_y) {
        const size_t begin_i_y_1 = ((i_y - 1) * (2 * degree - i_y + 4)) / 2;
        for (size_t l = 0; l <= degree - i_y; ++l) {
          inv_Vj_wq_detJ_ek[k++] = y_yj * inv_Vj_wq_detJ_ek[begin_i_y_1 + l];
        }
      }
    }

    for (size_t k = 1; k < dimension; ++k) {
      mean_of_ejk[k - 1] += inv_Vj_wq_detJ_ek[k];
    }
  }
}

template <typename ConformTransformationT>
void
_computeEjkMean(const QuadratureFormula<3>& quadrature,
                const ConformTransformationT& T,
                const TinyVector<3>& Xj,
                const double Vi,
                const size_t degree,
                const size_t dimension,
                SmallArray<double>& inv_Vj_wq_detJ_ek,
                SmallArray<double>& mean_of_ejk)
{
  using Rd = TinyVector<3>;
  mean_of_ejk.fill(0);

  for (size_t i_q = 0; i_q < quadrature.numberOfPoints(); ++i_q) {
    const double wq   = quadrature.weight(i_q);
    const Rd& xi_q    = quadrature.point(i_q);
    const double detT = [&] {
      if constexpr (std::is_same_v<TetrahedronTransformation, std::decay_t<decltype(T)>>) {
        return T.jacobianDeterminant();
      } else {
        return T.jacobianDeterminant(xi_q);
      }
    }();

    const Rd X_Xj = T(xi_q) - Xj;

    const double x_xj = X_Xj[0];
    const double y_yj = X_Xj[1];
    const double z_zj = X_Xj[2];

    {
      size_t k               = 0;
      inv_Vj_wq_detJ_ek[k++] = wq * detT / Vi;
      for (; k <= degree; ++k) {
        inv_Vj_wq_detJ_ek[k] = x_xj * inv_Vj_wq_detJ_ek[k - 1];
      }

      for (size_t i_y = 1; i_y <= degree; ++i_y) {
        const size_t begin_i_y_1 = ((i_y - 1) * (2 * degree - i_y + 4)) / 2;
        for (size_t l = 0; l <= degree - i_y; ++l) {
          inv_Vj_wq_detJ_ek[k++] = y_yj * inv_Vj_wq_detJ_ek[begin_i_y_1 + l];
        }
      }

      for (size_t i_z = 1; i_z <= degree; ++i_z) {
        const size_t begin_i_z_1 = ((i_z - 1) * (3 * (degree + 2) * (degree + 3) + (i_z - (3 * degree + 8)) * i_z)) / 6;
        for (size_t i_y = 0; i_y <= degree - i_z; ++i_y) {
          const size_t begin_i_y_1 = (i_y * (2 * degree - i_y + 3)) / 2 + begin_i_z_1;
          for (size_t l = 0; l <= degree - i_y - i_z; ++l) {
            inv_Vj_wq_detJ_ek[k++] = z_zj * inv_Vj_wq_detJ_ek[begin_i_y_1 + l];
          }
        }
      }
    }

    for (size_t k = 1; k < dimension; ++k) {
      mean_of_ejk[k - 1] += inv_Vj_wq_detJ_ek[k];
    }
  }
}

template <typename NodeListT, size_t Dimension>
void _computeEjkMean(const TinyVector<Dimension>& Xj,
                     const NodeValue<const TinyVector<Dimension>>& xr,
                     const NodeListT& node_list,
                     const CellType& cell_type,
                     const double Vi,
                     const size_t degree,
                     const size_t basis_dimension,
                     SmallArray<double>& inv_Vj_wq_detJ_ek,
                     SmallArray<double>& mean_of_ejk);

template <typename NodeListT>
void
_computeEjkMean(const TinyVector<1>& Xj,
                const NodeValue<const TinyVector<1>>& xr,
                const NodeListT& node_list,
                const CellType& cell_type,
                const double Vi,
                const size_t degree,
                const size_t basis_dimension,
                SmallArray<double>& inv_Vj_wq_detJ_ek,
                SmallArray<double>& mean_of_ejk)
{
  if (cell_type == CellType::Line) {
    const LineTransformation<1> T{xr[node_list[0]], xr[node_list[1]]};

    const auto& quadrature = QuadratureManager::instance().getLineFormula(GaussLegendreQuadratureDescriptor{degree});

    _computeEjkMean(quadrature, T, Xj, Vi, degree, basis_dimension, inv_Vj_wq_detJ_ek, mean_of_ejk);

  } else {
    throw NotImplementedError("unexpected cell type: " + std::string{name(cell_type)});
  }
}

template <typename NodeListT>
void
_computeEjkMeanInSymmetricCell(const TinyVector<1>& origin,
                               const TinyVector<1>& normal,
                               const TinyVector<1>& Xj,
                               const NodeValue<const TinyVector<1>>& xr,
                               const NodeListT& node_list,
                               const CellType& cell_type,
                               const double Vi,
                               const size_t degree,
                               const size_t basis_dimension,
                               SmallArray<double>& inv_Vj_wq_detJ_ek,
                               SmallArray<double>& mean_of_ejk)
{
  if (cell_type == CellType::Line) {
    const auto x0 = symmetrize_coordinates(origin, normal, xr[node_list[1]]);
    const auto x1 = symmetrize_coordinates(origin, normal, xr[node_list[0]]);

    const LineTransformation<1> T{x0, x1};

    const auto& quadrature = QuadratureManager::instance().getLineFormula(GaussLegendreQuadratureDescriptor{degree});

    _computeEjkMean(quadrature, T, Xj, Vi, degree, basis_dimension, inv_Vj_wq_detJ_ek, mean_of_ejk);

  } else {
    throw NotImplementedError("unexpected cell type: " + std::string{name(cell_type)});
  }
}

template <typename ConformTransformationT>
void _computeEjkBoundaryMean(const QuadratureFormula<1>& quadrature,
                             const ConformTransformationT& T,
                             const TinyVector<2>& Xj,
                             const double Vi,
                             const size_t degree,
                             const size_t dimension,
                             SmallArray<double>& inv_Vj_alpha_p_1_wq_X_prime_orth_ek,
                             SmallArray<double>& mean_of_ejk);

template <>
void
_computeEjkBoundaryMean(const QuadratureFormula<1>& quadrature,
                        const LineTransformation<2>& T,
                        const TinyVector<2>& Xj,
                        const double Vi,
                        const size_t degree,
                        const size_t dimension,
                        SmallArray<double>& inv_Vj_alpha_p_1_wq_X_prime_orth_ek,
                        SmallArray<double>& mean_of_ejk)
{
  using Rd = TinyVector<2>;

  const double velocity_perp_e1 = T.velocity()[1];

  for (size_t i_q = 0; i_q < quadrature.numberOfPoints(); ++i_q) {
    const double wq          = quadrature.weight(i_q);
    const TinyVector<1> xi_q = quadrature.point(i_q);

    const Rd X_Xj = T(xi_q) - Xj;

    const double x_xj = X_Xj[0];
    const double y_yj = X_Xj[1];

    {
      size_t k                                 = 0;
      inv_Vj_alpha_p_1_wq_X_prime_orth_ek[k++] = x_xj * wq * velocity_perp_e1 / Vi;
      for (; k <= degree; ++k) {
        inv_Vj_alpha_p_1_wq_X_prime_orth_ek[k] =
          x_xj * inv_Vj_alpha_p_1_wq_X_prime_orth_ek[k - 1] * ((1. * k) / (k + 1));
      }

      for (size_t i_y = 1; i_y <= degree; ++i_y) {
        const size_t begin_i_y_1 = ((i_y - 1) * (2 * degree - i_y + 4)) / 2;
        for (size_t l = 0; l <= degree - i_y; ++l) {
          inv_Vj_alpha_p_1_wq_X_prime_orth_ek[k++] = y_yj * inv_Vj_alpha_p_1_wq_X_prime_orth_ek[begin_i_y_1 + l];
        }
      }
    }

    for (size_t k = 1; k < dimension; ++k) {
      mean_of_ejk[k - 1] += inv_Vj_alpha_p_1_wq_X_prime_orth_ek[k];
    }
  }
}

template <MeshConcept MeshType>
void
_computeEjkMeanByBoundary(const MeshType& mesh,
                          const TinyVector<2>& Xj,
                          const CellId& cell_id,
                          const auto& cell_to_face_matrix,
                          const auto& face_to_node_matrix,
                          const auto& cell_face_is_reversed,
                          const CellValue<const double>& Vi,
                          const size_t degree,
                          const size_t basis_dimension,
                          SmallArray<double>& inv_Vj_alpha_p_1_wq_X_prime_orth_ek,
                          SmallArray<double>& mean_of_ejk)
{
  const auto& quadrature = QuadratureManager::instance().getLineFormula(GaussLegendreQuadratureDescriptor(degree + 1));

  auto xr = mesh.xr();
  mean_of_ejk.fill(0);
  auto cell_face_list = cell_to_face_matrix[cell_id];
  for (size_t i_face = 0; i_face < cell_face_list.size(); ++i_face) {
    bool is_reversed = cell_face_is_reversed[cell_id][i_face];

    const FaceId face_id = cell_face_list[i_face];
    auto face_node_list  = face_to_node_matrix[face_id];
    if (is_reversed) {
      const LineTransformation<2> T{xr[face_node_list[1]], xr[face_node_list[0]]};
      _computeEjkBoundaryMean(quadrature, T, Xj, Vi[cell_id], degree, basis_dimension,
                              inv_Vj_alpha_p_1_wq_X_prime_orth_ek, mean_of_ejk);
    } else {
      const LineTransformation<2> T{xr[face_node_list[0]], xr[face_node_list[1]]};
      _computeEjkBoundaryMean(quadrature, T, Xj, Vi[cell_id], degree, basis_dimension,
                              inv_Vj_alpha_p_1_wq_X_prime_orth_ek, mean_of_ejk);
    }
  }
}

void
_computeEjkMeanByBoundaryInSymmetricCell(const TinyVector<2>& origin,
                                         const TinyVector<2>& normal,
                                         const TinyVector<2>& Xj,
                                         const CellId& cell_id,
                                         const NodeValue<const TinyVector<2>>& xr,
                                         const auto& cell_to_face_matrix,
                                         const auto& face_to_node_matrix,
                                         const auto& cell_face_is_reversed,
                                         const CellValue<const double>& Vi,
                                         const size_t degree,
                                         const size_t basis_dimension,
                                         SmallArray<double>& inv_Vj_alpha_p_1_wq_X_prime_orth_ek,
                                         SmallArray<double>& mean_of_ejk)
{
  const auto& quadrature = QuadratureManager::instance().getLineFormula(GaussLegendreQuadratureDescriptor(degree + 1));

  mean_of_ejk.fill(0);
  auto cell_face_list = cell_to_face_matrix[cell_id];
  for (size_t i_face = 0; i_face < cell_face_list.size(); ++i_face) {
    bool is_reversed = cell_face_is_reversed[cell_id][i_face];

    const FaceId face_id = cell_face_list[i_face];
    auto face_node_list  = face_to_node_matrix[face_id];

    const auto x0 = symmetrize_coordinates(origin, normal, xr[face_node_list[1]]);
    const auto x1 = symmetrize_coordinates(origin, normal, xr[face_node_list[0]]);

    if (is_reversed) {
      const LineTransformation<2> T{x1, x0};
      _computeEjkBoundaryMean(quadrature, T, Xj, Vi[cell_id], degree, basis_dimension,
                              inv_Vj_alpha_p_1_wq_X_prime_orth_ek, mean_of_ejk);
    } else {
      const LineTransformation<2> T{x0, x1};
      _computeEjkBoundaryMean(quadrature, T, Xj, Vi[cell_id], degree, basis_dimension,
                              inv_Vj_alpha_p_1_wq_X_prime_orth_ek, mean_of_ejk);
    }
  }
}

template <typename NodeListT>
void
_computeEjkMean(const TinyVector<2>& Xj,
                const NodeValue<const TinyVector<2>>& xr,
                const NodeListT& node_list,
                const CellType& cell_type,
                const double Vi,
                const size_t degree,
                const size_t basis_dimension,
                SmallArray<double>& inv_Vj_wq_detJ_ek,
                SmallArray<double>& mean_of_ejk)
{
  switch (cell_type) {
  case CellType::Triangle: {
    const TriangleTransformation<2> T{xr[node_list[0]], xr[node_list[1]], xr[node_list[2]]};
    const auto& quadrature = QuadratureManager::instance().getTriangleFormula(GaussQuadratureDescriptor{degree});

    _computeEjkMean(quadrature, T, Xj, Vi, degree, basis_dimension, inv_Vj_wq_detJ_ek, mean_of_ejk);
    break;
  }
  case CellType::Quadrangle: {
    const SquareTransformation<2> T{xr[node_list[0]], xr[node_list[1]], xr[node_list[2]], xr[node_list[3]]};
    const auto& quadrature =
      QuadratureManager::instance().getSquareFormula(GaussLegendreQuadratureDescriptor{degree + 1});

    _computeEjkMean(quadrature, T, Xj, Vi, degree, basis_dimension, inv_Vj_wq_detJ_ek, mean_of_ejk);
    break;
  }
  default: {
    throw NotImplementedError("unexpected cell type: " + std::string{name(cell_type)});
  }
  }
}

template <typename NodeListT>
void
_computeEjkMeanInSymmetricCell(const TinyVector<2>& origin,
                               const TinyVector<2>& normal,
                               const TinyVector<2>& Xj,
                               const NodeValue<const TinyVector<2>>& xr,
                               const NodeListT& node_list,
                               const CellType& cell_type,
                               const double Vi,
                               const size_t degree,
                               const size_t basis_dimension,
                               SmallArray<double>& inv_Vj_wq_detJ_ek,
                               SmallArray<double>& mean_of_ejk)
{
  switch (cell_type) {
  case CellType::Triangle: {
    const auto x0 = symmetrize_coordinates(origin, normal, xr[node_list[2]]);
    const auto x1 = symmetrize_coordinates(origin, normal, xr[node_list[1]]);
    const auto x2 = symmetrize_coordinates(origin, normal, xr[node_list[0]]);

    const TriangleTransformation<2> T{x0, x1, x2};
    const auto& quadrature = QuadratureManager::instance().getTriangleFormula(GaussQuadratureDescriptor{degree});

    _computeEjkMean(quadrature, T, Xj, Vi, degree, basis_dimension, inv_Vj_wq_detJ_ek, mean_of_ejk);
    break;
  }
  case CellType::Quadrangle: {
    const auto x0 = symmetrize_coordinates(origin, normal, xr[node_list[3]]);
    const auto x1 = symmetrize_coordinates(origin, normal, xr[node_list[2]]);
    const auto x2 = symmetrize_coordinates(origin, normal, xr[node_list[1]]);
    const auto x3 = symmetrize_coordinates(origin, normal, xr[node_list[0]]);

    const SquareTransformation<2> T{x0, x1, x2, x3};
    const auto& quadrature =
      QuadratureManager::instance().getSquareFormula(GaussLegendreQuadratureDescriptor{degree + 1});

    _computeEjkMean(quadrature, T, Xj, Vi, degree, basis_dimension, inv_Vj_wq_detJ_ek, mean_of_ejk);
    break;
  }
  default: {
    throw NotImplementedError("unexpected cell type: " + std::string{name(cell_type)});
  }
  }
}

template <typename NodeListT>
void
_computeEjkMean(const TinyVector<3>& Xj,
                const NodeValue<const TinyVector<3>>& xr,
                const NodeListT& node_list,
                const CellType& cell_type,
                const double Vi,
                const size_t degree,
                const size_t basis_dimension,
                SmallArray<double>& inv_Vj_wq_detJ_ek,
                SmallArray<double>& mean_of_ejk)
{
  if (cell_type == CellType::Tetrahedron) {
    const TetrahedronTransformation T{xr[node_list[0]], xr[node_list[1]], xr[node_list[2]], xr[node_list[3]]};

    const auto& quadrature = QuadratureManager::instance().getTetrahedronFormula(GaussQuadratureDescriptor{degree});

    _computeEjkMean(quadrature, T, Xj, Vi, degree, basis_dimension, inv_Vj_wq_detJ_ek, mean_of_ejk);

  } else if (cell_type == CellType::Prism) {
    const PrismTransformation T{xr[node_list[0]], xr[node_list[1]], xr[node_list[2]],   //
                                xr[node_list[3]], xr[node_list[4]], xr[node_list[5]]};

    const auto& quadrature = QuadratureManager::instance().getPrismFormula(GaussQuadratureDescriptor{degree + 1});

    _computeEjkMean(quadrature, T, Xj, Vi, degree, basis_dimension, inv_Vj_wq_detJ_ek, mean_of_ejk);

  } else if (cell_type == CellType::Pyramid) {
    const PyramidTransformation T{xr[node_list[0]], xr[node_list[1]], xr[node_list[2]], xr[node_list[3]],
                                  xr[node_list[4]]};

    const auto& quadrature = QuadratureManager::instance().getPyramidFormula(GaussQuadratureDescriptor{degree + 1});

    _computeEjkMean(quadrature, T, Xj, Vi, degree, basis_dimension, inv_Vj_wq_detJ_ek, mean_of_ejk);

  } else if (cell_type == CellType::Hexahedron) {
    const CubeTransformation T{xr[node_list[0]], xr[node_list[1]], xr[node_list[2]], xr[node_list[3]],
                               xr[node_list[4]], xr[node_list[5]], xr[node_list[6]], xr[node_list[7]]};

    const auto& quadrature =
      QuadratureManager::instance().getCubeFormula(GaussLegendreQuadratureDescriptor{degree + 1});

    _computeEjkMean(quadrature, T, Xj, Vi, degree, basis_dimension, inv_Vj_wq_detJ_ek, mean_of_ejk);

  } else {
    throw NotImplementedError("unexpected cell type: " + std::string{name(cell_type)});
  }
}

template <typename NodeListT>
void
_computeEjkMeanInSymmetricCell(const TinyVector<3>& origin,
                               const TinyVector<3>& normal,
                               const TinyVector<3>& Xj,
                               const NodeValue<const TinyVector<3>>& xr,
                               const NodeListT& node_list,
                               const CellType& cell_type,
                               const double Vi,
                               const size_t degree,
                               const size_t basis_dimension,
                               SmallArray<double>& inv_Vj_wq_detJ_ek,
                               SmallArray<double>& mean_of_ejk)
{
  if (cell_type == CellType::Tetrahedron) {
    const auto x0 = symmetrize_coordinates(origin, normal, xr[node_list[1]]);
    const auto x1 = symmetrize_coordinates(origin, normal, xr[node_list[0]]);
    const auto x2 = symmetrize_coordinates(origin, normal, xr[node_list[2]]);
    const auto x3 = symmetrize_coordinates(origin, normal, xr[node_list[3]]);

    const TetrahedronTransformation T{x0, x1, x2, x3};

    const auto& quadrature = QuadratureManager::instance().getTetrahedronFormula(GaussQuadratureDescriptor{degree});

    _computeEjkMean(quadrature, T, Xj, Vi, degree, basis_dimension, inv_Vj_wq_detJ_ek, mean_of_ejk);

  } else if (cell_type == CellType::Prism) {
    const auto x0 = symmetrize_coordinates(origin, normal, xr[node_list[1]]);
    const auto x1 = symmetrize_coordinates(origin, normal, xr[node_list[0]]);
    const auto x2 = symmetrize_coordinates(origin, normal, xr[node_list[2]]);
    const auto x3 = symmetrize_coordinates(origin, normal, xr[node_list[4]]);
    const auto x4 = symmetrize_coordinates(origin, normal, xr[node_list[3]]);
    const auto x5 = symmetrize_coordinates(origin, normal, xr[node_list[5]]);

    const PrismTransformation T{x0, x1, x2,   //
                                x3, x4, x5};

    const auto& quadrature = QuadratureManager::instance().getPrismFormula(GaussQuadratureDescriptor{degree + 1});

    _computeEjkMean(quadrature, T, Xj, Vi, degree, basis_dimension, inv_Vj_wq_detJ_ek, mean_of_ejk);

  } else if (cell_type == CellType::Pyramid) {
    const auto x0 = symmetrize_coordinates(origin, normal, xr[node_list[3]]);
    const auto x1 = symmetrize_coordinates(origin, normal, xr[node_list[2]]);
    const auto x2 = symmetrize_coordinates(origin, normal, xr[node_list[1]]);
    const auto x3 = symmetrize_coordinates(origin, normal, xr[node_list[0]]);
    const auto x4 = symmetrize_coordinates(origin, normal, xr[node_list[4]]);
    const PyramidTransformation T{x0, x1, x2, x3, x4};

    const auto& quadrature = QuadratureManager::instance().getPyramidFormula(GaussQuadratureDescriptor{degree + 1});

    _computeEjkMean(quadrature, T, Xj, Vi, degree, basis_dimension, inv_Vj_wq_detJ_ek, mean_of_ejk);

  } else if (cell_type == CellType::Hexahedron) {
    const auto x0 = symmetrize_coordinates(origin, normal, xr[node_list[3]]);
    const auto x1 = symmetrize_coordinates(origin, normal, xr[node_list[2]]);
    const auto x2 = symmetrize_coordinates(origin, normal, xr[node_list[1]]);
    const auto x3 = symmetrize_coordinates(origin, normal, xr[node_list[0]]);
    const auto x4 = symmetrize_coordinates(origin, normal, xr[node_list[7]]);
    const auto x5 = symmetrize_coordinates(origin, normal, xr[node_list[6]]);
    const auto x6 = symmetrize_coordinates(origin, normal, xr[node_list[5]]);
    const auto x7 = symmetrize_coordinates(origin, normal, xr[node_list[4]]);

    const CubeTransformation T{x0, x1, x2, x3, x4, x5, x6, x7};

    const auto& quadrature =
      QuadratureManager::instance().getCubeFormula(GaussLegendreQuadratureDescriptor{degree + 1});

    _computeEjkMean(quadrature, T, Xj, Vi, degree, basis_dimension, inv_Vj_wq_detJ_ek, mean_of_ejk);

  } else {
    throw NotImplementedError("unexpected cell type: " + std::string{name(cell_type)});
  }
}

size_t
PolynomialReconstruction::_getNumberOfColumns(
  const std::vector<std::shared_ptr<const DiscreteFunctionVariant>>& discrete_function_variant_list) const
{
  size_t number_of_columns = 0;
  for (auto discrete_function_variant_p : discrete_function_variant_list) {
    number_of_columns += std::visit(
      [](auto&& discrete_function) -> size_t {
        using DiscreteFunctionT = std::decay_t<decltype(discrete_function)>;
        if constexpr (is_discrete_function_P0_v<DiscreteFunctionT>) {
          using data_type = std::decay_t<typename DiscreteFunctionT::data_type>;
          if constexpr (std::is_arithmetic_v<data_type>) {
            return 1;
          } else if constexpr (is_tiny_vector_v<data_type> or is_tiny_matrix_v<data_type>) {
            return data_type::Dimension;
          } else {
            // LCOV_EXCL_START
            throw UnexpectedError("unexpected data type " + demangle<data_type>());
            // LCOV_EXCL_STOP
          }
        } else if constexpr (is_discrete_function_P0_vector_v<DiscreteFunctionT>) {
          using data_type = std::decay_t<typename DiscreteFunctionT::data_type>;
          if constexpr (std::is_arithmetic_v<data_type>) {
            return discrete_function.size();
          } else if constexpr (is_tiny_vector_v<data_type> or is_tiny_matrix_v<data_type>) {
            return discrete_function.size() * data_type::Dimension;
          } else {
            // LCOV_EXCL_START
            throw UnexpectedError("unexpected data type " + demangle<data_type>());
            // LCOV_EXCL_STOP
          }
        } else {
          // LCOV_EXCL_START
          throw UnexpectedError("unexpected discrete function type");
          // LCOV_EXCL_STOP
        }
      },
      discrete_function_variant_p->discreteFunction());
  }
  return number_of_columns;
}

template <MeshConcept MeshType>
std::vector<PolynomialReconstruction::MutableDiscreteFunctionDPkVariant>
PolynomialReconstruction::_createMutableDiscreteFunctionDPKVariantList(
  const std::shared_ptr<const MeshType>& p_mesh,
  const std::vector<std::shared_ptr<const DiscreteFunctionVariant>>& discrete_function_variant_list) const
{
  std::vector<MutableDiscreteFunctionDPkVariant> mutable_discrete_function_dpk_variant_list;
  for (size_t i_discrete_function_variant = 0; i_discrete_function_variant < discrete_function_variant_list.size();
       ++i_discrete_function_variant) {
    auto discrete_function_variant = discrete_function_variant_list[i_discrete_function_variant];

    std::visit(
      [&](auto&& discrete_function) {
        using DiscreteFunctionT = std::decay_t<decltype(discrete_function)>;
        if constexpr (is_discrete_function_P0_v<DiscreteFunctionT>) {
          using DataType = std::remove_const_t<std::decay_t<typename DiscreteFunctionT::data_type>>;
          mutable_discrete_function_dpk_variant_list.push_back(
            DiscreteFunctionDPk<MeshType::Dimension, DataType>(p_mesh, m_descriptor.degree()));
        } else if constexpr (is_discrete_function_P0_vector_v<DiscreteFunctionT>) {
          using DataType = std::remove_const_t<std::decay_t<typename DiscreteFunctionT::data_type>>;
          mutable_discrete_function_dpk_variant_list.push_back(
            DiscreteFunctionDPkVector<MeshType::Dimension, DataType>(p_mesh, m_descriptor.degree(),
                                                                     discrete_function.size()));
        } else {
          // LCOV_EXCL_START
          throw UnexpectedError("unexpected discrete function type");
          // LCOV_EXCL_STOP
        }
      },
      discrete_function_variant->discreteFunction());
  }

  return mutable_discrete_function_dpk_variant_list;
}

template <MeshConcept MeshType>
void
PolynomialReconstruction::_checkDataAndSymmetriesCompatibility(
  const std::vector<std::shared_ptr<const DiscreteFunctionVariant>>& discrete_function_variant_list) const
{
  for (auto&& discrete_function_variant : discrete_function_variant_list) {
    std::visit(
      [&](auto&& discrete_function) {
        using DiscreteFunctionT = std::decay_t<decltype(discrete_function)>;
        if constexpr (is_discrete_function_P0_v<DiscreteFunctionT> or
                      is_discrete_function_P0_vector_v<DiscreteFunctionT>) {
          using DataType = std::decay_t<typename DiscreteFunctionT::data_type>;
          if constexpr (is_tiny_vector_v<DataType>) {
            if constexpr (DataType::Dimension != MeshType::Dimension) {
              std::stringstream error_msg;
              error_msg << "cannot symmetrize vectors of dimension " << DataType::Dimension
                        << " using a mesh of dimension " << MeshType::Dimension;
              throw NormalError(error_msg.str());
            }
          } else if constexpr (is_tiny_matrix_v<DataType>) {
            if constexpr ((DataType::NumberOfRows != MeshType::Dimension) or
                          (DataType::NumberOfColumns != MeshType::Dimension)) {
              std::stringstream error_msg;
              error_msg << "cannot symmetrize matrices of dimensions " << DataType::NumberOfRows << 'x'
                        << DataType::NumberOfColumns << " using a mesh of dimension " << MeshType::Dimension;
              throw NormalError(error_msg.str());
            }
          }
        } else {
          // LCOV_EXCL_START
          throw UnexpectedError("invalid discrete function type");
          // LCOV_EXCL_STOP
        }
      },
      discrete_function_variant->discreteFunction());
  }
}

template <MeshConcept MeshType>
[[nodiscard]] std::vector<std::shared_ptr<const DiscreteFunctionDPkVariant>>
PolynomialReconstruction::_build(
  const std::shared_ptr<const MeshType>& p_mesh,
  const std::vector<std::shared_ptr<const DiscreteFunctionVariant>>& discrete_function_variant_list) const
{
  const MeshType& mesh = *p_mesh;

  using Rd = TinyVector<MeshType::Dimension>;

  if (m_descriptor.symmetryBoundaryDescriptorList().size() > 0) {
    this->_checkDataAndSymmetriesCompatibility<MeshType>(discrete_function_variant_list);
  }

  const size_t number_of_columns = this->_getNumberOfColumns(discrete_function_variant_list);

  const size_t basis_dimension =
    DiscreteFunctionDPk<MeshType::Dimension, double>::BasisViewType::dimensionFromDegree(m_descriptor.degree());

  const auto& stencil_array =
    StencilManager::instance().getCellToCellStencilArray(mesh.connectivity(), m_descriptor.stencilDescriptor(),
                                                         m_descriptor.symmetryBoundaryDescriptorList());

  auto xr = mesh.xr();
  auto xj = MeshDataManager::instance().getMeshData(mesh).xj();
  auto Vj = MeshDataManager::instance().getMeshData(mesh).Vj();

  auto cell_is_owned       = mesh.connectivity().cellIsOwned();
  auto cell_type           = mesh.connectivity().cellType();
  auto cell_to_node_matrix = mesh.connectivity().cellToNodeMatrix();
  auto cell_to_face_matrix = mesh.connectivity().cellToFaceMatrix();

  auto full_stencil_size = [&](const CellId cell_id) {
    auto stencil_cell_list = stencil_array[cell_id];
    size_t stencil_size    = stencil_cell_list.size();
    for (size_t i = 0; i < m_descriptor.symmetryBoundaryDescriptorList().size(); ++i) {
      auto& ghost_stencil = stencil_array.symmetryBoundaryStencilArrayList()[i].stencilArray();
      stencil_size += ghost_stencil[cell_id].size();
    }

    return stencil_size;
  };

  const size_t max_stencil_size = [&]() {
    size_t max_size = 0;
    for (CellId cell_id = 0; cell_id < mesh.numberOfCells(); ++cell_id) {
      const size_t stencil_size = full_stencil_size(cell_id);
      if (cell_is_owned[cell_id] and stencil_size > max_size) {
        max_size = stencil_size;
      }
    }
    return max_size;
  }();

  SmallArray<const Rd> symmetry_normal_list = [&] {
    SmallArray<Rd> normal_list(m_descriptor.symmetryBoundaryDescriptorList().size());
    size_t i_symmetry_boundary = 0;
    for (auto p_boundary_descriptor : m_descriptor.symmetryBoundaryDescriptorList()) {
      const IBoundaryDescriptor& boundary_descriptor = *p_boundary_descriptor;

      auto symmetry_boundary             = getMeshFlatFaceBoundary(mesh, boundary_descriptor);
      normal_list[i_symmetry_boundary++] = symmetry_boundary.outgoingNormal();
    }
    return normal_list;
  }();

  SmallArray<const Rd> symmetry_origin_list = [&] {
    SmallArray<Rd> origin_list(m_descriptor.symmetryBoundaryDescriptorList().size());
    size_t i_symmetry_boundary = 0;
    for (auto p_boundary_descriptor : m_descriptor.symmetryBoundaryDescriptorList()) {
      const IBoundaryDescriptor& boundary_descriptor = *p_boundary_descriptor;

      auto symmetry_boundary             = getMeshFlatFaceBoundary(mesh, boundary_descriptor);
      origin_list[i_symmetry_boundary++] = symmetry_boundary.origin();
    }
    return origin_list;
  }();

  Kokkos::Experimental::UniqueToken<Kokkos::DefaultExecutionSpace::execution_space,
                                    Kokkos::Experimental::UniqueTokenScope::Global>
    tokens;

  auto mutable_discrete_function_dpk_variant_list =
    this->_createMutableDiscreteFunctionDPKVariantList(p_mesh, discrete_function_variant_list);

  SmallArray<SmallMatrix<double>> A_pool(Kokkos::DefaultExecutionSpace::concurrency());
  SmallArray<SmallMatrix<double>> B_pool(Kokkos::DefaultExecutionSpace::concurrency());
  SmallArray<SmallVector<double>> G_pool(Kokkos::DefaultExecutionSpace::concurrency());
  SmallArray<SmallMatrix<double>> X_pool(Kokkos::DefaultExecutionSpace::concurrency());

  SmallArray<SmallArray<double>> inv_Vj_wq_detJ_ek_pool(Kokkos::DefaultExecutionSpace::concurrency());
  SmallArray<SmallArray<double>> mean_j_of_ejk_pool(Kokkos::DefaultExecutionSpace::concurrency());
  SmallArray<SmallArray<double>> mean_i_of_ejk_pool(Kokkos::DefaultExecutionSpace::concurrency());

  SmallArray<SmallArray<double>> inv_Vj_alpha_p_1_wq_X_prime_orth_ek_pool(Kokkos::DefaultExecutionSpace::concurrency());
  SmallArray<SmallArray<double>> mean_l_of_ejk_pool(Kokkos::DefaultExecutionSpace::concurrency());

  for (size_t i = 0; i < A_pool.size(); ++i) {
    A_pool[i] = SmallMatrix<double>(max_stencil_size, basis_dimension - 1);
    B_pool[i] = SmallMatrix<double>(max_stencil_size, number_of_columns);
    G_pool[i] = SmallVector<double>(basis_dimension - 1);
    X_pool[i] = SmallMatrix<double>(basis_dimension - 1, number_of_columns);

    inv_Vj_wq_detJ_ek_pool[i] = SmallArray<double>(basis_dimension);
    mean_j_of_ejk_pool[i]     = SmallArray<double>(basis_dimension - 1);
    mean_i_of_ejk_pool[i]     = SmallArray<double>(basis_dimension - 1);

    inv_Vj_alpha_p_1_wq_X_prime_orth_ek_pool[i] = SmallArray<double>(basis_dimension);
  }

  parallel_for(
    mesh.numberOfCells(), PUGS_CLASS_LAMBDA(const CellId cell_j_id) {
      if (cell_is_owned[cell_j_id]) {
        const int32_t t = tokens.acquire();

        auto stencil_cell_list = stencil_array[cell_j_id];

        ShrinkMatrixView B(B_pool[t], full_stencil_size(cell_j_id));

        size_t column_begin = 0;
        for (size_t i_discrete_function_variant = 0;
             i_discrete_function_variant < discrete_function_variant_list.size(); ++i_discrete_function_variant) {
          const auto& discrete_function_variant = discrete_function_variant_list[i_discrete_function_variant];

          std::visit(
            [&](auto&& discrete_function) {
              using DiscreteFunctionT = std::decay_t<decltype(discrete_function)>;
              if constexpr (is_discrete_function_P0_v<DiscreteFunctionT>) {
                using DataType     = std::decay_t<typename DiscreteFunctionT::data_type>;
                const DataType& qj = discrete_function[cell_j_id];
                size_t index       = 0;
                for (size_t i = 0; i < stencil_cell_list.size(); ++i, ++index) {
                  const CellId cell_i_id = stencil_cell_list[i];
                  const DataType& qi_qj  = discrete_function[cell_i_id] - qj;
                  if constexpr (std::is_arithmetic_v<DataType>) {
                    B(index, column_begin) = qi_qj;
                  } else if constexpr (is_tiny_vector_v<DataType>) {
                    for (size_t kB = column_begin, k = 0; k < DataType::Dimension; ++k, ++kB) {
                      B(index, kB) = qi_qj[k];
                    }
                  } else if constexpr (is_tiny_matrix_v<DataType>) {
                    for (size_t k = 0; k < DataType::NumberOfRows; ++k) {
                      for (size_t l = 0; l < DataType::NumberOfColumns; ++l) {
                        B(index, column_begin + k * DataType::NumberOfColumns + l) = qi_qj(k, l);
                      }
                    }
                  }
                }

                for (size_t i_symmetry = 0; i_symmetry < stencil_array.symmetryBoundaryStencilArrayList().size();
                     ++i_symmetry) {
                  auto& ghost_stencil  = stencil_array.symmetryBoundaryStencilArrayList()[i_symmetry].stencilArray();
                  auto ghost_cell_list = ghost_stencil[cell_j_id];
                  for (size_t i = 0; i < ghost_cell_list.size(); ++i, ++index) {
                    const CellId cell_i_id = ghost_cell_list[i];
                    if constexpr (std::is_arithmetic_v<DataType>) {
                      const DataType& qi_qj  = discrete_function[cell_i_id] - qj;
                      B(index, column_begin) = qi_qj;
                    } else if constexpr (is_tiny_vector_v<DataType>) {
                      if constexpr (DataType::Dimension == MeshType::Dimension) {
                        const Rd& normal = symmetry_normal_list[i_symmetry];

                        const DataType& qi    = discrete_function[cell_i_id];
                        const DataType& qi_qj = symmetrize_vector(normal, qi) - qj;
                        for (size_t kB = column_begin, k = 0; k < DataType::Dimension; ++k, ++kB) {
                          B(index, kB) = qi_qj[k];
                        }
                      } else {
                        // LCOV_EXCL_START
                        std::stringstream error_msg;
                        error_msg << "cannot symmetrize vectors of dimension " << DataType::Dimension
                                  << " using a mesh of dimension " << MeshType::Dimension;
                        throw UnexpectedError(error_msg.str());
                        // LCOV_EXCL_STOP
                      }
                    } else if constexpr (is_tiny_matrix_v<DataType>) {
                      if constexpr ((DataType::NumberOfColumns == DataType::NumberOfRows) and
                                    (DataType::NumberOfColumns == MeshType::Dimension)) {
                        const Rd& normal = symmetry_normal_list[i_symmetry];

                        const DataType& qi    = discrete_function[cell_i_id];
                        const DataType& qi_qj = symmetrize_matrix(normal, qi) - qj;
                        for (size_t k = 0; k < DataType::NumberOfRows; ++k) {
                          for (size_t l = 0; l < DataType::NumberOfColumns; ++l) {
                            B(index, column_begin + k * DataType::NumberOfColumns + l) = qi_qj(k, l);
                          }
                        }
                      } else {
                        // LCOV_EXCL_START
                        std::stringstream error_msg;
                        error_msg << "cannot symmetrize matrices of dimensions " << DataType::NumberOfRows << 'x'
                                  << DataType::NumberOfColumns << " using a mesh of dimension " << MeshType::Dimension;
                        throw UnexpectedError(error_msg.str());
                        // LCOV_EXCL_STOP
                      }
                    }
                  }
                }

                if constexpr (std::is_arithmetic_v<DataType>) {
                  ++column_begin;
                } else if constexpr (is_tiny_vector_v<DataType> or is_tiny_matrix_v<DataType>) {
                  column_begin += DataType::Dimension;
                }
              } else if constexpr (is_discrete_function_P0_vector_v<DiscreteFunctionT>) {
                using DataType = std::decay_t<typename DiscreteFunctionT::data_type>;

                const auto qj_vector = discrete_function[cell_j_id];

                if constexpr (std::is_arithmetic_v<DataType>) {
                  size_t index = 0;
                  for (size_t i = 0; i < stencil_cell_list.size(); ++i, ++index) {
                    const CellId cell_i_id = stencil_cell_list[i];
                    for (size_t l = 0; l < qj_vector.size(); ++l) {
                      const DataType& qj         = qj_vector[l];
                      const DataType& qi_qj      = discrete_function[cell_i_id][l] - qj;
                      B(index, column_begin + l) = qi_qj;
                    }
                  }

                  for (size_t i_symmetry = 0; i_symmetry < stencil_array.symmetryBoundaryStencilArrayList().size();
                       ++i_symmetry) {
                    auto& ghost_stencil  = stencil_array.symmetryBoundaryStencilArrayList()[i_symmetry].stencilArray();
                    auto ghost_cell_list = ghost_stencil[cell_j_id];
                    for (size_t i = 0; i < ghost_cell_list.size(); ++i, ++index) {
                      const CellId cell_i_id = ghost_cell_list[i];
                      for (size_t l = 0; l < qj_vector.size(); ++l) {
                        const DataType& qj         = qj_vector[l];
                        const DataType& qi_qj      = discrete_function[cell_i_id][l] - qj;
                        B(index, column_begin + l) = qi_qj;
                      }
                    }
                  }
                } else if constexpr (is_tiny_vector_v<DataType>) {
                  size_t index = 0;
                  for (size_t i = 0; i < stencil_cell_list.size(); ++i, ++index) {
                    const CellId cell_i_id = stencil_cell_list[i];
                    for (size_t l = 0; l < qj_vector.size(); ++l) {
                      const DataType& qj    = qj_vector[l];
                      const DataType& qi_qj = discrete_function[cell_i_id][l] - qj;
                      for (size_t kB = column_begin + l * DataType::Dimension, k = 0; k < DataType::Dimension;
                           ++k, ++kB) {
                        B(index, kB) = qi_qj[k];
                      }
                    }
                  }

                  for (size_t i_symmetry = 0; i_symmetry < stencil_array.symmetryBoundaryStencilArrayList().size();
                       ++i_symmetry) {
                    if constexpr (DataType::Dimension == MeshType::Dimension) {
                      auto& ghost_stencil = stencil_array.symmetryBoundaryStencilArrayList()[i_symmetry].stencilArray();
                      auto ghost_cell_list = ghost_stencil[cell_j_id];

                      const Rd& normal = symmetry_normal_list[i_symmetry];

                      for (size_t i = 0; i < ghost_cell_list.size(); ++i, ++index) {
                        const CellId cell_i_id = ghost_cell_list[i];

                        for (size_t l = 0; l < qj_vector.size(); ++l) {
                          const DataType& qj    = qj_vector[l];
                          const DataType& qi    = discrete_function[cell_i_id][l];
                          const DataType& qi_qj = symmetrize_vector(normal, qi) - qj;
                          for (size_t kB = column_begin + l * DataType::Dimension, k = 0; k < DataType::Dimension;
                               ++k, ++kB) {
                            B(index, kB) = qi_qj[k];
                          }
                        }
                      }
                    } else {
                      // LCOV_EXCL_START
                      std::stringstream error_msg;
                      error_msg << "cannot symmetrize vectors of dimension " << DataType::Dimension
                                << " using a mesh of dimension " << MeshType::Dimension;
                      throw UnexpectedError(error_msg.str());
                      // LCOV_EXCL_STOP
                    }
                  }
                } else if constexpr (is_tiny_matrix_v<DataType>) {
                  for (size_t i_symmetry = 0; i_symmetry < stencil_array.symmetryBoundaryStencilArrayList().size();
                       ++i_symmetry) {
                    auto& ghost_stencil  = stencil_array.symmetryBoundaryStencilArrayList()[i_symmetry].stencilArray();
                    auto ghost_cell_list = ghost_stencil[cell_j_id];
                    if (ghost_cell_list.size() > 0) {
                      throw NotImplementedError("TinyMatrix symmetries for reconstruction of DiscreteFunctionP0Vector");
                    }
                  }
                }

                if constexpr (std::is_arithmetic_v<DataType>) {
                  column_begin += qj_vector.size();
                } else if constexpr (is_tiny_vector_v<DataType> or is_tiny_matrix_v<DataType>) {
                  column_begin += qj_vector.size() * DataType::Dimension;
                }

              } else {
                // LCOV_EXCL_START
                throw UnexpectedError("invalid discrete function type");
                // LCOV_EXCL_STOP
              }
            },
            discrete_function_variant->discreteFunction());
        }

        ShrinkMatrixView A(A_pool[t], full_stencil_size(cell_j_id));

        if ((m_descriptor.degree() == 1) and
            (m_descriptor.integrationMethodType() == IntegrationMethodType::cell_center)) {
          const Rd& Xj = xj[cell_j_id];
          size_t index = 0;
          for (size_t i = 0; i < stencil_cell_list.size(); ++i, ++index) {
            const CellId cell_i_id = stencil_cell_list[i];
            const Rd Xi_Xj         = xj[cell_i_id] - Xj;
            for (size_t l = 0; l < basis_dimension - 1; ++l) {
              A(index, l) = Xi_Xj[l];
            }
          }
          for (size_t i_symmetry = 0; i_symmetry < stencil_array.symmetryBoundaryStencilArrayList().size();
               ++i_symmetry) {
            auto& ghost_stencil  = stencil_array.symmetryBoundaryStencilArrayList()[i_symmetry].stencilArray();
            auto ghost_cell_list = ghost_stencil[cell_j_id];

            const Rd& origin = symmetry_origin_list[i_symmetry];
            const Rd& normal = symmetry_normal_list[i_symmetry];

            for (size_t i = 0; i < ghost_cell_list.size(); ++i, ++index) {
              const CellId cell_i_id = ghost_cell_list[i];
              const Rd Xi_Xj         = symmetrize_coordinates(origin, normal, xj[cell_i_id]) - Xj;
              for (size_t l = 0; l < basis_dimension - 1; ++l) {
                A(index, l) = Xi_Xj[l];
              }
            }
          }

        } else if ((m_descriptor.integrationMethodType() == IntegrationMethodType::element) or
                   (m_descriptor.integrationMethodType() == IntegrationMethodType::boundary)) {
          if ((m_descriptor.integrationMethodType() == IntegrationMethodType::boundary) and
              (MeshType::Dimension == 2)) {
            if constexpr (MeshType::Dimension == 2) {
              SmallArray<double>& inv_Vj_alpha_p_1_wq_X_prime_orth_ek = inv_Vj_alpha_p_1_wq_X_prime_orth_ek_pool[t];
              SmallArray<double>& mean_j_of_ejk                       = mean_j_of_ejk_pool[t];
              SmallArray<double>& mean_i_of_ejk                       = mean_i_of_ejk_pool[t];

              auto face_to_node_matrix   = p_mesh->connectivity().faceToNodeMatrix();
              auto cell_face_is_reversed = p_mesh->connectivity().cellFaceIsReversed();
              const Rd& Xj               = xj[cell_j_id];

              _computeEjkMeanByBoundary(*p_mesh, Xj, cell_j_id, cell_to_face_matrix, face_to_node_matrix,
                                        cell_face_is_reversed, Vj, m_descriptor.degree(), basis_dimension,
                                        inv_Vj_alpha_p_1_wq_X_prime_orth_ek, mean_j_of_ejk);

              size_t index = 0;
              for (size_t i = 0; i < stencil_cell_list.size(); ++i, ++index) {
                const CellId cell_i_id = stencil_cell_list[i];

                _computeEjkMeanByBoundary(*p_mesh, Xj, cell_i_id, cell_to_face_matrix, face_to_node_matrix,
                                          cell_face_is_reversed, Vj, m_descriptor.degree(), basis_dimension,
                                          inv_Vj_alpha_p_1_wq_X_prime_orth_ek, mean_i_of_ejk);

                for (size_t l = 0; l < basis_dimension - 1; ++l) {
                  A(index, l) = mean_i_of_ejk[l] - mean_j_of_ejk[l];
                }
              }
              for (size_t i_symmetry = 0; i_symmetry < stencil_array.symmetryBoundaryStencilArrayList().size();
                   ++i_symmetry) {
                auto& ghost_stencil  = stencil_array.symmetryBoundaryStencilArrayList()[i_symmetry].stencilArray();
                auto ghost_cell_list = ghost_stencil[cell_j_id];

                const Rd& origin = symmetry_origin_list[i_symmetry];
                const Rd& normal = symmetry_normal_list[i_symmetry];

                for (size_t i = 0; i < ghost_cell_list.size(); ++i, ++index) {
                  const CellId cell_i_id = ghost_cell_list[i];

                  _computeEjkMeanByBoundaryInSymmetricCell(origin, normal,   //
                                                           Xj, cell_i_id, xr, cell_to_face_matrix, face_to_node_matrix,
                                                           cell_face_is_reversed, Vj, m_descriptor.degree(),
                                                           basis_dimension, inv_Vj_alpha_p_1_wq_X_prime_orth_ek,
                                                           mean_i_of_ejk);

                  for (size_t l = 0; l < basis_dimension - 1; ++l) {
                    A(index, l) = mean_i_of_ejk[l] - mean_j_of_ejk[l];
                  }
                }
              }
            } else {
              throw UnexpectedError("invalid mesh dimension");
            }
          } else {
            SmallArray<double>& inv_Vj_wq_detJ_ek = inv_Vj_wq_detJ_ek_pool[t];
            SmallArray<double>& mean_j_of_ejk     = mean_j_of_ejk_pool[t];
            SmallArray<double>& mean_i_of_ejk     = mean_i_of_ejk_pool[t];

            const Rd& Xj = xj[cell_j_id];

            _computeEjkMean(Xj, xr, cell_to_node_matrix[cell_j_id], cell_type[cell_j_id], Vj[cell_j_id],
                            m_descriptor.degree(), basis_dimension, inv_Vj_wq_detJ_ek, mean_j_of_ejk);

            size_t index = 0;
            for (size_t i = 0; i < stencil_cell_list.size(); ++i, ++index) {
              const CellId cell_i_id = stencil_cell_list[i];

              _computeEjkMean(Xj, xr, cell_to_node_matrix[cell_i_id], cell_type[cell_i_id], Vj[cell_i_id],
                              m_descriptor.degree(), basis_dimension, inv_Vj_wq_detJ_ek, mean_i_of_ejk);

              for (size_t l = 0; l < basis_dimension - 1; ++l) {
                A(index, l) = mean_i_of_ejk[l] - mean_j_of_ejk[l];
              }
            }
            for (size_t i_symmetry = 0; i_symmetry < stencil_array.symmetryBoundaryStencilArrayList().size();
                 ++i_symmetry) {
              auto& ghost_stencil  = stencil_array.symmetryBoundaryStencilArrayList()[i_symmetry].stencilArray();
              auto ghost_cell_list = ghost_stencil[cell_j_id];

              const Rd& origin = symmetry_origin_list[i_symmetry];
              const Rd& normal = symmetry_normal_list[i_symmetry];

              for (size_t i = 0; i < ghost_cell_list.size(); ++i, ++index) {
                const CellId cell_i_id = ghost_cell_list[i];

                _computeEjkMeanInSymmetricCell(origin, normal,   //
                                               Xj, xr, cell_to_node_matrix[cell_i_id], cell_type[cell_i_id],
                                               Vj[cell_i_id], m_descriptor.degree(), basis_dimension, inv_Vj_wq_detJ_ek,
                                               mean_i_of_ejk);

                for (size_t l = 0; l < basis_dimension - 1; ++l) {
                  A(index, l) = mean_i_of_ejk[l] - mean_j_of_ejk[l];
                }
              }
            }
          }
        } else {
          throw UnexpectedError("invalid integration strategy");
        }

        if (m_descriptor.rowWeighting()) {
          // Add row weighting (give more importance to cells that are
          // closer to j)
          const Rd& Xj = xj[cell_j_id];

          size_t index = 0;
          for (size_t i = 0; i < stencil_cell_list.size(); ++i, ++index) {
            const CellId cell_i_id = stencil_cell_list[i];
            const double weight    = 1. / l2Norm(xj[cell_i_id] - Xj);
            for (size_t l = 0; l < basis_dimension - 1; ++l) {
              A(index, l) *= weight;
            }
            for (size_t l = 0; l < number_of_columns; ++l) {
              B(index, l) *= weight;
            }
          }
          for (size_t i_symmetry = 0; i_symmetry < stencil_array.symmetryBoundaryStencilArrayList().size();
               ++i_symmetry) {
            auto& ghost_stencil  = stencil_array.symmetryBoundaryStencilArrayList()[i_symmetry].stencilArray();
            auto ghost_cell_list = ghost_stencil[cell_j_id];

            const Rd& origin = symmetry_origin_list[i_symmetry];
            const Rd& normal = symmetry_normal_list[i_symmetry];

            for (size_t i = 0; i < ghost_cell_list.size(); ++i, ++index) {
              const CellId cell_i_id = ghost_cell_list[i];
              const double weight    = 1. / l2Norm(symmetrize_coordinates(origin, normal, xj[cell_i_id]) - Xj);
              for (size_t l = 0; l < basis_dimension - 1; ++l) {
                A(index, l) *= weight;
              }
              for (size_t l = 0; l < number_of_columns; ++l) {
                B(index, l) *= weight;
              }
            }
          }
        }

        const SmallMatrix<double>& X = X_pool[t];

        if (m_descriptor.preconditioning()) {
          // Add column  weighting preconditioning (increase the presition)
          SmallVector<double>& G = G_pool[t];

          for (size_t l = 0; l < A.numberOfColumns(); ++l) {
            double g = 0;
            for (size_t i = 0; i < A.numberOfRows(); ++i) {
              const double Ail = A(i, l);

              g += Ail * Ail;
            }
            G[l] = std::sqrt(g);
          }

          for (size_t l = 0; l < A.numberOfColumns(); ++l) {
            const double Gl = G[l];
            for (size_t i = 0; i < A.numberOfRows(); ++i) {
              A(i, l) *= Gl;
            }
          }

          Givens::solveCollectionInPlace(A, X, B);

          for (size_t l = 0; l < X.numberOfRows(); ++l) {
            const double Gl = G[l];
            for (size_t i = 0; i < X.numberOfColumns(); ++i) {
              X(l, i) *= Gl;
            }
          }
        } else {
          Givens::solveCollectionInPlace(A, X, B);
        }

        column_begin = 0;
        for (size_t i_dpk_variant = 0; i_dpk_variant < mutable_discrete_function_dpk_variant_list.size();
             ++i_dpk_variant) {
          const auto& dpk_variant = mutable_discrete_function_dpk_variant_list[i_dpk_variant];

          const auto& discrete_function_variant = discrete_function_variant_list[i_dpk_variant];

          std::visit(
            [&](auto&& dpk_function, auto&& p0_function) {
              using DPkFunctionT = std::decay_t<decltype(dpk_function)>;
              using P0FunctionT  = std::decay_t<decltype(p0_function)>;
              using DataType     = std::remove_const_t<std::decay_t<typename DPkFunctionT::data_type>>;
              using P0DataType   = std::remove_const_t<std::decay_t<typename P0FunctionT::data_type>>;

              if constexpr (std::is_same_v<DataType, P0DataType>) {
                if constexpr (is_discrete_function_P0_v<P0FunctionT>) {
                  if constexpr (is_discrete_function_dpk_scalar_v<DPkFunctionT>) {
                    auto dpk_j = dpk_function.coefficients(cell_j_id);
                    dpk_j[0]   = p0_function[cell_j_id];

                    if constexpr (std::is_arithmetic_v<DataType>) {
                      if (m_descriptor.degree() > 1) {
                        auto& mean_j_of_ejk = mean_j_of_ejk_pool[t];
                        for (size_t i = 0; i < basis_dimension - 1; ++i) {
                          dpk_j[0] -= X(i, column_begin) * mean_j_of_ejk[i];
                        }
                      }

                      for (size_t i = 0; i < basis_dimension - 1; ++i) {
                        auto& dpk_j_ip1 = dpk_j[i + 1];
                        dpk_j_ip1       = X(i, column_begin);
                      }
                      ++column_begin;
                    } else if constexpr (is_tiny_vector_v<DataType>) {
                      if (m_descriptor.degree() > 1) {
                        auto& mean_j_of_ejk = mean_j_of_ejk_pool[t];
                        for (size_t i = 0; i < basis_dimension - 1; ++i) {
                          auto& dpk_j_0 = dpk_j[0];
                          for (size_t k = 0; k < DataType::Dimension; ++k) {
                            dpk_j_0[k] -= X(i, column_begin + k) * mean_j_of_ejk[i];
                          }
                        }
                      }

                      for (size_t i = 0; i < basis_dimension - 1; ++i) {
                        auto& dpk_j_ip1 = dpk_j[i + 1];
                        for (size_t k = 0; k < DataType::Dimension; ++k) {
                          dpk_j_ip1[k] = X(i, column_begin + k);
                        }
                      }
                      column_begin += DataType::Dimension;
                    } else if constexpr (is_tiny_matrix_v<DataType>) {
                      if (m_descriptor.degree() > 1) {
                        auto& mean_j_of_ejk = mean_j_of_ejk_pool[t];
                        for (size_t i = 0; i < basis_dimension - 1; ++i) {
                          auto& dpk_j_0 = dpk_j[0];
                          for (size_t k = 0; k < DataType::NumberOfRows; ++k) {
                            for (size_t l = 0; l < DataType::NumberOfColumns; ++l) {
                              dpk_j_0(k, l) -=
                                X(i, column_begin + k * DataType::NumberOfColumns + l) * mean_j_of_ejk[i];
                            }
                          }
                        }
                      }

                      for (size_t i = 0; i < basis_dimension - 1; ++i) {
                        auto& dpk_j_ip1 = dpk_j[i + 1];
                        for (size_t k = 0; k < DataType::NumberOfRows; ++k) {
                          for (size_t l = 0; l < DataType::NumberOfColumns; ++l) {
                            dpk_j_ip1(k, l) = X(i, column_begin + k * DataType::NumberOfColumns + l);
                          }
                        }
                      }
                      column_begin += DataType::Dimension;
                    } else {
                      // LCOV_EXCL_START
                      throw UnexpectedError("unexpected data type");
                      // LCOV_EXCL_STOP
                    }
                  } else {
                    // LCOV_EXCL_START
                    throw UnexpectedError("unexpected discrete dpk function type");
                    // LCOV_EXCL_STOP
                  }
                } else if constexpr (is_discrete_function_P0_vector_v<P0FunctionT>) {
                  if constexpr (is_discrete_function_dpk_vector_v<DPkFunctionT>) {
                    auto dpk_j        = dpk_function.coefficients(cell_j_id);
                    auto cell_vector  = p0_function[cell_j_id];
                    const size_t size = basis_dimension;

                    for (size_t l = 0; l < cell_vector.size(); ++l) {
                      const size_t component_begin = l * size;
                      dpk_j[component_begin]       = cell_vector[l];
                      if constexpr (std::is_arithmetic_v<DataType>) {
                        if (m_descriptor.degree() > 1) {
                          auto& mean_j_of_ejk = mean_j_of_ejk_pool[t];
                          for (size_t i = 0; i < basis_dimension - 1; ++i) {
                            dpk_j[component_begin] -= X(i, column_begin) * mean_j_of_ejk[i];
                          }
                        }

                        for (size_t i = 0; i < basis_dimension - 1; ++i) {
                          auto& dpk_j_ip1 = dpk_j[component_begin + i + 1];
                          dpk_j_ip1       = X(i, column_begin);
                        }
                        ++column_begin;
                      } else if constexpr (is_tiny_vector_v<DataType>) {
                        if (m_descriptor.degree() > 1) {
                          auto& mean_j_of_ejk = mean_j_of_ejk_pool[t];
                          for (size_t i = 0; i < basis_dimension - 1; ++i) {
                            auto& dpk_j_0 = dpk_j[0];
                            for (size_t k = 0; k < DataType::Dimension; ++k) {
                              dpk_j_0[k] -= X(i, column_begin + k) * mean_j_of_ejk[i];
                            }
                          }
                        }

                        for (size_t i = 0; i < basis_dimension - 1; ++i) {
                          auto& dpk_j_ip1 = dpk_j[component_begin + i + 1];
                          for (size_t k = 0; k < DataType::Dimension; ++k) {
                            dpk_j_ip1[k] = X(i, column_begin + k);
                          }
                        }
                        column_begin += DataType::Dimension;
                      } else {
                        // LCOV_EXCL_START
                        throw UnexpectedError("unexpected data type");
                        // LCOV_EXCL_STOP
                      }
                    }
                  } else {
                    // LCOV_EXCL_START
                    throw UnexpectedError("unexpected discrete dpk function type");
                    // LCOV_EXCL_STOP
                  }
                } else {
                  // LCOV_EXCL_START
                  throw UnexpectedError("unexpected discrete function type");
                  // LCOV_EXCL_STOP
                }
              } else {
                // LCOV_EXCL_START
                throw UnexpectedError("incompatible data types");
                // LCOV_EXCL_STOP
              }
            },
            dpk_variant.mutableDiscreteFunctionDPk(), discrete_function_variant->discreteFunction());
        }

        tokens.release(t);
      }
    });

  std::vector<std::shared_ptr<const DiscreteFunctionDPkVariant>> discrete_function_dpk_variant_list;

  for (auto discrete_function_dpk_variant_p : mutable_discrete_function_dpk_variant_list) {
    std::visit(
      [&](auto&& mutable_function_dpk) {
        synchronize(mutable_function_dpk.cellArrays());
        discrete_function_dpk_variant_list.push_back(
          std::make_shared<DiscreteFunctionDPkVariant>(mutable_function_dpk));
      },
      discrete_function_dpk_variant_p.mutableDiscreteFunctionDPk());
  }

  return discrete_function_dpk_variant_list;
}

std::vector<std::shared_ptr<const DiscreteFunctionDPkVariant>>
PolynomialReconstruction::build(
  const std::vector<std::shared_ptr<const DiscreteFunctionVariant>>& discrete_function_variant_list) const
{
  if (not hasSameMesh(discrete_function_variant_list)) {
    throw NormalError("cannot reconstruct functions living of different meshes simultaneously");
  }

  auto mesh_v = getCommonMesh(discrete_function_variant_list);

  return std::visit([&](auto&& p_mesh) { return this->_build(p_mesh, discrete_function_variant_list); },
                    mesh_v->variant());
}