diff --git a/CMakeLists.txt b/CMakeLists.txt index f249c9025ec9119d11afa4554be4d8a8a417164d..ab43ba3dd8bd0b0c0073f614c3e3c1636740d8da 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -741,6 +741,12 @@ else() message(" gnuplot: not found!") endif() +if (GMSH) + message(" gmsh: ${GMSH}") +else() + message(" gmsh: not found!") +endif() + if (PYGMENTIZE) message(" pygmentize: ${PYGMENTIZE}") else() @@ -753,7 +759,7 @@ else() message(" pdflatex: not found!") endif() -if (NOT EMACS OR NOT GNUPLOT_FOUND) +if (NOT EMACS OR NOT GNUPLOT_FOUND OR NOT GMSH) message(" ** Cannot build documentation: missing ") elseif(NOT LATEX_PDFLATEX_FOUND OR NOT PYGMENTIZE) message(" ** Cannot build pdf documentation: missing") @@ -764,6 +770,9 @@ endif() if (NOT GNUPLOT_FOUND) message(" - gnuplot") endif() +if (NOT GMSH) + message(" - gmsh") +endif() if (NOT LATEX_PDFLATEX_FOUND) message(" - pdflatex") endif() diff --git a/README.md b/README.md index 62d6b43773cb0164a29e8a3716a818c5ec219c7d..36e460a851a53d40b4ab442460594091fa22b20f 100644 --- a/README.md +++ b/README.md @@ -86,7 +86,7 @@ apt install slepc-dev ### User documentation -To build documentation one requires `emacs` and `gnuplot`, +To build documentation one requires `emacs`, `gmsh` and `gnuplot`, additionally since examples results are generated, the documentation can only be produced after the compilation of `pugs` itself. @@ -94,6 +94,10 @@ To install `emacs` on Debian-like systems ```shell apt install emacs ``` +To install `gmsh` on Debian-like systems +```shell +apt install gmsh +``` To install `gnuplot` one can either use ```shell apt install gnuplot-nox @@ -106,6 +110,7 @@ apt install gnuplot-x11 > When building the documentation for the first time, a local `emacs` > configuration is generated. *This requires an internet connection.* + These packages are enough to build the html documentation. 
To build the pdf documentation one requires a few more packages: `pdflatex` (actually a fresh texlive installation is probably necessary) and `pygmentize` diff --git a/cmake/PugsDoc.cmake b/cmake/PugsDoc.cmake index 1f8701ccc20f6416b529908f3893d971e86c7452..b067a5cc828ec0c245e8d51c0bda46a96dcce702 100644 --- a/cmake/PugsDoc.cmake +++ b/cmake/PugsDoc.cmake @@ -12,10 +12,13 @@ find_program(PYGMENTIZE pygmentize) # check for gnuplot find_package(Gnuplot) +# check for gmsh +find_program(GMSH NAMES gmsh) + add_custom_target(userdoc) add_custom_target(doc DEPENDS userdoc) -if (EMACS AND GNUPLOT_FOUND) +if (EMACS AND GNUPLOT_FOUND AND GMSH) add_custom_command( OUTPUT "${PUGS_BINARY_DIR}/doc" @@ -129,6 +132,13 @@ else() COMMAND ${CMAKE_COMMAND} -E cmake_echo_color --red --bold "gnuplot missing") add_dependencies(userdoc userdoc-missing-gnuplot) endif() + + if (NOT GMSH) + add_custom_target(userdoc-missing-gmsh + COMMAND ${CMAKE_COMMAND} -E cmake_echo_color --no-newline "Cannot build documentation: " + COMMAND ${CMAKE_COMMAND} -E cmake_echo_color --red --bold "gmsh missing") + add_dependencies(userdoc userdoc-missing-gmsh) + endif() endif() add_dependencies(doc userdoc) diff --git a/src/language/utils/OFStream.cpp b/src/language/utils/OFStream.cpp index 39b7cc8ecedee3ff76e3d6d54790d25f90d01569..ca7c77fad8a69fd79ae2b512635acbb7f03277fd 100644 --- a/src/language/utils/OFStream.cpp +++ b/src/language/utils/OFStream.cpp @@ -1,10 +1,12 @@ #include <language/utils/OFStream.hpp> +#include <utils/Filesystem.hpp> #include <utils/Messenger.hpp> OFStream::OFStream(const std::string& filename) { if (parallel::rank() == 0) { + createDirectoryIfNeeded(filename); m_fstream.open(filename); if (m_fstream.is_open()) { m_ostream = &m_fstream; diff --git a/src/mesh/ItemArrayUtils.hpp b/src/mesh/ItemArrayUtils.hpp index b1c3ba8cdad569f73f91029c24fb1333e05037ee..1bb8d929f12a2f834535a70166e43a130fb73e40 100644 --- a/src/mesh/ItemArrayUtils.hpp +++ b/src/mesh/ItemArrayUtils.hpp @@ -5,6 +5,7 @@ 
#include <mesh/Connectivity.hpp> #include <mesh/ItemArray.hpp> +#include <mesh/ItemValueUtils.hpp> #include <mesh/Synchronizer.hpp> #include <mesh/SynchronizerManager.hpp> @@ -12,7 +13,7 @@ template <typename DataType, ItemType item_type, typename ConnectivityPtr> std::remove_const_t<DataType> -min(const ItemArray<DataType, item_type, ConnectivityPtr>& item_value) +min(const ItemArray<DataType, item_type, ConnectivityPtr>& item_array) { using ItemArrayType = ItemArray<DataType, item_type, ConnectivityPtr>; using ItemIsOwnedType = ItemValue<const bool, item_type>; @@ -25,7 +26,7 @@ min(const ItemArray<DataType, item_type, ConnectivityPtr>& item_value) class ItemArrayMin { private: - const ItemArrayType& m_item_value; + const ItemArrayType& m_item_array; const ItemIsOwnedType m_is_owned; public: @@ -33,7 +34,7 @@ min(const ItemArray<DataType, item_type, ConnectivityPtr>& item_value) operator data_type() { data_type reduced_value; - parallel_reduce(m_item_value.numberOfItems(), *this, reduced_value); + parallel_reduce(m_item_array.numberOfItems(), *this, reduced_value); return reduced_value; } @@ -42,8 +43,8 @@ min(const ItemArray<DataType, item_type, ConnectivityPtr>& item_value) operator()(const index_type& i_item, data_type& data) const { if (m_is_owned[i_item]) { - auto array = m_item_value[i_item]; - for (size_t i = 0; i < m_item_value.sizeOfArrays(); ++i) { + auto array = m_item_array[i_item]; + for (size_t i = 0; i < m_item_array.sizeOfArrays(); ++i) { if (array[i] < data) { data = array[i]; } @@ -69,8 +70,8 @@ min(const ItemArray<DataType, item_type, ConnectivityPtr>& item_value) } PUGS_INLINE - ItemArrayMin(const ItemArrayType& item_value) - : m_item_value(item_value), m_is_owned([&](const IConnectivity& connectivity) { + ItemArrayMin(const ItemArrayType& item_array) + : m_item_array(item_array), m_is_owned([&](const IConnectivity& connectivity) { Assert((connectivity.dimension() > 0) and (connectivity.dimension() <= 3), "unexpected connectivity dimension"); 
@@ -96,7 +97,7 @@ min(const ItemArray<DataType, item_type, ConnectivityPtr>& item_value) } // LCOV_EXCL_STOP } - }(*item_value.connectivity_ptr())) + }(*item_array.connectivity_ptr())) { ; } @@ -105,13 +106,13 @@ min(const ItemArray<DataType, item_type, ConnectivityPtr>& item_value) ~ItemArrayMin() = default; }; - const DataType local_min = ItemArrayMin{item_value}; + const DataType local_min = ItemArrayMin{item_array}; return parallel::allReduceMin(local_min); } template <typename DataType, ItemType item_type, typename ConnectivityPtr> std::remove_const_t<DataType> -max(const ItemArray<DataType, item_type, ConnectivityPtr>& item_value) +max(const ItemArray<DataType, item_type, ConnectivityPtr>& item_array) { using ItemArrayType = ItemArray<DataType, item_type, ConnectivityPtr>; using ItemIsOwnedType = ItemValue<const bool, item_type>; @@ -124,7 +125,7 @@ max(const ItemArray<DataType, item_type, ConnectivityPtr>& item_value) class ItemArrayMax { private: - const ItemArrayType& m_item_value; + const ItemArrayType& m_item_array; const ItemIsOwnedType m_is_owned; public: @@ -132,7 +133,7 @@ max(const ItemArray<DataType, item_type, ConnectivityPtr>& item_value) operator data_type() { data_type reduced_value; - parallel_reduce(m_item_value.numberOfItems(), *this, reduced_value); + parallel_reduce(m_item_array.numberOfItems(), *this, reduced_value); return reduced_value; } @@ -141,8 +142,8 @@ max(const ItemArray<DataType, item_type, ConnectivityPtr>& item_value) operator()(const index_type& i_item, data_type& data) const { if (m_is_owned[i_item]) { - auto array = m_item_value[i_item]; - for (size_t i = 0; i < m_item_value.sizeOfArrays(); ++i) { + auto array = m_item_array[i_item]; + for (size_t i = 0; i < m_item_array.sizeOfArrays(); ++i) { if (array[i] > data) { data = array[i]; } @@ -168,8 +169,8 @@ max(const ItemArray<DataType, item_type, ConnectivityPtr>& item_value) } PUGS_INLINE - ItemArrayMax(const ItemArrayType& item_value) - : m_item_value(item_value), 
m_is_owned([&](const IConnectivity& connectivity) { + ItemArrayMax(const ItemArrayType& item_array) + : m_item_array(item_array), m_is_owned([&](const IConnectivity& connectivity) { Assert((connectivity.dimension() > 0) and (connectivity.dimension() <= 3), "unexpected connectivity dimension"); @@ -195,7 +196,7 @@ max(const ItemArray<DataType, item_type, ConnectivityPtr>& item_value) } // LCOV_EXCL_STOP } - }(*item_value.connectivity_ptr())) + }(*item_array.connectivity_ptr())) { ; } @@ -204,106 +205,34 @@ max(const ItemArray<DataType, item_type, ConnectivityPtr>& item_value) ~ItemArrayMax() = default; }; - const DataType local_max = ItemArrayMax{item_value}; + const DataType local_max = ItemArrayMax{item_array}; return parallel::allReduceMax(local_max); } template <typename DataType, ItemType item_type, typename ConnectivityPtr> std::remove_const_t<DataType> -sum(const ItemArray<DataType, item_type, ConnectivityPtr>& item_value) +sum(const ItemArray<DataType, item_type, ConnectivityPtr>& item_array) { - using ItemArrayType = ItemArray<DataType, item_type, ConnectivityPtr>; - using ItemIsOwnedType = ItemValue<const bool, item_type>; - using data_type = std::remove_const_t<typename ItemArrayType::data_type>; - using index_type = typename ItemArrayType::index_type; + using ItemArrayType = ItemArray<DataType, item_type, ConnectivityPtr>; + using data_type = std::remove_const_t<typename ItemArrayType::data_type>; + using index_type = typename ItemArrayType::index_type; static_assert(not std::is_same_v<data_type, bool>, "sum cannot be called on boolean data"); - class ItemArraySum - { - private: - const ItemArrayType& m_item_value; - const ItemIsOwnedType m_is_owned; - - public: - PUGS_INLINE - operator data_type() - { - data_type reduced_value; - parallel_reduce(m_item_value.numberOfItems(), *this, reduced_value); - return reduced_value; - } - - PUGS_INLINE - void - operator()(const index_type& i_item, data_type& data) const - { - if (m_is_owned[i_item]) { - auto array 
= m_item_value[i_item]; - for (size_t i = 0; i < m_item_value.sizeOfArrays(); ++i) { - data += array[i]; - } - } - } + ItemValue<data_type, item_type> item_sum{*item_array.connectivity_ptr()}; - PUGS_INLINE - void - join(data_type& dst, const data_type& src) const - { - dst += src; - } + parallel_for( + item_array.numberOfItems(), PUGS_LAMBDA(index_type item_id) { + auto row = item_array[item_id]; + data_type row_sum = row[0]; - PUGS_INLINE - void - init(data_type& value) const - { - if constexpr (std::is_arithmetic_v<data_type>) { - value = 0; - } else { - static_assert(is_tiny_vector_v<data_type> or is_tiny_matrix_v<data_type>, "invalid data type"); - value = zero; + for (size_t i = 1; i < row.size(); ++i) { + row_sum += row[i]; } - } - - PUGS_INLINE - ItemArraySum(const ItemArrayType& item_value) - : m_item_value(item_value), m_is_owned([&](const IConnectivity& connectivity) { - Assert((connectivity.dimension() > 0) and (connectivity.dimension() <= 3), - "unexpected connectivity dimension"); - - switch (connectivity.dimension()) { - case 1: { - const auto& connectivity_1d = static_cast<const Connectivity1D&>(connectivity); - return connectivity_1d.isOwned<item_type>(); - break; - } - case 2: { - const auto& connectivity_2d = static_cast<const Connectivity2D&>(connectivity); - return connectivity_2d.isOwned<item_type>(); - break; - } - case 3: { - const auto& connectivity_3d = static_cast<const Connectivity3D&>(connectivity); - return connectivity_3d.isOwned<item_type>(); - break; - } - // LCOV_EXCL_START - default: { - throw UnexpectedError("unexpected dimension"); - } - // LCOV_EXCL_STOP - } - }(*item_value.connectivity_ptr())) - { - ; - } - - PUGS_INLINE - ~ItemArraySum() = default; - }; + item_sum[item_id] = row_sum; + }); - const DataType local_sum = ItemArraySum{item_value}; - return parallel::allReduceSum(local_sum); + return sum(item_sum); } template <typename DataType, ItemType item_type, typename ConnectivityPtr> diff --git a/src/mesh/ItemValueUtils.hpp 
b/src/mesh/ItemValueUtils.hpp index 9ef745b550e42add842ac1c822cf65b07e34dc2b..e64f2e1bdc0bc1e934aac22258461947c1c0ffd3 100644 --- a/src/mesh/ItemValueUtils.hpp +++ b/src/mesh/ItemValueUtils.hpp @@ -8,6 +8,7 @@ #include <mesh/Synchronizer.hpp> #include <mesh/SynchronizerManager.hpp> #include <utils/PugsTraits.hpp> +#include <utils/ReproducibleSumMPI.hpp> #include <iostream> @@ -210,6 +211,33 @@ sum(const ItemValue<DataType, item_type, ConnectivityPtr>& item_value) static_assert(not std::is_same_v<data_type, bool>, "sum cannot be called on boolean data"); + auto get_is_owned = [](const IConnectivity& connectivity) -> ItemIsOwnedType { + Assert((connectivity.dimension() > 0) and (connectivity.dimension() <= 3), "unexpected connectivity dimension"); + + switch (connectivity.dimension()) { + case 1: { + const auto& connectivity_1d = static_cast<const Connectivity1D&>(connectivity); + return connectivity_1d.isOwned<item_type>(); + break; + } + case 2: { + const auto& connectivity_2d = static_cast<const Connectivity2D&>(connectivity); + return connectivity_2d.isOwned<item_type>(); + break; + } + case 3: { + const auto& connectivity_3d = static_cast<const Connectivity3D&>(connectivity); + return connectivity_3d.isOwned<item_type>(); + break; + } + // LCOV_EXCL_START + default: { + throw UnexpectedError("unexpected dimension"); + } + // LCOV_EXCL_STOP + } + }; + class ItemValueSum { private: @@ -254,34 +282,8 @@ sum(const ItemValue<DataType, item_type, ConnectivityPtr>& item_value) } PUGS_INLINE - ItemValueSum(const ItemValueType& item_value) - : m_item_value(item_value), m_is_owned([&](const IConnectivity& connectivity) { - Assert((connectivity.dimension() > 0) and (connectivity.dimension() <= 3), - "unexpected connectivity dimension"); - - switch (connectivity.dimension()) { - case 1: { - const auto& connectivity_1d = static_cast<const Connectivity1D&>(connectivity); - return connectivity_1d.isOwned<item_type>(); - break; - } - case 2: { - const auto& connectivity_2d = 
static_cast<const Connectivity2D&>(connectivity); - return connectivity_2d.isOwned<item_type>(); - break; - } - case 3: { - const auto& connectivity_3d = static_cast<const Connectivity3D&>(connectivity); - return connectivity_3d.isOwned<item_type>(); - break; - } - // LCOV_EXCL_START - default: { - throw UnexpectedError("unexpected dimension"); - } - // LCOV_EXCL_STOP - } - }(*item_value.connectivity_ptr())) + ItemValueSum(const ItemValueType& item_value, const ItemIsOwnedType& is_owned) + : m_item_value{item_value}, m_is_owned{is_owned} { ; } @@ -290,8 +292,34 @@ sum(const ItemValue<DataType, item_type, ConnectivityPtr>& item_value) ~ItemValueSum() = default; }; - const DataType local_sum = ItemValueSum{item_value}; - return parallel::allReduceSum(local_sum); + auto is_owned = get_is_owned(*item_value.connectivity_ptr()); + + if (ReproducibleSumManager::reproducibleSums()) { + if constexpr (std::is_floating_point_v<data_type>) { + ReproducibleScalarSum r_sum(item_value.arrayView(), is_owned.arrayView()); + using ReproducibleSumType = decltype(r_sum); + return ReproducibleSumType::getValue(allReduceReproducibleSum(r_sum)); + } else if constexpr (not std::is_arithmetic_v<data_type>) { + if constexpr (is_tiny_vector_v<data_type> and std::is_floating_point_v<typename data_type::data_type>) { + ReproducibleTinyVectorSum r_sum(item_value.arrayView(), is_owned.arrayView()); + using ReproducibleSumType = decltype(r_sum); + return ReproducibleSumType::getValue(allReduceReproducibleSum(r_sum)); + } else if constexpr (is_tiny_matrix_v<data_type> and std::is_floating_point_v<typename data_type::data_type>) { + ReproducibleTinyMatrixSum r_sum(item_value.arrayView(), is_owned.arrayView()); + using ReproducibleSumType = decltype(r_sum); + return ReproducibleSumType::getValue(allReduceReproducibleSum(r_sum)); + } else { + const DataType local_sum = ItemValueSum{item_value, is_owned}; + return parallel::allReduceSum(local_sum); + } + } else { + const DataType local_sum = 
ItemValueSum{item_value, is_owned}; + return parallel::allReduceSum(local_sum); + } + } else { + const DataType local_sum = ItemValueSum{item_value, is_owned}; + return parallel::allReduceSum(local_sum); + } } template <typename DataType, ItemType item_type, typename ConnectivityPtr> diff --git a/src/mesh/MeshFlatNodeBoundary.cpp b/src/mesh/MeshFlatNodeBoundary.cpp index b7de42715eff88510119b54507ed32323365c817..459a827c84ead76ed23acdf28cbd8525923694da 100644 --- a/src/mesh/MeshFlatNodeBoundary.cpp +++ b/src/mesh/MeshFlatNodeBoundary.cpp @@ -87,57 +87,108 @@ MeshFlatNodeBoundary<2>::_getNormal(const Mesh<Connectivity<2>>& mesh) template <> TinyVector<3, double> -MeshFlatNodeBoundary<3>::_getNormal(const Mesh<Connectivity<3>>& mesh) +MeshFlatNodeBoundary<3>::_getFarestNode(const Mesh<Connectivity<3>>& mesh, const Rd& x0, const Rd& x1) { - using R3 = TinyVector<3, double>; - - std::array<R3, 6> bounds = this->_getBounds(mesh); + const NodeValue<const Rd>& xr = mesh.xr(); + const auto node_number = mesh.connectivity().nodeNumber(); - const R3& xmin = bounds[0]; - const R3& ymin = bounds[1]; - const R3& zmin = bounds[2]; - const R3& xmax = bounds[3]; - const R3& ymax = bounds[4]; - const R3& zmax = bounds[5]; + using NodeNumberType = std::remove_const_t<typename decltype(node_number)::data_type>; - const R3 u = xmax - xmin; - const R3 v = ymax - ymin; - const R3 w = zmax - zmin; + Rd t = x1 - x0; + t *= 1. 
/ l2Norm(t); - const R3 uv = crossProduct(u, v); - const double uv_l2 = dot(uv, uv); + double farest_distance = 0; + Rd farest_x = zero; + NodeNumberType farest_number = std::numeric_limits<NodeNumberType>::max(); - R3 normal = uv; - double normal_l2 = uv_l2; + auto node_list = this->m_ref_node_list.list(); - const R3 uw = crossProduct(u, w); - const double uw_l2 = dot(uw, uw); + for (size_t i_node = 0; i_node < node_list.size(); ++i_node) { + const NodeId& node_id = node_list[i_node]; + const Rd& x = xr[node_id]; + const double distance = l2Norm(crossProduct(t, x - x0)); - if (uw_l2 > uv_l2) { - normal = uw; - normal_l2 = uw_l2; + if ((distance > farest_distance) or ((distance == farest_distance) and (node_number[node_id] < farest_number))) { + farest_distance = distance; + farest_number = node_number[node_id]; + farest_x = x; + } } - const R3 vw = crossProduct(v, w); - const double vw_l2 = dot(vw, vw); + if (parallel::size() > 1) { + Array<double> farest_distance_array = parallel::allGather(farest_distance); + Array<Rd> farest_x_array = parallel::allGather(farest_x); + Array<NodeNumberType> farest_number_array = parallel::allGather(farest_number); - if (vw_l2 > normal_l2) { - normal = vw; - normal_l2 = vw_l2; + Assert(farest_distance_array.size() == farest_x_array.size()); + Assert(farest_distance_array.size() == farest_number_array.size()); + + for (size_t i = 0; i < farest_distance_array.size(); ++i) { + if ((farest_distance_array[i] > farest_distance) or + ((farest_distance_array[i] == farest_distance) and (farest_number_array[i] < farest_number))) { + farest_distance = farest_distance_array[i]; + farest_number = farest_number_array[i]; + farest_x = farest_x_array[i]; + } + } } - if (normal_l2 == 0) { + return farest_x; +} + +template <> +TinyVector<3, double> +MeshFlatNodeBoundary<3>::_getNormal(const Mesh<Connectivity<3>>& mesh) +{ + using R3 = TinyVector<3, double>; + + std::array<R3, 2> diagonal = [](const std::array<R3, 6>& bounds) { + size_t max_i = 0; + 
size_t max_j = 0; + double max_length = 0; + + for (size_t i = 0; i < bounds.size(); ++i) { + for (size_t j = i + 1; j < bounds.size(); ++j) { + double length = l2Norm(bounds[i] - bounds[j]); + if (length > max_length) { + max_i = i; + max_j = j; + max_length = length; + } + } + } + + return std::array<R3, 2>{bounds[max_i], bounds[max_j]}; + }(this->_getBounds(mesh)); + + const R3& x0 = diagonal[0]; + const R3& x1 = diagonal[1]; + + if (x0 == x1) { std::ostringstream ost; ost << "invalid boundary \"" << rang::fgB::yellow << m_ref_node_list.refId() << rang::style::reset << "\": unable to compute normal"; throw NormalError(ost.str()); } - const double length = sqrt(normal_l2); + const R3 x2 = this->_getFarestNode(mesh, x0, x1); + + const R3 u = x1 - x0; + const R3 v = x2 - x0; + + R3 normal = crossProduct(u, v); + const double normal_norm = l2Norm(normal); + + if (normal_norm == 0) { + std::ostringstream ost; + ost << "invalid boundary \"" << rang::fgB::yellow << m_ref_node_list.refId() << rang::style::reset + << "\": unable to compute normal"; + throw NormalError(ost.str()); + } - normal *= 1. / length; + normal *= (1. / normal_norm); - this->_checkBoundaryIsFlat(normal, 1. / 6. * (xmin + xmax + ymin + ymax + zmin + zmax), length, mesh); + this->_checkBoundaryIsFlat(normal, 1. / 3. 
* (x0 + x1 + x2), normal_norm, mesh); return normal; } diff --git a/src/mesh/MeshFlatNodeBoundary.hpp b/src/mesh/MeshFlatNodeBoundary.hpp index 8486f02c7377fe2987560d29f2ee7cc45c1b44cf..54a10ae1c1412005440cd9e6cc49973c017265e0 100644 --- a/src/mesh/MeshFlatNodeBoundary.hpp +++ b/src/mesh/MeshFlatNodeBoundary.hpp @@ -13,21 +13,26 @@ class [[nodiscard]] MeshFlatNodeBoundary final private: const Rd m_outgoing_normal; + Rd _getFarestNode(const Mesh<Connectivity<Dimension>>& mesh, const Rd& x0, const Rd& x1); + Rd _getNormal(const Mesh<Connectivity<Dimension>>& mesh); - void _checkBoundaryIsFlat(const TinyVector<Dimension, double>& normal, const TinyVector<Dimension, double>& origin, - const double length, const Mesh<Connectivity<Dimension>>& mesh) const; + void _checkBoundaryIsFlat(const TinyVector<Dimension, double>& normal, + const TinyVector<Dimension, double>& origin, + const double length, + const Mesh<Connectivity<Dimension>>& mesh) const; Rd _getOutgoingNormal(const Mesh<Connectivity<Dimension>>& mesh); public: - const Rd& outgoingNormal() const + const Rd& + outgoingNormal() const { return m_outgoing_normal; } MeshFlatNodeBoundary& operator=(const MeshFlatNodeBoundary&) = default; - MeshFlatNodeBoundary& operator=(MeshFlatNodeBoundary&&) = default; + MeshFlatNodeBoundary& operator=(MeshFlatNodeBoundary&&) = default; template <size_t MeshDimension> friend MeshFlatNodeBoundary<MeshDimension> getMeshFlatNodeBoundary(const Mesh<Connectivity<MeshDimension>>& mesh, @@ -47,7 +52,7 @@ class [[nodiscard]] MeshFlatNodeBoundary final public: MeshFlatNodeBoundary() = default; MeshFlatNodeBoundary(const MeshFlatNodeBoundary&) = default; - MeshFlatNodeBoundary(MeshFlatNodeBoundary &&) = default; + MeshFlatNodeBoundary(MeshFlatNodeBoundary&&) = default; ~MeshFlatNodeBoundary() = default; }; diff --git a/src/mesh/SubItemArrayPerItemUtils.hpp b/src/mesh/SubItemArrayPerItemUtils.hpp index d2553590ab0f0dad219cdfa82491c05a70b5edbe..aec08ef845cc80bafd20ec8fa1a3b9bcc94415de 100644 
--- a/src/mesh/SubItemArrayPerItemUtils.hpp +++ b/src/mesh/SubItemArrayPerItemUtils.hpp @@ -4,13 +4,12 @@ #include <utils/Messenger.hpp> #include <mesh/Connectivity.hpp> +#include <mesh/ItemValueUtils.hpp> #include <mesh/SubItemArrayPerItem.hpp> #include <mesh/Synchronizer.hpp> #include <mesh/SynchronizerManager.hpp> #include <utils/PugsTraits.hpp> -#include <iostream> - template <typename DataType, typename ItemOfItem, typename ConnectivityPtr> std::remove_const_t<DataType> min(const SubItemArrayPerItem<DataType, ItemOfItem, ConnectivityPtr>& sub_item_array_per_item) @@ -34,9 +33,9 @@ min(const SubItemArrayPerItem<DataType, ItemOfItem, ConnectivityPtr>& sub_item_a PUGS_INLINE operator data_type() { - data_type reduced_array; - parallel_reduce(m_sub_item_array_per_item.numberOfItems(), *this, reduced_array); - return reduced_array; + data_type reduced_value; + parallel_reduce(m_sub_item_array_per_item.numberOfItems(), *this, reduced_value); + return reduced_value; } PUGS_INLINE @@ -67,9 +66,9 @@ min(const SubItemArrayPerItem<DataType, ItemOfItem, ConnectivityPtr>& sub_item_a PUGS_INLINE void - init(data_type& array) const + init(data_type& value) const { - array = std::numeric_limits<data_type>::max(); + value = std::numeric_limits<data_type>::max(); } PUGS_INLINE @@ -136,9 +135,9 @@ max(const SubItemArrayPerItem<DataType, ItemOfItem, ConnectivityPtr>& sub_item_a PUGS_INLINE operator data_type() { - data_type reduced_array; - parallel_reduce(m_sub_item_array_per_item.numberOfItems(), *this, reduced_array); - return reduced_array; + data_type reduced_value; + parallel_reduce(m_sub_item_array_per_item.numberOfItems(), *this, reduced_value); + return reduced_value; } PUGS_INLINE @@ -169,9 +168,9 @@ max(const SubItemArrayPerItem<DataType, ItemOfItem, ConnectivityPtr>& sub_item_a PUGS_INLINE void - init(data_type& array) const + init(data_type& value) const { - array = std::numeric_limits<data_type>::min(); + value = std::numeric_limits<data_type>::min(); } PUGS_INLINE 
@@ -217,103 +216,38 @@ max(const SubItemArrayPerItem<DataType, ItemOfItem, ConnectivityPtr>& sub_item_a template <typename DataType, typename ItemOfItem, typename ConnectivityPtr> std::remove_const_t<DataType> -sum(const SubItemArrayPerItem<DataType, ItemOfItem, ConnectivityPtr>& item_array) +sum(const SubItemArrayPerItem<DataType, ItemOfItem, ConnectivityPtr>& sub_item_array_per_item) { using SubItemArrayPerItemType = SubItemArrayPerItem<DataType, ItemOfItem, ConnectivityPtr>; constexpr ItemType item_type = ItemOfItem::item_type; - using ItemIsOwnedType = ItemValue<const bool, item_type>; using data_type = std::remove_const_t<typename SubItemArrayPerItemType::data_type>; using index_type = typename SubItemArrayPerItemType::index_type; static_assert(not std::is_same_v<data_type, bool>, "sum cannot be called on boolean data"); - class SubItemArrayPerItemSum - { - private: - const SubItemArrayPerItemType& m_sub_item_array_per_item; - const ItemIsOwnedType m_is_owned; - - public: - PUGS_INLINE - operator data_type() - { - data_type reduced_array; - parallel_reduce(m_sub_item_array_per_item.numberOfItems(), *this, reduced_array); - return reduced_array; - } - - PUGS_INLINE - void - operator()(const index_type& item_id, data_type& data) const - { - if (m_is_owned[item_id]) { - auto sub_item_table = m_sub_item_array_per_item.itemTable(item_id); - for (size_t i = 0; i < sub_item_table.numberOfRows(); ++i) { - for (size_t j = 0; j < sub_item_table.numberOfColumns(); ++j) { - data += sub_item_table(i, j); - } + ItemValue<data_type, item_type> item_sum{*sub_item_array_per_item.connectivity_ptr()}; + parallel_for( + sub_item_array_per_item.numberOfItems(), PUGS_LAMBDA(index_type item_id) { + auto sub_item_table = sub_item_array_per_item.itemTable(item_id); + data_type sub_item_table_sum = [] { + if constexpr (std::is_arithmetic_v<data_type>) { + return 0; + } else { + static_assert(is_tiny_vector_v<data_type> or is_tiny_matrix_v<data_type>, "invalid data type"); + return 
zero; } - } - } + }(); - PUGS_INLINE - void - join(data_type& dst, const data_type& src) const - { - dst += src; - } - - PUGS_INLINE - void - init(data_type& array) const - { - if constexpr (std::is_arithmetic_v<data_type>) { - array = 0; - } else { - static_assert(is_tiny_vector_v<data_type> or is_tiny_matrix_v<data_type>, "invalid data type"); - array = zero; + for (size_t i = 0; i < sub_item_table.numberOfRows(); ++i) { + for (size_t j = 0; j < sub_item_table.numberOfColumns(); ++j) { + sub_item_table_sum += sub_item_table(i, j); + } } - } - PUGS_INLINE - SubItemArrayPerItemSum(const SubItemArrayPerItemType& item_array) - : m_sub_item_array_per_item(item_array), m_is_owned([&](const IConnectivity& connectivity) { - Assert((connectivity.dimension() > 0) and (connectivity.dimension() <= 3), - "unexpected connectivity dimension"); - - switch (connectivity.dimension()) { - case 1: { - const auto& connectivity_1d = static_cast<const Connectivity1D&>(connectivity); - return connectivity_1d.isOwned<item_type>(); - break; - } - case 2: { - const auto& connectivity_2d = static_cast<const Connectivity2D&>(connectivity); - return connectivity_2d.isOwned<item_type>(); - break; - } - case 3: { - const auto& connectivity_3d = static_cast<const Connectivity3D&>(connectivity); - return connectivity_3d.isOwned<item_type>(); - break; - } - // LCOV_EXCL_START - default: { - throw UnexpectedError("unexpected dimension"); - } - // LCOV_EXCL_STOP - } - }(*item_array.connectivity_ptr())) - { - ; - } - - PUGS_INLINE - ~SubItemArrayPerItemSum() = default; - }; + item_sum[item_id] = sub_item_table_sum; + }); - const DataType local_sum = SubItemArrayPerItemSum{item_array}; - return parallel::allReduceSum(local_sum); + return sum(item_sum); } template <typename DataType, typename ItemOfItem, typename ConnectivityPtr> diff --git a/src/mesh/SubItemValuePerItemUtils.hpp b/src/mesh/SubItemValuePerItemUtils.hpp index 
304acae23a31e610695a2b309e72e7d8035d01a5..575c39c5238877bb064883c54b7d08792547000b 100644 --- a/src/mesh/SubItemValuePerItemUtils.hpp +++ b/src/mesh/SubItemValuePerItemUtils.hpp @@ -4,13 +4,12 @@ #include <utils/Messenger.hpp> #include <mesh/Connectivity.hpp> +#include <mesh/ItemValueUtils.hpp> #include <mesh/SubItemValuePerItem.hpp> #include <mesh/Synchronizer.hpp> #include <mesh/SynchronizerManager.hpp> #include <utils/PugsTraits.hpp> -#include <iostream> - template <typename DataType, typename ItemOfItem, typename ConnectivityPtr> std::remove_const_t<DataType> min(const SubItemValuePerItem<DataType, ItemOfItem, ConnectivityPtr>& sub_item_value_per_item) @@ -213,101 +212,28 @@ max(const SubItemValuePerItem<DataType, ItemOfItem, ConnectivityPtr>& sub_item_v template <typename DataType, typename ItemOfItem, typename ConnectivityPtr> std::remove_const_t<DataType> -sum(const SubItemValuePerItem<DataType, ItemOfItem, ConnectivityPtr>& item_value) +sum(const SubItemValuePerItem<DataType, ItemOfItem, ConnectivityPtr>& sub_item_value_per_item) { using SubItemValuePerItemType = SubItemValuePerItem<DataType, ItemOfItem, ConnectivityPtr>; constexpr ItemType item_type = ItemOfItem::item_type; - using ItemIsOwnedType = ItemValue<const bool, item_type>; using data_type = std::remove_const_t<typename SubItemValuePerItemType::data_type>; using index_type = typename SubItemValuePerItemType::index_type; static_assert(not std::is_same_v<data_type, bool>, "sum cannot be called on boolean data"); - class SubItemValuePerItemSum - { - private: - const SubItemValuePerItemType& m_sub_item_value_per_item; - const ItemIsOwnedType m_is_owned; - - public: - PUGS_INLINE - operator data_type() - { - data_type reduced_value; - parallel_reduce(m_sub_item_value_per_item.numberOfItems(), *this, reduced_value); - return reduced_value; - } - - PUGS_INLINE - void - operator()(const index_type& item_id, data_type& data) const - { - if (m_is_owned[item_id]) { - auto sub_item_array = 
m_sub_item_value_per_item.itemArray(item_id); - for (size_t i = 0; i < sub_item_array.size(); ++i) { - data += sub_item_array[i]; - } - } - } - - PUGS_INLINE - void - join(data_type& dst, const data_type& src) const - { - dst += src; - } - - PUGS_INLINE - void - init(data_type& value) const - { - if constexpr (std::is_arithmetic_v<data_type>) { - value = 0; - } else { - static_assert(is_tiny_vector_v<data_type> or is_tiny_matrix_v<data_type>, "invalid data type"); - value = zero; + ItemValue<data_type, item_type> item_sum{*sub_item_value_per_item.connectivity_ptr()}; + parallel_for( + sub_item_value_per_item.numberOfItems(), PUGS_LAMBDA(index_type item_id) { + auto sub_item_array = sub_item_value_per_item.itemArray(item_id); + data_type sub_item_array_sum = sub_item_array[0]; + for (size_t i = 1; i < sub_item_array.size(); ++i) { + sub_item_array_sum += sub_item_array[i]; } - } - - PUGS_INLINE - SubItemValuePerItemSum(const SubItemValuePerItemType& item_value) - : m_sub_item_value_per_item(item_value), m_is_owned([&](const IConnectivity& connectivity) { - Assert((connectivity.dimension() > 0) and (connectivity.dimension() <= 3), - "unexpected connectivity dimension"); - - switch (connectivity.dimension()) { - case 1: { - const auto& connectivity_1d = static_cast<const Connectivity1D&>(connectivity); - return connectivity_1d.isOwned<item_type>(); - break; - } - case 2: { - const auto& connectivity_2d = static_cast<const Connectivity2D&>(connectivity); - return connectivity_2d.isOwned<item_type>(); - break; - } - case 3: { - const auto& connectivity_3d = static_cast<const Connectivity3D&>(connectivity); - return connectivity_3d.isOwned<item_type>(); - break; - } - // LCOV_EXCL_START - default: { - throw UnexpectedError("unexpected dimension"); - } - // LCOV_EXCL_STOP - } - }(*item_value.connectivity_ptr())) - { - ; - } - PUGS_INLINE - ~SubItemValuePerItemSum() = default; - }; + item_sum[item_id] = sub_item_array_sum; + }); - const DataType local_sum = 
SubItemValuePerItemSum{item_value}; - return parallel::allReduceSum(local_sum); + return sum(item_sum); } template <typename DataType, typename ItemOfItem, typename ConnectivityPtr> diff --git a/src/output/GnuplotWriter.cpp b/src/output/GnuplotWriter.cpp index 50969799eeb4250b28d21722e8476ab86a88d9eb..8ccf7767180b30524ba54f7c4f085c3ad35783d2 100644 --- a/src/output/GnuplotWriter.cpp +++ b/src/output/GnuplotWriter.cpp @@ -5,6 +5,7 @@ #include <mesh/Mesh.hpp> #include <mesh/MeshData.hpp> #include <mesh/MeshDataManager.hpp> +#include <utils/Filesystem.hpp> #include <utils/Messenger.hpp> #include <utils/PugsTraits.hpp> #include <utils/RevisionInfo.hpp> @@ -243,8 +244,16 @@ GnuplotWriter::_write(const MeshType& mesh, const OutputNamedItemDataSet& output_named_item_data_set, std::optional<double> time) const { + createDirectoryIfNeeded(_getFilename()); + if (parallel::rank() == 0) { std::ofstream fout{_getFilename()}; + if (not fout) { + std::ostringstream error_msg; + error_msg << "cannot create file \"" << rang::fgB::yellow << _getFilename() << rang::fg::reset << '"'; + throw NormalError(error_msg.str()); + } + fout.precision(15); fout.setf(std::ios_base::scientific); @@ -286,6 +295,12 @@ GnuplotWriter::_write(const MeshType& mesh, for (size_t i_rank = 0; i_rank < parallel::size(); ++i_rank) { if (i_rank == parallel::rank()) { std::ofstream fout(_getFilename(), std::ios_base::app); + if (not fout) { + std::ostringstream error_msg; + error_msg << "cannot open file \"" << rang::fgB::yellow << _getFilename() << rang::fg::reset << '"'; + throw NormalError(error_msg.str()); + } + fout.precision(15); fout.setf(std::ios_base::scientific); diff --git a/src/output/GnuplotWriter1D.cpp b/src/output/GnuplotWriter1D.cpp index a3156904843bdeb77be718de6a10614a3333afae..3e0740446e961505eb01e36eee5ba168e0f47dcc 100644 --- a/src/output/GnuplotWriter1D.cpp +++ b/src/output/GnuplotWriter1D.cpp @@ -5,6 +5,7 @@ #include <mesh/Mesh.hpp> #include <mesh/MeshData.hpp> #include 
<mesh/MeshDataManager.hpp> +#include <utils/Filesystem.hpp> #include <utils/Messenger.hpp> #include <utils/PugsTraits.hpp> #include <utils/RevisionInfo.hpp> @@ -322,6 +323,12 @@ GnuplotWriter1D::_write(const MeshType& mesh, if (parallel::rank() == 0) { fout.open(_getFilename()); + if (not fout) { + std::ostringstream error_msg; + error_msg << "cannot create file \"" << rang::fgB::yellow << _getFilename() << rang::fg::reset << '"'; + throw NormalError(error_msg.str()); + } + fout.precision(15); fout.setf(std::ios_base::scientific); fout << _getDateAndVersionComment(); diff --git a/src/output/VTKWriter.cpp b/src/output/VTKWriter.cpp index 0a40c7265e05a17d31e59bd41eeb10cb63935821..6ec6295e44b54bef9ea21c110120fdd873f71488 100644 --- a/src/output/VTKWriter.cpp +++ b/src/output/VTKWriter.cpp @@ -4,6 +4,7 @@ #include <mesh/Mesh.hpp> #include <mesh/MeshData.hpp> #include <mesh/MeshDataManager.hpp> +#include <utils/Filesystem.hpp> #include <utils/Messenger.hpp> #include <utils/RevisionInfo.hpp> #include <utils/Stringify.hpp> @@ -364,8 +365,16 @@ VTKWriter::_write(const MeshType& mesh, output_named_item_data_set.add(NamedItemData{"cell_number", mesh.connectivity().cellNumber()}); output_named_item_data_set.add(NamedItemData{"node_number", mesh.connectivity().nodeNumber()}); + createDirectoryIfNeeded(m_base_filename); + if (parallel::rank() == 0) { // write PVTK file std::ofstream fout(_getFilenamePVTU()); + if (not fout) { + std::ostringstream error_msg; + error_msg << "cannot create file \"" << rang::fgB::yellow << _getFilenamePVTU() << rang::fg::reset << '"'; + throw NormalError(error_msg.str()); + } + fout << "<?xml version=\"1.0\"?>\n"; fout << _getDateAndVersionComment(); fout << "<VTKFile type=\"PUnstructuredGrid\">\n"; @@ -424,7 +433,7 @@ VTKWriter::_write(const MeshType& mesh, fout << "</PCellData>\n"; for (size_t i_rank = 0; i_rank < parallel::size(); ++i_rank) { - fout << "<Piece Source=\"" << _getFilenameVTU(i_rank) << "\"/>\n"; + fout << "<Piece Source=" << 
std::filesystem::path{_getFilenameVTU(i_rank)}.filename() << "/>\n"; } fout << "</PUnstructuredGrid>\n"; fout << "</VTKFile>\n"; @@ -435,6 +444,13 @@ VTKWriter::_write(const MeshType& mesh, // write VTK files std::ofstream fout(_getFilenameVTU(parallel::rank())); + if (not fout) { + std::ostringstream error_msg; + error_msg << "cannot create file \"" << rang::fgB::yellow << _getFilenameVTU(parallel::rank()) << rang::fg::reset + << '"'; + throw NormalError(error_msg.str()); + } + fout << "<?xml version=\"1.0\"?>\n"; fout << _getDateAndVersionComment(); fout << "<VTKFile type=\"UnstructuredGrid\""; @@ -632,7 +648,13 @@ VTKWriter::_write(const MeshType& mesh, } if (parallel::rank() == 0) { // write PVD file - std::ofstream fout(m_base_filename + ".pvd"); + const std::string pvd_filename = m_base_filename + ".pvd"; + std::ofstream fout(pvd_filename); + if (not fout) { + std::ostringstream error_msg; + error_msg << "cannot create file \"" << rang::fgB::yellow << pvd_filename << rang::fg::reset << '"'; + throw NormalError(error_msg.str()); + } fout << "<?xml version=\"1.0\"?>\n"; fout << _getDateAndVersionComment(); @@ -645,11 +667,13 @@ VTKWriter::_write(const MeshType& mesh, sout << m_base_filename; sout << '.' 
<< std::setfill('0') << std::setw(4) << i_time << ".pvtu"; - fout << "<DataSet timestep=\"" << m_period_manager->savedTime(i_time) << "\" file=\"" << sout.str() << "\"/>\n"; + fout << "<DataSet timestep=\"" << m_period_manager->savedTime(i_time) + << "\" file=" << std::filesystem::path{sout.str()}.filename() << "/>\n"; } - fout << "<DataSet timestep=\"" << *time << "\" file=\"" << _getFilenamePVTU() << "\"/>\n"; + fout << "<DataSet timestep=\"" << *time << "\" file=" << std::filesystem::path{_getFilenamePVTU()}.filename() + << "/>\n"; } else { - fout << "<DataSet file=\"" << _getFilenamePVTU() << "\"/>\n"; + fout << "<DataSet file=" << std::filesystem::path{_getFilenamePVTU()}.filename() << "/>\n"; } fout << "</Collection>\n"; diff --git a/src/scheme/DiscreteFunctionP0.hpp b/src/scheme/DiscreteFunctionP0.hpp index 6587a19f2c77d1f063f38765b4e768d92798f7a4..5a49b9db3d7f6ecd6b589f8a58703ec02d311f6b 100644 --- a/src/scheme/DiscreteFunctionP0.hpp +++ b/src/scheme/DiscreteFunctionP0.hpp @@ -708,79 +708,19 @@ class DiscreteFunctionP0 return sum(f.m_cell_values); } - private: - class Integrator - { - private: - const DiscreteFunctionP0 m_function; - CellValue<const double> m_cell_volume; - CellValue<const bool> m_cell_is_owned; - - public: - PUGS_INLINE - operator std::decay_t<data_type>() - { - std::decay_t<data_type> reduced_value; - if constexpr (std::is_arithmetic_v<std::decay_t<data_type>>) { - reduced_value = 0; - } else { - static_assert(is_tiny_vector_v<std::decay_t<data_type>> or is_tiny_matrix_v<std::decay_t<data_type>>, - "invalid data type"); - reduced_value = zero; - } - parallel_reduce(m_cell_volume.numberOfItems(), *this, reduced_value); - return reduced_value; - } - - PUGS_INLINE - void - operator()(const CellId& cell_id, std::decay_t<data_type>& data) const - { - if (m_cell_is_owned[cell_id]) { - data += m_cell_volume[cell_id] * m_function[cell_id]; - } - } - - PUGS_INLINE - void - join(std::decay_t<data_type>& dst, const std::decay_t<data_type>& src) 
const - { - dst += src; - } - - PUGS_INLINE - void - init(std::decay_t<data_type>& value) const - { - if constexpr (std::is_arithmetic_v<data_type>) { - value = 0; - } else { - static_assert(is_tiny_vector_v<std::decay_t<data_type>> or is_tiny_matrix_v<std::decay_t<data_type>>, - "invalid data type"); - value = zero; - } - } - - PUGS_INLINE - Integrator(const DiscreteFunctionP0& function) : m_function(function) - { - const Mesh<Connectivity<Dimension>>& mesh = *function.m_mesh; - m_cell_volume = MeshDataManager::instance().getMeshData(mesh).Vj(); - m_cell_is_owned = mesh.connectivity().cellIsOwned(); - } - - PUGS_INLINE - ~Integrator() = default; - }; - - public: PUGS_INLINE friend DataType integrate(const DiscreteFunctionP0& f) { Assert(f.m_cell_values.isBuilt()); - const DataType integral = Integrator{f}; - return parallel::allReduceSum(integral); + const Mesh<Connectivity<Dimension>>& mesh = *f.m_mesh; + CellValue<const double> cell_volume = MeshDataManager::instance().getMeshData(mesh).Vj(); + + CellValue<std::remove_const_t<DataType>> f_v{mesh.connectivity()}; + parallel_for( + mesh.numberOfCells(), PUGS_LAMBDA(CellId cell_id) { f_v[cell_id] = cell_volume[cell_id] * f[cell_id]; }); + + return sum(f_v); } DiscreteFunctionP0(const std::shared_ptr<const MeshType>& mesh) : m_mesh{mesh}, m_cell_values{mesh->connectivity()} {} diff --git a/src/scheme/FluxingAdvectionSolver.cpp b/src/scheme/FluxingAdvectionSolver.cpp index 264ecef042956e6b77230e00f49cd321b50dcc6a..754c6e3ac6448a96e5e05c0a7f60967243b4798f 100644 --- a/src/scheme/FluxingAdvectionSolver.cpp +++ b/src/scheme/FluxingAdvectionSolver.cpp @@ -1,5 +1,8 @@ #include <scheme/FluxingAdvectionSolver.hpp> +#include <analysis/GaussQuadratureDescriptor.hpp> +#include <analysis/QuadratureManager.hpp> +#include <geometry/PrismTransformation.hpp> #include <language/utils/EvaluateAtPoints.hpp> #include <language/utils/InterpolateItemArray.hpp> #include <language/utils/InterpolateItemValue.hpp> @@ -322,6 +325,10 @@ 
template <> FaceValue<double> FluxingAdvectionSolver<3>::_computeAlgebraicFluxingVolume() { + // due to the z component of the jacobian determinant, degree 3 + // polynomials must be exactly integrated + const QuadratureFormula<3>& gauss = QuadratureManager::instance().getPrismFormula(GaussQuadratureDescriptor(3)); + const auto face_to_node_matrix = m_old_mesh->connectivity().faceToNodeMatrix(); FaceValue<double> algebraic_fluxing_volume(m_new_mesh->connectivity()); auto old_xr = m_old_mesh->xr(); @@ -329,31 +336,10 @@ FluxingAdvectionSolver<3>::_computeAlgebraicFluxingVolume() parallel_for( m_new_mesh->numberOfFaces(), PUGS_LAMBDA(FaceId face_id) { const auto& face_to_node = face_to_node_matrix[face_id]; - if (face_to_node.size() == 4) { - const Rd& x0 = old_xr[face_to_node[0]]; - const Rd& x1 = old_xr[face_to_node[1]]; - const Rd& x2 = old_xr[face_to_node[2]]; - const Rd& x3 = old_xr[face_to_node[3]]; - - const Rd& x4 = new_xr[face_to_node[0]]; - const Rd& x5 = new_xr[face_to_node[1]]; - const Rd& x6 = new_xr[face_to_node[2]]; - const Rd& x7 = new_xr[face_to_node[3]]; - - const Rd& a1 = x6 - x1; - const Rd& a2 = x6 - x3; - const Rd& a3 = x6 - x4; - - const Rd& b1 = x7 - x0; - const Rd& b2 = x5 - x0; - const Rd& b3 = x2 - x0; - TinyMatrix<3> M1(a1 + b1, a2, a3); - TinyMatrix<3> M2(b1, a2 + b2, a3); - TinyMatrix<3> M3(a1, b2, a3 + b3); + const size_t face_nb_node = face_to_node.size(); - algebraic_fluxing_volume[face_id] = (det(M1) + det(M2) + det(M3)) / 12; - } else if (face_to_node.size() == 3) { + if (face_nb_node == 3) { const Rd& x0 = old_xr[face_to_node[0]]; const Rd& x1 = old_xr[face_to_node[1]]; const Rd& x2 = old_xr[face_to_node[2]]; @@ -362,21 +348,40 @@ FluxingAdvectionSolver<3>::_computeAlgebraicFluxingVolume() const Rd& x4 = new_xr[face_to_node[1]]; const Rd& x5 = new_xr[face_to_node[2]]; - const Rd& a1 = x5 - x1; - const Rd& a2 = x5 - x2; - const Rd& a3 = x5 - x3; + double volume = 0; - const Rd& b1 = x5 - x0; - const Rd& b2 = x4 - x0; - const Rd& b3 
= x2 - x0; - - TinyMatrix<3> M1(a1 + b1, a2, a3); - TinyMatrix<3> M2(b1, a2 + b2, a3); - TinyMatrix<3> M3(a1, b2, a3 + b3); + PrismTransformation T(x0, x1, x2, x3, x4, x5); + for (size_t i = 0; i < gauss.numberOfPoints(); ++i) { + volume += gauss.weight(i) * T.jacobianDeterminant(gauss.point(i)); + } - algebraic_fluxing_volume[face_id] = (det(M1) + det(M2) + det(M3)) / 12; + algebraic_fluxing_volume[face_id] = volume; } else { - throw NotImplementedError("Not implemented for non quad faces"); + Rd xg_old = zero; + for (size_t i_node = 0; i_node < face_nb_node; ++i_node) { + xg_old += old_xr[face_to_node[i_node]]; + } + xg_old *= 1. / face_nb_node; + Rd xg_new = zero; + for (size_t i_node = 0; i_node < face_nb_node; ++i_node) { + xg_new += new_xr[face_to_node[i_node]]; + } + xg_new *= 1. / face_nb_node; + + double volume = 0; + for (size_t i_node = 0; i_node < face_nb_node; ++i_node) { + PrismTransformation T(xg_old, // + old_xr[face_to_node[i_node]], // + old_xr[face_to_node[(i_node + 1) % face_nb_node]], // + xg_new, // + new_xr[face_to_node[i_node]], // + new_xr[face_to_node[(i_node + 1) % face_nb_node]]); + for (size_t i = 0; i < gauss.numberOfPoints(); ++i) { + volume += gauss.weight(i) * T.jacobianDeterminant(gauss.point(i)); + } + } + + algebraic_fluxing_volume[face_id] = volume; } }); diff --git a/src/utils/Array.hpp b/src/utils/Array.hpp index 4943d4c08d983f67d1550262e34aa80030d49bf1..2f0146e2b9d08c231eaadee7e7f50eeea439ddaf 100644 --- a/src/utils/Array.hpp +++ b/src/utils/Array.hpp @@ -6,6 +6,8 @@ #include <utils/PugsAssert.hpp> #include <utils/PugsMacros.hpp> #include <utils/PugsUtils.hpp> +#include <utils/ReproducibleSumManager.hpp> +#include <utils/ReproducibleSumUtils.hpp> #include <utils/Types.hpp> #include <algorithm> @@ -373,8 +375,9 @@ template <typename DataType> [[nodiscard]] std::remove_const_t<DataType> sum(const Array<DataType>& array) { - using ArrayType = Array<DataType>; - using data_type = std::remove_const_t<DataType>; + using ArrayType = 
Array<DataType>; + using data_type = std::remove_const_t<DataType>; + using index_type = typename ArrayType::index_type; static_assert(not std::is_same_v<data_type, bool>, "sum cannot be called on boolean data"); @@ -429,7 +432,26 @@ sum(const Array<DataType>& array) ~ArraySum() = default; }; - return ArraySum(array); + if (ReproducibleSumManager::reproducibleSums()) { + if constexpr (std::is_floating_point_v<data_type>) { + ReproducibleScalarSum r_sum(array); + return r_sum.getSum(); + } else if constexpr (not std::is_arithmetic_v<data_type>) { + if constexpr (is_tiny_vector_v<data_type> and std::is_floating_point_v<typename data_type::data_type>) { + ReproducibleTinyVectorSum r_sum(array); + return r_sum.getSum(); + } else if constexpr (is_tiny_matrix_v<data_type> and std::is_floating_point_v<typename data_type::data_type>) { + ReproducibleTinyMatrixSum r_sum(array); + return r_sum.getSum(); + } else { + return ArraySum(array); + } + } else { + return ArraySum(array); + } + } else { + return ArraySum(array); + } } #endif // ARRAY_HPP diff --git a/src/utils/CMakeLists.txt b/src/utils/CMakeLists.txt index 7c6c99fcb8edee03b3cbed4687275eef3f98a527..9284d3f038b599a073649b63c249b8144ec1bef5 100644 --- a/src/utils/CMakeLists.txt +++ b/src/utils/CMakeLists.txt @@ -4,6 +4,7 @@ add_library( PugsUtils BuildInfo.cpp BacktraceManager.cpp + CommunicatorManager.cpp ConsoleManager.cpp Demangle.cpp Exceptions.cpp @@ -14,6 +15,7 @@ add_library( PETScWrapper.cpp PugsUtils.cpp RandomEngine.cpp + ReproducibleSumManager.cpp RevisionInfo.cpp SignalManager.cpp SLEPcWrapper.cpp diff --git a/src/utils/CommunicatorManager.cpp b/src/utils/CommunicatorManager.cpp new file mode 100644 index 0000000000000000000000000000000000000000..511257c4bb10f941ca592d2500e55b32ef45ec7d --- /dev/null +++ b/src/utils/CommunicatorManager.cpp @@ -0,0 +1,27 @@ +#include <utils/CommunicatorManager.hpp> + +#include <utils/PugsAssert.hpp> + +std::optional<int> CommunicatorManager::s_split_color; + +bool 
+CommunicatorManager::hasSplitColor() +{ + return s_split_color.has_value(); +} + +int +CommunicatorManager::splitColor() +{ + Assert(s_split_color.has_value(), "split color has not been defined"); + + return s_split_color.value(); +} + +void +CommunicatorManager::setSplitColor(int split_color) +{ + Assert(not s_split_color.has_value(), "split color has already been defined"); + + s_split_color = split_color; +} diff --git a/src/utils/CommunicatorManager.hpp b/src/utils/CommunicatorManager.hpp new file mode 100644 index 0000000000000000000000000000000000000000..df0ea8dba61b7ab51f9dd5819a9a20c9eca684ec --- /dev/null +++ b/src/utils/CommunicatorManager.hpp @@ -0,0 +1,17 @@ +#ifndef COMMUNICATOR_MANAGER_HPP +#define COMMUNICATOR_MANAGER_HPP + +#include <optional> + +class CommunicatorManager +{ + private: + static std::optional<int> s_split_color; + + public: + static bool hasSplitColor(); + static int splitColor(); + static void setSplitColor(int split_color); +}; + +#endif // COMMUNICATOR_MANAGER_HPP diff --git a/src/utils/Filesystem.hpp b/src/utils/Filesystem.hpp new file mode 100644 index 0000000000000000000000000000000000000000..63af52b2e432f1baf55d2cbfd895485a408f7e10 --- /dev/null +++ b/src/utils/Filesystem.hpp @@ -0,0 +1,23 @@ +#ifndef FILESYSTEM_HPP +#define FILESYSTEM_HPP + +#include <utils/Exceptions.hpp> +#include <utils/PugsMacros.hpp> + +#include <filesystem> + +PUGS_INLINE void +createDirectoryIfNeeded(const std::string& filename) +{ + std::filesystem::path path = std::filesystem::path(filename).parent_path(); + if (not path.empty()) { + try { + std::filesystem::create_directories(path); + } + catch (std::filesystem::filesystem_error& e) { + throw NormalError(e.what()); + } + } +} + +#endif // FILESYSTEM_HPP diff --git a/src/utils/Messenger.cpp b/src/utils/Messenger.cpp index e3fff49d02f94649f641f5a273f9f607cf27edd9..05dfa43e5a22f5223fd6c1e8291302c5366f5015 100644 --- a/src/utils/Messenger.cpp +++ b/src/utils/Messenger.cpp @@ -1,5 +1,7 @@ #include
<utils/Messenger.hpp> +#include <utils/CommunicatorManager.hpp> + #include <iostream> namespace parallel @@ -31,13 +33,34 @@ Messenger::Messenger([[maybe_unused]] int& argc, [[maybe_unused]] char* argv[]) #ifdef PUGS_HAS_MPI MPI_Init(&argc, &argv); - m_rank = []() { + if (CommunicatorManager::hasSplitColor()) { + int key = 0; + + auto res = MPI_Comm_split(MPI_COMM_WORLD, CommunicatorManager::splitColor(), key, &m_pugs_comm_world); + if (res) { + MPI_Abort(MPI_COMM_WORLD, res); + } + } + + m_rank = [&]() { + int rank; + MPI_Comm_rank(m_pugs_comm_world, &rank); + return rank; + }(); + + m_size = [&]() { + int size = 0; + MPI_Comm_size(m_pugs_comm_world, &size); + return size; + }(); + + m_global_rank = [&]() { int rank; MPI_Comm_rank(MPI_COMM_WORLD, &rank); return rank; }(); - m_size = []() { + m_global_size = [&]() { int size = 0; MPI_Comm_size(MPI_COMM_WORLD, &size); return size; @@ -64,7 +87,7 @@ void Messenger::barrier() const { #ifdef PUGS_HAS_MPI - MPI_Barrier(MPI_COMM_WORLD); + MPI_Barrier(m_pugs_comm_world); #endif // PUGS_HAS_MPI } diff --git a/src/utils/Messenger.hpp b/src/utils/Messenger.hpp index 416dbdf1d65e8b028b9ed39fff2a2acb4ebcb2d5..21e17abf24600869b619a66415dc51714ec2cb70 100644 --- a/src/utils/Messenger.hpp +++ b/src/utils/Messenger.hpp @@ -23,7 +23,7 @@ namespace parallel { class Messenger { - private: + public: struct helper { #ifdef PUGS_HAS_MPI @@ -87,9 +87,16 @@ class Messenger static Messenger* m_instance; Messenger(int& argc, char* argv[]); +#ifdef PUGS_HAS_MPI + MPI_Comm m_pugs_comm_world = MPI_COMM_WORLD; +#endif // PUGS_HAS_MPI size_t m_rank{0}; size_t m_size{1}; + // Rank and size in the whole MPI_COMM_WORLD of the process + size_t m_global_rank{0}; + size_t m_global_size{1}; + template <typename DataType> void _gather(const DataType& data, Array<DataType> gather, size_t rank) const @@ -102,7 +109,7 @@ class Messenger auto gather_address = (gather.size() > 0) ? 
&(gather[0]) : nullptr; - MPI_Gather(&data, 1, mpi_datatype, gather_address, 1, mpi_datatype, rank, MPI_COMM_WORLD); + MPI_Gather(&data, 1, mpi_datatype, gather_address, 1, mpi_datatype, rank, m_pugs_comm_world); #else // PUGS_HAS_MPI gather[0] = data; #endif // PUGS_HAS_MPI @@ -131,7 +138,7 @@ class Messenger auto gather_address = (gather_array.size() > 0) ? &(gather_array[0]) : nullptr; MPI_Gather(data_address, data_array.size(), mpi_datatype, gather_address, data_array.size(), mpi_datatype, rank, - MPI_COMM_WORLD); + m_pugs_comm_world); #else // PUGS_HAS_MPI copy_to(data_array, gather_array); #endif // PUGS_HAS_MPI @@ -172,7 +179,7 @@ class Messenger auto gather_address = (gather_array.size() > 0) ? &(gather_array[0]) : nullptr; MPI_Gatherv(data_address, data_array.size(), mpi_datatype, gather_address, sizes_address, positions_address, - mpi_datatype, rank, MPI_COMM_WORLD); + mpi_datatype, rank, m_pugs_comm_world); #else // PUGS_HAS_MPI copy_to(data_array, gather_array); #endif // PUGS_HAS_MPI @@ -188,7 +195,7 @@ class Messenger #ifdef PUGS_HAS_MPI MPI_Datatype mpi_datatype = Messenger::helper::mpiType<DataType>(); - MPI_Allgather(&data, 1, mpi_datatype, &(gather[0]), 1, mpi_datatype, MPI_COMM_WORLD); + MPI_Allgather(&data, 1, mpi_datatype, &(gather[0]), 1, mpi_datatype, m_pugs_comm_world); #else // PUGS_HAS_MPI gather[0] = data; #endif // PUGS_HAS_MPI @@ -217,7 +224,7 @@ class Messenger auto gather_address = (gather_array.size() > 0) ? &(gather_array[0]) : nullptr; MPI_Allgather(data_address, data_array.size(), mpi_datatype, gather_address, data_array.size(), mpi_datatype, - MPI_COMM_WORLD); + m_pugs_comm_world); #else // PUGS_HAS_MPI copy_to(data_array, gather_array); #endif // PUGS_HAS_MPI @@ -257,7 +264,7 @@ class Messenger auto gather_address = (gather_array.size() > 0) ? 
&(gather_array[0]) : nullptr; MPI_Allgatherv(data_address, data_array.size(), mpi_datatype, gather_address, sizes_address, positions_address, - mpi_datatype, MPI_COMM_WORLD); + mpi_datatype, m_pugs_comm_world); #else // PUGS_HAS_MPI copy_to(data_array, gather_array); #endif // PUGS_HAS_MPI @@ -273,7 +280,7 @@ class Messenger #ifdef PUGS_HAS_MPI MPI_Datatype mpi_datatype = Messenger::helper::mpiType<DataType>(); - MPI_Bcast(&data, 1, mpi_datatype, root_rank, MPI_COMM_WORLD); + MPI_Bcast(&data, 1, mpi_datatype, root_rank, m_pugs_comm_world); #endif // PUGS_HAS_MPI } @@ -289,7 +296,7 @@ class Messenger auto array_address = (array.size() > 0) ? &(array[0]) : nullptr; MPI_Datatype mpi_datatype = Messenger::helper::mpiType<DataType>(); - MPI_Bcast(array_address, array.size(), mpi_datatype, root_rank, MPI_COMM_WORLD); + MPI_Bcast(array_address, array.size(), mpi_datatype, root_rank, m_pugs_comm_world); #endif // PUGS_HAS_MPI } @@ -318,7 +325,7 @@ class Messenger auto sent_address = (sent_array.size() > 0) ? &(sent_array[0]) : nullptr; auto recv_address = (recv_array.size() > 0) ? 
&(recv_array[0]) : nullptr; - MPI_Alltoall(sent_address, count, mpi_datatype, recv_address, count, mpi_datatype, MPI_COMM_WORLD); + MPI_Alltoall(sent_address, count, mpi_datatype, recv_address, count, mpi_datatype, m_pugs_comm_world); #else // PUGS_HAS_MPI copy_to(sent_array, recv_array); #endif // PUGS_HAS_MPI @@ -348,7 +355,7 @@ class Messenger const auto sent_array = sent_array_list[i_send]; if (sent_array.size() > 0) { MPI_Request request; - MPI_Isend(&(sent_array[0]), sent_array.size(), mpi_datatype, i_send, 0, MPI_COMM_WORLD, &request); + MPI_Isend(&(sent_array[0]), sent_array.size(), mpi_datatype, i_send, 0, m_pugs_comm_world, &request); request_list.push_back(request); } } @@ -357,7 +364,7 @@ class Messenger auto recv_array = recv_array_list[i_recv]; if (recv_array.size() > 0) { MPI_Request request; - MPI_Irecv(&(recv_array[0]), recv_array.size(), mpi_datatype, i_recv, 0, MPI_COMM_WORLD, &request); + MPI_Irecv(&(recv_array[0]), recv_array.size(), mpi_datatype, i_recv, 0, m_pugs_comm_world, &request); request_list.push_back(request); } } @@ -424,6 +431,33 @@ class Messenger return m_size; } + // The global rank is the rank in the whole MPI_COMM_WORLD, one + // generally needs rank() for classical parallelism + PUGS_INLINE + const size_t& + globalRank() const + { + return m_global_rank; + } + + // The global size is the size in the whole MPI_COMM_WORLD, one + // generally needs size() for classical parallelism + PUGS_INLINE + const size_t& + globalSize() const + { + return m_global_size; + } + +#ifdef PUGS_HAS_MPI + PUGS_INLINE + const MPI_Comm& + comm() const + { + return m_pugs_comm_world; + } +#endif // PUGS_HAS_MPI + void barrier() const; template <typename DataType> @@ -438,7 +472,7 @@ class Messenger MPI_Datatype mpi_datatype = Messenger::helper::mpiType<DataType>(); DataType min_data = data; - MPI_Allreduce(&data, &min_data, 1, mpi_datatype, MPI_MIN, MPI_COMM_WORLD); + MPI_Allreduce(&data, &min_data, 1, mpi_datatype, MPI_MIN, m_pugs_comm_world); return 
min_data; #else // PUGS_HAS_MPI @@ -456,7 +490,7 @@ class Messenger MPI_Datatype mpi_datatype = Messenger::helper::mpiType<DataType>(); DataType max_data = data; - MPI_Allreduce(&data, &max_data, 1, mpi_datatype, MPI_LAND, MPI_COMM_WORLD); + MPI_Allreduce(&data, &max_data, 1, mpi_datatype, MPI_LAND, m_pugs_comm_world); return max_data; #else // PUGS_HAS_MPI @@ -474,7 +508,7 @@ class Messenger MPI_Datatype mpi_datatype = Messenger::helper::mpiType<DataType>(); DataType max_data = data; - MPI_Allreduce(&data, &max_data, 1, mpi_datatype, MPI_LOR, MPI_COMM_WORLD); + MPI_Allreduce(&data, &max_data, 1, mpi_datatype, MPI_LOR, m_pugs_comm_world); return max_data; #else // PUGS_HAS_MPI @@ -494,7 +528,7 @@ class Messenger MPI_Datatype mpi_datatype = Messenger::helper::mpiType<DataType>(); DataType max_data = data; - MPI_Allreduce(&data, &max_data, 1, mpi_datatype, MPI_MAX, MPI_COMM_WORLD); + MPI_Allreduce(&data, &max_data, 1, mpi_datatype, MPI_MAX, m_pugs_comm_world); return max_data; #else // PUGS_HAS_MPI @@ -514,18 +548,20 @@ class Messenger MPI_Datatype mpi_datatype = Messenger::helper::mpiType<DataType>(); DataType data_sum = data; - MPI_Allreduce(&data, &data_sum, 1, mpi_datatype, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(&data, &data_sum, 1, mpi_datatype, MPI_SUM, m_pugs_comm_world); return data_sum; - } else if (is_trivially_castable<DataType>) { + } else if constexpr (is_trivially_castable<DataType>) { using InnerDataType = typename DataType::data_type; MPI_Datatype mpi_datatype = Messenger::helper::mpiType<InnerDataType>(); const int size = sizeof(DataType) / sizeof(InnerDataType); DataType data_sum = data; - MPI_Allreduce(&data, &data_sum, size, mpi_datatype, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(&data, &data_sum, size, mpi_datatype, MPI_SUM, m_pugs_comm_world); return data_sum; + } else { + throw UnexpectedError("invalid data type for reduce sum"); } #else // PUGS_HAS_MPI return data; diff --git a/src/utils/PETScWrapper.cpp b/src/utils/PETScWrapper.cpp index 
18b16aa1435c2f27dc4d850c0727ddb4e259acb0..fb7fc932a2b5c3013d3831fb22bbbaa997d09bb0 100644 --- a/src/utils/PETScWrapper.cpp +++ b/src/utils/PETScWrapper.cpp @@ -1,5 +1,6 @@ #include <utils/PETScWrapper.hpp> +#include <utils/Messenger.hpp> #include <utils/pugs_config.hpp> #ifdef PUGS_HAS_PETSC @@ -12,6 +13,7 @@ void initialize([[maybe_unused]] int& argc, [[maybe_unused]] char* argv[]) { #ifdef PUGS_HAS_PETSC + PETSC_COMM_WORLD = parallel::Messenger::getInstance().comm(); PetscOptionsSetValue(NULL, "-no_signal_handler", "true"); PetscOptionsSetValue(NULL, "-fp_trap", "false"); PetscInitialize(&argc, &argv, 0, 0); diff --git a/src/utils/Partitioner.cpp b/src/utils/Partitioner.cpp index 347b07da6bffdf62d0218ca5231ed9e03a9c698f..e63a4b48ccfdd57952d77797bbbee3344187352e 100644 --- a/src/utils/Partitioner.cpp +++ b/src/utils/Partitioner.cpp @@ -31,7 +31,8 @@ Partitioner::partition(const CRSGraph& graph) Array<int> part(0); MPI_Group world_group; - MPI_Comm_group(MPI_COMM_WORLD, &world_group); + + MPI_Comm_group(parallel::Messenger::getInstance().comm(), &world_group); MPI_Group mesh_group; std::vector<int> group_ranks = [&]() { @@ -49,7 +50,7 @@ Partitioner::partition(const CRSGraph& graph) MPI_Group_incl(world_group, group_ranks.size(), &(group_ranks[0]), &mesh_group); MPI_Comm parmetis_comm; - MPI_Comm_create_group(MPI_COMM_WORLD, mesh_group, 1, &parmetis_comm); + MPI_Comm_create_group(parallel::Messenger::getInstance().comm(), mesh_group, 1, &parmetis_comm); int local_number_of_nodes = graph.numberOfNodes(); diff --git a/src/utils/PugsUtils.cpp b/src/utils/PugsUtils.cpp index a684a71a98abe11a8899d2c0e15c3ac515adb6cc..c265c5f906dc69fecb73f4e70b7877a735c1b39c 100644 --- a/src/utils/PugsUtils.cpp +++ b/src/utils/PugsUtils.cpp @@ -2,10 +2,12 @@ #include <utils/BacktraceManager.hpp> #include <utils/BuildInfo.hpp> +#include <utils/CommunicatorManager.hpp> #include <utils/ConsoleManager.hpp> #include <utils/FPEManager.hpp> #include <utils/Messenger.hpp> #include 
<utils/PETScWrapper.hpp> +#include <utils/ReproducibleSumManager.hpp> #include <utils/RevisionInfo.hpp> #include <utils/SLEPcWrapper.hpp> #include <utils/SignalManager.hpp> @@ -81,7 +83,6 @@ setDefaultOMPEnvironment() std::string initialize(int& argc, char* argv[]) { - parallel::Messenger::create(argc, argv); bool enable_fpe = true; bool enable_signals = true; int nb_threads = -1; @@ -118,20 +119,36 @@ initialize(int& argc, char* argv[]) bool pause_on_error = false; app.add_flag("-p,--pause-on-error", pause_on_error, "Pause for debugging on unexpected error [default: false]"); + bool reproducible_sums = true; + app.add_flag("--reproducible-sums,!--no-reproducible-sums", reproducible_sums, + "Special treatment of array sums to ensure reproducibility [default: true]"); + + int mpi_split_color = -1; + app.add_option("--mpi-split-color", mpi_split_color, "Sets the MPI split color value (for MPMD applications)") + ->check(CLI::Range(0, std::numeric_limits<decltype(mpi_split_color)>::max())); + std::atexit([]() { std::cout << rang::style::reset; }); try { app.parse(argc, argv); } catch (const CLI::ParseError& e) { + // Stupid trick to avoid repetition of error messages in parallel + parallel::Messenger::create(argc, argv); parallel::Messenger::destroy(); std::exit(app.exit(e, std::cout, std::cerr)); } + if (app.count("--mpi-split-color") > 0) { + CommunicatorManager::setSplitColor(mpi_split_color); + } + BacktraceManager::setShow(show_backtrace); ConsoleManager::setShowPreamble(show_preamble); ConsoleManager::init(enable_color); SignalManager::setPauseForDebug(pause_on_error); + ReproducibleSumManager::setReproducibleSums(reproducible_sums); } + parallel::Messenger::create(argc, argv); PETScWrapper::initialize(argc, argv); SLEPcWrapper::initialize(argc, argv); @@ -155,7 +172,14 @@ initialize(int& argc, char* argv[]) std::cout << rang::style::bold; #ifdef PUGS_HAS_MPI - std::cout << "MPI number of ranks " << parallel::size() << '\n'; + if
(CommunicatorManager::hasSplitColor()) { + std::cout << "MPI number of global ranks " << parallel::Messenger::getInstance().globalSize() << '\n'; + std::cout << "MPI local pugs communication world\n"; + std::cout << " - number of local ranks " << parallel::size() << '\n'; + std::cout << " - local comm world color " << CommunicatorManager::splitColor() << '\n'; + } else { + std::cout << "MPI number of ranks " << parallel::size() << '\n'; + } #else // PUGS_HAS_MPI std::cout << "Sequential build\n"; #endif // PUGS_HAS_MPI diff --git a/src/utils/ReproducibleSumMPI.hpp b/src/utils/ReproducibleSumMPI.hpp new file mode 100644 index 0000000000000000000000000000000000000000..d4b80eb5d32384ba14cf49ade607cbf66fcf5632 --- /dev/null +++ b/src/utils/ReproducibleSumMPI.hpp @@ -0,0 +1,124 @@ +#ifndef REPRODUCIBLE_SUM_MPI_HPP +#define REPRODUCIBLE_SUM_MPI_HPP + +#include <utils/Messenger.hpp> +#include <utils/ReproducibleSumUtils.hpp> +#include <utils/pugs_config.hpp> + +#ifdef PUGS_HAS_MPI + +template <typename RSumType> +struct ReproducibleMPIReducer +{ + static void + all_reduce_sum_embedder(void* local_value, void* sum_value, int* length, MPI_Datatype*) + { + Assert(*length == 1, "reduction must be performed on single bin"); + using Bin = typename RSumType::Bin; + Bin* p_local_bin = reinterpret_cast<Bin*>(local_value); + Bin* p_sum_bin = reinterpret_cast<Bin*>(sum_value); + + RSumType::addBinTo(*p_local_bin, *p_sum_bin); + } +}; + +template <typename ArrayT, typename BoolArrayT> +auto +allReduceReproducibleSum(const ReproducibleScalarSum<ArrayT, BoolArrayT>& s) +{ + using DataType = std::decay_t<typename ArrayT::data_type>; + using RSumType = std::decay_t<decltype(s)>; + using BinType = typename RSumType::Bin; + + BinType local_bin = s.getSummationBin(); + + MPI_Datatype mpi_bin_type; + MPI_Type_contiguous(sizeof(BinType) / sizeof(DataType), parallel::Messenger::helper::mpiType<DataType>(), + &mpi_bin_type); + MPI_Type_commit(&mpi_bin_type); + + MPI_Op mpi_bin_sum_op; + 
MPI_Op_create(ReproducibleMPIReducer<RSumType>::all_reduce_sum_embedder, 1, &mpi_bin_sum_op); + + BinType sum_bin = zero; + MPI_Allreduce(&local_bin, &sum_bin, 1, mpi_bin_type, mpi_bin_sum_op, parallel::Messenger::getInstance().comm()); + + return sum_bin; +} + +template <typename ArrayT, typename BoolArrayT> +auto +allReduceReproducibleSum(const ReproducibleTinyVectorSum<ArrayT, BoolArrayT>& s) +{ + using TinyVectorType = std::decay_t<typename ArrayT::data_type>; + using DataType = typename TinyVectorType::data_type; + using RSumType = std::decay_t<decltype(s)>; + using BinType = typename RSumType::Bin; + + BinType local_bin = s.getSummationBin(); + + MPI_Datatype mpi_bin_type; + MPI_Type_contiguous(sizeof(BinType) / sizeof(DataType), parallel::Messenger::helper::mpiType<DataType>(), + &mpi_bin_type); + MPI_Type_commit(&mpi_bin_type); + + MPI_Op mpi_bin_sum_op; + MPI_Op_create(ReproducibleMPIReducer<RSumType>::all_reduce_sum_embedder, 1, &mpi_bin_sum_op); + + BinType sum_bin = zero; + MPI_Allreduce(&local_bin, &sum_bin, 1, mpi_bin_type, mpi_bin_sum_op, parallel::Messenger::getInstance().comm()); + + return sum_bin; +} + +template <typename ArrayT, typename BoolArrayT> +auto +allReduceReproducibleSum(const ReproducibleTinyMatrixSum<ArrayT, BoolArrayT>& s) +{ + using TinyVectorType = std::decay_t<typename ArrayT::data_type>; + using DataType = typename TinyVectorType::data_type; + using RSumType = std::decay_t<decltype(s)>; + using BinType = typename RSumType::Bin; + + BinType local_bin = s.getSummationBin(); + + MPI_Datatype mpi_bin_type; + MPI_Type_contiguous(sizeof(BinType) / sizeof(DataType), parallel::Messenger::helper::mpiType<DataType>(), + &mpi_bin_type); + MPI_Type_commit(&mpi_bin_type); + + MPI_Op mpi_bin_sum_op; + MPI_Op_create(ReproducibleMPIReducer<RSumType>::all_reduce_sum_embedder, 1, &mpi_bin_sum_op); + + BinType sum_bin = zero; + MPI_Allreduce(&local_bin, &sum_bin, 1, mpi_bin_type, mpi_bin_sum_op, parallel::Messenger::getInstance().comm()); + + 
return sum_bin; +} + +#else // PUGS_HAS_MPI + +template <typename ArrayT, typename BoolArrayT> +auto +allReduceReproducibleSum(const ReproducibleScalarSum<ArrayT, BoolArrayT>& s) +{ + return s.getSummationBin(); +} + +template <typename ArrayT, typename BoolArrayT> +auto +allReduceReproducibleSum(const ReproducibleTinyVectorSum<ArrayT, BoolArrayT>& s) +{ + return s.getSummationBin(); +} + +template <typename ArrayT, typename BoolArrayT> +auto +allReduceReproducibleSum(const ReproducibleTinyMatrixSum<ArrayT, BoolArrayT>& s) +{ + return s.getSummationBin(); +} + +#endif // PUGS_HAS_MPI + +#endif // REPRODUCIBLE_SUM_MPI_HPP diff --git a/src/utils/ReproducibleSumManager.cpp b/src/utils/ReproducibleSumManager.cpp new file mode 100644 index 0000000000000000000000000000000000000000..b0f7f7d545e36346b7d0c3280024b81198cdce9c --- /dev/null +++ b/src/utils/ReproducibleSumManager.cpp @@ -0,0 +1,15 @@ +#include <utils/ReproducibleSumManager.hpp> + +bool ReproducibleSumManager::s_reproducible_sums = true; + +bool +ReproducibleSumManager::reproducibleSums() +{ + return s_reproducible_sums; +} + +void +ReproducibleSumManager::setReproducibleSums(bool reproducible_sum) +{ + s_reproducible_sums = reproducible_sum; +} diff --git a/src/utils/ReproducibleSumManager.hpp b/src/utils/ReproducibleSumManager.hpp new file mode 100644 index 0000000000000000000000000000000000000000..40e380fd6747a714906fbc15fc85667f551c8586 --- /dev/null +++ b/src/utils/ReproducibleSumManager.hpp @@ -0,0 +1,14 @@ +#ifndef REPRODUCIBLE_SUM_MANAGER_HPP +#define REPRODUCIBLE_SUM_MANAGER_HPP + +class ReproducibleSumManager +{ + private: + static bool s_reproducible_sums; + + public: + static void setReproducibleSums(bool); + static bool reproducibleSums(); +}; + +#endif // REPRODUCIBLE_SUM_MANAGER_HPP diff --git a/src/utils/ReproducibleSumUtils.hpp b/src/utils/ReproducibleSumUtils.hpp new file mode 100644 index 0000000000000000000000000000000000000000..f2d52aa20122452e96130c0c29b6d12ccc32a4aa --- /dev/null +++ 
b/src/utils/ReproducibleSumUtils.hpp @@ -0,0 +1,878 @@ +#ifndef REPRODUCIBLE_SUM_UTILS_HPP +#define REPRODUCIBLE_SUM_UTILS_HPP + +#include <utils/PugsUtils.hpp> +#include <utils/Types.hpp> + +template <typename DataType> +class Array; + +namespace reproducible_sum_utils +{ +template <size_t NumberOfBits> +struct IntegerFromBitSize +{ +}; + +template <> +struct IntegerFromBitSize<8> +{ + using integer_t = std::int8_t; +}; + +template <> +struct IntegerFromBitSize<16> +{ + using integer_t = std::int16_t; +}; + +template <> +struct IntegerFromBitSize<32> +{ + using integer_t = std::int32_t; +}; + +template <> +struct IntegerFromBitSize<64> +{ + using integer_t = std::int64_t; +}; + +template <typename DataType> +struct IntegerType +{ + using integer_t = typename IntegerFromBitSize<sizeof(DataType) * 8>::integer_t; +}; + +template <typename DataType> +DataType +ulp(const DataType& x) noexcept(NO_ASSERT) +{ + static_assert(std::is_floating_point_v<DataType>, "expecting floating point value"); + + if (x == 0) { + return std::numeric_limits<DataType>::denorm_min(); + } + + return std::pow(DataType{2}, std::ilogb(std::abs(x)) - std::numeric_limits<DataType>::digits); +} + +template <typename DataType> +DataType +ufp(const DataType& x) noexcept(NO_ASSERT) +{ + static_assert(std::is_floating_point_v<DataType>, "expecting floating point value"); + + return std::pow(DataType{2}, std::ilogb(std::abs(x))); +} + +// Useful bits per bin +template <typename DataType> +constexpr inline size_t bin_size = 0; +template <> +constexpr inline size_t bin_size<double> = 40; +template <> +constexpr inline size_t bin_size<float> = 12; + +// IEEE 754 epsilon +template <typename DataType> +constexpr inline double bin_epsilon = 0; +template <> +constexpr inline double bin_epsilon<double> = std::numeric_limits<double>::epsilon(); +template <> +constexpr inline double bin_epsilon<float> = std::numeric_limits<float>::epsilon(); + +// number of bins: improves precision +template <typename DataType> 
+constexpr inline size_t bin_number = 0; + +template <> +constexpr inline size_t bin_number<double> = 3; +template <> +constexpr inline size_t bin_number<float> = 4; + +// max local sum size to avoid overflow +template <typename DataType> +constexpr inline size_t bin_max_size = 0; + +template <> +constexpr inline size_t bin_max_size<double> = 2048; +template <> +constexpr inline size_t bin_max_size<float> = 1024; + +struct NoMask +{ + PUGS_INLINE bool + operator[](size_t) const + { + return true; + } +}; + +} // namespace reproducible_sum_utils + +template <typename ArrayT, typename MaskType = reproducible_sum_utils::NoMask> +class ReproducibleScalarSum +{ + public: + using DataType = std::decay_t<typename ArrayT::data_type>; + static_assert(std::is_floating_point_v<DataType>); + + static constexpr size_t K = reproducible_sum_utils::bin_number<DataType>; + static constexpr DataType eps = reproducible_sum_utils::bin_epsilon<DataType>; + static constexpr size_t W = reproducible_sum_utils::bin_size<DataType>; + + struct Bin + { + std::array<DataType, K> S; // sum + std::array<DataType, K> C; // carry + + Bin& operator=(const Bin&) = default; + Bin& operator=(Bin&&) = default; + + Bin(Bin&&) = default; + Bin(const Bin&) = default; + + Bin(ZeroType) + { + for (size_t k = 0; k < K; ++k) { + S[k] = 0.75 * eps * std::pow(DataType{2}, (K - k - 1) * W); + C[k] = 0; + } + } + + Bin() = default; + + ~Bin() = default; + }; + + private: + Bin m_summation_bin; + + PUGS_INLINE + static void + _shift(const size_t g, Bin& bin) noexcept(NO_ASSERT) + { + using namespace reproducible_sum_utils; + + for (size_t k = K - 1; k >= g; --k) { + bin.S[k] = bin.S[k - g]; + bin.C[k] = bin.C[k - g]; + } + for (size_t k = 0; k < std::min(K, g); ++k) { + bin.S[k] = 1.5 * std::pow(DataType{2}, g * W) * ufp(bin.S[k]); + bin.C[k] = 0; + } + } + + PUGS_INLINE static void + _update(const DataType& m, Bin& bin) noexcept(NO_ASSERT) + { + Assert(m > 0); + + using namespace reproducible_sum_utils; + if (m 
>= std::pow(DataType{2}, W - 1.) * ulp(bin.S[0])) { + const size_t shift = 1 + std::floor(std::log2(m / (std::pow(DataType{2}, W - 1.) * ulp(bin.S[0]))) / W); + + _shift(shift, bin); + } + } + + PUGS_INLINE + void static _split2(DataType& S, DataType& x) noexcept(NO_ASSERT) + { + using namespace reproducible_sum_utils; + union + { + DataType as_DataType; + typename IntegerType<DataType>::integer_t as_integer; + } x_bar; + + x_bar.as_DataType = x; + x_bar.as_integer |= 0x1; + + const DataType S0 = S; + S += x_bar.as_DataType; + x -= S - S0; + } + + PUGS_INLINE static void + _renormalize(Bin& bin) noexcept(NO_ASSERT) + { + using namespace reproducible_sum_utils; + + for (size_t k = 0; k < K; ++k) { + if (bin.S[k] >= 1.75 * ufp(bin.S[k])) { + bin.S[k] -= 0.25 * ufp(bin.S[k]); + bin.C[k] += 1; + } else if (bin.S[k] < 1.25 * ufp(bin.S[k])) { + bin.S[k] += 0.5 * ufp(bin.S[k]); + bin.C[k] -= 2; + } else if (bin.S[k] < 1.5 * ufp(bin.S[k])) { + bin.S[k] += 0.25 * ufp(bin.S[k]); + bin.C[k] -= 1; + } + } + } + + public: + static void + addBinTo(Bin& bin, Bin& bin_sum) + { + using namespace reproducible_sum_utils; + + DataType ulp_bin = ulp(bin.S[0]); + DataType ulp_sum = ulp(bin_sum.S[0]); + + if (ulp_bin < ulp_sum) { + const size_t shift = std::floor(std::log2(ulp_sum / ulp_bin) / W); + if (shift > 0) { + _shift(shift, bin); + } + } else if (ulp_bin > ulp_sum) { + const size_t shift = std::floor(std::log2(ulp_bin / ulp_sum) / W); + if (shift > 0) { + _shift(shift, bin_sum); + } + } + + for (size_t k = 0; k < K; ++k) { + bin_sum.S[k] += bin.S[k] - 1.5 * ufp(bin.S[k]); + bin_sum.C[k] += bin.C[k]; + } + + _renormalize(bin_sum); + } + + PUGS_INLINE + static DataType + getValue(const Bin& bin) noexcept(NO_ASSERT) + { + using namespace reproducible_sum_utils; + + DataType value = 0; + for (size_t k = 0; k < K; ++k) { + value += 0.25 * (bin.C[k] - 6) * ufp(bin.S[k]) + bin.S[k]; + } + return value; + } + + PUGS_INLINE Bin + getSummationBin() const noexcept(NO_ASSERT) + { + return 
m_summation_bin; + } + + PUGS_INLINE DataType + getSum() const noexcept(NO_ASSERT) + { + return getValue(m_summation_bin); + } + + ReproducibleScalarSum(const ArrayT& array, const MaskType mask = reproducible_sum_utils::NoMask{}) + { + if constexpr (not std::is_same_v<MaskType, reproducible_sum_utils::NoMask>) { + static_assert(std::is_same_v<std::decay_t<typename MaskType::data_type>, bool>, + "when provided, mask must be an array of bool"); + } + + using TeamPolicyT = Kokkos::TeamPolicy<Kokkos::IndexType<int>>; + using TeamMemberT = TeamPolicyT::member_type; + + int nx = reproducible_sum_utils::bin_max_size<DataType>; + int ny = std::max(array.size() / nx, 1ul); + + const TeamPolicyT policy(ny, Kokkos::AUTO()); + + Array<DataType> thread_sum(policy.team_size() * policy.league_size()); + + Array<Bin> bin_by_thread(policy.team_size() * policy.league_size()); + bin_by_thread.fill(zero); + + Array<DataType> local_max(policy.team_size() * policy.league_size()); + local_max.fill(0); + + parallel_for( + policy, PUGS_LAMBDA(const TeamMemberT& member) { + const int i_team = member.league_rank(); + const int i_thread = member.team_rank(); + + const int thread_id = i_team * member.team_size() + i_thread; + + const int league_start = nx * i_team; + const int block_size = [&] { + int size = nx; + if (i_team == member.league_size() - 1) { + size = array.size() - league_start; + } + return size; + }(); + + parallel_for( + Kokkos::TeamThreadRange(member, block_size), PUGS_LAMBDA(int i) { + if (mask[league_start + i]) { + DataType& m = local_max[thread_id]; + DataType abs_value = std::abs(array[league_start + i]); + if (abs_value > m) { + m = abs_value; + } + } + }); + + _update(local_max[thread_id], bin_by_thread[thread_id]); + + parallel_for( + Kokkos::TeamThreadRange(member, block_size), PUGS_LAMBDA(int i) { + if (mask[league_start + i]) { + DataType x = array[nx * i_team + i]; + for (size_t k = 0; k < K; ++k) { + _split2(bin_by_thread[thread_id].S[k], x); + }; + } + }); + + 
_renormalize(bin_by_thread[thread_id]); + }); + + m_summation_bin = bin_by_thread[0]; + for (size_t i = 1; i < bin_by_thread.size(); ++i) { + addBinTo(bin_by_thread[i], m_summation_bin); + } + } + + ~ReproducibleScalarSum() = default; +}; + +template <typename ArrayT, typename MaskType = reproducible_sum_utils::NoMask> +class ReproducibleTinyVectorSum +{ + public: + using TinyVectorType = std::decay_t<typename ArrayT::data_type>; + static_assert(is_tiny_vector_v<TinyVectorType>); + + using DataType = std::decay_t<typename TinyVectorType::data_type>; + static_assert(std::is_floating_point_v<DataType>); + + static constexpr size_t K = reproducible_sum_utils::bin_number<DataType>; + static constexpr DataType eps = reproducible_sum_utils::bin_epsilon<DataType>; + static constexpr size_t W = reproducible_sum_utils::bin_size<DataType>; + + struct Bin + { + std::array<TinyVectorType, K> S; // sum + std::array<TinyVectorType, K> C; // carry + + Bin& operator=(const Bin&) = default; + Bin& operator=(Bin&&) = default; + + Bin(Bin&&) = default; + Bin(const Bin&) = default; + + Bin(ZeroType) + { + for (size_t k = 0; k < K; ++k) { + const DataType init_value = 0.75 * eps * std::pow(DataType{2}, (K - k - 1) * W); + for (size_t i_component = 0; i_component < TinyVectorType::Dimension; ++i_component) { + S[k][i_component] = init_value; + } + C[k] = zero; + } + } + + Bin() = default; + + ~Bin() = default; + }; + + private: + Bin m_summation_bin; + + PUGS_INLINE + static void + _shift(const size_t g, Bin& bin, const size_t& i_component) noexcept(NO_ASSERT) + { + using namespace reproducible_sum_utils; + + for (size_t k = K - 1; k >= g; --k) { + bin.S[k][i_component] = bin.S[k - g][i_component]; + bin.C[k][i_component] = bin.C[k - g][i_component]; + } + for (size_t k = 0; k < std::min(K, g); ++k) { + bin.S[k][i_component] = 1.5 * std::pow(DataType{2}, g * W) * ufp(bin.S[k][i_component]); + bin.C[k][i_component] = 0; + } + } + + PUGS_INLINE static void + _update(const TinyVectorType& 
m, Bin& bin) noexcept(NO_ASSERT) + { + using namespace reproducible_sum_utils; + for (size_t i_component = 0; i_component < TinyVectorType::Dimension; ++i_component) { + if (m[i_component] >= std::pow(DataType{2}, W - 1.) * ulp(bin.S[0][i_component])) { + const size_t shift = + 1 + std::floor(std::log2(m[i_component] / (std::pow(DataType{2}, W - 1.) * ulp(bin.S[0][i_component]))) / W); + + _shift(shift, bin, i_component); + } + } + } + + PUGS_INLINE + void static _split2(TinyVectorType& S, TinyVectorType& x) noexcept(NO_ASSERT) + { + using namespace reproducible_sum_utils; + union + { + DataType as_DataType; + typename IntegerType<DataType>::integer_t as_integer; + } x_bar; + + for (size_t i_component = 0; i_component < TinyVectorType::Dimension; ++i_component) { + x_bar.as_DataType = x[i_component]; + x_bar.as_integer |= 0x1; + + DataType& S_i = S[i_component]; + DataType& x_i = x[i_component]; + + const DataType S0 = S_i; + S_i += x_bar.as_DataType; + x_i -= S_i - S0; + } + } + + PUGS_INLINE static void + _renormalize(Bin& bin) noexcept(NO_ASSERT) + { + using namespace reproducible_sum_utils; + + for (size_t k = 0; k < K; ++k) { + TinyVectorType& S_k = bin.S[k]; + TinyVectorType& C_k = bin.C[k]; + for (size_t i_component = 0; i_component < TinyVectorType::Dimension; ++i_component) { + DataType& Sk_i = S_k[i_component]; + DataType& Ck_i = C_k[i_component]; + + if (Sk_i >= 1.75 * ufp(Sk_i)) { + Sk_i -= 0.25 * ufp(Sk_i); + Ck_i += 1; + } else if (Sk_i < 1.25 * ufp(Sk_i)) { + Sk_i += 0.5 * ufp(Sk_i); + Ck_i -= 2; + } else if (Sk_i < 1.5 * ufp(Sk_i)) { + Sk_i += 0.25 * ufp(Sk_i); + Ck_i -= 1; + } + } + } + } + + public: + static void + addBinTo(Bin& bin, Bin& bin_sum) + { + using namespace reproducible_sum_utils; + + for (size_t i_component = 0; i_component < TinyVectorType::Dimension; ++i_component) { + DataType ulp_bin = ulp(bin.S[0][i_component]); + DataType ulp_sum = ulp(bin_sum.S[0][i_component]); + + if (ulp_bin < ulp_sum) { + const size_t shift = 
std::floor(std::log2(ulp_sum / ulp_bin) / W); + if (shift > 0) { + _shift(shift, bin, i_component); + } + } else if (ulp_bin > ulp_sum) { + const size_t shift = std::floor(std::log2(ulp_bin / ulp_sum) / W); + if (shift > 0) { + _shift(shift, bin_sum, i_component); + } + } + + for (size_t k = 0; k < K; ++k) { + bin_sum.S[k][i_component] += bin.S[k][i_component] - 1.5 * ufp(bin.S[k][i_component]); + bin_sum.C[k][i_component] += bin.C[k][i_component]; + } + } + + _renormalize(bin_sum); + } + + PUGS_INLINE + static TinyVectorType + getValue(const Bin& bin) noexcept(NO_ASSERT) + { + using namespace reproducible_sum_utils; + + TinyVectorType value = zero; + for (size_t i_component = 0; i_component < TinyVectorType::Dimension; ++i_component) { + for (size_t k = 0; k < K; ++k) { + value[i_component] += 0.25 * (bin.C[k][i_component] - 6) * ufp(bin.S[k][i_component]) + bin.S[k][i_component]; + } + } + return value; + } + + PUGS_INLINE Bin + getSummationBin() const noexcept(NO_ASSERT) + { + return m_summation_bin; + } + + PUGS_INLINE TinyVectorType + getSum() const noexcept(NO_ASSERT) + { + return getValue(m_summation_bin); + } + + ReproducibleTinyVectorSum(const ArrayT& array, const MaskType mask = reproducible_sum_utils::NoMask{}) + { + if constexpr (not std::is_same_v<MaskType, reproducible_sum_utils::NoMask>) { + static_assert(std::is_same_v<std::decay_t<typename MaskType::data_type>, bool>, + "when provided, mask must be an array of bool"); + } + + using TeamPolicyT = Kokkos::TeamPolicy<Kokkos::IndexType<int>>; + using TeamMemberT = TeamPolicyT::member_type; + + int nx = reproducible_sum_utils::bin_max_size<DataType>; + int ny = std::max(array.size() / nx, 1ul); + + const TeamPolicyT policy(ny, Kokkos::AUTO()); + + Array<TinyVectorType> thread_sum(policy.team_size() * policy.league_size()); + + Array<Bin> bin_by_thread(policy.team_size() * policy.league_size()); + bin_by_thread.fill(zero); + + Array<TinyVectorType> local_max(policy.team_size() * policy.league_size()); + 
local_max.fill(zero); + + parallel_for( + policy, PUGS_LAMBDA(const TeamMemberT& member) { + const int i_team = member.league_rank(); + const int i_thread = member.team_rank(); + + const int thread_id = i_team * member.team_size() + i_thread; + + const int league_start = nx * i_team; + const int block_size = [&] { + int size = nx; + if (i_team == member.league_size() - 1) { + size = array.size() - league_start; + } + return size; + }(); + + parallel_for( + Kokkos::TeamThreadRange(member, block_size), PUGS_LAMBDA(int i) { + if (mask[league_start + i]) { + for (size_t i_component = 0; i_component < TinyVectorType::Dimension; ++i_component) { + DataType& m = local_max[thread_id][i_component]; + DataType abs_value = std::abs(array[league_start + i][i_component]); + if (abs_value > m) { + m = abs_value; + } + } + } + }); + + _update(local_max[thread_id], bin_by_thread[thread_id]); + + parallel_for( + Kokkos::TeamThreadRange(member, block_size), PUGS_LAMBDA(int i) { + if (mask[league_start + i]) { + TinyVectorType x = array[nx * i_team + i]; + for (size_t k = 0; k < K; ++k) { + _split2(bin_by_thread[thread_id].S[k], x); + } + } + }); + + _renormalize(bin_by_thread[thread_id]); + }); + + m_summation_bin = bin_by_thread[0]; + for (size_t i = 1; i < bin_by_thread.size(); ++i) { + addBinTo(bin_by_thread[i], m_summation_bin); + } + } + + ~ReproducibleTinyVectorSum() = default; +}; + +template <typename ArrayT, typename MaskType = reproducible_sum_utils::NoMask> +class ReproducibleTinyMatrixSum +{ + public: + using TinyMatrixType = std::decay_t<typename ArrayT::data_type>; + static_assert(is_tiny_matrix_v<TinyMatrixType>); + + using DataType = std::decay_t<typename TinyMatrixType::data_type>; + static_assert(std::is_floating_point_v<DataType>); + + static constexpr size_t K = reproducible_sum_utils::bin_number<DataType>; + static constexpr DataType eps = reproducible_sum_utils::bin_epsilon<DataType>; + static constexpr size_t W = reproducible_sum_utils::bin_size<DataType>; + + 
struct Bin + { + std::array<TinyMatrixType, K> S; // sum + std::array<TinyMatrixType, K> C; // carry + + Bin& operator=(const Bin&) = default; + Bin& operator=(Bin&&) = default; + + Bin(Bin&&) = default; + Bin(const Bin&) = default; + + Bin(ZeroType) + { + for (size_t k = 0; k < K; ++k) { + const DataType init_value = 0.75 * eps * std::pow(DataType{2}, (K - k - 1) * W); + for (size_t i_component = 0; i_component < TinyMatrixType::NumberOfRows; ++i_component) { + for (size_t j_component = 0; j_component < TinyMatrixType::NumberOfColumns; ++j_component) { + S[k](i_component, j_component) = init_value; + } + } + C[k] = zero; + } + } + + Bin() = default; + ~Bin() = default; + }; + + private: + Bin m_summation_bin; + + PUGS_INLINE + static void + _shift(const size_t g, Bin& bin, const size_t i_component, const size_t j_component) noexcept(NO_ASSERT) + { + using namespace reproducible_sum_utils; + + for (size_t k = K - 1; k >= g; --k) { + bin.S[k](i_component, j_component) = bin.S[k - g](i_component, j_component); + bin.C[k](i_component, j_component) = bin.C[k - g](i_component, j_component); + } + for (size_t k = 0; k < std::min(K, g); ++k) { + bin.S[k](i_component, j_component) = 1.5 * std::pow(DataType{2}, g * W) * ufp(bin.S[k](i_component, j_component)); + bin.C[k](i_component, j_component) = 0; + } + } + + PUGS_INLINE static void + _update(const TinyMatrixType& m, Bin& bin) noexcept(NO_ASSERT) + { + using namespace reproducible_sum_utils; + for (size_t i_component = 0; i_component < TinyMatrixType::NumberOfRows; ++i_component) { + for (size_t j_component = 0; j_component < TinyMatrixType::NumberOfColumns; ++j_component) { + if (m(i_component, j_component) >= std::pow(DataType{2}, W - 1.) * ulp(bin.S[0](i_component, j_component))) { + const size_t shift = + 1 + std::floor(std::log2(m(i_component, j_component) / + (std::pow(DataType{2}, W - 1.) 
* ulp(bin.S[0](i_component, j_component)))) / + W); + + _shift(shift, bin, i_component, j_component); + } + } + } + } + + PUGS_INLINE + void static _split2(TinyMatrixType& S, TinyMatrixType& x) noexcept(NO_ASSERT) + { + using namespace reproducible_sum_utils; + union + { + DataType as_DataType; + typename IntegerType<DataType>::integer_t as_integer; + } x_bar; + + for (size_t i_component = 0; i_component < TinyMatrixType::NumberOfRows; ++i_component) { + for (size_t j_component = 0; j_component < TinyMatrixType::NumberOfColumns; ++j_component) { + DataType& S_ij = S(i_component, j_component); + DataType& x_ij = x(i_component, j_component); + + x_bar.as_DataType = x_ij; + x_bar.as_integer |= 0x1; + + const DataType S0 = S_ij; + S_ij += x_bar.as_DataType; + x_ij -= S_ij - S0; + } + } + } + + PUGS_INLINE static void + _renormalize(Bin& bin) noexcept(NO_ASSERT) + { + using namespace reproducible_sum_utils; + + for (size_t k = 0; k < K; ++k) { + TinyMatrixType& S_k = bin.S[k]; + TinyMatrixType& C_k = bin.C[k]; + for (size_t i_component = 0; i_component < TinyMatrixType::NumberOfRows; ++i_component) { + for (size_t j_component = 0; j_component < TinyMatrixType::NumberOfColumns; ++j_component) { + DataType& Sk_ij = S_k(i_component, j_component); + DataType& Ck_ij = C_k(i_component, j_component); + + if (Sk_ij >= 1.75 * ufp(Sk_ij)) { + Sk_ij -= 0.25 * ufp(Sk_ij); + Ck_ij += 1; + } else if (Sk_ij < 1.25 * ufp(Sk_ij)) { + Sk_ij += 0.5 * ufp(Sk_ij); + Ck_ij -= 2; + } else if (Sk_ij < 1.5 * ufp(Sk_ij)) { + Sk_ij += 0.25 * ufp(Sk_ij); + Ck_ij -= 1; + } + } + } + } + } + + public: + static void + addBinTo(Bin& bin, Bin& bin_sum) + { + using namespace reproducible_sum_utils; + + for (size_t i_component = 0; i_component < TinyMatrixType::NumberOfRows; ++i_component) { + for (size_t j_component = 0; j_component < TinyMatrixType::NumberOfColumns; ++j_component) { + DataType ulp_bin = ulp(bin.S[0](i_component, j_component)); + DataType ulp_sum = ulp(bin_sum.S[0](i_component, 
j_component)); + + if (ulp_bin < ulp_sum) { + const size_t shift = std::floor(std::log2(ulp_sum / ulp_bin) / W); + if (shift > 0) { + _shift(shift, bin, i_component, j_component); + } + } else if (ulp_bin > ulp_sum) { + const size_t shift = std::floor(std::log2(ulp_bin / ulp_sum) / W); + if (shift > 0) { + _shift(shift, bin_sum, i_component, j_component); + } + } + + for (size_t k = 0; k < K; ++k) { + bin_sum.S[k](i_component, j_component) += + bin.S[k](i_component, j_component) - 1.5 * ufp(bin.S[k](i_component, j_component)); + bin_sum.C[k](i_component, j_component) += bin.C[k](i_component, j_component); + } + } + } + + _renormalize(bin_sum); + } + + PUGS_INLINE + static TinyMatrixType + getValue(const Bin& bin) noexcept(NO_ASSERT) + { + using namespace reproducible_sum_utils; + + TinyMatrixType value = zero; + for (size_t i_component = 0; i_component < TinyMatrixType::NumberOfRows; ++i_component) { + for (size_t j_component = 0; j_component < TinyMatrixType::NumberOfColumns; ++j_component) { + for (size_t k = 0; k < K; ++k) { + value(i_component, j_component) += + 0.25 * (bin.C[k](i_component, j_component) - 6) * ufp(bin.S[k](i_component, j_component)) + + bin.S[k](i_component, j_component); + } + } + } + return value; + } + + PUGS_INLINE Bin + getSummationBin() const noexcept(NO_ASSERT) + { + return m_summation_bin; + } + + PUGS_INLINE TinyMatrixType + getSum() const noexcept(NO_ASSERT) + { + return getValue(m_summation_bin); + } + + ReproducibleTinyMatrixSum(const ArrayT& array, const MaskType mask = reproducible_sum_utils::NoMask{}) + { + if constexpr (not std::is_same_v<MaskType, reproducible_sum_utils::NoMask>) { + static_assert(std::is_same_v<std::decay_t<typename MaskType::data_type>, bool>, + "when provided, mask must be an array of bool"); + } + + using TeamPolicyT = Kokkos::TeamPolicy<Kokkos::IndexType<int>>; + using TeamMemberT = TeamPolicyT::member_type; + + int nx = reproducible_sum_utils::bin_max_size<DataType>; + int ny = std::max(array.size() / 
nx, 1ul); + + const TeamPolicyT policy(ny, Kokkos::AUTO()); + + Array<TinyMatrixType> thread_sum(policy.team_size() * policy.league_size()); + + Array<Bin> bin_by_thread(policy.team_size() * policy.league_size()); + bin_by_thread.fill(zero); + + Array<TinyMatrixType> local_max(policy.team_size() * policy.league_size()); + local_max.fill(zero); + + parallel_for( + policy, PUGS_LAMBDA(const TeamMemberT& member) { + const int i_team = member.league_rank(); + const int i_thread = member.team_rank(); + + const int thread_id = i_team * member.team_size() + i_thread; + + const int league_start = nx * i_team; + const int block_size = [&] { + int size = nx; + if (i_team == member.league_size() - 1) { + size = array.size() - league_start; + } + return size; + }(); + + parallel_for( + Kokkos::TeamThreadRange(member, block_size), PUGS_LAMBDA(int i) { + if (mask[league_start + i]) { + for (size_t i_component = 0; i_component < TinyMatrixType::NumberOfRows; ++i_component) { + for (size_t j_component = 0; j_component < TinyMatrixType::NumberOfColumns; ++j_component) { + DataType& m = local_max[thread_id](i_component, j_component); + DataType abs_value = std::abs(array[league_start + i](i_component, j_component)); + if (abs_value > m) { + m = abs_value; + } + } + } + } + }); + + _update(local_max[thread_id], bin_by_thread[thread_id]); + + parallel_for( + Kokkos::TeamThreadRange(member, block_size), PUGS_LAMBDA(int i) { + if (mask[league_start + i]) { + TinyMatrixType x = array[nx * i_team + i]; + for (size_t k = 0; k < K; ++k) { + _split2(bin_by_thread[thread_id].S[k], x); + } + } + }); + + _renormalize(bin_by_thread[thread_id]); + }); + + m_summation_bin = bin_by_thread[0]; + for (size_t i = 1; i < bin_by_thread.size(); ++i) { + addBinTo(bin_by_thread[i], m_summation_bin); + } + } + + ~ReproducibleTinyMatrixSum() = default; +}; + +#endif // REPRODUCIBLE_SUM_UTILS_HPP diff --git a/tests/test_Array.cpp b/tests/test_Array.cpp index 
f7d121df0ab11e229bae06f505e8f12361fe00da..7efc346e29693e2ee6302dfd4498ac2a9cb51b04 100644 --- a/tests/test_Array.cpp +++ b/tests/test_Array.cpp @@ -14,6 +14,10 @@ #include <valarray> #include <vector> +#include <utils/ReproducibleSumUtils.hpp> +#include <utils/SmallArray.hpp> +#include <utils/Timer.hpp> + // Instantiate to ensure full coverage is performed template class Array<int>; @@ -319,6 +323,837 @@ TEST_CASE("Array", "[utils]") } } + SECTION("reproducible floating point sum") + { + auto direct_sum = [](auto array) { + auto sum = array[0]; + for (size_t i = 1; i < array.size(); ++i) { + sum += array[i]; + } + return sum; + }; + + SECTION("reproducible double sum") + { + Array<double> array(10'000); + + for (size_t i = 0; i < array.size(); ++i) { + array[i] = ((i < (array.size() / 10)) * 1E25 + 1E10) * ((i + 1) % 1'000) * std::sin(3 * i + 1); + } + + const auto sum_before_shuffle = sum(array); + + std::random_shuffle(&(array[0]), &(array[0]) + array.size()); + + const auto sum_after_shuffle = sum(array); + + REQUIRE(sum_before_shuffle == sum_after_shuffle); + + REQUIRE(sum_before_shuffle == Catch::Approx(direct_sum(array))); + } + + SECTION("reproducible float sum") + { + Array<float> array(10'000); + + for (size_t i = 0; i < array.size(); ++i) { + array[i] = ((i < (array.size() / 10)) * 1E25 + 1E10) * ((i + 1) % 1'000) * std::sin(3 * i + 1); + } + + const auto sum_before_shuffle = sum(array); + + std::random_shuffle(&(array[0]), &(array[0]) + array.size()); + + const auto sum_after_shuffle = sum(array); + + REQUIRE(sum_before_shuffle == sum_after_shuffle); + + REQUIRE(sum_before_shuffle == Catch::Approx(direct_sum(array))); + } + + SECTION("reproducible TinyVector<3,double> sum") + { + Array<TinyVector<3, double>> array(10'000); + + for (size_t i = 0; i < array.size(); ++i) { + array[i] = + TinyVector<3, double>(((i < (array.size() / 10)) * 1E25 + 1E10) * ((i + 1) % 1'000) * std::sin(3 * i + 1), + 17 * i - 3, // + 1); + } + + const auto sum_before_shuffle = 
sum(array); + + std::random_shuffle(&(array[0]), &(array[0]) + array.size()); + + ReproducibleTinyVectorSum s0(array); + + const auto sum_after_shuffle = sum(array); + + REQUIRE(sum_before_shuffle == sum_after_shuffle); + + auto naive_sum = direct_sum(array); + REQUIRE(sum_before_shuffle[0] == Catch::Approx(naive_sum[0])); + REQUIRE(sum_before_shuffle[1] == Catch::Approx(naive_sum[1])); + REQUIRE(sum_before_shuffle[2] == Catch::Approx(naive_sum[2])); + } + + SECTION("reproducible TinyVector<3,float> sum") + { + Array<TinyVector<3, float>> array(10'000); + + for (size_t i = 0; i < array.size(); ++i) { + array[i] = + TinyVector<3, float>(((i < (array.size() / 10)) * 1E25 + 1E10) * ((i + 1) % 1'000) * std::sin(3 * i + 1), + 17 * i - 3, // + 1); + } + + const auto sum_before_shuffle = sum(array); + + std::random_shuffle(&(array[0]), &(array[0]) + array.size()); + + ReproducibleTinyVectorSum s0(array); + + const auto sum_after_shuffle = sum(array); + + REQUIRE(sum_before_shuffle == sum_after_shuffle); + + auto naive_sum = direct_sum(array); + REQUIRE(sum_before_shuffle[0] == Catch::Approx(naive_sum[0])); + REQUIRE(sum_before_shuffle[1] == Catch::Approx(naive_sum[1])); + REQUIRE(sum_before_shuffle[2] == Catch::Approx(naive_sum[2])); + } + + SECTION("reproducible TinyMatrix<2, 3> sum") + { + Array<TinyMatrix<2, 3, double>> array(10'000); + + for (size_t i = 0; i < array.size(); ++i) { + array[i] = TinyMatrix<2, 3, double>{((i < (array.size() / 1000)) * 1E25 + 1E10) * ((i + 1) % 1'000) * + std::sin(3 * i + 1), + 17 * i - 3, // + 1, // + std::cos(7 * i) * i, // + (2 - 3 * i) * 1E-30, // + (i * 100 + 1000)}; + } + + const auto sum_before_shuffle = sum(array); + + std::random_shuffle(&(array[0]), &(array[0]) + array.size()); + + const auto sum_after_shuffle = sum(array); + + REQUIRE(sum_before_shuffle == sum_after_shuffle); + + auto naive_sum = direct_sum(array); + REQUIRE(sum_before_shuffle(0, 0) == Catch::Approx(naive_sum(0, 0))); + REQUIRE(sum_before_shuffle(0, 1) == 
Catch::Approx(naive_sum(0, 1))); + REQUIRE(sum_before_shuffle(0, 2) == Catch::Approx(naive_sum(0, 2))); + REQUIRE(sum_before_shuffle(1, 0) == Catch::Approx(naive_sum(1, 0))); + REQUIRE(sum_before_shuffle(1, 1) == Catch::Approx(naive_sum(1, 1))); + REQUIRE(sum_before_shuffle(1, 2) == Catch::Approx(naive_sum(1, 2))); + } + + SECTION("reproducible TinyMatrix<2, 3, float> sum") + { + Array<TinyMatrix<2, 3, float>> array(10'000); + + for (size_t i = 0; i < array.size(); ++i) { + array[i] = + TinyMatrix<2, 3, float>(((i < (array.size() / 1000)) * 1E25 + 1E10) * ((i + 1) % 1'000) * std::sin(3 * i + 1), + 17 * i - 3, // + 1, // + std::cos(7 * i) * i, // + (2 - 3 * i) * 1E-30, // + (i * 100 + 1000)); + } + + const auto sum_before_shuffle = sum(array); + + std::random_shuffle(&(array[0]), &(array[0]) + array.size()); + + const auto sum_after_shuffle = sum(array); + + REQUIRE(sum_before_shuffle == sum_after_shuffle); + + auto naive_sum = direct_sum(array); + REQUIRE(sum_before_shuffle(0, 0) == Catch::Approx(naive_sum(0, 0))); + REQUIRE(sum_before_shuffle(0, 1) == Catch::Approx(naive_sum(0, 1))); + REQUIRE(sum_before_shuffle(0, 2) == Catch::Approx(naive_sum(0, 2))); + REQUIRE(sum_before_shuffle(1, 0) == Catch::Approx(naive_sum(1, 0)).epsilon(1E-4)); + REQUIRE(sum_before_shuffle(1, 1) == Catch::Approx(naive_sum(1, 1)).margin(1E-6)); + REQUIRE(sum_before_shuffle(1, 2) == Catch::Approx(naive_sum(1, 2))); + } + + SECTION("scalar bin summation") + { + SECTION("small arrays") + { + Array<double> array1(122); + for (size_t i = 0; i < array1.size(); ++i) { + array1[i] = ((i < (array1.size() / 10)) * 1E25 + 1E10) * ((i + 1) % 1'000) * std::sin(3 * i + 1); + } + + Array<double> array2(357); + for (size_t i = 0; i < array2.size(); ++i) { + array2[i] = ((i < (array2.size() / 10)) * 1E25 + 1E10) * ((i + 1) % 1'000) * std::sin(3 * i + 1); + } + + Array<double> array3(283); + for (size_t i = 0; i < array3.size(); ++i) { + array3[i] = ((i < (array3.size() / 10)) * 1E25 + 1E10) * ((i + 1) % 
1'000) * std::sin(3 * i + 1); + } + + Array<double> array(array1.size() + array2.size() + array3.size()); + { + size_t I = 0; + for (size_t i = 0; i < array1.size(); ++i) { + array[I++] = array1[i]; + } + + for (size_t i = 0; i < array2.size(); ++i) { + array[I++] = array2[i]; + } + + for (size_t i = 0; i < array3.size(); ++i) { + array[I++] = array3[i]; + } + } + + ReproducibleScalarSum s1(array1); + ReproducibleScalarSum s2(array2); + ReproducibleScalarSum s3(array3); + using RSumT = decltype(s1); + + RSumT::Bin bin1 = s1.getSummationBin(); + RSumT::Bin bin2 = s2.getSummationBin(); + RSumT::Bin bin3 = s3.getSummationBin(); + + { + RSumT::Bin bin_sum = zero; + + RSumT::addBinTo(bin1, bin_sum); + RSumT::addBinTo(bin2, bin_sum); + RSumT::addBinTo(bin3, bin_sum); + + REQUIRE(sum(array) == RSumT::getValue(bin_sum)); + } + + { + RSumT::Bin bin_sum = zero; + + RSumT::addBinTo(bin3, bin_sum); + RSumT::addBinTo(bin2, bin_sum); + RSumT::addBinTo(bin1, bin_sum); + + REQUIRE(sum(array) == RSumT::getValue(bin_sum)); + } + } + + SECTION("small and large arrays") + { + Array<double> array1(122); + for (size_t i = 0; i < array1.size(); ++i) { + array1[i] = ((i < (array1.size() / 100)) * 1E25 + 1E10) * ((i + 1) % 1'000) * std::sin(3 * i + 1); + } + + Array<double> array2(7357); + for (size_t i = 0; i < array2.size(); ++i) { + array2[i] = ((i < (array2.size() / 1000)) * 1E25 + 1E10) * ((i + 1) % 1'000) * std::sin(3 * i + 1); + } + + Array<double> array3(283); + for (size_t i = 0; i < array3.size(); ++i) { + array3[i] = ((i < (array3.size() / 30)) * 1E25 + 1E10) * ((i + 1) % 1'000) * std::sin(3 * i + 1); + } + + Array<double> array(array1.size() + array2.size() + array3.size()); + { + size_t I = 0; + for (size_t i = 0; i < array1.size(); ++i) { + array[I++] = array1[i]; + } + + for (size_t i = 0; i < array2.size(); ++i) { + array[I++] = array2[i]; + } + + for (size_t i = 0; i < array3.size(); ++i) { + array[I++] = array3[i]; + } + } + + ReproducibleScalarSum s1(array1); + 
ReproducibleScalarSum s2(array2); + ReproducibleScalarSum s3(array3); + using RSumT = decltype(s1); + + RSumT::Bin bin1 = s1.getSummationBin(); + RSumT::Bin bin2 = s2.getSummationBin(); + RSumT::Bin bin3 = s3.getSummationBin(); + + { + RSumT::Bin bin_sum = zero; + + RSumT::addBinTo(bin1, bin_sum); + RSumT::addBinTo(bin2, bin_sum); + RSumT::addBinTo(bin3, bin_sum); + + REQUIRE(sum(array) == RSumT::getValue(bin_sum)); + } + + { + RSumT::Bin bin_sum = zero; + + RSumT::addBinTo(bin3, bin_sum); + RSumT::addBinTo(bin2, bin_sum); + RSumT::addBinTo(bin1, bin_sum); + + REQUIRE(sum(array) == RSumT::getValue(bin_sum)); + } + } + + SECTION("large arrays") + { + Array<double> array1(5122); + for (size_t i = 0; i < array1.size(); ++i) { + array1[i] = ((i < (array1.size() / 100)) * 1E25 + 1E10) * ((i + 1) % 1'000) * std::sin(3 * i + 1); + } + + Array<double> array2(7357); + for (size_t i = 0; i < array2.size(); ++i) { + array2[i] = ((i < (array2.size() / 1000)) * 1E25 + 1E10) * ((i + 1) % 1'000) * std::sin(3 * i + 1); + } + + Array<double> array3(6283); + for (size_t i = 0; i < array3.size(); ++i) { + array3[i] = ((i < (array3.size() / 300)) * 1E25 + 1E10) * ((i + 1) % 1'000) * std::sin(3 * i + 1); + } + + Array<double> array(array1.size() + array2.size() + array3.size()); + { + size_t I = 0; + for (size_t i = 0; i < array1.size(); ++i) { + array[I++] = array1[i]; + } + + for (size_t i = 0; i < array2.size(); ++i) { + array[I++] = array2[i]; + } + + for (size_t i = 0; i < array3.size(); ++i) { + array[I++] = array3[i]; + } + } + + ReproducibleScalarSum s1(array1); + ReproducibleScalarSum s2(array2); + ReproducibleScalarSum s3(array3); + using RSumT = decltype(s1); + + RSumT::Bin bin1 = s1.getSummationBin(); + RSumT::Bin bin2 = s2.getSummationBin(); + RSumT::Bin bin3 = s3.getSummationBin(); + + { + RSumT::Bin bin_sum = zero; + + RSumT::addBinTo(bin1, bin_sum); + RSumT::addBinTo(bin2, bin_sum); + RSumT::addBinTo(bin3, bin_sum); + + REQUIRE(sum(array) == RSumT::getValue(bin_sum)); + 
} + + { + RSumT::Bin bin_sum = zero; + + RSumT::addBinTo(bin3, bin_sum); + RSumT::addBinTo(bin2, bin_sum); + RSumT::addBinTo(bin1, bin_sum); + + REQUIRE(sum(array) == RSumT::getValue(bin_sum)); + } + } + } + + SECTION("TinyVector bin summation") + { + SECTION("small arrays") + { + Array<TinyVector<3, double>> array1(122); + for (size_t i = 0; i < array1.size(); ++i) { + array1[i] = + TinyVector<3, double>(((i < (array1.size() / 10)) * 1E25 + 1E20) * ((i + 1) % 1'000) * std::sin(3 * i + 1), + ((i < (array1.size() / 50)) * 1E15 + 1E10) * ((i + 1) % 100) * std::cos(3 * i + 1), + ((i < (array1.size() / 20)) * 1E7 + 1E10) * ((i + 1) % 10) * std::tan(3 * i + 1)); + } + + Array<TinyVector<3, double>> array2(357); + for (size_t i = 0; i < array2.size(); ++i) { + array2[i] = + TinyVector<3, double>(((i < (array2.size() / 10)) * 1E25 + 1E20) * ((i + 1) % 1'000) * std::sin(3 * i + 1), + ((i < (array2.size() / 50)) * 1E15 + 1E10) * ((i + 1) % 100) * std::cos(3 * i + 1), + ((i < (array2.size() / 20)) * 1E7 + 1E10) * ((i + 1) % 10) * std::tan(3 * i + 1)); + } + + Array<TinyVector<3, double>> array3(283); + for (size_t i = 0; i < array3.size(); ++i) { + array3[i] = + TinyVector<3, double>(((i < (array3.size() / 10)) * 1E25 + 1E20) * ((i + 1) % 1'000) * std::sin(3 * i + 1), + ((i < (array3.size() / 50)) * 1E15 + 1E10) * ((i + 1) % 100) * std::cos(3 * i + 1), + ((i < (array3.size() / 20)) * 1E7 + 1E10) * ((i + 1) % 10) * std::tan(3 * i + 1)); + } + + Array<TinyVector<3, double>> array(array1.size() + array2.size() + array3.size()); + { + size_t I = 0; + for (size_t i = 0; i < array1.size(); ++i) { + array[I++] = array1[i]; + } + + for (size_t i = 0; i < array2.size(); ++i) { + array[I++] = array2[i]; + } + + for (size_t i = 0; i < array3.size(); ++i) { + array[I++] = array3[i]; + } + } + + ReproducibleTinyVectorSum s1(array1); + ReproducibleTinyVectorSum s2(array2); + ReproducibleTinyVectorSum s3(array3); + using RSumT = decltype(s1); + + RSumT::Bin bin1 = s1.getSummationBin(); + 
RSumT::Bin bin2 = s2.getSummationBin(); + RSumT::Bin bin3 = s3.getSummationBin(); + + { + RSumT::Bin bin_sum = zero; + + RSumT::addBinTo(bin1, bin_sum); + RSumT::addBinTo(bin2, bin_sum); + RSumT::addBinTo(bin3, bin_sum); + + REQUIRE(sum(array) == RSumT::getValue(bin_sum)); + } + + { + RSumT::Bin bin_sum = zero; + + RSumT::addBinTo(bin3, bin_sum); + RSumT::addBinTo(bin2, bin_sum); + RSumT::addBinTo(bin1, bin_sum); + + REQUIRE(sum(array) == RSumT::getValue(bin_sum)); + } + } + + SECTION("small and large arrays") + { + Array<TinyVector<3, double>> array1(122); + for (size_t i = 0; i < array1.size(); ++i) { + array1[i] = + TinyVector<3, double>(((i < (array1.size() / 10)) * 1E25 + 1E20) * ((i + 1) % 1'000) * std::sin(3 * i + 1), + ((i < (array1.size() / 50)) * 1E15 + 1E10) * ((i + 1) % 100) * std::cos(3 * i + 1), + ((i < (array1.size() / 20)) * 1E7 + 1E10) * ((i + 1) % 10) * std::tan(3 * i + 1)); + } + + Array<TinyVector<3, double>> array2(7357); + for (size_t i = 0; i < array2.size(); ++i) { + array2[i] = + TinyVector<3, double>(((i < (array2.size() / 10)) * 1E25 + 1E20) * ((i + 1) % 1'000) * std::sin(3 * i + 1), + ((i < (array2.size() / 50)) * 1E15 + 1E10) * ((i + 1) % 100) * std::cos(3 * i + 1), + ((i < (array2.size() / 20)) * 1E7 + 1E10) * ((i + 1) % 10) * std::tan(3 * i + 1)); + } + + Array<TinyVector<3, double>> array3(283); + for (size_t i = 0; i < array3.size(); ++i) { + array3[i] = + TinyVector<3, double>(((i < (array3.size() / 10)) * 1E25 + 1E20) * ((i + 1) % 1'000) * std::sin(3 * i + 1), + ((i < (array3.size() / 50)) * 1E15 + 1E10) * ((i + 1) % 100) * std::cos(3 * i + 1), + ((i < (array3.size() / 20)) * 1E7 + 1E10) * ((i + 1) % 10) * std::tan(3 * i + 1)); + } + + Array<TinyVector<3, double>> array(array1.size() + array2.size() + array3.size()); + { + size_t I = 0; + for (size_t i = 0; i < array1.size(); ++i) { + array[I++] = array1[i]; + } + + for (size_t i = 0; i < array2.size(); ++i) { + array[I++] = array2[i]; + } + + for (size_t i = 0; i < array3.size(); 
++i) { + array[I++] = array3[i]; + } + } + + ReproducibleTinyVectorSum s1(array1); + ReproducibleTinyVectorSum s2(array2); + ReproducibleTinyVectorSum s3(array3); + using RSumT = decltype(s1); + + RSumT::Bin bin1 = s1.getSummationBin(); + RSumT::Bin bin2 = s2.getSummationBin(); + RSumT::Bin bin3 = s3.getSummationBin(); + + { + RSumT::Bin bin_sum = zero; + + RSumT::addBinTo(bin1, bin_sum); + RSumT::addBinTo(bin2, bin_sum); + RSumT::addBinTo(bin3, bin_sum); + + REQUIRE(sum(array) == RSumT::getValue(bin_sum)); + } + + { + RSumT::Bin bin_sum = zero; + + RSumT::addBinTo(bin3, bin_sum); + RSumT::addBinTo(bin2, bin_sum); + RSumT::addBinTo(bin1, bin_sum); + + REQUIRE(sum(array) == RSumT::getValue(bin_sum)); + } + } + + SECTION("large arrays") + { + Array<TinyVector<3, double>> array1(5122); + for (size_t i = 0; i < array1.size(); ++i) { + array1[i] = + TinyVector<3, double>(((i < (array1.size() / 10)) * 1E25 + 1E20) * ((i + 1) % 1'000) * std::sin(3 * i + 1), + ((i < (array1.size() / 50)) * 1E15 + 1E10) * ((i + 1) % 100) * std::cos(3 * i + 1), + ((i < (array1.size() / 20)) * 1E7 + 1E10) * ((i + 1) % 10) * std::tan(3 * i + 1)); + } + + Array<TinyVector<3, double>> array2(7357); + for (size_t i = 0; i < array2.size(); ++i) { + array2[i] = + TinyVector<3, double>(((i < (array2.size() / 10)) * 1E25 + 1E20) * ((i + 1) % 1'000) * std::sin(3 * i + 1), + ((i < (array2.size() / 50)) * 1E15 + 1E10) * ((i + 1) % 100) * std::cos(3 * i + 1), + ((i < (array2.size() / 20)) * 1E7 + 1E10) * ((i + 1) % 10) * std::tan(3 * i + 1)); + } + + Array<TinyVector<3, double>> array3(4283); + for (size_t i = 0; i < array3.size(); ++i) { + array3[i] = + TinyVector<3, double>(((i < (array3.size() / 10)) * 1E25 + 1E20) * ((i + 1) % 1'000) * std::sin(3 * i + 1), + ((i < (array3.size() / 50)) * 1E15 + 1E10) * ((i + 1) % 100) * std::cos(3 * i + 1), + ((i < (array3.size() / 20)) * 1E7 + 1E10) * ((i + 1) % 10) * std::tan(3 * i + 1)); + } + + Array<TinyVector<3, double>> array(array1.size() + array2.size() + 
array3.size()); + { + size_t I = 0; + for (size_t i = 0; i < array1.size(); ++i) { + array[I++] = array1[i]; + } + + for (size_t i = 0; i < array2.size(); ++i) { + array[I++] = array2[i]; + } + + for (size_t i = 0; i < array3.size(); ++i) { + array[I++] = array3[i]; + } + } + + ReproducibleTinyVectorSum s1(array1); + ReproducibleTinyVectorSum s2(array2); + ReproducibleTinyVectorSum s3(array3); + using RSumT = decltype(s1); + + RSumT::Bin bin1 = s1.getSummationBin(); + RSumT::Bin bin2 = s2.getSummationBin(); + RSumT::Bin bin3 = s3.getSummationBin(); + + { + RSumT::Bin bin_sum = zero; + + RSumT::addBinTo(bin1, bin_sum); + RSumT::addBinTo(bin2, bin_sum); + RSumT::addBinTo(bin3, bin_sum); + + REQUIRE(sum(array) == RSumT::getValue(bin_sum)); + } + + { + RSumT::Bin bin_sum = zero; + + RSumT::addBinTo(bin3, bin_sum); + RSumT::addBinTo(bin2, bin_sum); + RSumT::addBinTo(bin1, bin_sum); + + REQUIRE(sum(array) == RSumT::getValue(bin_sum)); + } + } + } + + SECTION("TinyMatrix bin summation") + { + SECTION("small arrays") + { + Array<TinyMatrix<2, 3, double>> array1(122); + for (size_t i = 0; i < array1.size(); ++i) { + array1[i] = + TinyMatrix<2, 3, double>(((i < (array1.size() / 10)) * 1E25 + 1E20) * ((i + 1) % 1'000) * + std::sin(3 * i + 1), + ((i < (array1.size() / 50)) * 1E15 + 1E10) * ((i + 1) % 100) * std::cos(3 * i + 1), + ((i < (array1.size() / 20)) * 1E7 + 1E10) * ((i + 1) % 10) * std::tan(3 * i + 1), + ((i < (array1.size() / 30)) * 1E6 + 1E11) * ((i + 1) % 100) * std::cos(4 * i + 1), + ((i < (array1.size() / 70)) * 1E8 + 1E13) * ((i + 1) % 100) * std::sin(4 * i + 1), + ((i < (array1.size() / 11)) * 1E8 + 1E9) * ((i + 1) % 100) * std::cos(4 * i + 1)); + } + + Array<TinyMatrix<2, 3, double>> array2(357); + for (size_t i = 0; i < array2.size(); ++i) { + array2[i] = + TinyMatrix<2, 3, double>(((i < (array2.size() / 20)) * 1E25 + 1E20) * ((i + 1) % 1'000) * + std::sin(3 * i + 1), + ((i < (array2.size() / 70)) * 1E15 + 1E10) * ((i + 1) % 100) * std::cos(3 * i + 1), + ((i < 
(array2.size() / 32)) * 1E7 + 1E10) * ((i + 1) % 10) * std::tan(3 * i + 1), + ((i < (array2.size() / 45)) * 1E6 + 1E11) * ((i + 1) % 100) * std::cos(4 * i + 1), + ((i < (array2.size() / 66)) * 1E8 + 1E13) * ((i + 1) % 100) * std::sin(4 * i + 1), + ((i < (array2.size() / 13)) * 1E8 + 1E9) * ((i + 1) % 100) * std::cos(4 * i + 1)); + } + + Array<TinyMatrix<2, 3, double>> array3(283); + for (size_t i = 0; i < array3.size(); ++i) { + array3[i] = + TinyMatrix<2, 3, double>(((i < (array3.size() / 23)) * 1E15 + 1E17) * ((i + 1) % 1'000) * + std::sin(3 * i + 1), + ((i < (array3.size() / 67)) * 1E15 + 1E10) * ((i + 1) % 100) * std::cos(3 * i + 1), + ((i < (array3.size() / 62)) * 1E7 + 1E10) * ((i + 1) % 10) * std::tan(3 * i + 1), + ((i < (array3.size() / 35)) * 1E8 + 1E11) * ((i + 1) % 100) * std::cos(4 * i + 1), + ((i < (array3.size() / 63)) * 1E7 + 1E12) * ((i + 1) % 100) * std::sin(4 * i + 1), + ((i < (array3.size() / 17)) * 1E9 + 1E8) * ((i + 1) % 100) * std::cos(4 * i + 1)); + } + + Array<TinyMatrix<2, 3, double>> array(array1.size() + array2.size() + array3.size()); + { + size_t I = 0; + for (size_t i = 0; i < array1.size(); ++i) { + array[I++] = array1[i]; + } + + for (size_t i = 0; i < array2.size(); ++i) { + array[I++] = array2[i]; + } + + for (size_t i = 0; i < array3.size(); ++i) { + array[I++] = array3[i]; + } + } + + ReproducibleTinyMatrixSum s1(array1); + ReproducibleTinyMatrixSum s2(array2); + ReproducibleTinyMatrixSum s3(array3); + using RSumT = decltype(s1); + + RSumT::Bin bin1 = s1.getSummationBin(); + RSumT::Bin bin2 = s2.getSummationBin(); + RSumT::Bin bin3 = s3.getSummationBin(); + + { + RSumT::Bin bin_sum = zero; + + RSumT::addBinTo(bin1, bin_sum); + RSumT::addBinTo(bin2, bin_sum); + RSumT::addBinTo(bin3, bin_sum); + + REQUIRE(sum(array) == RSumT::getValue(bin_sum)); + } + + { + RSumT::Bin bin_sum = zero; + + RSumT::addBinTo(bin3, bin_sum); + RSumT::addBinTo(bin2, bin_sum); + RSumT::addBinTo(bin1, bin_sum); + + REQUIRE(sum(array) == 
RSumT::getValue(bin_sum)); + } + } + + SECTION("small and large arrays") + { + Array<TinyMatrix<2, 3, double>> array1(122); + for (size_t i = 0; i < array1.size(); ++i) { + array1[i] = + TinyMatrix<2, 3, double>(((i < (array1.size() / 10)) * 1E25 + 1E20) * ((i + 1) % 1'000) * + std::sin(3 * i + 1), + ((i < (array1.size() / 50)) * 1E15 + 1E10) * ((i + 1) % 100) * std::cos(3 * i + 1), + ((i < (array1.size() / 20)) * 1E7 + 1E10) * ((i + 1) % 10) * std::tan(3 * i + 1), + ((i < (array1.size() / 30)) * 1E6 + 1E11) * ((i + 1) % 100) * std::cos(4 * i + 1), + ((i < (array1.size() / 70)) * 1E8 + 1E13) * ((i + 1) % 100) * std::sin(4 * i + 1), + ((i < (array1.size() / 11)) * 1E8 + 1E9) * ((i + 1) % 100) * std::cos(4 * i + 1)); + } + + Array<TinyMatrix<2, 3, double>> array2(7357); + for (size_t i = 0; i < array2.size(); ++i) { + array2[i] = + TinyMatrix<2, 3, double>(((i < (array2.size() / 20)) * 1E25 + 1E20) * ((i + 1) % 1'000) * + std::sin(3 * i + 1), + ((i < (array2.size() / 70)) * 1E15 + 1E10) * ((i + 1) % 100) * std::cos(3 * i + 1), + ((i < (array2.size() / 32)) * 1E7 + 1E10) * ((i + 1) % 10) * std::tan(3 * i + 1), + ((i < (array2.size() / 45)) * 1E6 + 1E11) * ((i + 1) % 100) * std::cos(4 * i + 1), + ((i < (array2.size() / 66)) * 1E8 + 1E13) * ((i + 1) % 100) * std::sin(4 * i + 1), + ((i < (array2.size() / 13)) * 1E8 + 1E9) * ((i + 1) % 100) * std::cos(4 * i + 1)); + } + + Array<TinyMatrix<2, 3, double>> array3(283); + for (size_t i = 0; i < array3.size(); ++i) { + array3[i] = + TinyMatrix<2, 3, double>(((i < (array3.size() / 23)) * 1E15 + 1E17) * ((i + 1) % 1'000) * + std::sin(3 * i + 1), + ((i < (array3.size() / 67)) * 1E15 + 1E10) * ((i + 1) % 100) * std::cos(3 * i + 1), + ((i < (array3.size() / 62)) * 1E7 + 1E10) * ((i + 1) % 10) * std::tan(3 * i + 1), + ((i < (array3.size() / 35)) * 1E8 + 1E11) * ((i + 1) % 100) * std::cos(4 * i + 1), + ((i < (array3.size() / 63)) * 1E7 + 1E12) * ((i + 1) % 100) * std::sin(4 * i + 1), + ((i < (array3.size() / 17)) * 1E9 + 1E8) * ((i + 
1) % 100) * std::cos(4 * i + 1)); + } + + Array<TinyMatrix<2, 3, double>> array(array1.size() + array2.size() + array3.size()); + { + size_t I = 0; + for (size_t i = 0; i < array1.size(); ++i) { + array[I++] = array1[i]; + } + + for (size_t i = 0; i < array2.size(); ++i) { + array[I++] = array2[i]; + } + + for (size_t i = 0; i < array3.size(); ++i) { + array[I++] = array3[i]; + } + } + + ReproducibleTinyMatrixSum s1(array1); + ReproducibleTinyMatrixSum s2(array2); + ReproducibleTinyMatrixSum s3(array3); + using RSumT = decltype(s1); + + RSumT::Bin bin1 = s1.getSummationBin(); + RSumT::Bin bin2 = s2.getSummationBin(); + RSumT::Bin bin3 = s3.getSummationBin(); + + { + RSumT::Bin bin_sum = zero; + + RSumT::addBinTo(bin1, bin_sum); + RSumT::addBinTo(bin2, bin_sum); + RSumT::addBinTo(bin3, bin_sum); + + REQUIRE(sum(array) == RSumT::getValue(bin_sum)); + } + + { + RSumT::Bin bin_sum = zero; + + RSumT::addBinTo(bin3, bin_sum); + RSumT::addBinTo(bin2, bin_sum); + RSumT::addBinTo(bin1, bin_sum); + + REQUIRE(sum(array) == RSumT::getValue(bin_sum)); + } + } + + SECTION("large arrays") + { + Array<TinyMatrix<2, 3, double>> array1(3762); + for (size_t i = 0; i < array1.size(); ++i) { + array1[i] = + TinyMatrix<2, 3, double>(((i < (array1.size() / 10)) * 1E25 + 1E20) * ((i + 1) % 1'000) * + std::sin(3 * i + 1), + ((i < (array1.size() / 50)) * 1E15 + 1E10) * ((i + 1) % 100) * std::cos(3 * i + 1), + ((i < (array1.size() / 20)) * 1E7 + 1E10) * ((i + 1) % 10) * std::tan(3 * i + 1), + ((i < (array1.size() / 30)) * 1E6 + 1E11) * ((i + 1) % 100) * std::cos(4 * i + 1), + ((i < (array1.size() / 70)) * 1E8 + 1E13) * ((i + 1) % 100) * std::sin(4 * i + 1), + ((i < (array1.size() / 11)) * 1E8 + 1E9) * ((i + 1) % 100) * std::cos(4 * i + 1)); + } + + Array<TinyMatrix<2, 3, double>> array2(7357); + for (size_t i = 0; i < array2.size(); ++i) { + array2[i] = + TinyMatrix<2, 3, double>(((i < (array2.size() / 20)) * 1E25 + 1E20) * ((i + 1) % 1'000) * + std::sin(3 * i + 1), + ((i < (array2.size() / 
70)) * 1E15 + 1E10) * ((i + 1) % 100) * std::cos(3 * i + 1), + ((i < (array2.size() / 32)) * 1E7 + 1E10) * ((i + 1) % 10) * std::tan(3 * i + 1), + ((i < (array2.size() / 45)) * 1E6 + 1E11) * ((i + 1) % 100) * std::cos(4 * i + 1), + ((i < (array2.size() / 66)) * 1E8 + 1E13) * ((i + 1) % 100) * std::sin(4 * i + 1), + ((i < (array2.size() / 13)) * 1E8 + 1E9) * ((i + 1) % 100) * std::cos(4 * i + 1)); + } + + Array<TinyMatrix<2, 3, double>> array3(6283); + for (size_t i = 0; i < array3.size(); ++i) { + array3[i] = + TinyMatrix<2, 3, double>(((i < (array3.size() / 23)) * 1E15 + 1E17) * ((i + 1) % 1'000) * + std::sin(3 * i + 1), + ((i < (array3.size() / 67)) * 1E15 + 1E10) * ((i + 1) % 100) * std::cos(3 * i + 1), + ((i < (array3.size() / 62)) * 1E7 + 1E10) * ((i + 1) % 10) * std::tan(3 * i + 1), + ((i < (array3.size() / 35)) * 1E8 + 1E11) * ((i + 1) % 100) * std::cos(4 * i + 1), + ((i < (array3.size() / 63)) * 1E7 + 1E12) * ((i + 1) % 100) * std::sin(4 * i + 1), + ((i < (array3.size() / 17)) * 1E9 + 1E8) * ((i + 1) % 100) * std::cos(4 * i + 1)); + } + + Array<TinyMatrix<2, 3, double>> array(array1.size() + array2.size() + array3.size()); + { + size_t I = 0; + for (size_t i = 0; i < array1.size(); ++i) { + array[I++] = array1[i]; + } + + for (size_t i = 0; i < array2.size(); ++i) { + array[I++] = array2[i]; + } + + for (size_t i = 0; i < array3.size(); ++i) { + array[I++] = array3[i]; + } + } + + ReproducibleTinyMatrixSum s1(array1); + ReproducibleTinyMatrixSum s2(array2); + ReproducibleTinyMatrixSum s3(array3); + using RSumT = decltype(s1); + + RSumT::Bin bin1 = s1.getSummationBin(); + RSumT::Bin bin2 = s2.getSummationBin(); + RSumT::Bin bin3 = s3.getSummationBin(); + + { + RSumT::Bin bin_sum = zero; + + RSumT::addBinTo(bin1, bin_sum); + RSumT::addBinTo(bin2, bin_sum); + RSumT::addBinTo(bin3, bin_sum); + + REQUIRE(sum(array) == RSumT::getValue(bin_sum)); + } + + { + RSumT::Bin bin_sum = zero; + + RSumT::addBinTo(bin3, bin_sum); + RSumT::addBinTo(bin2, bin_sum); + 
RSumT::addBinTo(bin1, bin_sum); + + REQUIRE(sum(array) == RSumT::getValue(bin_sum)); + } + } + } + } + SECTION("checking for subArrayView") { Array<int> array{10}; diff --git a/tests/test_DiscreteFunctionP0.cpp b/tests/test_DiscreteFunctionP0.cpp index 39b91c4de355c1a96feaa1f1b5e2a59bf9c494d8..d0f71e984d516ec243de289f399d882663c9c499 100644 --- a/tests/test_DiscreteFunctionP0.cpp +++ b/tests/test_DiscreteFunctionP0.cpp @@ -2787,7 +2787,7 @@ TEST_CASE("DiscreteFunctionP0", "[scheme]") cell_value[cell_id] = cell_volume[cell_id] * positive_function[cell_id]; }); - REQUIRE(integrate(positive_function) == Catch::Approx(sum(cell_value))); + REQUIRE(integrate(positive_function) == sum(cell_value)); } SECTION("integrate vector") @@ -2806,8 +2806,8 @@ TEST_CASE("DiscreteFunctionP0", "[scheme]") mesh->numberOfCells(), PUGS_LAMBDA(const CellId cell_id) { cell_value[cell_id] = cell_volume[cell_id] * uh[cell_id]; }); - REQUIRE(integrate(uh)[0] == Catch::Approx(sum(cell_value)[0])); - REQUIRE(integrate(uh)[1] == Catch::Approx(sum(cell_value)[1])); + REQUIRE(integrate(uh)[0] == sum(cell_value)[0]); + REQUIRE(integrate(uh)[1] == sum(cell_value)[1]); } SECTION("integrate matrix") @@ -2826,10 +2826,10 @@ TEST_CASE("DiscreteFunctionP0", "[scheme]") mesh->numberOfCells(), PUGS_LAMBDA(const CellId cell_id) { cell_value[cell_id] = cell_volume[cell_id] * uh[cell_id]; }); - REQUIRE(integrate(uh)(0, 0) == Catch::Approx(sum(cell_value)(0, 0))); - REQUIRE(integrate(uh)(0, 1) == Catch::Approx(sum(cell_value)(0, 1))); - REQUIRE(integrate(uh)(1, 0) == Catch::Approx(sum(cell_value)(1, 0))); - REQUIRE(integrate(uh)(1, 1) == Catch::Approx(sum(cell_value)(1, 1))); + REQUIRE(integrate(uh)(0, 0) == sum(cell_value)(0, 0)); + REQUIRE(integrate(uh)(0, 1) == sum(cell_value)(0, 1)); + REQUIRE(integrate(uh)(1, 0) == sum(cell_value)(1, 0)); + REQUIRE(integrate(uh)(1, 1) == sum(cell_value)(1, 1)); } } } @@ -3131,7 +3131,7 @@ TEST_CASE("DiscreteFunctionP0", "[scheme]") cell_value[cell_id] = 
cell_volume[cell_id] * positive_function[cell_id]; }); - REQUIRE(integrate(positive_function) == Catch::Approx(sum(cell_value))); + REQUIRE(integrate(positive_function) == sum(cell_value)); } SECTION("integrate vector") @@ -3150,8 +3150,8 @@ TEST_CASE("DiscreteFunctionP0", "[scheme]") mesh->numberOfCells(), PUGS_LAMBDA(const CellId cell_id) { cell_value[cell_id] = cell_volume[cell_id] * uh[cell_id]; }); - REQUIRE(integrate(uh)[0] == Catch::Approx(sum(cell_value)[0])); - REQUIRE(integrate(uh)[1] == Catch::Approx(sum(cell_value)[1])); + REQUIRE(integrate(uh)[0] == sum(cell_value)[0]); + REQUIRE(integrate(uh)[1] == sum(cell_value)[1]); } SECTION("integrate matrix") @@ -3170,10 +3170,10 @@ TEST_CASE("DiscreteFunctionP0", "[scheme]") mesh->numberOfCells(), PUGS_LAMBDA(const CellId cell_id) { cell_value[cell_id] = cell_volume[cell_id] * uh[cell_id]; }); - REQUIRE(integrate(uh)(0, 0) == Catch::Approx(sum(cell_value)(0, 0))); - REQUIRE(integrate(uh)(0, 1) == Catch::Approx(sum(cell_value)(0, 1))); - REQUIRE(integrate(uh)(1, 0) == Catch::Approx(sum(cell_value)(1, 0))); - REQUIRE(integrate(uh)(1, 1) == Catch::Approx(sum(cell_value)(1, 1))); + REQUIRE(integrate(uh)(0, 0) == sum(cell_value)(0, 0)); + REQUIRE(integrate(uh)(0, 1) == sum(cell_value)(0, 1)); + REQUIRE(integrate(uh)(1, 0) == sum(cell_value)(1, 0)); + REQUIRE(integrate(uh)(1, 1) == sum(cell_value)(1, 1)); } } } @@ -3563,7 +3563,7 @@ TEST_CASE("DiscreteFunctionP0", "[scheme]") cell_value[cell_id] = cell_volume[cell_id] * positive_function[cell_id]; }); - REQUIRE(integrate(positive_function) == Catch::Approx(sum(cell_value))); + REQUIRE(integrate(positive_function) == sum(cell_value)); } SECTION("integrate vector") @@ -3582,8 +3582,8 @@ TEST_CASE("DiscreteFunctionP0", "[scheme]") mesh->numberOfCells(), PUGS_LAMBDA(const CellId cell_id) { cell_value[cell_id] = cell_volume[cell_id] * uh[cell_id]; }); - REQUIRE(integrate(uh)[0] == Catch::Approx(sum(cell_value)[0])); - REQUIRE(integrate(uh)[1] == 
Catch::Approx(sum(cell_value)[1])); + REQUIRE(integrate(uh)[0] == sum(cell_value)[0]); + REQUIRE(integrate(uh)[1] == sum(cell_value)[1]); } SECTION("integrate matrix") @@ -3602,10 +3602,10 @@ TEST_CASE("DiscreteFunctionP0", "[scheme]") mesh->numberOfCells(), PUGS_LAMBDA(const CellId cell_id) { cell_value[cell_id] = cell_volume[cell_id] * uh[cell_id]; }); - REQUIRE(integrate(uh)(0, 0) == Catch::Approx(sum(cell_value)(0, 0))); - REQUIRE(integrate(uh)(0, 1) == Catch::Approx(sum(cell_value)(0, 1))); - REQUIRE(integrate(uh)(1, 0) == Catch::Approx(sum(cell_value)(1, 0))); - REQUIRE(integrate(uh)(1, 1) == Catch::Approx(sum(cell_value)(1, 1))); + REQUIRE(integrate(uh)(0, 0) == sum(cell_value)(0, 0)); + REQUIRE(integrate(uh)(0, 1) == sum(cell_value)(0, 1)); + REQUIRE(integrate(uh)(1, 0) == sum(cell_value)(1, 0)); + REQUIRE(integrate(uh)(1, 1) == sum(cell_value)(1, 1)); } } } diff --git a/tests/test_ItemArrayUtils.cpp b/tests/test_ItemArrayUtils.cpp index 8c832d16c1dac5ed7a84828303f68bceaf25c3d8..346345eae1134da16e7e75b7c1d6f553ac14ef6d 100644 --- a/tests/test_ItemArrayUtils.cpp +++ b/tests/test_ItemArrayUtils.cpp @@ -8,6 +8,8 @@ #include <mesh/ItemArray.hpp> #include <mesh/ItemArrayUtils.hpp> #include <mesh/Mesh.hpp> +#include <mesh/MeshData.hpp> +#include <mesh/MeshDataManager.hpp> #include <utils/Messenger.hpp> // clazy:excludeall=non-pod-global-static @@ -1055,26 +1057,31 @@ TEST_CASE("ItemArrayUtils", "[mesh]") std::array mesh_list = MeshDataBaseForTests::get().all2DMeshes(); for (const auto& named_mesh : mesh_list) { - SECTION(named_mesh.name() + " for size_t data") + SECTION(named_mesh.name() + " for double data") { auto mesh_2d = named_mesh.mesh(); const Connectivity<2>& connectivity = mesh_2d->connectivity(); - FaceArray<size_t> face_array{connectivity, 3}; - face_array.fill(2); + auto ll = MeshDataManager::instance().getMeshData(*mesh_2d).ll(); + + FaceArray<double> face_array{connectivity, 3}; + parallel_for( + connectivity.numberOfFaces(), PUGS_LAMBDA(FaceId 
face_id) { + face_array[face_id][0] = ll[face_id]; + face_array[face_id][1] = (3 + ll[face_id]) * ll[face_id]; + face_array[face_id][2] = 2 * ll[face_id] - 3; + }); auto face_is_owned = connectivity.faceIsOwned(); - const size_t global_number_of_faces = [&] { - size_t number_of_faces = 0; - for (FaceId face_id = 0; face_id < face_is_owned.numberOfItems(); ++face_id) { - number_of_faces += face_is_owned[face_id]; - } - return parallel::allReduceSum(number_of_faces); - }(); + FaceValue<double> face_sum{connectivity}; + parallel_for( + connectivity.numberOfFaces(), PUGS_LAMBDA(FaceId face_id) { + face_sum[face_id] = face_array[face_id][0] + face_array[face_id][1] + face_array[face_id][2]; + }); - REQUIRE(sum(face_array) == 2 * face_array.sizeOfArrays() * global_number_of_faces); + REQUIRE(sum(face_array) == sum(face_sum)); } SECTION(named_mesh.name() + " for N^2 data") diff --git a/tests/test_ItemValueUtils.cpp b/tests/test_ItemValueUtils.cpp index 755392515ae3078421375f71af8727aa8bff144e..e9df51a05bc9b5a411a186ad742c2309c93ad9d4 100644 --- a/tests/test_ItemValueUtils.cpp +++ b/tests/test_ItemValueUtils.cpp @@ -8,6 +8,8 @@ #include <mesh/ItemValue.hpp> #include <mesh/ItemValueUtils.hpp> #include <mesh/Mesh.hpp> +#include <mesh/MeshData.hpp> +#include <mesh/MeshDataManager.hpp> #include <utils/Messenger.hpp> // clazy:excludeall=non-pod-global-static @@ -240,26 +242,60 @@ TEST_CASE("ItemValueUtils", "[mesh]") std::array mesh_list = MeshDataBaseForTests::get().all1DMeshes(); for (const auto& named_mesh : mesh_list) { - SECTION(named_mesh.name()) + SECTION(named_mesh.name() + " for size_t data") { auto mesh_1d = named_mesh.mesh(); const Connectivity<1>& connectivity = mesh_1d->connectivity(); + auto Vj = MeshDataManager::instance().getMeshData(*mesh_1d).Vj(); + CellValue<size_t> cell_value{connectivity}; - cell_value.fill(5); + + parallel_for( + connectivity.numberOfCells(), + PUGS_LAMBDA(CellId cell_id) { cell_value[cell_id] = std::ceil(100 * Vj[cell_id]); }); auto 
cell_is_owned = connectivity.cellIsOwned(); - const size_t global_number_of_cells = [&] { - size_t number_of_cells = 0; + const size_t global_sum = [&] { + size_t local_dum = 0; for (CellId cell_id = 0; cell_id < cell_is_owned.numberOfItems(); ++cell_id) { - number_of_cells += cell_is_owned[cell_id]; + local_dum += cell_is_owned[cell_id] * cell_value[cell_id]; } - return parallel::allReduceSum(number_of_cells); + return parallel::allReduceSum(local_dum); }(); - REQUIRE(sum(cell_value) == 5 * global_number_of_cells); + REQUIRE(sum(cell_value) == global_sum); + } + + SECTION(named_mesh.name() + " for double data") + { + auto mesh_1d = named_mesh.mesh(); + + const Connectivity<1>& connectivity = mesh_1d->connectivity(); + + auto Vj = MeshDataManager::instance().getMeshData(*mesh_1d).Vj(); + + auto cell_is_owned = connectivity.cellIsOwned(); + + size_t nb_owned_cells = 0; + for (CellId cell_id = 0; cell_id < connectivity.numberOfCells(); ++cell_id) { + nb_owned_cells += cell_is_owned[cell_id]; + } + + Array<double> local_Vj(nb_owned_cells); + { + size_t index = 0; + for (CellId cell_id = 0; cell_id < connectivity.numberOfCells(); ++cell_id) { + if (cell_is_owned[cell_id]) { + local_Vj[index++] = Vj[cell_id]; + } + } + } + Array<double> global_Vj = parallel::allGatherVariable(local_Vj); + + REQUIRE(sum(Vj) == sum(global_Vj)); } } } @@ -275,20 +311,54 @@ TEST_CASE("ItemValueUtils", "[mesh]") const Connectivity<2>& connectivity = mesh_2d->connectivity(); + auto ll = MeshDataManager::instance().getMeshData(*mesh_2d).ll(); + FaceValue<size_t> face_value{connectivity}; - face_value.fill(2); + + parallel_for( + connectivity.numberOfFaces(), + PUGS_LAMBDA(FaceId face_id) { face_value[face_id] = std::ceil(100 * ll[face_id]); }); auto face_is_owned = connectivity.faceIsOwned(); - const size_t global_number_of_faces = [&] { - size_t number_of_faces = 0; + const size_t global_sum = [&] { + size_t local_sum = 0; for (FaceId face_id = 0; face_id < face_is_owned.numberOfItems(); 
++face_id) { - number_of_faces += face_is_owned[face_id]; + local_sum += face_is_owned[face_id] * face_value[face_id]; } - return parallel::allReduceSum(number_of_faces); + return parallel::allReduceSum(local_sum); }(); - REQUIRE(sum(face_value) == 2 * global_number_of_faces); + REQUIRE(sum(face_value) == global_sum); + } + + SECTION(named_mesh.name() + "for double data") + { + auto mesh_2d = named_mesh.mesh(); + + const Connectivity<2>& connectivity = mesh_2d->connectivity(); + + auto Vj = MeshDataManager::instance().getMeshData(*mesh_2d).Vj(); + + auto cell_is_owned = connectivity.cellIsOwned(); + + size_t nb_owned_cells = 0; + for (CellId cell_id = 0; cell_id < connectivity.numberOfCells(); ++cell_id) { + nb_owned_cells += cell_is_owned[cell_id]; + } + + Array<double> local_Vj(nb_owned_cells); + { + size_t index = 0; + for (CellId cell_id = 0; cell_id < connectivity.numberOfCells(); ++cell_id) { + if (cell_is_owned[cell_id]) { + local_Vj[index++] = Vj[cell_id]; + } + } + } + Array<double> global_Vj = parallel::allGatherVariable(local_Vj); + + REQUIRE(sum(Vj) == sum(global_Vj)); } SECTION(named_mesh.name() + "for N^3 data") @@ -297,22 +367,66 @@ TEST_CASE("ItemValueUtils", "[mesh]") const Connectivity<2>& connectivity = mesh_2d->connectivity(); + auto ll = MeshDataManager::instance().getMeshData(*mesh_2d).ll(); + using N3 = TinyVector<3, size_t>; FaceValue<N3> face_value{connectivity}; - const N3 data(2, 1, 4); - face_value.fill(data); + + parallel_for( + connectivity.numberOfFaces(), PUGS_LAMBDA(FaceId face_id) { + face_value[face_id] = N3(std::ceil(100 * ll[face_id]), // + std::ceil(200 * ll[face_id] - 40), // + std::ceil(17.3 * ll[face_id] + 12.9)); + }); auto face_is_owned = connectivity.faceIsOwned(); - const size_t global_number_of_faces = [&] { - size_t number_of_faces = 0; + const N3 global_sum = [=] { + N3 local_sum = zero; for (FaceId face_id = 0; face_id < face_is_owned.numberOfItems(); ++face_id) { - number_of_faces += face_is_owned[face_id]; + 
local_sum += face_is_owned[face_id] * face_value[face_id]; } - return parallel::allReduceSum(number_of_faces); + return parallel::allReduceSum(local_sum); }(); - REQUIRE(sum(face_value) == global_number_of_faces * data); + REQUIRE(sum(face_value) == global_sum); + } + + SECTION(named_mesh.name() + "for R2 data") + { + auto mesh_2d = named_mesh.mesh(); + + const Connectivity<2>& connectivity = mesh_2d->connectivity(); + + auto ll = MeshDataManager::instance().getMeshData(*mesh_2d).ll(); + + using R2x3 = TinyVector<2, double>; + FaceValue<R2x3> face_value{connectivity}; + + auto face_is_owned = connectivity.faceIsOwned(); + + size_t nb_owned_faces = 0; + for (FaceId face_id = 0; face_id < connectivity.numberOfFaces(); ++face_id) { + nb_owned_faces += face_is_owned[face_id]; + } + + parallel_for( + connectivity.numberOfFaces(), PUGS_LAMBDA(FaceId face_id) { + face_value[face_id] = R2x3{ll[face_id], 1. / ll[face_id]}; + }); + + Array<R2x3> local_array(nb_owned_faces); + { + size_t index = 0; + for (FaceId face_id = 0; face_id < connectivity.numberOfFaces(); ++face_id) { + if (face_is_owned[face_id]) { + local_array[index++] = face_value[face_id]; + } + } + } + Array global_array = parallel::allGatherVariable(local_array); + + REQUIRE(sum(face_value) == sum(global_array)); } } } @@ -369,6 +483,48 @@ TEST_CASE("ItemValueUtils", "[mesh]") REQUIRE(sum(node_value) == global_number_of_nodes * data); } + + SECTION(named_mesh.name() + "for R^2x3 data") + { + auto mesh_3d = named_mesh.mesh(); + + const Connectivity<3>& connectivity = mesh_3d->connectivity(); + + auto xr = mesh_3d->xr(); + + using R2x3 = TinyMatrix<2, 3, double>; + NodeValue<R2x3> node_value{connectivity}; + + auto node_is_owned = connectivity.nodeIsOwned(); + + size_t nb_owned_nodes = 0; + for (NodeId node_id = 0; node_id < connectivity.numberOfNodes(); ++node_id) { + nb_owned_nodes += node_is_owned[node_id]; + } + + parallel_for( + connectivity.numberOfNodes(), PUGS_LAMBDA(NodeId node_id) { + 
node_value[node_id] = R2x3{xr[node_id][0], + l2Norm(xr[node_id]), + 3.178 * xr[node_id][0] - 3 * xr[node_id][2], + xr[node_id][1], + xr[node_id][2] + xr[node_id][0], + xr[node_id][0] * xr[node_id][1] * xr[node_id][2]}; + }); + + Array<R2x3> local_array(nb_owned_nodes); + { + size_t index = 0; + for (NodeId node_id = 0; node_id < connectivity.numberOfNodes(); ++node_id) { + if (node_is_owned[node_id]) { + local_array[index++] = node_value[node_id]; + } + } + } + Array global_array = parallel::allGatherVariable(local_array); + + REQUIRE(sum(node_value) == sum(global_array)); + } } } } diff --git a/tests/test_MeshFlatNodeBoundary.cpp b/tests/test_MeshFlatNodeBoundary.cpp index 9b9169f73c4306f9591ba0a9c0f5c1c622f7ef69..760f5564226f77fa53ec8de15f3f85552f02cbc4 100644 --- a/tests/test_MeshFlatNodeBoundary.cpp +++ b/tests/test_MeshFlatNodeBoundary.cpp @@ -1133,6 +1133,211 @@ TEST_CASE("MeshFlatNodeBoundary", "[mesh]") } } + SECTION("rotated diamond") + { + SECTION("2D") + { + static constexpr size_t Dimension = 2; + + using ConnectivityType = Connectivity<Dimension>; + using MeshType = Mesh<ConnectivityType>; + + using R2 = TinyVector<2>; + + auto T = [](const R2& x) -> R2 { return R2{x[0] + 0.1 * x[1], x[1] + 0.1 * x[0]}; }; + + SECTION("cartesian 2d") + { + std::shared_ptr p_mesh = MeshDataBaseForTests::get().cartesian2DMesh(); + + const ConnectivityType& connectivity = p_mesh->connectivity(); + + auto xr = p_mesh->xr(); + + NodeValue<R2> rotated_xr{connectivity}; + + parallel_for( + connectivity.numberOfNodes(), PUGS_LAMBDA(const NodeId node_id) { rotated_xr[node_id] = T(xr[node_id]); }); + + MeshType mesh{p_mesh->shared_connectivity(), rotated_xr}; + + { + const std::set<size_t> tag_set = {0, 1, 2, 3}; + + for (auto tag : tag_set) { + NumberedBoundaryDescriptor numbered_boundary_descriptor(tag); + const auto& node_boundary = getMeshFlatNodeBoundary(mesh, numbered_boundary_descriptor); + + auto node_list = get_node_list_from_tag(tag, connectivity); + 
REQUIRE(is_same(node_boundary.nodeList(), node_list)); + + R2 normal = zero; + + switch (tag) { + case 0: { + normal = 1. / std::sqrt(1.01) * R2{-1, 0.1}; + break; + } + case 1: { + normal = 1. / std::sqrt(1.01) * R2{1, -0.1}; + break; + } + case 2: { + normal = 1. / std::sqrt(1.01) * R2{0.1, -1}; + break; + } + case 3: { + normal = 1. / std::sqrt(1.01) * R2{-0.1, 1}; + break; + } + default: { + FAIL("unexpected tag number"); + } + } + REQUIRE(l2Norm(node_boundary.outgoingNormal() - normal) == Catch::Approx(0).margin(1E-13)); + } + } + + { + const std::set<std::string> name_set = {"XMIN", "XMAX", "YMIN", "YMAX"}; + + for (const auto& name : name_set) { + NamedBoundaryDescriptor named_boundary_descriptor(name); + const auto& node_boundary = getMeshFlatNodeBoundary(mesh, named_boundary_descriptor); + + auto node_list = get_node_list_from_name(name, connectivity); + REQUIRE(is_same(node_boundary.nodeList(), node_list)); + + R2 normal = zero; + + if (name == "XMIN") { + normal = 1. / std::sqrt(1.01) * R2{-1, 0.1}; + } else if (name == "XMAX") { + normal = 1. / std::sqrt(1.01) * R2{1, -0.1}; + } else if (name == "YMIN") { + normal = 1. / std::sqrt(1.01) * R2{0.1, -1}; + } else if (name == "YMAX") { + normal = 1. 
/ std::sqrt(1.01) * R2{-0.1, 1}; + } else { + FAIL("unexpected name: " + name); + } + + REQUIRE(l2Norm(node_boundary.outgoingNormal() - normal) == Catch::Approx(0).margin(1E-13)); + } + } + } + } + + SECTION("3D") + { + static constexpr size_t Dimension = 3; + + using ConnectivityType = Connectivity<Dimension>; + using MeshType = Mesh<ConnectivityType>; + + using R3 = TinyVector<3>; + + auto T = [](const R3& x) -> R3 { + return R3{x[0] + 0.1 * x[1] + 0.2 * x[2], x[1] + 0.1 * x[0] + 0.1 * x[2], x[2] + 0.1 * x[0]}; + }; + + SECTION("cartesian 3d") + { + std::shared_ptr p_mesh = MeshDataBaseForTests::get().cartesian3DMesh(); + + const ConnectivityType& connectivity = p_mesh->connectivity(); + + auto xr = p_mesh->xr(); + + NodeValue<R3> rotated_xr{connectivity}; + + parallel_for( + connectivity.numberOfNodes(), PUGS_LAMBDA(const NodeId node_id) { rotated_xr[node_id] = T(xr[node_id]); }); + + MeshType mesh{p_mesh->shared_connectivity(), rotated_xr}; + + { + const std::set<size_t> tag_set = {0, 1, 2, 3, 4, 5}; + + for (auto tag : tag_set) { + NumberedBoundaryDescriptor numbered_boundary_descriptor(tag); + const auto& node_boundary = getMeshFlatNodeBoundary(mesh, numbered_boundary_descriptor); + + auto node_list = get_node_list_from_tag(tag, connectivity); + REQUIRE(is_same(node_boundary.nodeList(), node_list)); + + R3 normal = zero; + + switch (tag) { + case 0: { + normal = R3{-0.977717523265611, 0.0977717523265611, 0.185766329420466}; + break; + } + case 1: { + normal = R3{0.977717523265611, -0.0977717523265612, -0.185766329420466}; + break; + } + case 2: { + normal = R3{0.0911512175788074, -0.992535480302569, 0.0810233045144955}; + break; + } + case 3: { + normal = R3{-0.0911512175788074, 0.992535480302569, -0.0810233045144955}; + break; + } + case 4: { + normal = R3{0.100493631166705, -0.0100493631166705, -0.994886948550377}; + break; + } + case 5: { + normal = R3{-0.100493631166705, 0.0100493631166705, 0.994886948550377}; + break; + } + default: { + FAIL("unexpected 
tag number"); + } + } + + REQUIRE(l2Norm(node_boundary.outgoingNormal() - normal) == Catch::Approx(0).margin(1E-13)); + } + } + + { + const std::set<std::string> name_set = {"XMIN", "XMAX", "YMIN", "YMAX", "ZMIN", "ZMAX"}; + + for (const auto& name : name_set) { + NamedBoundaryDescriptor named_boundary_descriptor(name); + const auto& node_boundary = getMeshFlatNodeBoundary(mesh, named_boundary_descriptor); + + auto node_list = get_node_list_from_name(name, connectivity); + + REQUIRE(is_same(node_boundary.nodeList(), node_list)); + + R3 normal = zero; + + if (name == "XMIN") { + normal = R3{-0.977717523265611, 0.0977717523265611, 0.185766329420466}; + } else if (name == "XMAX") { + normal = R3{0.977717523265611, -0.0977717523265612, -0.185766329420466}; + } else if (name == "YMIN") { + normal = R3{0.0911512175788074, -0.992535480302569, 0.0810233045144955}; + } else if (name == "YMAX") { + normal = R3{-0.0911512175788074, 0.992535480302569, -0.0810233045144955}; + } else if (name == "ZMIN") { + normal = R3{0.100493631166705, -0.0100493631166705, -0.994886948550377}; + } else if (name == "ZMAX") { + normal = R3{-0.100493631166705, 0.0100493631166705, 0.994886948550377}; + } else { + FAIL("unexpected name: " + name); + } + + REQUIRE(l2Norm(node_boundary.outgoingNormal() - normal) == Catch::Approx(0).margin(1E-13)); + } + } + } + } + } + SECTION("curved mesh") { SECTION("2D") diff --git a/tests/test_OFStream.cpp b/tests/test_OFStream.cpp index fbb5818dbc70451ff212c820edacfa595963e961..6ca7d98df0a6f36bf30add2b891a659beba757c2 100644 --- a/tests/test_OFStream.cpp +++ b/tests/test_OFStream.cpp @@ -13,7 +13,8 @@ TEST_CASE("OFStream", "[language]") { SECTION("ofstream") { - const std::string basename = std::filesystem::path{PUGS_BINARY_DIR}.append("tests").append("ofstream_"); + const std::string basename = + std::filesystem::path{PUGS_BINARY_DIR}.append("tests").append("ofstream_dir").append("ofstream_"); const std::string filename = basename + stringify(parallel::rank()); 
// Ensures that the file is closed after this line @@ -47,13 +48,4 @@ TEST_CASE("OFStream", "[language]") REQUIRE(not std::filesystem::exists(filename)); } - - SECTION("invalid filename") - { - if (parallel::rank() == 0) { - const std::string filename = "badpath/invalidpath/ofstream"; - - REQUIRE_THROWS_WITH(std::make_shared<OFStream>(filename), "error: cannot create file " + filename); - } - } } diff --git a/tests/test_SubItemArrayPerItemUtils.cpp b/tests/test_SubItemArrayPerItemUtils.cpp index e534ec34600ef526336249a4b7d1cfc794721827..d36829250ce4a5e9b88c7015504c1ffb68362f96 100644 --- a/tests/test_SubItemArrayPerItemUtils.cpp +++ b/tests/test_SubItemArrayPerItemUtils.cpp @@ -6,6 +6,8 @@ #include <algebra/TinyVector.hpp> #include <mesh/Connectivity.hpp> #include <mesh/Mesh.hpp> +#include <mesh/MeshData.hpp> +#include <mesh/MeshDataManager.hpp> #include <mesh/SubItemArrayPerItem.hpp> #include <mesh/SubItemArrayPerItemUtils.hpp> #include <utils/Messenger.hpp> @@ -368,6 +370,43 @@ TEST_CASE("SubItemArrayPerItemUtils", "[mesh]") REQUIRE(sum(node_array_per_face) == global_number_of_nodes_per_faces * node_array_per_face.sizeOfArrays() * data); } + + SECTION(named_mesh.name() + " for float data") + { + auto mesh_2d = named_mesh.mesh(); + + const Connectivity<2>& connectivity = mesh_2d->connectivity(); + + EdgeArrayPerCell<float> edge_array_per_cell{connectivity, 3}; + edge_array_per_cell.fill(1E10); + + auto cell_is_owned = connectivity.cellIsOwned(); + parallel_for( + connectivity.numberOfCells(), PUGS_LAMBDA(CellId cell_id) { + if (cell_is_owned[cell_id]) { + auto cell_table = edge_array_per_cell.itemTable(cell_id); + for (size_t i = 0; i < cell_table.numberOfRows(); ++i) { + for (size_t j = 0; j < cell_table.numberOfColumns(); ++j) { + cell_table(i, j) = 10 + parallel::rank() - i - j + 1.5; + } + } + } + }); + + CellValue<float> cell_sum(connectivity); + cell_sum.fill(0); + parallel_for( + connectivity.numberOfCells(), PUGS_LAMBDA(CellId cell_id) { + auto cell_table 
= edge_array_per_cell.itemTable(cell_id); + for (size_t i = 0; i < cell_table.numberOfRows(); ++i) { + for (size_t j = 0; j < cell_table.numberOfColumns(); ++j) { + cell_sum[cell_id] += cell_table(i, j); + } + } + }); + + REQUIRE(sum(edge_array_per_cell) == sum(cell_sum)); + } } } @@ -405,10 +444,10 @@ TEST_CASE("SubItemArrayPerItemUtils", "[mesh]") const Connectivity<3>& connectivity = mesh_3d->connectivity(); - using N2x3 = TinyMatrix<2, 3, size_t>; - EdgeArrayPerFace<N2x3> edge_array_per_face{connectivity, 3}; + using R2x3 = TinyMatrix<2, 3, size_t>; + EdgeArrayPerFace<R2x3> edge_array_per_face{connectivity, 3}; - const N2x3 data(1, 3, 4, 6, 2, 5); + const R2x3 data(1, 3, 4, 6, 2, 5); edge_array_per_face.fill(data); auto face_is_owned = connectivity.faceIsOwned(); @@ -424,6 +463,46 @@ TEST_CASE("SubItemArrayPerItemUtils", "[mesh]") REQUIRE(sum(edge_array_per_face) == global_number_of_edges_per_faces * edge_array_per_face.sizeOfArrays() * data); } + + SECTION(named_mesh.name() + " for R^2x3 data") + { + auto mesh_3d = named_mesh.mesh(); + + const Connectivity<3>& connectivity = mesh_3d->connectivity(); + + using R2x3 = TinyMatrix<2, 3, double>; + EdgeArrayPerFace<R2x3> edge_array_per_face{connectivity, 3}; + + auto Nl = MeshDataManager::instance().getMeshData(*mesh_3d).Nl(); + + parallel_for( + mesh_3d->numberOfFaces(), PUGS_LAMBDA(FaceId face_id) { + for (size_t i_edge = 0; i_edge < edge_array_per_face.numberOfSubArrays(face_id); ++i_edge) { + for (size_t i = 0; i < edge_array_per_face[face_id][i_edge].size(); ++i) { + R2x3 value((2.3 * i_edge + 1.7) * Nl[face_id][0], // + (3. 
- i_edge) * Nl[face_id][1], // + (2 * i_edge + 1.3) * Nl[face_id][2], // + (i_edge + 1.7) * Nl[face_id][0] + 3.2 * Nl[face_id][1], // + Nl[face_id][2] + 3 * i_edge, // + 7.13 * i_edge * i_edge * l2Norm(Nl[face_id])); + edge_array_per_face[face_id][i_edge][i] = (i + 1.3) * value; + } + } + }); + + FaceValue<R2x3> face_value{connectivity}; + face_value.fill(zero); + parallel_for( + mesh_3d->numberOfFaces(), PUGS_LAMBDA(FaceId face_id) { + for (size_t i_edge = 0; i_edge < edge_array_per_face.numberOfSubArrays(face_id); ++i_edge) { + for (size_t i = 0; i < edge_array_per_face[face_id][i_edge].size(); ++i) { + face_value[face_id] += edge_array_per_face[face_id][i_edge][i]; + } + } + }); + + REQUIRE(sum(edge_array_per_face) == sum(face_value)); + } } } } diff --git a/tests/test_SubItemValuePerItemUtils.cpp b/tests/test_SubItemValuePerItemUtils.cpp index aca9a14498e447b1cd6ffd21411f45e847b34ac5..353d7a9500a2cba3384ae22b8863f020d1c07361 100644 --- a/tests/test_SubItemValuePerItemUtils.cpp +++ b/tests/test_SubItemValuePerItemUtils.cpp @@ -6,6 +6,8 @@ #include <algebra/TinyVector.hpp> #include <mesh/Connectivity.hpp> #include <mesh/Mesh.hpp> +#include <mesh/MeshData.hpp> +#include <mesh/MeshDataManager.hpp> #include <mesh/SubItemValuePerItem.hpp> #include <mesh/SubItemValuePerItemUtils.hpp> #include <utils/Messenger.hpp> @@ -325,29 +327,34 @@ TEST_CASE("SubItemValuePerItemUtils", "[mesh]") REQUIRE(sum(node_value_per_face) == 2 * global_number_of_nodes_per_faces); } - SECTION(named_mesh.name() + " for N^2 data") + SECTION(named_mesh.name() + " for R^2 data") { auto mesh_2d = named_mesh.mesh(); const Connectivity<2>& connectivity = mesh_2d->connectivity(); - using N2 = TinyVector<2, size_t>; - NodeValuePerFace<N2> node_value_per_face{connectivity}; + using R2 = TinyVector<2, double>; + NodeValuePerFace<R2> node_value_per_face{connectivity}; - const N2 data(3, 2); - node_value_per_face.fill(data); + auto Nl = MeshDataManager::instance().getMeshData(*mesh_2d).Nl(); - auto 
face_is_owned = connectivity.faceIsOwned(); + parallel_for( + mesh_2d->numberOfFaces(), PUGS_LAMBDA(FaceId face_id) { + for (size_t i_node = 0; i_node < node_value_per_face.numberOfSubValues(face_id); ++i_node) { + node_value_per_face[face_id][i_node] = (i_node + 1) * Nl[face_id]; + } + }); - const size_t global_number_of_nodes_per_faces = [&] { - size_t number_of_nodes_per_faces = 0; - for (FaceId face_id = 0; face_id < face_is_owned.numberOfItems(); ++face_id) { - number_of_nodes_per_faces += face_is_owned[face_id] * node_value_per_face.numberOfSubValues(face_id); - } - return parallel::allReduceSum(number_of_nodes_per_faces); - }(); + FaceValue<R2> face_value{connectivity}; + parallel_for( + mesh_2d->numberOfFaces(), PUGS_LAMBDA(FaceId face_id) { + face_value[face_id] = node_value_per_face[face_id][0]; + for (size_t i_node = 1; i_node < node_value_per_face.numberOfSubValues(face_id); ++i_node) { + face_value[face_id] += node_value_per_face[face_id][i_node]; + } + }); - REQUIRE(sum(node_value_per_face) == global_number_of_nodes_per_faces * data); + REQUIRE(sum(node_value_per_face) == sum(face_value)); } } } @@ -403,6 +410,42 @@ TEST_CASE("SubItemValuePerItemUtils", "[mesh]") REQUIRE(sum(edge_value_per_face) == global_number_of_edges_per_faces * data); } + + SECTION(named_mesh.name() + " for R^3x2 float") + { + auto mesh_3d = named_mesh.mesh(); + + const Connectivity<3>& connectivity = mesh_3d->connectivity(); + + auto xr = mesh_3d->xr(); + + using R3x2 = TinyMatrix<3, 2, float>; + EdgeValuePerNode<R3x2> edge_value_per_node{connectivity}; + + parallel_for( + mesh_3d->numberOfNodes(), PUGS_LAMBDA(NodeId node_id) { + for (size_t i_edge = 0; i_edge < edge_value_per_node.numberOfSubValues(node_id); ++i_edge) { + R3x2 value((2.3 * i_edge + 1.7) * xr[node_id][0], // + (3. 
- i_edge) * xr[node_id][1], // + (2 * i_edge + 1.3) * xr[node_id][2], // + (i_edge + 1.7) * xr[node_id][0] + 3.2 * xr[node_id][1], // + xr[node_id][2] + 3 * i_edge, // + 7.13 * i_edge * i_edge * l2Norm(xr[node_id])); + edge_value_per_node[node_id][i_edge] = value; + } + }); + + NodeValue<R3x2> node_value{connectivity}; + parallel_for( + mesh_3d->numberOfNodes(), PUGS_LAMBDA(NodeId node_id) { + node_value[node_id] = edge_value_per_node[node_id][0]; + for (size_t i_edge = 1; i_edge < edge_value_per_node.numberOfSubValues(node_id); ++i_edge) { + node_value[node_id] += edge_value_per_node[node_id][i_edge]; + } + }); + + REQUIRE(sum(edge_value_per_node) == sum(node_value)); + } } } } diff --git a/tests/test_main.cpp b/tests/test_main.cpp index 0e334f92c8a73ca526e905c1c5d6e82236f2d1ea..084d9b9453cb683586ce8f9a6d7b72678bcd3aaa 100644 --- a/tests/test_main.cpp +++ b/tests/test_main.cpp @@ -21,7 +21,7 @@ int main(int argc, char* argv[]) { parallel::Messenger::create(argc, argv); - const int nb_threads = std::max(std::thread::hardware_concurrency(), 1u); + const int nb_threads = std::max(std::thread::hardware_concurrency() / 2, 1u); { Kokkos::InitializationSettings args;