diff --git a/src/mesh/ItemArrayUtils.hpp b/src/mesh/ItemArrayUtils.hpp index b1c3ba8cdad569f73f91029c24fb1333e05037ee..1bb8d929f12a2f834535a70166e43a130fb73e40 100644 --- a/src/mesh/ItemArrayUtils.hpp +++ b/src/mesh/ItemArrayUtils.hpp @@ -5,6 +5,7 @@ #include <mesh/Connectivity.hpp> #include <mesh/ItemArray.hpp> +#include <mesh/ItemValueUtils.hpp> #include <mesh/Synchronizer.hpp> #include <mesh/SynchronizerManager.hpp> @@ -12,7 +13,7 @@ template <typename DataType, ItemType item_type, typename ConnectivityPtr> std::remove_const_t<DataType> -min(const ItemArray<DataType, item_type, ConnectivityPtr>& item_value) +min(const ItemArray<DataType, item_type, ConnectivityPtr>& item_array) { using ItemArrayType = ItemArray<DataType, item_type, ConnectivityPtr>; using ItemIsOwnedType = ItemValue<const bool, item_type>; @@ -25,7 +26,7 @@ min(const ItemArray<DataType, item_type, ConnectivityPtr>& item_value) class ItemArrayMin { private: - const ItemArrayType& m_item_value; + const ItemArrayType& m_item_array; const ItemIsOwnedType m_is_owned; public: @@ -33,7 +34,7 @@ min(const ItemArray<DataType, item_type, ConnectivityPtr>& item_value) operator data_type() { data_type reduced_value; - parallel_reduce(m_item_value.numberOfItems(), *this, reduced_value); + parallel_reduce(m_item_array.numberOfItems(), *this, reduced_value); return reduced_value; } @@ -42,8 +43,8 @@ min(const ItemArray<DataType, item_type, ConnectivityPtr>& item_value) operator()(const index_type& i_item, data_type& data) const { if (m_is_owned[i_item]) { - auto array = m_item_value[i_item]; - for (size_t i = 0; i < m_item_value.sizeOfArrays(); ++i) { + auto array = m_item_array[i_item]; + for (size_t i = 0; i < m_item_array.sizeOfArrays(); ++i) { if (array[i] < data) { data = array[i]; } @@ -69,8 +70,8 @@ min(const ItemArray<DataType, item_type, ConnectivityPtr>& item_value) } PUGS_INLINE - ItemArrayMin(const ItemArrayType& item_value) - : m_item_value(item_value), m_is_owned([&](const IConnectivity& 
connectivity) { + ItemArrayMin(const ItemArrayType& item_array) + : m_item_array(item_array), m_is_owned([&](const IConnectivity& connectivity) { Assert((connectivity.dimension() > 0) and (connectivity.dimension() <= 3), "unexpected connectivity dimension"); @@ -96,7 +97,7 @@ min(const ItemArray<DataType, item_type, ConnectivityPtr>& item_value) } // LCOV_EXCL_STOP } - }(*item_value.connectivity_ptr())) + }(*item_array.connectivity_ptr())) { ; } @@ -105,13 +106,13 @@ min(const ItemArray<DataType, item_type, ConnectivityPtr>& item_value) ~ItemArrayMin() = default; }; - const DataType local_min = ItemArrayMin{item_value}; + const DataType local_min = ItemArrayMin{item_array}; return parallel::allReduceMin(local_min); } template <typename DataType, ItemType item_type, typename ConnectivityPtr> std::remove_const_t<DataType> -max(const ItemArray<DataType, item_type, ConnectivityPtr>& item_value) +max(const ItemArray<DataType, item_type, ConnectivityPtr>& item_array) { using ItemArrayType = ItemArray<DataType, item_type, ConnectivityPtr>; using ItemIsOwnedType = ItemValue<const bool, item_type>; @@ -124,7 +125,7 @@ max(const ItemArray<DataType, item_type, ConnectivityPtr>& item_value) class ItemArrayMax { private: - const ItemArrayType& m_item_value; + const ItemArrayType& m_item_array; const ItemIsOwnedType m_is_owned; public: @@ -132,7 +133,7 @@ max(const ItemArray<DataType, item_type, ConnectivityPtr>& item_value) operator data_type() { data_type reduced_value; - parallel_reduce(m_item_value.numberOfItems(), *this, reduced_value); + parallel_reduce(m_item_array.numberOfItems(), *this, reduced_value); return reduced_value; } @@ -141,8 +142,8 @@ max(const ItemArray<DataType, item_type, ConnectivityPtr>& item_value) operator()(const index_type& i_item, data_type& data) const { if (m_is_owned[i_item]) { - auto array = m_item_value[i_item]; - for (size_t i = 0; i < m_item_value.sizeOfArrays(); ++i) { + auto array = m_item_array[i_item]; + for (size_t i = 0; i < 
m_item_array.sizeOfArrays(); ++i) { if (array[i] > data) { data = array[i]; } @@ -168,8 +169,8 @@ max(const ItemArray<DataType, item_type, ConnectivityPtr>& item_value) } PUGS_INLINE - ItemArrayMax(const ItemArrayType& item_value) - : m_item_value(item_value), m_is_owned([&](const IConnectivity& connectivity) { + ItemArrayMax(const ItemArrayType& item_array) + : m_item_array(item_array), m_is_owned([&](const IConnectivity& connectivity) { Assert((connectivity.dimension() > 0) and (connectivity.dimension() <= 3), "unexpected connectivity dimension"); @@ -195,7 +196,7 @@ max(const ItemArray<DataType, item_type, ConnectivityPtr>& item_value) } // LCOV_EXCL_STOP } - }(*item_value.connectivity_ptr())) + }(*item_array.connectivity_ptr())) { ; } @@ -204,106 +205,34 @@ max(const ItemArray<DataType, item_type, ConnectivityPtr>& item_value) ~ItemArrayMax() = default; }; - const DataType local_max = ItemArrayMax{item_value}; + const DataType local_max = ItemArrayMax{item_array}; return parallel::allReduceMax(local_max); } template <typename DataType, ItemType item_type, typename ConnectivityPtr> std::remove_const_t<DataType> -sum(const ItemArray<DataType, item_type, ConnectivityPtr>& item_value) +sum(const ItemArray<DataType, item_type, ConnectivityPtr>& item_array) { - using ItemArrayType = ItemArray<DataType, item_type, ConnectivityPtr>; - using ItemIsOwnedType = ItemValue<const bool, item_type>; - using data_type = std::remove_const_t<typename ItemArrayType::data_type>; - using index_type = typename ItemArrayType::index_type; + using ItemArrayType = ItemArray<DataType, item_type, ConnectivityPtr>; + using data_type = std::remove_const_t<typename ItemArrayType::data_type>; + using index_type = typename ItemArrayType::index_type; static_assert(not std::is_same_v<data_type, bool>, "sum cannot be called on boolean data"); - class ItemArraySum - { - private: - const ItemArrayType& m_item_value; - const ItemIsOwnedType m_is_owned; - - public: - PUGS_INLINE - operator data_type() 
- { - data_type reduced_value; - parallel_reduce(m_item_value.numberOfItems(), *this, reduced_value); - return reduced_value; - } - - PUGS_INLINE - void - operator()(const index_type& i_item, data_type& data) const - { - if (m_is_owned[i_item]) { - auto array = m_item_value[i_item]; - for (size_t i = 0; i < m_item_value.sizeOfArrays(); ++i) { - data += array[i]; - } - } - } + ItemValue<data_type, item_type> item_sum{*item_array.connectivity_ptr()}; - PUGS_INLINE - void - join(data_type& dst, const data_type& src) const - { - dst += src; - } + parallel_for( + item_array.numberOfItems(), PUGS_LAMBDA(index_type item_id) { + auto row = item_array[item_id]; + data_type row_sum = row[0]; - PUGS_INLINE - void - init(data_type& value) const - { - if constexpr (std::is_arithmetic_v<data_type>) { - value = 0; - } else { - static_assert(is_tiny_vector_v<data_type> or is_tiny_matrix_v<data_type>, "invalid data type"); - value = zero; + for (size_t i = 1; i < row.size(); ++i) { + row_sum += row[i]; } - } - - PUGS_INLINE - ItemArraySum(const ItemArrayType& item_value) - : m_item_value(item_value), m_is_owned([&](const IConnectivity& connectivity) { - Assert((connectivity.dimension() > 0) and (connectivity.dimension() <= 3), - "unexpected connectivity dimension"); - - switch (connectivity.dimension()) { - case 1: { - const auto& connectivity_1d = static_cast<const Connectivity1D&>(connectivity); - return connectivity_1d.isOwned<item_type>(); - break; - } - case 2: { - const auto& connectivity_2d = static_cast<const Connectivity2D&>(connectivity); - return connectivity_2d.isOwned<item_type>(); - break; - } - case 3: { - const auto& connectivity_3d = static_cast<const Connectivity3D&>(connectivity); - return connectivity_3d.isOwned<item_type>(); - break; - } - // LCOV_EXCL_START - default: { - throw UnexpectedError("unexpected dimension"); - } - // LCOV_EXCL_STOP - } - }(*item_value.connectivity_ptr())) - { - ; - } - - PUGS_INLINE - ~ItemArraySum() = default; - }; + 
item_sum[item_id] = row_sum; + }); - const DataType local_sum = ItemArraySum{item_value}; - return parallel::allReduceSum(local_sum); + return sum(item_sum); } template <typename DataType, ItemType item_type, typename ConnectivityPtr> diff --git a/src/mesh/ItemValueUtils.hpp b/src/mesh/ItemValueUtils.hpp index 9ef745b550e42add842ac1c822cf65b07e34dc2b..48f94728a91c705d61fcfe537084ade59818f3f1 100644 --- a/src/mesh/ItemValueUtils.hpp +++ b/src/mesh/ItemValueUtils.hpp @@ -8,6 +8,7 @@ #include <mesh/Synchronizer.hpp> #include <mesh/SynchronizerManager.hpp> #include <utils/PugsTraits.hpp> +#include <utils/ReproductibleSumMPI.hpp> #include <iostream> @@ -210,6 +211,33 @@ sum(const ItemValue<DataType, item_type, ConnectivityPtr>& item_value) static_assert(not std::is_same_v<data_type, bool>, "sum cannot be called on boolean data"); + auto get_is_owned = [](const IConnectivity& connectivity) -> ItemIsOwnedType { + Assert((connectivity.dimension() > 0) and (connectivity.dimension() <= 3), "unexpected connectivity dimension"); + + switch (connectivity.dimension()) { + case 1: { + const auto& connectivity_1d = static_cast<const Connectivity1D&>(connectivity); + return connectivity_1d.isOwned<item_type>(); + break; + } + case 2: { + const auto& connectivity_2d = static_cast<const Connectivity2D&>(connectivity); + return connectivity_2d.isOwned<item_type>(); + break; + } + case 3: { + const auto& connectivity_3d = static_cast<const Connectivity3D&>(connectivity); + return connectivity_3d.isOwned<item_type>(); + break; + } + // LCOV_EXCL_START + default: { + throw UnexpectedError("unexpected dimension"); + } + // LCOV_EXCL_STOP + } + }; + class ItemValueSum { private: @@ -254,34 +282,8 @@ sum(const ItemValue<DataType, item_type, ConnectivityPtr>& item_value) } PUGS_INLINE - ItemValueSum(const ItemValueType& item_value) - : m_item_value(item_value), m_is_owned([&](const IConnectivity& connectivity) { - Assert((connectivity.dimension() > 0) and (connectivity.dimension() <= 3), - 
"unexpected connectivity dimension"); - - switch (connectivity.dimension()) { - case 1: { - const auto& connectivity_1d = static_cast<const Connectivity1D&>(connectivity); - return connectivity_1d.isOwned<item_type>(); - break; - } - case 2: { - const auto& connectivity_2d = static_cast<const Connectivity2D&>(connectivity); - return connectivity_2d.isOwned<item_type>(); - break; - } - case 3: { - const auto& connectivity_3d = static_cast<const Connectivity3D&>(connectivity); - return connectivity_3d.isOwned<item_type>(); - break; - } - // LCOV_EXCL_START - default: { - throw UnexpectedError("unexpected dimension"); - } - // LCOV_EXCL_STOP - } - }(*item_value.connectivity_ptr())) + ItemValueSum(const ItemValueType& item_value, const ItemIsOwnedType& is_owned) + : m_item_value{item_value}, m_is_owned{is_owned} { ; } @@ -290,8 +292,34 @@ sum(const ItemValue<DataType, item_type, ConnectivityPtr>& item_value) ~ItemValueSum() = default; }; - const DataType local_sum = ItemValueSum{item_value}; - return parallel::allReduceSum(local_sum); + auto is_owned = get_is_owned(*item_value.connectivity_ptr()); + + if (ReproductibleSumManager::reproductibleSums()) { + if constexpr (std::is_floating_point_v<data_type>) { + ReproductibleScalarSum r_sum(item_value.arrayView(), is_owned.arrayView()); + using ReproductibleSumType = decltype(r_sum); + return ReproductibleSumType::getValue(allReduceReproductibleSum(r_sum)); + } else if constexpr (not std::is_arithmetic_v<data_type>) { + if constexpr (is_tiny_vector_v<data_type> and std::is_floating_point_v<typename data_type::data_type>) { + ReproductibleTinyVectorSum r_sum(item_value.arrayView(), is_owned.arrayView()); + using ReproductibleSumType = decltype(r_sum); + return ReproductibleSumType::getValue(allReduceReproductibleSum(r_sum)); + } else if constexpr (is_tiny_matrix_v<data_type> and std::is_floating_point_v<typename data_type::data_type>) { + ReproductibleTinyMatrixSum r_sum(item_value.arrayView(), is_owned.arrayView()); + 
using ReproductibleSumType = decltype(r_sum); + return ReproductibleSumType::getValue(allReduceReproductibleSum(r_sum)); + } else { + const DataType local_sum = ItemValueSum{item_value, is_owned}; + return parallel::allReduceSum(local_sum); + } + } else { + const DataType local_sum = ItemValueSum{item_value, is_owned}; + return parallel::allReduceSum(local_sum); + } + } else { + const DataType local_sum = ItemValueSum{item_value, is_owned}; + return parallel::allReduceSum(local_sum); + } } template <typename DataType, ItemType item_type, typename ConnectivityPtr> diff --git a/src/mesh/SubItemArrayPerItemUtils.hpp b/src/mesh/SubItemArrayPerItemUtils.hpp index d2553590ab0f0dad219cdfa82491c05a70b5edbe..aec08ef845cc80bafd20ec8fa1a3b9bcc94415de 100644 --- a/src/mesh/SubItemArrayPerItemUtils.hpp +++ b/src/mesh/SubItemArrayPerItemUtils.hpp @@ -4,13 +4,12 @@ #include <utils/Messenger.hpp> #include <mesh/Connectivity.hpp> +#include <mesh/ItemValueUtils.hpp> #include <mesh/SubItemArrayPerItem.hpp> #include <mesh/Synchronizer.hpp> #include <mesh/SynchronizerManager.hpp> #include <utils/PugsTraits.hpp> -#include <iostream> - template <typename DataType, typename ItemOfItem, typename ConnectivityPtr> std::remove_const_t<DataType> min(const SubItemArrayPerItem<DataType, ItemOfItem, ConnectivityPtr>& sub_item_array_per_item) @@ -34,9 +33,9 @@ min(const SubItemArrayPerItem<DataType, ItemOfItem, ConnectivityPtr>& sub_item_a PUGS_INLINE operator data_type() { - data_type reduced_array; - parallel_reduce(m_sub_item_array_per_item.numberOfItems(), *this, reduced_array); - return reduced_array; + data_type reduced_value; + parallel_reduce(m_sub_item_array_per_item.numberOfItems(), *this, reduced_value); + return reduced_value; } PUGS_INLINE @@ -67,9 +66,9 @@ min(const SubItemArrayPerItem<DataType, ItemOfItem, ConnectivityPtr>& sub_item_a PUGS_INLINE void - init(data_type& array) const + init(data_type& value) const { - array = std::numeric_limits<data_type>::max(); + value = 
std::numeric_limits<data_type>::max(); } PUGS_INLINE @@ -136,9 +135,9 @@ max(const SubItemArrayPerItem<DataType, ItemOfItem, ConnectivityPtr>& sub_item_a PUGS_INLINE operator data_type() { - data_type reduced_array; - parallel_reduce(m_sub_item_array_per_item.numberOfItems(), *this, reduced_array); - return reduced_array; + data_type reduced_value; + parallel_reduce(m_sub_item_array_per_item.numberOfItems(), *this, reduced_value); + return reduced_value; } PUGS_INLINE @@ -169,9 +168,9 @@ max(const SubItemArrayPerItem<DataType, ItemOfItem, ConnectivityPtr>& sub_item_a PUGS_INLINE void - init(data_type& array) const + init(data_type& value) const { - array = std::numeric_limits<data_type>::min(); + value = std::numeric_limits<data_type>::min(); } PUGS_INLINE @@ -217,103 +216,38 @@ max(const SubItemArrayPerItem<DataType, ItemOfItem, ConnectivityPtr>& sub_item_a template <typename DataType, typename ItemOfItem, typename ConnectivityPtr> std::remove_const_t<DataType> -sum(const SubItemArrayPerItem<DataType, ItemOfItem, ConnectivityPtr>& item_array) +sum(const SubItemArrayPerItem<DataType, ItemOfItem, ConnectivityPtr>& sub_item_array_per_item) { using SubItemArrayPerItemType = SubItemArrayPerItem<DataType, ItemOfItem, ConnectivityPtr>; constexpr ItemType item_type = ItemOfItem::item_type; - using ItemIsOwnedType = ItemValue<const bool, item_type>; using data_type = std::remove_const_t<typename SubItemArrayPerItemType::data_type>; using index_type = typename SubItemArrayPerItemType::index_type; static_assert(not std::is_same_v<data_type, bool>, "sum cannot be called on boolean data"); - class SubItemArrayPerItemSum - { - private: - const SubItemArrayPerItemType& m_sub_item_array_per_item; - const ItemIsOwnedType m_is_owned; - - public: - PUGS_INLINE - operator data_type() - { - data_type reduced_array; - parallel_reduce(m_sub_item_array_per_item.numberOfItems(), *this, reduced_array); - return reduced_array; - } - - PUGS_INLINE - void - operator()(const index_type& 
item_id, data_type& data) const - { - if (m_is_owned[item_id]) { - auto sub_item_table = m_sub_item_array_per_item.itemTable(item_id); - for (size_t i = 0; i < sub_item_table.numberOfRows(); ++i) { - for (size_t j = 0; j < sub_item_table.numberOfColumns(); ++j) { - data += sub_item_table(i, j); - } + ItemValue<data_type, item_type> item_sum{*sub_item_array_per_item.connectivity_ptr()}; + parallel_for( + sub_item_array_per_item.numberOfItems(), PUGS_LAMBDA(index_type item_id) { + auto sub_item_table = sub_item_array_per_item.itemTable(item_id); + data_type sub_item_table_sum = [] { + if constexpr (std::is_arithmetic_v<data_type>) { + return 0; + } else { + static_assert(is_tiny_vector_v<data_type> or is_tiny_matrix_v<data_type>, "invalid data type"); + return zero; } - } - } + }(); - PUGS_INLINE - void - join(data_type& dst, const data_type& src) const - { - dst += src; - } - - PUGS_INLINE - void - init(data_type& array) const - { - if constexpr (std::is_arithmetic_v<data_type>) { - array = 0; - } else { - static_assert(is_tiny_vector_v<data_type> or is_tiny_matrix_v<data_type>, "invalid data type"); - array = zero; + for (size_t i = 0; i < sub_item_table.numberOfRows(); ++i) { + for (size_t j = 0; j < sub_item_table.numberOfColumns(); ++j) { + sub_item_table_sum += sub_item_table(i, j); + } } - } - PUGS_INLINE - SubItemArrayPerItemSum(const SubItemArrayPerItemType& item_array) - : m_sub_item_array_per_item(item_array), m_is_owned([&](const IConnectivity& connectivity) { - Assert((connectivity.dimension() > 0) and (connectivity.dimension() <= 3), - "unexpected connectivity dimension"); - - switch (connectivity.dimension()) { - case 1: { - const auto& connectivity_1d = static_cast<const Connectivity1D&>(connectivity); - return connectivity_1d.isOwned<item_type>(); - break; - } - case 2: { - const auto& connectivity_2d = static_cast<const Connectivity2D&>(connectivity); - return connectivity_2d.isOwned<item_type>(); - break; - } - case 3: { - const auto& 
connectivity_3d = static_cast<const Connectivity3D&>(connectivity); - return connectivity_3d.isOwned<item_type>(); - break; - } - // LCOV_EXCL_START - default: { - throw UnexpectedError("unexpected dimension"); - } - // LCOV_EXCL_STOP - } - }(*item_array.connectivity_ptr())) - { - ; - } - - PUGS_INLINE - ~SubItemArrayPerItemSum() = default; - }; + item_sum[item_id] = sub_item_table_sum; + }); - const DataType local_sum = SubItemArrayPerItemSum{item_array}; - return parallel::allReduceSum(local_sum); + return sum(item_sum); } template <typename DataType, typename ItemOfItem, typename ConnectivityPtr> diff --git a/src/mesh/SubItemValuePerItemUtils.hpp b/src/mesh/SubItemValuePerItemUtils.hpp index 304acae23a31e610695a2b309e72e7d8035d01a5..575c39c5238877bb064883c54b7d08792547000b 100644 --- a/src/mesh/SubItemValuePerItemUtils.hpp +++ b/src/mesh/SubItemValuePerItemUtils.hpp @@ -4,13 +4,12 @@ #include <utils/Messenger.hpp> #include <mesh/Connectivity.hpp> +#include <mesh/ItemValueUtils.hpp> #include <mesh/SubItemValuePerItem.hpp> #include <mesh/Synchronizer.hpp> #include <mesh/SynchronizerManager.hpp> #include <utils/PugsTraits.hpp> -#include <iostream> - template <typename DataType, typename ItemOfItem, typename ConnectivityPtr> std::remove_const_t<DataType> min(const SubItemValuePerItem<DataType, ItemOfItem, ConnectivityPtr>& sub_item_value_per_item) @@ -213,101 +212,28 @@ max(const SubItemValuePerItem<DataType, ItemOfItem, ConnectivityPtr>& sub_item_v template <typename DataType, typename ItemOfItem, typename ConnectivityPtr> std::remove_const_t<DataType> -sum(const SubItemValuePerItem<DataType, ItemOfItem, ConnectivityPtr>& item_value) +sum(const SubItemValuePerItem<DataType, ItemOfItem, ConnectivityPtr>& sub_item_value_per_item) { using SubItemValuePerItemType = SubItemValuePerItem<DataType, ItemOfItem, ConnectivityPtr>; constexpr ItemType item_type = ItemOfItem::item_type; - using ItemIsOwnedType = ItemValue<const bool, item_type>; using data_type = 
std::remove_const_t<typename SubItemValuePerItemType::data_type>; using index_type = typename SubItemValuePerItemType::index_type; static_assert(not std::is_same_v<data_type, bool>, "sum cannot be called on boolean data"); - class SubItemValuePerItemSum - { - private: - const SubItemValuePerItemType& m_sub_item_value_per_item; - const ItemIsOwnedType m_is_owned; - - public: - PUGS_INLINE - operator data_type() - { - data_type reduced_value; - parallel_reduce(m_sub_item_value_per_item.numberOfItems(), *this, reduced_value); - return reduced_value; - } - - PUGS_INLINE - void - operator()(const index_type& item_id, data_type& data) const - { - if (m_is_owned[item_id]) { - auto sub_item_array = m_sub_item_value_per_item.itemArray(item_id); - for (size_t i = 0; i < sub_item_array.size(); ++i) { - data += sub_item_array[i]; - } - } - } - - PUGS_INLINE - void - join(data_type& dst, const data_type& src) const - { - dst += src; - } - - PUGS_INLINE - void - init(data_type& value) const - { - if constexpr (std::is_arithmetic_v<data_type>) { - value = 0; - } else { - static_assert(is_tiny_vector_v<data_type> or is_tiny_matrix_v<data_type>, "invalid data type"); - value = zero; + ItemValue<data_type, item_type> item_sum{*sub_item_value_per_item.connectivity_ptr()}; + parallel_for( + sub_item_value_per_item.numberOfItems(), PUGS_LAMBDA(index_type item_id) { + auto sub_item_array = sub_item_value_per_item.itemArray(item_id); + data_type sub_item_array_sum = sub_item_array[0]; + for (size_t i = 1; i < sub_item_array.size(); ++i) { + sub_item_array_sum += sub_item_array[i]; } - } - - PUGS_INLINE - SubItemValuePerItemSum(const SubItemValuePerItemType& item_value) - : m_sub_item_value_per_item(item_value), m_is_owned([&](const IConnectivity& connectivity) { - Assert((connectivity.dimension() > 0) and (connectivity.dimension() <= 3), - "unexpected connectivity dimension"); - - switch (connectivity.dimension()) { - case 1: { - const auto& connectivity_1d = static_cast<const 
Connectivity1D&>(connectivity); - return connectivity_1d.isOwned<item_type>(); - break; - } - case 2: { - const auto& connectivity_2d = static_cast<const Connectivity2D&>(connectivity); - return connectivity_2d.isOwned<item_type>(); - break; - } - case 3: { - const auto& connectivity_3d = static_cast<const Connectivity3D&>(connectivity); - return connectivity_3d.isOwned<item_type>(); - break; - } - // LCOV_EXCL_START - default: { - throw UnexpectedError("unexpected dimension"); - } - // LCOV_EXCL_STOP - } - }(*item_value.connectivity_ptr())) - { - ; - } - PUGS_INLINE - ~SubItemValuePerItemSum() = default; - }; + item_sum[item_id] = sub_item_array_sum; + }); - const DataType local_sum = SubItemValuePerItemSum{item_value}; - return parallel::allReduceSum(local_sum); + return sum(item_sum); } template <typename DataType, typename ItemOfItem, typename ConnectivityPtr> diff --git a/src/scheme/DiscreteFunctionP0.hpp b/src/scheme/DiscreteFunctionP0.hpp index 6587a19f2c77d1f063f38765b4e768d92798f7a4..5a49b9db3d7f6ecd6b589f8a58703ec02d311f6b 100644 --- a/src/scheme/DiscreteFunctionP0.hpp +++ b/src/scheme/DiscreteFunctionP0.hpp @@ -708,79 +708,19 @@ class DiscreteFunctionP0 return sum(f.m_cell_values); } - private: - class Integrator - { - private: - const DiscreteFunctionP0 m_function; - CellValue<const double> m_cell_volume; - CellValue<const bool> m_cell_is_owned; - - public: - PUGS_INLINE - operator std::decay_t<data_type>() - { - std::decay_t<data_type> reduced_value; - if constexpr (std::is_arithmetic_v<std::decay_t<data_type>>) { - reduced_value = 0; - } else { - static_assert(is_tiny_vector_v<std::decay_t<data_type>> or is_tiny_matrix_v<std::decay_t<data_type>>, - "invalid data type"); - reduced_value = zero; - } - parallel_reduce(m_cell_volume.numberOfItems(), *this, reduced_value); - return reduced_value; - } - - PUGS_INLINE - void - operator()(const CellId& cell_id, std::decay_t<data_type>& data) const - { - if (m_cell_is_owned[cell_id]) { - data += 
m_cell_volume[cell_id] * m_function[cell_id]; - } - } - - PUGS_INLINE - void - join(std::decay_t<data_type>& dst, const std::decay_t<data_type>& src) const - { - dst += src; - } - - PUGS_INLINE - void - init(std::decay_t<data_type>& value) const - { - if constexpr (std::is_arithmetic_v<data_type>) { - value = 0; - } else { - static_assert(is_tiny_vector_v<std::decay_t<data_type>> or is_tiny_matrix_v<std::decay_t<data_type>>, - "invalid data type"); - value = zero; - } - } - - PUGS_INLINE - Integrator(const DiscreteFunctionP0& function) : m_function(function) - { - const Mesh<Connectivity<Dimension>>& mesh = *function.m_mesh; - m_cell_volume = MeshDataManager::instance().getMeshData(mesh).Vj(); - m_cell_is_owned = mesh.connectivity().cellIsOwned(); - } - - PUGS_INLINE - ~Integrator() = default; - }; - - public: PUGS_INLINE friend DataType integrate(const DiscreteFunctionP0& f) { Assert(f.m_cell_values.isBuilt()); - const DataType integral = Integrator{f}; - return parallel::allReduceSum(integral); + const Mesh<Connectivity<Dimension>>& mesh = *f.m_mesh; + CellValue<const double> cell_volume = MeshDataManager::instance().getMeshData(mesh).Vj(); + + CellValue<std::remove_const_t<DataType>> f_v{mesh.connectivity()}; + parallel_for( + mesh.numberOfCells(), PUGS_LAMBDA(CellId cell_id) { f_v[cell_id] = cell_volume[cell_id] * f[cell_id]; }); + + return sum(f_v); } DiscreteFunctionP0(const std::shared_ptr<const MeshType>& mesh) : m_mesh{mesh}, m_cell_values{mesh->connectivity()} {} diff --git a/src/utils/Array.hpp b/src/utils/Array.hpp index 4943d4c08d983f67d1550262e34aa80030d49bf1..fa3299bea5fefa99be94f4038ebae6da2c9779b8 100644 --- a/src/utils/Array.hpp +++ b/src/utils/Array.hpp @@ -6,6 +6,8 @@ #include <utils/PugsAssert.hpp> #include <utils/PugsMacros.hpp> #include <utils/PugsUtils.hpp> +#include <utils/ReproductibleSumManager.hpp> +#include <utils/ReproductibleSumUtils.hpp> #include <utils/Types.hpp> #include <algorithm> @@ -24,8 +26,7 @@ class [[nodiscard]] Array const 
size_t m_size; public: - friend std::ostream& - operator<<(std::ostream& os, const UnsafeArrayView& x) + friend std::ostream& operator<<(std::ostream& os, const UnsafeArrayView& x) { if (x.size() > 0) { os << 0 << ':' << NaNHelper(x[0]); @@ -36,21 +37,18 @@ class [[nodiscard]] Array return os; } - [[nodiscard]] PUGS_INLINE size_t - size() const + [[nodiscard]] PUGS_INLINE size_t size() const { return m_size; } - [[nodiscard]] PUGS_INLINE DataType& - operator[](size_t i) const + [[nodiscard]] PUGS_INLINE DataType& operator[](size_t i) const { Assert(i < m_size, "invalid index"); return m_values[i]; } - PUGS_INLINE void - fill(const DataType& data) const + PUGS_INLINE void fill(const DataType& data) const { for (size_t i = 0; i < m_size; ++i) { m_values[i] = data; @@ -58,7 +56,7 @@ class [[nodiscard]] Array } UnsafeArrayView& operator=(const UnsafeArrayView&) = delete; - UnsafeArrayView& operator=(UnsafeArrayView&&) = delete; + UnsafeArrayView& operator=(UnsafeArrayView&&) = delete; UnsafeArrayView(const Array<DataType>& array, index_type begin, index_type size) : m_values{&array[begin]}, m_size{size} @@ -82,14 +80,12 @@ class [[nodiscard]] Array friend Array<std::add_const_t<DataType>>; public: - [[nodiscard]] PUGS_INLINE size_t - size() const noexcept + [[nodiscard]] PUGS_INLINE size_t size() const noexcept { return m_values.extent(0); } - [[nodiscard]] friend PUGS_INLINE Array<std::remove_const_t<DataType>> - copy(const Array<DataType>& source) + [[nodiscard]] friend PUGS_INLINE Array<std::remove_const_t<DataType>> copy(const Array<DataType>& source) { Array<std::remove_const_t<DataType>> image(source.size()); Kokkos::deep_copy(image.m_values, source.m_values); @@ -97,8 +93,8 @@ class [[nodiscard]] Array return image; } - friend PUGS_INLINE void - copy_to(const Array<DataType>& source, const Array<std::remove_const_t<DataType>>& destination) + friend PUGS_INLINE void copy_to(const Array<DataType>& source, + const Array<std::remove_const_t<DataType>>& destination) 
{ Assert(source.size() == destination.size(), "incompatible Array sizes"); Kokkos::deep_copy(destination.m_values, source.m_values); @@ -112,16 +108,14 @@ class [[nodiscard]] Array typename Array<DataType2>::index_type begin, typename Array<DataType2>::index_type size); - [[nodiscard]] PUGS_INLINE DataType& - operator[](index_type i) const noexcept(NO_ASSERT) + [[nodiscard]] PUGS_INLINE DataType& operator[](index_type i) const noexcept(NO_ASSERT) { Assert(i < m_values.extent(0), "invalid index"); return m_values[i]; } PUGS_INLINE - void - fill(const DataType& data) const + void fill(const DataType& data) const { static_assert(not std::is_const_v<DataType>, "Cannot modify Array of const"); @@ -129,8 +123,7 @@ class [[nodiscard]] Array } template <typename DataType2> - PUGS_INLINE Array& - operator=(const Array<DataType2>& array) noexcept + PUGS_INLINE Array& operator=(const Array<DataType2>& array) noexcept { // ensures that DataType is the same as source DataType2 static_assert(std::is_same<std::remove_const_t<DataType>, std::remove_const_t<DataType2>>(), @@ -173,8 +166,7 @@ class [[nodiscard]] Array #endif // NDEBUG } - friend std::ostream& - operator<<(std::ostream& os, const Array& x) + friend std::ostream& operator<<(std::ostream& os, const Array& x) { if (x.size() > 0) { os << 0 << ':' << NaNHelper(x[0]); @@ -192,14 +184,13 @@ class [[nodiscard]] Array Array(const Array&) = default; template <typename DataType2> - PUGS_INLINE - Array(const Array<DataType2>& array) noexcept + PUGS_INLINE Array(const Array<DataType2>& array) noexcept { this->operator=(array); } PUGS_INLINE - Array(Array&&) = default; + Array(Array &&) = default; PUGS_INLINE ~Array() = default; @@ -373,8 +364,9 @@ template <typename DataType> [[nodiscard]] std::remove_const_t<DataType> sum(const Array<DataType>& array) { - using ArrayType = Array<DataType>; - using data_type = std::remove_const_t<DataType>; + using ArrayType = Array<DataType>; + using data_type = std::remove_const_t<DataType>; + 
using index_type = typename ArrayType::index_type; static_assert(not std::is_same_v<data_type, bool>, "sum cannot be called on boolean data"); @@ -429,7 +421,26 @@ sum(const Array<DataType>& array) ~ArraySum() = default; }; - return ArraySum(array); + if (ReproductibleSumManager::reproductibleSums()) { + if constexpr (std::is_floating_point_v<data_type>) { + ReproductibleScalarSum r_sum(array); + return r_sum.getSum(); + } else if constexpr (not std::is_arithmetic_v<data_type>) { + if constexpr (is_tiny_vector_v<data_type> and std::is_floating_point_v<typename data_type::data_type>) { + ReproductibleTinyVectorSum r_sum(array); + return r_sum.getSum(); + } else if constexpr (is_tiny_matrix_v<data_type> and std::is_floating_point_v<typename data_type::data_type>) { + ReproductibleTinyMatrixSum r_sum(array); + return r_sum.getSum(); + } else { + return ArraySum(array); + } + } else { + return ArraySum(array); + } + } else { + return ArraySum(array); + } } #endif // ARRAY_HPP diff --git a/src/utils/CMakeLists.txt b/src/utils/CMakeLists.txt index 2289a91d5f74c566a18af0f9fbcc8bf85ed844bb..3e61d14bb59cf96b873e13c55d92a6e6d9fe3f07 100644 --- a/src/utils/CMakeLists.txt +++ b/src/utils/CMakeLists.txt @@ -13,6 +13,7 @@ add_library( PETScWrapper.cpp PugsUtils.cpp RandomEngine.cpp + ReproductibleSumManager.cpp RevisionInfo.cpp SignalManager.cpp SLEPcWrapper.cpp diff --git a/src/utils/Messenger.hpp b/src/utils/Messenger.hpp index 416dbdf1d65e8b028b9ed39fff2a2acb4ebcb2d5..6685037b1f47249eef70cfb4255378b08df25342 100644 --- a/src/utils/Messenger.hpp +++ b/src/utils/Messenger.hpp @@ -23,7 +23,7 @@ namespace parallel { class Messenger { - private: + public: struct helper { #ifdef PUGS_HAS_MPI @@ -517,7 +517,7 @@ class Messenger MPI_Allreduce(&data, &data_sum, 1, mpi_datatype, MPI_SUM, MPI_COMM_WORLD); return data_sum; - } else if (is_trivially_castable<DataType>) { + } else if constexpr (is_trivially_castable<DataType>) { using InnerDataType = typename DataType::data_type; 
MPI_Datatype mpi_datatype = Messenger::helper::mpiType<InnerDataType>(); @@ -526,6 +526,8 @@ class Messenger MPI_Allreduce(&data, &data_sum, size, mpi_datatype, MPI_SUM, MPI_COMM_WORLD); return data_sum; + } else { + throw UnexpectedError("invalid data type for reduce sum"); } #else // PUGS_HAS_MPI return data; diff --git a/src/utils/PugsUtils.cpp b/src/utils/PugsUtils.cpp index 60380902556f88f774b698c4f7f196ea5192930e..538d700745bb7f24749fa7e878c28a64db0e0bfd 100644 --- a/src/utils/PugsUtils.cpp +++ b/src/utils/PugsUtils.cpp @@ -6,6 +6,7 @@ #include <utils/FPEManager.hpp> #include <utils/Messenger.hpp> #include <utils/PETScWrapper.hpp> +#include <utils/ReproductibleSumManager.hpp> #include <utils/RevisionInfo.hpp> #include <utils/SLEPcWrapper.hpp> #include <utils/SignalManager.hpp> @@ -117,6 +118,10 @@ initialize(int& argc, char* argv[]) bool pause_on_error = false; app.add_flag("-p,--pause-on-error", pause_on_error, "Pause for debugging on unexpected error [default: false]"); + bool reproductible_sums = true; + app.add_flag("--reproductible-sums,!--no-reproductible-sums", reproductible_sums, + "Special treatment of array sums to ensure reproducibility [default: true]"); + std::atexit([]() { std::cout << rang::style::reset; }); try { app.parse(argc, argv); @@ -130,6 +135,7 @@ initialize(int& argc, char* argv[]) ConsoleManager::setShowPreamble(show_preamble); ConsoleManager::init(enable_color); SignalManager::setPauseForDebug(pause_on_error); + ReproductibleSumManager::setReproductibleSums(reproductible_sums); } PETScWrapper::initialize(argc, argv); diff --git a/src/utils/ReproductibleSumMPI.hpp b/src/utils/ReproductibleSumMPI.hpp new file mode 100644 index 0000000000000000000000000000000000000000..91caafded09f11b31d29427d434c12704b5869e3 --- /dev/null +++ b/src/utils/ReproductibleSumMPI.hpp @@ -0,0 +1,124 @@ +#ifndef REPRODUCTIBLE_SUM_MPI_HPP +#define REPRODUCTIBLE_SUM_MPI_HPP + +#include <utils/Messenger.hpp> +#include <utils/ReproductibleSumUtils.hpp> +#include 
<utils/pugs_config.hpp> + +#ifdef PUGS_HAS_MPI + +template <typename RSumType> +struct ReproductibeMPIReducer +{ + static void + all_reduce_sum_embedder(void* local_value, void* sum_value, int* length, MPI_Datatype*) + { + Assert(*length == 1, "reduction must be performed on single bin"); + using Bin = typename RSumType::Bin; + Bin* p_local_bin = reinterpret_cast<Bin*>(local_value); + Bin* p_sum_bin = reinterpret_cast<Bin*>(sum_value); + + RSumType::addBinTo(*p_local_bin, *p_sum_bin); + } +}; + +template <typename ArrayT, typename BoolArrayT> +auto +allReduceReproductibleSum(const ReproductibleScalarSum<ArrayT, BoolArrayT>& s) +{ + using DataType = std::decay_t<typename ArrayT::data_type>; + using RSumType = std::decay_t<decltype(s)>; + using BinType = typename RSumType::Bin; + + BinType local_bin = s.getSummationBin(); + + MPI_Datatype mpi_bin_type; + MPI_Type_contiguous(sizeof(BinType) / sizeof(DataType), parallel::Messenger::helper::mpiType<DataType>(), + &mpi_bin_type); + MPI_Type_commit(&mpi_bin_type); + + MPI_Op mpi_bin_sum_op; + MPI_Op_create(ReproductibeMPIReducer<RSumType>::all_reduce_sum_embedder, 1, &mpi_bin_sum_op); + + BinType sum_bin = zero; + MPI_Allreduce(&local_bin, &sum_bin, 1, mpi_bin_type, mpi_bin_sum_op, MPI_COMM_WORLD); + + return sum_bin; +} + +template <typename ArrayT, typename BoolArrayT> +auto +allReduceReproductibleSum(const ReproductibleTinyVectorSum<ArrayT, BoolArrayT>& s) +{ + using TinyVectorType = std::decay_t<typename ArrayT::data_type>; + using DataType = typename TinyVectorType::data_type; + using RSumType = std::decay_t<decltype(s)>; + using BinType = typename RSumType::Bin; + + BinType local_bin = s.getSummationBin(); + + MPI_Datatype mpi_bin_type; + MPI_Type_contiguous(sizeof(BinType) / sizeof(DataType), parallel::Messenger::helper::mpiType<DataType>(), + &mpi_bin_type); + MPI_Type_commit(&mpi_bin_type); + + MPI_Op mpi_bin_sum_op; + MPI_Op_create(ReproductibeMPIReducer<RSumType>::all_reduce_sum_embedder, 1, &mpi_bin_sum_op); + 
+ BinType sum_bin = zero; + MPI_Allreduce(&local_bin, &sum_bin, 1, mpi_bin_type, mpi_bin_sum_op, MPI_COMM_WORLD); + + return sum_bin; +} + +template <typename ArrayT, typename BoolArrayT> +auto +allReduceReproductibleSum(const ReproductibleTinyMatrixSum<ArrayT, BoolArrayT>& s) +{ + using TinyVectorType = std::decay_t<typename ArrayT::data_type>; + using DataType = typename TinyVectorType::data_type; + using RSumType = std::decay_t<decltype(s)>; + using BinType = typename RSumType::Bin; + + BinType local_bin = s.getSummationBin(); + + MPI_Datatype mpi_bin_type; + MPI_Type_contiguous(sizeof(BinType) / sizeof(DataType), parallel::Messenger::helper::mpiType<DataType>(), + &mpi_bin_type); + MPI_Type_commit(&mpi_bin_type); + + MPI_Op mpi_bin_sum_op; + MPI_Op_create(ReproductibeMPIReducer<RSumType>::all_reduce_sum_embedder, 1, &mpi_bin_sum_op); + + BinType sum_bin = zero; + MPI_Allreduce(&local_bin, &sum_bin, 1, mpi_bin_type, mpi_bin_sum_op, MPI_COMM_WORLD); + + return sum_bin; +} + +#else // PUGS_HAS_MPI + +template <typename ArrayT, typename BoolArrayT> +auto +allReduceReproductibleSum(const ReproductibleScalarSum<ArrayT, BoolArrayT>& s) +{ + return s.getSummationBin(); +} + +template <typename ArrayT, typename BoolArrayT> +auto +allReduceReproductibleSum(const ReproductibleTinyVectorSum<ArrayT, BoolArrayT>& s) +{ + return s.getSummationBin(); +} + +template <typename ArrayT, typename BoolArrayT> +auto +allReduceReproductibleSum(const ReproductibleTinyMatrixSum<ArrayT, BoolArrayT>& s) +{ + return s.getSummationBin(); +} + +#endif // PUGS_HAS_MPI + +#endif // REPRODUCTIBLE_SUM_MPI_HPP diff --git a/src/utils/ReproductibleSumManager.cpp b/src/utils/ReproductibleSumManager.cpp new file mode 100644 index 0000000000000000000000000000000000000000..7f2851c5b740df50578d4f10800ad6255985d5d4 --- /dev/null +++ b/src/utils/ReproductibleSumManager.cpp @@ -0,0 +1,15 @@ +#include <utils/ReproductibleSumManager.hpp> + +bool ReproductibleSumManager::s_reproductible_sums = true; + +bool 
+ReproductibleSumManager::reproductibleSums() +{ + return s_reproductible_sums; +} + +void +ReproductibleSumManager::setReproductibleSums(bool reproductible_sum) +{ + s_reproductible_sums = reproductible_sum; +} diff --git a/src/utils/ReproductibleSumManager.hpp b/src/utils/ReproductibleSumManager.hpp new file mode 100644 index 0000000000000000000000000000000000000000..cd0b3a2df6006024ab4fa86e4ae88857c3a25233 --- /dev/null +++ b/src/utils/ReproductibleSumManager.hpp @@ -0,0 +1,14 @@ +#ifndef REPRODUCTIBLE_SUM_MANAGER_HPP +#define REPRODUCTIBLE_SUM_MANAGER_HPP + +class ReproductibleSumManager +{ + private: + static bool s_reproductible_sums; + + public: + static void setReproductibleSums(bool); + static bool reproductibleSums(); +}; + +#endif // REPRODUCTIBLE_SUM_MANAGER_HPP diff --git a/src/utils/ReproductibleSumUtils.hpp b/src/utils/ReproductibleSumUtils.hpp new file mode 100644 index 0000000000000000000000000000000000000000..4b41980ccfb94e7705a83e201379e4dd16660e88 --- /dev/null +++ b/src/utils/ReproductibleSumUtils.hpp @@ -0,0 +1,877 @@ +#ifndef REPRODUCTIBLE_SUM_UTILS_HPP +#define REPRODUCTIBLE_SUM_UTILS_HPP + +#include <utils/PugsUtils.hpp> +#include <utils/Types.hpp> + +template <typename DataType> +class Array; + +namespace reproductible_sum_utils +{ +template <size_t NumberOfBits> +struct IntegerFromBitSize +{ +}; + +template <> +struct IntegerFromBitSize<8> +{ + using integer_t = std::int8_t; +}; + +template <> +struct IntegerFromBitSize<16> +{ + using integer_t = std::int16_t; +}; + +template <> +struct IntegerFromBitSize<32> +{ + using integer_t = std::int32_t; +}; + +template <> +struct IntegerFromBitSize<64> +{ + using integer_t = std::int64_t; +}; + +template <typename DataType> +struct IntegerType +{ + using integer_t = typename IntegerFromBitSize<sizeof(DataType) * 8>::integer_t; +}; + +template <typename DataType> +DataType +ulp(const DataType& x) noexcept(NO_ASSERT) +{ + static_assert(std::is_floating_point_v<DataType>, "expecting floating point 
value"); + + if (x == 0) { + return std::numeric_limits<DataType>::denorm_min(); + } + + return std::pow(DataType{2}, std::ilogb(std::abs(x)) - std::numeric_limits<DataType>::digits); +} + +template <typename DataType> +DataType +ufp(const DataType& x) noexcept(NO_ASSERT) +{ + static_assert(std::is_floating_point_v<DataType>, "expecting floating point value"); + + return std::pow(DataType{2}, std::ilogb(std::abs(x))); +} + +// Useful bits per bin +template <typename DataType> +constexpr inline size_t bin_size; +template <> +constexpr inline size_t bin_size<double> = 40; +template <> +constexpr inline size_t bin_size<float> = 12; + +// IEEE 754 epsilon +template <typename DataType> +constexpr inline double bin_epsilon; +template <> +constexpr inline double bin_epsilon<double> = std::pow(2., -53); +template <> +constexpr inline double bin_epsilon<float> = std::pow(2., -24); + +// number of bins: improves precision +template <typename DataType> +constexpr inline size_t bin_number; + +template <> +constexpr inline size_t bin_number<double> = 3; +template <> +constexpr inline size_t bin_number<float> = 4; + +// max local sum size to avoid overflow +template <typename DataType> +constexpr inline size_t bin_max_size; + +template <> +constexpr inline size_t bin_max_size<double> = 2048; +template <> +constexpr inline size_t bin_max_size<float> = 1024; + +struct NoMask +{ + PUGS_INLINE bool operator[](size_t) const + { + return true; + } +}; + +} // namespace reproductible_sum_utils + +template <typename ArrayT, typename MaskType = reproductible_sum_utils::NoMask> +class ReproductibleScalarSum +{ + public: + using DataType = std::decay_t<typename ArrayT::data_type>; + static_assert(std::is_floating_point_v<DataType>); + + static constexpr size_t K = reproductible_sum_utils::bin_number<DataType>; + static constexpr DataType eps = reproductible_sum_utils::bin_epsilon<DataType>; + static constexpr size_t W = reproductible_sum_utils::bin_size<DataType>; + + struct Bin + { + 
std::array<DataType, K> S; // sum + std::array<DataType, K> C; // carry + + Bin& operator=(const Bin&) = default; + Bin& operator=(Bin&&) = default; + + Bin(Bin&&) = default; + Bin(const Bin&) = default; + + Bin(ZeroType) + { + for (size_t k = 0; k < K; ++k) { + S[k] = 0.75 * eps * std::pow(DataType{2}, (K - k - 1) * W); + C[k] = 0; + } + } + + Bin() = default; + + ~Bin() = default; + }; + + private: + Bin m_summation_bin; + + PUGS_INLINE + static void + _shift(const size_t g, Bin& bin) noexcept(NO_ASSERT) + { + using namespace reproductible_sum_utils; + + for (size_t k = K - 1; k >= g; --k) { + bin.S[k] = bin.S[k - g]; + bin.C[k] = bin.C[k - g]; + } + for (size_t k = 0; k < std::min(K, g); ++k) { + bin.S[k] = 1.5 * std::pow(DataType{2}, g * W) * ufp(bin.S[k]); + bin.C[k] = 0; + } + } + + PUGS_INLINE static void + _update(const DataType& m, Bin& bin) noexcept(NO_ASSERT) + { + Assert(m > 0); + + using namespace reproductible_sum_utils; + if (m >= std::pow(DataType{2}, W - 1.) * ulp(bin.S[0])) { + const size_t shift = 1 + std::floor(std::log2(m / (std::pow(DataType{2}, W - 1.) 
* ulp(bin.S[0]))) / W); + + _shift(shift, bin); + } + } + + PUGS_INLINE + void static _split2(DataType& S, DataType& x) noexcept(NO_ASSERT) + { + using namespace reproductible_sum_utils; + union + { + DataType as_DataType; + typename IntegerType<DataType>::integer_t as_integer; + } x_bar; + + x_bar.as_DataType = x; + x_bar.as_integer |= 0x1; + + const DataType S0 = S; + S += x_bar.as_DataType; + x -= S - S0; + } + + PUGS_INLINE static void + _renormalize(Bin& bin) noexcept(NO_ASSERT) + { + using namespace reproductible_sum_utils; + + for (size_t k = 0; k < K; ++k) { + if (bin.S[k] >= 1.75 * ufp(bin.S[k])) { + bin.S[k] -= 0.25 * ufp(bin.S[k]); + bin.C[k] += 1; + } else if (bin.S[k] < 1.25 * ufp(bin.S[k])) { + bin.S[k] += 0.5 * ufp(bin.S[k]); + bin.C[k] -= 2; + } else if (bin.S[k] < 1.5 * ufp(bin.S[k])) { + bin.S[k] += 0.25 * ufp(bin.S[k]); + bin.C[k] -= 1; + } + } + } + + public: + static void + addBinTo(Bin& bin, Bin& bin_sum) + { + using namespace reproductible_sum_utils; + + DataType ulp_bin = ulp(bin.S[0]); + DataType ulp_sum = ulp(bin_sum.S[0]); + + if (ulp_bin < ulp_sum) { + const size_t shift = std::floor(std::log2(ulp_sum / ulp_bin) / W); + if (shift > 0) { + _shift(shift, bin); + } + } else if (ulp_bin > ulp_sum) { + const size_t shift = std::floor(std::log2(ulp_bin / ulp_sum) / W); + if (shift > 0) { + _shift(shift, bin_sum); + } + } + + for (size_t k = 0; k < K; ++k) { + bin_sum.S[k] += bin.S[k] - 1.5 * ufp(bin.S[k]); + bin_sum.C[k] += bin.C[k]; + } + + _renormalize(bin_sum); + } + + PUGS_INLINE + static DataType + getValue(const Bin& bin) noexcept(NO_ASSERT) + { + using namespace reproductible_sum_utils; + + DataType value = 0; + for (size_t k = 0; k < K; ++k) { + value += 0.25 * (bin.C[k] - 6) * ufp(bin.S[k]) + bin.S[k]; + } + return value; + } + + PUGS_INLINE Bin + getSummationBin() const noexcept(NO_ASSERT) + { + return m_summation_bin; + } + + PUGS_INLINE DataType + getSum() const noexcept(NO_ASSERT) + { + return getValue(m_summation_bin); + } + + 
ReproductibleScalarSum(const ArrayT& array, const MaskType mask = reproductible_sum_utils::NoMask{}) + { + if constexpr (not std::is_same_v<MaskType, reproductible_sum_utils::NoMask>) { + static_assert(std::is_same_v<std::decay_t<typename MaskType::data_type>, bool>, + "when provided, mask must be an array of bool"); + } + + using TeamPolicyT = Kokkos::TeamPolicy<Kokkos::IndexType<int>>; + using TeamMemberT = TeamPolicyT::member_type; + + int nx = reproductible_sum_utils::bin_max_size<DataType>; + int ny = std::max(array.size() / nx, 1ul); + + const TeamPolicyT policy(ny, Kokkos::AUTO()); + + Array<DataType> thread_sum(policy.team_size() * policy.league_size()); + + Array<Bin> bin_by_thread(policy.team_size() * policy.league_size()); + bin_by_thread.fill(zero); + + Array<DataType> local_max(policy.team_size() * policy.league_size()); + local_max.fill(0); + + parallel_for( + policy, PUGS_LAMBDA(const TeamMemberT& member) { + const int i_team = member.league_rank(); + const int i_thread = member.team_rank(); + + const int thread_id = i_team * member.team_size() + i_thread; + + const int league_start = nx * i_team; + const int block_size = [&] { + int size = nx; + if (i_team == member.league_size() - 1) { + size = array.size() - league_start; + } + return size; + }(); + + parallel_for( + Kokkos::TeamThreadRange(member, block_size), PUGS_LAMBDA(int i) { + if (mask[league_start + i]) { + DataType& m = local_max[thread_id]; + DataType abs_value = std::abs(array[league_start + i]); + if (abs_value > m) { + m = abs_value; + } + } + }); + + _update(local_max[thread_id], bin_by_thread[thread_id]); + + parallel_for( + Kokkos::TeamThreadRange(member, block_size), PUGS_LAMBDA(int i) { + if (mask[league_start + i]) { + DataType x = array[nx * i_team + i]; + for (size_t k = 0; k < K; ++k) { + _split2(bin_by_thread[thread_id].S[k], x); + }; + } + }); + + _renormalize(bin_by_thread[thread_id]); + }); + + m_summation_bin = bin_by_thread[0]; + for (size_t i = 1; i < 
bin_by_thread.size(); ++i) { + addBinTo(bin_by_thread[i], m_summation_bin); + } + } + + ~ReproductibleScalarSum() = default; +}; + +template <typename ArrayT, typename MaskType = reproductible_sum_utils::NoMask> +class ReproductibleTinyVectorSum +{ + public: + using TinyVectorType = std::decay_t<typename ArrayT::data_type>; + static_assert(is_tiny_vector_v<TinyVectorType>); + + using DataType = std::decay_t<typename TinyVectorType::data_type>; + static_assert(std::is_floating_point_v<DataType>); + + static constexpr size_t K = reproductible_sum_utils::bin_number<DataType>; + static constexpr DataType eps = reproductible_sum_utils::bin_epsilon<DataType>; + static constexpr size_t W = reproductible_sum_utils::bin_size<DataType>; + + struct Bin + { + std::array<TinyVectorType, K> S; // sum + std::array<TinyVectorType, K> C; // carry + + Bin& operator=(const Bin&) = default; + Bin& operator=(Bin&&) = default; + + Bin(Bin&&) = default; + Bin(const Bin&) = default; + + Bin(ZeroType) + { + for (size_t k = 0; k < K; ++k) { + const DataType init_value = 0.75 * eps * std::pow(DataType{2}, (K - k - 1) * W); + for (size_t i_component = 0; i_component < TinyVectorType::Dimension; ++i_component) { + S[k][i_component] = init_value; + } + C[k] = zero; + } + } + + Bin() = default; + + ~Bin() = default; + }; + + private: + Bin m_summation_bin; + + PUGS_INLINE + static void + _shift(const size_t g, Bin& bin, const size_t& i_component) noexcept(NO_ASSERT) + { + using namespace reproductible_sum_utils; + + for (size_t k = K - 1; k >= g; --k) { + bin.S[k][i_component] = bin.S[k - g][i_component]; + bin.C[k][i_component] = bin.C[k - g][i_component]; + } + for (size_t k = 0; k < std::min(K, g); ++k) { + bin.S[k][i_component] = 1.5 * std::pow(DataType{2}, g * W) * ufp(bin.S[k][i_component]); + bin.C[k][i_component] = 0; + } + } + + PUGS_INLINE static void + _update(const TinyVectorType& m, Bin& bin) noexcept(NO_ASSERT) + { + using namespace reproductible_sum_utils; + for (size_t 
i_component = 0; i_component < TinyVectorType::Dimension; ++i_component) { + if (m[i_component] >= std::pow(DataType{2}, W - 1.) * ulp(bin.S[0][i_component])) { + const size_t shift = + 1 + std::floor(std::log2(m[i_component] / (std::pow(DataType{2}, W - 1.) * ulp(bin.S[0][i_component]))) / W); + + _shift(shift, bin, i_component); + } + } + } + + PUGS_INLINE + void static _split2(TinyVectorType& S, TinyVectorType& x) noexcept(NO_ASSERT) + { + using namespace reproductible_sum_utils; + union + { + DataType as_DataType; + typename IntegerType<DataType>::integer_t as_integer; + } x_bar; + + for (size_t i_component = 0; i_component < TinyVectorType::Dimension; ++i_component) { + x_bar.as_DataType = x[i_component]; + x_bar.as_integer |= 0x1; + + DataType& S_i = S[i_component]; + DataType& x_i = x[i_component]; + + const DataType S0 = S_i; + S_i += x_bar.as_DataType; + x_i -= S_i - S0; + } + } + + PUGS_INLINE static void + _renormalize(Bin& bin) noexcept(NO_ASSERT) + { + using namespace reproductible_sum_utils; + + for (size_t k = 0; k < K; ++k) { + TinyVectorType& S_k = bin.S[k]; + TinyVectorType& C_k = bin.C[k]; + for (size_t i_component = 0; i_component < TinyVectorType::Dimension; ++i_component) { + DataType& Sk_i = S_k[i_component]; + DataType& Ck_i = C_k[i_component]; + + if (Sk_i >= 1.75 * ufp(Sk_i)) { + Sk_i -= 0.25 * ufp(Sk_i); + Ck_i += 1; + } else if (Sk_i < 1.25 * ufp(Sk_i)) { + Sk_i += 0.5 * ufp(Sk_i); + Ck_i -= 2; + } else if (Sk_i < 1.5 * ufp(Sk_i)) { + Sk_i += 0.25 * ufp(Sk_i); + Ck_i -= 1; + } + } + } + } + + public: + static void + addBinTo(Bin& bin, Bin& bin_sum) + { + using namespace reproductible_sum_utils; + + for (size_t i_component = 0; i_component < TinyVectorType::Dimension; ++i_component) { + DataType ulp_bin = ulp(bin.S[0][i_component]); + DataType ulp_sum = ulp(bin_sum.S[0][i_component]); + + if (ulp_bin < ulp_sum) { + const size_t shift = std::floor(std::log2(ulp_sum / ulp_bin) / W); + if (shift > 0) { + _shift(shift, bin, i_component); + } 
+ } else if (ulp_bin > ulp_sum) { + const size_t shift = std::floor(std::log2(ulp_bin / ulp_sum) / W); + if (shift > 0) { + _shift(shift, bin_sum, i_component); + } + } + + for (size_t k = 0; k < K; ++k) { + bin_sum.S[k][i_component] += bin.S[k][i_component] - 1.5 * ufp(bin.S[k][i_component]); + bin_sum.C[k][i_component] += bin.C[k][i_component]; + } + } + + _renormalize(bin_sum); + } + + PUGS_INLINE + static TinyVectorType + getValue(const Bin& bin) noexcept(NO_ASSERT) + { + using namespace reproductible_sum_utils; + + TinyVectorType value = zero; + for (size_t i_component = 0; i_component < TinyVectorType::Dimension; ++i_component) { + for (size_t k = 0; k < K; ++k) { + value[i_component] += 0.25 * (bin.C[k][i_component] - 6) * ufp(bin.S[k][i_component]) + bin.S[k][i_component]; + } + } + return value; + } + + PUGS_INLINE Bin + getSummationBin() const noexcept(NO_ASSERT) + { + return m_summation_bin; + } + + PUGS_INLINE TinyVectorType + getSum() const noexcept(NO_ASSERT) + { + return getValue(m_summation_bin); + } + + ReproductibleTinyVectorSum(const ArrayT& array, const MaskType mask = reproductible_sum_utils::NoMask{}) + { + if constexpr (not std::is_same_v<MaskType, reproductible_sum_utils::NoMask>) { + static_assert(std::is_same_v<std::decay_t<typename MaskType::data_type>, bool>, + "when provided, mask must be an array of bool"); + } + + using TeamPolicyT = Kokkos::TeamPolicy<Kokkos::IndexType<int>>; + using TeamMemberT = TeamPolicyT::member_type; + + int nx = reproductible_sum_utils::bin_max_size<DataType>; + int ny = std::max(array.size() / nx, 1ul); + + const TeamPolicyT policy(ny, Kokkos::AUTO()); + + Array<TinyVectorType> thread_sum(policy.team_size() * policy.league_size()); + + Array<Bin> bin_by_thread(policy.team_size() * policy.league_size()); + bin_by_thread.fill(zero); + + Array<TinyVectorType> local_max(policy.team_size() * policy.league_size()); + local_max.fill(zero); + + parallel_for( + policy, PUGS_LAMBDA(const TeamMemberT& member) { + const 
int i_team = member.league_rank(); + const int i_thread = member.team_rank(); + + const int thread_id = i_team * member.team_size() + i_thread; + + const int league_start = nx * i_team; + const int block_size = [&] { + int size = nx; + if (i_team == member.league_size() - 1) { + size = array.size() - league_start; + } + return size; + }(); + + parallel_for( + Kokkos::TeamThreadRange(member, block_size), PUGS_LAMBDA(int i) { + if (mask[league_start + i]) { + for (size_t i_component = 0; i_component < TinyVectorType::Dimension; ++i_component) { + DataType& m = local_max[thread_id][i_component]; + DataType abs_value = std::abs(array[league_start + i][i_component]); + if (abs_value > m) { + m = abs_value; + } + } + } + }); + + _update(local_max[thread_id], bin_by_thread[thread_id]); + + parallel_for( + Kokkos::TeamThreadRange(member, block_size), PUGS_LAMBDA(int i) { + if (mask[league_start + i]) { + TinyVectorType x = array[nx * i_team + i]; + for (size_t k = 0; k < K; ++k) { + _split2(bin_by_thread[thread_id].S[k], x); + } + } + }); + + _renormalize(bin_by_thread[thread_id]); + }); + + m_summation_bin = bin_by_thread[0]; + for (size_t i = 1; i < bin_by_thread.size(); ++i) { + addBinTo(bin_by_thread[i], m_summation_bin); + } + } + + ~ReproductibleTinyVectorSum() = default; +}; + +template <typename ArrayT, typename MaskType = reproductible_sum_utils::NoMask> +class ReproductibleTinyMatrixSum +{ + public: + using TinyMatrixType = std::decay_t<typename ArrayT::data_type>; + static_assert(is_tiny_matrix_v<TinyMatrixType>); + + using DataType = std::decay_t<typename TinyMatrixType::data_type>; + static_assert(std::is_floating_point_v<DataType>); + + static constexpr size_t K = reproductible_sum_utils::bin_number<DataType>; + static constexpr DataType eps = reproductible_sum_utils::bin_epsilon<DataType>; + static constexpr size_t W = reproductible_sum_utils::bin_size<DataType>; + + struct Bin + { + std::array<TinyMatrixType, K> S; // sum + std::array<TinyMatrixType, K> C; 
// carry + + Bin& operator=(const Bin&) = default; + Bin& operator=(Bin&&) = default; + + Bin(Bin&&) = default; + Bin(const Bin&) = default; + + Bin(ZeroType) + { + for (size_t k = 0; k < K; ++k) { + const DataType init_value = 0.75 * eps * std::pow(DataType{2}, (K - k - 1) * W); + for (size_t i_component = 0; i_component < TinyMatrixType::NumberOfRows; ++i_component) { + for (size_t j_component = 0; j_component < TinyMatrixType::NumberOfColumns; ++j_component) { + S[k](i_component, j_component) = init_value; + } + } + C[k] = zero; + } + } + + Bin() = default; + ~Bin() = default; + }; + + private: + Bin m_summation_bin; + + PUGS_INLINE + static void + _shift(const size_t g, Bin& bin, const size_t i_component, const size_t j_component) noexcept(NO_ASSERT) + { + using namespace reproductible_sum_utils; + + for (size_t k = K - 1; k >= g; --k) { + bin.S[k](i_component, j_component) = bin.S[k - g](i_component, j_component); + bin.C[k](i_component, j_component) = bin.C[k - g](i_component, j_component); + } + for (size_t k = 0; k < std::min(K, g); ++k) { + bin.S[k](i_component, j_component) = 1.5 * std::pow(DataType{2}, g * W) * ufp(bin.S[k](i_component, j_component)); + bin.C[k](i_component, j_component) = 0; + } + } + + PUGS_INLINE static void + _update(const TinyMatrixType& m, Bin& bin) noexcept(NO_ASSERT) + { + using namespace reproductible_sum_utils; + for (size_t i_component = 0; i_component < TinyMatrixType::NumberOfRows; ++i_component) { + for (size_t j_component = 0; j_component < TinyMatrixType::NumberOfColumns; ++j_component) { + if (m(i_component, j_component) >= std::pow(DataType{2}, W - 1.) * ulp(bin.S[0](i_component, j_component))) { + const size_t shift = + 1 + std::floor(std::log2(m(i_component, j_component) / + (std::pow(DataType{2}, W - 1.) 
* ulp(bin.S[0](i_component, j_component)))) / + W); + + _shift(shift, bin, i_component, j_component); + } + } + } + } + + PUGS_INLINE + void static _split2(TinyMatrixType& S, TinyMatrixType& x) noexcept(NO_ASSERT) + { + using namespace reproductible_sum_utils; + union + { + DataType as_DataType; + typename IntegerType<DataType>::integer_t as_integer; + } x_bar; + + for (size_t i_component = 0; i_component < TinyMatrixType::NumberOfRows; ++i_component) { + for (size_t j_component = 0; j_component < TinyMatrixType::NumberOfColumns; ++j_component) { + DataType& S_ij = S(i_component, j_component); + DataType& x_ij = x(i_component, j_component); + + x_bar.as_DataType = x_ij; + x_bar.as_integer |= 0x1; + + const DataType S0 = S_ij; + S_ij += x_bar.as_DataType; + x_ij -= S_ij - S0; + } + } + } + + PUGS_INLINE static void + _renormalize(Bin& bin) noexcept(NO_ASSERT) + { + using namespace reproductible_sum_utils; + + for (size_t k = 0; k < K; ++k) { + TinyMatrixType& S_k = bin.S[k]; + TinyMatrixType& C_k = bin.C[k]; + for (size_t i_component = 0; i_component < TinyMatrixType::NumberOfRows; ++i_component) { + for (size_t j_component = 0; j_component < TinyMatrixType::NumberOfColumns; ++j_component) { + DataType& Sk_ij = S_k(i_component, j_component); + DataType& Ck_ij = C_k(i_component, j_component); + + if (Sk_ij >= 1.75 * ufp(Sk_ij)) { + Sk_ij -= 0.25 * ufp(Sk_ij); + Ck_ij += 1; + } else if (Sk_ij < 1.25 * ufp(Sk_ij)) { + Sk_ij += 0.5 * ufp(Sk_ij); + Ck_ij -= 2; + } else if (Sk_ij < 1.5 * ufp(Sk_ij)) { + Sk_ij += 0.25 * ufp(Sk_ij); + Ck_ij -= 1; + } + } + } + } + } + + public: + static void + addBinTo(Bin& bin, Bin& bin_sum) + { + using namespace reproductible_sum_utils; + + for (size_t i_component = 0; i_component < TinyMatrixType::NumberOfRows; ++i_component) { + for (size_t j_component = 0; j_component < TinyMatrixType::NumberOfColumns; ++j_component) { + DataType ulp_bin = ulp(bin.S[0](i_component, j_component)); + DataType ulp_sum = ulp(bin_sum.S[0](i_component, 
j_component)); + + if (ulp_bin < ulp_sum) { + const size_t shift = std::floor(std::log2(ulp_sum / ulp_bin) / W); + if (shift > 0) { + _shift(shift, bin, i_component, j_component); + } + } else if (ulp_bin > ulp_sum) { + const size_t shift = std::floor(std::log2(ulp_bin / ulp_sum) / W); + if (shift > 0) { + _shift(shift, bin_sum, i_component, j_component); + } + } + + for (size_t k = 0; k < K; ++k) { + bin_sum.S[k](i_component, j_component) += + bin.S[k](i_component, j_component) - 1.5 * ufp(bin.S[k](i_component, j_component)); + bin_sum.C[k](i_component, j_component) += bin.C[k](i_component, j_component); + } + } + } + + _renormalize(bin_sum); + } + + PUGS_INLINE + static TinyMatrixType + getValue(const Bin& bin) noexcept(NO_ASSERT) + { + using namespace reproductible_sum_utils; + + TinyMatrixType value = zero; + for (size_t i_component = 0; i_component < TinyMatrixType::NumberOfRows; ++i_component) { + for (size_t j_component = 0; j_component < TinyMatrixType::NumberOfColumns; ++j_component) { + for (size_t k = 0; k < K; ++k) { + value(i_component, j_component) += + 0.25 * (bin.C[k](i_component, j_component) - 6) * ufp(bin.S[k](i_component, j_component)) + + bin.S[k](i_component, j_component); + } + } + } + return value; + } + + PUGS_INLINE Bin + getSummationBin() const noexcept(NO_ASSERT) + { + return m_summation_bin; + } + + PUGS_INLINE TinyMatrixType + getSum() const noexcept(NO_ASSERT) + { + return getValue(m_summation_bin); + } + + ReproductibleTinyMatrixSum(const ArrayT& array, const MaskType mask = reproductible_sum_utils::NoMask{}) + { + if constexpr (not std::is_same_v<MaskType, reproductible_sum_utils::NoMask>) { + static_assert(std::is_same_v<std::decay_t<typename MaskType::data_type>, bool>, + "when provided, mask must be an array of bool"); + } + + using TeamPolicyT = Kokkos::TeamPolicy<Kokkos::IndexType<int>>; + using TeamMemberT = TeamPolicyT::member_type; + + int nx = reproductible_sum_utils::bin_max_size<DataType>; + int ny = std::max(array.size() 
/ nx, 1ul); + + const TeamPolicyT policy(ny, Kokkos::AUTO()); + + Array<TinyMatrixType> thread_sum(policy.team_size() * policy.league_size()); + + Array<Bin> bin_by_thread(policy.team_size() * policy.league_size()); + bin_by_thread.fill(zero); + + Array<TinyMatrixType> local_max(policy.team_size() * policy.league_size()); + local_max.fill(zero); + + parallel_for( + policy, PUGS_LAMBDA(const TeamMemberT& member) { + const int i_team = member.league_rank(); + const int i_thread = member.team_rank(); + + const int thread_id = i_team * member.team_size() + i_thread; + + const int league_start = nx * i_team; + const int block_size = [&] { + int size = nx; + if (i_team == member.league_size() - 1) { + size = array.size() - league_start; + } + return size; + }(); + + parallel_for( + Kokkos::TeamThreadRange(member, block_size), PUGS_LAMBDA(int i) { + if (mask[league_start + i]) { + for (size_t i_component = 0; i_component < TinyMatrixType::NumberOfRows; ++i_component) { + for (size_t j_component = 0; j_component < TinyMatrixType::NumberOfColumns; ++j_component) { + DataType& m = local_max[thread_id](i_component, j_component); + DataType abs_value = std::abs(array[league_start + i](i_component, j_component)); + if (abs_value > m) { + m = abs_value; + } + } + } + } + }); + + _update(local_max[thread_id], bin_by_thread[thread_id]); + + parallel_for( + Kokkos::TeamThreadRange(member, block_size), PUGS_LAMBDA(int i) { + if (mask[league_start + i]) { + TinyMatrixType x = array[nx * i_team + i]; + for (size_t k = 0; k < K; ++k) { + _split2(bin_by_thread[thread_id].S[k], x); + } + } + }); + + _renormalize(bin_by_thread[thread_id]); + }); + + m_summation_bin = bin_by_thread[0]; + for (size_t i = 1; i < bin_by_thread.size(); ++i) { + addBinTo(bin_by_thread[i], m_summation_bin); + } + } + + ~ReproductibleTinyMatrixSum() = default; +}; + +#endif // REPRODUCTIBLE_SUM_UTILS_HPP diff --git a/tests/test_Array.cpp b/tests/test_Array.cpp index 
e999cb06d8a4ab0302c1a40896950cd93c4640d4..fb3a50ef9ec02bd3a98d5d7b5af95be8bbb9b439 100644 --- a/tests/test_Array.cpp +++ b/tests/test_Array.cpp @@ -14,152 +14,10 @@ #include <valarray> #include <vector> +#include <utils/ReproductibleSumUtils.hpp> #include <utils/SmallArray.hpp> #include <utils/Timer.hpp> -template <typename DataType> -class ArrayReproductibleSum -{ - private: - static constexpr double eps = std::numeric_limits<DataType>::epsilon(); - - static constexpr size_t W = 40; - static constexpr size_t K = 3; - - static consteval size_t - _getNB() - { - return std::floor((1 / eps) * std::pow(2., -2. - W)); - } - - static constexpr size_t NB = _getNB(); - - Array<const DataType> m_array; - - PUGS_INLINE DataType - ulp(const DataType& x) - { - static_assert(std::is_floating_point_v<DataType>, "expecting floating point value"); - - if (x == 0) { - return std::numeric_limits<DataType>::denorm_min(); - } - - return std::pow(DataType{2}, std::ilogb(std::abs(x)) - std::numeric_limits<DataType>::digits); - }; - - PUGS_INLINE DataType - ufp(const DataType& x) - { - static_assert(std::is_floating_point_v<DataType>, "expecting floating point value"); - - return std::pow(DataType{2}, std::ilogb(std::abs(x))); - }; - - public: - operator DataType() - { - auto local_max = [](const auto& v, const size_t i, const size_t lB) noexcept(NO_ASSERT) { - DataType m = std::abs(v[i]); - for (size_t j = 1; j < lB; ++j) { - const DataType& abs_vj = std::abs(v[i + j]); - if (m < abs_vj) { - m = abs_vj; - } - } - return m; - }; - - auto update = [&](const DataType& m, auto& S, auto& C) noexcept(NO_ASSERT) { - if (m >= std::pow(DataType{2}, W - 1.) * ulp(S[0])) { - const size_t g = 1 + std::floor(std::log2(m / (std::pow(DataType{2}, W - 1.) 
* ulp(S[0]))) / W); - - for (size_t k = K - 1; k >= g; --k) { - S[k] = S[k - g]; - C[k] = C[k - g]; - } - - for (size_t k = 0; k < std::min(K, g); ++k) { - S[k] = 1.5 * std::pow(DataType{2}, g * W) * ufp(S[k]); - C[k] = 0; - } - } - }; - - auto split2 = [](DataType& S, DataType& x) noexcept(NO_ASSERT) { - union - { - static_assert(sizeof(DataType) == sizeof(unsigned long)); - DataType as_DataType; - unsigned long as_long; - } x_bar; - - x_bar.as_DataType = x; - x_bar.as_long |= 0x1; - - const DataType S0 = S; - S += x_bar.as_DataType; - x -= S - S0; - }; - - auto extract_vector3 = [&](DataType& S, auto& v, size_t i, const size_t lB) noexcept(NO_ASSERT) { - for (size_t j = 0; j < lB; ++j) { - split2(S, v[i + j]); - } - }; - - auto renormalize = [&](auto& S, auto& C) noexcept(NO_ASSERT) { - for (size_t k = 0; k < K; ++k) { - if (S[k] >= 1.75 * ufp(S[k])) { - S[k] -= 0.25 * ufp(S[k]); - C[k] += 1; - } else if (S[k] < 1.25 * ufp(S[k])) { - S[k] += 0.5 * ufp(S[k]); - C[k] -= 2; - } else if (S[k] < 1.5 * ufp(S[k])) { - S[k] += 0.25 * ufp(S[k]); - C[k] -= 1; - } - } - }; - - TinyVector<K, DataType> S; - for (size_t k = 0; k < K; ++k) { - S[k] = 0.75 * eps * std::pow(2., (K - k - 1) * W); - } - - TinyVector<K, DataType> C = zero; - - // Array<DataType> local_array(NB); - Array<DataType> local_array = copy(m_array); - for (size_t i = 0; i < m_array.size(); i += NB) { - const size_t lB = std::min(NB, m_array.size() - i); - - // std::copy_n(&(m_array[i]), lB, &(local_array[i])); - - const DataType m = local_max(local_array, i, lB); - - update(m, S, C); - - for (size_t k = 0; k < K; ++k) { - extract_vector3(S[k], local_array, i, lB); - } - - renormalize(S, C); - } - - DataType sum = 0; - for (size_t k = 0; k < S.dimension(); ++k) { - sum += 0.25 * (C[k] - 6) * ufp(S[k]) + S[k]; - } - - return sum; - }; - - ArrayReproductibleSum(Array<DataType> array) : m_array{array} {} - ~ArrayReproductibleSum() = default; -}; - // Instantiate to ensure full coverage is performed template 
class Array<int>; @@ -465,63 +323,79 @@ TEST_CASE("Array", "[utils]") } } - SECTION("reproducible floating point sum 2023") + SECTION("reproducible double sum") { - std::clog.precision(16); + Array<double> array(10'000); + + for (size_t i = 0; i < array.size(); ++i) { + array[i] = ((i < (array.size() / 10)) * 1E25 + 1E10) * ((i + 1) % 1'000) * std::sin(3 * i + 1); + } - std::clog << "***** REP 2023 *****\n"; + const auto sum_before_shuffle = sum(array); + + std::random_shuffle(&(array[0]), &(array[0]) + array.size()); + + const auto sum_after_shuffle = sum(array); + + REQUIRE(sum_before_shuffle == sum_after_shuffle); + } + + SECTION("reproducible TinyVector<3,float> sum") + { + Array<TinyVector<3, float>> array(10'000); - Array<double> array(10'000'000); for (size_t i = 0; i < array.size(); ++i) { - array[i] = 1E25 * ((i + 1) % 1'000'000) * std::sin(3 * i + 1); + array[i] = + TinyVector<3, float>(((i < (array.size() / 10)) * 1E25 + 1E10) * ((i + 1) % 1'000) * std::sin(3 * i + 1), + 17 * i - 3, // + 1); } - Timer t_direct_sum1; - double direct_sum1 = sum(array); - t_direct_sum1.pause(); + const auto sum_before_shuffle = sum(array); - Timer t_rsum1; - const double reprod_sum1 = ArrayReproductibleSum(array); - t_rsum1.pause(); + std::random_shuffle(&(array[0]), &(array[0]) + array.size()); + + ReproductibleTinyVectorSum s0(array); + + auto bin0 = s0.getSummationBin(); + std::clog << "S0 = " << bin0.S[0] << ";" << bin0.S[1] << ";" << bin0.S[2] << " C0 = " << bin0.C[0] << ";" + << bin0.C[1] << ";" << bin0.C[2] << "\n"; + const auto sum_after_shuffle = sum(array); + + ReproductibleTinyVectorSum s1(array); + + auto bin1 = s1.getSummationBin(); + std::clog << "S1 = " << bin1.S[0] << ";" << bin1.S[1] << ";" << bin1.S[2] << " C1 = " << bin1.C[0] << ";" + << bin1.C[1] << ";" << bin1.C[2] << "\n"; + + std::clog << "DIFF_BIN: DS = " << bin1.S[0] - bin0.S[0] << ";" << bin1.S[1] - bin0.S[1] << ";" + << bin1.S[1] - bin0.S[2] << '\n'; + + std::clog << "DIFF=" << sum_before_shuffle 
- sum_after_shuffle << '\n'; + + REQUIRE(sum_before_shuffle == sum_after_shuffle); + } + + SECTION("reproducible TinyMatrix<2, 3> sum") + { + Array<TinyMatrix<2, 3>> array(10'000); + + for (size_t i = 0; i < array.size(); ++i) { + array[i] = TinyMatrix<2, 3>{((i < (array.size() / 1000)) * 1E25 + 1E10) * ((i + 1) % 1'000) * std::sin(3 * i + 1), + 17 * i - 3, // + 1, // + std::cos(7 * i) * i, // + (2 - 3 * i) * 1E-30, // + (i * 100 + 1000)}; + } + + const auto sum_before_shuffle = sum(array); - std::clog << "# shuffling ...\n" << std::flush; - Timer t_shuffle; std::random_shuffle(&(array[0]), &(array[0]) + array.size()); - t_shuffle.pause(); - std::clog << " shuffling done\n" << std::flush; - - Timer t_direct_sum2; - double direct_sum2 = sum(array); - t_direct_sum2.pause(); - - Timer t_rsum2; - const double reprod_sum2 = ArrayReproductibleSum(array); - t_rsum2.pause(); - - std::clog << "---------\n"; - std::clog << "direct sum1 =" << direct_sum1 << '\n'; - std::clog << "direct sum2 =" << direct_sum2 << '\n'; - std::clog << "reprod sum1 = " << reprod_sum1 << '\n'; - std::clog << "reprod sum2 = " << reprod_sum2 << '\n'; - std::clog << "---------\n"; - std::clog << "delta direct =" << direct_sum1 - direct_sum2 << '\n'; - std::clog << "delta reprod =" << reprod_sum1 - reprod_sum2 << '\n'; - std::clog << "---------\n"; - std::clog << "[t shuffle " << t_shuffle << "]\n"; - std::clog << "---------\n"; - std::clog << "[t dsum : " << t_direct_sum1 << " efficiency = " << t_direct_sum1.seconds() / t_direct_sum1.seconds() - << "]\n"; - std::clog << "[t rsum : " << t_rsum1 << " efficiency = " << t_rsum1.seconds() / t_direct_sum1.seconds() << "]\n"; - std::clog << "---------\n"; - std::clog << "[t dsum2: " << t_direct_sum2 << " efficiency = " << t_direct_sum2.seconds() / t_direct_sum2.seconds() - << "]\n"; - std::clog << "[t rsum2: " << t_rsum2 << " efficiency = " << t_rsum2.seconds() / t_direct_sum2.seconds() << "]\n"; - - double sum_new = ArrayReproductibleSum(array); - - 
std::clog << "sum_new = " << sum_new << '\n'; - - REQUIRE(reprod_sum1 == reprod_sum2); + + const auto sum_after_shuffle = sum(array); + + REQUIRE(sum_before_shuffle == sum_after_shuffle); } SECTION("checking for subArrayView") diff --git a/tests/test_DiscreteFunctionP0.cpp b/tests/test_DiscreteFunctionP0.cpp index 39b91c4de355c1a96feaa1f1b5e2a59bf9c494d8..d0f71e984d516ec243de289f399d882663c9c499 100644 --- a/tests/test_DiscreteFunctionP0.cpp +++ b/tests/test_DiscreteFunctionP0.cpp @@ -2787,7 +2787,7 @@ TEST_CASE("DiscreteFunctionP0", "[scheme]") cell_value[cell_id] = cell_volume[cell_id] * positive_function[cell_id]; }); - REQUIRE(integrate(positive_function) == Catch::Approx(sum(cell_value))); + REQUIRE(integrate(positive_function) == sum(cell_value)); } SECTION("integrate vector") @@ -2806,8 +2806,8 @@ TEST_CASE("DiscreteFunctionP0", "[scheme]") mesh->numberOfCells(), PUGS_LAMBDA(const CellId cell_id) { cell_value[cell_id] = cell_volume[cell_id] * uh[cell_id]; }); - REQUIRE(integrate(uh)[0] == Catch::Approx(sum(cell_value)[0])); - REQUIRE(integrate(uh)[1] == Catch::Approx(sum(cell_value)[1])); + REQUIRE(integrate(uh)[0] == sum(cell_value)[0]); + REQUIRE(integrate(uh)[1] == sum(cell_value)[1]); } SECTION("integrate matrix") @@ -2826,10 +2826,10 @@ TEST_CASE("DiscreteFunctionP0", "[scheme]") mesh->numberOfCells(), PUGS_LAMBDA(const CellId cell_id) { cell_value[cell_id] = cell_volume[cell_id] * uh[cell_id]; }); - REQUIRE(integrate(uh)(0, 0) == Catch::Approx(sum(cell_value)(0, 0))); - REQUIRE(integrate(uh)(0, 1) == Catch::Approx(sum(cell_value)(0, 1))); - REQUIRE(integrate(uh)(1, 0) == Catch::Approx(sum(cell_value)(1, 0))); - REQUIRE(integrate(uh)(1, 1) == Catch::Approx(sum(cell_value)(1, 1))); + REQUIRE(integrate(uh)(0, 0) == sum(cell_value)(0, 0)); + REQUIRE(integrate(uh)(0, 1) == sum(cell_value)(0, 1)); + REQUIRE(integrate(uh)(1, 0) == sum(cell_value)(1, 0)); + REQUIRE(integrate(uh)(1, 1) == sum(cell_value)(1, 1)); } } } @@ -3131,7 +3131,7 @@ 
TEST_CASE("DiscreteFunctionP0", "[scheme]") cell_value[cell_id] = cell_volume[cell_id] * positive_function[cell_id]; }); - REQUIRE(integrate(positive_function) == Catch::Approx(sum(cell_value))); + REQUIRE(integrate(positive_function) == sum(cell_value)); } SECTION("integrate vector") @@ -3150,8 +3150,8 @@ TEST_CASE("DiscreteFunctionP0", "[scheme]") mesh->numberOfCells(), PUGS_LAMBDA(const CellId cell_id) { cell_value[cell_id] = cell_volume[cell_id] * uh[cell_id]; }); - REQUIRE(integrate(uh)[0] == Catch::Approx(sum(cell_value)[0])); - REQUIRE(integrate(uh)[1] == Catch::Approx(sum(cell_value)[1])); + REQUIRE(integrate(uh)[0] == sum(cell_value)[0]); + REQUIRE(integrate(uh)[1] == sum(cell_value)[1]); } SECTION("integrate matrix") @@ -3170,10 +3170,10 @@ TEST_CASE("DiscreteFunctionP0", "[scheme]") mesh->numberOfCells(), PUGS_LAMBDA(const CellId cell_id) { cell_value[cell_id] = cell_volume[cell_id] * uh[cell_id]; }); - REQUIRE(integrate(uh)(0, 0) == Catch::Approx(sum(cell_value)(0, 0))); - REQUIRE(integrate(uh)(0, 1) == Catch::Approx(sum(cell_value)(0, 1))); - REQUIRE(integrate(uh)(1, 0) == Catch::Approx(sum(cell_value)(1, 0))); - REQUIRE(integrate(uh)(1, 1) == Catch::Approx(sum(cell_value)(1, 1))); + REQUIRE(integrate(uh)(0, 0) == sum(cell_value)(0, 0)); + REQUIRE(integrate(uh)(0, 1) == sum(cell_value)(0, 1)); + REQUIRE(integrate(uh)(1, 0) == sum(cell_value)(1, 0)); + REQUIRE(integrate(uh)(1, 1) == sum(cell_value)(1, 1)); } } } @@ -3563,7 +3563,7 @@ TEST_CASE("DiscreteFunctionP0", "[scheme]") cell_value[cell_id] = cell_volume[cell_id] * positive_function[cell_id]; }); - REQUIRE(integrate(positive_function) == Catch::Approx(sum(cell_value))); + REQUIRE(integrate(positive_function) == sum(cell_value)); } SECTION("integrate vector") @@ -3582,8 +3582,8 @@ TEST_CASE("DiscreteFunctionP0", "[scheme]") mesh->numberOfCells(), PUGS_LAMBDA(const CellId cell_id) { cell_value[cell_id] = cell_volume[cell_id] * uh[cell_id]; }); - REQUIRE(integrate(uh)[0] == 
Catch::Approx(sum(cell_value)[0])); - REQUIRE(integrate(uh)[1] == Catch::Approx(sum(cell_value)[1])); + REQUIRE(integrate(uh)[0] == sum(cell_value)[0]); + REQUIRE(integrate(uh)[1] == sum(cell_value)[1]); } SECTION("integrate matrix") @@ -3602,10 +3602,10 @@ TEST_CASE("DiscreteFunctionP0", "[scheme]") mesh->numberOfCells(), PUGS_LAMBDA(const CellId cell_id) { cell_value[cell_id] = cell_volume[cell_id] * uh[cell_id]; }); - REQUIRE(integrate(uh)(0, 0) == Catch::Approx(sum(cell_value)(0, 0))); - REQUIRE(integrate(uh)(0, 1) == Catch::Approx(sum(cell_value)(0, 1))); - REQUIRE(integrate(uh)(1, 0) == Catch::Approx(sum(cell_value)(1, 0))); - REQUIRE(integrate(uh)(1, 1) == Catch::Approx(sum(cell_value)(1, 1))); + REQUIRE(integrate(uh)(0, 0) == sum(cell_value)(0, 0)); + REQUIRE(integrate(uh)(0, 1) == sum(cell_value)(0, 1)); + REQUIRE(integrate(uh)(1, 0) == sum(cell_value)(1, 0)); + REQUIRE(integrate(uh)(1, 1) == sum(cell_value)(1, 1)); } } } diff --git a/tests/test_ItemArrayUtils.cpp b/tests/test_ItemArrayUtils.cpp index 8c832d16c1dac5ed7a84828303f68bceaf25c3d8..346345eae1134da16e7e75b7c1d6f553ac14ef6d 100644 --- a/tests/test_ItemArrayUtils.cpp +++ b/tests/test_ItemArrayUtils.cpp @@ -8,6 +8,8 @@ #include <mesh/ItemArray.hpp> #include <mesh/ItemArrayUtils.hpp> #include <mesh/Mesh.hpp> +#include <mesh/MeshData.hpp> +#include <mesh/MeshDataManager.hpp> #include <utils/Messenger.hpp> // clazy:excludeall=non-pod-global-static @@ -1055,26 +1057,31 @@ TEST_CASE("ItemArrayUtils", "[mesh]") std::array mesh_list = MeshDataBaseForTests::get().all2DMeshes(); for (const auto& named_mesh : mesh_list) { - SECTION(named_mesh.name() + " for size_t data") + SECTION(named_mesh.name() + " for double data") { auto mesh_2d = named_mesh.mesh(); const Connectivity<2>& connectivity = mesh_2d->connectivity(); - FaceArray<size_t> face_array{connectivity, 3}; - face_array.fill(2); + auto ll = MeshDataManager::instance().getMeshData(*mesh_2d).ll(); + + FaceArray<double> face_array{connectivity, 3}; + 
parallel_for( + connectivity.numberOfFaces(), PUGS_LAMBDA(FaceId face_id) { + face_array[face_id][0] = ll[face_id]; + face_array[face_id][1] = (3 + ll[face_id]) * ll[face_id]; + face_array[face_id][2] = 2 * ll[face_id] - 3; + }); auto face_is_owned = connectivity.faceIsOwned(); - const size_t global_number_of_faces = [&] { - size_t number_of_faces = 0; - for (FaceId face_id = 0; face_id < face_is_owned.numberOfItems(); ++face_id) { - number_of_faces += face_is_owned[face_id]; - } - return parallel::allReduceSum(number_of_faces); - }(); + FaceValue<double> face_sum{connectivity}; + parallel_for( + connectivity.numberOfFaces(), PUGS_LAMBDA(FaceId face_id) { + face_sum[face_id] = face_array[face_id][0] + face_array[face_id][1] + face_array[face_id][2]; + }); - REQUIRE(sum(face_array) == 2 * face_array.sizeOfArrays() * global_number_of_faces); + REQUIRE(sum(face_array) == sum(face_sum)); } SECTION(named_mesh.name() + " for N^2 data") diff --git a/tests/test_ItemValueUtils.cpp b/tests/test_ItemValueUtils.cpp index 755392515ae3078421375f71af8727aa8bff144e..b1901713a2065cb054dbb18debaa60722116cb7d 100644 --- a/tests/test_ItemValueUtils.cpp +++ b/tests/test_ItemValueUtils.cpp @@ -8,6 +8,8 @@ #include <mesh/ItemValue.hpp> #include <mesh/ItemValueUtils.hpp> #include <mesh/Mesh.hpp> +#include <mesh/MeshData.hpp> +#include <mesh/MeshDataManager.hpp> #include <utils/Messenger.hpp> // clazy:excludeall=non-pod-global-static @@ -240,26 +242,60 @@ TEST_CASE("ItemValueUtils", "[mesh]") std::array mesh_list = MeshDataBaseForTests::get().all1DMeshes(); for (const auto& named_mesh : mesh_list) { - SECTION(named_mesh.name()) + SECTION(named_mesh.name() + "for size_t data") { auto mesh_1d = named_mesh.mesh(); const Connectivity<1>& connectivity = mesh_1d->connectivity(); + auto Vj = MeshDataManager::instance().getMeshData(*mesh_1d).Vj(); + CellValue<size_t> cell_value{connectivity}; - cell_value.fill(5); + + parallel_for( + connectivity.numberOfCells(), + PUGS_LAMBDA(CellId cell_id) { 
cell_value[cell_id] = std::ceil(100 * Vj[cell_id]); }); auto cell_is_owned = connectivity.cellIsOwned(); - const size_t global_number_of_cells = [&] { - size_t number_of_cells = 0; + const size_t global_sum = [&] { + size_t local_dum = 0; for (CellId cell_id = 0; cell_id < cell_is_owned.numberOfItems(); ++cell_id) { - number_of_cells += cell_is_owned[cell_id]; + local_dum += cell_is_owned[cell_id] * cell_value[cell_id]; } - return parallel::allReduceSum(number_of_cells); + return parallel::allReduceSum(local_dum); }(); - REQUIRE(sum(cell_value) == 5 * global_number_of_cells); + REQUIRE(sum(cell_value) == global_sum); + } + + SECTION(named_mesh.name() + "for double data") + { + auto mesh_1d = named_mesh.mesh(); + + const Connectivity<1>& connectivity = mesh_1d->connectivity(); + + auto Vj = MeshDataManager::instance().getMeshData(*mesh_1d).Vj(); + + auto cell_is_owned = connectivity.cellIsOwned(); + + size_t nb_owned_cells = 0; + for (CellId cell_id = 0; cell_id < connectivity.numberOfCells(); ++cell_id) { + nb_owned_cells += cell_is_owned[cell_id]; + } + + Array<double> local_Vj(nb_owned_cells); + { + size_t index = 0; + for (CellId cell_id = 0; cell_id < connectivity.numberOfCells(); ++cell_id) { + if (cell_is_owned[cell_id]) { + local_Vj[index++] = Vj[cell_id]; + } + } + } + Array<double> global_Vj = parallel::allGatherVariable(local_Vj); + + REQUIRE(sum(Vj) == sum(global_Vj)); } } } @@ -275,20 +311,54 @@ TEST_CASE("ItemValueUtils", "[mesh]") const Connectivity<2>& connectivity = mesh_2d->connectivity(); + auto ll = MeshDataManager::instance().getMeshData(*mesh_2d).ll(); + FaceValue<size_t> face_value{connectivity}; - face_value.fill(2); + + parallel_for( + connectivity.numberOfFaces(), + PUGS_LAMBDA(FaceId face_id) { face_value[face_id] = std::ceil(100 * ll[face_id]); }); auto face_is_owned = connectivity.faceIsOwned(); - const size_t global_number_of_faces = [&] { - size_t number_of_faces = 0; + const size_t global_sum = [&] { + size_t local_sum = 0; for 
(FaceId face_id = 0; face_id < face_is_owned.numberOfItems(); ++face_id) { - number_of_faces += face_is_owned[face_id]; + local_sum += face_is_owned[face_id] * face_value[face_id]; } - return parallel::allReduceSum(number_of_faces); + return parallel::allReduceSum(local_sum); }(); - REQUIRE(sum(face_value) == 2 * global_number_of_faces); + REQUIRE(sum(face_value) == global_sum); + } + + SECTION(named_mesh.name() + "for double data") + { + auto mesh_2d = named_mesh.mesh(); + + const Connectivity<2>& connectivity = mesh_2d->connectivity(); + + auto Vj = MeshDataManager::instance().getMeshData(*mesh_2d).Vj(); + + auto cell_is_owned = connectivity.cellIsOwned(); + + size_t nb_owned_cells = 0; + for (CellId cell_id = 0; cell_id < connectivity.numberOfCells(); ++cell_id) { + nb_owned_cells += cell_is_owned[cell_id]; + } + + Array<double> local_Vj(nb_owned_cells); + { + size_t index = 0; + for (CellId cell_id = 0; cell_id < connectivity.numberOfCells(); ++cell_id) { + if (cell_is_owned[cell_id]) { + local_Vj[index++] = Vj[cell_id]; + } + } + } + Array<double> global_Vj = parallel::allGatherVariable(local_Vj); + + REQUIRE(sum(Vj) == sum(global_Vj)); } SECTION(named_mesh.name() + "for N^3 data") @@ -297,22 +367,66 @@ TEST_CASE("ItemValueUtils", "[mesh]") const Connectivity<2>& connectivity = mesh_2d->connectivity(); + auto ll = MeshDataManager::instance().getMeshData(*mesh_2d).ll(); + using N3 = TinyVector<3, size_t>; FaceValue<N3> face_value{connectivity}; - const N3 data(2, 1, 4); - face_value.fill(data); + + parallel_for( + connectivity.numberOfFaces(), PUGS_LAMBDA(FaceId face_id) { + face_value[face_id] = N3(std::ceil(100 * ll[face_id]), // + std::ceil(200 * ll[face_id] - 40), // + std::ceil(17.3 * ll[face_id] + 12.9)); + }); auto face_is_owned = connectivity.faceIsOwned(); - const size_t global_number_of_faces = [&] { - size_t number_of_faces = 0; + const N3 global_sum = [=] { + N3 local_sum = zero; for (FaceId face_id = 0; face_id < face_is_owned.numberOfItems(); 
++face_id) { - number_of_faces += face_is_owned[face_id]; + local_sum += face_is_owned[face_id] * face_value[face_id]; } - return parallel::allReduceSum(number_of_faces); + return parallel::allReduceSum(local_sum); }(); - REQUIRE(sum(face_value) == global_number_of_faces * data); + REQUIRE(sum(face_value) == global_sum); + } + + SECTION(named_mesh.name() + "for R2 data") + { + auto mesh_2d = named_mesh.mesh(); + + const Connectivity<2>& connectivity = mesh_2d->connectivity(); + + auto ll = MeshDataManager::instance().getMeshData(*mesh_2d).ll(); + + using R2x3 = TinyVector<2, double>; + FaceValue<R2x3> face_value{connectivity}; + + auto face_is_owned = connectivity.faceIsOwned(); + + size_t nb_owned_faces = 0; + for (FaceId face_id = 0; face_id < connectivity.numberOfFaces(); ++face_id) { + nb_owned_faces += face_is_owned[face_id]; + } + + parallel_for( + connectivity.numberOfFaces(), PUGS_LAMBDA(FaceId face_id) { + face_value[face_id] = R2x3{ll[face_id], 1. / ll[face_id]}; + }); + + Array<R2x3> local_array(nb_owned_faces); + { + size_t index = 0; + for (FaceId face_id = 0; face_id < connectivity.numberOfFaces(); ++face_id) { + if (face_is_owned[face_id]) { + local_array[index++] = face_value[face_id]; + } + } + } + Array global_array = parallel::allGatherVariable(local_array); + + REQUIRE(sum(face_value) == sum(global_array)); } } } @@ -369,6 +483,48 @@ TEST_CASE("ItemValueUtils", "[mesh]") REQUIRE(sum(node_value) == global_number_of_nodes * data); } + + SECTION(named_mesh.name() + "for R^2x3 data") + { + auto mesh_3d = named_mesh.mesh(); + + const Connectivity<3>& connectivity = mesh_3d->connectivity(); + + auto xr = mesh_3d->xr(); + + using R2x3 = TinyMatrix<2, 3, double>; + NodeValue<R2x3> node_value{connectivity}; + + auto node_is_owned = connectivity.nodeIsOwned(); + + size_t nb_owned_nodes = 0; + for (NodeId node_id = 0; node_id < connectivity.numberOfNodes(); ++node_id) { + nb_owned_nodes += node_is_owned[node_id]; + } + + parallel_for( + 
connectivity.numberOfNodes(), PUGS_LAMBDA(NodeId node_id) { + node_value[node_id] = R2x3{xr[node_id][0], + l2Norm(xr[node_id]), + 3.178 * xr[node_id][0] - 3 * xr[node_id][2], + xr[node_id][1], + xr[node_id][2] + xr[node_id][0], + xr[node_id][0] * xr[node_id][1] * xr[node_id][2]}; + }); + + Array<R2x3> local_array(nb_owned_nodes); + { + size_t index = 0; + for (NodeId node_id = 0; node_id < connectivity.numberOfNodes(); ++node_id) { + if (node_is_owned[node_id]) { + local_array[index++] = node_value[node_id]; + } + } + } + Array global_array = parallel::allGatherVariable(local_array); + + REQUIRE(sum(node_value) == sum(global_array)); + } } } } diff --git a/tests/test_SubItemArrayPerItemUtils.cpp b/tests/test_SubItemArrayPerItemUtils.cpp index e534ec34600ef526336249a4b7d1cfc794721827..d36829250ce4a5e9b88c7015504c1ffb68362f96 100644 --- a/tests/test_SubItemArrayPerItemUtils.cpp +++ b/tests/test_SubItemArrayPerItemUtils.cpp @@ -6,6 +6,8 @@ #include <algebra/TinyVector.hpp> #include <mesh/Connectivity.hpp> #include <mesh/Mesh.hpp> +#include <mesh/MeshData.hpp> +#include <mesh/MeshDataManager.hpp> #include <mesh/SubItemArrayPerItem.hpp> #include <mesh/SubItemArrayPerItemUtils.hpp> #include <utils/Messenger.hpp> @@ -368,6 +370,43 @@ TEST_CASE("SubItemArrayPerItemUtils", "[mesh]") REQUIRE(sum(node_array_per_face) == global_number_of_nodes_per_faces * node_array_per_face.sizeOfArrays() * data); } + + SECTION(named_mesh.name() + " for float data") + { + auto mesh_2d = named_mesh.mesh(); + + const Connectivity<2>& connectivity = mesh_2d->connectivity(); + + EdgeArrayPerCell<float> edge_array_per_cell{connectivity, 3}; + edge_array_per_cell.fill(1E10); + + auto cell_is_owned = connectivity.cellIsOwned(); + parallel_for( + connectivity.numberOfCells(), PUGS_LAMBDA(CellId cell_id) { + if (cell_is_owned[cell_id]) { + auto cell_table = edge_array_per_cell.itemTable(cell_id); + for (size_t i = 0; i < cell_table.numberOfRows(); ++i) { + for (size_t j = 0; j < 
cell_table.numberOfColumns(); ++j) { + cell_table(i, j) = 10 + parallel::rank() - i - j + 1.5; + } + } + } + }); + + CellValue<float> cell_sum(connectivity); + cell_sum.fill(0); + parallel_for( + connectivity.numberOfCells(), PUGS_LAMBDA(CellId cell_id) { + auto cell_table = edge_array_per_cell.itemTable(cell_id); + for (size_t i = 0; i < cell_table.numberOfRows(); ++i) { + for (size_t j = 0; j < cell_table.numberOfColumns(); ++j) { + cell_sum[cell_id] += cell_table(i, j); + } + } + }); + + REQUIRE(sum(edge_array_per_cell) == sum(cell_sum)); + } } } @@ -405,10 +444,10 @@ TEST_CASE("SubItemArrayPerItemUtils", "[mesh]") const Connectivity<3>& connectivity = mesh_3d->connectivity(); - using N2x3 = TinyMatrix<2, 3, size_t>; - EdgeArrayPerFace<N2x3> edge_array_per_face{connectivity, 3}; + using R2x3 = TinyMatrix<2, 3, size_t>; + EdgeArrayPerFace<R2x3> edge_array_per_face{connectivity, 3}; - const N2x3 data(1, 3, 4, 6, 2, 5); + const R2x3 data(1, 3, 4, 6, 2, 5); edge_array_per_face.fill(data); auto face_is_owned = connectivity.faceIsOwned(); @@ -424,6 +463,46 @@ TEST_CASE("SubItemArrayPerItemUtils", "[mesh]") REQUIRE(sum(edge_array_per_face) == global_number_of_edges_per_faces * edge_array_per_face.sizeOfArrays() * data); } + + SECTION(named_mesh.name() + " for R^2x3 data") + { + auto mesh_3d = named_mesh.mesh(); + + const Connectivity<3>& connectivity = mesh_3d->connectivity(); + + using R2x3 = TinyMatrix<2, 3, double>; + EdgeArrayPerFace<R2x3> edge_array_per_face{connectivity, 3}; + + auto Nl = MeshDataManager::instance().getMeshData(*mesh_3d).Nl(); + + parallel_for( + mesh_3d->numberOfFaces(), PUGS_LAMBDA(FaceId face_id) { + for (size_t i_edge = 0; i_edge < edge_array_per_face.numberOfSubArrays(face_id); ++i_edge) { + for (size_t i = 0; i < edge_array_per_face[face_id][i_edge].size(); ++i) { + R2x3 value((2.3 * i_edge + 1.7) * Nl[face_id][0], // + (3. 
- i_edge) * Nl[face_id][1], // + (2 * i_edge + 1.3) * Nl[face_id][2], // + (i_edge + 1.7) * Nl[face_id][0] + 3.2 * Nl[face_id][1], // + Nl[face_id][2] + 3 * i_edge, // + 7.13 * i_edge * i_edge * l2Norm(Nl[face_id])); + edge_array_per_face[face_id][i_edge][i] = (i + 1.3) * value; + } + } + }); + + FaceValue<R2x3> face_value{connectivity}; + face_value.fill(zero); + parallel_for( + mesh_3d->numberOfFaces(), PUGS_LAMBDA(FaceId face_id) { + for (size_t i_edge = 0; i_edge < edge_array_per_face.numberOfSubArrays(face_id); ++i_edge) { + for (size_t i = 0; i < edge_array_per_face[face_id][i_edge].size(); ++i) { + face_value[face_id] += edge_array_per_face[face_id][i_edge][i]; + } + } + }); + + REQUIRE(sum(edge_array_per_face) == sum(face_value)); + } } } } diff --git a/tests/test_SubItemValuePerItemUtils.cpp b/tests/test_SubItemValuePerItemUtils.cpp index aca9a14498e447b1cd6ffd21411f45e847b34ac5..353d7a9500a2cba3384ae22b8863f020d1c07361 100644 --- a/tests/test_SubItemValuePerItemUtils.cpp +++ b/tests/test_SubItemValuePerItemUtils.cpp @@ -6,6 +6,8 @@ #include <algebra/TinyVector.hpp> #include <mesh/Connectivity.hpp> #include <mesh/Mesh.hpp> +#include <mesh/MeshData.hpp> +#include <mesh/MeshDataManager.hpp> #include <mesh/SubItemValuePerItem.hpp> #include <mesh/SubItemValuePerItemUtils.hpp> #include <utils/Messenger.hpp> @@ -325,29 +327,34 @@ TEST_CASE("SubItemValuePerItemUtils", "[mesh]") REQUIRE(sum(node_value_per_face) == 2 * global_number_of_nodes_per_faces); } - SECTION(named_mesh.name() + " for N^2 data") + SECTION(named_mesh.name() + " for R^2 data") { auto mesh_2d = named_mesh.mesh(); const Connectivity<2>& connectivity = mesh_2d->connectivity(); - using N2 = TinyVector<2, size_t>; - NodeValuePerFace<N2> node_value_per_face{connectivity}; + using R2 = TinyVector<2, double>; + NodeValuePerFace<R2> node_value_per_face{connectivity}; - const N2 data(3, 2); - node_value_per_face.fill(data); + auto Nl = MeshDataManager::instance().getMeshData(*mesh_2d).Nl(); - auto 
face_is_owned = connectivity.faceIsOwned(); + parallel_for( + mesh_2d->numberOfFaces(), PUGS_LAMBDA(FaceId face_id) { + for (size_t i_node = 0; i_node < node_value_per_face.numberOfSubValues(face_id); ++i_node) { + node_value_per_face[face_id][i_node] = (i_node + 1) * Nl[face_id]; + } + }); - const size_t global_number_of_nodes_per_faces = [&] { - size_t number_of_nodes_per_faces = 0; - for (FaceId face_id = 0; face_id < face_is_owned.numberOfItems(); ++face_id) { - number_of_nodes_per_faces += face_is_owned[face_id] * node_value_per_face.numberOfSubValues(face_id); - } - return parallel::allReduceSum(number_of_nodes_per_faces); - }(); + FaceValue<R2> face_value{connectivity}; + parallel_for( + mesh_2d->numberOfFaces(), PUGS_LAMBDA(FaceId face_id) { + face_value[face_id] = node_value_per_face[face_id][0]; + for (size_t i_node = 1; i_node < node_value_per_face.numberOfSubValues(face_id); ++i_node) { + face_value[face_id] += node_value_per_face[face_id][i_node]; + } + }); - REQUIRE(sum(node_value_per_face) == global_number_of_nodes_per_faces * data); + REQUIRE(sum(node_value_per_face) == sum(face_value)); } } } @@ -403,6 +410,42 @@ TEST_CASE("SubItemValuePerItemUtils", "[mesh]") REQUIRE(sum(edge_value_per_face) == global_number_of_edges_per_faces * data); } + + SECTION(named_mesh.name() + " for R^3x2 float") + { + auto mesh_3d = named_mesh.mesh(); + + const Connectivity<3>& connectivity = mesh_3d->connectivity(); + + auto xr = mesh_3d->xr(); + + using R3x2 = TinyMatrix<3, 2, float>; + EdgeValuePerNode<R3x2> edge_value_per_node{connectivity}; + + parallel_for( + mesh_3d->numberOfNodes(), PUGS_LAMBDA(NodeId node_id) { + for (size_t i_edge = 0; i_edge < edge_value_per_node.numberOfSubValues(node_id); ++i_edge) { + R3x2 value((2.3 * i_edge + 1.7) * xr[node_id][0], // + (3. 
- i_edge) * xr[node_id][1], // + (2 * i_edge + 1.3) * xr[node_id][2], // + (i_edge + 1.7) * xr[node_id][0] + 3.2 * xr[node_id][1], // + xr[node_id][2] + 3 * i_edge, // + 7.13 * i_edge * i_edge * l2Norm(xr[node_id])); + edge_value_per_node[node_id][i_edge] = value; + } + }); + + NodeValue<R3x2> node_value{connectivity}; + parallel_for( + mesh_3d->numberOfNodes(), PUGS_LAMBDA(NodeId node_id) { + node_value[node_id] = edge_value_per_node[node_id][0]; + for (size_t i_edge = 1; i_edge < edge_value_per_node.numberOfSubValues(node_id); ++i_edge) { + node_value[node_id] += edge_value_per_node[node_id][i_edge]; + } + }); + + REQUIRE(sum(edge_value_per_node) == sum(node_value)); + } } } }