diff --git a/tests/test_Array.cpp b/tests/test_Array.cpp index 0b71a75b5ccc87f9f3197efd89cb5001bed02f46..e999cb06d8a4ab0302c1a40896950cd93c4640d4 100644 --- a/tests/test_Array.cpp +++ b/tests/test_Array.cpp @@ -59,10 +59,10 @@ class ArrayReproductibleSum public: operator DataType() { - auto local_max = [](const auto& v, const size_t lB) { - DataType m = std::abs(v[0]); + auto local_max = [](const auto& v, const size_t i, const size_t lB) noexcept(NO_ASSERT) { + DataType m = std::abs(v[i]); for (size_t j = 1; j < lB; ++j) { - const DataType& abs_vj = std::abs(v[j]); + const DataType& abs_vj = std::abs(v[i + j]); if (m < abs_vj) { m = abs_vj; } @@ -70,7 +70,7 @@ class ArrayReproductibleSum return m; }; - auto update = [&](const DataType& m, auto& S, auto& C) { + auto update = [&](const DataType& m, auto& S, auto& C) noexcept(NO_ASSERT) { if (m >= std::pow(DataType{2}, W - 1.) * ulp(S[0])) { const size_t g = 1 + std::floor(std::log2(m / (std::pow(DataType{2}, W - 1.) * ulp(S[0]))) / W); @@ -86,7 +86,7 @@ class ArrayReproductibleSum } }; - auto split2 = [](DataType& S, DataType& x) { + auto split2 = [](DataType& S, DataType& x) noexcept(NO_ASSERT) { union { static_assert(sizeof(DataType) == sizeof(unsigned long)); @@ -102,13 +102,13 @@ class ArrayReproductibleSum x -= S - S0; }; - auto extract_vector3 = [&](DataType& S, auto& v, const size_t lB) { - for (size_t i = 0; i < lB; ++i) { - split2(S, v[i]); + auto extract_vector3 = [&](DataType& S, auto& v, size_t i, const size_t lB) noexcept(NO_ASSERT) { + for (size_t j = 0; j < lB; ++j) { + split2(S, v[i + j]); } }; - auto renormalize = [&](auto& S, auto& C) { + auto renormalize = [&](auto& S, auto& C) noexcept(NO_ASSERT) { for (size_t k = 0; k < K; ++k) { if (S[k] >= 1.75 * ufp(S[k])) { S[k] -= 0.25 * ufp(S[k]); @@ -130,18 +130,19 @@ class ArrayReproductibleSum TinyVector<K, DataType> C = zero; - Array<DataType> local_array(NB); + // Array<DataType> local_array(NB); + Array<DataType> local_array = copy(m_array); for (size_t i = 0; i < m_array.size(); i += NB) { const size_t lB = std::min(NB, m_array.size() - i); - std::copy_n(&(m_array[i]), lB, &(local_array[0])); + // std::copy_n(&(m_array[i]), lB, &(local_array[i])); - const DataType m = local_max(local_array, lB); + const DataType m = local_max(local_array, i, lB); update(m, S, C); for (size_t k = 0; k < K; ++k) { - extract_vector3(S[k], local_array, lB); + extract_vector3(S[k], local_array, i, lB); } renormalize(S, C); @@ -472,14 +473,11 @@ TEST_CASE("Array", "[utils]") Array<double> array(10'000'000); for (size_t i = 0; i < array.size(); ++i) { - array[i] = ((i + 1) % 100'000) * std::sin(3 * i + 1); + array[i] = 1E25 * ((i + 1) % 1'000'000) * std::sin(3 * i + 1); } Timer t_direct_sum1; - double direct_sum1 = 0; - for (size_t i = 0; i < array.size(); ++i) { - direct_sum1 += array[i]; - } + double direct_sum1 = sum(array); t_direct_sum1.pause(); Timer t_rsum1; @@ -493,10 +491,7 @@ TEST_CASE("Array", "[utils]") std::clog << " shuffling done\n" << std::flush; Timer t_direct_sum2; - double direct_sum2 = 0; - for (size_t i = 0; i < array.size(); ++i) { - direct_sum2 += array[i]; - } + double direct_sum2 = sum(array); t_direct_sum2.pause(); Timer t_rsum2; diff --git a/tests/test_main.cpp b/tests/test_main.cpp index 0e334f92c8a73ca526e905c1c5d6e82236f2d1ea..084d9b9453cb683586ce8f9a6d7b72678bcd3aaa 100644 --- a/tests/test_main.cpp +++ b/tests/test_main.cpp @@ -21,7 +21,7 @@ int main(int argc, char* argv[]) { parallel::Messenger::create(argc, argv); - const int nb_threads = std::max(std::thread::hardware_concurrency(), 1u); + const int nb_threads = std::max(std::thread::hardware_concurrency() / 2, 1u); { Kokkos::InitializationSettings args;