From 3c7ad69612139567d5123b1d2f2d838d91e41e7a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?St=C3=A9phane=20Del=20Pino?= <stephane.delpino44@gmail.com>
Date: Wed, 14 Jun 2023 11:45:03 +0200
Subject: [PATCH] Prepare/test reproductible summations

---
 tests/test_Array.cpp | 210 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 210 insertions(+)

diff --git a/tests/test_Array.cpp b/tests/test_Array.cpp
index f7d121df0..0b71a75b5 100644
--- a/tests/test_Array.cpp
+++ b/tests/test_Array.cpp
@@ -14,6 +14,151 @@
 #include <valarray>
 #include <vector>
 
+#include <utils/SmallArray.hpp>
+#include <utils/Timer.hpp>
+
+template <typename DataType>
+class ArrayReproductibleSum
+{
+ private:
+  static constexpr double eps = std::numeric_limits<DataType>::epsilon();
+
+  static constexpr size_t W = 40;
+  static constexpr size_t K = 3;
+
+  static consteval size_t
+  _getNB()
+  {
+    return std::floor((1 / eps) * std::pow(2., -2. - W));
+  }
+
+  static constexpr size_t NB = _getNB();
+
+  Array<const DataType> m_array;
+
+  PUGS_INLINE DataType
+  ulp(const DataType& x)
+  {
+    static_assert(std::is_floating_point_v<DataType>, "expecting floating point value");
+
+    if (x == 0) {
+      return std::numeric_limits<DataType>::denorm_min();
+    }
+
+    return std::pow(DataType{2}, std::ilogb(std::abs(x)) - std::numeric_limits<DataType>::digits);
+  };
+
+  PUGS_INLINE DataType
+  ufp(const DataType& x)
+  {
+    static_assert(std::is_floating_point_v<DataType>, "expecting floating point value");
+
+    return std::pow(DataType{2}, std::ilogb(std::abs(x)));
+  };
+
+ public:
+  operator DataType()
+  {
+    auto local_max = [](const auto& v, const size_t lB) {
+      DataType m = std::abs(v[0]);
+      for (size_t j = 1; j < lB; ++j) {
+        const DataType& abs_vj = std::abs(v[j]);
+        if (m < abs_vj) {
+          m = abs_vj;
+        }
+      }
+      return m;
+    };
+
+    auto update = [&](const DataType& m, auto& S, auto& C) {
+      if (m >= std::pow(DataType{2}, W - 1.) * ulp(S[0])) {
+        const size_t g = 1 + std::floor(std::log2(m / (std::pow(DataType{2}, W - 1.) * ulp(S[0]))) / W);
+
+        for (size_t k = K - 1; k >= g; --k) {
+          S[k] = S[k - g];
+          C[k] = C[k - g];
+        }
+
+        for (size_t k = 0; k < std::min(K, g); ++k) {
+          S[k] = 1.5 * std::pow(DataType{2}, g * W) * ufp(S[k]);
+          C[k] = 0;
+        }
+      }
+    };
+
+    auto split2 = [](DataType& S, DataType& x) {
+      union
+      {
+        static_assert(sizeof(DataType) == sizeof(unsigned long));
+        DataType as_DataType;
+        unsigned long as_long;
+      } x_bar;
+
+      x_bar.as_DataType = x;
+      x_bar.as_long |= 0x1;
+
+      const DataType S0 = S;
+      S += x_bar.as_DataType;
+      x -= S - S0;
+    };
+
+    auto extract_vector3 = [&](DataType& S, auto& v, const size_t lB) {
+      for (size_t i = 0; i < lB; ++i) {
+        split2(S, v[i]);
+      }
+    };
+
+    auto renormalize = [&](auto& S, auto& C) {
+      for (size_t k = 0; k < K; ++k) {
+        if (S[k] >= 1.75 * ufp(S[k])) {
+          S[k] -= 0.25 * ufp(S[k]);
+          C[k] += 1;
+        } else if (S[k] < 1.25 * ufp(S[k])) {
+          S[k] += 0.5 * ufp(S[k]);
+          C[k] -= 2;
+        } else if (S[k] < 1.5 * ufp(S[k])) {
+          S[k] += 0.25 * ufp(S[k]);
+          C[k] -= 1;
+        }
+      }
+    };
+
+    TinyVector<K, DataType> S;
+    for (size_t k = 0; k < K; ++k) {
+      S[k] = 0.75 * eps * std::pow(2., (K - k - 1) * W);
+    }
+
+    TinyVector<K, DataType> C = zero;
+
+    Array<DataType> local_array(NB);
+    for (size_t i = 0; i < m_array.size(); i += NB) {
+      const size_t lB = std::min(NB, m_array.size() - i);
+
+      std::copy_n(&(m_array[i]), lB, &(local_array[0]));
+
+      const DataType m = local_max(local_array, lB);
+
+      update(m, S, C);
+
+      for (size_t k = 0; k < K; ++k) {
+        extract_vector3(S[k], local_array, lB);
+      }
+
+      renormalize(S, C);
+    }
+
+    DataType sum = 0;
+    for (size_t k = 0; k < S.dimension(); ++k) {
+      sum += 0.25 * (C[k] - 6) * ufp(S[k]) + S[k];
+    }
+
+    return sum;
+  };
+
+  ArrayReproductibleSum(Array<DataType> array) : m_array{array} {}
+  ~ArrayReproductibleSum() = default;
+};
+
 // Instantiate to ensure full coverage is performed
 template class Array<int>;
 
@@ -319,6 +464,71 @@ TEST_CASE("Array", "[utils]")
     }
   }
 
+  SECTION("reproducible floating point sum 2023")
+  {
+    std::clog.precision(16);
+
+    std::clog << "***** REP 2023 *****\n";
+
+    Array<double> array(10'000'000);
+    for (size_t i = 0; i < array.size(); ++i) {
+      array[i] = ((i + 1) % 100'000) * std::sin(3 * i + 1);
+    }
+
+    Timer t_direct_sum1;
+    double direct_sum1 = 0;
+    for (size_t i = 0; i < array.size(); ++i) {
+      direct_sum1 += array[i];
+    }
+    t_direct_sum1.pause();
+
+    Timer t_rsum1;
+    const double reprod_sum1 = ArrayReproductibleSum(array);
+    t_rsum1.pause();
+
+    std::clog << "# shuffling ...\n" << std::flush;
+    Timer t_shuffle;
+    std::random_shuffle(&(array[0]), &(array[0]) + array.size());
+    t_shuffle.pause();
+    std::clog << "  shuffling done\n" << std::flush;
+
+    Timer t_direct_sum2;
+    double direct_sum2 = 0;
+    for (size_t i = 0; i < array.size(); ++i) {
+      direct_sum2 += array[i];
+    }
+    t_direct_sum2.pause();
+
+    Timer t_rsum2;
+    const double reprod_sum2 = ArrayReproductibleSum(array);
+    t_rsum2.pause();
+
+    std::clog << "---------\n";
+    std::clog << "direct sum1 =" << direct_sum1 << '\n';
+    std::clog << "direct sum2 =" << direct_sum2 << '\n';
+    std::clog << "reprod sum1 = " << reprod_sum1 << '\n';
+    std::clog << "reprod sum2 = " << reprod_sum2 << '\n';
+    std::clog << "---------\n";
+    std::clog << "delta direct =" << direct_sum1 - direct_sum2 << '\n';
+    std::clog << "delta reprod =" << reprod_sum1 - reprod_sum2 << '\n';
+    std::clog << "---------\n";
+    std::clog << "[t shuffle " << t_shuffle << "]\n";
+    std::clog << "---------\n";
+    std::clog << "[t dsum : " << t_direct_sum1 << " efficiency = " << t_direct_sum1.seconds() / t_direct_sum1.seconds()
+              << "]\n";
+    std::clog << "[t rsum : " << t_rsum1 << " efficiency = " << t_rsum1.seconds() / t_direct_sum1.seconds() << "]\n";
+    std::clog << "---------\n";
+    std::clog << "[t dsum2: " << t_direct_sum2 << " efficiency = " << t_direct_sum2.seconds() / t_direct_sum2.seconds()
+              << "]\n";
+    std::clog << "[t rsum2: " << t_rsum2 << " efficiency = " << t_rsum2.seconds() / t_direct_sum2.seconds() << "]\n";
+
+    double sum_new = ArrayReproductibleSum(array);
+
+    std::clog << "sum_new = " << sum_new << '\n';
+
+    REQUIRE(reprod_sum1 == reprod_sum2);
+  }
+
   SECTION("checking for subArrayView")
   {
     Array<int> array{10};
-- 
GitLab