Skip to content
Snippets Groups Projects
Commit 3c7ad696 authored by Stéphane Del Pino's avatar Stéphane Del Pino
Browse files

Prepare/test reproductible summations

parent 5411697c
Branches
Tags
1 merge request!172Reproducible summation of floating point arrays
...@@ -14,6 +14,151 @@ ...@@ -14,6 +14,151 @@
#include <valarray> #include <valarray>
#include <vector> #include <vector>
#include <utils/SmallArray.hpp>
#include <utils/Timer.hpp>
template <typename DataType>
class ArrayReproductibleSum
{
private:
static constexpr double eps = std::numeric_limits<DataType>::epsilon();
static constexpr size_t W = 40;
static constexpr size_t K = 3;
static consteval size_t
_getNB()
{
return std::floor((1 / eps) * std::pow(2., -2. - W));
}
static constexpr size_t NB = _getNB();
Array<const DataType> m_array;
PUGS_INLINE DataType
ulp(const DataType& x)
{
static_assert(std::is_floating_point_v<DataType>, "expecting floating point value");
if (x == 0) {
return std::numeric_limits<DataType>::denorm_min();
}
return std::pow(DataType{2}, std::ilogb(std::abs(x)) - std::numeric_limits<DataType>::digits);
};
PUGS_INLINE DataType
ufp(const DataType& x)
{
static_assert(std::is_floating_point_v<DataType>, "expecting floating point value");
return std::pow(DataType{2}, std::ilogb(std::abs(x)));
};
public:
operator DataType()
{
auto local_max = [](const auto& v, const size_t lB) {
DataType m = std::abs(v[0]);
for (size_t j = 1; j < lB; ++j) {
const DataType& abs_vj = std::abs(v[j]);
if (m < abs_vj) {
m = abs_vj;
}
}
return m;
};
auto update = [&](const DataType& m, auto& S, auto& C) {
if (m >= std::pow(DataType{2}, W - 1.) * ulp(S[0])) {
const size_t g = 1 + std::floor(std::log2(m / (std::pow(DataType{2}, W - 1.) * ulp(S[0]))) / W);
for (size_t k = K - 1; k >= g; --k) {
S[k] = S[k - g];
C[k] = C[k - g];
}
for (size_t k = 0; k < std::min(K, g); ++k) {
S[k] = 1.5 * std::pow(DataType{2}, g * W) * ufp(S[k]);
C[k] = 0;
}
}
};
auto split2 = [](DataType& S, DataType& x) {
union
{
static_assert(sizeof(DataType) == sizeof(unsigned long));
DataType as_DataType;
unsigned long as_long;
} x_bar;
x_bar.as_DataType = x;
x_bar.as_long |= 0x1;
const DataType S0 = S;
S += x_bar.as_DataType;
x -= S - S0;
};
auto extract_vector3 = [&](DataType& S, auto& v, const size_t lB) {
for (size_t i = 0; i < lB; ++i) {
split2(S, v[i]);
}
};
auto renormalize = [&](auto& S, auto& C) {
for (size_t k = 0; k < K; ++k) {
if (S[k] >= 1.75 * ufp(S[k])) {
S[k] -= 0.25 * ufp(S[k]);
C[k] += 1;
} else if (S[k] < 1.25 * ufp(S[k])) {
S[k] += 0.5 * ufp(S[k]);
C[k] -= 2;
} else if (S[k] < 1.5 * ufp(S[k])) {
S[k] += 0.25 * ufp(S[k]);
C[k] -= 1;
}
}
};
TinyVector<K, DataType> S;
for (size_t k = 0; k < K; ++k) {
S[k] = 0.75 * eps * std::pow(2., (K - k - 1) * W);
}
TinyVector<K, DataType> C = zero;
Array<DataType> local_array(NB);
for (size_t i = 0; i < m_array.size(); i += NB) {
const size_t lB = std::min(NB, m_array.size() - i);
std::copy_n(&(m_array[i]), lB, &(local_array[0]));
const DataType m = local_max(local_array, lB);
update(m, S, C);
for (size_t k = 0; k < K; ++k) {
extract_vector3(S[k], local_array, lB);
}
renormalize(S, C);
}
DataType sum = 0;
for (size_t k = 0; k < S.dimension(); ++k) {
sum += 0.25 * (C[k] - 6) * ufp(S[k]) + S[k];
}
return sum;
};
ArrayReproductibleSum(Array<DataType> array) : m_array{array} {}
~ArrayReproductibleSum() = default;
};
// Instantiate to ensure full coverage is performed // Instantiate to ensure full coverage is performed
template class Array<int>; template class Array<int>;
...@@ -319,6 +464,71 @@ TEST_CASE("Array", "[utils]") ...@@ -319,6 +464,71 @@ TEST_CASE("Array", "[utils]")
} }
} }
SECTION("reproducible floating point sum 2023")
{
std::clog.precision(16);
std::clog << "***** REP 2023 *****\n";
Array<double> array(10'000'000);
for (size_t i = 0; i < array.size(); ++i) {
array[i] = ((i + 1) % 100'000) * std::sin(3 * i + 1);
}
Timer t_direct_sum1;
double direct_sum1 = 0;
for (size_t i = 0; i < array.size(); ++i) {
direct_sum1 += array[i];
}
t_direct_sum1.pause();
Timer t_rsum1;
const double reprod_sum1 = ArrayReproductibleSum(array);
t_rsum1.pause();
std::clog << "# shuffling ...\n" << std::flush;
Timer t_shuffle;
std::random_shuffle(&(array[0]), &(array[0]) + array.size());
t_shuffle.pause();
std::clog << " shuffling done\n" << std::flush;
Timer t_direct_sum2;
double direct_sum2 = 0;
for (size_t i = 0; i < array.size(); ++i) {
direct_sum2 += array[i];
}
t_direct_sum2.pause();
Timer t_rsum2;
const double reprod_sum2 = ArrayReproductibleSum(array);
t_rsum2.pause();
std::clog << "---------\n";
std::clog << "direct sum1 =" << direct_sum1 << '\n';
std::clog << "direct sum2 =" << direct_sum2 << '\n';
std::clog << "reprod sum1 = " << reprod_sum1 << '\n';
std::clog << "reprod sum2 = " << reprod_sum2 << '\n';
std::clog << "---------\n";
std::clog << "delta direct =" << direct_sum1 - direct_sum2 << '\n';
std::clog << "delta reprod =" << reprod_sum1 - reprod_sum2 << '\n';
std::clog << "---------\n";
std::clog << "[t shuffle " << t_shuffle << "]\n";
std::clog << "---------\n";
std::clog << "[t dsum : " << t_direct_sum1 << " efficiency = " << t_direct_sum1.seconds() / t_direct_sum1.seconds()
<< "]\n";
std::clog << "[t rsum : " << t_rsum1 << " efficiency = " << t_rsum1.seconds() / t_direct_sum1.seconds() << "]\n";
std::clog << "---------\n";
std::clog << "[t dsum2: " << t_direct_sum2 << " efficiency = " << t_direct_sum2.seconds() / t_direct_sum2.seconds()
<< "]\n";
std::clog << "[t rsum2: " << t_rsum2 << " efficiency = " << t_rsum2.seconds() / t_direct_sum2.seconds() << "]\n";
double sum_new = ArrayReproductibleSum(array);
std::clog << "sum_new = " << sum_new << '\n';
REQUIRE(reprod_sum1 == reprod_sum2);
}
SECTION("checking for subArrayView") SECTION("checking for subArrayView")
{ {
Array<int> array{10}; Array<int> array{10};
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment