From b4746f6d5b8ebbe1267e526678a30d0396238021 Mon Sep 17 00:00:00 2001 From: Stephane Del Pino <stephane.delpino44@gmail.com> Date: Wed, 5 Jun 2024 08:24:21 +0200 Subject: [PATCH] Remove deprecated checkpoints when not resuming from last checkpoint This happens when the resuming checkpoint is changed manually. Let n be the number of the last checkpoint and i be the number of the resuming checkpoint. If i != n, then checkpoints i+1, i+2, ..., n are removed when checkpoint i+1 is written. --- src/utils/checkpointing/Checkpoint.cpp | 24 +++++++++++++++++------- src/utils/checkpointing/Resume.cpp | 2 +- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/src/utils/checkpointing/Checkpoint.cpp b/src/utils/checkpointing/Checkpoint.cpp index e9fe8ccd2..ffd8c1c46 100644 --- a/src/utils/checkpointing/Checkpoint.cpp +++ b/src/utils/checkpointing/Checkpoint.cpp @@ -51,6 +51,20 @@ checkpoint() << rang::fgB::yellow << ASTExecutionStack::getInstance().currentNode().begin().line << rang::fg::reset << " [using " << rang::fgB::cyan << checkpoint_name << rang::fg::reset << "]\n"; + if (checkpoint_number > 0) { + uint64_t resuming_checkpoint_number = + file.getGroup("resuming_checkpoint").getAttribute("checkpoint_number").read<uint64_t>(); + uint64_t last_checkpoint_number = + file.getGroup("last_checkpoint").getAttribute("checkpoint_number").read<uint64_t>(); + for (uint64_t i = resuming_checkpoint_number + 1; i <= last_checkpoint_number; ++i) { + std::string old_checkpoint_name = "checkpoint_" + std::to_string(i); + std::cout << " - removing deprecated " << rang::fgB::yellow << old_checkpoint_name << rang::fg::reset << '\n'; + if (file.exist(old_checkpoint_name)) { + file.unlink(old_checkpoint_name); + } + } + } + HighFive::Group checkpoint = file.createGroup(checkpoint_name); uint64_t checkpoint_id = @@ -68,6 +82,9 @@ checkpoint() checkpoint.createAttribute("id", checkpoint_id); checkpoint.createAttribute("data.pgs", 
ASTExecutionStack::getInstance().fileContent()); + checkpoint.createAttribute("checkpoint_number", checkpoint_number); + ++checkpoint_number; + { HighFive::Group global_variables_group = checkpoint.createGroup("singleton/global_variables"); global_variables_group.createAttribute("connectivity_id", GlobalVariableManager::instance().getConnectivityId()); @@ -209,13 +226,6 @@ checkpoint() file.unlink("resuming_checkpoint"); } file.createHardLink("resuming_checkpoint", checkpoint); - - if (file.hasAttribute("checkpoint_number")) { - file.deleteAttribute("checkpoint_number"); - } - file.createAttribute("checkpoint_number", checkpoint_number); - - ++checkpoint_number; } catch (HighFive::Exception& e) { throw NormalError(e.what()); diff --git a/src/utils/checkpointing/Resume.cpp b/src/utils/checkpointing/Resume.cpp index a4991f10e..dbb539771 100644 --- a/src/utils/checkpointing/Resume.cpp +++ b/src/utils/checkpointing/Resume.cpp @@ -37,7 +37,7 @@ resume() ResumingManager& resuming_manager = ResumingManager::getInstance(); - resuming_manager.checkpointNumber() = file.getAttribute("checkpoint_number").read<uint64_t>() + 1; + resuming_manager.checkpointNumber() = checkpoint.getAttribute("checkpoint_number").read<uint64_t>() + 1; std::cout << " * " << rang::fgB::green << "Resuming " << rang::fg::reset << "execution at line " << rang::fgB::yellow << p_node->begin().line << rang::fg::reset << " [using " << rang::fgB::cyan -- GitLab