diff --git a/src/utils/ExecutionStatManager.cpp b/src/utils/ExecutionStatManager.cpp
index 6918d4d7acfc89bb12420b1540a240e1bcd026dc..d24d821b49439485e50c204181b7a6ec733643e4 100644
--- a/src/utils/ExecutionStatManager.cpp
+++ b/src/utils/ExecutionStatManager.cpp
@@ -103,10 +103,20 @@ void
 ExecutionStatManager::_printElapseTime() const
 {
   const double elapse_time = m_instance->m_elapse_time.seconds();
-  std::cout << "Execution: " << rang::style::bold << m_instance->m_elapse_time.seconds() << 's' << rang::style::reset;
+  std::cout << "Execution: " << rang::style::bold << elapse_time << 's' << rang::style::reset;
   if (elapse_time > 60) {
     std::cout << " [" << rang::style::bold << this->_prettyPrintTime(elapse_time) << rang::style::reset << ']';
   }
+  // After a resume (run number > 1), also report the time accumulated over all runs.
+  if (m_run_number > 1) {
+    const double cumulative_elapse_time = elapse_time + m_previous_cumulative_elapse_time;
+    std::cout << " (Run number " << m_run_number << ").\n - Cumulative execution time: " << rang::style::bold
+              << cumulative_elapse_time << 's' << rang::style::reset;
+    if (cumulative_elapse_time > 60) {
+      std::cout << " [" << rang::style::bold << this->_prettyPrintTime(cumulative_elapse_time) << rang::style::reset
+                << ']';
+    }
+  }
   std::cout << '\n';
 }
 
@@ -117,15 +127,26 @@ ExecutionStatManager::_printTotalCPUTime() const
   getrusage(RUSAGE_SELF, &u);
 
   const double total_cpu_time =
-    u.ru_utime.tv_sec + u.ru_stime.tv_sec + (u.ru_utime.tv_usec + u.ru_stime.tv_usec) * 1E-6;
+    parallel::allReduceSum(u.ru_utime.tv_sec + u.ru_stime.tv_sec + (u.ru_utime.tv_usec + u.ru_stime.tv_usec) * 1E-6);
 
-  std::cout << "Total CPU: " << rang::style::bold << parallel::allReduceSum(total_cpu_time) << 's'
-            << rang::style::reset;
+  std::cout << "Total CPU: " << rang::style::bold << total_cpu_time << 's' << rang::style::reset;
   std::cout << " (" << parallel::allReduceSum(Kokkos::DefaultHostExecutionSpace::concurrency()) << " threads over "
             << parallel::size() << " processes)";
   if (total_cpu_time > 60) {
     std::cout << " [" << _prettyPrintTime(total_cpu_time) << ']';
   }
+
+  // After a resume (run number > 1), also report the CPU time accumulated over all runs.
+  if (m_run_number > 1) {
+    const double cumulative_total_cpu_time = total_cpu_time + m_previous_cumulative_total_cpu_time;
+    std::cout << "\n - Cumulative total CPU: " << rang::style::bold << cumulative_total_cpu_time << 's'
+              << rang::style::reset;
+    if (cumulative_total_cpu_time > 60) {
+      std::cout << " [" << rang::style::bold << this->_prettyPrintTime(cumulative_total_cpu_time) << rang::style::reset
+                << ']';
+    }
+  }
+
   std::cout << '\n';
 }
 
@@ -142,6 +163,27 @@ ExecutionStatManager::printInfo()
   }
 }
 
+// Returns the elapsed time of the current run added to the cumulative time of previous runs.
+double
+ExecutionStatManager::getCumulativeElapseTime() const
+{
+  return m_previous_cumulative_elapse_time + m_elapse_time.seconds();
+}
+
+// Returns the CPU time (user + system, as reported by getrusage and reduced with
+// parallel::allReduceSum) of the current run added to the cumulative CPU time of previous runs.
+double
+ExecutionStatManager::getCumulativeTotalCPUTime() const
+{
+  rusage u;
+  getrusage(RUSAGE_SELF, &u);
+
+  const double total_cpu_time =
+    u.ru_utime.tv_sec + u.ru_stime.tv_sec + (u.ru_utime.tv_usec + u.ru_stime.tv_usec) * 1E-6;
+
+  return m_previous_cumulative_total_cpu_time + parallel::allReduceSum(total_cpu_time);
+}
+
 void
 ExecutionStatManager::create()
 {
diff --git a/src/utils/ExecutionStatManager.hpp b/src/utils/ExecutionStatManager.hpp
index 3738492a1741c7ee7a4e4a45edf4139b579a022e..75adce89bd5469df2cc60d6c8db6c220dc3fc85a 100644
--- a/src/utils/ExecutionStatManager.hpp
+++ b/src/utils/ExecutionStatManager.hpp
@@ -13,6 +13,11 @@ class ExecutionStatManager
   bool m_do_print = true;
   int m_exit_code = 0;
 
+  // Statistics carried over checkpoint/resume cycles (defaults describe a first run)
+  size_t m_run_number                         = 1;
+  double m_previous_cumulative_elapse_time    = 0;
+  double m_previous_cumulative_total_cpu_time = 0;
+
   std::string _prettyPrintTime(double seconds) const;
 
   void _printMaxResidentMemory() const;
@@ -25,6 +30,26 @@ class ExecutionStatManager
   ~ExecutionStatManager() = default;
 
  public:
+  // Elapsed time of this run plus the value given to setPreviousCumulativeElapseTime()
+  double getCumulativeElapseTime() const;
+
+  // Total CPU time of this run plus the value given to setPreviousCumulativeTotalCPUTime()
+  double getCumulativeTotalCPUTime() const;
+
+  PUGS_INLINE
+  void
+  setPreviousCumulativeElapseTime(double cumulative_elapse_time)
+  {
+    m_previous_cumulative_elapse_time = cumulative_elapse_time;
+  }
+
+  PUGS_INLINE
+  void
+  setPreviousCumulativeTotalCPUTime(double cumulative_cpu_time)
+  {
+    m_previous_cumulative_total_cpu_time = cumulative_cpu_time;
+  }
+
   PUGS_INLINE
   bool
   doPrint() const
@@ -39,6 +64,21 @@ class ExecutionStatManager
     m_do_print = do_print;
   }
 
+  // 1 unless changed through setRunNumber(); values above 1 enable the cumulative reports
+  PUGS_INLINE
+  size_t
+  runNumber() const
+  {
+    return m_run_number;
+  }
+
+  PUGS_INLINE
+  void
+  setRunNumber(size_t run_number)
+  {
+    m_run_number = run_number;
+  }
+
   PUGS_INLINE
   int
   exitCode() const
diff --git a/src/utils/checkpointing/Checkpoint.cpp b/src/utils/checkpointing/Checkpoint.cpp
index e7325fa862033a5865ee9cddf8f545d485f68808..2598fa1c289eacbb7c5b577e090a1a3e2f278d44 100644
--- a/src/utils/checkpointing/Checkpoint.cpp
+++ b/src/utils/checkpointing/Checkpoint.cpp
@@ -15,6 +15,7 @@
 
 #include <language/utils/ASTCheckpointsInfo.hpp>
 #include <utils/Exceptions.hpp>
+#include <utils/ExecutionStatManager.hpp>
 #include <utils/checkpointing/ResumingManager.hpp>
 
 #ifdef PUGS_HAS_HDF5
@@ -63,13 +64,22 @@ checkpoint()
     checkpoint.createAttribute("data.pgs", ASTExecutionStack::getInstance().fileContent());
 
     {
-      HighFive::Group random_seed = checkpoint.createGroup("singleton/random_seed");
-      random_seed.createAttribute("current_seed", RandomEngine::instance().getCurrentSeed());
+      HighFive::Group global_variables_group = checkpoint.createGroup("singleton/global_variables");
+      global_variables_group.createAttribute("connectivity_id", GlobalVariableManager::instance().getConnectivityId());
+      global_variables_group.createAttribute("mesh_id", GlobalVariableManager::instance().getMeshId());
     }
     {
-      HighFive::Group global_variables = checkpoint.createGroup("singleton/global_variables");
-      global_variables.createAttribute("connectivity_id", GlobalVariableManager::instance().getConnectivityId());
-      global_variables.createAttribute("mesh_id", GlobalVariableManager::instance().getMeshId());
+      HighFive::Group random_seed_group = checkpoint.createGroup("singleton/random_seed");
+      random_seed_group.createAttribute("current_seed", RandomEngine::instance().getCurrentSeed());
+    }
+    {
+      // Stored so that resume() can number the next run and continue the cumulative timings
+      HighFive::Group execution_info_group = checkpoint.createGroup("singleton/execution_info");
+      execution_info_group.createAttribute("run_number", ExecutionStatManager::getInstance().runNumber());
+      execution_info_group.createAttribute("cumulative_elapse_time",
+                                           ExecutionStatManager::getInstance().getCumulativeElapseTime());
+      execution_info_group.createAttribute("cumulative_total_cpu_time",
+                                           ExecutionStatManager::getInstance().getCumulativeTotalCPUTime());
     }
     {
       std::cout << rang::fgB::magenta << "Checkpoint DualConnectivityManager NIY" << rang::fg::reset << '\n';
diff --git a/src/utils/checkpointing/Resume.cpp b/src/utils/checkpointing/Resume.cpp
index c71f79eb8fa9603108cac6498c4070a50b9942b4..70c610ebe09b34ed8cd83caedaf00450ef912045 100644
--- a/src/utils/checkpointing/Resume.cpp
+++ b/src/utils/checkpointing/Resume.cpp
@@ -18,6 +18,7 @@
 #ifdef PUGS_HAS_HDF5
 
 #include <mesh/Connectivity.hpp>
+#include <utils/ExecutionStatManager.hpp>
 #include <utils/RandomEngine.hpp>
 #include <utils/checkpointing/ResumeUtils.hpp>
 #include <utils/checkpointing/ResumingData.hpp>
@@ -50,10 +51,22 @@ resume()
               << checkpoint.getAttribute("name").read<std::string>() << rang::fg::reset << "]\n";
 
     {
-      HighFive::Group random_seed = checkpoint.getGroup("singleton/random_seed");
-      RandomEngine::instance().setRandomSeed(random_seed.getAttribute("current_seed").read<uint64_t>());
+      HighFive::Group random_seed_group = checkpoint.getGroup("singleton/random_seed");
+      RandomEngine::instance().setRandomSeed(random_seed_group.getAttribute("current_seed").read<uint64_t>());
+    }
+    {
+      HighFive::Group execution_info_group = checkpoint.getGroup("singleton/execution_info");
+      const size_t run_number              = execution_info_group.getAttribute("run_number").read<size_t>();
+      const double cumulative_elapse_time =
+        execution_info_group.getAttribute("cumulative_elapse_time").read<double>();
+      const double cumulative_total_cpu_time =
+        execution_info_group.getAttribute("cumulative_total_cpu_time").read<double>();
+
+      // The resumed execution counts as a new run following the checkpointed one
+      ExecutionStatManager::getInstance().setRunNumber(run_number + 1);
+      ExecutionStatManager::getInstance().setPreviousCumulativeElapseTime(cumulative_elapse_time);
+      ExecutionStatManager::getInstance().setPreviousCumulativeTotalCPUTime(cumulative_total_cpu_time);
     }
-
     {
       std::cout << rang::fgB::magenta << "Resume DualConnectivityManager NIY" << rang::fg::reset << '\n';
       std::cout << rang::fgB::magenta << "Resume DualMeshManager NIY" << rang::fg::reset << '\n';