Skip to content
Snippets Groups Projects
Commit e84e6de3 authored by Stéphane Del Pino
Browse files

Improve stat display (especially in parallel)

parent db0d26e7
No related branches found
No related tags found
1 merge request: !175 "Print execution statistics at the end of execution"
...@@ -11,6 +11,8 @@ ...@@ -11,6 +11,8 @@
int int
main(int argc, char* argv[]) main(int argc, char* argv[])
{ {
ExecutionStatManager::create();
std::string filename = initialize(argc, argv); std::string filename = initialize(argc, argv);
SynchronizerManager::create(); SynchronizerManager::create();
...@@ -19,13 +21,10 @@ main(int argc, char* argv[]) ...@@ -19,13 +21,10 @@ main(int argc, char* argv[])
MeshDataManager::create(); MeshDataManager::create();
DualConnectivityManager::create(); DualConnectivityManager::create();
DualMeshManager::create(); DualMeshManager::create();
ExecutionStatManager::create();
parser(filename); parser(filename);
ExecutionStatManager::printInfo(); ExecutionStatManager::printInfo();
ExecutionStatManager::destroy();
DualMeshManager::destroy(); DualMeshManager::destroy();
DualConnectivityManager::destroy(); DualConnectivityManager::destroy();
MeshDataManager::destroy(); MeshDataManager::destroy();
...@@ -35,5 +34,7 @@ main(int argc, char* argv[]) ...@@ -35,5 +34,7 @@ main(int argc, char* argv[])
finalize(); finalize();
ExecutionStatManager::destroy();
return 0; return 0;
} }
...@@ -12,29 +12,63 @@ ExecutionStatManager* ExecutionStatManager::m_instance = nullptr; ...@@ -12,29 +12,63 @@ ExecutionStatManager* ExecutionStatManager::m_instance = nullptr;
void void
ExecutionStatManager::_printMaxResidentMemory() const ExecutionStatManager::_printMaxResidentMemory() const
{ {
const std::vector<std::string> units = {"B", "KB", "MB", "GB", "TB", "PB", "EB"}; class Memory
{
private:
double m_value;
double count = [] { public:
rusage u; PUGS_INLINE const double&
getrusage(RUSAGE_SELF, &u); value() const
return u.ru_maxrss * 1024; {
}(); return m_value;
}
std::string
prettyPrint() const
{
const std::vector<std::string> units = {"B", "KB", "MB", "GB", "TB", "PB", "EB"};
double local_memory = m_value;
size_t i_unit = 0; size_t i_unit = 0;
while ((count >= 1024) and (i_unit < units.size())) { while ((local_memory >= 1024) and (i_unit < units.size())) {
++i_unit; ++i_unit;
count /= 1024; local_memory /= 1024;
}
std::ostringstream os;
os << local_memory << units[i_unit];
return os.str();
} }
std::cout << rang::style::bold << "Maximum memory: " << rang::fgB::cyan << count << rang::style::reset Memory()
<< rang::style::bold << units[i_unit] << rang::style::reset << '\n'; {
rusage u;
getrusage(RUSAGE_SELF, &u);
m_value = u.ru_maxrss * 1024;
}
Memory(double value) : m_value{value} {}
};
Memory memory;
std::cout << "Memory: " << rang::style::bold << Memory{parallel::allReduceSum(memory.value())}.prettyPrint()
<< rang::style::reset;
if (parallel::size() > 1) {
std::cout << " Avg: " << rang::style::bold
<< Memory{parallel::allReduceSum(memory.value()) / parallel::size()}.prettyPrint() << rang::style::reset;
std::cout << " Min: " << rang::style::bold << Memory{parallel::allReduceMin(memory.value())}.prettyPrint()
<< rang::style::reset;
std::cout << " Max: " << rang::style::bold << Memory{parallel::allReduceMax(memory.value())}.prettyPrint()
<< rang::style::reset;
}
std::cout << '\n';
} }
void void
ExecutionStatManager::_printElapseTime() const ExecutionStatManager::_printElapseTime() const
{ {
std::cout << rang::style::bold << "Execution time: " << rang::fgB::cyan << m_instance->m_elapse_time.seconds() std::cout << "Execution: " << rang::style::bold << m_instance->m_elapse_time.seconds() << 's' << rang::style::reset
<< rang::style::reset << rang::style::bold << 's' << rang::style::reset << rang::fg::reset << '\n'; << '\n';
} }
void void
...@@ -43,24 +77,54 @@ ExecutionStatManager::_printTotalCPUTime() const ...@@ -43,24 +77,54 @@ ExecutionStatManager::_printTotalCPUTime() const
rusage u; rusage u;
getrusage(RUSAGE_SELF, &u); getrusage(RUSAGE_SELF, &u);
double total_cpu_time = u.ru_utime.tv_sec + u.ru_stime.tv_sec + (u.ru_utime.tv_usec + u.ru_stime.tv_usec) * 1E-6; const double total_cpu_time =
u.ru_utime.tv_sec + u.ru_stime.tv_sec + (u.ru_utime.tv_usec + u.ru_stime.tv_usec) * 1E-6;
std::cout << rang::style::bold << "Total CPU time: " << rang::fgB::cyan << total_cpu_time << rang::style::reset std::cout << "Total CPU: " << rang::style::bold << parallel::allReduceSum(total_cpu_time) << 's'
<< rang::style::bold << 's' << rang::style::reset << rang::fg::reset << '\n'; << rang::style::reset;
std::cout << " Using " << parallel::allReduceSum(Kokkos::DefaultHostExecutionSpace::concurrency()) std::cout << " (" << parallel::allReduceSum(Kokkos::DefaultHostExecutionSpace::concurrency()) << " threads over "
<< " threads distributed over " << parallel::size() << " processes" << parallel::size() << " processes)";
<< "\n"; if (total_cpu_time > 60) {
size_t seconds = std::floor(total_cpu_time);
const size_t days = seconds / (24 * 3600);
seconds -= days * (24 * 3600);
const size_t hours = seconds / 3600;
seconds -= hours * 3600;
const size_t minutes = seconds / 60;
seconds -= minutes * 60;
std::cout << " " << rang::style::bold;
bool print = false;
if (days > 0) {
print = true;
std::cout << days << "d" << ' ';
}
if (print or (hours > 0)) {
print = true;
std::cout << std::setw(2) << std::setfill('0') << hours << "h";
}
if (print or (minutes > 0)) {
print = true;
std::cout << std::setw(2) << std::setfill('0') << minutes << "mn";
}
if (print) {
std::cout << rang::style::bold << std::setw(2) << std::setfill('0') << seconds << "s";
}
std::cout << rang::style::reset;
}
std::cout << '\n';
} }
void void
ExecutionStatManager::printInfo() ExecutionStatManager::printInfo()
{ {
std::cout << "----------------- " << rang::fgB::green << "pugs exec stats" << rang::fg::reset if (ExecutionStatManager::getInstance().doPrint()) {
std::cout << "----------------- " << rang::fg::green << "pugs exec stats" << rang::fg::reset
<< " ---------------------\n"; << " ---------------------\n";
ExecutionStatManager::m_instance->_printElapseTime(); ExecutionStatManager::getInstance()._printElapseTime();
ExecutionStatManager::m_instance->_printTotalCPUTime(); ExecutionStatManager::getInstance()._printTotalCPUTime();
ExecutionStatManager::m_instance->_printMaxResidentMemory(); ExecutionStatManager::getInstance()._printMaxResidentMemory();
}
} }
void void
......
#ifndef EXECUTION_STAT_MANAGER_HPP #ifndef EXECUTION_STAT_MANAGER_HPP
#define EXECUTION_STAT_MANAGER_HPP #define EXECUTION_STAT_MANAGER_HPP
#include <utils/PugsAssert.hpp>
#include <utils/Timer.hpp> #include <utils/Timer.hpp>
class ExecutionStatManager class ExecutionStatManager
...@@ -9,6 +10,7 @@ class ExecutionStatManager ...@@ -9,6 +10,7 @@ class ExecutionStatManager
static ExecutionStatManager* m_instance; static ExecutionStatManager* m_instance;
Timer m_elapse_time; Timer m_elapse_time;
bool m_do_print = true;
void _printMaxResidentMemory() const; void _printMaxResidentMemory() const;
void _printElapseTime() const; void _printElapseTime() const;
...@@ -20,6 +22,28 @@ class ExecutionStatManager ...@@ -20,6 +22,28 @@ class ExecutionStatManager
~ExecutionStatManager() = default; ~ExecutionStatManager() = default;
public: public:
// Tells whether the execution statistics should be displayed.
// Returns the current value of the m_do_print flag (initialized to true).
PUGS_INLINE
bool
doPrint() const
{
return m_do_print;
}
// Sets the m_do_print flag reported by doPrint().
// do_print: true to enable the statistics display, false to disable it.
PUGS_INLINE
void
setPrint(bool do_print)
{
m_do_print = do_print;
}
// Gives access to the ExecutionStatManager instance pointed to by m_instance.
// m_instance must be non-null when this is called (checked by the Assert).
// NOTE(review): presumably create() is what sets m_instance -- its body is
// not visible here, confirm before relying on call ordering.
PUGS_INLINE
static ExecutionStatManager&
getInstance()
{
Assert(m_instance != nullptr); // LCOV_EXCL_LINE
return *m_instance;
}
static void printInfo(); static void printInfo();
static void create(); static void create();
static void destroy(); static void destroy();
......
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@
#include <utils/BuildInfo.hpp> #include <utils/BuildInfo.hpp>
#include <utils/CommunicatorManager.hpp> #include <utils/CommunicatorManager.hpp>
#include <utils/ConsoleManager.hpp> #include <utils/ConsoleManager.hpp>
#include <utils/ExecutionStatManager.hpp>
#include <utils/FPEManager.hpp> #include <utils/FPEManager.hpp>
#include <utils/Messenger.hpp> #include <utils/Messenger.hpp>
#include <utils/PETScWrapper.hpp> #include <utils/PETScWrapper.hpp>
...@@ -110,6 +111,10 @@ initialize(int& argc, char* argv[]) ...@@ -110,6 +111,10 @@ initialize(int& argc, char* argv[])
bool show_preamble = true; bool show_preamble = true;
app.add_flag("--preamble,!--no-preamble", show_preamble, "Show execution info preamble [default: true]"); app.add_flag("--preamble,!--no-preamble", show_preamble, "Show execution info preamble [default: true]");
bool print_exec_stat = true;
app.add_flag("--exec-stat,!--no-exec-stat", print_exec_stat,
"Display memory and CPU usage after execution [default: true]");
bool show_backtrace = true; bool show_backtrace = true;
app.add_flag("-b,--backtrace,!--no-backtrace", show_backtrace, "Show backtrace on failure [default: true]"); app.add_flag("-b,--backtrace,!--no-backtrace", show_backtrace, "Show backtrace on failure [default: true]");
...@@ -141,6 +146,7 @@ initialize(int& argc, char* argv[]) ...@@ -141,6 +146,7 @@ initialize(int& argc, char* argv[])
CommunicatorManager::setSplitColor(mpi_split_color); CommunicatorManager::setSplitColor(mpi_split_color);
} }
ExecutionStatManager::getInstance().setPrint(print_exec_stat);
BacktraceManager::setShow(show_backtrace); BacktraceManager::setShow(show_backtrace);
ConsoleManager::setShowPreamble(show_preamble); ConsoleManager::setShowPreamble(show_preamble);
ConsoleManager::init(enable_color); ConsoleManager::init(enable_color);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment