diff --git a/src/main.cpp b/src/main.cpp
index bcd10541d725c89b347ea5f62e22b44046cf91f4..53c63326b6702809dcf89127b119d9f377ca68fe 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -11,6 +11,8 @@
 int
 main(int argc, char* argv[])
 {
+  ExecutionStatManager::create();
+
   std::string filename = initialize(argc, argv);
 
   SynchronizerManager::create();
@@ -19,13 +21,10 @@ main(int argc, char* argv[])
   MeshDataManager::create();
   DualConnectivityManager::create();
   DualMeshManager::create();
-  ExecutionStatManager::create();
 
   parser(filename);
-
   ExecutionStatManager::printInfo();
 
-  ExecutionStatManager::destroy();
   DualMeshManager::destroy();
   DualConnectivityManager::destroy();
   MeshDataManager::destroy();
@@ -35,5 +34,7 @@ main(int argc, char* argv[])
 
   finalize();
 
+  ExecutionStatManager::destroy();
+
   return 0;
 }
diff --git a/src/utils/ExecutionStatManager.cpp b/src/utils/ExecutionStatManager.cpp
index 0ea87c550d744cd0e33e11d0a3c71b47a686ffcb..ec018d00b1e8e31c0efc89d237339a7fb8d1f1bb 100644
--- a/src/utils/ExecutionStatManager.cpp
+++ b/src/utils/ExecutionStatManager.cpp
@@ -12,29 +12,81 @@ ExecutionStatManager* ExecutionStatManager::m_instance = nullptr;
 void
 ExecutionStatManager::_printMaxResidentMemory() const
 {
-  const std::vector<std::string> units = {"B", "KB", "MB", "GB", "TB", "PB", "EB"};
-
-  double count = [] {
-    rusage u;
-    getrusage(RUSAGE_SELF, &u);
-    return u.ru_maxrss * 1024;
-  }();
-
-  size_t i_unit = 0;
-  while ((count >= 1024) and (i_unit < units.size())) {
-    ++i_unit;
-    count /= 1024;
-  }
+  // Prints the memory amounts obtained by reducing each process' peak resident
+  // memory: the sum and, when more than one process runs, its average, minimum
+  // and maximum.
+
+  // Local helper: an amount of memory in bytes with its human-readable form.
+  class Memory
+  {
+   private:
+    double m_value;   // memory amount, in bytes
+
+   public:
+    // Gives access to the stored amount of memory.
+    PUGS_INLINE const double&
+    value() const
+    {
+      return m_value;
+    }
+
+    // Builds a string made of the value divided by successive powers of 1024,
+    // followed by the matching unit suffix ("B", "KB", ..., "EB").
+    std::string
+    prettyPrint() const
+    {
+      const std::vector<std::string> units = {"B", "KB", "MB", "GB", "TB", "PB", "EB"};
+
+      double local_memory = m_value;
+      size_t i_unit       = 0;
+      // Never step past the last unit: units[i_unit] is read below, so
+      // i_unit must remain a valid index whatever the value is.
+      while ((local_memory >= 1024) and (i_unit + 1 < units.size())) {
+        ++i_unit;
+        local_memory /= 1024;
+      }
+      std::ostringstream os;
+      os << local_memory << units[i_unit];
+      return os.str();
+    }
+
+    // Reads the peak resident set size of the calling process through
+    // getrusage(). The factor 1024 treats ru_maxrss as kilobytes (Linux
+    // convention; NOTE(review): confirm the unit on other systems).
+    Memory()
+    {
+      rusage u;
+      getrusage(RUSAGE_SELF, &u);
+      // convert before scaling so that the product is computed in double
+      m_value = static_cast<double>(u.ru_maxrss) * 1024;
+    }
 
-  std::cout << rang::style::bold << "Maximum memory: " << rang::fgB::cyan << count << rang::style::reset
-            << rang::style::bold << units[i_unit] << rang::style::reset << '\n';
+    // Wraps an already computed amount of memory (bytes).
+    explicit Memory(double value) : m_value{value} {}
+  };
+
+  const Memory memory;
+  // The sum reduction is performed once and reused to compute the average.
+  const double total_memory = parallel::allReduceSum(memory.value());
+
+  std::cout << "Memory: " << rang::style::bold << Memory{total_memory}.prettyPrint() << rang::style::reset;
+  if (parallel::size() > 1) {
+    std::cout << " Avg: " << rang::style::bold << Memory{total_memory / parallel::size()}.prettyPrint()
+              << rang::style::reset;
+    std::cout << " Min: " << rang::style::bold << Memory{parallel::allReduceMin(memory.value())}.prettyPrint()
+              << rang::style::reset;
+    std::cout << " Max: " << rang::style::bold << Memory{parallel::allReduceMax(memory.value())}.prettyPrint()
+              << rang::style::reset;
+  }
+  std::cout << '\n';
 }
 
 void
 ExecutionStatManager::_printElapseTime() const
 {
-  std::cout << rang::style::bold << "Execution time: " << rang::fgB::cyan << m_instance->m_elapse_time.seconds()
-            << rang::style::reset << rang::style::bold << 's' << rang::style::reset << rang::fg::reset << '\n';
+  // Prints the value returned by the instance's m_elapse_time timer, suffixed with 's'.
+  std::cout << "Execution: " << rang::style::bold << m_instance->m_elapse_time.seconds() << 's' << rang::style::reset
+            << '\n';
 }
 
 void
@@ -43,24 +77,65 @@ ExecutionStatManager::_printTotalCPUTime() const
   rusage u;
   getrusage(RUSAGE_SELF, &u);
 
-  double total_cpu_time = u.ru_utime.tv_sec + u.ru_stime.tv_sec + (u.ru_utime.tv_usec + u.ru_stime.tv_usec) * 1E-6;
+  // User + system CPU time of the calling process, in seconds.
+  const double total_cpu_time =
+    u.ru_utime.tv_sec + u.ru_stime.tv_sec + (u.ru_utime.tv_usec + u.ru_stime.tv_usec) * 1E-6;
 
-  std::cout << rang::style::bold << "Total CPU time: " << rang::fgB::cyan << total_cpu_time << rang::style::reset
-            << rang::style::bold << 's' << rang::style::reset << rang::fg::reset << '\n';
-  std::cout << " Using " << parallel::allReduceSum(Kokkos::DefaultHostExecutionSpace::concurrency())
-            << " threads distributed over " << parallel::size() << " processes"
-            << "\n";
+  // Reduce once and reuse: the figure in seconds and its d/h/mn/s breakdown
+  // must describe the same (reduced) duration.
+  const double global_cpu_time = parallel::allReduceSum(total_cpu_time);
+
+  std::cout << "Total CPU: " << rang::style::bold << global_cpu_time << 's' << rang::style::reset;
+  std::cout << " (" << parallel::allReduceSum(Kokkos::DefaultHostExecutionSpace::concurrency()) << " threads over "
+            << parallel::size() << " processes)";
+  if (global_cpu_time > 60) {
+    size_t seconds    = static_cast<size_t>(std::floor(global_cpu_time));
+    const size_t days = seconds / (24 * 3600);
+    seconds -= days * (24 * 3600);
+    const size_t hours = seconds / 3600;
+    seconds -= hours * 3600;
+    const size_t minutes = seconds / 60;
+    seconds -= minutes * 60;
+
+    // std::setfill is sticky: remember the stream's fill character so that it
+    // can be restored once the zero-padded fields have been written.
+    const char previous_fill = std::cout.fill();
+    std::cout << " " << rang::style::bold;
+    bool print = false;
+    if (days > 0) {
+      print = true;
+      std::cout << days << "d" << ' ';
+    }
+    if (print or (hours > 0)) {
+      print = true;
+      std::cout << std::setw(2) << std::setfill('0') << hours << "h";
+    }
+    if (print or (minutes > 0)) {
+      print = true;
+      std::cout << std::setw(2) << std::setfill('0') << minutes << "mn";
+    }
+    if (print) {
+      std::cout << std::setw(2) << std::setfill('0') << seconds << "s";
+    }
+    std::cout << rang::style::reset;
+    std::cout.fill(previous_fill);
+  }
+  std::cout << '\n';
 }
 
 void
 ExecutionStatManager::printInfo()
 {
-  std::cout << "----------------- " << rang::fgB::green << "pugs exec stats" << rang::fg::reset
-            << " ---------------------\n";
+  // Nothing is displayed unless the instance's doPrint() flag is true.
+  const ExecutionStatManager& stat_manager = ExecutionStatManager::getInstance();
+  if (stat_manager.doPrint()) {
+    std::cout << "----------------- " << rang::fg::green << "pugs exec stats" << rang::fg::reset
+              << " ---------------------\n";
 
-  ExecutionStatManager::m_instance->_printElapseTime();
-  ExecutionStatManager::m_instance->_printTotalCPUTime();
-  ExecutionStatManager::m_instance->_printMaxResidentMemory();
+    stat_manager._printElapseTime();
+    stat_manager._printTotalCPUTime();
+    stat_manager._printMaxResidentMemory();
+  }
 }
 
 void
diff --git a/src/utils/ExecutionStatManager.hpp b/src/utils/ExecutionStatManager.hpp
index 4728366bc225edcb9c7d79989e6b86d24c609db7..476b31a84f80234a6e8d509ddac88daf9d3b38f8 100644
--- a/src/utils/ExecutionStatManager.hpp
+++ b/src/utils/ExecutionStatManager.hpp
@@ -1,6 +1,7 @@
 #ifndef EXECUTION_STAT_MANAGER_HPP
 #define EXECUTION_STAT_MANAGER_HPP
 
+#include <utils/PugsAssert.hpp>
 #include <utils/Timer.hpp>
 
 class ExecutionStatManager
@@ -9,6 +10,7 @@ class ExecutionStatManager
   static ExecutionStatManager* m_instance;
 
   Timer m_elapse_time;
+  bool m_do_print = true;
 
   void _printMaxResidentMemory() const;
   void _printElapseTime() const;
@@ -20,6 +22,28 @@ class ExecutionStatManager
   ~ExecutionStatManager()                           = default;
 
  public:
+  PUGS_INLINE
+  bool
+  doPrint() const   // returns the flag that printInfo() checks before displaying anything
+  {
+    return m_do_print;
+  }
+
+  PUGS_INLINE
+  void
+  setPrint(bool do_print)   // stores do_print as the flag returned by doPrint()
+  {
+    m_do_print = do_print;
+  }
+
+  PUGS_INLINE
+  static ExecutionStatManager&
+  getInstance()   // returns a reference to the object pointed to by m_instance
+  {
+    Assert(m_instance != nullptr);   // LCOV_EXCL_LINE
+    return *m_instance;   // m_instance is checked to be non-null by the Assert above
+  }
+
   static void printInfo();
   static void create();
   static void destroy();
diff --git a/src/utils/PugsUtils.cpp b/src/utils/PugsUtils.cpp
index 8531aa6e8ee3091c952c886ec438b42067d4eee9..f57516ca249abc08fcac6dfbd590f656df503dab 100644
--- a/src/utils/PugsUtils.cpp
+++ b/src/utils/PugsUtils.cpp
@@ -4,6 +4,7 @@
 #include <utils/BuildInfo.hpp>
 #include <utils/CommunicatorManager.hpp>
 #include <utils/ConsoleManager.hpp>
+#include <utils/ExecutionStatManager.hpp>
 #include <utils/FPEManager.hpp>
 #include <utils/Messenger.hpp>
 #include <utils/PETScWrapper.hpp>
@@ -110,6 +111,10 @@ initialize(int& argc, char* argv[])
     bool show_preamble = true;
     app.add_flag("--preamble,!--no-preamble", show_preamble, "Show execution info preamble [default: true]");
 
+    bool print_exec_stat = true;
+    app.add_flag("--exec-stat,!--no-exec-stat", print_exec_stat,
+                 "Display memory and CPU usage after execution [default: true]");
+
     bool show_backtrace = true;
     app.add_flag("-b,--backtrace,!--no-backtrace", show_backtrace, "Show backtrace on failure [default: true]");
 
@@ -141,6 +146,7 @@ initialize(int& argc, char* argv[])
       CommunicatorManager::setSplitColor(mpi_split_color);
     }
 
+    ExecutionStatManager::getInstance().setPrint(print_exec_stat);
     BacktraceManager::setShow(show_backtrace);
     ConsoleManager::setShowPreamble(show_preamble);
     ConsoleManager::init(enable_color);