diff --git a/CMakeLists.txt b/CMakeLists.txt
index 95f96478b331b3790fb57946a031cea15fe73d67..a4aa863b266890293cb48ceaf21990120f4d303b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -113,6 +113,16 @@ if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
   endif()
 endif()
 
+#------------------------------------------------------
+
+include (TestBigEndian)
+TEST_BIG_ENDIAN(IS_BIG_ENDIAN)
+if(IS_BIG_ENDIAN)
+  set(PUGS_LITTLE_ENDIAN FALSE)
+else()
+  set(PUGS_LITTLE_ENDIAN TRUE)
+endif()
+
 #------------------------------------------------------
 # defaults use of MPI
 set(PUGS_ENABLE_MPI AUTO CACHE STRING
diff --git a/src/language/modules/WriterModule.cpp b/src/language/modules/WriterModule.cpp
index 611952939906e0afce5ea4682428ec1eefd36eaa..9f2fd98f103ec101190011b7a0ed1ded288ef009 100644
--- a/src/language/modules/WriterModule.cpp
+++ b/src/language/modules/WriterModule.cpp
@@ -6,6 +6,7 @@
 #include <mesh/GmshReader.hpp>
 #include <mesh/Mesh.hpp>
 #include <output/GnuplotWriter.hpp>
+#include <output/GnuplotWriter1D.hpp>
 #include <output/IWriter.hpp>
 #include <output/NamedDiscreteFunction.hpp>
 #include <output/VTKWriter.hpp>
@@ -39,6 +40,16 @@ WriterModule::WriterModule()
 
                               ));
 
+  this->_addBuiltinFunction("gnuplot_1d_writer",
+                            std::make_shared<BuiltinFunctionEmbedder<std::shared_ptr<const IWriter>(const std::string&,
+                                                                                                    const double&)>>(
+
+                              [](const std::string& filename, const double& period) {
+                                return std::make_shared<GnuplotWriter1D>(filename, period);
+                              }
+
+                              ));
+
   this->_addBuiltinFunction("name_output",
                             std::make_shared<BuiltinFunctionEmbedder<
                               std::shared_ptr<const NamedDiscreteFunction>(std::shared_ptr<const IDiscreteFunction>,
diff --git a/src/mesh/Connectivity.hpp b/src/mesh/Connectivity.hpp
index f8e4dafdb7a46473434645c26e46ee1c00f86117..2b25c6e9bf6f772acf5e0b02b9e93e9ea02a9877 100644
--- a/src/mesh/Connectivity.hpp
+++ b/src/mesh/Connectivity.hpp
@@ -609,6 +609,23 @@ class Connectivity final : public IConnectivity
     return cell_to_node_matrix.numRows();
   }
 
+  template <ItemType item_type>
+  PUGS_INLINE size_t
+  numberOf() const
+  {
+    if constexpr (item_type == ItemType::cell) {
+      return this->numberOfCells();
+    } else if constexpr (item_type == ItemType::face) {
+      return this->numberOfFaces();
+    } else if constexpr (item_type == ItemType::edge) {
+      return this->numberOfEdges();
+    } else if constexpr (item_type == ItemType::node) {
+      return this->numberOfNodes();
+    } else {
+      static_assert(is_false_item_type_v<item_type>, "unknown ItemType");
+    }
+  }
+
   CellValue<const double>
   invCellNbNodes() const
   {
diff --git a/src/mesh/Mesh.hpp b/src/mesh/Mesh.hpp
index e78383978469292d6529eb931b432406612f7220..21c100384ef2072e43af54dda74eda8d6021c2ea 100644
--- a/src/mesh/Mesh.hpp
+++ b/src/mesh/Mesh.hpp
@@ -63,6 +63,13 @@ class Mesh final : public IMesh
     return m_connectivity->numberOfCells();
   }
 
+  template <ItemType item_type>
+  PUGS_INLINE size_t
+  numberOf() const
+  {
+    return m_connectivity->template numberOf<item_type>();
+  }
+
   PUGS_INLINE
   const NodeValue<const Rd>&
   xr() const
diff --git a/src/output/CMakeLists.txt b/src/output/CMakeLists.txt
index a3e6799657d2ed422fee5ab7018939ea080cf4c2..1f33809033731ac4500ee64a046ba11f55e15802 100644
--- a/src/output/CMakeLists.txt
+++ b/src/output/CMakeLists.txt
@@ -3,6 +3,7 @@
 add_library(
   PugsOutput
   GnuplotWriter.cpp
+  GnuplotWriter1D.cpp
   VTKWriter.cpp
   WriterBase.cpp)
 
diff --git a/src/output/GnuplotWriter.cpp b/src/output/GnuplotWriter.cpp
index f503c996c3f86a383f2d12fa95185fd1fe205578..78b1e25ce8277888819395cafe92118a46d992fb 100644
--- a/src/output/GnuplotWriter.cpp
+++ b/src/output/GnuplotWriter.cpp
@@ -121,7 +121,7 @@ GnuplotWriter::_writeValues(const std::shared_ptr<const MeshType>& mesh,
                             const OutputNamedItemValueSet& output_named_item_value_set,
                             std::ostream& fout) const
 {
-  if constexpr (MeshType::Dimension <= 2) {
+  if constexpr (MeshType::Dimension == 1) {
     auto cell_to_node_matrix = mesh->connectivity().cellToNodeMatrix();
     auto cell_is_owned       = mesh->connectivity().cellIsOwned();
 
@@ -132,16 +132,38 @@ GnuplotWriter::_writeValues(const std::shared_ptr<const MeshType>& mesh,
           const NodeId& node_id                     = cell_nodes[i_node];
           const TinyVector<MeshType::Dimension>& xr = mesh->xr()[node_id];
           fout << xr[0];
-          if (MeshType::Dimension == 2) {
-            fout << ' ' << xr[1];
-          }
           for (auto [name, item_value] : output_named_item_value_set) {
             std::visit([&](auto&& cell_value) { _writeValue(cell_value, cell_id, node_id, fout); }, item_value);
           }
+          fout << '\n';
+        }
+
+        fout << "\n\n";
+      }
+    }
+  } else if constexpr (MeshType::Dimension == 2) {
+    auto cell_to_node_matrix = mesh->connectivity().cellToNodeMatrix();
+    auto cell_is_owned       = mesh->connectivity().cellIsOwned();
 
+    for (CellId cell_id = 0; cell_id < mesh->numberOfCells(); ++cell_id) {
+      if (cell_is_owned[cell_id]) {
+        const auto& cell_nodes = cell_to_node_matrix[cell_id];
+        for (size_t i_node = 0; i_node < cell_nodes.size(); ++i_node) {
+          const NodeId& node_id                     = cell_nodes[i_node];
+          const TinyVector<MeshType::Dimension>& xr = mesh->xr()[node_id];
+          fout << xr[0] << ' ' << xr[1];
+          for (auto [name, item_value] : output_named_item_value_set) {
+            std::visit([&](auto&& cell_value) { _writeValue(cell_value, cell_id, node_id, fout); }, item_value);
+          }
           fout << '\n';
         }
-        fout << '\n';
+        const NodeId& node_id                     = cell_nodes[0];
+        const TinyVector<MeshType::Dimension>& xr = mesh->xr()[node_id];
+        fout << xr[0] << ' ' << xr[1];
+        for (auto [name, item_value] : output_named_item_value_set) {
+          std::visit([&](auto&& cell_value) { _writeValue(cell_value, cell_id, node_id, fout); }, item_value);
+        }
+        fout << "\n\n\n";
       }
     }
   } else {
@@ -183,14 +205,14 @@ GnuplotWriter::_write(const std::shared_ptr<const MeshType>& mesh,
     fout.setf(std::ios_base::scientific);
 
     fout << this->_getDateAndVersionComment();
-    fout << "\n# time = " << time << "\n\n";
+    fout << "# time = " << time << "\n\n";
 
     this->_writePreamble<MeshType::Dimension>(output_named_item_value_set, fout);
   }
 
   for (size_t i_rank = 0; i_rank < parallel::size(); ++i_rank) {
     if (i_rank == parallel::rank()) {
-      std::ofstream fout{_getFilename(), std::ios_base::app};
+      std::ofstream fout(_getFilename(), std::ios_base::app);
       fout.precision(15);
       fout.setf(std::ios_base::scientific);
 
@@ -205,22 +227,13 @@ GnuplotWriter::writeMesh(const std::shared_ptr<const IMesh>& p_mesh) const
 {
   OutputNamedItemValueSet output_named_item_value_set{};
 
-  std::ofstream fout(_getFilename());
-  fout.precision(15);
-  fout.setf(std::ios_base::scientific);
-  fout << _getDateAndVersionComment();
-
   switch (p_mesh->dimension()) {
   case 1: {
-    this->_writePreamble<1>(output_named_item_value_set, fout);
-    this->_writeValues(std::dynamic_pointer_cast<const Mesh<Connectivity<1>>>(p_mesh), output_named_item_value_set,
-                       fout);
+    this->_write(std::dynamic_pointer_cast<const Mesh<Connectivity<1>>>(p_mesh), output_named_item_value_set, 0);
     break;
   }
   case 2: {
-    this->_writePreamble<2>(output_named_item_value_set, fout);
-    this->_writeValues(std::dynamic_pointer_cast<const Mesh<Connectivity<2>>>(p_mesh), output_named_item_value_set,
-                       fout);
+    this->_write(std::dynamic_pointer_cast<const Mesh<Connectivity<2>>>(p_mesh), output_named_item_value_set, 0);
     break;
   }
   default: {
diff --git a/src/output/GnuplotWriter1D.cpp b/src/output/GnuplotWriter1D.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..b25ca82accf3189fbdab58ca4161101647cb463d
--- /dev/null
+++ b/src/output/GnuplotWriter1D.cpp
@@ -0,0 +1,308 @@
+#include <output/GnuplotWriter1D.hpp>
+
+#include <mesh/Connectivity.hpp>
+#include <mesh/ItemValue.hpp>
+#include <mesh/Mesh.hpp>
+#include <mesh/MeshData.hpp>
+#include <mesh/MeshDataManager.hpp>
+#include <utils/Messenger.hpp>
+#include <utils/PugsTraits.hpp>
+#include <utils/RevisionInfo.hpp>
+
+#include <utils/Demangle.hpp>
+
+#include <fstream>
+#include <iomanip>
+
+std::string
+GnuplotWriter1D::_getDateAndVersionComment() const
+{
+  std::ostringstream os;
+
+  std::time_t now = std::time(nullptr);
+  os << "#  Generated by pugs: " << std::ctime(&now);
+  os << "#  version: " << RevisionInfo::version() << '\n';
+  os << "#  tag:  " << RevisionInfo::gitTag() << '\n';
+  os << "#  HEAD: " << RevisionInfo::gitHead() << '\n';
+  os << "#  hash: " << RevisionInfo::gitHash() << " (" << ((RevisionInfo::gitIsClean()) ? "clean" : "dirty") << ")\n";
+  os << '\n';
+
+  return os.str();
+}
+
+std::string
+GnuplotWriter1D::_getFilename() const
+{
+  std::ostringstream sout;
+  sout << m_base_filename;
+  sout << '.' << std::setfill('0') << std::setw(4) << m_saved_times.size() << ".gnu";
+  return sout.str();
+}
+
+template <typename DataType>
+bool
+GnuplotWriter1D::_is_cell_value(const CellValue<const DataType>&) const
+{
+  return true;
+}
+
+template <typename DataType>
+bool
+GnuplotWriter1D::_is_cell_value(const NodeValue<const DataType>&) const
+{
+  return false;
+}
+
+template <typename DataType>
+bool
+GnuplotWriter1D::_is_node_value(const CellValue<const DataType>&) const
+{
+  return false;
+}
+
+template <typename DataType>
+bool
+GnuplotWriter1D::_is_node_value(const NodeValue<const DataType>&) const
+{
+  return true;
+}
+
+void
+GnuplotWriter1D::_writePreamble(const OutputNamedItemValueSet& output_named_item_value_set, std::ostream& fout) const
+{
+  fout << "# list of data\n";
+  fout << "# 1:x";
+  uint64_t i = 2;
+  for (const auto& i_named_item_value : output_named_item_value_set) {
+    const std::string name         = i_named_item_value.first;
+    const auto& item_value_variant = i_named_item_value.second;
+    std::visit(
+      [&](auto&& item_value) {
+        using ItemValueType = std::decay_t<decltype(item_value)>;
+        using DataType      = std::decay_t<typename ItemValueType::data_type>;
+        if constexpr (std::is_arithmetic_v<DataType>) {
+          fout << ' ' << i++ << ':' << name;
+        } else if constexpr (is_tiny_vector_v<DataType>) {
+          for (size_t j = 0; j < DataType{}.dimension(); ++j) {
+            fout << ' ' << i++ << ':' << name << '[' << j << ']';
+          }
+        } else if constexpr (is_tiny_matrix_v<DataType>) {
+          for (size_t j = 0; j < DataType{}.nbRows(); ++j) {
+            for (size_t k = 0; k < DataType{}.nbColumns(); ++k) {
+              fout << ' ' << i++ << ':' << name << '(' << j << ',' << k << ')';
+            }
+          }
+        } else {
+          throw UnexpectedError("invalid data type");
+        }
+      },
+      item_value_variant);
+  }
+  fout << "\n\n";
+}
+
+template <typename DataType, ItemType item_type>
+size_t
+GnuplotWriter1D::_itemValueNbRow(const ItemValue<DataType, item_type>&) const
+{
+  if constexpr (std::is_arithmetic_v<DataType>) {
+    return 1;
+  } else if constexpr (is_tiny_vector_v<std::decay_t<DataType>>) {
+    return DataType::Dimension;
+  } else if constexpr (is_tiny_matrix_v<std::decay_t<DataType>>) {
+    return DataType{}.dimension();
+  } else {
+    throw UnexpectedError("invalid data type for cell value output: " + demangle<DataType>());
+  }
+}
+
+template <typename MeshType, ItemType item_type>
+void
+GnuplotWriter1D::_writeItemValues(const std::shared_ptr<const MeshType>& mesh,
+                                  const OutputNamedItemValueSet& output_named_item_value_set,
+                                  std::ostream& fout) const
+{
+  using ItemId = ItemIdT<item_type>;
+
+  const size_t& number_of_columns = [&] {
+    size_t number_of_columns = 1;
+    for (auto [name, item_value] : output_named_item_value_set) {
+      std::visit([&](auto&& value) { number_of_columns += _itemValueNbRow(value); }, item_value);
+    }
+    return number_of_columns;
+  }();
+
+  auto is_owned = mesh->connectivity().template isOwned<item_type>();
+
+  const size_t& number_of_owned_lines = [&]() {
+    if (parallel::size() > 1) {
+      size_t number_of_owned_items = 0;
+      for (ItemId item_id = 0; item_id < mesh->template numberOf<item_type>(); ++item_id) {
+        if (is_owned[item_id]) {
+          ++number_of_owned_items;
+        }
+      }
+
+      return number_of_owned_items;
+    } else {
+      return mesh->template numberOf<item_type>();
+    }
+  }();
+
+  Array<double> values{number_of_columns * number_of_owned_lines};
+
+  if constexpr (item_type == ItemType::cell) {
+    auto& mesh_data         = MeshDataManager::instance().getMeshData(*mesh);
+    const auto& cell_center = mesh_data.xj();
+
+    size_t index = 0;
+    for (ItemId item_id = 0; item_id < mesh->template numberOf<item_type>(); ++item_id) {
+      if (is_owned[item_id]) {
+        values[number_of_columns * index++] = cell_center[item_id][0];
+      }
+    }
+  } else if constexpr (item_type == ItemType::node) {
+    const auto& node_position = mesh->xr();
+
+    size_t index = 0;
+    for (ItemId item_id = 0; item_id < mesh->template numberOf<item_type>(); ++item_id) {
+      if (is_owned[item_id]) {
+        values[number_of_columns * index++] = node_position[item_id][0];
+      }
+    }
+  } else {
+    throw UnexpectedError("invalid item type");
+  }
+
+  size_t column_number = 1;
+  for (auto [name, output_item_value] : output_named_item_value_set) {
+    std::visit(
+      [&](auto&& item_value) {
+        using ItemValueT = std::decay_t<decltype(item_value)>;
+        if constexpr (ItemValueT::item_t == item_type) {
+          using DataT  = std::decay_t<typename ItemValueT::data_type>;
+          size_t index = 0;
+          for (ItemId item_id = 0; item_id < item_value.size(); ++item_id) {
+            if (is_owned[item_id]) {
+              if constexpr (std::is_arithmetic_v<DataT>) {
+                values[number_of_columns * index + column_number] = item_value[item_id];
+              } else if constexpr (is_tiny_vector_v<DataT>) {
+                const size_t k = number_of_columns * index + column_number;
+                for (size_t j = 0; j < DataT::Dimension; ++j) {
+                  values[k + j] = item_value[item_id][j];
+                }
+              } else if constexpr (is_tiny_matrix_v<DataT>) {
+                size_t k = number_of_columns * index + column_number;
+                for (size_t i = 0; i < DataT{}.nbRows(); ++i) {
+                  for (size_t j = 0; j < DataT{}.nbColumns(); ++j) {
+                    values[k++] = item_value[item_id](i, j);
+                  }
+                }
+              }
+              ++index;
+            }
+          }
+        }
+        column_number += _itemValueNbRow(item_value);
+      },
+      output_item_value);
+  }
+
+  if (parallel::size() > 1) {
+    values = parallel::gatherVariable(values, 0);
+  }
+
+  if (parallel::rank() == 0) {
+    Assert(values.size() % number_of_columns == 0);
+
+    std::vector<size_t> line_numbers(values.size() / number_of_columns);
+    for (size_t i = 0; i < line_numbers.size(); ++i) {
+      line_numbers[i] = i;
+    }
+
+    std::sort(line_numbers.begin(), line_numbers.end(),
+              [&](size_t i, size_t j) { return values[i * number_of_columns] < values[j * number_of_columns]; });
+
+    for (auto i_line : line_numbers) {
+      fout << values[i_line * number_of_columns];
+      for (size_t j = 1; j < number_of_columns; ++j) {
+        fout << ' ' << values[i_line * number_of_columns + j];
+      }
+      fout << '\n';
+    }
+  }
+}
+
+template <typename MeshType>
+void
+GnuplotWriter1D::_write(const std::shared_ptr<const MeshType>& mesh,
+                        const OutputNamedItemValueSet& output_named_item_value_set,
+                        double time) const
+{
+  bool has_cell_value = false;
+  for (const auto& [name, item_value_variant] : output_named_item_value_set) {
+    has_cell_value |=
+      std::visit([&](auto&& item_value) { return this->_is_cell_value(item_value); }, item_value_variant);
+  }
+
+  bool has_node_value = false;
+  for (const auto& [name, item_value_variant] : output_named_item_value_set) {
+    has_node_value |=
+      std::visit([&](auto&& item_value) { return this->_is_node_value(item_value); }, item_value_variant);
+  }
+
+  if (has_cell_value and has_node_value) {
+    throw NormalError("cannot store both node and cell values in a gnuplot file");
+  }
+
+  std::ofstream fout;
+
+  if (parallel::rank() == 0) {
+    fout.open(_getFilename());
+    fout.precision(15);
+    fout.setf(std::ios_base::scientific);
+    fout << _getDateAndVersionComment();
+
+    fout << "# time = " << time << "\n\n";
+
+    _writePreamble(output_named_item_value_set, fout);
+  }
+
+  if (has_cell_value) {
+    this->_writeItemValues<MeshType, ItemType::cell>(mesh, output_named_item_value_set, fout);
+  } else {   // has_node_value
+    this->_writeItemValues<MeshType, ItemType::node>(mesh, output_named_item_value_set, fout);
+  }
+}
+
+void
+GnuplotWriter1D::writeMesh(const std::shared_ptr<const IMesh>&) const
+{
+  throw UnexpectedError("This function should not be called");
+}
+
+void
+GnuplotWriter1D::write(const std::vector<std::shared_ptr<const NamedDiscreteFunction>>& named_discrete_function_list,
+                       double time) const
+{
+  std::shared_ptr mesh = this->_getMesh(named_discrete_function_list);
+
+  OutputNamedItemValueSet output_named_item_value_set = this->_getOutputNamedItemValueSet(named_discrete_function_list);
+
+  switch (mesh->dimension()) {
+  case 1: {
+    this->_write(std::dynamic_pointer_cast<const Mesh<Connectivity<1>>>(mesh), output_named_item_value_set, time);
+    break;
+  }
+  case 2: {
+    std::ostringstream errorMsg;
+    errorMsg << "gnuplot_1d_writer is not available in dimension " << std::to_string(mesh->dimension()) << '\n'
+             << rang::style::bold << "note:" << rang::style::reset << " one can use " << rang::fgB::blue
+             << "gnuplot_writer" << rang::style::reset << " in dimension 2";
+    throw NormalError(errorMsg.str());
+  }
+  default: {
+    throw NormalError("gnuplot format is not available in dimension " + std::to_string(mesh->dimension()));
+  }
+  }
+}
diff --git a/src/output/GnuplotWriter1D.hpp b/src/output/GnuplotWriter1D.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..6402a6002a105e1ef8b85b60783dce7a1cba82bd
--- /dev/null
+++ b/src/output/GnuplotWriter1D.hpp
@@ -0,0 +1,60 @@
+#ifndef GNUPLOT_WRITER_1D_HPP
+#define GNUPLOT_WRITER_1D_HPP
+
+#include <output/WriterBase.hpp>
+
+#include <algebra/TinyMatrix.hpp>
+#include <algebra/TinyVector.hpp>
+#include <output/OutputNamedItemValueSet.hpp>
+
+class IMesh;
+
+#include <string>
+
+class GnuplotWriter1D : public WriterBase
+{
+ private:
+  std::string _getDateAndVersionComment() const;
+
+  std::string _getFilename() const;
+
+  template <typename DataType>
+  bool _is_cell_value(const CellValue<const DataType>&) const;
+
+  template <typename DataType>
+  bool _is_cell_value(const NodeValue<const DataType>&) const;
+
+  template <typename DataType>
+  bool _is_node_value(const CellValue<const DataType>&) const;
+
+  template <typename DataType>
+  bool _is_node_value(const NodeValue<const DataType>&) const;
+
+  template <typename DataType, ItemType item_type>
+  size_t _itemValueNbRow(const ItemValue<DataType, item_type>&) const;
+
+  template <typename MeshType, ItemType item_type>
+  void _writeItemValues(const std::shared_ptr<const MeshType>& mesh,
+                        const OutputNamedItemValueSet& output_named_item_value_set,
+                        std::ostream& fout) const;
+
+  void _writePreamble(const OutputNamedItemValueSet& output_named_item_value_set, std::ostream& fout) const;
+
+  template <typename MeshType>
+  void _write(const std::shared_ptr<const MeshType>& mesh,
+              const OutputNamedItemValueSet& output_named_item_value_set,
+              double time) const;
+
+ public:
+  void writeMesh(const std::shared_ptr<const IMesh>& mesh) const final;
+
+  void write(const std::vector<std::shared_ptr<const NamedDiscreteFunction>>& named_discrete_function_list,
+             double time) const final;
+
+  GnuplotWriter1D(const std::string& base_filename, const double time_period) : WriterBase(base_filename, time_period)
+  {}
+
+  ~GnuplotWriter1D() = default;
+};
+
+#endif   // GNUPLOT_WRITER_1D_HPP
diff --git a/src/output/VTKWriter.cpp b/src/output/VTKWriter.cpp
index 48f81dd376bd96c36820a535b2ff2fa20a4e831b..f65a4acfa89830c54e131539252e80a08de67ff3 100644
--- a/src/output/VTKWriter.cpp
+++ b/src/output/VTKWriter.cpp
@@ -2,8 +2,11 @@
 
 #include <mesh/Connectivity.hpp>
 #include <mesh/Mesh.hpp>
+#include <mesh/MeshData.hpp>
+#include <mesh/MeshDataManager.hpp>
 #include <utils/Messenger.hpp>
 #include <utils/RevisionInfo.hpp>
+#include <utils/pugs_config.hpp>
 
 #include <ctime>
 #include <fstream>
@@ -11,6 +14,99 @@
 #include <sstream>
 #include <unordered_map>
 
+class ICharArrayEmbedder
+{
+ public:
+  ICharArrayEmbedder()                          = default;
+  ICharArrayEmbedder(const ICharArrayEmbedder&) = default;
+  ICharArrayEmbedder(ICharArrayEmbedder&&)      = default;
+
+  virtual size_t size() const             = 0;
+  virtual void write(std::ostream&) const = 0;
+
+  virtual ~ICharArrayEmbedder() = default;
+};
+
+template <typename InputDataT>
+class CharArrayEmbedder : public ICharArrayEmbedder
+{
+  CastArray<InputDataT, char> m_char_cast_array;
+
+ public:
+  size_t
+  size() const final
+  {
+    return m_char_cast_array.size();
+  }
+
+  void
+  write(std::ostream& os) const final
+  {
+    os.write(&(m_char_cast_array[0]), m_char_cast_array.size());
+  }
+
+  CharArrayEmbedder(Array<InputDataT> array) : m_char_cast_array{array} {}
+
+  CharArrayEmbedder()                         = default;
+  CharArrayEmbedder(const CharArrayEmbedder&) = default;
+  CharArrayEmbedder(CharArrayEmbedder&&)      = default;
+
+  ~CharArrayEmbedder() = default;
+};
+
+class VTKWriter::SerializedDataList
+{
+ private:
+  std::vector<std::shared_ptr<ICharArrayEmbedder>> m_serialized_data_list;
+  size_t m_offset = 0;
+
+ public:
+  size_t
+  offset() const
+  {
+    return m_offset;
+  }
+
+  template <typename DataT>
+  void
+  add(Array<DataT> array)
+  {
+    auto array_data = std::make_shared<CharArrayEmbedder<DataT>>(array);
+    auto size_data  = std::make_shared<CharArrayEmbedder<int>>([&] {
+      Array<int> size_array(1);
+      size_array[0] = array_data->size();
+      return size_array;
+    }());
+
+    m_serialized_data_list.push_back(size_data);
+    m_serialized_data_list.push_back(array_data);
+
+    m_offset += size_data->size() + array_data->size();
+  }
+
+  template <typename DataT, ItemType item_type, typename ConnectivityT>
+  void
+  add(const ItemValue<DataT, item_type, ConnectivityT>& item_value)
+  {
+    Array<std::remove_const_t<DataT>> array(item_value.numberOfItems());
+    parallel_for(
+      item_value.numberOfItems(), PUGS_LAMBDA(ItemIdT<item_type> item_id) { array[item_id] = item_value[item_id]; });
+
+    this->add(array);
+  }
+
+  void
+  write(std::ostream& os) const
+  {
+    for (auto serialized_data : m_serialized_data_list) {
+      serialized_data->write(os);
+    }
+  }
+
+  SerializedDataList()  = default;
+  ~SerializedDataList() = default;
+};
+
 std::string
 VTKWriter::_getFilenamePVTU() const
 {
@@ -132,141 +228,114 @@ struct VTKWriter::VTKType
 
 template <typename DataType>
 void
-VTKWriter::_write_array(std::ofstream& os, const std::string& name, const Array<DataType>& item_value) const
+VTKWriter::_write_array(std::ofstream& os,
+                        const std::string& name,
+                        const Array<DataType>& item_value,
+                        SerializedDataList& serialized_data_list) const
 {
-  os << "<DataArray type=\"" << VTKType<DataType>::name << "\" Name=\"" << name << "\">\n";
-  for (typename Array<DataType>::index_type i = 0; i < item_value.size(); ++i) {
-    // The following '+' enforces integer output for char types
-    os << +item_value[i] << ' ';
-  }
-  os << "\n</DataArray>\n";
+  os << "<DataArray type=\"" << VTKType<DataType>::name << "\" Name=\"" << name << "\" offset=\""
+     << serialized_data_list.offset() << "\" format=\"appended\"/>\n";
+  serialized_data_list.add(item_value);
 }
 
 template <size_t N, typename DataType>
 void
 VTKWriter::_write_array(std::ofstream& os,
                         const std::string& name,
-                        const Array<TinyVector<N, DataType>>& item_value) const
+                        const Array<TinyVector<N, DataType>>& item_value,
+                        SerializedDataList& serialized_data_list) const
 {
   os << "<DataArray type=\"" << VTKType<DataType>::name << "\" Name=\"" << name << "\" NumberOfComponents=\"" << N
-     << "\">\n";
-  for (typename Array<DataType>::index_type i = 0; i < item_value.size(); ++i) {
-    for (size_t j = 0; j < N; ++j) {
-      // The following '+' enforces integer output for char types
-      os << +item_value[i][j] << ' ';
-    }
-  }
-  os << "\n</DataArray>\n";
+     << "\" offset=\"" << serialized_data_list.offset() << "\" format=\"appended\"/>\n";
+  serialized_data_list.add(item_value);
 }
 
 template <typename DataType>
 void
 VTKWriter::_write_node_value(std::ofstream& os,
                              const std::string& name,
-                             const NodeValue<const DataType>& item_value) const
+                             const NodeValue<const DataType>& item_value,
+                             SerializedDataList& serialized_data_list) const
 {
-  os << "<DataArray type=\"" << VTKType<DataType>::name << "\" Name=\"" << name << "\">\n";
-  for (NodeId i = 0; i < item_value.size(); ++i) {
-    // The following '+' enforces integer output for char types
-    os << +item_value[i] << ' ';
-  }
-  os << "\n</DataArray>\n";
+  os << "<DataArray type=\"" << VTKType<DataType>::name << "\" Name=\"" << name << "\" offset=\""
+     << serialized_data_list.offset() << "\" format=\"appended\"/>\n";
+  serialized_data_list.add(item_value);
 }
 
 template <size_t N, typename DataType>
 void
 VTKWriter::_write_node_value(std::ofstream& os,
                              const std::string& name,
-                             const NodeValue<const TinyVector<N, DataType>>& item_value) const
+                             const NodeValue<const TinyVector<N, DataType>>& item_value,
+                             SerializedDataList& serialized_data_list) const
 {
   os << "<DataArray type=\"" << VTKType<DataType>::name << "\" Name=\"" << name << "\" NumberOfComponents=\"" << N
-     << "\">\n";
-  for (NodeId i = 0; i < item_value.size(); ++i) {
-    for (size_t j = 0; j < N; ++j) {
-      // The following '+' enforces integer output for char types
-      os << +item_value[i][j] << ' ';
-    }
-  }
-  os << "\n</DataArray>\n";
+     << "\" offset=\"" << serialized_data_list.offset() << "\" format=\"appended\"/>\n";
+  serialized_data_list.add(item_value);
 }
 
 template <size_t N, typename DataType>
 void
 VTKWriter::_write_node_value(std::ofstream& os,
                              const std::string& name,
-                             const NodeValue<const TinyMatrix<N, DataType>>& item_value) const
+                             const NodeValue<const TinyMatrix<N, DataType>>& item_value,
+                             SerializedDataList& serialized_data_list) const
 {
   os << "<DataArray type=\"" << VTKType<DataType>::name << "\" Name=\"" << name << "\" NumberOfComponents=\"" << N * N
-     << "\">\n";
-  for (NodeId i = 0; i < item_value.size(); ++i) {
-    for (size_t j = 0; j < N; ++j) {
-      for (size_t k = 0; k < N; ++k) {
-        // The following '+' enforces integer output for char types
-        os << +item_value[i](j, k) << ' ';
-      }
-    }
-  }
-  os << "\n</DataArray>\n";
+     << "\" offset=\"" << serialized_data_list.offset() << "\" format=\"appended\"/>\n";
+  serialized_data_list.add(item_value);
 }
 
 template <typename DataType>
 void
-VTKWriter::_write_node_value(std::ofstream&, const std::string&, const CellValue<const DataType>&) const
+VTKWriter::_write_node_value(std::ofstream&,
+                             const std::string&,
+                             const CellValue<const DataType>&,
+                             SerializedDataList&) const
 {}
 
 template <typename DataType>
 void
 VTKWriter::_write_cell_value(std::ofstream& os,
                              const std::string& name,
-                             const CellValue<const DataType>& item_value) const
+                             const CellValue<const DataType>& item_value,
+                             SerializedDataList& serialized_data_list) const
 {
-  os << "<DataArray type=\"" << VTKType<DataType>::name << "\" Name=\"" << name << "\">\n";
-  for (CellId i = 0; i < item_value.size(); ++i) {
-    // The following '+' enforces integer output for char types
-    os << +item_value[i] << ' ';
-  }
-  os << "\n</DataArray>\n";
+  os << "<DataArray type=\"" << VTKType<DataType>::name << "\" Name=\"" << name << "\" offset=\""
+     << serialized_data_list.offset() << "\" format=\"appended\"/>\n";
+  serialized_data_list.add(item_value);
 }
 
 template <size_t N, typename DataType>
 void
 VTKWriter::_write_cell_value(std::ofstream& os,
                              const std::string& name,
-                             const CellValue<const TinyVector<N, DataType>>& item_value) const
+                             const CellValue<const TinyVector<N, DataType>>& item_value,
+                             SerializedDataList& serialized_data_list) const
 {
   os << "<DataArray type=\"" << VTKType<DataType>::name << "\" Name=\"" << name << "\" NumberOfComponents=\"" << N
-     << "\">\n";
-  for (CellId i = 0; i < item_value.size(); ++i) {
-    for (size_t j = 0; j < N; ++j) {
-      // The following '+' enforces integer output for char types
-      os << +item_value[i][j] << ' ';
-    }
-  }
-  os << "\n</DataArray>\n";
+     << "\" offset=\"" << serialized_data_list.offset() << "\" format=\"appended\"/>\n";
+  serialized_data_list.add(item_value);
 }
 
 template <size_t N, typename DataType>
 void
 VTKWriter::_write_cell_value(std::ofstream& os,
                              const std::string& name,
-                             const CellValue<const TinyMatrix<N, DataType>>& item_value) const
+                             const CellValue<const TinyMatrix<N, DataType>>& item_value,
+                             SerializedDataList& serialized_data_list) const
 {
   os << "<DataArray type=\"" << VTKType<DataType>::name << "\" Name=\"" << name << "\" NumberOfComponents=\"" << N * N
-     << "\">\n";
-  for (CellId i = 0; i < item_value.size(); ++i) {
-    for (size_t j = 0; j < N; ++j) {
-      for (size_t k = 0; k < N; ++k) {
-        // The following '+' enforces integer output for char types
-        os << +item_value[i](j, k) << ' ';
-      }
-    }
-  }
-  os << "\n</DataArray>\n";
+     << "\" offset=\"" << serialized_data_list.offset() << "\" format=\"appended\"/>\n";
+  serialized_data_list.add(item_value);
 }
 
 template <typename DataType>
 void
-VTKWriter::_write_cell_value(std::ofstream&, const std::string&, const NodeValue<const DataType>&) const
+VTKWriter::_write_cell_value(std::ofstream&,
+                             const std::string&,
+                             const NodeValue<const DataType>&,
+                             SerializedDataList&) const
 {}
 
 template <typename MeshType>
@@ -279,6 +348,7 @@ VTKWriter::_write(const std::shared_ptr<const MeshType>& mesh,
   // Adding basic mesh information
   output_named_item_value_set.add(NamedItemValue{"cell_number", mesh->connectivity().cellNumber()});
   output_named_item_value_set.add(NamedItemValue{"node_number", mesh->connectivity().nodeNumber()});
+  output_named_item_value_set.add(NamedItemValue{"cell_center", MeshDataManager::instance().getMeshData(*mesh).xj()});
 
   if (parallel::rank() == 0) {   // write PVTK file
     std::ofstream fout(_getFilenamePVTU());
@@ -299,10 +369,6 @@ VTKWriter::_write(const std::shared_ptr<const MeshType>& mesh,
             "NumberOfComponents=\"1\"/>\n";
     fout << "<PDataArray type=\"Int8\" Name=\"types\" "
             "NumberOfComponents=\"1\"/>\n";
-    for (const auto& [name, item_value_variant] : output_named_item_value_set) {
-      std::visit([&, name = name](auto&& item_value) { return this->_write_cell_pvtu(fout, name, item_value); },
-                 item_value_variant);
-    }
     if constexpr (MeshType::Dimension == 3) {
       fout << "<PDataArray type=\"Int64\" IdType=\"1\" Name=\"faces\"/>\n";
       fout << "<PDataArray type=\"Int64\" IdType=\"1\" Name=\"faceoffsets\"/>\n";
@@ -330,23 +396,34 @@ VTKWriter::_write(const std::shared_ptr<const MeshType>& mesh,
     fout << "</VTKFile>\n";
   }
 
-  {   // write VTK files
+  {
+    SerializedDataList serialize_data_list;
+
+    // write VTK files
     std::ofstream fout(_getFilenameVTU(parallel::rank()));
     fout << "<?xml version=\"1.0\"?>\n";
     fout << _getDateAndVersionComment();
-    fout << "<VTKFile type=\"UnstructuredGrid\">\n";
+    fout << "<VTKFile type=\"UnstructuredGrid\"";
+#if defined(PUGS_LITTLE_ENDIAN)
+    fout << " byte_order=\"LittleEndian\"";
+#else
+    fout << " byte_order=\"BigEndian\"";
+#endif
+    fout << ">\n";
     fout << "<UnstructuredGrid>\n";
     fout << "<Piece NumberOfPoints=\"" << mesh->numberOfNodes() << "\" NumberOfCells=\"" << mesh->numberOfCells()
          << "\">\n";
     fout << "<CellData>\n";
     for (const auto& [name, item_value_variant] : output_named_item_value_set) {
-      std::visit([&, name = name](auto&& item_value) { return this->_write_cell_value(fout, name, item_value); },
+      std::visit([&, name = name](
+                   auto&& item_value) { return this->_write_cell_value(fout, name, item_value, serialize_data_list); },
                  item_value_variant);
     }
     fout << "</CellData>\n";
     fout << "<PointData>\n";
     for (const auto& [name, item_value_variant] : output_named_item_value_set) {
-      std::visit([&, name = name](auto&& item_value) { return this->_write_node_value(fout, name, item_value); },
+      std::visit([&, name = name](
+                   auto&& item_value) { return this->_write_node_value(fout, name, item_value, serialize_data_list); },
                  item_value_variant);
     }
     fout << "</PointData>\n";
@@ -364,7 +441,7 @@ VTKWriter::_write(const std::shared_ptr<const MeshType>& mesh,
             positions[r][i] = 0;
           }
         });
-      _write_array(fout, "Positions", positions);
+      _write_array(fout, "Positions", positions, serialize_data_list);
     }
     fout << "</Points>\n";
 
@@ -372,7 +449,7 @@ VTKWriter::_write(const std::shared_ptr<const MeshType>& mesh,
     {
       const auto& cell_to_node_matrix = mesh->connectivity().cellToNodeMatrix();
 
-      _write_array(fout, "connectivity", cell_to_node_matrix.entries());
+      _write_array(fout, "connectivity", cell_to_node_matrix.entries(), serialize_data_list);
     }
 
     {
@@ -384,7 +461,7 @@ VTKWriter::_write(const std::shared_ptr<const MeshType>& mesh,
         offset += cell_nodes.size();
         offsets[j] = offset;
       }
-      _write_array(fout, "offsets", offsets);
+      _write_array(fout, "offsets", offsets, serialize_data_list);
     }
 
     {
@@ -438,7 +515,7 @@ VTKWriter::_write(const std::shared_ptr<const MeshType>& mesh,
           }
           }
         });
-      _write_array(fout, "types", types);
+      _write_array(fout, "types", types, serialize_data_list);
       if constexpr (MeshType::Dimension == 3) {
         const bool has_general_polyhedron = [&] {
           for (size_t i = 0; i < types.size(); ++i) {
@@ -508,6 +585,11 @@ VTKWriter::_write(const std::shared_ptr<const MeshType>& mesh,
     fout << "</Cells>\n";
     fout << "</Piece>\n";
     fout << "</UnstructuredGrid>\n";
+    fout << "<AppendedData encoding=\"raw\">\n";
+    fout << "_";
+    serialize_data_list.write(fout);
+    fout << '\n';
+    fout << "</AppendedData>\n";
     fout << "</VTKFile>\n";
   }
 
@@ -523,10 +605,9 @@ VTKWriter::_write(const std::shared_ptr<const MeshType>& mesh,
       sout << m_base_filename;
       sout << '.' << std::setfill('0') << std::setw(4) << i_time << ".pvtu";
 
-      fout << "<DataSet timestep=\"" << m_saved_times[i_time] << "\" "
-           << "file=\"" << sout.str() << "\"/>\n";
+      fout << "<DataSet timestep=\"" << m_saved_times[i_time] << "\" file=\"" << sout.str() << "\"/>\n";
     }
-    fout << "<DataSet timestep=\"" << time << "\" group=\"\" part=\"0\" file=\"" << _getFilenamePVTU() << "\"/>\n";
+    fout << "<DataSet timestep=\"" << time << "\" file=\"" << _getFilenamePVTU() << "\"/>\n";
 
     fout << "</Collection>\n";
     fout << "</VTKFile>\n";
diff --git a/src/output/VTKWriter.hpp b/src/output/VTKWriter.hpp
index 704ba63f990ead2b6b8e8f64cafdda627c2be64b..7f38119222474af2df46508e96fb27d7a40053f9 100644
--- a/src/output/VTKWriter.hpp
+++ b/src/output/VTKWriter.hpp
@@ -14,6 +14,8 @@ class IMesh;
 class VTKWriter : public WriterBase
 {
  private:
+  class SerializedDataList;
+
   std::string _getDateAndVersionComment() const;
 
   std::string _getFilenamePVTU() const;
@@ -56,42 +58,64 @@ class VTKWriter : public WriterBase
   struct VTKType;
 
   template <typename DataType>
-  void _write_array(std::ofstream& os, const std::string& name, const Array<DataType>& item_value) const;
+  void _write_array(std::ofstream& os,
+                    const std::string& name,
+                    const Array<DataType>& item_value,
+                    SerializedDataList& serialized_data_list) const;
 
   template <size_t N, typename DataType>
-  void _write_array(std::ofstream& os, const std::string& name, const Array<TinyVector<N, DataType>>& item_value) const;
+  void _write_array(std::ofstream& os,
+                    const std::string& name,
+                    const Array<TinyVector<N, DataType>>& item_value,
+                    SerializedDataList& serialized_data_list) const;
 
   template <typename DataType>
-  void _write_node_value(std::ofstream& os, const std::string& name, const NodeValue<const DataType>& item_value) const;
+  void _write_node_value(std::ofstream& os,
+                         const std::string& name,
+                         const NodeValue<const DataType>& item_value,
+                         SerializedDataList& serialized_data_list) const;
 
   template <size_t N, typename DataType>
   void _write_node_value(std::ofstream& os,
                          const std::string& name,
-                         const NodeValue<const TinyVector<N, DataType>>& item_value) const;
+                         const NodeValue<const TinyVector<N, DataType>>& item_value,
+                         SerializedDataList& serialized_data_list) const;
 
   template <size_t N, typename DataType>
   void _write_node_value(std::ofstream& os,
                          const std::string& name,
-                         const NodeValue<const TinyMatrix<N, DataType>>& item_value) const;
+                         const NodeValue<const TinyMatrix<N, DataType>>& item_value,
+                         SerializedDataList& serialized_data_list) const;
 
   template <typename DataType>
-  void _write_node_value(std::ofstream&, const std::string&, const CellValue<const DataType>&) const;
+  void _write_node_value(std::ofstream&,
+                         const std::string&,
+                         const CellValue<const DataType>&,
+                         SerializedDataList&) const;
 
   template <typename DataType>
-  void _write_cell_value(std::ofstream& os, const std::string& name, const CellValue<const DataType>& item_value) const;
+  void _write_cell_value(std::ofstream& os,
+                         const std::string& name,
+                         const CellValue<const DataType>& item_value,
+                         SerializedDataList& serialized_data_list) const;
 
   template <size_t N, typename DataType>
   void _write_cell_value(std::ofstream& os,
                          const std::string& name,
-                         const CellValue<const TinyVector<N, DataType>>& item_value) const;
+                         const CellValue<const TinyVector<N, DataType>>& item_value,
+                         SerializedDataList& serialized_data_list) const;
 
   template <size_t N, typename DataType>
   void _write_cell_value(std::ofstream& os,
                          const std::string& name,
-                         const CellValue<const TinyMatrix<N, DataType>>& item_value) const;
+                         const CellValue<const TinyMatrix<N, DataType>>& item_value,
+                         SerializedDataList& serialized_data_list) const;
 
   template <typename DataType>
-  void _write_cell_value(std::ofstream&, const std::string&, const NodeValue<const DataType>&) const;
+  void _write_cell_value(std::ofstream&,
+                         const std::string&,
+                         const NodeValue<const DataType>&,
+                         SerializedDataList&) const;
 
   template <typename MeshType>
   void _write(const std::shared_ptr<const MeshType>& mesh,
diff --git a/src/utils/Messenger.hpp b/src/utils/Messenger.hpp
index cfe312a0884ae0718ee76c41855ce8b063fecffd..99de952b4d9e084a7981e0e0f43fe5feb4f8b4ab 100644
--- a/src/utils/Messenger.hpp
+++ b/src/utils/Messenger.hpp
@@ -93,6 +93,94 @@ class Messenger
   size_t m_rank{0};
   size_t m_size{1};
 
+  template <typename DataType>
+  void
+  _gather(const DataType& data, Array<DataType> gather, size_t rank) const
+  {
+    static_assert(std::is_arithmetic_v<DataType>);
+    Assert(gather.size() == m_size * (rank == m_rank));   // LCOV_EXCL_LINE
+
+#ifdef PUGS_HAS_MPI
+    MPI_Datatype mpi_datatype = Messenger::helper::mpiType<DataType>();
+
+    auto gather_address = (gather.size() > 0) ? &(gather[0]) : nullptr;
+
+    MPI_Gather(&data, 1, mpi_datatype, gather_address, 1, mpi_datatype, rank, MPI_COMM_WORLD);
+#else    // PUGS_HAS_MPI
+    gather[0] = data;
+#endif   // PUGS_HAS_MPI
+  }
+
+  template <template <typename... SendT> typename SendArrayType,
+            template <typename... RecvT>
+            typename RecvArrayType,
+            typename... SendT,
+            typename... RecvT>
+  void
+  _gather(const SendArrayType<SendT...>& data_array, RecvArrayType<RecvT...> gather_array, size_t rank) const
+  {
+    Assert(gather_array.size() == data_array.size() * m_size * (rank == m_rank));   // LCOV_EXCL_LINE
+
+    using SendDataType = typename SendArrayType<SendT...>::data_type;
+    using RecvDataType = typename RecvArrayType<RecvT...>::data_type;
+
+    static_assert(std::is_same_v<std::remove_const_t<SendDataType>, RecvDataType>);
+    static_assert(std::is_arithmetic_v<SendDataType>);
+
+#ifdef PUGS_HAS_MPI
+    MPI_Datatype mpi_datatype = Messenger::helper::mpiType<RecvDataType>();
+
+    auto data_address   = (data_array.size() > 0) ? &(data_array[0]) : nullptr;
+    auto gather_address = (gather_array.size() > 0) ? &(gather_array[0]) : nullptr;
+
+    MPI_Gather(data_address, data_array.size(), mpi_datatype, gather_address, data_array.size(), mpi_datatype, rank,
+               MPI_COMM_WORLD);
+#else    // PUGS_HAS_MPI
+    value_copy(data_array, gather_array);
+#endif   // PUGS_HAS_MPI
+  }
+
+  template <template <typename... SendT> typename SendArrayType,
+            template <typename... RecvT>
+            typename RecvArrayType,
+            typename... SendT,
+            typename... RecvT>
+  void
+  _gatherVariable(const SendArrayType<SendT...>& data_array,
+                  RecvArrayType<RecvT...> gather_array,
+                  Array<int> sizes_array,
+                  size_t rank) const
+  {
+    Assert(gather_array.size() - sum(sizes_array) * (rank == m_rank) == 0);   // LCOV_EXCL_LINE
+
+    using SendDataType = typename SendArrayType<SendT...>::data_type;
+    using RecvDataType = typename RecvArrayType<RecvT...>::data_type;
+
+    static_assert(std::is_same_v<std::remove_const_t<SendDataType>, RecvDataType>);
+    static_assert(std::is_arithmetic_v<SendDataType>);
+
+#ifdef PUGS_HAS_MPI
+    MPI_Datatype mpi_datatype = Messenger::helper::mpiType<RecvDataType>();
+    Array<int> start_positions{sizes_array.size()};
+    if (start_positions.size() > 0) {
+      start_positions[0] = 0;
+      for (size_t i = 1; i < start_positions.size(); ++i) {
+        start_positions[i] = start_positions[i - 1] + sizes_array[i - 1];
+      }
+    }
+
+    auto data_address      = (data_array.size() > 0) ? &(data_array[0]) : nullptr;
+    auto sizes_address     = (sizes_array.size() > 0) ? &(sizes_array[0]) : nullptr;
+    auto positions_address = (start_positions.size() > 0) ? &(start_positions[0]) : nullptr;
+    auto gather_address    = (gather_array.size() > 0) ? &(gather_array[0]) : nullptr;
+
+    MPI_Gatherv(data_address, data_array.size(), mpi_datatype, gather_address, sizes_address, positions_address,
+                mpi_datatype, rank, MPI_COMM_WORLD);
+#else    // PUGS_HAS_MPI
+    value_copy(data_array, gather_array);
+#endif   // PUGS_HAS_MPI
+  }
+
   template <typename DataType>
   void
   _allGather(const DataType& data, Array<DataType> gather) const
@@ -128,8 +216,51 @@ class Messenger
 #ifdef PUGS_HAS_MPI
     MPI_Datatype mpi_datatype = Messenger::helper::mpiType<RecvDataType>();
 
-    MPI_Allgather(&(data_array[0]), data_array.size(), mpi_datatype, &(gather_array[0]), data_array.size(),
-                  mpi_datatype, MPI_COMM_WORLD);
+    auto data_address   = (data_array.size() > 0) ? &(data_array[0]) : nullptr;
+    auto gather_address = (gather_array.size() > 0) ? &(gather_array[0]) : nullptr;
+
+    MPI_Allgather(data_address, data_array.size(), mpi_datatype, gather_address, data_array.size(), mpi_datatype,
+                  MPI_COMM_WORLD);
+#else    // PUGS_HAS_MPI
+    value_copy(data_array, gather_array);
+#endif   // PUGS_HAS_MPI
+  }
+
+  template <template <typename... SendT> typename SendArrayType,
+            template <typename... RecvT>
+            typename RecvArrayType,
+            typename... SendT,
+            typename... RecvT>
+  void
+  _allGatherVariable(const SendArrayType<SendT...>& data_array,
+                     RecvArrayType<RecvT...> gather_array,
+                     Array<int> sizes_array) const
+  {
+    Assert(gather_array.size() == static_cast<size_t>(sum(sizes_array)));   // LCOV_EXCL_LINE
+
+    using SendDataType = typename SendArrayType<SendT...>::data_type;
+    using RecvDataType = typename RecvArrayType<RecvT...>::data_type;
+
+    static_assert(std::is_same_v<std::remove_const_t<SendDataType>, RecvDataType>);
+    static_assert(std::is_arithmetic_v<SendDataType>);
+
+#ifdef PUGS_HAS_MPI
+    MPI_Datatype mpi_datatype = Messenger::helper::mpiType<RecvDataType>();
+    Array<int> start_positions{sizes_array.size()};
+    if (start_positions.size() > 0) {
+      start_positions[0] = 0;
+      for (size_t i = 1; i < start_positions.size(); ++i) {
+        start_positions[i] = start_positions[i - 1] + sizes_array[i - 1];
+      }
+    }
+
+    auto data_address      = (data_array.size() > 0) ? &(data_array[0]) : nullptr;
+    auto sizes_address     = (sizes_array.size() > 0) ? &(sizes_array[0]) : nullptr;
+    auto positions_address = (start_positions.size() > 0) ? &(start_positions[0]) : nullptr;
+    auto gather_address    = (gather_array.size() > 0) ? &(gather_array[0]) : nullptr;
+
+    MPI_Allgatherv(data_address, data_array.size(), mpi_datatype, gather_address, sizes_address, positions_address,
+                   mpi_datatype, MPI_COMM_WORLD);
 #else    // PUGS_HAS_MPI
     value_copy(data_array, gather_array);
 #endif   // PUGS_HAS_MPI
@@ -158,8 +289,10 @@ class Messenger
     static_assert(std::is_arithmetic_v<DataType>);
 
 #ifdef PUGS_HAS_MPI
+    auto array_address = (array.size() > 0) ? &(array[0]) : nullptr;
+
     MPI_Datatype mpi_datatype = Messenger::helper::mpiType<DataType>();
-    MPI_Bcast(&(array[0]), array.size(), mpi_datatype, root_rank, MPI_COMM_WORLD);
+    MPI_Bcast(array_address, array.size(), mpi_datatype, root_rank, MPI_COMM_WORLD);
 #endif   // PUGS_HAS_MPI
   }
 
@@ -185,7 +318,10 @@ class Messenger
 
     MPI_Datatype mpi_datatype = Messenger::helper::mpiType<SendDataType>();
 
-    MPI_Alltoall(&(sent_array[0]), count, mpi_datatype, &(recv_array[0]), count, mpi_datatype, MPI_COMM_WORLD);
+    auto sent_address = (sent_array.size() > 0) ? &(sent_array[0]) : nullptr;
+    auto recv_address = (recv_array.size() > 0) ? &(recv_array[0]) : nullptr;
+
+    MPI_Alltoall(sent_address, count, mpi_datatype, recv_address, count, mpi_datatype, MPI_COMM_WORLD);
 #else    // PUGS_HAS_MPI
     value_copy(sent_array, recv_array);
 #endif   // PUGS_HAS_MPI
@@ -400,6 +536,83 @@ class Messenger
 #endif   // PUGS_HAS_MPI
   }
 
+  template <typename DataType>
+  PUGS_INLINE Array<DataType>
+  gather(const DataType& data, size_t rank) const
+  {
+    static_assert(not std::is_const_v<DataType>);
+
+    Array<DataType> gather_array((m_rank == rank) ? m_size : 0);
+
+    if constexpr (std::is_arithmetic_v<DataType>) {
+      _gather(data, gather_array, rank);
+    } else if constexpr (is_trivially_castable<DataType>) {
+      using CastType = helper::split_cast_t<DataType>;
+
+      CastArray cast_value_array  = cast_value_to<const CastType>::from(data);
+      CastArray cast_gather_array = cast_array_to<CastType>::from(gather_array);
+
+      _gather(cast_value_array, cast_gather_array, rank);
+    } else {
+      static_assert(is_false_v<DataType>, "unexpected type of data");
+    }
+    return gather_array;
+  }
+
+  template <typename DataType>
+  PUGS_INLINE Array<std::remove_const_t<DataType>>
+  gather(const Array<DataType>& array, size_t rank) const
+  {
+    using MutableDataType = std::remove_const_t<DataType>;
+    Array<MutableDataType> gather_array((m_rank == rank) ? (m_size * array.size()) : 0);
+
+    if constexpr (std::is_arithmetic_v<DataType>) {
+      _gather(array, gather_array, rank);
+    } else if constexpr (is_trivially_castable<DataType>) {
+      using CastType        = helper::split_cast_t<DataType>;
+      using MutableCastType = helper::split_cast_t<MutableDataType>;
+
+      CastArray cast_array        = cast_array_to<CastType>::from(array);
+      CastArray cast_gather_array = cast_array_to<MutableCastType>::from(gather_array);
+
+      _gather(cast_array, cast_gather_array, rank);
+    } else {
+      static_assert(is_false_v<DataType>, "unexpected type of data");
+    }
+    return gather_array;
+  }
+
+  template <typename DataType>
+  PUGS_INLINE Array<std::remove_const_t<DataType>>
+  gatherVariable(const Array<DataType>& array, size_t rank) const
+  {
+    int send_size          = array.size();
+    Array<int> sizes_array = gather(send_size, rank);
+
+    using MutableDataType = std::remove_const_t<DataType>;
+    Array<MutableDataType> gather_array(sum(sizes_array));
+
+    if constexpr (std::is_arithmetic_v<DataType>) {
+      _gatherVariable(array, gather_array, sizes_array, rank);
+    } else if constexpr (is_trivially_castable<DataType>) {
+      using CastType        = helper::split_cast_t<DataType>;
+      using MutableCastType = helper::split_cast_t<MutableDataType>;
+
+      int size_ratio = sizeof(DataType) / sizeof(CastType);
+      for (size_t i = 0; i < sizes_array.size(); ++i) {
+        sizes_array[i] *= size_ratio;
+      }
+
+      CastArray cast_array        = cast_array_to<CastType>::from(array);
+      CastArray cast_gather_array = cast_array_to<MutableCastType>::from(gather_array);
+
+      _gatherVariable(cast_array, cast_gather_array, sizes_array, rank);
+    } else {
+      static_assert(is_false_v<DataType>, "unexpected type of data");
+    }
+    return gather_array;
+  }
+
   template <typename DataType>
   PUGS_INLINE Array<DataType>
   allGather(const DataType& data) const
@@ -446,6 +659,37 @@ class Messenger
     return gather_array;
   }
 
+  template <typename DataType>
+  PUGS_INLINE Array<std::remove_const_t<DataType>>
+  allGatherVariable(const Array<DataType>& array) const
+  {
+    int send_size          = array.size();
+    Array<int> sizes_array = allGather(send_size);
+
+    using MutableDataType = std::remove_const_t<DataType>;
+    Array<MutableDataType> gather_array(sum(sizes_array));
+
+    if constexpr (std::is_arithmetic_v<DataType>) {
+      _allGatherVariable(array, gather_array, sizes_array);
+    } else if constexpr (is_trivially_castable<DataType>) {
+      using CastType        = helper::split_cast_t<DataType>;
+      using MutableCastType = helper::split_cast_t<MutableDataType>;
+
+      int size_ratio = sizeof(DataType) / sizeof(CastType);
+      for (size_t i = 0; i < sizes_array.size(); ++i) {
+        sizes_array[i] *= size_ratio;
+      }
+
+      CastArray cast_array        = cast_array_to<CastType>::from(array);
+      CastArray cast_gather_array = cast_array_to<MutableCastType>::from(gather_array);
+
+      _allGatherVariable(cast_array, cast_gather_array, sizes_array);
+    } else {
+      static_assert(is_false_v<DataType>, "unexpected type of data");
+    }
+    return gather_array;
+  }
+
   template <typename SendDataType>
   PUGS_INLINE Array<std::remove_const_t<SendDataType>>
   allToAll(const Array<SendDataType>& sent_array) const
@@ -624,6 +868,27 @@ allReduceSum(const DataType& data)
   return messenger().allReduceSum(data);
 }
 
+template <typename DataType>
+PUGS_INLINE Array<DataType>
+gather(const DataType& data, size_t rank)
+{
+  return messenger().gather(data, rank);
+}
+
+template <typename DataType>
+PUGS_INLINE Array<std::remove_const_t<DataType>>
+gather(const Array<DataType>& array, size_t rank)
+{
+  return messenger().gather(array, rank);
+}
+
+template <typename DataType>
+PUGS_INLINE Array<std::remove_const_t<DataType>>
+gatherVariable(const Array<DataType>& array, size_t rank)
+{
+  return messenger().gatherVariable(array, rank);
+}
+
 template <typename DataType>
 PUGS_INLINE Array<DataType>
 allGather(const DataType& data)
@@ -638,6 +903,13 @@ allGather(const Array<DataType>& array)
   return messenger().allGather(array);
 }
 
+template <typename DataType>
+PUGS_INLINE Array<std::remove_const_t<DataType>>
+allGatherVariable(const Array<DataType>& array)
+{
+  return messenger().allGatherVariable(array);
+}
+
 template <typename DataType>
 PUGS_INLINE Array<std::remove_const_t<DataType>>
 allToAll(const Array<DataType>& array)
diff --git a/src/utils/pugs_config.hpp.in b/src/utils/pugs_config.hpp.in
index de267d26e10fb41994bbc72438b57d577e4b447a..84a6d8fa0c2d50eb623d5d1c2b52765f5610de24 100644
--- a/src/utils/pugs_config.hpp.in
+++ b/src/utils/pugs_config.hpp.in
@@ -9,6 +9,8 @@
 #cmakedefine SYSTEM_IS_DARWIN
 #cmakedefine SYSTEM_IS_WINDOWS
 
+#cmakedefine PUGS_LITTLE_ENDIAN
+
 #define PUGS_BUILD_TYPE "@CMAKE_BUILD_TYPE@"
 #define PUGS_BINARY_DIR "@PUGS_BINARY_DIR@"
 
diff --git a/tests/test_Messenger.cpp b/tests/test_Messenger.cpp
index 22945c15e2e8ad244cdc25f27a40fdee7cba56f5..eebd5dc939c23c7875c3b92ff697cb1f00afca63 100644
--- a/tests/test_Messenger.cpp
+++ b/tests/test_Messenger.cpp
@@ -253,6 +253,372 @@ TEST_CASE("Messenger", "[mpi]")
     }
   }
 
+  SECTION("gather value to")
+  {
+    {
+      const size_t target_rank = 10 % parallel::size();
+      // simple type
+      size_t value{(3 + parallel::rank()) * 2};
+      Array<size_t> gather_array = parallel::gather(value, target_rank);
+      if (parallel::rank() == target_rank) {
+        REQUIRE(gather_array.size() == parallel::size());
+
+        for (size_t i = 0; i < gather_array.size(); ++i) {
+          REQUIRE((gather_array[i] == (3 + i) * 2));
+        }
+      } else {
+        REQUIRE(gather_array.size() == 0);
+      }
+    }
+
+    {
+      const size_t target_rank = 7 % parallel::size();
+      // trivial simple type
+      mpi_check::integer value{static_cast<int>((3 + parallel::rank()) * 2)};
+      Array<mpi_check::integer> gather_array = parallel::gather(value, target_rank);
+      if (parallel::rank() == target_rank) {
+        REQUIRE(gather_array.size() == parallel::size());
+
+        for (size_t i = 0; i < gather_array.size(); ++i) {
+          const int expected_value = (3 + i) * 2;
+          REQUIRE(gather_array[i] == expected_value);
+        }
+      } else {
+        REQUIRE(gather_array.size() == 0);
+      }
+    }
+
+    {
+      const size_t target_rank = 5 % parallel::size();
+      // compound trivial type
+      mpi_check::tri_int value{static_cast<int>((3 + parallel::rank()) * 2), static_cast<int>(2 + parallel::rank()),
+                               static_cast<int>(4 - parallel::rank())};
+      Array<mpi_check::tri_int> gather_array = parallel::gather(value, target_rank);
+
+      if (parallel::rank() == target_rank) {
+        REQUIRE(gather_array.size() == parallel::size());
+        for (size_t i = 0; i < gather_array.size(); ++i) {
+          mpi_check::tri_int expected_value{static_cast<int>((3 + i) * 2), static_cast<int>(2 + i),
+                                            static_cast<int>(4 - i)};
+          REQUIRE((gather_array[i] == expected_value));
+        }
+      } else {
+        REQUIRE(gather_array.size() == 0);
+      }
+    }
+  }
+
+  SECTION("gather array to")
+  {
+    {
+      const size_t target_rank = 17 % parallel::size();
+      // simple type
+      Array<int> array(3);
+      for (size_t i = 0; i < array.size(); ++i) {
+        array[i] = (3 + parallel::rank()) * 2 + i;
+      }
+      Array<int> gather_array = parallel::gather(array, target_rank);
+      if (parallel::rank() == target_rank) {
+        REQUIRE(gather_array.size() == array.size() * parallel::size());
+
+        for (size_t i = 0; i < gather_array.size(); ++i) {
+          const int expected_value = (3 + i / array.size()) * 2 + (i % array.size());
+          REQUIRE((gather_array[i] == expected_value));
+        }
+      } else {
+        REQUIRE(gather_array.size() == 0);
+      }
+    }
+
+    {
+      const size_t target_rank = 13 % parallel::size();
+      // trivial simple type
+      Array<mpi_check::integer> array(3);
+      for (size_t i = 0; i < array.size(); ++i) {
+        array[i] = (3 + parallel::rank()) * 2 + i;
+      }
+      Array<mpi_check::integer> gather_array = parallel::gather(array, target_rank);
+      if (parallel::rank() == target_rank) {
+        REQUIRE(gather_array.size() == array.size() * parallel::size());
+
+        for (size_t i = 0; i < gather_array.size(); ++i) {
+          const int expected_value = (3 + i / array.size()) * 2 + (i % array.size());
+          REQUIRE((gather_array[i] == expected_value));
+        }
+      } else {
+        REQUIRE(gather_array.size() == 0);
+      }
+    }
+
+    {
+      const size_t target_rank = 11 % parallel::size();
+      // compound trivial type
+      Array<mpi_check::tri_int> array(3);
+      for (size_t i = 0; i < array.size(); ++i) {
+        array[i] =
+          mpi_check::tri_int{static_cast<int>((3 + parallel::rank()) * 2), static_cast<int>(2 + parallel::rank() + i),
+                             static_cast<int>(4 - parallel::rank() - i)};
+      }
+      Array<mpi_check::tri_int> gather_array = parallel::gather(array, target_rank);
+
+      if (parallel::rank() == target_rank) {
+        REQUIRE(gather_array.size() == array.size() * parallel::size());
+        for (size_t i = 0; i < gather_array.size(); ++i) {
+          mpi_check::tri_int expected_value{static_cast<int>((3 + i / array.size()) * 2),
+                                            static_cast<int>(2 + i / array.size() + (i % array.size())),
+                                            static_cast<int>(4 - i / array.size() - (i % array.size()))};
+          REQUIRE((gather_array[i] == expected_value));
+        }
+      } else {
+        REQUIRE(gather_array.size() == 0);
+      }
+    }
+  }
+
+  SECTION("gather variable array to")
+  {
+    {
+      const size_t target_rank = 17 % parallel::size();
+      // simple type
+      Array<size_t> array(3 + parallel::rank());
+      for (size_t i = 0; i < array.size(); ++i) {
+        array[i] = (parallel::rank() + i) * 2 + i;
+      }
+      Array<size_t> gather_array = parallel::gatherVariable(array, target_rank);
+
+      if (parallel::rank() == target_rank) {
+        const size_t gather_array_size = [&]() {
+          size_t size = 0;
+          for (size_t i_rank = 0; i_rank < parallel::size(); ++i_rank) {
+            size += (3 + i_rank);
+          }
+          return size;
+        }();
+
+        REQUIRE(gather_array.size() == gather_array_size);
+
+        {
+          size_t i = 0;
+          for (size_t i_rank = 0; i_rank < parallel::size(); ++i_rank) {
+            for (size_t j = 0; j < i_rank + 3; ++j) {
+              REQUIRE(gather_array[i++] == (i_rank + j) * 2 + j);
+            }
+          }
+        }
+      } else {
+        REQUIRE(gather_array.size() == 0);
+      }
+    }
+
+    {
+      const size_t target_rank = 0;
+      // trivial simple type
+      Array<mpi_check::integer> array(2 + 2 * parallel::rank());
+      for (size_t i = 0; i < array.size(); ++i) {
+        array[i] = (3 + parallel::rank()) * 2 + i;
+      }
+      Array<mpi_check::integer> gather_array = parallel::gatherVariable(array, target_rank);
+
+      if (parallel::rank() == target_rank) {
+        const size_t gather_array_size = [&]() {
+          size_t size = 0;
+          for (size_t i_rank = 0; i_rank < parallel::size(); ++i_rank) {
+            size += (2 + 2 * i_rank);
+          }
+          return size;
+        }();
+
+        REQUIRE(gather_array.size() == gather_array_size);
+
+        {
+          size_t i = 0;
+          for (size_t i_rank = 0; i_rank < parallel::size(); ++i_rank) {
+            for (size_t j = 0; j < 2 + 2 * i_rank; ++j) {
+              REQUIRE(gather_array[i++] == (3 + i_rank) * 2 + j);
+            }
+          }
+        }
+      } else {
+        REQUIRE(gather_array.size() == 0);
+      }
+    }
+
+    {
+      const size_t target_rank = 13 % parallel::size();
+      // compound trivial type
+      Array<mpi_check::tri_int> array(3 * parallel::rank() + 1);
+      for (size_t i = 0; i < array.size(); ++i) {
+        array[i] =
+          mpi_check::tri_int{static_cast<int>((3 + parallel::rank()) * 2), static_cast<int>(2 + parallel::rank() + i),
+                             static_cast<int>(4 - parallel::rank() - i)};
+      }
+      Array<mpi_check::tri_int> gather_array = parallel::gatherVariable(array, target_rank);
+
+      if (parallel::rank() == target_rank) {
+        const size_t gather_array_size = [&]() {
+          size_t size = 0;
+          for (size_t i_rank = 0; i_rank < parallel::size(); ++i_rank) {
+            size += (3 * i_rank + 1);
+          }
+          return size;
+        }();
+
+        REQUIRE(gather_array.size() == gather_array_size);
+
+        {
+          size_t i = 0;
+          for (size_t i_rank = 0; i_rank < parallel::size(); ++i_rank) {
+            for (size_t j = 0; j < 3 * i_rank + 1; ++j) {
+              auto value = mpi_check::tri_int{static_cast<int>((3 + i_rank) * 2), static_cast<int>(2 + i_rank + j),
+                                              static_cast<int>(4 - i_rank - j)};
+
+              REQUIRE(gather_array[i++] == value);
+            }
+          }
+        }
+      } else {
+        REQUIRE(gather_array.size() == 0);
+      }
+    }
+  }
+
+  SECTION("gather variable array to (with some empty)")
+  {
+    {
+      const size_t target_rank = 17 % parallel::size();
+      // simple type
+      Array<size_t> array(3 * parallel::rank());
+      for (size_t i = 0; i < array.size(); ++i) {
+        array[i] = (parallel::rank() + i) * 2 + i;
+      }
+      Array<size_t> gather_array = parallel::gatherVariable(array, target_rank);
+
+      if (parallel::rank() == target_rank) {
+        const size_t gather_array_size = [&]() {
+          size_t size = 0;
+          for (size_t i_rank = 0; i_rank < parallel::size(); ++i_rank) {
+            size += (3 * i_rank);
+          }
+          return size;
+        }();
+
+        REQUIRE(gather_array.size() == gather_array_size);
+
+        {
+          size_t i = 0;
+          for (size_t i_rank = 0; i_rank < parallel::size(); ++i_rank) {
+            for (size_t j = 0; j < i_rank * 3; ++j) {
+              REQUIRE(gather_array[i++] == (i_rank + j) * 2 + j);
+            }
+          }
+        }
+      } else {
+        REQUIRE(gather_array.size() == 0);
+      }
+    }
+
+    {
+      const size_t target_rank = 0;
+      // trivial simple type
+      Array<mpi_check::integer> array((parallel::rank() < parallel::size() - 1) ? (2 + 2 * parallel::rank()) : 0);
+      for (size_t i = 0; i < array.size(); ++i) {
+        array[i] = (3 + parallel::rank()) * 2 + i;
+      }
+      Array<mpi_check::integer> gather_array = parallel::gatherVariable(array, target_rank);
+
+      if (parallel::rank() == target_rank) {
+        const size_t gather_array_size = [&]() {
+          size_t size = 0;
+          for (size_t i_rank = 0; i_rank < parallel::size(); ++i_rank) {
+            size += (i_rank < parallel::size() - 1) ? (2 + 2 * i_rank) : 0;
+          }
+          return size;
+        }();
+
+        REQUIRE(gather_array.size() == gather_array_size);
+
+        {
+          size_t i = 0;
+          for (size_t i_rank = 0; i_rank < parallel::size(); ++i_rank) {
+            for (size_t j = 0; j < ((i_rank < parallel::size() - 1) ? (2 + 2 * i_rank) : 0); ++j) {
+              REQUIRE(gather_array[i++] == (3 + i_rank) * 2 + j);
+            }
+          }
+        }
+      } else {
+        REQUIRE(gather_array.size() == 0);
+      }
+    }
+
+    {
+      const size_t target_rank = 13 % parallel::size();
+      // compound trivial type
+      Array<mpi_check::tri_int> array(3 * parallel::rank());
+      for (size_t i = 0; i < array.size(); ++i) {
+        array[i] =
+          mpi_check::tri_int{static_cast<int>((3 + parallel::rank()) * 2), static_cast<int>(2 + parallel::rank() + i),
+                             static_cast<int>(4 - parallel::rank() - i)};
+      }
+      Array<mpi_check::tri_int> gather_array = parallel::gatherVariable(array, target_rank);
+
+      if (parallel::rank() == target_rank) {
+        const size_t gather_array_size = [&]() {
+          size_t size = 0;
+          for (size_t i_rank = 0; i_rank < parallel::size(); ++i_rank) {
+            size += (3 * i_rank);
+          }
+          return size;
+        }();
+
+        REQUIRE(gather_array.size() == gather_array_size);
+
+        {
+          size_t i = 0;
+          for (size_t i_rank = 0; i_rank < parallel::size(); ++i_rank) {
+            for (size_t j = 0; j < 3 * i_rank; ++j) {
+              auto value = mpi_check::tri_int{static_cast<int>((3 + i_rank) * 2), static_cast<int>(2 + i_rank + j),
+                                              static_cast<int>(4 - i_rank - j)};
+
+              REQUIRE(gather_array[i++] == value);
+            }
+          }
+        }
+      } else {
+        REQUIRE(gather_array.size() == 0);
+      }
+    }
+  }
+
+  SECTION("gather variable array to (with all empty arrays)")
+  {
+    {
+      const size_t target_rank = 7 % parallel::size();
+      // simple type [empty arrays]
+      Array<size_t> array;
+      Array<size_t> gather_array = parallel::gatherVariable(array, target_rank);
+
+      REQUIRE(gather_array.size() == 0);
+    }
+
+    {
+      const size_t target_rank = 0;
+      // trivial simple type
+      Array<mpi_check::integer> array;
+      Array<mpi_check::integer> gather_array = parallel::gatherVariable(array, target_rank);
+
+      REQUIRE(gather_array.size() == 0);
+    }
+
+    {
+      const size_t target_rank = 13 % parallel::size();
+      // compound trivial type
+      Array<mpi_check::tri_int> array;
+      Array<mpi_check::tri_int> gather_array = parallel::gatherVariable(array, target_rank);
+
+      REQUIRE(gather_array.size() == 0);
+    }
+  }
+
   SECTION("all gather value")
   {
     {
@@ -345,6 +711,242 @@ TEST_CASE("Messenger", "[mpi]")
     }
   }
 
+  SECTION("all gather empty array")
+  {
+    {
+      // simple type
+      Array<int> array;
+      Array<int> gather_array = parallel::allGather(array);
+      REQUIRE(gather_array.size() == 0);
+    }
+
+    {
+      // trivial simple type
+      Array<mpi_check::integer> array;
+      Array<mpi_check::integer> gather_array = parallel::allGather(array);
+      REQUIRE(gather_array.size() == 0);
+    }
+
+    {
+      // compound trivial type
+      Array<mpi_check::tri_int> array;
+      Array<mpi_check::tri_int> gather_array = parallel::allGather(array);
+
+      REQUIRE(gather_array.size() == 0);
+    }
+  }
+
+  SECTION("all gather variable array")
+  {
+    {
+      // simple type
+      Array<size_t> array(3 + parallel::rank());
+      for (size_t i = 0; i < array.size(); ++i) {
+        array[i] = (parallel::rank() + i) * 2 + i;
+      }
+      Array<size_t> gather_array = parallel::allGatherVariable(array);
+
+      const size_t gather_array_size = [&]() {
+        size_t size = 0;
+        for (size_t i_rank = 0; i_rank < parallel::size(); ++i_rank) {
+          size += (3 + i_rank);
+        }
+        return size;
+      }();
+
+      REQUIRE(gather_array.size() == gather_array_size);
+
+      {
+        size_t i = 0;
+        for (size_t i_rank = 0; i_rank < parallel::size(); ++i_rank) {
+          for (size_t j = 0; j < i_rank + 3; ++j) {
+            REQUIRE(gather_array[i++] == (i_rank + j) * 2 + j);
+          }
+        }
+      }
+    }
+
+    {
+      // trivial simple type
+      Array<mpi_check::integer> array(2 + 2 * parallel::rank());
+      for (size_t i = 0; i < array.size(); ++i) {
+        array[i] = (3 + parallel::rank()) * 2 + i;
+      }
+      Array<mpi_check::integer> gather_array = parallel::allGatherVariable(array);
+
+      const size_t gather_array_size = [&]() {
+        size_t size = 0;
+        for (size_t i_rank = 0; i_rank < parallel::size(); ++i_rank) {
+          size += (2 + 2 * i_rank);
+        }
+        return size;
+      }();
+
+      REQUIRE(gather_array.size() == gather_array_size);
+
+      {
+        size_t i = 0;
+        for (size_t i_rank = 0; i_rank < parallel::size(); ++i_rank) {
+          for (size_t j = 0; j < 2 + 2 * i_rank; ++j) {
+            REQUIRE(gather_array[i++] == (3 + i_rank) * 2 + j);
+          }
+        }
+      }
+    }
+
+    {
+      // compound trivial type
+      Array<mpi_check::tri_int> array(3 * parallel::rank() + 1);
+      for (size_t i = 0; i < array.size(); ++i) {
+        array[i] =
+          mpi_check::tri_int{static_cast<int>((3 + parallel::rank()) * 2), static_cast<int>(2 + parallel::rank() + i),
+                             static_cast<int>(4 - parallel::rank() - i)};
+      }
+      Array<mpi_check::tri_int> gather_array = parallel::allGatherVariable(array);
+
+      const size_t gather_array_size = [&]() {
+        size_t size = 0;
+        for (size_t i_rank = 0; i_rank < parallel::size(); ++i_rank) {
+          size += (3 * i_rank + 1);
+        }
+        return size;
+      }();
+
+      REQUIRE(gather_array.size() == gather_array_size);
+
+      {
+        size_t i = 0;
+        for (size_t i_rank = 0; i_rank < parallel::size(); ++i_rank) {
+          for (size_t j = 0; j < 3 * i_rank + 1; ++j) {
+            auto value = mpi_check::tri_int{static_cast<int>((3 + i_rank) * 2), static_cast<int>(2 + i_rank + j),
+                                            static_cast<int>(4 - i_rank - j)};
+
+            REQUIRE(gather_array[i++] == value);
+          }
+        }
+      }
+    }
+  }
+
+  SECTION("all gather variable array to (with some empty)")
+  {
+    {
+      // simple type
+      Array<size_t> array(3 * parallel::rank());
+      for (size_t i = 0; i < array.size(); ++i) {
+        array[i] = (parallel::rank() + i) * 2 + i;
+      }
+      Array<size_t> gather_array = parallel::allGatherVariable(array);
+
+      const size_t gather_array_size = [&]() {
+        size_t size = 0;
+        for (size_t i_rank = 0; i_rank < parallel::size(); ++i_rank) {
+          size += (3 * i_rank);
+        }
+        return size;
+      }();
+
+      REQUIRE(gather_array.size() == gather_array_size);
+
+      {
+        size_t i = 0;
+        for (size_t i_rank = 0; i_rank < parallel::size(); ++i_rank) {
+          for (size_t j = 0; j < i_rank * 3; ++j) {
+            REQUIRE(gather_array[i++] == (i_rank + j) * 2 + j);
+          }
+        }
+      }
+    }
+
+    {
+      // trivial simple type
+      Array<mpi_check::integer> array((parallel::rank() < parallel::size() - 1) ? (2 + 2 * parallel::rank()) : 0);
+      for (size_t i = 0; i < array.size(); ++i) {
+        array[i] = (3 + parallel::rank()) * 2 + i;
+      }
+      Array<mpi_check::integer> gather_array = parallel::allGatherVariable(array);
+
+      const size_t gather_array_size = [&]() {
+        size_t size = 0;
+        for (size_t i_rank = 0; i_rank < parallel::size(); ++i_rank) {
+          size += (i_rank < parallel::size() - 1) ? (2 + 2 * i_rank) : 0;
+        }
+        return size;
+      }();
+
+      REQUIRE(gather_array.size() == gather_array_size);
+
+      {
+        size_t i = 0;
+        for (size_t i_rank = 0; i_rank < parallel::size(); ++i_rank) {
+          for (size_t j = 0; j < ((i_rank < parallel::size() - 1) ? (2 + 2 * i_rank) : 0); ++j) {
+            REQUIRE(gather_array[i++] == (3 + i_rank) * 2 + j);
+          }
+        }
+      }
+    }
+
+    {
+      // compound trivial type
+      Array<mpi_check::tri_int> array(3 * parallel::rank());
+      for (size_t i = 0; i < array.size(); ++i) {
+        array[i] =
+          mpi_check::tri_int{static_cast<int>((3 + parallel::rank()) * 2), static_cast<int>(2 + parallel::rank() + i),
+                             static_cast<int>(4 - parallel::rank() - i)};
+      }
+      Array<mpi_check::tri_int> gather_array = parallel::allGatherVariable(array);
+
+      const size_t gather_array_size = [&]() {
+        size_t size = 0;
+        for (size_t i_rank = 0; i_rank < parallel::size(); ++i_rank) {
+          size += (3 * i_rank);
+        }
+        return size;
+      }();
+
+      REQUIRE(gather_array.size() == gather_array_size);
+
+      {
+        size_t i = 0;
+        for (size_t i_rank = 0; i_rank < parallel::size(); ++i_rank) {
+          for (size_t j = 0; j < 3 * i_rank; ++j) {
+            auto value = mpi_check::tri_int{static_cast<int>((3 + i_rank) * 2), static_cast<int>(2 + i_rank + j),
+                                            static_cast<int>(4 - i_rank - j)};
+
+            REQUIRE(gather_array[i++] == value);
+          }
+        }
+      }
+    }
+  }
+
+  SECTION("gather variable array to (with all empty arrays)")
+  {
+    {
+      // simple type [empty arrays]
+      Array<size_t> array;
+      Array<size_t> gather_array = parallel::allGatherVariable(array);
+
+      REQUIRE(gather_array.size() == 0);
+    }
+
+    {
+      // trivial simple type
+      Array<mpi_check::integer> array;
+      Array<mpi_check::integer> gather_array = parallel::allGatherVariable(array);
+
+      REQUIRE(gather_array.size() == 0);
+    }
+
+    {
+      // compound trivial type
+      Array<mpi_check::tri_int> array;
+      Array<mpi_check::tri_int> gather_array = parallel::allGatherVariable(array);
+
+      REQUIRE(gather_array.size() == 0);
+    }
+  }
+
   SECTION("all array exchanges")
   {
     {   // simple type