From 7efc43fa21b83a00a7de311b56086c04047ba78e Mon Sep 17 00:00:00 2001
From: Stephane Del Pino <stephane.delpino44@gmail.com>
Date: Mon, 17 Feb 2025 16:59:09 +0100
Subject: [PATCH] Fix connectivity dispatcher when for already partitioned
 meshes

---
 src/mesh/ConnectivityDispatcher.cpp | 169 ++++++++++++++++++++++++----
 src/mesh/MeshBuilderBase.cpp        |  22 +---
 src/utils/ParMETISPartitioner.cpp   |  12 --
 3 files changed, 145 insertions(+), 58 deletions(-)

diff --git a/src/mesh/ConnectivityDispatcher.cpp b/src/mesh/ConnectivityDispatcher.cpp
index bda78f1ac..45e5ebd8a 100644
--- a/src/mesh/ConnectivityDispatcher.cpp
+++ b/src/mesh/ConnectivityDispatcher.cpp
@@ -5,9 +5,7 @@
 #include <utils/Partitioner.hpp>
 
 #include <iostream>
-
-#warning remove
-#include <utils/Demangle.hpp>
+#include <unordered_set>
 
 template <size_t Dimension>
 template <ItemType item_type>
@@ -84,7 +82,6 @@ ConnectivityDispatcher<Dimension>::_buildItemListToSend()
     const auto& cell_is_owned       = m_connectivity.cellIsOwned();
 
     const auto& cell_new_owner = this->_dispatchedInfo<ItemType::cell>().m_new_owner;
-    const auto& cell_number    = m_connectivity.cellNumber();
 
     std::vector<std::vector<CellId>> cell_vector_to_send_by_proc(parallel::size());
     Array<bool> send_to_rank(parallel::size());
@@ -120,6 +117,8 @@ ConnectivityDispatcher<Dimension>::_buildItemListToSend()
     const auto& cell_list_to_send_by_proc = this->_dispatchedInfo<ItemType::cell>().m_list_to_send_by_proc;
 
     const auto& item_is_owned = m_connectivity.template isOwned<item_type>();
+    const auto& item_owner    = m_connectivity.template owner<item_type>();
+    const auto& item_number   = m_connectivity.template number<item_type>();
 
     using ItemId                        = ItemIdT<item_type>;
     const auto& cell_to_sub_item_matrix = m_connectivity.template getItemToItemMatrix<ItemType::cell, item_type>();
@@ -127,18 +126,131 @@ ConnectivityDispatcher<Dimension>::_buildItemListToSend()
     auto& item_list_to_send_by_proc = this->_dispatchedInfo<item_type>().m_list_to_send_by_proc;
     item_list_to_send_by_proc.resize(parallel::size());
 
+    Array<bool> tag(m_connectivity.template numberOf<item_type>());
+
+    struct ToNumber
+    {
+      int to;
+      int number;
+      bool
+      operator==(const ToNumber& x) const
+      {
+        return to == x.to and number == x.number;
+      }
+      ToNumber() = default;
+      ToNumber(int _to, int _number) : to{_to}, number{_number} {}
+    };
+
+    struct hashFunction
+    {
+      size_t
+      operator()(const ToNumber& x) const
+      {
+        return x.to ^ x.number;
+      }
+    };
+
+    std::vector<std::unordered_set<ToNumber, hashFunction>> request_not_owned_to_send_by_proc(parallel::size());
+
+    for (size_t i_rank = 0; i_rank < parallel::size(); ++i_rank) {
+      tag.fill(false);
+      for (size_t i_cell = 0; i_cell < cell_list_to_send_by_proc[i_rank].size(); ++i_cell) {
+        const CellId& cell_id          = cell_list_to_send_by_proc[i_rank][i_cell];
+        const auto& cell_sub_item_list = cell_to_sub_item_matrix[cell_id];
+        for (size_t i_item = 0; i_item < cell_sub_item_list.size(); ++i_item) {
+          const ItemId& sub_item_id = cell_sub_item_list[i_item];
+          if (not item_is_owned[sub_item_id] and (not tag[sub_item_id])) {
+            request_not_owned_to_send_by_proc[item_owner[sub_item_id]].insert(
+              ToNumber{static_cast<int>(i_rank), item_number[sub_item_id]});
+            tag[sub_item_id] = true;
+          }
+        }
+      }
+    }
+
+    std::vector<Array<int>> not_owned_info_to_send_by_proc(parallel::size());
+    for (size_t i_rank = 0; i_rank < parallel::size(); ++i_rank) {
+      Array<int> info_to_send_by_proc(2 * request_not_owned_to_send_by_proc[i_rank].size());
+      size_t i = 0;
+      for (auto&& to_send_info : request_not_owned_to_send_by_proc[i_rank]) {
+        info_to_send_by_proc[i]     = to_send_info.to;
+        info_to_send_by_proc[i + 1] = to_send_info.number;
+        i += 2;
+      }
+      not_owned_info_to_send_by_proc[i_rank] = info_to_send_by_proc;
+    }
+
+    std::vector<Array<int>> number_of_not_owned_to_send_by_proc(parallel::size());
+    for (size_t i_rank = 0; i_rank < parallel::size(); ++i_rank) {
+      number_of_not_owned_to_send_by_proc[i_rank]    = Array<int>(1);
+      number_of_not_owned_to_send_by_proc[i_rank][0] = request_not_owned_to_send_by_proc[i_rank].size();
+    }
+
+    std::vector<Array<int>> number_of_possibly_item_to_send_by_proc(parallel::size());
+    for (size_t i_rank = 0; i_rank < parallel::size(); ++i_rank) {
+      number_of_possibly_item_to_send_by_proc[i_rank] = Array<int>(1);
+    }
+    parallel::exchange(number_of_not_owned_to_send_by_proc, number_of_possibly_item_to_send_by_proc);
+
+    std::vector<Array<int>> possible_info_to_send_demanded_by_rank(parallel::size());
+    for (size_t i_rank = 0; i_rank < parallel::size(); ++i_rank) {
+      possible_info_to_send_demanded_by_rank[i_rank] =
+        Array<int>(2 * number_of_possibly_item_to_send_by_proc[i_rank][0]);
+    }
+
+    parallel::exchange(not_owned_info_to_send_by_proc, possible_info_to_send_demanded_by_rank);
+
+    std::vector<std::vector<int>> possible_item_to_send_to_rank(parallel::size());
+    for (size_t i_demanding_rank = 0; i_demanding_rank < parallel::size(); ++i_demanding_rank) {
+      const auto& possible_info_to_send_to = possible_info_to_send_demanded_by_rank[i_demanding_rank];
+      for (size_t i = 0; i < possible_info_to_send_to.size(); i += 2) {
+        const int i_rank = possible_info_to_send_to[i];
+        const int number = possible_info_to_send_to[i + 1];
+        possible_item_to_send_to_rank[i_rank].push_back(number);
+      }
+    }
+
+    const bool has_possible_item_to_send = [&] {
+      for (auto&& item_to_send_list : possible_item_to_send_to_rank) {
+        if (item_to_send_list.size() > 0) {
+          return true;
+        }
+      }
+      return false;
+    }();
+
+    std::unordered_map<int, ItemId> owned_number_to_item_id_map;
+    if (has_possible_item_to_send) {
+      for (ItemId item_id = 0; item_id < m_connectivity.template numberOf<item_type>(); ++item_id) {
+        if (item_is_owned[item_id]) {
+          owned_number_to_item_id_map[item_number[item_id]] = item_id;
+        }
+      }
+    }
+
     for (size_t i_rank = 0; i_rank < parallel::size(); ++i_rank) {
-      Array<bool> tag(m_connectivity.template numberOf<item_type>());
       tag.fill(false);
       std::vector<ItemId> item_id_vector;
-      for (size_t j = 0; j < cell_list_to_send_by_proc[i_rank].size(); ++j) {
-        const CellId& cell_id          = cell_list_to_send_by_proc[i_rank][j];
+      const auto& possible_item_to_send = possible_item_to_send_to_rank[i_rank];
+
+      for (auto&& possible_item_number : possible_item_to_send) {
+        const auto& searched_item_id = owned_number_to_item_id_map.find(possible_item_number);
+        Assert(searched_item_id != owned_number_to_item_id_map.end());
+        const ItemId item_id = searched_item_id->second;
+        if (not tag[item_id]) {
+          item_id_vector.push_back(item_id);
+          tag[item_id] = true;
+        }
+      }
+
+      for (size_t i_cell = 0; i_cell < cell_list_to_send_by_proc[i_rank].size(); ++i_cell) {
+        const CellId& cell_id          = cell_list_to_send_by_proc[i_rank][i_cell];
         const auto& cell_sub_item_list = cell_to_sub_item_matrix[cell_id];
-        for (size_t r = 0; r < cell_sub_item_list.size(); ++r) {
-          const ItemId& item_id = cell_sub_item_list[r];
-          if (item_is_owned[item_id] and (not tag[item_id])) {
-            item_id_vector.push_back(item_id);
-            tag[item_id] = true;
+        for (size_t i_item = 0; i_item < cell_sub_item_list.size(); ++i_item) {
+          const ItemId& sub_item_id = cell_sub_item_list[i_item];
+          if (item_is_owned[sub_item_id] and (not tag[sub_item_id])) {
+            item_id_vector.push_back(sub_item_id);
+            tag[sub_item_id] = true;
           }
         }
       }
@@ -364,6 +476,8 @@ ConnectivityDispatcher<Dimension>::_buildItemToSubItemDescriptor()
 
   const auto& item_list_to_recv_size_by_proc = this->_dispatchedInfo<item_type>().m_list_to_recv_size_by_proc;
 
+  const auto& recv_id_correspondance_by_proc = this->_dispatchedInfo<item_type>().m_recv_id_correspondance_by_proc;
+
   const auto& number_of_sub_item_per_item_to_recv_by_proc =
     this->_dispatchedInfo<ItemOfItemT>().m_number_of_sub_item_per_item_to_recv_by_proc;
 
@@ -373,7 +487,14 @@ ConnectivityDispatcher<Dimension>::_buildItemToSubItemDescriptor()
     this->_dispatchedInfo<ItemOfItemT>().m_sub_item_numbers_to_recv_by_proc;
 
   std::vector<std::vector<unsigned int>> item_to_subitem_legacy;
-  size_t number_of_node_by_cell = 0;
+  item_to_subitem_legacy.resize([&] {
+    size_t size = 0;
+    for (size_t i_rank = 0; i_rank < parallel::size(); ++i_rank) {
+      size += item_list_to_recv_size_by_proc[i_rank];
+    }
+    return size;
+  }());
+  size_t number_of_subitem_by_item = 0;
   for (size_t i_rank = 0; i_rank < parallel::size(); ++i_rank) {
     int l = 0;
     for (size_t i = 0; i < item_list_to_recv_size_by_proc[i_rank]; ++i) {
@@ -383,14 +504,14 @@ ConnectivityDispatcher<Dimension>::_buildItemToSubItemDescriptor()
         Assert(searched_sub_item_id != sub_item_number_id_map.end());
         sub_item_vector.push_back(searched_sub_item_id->second);
       }
-      number_of_node_by_cell += sub_item_vector.size();
+      number_of_subitem_by_item += sub_item_vector.size();
 
-      item_to_subitem_legacy.emplace_back(sub_item_vector);
+      item_to_subitem_legacy[recv_id_correspondance_by_proc[i_rank][i]] = std::move(sub_item_vector);
     }
   }
 
   Array<unsigned int> item_to_subitem_row_map(item_to_subitem_legacy.size() + 1);
-  Array<unsigned int> item_to_subitem_list(number_of_node_by_cell);
+  Array<unsigned int> item_to_subitem_list(number_of_subitem_by_item);
 
   item_to_subitem_row_map[0] = 0;
   for (size_t i = 0; i < item_to_subitem_legacy.size(); ++i) {
@@ -469,11 +590,6 @@ ConnectivityDispatcher<Dimension>::_buildItemReferenceList()
   }();
 
   if (number_of_item_list_sender > 0) {
-    if (number_of_item_list_sender > 1) {
-      std::cerr << __FILE__ << ':' << __LINE__ << ": " << rang::fgB::red
-                << "need to check that knowing procs know the same item_ref_lists!" << rang::fg::reset << '\n';
-    }
-
     if (number_of_item_list_sender < parallel::size()) {
       const size_t sender_rank = [&]() {
         size_t i_rank = 0;
@@ -634,6 +750,10 @@ ConnectivityDispatcher<Dimension>::_dispatchEdges()
     this->_buildSubItemNumberToIdMap<EdgeOfCell>();
     this->_buildItemToExchangeLists<ItemType::edge>();
 
+    this->_buildNumberOfSubItemPerItemToRecvByProc<NodeOfEdge>();
+    this->_buildSubItemNumbersToRecvByProc<NodeOfEdge>();
+    this->_buildItemToSubItemDescriptor<NodeOfEdge>();
+
     m_new_descriptor.setEdgeNumberVector([&] {
       Array<int> edge_number_vector;
       this->_gatherFrom(m_connectivity.template number<ItemType::edge>(), edge_number_vector);
@@ -642,10 +762,6 @@ ConnectivityDispatcher<Dimension>::_dispatchEdges()
 
     this->_buildItemToSubItemDescriptor<EdgeOfCell>();
 
-    this->_buildNumberOfSubItemPerItemToRecvByProc<NodeOfEdge>();
-    this->_buildSubItemNumbersToRecvByProc<NodeOfEdge>();
-    this->_buildItemToSubItemDescriptor<NodeOfEdge>();
-
     this->_buildNumberOfSubItemPerItemToRecvByProc<EdgeOfFace>();
     this->_buildSubItemNumbersToRecvByProc<EdgeOfFace>();
     this->_buildItemToSubItemDescriptor<EdgeOfFace>();
@@ -716,12 +832,15 @@ ConnectivityDispatcher<Dimension>::ConnectivityDispatcher(const ConnectivityType
   }
 
   this->_buildNewOwner<ItemType::cell>();
+
   if constexpr (Dimension > 1) {
     this->_buildNewOwner<ItemType::face>();
   }
+
   if constexpr (Dimension > 2) {
     this->_buildNewOwner<ItemType::edge>();
   }
+
   this->_buildNewOwner<ItemType::node>();
 
   this->_buildItemToExchangeLists<ItemType::cell>();
diff --git a/src/mesh/MeshBuilderBase.cpp b/src/mesh/MeshBuilderBase.cpp
index 9bc0b5ddd..861050ed7 100644
--- a/src/mesh/MeshBuilderBase.cpp
+++ b/src/mesh/MeshBuilderBase.cpp
@@ -42,27 +42,7 @@ MeshBuilderBase::_dispatch()
   std::shared_ptr dispatched_connectivity = p_dispatcher->dispatchedConnectivity();
   NodeValue<Rd> dispatched_xr             = p_dispatcher->dispatch(mesh.xr());
 
-  std::cout << rang::fgB::magenta << "dispatched xr=" << dispatched_xr << rang::fg::reset << '\n';
-
-  auto dispatched_mesh = std::make_shared<const MeshType>(dispatched_connectivity, dispatched_xr);
-
-  {
-    const auto& d_connectivity = dispatched_mesh->connectivity();
-    auto cell_to_node_matrix   = d_connectivity.cellToNodeMatrix();
-    auto d_xr                  = dispatched_mesh->xr();
-    auto d_cell_number         = d_connectivity.cellNumber();
-
-    for (CellId cell_id = 0; cell_id < dispatched_mesh->numberOfCells(); ++cell_id) {
-      std::cout << cell_id << "(" << d_cell_number[cell_id] << "):\n";
-      const auto cell_nodes = cell_to_node_matrix[cell_id];
-      for (size_t i_node = 0; i_node < cell_nodes.size(); ++i_node) {
-        const NodeId node_id = cell_nodes[i_node];
-        std::cout << i_node << ": " << d_xr[node_id] << '\n';
-      }
-    }
-  }
-  m_mesh = std::make_shared<MeshVariant>(dispatched_mesh);
-  //  m_mesh = std::make_shared<MeshVariant>(std::make_shared<const MeshType>(dispatched_connectivity, dispatched_xr));
+  m_mesh = std::make_shared<MeshVariant>(std::make_shared<const MeshType>(dispatched_connectivity, dispatched_xr));
 }
 
 template void MeshBuilderBase::_dispatch<1>();
diff --git a/src/utils/ParMETISPartitioner.cpp b/src/utils/ParMETISPartitioner.cpp
index 7c7d81ca7..d67e38d1b 100644
--- a/src/utils/ParMETISPartitioner.cpp
+++ b/src/utils/ParMETISPartitioner.cpp
@@ -77,12 +77,6 @@ ParMETISPartitioner::partition(const CRSGraph& graph)
     int* entries_ptr   = const_cast<int*>(&(entries[0]));
     int* neighbors_ptr = const_cast<int*>(&(neighbors[0]));
 
-    if (group_size > 1) {
-#warning remove
-      std::cout << parallel::rank() << ": entries " << graph.entries() << "\n";
-      std::cout << parallel::rank() << ": neighbors " << graph.neighbors() << "\n";
-    }
-
     int result =
       ParMETIS_V3_PartKway(&(vtxdist[0]), entries_ptr, neighbors_ptr, NULL, NULL, &wgtflag, &numflag, &ncon, &npart,
                            &(tpwgts[0]), &(ubvec[0]), &(options[0]), &edgecut, &(partition[0]), &partitioning_comm);
@@ -90,12 +84,6 @@ ParMETISPartitioner::partition(const CRSGraph& graph)
     if (result == METIS_ERROR) {
       throw UnexpectedError("Metis Error");
     }
-
-    if (group_size > 1) {
-#warning remove
-      std::cout << parallel::rank() << ": partition " << partition << "\n";
-    }
-
     // LCOV_EXCL_STOP
   }
 
-- 
GitLab