From b78a82cd6443ab41d846f1920748008709f646a1 Mon Sep 17 00:00:00 2001
From: Stephane Del Pino <stephane.delpino44@gmail.com>
Date: Wed, 27 Feb 2019 18:30:53 +0100
Subject: [PATCH] Use a few parallel_for instead of standard for loops

This is mainly cosmetic. Performance issues are related to the
recalculation of synchronization info.
---
 src/mesh/ItemValueSynchronizer.hpp | 49 ++++++++++++++++++------------
 1 file changed, 30 insertions(+), 19 deletions(-)

diff --git a/src/mesh/ItemValueSynchronizer.hpp b/src/mesh/ItemValueSynchronizer.hpp
index 4a2c9939d..6e73d5a2c 100644
--- a/src/mesh/ItemValueSynchronizer.hpp
+++ b/src/mesh/ItemValueSynchronizer.hpp
@@ -4,7 +4,7 @@
 #include <ItemValue.hpp>
 #include <Connectivity.hpp>
 
-#include <unordered_map>
+#include <map>
 
 class ItemValueSynchronizer
 {
@@ -18,15 +18,26 @@ class ItemValueSynchronizer
   {
     static_assert(not std::is_abstract_v<ConnectivityType>,
                   "_synchronize must be called on a concrete connectivity");
 
-    const auto& item_owner = connectivity.template owner<item_type>();
     using ItemId = ItemIdT<item_type>;
-    std::vector<std::vector<ItemId>> ghost_items_per_proc(parallel::size());
-    for (ItemId item_id=0; item_id<item_value.size(); ++item_id) {
-      if (const size_t owner = item_owner[item_id]; owner != parallel::rank()) {
-        ghost_items_per_proc[owner].emplace_back(item_id);
-      }
-    }
+
+    const auto& item_owner = connectivity.template owner<item_type>();
+
+    std::vector<Array<const ItemId>> ghost_items_per_proc
+      = [&] () {
+        std::vector<std::vector<ItemId>> ghost_items_vector_per_proc(parallel::size());
+        for (ItemId item_id=0; item_id<item_value.size(); ++item_id) {
+          if (const size_t owner = item_owner[item_id]; owner != parallel::rank()) {
+            ghost_items_vector_per_proc[owner].emplace_back(item_id);
+          }
+        }
+        std::vector<Array<const ItemId>> ghost_items_per_proc(parallel::size());
+        for (size_t i_rank=0; i_rank<parallel::size(); ++i_rank) {
+          const auto& ghost_items_vector = ghost_items_vector_per_proc[i_rank];
+          ghost_items_per_proc[i_rank] = convert_to_array(ghost_items_vector);
+        }
+        return ghost_items_per_proc;
+      }();
 
     Array<unsigned int> local_number_of_requested_values(parallel::size());
     for (size_t i_rank=0; i_rank<parallel::size(); ++i_rank) {
@@ -41,9 +52,9 @@ class ItemValueSynchronizer
     for (size_t i_rank=0; i_rank<parallel::size(); ++i_rank) {
       const auto& ghost_items = ghost_items_per_proc[i_rank];
       Array<int> item_number_list(ghost_items.size());
-      for (size_t i_item = 0; i_item<ghost_items.size(); ++i_item) {
-        item_number_list[i_item] = item_number[ghost_items[i_item]];
-      }
+      parallel_for (ghost_items.size(), PASTIS_LAMBDA(size_t i_item) {
+          item_number_list[i_item] = item_number[ghost_items[i_item]];
+        });
       requested_item_number_list_by_proc[i_rank] = item_number_list;
     }
 
@@ -54,7 +65,7 @@ class ItemValueSynchronizer
     parallel::exchange(requested_item_number_list_by_proc,
                        to_send_item_number_list_by_proc);
 
-    std::unordered_map<int, ItemId> item_number_to_id_correspondance(connectivity.template numberOf<item_type>());
+    std::map<int, ItemId> item_number_to_id_correspondance;
     for (ItemId item_id=0; item_id<item_number.size(); ++item_id) {
       item_number_to_id_correspondance[item_number[item_id]] = item_id;
     }
@@ -64,7 +75,7 @@ class ItemValueSynchronizer
       Array<ItemId> to_send_item_id{local_number_of_values_to_send[i_rank]};
       const Array<int>& to_send_item_number = to_send_item_number_list_by_proc[i_rank];
       for (size_t i=0; i<to_send_item_number.size(); ++i) {
-        to_send_item_id[i] = item_number_to_id_correspondance[to_send_item_number[i]];
+        to_send_item_id[i] = item_number_to_id_correspondance.find(to_send_item_number[i])->second;
       }
       to_send_item_id_list_by_proc[i_rank] = to_send_item_id;
     }
@@ -73,9 +84,9 @@ class ItemValueSynchronizer
     for (size_t i_rank=0; i_rank<parallel::size(); ++i_rank) {
       Array<DataType> to_send_data{local_number_of_values_to_send[i_rank]};
       const Array<const ItemId>& to_send_item_id = to_send_item_id_list_by_proc[i_rank];
-      for (size_t i=0; i<to_send_item_id.size(); ++i) {
-        to_send_data[i] = item_value[to_send_item_id[i]];
-      }
+      parallel_for(to_send_item_id.size(), PASTIS_LAMBDA(size_t i) {
+          to_send_data[i] = item_value[to_send_item_id[i]];
+        });
       to_send_data_by_proc[i_rank] = to_send_data;
     }
 
@@ -90,9 +101,9 @@ class ItemValueSynchronizer
     for (size_t i_rank=0; i_rank<parallel::size(); ++i_rank) {
       const auto& ghost_items = ghost_items_per_proc[i_rank];
      const auto& requested_data = requested_data_list_by_proc[i_rank];
-      for (size_t i=0; i<ghost_items.size(); ++i) {
-        item_value[ghost_items[i]] = requested_data[i];
-      }
+      parallel_for(ghost_items.size(), PASTIS_LAMBDA(size_t i) {
+          item_value[ghost_items[i]] = requested_data[i];
+        });
       requested_data_list_by_proc[i_rank] = Array<DataType>{ghost_items.size()};
     }
   }
--
GitLab
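
For reference, the parallel_for / PASTIS_LAMBDA call pattern adopted in this patch can be exercised with a small self-contained sketch. The serial parallel_for and the PASTIS_LAMBDA macro defined below are simplified stand-ins for the project's own utilities (which presumably hand the loop body to a parallel execution backend); only the call pattern is taken from the patch, everything else is illustrative.

  #include <cstddef>
  #include <iostream>
  #include <vector>

  // Simplified stand-ins, for illustration only: the real PASTIS_LAMBDA and
  // parallel_for live in the project and may run the body in parallel;
  // here the body is captured by reference and executed serially.
  #define PASTIS_LAMBDA [&]

  template <typename Lambda>
  void parallel_for(std::size_t size, const Lambda& body)
  {
    for (std::size_t i = 0; i < size; ++i) {
      body(i);
    }
  }

  int main()
  {
    std::vector<int> item_number = {10, 20, 30, 40};
    std::vector<int> item_number_list(item_number.size());

    // Same call pattern as in the patch: each iteration is independent,
    // so the loop body can be handed to parallel_for as a lambda.
    parallel_for(item_number.size(), PASTIS_LAMBDA(std::size_t i) {
      item_number_list[i] = item_number[i];
    });

    for (int n : item_number_list) {
      std::cout << n << ' ';
    }
    std::cout << '\n';
    return 0;
  }

Built with any C++11 (or later) compiler this prints "10 20 30 40". In the patched code the same pattern fills Array objects rather than std::vector, and only loops whose iterations are independent of one another were converted to parallel_for.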