diff --git a/packages/kokkos/.github/workflows/continuous-integration-workflow.yml b/packages/kokkos/.github/workflows/continuous-integration-workflow.yml index f6a8f22308c6bdfc12fb79582755e7f0bf59337a..dba3a70745ce57d91e788e92fefd66a915ae0826 100644 --- a/packages/kokkos/.github/workflows/continuous-integration-workflow.yml +++ b/packages/kokkos/.github/workflows/continuous-integration-workflow.yml @@ -13,24 +13,32 @@ jobs: distro: ['fedora:latest', 'fedora:rawhide', 'ubuntu:latest'] cxx: ['g++', 'clang++'] cmake_build_type: ['Release', 'Debug'] - openmp: ['ON'] + backend: ['OPENMP'] include: - distro: 'fedora:intel' cxx: 'icpc' cmake_build_type: 'Release' - openmp: 'ON' + backend: 'OPENMP' - distro: 'fedora:intel' cxx: 'icpc' cmake_build_type: 'Debug' - openmp: 'ON' + backend: 'OPENMP' - distro: 'fedora:intel' cxx: 'icpx' cmake_build_type: 'Release' - openmp: 'ON' + backend: 'OPENMP' - distro: 'fedora:intel' cxx: 'icpx' cmake_build_type: 'Debug' - openmp: 'ON' + backend: 'OPENMP' + - distro: 'ubuntu:latest' + cxx: 'clang++' + cmake_build_type: 'RelWithDebInfo' + backend: 'THREADS' + - distro: 'ubuntu:latest' + cxx: 'g++' + cmake_build_type: 'RelWithDebInfo' + backend: 'THREADS' runs-on: ubuntu-latest container: image: ghcr.io/kokkos/ci-containers/${{ matrix.distro }} @@ -54,8 +62,9 @@ jobs: run: | cmake -B builddir \ -DCMAKE_INSTALL_PREFIX=/usr \ + -DKokkos_ARCH_NATIVE=ON \ -DKokkos_ENABLE_HWLOC=ON \ - -DKokkos_ENABLE_OPENMP=${{ matrix.openmp }} \ + -DKokkos_ENABLE_${{ matrix.backend }}=ON \ -DKokkos_ENABLE_TESTS=ON \ -DKokkos_ENABLE_EXAMPLES=ON \ -DKokkos_ENABLE_DEPRECATED_CODE_3=ON \ diff --git a/packages/kokkos/.gitrepo b/packages/kokkos/.gitrepo index 6b9388486be0998ff3de0c89d365e3f245d0231a..91e0f8daac3d6ca34dc5bde954f56a1d8ea44f86 100644 --- a/packages/kokkos/.gitrepo +++ b/packages/kokkos/.gitrepo @@ -6,7 +6,7 @@ [subrepo] remote = git@github.com:kokkos/kokkos.git branch = master - commit = b52f8c835f4df003954dad66d9761094f8baa66c - parent = 1994bb4f069142aa3f886d30aa0585ed9117eed1 + commit = d19aab9981a2c447e832a7b4eb7b16992328fb14 + parent = a64ea7589ca011edd41ad9a3468d091cd093430c method = merge cmdver = 0.4.3 diff --git a/packages/kokkos/CHANGELOG.md b/packages/kokkos/CHANGELOG.md index dfbe22eddefef9d634aeb21ccde99d3f0d1a7628..a908507704ed725fd8033aaa4da83d439d659178 100644 --- a/packages/kokkos/CHANGELOG.md +++ b/packages/kokkos/CHANGELOG.md @@ -1,5 +1,21 @@ # Change Log +## [3.6.01](https://github.com/kokkos/kokkos/tree/3.6.01) (2022-05-23) +[Full Changelog](https://github.com/kokkos/kokkos/compare/3.6.00...3.6.01) + +### Bug Fixes: +- Fix Threads: Fix serial resizing scratch space (3.6.01 cherry-pick) [\#5109](https://github.com/kokkos/kokkos/pull/5109) +- Fix ScatterMin/ScatterMax to use proper atomics (3.6.01 cherry-pick) [\#5046](https://github.com/kokkos/kokkos/pull/5046) +- Fix allocating large Views [\#4907](https://github.com/kokkos/kokkos/pull/4907) +- Fix bounds errors with Kokkos::sort [\#4980](https://github.com/kokkos/kokkos/pull/4980) +- Fix HIP version when printing the configuration [\#4872](https://github.com/kokkos/kokkos/pull/4872) +- Fixed `_CUDA_ARCH__` to `__CUDA_ARCH__` for CUDA LDG [\#4893](https://github.com/kokkos/kokkos/pull/4893) +- Fixed an incorrect struct initialization [\#5028](https://github.com/kokkos/kokkos/pull/5028) +- Fix racing condition in `HIPParallelLaunch` [\#5008](https://github.com/kokkos/kokkos/pull/5008) +- Avoid deprecation warnings with `OpenMPExec::validate_partition` [\#4982](https://github.com/kokkos/kokkos/pull/4982) +- Make View self-assignment not produce double-free [\#5024](https://github.com/kokkos/kokkos/pull/5024) + + ## [3.6.00](https://github.com/kokkos/kokkos/tree/3.6.00) (2022-02-18) [Full Changelog](https://github.com/kokkos/kokkos/compare/3.5.00...3.6.00) diff --git a/packages/kokkos/CMakeLists.txt b/packages/kokkos/CMakeLists.txt index e1c6893725eab7cd75ca388863abf6a146d07f93..b0a54118a0a52482c4670d72f28c133e5b32ef47 100644 --- a/packages/kokkos/CMakeLists.txt +++ b/packages/kokkos/CMakeLists.txt @@ -136,7 +136,7 @@ ENDIF() set(Kokkos_VERSION_MAJOR 3) set(Kokkos_VERSION_MINOR 6) -set(Kokkos_VERSION_PATCH 00) +set(Kokkos_VERSION_PATCH 01) set(Kokkos_VERSION "${Kokkos_VERSION_MAJOR}.${Kokkos_VERSION_MINOR}.${Kokkos_VERSION_PATCH}") math(EXPR KOKKOS_VERSION "${Kokkos_VERSION_MAJOR} * 10000 + ${Kokkos_VERSION_MINOR} * 100 + ${Kokkos_VERSION_PATCH}") diff --git a/packages/kokkos/Makefile.kokkos b/packages/kokkos/Makefile.kokkos index b1afed5d0663a9f70a0896236dd08b71fdf90c2e..10c4bc46300ab9459815b7e4b5282f04150bcabc 100644 --- a/packages/kokkos/Makefile.kokkos +++ b/packages/kokkos/Makefile.kokkos @@ -2,7 +2,7 @@ KOKKOS_VERSION_MAJOR = 3 KOKKOS_VERSION_MINOR = 6 -KOKKOS_VERSION_PATCH = 00 +KOKKOS_VERSION_PATCH = 01 KOKKOS_VERSION = $(shell echo $(KOKKOS_VERSION_MAJOR)*10000+$(KOKKOS_VERSION_MINOR)*100+$(KOKKOS_VERSION_PATCH) | bc) # Options: Cuda,HIP,SYCL,OpenMPTarget,OpenMP,Threads,Serial diff --git a/packages/kokkos/algorithms/src/Kokkos_Sort.hpp b/packages/kokkos/algorithms/src/Kokkos_Sort.hpp index cde5e6857e59e2b14a33d4b30dd6c8650f1143d8..ce97de9b7dfb1f97116a18c91c714433e451eec8 100644 --- a/packages/kokkos/algorithms/src/Kokkos_Sort.hpp +++ b/packages/kokkos/algorithms/src/Kokkos_Sort.hpp @@ -422,54 +422,34 @@ class BinSort { template <class KeyViewType> struct BinOp1D { - int max_bins_; - double mul_; - typename KeyViewType::const_value_type range_; - typename KeyViewType::const_value_type min_; + int max_bins_ = {}; + double mul_ = {}; + double min_ = {}; - BinOp1D() - : max_bins_(0), - mul_(0.0), - range_(typename KeyViewType::const_value_type()), - min_(typename KeyViewType::const_value_type()) {} + BinOp1D() = default; // Construct BinOp with number of bins, minimum value and maxuimum value BinOp1D(int max_bins__, typename KeyViewType::const_value_type min, typename KeyViewType::const_value_type max) : max_bins_(max_bins__ + 1), - // Cast to int64_t to avoid possible overflow when using integer - mul_(std::is_integral<typename KeyViewType::const_value_type>::value - ? 1.0 * max_bins__ / (int64_t(max) - int64_t(min)) - : 1.0 * max_bins__ / (max - min)), - range_(max - min), - min_(min) { + // Cast to double to avoid possible overflow when using integer + mul_(static_cast<double>(max_bins__) / + (static_cast<double>(max) - static_cast<double>(min))), + min_(static_cast<double>(min)) { // For integral types the number of bins may be larger than the range // in which case we can exactly have one unique value per bin // and then don't need to sort bins. if (std::is_integral<typename KeyViewType::const_value_type>::value && - static_cast<uint64_t>(range_) <= static_cast<uint64_t>(max_bins__)) { + (static_cast<double>(max) - static_cast<double>(min)) <= + static_cast<double>(max_bins__)) { mul_ = 1.; } } // Determine bin index from key value - template < - class ViewType, - std::enable_if_t<!std::is_integral<typename ViewType::value_type>::value, - bool> = true> - KOKKOS_INLINE_FUNCTION int bin(ViewType& keys, const int& i) const { - return int(mul_ * (keys(i) - min_)); - } - - // Determine bin index from key value - template < - class ViewType, - std::enable_if_t<std::is_integral<typename ViewType::value_type>::value, - bool> = true> + template <class ViewType> KOKKOS_INLINE_FUNCTION int bin(ViewType& keys, const int& i) const { - // The cast to int64_t is necessary because otherwise HIP returns the wrong - // result. - return int(mul_ * (int64_t(keys(i)) - int64_t(min_))); + return static_cast<int>(mul_ * (static_cast<double>(keys(i)) - min_)); } // Return maximum bin index + 1 @@ -486,10 +466,9 @@ struct BinOp1D { template <class KeyViewType> struct BinOp3D { - int max_bins_[3]; - double mul_[3]; - typename KeyViewType::non_const_value_type range_[3]; - typename KeyViewType::non_const_value_type min_[3]; + int max_bins_[3] = {}; + double mul_[3] = {}; + double min_[3] = {}; BinOp3D() = default; @@ -498,15 +477,15 @@ struct BinOp3D { max_bins_[0] = max_bins__[0]; max_bins_[1] = max_bins__[1]; max_bins_[2] = max_bins__[2]; - mul_[0] = 1.0 * max_bins__[0] / (max[0] - min[0]); - mul_[1] = 1.0 * max_bins__[1] / (max[1] - min[1]); - mul_[2] = 1.0 * max_bins__[2] / (max[2] - min[2]); - range_[0] = max[0] - min[0]; - range_[1] = max[1] - min[1]; - range_[2] = max[2] - min[2]; - min_[0] = min[0]; - min_[1] = min[1]; - min_[2] = min[2]; + mul_[0] = static_cast<double>(max_bins__[0]) / + (static_cast<double>(max[0]) - static_cast<double>(min[0])); + mul_[1] = static_cast<double>(max_bins__[1]) / + (static_cast<double>(max[1]) - static_cast<double>(min[1])); + mul_[2] = static_cast<double>(max_bins__[2]) / + (static_cast<double>(max[2]) - static_cast<double>(min[2])); + min_[0] = static_cast<double>(min[0]); + min_[1] = static_cast<double>(min[1]); + min_[2] = static_cast<double>(min[2]); } template <class ViewType> @@ -596,9 +575,9 @@ std::enable_if_t<Kokkos::is_execution_space<ExecutionSpace>::value> sort( // TODO: figure out better max_bins then this ... int64_t max_bins = view.extent(0) / 2; if (std::is_integral<typename ViewType::non_const_value_type>::value) { - // Cast to int64_t to avoid possible overflow when using integer - int64_t const max_val = result.max_val; - int64_t const min_val = result.min_val; + // Cast to double to avoid possible overflow when using integer + auto const max_val = static_cast<double>(result.max_val); + auto const min_val = static_cast<double>(result.min_val); // using 10M as the cutoff for special behavior (roughly 40MB for the count // array) if ((max_val - min_val) < 10000000) { @@ -606,6 +585,10 @@ std::enable_if_t<Kokkos::is_execution_space<ExecutionSpace>::value> sort( sort_in_bins = false; } } + if (std::is_floating_point<typename ViewType::non_const_value_type>::value) { + KOKKOS_ASSERT(std::isfinite(static_cast<double>(result.max_val) - + static_cast<double>(result.min_val))); + } BinSort<ViewType, CompType> bin_sort( view, CompType(max_bins, result.min_val, result.max_val), sort_in_bins); diff --git a/packages/kokkos/algorithms/unit_tests/TestSort.hpp b/packages/kokkos/algorithms/unit_tests/TestSort.hpp index a03847f2b26d84946e389aabbd496abaf8ec2d5c..9108731c15800fdd5c95a7cc8b7ae751dfa1272d 100644 --- a/packages/kokkos/algorithms/unit_tests/TestSort.hpp +++ b/packages/kokkos/algorithms/unit_tests/TestSort.hpp @@ -353,6 +353,55 @@ void test_issue_1160_impl() { } } +template <class ExecutionSpace> +void test_issue_4978_impl() { + Kokkos::View<long long*, ExecutionSpace> element_("element", 9); + + auto h_element = Kokkos::create_mirror_view(element_); + + h_element(0) = LLONG_MIN; + h_element(1) = 0; + h_element(2) = 3; + h_element(3) = 2; + h_element(4) = 1; + h_element(5) = 3; + h_element(6) = 6; + h_element(7) = 4; + h_element(8) = 3; + + ExecutionSpace exec; + Kokkos::deep_copy(exec, element_, h_element); + + Kokkos::sort(exec, element_); + + Kokkos::deep_copy(exec, h_element, element_); + exec.fence(); + + ASSERT_EQ(h_element(0), LLONG_MIN); + ASSERT_EQ(h_element(1), 0); + ASSERT_EQ(h_element(2), 1); + ASSERT_EQ(h_element(3), 2); + ASSERT_EQ(h_element(4), 3); + ASSERT_EQ(h_element(5), 3); + ASSERT_EQ(h_element(6), 3); + ASSERT_EQ(h_element(7), 4); + ASSERT_EQ(h_element(8), 6); +} + +template <class ExecutionSpace, class T> +void test_sort_integer_overflow() { + // array with two extrema in reverse order to expose integer overflow bug in + // bin calculation + T a[2] = {Kokkos::Experimental::finite_max<T>::value, + Kokkos::Experimental::finite_min<T>::value}; + auto vd = Kokkos::create_mirror_view_and_copy( + ExecutionSpace(), Kokkos::View<T[2], Kokkos::HostSpace>(a)); + Kokkos::sort(vd, /*force using Kokkos bin sort*/ true); + auto vh = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), vd); + EXPECT_TRUE(std::is_sorted(vh.data(), vh.data() + 2)) + << "view (" << vh[0] << ", " << vh[1] << ") is not sorted"; +} + //---------------------------------------------------------------------------- template <class ExecutionSpace, typename KeyType> @@ -376,6 +425,11 @@ void test_issue_1160_sort() { test_issue_1160_impl<ExecutionSpace>(); } +template <class ExecutionSpace> +void test_issue_4978_sort() { + test_issue_4978_impl<ExecutionSpace>(); +} + template <class ExecutionSpace, typename KeyType> void test_sort(unsigned int N) { test_1D_sort<ExecutionSpace, KeyType>(N); @@ -385,6 +439,10 @@ void test_sort(unsigned int N) { test_dynamic_view_sort<ExecutionSpace, KeyType>(N); #endif test_issue_1160_sort<ExecutionSpace>(); + test_issue_4978_sort<ExecutionSpace>(); + test_sort_integer_overflow<ExecutionSpace, long long>(); + test_sort_integer_overflow<ExecutionSpace, unsigned long long>(); + test_sort_integer_overflow<ExecutionSpace, int>(); } } // namespace Impl } // namespace Test diff --git a/packages/kokkos/containers/src/Kokkos_ScatterView.hpp b/packages/kokkos/containers/src/Kokkos_ScatterView.hpp index 024b4618a46c23b2aee0ff18e0f5d34b24cf8267..e4dd9531fc3555299fdafedf447b585f8e5cd0b2 100644 --- a/packages/kokkos/containers/src/Kokkos_ScatterView.hpp +++ b/packages/kokkos/containers/src/Kokkos_ScatterView.hpp @@ -369,18 +369,6 @@ struct ScatterValue<ValueType, Kokkos::Experimental::ScatterProd, DeviceType, Kokkos::atomic_div(&value, rhs); } - KOKKOS_FORCEINLINE_FUNCTION - void atomic_prod(ValueType& dest, const ValueType& src) const { - bool success = false; - while (!success) { - ValueType dest_old = dest; - ValueType dest_new = dest_old * src; - dest_new = - Kokkos::atomic_compare_exchange<ValueType>(&dest, dest_old, dest_new); - success = ((dest_new - dest_old) / dest_old <= 1e-15); - } - } - KOKKOS_INLINE_FUNCTION void join(ValueType& dest, const ValueType& src) const { atomic_prod(&dest, src); @@ -440,21 +428,9 @@ struct ScatterValue<ValueType, Kokkos::Experimental::ScatterMin, DeviceType, KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ScatterValue&& other) : value(other.value) {} - KOKKOS_FORCEINLINE_FUNCTION - void atomic_min(ValueType& dest, const ValueType& src) const { - bool success = false; - while (!success) { - ValueType dest_old = dest; - ValueType dest_new = (dest_old > src) ? src : dest_old; - dest_new = - Kokkos::atomic_compare_exchange<ValueType>(&dest, dest_old, dest_new); - success = ((dest_new - dest_old) / dest_old <= 1e-15); - } - } - KOKKOS_INLINE_FUNCTION void join(ValueType& dest, const ValueType& src) const { - atomic_min(dest, src); + atomic_min(&dest, src); } KOKKOS_INLINE_FUNCTION @@ -511,21 +487,9 @@ struct ScatterValue<ValueType, Kokkos::Experimental::ScatterMax, DeviceType, KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ScatterValue&& other) : value(other.value) {} - KOKKOS_FORCEINLINE_FUNCTION - void atomic_max(ValueType& dest, const ValueType& src) const { - bool success = false; - while (!success) { - ValueType dest_old = dest; - ValueType dest_new = (dest_old < src) ? src : dest_old; - dest_new = - Kokkos::atomic_compare_exchange<ValueType>(&dest, dest_old, dest_new); - success = ((dest_new - dest_old) / dest_old <= 1e-15); - } - } - KOKKOS_INLINE_FUNCTION void join(ValueType& dest, const ValueType& src) const { - atomic_max(dest, src); + atomic_max(&dest, src); } KOKKOS_INLINE_FUNCTION diff --git a/packages/kokkos/containers/src/Kokkos_Vector.hpp b/packages/kokkos/containers/src/Kokkos_Vector.hpp index 88721bd89eb2fd86543c480727876a58fd888a56..eddb87800321fd0eeac446eb6a37db6b59d6e8ca 100644 --- a/packages/kokkos/containers/src/Kokkos_Vector.hpp +++ b/packages/kokkos/containers/src/Kokkos_Vector.hpp @@ -162,7 +162,7 @@ class vector : public DualView<Scalar*, LayoutLeft, Arg1Type> { } DV::sync_host(); DV::modify_host(); - if (it < begin() || it > end()) + if (std::less<>()(it, begin()) || std::less<>()(end(), it)) Kokkos::abort("Kokkos::vector::insert : invalid insert iterator"); if (count == 0) return it; ptrdiff_t start = std::distance(begin(), it); @@ -189,27 +189,21 @@ class vector : public DualView<Scalar*, LayoutLeft, Arg1Type> { iterator>::type insert(iterator it, InputIterator b, InputIterator e) { ptrdiff_t count = std::distance(b, e); - if (count == 0) return it; DV::sync_host(); DV::modify_host(); - if (it < begin() || it > end()) + if (std::less<>()(it, begin()) || std::less<>()(end(), it)) Kokkos::abort("Kokkos::vector::insert : invalid insert iterator"); - bool resized = false; - if ((size() == 0) && (it == begin())) { - resize(count); - it = begin(); - resized = true; - } ptrdiff_t start = std::distance(begin(), it); auto org_size = size(); - if (!resized) resize(size() + count); - it = begin() + start; + + // Note: resize(...) invalidates it; use begin() + start instead + resize(size() + count); std::copy_backward(begin() + start, begin() + org_size, begin() + org_size + count); - std::copy(b, e, it); + std::copy(b, e, begin() + start); return begin() + start; } diff --git a/packages/kokkos/containers/unit_tests/TestVector.hpp b/packages/kokkos/containers/unit_tests/TestVector.hpp index 57b92c38f81798331ef183facc76a99bbe45e0be..c093c7b0c9d8b897c2cec9d6f7e8330501f021f2 100644 --- a/packages/kokkos/containers/unit_tests/TestVector.hpp +++ b/packages/kokkos/containers/unit_tests/TestVector.hpp @@ -172,6 +172,23 @@ struct test_vector_insert { run_test(a); check_test(a, size); } + { test_vector_insert_into_empty(size); } + } + + void test_vector_insert_into_empty(const size_t size) { + using Vector = Kokkos::vector<Scalar, Device>; + { + Vector a; + Vector b(size); + a.insert(a.begin(), b.begin(), b.end()); + ASSERT_EQ(a.size(), size); + } + + { + Vector c; + c.insert(c.begin(), size, Scalar{}); + ASSERT_EQ(c.size(), size); + } } }; diff --git a/packages/kokkos/core/src/CMakeLists.txt b/packages/kokkos/core/src/CMakeLists.txt index 88cca93f3cda0939bfa6effaf6f25d971cef9cbc..793e07a84153f29334655c0284602b0677f7feb3 100644 --- a/packages/kokkos/core/src/CMakeLists.txt +++ b/packages/kokkos/core/src/CMakeLists.txt @@ -8,6 +8,7 @@ KOKKOS_INCLUDE_DIRECTORIES( INSTALL (DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/" DESTINATION ${KOKKOS_HEADER_DIR} + FILES_MATCHING PATTERN desul/src EXCLUDE PATTERN "*.inc" PATTERN "*.inc_*" diff --git a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Instance.cpp b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Instance.cpp index 294be2774b8f93edbeba3893b62e978f08ff28d9..aaa9ea8ad4f86fad749e89b379dc4e52428e6518 100644 --- a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Instance.cpp +++ b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Instance.cpp @@ -1007,6 +1007,15 @@ void CudaSpaceInitializer::print_configuration(std::ostream &msg, } } // namespace Impl + +#ifdef KOKKOS_ENABLE_CXX14 +namespace Tools { +namespace Experimental { +constexpr DeviceType DeviceTypeTraits<Cuda>::id; +} +} // namespace Tools +#endif + } // namespace Kokkos #else diff --git a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_View.hpp b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_View.hpp index 61563a01007f68547a5a4432f36f04ada6fa67f2..dec6ef15e13c01b2778b78fc91a5ed53f200ae38 100644 --- a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_View.hpp +++ b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_View.hpp @@ -139,7 +139,7 @@ struct CudaLDGFetch { template <typename iType> KOKKOS_INLINE_FUNCTION ValueType operator[](const iType& i) const { -#if defined(__CUDA_ARCH__) && (350 <= _CUDA_ARCH__) +#if defined(__CUDA_ARCH__) && (350 <= __CUDA_ARCH__) AliasType v = __ldg(reinterpret_cast<const AliasType*>(&m_ptr[i])); return *(reinterpret_cast<ValueType*>(&v)); #else diff --git a/packages/kokkos/core/src/HIP/Kokkos_HIP_Instance.cpp b/packages/kokkos/core/src/HIP/Kokkos_HIP_Instance.cpp index 4a6a3ba99ebd7e6850cf117a787f33cf5d1d49f2..a8a0496afebc8776ef99ba4195d6b7b43a58a497 100644 --- a/packages/kokkos/core/src/HIP/Kokkos_HIP_Instance.cpp +++ b/packages/kokkos/core/src/HIP/Kokkos_HIP_Instance.cpp @@ -132,7 +132,8 @@ void HIPInternal::print_configuration(std::ostream &s) const { s << "macro KOKKOS_ENABLE_HIP : defined" << '\n'; #if defined(HIP_VERSION) s << "macro HIP_VERSION = " << HIP_VERSION << " = version " - << HIP_VERSION / 100 << "." << HIP_VERSION % 100 << '\n'; + << HIP_VERSION_MAJOR << '.' << HIP_VERSION_MINOR << '.' << HIP_VERSION_PATCH + << '\n'; #endif for (int i = 0; i < dev_info.m_hipDevCount; ++i) { @@ -467,7 +468,6 @@ void HIPInternal::finalize() { } char *HIPInternal::get_next_driver(size_t driverTypeSize) const { - std::lock_guard<std::mutex> const lock(m_mutexWorkArray); if (d_driverWorkArray == nullptr) { KOKKOS_IMPL_HIP_SAFE_CALL( hipHostMalloc(&d_driverWorkArray, diff --git a/packages/kokkos/core/src/HIP/Kokkos_HIP_KernelLaunch.hpp b/packages/kokkos/core/src/HIP/Kokkos_HIP_KernelLaunch.hpp index 384b7ffd67e488b727d47e0831da4e84e74b3fc3..70b979e00abd6775904b900899eb3200a8c00581 100644 --- a/packages/kokkos/core/src/HIP/Kokkos_HIP_KernelLaunch.hpp +++ b/packages/kokkos/core/src/HIP/Kokkos_HIP_KernelLaunch.hpp @@ -490,6 +490,8 @@ struct HIPParallelLaunch< KOKKOS_ENSURE_HIP_LOCK_ARRAYS_ON_DEVICE(); + std::lock_guard<std::mutex> const lock(hip_instance->m_mutexWorkArray); + // Invoke the driver function on the device DriverType *d_driver = reinterpret_cast<DriverType *>( hip_instance->get_next_driver(sizeof(DriverType))); diff --git a/packages/kokkos/core/src/HIP/Kokkos_HIP_Locks.cpp b/packages/kokkos/core/src/HIP/Kokkos_HIP_Locks.cpp index f334d934123103edb1212e5ab385aebdaf158504..e9cfbf99f7cf11a3773a18b4637363e42a377888 100644 --- a/packages/kokkos/core/src/HIP/Kokkos_HIP_Locks.cpp +++ b/packages/kokkos/core/src/HIP/Kokkos_HIP_Locks.cpp @@ -56,8 +56,7 @@ namespace Kokkos { #ifdef KOKKOS_ENABLE_HIP_RELOCATABLE_DEVICE_CODE namespace Impl { -__device__ __constant__ HIPLockArrays g_device_hip_lock_arrays = {nullptr, - nullptr, 0}; +__device__ __constant__ HIPLockArrays g_device_hip_lock_arrays = {nullptr, 0}; } #endif diff --git a/packages/kokkos/core/src/HIP/Kokkos_HIP_Space.cpp b/packages/kokkos/core/src/HIP/Kokkos_HIP_Space.cpp index 6ade677fa89f9e778eb5965d3ae5ae8b13cce711..776b7c6abea81e75ac11497e4447409478c64ce4 100644 --- a/packages/kokkos/core/src/HIP/Kokkos_HIP_Space.cpp +++ b/packages/kokkos/core/src/HIP/Kokkos_HIP_Space.cpp @@ -464,6 +464,15 @@ void HIPSpaceInitializer::print_configuration(std::ostream& msg, } } // namespace Impl + +#ifdef KOKKOS_ENABLE_CXX14 +namespace Tools { +namespace Experimental { +constexpr DeviceType DeviceTypeTraits<Kokkos::Experimental::HIP>::id; +} +} // namespace Tools +#endif + } // namespace Kokkos //============================================================================== diff --git a/packages/kokkos/core/src/HPX/Kokkos_HPX.cpp b/packages/kokkos/core/src/HPX/Kokkos_HPX.cpp index acf2224f027de3abff1ab0cc84787c0b0ccfc255..623c7da02569c4763f5d830fea9f2b7cf75855ff 100644 --- a/packages/kokkos/core/src/HPX/Kokkos_HPX.cpp +++ b/packages/kokkos/core/src/HPX/Kokkos_HPX.cpp @@ -199,6 +199,15 @@ void HPXSpaceInitializer::print_configuration(std::ostream &msg, } } // namespace Impl + +#ifdef KOKKOS_ENABLE_CXX14 +namespace Tools { +namespace Experimental { +constexpr DeviceType DeviceTypeTraits<Kokkos::Experimental::HPX>::id; +} +} // namespace Tools +#endif + } // namespace Kokkos #else diff --git a/packages/kokkos/core/src/Kokkos_Cuda.hpp b/packages/kokkos/core/src/Kokkos_Cuda.hpp index 6305a1fa5d2f21ea9ae9e996a7415a7ee6b6a239..0063b1cd1ee865f38bb556ff9653688e575fc546 100644 --- a/packages/kokkos/core/src/Kokkos_Cuda.hpp +++ b/packages/kokkos/core/src/Kokkos_Cuda.hpp @@ -260,6 +260,7 @@ template <> struct DeviceTypeTraits<Cuda> { /// \brief An ID to differentiate (for example) Serial from OpenMP in Tooling static constexpr DeviceType id = DeviceType::Cuda; + static int device_id(const Cuda& exec) { return exec.cuda_device(); } }; } // namespace Experimental } // namespace Tools diff --git a/packages/kokkos/core/src/Kokkos_HIP_Space.hpp b/packages/kokkos/core/src/Kokkos_HIP_Space.hpp index 1371d21d388b681a1327dc6330121686c95d165d..68869a6074369e9e15b6c7743a5bbc572ad38a96 100644 --- a/packages/kokkos/core/src/Kokkos_HIP_Space.hpp +++ b/packages/kokkos/core/src/Kokkos_HIP_Space.hpp @@ -571,6 +571,9 @@ namespace Experimental { template <> struct DeviceTypeTraits<Kokkos::Experimental::HIP> { static constexpr DeviceType id = DeviceType::HIP; + static int device_id(const Kokkos::Experimental::HIP& exec) { + return exec.hip_device(); + } }; } // namespace Experimental } // namespace Tools diff --git a/packages/kokkos/core/src/Kokkos_HPX.hpp b/packages/kokkos/core/src/Kokkos_HPX.hpp index d2ae9c0ec2b3f755f21cc15b3614df8e1f23ed88..9238ca30a7e4260a740c772b9736f0ba9621c19a 100644 --- a/packages/kokkos/core/src/Kokkos_HPX.hpp +++ b/packages/kokkos/core/src/Kokkos_HPX.hpp @@ -500,6 +500,7 @@ namespace Experimental { template <> struct DeviceTypeTraits<Kokkos::Experimental::HPX> { static constexpr DeviceType id = DeviceType::HPX; + static int device_id(const Kokkos::Experimental::HPX &) { return 0; } }; } // namespace Experimental } // namespace Tools diff --git a/packages/kokkos/core/src/Kokkos_OpenMP.hpp b/packages/kokkos/core/src/Kokkos_OpenMP.hpp index 5d76e689f21b2809a47b262c7ed7485e9116d164..767e5b9324487e2e050828201c1eae6fcbb296d2 100644 --- a/packages/kokkos/core/src/Kokkos_OpenMP.hpp +++ b/packages/kokkos/core/src/Kokkos_OpenMP.hpp @@ -179,6 +179,7 @@ namespace Experimental { template <> struct DeviceTypeTraits<OpenMP> { static constexpr DeviceType id = DeviceType::OpenMP; + static int device_id(const OpenMP&) { return 0; } }; } // namespace Experimental } // namespace Tools diff --git a/packages/kokkos/core/src/Kokkos_OpenMPTarget.hpp b/packages/kokkos/core/src/Kokkos_OpenMPTarget.hpp index f394f3240832a67f22e4056fa27e33500b38178c..373dc3d9c7bc22fb6e3b539c232e8ebfad7bb9bc 100644 --- a/packages/kokkos/core/src/Kokkos_OpenMPTarget.hpp +++ b/packages/kokkos/core/src/Kokkos_OpenMPTarget.hpp @@ -130,6 +130,9 @@ template <> struct DeviceTypeTraits<::Kokkos::Experimental::OpenMPTarget> { static constexpr DeviceType id = ::Kokkos::Profiling::Experimental::DeviceType::OpenMPTarget; + static int device_id(const Kokkos::Experimental::OpenMPTarget&) { + return omp_get_default_device(); + } }; } // namespace Experimental } // namespace Tools diff --git a/packages/kokkos/core/src/Kokkos_SYCL.hpp b/packages/kokkos/core/src/Kokkos_SYCL.hpp index 02095ff7b3f01f7558d8e154d1ebc49ff46d1241..e29093db32953b09e5b98541ec3078a46313a885 100644 --- a/packages/kokkos/core/src/Kokkos_SYCL.hpp +++ b/packages/kokkos/core/src/Kokkos_SYCL.hpp @@ -182,6 +182,9 @@ template <> struct DeviceTypeTraits<Kokkos::Experimental::SYCL> { /// \brief An ID to differentiate (for example) Serial from OpenMP in Tooling static constexpr DeviceType id = DeviceType::SYCL; + static int device_id(const Kokkos::Experimental::SYCL& exec) { + return exec.sycl_device(); + } }; } // namespace Experimental } // namespace Tools diff --git a/packages/kokkos/core/src/Kokkos_Serial.hpp b/packages/kokkos/core/src/Kokkos_Serial.hpp index 9aada48bf607231731895dd96be8e8f9289ce4a4..b2e524c3744556d2797d40bf0ad9becefc69a904 100644 --- a/packages/kokkos/core/src/Kokkos_Serial.hpp +++ b/packages/kokkos/core/src/Kokkos_Serial.hpp @@ -226,6 +226,7 @@ namespace Experimental { template <> struct DeviceTypeTraits<Serial> { static constexpr DeviceType id = DeviceType::Serial; + static int device_id(const Serial&) { return 0; } }; } // namespace Experimental } // namespace Tools diff --git a/packages/kokkos/core/src/Kokkos_Threads.hpp b/packages/kokkos/core/src/Kokkos_Threads.hpp index 45a2d0e32621a5dd2aff647954aaa56e18692ea2..5879209f12ab8b2abd265878f2d4c276d6d69087 100644 --- a/packages/kokkos/core/src/Kokkos_Threads.hpp +++ b/packages/kokkos/core/src/Kokkos_Threads.hpp @@ -175,6 +175,7 @@ namespace Experimental { template <> struct DeviceTypeTraits<Threads> { static constexpr DeviceType id = DeviceType::Threads; + static int device_id(const Threads&) { return 0; } }; } // namespace Experimental } // namespace Tools diff --git a/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Exec.cpp b/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Exec.cpp index d2283d456f0883bd6cea5f0856a1abba95df11a3..66dbbacce9bc8d00f0d8ce6719fcb96f9a0fcfcc 100644 --- a/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Exec.cpp +++ b/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Exec.cpp @@ -67,8 +67,9 @@ __thread int t_openmp_hardware_id = 0; __thread Impl::OpenMPExec *t_openmp_instance = nullptr; #ifdef KOKKOS_ENABLE_DEPRECATED_CODE_3 -void OpenMPExec::validate_partition(const int nthreads, int &num_partitions, - int &partition_size) { +void OpenMPExec::validate_partition_impl(const int nthreads, + int &num_partitions, + int &partition_size) { if (nthreads == 1) { num_partitions = 1; partition_size = 1; @@ -506,6 +507,15 @@ void OpenMPSpaceInitializer::print_configuration(std::ostream &msg, } } // namespace Impl + +#ifdef KOKKOS_ENABLE_CXX14 +namespace Tools { +namespace Experimental { +constexpr DeviceType DeviceTypeTraits<OpenMP>::id; +} +} // namespace Tools +#endif + } // namespace Kokkos #else diff --git a/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Exec.hpp b/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Exec.hpp index 2f647af77eb98d1f5a6d161dc5731450b51c3090..ede24d1094f1582f132b82ba6668cc7264c99086 100644 --- a/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Exec.hpp +++ b/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Exec.hpp @@ -93,7 +93,11 @@ class OpenMPExec { #ifdef KOKKOS_ENABLE_DEPRECATED_CODE_3 KOKKOS_DEPRECATED static void validate_partition(const int nthreads, int& num_partitions, - int& partition_size); + int& partition_size) { + validate_partition_impl(nthreads, num_partitions, partition_size); + } + static void validate_partition_impl(const int nthreads, int& num_partitions, + int& partition_size); #endif private: @@ -179,8 +183,8 @@ KOKKOS_DEPRECATED void OpenMP::partition_master(F const& f, int num_partitions, Exec* prev_instance = Impl::t_openmp_instance; - Exec::validate_partition(prev_instance->m_pool_size, num_partitions, - partition_size); + Exec::validate_partition_impl(prev_instance->m_pool_size, num_partitions, + partition_size); OpenMP::memory_space space; diff --git a/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Instance.hpp b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Instance.hpp index 907e4e9efe1cd4c2921d0721e73b971fa8104d2f..45aacd7258a62c8afe8b25c439bfb1f505f093bc 100644 --- a/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Instance.hpp +++ b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Instance.hpp @@ -72,7 +72,7 @@ class SYCLInternal { bool force_shrink = false); uint32_t impl_get_instance_id() const; - int m_syclDev = -1; + int m_syclDev = 0; size_t m_maxWorkgroupSize = 0; uint32_t m_maxConcurrency = 0; diff --git a/packages/kokkos/core/src/Threads/Kokkos_ThreadsExec.cpp b/packages/kokkos/core/src/Threads/Kokkos_ThreadsExec.cpp index 8a7c49871bc0e3bed33de6fe75eaa3f207739c65..9682564ee0bb339f5e9f412c3d1214e8de94771c 100644 --- a/packages/kokkos/core/src/Threads/Kokkos_ThreadsExec.cpp +++ b/packages/kokkos/core/src/Threads/Kokkos_ThreadsExec.cpp @@ -399,27 +399,68 @@ bool ThreadsExec::wake() { //---------------------------------------------------------------------------- -void *ThreadsExec::root_reduce_scratch() { - return s_threads_process.reduce_memory(); -} +void ThreadsExec::execute_resize_scratch_in_serial() { + const unsigned begin = s_threads_process.m_pool_base ? 1 : 0; -void ThreadsExec::execute_resize_scratch(ThreadsExec &exec, const void *) { - using Record = Kokkos::Impl::SharedAllocationRecord<Kokkos::HostSpace, void>; + auto deallocate_scratch_memory = [](ThreadsExec &exec) { + if (exec.m_scratch) { + using Record = + Kokkos::Impl::SharedAllocationRecord<Kokkos::HostSpace, void>; + Record *const r = Record::get_record(exec.m_scratch); + exec.m_scratch = nullptr; + Record::decrement(r); + } + }; + if (s_threads_process.m_pool_base) { + for (unsigned i = s_thread_pool_size[0]; begin < i;) { + deallocate_scratch_memory(*s_threads_exec[--i]); + } + } - if (exec.m_scratch) { - Record *const r = Record::get_record(exec.m_scratch); + s_current_function = &first_touch_allocate_thread_private_scratch; + s_current_function_arg = &s_threads_process; - exec.m_scratch = nullptr; + // Make sure function and arguments are written before activating threads. + memory_fence(); - Record::decrement(r); + for (unsigned i = s_thread_pool_size[0]; begin < i;) { + ThreadsExec &th = *s_threads_exec[--i]; + + th.m_pool_state = ThreadsExec::Active; + + wait_yield(th.m_pool_state, ThreadsExec::Active); } + if (s_threads_process.m_pool_base) { + deallocate_scratch_memory(s_threads_process); + s_threads_process.m_pool_state = ThreadsExec::Active; + first_touch_allocate_thread_private_scratch(s_threads_process, nullptr); + s_threads_process.m_pool_state = ThreadsExec::Inactive; + } + + s_current_function_arg = nullptr; + s_current_function = nullptr; + + // Make sure function and arguments are cleared before proceeding. + memory_fence(); +} + +//---------------------------------------------------------------------------- + +void *ThreadsExec::root_reduce_scratch() { + return s_threads_process.reduce_memory(); +} + +void ThreadsExec::first_touch_allocate_thread_private_scratch(ThreadsExec &exec, + const void *) { exec.m_scratch_reduce_end = s_threads_process.m_scratch_reduce_end; exec.m_scratch_thread_end = s_threads_process.m_scratch_thread_end; if (s_threads_process.m_scratch_thread_end) { // Allocate tracked memory: { + using Record = + Kokkos::Impl::SharedAllocationRecord<Kokkos::HostSpace, void>; Record *const r = Record::allocate(Kokkos::HostSpace(), "Kokkos::thread_scratch", s_threads_process.m_scratch_thread_end); @@ -461,7 +502,7 @@ void *ThreadsExec::resize_scratch(size_t reduce_size, size_t thread_size) { s_threads_process.m_scratch_reduce_end = reduce_size; s_threads_process.m_scratch_thread_end = reduce_size + thread_size; - execute_resize_scratch(s_threads_process, nullptr); + execute_resize_scratch_in_serial(); s_threads_process.m_scratch = s_threads_exec[0]->m_scratch; } @@ -845,6 +886,15 @@ void ThreadsSpaceInitializer::print_configuration(std::ostream &msg, } } // namespace Impl + +#ifdef KOKKOS_ENABLE_CXX14 +namespace Tools { +namespace Experimental { +constexpr DeviceType DeviceTypeTraits<Threads>::id; +} +} // namespace Tools +#endif + } /* namespace Kokkos */ //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- diff --git a/packages/kokkos/core/src/Threads/Kokkos_ThreadsExec.hpp b/packages/kokkos/core/src/Threads/Kokkos_ThreadsExec.hpp index 561b1ce292eb387a45891ef2de8c678a3bdda2e1..d17f417bbcac13c2542ed583fbd48bccf0dd3f9b 100644 --- a/packages/kokkos/core/src/Threads/Kokkos_ThreadsExec.hpp +++ b/packages/kokkos/core/src/Threads/Kokkos_ThreadsExec.hpp @@ -123,12 +123,15 @@ class ThreadsExec { static void global_unlock(); static void spawn(); - static void execute_resize_scratch(ThreadsExec &, const void *); + static void first_touch_allocate_thread_private_scratch(ThreadsExec &, + const void *); static void execute_sleep(ThreadsExec &, const void *); ThreadsExec(const ThreadsExec &); ThreadsExec &operator=(const ThreadsExec &); + static void execute_resize_scratch_in_serial(); + public: KOKKOS_INLINE_FUNCTION int pool_size() const { return m_pool_size; } KOKKOS_INLINE_FUNCTION int pool_rank() const { return m_pool_rank; } diff --git a/packages/kokkos/core/src/impl/Kokkos_Profiling_Interface.hpp b/packages/kokkos/core/src/impl/Kokkos_Profiling_Interface.hpp index 4e0e81405f00da10a9a6c89f5360edde08df9e1f..d5266820560eb22b5f71309aff7acaabc9de9ee6 100644 --- a/packages/kokkos/core/src/impl/Kokkos_Profiling_Interface.hpp +++ b/packages/kokkos/core/src/impl/Kokkos_Profiling_Interface.hpp @@ -118,11 +118,14 @@ template <typename ExecutionSpace> constexpr uint32_t device_id_root() { constexpr auto device_id = static_cast<uint32_t>(DeviceTypeTraits<ExecutionSpace>::id); - return (device_id << num_instance_bits); + return (device_id << (num_instance_bits + num_device_bits)); } template <typename ExecutionSpace> inline uint32_t device_id(ExecutionSpace const& space) noexcept { - return device_id_root<ExecutionSpace>() + space.impl_instance_id(); + return device_id_root<ExecutionSpace>() + + (DeviceTypeTraits<ExecutionSpace>::device_id(space) + << num_instance_bits) + + space.impl_instance_id(); } } // namespace Experimental } // namespace Tools diff --git a/packages/kokkos/core/src/impl/Kokkos_Serial.cpp b/packages/kokkos/core/src/impl/Kokkos_Serial.cpp index c49e838d8f0b0961b9dfd2bc76c07b5370cb2629..e5917eb59d1aba7a1cb9197fe841361ecca4d512 100644 --- a/packages/kokkos/core/src/impl/Kokkos_Serial.cpp +++ b/packages/kokkos/core/src/impl/Kokkos_Serial.cpp @@ -233,6 +233,15 @@ void SerialSpaceInitializer::print_configuration(std::ostream& msg, } } // namespace Impl + +#ifdef KOKKOS_ENABLE_CXX14 +namespace Tools { +namespace Experimental { +constexpr DeviceType DeviceTypeTraits<Serial>::id; +} +} // namespace Tools +#endif + } // namespace Kokkos #else diff --git a/packages/kokkos/core/src/impl/Kokkos_ViewMapping.hpp b/packages/kokkos/core/src/impl/Kokkos_ViewMapping.hpp index 09f7af09186966dd797e8c6d6b19b3e735fa4d73..f606a39839ff0f00317709ec9aa24374d762a853 100644 --- a/packages/kokkos/core/src/impl/Kokkos_ViewMapping.hpp +++ b/packages/kokkos/core/src/impl/Kokkos_ViewMapping.hpp @@ -1005,15 +1005,15 @@ struct ViewOffset< /* Cardinality of the domain index space */ KOKKOS_INLINE_FUNCTION constexpr size_type size() const { - return m_dim.N0 * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 * - m_dim.N6 * m_dim.N7; + return size_type(m_dim.N0) * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * + m_dim.N5 * m_dim.N6 * m_dim.N7; } /* Span of the range space */ KOKKOS_INLINE_FUNCTION constexpr size_type span() const { - return m_dim.N0 * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 * - m_dim.N6 * m_dim.N7; + return size_type(m_dim.N0) * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * + m_dim.N5 * m_dim.N6 * m_dim.N7; } KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const { @@ -1026,23 +1026,24 @@ struct ViewOffset< return m_dim.N0; } KOKKOS_INLINE_FUNCTION constexpr size_type stride_2() const { - return m_dim.N0 * m_dim.N1; + return size_type(m_dim.N0) * m_dim.N1; } KOKKOS_INLINE_FUNCTION constexpr size_type stride_3() const { - return m_dim.N0 * m_dim.N1 * m_dim.N2; + return size_type(m_dim.N0) * m_dim.N1 * m_dim.N2; } KOKKOS_INLINE_FUNCTION constexpr size_type stride_4() const { - return m_dim.N0 * m_dim.N1 * m_dim.N2 * m_dim.N3; + return size_type(m_dim.N0) * m_dim.N1 * m_dim.N2 * m_dim.N3; } KOKKOS_INLINE_FUNCTION constexpr size_type stride_5() const { - return m_dim.N0 * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4; + return size_type(m_dim.N0) * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4; } KOKKOS_INLINE_FUNCTION constexpr size_type stride_6() const { - return m_dim.N0 * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5; + return size_type(m_dim.N0) * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * + m_dim.N5; } KOKKOS_INLINE_FUNCTION constexpr size_type stride_7() const { - return m_dim.N0 * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 * - m_dim.N6; + return size_type(m_dim.N0) * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * + m_dim.N5 * m_dim.N6; } // Stride with [ rank ] value is the total length @@ -1288,8 +1289,8 @@ struct ViewOffset< /* Cardinality of the domain index space */ KOKKOS_INLINE_FUNCTION constexpr size_type size() const { - return m_dim.N0 * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 * - m_dim.N6 * m_dim.N7; + return size_type(m_dim.N0) * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * + m_dim.N5 * m_dim.N6 * m_dim.N7; } /* Span of the range space */ @@ -1633,15 +1634,15 @@ struct ViewOffset< /* Cardinality of the domain index space */ KOKKOS_INLINE_FUNCTION constexpr size_type size() const { - return m_dim.N0 * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 * - m_dim.N6 * m_dim.N7; + return size_type(m_dim.N0) * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * + m_dim.N5 * m_dim.N6 * m_dim.N7; } /* Span of the range space */ KOKKOS_INLINE_FUNCTION constexpr size_type span() const { - return m_dim.N0 * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 * - m_dim.N6 * m_dim.N7; + return size_type(m_dim.N0) * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * + m_dim.N5 * m_dim.N6 * m_dim.N7; } KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const { @@ -1916,14 +1917,14 @@ struct ViewOffset< /* Cardinality of the domain index space */ KOKKOS_INLINE_FUNCTION constexpr size_type size() const { - return m_dim.N0 * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 * - m_dim.N6 * m_dim.N7; + return size_type(m_dim.N0) * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * + m_dim.N5 * m_dim.N6 * m_dim.N7; } /* Span of the range space */ KOKKOS_INLINE_FUNCTION constexpr size_type span() const { - return size() > 0 ? m_dim.N0 * m_stride : 0; + return size() > 0 ? size_type(m_dim.N0) * m_stride : 0; } KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const { @@ -2066,27 +2067,29 @@ struct ViewOffset< stride(/* 2 <= rank */ m_dim.N1 * (dimension_type::rank == 2 - ? 1 + ? size_t(1) : m_dim.N2 * (dimension_type::rank == 3 - ? 1 + ? size_t(1) : m_dim.N3 * (dimension_type::rank == 4 - ? 1 + ? size_t(1) : m_dim.N4 * (dimension_type::rank == 5 - ? 1 + ? size_t(1) : m_dim.N5 * (dimension_type:: rank == 6 - ? 1 + ? size_t( + 1) : m_dim.N6 * (dimension_type:: rank == 7 - ? 1 + ? size_t( + 1) : m_dim .N7)))))))) { } @@ -2447,8 +2450,8 @@ struct ViewOffset<Dimension, Kokkos::LayoutStride, void> { constexpr size_type size() const { return dimension_type::rank == 0 ? 1 - : m_dim.N0 * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * - m_dim.N5 * m_dim.N6 * m_dim.N7; + : size_type(m_dim.N0) * m_dim.N1 * m_dim.N2 * m_dim.N3 * + m_dim.N4 * m_dim.N5 * m_dim.N6 * m_dim.N7; } private: diff --git a/packages/kokkos/core/src/impl/Kokkos_ViewTracker.hpp b/packages/kokkos/core/src/impl/Kokkos_ViewTracker.hpp index fe3651886bd138a0662e4799bcef8950ee4e1e26..972b1b6d9afdffd379f803c003dd35ddab7c751c 100644 --- a/packages/kokkos/core/src/impl/Kokkos_ViewTracker.hpp +++ b/packages/kokkos/core/src/impl/Kokkos_ViewTracker.hpp @@ -91,6 +91,7 @@ struct ViewTracker { template <class RT, class... RP> KOKKOS_INLINE_FUNCTION void assign(const View<RT, RP...>& vt) noexcept { + if (this == reinterpret_cast<const ViewTracker*>(&vt.m_track)) return; KOKKOS_IF_ON_HOST(( if (view_traits::is_managed && Kokkos::Impl::SharedAllocationRecord< void, void>::tracking_enabled()) { @@ -102,6 +103,7 @@ struct ViewTracker { KOKKOS_INLINE_FUNCTION ViewTracker& operator=( const ViewTracker& rhs) noexcept { + if (this == &rhs) return *this; KOKKOS_IF_ON_HOST(( if (view_traits::is_managed && Kokkos::Impl::SharedAllocationRecord< void, void>::tracking_enabled()) { diff --git a/packages/kokkos/core/unit_test/TestViewAPI.hpp b/packages/kokkos/core/unit_test/TestViewAPI.hpp index 21602be086b40d582608042f06d0260a5d63cfac..83efae6170dfe098432f16383f600ea01412cbe0 100644 --- a/packages/kokkos/core/unit_test/TestViewAPI.hpp +++ b/packages/kokkos/core/unit_test/TestViewAPI.hpp @@ -1087,6 +1087,20 @@ class TestViewAPI { dView4_unmanaged unmanaged_dx = dx; ASSERT_EQ(dx.use_count(), 1); + // Test self assignment +#if defined(__clang__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wself-assign-overloaded" +#endif + dx = dx; // copy-assignment operator +#if defined(__clang__) +#pragma GCC diagnostic pop +#endif + ASSERT_EQ(dx.use_count(), 1); + dx = reinterpret_cast<typename dView4::uniform_type &>( + dx); // conversion assignment operator + ASSERT_EQ(dx.use_count(), 1); + dView4_unmanaged unmanaged_from_ptr_dx = dView4_unmanaged( dx.data(), dx.extent(0), dx.extent(1), dx.extent(2), dx.extent(3)); diff --git a/packages/kokkos/core/unit_test/TestViewAPI_e.hpp b/packages/kokkos/core/unit_test/TestViewAPI_e.hpp index d4f484a530c952a33b20dada3222180c7785f06a..d1d38022a74e256e06103bd18d3e3cccd9db413e 100644 --- a/packages/kokkos/core/unit_test/TestViewAPI_e.hpp +++ b/packages/kokkos/core/unit_test/TestViewAPI_e.hpp @@ -240,6 +240,35 @@ struct TestViewOverloadResolution { TEST(TEST_CATEGORY, view_overload_resolution) { TestViewOverloadResolution<TEST_EXECSPACE>::test_function_overload(); } + +template <typename MemorySpace> +struct TestViewAllocationLargeRank { + using ViewType = Kokkos::View<char********, MemorySpace>; + + KOKKOS_FUNCTION void operator()(int) const { + size_t idx = v.extent(0) - 1; + auto& lhs = v(idx, idx, idx, idx, idx, idx, idx, idx); + lhs = 42; // This is where it segfaulted + } + + ViewType v; +}; + +TEST(TEST_CATEGORY, view_allocation_large_rank) { + using ExecutionSpace = typename TEST_EXECSPACE::execution_space; + using MemorySpace = typename TEST_EXECSPACE::memory_space; + constexpr int dim = 16; + using FunctorType = TestViewAllocationLargeRank<MemorySpace>; + typename FunctorType::ViewType v("v", dim, dim, dim, dim, dim, dim, dim, dim); + + Kokkos::parallel_for(Kokkos::RangePolicy<ExecutionSpace>(0, 1), + FunctorType{v}); + typename FunctorType::ViewType v_single(v.data() + v.size() - 1, 1, 1, 1, 1, + 1, 1, 1, 1); + auto result = + Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace{}, v_single); + ASSERT_EQ(result(0, 0, 0, 0, 0, 0, 0, 0), 42); +} } // namespace Test #include <TestViewIsAssignable.hpp> diff --git a/packages/kokkos/core/unit_test/tools/TestEventCorrectness.hpp b/packages/kokkos/core/unit_test/tools/TestEventCorrectness.hpp index 08863232ed679c5e70e87a69d02971825427ee07..bb1d3156f53f5b0470303a9748a02b2e532a736a 100644 --- a/packages/kokkos/core/unit_test/tools/TestEventCorrectness.hpp +++ b/packages/kokkos/core/unit_test/tools/TestEventCorrectness.hpp @@ -238,13 +238,10 @@ TEST(kokkosp, test_id_gen) { using Kokkos::Tools::Experimental::DeviceTypeTraits; test_wrapper([&]() { Kokkos::DefaultExecutionSpace ex; - auto id = device_id(ex); - auto id_ref = identifier_from_devid(id); - auto success = (id_ref.instance_id == ex.impl_instance_id()) && - (id_ref.device_id == - static_cast<uint32_t>( - DeviceTypeTraits<Kokkos::DefaultExecutionSpace>::id)); - ASSERT_TRUE(success); + auto id = device_id(ex); + auto id_ref = identifier_from_devid(id); + ASSERT_EQ(DeviceTypeTraits<decltype(ex)>::id, id_ref.type); + ASSERT_EQ(id_ref.instance_id, ex.impl_instance_id()); }); } @@ -253,6 +250,7 @@ TEST(kokkosp, test_id_gen) { */ TEST(kokkosp, test_kernel_sequence) { test_wrapper([&]() { + Kokkos::DefaultExecutionSpace ex; auto root = Kokkos::Tools::Experimental::device_id_root< Kokkos::DefaultExecutionSpace>(); std::vector<FencePayload> expected{ @@ -260,11 +258,10 @@ TEST(kokkosp, test_kernel_sequence) { {"named_instance", FencePayload::distinguishable_devices::no, root + num_instances}, {"test_kernel", FencePayload::distinguishable_devices::no, - root + num_instances} + Kokkos::Tools::Experimental::device_id(ex)} }; expect_fence_events(expected, [=]() { - Kokkos::DefaultExecutionSpace ex; TestFunctor tf; ex.fence("named_instance"); Kokkos::parallel_for( diff --git a/packages/kokkos/master_history.txt b/packages/kokkos/master_history.txt index e174b47f67c1344c2aa355e0d96ab307f41edac0..41c755a8a84be166c3e82b892b8b26b4d5df2bf7 100644 --- a/packages/kokkos/master_history.txt +++ b/packages/kokkos/master_history.txt @@ -27,3 +27,4 @@ tag: 3.4.00 date: 04:26:2021 master: 1fb0c284 release: 5d7738d6 tag: 3.4.01 date: 05:20:2021 master: 4b97a22f release: 410b15c8 tag: 3.5.00 date: 11:19:2021 master: c28a8b03 release: 21b879e4 tag: 3.6.00 date: 04:14:2022 master: 2834f94a release: 6ea708ff +tag: 3.6.01 date: 06:16:2022 master: b52f8c83 release: afe9b404 diff --git a/packages/kokkos/scripts/docker/Dockerfile.kokkosllvmproject b/packages/kokkos/scripts/docker/Dockerfile.kokkosllvmproject index 1df32051b40fc17f0dce457d0beb2815d8425e6d..e4ab07dbc250b945ce654803409371ff4f6f1db3 100644 --- a/packages/kokkos/scripts/docker/Dockerfile.kokkosllvmproject +++ b/packages/kokkos/scripts/docker/Dockerfile.kokkosllvmproject @@ -1,5 +1,7 @@ FROM nvidia/cuda:10.1-devel +RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub + RUN apt-get update && apt-get install -y \ bc \ git \ diff --git a/packages/kokkos/scripts/docker/Dockerfile.nvcc b/packages/kokkos/scripts/docker/Dockerfile.nvcc index c3e7a875f3707b601bb3a2a8d379accea352ec5f..cbae8e47dde923705a1065c9330bbc11d0febd16 100644 --- a/packages/kokkos/scripts/docker/Dockerfile.nvcc +++ b/packages/kokkos/scripts/docker/Dockerfile.nvcc @@ -3,6 +3,8 @@ FROM $BASE ARG ADDITIONAL_PACKAGES +RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub + RUN apt-get update && apt-get install -y \ bc \ wget \ diff --git a/packages/kokkos/scripts/docker/Dockerfile.openmptarget b/packages/kokkos/scripts/docker/Dockerfile.openmptarget index 0599c60857927a68710404a9f3ce597292a58c2f..caeee1821a00866662cce2b6efa30b5b9d1c82a6 100644 --- a/packages/kokkos/scripts/docker/Dockerfile.openmptarget +++ b/packages/kokkos/scripts/docker/Dockerfile.openmptarget @@ -1,4 +1,4 @@ -ARG BASE=nvidia/cuda:11.1-devel-ubuntu20.04 +ARG BASE=nvidia/cuda:11.1.1-devel-ubuntu20.04 FROM $BASE RUN apt-get update && apt-get install -y \ diff --git a/packages/kokkos/scripts/docker/Dockerfile.sycl b/packages/kokkos/scripts/docker/Dockerfile.sycl index 8f08fe2e528f93b09a8c1a1df4624b3a040f07d6..1cd700648a23341eae53068e770eb144a784aad0 100644 --- a/packages/kokkos/scripts/docker/Dockerfile.sycl +++ b/packages/kokkos/scripts/docker/Dockerfile.sycl @@ -1,6 +1,8 @@ ARG BASE=nvidia/cuda:10.2-devel FROM $BASE +RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub + RUN apt-get update && apt-get install -y \ bc \ wget \