diff --git a/packages/kokkos/.gitrepo b/packages/kokkos/.gitrepo index 25d2de3f21647f67c2e7b2c28678488d25372975..d9131b0c8b44b22749b5732ae9147f20d821d656 100644 --- a/packages/kokkos/.gitrepo +++ b/packages/kokkos/.gitrepo @@ -6,7 +6,7 @@ [subrepo] remote = git@github.com:kokkos/kokkos.git branch = master - commit = 785d19f23ed4ba43354f9ecd7ef67cd5976ca4c8 - parent = 3c152cb1a42b2192c6ff4ad8066c29541f4dd721 + commit = 953d7968e8fc5908af954f883e2e38d02c279cf2 + parent = 55da1f845ac4f9ea049f2d6a97c7edef95a887ab cmdver = 0.4.1 method = merge diff --git a/packages/kokkos/CHANGELOG.md b/packages/kokkos/CHANGELOG.md index 149e63ca91c8b9b23afb3e78079c23c0dea5eb38..9595b03ff93066631fc1c8d32bb7ccaab04f4980 100644 --- a/packages/kokkos/CHANGELOG.md +++ b/packages/kokkos/CHANGELOG.md @@ -1,5 +1,18 @@ # Change Log +## [3.1.1](https://github.com/kokkos/kokkos/tree/3.1.1) (2020-04-14) +[Full Changelog](https://github.com/kokkos/kokkos/compare/3.1.00...3.1.1) + +**Fixed bugs:** + +- Fix complex_double misalignment in reduce, clang+CUDA [\#2989](https://github.com/kokkos/kokkos/issues/2989) +- Fix compilation fails when profiling disabled and CUDA enabled [\#3001](https://github.com/kokkos/kokkos/issues/3001) +- Fix cuda reduction of non-trivial scalars of size 4 [\#2990](https://github.com/kokkos/kokkos/issues/2990) +- Configure and install version file when building in Trilinos [\#2957](https://github.com/kokkos/kokkos/pull/2957) +- Fix OpenMPTarget build missing include and namespace [\#3000](https://github.com/kokkos/kokkos/issues/3000) +- fix typo in KOKKOS_SET_EXE_PROPERTY() [\#2959](https://github.com/kokkos/kokkos/issues/2959) +- Fix non-zero span subviews of zero sized subviews [\#2979](https://github.com/kokkos/kokkos/issues/2979) + ## [3.1.00](https://github.com/kokkos/kokkos/tree/3.1.00) (2020-04-14) [Full Changelog](https://github.com/kokkos/kokkos/compare/3.0.00...3.1.00) diff --git a/packages/kokkos/CMakeLists.txt b/packages/kokkos/CMakeLists.txt index db88879039603880fc76bce8e0d195fc60f675c8..0e2aaa1897074826ba672d0c6a5a4a0084a0f504 100644 --- a/packages/kokkos/CMakeLists.txt +++ b/packages/kokkos/CMakeLists.txt @@ -103,7 +103,7 @@ ENDIF() set(Kokkos_VERSION_MAJOR 3) set(Kokkos_VERSION_MINOR 1) -set(Kokkos_VERSION_PATCH 0) +set(Kokkos_VERSION_PATCH 1) set(Kokkos_VERSION "${Kokkos_VERSION_MAJOR}.${Kokkos_VERSION_MINOR}.${Kokkos_VERSION_PATCH}") math(EXPR KOKKOS_VERSION "${Kokkos_VERSION_MAJOR} * 10000 + ${Kokkos_VERSION_MINOR} * 100 + ${Kokkos_VERSION_PATCH}") diff --git a/packages/kokkos/Makefile.kokkos b/packages/kokkos/Makefile.kokkos index afb3a371e9cc471b696e00d363b8b66995bb166f..320d398d9494ff7c1f7148c3c47cfdbe6b1ca461 100644 --- a/packages/kokkos/Makefile.kokkos +++ b/packages/kokkos/Makefile.kokkos @@ -2,7 +2,7 @@ KOKKOS_VERSION_MAJOR = 3 KOKKOS_VERSION_MINOR = 1 -KOKKOS_VERSION_PATCH = 0 +KOKKOS_VERSION_PATCH = 1 KOKKOS_VERSION = $(shell echo $(KOKKOS_VERSION_MAJOR)*10000+$(KOKKOS_VERSION_MINOR)*100+$(KOKKOS_VERSION_PATCH) | bc) # Options: Cuda,HIP,ROCm,OpenMP,Pthread,Serial diff --git a/packages/kokkos/cmake/kokkos_install.cmake b/packages/kokkos/cmake/kokkos_install.cmake index 6a39590f036da00f133c0654cf99f0788d7e6075..97bb2bd0b052a31052d7ac3b947501a722ab0e5b 100644 --- a/packages/kokkos/cmake/kokkos_install.cmake +++ b/packages/kokkos/cmake/kokkos_install.cmake @@ -1,3 +1,4 @@ +INCLUDE(CMakePackageConfigHelpers) IF (NOT KOKKOS_HAS_TRILINOS) INCLUDE(GNUInstallDirs) @@ -11,7 +12,6 @@ IF (NOT KOKKOS_HAS_TRILINOS) "${Kokkos_BINARY_DIR}/KokkosConfig.cmake" INSTALL_DESTINATION ${CMAKE_INSTALL_FULL_LIBDIR}/cmake) - INCLUDE(CMakePackageConfigHelpers) CONFIGURE_PACKAGE_CONFIG_FILE( cmake/KokkosConfigCommon.cmake.in "${Kokkos_BINARY_DIR}/KokkosConfigCommon.cmake" @@ -35,6 +35,13 @@ ELSE() CONFIGURE_FILE(cmake/KokkosTrilinosConfig.cmake.in ${Kokkos_BINARY_DIR}/KokkosTrilinosConfig.cmake @ONLY) file(READ ${Kokkos_BINARY_DIR}/KokkosTrilinosConfig.cmake KOKKOS_TRILINOS_CONFIG) file(APPEND "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/KokkosConfig_install.cmake" "${KOKKOS_TRILINOS_CONFIG}") + + WRITE_BASIC_PACKAGE_VERSION_FILE("${CMAKE_CURRENT_BINARY_DIR}/KokkosConfigVersion.cmake" + VERSION "${Kokkos_VERSION}" + COMPATIBILITY SameMajorVersion) + + install(FILES ${CMAKE_CURRENT_BINARY_DIR}/KokkosConfigVersion.cmake + DESTINATION "${${PROJECT_NAME}_INSTALL_LIB_DIR}/cmake/${PACKAGE_NAME}") ENDIF() INSTALL(FILES ${CMAKE_CURRENT_BINARY_DIR}/KokkosCore_config.h DESTINATION ${KOKKOS_HEADER_DIR}) diff --git a/packages/kokkos/cmake/kokkos_tribits.cmake b/packages/kokkos/cmake/kokkos_tribits.cmake index 1c3b704ada8eb8541694272ba88ed77c64302ba0..6ee1409aa72127d4ae4c127d097b4420a9149cb1 100644 --- a/packages/kokkos/cmake/kokkos_tribits.cmake +++ b/packages/kokkos/cmake/kokkos_tribits.cmake @@ -170,7 +170,7 @@ FUNCTION(KOKKOS_SET_EXE_PROPERTY ROOT_NAME) IF (NOT TARGET ${TARGET_NAME}) MESSAGE(SEND_ERROR "No target ${TARGET_NAME} exists - cannot set target properties") ENDIF() - SET_PROPERTY(TARGET ${TARGET_PROPERTY} PROPERTY ${ARGN}) + SET_PROPERTY(TARGET ${TARGET_NAME} PROPERTY ${ARGN}) ENDFUNCTION() MACRO(KOKKOS_SETUP_BUILD_ENVIRONMENT) diff --git a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp index 7d996fba0438e05c33ba0dbaf4831b21240429df..8795eb5a38b289c68768de4ddfa553307bb75152 100644 --- a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp +++ b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp @@ -97,7 +97,9 @@ __device__ inline // Depending on the ValueType _shared__ memory must be aligned up to 8byte // boundaries The reason not to use ValueType directly is that for types with // constructors it could lead to race conditions - __shared__ double sh_result[(sizeof(ValueType) + 7) / 8 * STEP_WIDTH]; + alignas(alignof(ValueType) > alignof(double) ? alignof(ValueType) + : alignof(double)) + __shared__ double sh_result[(sizeof(ValueType) + 7) / 8 * STEP_WIDTH]; ValueType* result = (ValueType*)&sh_result; const int step = 32 / blockDim.x; int shift = STEP_WIDTH; @@ -282,7 +284,9 @@ __device__ inline // Depending on the ValueType _shared__ memory must be aligned up to 8byte // boundaries The reason not to use ValueType directly is that for types with // constructors it could lead to race conditions - __shared__ double sh_result[(sizeof(ValueType) + 7) / 8 * STEP_WIDTH]; + alignas(alignof(ValueType) > alignof(double) ? alignof(ValueType) + : alignof(double)) + __shared__ double sh_result[(sizeof(ValueType) + 7) / 8 * STEP_WIDTH]; ValueType* result = (ValueType*)&sh_result; const int step = 32 / blockDim.x; int shift = STEP_WIDTH; diff --git a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Vectorization.hpp b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Vectorization.hpp index f75d2e56f7ae687fc4b6345bc6c1762d1eb801e5..62966f859d1e88acfd96bf0949a6acfeac5ef89c 100644 --- a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Vectorization.hpp +++ b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Vectorization.hpp @@ -81,18 +81,19 @@ struct in_place_shfl_op { union conv_type { Scalar orig; shfl_type conv; + // This should be fine, members get explicitly reset, which changes the + // active member + KOKKOS_FUNCTION conv_type() { conv = 0; } }; conv_type tmp_in; tmp_in.orig = in; - conv_type tmp_out; - tmp_out.conv = tmp_in.conv; + shfl_type tmp_out; + tmp_out = reinterpret_cast<shfl_type&>(tmp_in.orig); conv_type res; //------------------------------------------------ - res.conv = self().do_shfl_op( - mask, reinterpret_cast<shfl_type const&>(tmp_out.conv), lane_or_delta, - width); + res.conv = self().do_shfl_op(mask, tmp_out, lane_or_delta, width); //------------------------------------------------ - out = res.orig; + out = reinterpret_cast<Scalar&>(res.conv); } // TODO: figure out why 64-bit shfl fails in Clang diff --git a/packages/kokkos/core/src/Kokkos_CudaSpace.hpp b/packages/kokkos/core/src/Kokkos_CudaSpace.hpp index 53e3b7778641d45af2d43b6fd08db499d012e370..7db5dd9561ece7679b7cf5b24bd381b143e7a6f9 100644 --- a/packages/kokkos/core/src/Kokkos_CudaSpace.hpp +++ b/packages/kokkos/core/src/Kokkos_CudaSpace.hpp @@ -56,6 +56,8 @@ #include <Kokkos_HostSpace.hpp> +#include <impl/Kokkos_Profiling_Interface.hpp> + #include <Cuda/Kokkos_Cuda_abort.hpp> #ifdef KOKKOS_IMPL_DEBUG_CUDA_PIN_UVM_TO_HOST diff --git a/packages/kokkos/core/src/Kokkos_OpenMPTarget.hpp b/packages/kokkos/core/src/Kokkos_OpenMPTarget.hpp index d113f244229ff81e95585a5043a65340846ea1d9..e853b8228d07364c45b28aa669feaef2c502dbdb 100644 --- a/packages/kokkos/core/src/Kokkos_OpenMPTarget.hpp +++ b/packages/kokkos/core/src/Kokkos_OpenMPTarget.hpp @@ -59,7 +59,7 @@ #include <Kokkos_TaskPolicy.hpp> #include <Kokkos_Layout.hpp> #include <impl/Kokkos_Tags.hpp> - +#include <impl/Kokkos_Profiling_Interface.hpp> #include <KokkosExp_MDRangePolicy.hpp> /*--------------------------------------------------------------------------*/ @@ -124,8 +124,9 @@ class OpenMPTarget { namespace Profiling { namespace Experimental { template <> -struct DeviceTypeTraits<Experimental::OpenMPTarget> { - static constexpr DeviceType id = DeviceType::OpenMPTarget; +struct DeviceTypeTraits<::Kokkos::Experimental::OpenMPTarget> { + static constexpr DeviceType id = + ::Kokkos::Profiling::Experimental::DeviceType::OpenMPTarget; }; } // namespace Experimental } // namespace Profiling diff --git a/packages/kokkos/core/src/impl/Kokkos_Core.cpp b/packages/kokkos/core/src/impl/Kokkos_Core.cpp index 9640e0fccb7595c884ace5a9097cbb09026400ce..6a6559e415dc15b44f881a0f60f37b739ddd7bb8 100644 --- a/packages/kokkos/core/src/impl/Kokkos_Core.cpp +++ b/packages/kokkos/core/src/impl/Kokkos_Core.cpp @@ -168,34 +168,7 @@ int get_ctest_gpu(const char* local_rank_str) { namespace { -bool is_unsigned_int(const char* str) { - const size_t len = strlen(str); - for (size_t i = 0; i < len; ++i) { - if (!isdigit(str[i])) { - return false; - } - } - return true; -} - -void initialize_backends(const InitArguments& args) { -// This is an experimental setting -// For KNL in Flat mode this variable should be set, so that -// memkind allocates high bandwidth memory correctly. -#ifdef KOKKOS_ENABLE_HBWSPACE - setenv("MEMKIND_HBW_NODES", "1", 0); -#endif - - // Protect declarations, to prevent "unused variable" warnings. -#if defined(KOKKOS_ENABLE_OPENMP) || defined(KOKKOS_ENABLE_THREADS) || \ - defined(KOKKOS_ENABLE_OPENMPTARGET) || defined(KOKKOS_ENABLE_HPX) - const int num_threads = args.num_threads; -#endif -#if defined(KOKKOS_ENABLE_THREADS) || defined(KOKKOS_ENABLE_OPENMPTARGET) - const int use_numa = args.num_numa; -#endif -#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_ROCM) || \ - defined(KOKKOS_ENABLE_HIP) +int get_gpu(const InitArguments& args) { int use_gpu = args.device_id; const int ndevices = args.ndevices; const int skip_device = args.skip_device; @@ -231,6 +204,38 @@ void initialize_backends(const InitArguments& args) { // shift assignments over by one so no one is assigned to "skip_device" if (use_gpu >= skip_device) ++use_gpu; } + return use_gpu; +} + +bool is_unsigned_int(const char* str) { + const size_t len = strlen(str); + for (size_t i = 0; i < len; ++i) { + if (!isdigit(str[i])) { + return false; + } + } + return true; +} + +void initialize_backends(const InitArguments& args) { +// This is an experimental setting +// For KNL in Flat mode this variable should be set, so that +// memkind allocates high bandwidth memory correctly. +#ifdef KOKKOS_ENABLE_HBWSPACE + setenv("MEMKIND_HBW_NODES", "1", 0); +#endif + + // Protect declarations, to prevent "unused variable" warnings. +#if defined(KOKKOS_ENABLE_OPENMP) || defined(KOKKOS_ENABLE_THREADS) || \ + defined(KOKKOS_ENABLE_OPENMPTARGET) || defined(KOKKOS_ENABLE_HPX) + const int num_threads = args.num_threads; +#endif +#if defined(KOKKOS_ENABLE_THREADS) || defined(KOKKOS_ENABLE_OPENMPTARGET) + const int use_numa = args.num_numa; +#endif +#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_ROCM) || \ + defined(KOKKOS_ENABLE_HIP) + int use_gpu = get_gpu(args); #endif // defined( KOKKOS_ENABLE_CUDA ) #if defined(KOKKOS_ENABLE_OPENMP) diff --git a/packages/kokkos/core/src/impl/Kokkos_ViewMapping.hpp b/packages/kokkos/core/src/impl/Kokkos_ViewMapping.hpp index a8dc1fb84a6fda5934b0bde58a4ac3f4d828fb90..c8230169e7ce873036d797251827e8e4d75db24b 100644 --- a/packages/kokkos/core/src/impl/Kokkos_ViewMapping.hpp +++ b/packages/kokkos/core/src/impl/Kokkos_ViewMapping.hpp @@ -1286,8 +1286,8 @@ struct ViewOffset< /* Span of the range space */ KOKKOS_INLINE_FUNCTION constexpr size_type span() const { - return m_stride * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 * - m_dim.N6 * m_dim.N7; + return (m_dim.N0 > size_type(0) ? m_stride : size_type(0)) * m_dim.N1 * + m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 * m_dim.N6 * m_dim.N7; } KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const { @@ -1882,7 +1882,9 @@ struct ViewOffset< /* Span of the range space */ KOKKOS_INLINE_FUNCTION - constexpr size_type span() const { return m_dim.N0 * m_stride; } + constexpr size_type span() const { + return size() > 0 ? m_dim.N0 * m_stride : 0; + } KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const { return m_stride == m_dim.N7 * m_dim.N6 * m_dim.N5 * m_dim.N4 * m_dim.N3 * @@ -2398,14 +2400,16 @@ struct ViewOffset<Dimension, Kokkos::LayoutStride, void> { /* Span of the range space, largest stride * dimension */ KOKKOS_INLINE_FUNCTION constexpr size_type span() const { - return Max(m_dim.N0 * m_stride.S0, - Max(m_dim.N1 * m_stride.S1, - Max(m_dim.N2 * m_stride.S2, - Max(m_dim.N3 * m_stride.S3, - Max(m_dim.N4 * m_stride.S4, - Max(m_dim.N5 * m_stride.S5, - Max(m_dim.N6 * m_stride.S6, - m_dim.N7 * m_stride.S7))))))); + return size() == size_type(0) + ? size_type(0) + : Max(m_dim.N0 * m_stride.S0, + Max(m_dim.N1 * m_stride.S1, + Max(m_dim.N2 * m_stride.S2, + Max(m_dim.N3 * m_stride.S3, + Max(m_dim.N4 * m_stride.S4, + Max(m_dim.N5 * m_stride.S5, + Max(m_dim.N6 * m_stride.S6, + m_dim.N7 * m_stride.S7))))))); } KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const { diff --git a/packages/kokkos/core/unit_test/TestDefaultDeviceTypeInit.hpp b/packages/kokkos/core/unit_test/TestDefaultDeviceTypeInit.hpp index 33c736c5e01aa2b0e1b33a4b813749ee68ebd220..c27f13e956cef315d12c12607b0766eaa79f2738 100644 --- a/packages/kokkos/core/unit_test/TestDefaultDeviceTypeInit.hpp +++ b/packages/kokkos/core/unit_test/TestDefaultDeviceTypeInit.hpp @@ -273,7 +273,7 @@ void check_correct_initialization(const Kokkos::InitArguments& argstruct) { int expected_device = argstruct.device_id; if (argstruct.device_id < 0) { - expected_device = 0; + expected_device = Kokkos::Cuda().cuda_device(); } ASSERT_EQ(expected_device, device); diff --git a/packages/kokkos/master_history.txt b/packages/kokkos/master_history.txt index f6eb95292c2ebce443d7350155e059f6686d433a..11e803e76026ef9be546255ef1c5406972ca07c4 100644 --- a/packages/kokkos/master_history.txt +++ b/packages/kokkos/master_history.txt @@ -19,3 +19,4 @@ tag: 2.8.00 date: 02:05:2019 master: 34931a36 develop: d1659d1d tag: 2.9.00 date: 06:24:2019 master: 5d6e7fb3 develop: 4c6cb80a tag: 3.0.00 date: 01:31:2020 master: 2983b80d release-candidate-3.0: fdc904a6 tag: 3.1.00 date: 04:14:2020 master: cd1b1d0a develop: fd90af43 +tag: 3.1.1 date: 05:04:2020 master: 785d19f2 release: 2be028bc