From 5c43f5619370289f8014c3133eb52df68d84f847 Mon Sep 17 00:00:00 2001 From: Stephane Del Pino <stephane.delpino44@gmail.com> Date: Mon, 27 Sep 2021 00:04:34 +0200 Subject: [PATCH] git subrepo pull (merge) packages/kokkos subrepo: subdir: "packages/kokkos" merged: "c28a8b032" upstream: origin: "git@github.com:kokkos/kokkos.git" branch: "master" commit: "c28a8b032" git-subrepo: version: "0.4.3" origin: "git@github.com:ingydotnet/git-subrepo.git" commit: "2f68596" --- .../continuous-integration-workflow.yml | 9 +- packages/kokkos/.gitrepo | 4 +- packages/kokkos/BUILD.md | 3 + packages/kokkos/CHANGELOG.md | 15 ++ packages/kokkos/CMakeLists.txt | 11 +- packages/kokkos/Makefile.kokkos | 22 ++- packages/kokkos/algorithms/CMakeLists.txt | 8 +- .../kokkos/algorithms/src/Kokkos_Random.hpp | 18 +++ .../algorithms/unit_tests/CMakeLists.txt | 2 +- .../algorithms/unit_tests/TestRandom.hpp | 10 ++ .../kokkos/algorithms/unit_tests/TestSort.hpp | 3 + packages/kokkos/bin/nvcc_wrapper | 75 ++++++++-- packages/kokkos/cmake/KokkosCore_config.h.in | 1 + packages/kokkos/cmake/kokkos_arch.cmake | 14 +- .../kokkos/cmake/kokkos_compiler_id.cmake | 2 +- .../kokkos/cmake/kokkos_enable_devices.cmake | 6 +- packages/kokkos/cmake/kokkos_functions.cmake | 2 +- .../impl/Kokkos_StaticCrsGraph_factory.hpp | 19 +-- .../core/src/Kokkos_OpenMPTargetSpace.hpp | 6 - packages/kokkos/core/src/Kokkos_SYCL.hpp | 9 +- .../OpenMPTarget/Kokkos_OpenMPTargetSpace.cpp | 23 --- packages/kokkos/core/src/SYCL/Kokkos_SYCL.cpp | 139 +++++++++--------- .../core/src/impl/Kokkos_Atomic_Windows.hpp | 6 - .../test-code/test_config_arch_list.bash | 2 +- packages/kokkos/generate_makefile.bash | 1 + packages/kokkos/gnu_generate_makefile.bash | 1 + packages/kokkos/master_history.txt | 1 + .../testing_scripts/generate_makefile.bash | 1 + 28 files changed, 256 insertions(+), 157 deletions(-) diff --git a/packages/kokkos/.github/workflows/continuous-integration-workflow.yml 
b/packages/kokkos/.github/workflows/continuous-integration-workflow.yml index 0e5f523cc..a9dc0ec86 100644 --- a/packages/kokkos/.github/workflows/continuous-integration-workflow.yml +++ b/packages/kokkos/.github/workflows/continuous-integration-workflow.yml @@ -19,16 +19,19 @@ jobs: cxx: 'icpc' cmake_build_type: 'Debug' openmp: 'ON' - - distro: 'fedora:intel-oneapi' + - distro: 'fedora:intel' cxx: 'icpx' cmake_build_type: 'Release' openmp: 'ON' - - distro: 'fedora:intel-oneapi' + - distro: 'fedora:intel' cxx: 'icpx' cmake_build_type: 'Debug' openmp: 'ON' runs-on: ubuntu-latest - container: ghcr.io/kokkos/ci-containers/${{ matrix.distro }} + container: + image: ghcr.io/kokkos/ci-containers/${{ matrix.distro }} + # see https://github.com/actions/virtual-environments/issues/3812 + options: --security-opt seccomp=unconfined steps: - name: Checkout code uses: actions/checkout@v2.2.0 diff --git a/packages/kokkos/.gitrepo b/packages/kokkos/.gitrepo index 6dd4101e5..85e71521d 100644 --- a/packages/kokkos/.gitrepo +++ b/packages/kokkos/.gitrepo @@ -6,7 +6,7 @@ [subrepo] remote = git@github.com:kokkos/kokkos.git branch = master - commit = 4b97a22ff7be7635116930bb97173058d6079202 - parent = f2fc77ba9037b2a2032ab980fb445175441f6d1f + commit = c28a8b03288b185f846ddfb1b7c08213e12e2634 + parent = 2c8a5742df289f02f5ca31bce1e293dbfdb1701e method = merge cmdver = 0.4.3 diff --git a/packages/kokkos/BUILD.md b/packages/kokkos/BUILD.md index e1f0e3e47..bb1a31f26 100644 --- a/packages/kokkos/BUILD.md +++ b/packages/kokkos/BUILD.md @@ -262,6 +262,9 @@ Architecture-specific optimizations can be enabled by specifying `-DKokkos_ARCH_ * Kokkos_ARCH_ZEN2 * Whether to optimize for the Zen2 architecture * BOOL Default: OFF +* Kokkos_ARCH_ZEN3 + * Whether to optimize for the Zen3 architecture + * BOOL Default: OFF * Kokkos_ARCH_HSW * Whether to optimize for the HSW architecture * BOOL Default: OFF diff --git a/packages/kokkos/CHANGELOG.md b/packages/kokkos/CHANGELOG.md index 3ce38c37d..7bb6de4cd 
100644 --- a/packages/kokkos/CHANGELOG.md +++ b/packages/kokkos/CHANGELOG.md @@ -1,5 +1,20 @@ # Change Log +## [3.4.01](https://github.com/kokkos/kokkos/tree/3.4.01) (2021-05-19) +[Full Changelog](https://github.com/kokkos/kokkos/compare/3.4.00...3.4.01) + +**Bug Fixes:** +- Windows: Remove atomic_compare_exchange_strong overload conflicts with Windows [\#4024](https://github.com/kokkos/kokkos/pull/4024) +- OpenMPTarget: Fixup allocation headers with OpenMPTarget backend [\#4020](https://github.com/kokkos/kokkos/pull/4020) +- OpenMPTarget: Add missing specailization for OMPT to Kokkos Random [\#4022](https://github.com/kokkos/kokkos/pull/4022) +- AMD: Add support for AMD Zen3 CPU architecture [\#4021](https://github.com/kokkos/kokkos/pull/4021) +- SYCL: Implement SYCL::print_configuration [\#4012](https://github.com/kokkos/kokkos/pull/4012) +- Containers: staticcsrgraph: use device type instead of execution space to construct views [\#3998](https://github.com/kokkos/kokkos/pull/3998) +- nvcc_wrapper: fix errors in argument handling, suppress duplicates of GPU architecture and RDC flags [\#4006](https://github.com/kokkos/kokkos/pull/4006) +- CI: Add icpx testing to intel container [\#4004](https://github.com/kokkos/kokkos/pull/4004) +- CMake/TRIBITS: Keep quoted compiler flags when passing to Trilinos [\#4007](https://github.com/kokkos/kokkos/pull/4007) +- CMake: Rename IntelClang to IntelLLVM [\#3945](https://github.com/kokkos/kokkos/pull/3945) + ## [3.4.00](https://github.com/kokkos/kokkos/tree/3.4.00) (2021-04-25) [Full Changelog](https://github.com/kokkos/kokkos/compare/3.3.01...3.4.00) diff --git a/packages/kokkos/CMakeLists.txt b/packages/kokkos/CMakeLists.txt index 6fc1bf7d2..9452027d8 100644 --- a/packages/kokkos/CMakeLists.txt +++ b/packages/kokkos/CMakeLists.txt @@ -112,7 +112,7 @@ ENDIF() set(Kokkos_VERSION_MAJOR 3) set(Kokkos_VERSION_MINOR 4) -set(Kokkos_VERSION_PATCH 00) +set(Kokkos_VERSION_PATCH 01) set(Kokkos_VERSION 
"${Kokkos_VERSION_MAJOR}.${Kokkos_VERSION_MINOR}.${Kokkos_VERSION_PATCH}") math(EXPR KOKKOS_VERSION "${Kokkos_VERSION_MAJOR} * 10000 + ${Kokkos_VERSION_MINOR} * 100 + ${Kokkos_VERSION_PATCH}") @@ -206,8 +206,13 @@ ENDIF() IF (KOKKOS_HAS_TRILINOS) # Overwrite the old flags at the top-level # Because Tribits doesn't use lists, it uses spaces for the list of CXX flags - # we have to match the annoying behavior - STRING(REPLACE ";" " " KOKKOSCORE_COMPILE_OPTIONS "${KOKKOS_COMPILE_OPTIONS}") + # we have to match the annoying behavior, also we have to preserve quotes + # which needs another workaround. + SET(KOKKOS_COMPILE_OPTIONS_TMP) + FOREACH(OPTION ${KOKKOS_COMPILE_OPTIONS}) + LIST(APPEND KOKKOS_COMPILE_OPTIONS_TMP \"${OPTION}\") + ENDFOREACH() + STRING(REPLACE ";" " " KOKKOSCORE_COMPILE_OPTIONS "${KOKKOS_COMPILE_OPTIONS_TMP}") LIST(APPEND KOKKOS_ALL_COMPILE_OPTIONS ${KOKKOS_COMPILE_OPTIONS}) IF (KOKKOS_ENABLE_CUDA) LIST(APPEND KOKKOS_ALL_COMPILE_OPTIONS ${KOKKOS_CUDA_OPTIONS}) diff --git a/packages/kokkos/Makefile.kokkos b/packages/kokkos/Makefile.kokkos index 2599121d7..bda857207 100644 --- a/packages/kokkos/Makefile.kokkos +++ b/packages/kokkos/Makefile.kokkos @@ -2,7 +2,7 @@ KOKKOS_VERSION_MAJOR = 3 KOKKOS_VERSION_MINOR = 4 -KOKKOS_VERSION_PATCH = 00 +KOKKOS_VERSION_PATCH = 01 KOKKOS_VERSION = $(shell echo $(KOKKOS_VERSION_MAJOR)*10000+$(KOKKOS_VERSION_MINOR)*100+$(KOKKOS_VERSION_PATCH) | bc) # Options: Cuda,HIP,OpenMP,Pthread,Serial @@ -14,7 +14,7 @@ KOKKOS_DEVICES ?= "Pthread" # ARM: ARMv80,ARMv81,ARMv8-ThunderX,ARMv8-TX2,A64FX # IBM: BGQ,Power7,Power8,Power9 # AMD-GPUS: Vega900,Vega906,Vega908 -# AMD-CPUS: AMDAVX,Zen,Zen2 +# AMD-CPUS: AMDAVX,Zen,Zen2,Zen3 KOKKOS_ARCH ?= "" # Options: yes,no KOKKOS_DEBUG ?= "no" @@ -372,6 +372,7 @@ KOKKOS_INTERNAL_USE_ARCH_IBM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_ # AMD based. 
KOKKOS_INTERNAL_USE_ARCH_AMDAVX := $(call kokkos_has_string,$(KOKKOS_ARCH),AMDAVX) +KOKKOS_INTERNAL_USE_ARCH_ZEN3 := $(call kokkos_has_string,$(KOKKOS_ARCH),Zen3) KOKKOS_INTERNAL_USE_ARCH_ZEN2 := $(call kokkos_has_string,$(KOKKOS_ARCH),Zen2) KOKKOS_INTERNAL_USE_ARCH_ZEN := $(call kokkos_has_string,$(KOKKOS_ARCH),Zen) KOKKOS_INTERNAL_USE_ARCH_VEGA900 := $(call kokkos_has_string,$(KOKKOS_ARCH),Vega900) @@ -381,12 +382,12 @@ KOKKOS_INTERNAL_USE_ARCH_VEGA908 := $(call kokkos_has_string,$(KOKKOS_ARCH),Vega # Any AVX? KOKKOS_INTERNAL_USE_ARCH_SSE42 := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_WSM)) KOKKOS_INTERNAL_USE_ARCH_AVX := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_SNB) + $(KOKKOS_INTERNAL_USE_ARCH_AMDAVX)) -KOKKOS_INTERNAL_USE_ARCH_AVX2 := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_HSW) + $(KOKKOS_INTERNAL_USE_ARCH_BDW) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN2)) +KOKKOS_INTERNAL_USE_ARCH_AVX2 := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_HSW) + $(KOKKOS_INTERNAL_USE_ARCH_BDW) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN2) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN3)) KOKKOS_INTERNAL_USE_ARCH_AVX512MIC := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_KNL)) KOKKOS_INTERNAL_USE_ARCH_AVX512XEON := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_SKX)) # Decide what ISA level we are able to support. 
-KOKKOS_INTERNAL_USE_ISA_X86_64 := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_WSM) + $(KOKKOS_INTERNAL_USE_ARCH_SNB) + $(KOKKOS_INTERNAL_USE_ARCH_HSW) + $(KOKKOS_INTERNAL_USE_ARCH_BDW) + $(KOKKOS_INTERNAL_USE_ARCH_KNL) + $(KOKKOS_INTERNAL_USE_ARCH_SKX) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN2)) +KOKKOS_INTERNAL_USE_ISA_X86_64 := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_WSM) + $(KOKKOS_INTERNAL_USE_ARCH_SNB) + $(KOKKOS_INTERNAL_USE_ARCH_HSW) + $(KOKKOS_INTERNAL_USE_ARCH_BDW) + $(KOKKOS_INTERNAL_USE_ARCH_KNL) + $(KOKKOS_INTERNAL_USE_ARCH_SKX) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN2) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN3)) KOKKOS_INTERNAL_USE_ISA_KNC := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_KNC)) KOKKOS_INTERNAL_USE_ISA_POWERPCLE := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_POWER8) + $(KOKKOS_INTERNAL_USE_ARCH_POWER9)) KOKKOS_INTERNAL_USE_ISA_POWERPCBE := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_POWER7)) @@ -780,6 +781,19 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ZEN2), 1) endif endif +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ZEN3), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_ZEN3") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_AVX2") + + ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1) + KOKKOS_CXXFLAGS += -mavx2 + KOKKOS_LDFLAGS += -mavx2 + else + KOKKOS_CXXFLAGS += -march=znver3 -mtune=znver3 + KOKKOS_LDFLAGS += -march=znver3 -mtune=znver3 + endif +endif + ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX), 1) tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV80") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV8_THUNDERX") diff --git a/packages/kokkos/algorithms/CMakeLists.txt b/packages/kokkos/algorithms/CMakeLists.txt index fd099054b..4df76a1db 100644 --- a/packages/kokkos/algorithms/CMakeLists.txt +++ b/packages/kokkos/algorithms/CMakeLists.txt @@ -5,10 +5,12 @@ KOKKOS_SUBPACKAGE(Algorithms) IF (NOT Kokkos_INSTALL_TESTING) ADD_SUBDIRECTORY(src) ENDIF() - 
-KOKKOS_ADD_TEST_DIRECTORIES(unit_tests) +IF(NOT (KOKKOS_ENABLE_OPENMPTARGET + AND (KOKKOS_CXX_COMPILER_ID STREQUAL PGI OR + KOKKOS_CXX_COMPILER_ID STREQUAL NVHPC))) + KOKKOS_ADD_TEST_DIRECTORIES(unit_tests) +ENDIF() KOKKOS_SUBPACKAGE_POSTPROCESS() - diff --git a/packages/kokkos/algorithms/src/Kokkos_Random.hpp b/packages/kokkos/algorithms/src/Kokkos_Random.hpp index 904cf5ccb..55ce19971 100644 --- a/packages/kokkos/algorithms/src/Kokkos_Random.hpp +++ b/packages/kokkos/algorithms/src/Kokkos_Random.hpp @@ -687,6 +687,24 @@ struct Random_UniqueIndex<Kokkos::Experimental::SYCL> { }; #endif +#ifdef KOKKOS_ENABLE_OPENMPTARGET +template <> +struct Random_UniqueIndex<Kokkos::Experimental::OpenMPTarget> { + using locks_view_type = View<int*, Kokkos::Experimental::OpenMPTarget>; + KOKKOS_FUNCTION + static int get_state_idx(const locks_view_type& locks) { + const int team_size = omp_get_num_threads(); + int i = omp_get_team_num() * team_size + omp_get_thread_num(); + const int lock_size = locks.extent_int(0); + + while (Kokkos::atomic_compare_exchange(&locks(i), 0, 1)) { + i = (i + 1) % lock_size; + } + return i; + } +}; +#endif + } // namespace Impl template <class DeviceType> diff --git a/packages/kokkos/algorithms/unit_tests/CMakeLists.txt b/packages/kokkos/algorithms/unit_tests/CMakeLists.txt index 910983798..50f8f0a33 100644 --- a/packages/kokkos/algorithms/unit_tests/CMakeLists.txt +++ b/packages/kokkos/algorithms/unit_tests/CMakeLists.txt @@ -44,7 +44,7 @@ IF(Kokkos_ENABLE_OPENMP) ) ENDIF() -foreach(Tag Threads;Serial;OpenMP;Cuda;HPX;HIP;SYCL) +foreach(Tag Threads;Serial;OpenMP;Cuda;HPX;HIP;SYCL;OpenMPTarget) # Because there is always an exception to the rule if(Tag STREQUAL "Threads") set(DEVICE "PTHREAD") diff --git a/packages/kokkos/algorithms/unit_tests/TestRandom.hpp b/packages/kokkos/algorithms/unit_tests/TestRandom.hpp index 1f1487509..c37e779c9 100644 --- a/packages/kokkos/algorithms/unit_tests/TestRandom.hpp +++ 
b/packages/kokkos/algorithms/unit_tests/TestRandom.hpp @@ -109,6 +109,16 @@ struct RandomProperties { } }; +// FIXME_OPENMPTARGET: Need this for OpenMPTarget because contra to the standard +// llvm requires the binary operator defined not just the += +KOKKOS_INLINE_FUNCTION +RandomProperties operator+(const RandomProperties& org, + const RandomProperties& add) { + RandomProperties val = org; + val += add; + return val; +} + template <class GeneratorPool, class Scalar> struct test_random_functor { using rnd_type = typename GeneratorPool::generator_type; diff --git a/packages/kokkos/algorithms/unit_tests/TestSort.hpp b/packages/kokkos/algorithms/unit_tests/TestSort.hpp index a3c362ec2..9c6308c84 100644 --- a/packages/kokkos/algorithms/unit_tests/TestSort.hpp +++ b/packages/kokkos/algorithms/unit_tests/TestSort.hpp @@ -370,7 +370,10 @@ template <class ExecutionSpace, typename KeyType> void test_sort(unsigned int N) { test_1D_sort<ExecutionSpace, KeyType>(N); test_3D_sort<ExecutionSpace, KeyType>(N); +// FIXME_OPENMPTARGET: OpenMPTarget doesn't support DynamicView yet. +#ifndef KOKKOS_ENABLE_OPENMPTARGET test_dynamic_view_sort<ExecutionSpace, KeyType>(N); +#endif test_issue_1160_sort<ExecutionSpace>(); } } // namespace Impl diff --git a/packages/kokkos/bin/nvcc_wrapper b/packages/kokkos/bin/nvcc_wrapper index 5556e888e..4e52e4d09 100755 --- a/packages/kokkos/bin/nvcc_wrapper +++ b/packages/kokkos/bin/nvcc_wrapper @@ -67,6 +67,11 @@ shared_versioned_libraries="" # Does the User set the architecture arch_set=0 +arch_flag="" + +# Does the user set RDC? 
+rdc_set=0 +rdc_flag="" # Does the user overwrite the host compiler ccbin_set=0 @@ -190,8 +195,34 @@ do host_only_args="$host_only_args $1 $2" shift ;; + # Handle nvcc args controlling whether to generated relocatable device code + --relocatable-device-code=*|-rdc=*) + if [ "$rdc_set" -eq 0 ]; then + rdc_set=1 + rdc_flag="$1" + cuda_args="$cuda_args $rdc_flag" + elif [ "$rdc_flag" != "$1" ]; then + echo "RDC is being set twice with different flags, which is not handled" + echo "$rdc_flag" + echo "$1" + exit 1 + fi + ;; + -rdc) + if [ "$rdc_set" -eq 0 ]; then + rdc_set=1 + rdc_flag="$1 $2" + cuda_args="$cuda_args $rdc_flag" + shift + elif [ "$rdc_flag" != "$1 $2" ]; then + echo "RDC is being set twice with different flags, which is not handled" + echo "$rdc_flag" + echo "$1 $2" + exit 1 + fi + ;; #Handle known nvcc args - --dryrun|--verbose|--keep|--keep-dir*|-G|--relocatable-device-code*|-lineinfo|-expt-extended-lambda|-expt-relaxed-constexpr|--resource-usage|-Xptxas*|--fmad*|--use_fast_math|--Wext-lambda-captures-this|-Wext-lambda-captures-this) + --dryrun|--verbose|--keep|--keep-dir*|-G|-lineinfo|-expt-extended-lambda|-expt-relaxed-constexpr|--resource-usage|-Xptxas*|--fmad=*|--use_fast_math|--Wext-lambda-captures-this|-Wext-lambda-captures-this) cuda_args="$cuda_args $1" ;; #Handle more known nvcc args @@ -199,13 +230,13 @@ do cuda_args="$cuda_args $1" ;; #Handle known nvcc args that have an argument - -rdc|-maxrregcount|--default-stream|-Xnvlink|--fmad|-cudart|--cudart|-include) + -maxrregcount=*|--maxrregcount=*) + cuda_args="$cuda_args $1" + ;; + -maxrregcount|--default-stream|-Xnvlink|--fmad|-cudart|--cudart|-include) cuda_args="$cuda_args $1 $2" shift ;; - -rdc=*|-maxrregcount*|--maxrregcount*) - cuda_args="$cuda_args $1" - ;; #Handle unsupported standard flags --std=c++1y|-std=c++1y|--std=gnu++1y|-std=gnu++1y|--std=c++1z|-std=c++1z|--std=gnu++1z|-std=gnu++1z|--std=c++2a|-std=c++2a) fallback_std_flag="-std=c++14" @@ -323,20 +354,36 @@ do ;; #Handle -arch 
argument (if its not set use a default) this is the version with = sign - -arch*|-gencode*) - cuda_args="$cuda_args $1" - arch_set=1 + -arch=*|-gencode=*) + if [ "$arch_set" -eq 0 ]; then + arch_set=1 + arch_flag="$1" + cuda_args="$cuda_args $arch_flag" + elif [ "$arch_flag" != "$1" ]; then + echo "ARCH is being set twice with different flags, which is not handled" + echo "$arch_flag" + echo "$1" + exit 1 + fi + ;; + #Handle -arch argument (if its not set use a default) this is the version without = sign + -arch|-gencode) + if [ "$arch_set" -eq 0 ]; then + arch_set=1 + arch_flag="$1 $2" + cuda_args="$cuda_args $arch_flag" + shift + elif [ "$arch_flag" != "$1 $2" ]; then + echo "ARCH is being set twice with different flags, which is not handled" + echo "$arch_flag" + echo "$1 $2" + exit 1 + fi ;; #Handle -code argument (if its not set use a default) this is the version with = sign -code*) cuda_args="$cuda_args $1" ;; - #Handle -arch argument (if its not set use a default) this is the version without = sign - -arch|-gencode) - cuda_args="$cuda_args $1 $2" - arch_set=1 - shift - ;; #Handle -code argument (if its not set use a default) this is the version without = sign -code) cuda_args="$cuda_args $1 $2" diff --git a/packages/kokkos/cmake/KokkosCore_config.h.in b/packages/kokkos/cmake/KokkosCore_config.h.in index fbfae3711..3455b0cb4 100644 --- a/packages/kokkos/cmake/KokkosCore_config.h.in +++ b/packages/kokkos/cmake/KokkosCore_config.h.in @@ -99,5 +99,6 @@ #cmakedefine KOKKOS_ARCH_AMPERE86 #cmakedefine KOKKOS_ARCH_AMD_ZEN #cmakedefine KOKKOS_ARCH_AMD_ZEN2 +#cmakedefine KOKKOS_ARCH_AMD_ZEN3 #cmakedefine KOKKOS_IMPL_DISABLE_SYCL_DEVICE_PRINTF diff --git a/packages/kokkos/cmake/kokkos_arch.cmake b/packages/kokkos/cmake/kokkos_arch.cmake index ec18e70a3..e8b85542c 100644 --- a/packages/kokkos/cmake/kokkos_arch.cmake +++ b/packages/kokkos/cmake/kokkos_arch.cmake @@ -63,6 +63,7 @@ KOKKOS_ARCH_OPTION(AMPERE80 GPU "NVIDIA Ampere generation CC 8.0") 
KOKKOS_ARCH_OPTION(AMPERE86 GPU "NVIDIA Ampere generation CC 8.6") KOKKOS_ARCH_OPTION(ZEN HOST "AMD Zen architecture") KOKKOS_ARCH_OPTION(ZEN2 HOST "AMD Zen2 architecture") +KOKKOS_ARCH_OPTION(ZEN3 HOST "AMD Zen3 architecture") KOKKOS_ARCH_OPTION(VEGA900 GPU "AMD GPU MI25 GFX900") KOKKOS_ARCH_OPTION(VEGA906 GPU "AMD GPU MI50/MI60 GFX906") KOKKOS_ARCH_OPTION(VEGA908 GPU "AMD GPU MI100 GFX908") @@ -215,6 +216,15 @@ IF (KOKKOS_ARCH_ZEN2) SET(KOKKOS_ARCH_AMD_AVX2 ON) ENDIF() +IF (KOKKOS_ARCH_ZEN3) + COMPILER_SPECIFIC_FLAGS( + Intel -mavx2 + DEFAULT -march=znver3 -mtune=znver3 + ) + SET(KOKKOS_ARCH_AMD_ZEN3 ON) + SET(KOKKOS_ARCH_AMD_AVX2 ON) +ENDIF() + IF (KOKKOS_ARCH_WSM) COMPILER_SPECIFIC_FLAGS( Intel -xSSE4.2 @@ -284,7 +294,7 @@ IF (KOKKOS_ARCH_SKX) ) ENDIF() -IF (KOKKOS_ARCH_WSM OR KOKKOS_ARCH_SNB OR KOKKOS_ARCH_HSW OR KOKKOS_ARCH_BDW OR KOKKOS_ARCH_KNL OR KOKKOS_ARCH_SKX OR KOKKOS_ARCH_ZEN OR KOKKOS_ARCH_ZEN2) +IF (KOKKOS_ARCH_WSM OR KOKKOS_ARCH_SNB OR KOKKOS_ARCH_HSW OR KOKKOS_ARCH_BDW OR KOKKOS_ARCH_KNL OR KOKKOS_ARCH_SKX OR KOKKOS_ARCH_ZEN OR KOKKOS_ARCH_ZEN2 OR KOKKOS_ARCH_ZEN3) SET(KOKKOS_USE_ISA_X86_64 ON) ENDIF() @@ -457,7 +467,7 @@ IF (KOKKOS_ENABLE_OPENMPTARGET) ENDIF() IF (KOKKOS_ARCH_INTEL_GEN) COMPILER_SPECIFIC_FLAGS( - IntelClang -fopenmp-targets=spir64 -D__STRICT_ANSI__ + IntelLLVM -fopenmp-targets=spir64 -D__STRICT_ANSI__ ) ENDIF() ENDIF() diff --git a/packages/kokkos/cmake/kokkos_compiler_id.cmake b/packages/kokkos/cmake/kokkos_compiler_id.cmake index 4434d6928..23847263a 100644 --- a/packages/kokkos/cmake/kokkos_compiler_id.cmake +++ b/packages/kokkos/cmake/kokkos_compiler_id.cmake @@ -101,7 +101,7 @@ IF(KOKKOS_CXX_COMPILER_ID STREQUAL Clang) OUTPUT_STRIP_TRAILING_WHITESPACE) IF (INTERNAL_HAVE_INTEL_COMPILER) #not actually Clang SET(KOKKOS_CLANG_IS_INTEL TRUE) - SET(KOKKOS_CXX_COMPILER_ID IntelClang CACHE STRING INTERNAL FORCE) + SET(KOKKOS_CXX_COMPILER_ID IntelLLVM CACHE STRING INTERNAL FORCE) ENDIF() ENDIF() diff --git 
a/packages/kokkos/cmake/kokkos_enable_devices.cmake b/packages/kokkos/cmake/kokkos_enable_devices.cmake index 445dad47c..d7f83ddbd 100644 --- a/packages/kokkos/cmake/kokkos_enable_devices.cmake +++ b/packages/kokkos/cmake/kokkos_enable_devices.cmake @@ -61,7 +61,7 @@ IF(KOKKOS_ENABLE_OPENMP) COMPILER_SPECIFIC_FLAGS( COMPILER_ID KOKKOS_CXX_HOST_COMPILER_ID Clang -Xcompiler ${ClangOpenMPFlag} - IntelClang -Xcompiler -fiopenmp + IntelLLVM -Xcompiler -fiopenmp PGI -Xcompiler -mp Cray NO-VALUE-SPECIFIED XL -Xcompiler -qsmp=omp @@ -70,7 +70,7 @@ IF(KOKKOS_ENABLE_OPENMP) ELSE() COMPILER_SPECIFIC_FLAGS( Clang ${ClangOpenMPFlag} - IntelClang -fiopenmp + IntelLLVM -fiopenmp AppleClang -Xpreprocessor -fopenmp PGI -mp Cray NO-VALUE-SPECIFIED @@ -92,7 +92,7 @@ IF (KOKKOS_ENABLE_OPENMPTARGET) COMPILER_SPECIFIC_FLAGS( Clang ${ClangOpenMPFlag} -Wno-openmp-mapping - IntelClang -fiopenmp -Wno-openmp-mapping + IntelLLVM -fiopenmp -Wno-openmp-mapping XL -qsmp=omp -qoffload -qnoeh PGI -mp=gpu DEFAULT -fopenmp diff --git a/packages/kokkos/cmake/kokkos_functions.cmake b/packages/kokkos/cmake/kokkos_functions.cmake index 858322394..e1a3e5f8b 100644 --- a/packages/kokkos/cmake/kokkos_functions.cmake +++ b/packages/kokkos/cmake/kokkos_functions.cmake @@ -773,7 +773,7 @@ FUNCTION(kokkos_link_tpl TARGET) ENDFUNCTION() FUNCTION(COMPILER_SPECIFIC_OPTIONS_HELPER) - SET(COMPILERS NVIDIA PGI XL DEFAULT Cray Intel Clang AppleClang IntelClang GNU HIPCC Fujitsu) + SET(COMPILERS NVIDIA PGI XL DEFAULT Cray Intel Clang AppleClang IntelLLVM GNU HIPCC Fujitsu) CMAKE_PARSE_ARGUMENTS( PARSE "LINK_OPTIONS;COMPILE_OPTIONS;COMPILE_DEFINITIONS;LINK_LIBRARIES" diff --git a/packages/kokkos/containers/src/impl/Kokkos_StaticCrsGraph_factory.hpp b/packages/kokkos/containers/src/impl/Kokkos_StaticCrsGraph_factory.hpp index f22e5d1ec..00d3eafd2 100644 --- a/packages/kokkos/containers/src/impl/Kokkos_StaticCrsGraph_factory.hpp +++ b/packages/kokkos/containers/src/impl/Kokkos_StaticCrsGraph_factory.hpp @@ -114,15 
+114,11 @@ namespace Kokkos { template <class StaticCrsGraphType, class InputSizeType> inline typename StaticCrsGraphType::staticcrsgraph_type create_staticcrsgraph( const std::string& label, const std::vector<InputSizeType>& input) { - using output_type = StaticCrsGraphType; - // using input_type = std::vector<InputSizeType>; // unused - + using output_type = StaticCrsGraphType; using entries_type = typename output_type::entries_type; - - using work_type = View<typename output_type::size_type[], - typename output_type::array_layout, - typename output_type::execution_space, - typename output_type::memory_traits>; + using work_type = View< + typename output_type::size_type[], typename output_type::array_layout, + typename output_type::device_type, typename output_type::memory_traits>; output_type output; @@ -161,10 +157,9 @@ inline typename StaticCrsGraphType::staticcrsgraph_type create_staticcrsgraph( static_assert(entries_type::rank == 1, "Graph entries view must be rank one"); - using work_type = View<typename output_type::size_type[], - typename output_type::array_layout, - typename output_type::execution_space, - typename output_type::memory_traits>; + using work_type = View< + typename output_type::size_type[], typename output_type::array_layout, + typename output_type::device_type, typename output_type::memory_traits>; output_type output; diff --git a/packages/kokkos/core/src/Kokkos_OpenMPTargetSpace.hpp b/packages/kokkos/core/src/Kokkos_OpenMPTargetSpace.hpp index dc5e0194a..58d723ac1 100644 --- a/packages/kokkos/core/src/Kokkos_OpenMPTargetSpace.hpp +++ b/packages/kokkos/core/src/Kokkos_OpenMPTargetSpace.hpp @@ -179,8 +179,6 @@ class SharedAllocationRecord<Kokkos::Experimental::OpenMPTargetSpace, void> const RecordBase::function_type arg_dealloc = &deallocate); public: - std::string get_label() const; - KOKKOS_INLINE_FUNCTION static SharedAllocationRecord* allocate( const Kokkos::Experimental::OpenMPTargetSpace& arg_space, const std::string& arg_label, 
const size_t arg_alloc_size) { @@ -190,10 +188,6 @@ class SharedAllocationRecord<Kokkos::Experimental::OpenMPTargetSpace, void> return nullptr; #endif } - - /**\brief Reallocate tracked memory in the space */ - static void* reallocate_tracked(void* const arg_alloc_ptr, - const size_t arg_alloc_size); }; } // namespace Impl diff --git a/packages/kokkos/core/src/Kokkos_SYCL.hpp b/packages/kokkos/core/src/Kokkos_SYCL.hpp index aa720371d..8ee76b438 100644 --- a/packages/kokkos/core/src/Kokkos_SYCL.hpp +++ b/packages/kokkos/core/src/Kokkos_SYCL.hpp @@ -113,7 +113,7 @@ class SYCL { void fence() const; /// \brief Print configuration information to the given output stream. - static void print_configuration(std::ostream&, const bool detail = false); + void print_configuration(std::ostream&, const bool detail = false); /// \brief Free any resources being consumed by the device. static void impl_finalize(); @@ -131,12 +131,10 @@ class SYCL { sycl::device get_device() const; friend std::ostream& operator<<(std::ostream& os, const SYCLDevice& that) { - return that.info(os); + return SYCL::impl_sycl_info(os, that.m_device); } private: - std::ostream& info(std::ostream& os) const; - sycl::device m_device; }; @@ -154,6 +152,9 @@ class SYCL { } private: + static std::ostream& impl_sycl_info(std::ostream& os, + const sycl::device& device); + Kokkos::Impl::HostSharedPtr<Impl::SYCLInternal> m_space_instance; }; diff --git a/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTargetSpace.cpp b/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTargetSpace.cpp index 6fbb4245b..b99b0017c 100644 --- a/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTargetSpace.cpp +++ b/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTargetSpace.cpp @@ -107,12 +107,6 @@ SharedAllocationRecord<Kokkos::Experimental::OpenMPTargetSpace, SharedAllocationRecord<void, void>::m_alloc_size); } -// TODO: Implement deep copy back see CudaSpace -std::string 
SharedAllocationRecord<Kokkos::Experimental::OpenMPTargetSpace, - void>::get_label() const { - return std::string("OpenMPTargetAllocation"); -} - SharedAllocationRecord<Kokkos::Experimental::OpenMPTargetSpace, void>:: SharedAllocationRecord( const Kokkos::Experimental::OpenMPTargetSpace &arg_space, @@ -141,23 +135,6 @@ SharedAllocationRecord<Kokkos::Experimental::OpenMPTargetSpace, void>:: //---------------------------------------------------------------------------- -void *SharedAllocationRecord<Kokkos::Experimental::OpenMPTargetSpace, void>:: - reallocate_tracked(void *const arg_alloc_ptr, const size_t arg_alloc_size) { - SharedAllocationRecord *const r_old = get_record(arg_alloc_ptr); - SharedAllocationRecord *const r_new = - allocate(r_old->m_space, r_old->get_label(), arg_alloc_size); - - // Kokkos::Impl::DeepCopy<OpenMPTargetSpace,OpenMPTargetSpace>( r_new->data() - // , r_old->data() - // , std::min( r_old->size() , - // r_new->size() ) ); - - RecordBase::increment(r_new); - RecordBase::decrement(r_old); - - return r_new->data(); -} - } // namespace Impl } // namespace Kokkos diff --git a/packages/kokkos/core/src/SYCL/Kokkos_SYCL.cpp b/packages/kokkos/core/src/SYCL/Kokkos_SYCL.cpp index 9c29eb190..3a09ee919 100644 --- a/packages/kokkos/core/src/SYCL/Kokkos_SYCL.cpp +++ b/packages/kokkos/core/src/SYCL/Kokkos_SYCL.cpp @@ -105,6 +105,12 @@ bool SYCL::impl_is_initialized() { void SYCL::impl_finalize() { Impl::SYCLInternal::singleton().finalize(); } +void SYCL::print_configuration(std::ostream& s, const bool detailed) { + s << "macro KOKKOS_ENABLE_SYCL : defined" << '\n'; + if (detailed) + SYCL::impl_sycl_info(s, m_space_instance->m_queue->get_device()); +} + void SYCL::fence() const { Impl::SYCLInternal::fence(*m_space_instance->m_queue); } @@ -143,119 +149,118 @@ void SYCL::impl_initialize(SYCL::SYCLDevice d) { Impl::SYCLInternal::singleton().initialize(d.get_device()); } -std::ostream& SYCL::SYCLDevice::info(std::ostream& os) const { +std::ostream& 
SYCL::impl_sycl_info(std::ostream& os, + const sycl::device& device) { using namespace sycl::info; - return os << "Name: " << m_device.get_info<device::name>() - << "\nDriver Version: " - << m_device.get_info<device::driver_version>() - << "\nIs Host: " << m_device.is_host() - << "\nIs CPU: " << m_device.is_cpu() - << "\nIs GPU: " << m_device.is_gpu() - << "\nIs Accelerator: " << m_device.is_accelerator() - << "\nVendor Id: " << m_device.get_info<device::vendor_id>() + return os << "Name: " << device.get_info<device::name>() + << "\nDriver Version: " << device.get_info<device::driver_version>() + << "\nIs Host: " << device.is_host() + << "\nIs CPU: " << device.is_cpu() + << "\nIs GPU: " << device.is_gpu() + << "\nIs Accelerator: " << device.is_accelerator() + << "\nVendor Id: " << device.get_info<device::vendor_id>() << "\nMax Compute Units: " - << m_device.get_info<device::max_compute_units>() + << device.get_info<device::max_compute_units>() << "\nMax Work Item Dimensions: " - << m_device.get_info<device::max_work_item_dimensions>() + << device.get_info<device::max_work_item_dimensions>() << "\nMax Work Group Size: " - << m_device.get_info<device::max_work_group_size>() + << device.get_info<device::max_work_group_size>() << "\nPreferred Vector Width Char: " - << m_device.get_info<device::preferred_vector_width_char>() + << device.get_info<device::preferred_vector_width_char>() << "\nPreferred Vector Width Short: " - << m_device.get_info<device::preferred_vector_width_short>() + << device.get_info<device::preferred_vector_width_short>() << "\nPreferred Vector Width Int: " - << m_device.get_info<device::preferred_vector_width_int>() + << device.get_info<device::preferred_vector_width_int>() << "\nPreferred Vector Width Long: " - << m_device.get_info<device::preferred_vector_width_long>() + << device.get_info<device::preferred_vector_width_long>() << "\nPreferred Vector Width Float: " - << m_device.get_info<device::preferred_vector_width_float>() + << 
device.get_info<device::preferred_vector_width_float>() << "\nPreferred Vector Width Double: " - << m_device.get_info<device::preferred_vector_width_double>() + << device.get_info<device::preferred_vector_width_double>() << "\nPreferred Vector Width Half: " - << m_device.get_info<device::preferred_vector_width_half>() + << device.get_info<device::preferred_vector_width_half>() << "\nNative Vector Width Char: " - << m_device.get_info<device::native_vector_width_char>() + << device.get_info<device::native_vector_width_char>() << "\nNative Vector Width Short: " - << m_device.get_info<device::native_vector_width_short>() + << device.get_info<device::native_vector_width_short>() << "\nNative Vector Width Int: " - << m_device.get_info<device::native_vector_width_int>() + << device.get_info<device::native_vector_width_int>() << "\nNative Vector Width Long: " - << m_device.get_info<device::native_vector_width_long>() + << device.get_info<device::native_vector_width_long>() << "\nNative Vector Width Float: " - << m_device.get_info<device::native_vector_width_float>() + << device.get_info<device::native_vector_width_float>() << "\nNative Vector Width Double: " - << m_device.get_info<device::native_vector_width_double>() + << device.get_info<device::native_vector_width_double>() << "\nNative Vector Width Half: " - << m_device.get_info<device::native_vector_width_half>() - << "\nAddress Bits: " << m_device.get_info<device::address_bits>() - << "\nImage Support: " << m_device.get_info<device::image_support>() + << device.get_info<device::native_vector_width_half>() + << "\nAddress Bits: " << device.get_info<device::address_bits>() + << "\nImage Support: " << device.get_info<device::image_support>() << "\nMax Mem Alloc Size: " - << m_device.get_info<device::max_mem_alloc_size>() + << device.get_info<device::max_mem_alloc_size>() << "\nMax Read Image Args: " - << m_device.get_info<device::max_read_image_args>() + << device.get_info<device::max_read_image_args>() << "\nImage2d Max 
Width: " - << m_device.get_info<device::image2d_max_width>() + << device.get_info<device::image2d_max_width>() << "\nImage2d Max Height: " - << m_device.get_info<device::image2d_max_height>() + << device.get_info<device::image2d_max_height>() << "\nImage3d Max Width: " - << m_device.get_info<device::image3d_max_width>() + << device.get_info<device::image3d_max_width>() << "\nImage3d Max Height: " - << m_device.get_info<device::image3d_max_height>() + << device.get_info<device::image3d_max_height>() << "\nImage3d Max Depth: " - << m_device.get_info<device::image3d_max_depth>() + << device.get_info<device::image3d_max_depth>() << "\nImage Max Buffer Size: " - << m_device.get_info<device::image_max_buffer_size>() + << device.get_info<device::image_max_buffer_size>() << "\nImage Max Array Size: " - << m_device.get_info<device::image_max_array_size>() - << "\nMax Samplers: " << m_device.get_info<device::max_samplers>() + << device.get_info<device::image_max_array_size>() + << "\nMax Samplers: " << device.get_info<device::max_samplers>() << "\nMax Parameter Size: " - << m_device.get_info<device::max_parameter_size>() + << device.get_info<device::max_parameter_size>() << "\nMem Base Addr Align: " - << m_device.get_info<device::mem_base_addr_align>() + << device.get_info<device::mem_base_addr_align>() << "\nGlobal Cache Mem Line Size: " - << m_device.get_info<device::global_mem_cache_line_size>() + << device.get_info<device::global_mem_cache_line_size>() << "\nGlobal Mem Cache Size: " - << m_device.get_info<device::global_mem_cache_size>() + << device.get_info<device::global_mem_cache_size>() << "\nGlobal Mem Size: " - << m_device.get_info<device::global_mem_size>() + << device.get_info<device::global_mem_size>() << "\nMax Constant Buffer Size: " - << m_device.get_info<device::max_constant_buffer_size>() + << device.get_info<device::max_constant_buffer_size>() << "\nMax Constant Args: " - << m_device.get_info<device::max_constant_args>() - << "\nLocal Mem Size: " - << 
m_device.get_info<device::local_mem_size>() + << device.get_info<device::max_constant_args>() + << "\nLocal Mem Size: " << device.get_info<device::local_mem_size>() << "\nError Correction Support: " - << m_device.get_info<device::error_correction_support>() + << device.get_info<device::error_correction_support>() << "\nHost Unified Memory: " - << m_device.get_info<device::host_unified_memory>() + << device.get_info<device::host_unified_memory>() << "\nProfiling Timer Resolution: " - << m_device.get_info<device::profiling_timer_resolution>() + << device.get_info<device::profiling_timer_resolution>() << "\nIs Endian Little: " - << m_device.get_info<device::is_endian_little>() - << "\nIs Available: " << m_device.get_info<device::is_available>() + << device.get_info<device::is_endian_little>() + << "\nIs Available: " << device.get_info<device::is_available>() << "\nIs Compiler Available: " - << m_device.get_info<device::is_compiler_available>() + << device.get_info<device::is_compiler_available>() << "\nIs Linker Available: " - << m_device.get_info<device::is_linker_available>() + << device.get_info<device::is_linker_available>() << "\nQueue Profiling: " - << m_device.get_info<device::queue_profiling>() + << device.get_info<device::queue_profiling>() << "\nBuilt In Kernels: " << Container<std::vector<std::string>>( - m_device.get_info<device::built_in_kernels>()) - << "\nVendor: " << m_device.get_info<device::vendor>() - << "\nProfile: " << m_device.get_info<device::profile>() - << "\nVersion: " << m_device.get_info<device::version>() + device.get_info<device::built_in_kernels>()) + << "\nVendor: " << device.get_info<device::vendor>() + << "\nProfile: " << device.get_info<device::profile>() + << "\nVersion: " << device.get_info<device::version>() << "\nExtensions: " << Container<std::vector<std::string>>( - m_device.get_info<device::extensions>()) + device.get_info<device::extensions>()) << "\nPrintf Buffer Size: " - << m_device.get_info<device::printf_buffer_size>() + 
<< device.get_info<device::printf_buffer_size>() << "\nPreferred Interop User Sync: " - << m_device.get_info<device::preferred_interop_user_sync>() + << device.get_info<device::preferred_interop_user_sync>() << "\nPartition Max Sub Devices: " - << m_device.get_info<device::partition_max_sub_devices>() + << device.get_info<device::partition_max_sub_devices>() << "\nReference Count: " - << m_device.get_info<device::reference_count>() << '\n'; + << device.get_info<device::reference_count>() << '\n'; } namespace Impl { @@ -293,15 +298,13 @@ void SYCLSpaceInitializer::fence() { } void SYCLSpaceInitializer::print_configuration(std::ostream& msg, - const bool /*detail*/) { + const bool detail) { msg << "Devices:" << std::endl; msg << " KOKKOS_ENABLE_SYCL: "; msg << "yes" << std::endl; msg << "\nRuntime Configuration:" << std::endl; - // FIXME_SYCL not implemented - std::abort(); - // Experimental::SYCL::print_configuration(msg, detail); + Experimental::SYCL{}.print_configuration(msg, detail); } } // namespace Impl diff --git a/packages/kokkos/core/src/impl/Kokkos_Atomic_Windows.hpp b/packages/kokkos/core/src/impl/Kokkos_Atomic_Windows.hpp index 3f2e8914e..2f824566b 100644 --- a/packages/kokkos/core/src/impl/Kokkos_Atomic_Windows.hpp +++ b/packages/kokkos/core/src/impl/Kokkos_Atomic_Windows.hpp @@ -152,12 +152,6 @@ inline T atomic_compare_exchange( ((LONGLONG*)&compare_and_result)); return compare_and_result; } - -template <typename T> -inline T atomic_compare_exchange_strong(volatile T* const dest, - const T& compare, const T& val) { - return atomic_compare_exchange(dest, compare, val); -} #endif } // namespace Kokkos diff --git a/packages/kokkos/core/unit_test/configuration/test-code/test_config_arch_list.bash b/packages/kokkos/core/unit_test/configuration/test-code/test_config_arch_list.bash index 5ff781b96..8fe8e2b5e 100755 --- a/packages/kokkos/core/unit_test/configuration/test-code/test_config_arch_list.bash +++ 
b/packages/kokkos/core/unit_test/configuration/test-code/test_config_arch_list.bash @@ -4,7 +4,7 @@ HostArch=(SNB HSW SKX KNL) DeviceArch=(Kepler35 Kepler37 Pascal60 Pascal61 Volta70) if [ ! -z "$KOKKOS_HOST_ARCH_TEST" ]; then export KOKKOS_ARCH_TEST=1 - HostArch=(WSM SNB HSW SKX WSM AMDAVX ARMv80 ARMv81 BDW KNC KNL BGQ Power7 Power8 Power9 Zen Zen2 ARMv8_ThunderX ARMv8_ThunderX2) + HostArch=(WSM SNB HSW SKX WSM AMDAVX ARMv80 ARMv81 BDW KNC KNL BGQ Power7 Power8 Power9 Zen Zen2 Zen3 ARMv8_ThunderX ARMv8_ThunderX2) DeviceArch=() fi diff --git a/packages/kokkos/generate_makefile.bash b/packages/kokkos/generate_makefile.bash index e9871b436..c601e0ee1 100755 --- a/packages/kokkos/generate_makefile.bash +++ b/packages/kokkos/generate_makefile.bash @@ -157,6 +157,7 @@ display_help_text() { echo " AMDAVX = AMD CPU" echo " ZEN = AMD Zen-Core CPU" echo " ZEN2 = AMD Zen2-Core CPU" + echo " ZEN3 = AMD Zen3-Core CPU" echo " [AMD: GPU]" echo " VEGA900 = AMD GPU MI25 GFX900" echo " VEGA906 = AMD GPU MI50/MI60 GFX906" diff --git a/packages/kokkos/gnu_generate_makefile.bash b/packages/kokkos/gnu_generate_makefile.bash index ea509669f..8a463270c 100755 --- a/packages/kokkos/gnu_generate_makefile.bash +++ b/packages/kokkos/gnu_generate_makefile.bash @@ -137,6 +137,7 @@ do echo " AMDAVX = AMD CPU" echo " ZEN = AMD Zen-Core CPU" echo " ZEN2 = AMD Zen2-Core CPU" + echo " ZEN3 = AMD Zen3-Core CPU" echo " [ARM]" echo " ARMv80 = ARMv8.0 Compatible CPU" echo " ARMv81 = ARMv8.1 Compatible CPU" diff --git a/packages/kokkos/master_history.txt b/packages/kokkos/master_history.txt index 7a58f593d..be8a5e7da 100644 --- a/packages/kokkos/master_history.txt +++ b/packages/kokkos/master_history.txt @@ -24,3 +24,4 @@ tag: 3.2.00 date: 08:19:2020 master: 3b2fdc7e release: 5dc6d303 tag: 3.3.00 date: 12:16:2020 master: 734f577a release: 1535ba5c tag: 3.3.01 date: 01:06:2021 master: 6d65b5a3 release: 4d23839c tag: 3.4.00 date: 04:26:2021 master: 1fb0c284 release: 5d7738d6 +tag: 3.4.01 date: 05:20:2021 
master: 4b97a22f release: 410b15c8 diff --git a/packages/kokkos/scripts/testing_scripts/generate_makefile.bash b/packages/kokkos/scripts/testing_scripts/generate_makefile.bash index f21124ed6..ff9620efa 100755 --- a/packages/kokkos/scripts/testing_scripts/generate_makefile.bash +++ b/packages/kokkos/scripts/testing_scripts/generate_makefile.bash @@ -129,6 +129,7 @@ do echo " AMDAVX = AMD CPU" echo " ZEN = AMD Zen-Core CPU" echo " ZEN2 = AMD Zen2-Core CPU" + echo " ZEN3 = AMD Zen3-Core CPU" echo " [ARM]" echo " ARMv80 = ARMv8.0 Compatible CPU" echo " ARMv81 = ARMv8.1 Compatible CPU" -- GitLab